Actual source code: dsnhep.c
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2013, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
8: SLEPc is free software: you can redistribute it and/or modify it under the
9: terms of version 3 of the GNU Lesser General Public License as published by
10: the Free Software Foundation.
12: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
13: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
15: more details.
17: You should have received a copy of the GNU Lesser General Public License
18: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20: */
22: #include <slepc-private/dsimpl.h> /*I "slepcds.h" I*/
23: #include <slepcblaslapack.h>
27: PetscErrorCode DSAllocate_NHEP(DS ds,PetscInt ld)
28: {
32: DSAllocateMat_Private(ds,DS_MAT_A);
33: DSAllocateMat_Private(ds,DS_MAT_Q);
34: PetscFree(ds->perm);
35: PetscMalloc(ld*sizeof(PetscInt),&ds->perm);
36: PetscLogObjectMemory(ds,ld*sizeof(PetscInt));
37: return(0);
38: }
42: PetscErrorCode DSView_NHEP(DS ds,PetscViewer viewer)
43: {
47: DSViewMat_Private(ds,viewer,DS_MAT_A);
48: if (ds->state>DS_STATE_INTERMEDIATE) {
49: DSViewMat_Private(ds,viewer,DS_MAT_Q);
50: }
51: if (ds->mat[DS_MAT_X]) {
52: DSViewMat_Private(ds,viewer,DS_MAT_X);
53: }
54: if (ds->mat[DS_MAT_Y]) {
55: DSViewMat_Private(ds,viewer,DS_MAT_Y);
56: }
57: return(0);
58: }
62: PetscErrorCode DSVectors_NHEP_Refined_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
63: {
64: #if defined(SLEPC_MISSING_LAPACK_GESVD)
66: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
67: #else
69: PetscInt i,j;
70: PetscBLASInt info,ld,n,n1,lwork,inc=1;
71: PetscScalar sdummy,done=1.0,zero=0.0;
72: PetscReal *sigma;
73: PetscBool iscomplex = PETSC_FALSE;
74: PetscScalar *A = ds->mat[DS_MAT_A];
75: PetscScalar *Q = ds->mat[DS_MAT_Q];
76: PetscScalar *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
77: PetscScalar *W;
80: if (left) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for left vectors");
81: PetscBLASIntCast(ds->n,&n);
82: PetscBLASIntCast(ds->ld,&ld);
83: n1 = n+1;
84: if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
85: if (iscomplex) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complex eigenvalues yet");
86: DSAllocateWork_Private(ds,5*ld,6*ld,0);
87: DSAllocateMat_Private(ds,DS_MAT_W);
88: W = ds->mat[DS_MAT_W];
89: lwork = 5*ld;
90: sigma = ds->rwork+5*ld;
92: /* build A-w*I in W */
93: for (j=0;j<n;j++)
94: for (i=0;i<=n;i++)
95: W[i+j*ld] = A[i+j*ld];
96: for (i=0;i<n;i++)
97: W[i+i*ld] -= A[(*k)+(*k)*ld];
99: /* compute SVD of W */
100: #if !defined(PETSC_USE_COMPLEX)
101: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,&info));
102: #else
103: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,ds->rwork,&info));
104: #endif
105: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info);
107: /* the smallest singular value is the new error estimate */
108: if (rnorm) *rnorm = sigma[n-1];
110: /* update vector with right singular vector associated to smallest singular value,
111: accumulating the transformation matrix Q */
112: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,W+n-1,&ld,&zero,X+(*k)*ld,&inc));
113: return(0);
114: #endif
115: }
119: PetscErrorCode DSVectors_NHEP_Refined_All(DS ds,PetscBool left)
120: {
122: PetscInt i;
125: for (i=0;i<ds->n;i++) {
126: DSVectors_NHEP_Refined_Some(ds,&i,NULL,left);
127: }
128: return(0);
129: }
133: PetscErrorCode DSVectors_NHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
134: {
135: #if defined(SLEPC_MISSING_LAPACK_TREVC)
137: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
138: #else
140: PetscInt i;
141: PetscBLASInt mm=1,mout,info,ld,n,inc = 1;
142: PetscScalar tmp,done=1.0,zero=0.0;
143: PetscReal norm;
144: PetscBool iscomplex = PETSC_FALSE;
145: PetscBLASInt *select;
146: PetscScalar *A = ds->mat[DS_MAT_A];
147: PetscScalar *Q = ds->mat[DS_MAT_Q];
148: PetscScalar *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
149: PetscScalar *Y;
152: PetscBLASIntCast(ds->n,&n);
153: PetscBLASIntCast(ds->ld,&ld);
154: DSAllocateWork_Private(ds,0,0,ld);
155: select = ds->iwork;
156: for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;
158: /* Compute k-th eigenvector Y of A */
159: Y = X+(*k)*ld;
160: select[*k] = (PetscBLASInt)PETSC_TRUE;
161: #if !defined(PETSC_USE_COMPLEX)
162: if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
163: mm = iscomplex? 2: 1;
164: if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
165: DSAllocateWork_Private(ds,3*ld,0,0);
166: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,&info));
167: #else
168: DSAllocateWork_Private(ds,2*ld,ld,0);
169: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,ds->rwork,&info));
170: #endif
171: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREVC %d",info);
172: if (mout != mm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Inconsistent arguments");
174: /* accumulate and normalize eigenvectors */
175: if (ds->state>=DS_STATE_CONDENSED) {
176: PetscMemcpy(ds->work,Y,mout*ld*sizeof(PetscScalar));
177: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work,&inc,&zero,Y,&inc));
178: #if !defined(PETSC_USE_COMPLEX)
179: if (iscomplex) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work+ld,&inc,&zero,Y+ld,&inc));
180: #endif
181: norm = BLASnrm2_(&n,Y,&inc);
182: #if !defined(PETSC_USE_COMPLEX)
183: if (iscomplex) {
184: tmp = BLASnrm2_(&n,Y+ld,&inc);
185: norm = SlepcAbsEigenvalue(norm,tmp);
186: }
187: #endif
188: tmp = 1.0 / norm;
189: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y,&inc));
190: #if !defined(PETSC_USE_COMPLEX)
191: if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y+ld,&inc));
192: #endif
193: }
195: /* set output arguments */
196: if (iscomplex) (*k)++;
197: if (rnorm) {
198: if (iscomplex) *rnorm = SlepcAbsEigenvalue(Y[n-1],Y[n-1+ld]);
199: else *rnorm = PetscAbsScalar(Y[n-1]);
200: }
201: return(0);
202: #endif
203: }
207: PetscErrorCode DSVectors_NHEP_Eigen_All(DS ds,PetscBool left)
208: {
209: #if defined(SLEPC_MISSING_LAPACK_TREVC)
211: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
212: #else
214: PetscBLASInt n,ld,mout,info;
215: PetscScalar *X,*Y,*A = ds->mat[DS_MAT_A];
216: const char *side,*back;
219: PetscBLASIntCast(ds->n,&n);
220: PetscBLASIntCast(ds->ld,&ld);
221: if (left) {
222: X = NULL;
223: Y = ds->mat[DS_MAT_Y];
224: side = "L";
225: } else {
226: X = ds->mat[DS_MAT_X];
227: Y = NULL;
228: side = "R";
229: }
230: if (ds->state>=DS_STATE_CONDENSED) {
231: /* DSSolve() has been called, backtransform with matrix Q */
232: back = "B";
233: PetscMemcpy(left?Y:X,ds->mat[DS_MAT_Q],ld*ld*sizeof(PetscScalar));
234: } else back = "A";
235: #if !defined(PETSC_USE_COMPLEX)
236: DSAllocateWork_Private(ds,3*ld,0,0);
237: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,&info));
238: #else
239: DSAllocateWork_Private(ds,2*ld,ld,0);
240: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,ds->rwork,&info));
241: #endif
242: if (info) SETERRQ1(PetscObjectComm((PetscObject)ds),PETSC_ERR_LIB,"Error in Lapack xTREVC %i",info);
243: return(0);
244: #endif
245: }
249: PetscErrorCode DSVectors_NHEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
250: {
254: switch (mat) {
255: case DS_MAT_X:
256: if (ds->refined) {
257: if (!ds->extrarow) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Refined vectors require activating the extra row");
258: if (j) {
259: DSVectors_NHEP_Refined_Some(ds,j,rnorm,PETSC_FALSE);
260: } else {
261: DSVectors_NHEP_Refined_All(ds,PETSC_FALSE);
262: }
263: } else {
264: if (j) {
265: DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_FALSE);
266: } else {
267: DSVectors_NHEP_Eigen_All(ds,PETSC_FALSE);
268: }
269: }
270: break;
271: case DS_MAT_Y:
272: if (ds->refined) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
273: if (j) {
274: DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_TRUE);
275: } else {
276: DSVectors_NHEP_Eigen_All(ds,PETSC_TRUE);
277: }
278: break;
279: case DS_MAT_U:
280: case DS_MAT_VT:
281: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
282: break;
283: default:
284: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
285: }
286: if (ds->state < DS_STATE_CONDENSED) {
287: DSSetState(ds,DS_STATE_CONDENSED);
288: }
289: return(0);
290: }
294: PetscErrorCode DSNormalize_NHEP(DS ds,DSMatType mat,PetscInt col)
295: {
297: PetscInt i,i0,i1;
298: PetscBLASInt ld,n,one = 1;
299: PetscScalar *A = ds->mat[DS_MAT_A],norm,*x;
300: #if !defined(PETSC_USE_COMPLEX)
301: PetscScalar norm0;
302: #endif
305: switch (mat) {
306: case DS_MAT_X:
307: case DS_MAT_Y:
308: case DS_MAT_Q:
309: /* Supported matrices */
310: break;
311: case DS_MAT_U:
312: case DS_MAT_VT:
313: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
314: break;
315: default:
316: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
317: }
319: PetscBLASIntCast(ds->n,&n);
320: PetscBLASIntCast(ds->ld,&ld);
321: DSGetArray(ds,mat,&x);
322: if (col < 0) {
323: i0 = 0; i1 = ds->n;
324: } else if (col>0 && A[ds->ld*(col-1)+col] != 0.0) {
325: i0 = col-1; i1 = col+1;
326: } else {
327: i0 = col; i1 = col+1;
328: }
329: for (i=i0;i<i1;i++) {
330: #if !defined(PETSC_USE_COMPLEX)
331: if (i<n-1 && A[ds->ld*i+i+1] != 0.0) {
332: norm = BLASnrm2_(&n,&x[ld*i],&one);
333: norm0 = BLASnrm2_(&n,&x[ld*(i+1)],&one);
334: norm = 1.0/SlepcAbsEigenvalue(norm,norm0);
335: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&norm,&x[ld*i],&one));
336: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&norm,&x[ld*(i+1)],&one));
337: i++;
338: } else
339: #endif
340: {
341: norm = BLASnrm2_(&n,&x[ld*i],&one);
342: norm = 1.0/norm;
343: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&norm,&x[ld*i],&one));
344: }
345: }
346: return(0);
347: }
351: PetscErrorCode DSSort_NHEP_Arbitrary(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
352: {
353: #if defined(SLEPC_MISSING_LAPACK_TRSEN)
355: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable");
356: #else
358: PetscInt i;
359: PetscBLASInt info,n,ld,mout,lwork,*selection;
360: PetscScalar *T = ds->mat[DS_MAT_A],*Q = ds->mat[DS_MAT_Q],*work;
361: #if !defined(PETSC_USE_COMPLEX)
362: PetscBLASInt *iwork,liwork;
363: #endif
366: if (!k) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must supply argument k");
367: PetscBLASIntCast(ds->n,&n);
368: PetscBLASIntCast(ds->ld,&ld);
369: #if !defined(PETSC_USE_COMPLEX)
370: lwork = n;
371: liwork = 1;
372: DSAllocateWork_Private(ds,lwork,0,liwork+n);
373: work = ds->work;
374: lwork = ds->lwork;
375: selection = ds->iwork;
376: iwork = ds->iwork + n;
377: liwork = ds->liwork - n;
378: #else
379: lwork = 1;
380: DSAllocateWork_Private(ds,lwork,0,n);
381: work = ds->work;
382: selection = ds->iwork;
383: #endif
384: /* Compute the selected eigenvalue to be in the leading position */
385: DSSortEigenvalues_Private(ds,rr,ri,ds->perm,PETSC_FALSE);
386: PetscMemzero(selection,n*sizeof(PetscBLASInt));
387: for (i=0;i<*k;i++) selection[ds->perm[i]] = 1;
388: #if !defined(PETSC_USE_COMPLEX)
389: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,wi,&mout,NULL,NULL,work,&lwork,iwork,&liwork,&info));
390: #else
391: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,&mout,NULL,NULL,work,&lwork,&info));
392: #endif
393: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTRSEN %d",info);
394: *k = mout;
395: return(0);
396: #endif
397: }
401: PetscErrorCode DSSort_NHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi)
402: {
403: #if defined(SLEPC_MISSING_LAPACK_TREXC)
405: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
406: #else
408: PetscScalar re;
409: PetscInt i,j,pos,result;
410: PetscBLASInt ifst,ilst,info,n,ld;
411: PetscScalar *T = ds->mat[DS_MAT_A];
412: PetscScalar *Q = ds->mat[DS_MAT_Q];
413: #if !defined(PETSC_USE_COMPLEX)
414: PetscScalar *work,im;
415: #endif
418: PetscBLASIntCast(ds->n,&n);
419: PetscBLASIntCast(ds->ld,&ld);
420: #if !defined(PETSC_USE_COMPLEX)
421: DSAllocateWork_Private(ds,ld,0,0);
422: work = ds->work;
423: #endif
424: /* selection sort */
425: for (i=ds->l;i<n-1;i++) {
426: re = wr[i];
427: #if !defined(PETSC_USE_COMPLEX)
428: im = wi[i];
429: #endif
430: pos = 0;
431: j=i+1; /* j points to the next eigenvalue */
432: #if !defined(PETSC_USE_COMPLEX)
433: if (im != 0) j=i+2;
434: #endif
435: /* find minimum eigenvalue */
436: for (;j<n;j++) {
437: #if !defined(PETSC_USE_COMPLEX)
438: (*ds->comparison)(re,im,wr[j],wi[j],&result,ds->comparisonctx);
439: #else
440: (*ds->comparison)(re,0.0,wr[j],0.0,&result,ds->comparisonctx);
441: #endif
442: if (result > 0) {
443: re = wr[j];
444: #if !defined(PETSC_USE_COMPLEX)
445: im = wi[j];
446: #endif
447: pos = j;
448: }
449: #if !defined(PETSC_USE_COMPLEX)
450: if (wi[j] != 0) j++;
451: #endif
452: }
453: if (pos) {
454: /* interchange blocks */
455: PetscBLASIntCast(pos+1,&ifst);
456: PetscBLASIntCast(i+1,&ilst);
457: #if !defined(PETSC_USE_COMPLEX)
458: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
459: #else
460: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
461: #endif
462: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREXC %d",info);
463: /* recover original eigenvalues from T matrix */
464: for (j=i;j<n;j++) {
465: wr[j] = T[j+j*ld];
466: #if !defined(PETSC_USE_COMPLEX)
467: if (j<n-1 && T[j+1+j*ld] != 0.0) {
468: /* complex conjugate eigenvalue */
469: wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) *
470: PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
471: wr[j+1] = wr[j];
472: wi[j+1] = -wi[j];
473: j++;
474: } else {
475: wi[j] = 0.0;
476: }
477: #endif
478: }
479: }
480: #if !defined(PETSC_USE_COMPLEX)
481: if (wi[i] != 0) i++;
482: #endif
483: }
484: return(0);
485: #endif
486: }
490: PetscErrorCode DSSort_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
491: {
495: if (!rr || wr == rr) {
496: DSSort_NHEP_Total(ds,wr,wi);
497: } else {
498: DSSort_NHEP_Arbitrary(ds,wr,wi,rr,ri,k);
499: }
500: return(0);
501: }
505: PetscErrorCode DSUpdateExtraRow_NHEP(DS ds)
506: {
508: PetscInt i;
509: PetscBLASInt n,ld,incx=1;
510: PetscScalar *A,*Q,*x,*y,one=1.0,zero=0.0;
513: PetscBLASIntCast(ds->n,&n);
514: PetscBLASIntCast(ds->ld,&ld);
515: A = ds->mat[DS_MAT_A];
516: Q = ds->mat[DS_MAT_Q];
517: DSAllocateWork_Private(ds,2*ld,0,0);
518: x = ds->work;
519: y = ds->work+ld;
520: for (i=0;i<n;i++) x[i] = A[n+i*ld];
521: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
522: for (i=0;i<n;i++) A[n+i*ld] = y[i];
523: ds->k = n;
524: return(0);
525: }
529: PetscErrorCode DSSolve_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
530: {
531: #if defined(SLEPC_MISSING_LAPACK_GEHRD) || defined(SLEPC_MISSING_LAPACK_ORGHR) || defined(PETSC_MISSING_LAPACK_HSEQR)
533: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEHRD/ORGHR/HSEQR - Lapack routines are unavailable");
534: #else
536: PetscScalar *work,*tau;
537: PetscInt i,j;
538: PetscBLASInt ilo,lwork,info,n,ld;
539: PetscScalar *A = ds->mat[DS_MAT_A];
540: PetscScalar *Q = ds->mat[DS_MAT_Q];
543: #if !defined(PETSC_USE_COMPLEX)
545: #endif
546: PetscBLASIntCast(ds->n,&n);
547: PetscBLASIntCast(ds->ld,&ld);
548: PetscBLASIntCast(ds->l+1,&ilo);
549: DSAllocateWork_Private(ds,ld+ld*ld,0,0);
550: tau = ds->work;
551: work = ds->work+ld;
552: lwork = ld*ld;
554: /* initialize orthogonal matrix */
555: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
556: for (i=0;i<n;i++)
557: Q[i+i*ld] = 1.0;
558: if (n==1) return(0);
560: /* reduce to upper Hessenberg form */
561: if (ds->state<DS_STATE_INTERMEDIATE) {
562: PetscStackCallBLAS("LAPACKgehrd",LAPACKgehrd_(&n,&ilo,&n,A,&ld,tau,work,&lwork,&info));
563: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEHRD %d",info);
564: for (j=0;j<n-1;j++) {
565: for (i=j+2;i<n;i++) {
566: Q[i+j*ld] = A[i+j*ld];
567: A[i+j*ld] = 0.0;
568: }
569: }
570: PetscStackCallBLAS("LAPACKorghr",LAPACKorghr_(&n,&ilo,&n,Q,&ld,tau,work,&lwork,&info));
571: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGHR %d",info);
572: }
574: /* compute the (real) Schur form */
575: #if !defined(PETSC_USE_COMPLEX)
576: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,wi,Q,&ld,work,&lwork,&info));
577: for (j=0;j<ds->l;j++) {
578: if (j==n-1 || A[j+1+j*ld] == 0.0) {
579: /* real eigenvalue */
580: wr[j] = A[j+j*ld];
581: wi[j] = 0.0;
582: } else {
583: /* complex eigenvalue */
584: wr[j] = A[j+j*ld];
585: wr[j+1] = A[j+j*ld];
586: wi[j] = PetscSqrtReal(PetscAbsReal(A[j+1+j*ld])) *
587: PetscSqrtReal(PetscAbsReal(A[j+(j+1)*ld]));
588: wi[j+1] = -wi[j];
589: j++;
590: }
591: }
592: #else
593: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,Q,&ld,work,&lwork,&info));
594: if (wi) for (i=ds->l;i<n;i++) wi[i] = 0.0;
595: #endif
596: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xHSEQR %d",info);
597: return(0);
598: #endif
599: }
603: PetscErrorCode DSTruncate_NHEP(DS ds,PetscInt n)
604: {
605: PetscInt i,newn,ld=ds->ld,l=ds->l;
606: PetscScalar *A;
609: if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
610: A = ds->mat[DS_MAT_A];
611: /* be careful not to break a diagonal 2x2 block */
612: if (A[n+(n-1)*ld]==0.0) newn = n;
613: else {
614: if (n<ds->n-1) newn = n+1;
615: else newn = n-1;
616: }
617: if (ds->extrarow && ds->k==ds->n) {
618: /* copy entries of extra row to the new position, then clean last row */
619: for (i=l;i<newn;i++) A[newn+i*ld] = A[ds->n+i*ld];
620: for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
621: }
622: ds->k = 0;
623: ds->n = newn;
624: return(0);
625: }
629: PetscErrorCode DSCond_NHEP(DS ds,PetscReal *cond)
630: {
631: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(SLEPC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE) || defined(SLEPC_MISSING_LAPACK_LANHS)
633: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE/LANHS - Lapack routines are unavailable");
634: #else
636: PetscScalar *work;
637: PetscReal *rwork;
638: PetscBLASInt *ipiv;
639: PetscBLASInt lwork,info,n,ld;
640: PetscReal hn,hin;
641: PetscScalar *A;
644: PetscBLASIntCast(ds->n,&n);
645: PetscBLASIntCast(ds->ld,&ld);
646: lwork = 8*ld;
647: DSAllocateWork_Private(ds,lwork,ld,ld);
648: work = ds->work;
649: rwork = ds->rwork;
650: ipiv = ds->iwork;
652: /* use workspace matrix W to avoid overwriting A */
653: DSAllocateMat_Private(ds,DS_MAT_W);
654: A = ds->mat[DS_MAT_W];
655: PetscMemcpy(A,ds->mat[DS_MAT_A],sizeof(PetscScalar)*ds->ld*ds->ld);
657: /* norm of A */
658: if (ds->state<DS_STATE_INTERMEDIATE) hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
659: else hn = LAPACKlanhs_("I",&n,A,&ld,rwork);
661: /* norm of inv(A) */
662: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
663: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRF %d",info);
664: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
665: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRI %d",info);
666: hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);
668: *cond = hn*hin;
669: return(0);
670: #endif
671: }
675: PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma)
676: {
677: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS)
679: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRS - Lapack routines are unavailable");
680: #else
682: PetscInt i,j;
683: PetscBLASInt *ipiv,info,n,ld,one=1,ncol;
684: PetscScalar *A,*B,*Q,*g=gin,*ghat;
685: PetscScalar done=1.0,dmone=-1.0,dzero=0.0;
686: PetscReal gnorm;
689: PetscBLASIntCast(ds->n,&n);
690: PetscBLASIntCast(ds->ld,&ld);
691: A = ds->mat[DS_MAT_A];
693: if (!recover) {
695: DSAllocateWork_Private(ds,0,0,ld);
696: ipiv = ds->iwork;
697: if (!g) {
698: DSAllocateWork_Private(ds,ld,0,0);
699: g = ds->work;
700: }
701: /* use workspace matrix W to factor A-tau*eye(n) */
702: DSAllocateMat_Private(ds,DS_MAT_W);
703: B = ds->mat[DS_MAT_W];
704: PetscMemcpy(B,A,sizeof(PetscScalar)*ld*ld);
706: /* Vector g initialy stores b = beta*e_n^T */
707: PetscMemzero(g,n*sizeof(PetscScalar));
708: g[n-1] = beta;
710: /* g = (A-tau*eye(n))'\b */
711: for (i=0;i<n;i++)
712: B[i+i*ld] -= tau;
713: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info));
714: if (info<0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to LU factorization");
715: if (info>0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Bad LU factorization");
716: PetscLogFlops(2.0*n*n*n/3.0);
717: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info));
718: if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
719: PetscLogFlops(2.0*n*n-n);
721: /* A = A + g*b' */
722: for (i=0;i<n;i++)
723: A[i+(n-1)*ld] += g[i]*beta;
725: } else { /* recover */
728: DSAllocateWork_Private(ds,ld,0,0);
729: ghat = ds->work;
730: Q = ds->mat[DS_MAT_Q];
732: /* g^ = -Q(:,idx)'*g */
733: PetscBLASIntCast(ds->l+ds->k,&ncol);
734: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one));
736: /* A = A + g^*b' */
737: for (i=0;i<ds->l+ds->k;i++)
738: for (j=ds->l;j<ds->l+ds->k;j++)
739: A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta;
741: /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */
742: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one));
743: }
745: /* Compute gamma factor */
746: if (gamma) {
747: gnorm = 0.0;
748: for (i=0;i<n;i++)
749: gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i]));
750: *gamma = PetscSqrtReal(1.0+gnorm);
751: }
752: return(0);
753: #endif
754: }
756: #define MAX_PADE 6
760: PetscErrorCode DSFunction_EXP_NHEP_PADE(DS ds)
761: {
762: #if defined(PETSC_MISSING_LAPACK_GESV) || defined(SLEPC_MISSING_LAPACK_LANGE)
764: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESV/LANGE - Lapack routines are unavailable");
765: #else
767: PetscBLASInt n,ld,ld2,*ipiv,info,inc=1;
768: PetscInt j,k,odd;
769: const PetscInt p=MAX_PADE;
770: PetscReal c[MAX_PADE+1],s;
771: PetscScalar scale,mone=-1.0,one=1.0,two=2.0,zero=0.0;
772: PetscScalar *A,*A2,*Q,*P,*W,*aux;
775: PetscBLASIntCast(ds->n,&n);
776: PetscBLASIntCast(ds->ld,&ld);
777: ld2 = ld*ld;
778: DSAllocateWork_Private(ds,0,ld,ld);
779: ipiv = ds->iwork;
780: if (!ds->mat[DS_MAT_W]) { DSAllocateMat_Private(ds,DS_MAT_W); }
781: if (!ds->mat[DS_MAT_Z]) { DSAllocateMat_Private(ds,DS_MAT_Z); }
782: A = ds->mat[DS_MAT_A];
783: A2 = ds->mat[DS_MAT_Z];
784: Q = ds->mat[DS_MAT_Q];
785: P = ds->mat[DS_MAT_F];
786: W = ds->mat[DS_MAT_W];
788: /* Pade' coefficients */
789: c[0] = 1.0;
790: for (k=1;k<=p;k++) {
791: c[k] = c[k-1]*(p+1-k)/(k*(2*p+1-k));
792: }
794: /* Scaling */
795: s = LAPACKlange_("I",&n,&n,A,&ld,ds->rwork);
796: if (s>0.5) {
797: s = PetscMax(0,(int)(PetscLogReal(s)/PetscLogReal(2.0)) + 2);
798: scale = PetscPowScalar(2,(-1)*s);
799: PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&scale,A,&inc));
800: }
802: /* Horner evaluation */
803: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,A,&ld,A,&ld,&zero,A2,&ld));
804: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
805: PetscMemzero(P,ld*ld*sizeof(PetscScalar));
806: for (j=0;j<n;j++) {
807: Q[j+j*ld] = c[p];
808: P[j+j*ld] = c[p-1];
809: }
811: odd = 1;
812: for (k=p-1;k>0;k--) {
813: if (odd==1) {
814: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,A2,&ld,&zero,W,&ld));
815: aux = Q;
816: Q = W;
817: W = aux;
818: for (j=0;j<n;j++)
819: Q[j+j*ld] = Q[j+j*ld] + c[k-1];
820: } else {
821: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,A2,&ld,&zero,W,&ld));
822: aux = P;
823: P = W;
824: W = aux;
825: for (j=0;j<n;j++)
826: P[j+j*ld] = P[j+j*ld] + c[k-1];
827: }
828: odd = 1-odd;
829: }
830: if (odd==1) {
831: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,A,&ld,&zero,W,&ld));
832: aux = Q;
833: Q = W;
834: W = aux;
835: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc));
836: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info));
837: PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc));
838: for (j=0;j<n;j++)
839: P[j+j*ld] = P[j+j*ld] + 1.0;
840: PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&mone,P,&inc));
841: } else {
842: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,A,&ld,&zero,W,&ld));
843: aux = P;
844: P = W;
845: W = aux;
846: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc));
847: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info));
848: PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc));
849: for (j=0;j<n;j++)
850: P[j+j*ld] = P[j+j*ld] + 1.0;
851: }
853: for (k=1;k<=s;k++) {
854: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,P,&ld,&zero,W,&ld));
855: PetscMemcpy(P,W,ld2*sizeof(PetscScalar));
856: }
857: if (P!=ds->mat[DS_MAT_F]) {
858: PetscMemcpy(ds->mat[DS_MAT_F],P,ld2*sizeof(PetscScalar));
859: }
860: return(0);
861: #endif
862: }
866: PETSC_EXTERN PetscErrorCode DSCreate_NHEP(DS ds)
867: {
869: ds->ops->allocate = DSAllocate_NHEP;
870: ds->ops->view = DSView_NHEP;
871: ds->ops->vectors = DSVectors_NHEP;
872: ds->ops->solve[0] = DSSolve_NHEP;
873: ds->ops->sort = DSSort_NHEP;
874: ds->ops->truncate = DSTruncate_NHEP;
875: ds->ops->update = DSUpdateExtraRow_NHEP;
876: ds->ops->cond = DSCond_NHEP;
877: ds->ops->transharm = DSTranslateHarmonic_NHEP;
878: ds->ops->normalize = DSNormalize_NHEP;
880: ds->ops->computefun[SLEPC_FUNCTION_EXP][0] = DSFunction_EXP_NHEP_PADE;
881: return(0);
882: }