Actual source code: contiguous.c

  1: /*
  2:    Subroutines related to special Vecs that share a common contiguous storage.

  4:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  5:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  6:    Copyright (c) 2002-2013, Universitat Politecnica de Valencia, Spain

  8:    This file is part of SLEPc.

 10:    SLEPc is free software: you can redistribute it and/or modify it under  the
 11:    terms of version 3 of the GNU Lesser General Public License as published by
 12:    the Free Software Foundation.

 14:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 15:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 16:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 17:    more details.

 19:    You  should have received a copy of the GNU Lesser General  Public  License
 20:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 21:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 22: */

 24: #include <slepc-private/vecimplslepc.h>       /*I "slepcvec.h" I*/
 25: #include <petsc-private/vecimpl.h>            /*I "petscvec.h" I*/
 26: #include <petscblaslapack.h>

 28: PetscLogEvent SLEPC_UpdateVectors = 0,SLEPC_VecMAXPBY = 0;

 32: /*
 33:   Frees the array of the contiguous vectors when all vectors have been destroyed.
 34: */
 35: static PetscErrorCode Vecs_ContiguousDestroy(void *ctx)
 36: {
 37:   PetscErrorCode  ierr;
 38:   Vecs_Contiguous *vc = (Vecs_Contiguous*)ctx;

 41:   PetscFree(vc->array);
 42:   PetscFree(vc);
 43:   return(0);
 44: }

 48: /*
 49:   Version of VecDuplicateVecs that sets contiguous storage.
 50: */
 51: static PetscErrorCode VecDuplicateVecs_Contiguous(Vec v,PetscInt m,Vec *V[])
 52: {
 53:   PetscErrorCode  ierr;
 54:   PetscInt        i,nloc;
 55:   PetscScalar     *pV;
 56:   PetscContainer  container;
 57:   Vecs_Contiguous *vc;

 60:   /* Allocate array */
 61:   VecGetLocalSize(v,&nloc);
 62:   PetscMalloc(m*nloc*sizeof(PetscScalar),&pV);
 63:   /* Create container */
 64:   PetscNew(Vecs_Contiguous,&vc);
 65:   vc->nvecs = m;
 66:   vc->array = pV;
 67:   PetscContainerCreate(PetscObjectComm((PetscObject)v),&container);
 68:   PetscContainerSetPointer(container,vc);
 69:   PetscContainerSetUserDestroy(container,Vecs_ContiguousDestroy);
 70:   /* Create vectors */
 71:   PetscMalloc(m*sizeof(Vec),V);
 72:   for (i=0;i<m;i++) {
 73:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)v),1,nloc,PETSC_DECIDE,pV+i*nloc,*V+i);
 74:     PetscObjectCompose((PetscObject)*(*V+i),"contiguous",(PetscObject)container);
 75:   }
 76:   PetscContainerDestroy(&container);
 77:   return(0);
 78: }

 82: /*@
 83:    SlepcVecSetTemplate - Sets a vector as a template for contiguous storage.

 85:    Collective on Vec

 87:    Input Parameters:
 88: .  v - the vector

 90:    Note:
 91:    Once this function is called, subsequent calls to VecDuplicateVecs()
 92:    with this vector will use a special version that generates vectors with
 93:    contiguous storage, that is, the array of values of V[1] immediately
 94:    follows the array of V[0], and so on.

 96:    Level: developer
 97: @*/
 98: PetscErrorCode SlepcVecSetTemplate(Vec v)
 99: {
101:   PetscBool      flg;

105:   PetscObjectTypeCompareAny((PetscObject)v,&flg,VECSEQ,VECMPI,"");
106:   if (!flg) SETERRQ(PetscObjectComm((PetscObject)v),PETSC_ERR_SUP,"Only available for standard vectors (VECSEQ or VECMPI)");
107:   v->ops->duplicatevecs = VecDuplicateVecs_Contiguous;
108:   return(0);
109: }

113: /*@
114:    SlepcMatGetVecsTemplate - Get vectors compatible with a matrix,
115:    i.e. with the same parallel layout, and mark them as templates
116:    for contiguous storage.

118:    Collective on Mat

120:    Input Parameter:
121: .  mat - the matrix

123:    Output Parameters:
124: +  right - (optional) vector that the matrix can be multiplied against
125: -  left  - (optional) vector that the matrix vector product can be stored in

127:    Options Database Keys:
128: .  -slepc_non_contiguous - Disable contiguous vector storage

130:    Notes:
131:    Use -slepc_non_contiguous to disable contiguous storage throughout SLEPc.
132:    Contiguous storage is currently also disabled in AIJCUSP matrices.

134:    Level: developer

136: .seealso: SlepcVecSetTemplate()
137: @*/
138: PetscErrorCode SlepcMatGetVecsTemplate(Mat mat,Vec *right,Vec *left)
139: {
141:   PetscBool      flg;
142:   Vec            v;

147:   MatGetVecs(mat,right,left);
148:   v = right? *right: *left;
149:   PetscObjectTypeCompareAny((PetscObject)v,&flg,VECSEQ,VECMPI,"");
150:   if (!flg) return(0);
151:   PetscOptionsHasName(NULL,"-slepc_non_contiguous",&flg);
152:   if (!flg) {
153:     if (right) { SlepcVecSetTemplate(*right); }
154:     if (left) { SlepcVecSetTemplate(*left); }
155:   }
156:   return(0);
157: }

161: /*
162:    SlepcUpdateVectors_Noncontiguous_Inplace - V = V*Q for regular vectors
163:    (non-contiguous).
164: */
165: static PetscErrorCode SlepcUpdateVectors_Noncontiguous_Inplace(PetscInt m_,Vec *V,const PetscScalar *Q,PetscInt ldq_,PetscBool qtrans)
166: {
167:   PetscInt       l;
168:   PetscBLASInt   j,ls,bs=64,m,k,ldq;
169:   PetscScalar    *pv,*pq=(PetscScalar*)Q,*work,*out,one=1.0,zero=0.0;

173:   PetscLogEventBegin(SLEPC_UpdateVectors,0,0,0,0);
174:   VecGetLocalSize(V[0],&l);
175:   PetscBLASIntCast(l,&ls);
176:   PetscBLASIntCast(m_,&m);
177:   PetscBLASIntCast(ldq_,&ldq);
178:   PetscMalloc(sizeof(PetscScalar)*2*bs*m,&work);
179:   out = work+m*bs;
180:   k = ls % bs;
181:   if (k) {
182:     for (j=0;j<m;j++) {
183:       VecGetArray(V[j],&pv);
184:       PetscMemcpy(work+j*bs,pv,k*sizeof(PetscScalar));
185:       VecRestoreArray(V[j],&pv);
186:     }
187:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N",qtrans?"C":"N",&k,&m,&m,&one,work,&bs,pq,&ldq,&zero,out,&bs));
188:     for (j=0;j<m;j++) {
189:       VecGetArray(V[j],&pv);
190:       PetscMemcpy(pv,out+j*bs,k*sizeof(PetscScalar));
191:       VecRestoreArray(V[j],&pv);
192:     }
193:   }
194:   for (;k<ls;k+=bs) {
195:     for (j=0;j<m;j++) {
196:       VecGetArray(V[j],&pv);
197:       PetscMemcpy(work+j*bs,pv+k,bs*sizeof(PetscScalar));
198:       VecRestoreArray(V[j],&pv);
199:     }
200:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N",qtrans?"C":"N",&bs,&m,&m,&one,work,&bs,pq,&ldq,&zero,out,&bs));
201:     for (j=0;j<m;j++) {
202:       VecGetArray(V[j],&pv);
203:       PetscMemcpy(pv+k,out+j*bs,bs*sizeof(PetscScalar));
204:       VecRestoreArray(V[j],&pv);
205:     }
206:   }
207:   PetscFree(work);
208:   PetscLogFlops(m*m*2.0*ls);
209:   PetscLogEventEnd(SLEPC_UpdateVectors,0,0,0,0);
210:   return(0);
211: }

215: /*
216:    SlepcUpdateVectors_Noncontiguous - V(:,s:e-1) = V*Q(:,s:e-1) for
217:    regular vectors (non-contiguous).

219:    Writing V = [ V1 V2 V3 ] and Q = [ Q1 Q2 Q3 ], where the V2 and Q2
220:    correspond to the columns s:e-1, the computation is done as
221:                   V2 := V2*Q2 + V1*Q1 + V3*Q3
222:    (the first term is computed with SlepcUpdateVectors_Noncontiguous_Inplace).
223: */
224: static PetscErrorCode SlepcUpdateVectors_Noncontiguous(PetscInt n,Vec *V,PetscInt s,PetscInt e,const PetscScalar *Q,PetscInt ldq,PetscBool qtrans)
225: {
226:   PetscInt       i,j,m,ln;
227:   PetscScalar    *pq,qt[100];
228:   PetscBool      allocated = PETSC_FALSE;

232:   m = e-s;
233:   if (qtrans) {
234:     ln = PetscMax(s,n-e);
235:     if (ln<=100) pq = qt;
236:     else {
237:       PetscMalloc(ln*sizeof(PetscScalar),&pq);
238:       allocated = PETSC_TRUE;
239:     }
240:   }
241:   /* V2 */
242:   SlepcUpdateVectors_Noncontiguous_Inplace(m,V+s,Q+s*ldq+s,ldq,qtrans);
243:   /* V1 */
244:   if (s>0) {
245:     for (i=s;i<e;i++) {
246:       if (qtrans) {
247:         for (j=0;j<s;j++) pq[j] = Q[i+j*ldq];
248:       } else pq = (PetscScalar*)Q+i*ldq;
249:       VecMAXPY(V[i],s,pq,V);
250:     }
251:   }
252:   /* V3 */
253:   if (n>e) {
254:     for (i=s;i<e;i++) {
255:       if (qtrans) {
256:         for (j=0;j<n-e;j++) pq[j] = Q[i+(j+e)*ldq];
257:       } else pq = (PetscScalar*)Q+i*ldq+e;
258:       VecMAXPY(V[i],n-e,pq,V+e);
259:     }
260:   }
261:   if (allocated) { PetscFree(pq); }
262:   return(0);
263: }

267: /*@
268:    SlepcUpdateVectors - Update a set of vectors V as V(:,s:e-1) = V*Q(:,s:e-1).

270:    Not Collective

272:    Input parameters:
273: +  n      - number of vectors in V
274: .  s      - first column of V to be overwritten
275: .  e      - first column of V not to be overwritten
276: .  Q      - matrix containing the coefficients of the update
277: .  ldq    - leading dimension of Q
278: -  qtrans - flag indicating if Q is to be transposed

280:    Input/Output parameter:
281: .  V      - set of vectors

283:    Notes:
284:    This function computes V(:,s:e-1) = V*Q(:,s:e-1), that is, given a set of
285:    vectors V, columns from s to e-1 are overwritten with columns from s to
286:    e-1 of the matrix-matrix product V*Q.

288:    Matrix V is represented as an array of Vec, whereas Q is represented as
289:    a column-major dense array of leading dimension ldq. Only columns s to e-1
290:    of Q are referenced.

292:    If qtrans=PETSC_TRUE, the operation is V*Q'.

294:    This routine is implemented with a call to BLAS, therefore V is an array
295:    of Vec which have the data stored contiguously in memory as a Fortran matrix.
296:    PETSc does not create such arrays by default.

298:    Level: developer

300: .seealso: SlepcUpdateStrideVectors()
301: @*/
302: PetscErrorCode SlepcUpdateVectors(PetscInt n,Vec *V,PetscInt s,PetscInt e,const PetscScalar *Q,PetscInt ldq,PetscBool qtrans)
303: {
304:   PetscContainer container;

308:   if (n<0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of vectors (given %D) cannot be negative",n);
309:   if (!n || s>=e) return(0);
314:   PetscObjectQuery((PetscObject)(V[0]),"contiguous",(PetscObject*)&container);
315:   if (container) {
316:     /* contiguous Vecs, use BLAS calls */
317:     SlepcUpdateStrideVectors(n,V,s,1,e,Q,ldq,qtrans);
318:   } else {
319:     /* use regular Vec operations */
320:     SlepcUpdateVectors_Noncontiguous(n,V,s,e,Q,ldq,qtrans);
321:   }
322:   return(0);
323: }

327: /*@
328:    SlepcUpdateStrideVectors - Update a set of vectors V as
329:    V(:,s:d:e-1) = V*Q(:,s:e-1).

331:    Not Collective

333:    Input parameters:
334: +  n      - number of vectors in V
335: .  s      - first column of V to be overwritten
336: .  d      - stride
337: .  e      - first column of V not to be overwritten
338: .  Q      - matrix containing the coefficients of the update
339: .  ldq    - leading dimension of Q
340: -  qtrans - flag indicating if Q is to be transposed

342:    Input/Output parameter:
343: .  V      - set of vectors

345:    Notes:
346:    This function computes V(:,s:d:e-1) = V*Q(:,s:e-1), that is, given a set
347:    of vectors V, columns from s to e-1 are overwritten with columns from s to
348:    e-1 of the matrix-matrix product V*Q.

350:    Matrix V is represented as an array of Vec, whereas Q is represented as
351:    a column-major dense array of leading dimension ldq. Only columns s to e-1
352:    of Q are referenced.

354:    If qtrans=PETSC_TRUE, the operation is V*Q'.

356:    This routine is implemented with a call to BLAS, therefore V is an array
357:    of Vec which have the data stored contiguously in memory as a Fortran matrix.
358:    PETSc does not create such arrays by default.

360:    Level: developer

362: .seealso: SlepcUpdateVectors()
363: @*/
364: PetscErrorCode SlepcUpdateStrideVectors(PetscInt n_,Vec *V,PetscInt s,PetscInt d,PetscInt e,const PetscScalar *Q,PetscInt ldq_,PetscBool qtrans)
365: {
367:   PetscInt       l;
368:   PetscBLASInt   i,j,k,bs=64,m,n,ldq,ls,ld;
369:   PetscScalar    *pv,*pw,*pq,*work,*pwork,one=1.0,zero=0.0;
370:   const char     *qt;

373:   PetscBLASIntCast(n_/d,&n);
374:   PetscBLASIntCast(ldq_,&ldq);
375:   m = (e-s)/d;
376:   if (!m) return(0);
378:   if (m<0 || n<0 || s<0 || m>n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index argument out of range");
379:   PetscLogEventBegin(SLEPC_UpdateVectors,0,0,0,0);
380:   VecGetLocalSize(V[0],&l);
381:   PetscBLASIntCast(l,&ls);
382:   PetscBLASIntCast(ls*d,&ld);
383:   VecGetArray(V[0],&pv);
384:   if (qtrans) {
385:     pq = (PetscScalar*)Q+s;
386:     qt = "C";
387:   } else {
388:     pq = (PetscScalar*)Q+s*ldq;
389:     qt = "N";
390:   }
391:   PetscMalloc(sizeof(PetscScalar)*bs*m,&work);
392:   k = ls % bs;
393:   if (k) {
394:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N",qt,&k,&m,&n,&one,pv,&ld,pq,&ldq,&zero,work,&k));
395:     for (j=0;j<m;j++) {
396:       pw = pv+(s+j)*ld;
397:       pwork = work+j*k;
398:       for (i=0;i<k;i++) {
399:         *pw++ = *pwork++;
400:       }
401:     }
402:   }
403:   for (;k<ls;k+=bs) {
404:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N",qt,&bs,&m,&n,&one,pv+k,&ld,pq,&ldq,&zero,work,&bs));
405:     for (j=0;j<m;j++) {
406:       pw = pv+(s+j)*ld+k;
407:       pwork = work+j*bs;
408:       for (i=0;i<bs;i++) {
409:         *pw++ = *pwork++;
410:       }
411:     }
412:   }
413:   VecRestoreArray(V[0],&pv);
414:   PetscFree(work);
415:   PetscLogFlops(m*n*2.0*ls);
416:   PetscLogEventEnd(SLEPC_UpdateVectors,0,0,0,0);
417:   return(0);
418: }

422: /*@
423:    SlepcVecMAXPBY - Computes y = beta*y + sum alpha*a[j]*x[j]

425:    Logically Collective on Vec

427:    Input parameters:
428: +  beta   - scalar beta
429: .  alpha  - scalar alpha
430: .  nv     - number of vectors in x and scalars in a
431: .  a      - array of scalars
432: -  x      - set of vectors

434:    Input/Output parameter:
435: .  y      - the vector to update

437:    Notes:
438:    If x are Vec's with contiguous storage, then the operation is done
439:    through a call to BLAS. Otherwise, VecMAXPY() is called.

441:    Level: developer

443: .seealso: SlepcVecSetTemplate()
444: @*/
445: PetscErrorCode SlepcVecMAXPBY(Vec y,PetscScalar beta,PetscScalar alpha,PetscInt nv,PetscScalar a[],Vec x[])
446: {
447:   PetscErrorCode    ierr;
448:   PetscBLASInt      i,n,m,one=1;
449:   PetscScalar       *py;
450:   const PetscScalar *px;
451:   PetscContainer    container;
452:   Vec               z;

456:   if (!nv) return(0);
457:   if (nv < 0) SETERRQ1(PetscObjectComm((PetscObject)y),PETSC_ERR_ARG_OUTOFRANGE,"Number of vectors (given %D) cannot be negative",nv);
467:   if ((*x)->map->N != (y)->map->N) SETERRQ(PetscObjectComm((PetscObject)y),PETSC_ERR_ARG_INCOMP,"Incompatible vector global lengths");
468:   if ((*x)->map->n != (y)->map->n) SETERRQ(PetscObjectComm((PetscObject)y),PETSC_ERR_ARG_INCOMP,"Incompatible vector local lengths");

470:   PetscObjectQuery((PetscObject)(x[0]),"contiguous",(PetscObject*)&container);
471:   if (container) {
472:     /* assume x Vecs are contiguous, use BLAS calls */
473:     PetscLogEventBegin(SLEPC_VecMAXPBY,*x,y,0,0);
474:     VecGetArray(y,&py);
475:     VecGetArrayRead(*x,&px);
476:     PetscBLASIntCast(nv,&n);
477:     PetscBLASIntCast((y)->map->n,&m);
478:     if (m>0) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&m,&n,&alpha,px,&m,a,&one,&beta,py,&one));
479:     VecRestoreArray(y,&py);
480:     VecRestoreArrayRead(*x,&px);
481:     PetscLogFlops(nv*2*(y)->map->n);
482:     PetscLogEventEnd(SLEPC_VecMAXPBY,*x,y,0,0);
483:   } else {
484:     /* use regular Vec operations */
485:     if (alpha==-beta) {
486:       for (i=0;i<nv;i++) a[i] = -a[i];
487:       VecMAXPY(y,nv,a,x);
488:       for (i=0;i<nv;i++) a[i] = -a[i];
489:       VecScale(y,beta);
490:     } else {
491:       VecDuplicate(y,&z);
492:       VecCopy(y,z);
493:       VecMAXPY(y,nv,a,x);
494:       VecAXPBY(y,beta-alpha,alpha,z);
495:       VecDestroy(&z);
496:     }
497:   }
498:   return(0);
499: }