dense_matrix_blas_lapack.C
Go to the documentation of this file.
1 // The libMesh Finite Element Library.
2 // Copyright (C) 2002-2014 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8 
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
13 
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 
18 
19 // Local Includes
20 #include "libmesh/dense_matrix.h"
21 #include "libmesh/dense_vector.h"
22 
23 
24 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
25 #include "libmesh/petsc_macro.h"
26 
27 EXTERN_C_FOR_PETSC_BEGIN
28 #include <petscblaslapack.h>
29 EXTERN_C_FOR_PETSC_END
30 #endif
31 
32 namespace libMesh
33 {
34 
35 
36 
37 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
38 
39 template<typename T>
42 {
43  int result_size = 0;
44 
45  // For each case, determine the size of the final result make sure
46  // that the inner dimensions match
47  switch (flag)
48  {
49  case LEFT_MULTIPLY:
50  {
51  result_size = other.m() * this->n();
52  if (other.n() == this->m())
53  break;
54  }
55  case RIGHT_MULTIPLY:
56  {
57  result_size = other.n() * this->m();
58  if (other.m() == this->n())
59  break;
60  }
61  case LEFT_MULTIPLY_TRANSPOSE:
62  {
63  result_size = other.n() * this->n();
64  if (other.m() == this->m())
65  break;
66  }
67  case RIGHT_MULTIPLY_TRANSPOSE:
68  {
69  result_size = other.m() * this->m();
70  if (other.n() == this->n())
71  break;
72  }
73  default:
74  {
75  libMesh::out << "Unknown flag selected or matrices are ";
76  libMesh::out << "incompatible for multiplication." << std::endl;
77  libmesh_error();
78  }
79  }
80 
81  // For this to work, the passed arg. must actually be a DenseMatrix<T>
82  const DenseMatrix<T>* const_that = libmesh_cast_ptr< const DenseMatrix<T>* >(&other);
83 
84  // Also, although 'that' is logically const in this BLAS routine,
85  // the PETSc BLAS interface does not specify that any of the inputs are
86  // const. To use it, I must cast away const-ness.
87  DenseMatrix<T>* that = const_cast< DenseMatrix<T>* > (const_that);
88 
89  // Initialize A, B pointers for LEFT_MULTIPLY* cases
91  *A = this,
92  *B = that;
93 
94  // For RIGHT_MULTIPLY* cases, swap the meaning of A and B.
95  // Here is a full table of combinations we can pass to BLASgemm, and what the answer is when finished:
96  // pass A B -> (Fortran) -> A^T B^T -> (C++) -> (A^T B^T)^T -> (identity) -> B A "lt multiply"
97  // pass B A -> (Fortran) -> B^T A^T -> (C++) -> (B^T A^T)^T -> (identity) -> A B "rt multiply"
98  // pass A B^T -> (Fortran) -> A^T B -> (C++) -> (A^T B)^T -> (identity) -> B^T A "lt multiply t"
99  // pass B^T A -> (Fortran) -> B A^T -> (C++) -> (B A^T)^T -> (identity) -> A B^T "rt multiply t"
100  if (flag==RIGHT_MULTIPLY || flag==RIGHT_MULTIPLY_TRANSPOSE)
101  std::swap(A,B);
102 
103  // transa, transb values to pass to blas
104  char
105  transa[] = "n",
106  transb[] = "n";
107 
108  // Integer values to pass to BLAS:
109  //
110  // M
111  // In Fortran, the number of rows of op(A),
112  // In the BLAS documentation, typically known as 'M'.
113  //
114  // In C/C++, we set:
115  // M = n_cols(A) if (transa='n')
116  // n_rows(A) if (transa='t')
117  int M = static_cast<int>( A->n() );
118 
119  // N
120  // In Fortran, the number of cols of op(B), and also the number of cols of C.
121  // In the BLAS documentation, typically known as 'N'.
122  //
123  // In C/C++, we set:
124  // N = n_rows(B) if (transb='n')
125  // n_cols(B) if (transb='t')
126  int N = static_cast<int>( B->m() );
127 
128  // K
129  // In Fortran, the number of cols of op(A), and also
130  // the number of rows of op(B). In the BLAS documentation,
131  // typically known as 'K'.
132  //
133  // In C/C++, we set:
134  // K = n_rows(A) if (transa='n')
135  // n_cols(A) if (transa='t')
136  int K = static_cast<int>( A->m() );
137 
138  // LDA (leading dimension of A). In our cases,
139  // LDA is always the number of columns of A.
140  int LDA = static_cast<int>( A->n() );
141 
142  // LDB (leading dimension of B). In our cases,
143  // LDB is always the number of columns of B.
144  int LDB = static_cast<int>( B->n() );
145 
146  if (flag == LEFT_MULTIPLY_TRANSPOSE)
147  {
148  transb[0] = 't';
149  N = static_cast<int>( B->n() );
150  }
151 
152  else if (flag == RIGHT_MULTIPLY_TRANSPOSE)
153  {
154  transa[0] = 't';
155  std::swap(M,K);
156  }
157 
158  // LDC (leading dimension of C). LDC is the
159  // number of columns in the solution matrix.
160  int LDC = M;
161 
162  // Scalar values to pass to BLAS
163  //
164  // scalar multiplying the whole product AB
165  T alpha = 1.;
166 
167  // scalar multiplying C, which is the original matrix.
168  T beta = 0.;
169 
170  // Storage for the result
171  std::vector<T> result (result_size);
172 
173  // Finally ready to call the BLAS
174  BLASgemm_(transa, transb, &M, &N, &K, &alpha, &(A->_val[0]), &LDA, &(B->_val[0]), &LDB, &beta, &result[0], &LDC);
175 
176  // Update the relevant dimension for this matrix.
177  switch (flag)
178  {
179  case LEFT_MULTIPLY: { this->_m = other.m(); break; }
180  case RIGHT_MULTIPLY: { this->_n = other.n(); break; }
181  case LEFT_MULTIPLY_TRANSPOSE: { this->_m = other.n(); break; }
182  case RIGHT_MULTIPLY_TRANSPOSE: { this->_n = other.m(); break; }
183  default:
184  {
185  libMesh::out << "Unknown flag selected." << std::endl;
186  libmesh_error();
187  }
188  }
189 
190  // Swap my data vector with the result
191  this->_val.swap(result);
192 }
193 
194 #else
195 
196 template<typename T>
198  _BLAS_Multiply_Flag )
199 {
200  libMesh::err << "No PETSc-provided BLAS/LAPACK available!" << std::endl;
201  libmesh_error();
202 }
203 
204 #endif
205 
206 
207 
208 
209 
210 
211 
212 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
213 
214 template<typename T>
216 {
217  // If this function was called, there better not be any
218  // previous decomposition of the matrix.
219  libmesh_assert_equal_to (this->_decomposition_type, NONE);
220 
221  // The calling sequence for dgetrf is:
222  // dgetrf(M, N, A, lda, ipiv, info)
223 
224  // M (input) int*
225  // The number of rows of the matrix A. M >= 0.
226  // In C/C++, pass the number of *cols* of A
227  int M = this->n();
228 
229  // N (input) int*
230  // The number of columns of the matrix A. N >= 0.
231  // In C/C++, pass the number of *rows* of A
232  int N = this->m();
233 
234  // A (input/output) double precision array, dimension (LDA,N)
235  // On entry, the M-by-N matrix to be factored.
236  // On exit, the factors L and U from the factorization
237  // A = P*L*U; the unit diagonal elements of L are not stored.
238  // Here, we pass &(_val[0]).
239 
240  // LDA (input) int*
241  // The leading dimension of the array A. LDA >= max(1,M).
242  int LDA = M;
243 
244  // ipiv (output) integer array, dimension (min(m,n))
245  // The pivot indices; for 1 <= i <= min(m,n), row i of the
246  // matrix was interchanged with row IPIV(i).
247  // Here, we pass &(_pivots[0]), a private class member used to store pivots
248  this->_pivots.resize( std::min(M,N) );
249 
250  // info (output) int*
251  // = 0: successful exit
252  // < 0: if INFO = -i, the i-th argument had an illegal value
253  // > 0: if INFO = i, U(i,i) is exactly zero. The factorization
254  // has been completed, but the factor U is exactly
255  // singular, and division by zero will occur if it is used
256  // to solve a system of equations.
257  int INFO = 0;
258 
259  // Ready to call the actual factorization routine through PETSc's interface
260  LAPACKgetrf_(&M, &N, &(this->_val[0]), &LDA, &(_pivots[0]), &INFO);
261 
262  // Check return value for errors
263  if (INFO != 0)
264  {
265  libMesh::out << "INFO="
266  << INFO
267  << ", Error during Lapack LU factorization!" << std::endl;
268  libmesh_error();
269  }
270 
271  // Set the flag for LU decomposition
272  this->_decomposition_type = LU_BLAS_LAPACK;
273 }
274 
275 #else
276 
277 template<typename T>
279 {
280  libMesh::err << "No PETSc-provided BLAS/LAPACK available!" << std::endl;
281  libmesh_error();
282 }
283 
284 #endif
285 
286 
287 
288 template<typename T>
290 {
291  // The calling sequence for dgetrf is:
292  // DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
293 
294 
295  // JOBU (input) CHARACTER*1
296  // Specifies options for computing all or part of the matrix U:
297  // = 'A': all M columns of U are returned in array U:
298  // = 'S': the first min(m,n) columns of U (the left singular
299  // vectors) are returned in the array U;
300  // = 'O': the first min(m,n) columns of U (the left singular
301  // vectors) are overwritten on the array A;
302  // = 'N': no columns of U (no left singular vectors) are
303  // computed.
304  char JOBU = 'N';
305 
306  // JOBVT (input) CHARACTER*1
307  // Specifies options for computing all or part of the matrix
308  // V**T:
309  // = 'A': all N rows of V**T are returned in the array VT;
310  // = 'S': the first min(m,n) rows of V**T (the right singular
311  // vectors) are returned in the array VT;
312  // = 'O': the first min(m,n) rows of V**T (the right singular
313  // vectors) are overwritten on the array A;
314  // = 'N': no rows of V**T (no right singular vectors) are
315  // computed.
316  char JOBVT = 'N';
317 
318  std::vector<T> sigma_val;
319  std::vector<T> U_val;
320  std::vector<T> VT_val;
321 
322  _svd_helper(JOBU, JOBVT, sigma_val, U_val, VT_val);
323 
324  // Load the singular values into sigma, ignore U_val and VT_val
325  const unsigned int n_sigma_vals =
326  libmesh_cast_int<unsigned int>(sigma_val.size());
327  sigma.resize(n_sigma_vals);
328  for(unsigned int i=0; i<n_sigma_vals; i++)
329  sigma(i) = sigma_val[i];
330 
331 }
332 
333 template<typename T>
335 {
336  // The calling sequence for dgetrf is:
337  // DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
338 
339 
340  // JOBU (input) CHARACTER*1
341  // Specifies options for computing all or part of the matrix U:
342  // = 'A': all M columns of U are returned in array U:
343  // = 'S': the first min(m,n) columns of U (the left singular
344  // vectors) are returned in the array U;
345  // = 'O': the first min(m,n) columns of U (the left singular
346  // vectors) are overwritten on the array A;
347  // = 'N': no columns of U (no left singular vectors) are
348  // computed.
349  char JOBU = 'S';
350 
351  // JOBVT (input) CHARACTER*1
352  // Specifies options for computing all or part of the matrix
353  // V**T:
354  // = 'A': all N rows of V**T are returned in the array VT;
355  // = 'S': the first min(m,n) rows of V**T (the right singular
356  // vectors) are returned in the array VT;
357  // = 'O': the first min(m,n) rows of V**T (the right singular
358  // vectors) are overwritten on the array A;
359  // = 'N': no rows of V**T (no right singular vectors) are
360  // computed.
361  char JOBVT = 'S';
362 
363  std::vector<T> sigma_val;
364  std::vector<T> U_val;
365  std::vector<T> VT_val;
366 
367  _svd_helper(JOBU, JOBVT, sigma_val, U_val, VT_val);
368 
369  // Load the singular values into sigma, ignore U_val and VT_val
370  const unsigned int n_sigma_vals =
371  libmesh_cast_int<unsigned int>(sigma_val.size());
372  sigma.resize(n_sigma_vals);
373  for(unsigned int i=0; i<n_sigma_vals; i++)
374  sigma(i) = sigma_val[i];
375 
376  int M = this->n();
377  int N = this->m();
378  int min_MN = (M < N) ? M : N;
379  U.resize(M,min_MN);
380  for(unsigned int i=0; i<U.m(); i++)
381  for(unsigned int j=0; j<U.n(); j++)
382  {
383  unsigned int index = i + j*U.n(); // Column major storage
384  U(i,j) = U_val[index];
385  }
386 
387  VT.resize(min_MN,N);
388  for(unsigned int i=0; i<VT.m(); i++)
389  for(unsigned int j=0; j<VT.n(); j++)
390  {
391  unsigned int index = i + j*U.n(); // Column major storage
392  VT(i,j) = VT_val[index];
393  }
394 
395 }
396 
397 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
398 
399 template<typename T>
401  char JOBVT,
402  std::vector<T>& sigma_val,
403  std::vector<T>& U_val,
404  std::vector<T>& VT_val)
405 {
406 
407  // M (input) int*
408  // The number of rows of the matrix A. M >= 0.
409  // In C/C++, pass the number of *cols* of A
410  int M = this->n();
411 
412  // N (input) int*
413  // The number of columns of the matrix A. N >= 0.
414  // In C/C++, pass the number of *rows* of A
415  int N = this->m();
416 
417  int min_MN = (M < N) ? M : N;
418  int max_MN = (M > N) ? M : N;
419 
420  // A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
421  // On entry, the M-by-N matrix A.
422  // On exit,
423  // if JOBU = 'O', A is overwritten with the first min(m,n)
424  // columns of U (the left singular vectors,
425  // stored columnwise);
426  // if JOBVT = 'O', A is overwritten with the first min(m,n)
427  // rows of V**T (the right singular vectors,
428  // stored rowwise);
429  // if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
430  // are destroyed.
431  // Here, we pass &(_val[0]).
432 
433  // LDA (input) int*
434  // The leading dimension of the array A. LDA >= max(1,M).
435  int LDA = M;
436 
437  // S (output) DOUBLE PRECISION array, dimension (min(M,N))
438  // The singular values of A, sorted so that S(i) >= S(i+1).
439  sigma_val.resize( min_MN );
440 
441  // LDU (input) INTEGER
442  // The leading dimension of the array U. LDU >= 1; if
443  // JOBU = 'S' or 'A', LDU >= M.
444  int LDU = M;
445 
446  // U (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
447  // (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
448  // If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
449  // if JOBU = 'S', U contains the first min(m,n) columns of U
450  // (the left singular vectors, stored columnwise);
451  // if JOBU = 'N' or 'O', U is not referenced.
452  U_val.resize( LDU*M );
453 
454  // LDVT (input) INTEGER
455  // The leading dimension of the array VT. LDVT >= 1; if
456  // JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
457  int LDVT = N;
458 
459  // VT (output) DOUBLE PRECISION array, dimension (LDVT,N)
460  // If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
461  // V**T;
462  // if JOBVT = 'S', VT contains the first min(m,n) rows of
463  // V**T (the right singular vectors, stored rowwise);
464  // if JOBVT = 'N' or 'O', VT is not referenced.
465  VT_val.resize( LDVT*N );
466 
467  // LWORK (input) INTEGER
468  // The dimension of the array WORK.
469  // LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)).
470  // For good performance, LWORK should generally be larger.
471  //
472  // If LWORK = -1, then a workspace query is assumed; the routine
473  // only calculates the optimal size of the WORK array, returns
474  // this value as the first entry of the WORK array, and no error
475  // message related to LWORK is issued by XERBLA.
476  int larger = (3*min_MN+max_MN > 5*min_MN) ? 3*min_MN+max_MN : 5*min_MN;
477  int LWORK = (larger > 1) ? larger : 1;
478 
479 
480  // WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
481  // On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
482  // if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
483  // superdiagonal elements of an upper bidiagonal matrix B
484  // whose diagonal is in S (not necessarily sorted). B
485  // satisfies A = U * B * VT, so it has the same singular values
486  // as A, and singular vectors related by U and VT.
487  std::vector<T> WORK( LWORK );
488 
489  // INFO (output) INTEGER
490  // = 0: successful exit.
491  // < 0: if INFO = -i, the i-th argument had an illegal value.
492  // > 0: if DBDSQR did not converge, INFO specifies how many
493  // superdiagonals of an intermediate bidiagonal form B
494  // did not converge to zero. See the description of WORK
495  // above for details.
496  int INFO = 0;
497 
498  // Ready to call the actual factorization routine through PETSc's interface
499  LAPACKgesvd_(&JOBU, &JOBVT, &M, &N, &(_val[0]), &LDA, &(sigma_val[0]), &(U_val[0]),
500  &LDU, &(VT_val[0]), &LDVT, &(WORK[0]), &LWORK, &INFO);
501 
502  // Check return value for errors
503  if (INFO != 0)
504  {
505  libMesh::out << "INFO="
506  << INFO
507  << ", Error during Lapack SVD calculation!" << std::endl;
508  libmesh_error();
509  }
510 }
511 
512 
513 #else
514 
515 template<typename T>
516 void DenseMatrix<T>::_svd_helper (char,
517  char,
518  std::vector<T>&,
519  std::vector<T>&,
520  std::vector<T>&)
521 {
522  libMesh::err << "No PETSc-provided BLAS/LAPACK available!" << std::endl;
523  libmesh_error();
524 }
525 
526 #endif
527 
528 
529 
530 
531 
532 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
533 
534 template<typename T>
536  DenseVector<T>& x)
537 {
538  // The calling sequence for getrs is:
539  // dgetrs(TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO)
540 
541  // trans (input) char*
542  // 'n' for no tranpose, 't' for transpose
543  char TRANS[] = "t";
544 
545  // N (input) int*
546  // The order of the matrix A. N >= 0.
547  int N = this->m();
548 
549 
550  // NRHS (input) int*
551  // The number of right hand sides, i.e., the number of columns
552  // of the matrix B. NRHS >= 0.
553  int NRHS = 1;
554 
555  // A (input) DOUBLE PRECISION array, dimension (LDA,N)
556  // The factors L and U from the factorization A = P*L*U
557  // as computed by dgetrf.
558  // Here, we pass &(_val[0])
559 
560  // LDA (input) int*
561  // The leading dimension of the array A. LDA >= max(1,N).
562  int LDA = N;
563 
564  // ipiv (input) int array, dimension (N)
565  // The pivot indices from DGETRF; for 1<=i<=N, row i of the
566  // matrix was interchanged with row IPIV(i).
567  // Here, we pass &(_pivots[0]) which was computed in _lu_decompose_lapack
568 
569  // B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
570  // On entry, the right hand side matrix B.
571  // On exit, the solution matrix X.
572  // Here, we pass a copy of the rhs vector's data array in x, so that the
573  // passed right-hand side b is unmodified. I don't see a way around this
574  // copy if we want to maintain an unmodified rhs in LibMesh.
575  x = b;
576  std::vector<T>& x_vec = x.get_values();
577 
578  // We can avoid the copy if we don't care about overwriting the RHS: just
579  // pass b to the Lapack routine and then swap with x before exiting
580  // std::vector<T>& x_vec = b.get_values();
581 
582  // LDB (input) int*
583  // The leading dimension of the array B. LDB >= max(1,N).
584  int LDB = N;
585 
586  // INFO (output) int*
587  // = 0: successful exit
588  // < 0: if INFO = -i, the i-th argument had an illegal value
589  int INFO = 0;
590 
591  // Finally, ready to call the Lapack getrs function
592  LAPACKgetrs_(TRANS, &N, &NRHS, &(_val[0]), &LDA, &(_pivots[0]), &(x_vec[0]), &LDB, &INFO);
593 
594  // Check return value for errors
595  if (INFO != 0)
596  {
597  libMesh::out << "INFO="
598  << INFO
599  << ", Error during Lapack LU solve!" << std::endl;
600  libmesh_error();
601  }
602 
603  // Don't do this if you already made a copy of b above
604  // Swap b and x. The solution will then be in x, and whatever was originally
605  // in x, maybe garbage, maybe nothing, will be in b.
606  // FIXME: Rewrite the LU and Cholesky solves to just take one input, and overwrite
607  // the input. This *should* make user code simpler, as they don't have to create
608  // an extra vector just to pass it in to the solve function!
609  // b.swap(x);
610 }
611 
612 #else
613 
614 template<typename T>
616  DenseVector<T>& )
617 {
618  libMesh::err << "No PETSc-provided BLAS/LAPACK available!" << std::endl;
619  libmesh_error();
620 }
621 
622 #endif
623 
624 
625 
626 
627 
628 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
629 
630 template<typename T>
631 void DenseMatrix<T>::_matvec_blas(T alpha, T beta,
632  DenseVector<T>& dest,
633  const DenseVector<T>& arg,
634  bool trans) const
635 {
636  // Ensure that dest and arg sizes are compatible
637  if (!trans)
638  {
639  // dest ~ A * arg
640  // (mx1) (mxn) * (nx1)
641  if ((dest.size() != this->m()) || (arg.size() != this->n()))
642  {
643  libMesh::out << "Improper input argument sizes!" << std::endl;
644  libmesh_error();
645  }
646  }
647 
648  else // trans == true
649  {
650  // Ensure that dest and arg are proper size
651  // dest ~ A^T * arg
652  // (nx1) (nxm) * (mx1)
653  if ((dest.size() != this->n()) || (arg.size() != this->m()))
654  {
655  libMesh::out << "Improper input argument sizes!" << std::endl;
656  libmesh_error();
657  }
658  }
659 
660  // Calling sequence for dgemv:
661  //
662  // dgemv(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
663 
664  // TRANS - CHARACTER*1, 't' for transpose, 'n' for non-transpose multiply
665  // We store everything in row-major order, so pass the transpose flag for
666  // non-transposed matvecs and the 'n' flag for transposed matvecs
667  char TRANS[] = "t";
668  if (trans)
669  TRANS[0] = 'n';
670 
671  // M - INTEGER.
672  // On entry, M specifies the number of rows of the matrix A.
673  // In C/C++, pass the number of *cols* of A
674  int M = this->n();
675 
676  // N - INTEGER.
677  // On entry, N specifies the number of columns of the matrix A.
678  // In C/C++, pass the number of *rows* of A
679  int N = this->m();
680 
681  // ALPHA - DOUBLE PRECISION.
682  // The scalar constant passed to this function
683 
684  // A - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
685  // Before entry, the leading m by n part of the array A must
686  // contain the matrix of coefficients.
687  // The matrix, *this. Note that _matvec_blas is called from
688  // a const function, vector_mult(), and so we have made this function const
689  // as well. Since BLAS knows nothing about const, we have to cast it away
690  // now.
691  DenseMatrix<T>& a_ref = const_cast< DenseMatrix<T>& > ( *this );
692  std::vector<T>& a = a_ref.get_values();
693 
694  // LDA - INTEGER.
695  // On entry, LDA specifies the first dimension of A as declared
696  // in the calling (sub) program. LDA must be at least
697  // max( 1, m ).
698  int LDA = M;
699 
700  // X - DOUBLE PRECISION array of DIMENSION at least
701  // ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
702  // and at least
703  // ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
704  // Before entry, the incremented array X must contain the
705  // vector x.
706  // Here, we must cast away the const-ness of "arg" since BLAS knows
707  // nothing about const
708  DenseVector<T>& x_ref = const_cast< DenseVector<T>& > ( arg );
709  std::vector<T>& x = x_ref.get_values();
710 
711  // INCX - INTEGER.
712  // On entry, INCX specifies the increment for the elements of
713  // X. INCX must not be zero.
714  int INCX = 1;
715 
716  // BETA - DOUBLE PRECISION.
717  // On entry, BETA specifies the scalar beta. When BETA is
718  // supplied as zero then Y need not be set on input.
719  // The second scalar constant passed to this function
720 
721  // Y - DOUBLE PRECISION array of DIMENSION at least
722  // ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
723  // and at least
724  // ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
725  // Before entry with BETA non-zero, the incremented array Y
726  // must contain the vector y. On exit, Y is overwritten by the
727  // updated vector y.
728  // The input vector "dest"
729  std::vector<T>& y = dest.get_values();
730 
731  // INCY - INTEGER.
732  // On entry, INCY specifies the increment for the elements of
733  // Y. INCY must not be zero.
734  int INCY = 1;
735 
736  // Finally, ready to call the BLAS function
737  BLASgemv_(TRANS, &M, &N, &alpha, &(a[0]), &LDA, &(x[0]), &INCX, &beta, &(y[0]), &INCY);
738 }
739 
740 
741 #else
742 
743 
744 template<typename T>
746  DenseVector<T>& ,
747  const DenseVector<T>&,
748  bool ) const
749 {
750  libMesh::err << "No PETSc-provided BLAS/LAPACK available!" << std::endl;
751  libmesh_error();
752 }
753 
754 
755 #endif
756 
757 
758 //--------------------------------------------------------------
759 // Explicit instantiations
760 template void DenseMatrix<Real>::_multiply_blas(const DenseMatrixBase<Real>&, _BLAS_Multiply_Flag);
762 template void DenseMatrix<Real>::_lu_back_substitute_lapack(const DenseVector<Real>& ,
763  DenseVector<Real>&);
765  DenseVector<Real>& ,
766  const DenseVector<Real>&,
767  bool ) const;
768 template void DenseMatrix<Real>::_svd_lapack(DenseVector<Real>&);
769 template void DenseMatrix<Real>::_svd_lapack(DenseVector<Real>&, DenseMatrix<Real>&, DenseMatrix<Real>&);
770 template void DenseMatrix<Real>::_svd_helper (char, char, std::vector<Real>&,
771  std::vector<Real>&,
772  std::vector<Real>& );
773 
774 #if !(LIBMESH_USE_REAL_NUMBERS)
775 template void DenseMatrix<Number>::_multiply_blas(const DenseMatrixBase<Number>&, _BLAS_Multiply_Flag);
777 template void DenseMatrix<Number>::_lu_back_substitute_lapack(const DenseVector<Number>& ,
778  DenseVector<Number>&);
780  DenseVector<Number>& ,
781  const DenseVector<Number>&,
782  bool ) const;
783 template void DenseMatrix<Number>::_svd_lapack(DenseVector<Number>&);
784 template void DenseMatrix<Number>::_svd_lapack(DenseVector<Number>&, DenseMatrix<Number>&, DenseMatrix<Number>&);
785 template void DenseMatrix<Number>::_svd_helper (char, char, std::vector<Number>&,
786  std::vector<Number>&,
787  std::vector<Number>& );
788 #endif
789 
790 } // namespace libMesh

Site Created By: libMesh Developers
Last modified: February 07 2014 16:57:04 UTC

Hosted By:
SourceForge.net Logo