dense_matrix_blas_lapack.C
Go to the documentation of this file.00001 // The libMesh Finite Element Library. 00002 // Copyright (C) 2002-2012 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner 00003 00004 // This library is free software; you can redistribute it and/or 00005 // modify it under the terms of the GNU Lesser General Public 00006 // License as published by the Free Software Foundation; either 00007 // version 2.1 of the License, or (at your option) any later version. 00008 00009 // This library is distributed in the hope that it will be useful, 00010 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00012 // Lesser General Public License for more details. 00013 00014 // You should have received a copy of the GNU Lesser General Public 00015 // License along with this library; if not, write to the Free Software 00016 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00017 00018 00019 // Local Includes 00020 #include "libmesh/dense_matrix.h" 00021 #include "libmesh/dense_vector.h" 00022 00023 00024 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS) 00025 #include "libmesh/petsc_macro.h" 00026 00027 EXTERN_C_FOR_PETSC_BEGIN 00028 #include <petscblaslapack.h> 00029 EXTERN_C_FOR_PETSC_END 00030 #endif 00031 00032 namespace libMesh 00033 { 00034 00035 00036 00037 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS) 00038 00039 template<typename T> 00040 void DenseMatrix<T>::_multiply_blas(const DenseMatrixBase<T>& other, 00041 _BLAS_Multiply_Flag flag) 00042 { 00043 int result_size = 0; 00044 00045 // For each case, determine the size of the final result make sure 00046 // that the inner dimensions match 00047 switch (flag) 00048 { 00049 case LEFT_MULTIPLY: 00050 { 00051 result_size = other.m() * this->n(); 00052 if (other.n() == this->m()) 00053 break; 00054 } 00055 case RIGHT_MULTIPLY: 00056 { 00057 result_size = other.n() * this->m(); 00058 if (other.m() == this->n()) 00059 break; 00060 } 00061 case LEFT_MULTIPLY_TRANSPOSE: 00062 { 00063 result_size = other.n() * this->n(); 00064 if (other.m() == this->m()) 00065 break; 00066 } 00067 case RIGHT_MULTIPLY_TRANSPOSE: 00068 { 00069 result_size = other.m() * this->m(); 00070 if (other.n() == this->n()) 00071 break; 00072 } 00073 default: 00074 { 00075 libMesh::out << "Unknown flag selected or matrices are "; 00076 libMesh::out << "incompatible for multiplication." << std::endl; 00077 libmesh_error(); 00078 } 00079 } 00080 00081 // For this to work, the passed arg. must actually be a DenseMatrix<T> 00082 const DenseMatrix<T>* const_that = libmesh_cast_ptr< const DenseMatrix<T>* >(&other); 00083 00084 // Also, although 'that' is logically const in this BLAS routine, 00085 // the PETSc BLAS interface does not specify that any of the inputs are 00086 // const. To use it, I must cast away const-ness. 00087 DenseMatrix<T>* that = const_cast< DenseMatrix<T>* > (const_that); 00088 00089 // Initialize A, B pointers for LEFT_MULTIPLY* cases 00090 DenseMatrix<T> 00091 *A = this, 00092 *B = that; 00093 00094 // For RIGHT_MULTIPLY* cases, swap the meaning of A and B. 00095 // Here is a full table of combinations we can pass to BLASgemm, and what the answer is when finished: 00096 // pass A B -> (Fortran) -> A^T B^T -> (C++) -> (A^T B^T)^T -> (identity) -> B A "lt multiply" 00097 // pass B A -> (Fortran) -> B^T A^T -> (C++) -> (B^T A^T)^T -> (identity) -> A B "rt multiply" 00098 // pass A B^T -> (Fortran) -> A^T B -> (C++) -> (A^T B)^T -> (identity) -> B^T A "lt multiply t" 00099 // pass B^T A -> (Fortran) -> B A^T -> (C++) -> (B A^T)^T -> (identity) -> A B^T "rt multiply t" 00100 if (flag==RIGHT_MULTIPLY || flag==RIGHT_MULTIPLY_TRANSPOSE) 00101 std::swap(A,B); 00102 00103 // transa, transb values to pass to blas 00104 char 00105 transa[] = "n", 00106 transb[] = "n"; 00107 00108 // Integer values to pass to BLAS: 00109 // 00110 // M 00111 // In Fortran, the number of rows of op(A), 00112 // In the BLAS documentation, typically known as 'M'. 00113 // 00114 // In C/C++, we set: 00115 // M = n_cols(A) if (transa='n') 00116 // n_rows(A) if (transa='t') 00117 int M = static_cast<int>( A->n() ); 00118 00119 // N 00120 // In Fortran, the number of cols of op(B), and also the number of cols of C. 00121 // In the BLAS documentation, typically known as 'N'. 00122 // 00123 // In C/C++, we set: 00124 // N = n_rows(B) if (transb='n') 00125 // n_cols(B) if (transb='t') 00126 int N = static_cast<int>( B->m() ); 00127 00128 // K 00129 // In Fortran, the number of cols of op(A), and also 00130 // the number of rows of op(B). In the BLAS documentation, 00131 // typically known as 'K'. 00132 // 00133 // In C/C++, we set: 00134 // K = n_rows(A) if (transa='n') 00135 // n_cols(A) if (transa='t') 00136 int K = static_cast<int>( A->m() ); 00137 00138 // LDA (leading dimension of A). In our cases, 00139 // LDA is always the number of columns of A. 00140 int LDA = static_cast<int>( A->n() ); 00141 00142 // LDB (leading dimension of B). In our cases, 00143 // LDB is always the number of columns of B. 00144 int LDB = static_cast<int>( B->n() ); 00145 00146 if (flag == LEFT_MULTIPLY_TRANSPOSE) 00147 { 00148 transb[0] = 't'; 00149 N = static_cast<int>( B->n() ); 00150 } 00151 00152 else if (flag == RIGHT_MULTIPLY_TRANSPOSE) 00153 { 00154 transa[0] = 't'; 00155 std::swap(M,K); 00156 } 00157 00158 // LDC (leading dimension of C). LDC is the 00159 // number of columns in the solution matrix. 00160 int LDC = M; 00161 00162 // Scalar values to pass to BLAS 00163 // 00164 // scalar multiplying the whole product AB 00165 T alpha = 1.; 00166 00167 // scalar multiplying C, which is the original matrix. 00168 T beta = 0.; 00169 00170 // Storage for the result 00171 std::vector<T> result (result_size); 00172 00173 // Finally ready to call the BLAS 00174 BLASgemm_(transa, transb, &M, &N, &K, &alpha, &(A->_val[0]), &LDA, &(B->_val[0]), &LDB, &beta, &result[0], &LDC); 00175 00176 // Update the relevant dimension for this matrix. 00177 switch (flag) 00178 { 00179 case LEFT_MULTIPLY: { this->_m = other.m(); break; } 00180 case RIGHT_MULTIPLY: { this->_n = other.n(); break; } 00181 case LEFT_MULTIPLY_TRANSPOSE: { this->_m = other.n(); break; } 00182 case RIGHT_MULTIPLY_TRANSPOSE: { this->_n = other.m(); break; } 00183 default: 00184 { 00185 libMesh::out << "Unknown flag selected." << std::endl; 00186 libmesh_error(); 00187 } 00188 } 00189 00190 // Swap my data vector with the result 00191 this->_val.swap(result); 00192 } 00193 00194 #else 00195 00196 template<typename T> 00197 void DenseMatrix<T>::_multiply_blas(const DenseMatrixBase<T>& , 00198 _BLAS_Multiply_Flag ) 00199 { 00200 libMesh::err << "No PETSc-provided BLAS/LAPACK available!" << std::endl; 00201 libmesh_error(); 00202 } 00203 00204 #endif 00205 00206 00207 00208 00209 00210 00211 00212 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS) 00213 00214 template<typename T> 00215 void DenseMatrix<T>::_lu_decompose_lapack () 00216 { 00217 // If this function was called, there better not be any 00218 // previous decomposition of the matrix. 00219 libmesh_assert_equal_to (this->_decomposition_type, NONE); 00220 00221 // The calling sequence for dgetrf is: 00222 // dgetrf(M, N, A, lda, ipiv, info) 00223 00224 // M (input) int* 00225 // The number of rows of the matrix A. M >= 0. 00226 // In C/C++, pass the number of *cols* of A 00227 int M = this->n(); 00228 00229 // N (input) int* 00230 // The number of columns of the matrix A. N >= 0. 00231 // In C/C++, pass the number of *rows* of A 00232 int N = this->m(); 00233 00234 // A (input/output) double precision array, dimension (LDA,N) 00235 // On entry, the M-by-N matrix to be factored. 00236 // On exit, the factors L and U from the factorization 00237 // A = P*L*U; the unit diagonal elements of L are not stored. 00238 // Here, we pass &(_val[0]). 00239 00240 // LDA (input) int* 00241 // The leading dimension of the array A. LDA >= max(1,M). 00242 int LDA = M; 00243 00244 // ipiv (output) integer array, dimension (min(m,n)) 00245 // The pivot indices; for 1 <= i <= min(m,n), row i of the 00246 // matrix was interchanged with row IPIV(i). 00247 // Here, we pass &(_pivots[0]), a private class member used to store pivots 00248 this->_pivots.resize( std::min(M,N) ); 00249 00250 // info (output) int* 00251 // = 0: successful exit 00252 // < 0: if INFO = -i, the i-th argument had an illegal value 00253 // > 0: if INFO = i, U(i,i) is exactly zero. The factorization 00254 // has been completed, but the factor U is exactly 00255 // singular, and division by zero will occur if it is used 00256 // to solve a system of equations. 00257 int INFO = 0; 00258 00259 // Ready to call the actual factorization routine through PETSc's interface 00260 LAPACKgetrf_(&M, &N, &(this->_val[0]), &LDA, &(_pivots[0]), &INFO); 00261 00262 // Check return value for errors 00263 if (INFO != 0) 00264 { 00265 libMesh::out << "INFO=" 00266 << INFO 00267 << ", Error during Lapack LU factorization!" << std::endl; 00268 libmesh_error(); 00269 } 00270 00271 // Set the flag for LU decomposition 00272 this->_decomposition_type = LU_BLAS_LAPACK; 00273 } 00274 00275 #else 00276 00277 template<typename T> 00278 void DenseMatrix<T>::_lu_decompose_lapack () 00279 { 00280 libMesh::err << "No PETSc-provided BLAS/LAPACK available!" << std::endl; 00281 libmesh_error(); 00282 } 00283 00284 #endif 00285 00286 00287 00288 template<typename T> 00289 void DenseMatrix<T>::_svd_lapack (DenseVector<T>& sigma) 00290 { 00291 // The calling sequence for dgetrf is: 00292 // DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO ) 00293 00294 00295 // JOBU (input) CHARACTER*1 00296 // Specifies options for computing all or part of the matrix U: 00297 // = 'A': all M columns of U are returned in array U: 00298 // = 'S': the first min(m,n) columns of U (the left singular 00299 // vectors) are returned in the array U; 00300 // = 'O': the first min(m,n) columns of U (the left singular 00301 // vectors) are overwritten on the array A; 00302 // = 'N': no columns of U (no left singular vectors) are 00303 // computed. 00304 char JOBU = 'N'; 00305 00306 // JOBVT (input) CHARACTER*1 00307 // Specifies options for computing all or part of the matrix 00308 // V**T: 00309 // = 'A': all N rows of V**T are returned in the array VT; 00310 // = 'S': the first min(m,n) rows of V**T (the right singular 00311 // vectors) are returned in the array VT; 00312 // = 'O': the first min(m,n) rows of V**T (the right singular 00313 // vectors) are overwritten on the array A; 00314 // = 'N': no rows of V**T (no right singular vectors) are 00315 // computed. 00316 char JOBVT = 'N'; 00317 00318 std::vector<T> sigma_val; 00319 std::vector<T> U_val; 00320 std::vector<T> VT_val; 00321 00322 _svd_helper(JOBU, JOBVT, sigma_val, U_val, VT_val); 00323 00324 // Load the singular values into sigma, ignore U_val and VT_val 00325 const unsigned int n_sigma_vals = 00326 libmesh_cast_int<unsigned int>(sigma_val.size()); 00327 sigma.resize(n_sigma_vals); 00328 for(unsigned int i=0; i<n_sigma_vals; i++) 00329 sigma(i) = sigma_val[i]; 00330 00331 } 00332 00333 template<typename T> 00334 void DenseMatrix<T>::_svd_lapack (DenseVector<T>& sigma, DenseMatrix<T>& U, DenseMatrix<T>& VT) 00335 { 00336 // The calling sequence for dgetrf is: 00337 // DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO ) 00338 00339 00340 // JOBU (input) CHARACTER*1 00341 // Specifies options for computing all or part of the matrix U: 00342 // = 'A': all M columns of U are returned in array U: 00343 // = 'S': the first min(m,n) columns of U (the left singular 00344 // vectors) are returned in the array U; 00345 // = 'O': the first min(m,n) columns of U (the left singular 00346 // vectors) are overwritten on the array A; 00347 // = 'N': no columns of U (no left singular vectors) are 00348 // computed. 00349 char JOBU = 'S'; 00350 00351 // JOBVT (input) CHARACTER*1 00352 // Specifies options for computing all or part of the matrix 00353 // V**T: 00354 // = 'A': all N rows of V**T are returned in the array VT; 00355 // = 'S': the first min(m,n) rows of V**T (the right singular 00356 // vectors) are returned in the array VT; 00357 // = 'O': the first min(m,n) rows of V**T (the right singular 00358 // vectors) are overwritten on the array A; 00359 // = 'N': no rows of V**T (no right singular vectors) are 00360 // computed. 00361 char JOBVT = 'S'; 00362 00363 std::vector<T> sigma_val; 00364 std::vector<T> U_val; 00365 std::vector<T> VT_val; 00366 00367 _svd_helper(JOBU, JOBVT, sigma_val, U_val, VT_val); 00368 00369 // Load the singular values into sigma, ignore U_val and VT_val 00370 const unsigned int n_sigma_vals = 00371 libmesh_cast_int<unsigned int>(sigma_val.size()); 00372 sigma.resize(n_sigma_vals); 00373 for(unsigned int i=0; i<n_sigma_vals; i++) 00374 sigma(i) = sigma_val[i]; 00375 00376 int M = this->n(); 00377 int N = this->m(); 00378 int min_MN = (M < N) ? M : N; 00379 U.resize(M,min_MN); 00380 for(unsigned int i=0; i<U.m(); i++) 00381 for(unsigned int j=0; j<U.n(); j++) 00382 { 00383 unsigned int index = i + j*U.n(); // Column major storage 00384 U(i,j) = U_val[index]; 00385 } 00386 00387 VT.resize(min_MN,N); 00388 for(unsigned int i=0; i<VT.m(); i++) 00389 for(unsigned int j=0; j<VT.n(); j++) 00390 { 00391 unsigned int index = i + j*U.n(); // Column major storage 00392 VT(i,j) = VT_val[index]; 00393 } 00394 00395 } 00396 00397 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS) 00398 00399 template<typename T> 00400 void DenseMatrix<T>::_svd_helper (char JOBU, 00401 char JOBVT, 00402 std::vector<T>& sigma_val, 00403 std::vector<T>& U_val, 00404 std::vector<T>& VT_val) 00405 { 00406 00407 // M (input) int* 00408 // The number of rows of the matrix A. M >= 0. 00409 // In C/C++, pass the number of *cols* of A 00410 int M = this->n(); 00411 00412 // N (input) int* 00413 // The number of columns of the matrix A. N >= 0. 00414 // In C/C++, pass the number of *rows* of A 00415 int N = this->m(); 00416 00417 int min_MN = (M < N) ? M : N; 00418 int max_MN = (M > N) ? M : N; 00419 00420 // A (input/output) DOUBLE PRECISION array, dimension (LDA,N) 00421 // On entry, the M-by-N matrix A. 00422 // On exit, 00423 // if JOBU = 'O', A is overwritten with the first min(m,n) 00424 // columns of U (the left singular vectors, 00425 // stored columnwise); 00426 // if JOBVT = 'O', A is overwritten with the first min(m,n) 00427 // rows of V**T (the right singular vectors, 00428 // stored rowwise); 00429 // if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A 00430 // are destroyed. 00431 // Here, we pass &(_val[0]). 00432 00433 // LDA (input) int* 00434 // The leading dimension of the array A. LDA >= max(1,M). 00435 int LDA = M; 00436 00437 // S (output) DOUBLE PRECISION array, dimension (min(M,N)) 00438 // The singular values of A, sorted so that S(i) >= S(i+1). 00439 sigma_val.resize( min_MN ); 00440 00441 // LDU (input) INTEGER 00442 // The leading dimension of the array U. LDU >= 1; if 00443 // JOBU = 'S' or 'A', LDU >= M. 00444 int LDU = M; 00445 00446 // U (output) DOUBLE PRECISION array, dimension (LDU,UCOL) 00447 // (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. 00448 // If JOBU = 'A', U contains the M-by-M orthogonal matrix U; 00449 // if JOBU = 'S', U contains the first min(m,n) columns of U 00450 // (the left singular vectors, stored columnwise); 00451 // if JOBU = 'N' or 'O', U is not referenced. 00452 U_val.resize( LDU*M ); 00453 00454 // LDVT (input) INTEGER 00455 // The leading dimension of the array VT. LDVT >= 1; if 00456 // JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). 00457 int LDVT = N; 00458 00459 // VT (output) DOUBLE PRECISION array, dimension (LDVT,N) 00460 // If JOBVT = 'A', VT contains the N-by-N orthogonal matrix 00461 // V**T; 00462 // if JOBVT = 'S', VT contains the first min(m,n) rows of 00463 // V**T (the right singular vectors, stored rowwise); 00464 // if JOBVT = 'N' or 'O', VT is not referenced. 00465 VT_val.resize( LDVT*N ); 00466 00467 // LWORK (input) INTEGER 00468 // The dimension of the array WORK. 00469 // LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)). 00470 // For good performance, LWORK should generally be larger. 00471 // 00472 // If LWORK = -1, then a workspace query is assumed; the routine 00473 // only calculates the optimal size of the WORK array, returns 00474 // this value as the first entry of the WORK array, and no error 00475 // message related to LWORK is issued by XERBLA. 00476 int larger = (3*min_MN+max_MN > 5*min_MN) ? 3*min_MN+max_MN : 5*min_MN; 00477 int LWORK = (larger > 1) ? larger : 1; 00478 00479 00480 // WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) 00481 // On exit, if INFO = 0, WORK(1) returns the optimal LWORK; 00482 // if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged 00483 // superdiagonal elements of an upper bidiagonal matrix B 00484 // whose diagonal is in S (not necessarily sorted). B 00485 // satisfies A = U * B * VT, so it has the same singular values 00486 // as A, and singular vectors related by U and VT. 00487 std::vector<T> WORK( LWORK ); 00488 00489 // INFO (output) INTEGER 00490 // = 0: successful exit. 00491 // < 0: if INFO = -i, the i-th argument had an illegal value. 00492 // > 0: if DBDSQR did not converge, INFO specifies how many 00493 // superdiagonals of an intermediate bidiagonal form B 00494 // did not converge to zero. See the description of WORK 00495 // above for details. 00496 int INFO = 0; 00497 00498 // Ready to call the actual factorization routine through PETSc's interface 00499 LAPACKgesvd_(&JOBU, &JOBVT, &M, &N, &(_val[0]), &LDA, &(sigma_val[0]), &(U_val[0]), 00500 &LDU, &(VT_val[0]), &LDVT, &(WORK[0]), &LWORK, &INFO); 00501 00502 // Check return value for errors 00503 if (INFO != 0) 00504 { 00505 libMesh::out << "INFO=" 00506 << INFO 00507 << ", Error during Lapack SVD calculation!" << std::endl; 00508 libmesh_error(); 00509 } 00510 } 00511 00512 00513 #else 00514 00515 template<typename T> 00516 void DenseMatrix<T>::_svd_helper (char, 00517 char, 00518 std::vector<T>&, 00519 std::vector<T>&, 00520 std::vector<T>&) 00521 { 00522 libMesh::err << "No PETSc-provided BLAS/LAPACK available!" << std::endl; 00523 libmesh_error(); 00524 } 00525 00526 #endif 00527 00528 00529 00530 00531 00532 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS) 00533 00534 template<typename T> 00535 void DenseMatrix<T>::_lu_back_substitute_lapack (const DenseVector<T>& b, 00536 DenseVector<T>& x) 00537 { 00538 // The calling sequence for getrs is: 00539 // dgetrs(TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO) 00540 00541 // trans (input) char* 00542 // 'n' for no tranpose, 't' for transpose 00543 char TRANS[] = "t"; 00544 00545 // N (input) int* 00546 // The order of the matrix A. N >= 0. 00547 int N = this->m(); 00548 00549 00550 // NRHS (input) int* 00551 // The number of right hand sides, i.e., the number of columns 00552 // of the matrix B. NRHS >= 0. 00553 int NRHS = 1; 00554 00555 // A (input) DOUBLE PRECISION array, dimension (LDA,N) 00556 // The factors L and U from the factorization A = P*L*U 00557 // as computed by dgetrf. 00558 // Here, we pass &(_val[0]) 00559 00560 // LDA (input) int* 00561 // The leading dimension of the array A. LDA >= max(1,N). 00562 int LDA = N; 00563 00564 // ipiv (input) int array, dimension (N) 00565 // The pivot indices from DGETRF; for 1<=i<=N, row i of the 00566 // matrix was interchanged with row IPIV(i). 00567 // Here, we pass &(_pivots[0]) which was computed in _lu_decompose_lapack 00568 00569 // B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) 00570 // On entry, the right hand side matrix B. 00571 // On exit, the solution matrix X. 00572 // Here, we pass a copy of the rhs vector's data array in x, so that the 00573 // passed right-hand side b is unmodified. I don't see a way around this 00574 // copy if we want to maintain an unmodified rhs in LibMesh. 00575 x = b; 00576 std::vector<T>& x_vec = x.get_values(); 00577 00578 // We can avoid the copy if we don't care about overwriting the RHS: just 00579 // pass b to the Lapack routine and then swap with x before exiting 00580 // std::vector<T>& x_vec = b.get_values(); 00581 00582 // LDB (input) int* 00583 // The leading dimension of the array B. LDB >= max(1,N). 00584 int LDB = N; 00585 00586 // INFO (output) int* 00587 // = 0: successful exit 00588 // < 0: if INFO = -i, the i-th argument had an illegal value 00589 int INFO = 0; 00590 00591 // Finally, ready to call the Lapack getrs function 00592 LAPACKgetrs_(TRANS, &N, &NRHS, &(_val[0]), &LDA, &(_pivots[0]), &(x_vec[0]), &LDB, &INFO); 00593 00594 // Check return value for errors 00595 if (INFO != 0) 00596 { 00597 libMesh::out << "INFO=" 00598 << INFO 00599 << ", Error during Lapack LU solve!" << std::endl; 00600 libmesh_error(); 00601 } 00602 00603 // Don't do this if you already made a copy of b above 00604 // Swap b and x. The solution will then be in x, and whatever was originally 00605 // in x, maybe garbage, maybe nothing, will be in b. 00606 // FIXME: Rewrite the LU and Cholesky solves to just take one input, and overwrite 00607 // the input. This *should* make user code simpler, as they don't have to create 00608 // an extra vector just to pass it in to the solve function! 00609 // b.swap(x); 00610 } 00611 00612 #else 00613 00614 template<typename T> 00615 void DenseMatrix<T>::_lu_back_substitute_lapack (const DenseVector<T>& , 00616 DenseVector<T>& ) 00617 { 00618 libMesh::err << "No PETSc-provided BLAS/LAPACK available!" << std::endl; 00619 libmesh_error(); 00620 } 00621 00622 #endif 00623 00624 00625 00626 00627 00628 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS) 00629 00630 template<typename T> 00631 void DenseMatrix<T>::_matvec_blas(T alpha, T beta, 00632 DenseVector<T>& dest, 00633 const DenseVector<T>& arg, 00634 bool trans) const 00635 { 00636 // Ensure that dest and arg sizes are compatible 00637 if (!trans) 00638 { 00639 // dest ~ A * arg 00640 // (mx1) (mxn) * (nx1) 00641 if ((dest.size() != this->m()) || (arg.size() != this->n())) 00642 { 00643 libMesh::out << "Improper input argument sizes!" << std::endl; 00644 libmesh_error(); 00645 } 00646 } 00647 00648 else // trans == true 00649 { 00650 // Ensure that dest and arg are proper size 00651 // dest ~ A^T * arg 00652 // (nx1) (nxm) * (mx1) 00653 if ((dest.size() != this->n()) || (arg.size() != this->m())) 00654 { 00655 libMesh::out << "Improper input argument sizes!" << std::endl; 00656 libmesh_error(); 00657 } 00658 } 00659 00660 // Calling sequence for dgemv: 00661 // 00662 // dgemv(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) 00663 00664 // TRANS - CHARACTER*1, 't' for transpose, 'n' for non-transpose multiply 00665 // We store everything in row-major order, so pass the transpose flag for 00666 // non-transposed matvecs and the 'n' flag for transposed matvecs 00667 char TRANS[] = "t"; 00668 if (trans) 00669 TRANS[0] = 'n'; 00670 00671 // M - INTEGER. 00672 // On entry, M specifies the number of rows of the matrix A. 00673 // In C/C++, pass the number of *cols* of A 00674 int M = this->n(); 00675 00676 // N - INTEGER. 00677 // On entry, N specifies the number of columns of the matrix A. 00678 // In C/C++, pass the number of *rows* of A 00679 int N = this->m(); 00680 00681 // ALPHA - DOUBLE PRECISION. 00682 // The scalar constant passed to this function 00683 00684 // A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). 00685 // Before entry, the leading m by n part of the array A must 00686 // contain the matrix of coefficients. 00687 // The matrix, *this. Note that _matvec_blas is called from 00688 // a const function, vector_mult(), and so we have made this function const 00689 // as well. Since BLAS knows nothing about const, we have to cast it away 00690 // now. 00691 DenseMatrix<T>& a_ref = const_cast< DenseMatrix<T>& > ( *this ); 00692 std::vector<T>& a = a_ref.get_values(); 00693 00694 // LDA - INTEGER. 00695 // On entry, LDA specifies the first dimension of A as declared 00696 // in the calling (sub) program. LDA must be at least 00697 // max( 1, m ). 00698 int LDA = M; 00699 00700 // X - DOUBLE PRECISION array of DIMENSION at least 00701 // ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' 00702 // and at least 00703 // ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. 00704 // Before entry, the incremented array X must contain the 00705 // vector x. 00706 // Here, we must cast away the const-ness of "arg" since BLAS knows 00707 // nothing about const 00708 DenseVector<T>& x_ref = const_cast< DenseVector<T>& > ( arg ); 00709 std::vector<T>& x = x_ref.get_values(); 00710 00711 // INCX - INTEGER. 00712 // On entry, INCX specifies the increment for the elements of 00713 // X. INCX must not be zero. 00714 int INCX = 1; 00715 00716 // BETA - DOUBLE PRECISION. 00717 // On entry, BETA specifies the scalar beta. When BETA is 00718 // supplied as zero then Y need not be set on input. 00719 // The second scalar constant passed to this function 00720 00721 // Y - DOUBLE PRECISION array of DIMENSION at least 00722 // ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' 00723 // and at least 00724 // ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. 00725 // Before entry with BETA non-zero, the incremented array Y 00726 // must contain the vector y. On exit, Y is overwritten by the 00727 // updated vector y. 00728 // The input vector "dest" 00729 std::vector<T>& y = dest.get_values(); 00730 00731 // INCY - INTEGER. 00732 // On entry, INCY specifies the increment for the elements of 00733 // Y. INCY must not be zero. 00734 int INCY = 1; 00735 00736 // Finally, ready to call the BLAS function 00737 BLASgemv_(TRANS, &M, &N, &alpha, &(a[0]), &LDA, &(x[0]), &INCX, &beta, &(y[0]), &INCY); 00738 } 00739 00740 00741 #else 00742 00743 00744 template<typename T> 00745 void DenseMatrix<T>::_matvec_blas(T , T, 00746 DenseVector<T>& , 00747 const DenseVector<T>&, 00748 bool ) const 00749 { 00750 libMesh::err << "No PETSc-provided BLAS/LAPACK available!" << std::endl; 00751 libmesh_error(); 00752 } 00753 00754 00755 #endif 00756 00757 00758 //-------------------------------------------------------------- 00759 // Explicit instantiations 00760 template void DenseMatrix<Real>::_multiply_blas(const DenseMatrixBase<Real>&, _BLAS_Multiply_Flag); 00761 template void DenseMatrix<Real>::_lu_decompose_lapack(); 00762 template void DenseMatrix<Real>::_lu_back_substitute_lapack(const DenseVector<Real>& , 00763 DenseVector<Real>&); 00764 template void DenseMatrix<Real>::_matvec_blas(Real, Real, 00765 DenseVector<Real>& , 00766 const DenseVector<Real>&, 00767 bool ) const; 00768 template void DenseMatrix<Real>::_svd_lapack(DenseVector<Real>&); 00769 template void DenseMatrix<Real>::_svd_lapack(DenseVector<Real>&, DenseMatrix<Real>&, DenseMatrix<Real>&); 00770 template void DenseMatrix<Real>::_svd_helper (char, char, std::vector<Real>&, 00771 std::vector<Real>&, 00772 std::vector<Real>& ); 00773 00774 #if !(LIBMESH_USE_REAL_NUMBERS) 00775 template void DenseMatrix<Number>::_multiply_blas(const DenseMatrixBase<Number>&, _BLAS_Multiply_Flag); 00776 template void DenseMatrix<Number>::_lu_decompose_lapack(); 00777 template void DenseMatrix<Number>::_lu_back_substitute_lapack(const DenseVector<Number>& , 00778 DenseVector<Number>&); 00779 template void DenseMatrix<Number>::_matvec_blas(Number, Number, 00780 DenseVector<Number>& , 00781 const DenseVector<Number>&, 00782 bool ) const; 00783 template void DenseMatrix<Number>::_svd_lapack(DenseVector<Number>&); 00784 template void DenseMatrix<Number>::_svd_lapack(DenseVector<Number>&, DenseMatrix<Number>&, DenseMatrix<Number>&); 00785 template void DenseMatrix<Number>::_svd_helper (char, char, std::vector<Number>&, 00786 std::vector<Number>&, 00787 std::vector<Number>& ); 00788 #endif 00789 00790 } // namespace libMesh 00791
Site Created By: libMesh Developers
Last modified: February 05 2013 19:54:46 UTC
Hosted By: