The source file eigenproblems_ex3.C with comments:
#include <fstream>
libMesh include files.
#include "libmesh/libmesh.h"
#include "libmesh/mesh.h"
#include "libmesh/mesh_generation.h"
#include "libmesh/exodusII_io.h"
#include "libmesh/condensed_eigen_system.h"
#include "libmesh/equation_systems.h"
#include "libmesh/fe.h"
#include "libmesh/quadrature_gauss.h"
#include "libmesh/dense_matrix.h"
#include "libmesh/sparse_matrix.h"
#include "libmesh/numeric_vector.h"
#include "libmesh/dof_map.h"
#include "libmesh/fe_interface.h"
#include "libmesh/getpot.h"
Bring in everything from the libMesh namespace
using namespace libMesh;
Function prototype. This is the function that will assemble
the eigen system. Here, we will simply assemble a mass matrix.
void assemble_matrices(EquationSystems& es,
const std::string& system_name);
We store the Dirichlet dofs in a set in order to impose the boundary conditions
void get_dirichlet_dofs(EquationSystems& es,
const std::string& system_name,
std::set<unsigned int>& global_dirichlet_dofs_set);
int main (int argc, char** argv)
{
Initialize libMesh and the dependent libraries.
LibMeshInit init (argc, argv);
This example uses an ExodusII input file
#ifndef LIBMESH_HAVE_EXODUS_API
libmesh_example_assert(false, "--enable-exodus");
#endif
This example is designed for the SLEPc eigen solver interface.
#ifndef LIBMESH_HAVE_SLEPC
if (libMesh::processor_id() == 0)
std::cerr << "ERROR: This example requires libMesh to be\n"
<< "compiled with SLEPc eigen solvers support!"
<< std::endl;
return 0;
#else
#ifdef LIBMESH_DEFAULT_SINGLE_PRECISION
SLEPc currently gives us a nasty crash with Real==float
libmesh_example_assert(false, "--disable-singleprecision");
#endif
#ifdef LIBMESH_USE_COMPLEX_NUMBERS
SLEPc currently gives us an "inner product not well defined" with
Number==complex
libmesh_example_assert(false, "--disable-complex");
#endif
Tell the user what we are doing.
{
std::cout << "Running " << argv[0];
for (int i=1; i<argc; i++)
std::cout << " " << argv[i];
std::cout << std::endl << std::endl;
}
Skip this 2D example if libMesh was compiled as 1D-only.
libmesh_example_assert(2 <= LIBMESH_DIM, "2D support");
Use GetPot to parse the command line arguments
GetPot command_line (argc, argv);
Read the mesh name from the command line
std::string mesh_name = "";
if ( command_line.search(1, "-mesh_name") )
mesh_name = command_line.next(mesh_name);
Also, read in the index of the eigenvector that we should plot
(zero-based indexing, as usual!)
unsigned int plotting_index = 0;
if ( command_line.search(1, "-plotting_index") )
plotting_index = command_line.next(plotting_index);
Finally, read in the number of eigenpairs we want to compute!
unsigned int n_evals = 0;
if ( command_line.search(1, "-n_evals") )
n_evals = command_line.next(n_evals);
Append the .e to mesh_name
std::ostringstream mesh_name_exodus;
mesh_name_exodus << mesh_name << "_mesh.e";
Mesh mesh;
mesh.read(mesh_name_exodus.str());
Print information about the mesh to the screen.
mesh.print_info();
Create an equation systems object.
EquationSystems equation_systems (mesh);
Create a CondensedEigenSystem named "Eigensystem" and (for convenience)
use a reference to the system we create.
CondensedEigenSystem & eigen_system =
equation_systems.add_system<CondensedEigenSystem> ("Eigensystem");
Declare the system variables.
Adds the variable "p" to "Eigensystem". "p"
will be approximated using second-order approximation.
eigen_system.add_variable("p", SECOND);
Give the system a pointer to the matrix assembly
function defined below.
eigen_system.attach_assemble_function (assemble_matrices);
Set the number of requested eigenpairs \p n_evals and the number
of basis vectors used in the solution algorithm.
equation_systems.parameters.set<unsigned int>("eigenpairs") = n_evals;
equation_systems.parameters.set<unsigned int>("basis vectors") = n_evals*3;
Set the solver tolerance and the maximum number of iterations.
equation_systems.parameters.set<Real>("linear solver tolerance") = pow(TOLERANCE, 5./3.);
equation_systems.parameters.set<unsigned int>
("linear solver maximum iterations") = 1000;
Set the type of the problem, here we deal with
a generalized Hermitian problem.
eigen_system.set_eigenproblem_type(GHEP);
Order the eigenvalues "smallest first"
eigen_system.eigen_solver->set_position_of_spectrum(SMALLEST_MAGNITUDE);
Initialize the data structures for the equation system.
equation_systems.init();
Prints information about the system to the screen.
equation_systems.print_info();
Pass the Dirichlet dof IDs to the CondensedEigenSystem
std::set<unsigned int> dirichlet_dof_ids;
get_dirichlet_dofs(equation_systems, "Eigensystem", dirichlet_dof_ids);
eigen_system.initialize_condensed_dofs(dirichlet_dof_ids);
Solve the system "Eigensystem".
eigen_system.solve();
Get the number of converged eigen pairs.
unsigned int nconv = eigen_system.get_n_converged();
std::cout << "Number of converged eigenpairs: " << nconv
<< "\n" << std::endl;
if (plotting_index > n_evals)
{
std::cout << "WARNING: Solver did not converge for the requested eigenvector!" << std::endl;
}
write out all of the computed eigenvalues and plot the specified eigenvector
std::ostringstream eigenvalue_output_name;
eigenvalue_output_name << mesh_name << "_evals.txt";
std::ofstream evals_file(eigenvalue_output_name.str().c_str());
for(unsigned int i=0; i<nconv; i++)
{
std::pair<Real,Real> eval = eigen_system.get_eigenpair(i);
The eigenvalues should be real!
libmesh_assert_less (eval.second, TOLERANCE);
evals_file << eval.first << std::endl;
plot the specified eigenvector
if(i == plotting_index)
{
#ifdef LIBMESH_HAVE_EXODUS_API
Write the eigen vector to file.
std::ostringstream eigenvector_output_name;
eigenvector_output_name << mesh_name << "_evec.e";
ExodusII_IO (mesh).write_equation_systems (eigenvector_output_name.str(), equation_systems);
#endif // #ifdef LIBMESH_HAVE_EXODUS_API
}
}
evals_file.close();
#endif // LIBMESH_HAVE_SLEPC
All done.
return 0;
}
void assemble_matrices(EquationSystems& es,
const std::string& system_name)
{
It is a good idea to make sure we are assembling
the proper system.
libmesh_assert_equal_to (system_name, "Eigensystem");
#ifdef LIBMESH_HAVE_SLEPC
Get a constant reference to the mesh object.
const MeshBase& mesh = es.get_mesh();
The dimension that we are running.
const unsigned int dim = mesh.mesh_dimension();
Get a reference to our system.
EigenSystem & eigen_system = es.get_system<EigenSystem> (system_name);
Get a constant reference to the Finite Element type
for the first (and only) variable in the system.
FEType fe_type = eigen_system.get_dof_map().variable_type(0);
A reference to the two system matrices
SparseMatrix<Number>& matrix_A = *eigen_system.matrix_A;
SparseMatrix<Number>& matrix_B = *eigen_system.matrix_B;
Build a Finite Element object of the specified type. Since the
\p FEBase::build() member dynamically creates memory we will
store the object as an \p AutoPtr. This can be thought
of as a pointer that will clean up after itself.
AutoPtr<FEBase> fe (FEBase::build(dim, fe_type));
A Gauss quadrature rule for numerical integration.
Use the default quadrature order.
QGauss qrule (dim, fe_type.default_quadrature_order());
Tell the finite element object to use our quadrature rule.
fe->attach_quadrature_rule (&qrule);
The element Jacobian * quadrature weight at each integration point.
const std::vector<Real>& JxW = fe->get_JxW();
The element shape functions evaluated at the quadrature points.
const std::vector<std::vector<Real> >& phi = fe->get_phi();
The element shape function gradients evaluated at the quadrature
points.
const std::vector<std::vector<RealGradient> >& dphi = fe->get_dphi();
A reference to the \p DofMap object for this system. The \p DofMap
object handles the index translation from node and element numbers
to degree of freedom numbers.
const DofMap& dof_map = eigen_system.get_dof_map();
The element mass and stiffness matrices.
DenseMatrix<Number> Me;
DenseMatrix<Number> Ke;
This vector will hold the degree of freedom indices for
the element. These define where in the global system
the element degrees of freedom get mapped.
std::vector<dof_id_type> dof_indices;
Now we will loop over all the elements in the mesh that
live on the local processor. We will compute the element
matrix and right-hand-side contribution. In case users
later modify this program to include refinement, we will
be safe and will only consider the active elements;
hence we use a variant of the \p active_elem_iterator.
MeshBase::const_element_iterator el = mesh.active_local_elements_begin();
const MeshBase::const_element_iterator end_el = mesh.active_local_elements_end();
for ( ; el != end_el; ++el)
{
Store a pointer to the element we are currently
working on. This allows for nicer syntax later.
const Elem* elem = *el;
Get the degree of freedom indices for the
current element. These define where in the global
matrix and right-hand-side this element will
contribute to.
dof_map.dof_indices (elem, dof_indices);
Compute the element-specific data for the current
element. This involves computing the location of the
quadrature points (q_point) and the shape functions
(phi, dphi) for the current element.
fe->reinit (elem);
Zero the element matrices before
summing them. We use the resize member here because
the number of degrees of freedom might have changed from
the last element. Note that this will be the case if the
element type is different (i.e. the last element was a
triangle, now we are on a quadrilateral).
Ke.resize (dof_indices.size(), dof_indices.size());
Me.resize (dof_indices.size(), dof_indices.size());
Now loop over the quadrature points. This handles
the numeric integration.
We will build the element matrix. This involves a double loop to integrate the test funcions (i) against the trial functions (j).
We will build the element matrix. This involves a double loop to integrate the test funcions (i) against the trial functions (j).
for (unsigned int qp=0; qp<qrule.n_points(); qp++)
for (unsigned int i=0; i<phi.size(); i++)
for (unsigned int j=0; j<phi.size(); j++)
{
Me(i,j) += JxW[qp]*phi[i][qp]*phi[j][qp];
Ke(i,j) += JxW[qp]*(dphi[i][qp]*dphi[j][qp]);
}
On an unrefined mesh, constrain_element_matrix does
nothing. If this assembly function is ever repurposed to
run on a refined mesh, getting the hanging node constraints
right will be important. Note that, even with
asymmetric_constraint_rows = false, the constrained dof
diagonals still exist in the matrix, with diagonal entries
that are there to ensure non-singular matrices for linear
solves but which would generate positive non-physical
eigenvalues for eigensolves.
dof_map.constrain_element_matrix(Ke, dof_indices, false);
dof_map.constrain_element_matrix(Me, dof_indices, false);
Finally, simply add the element contribution to the overall matrices A and B.
Finally, simply add the element contribution to the overall matrices A and B.
matrix_A.add_matrix (Ke, dof_indices);
matrix_B.add_matrix (Me, dof_indices);
} // end of element loop
#endif // LIBMESH_HAVE_SLEPC
/**
* All done!
*/
return;
}
void get_dirichlet_dofs(EquationSystems& es,
const std::string& system_name,
std::set<unsigned int>& dirichlet_dof_ids)
{
#ifdef LIBMESH_HAVE_SLEPC
dirichlet_dof_ids.clear();
It is a good idea to make sure we are assembling
the proper system.
libmesh_assert_equal_to (system_name, "Eigensystem");
Get a constant reference to the mesh object.
const MeshBase& mesh = es.get_mesh();
The dimension that we are running.
const unsigned int dim = mesh.mesh_dimension();
Get a reference to our system.
EigenSystem & eigen_system = es.get_system<EigenSystem> (system_name);
Get a constant reference to the Finite Element type
for the first (and only) variable in the system.
FEType fe_type = eigen_system.get_dof_map().variable_type(0);
const DofMap& dof_map = eigen_system.get_dof_map();
This vector will hold the degree of freedom indices for
the element. These define where in the global system
the element degrees of freedom get mapped.
std::vector<unsigned int> dof_indices;
Now we will loop over all the elements in the mesh that
live on the local processor. We will compute the element
matrix and right-hand-side contribution. In case users
later modify this program to include refinement, we will
be safe and will only consider the active elements;
hence we use a variant of the \p active_elem_iterator.
MeshBase::const_element_iterator el = mesh.active_local_elements_begin();
const MeshBase::const_element_iterator end_el = mesh.active_local_elements_end();
for ( ; el != end_el; ++el)
{
Store a pointer to the element we are currently
working on. This allows for nicer syntax later.
const Elem* elem = *el;
Get the degree of freedom indices for the
current element. These define where in the global
matrix and right-hand-side this element will
contribute to.
dof_map.dof_indices (elem, dof_indices);
{
All boundary dofs are Dirichlet dofs in this case
for (unsigned int s=0; s<elem->n_sides(); s++)
if (elem->neighbor(s) == NULL)
{
std::vector<unsigned int> side_dofs;
FEInterface::dofs_on_side(elem, dim, fe_type,
s, side_dofs);
for(unsigned int ii=0; ii<side_dofs.size(); ii++)
dirichlet_dof_ids.insert(dof_indices[side_dofs[ii]]);
}
}
} // end of element loop
#endif // LIBMESH_HAVE_SLEPC
/**
* All done!
*/
return;
}
The source file eigenproblems_ex3.C without comments:
#include <fstream>
#include "libmesh/libmesh.h"
#include "libmesh/mesh.h"
#include "libmesh/mesh_generation.h"
#include "libmesh/exodusII_io.h"
#include "libmesh/condensed_eigen_system.h"
#include "libmesh/equation_systems.h"
#include "libmesh/fe.h"
#include "libmesh/quadrature_gauss.h"
#include "libmesh/dense_matrix.h"
#include "libmesh/sparse_matrix.h"
#include "libmesh/numeric_vector.h"
#include "libmesh/dof_map.h"
#include "libmesh/fe_interface.h"
#include "libmesh/getpot.h"
using namespace libMesh;
void assemble_matrices(EquationSystems& es,
const std::string& system_name);
void get_dirichlet_dofs(EquationSystems& es,
const std::string& system_name,
std::set<unsigned int>& global_dirichlet_dofs_set);
int main (int argc, char** argv)
{
LibMeshInit init (argc, argv);
#ifndef LIBMESH_HAVE_EXODUS_API
libmesh_example_assert(false, "--enable-exodus");
#endif
#ifndef LIBMESH_HAVE_SLEPC
if (libMesh::processor_id() == 0)
std::cerr << "ERROR: This example requires libMesh to be\n"
<< "compiled with SLEPc eigen solvers support!"
<< std::endl;
return 0;
#else
#ifdef LIBMESH_DEFAULT_SINGLE_PRECISION
libmesh_example_assert(false, "--disable-singleprecision");
#endif
#ifdef LIBMESH_USE_COMPLEX_NUMBERS
libmesh_example_assert(false, "--disable-complex");
#endif
{
std::cout << "Running " << argv[0];
for (int i=1; i<argc; i++)
std::cout << " " << argv[i];
std::cout << std::endl << std::endl;
}
libmesh_example_assert(2 <= LIBMESH_DIM, "2D support");
GetPot command_line (argc, argv);
std::string mesh_name = "";
if ( command_line.search(1, "-mesh_name") )
mesh_name = command_line.next(mesh_name);
unsigned int plotting_index = 0;
if ( command_line.search(1, "-plotting_index") )
plotting_index = command_line.next(plotting_index);
unsigned int n_evals = 0;
if ( command_line.search(1, "-n_evals") )
n_evals = command_line.next(n_evals);
std::ostringstream mesh_name_exodus;
mesh_name_exodus << mesh_name << "_mesh.e";
Mesh mesh;
mesh.read(mesh_name_exodus.str());
mesh.print_info();
EquationSystems equation_systems (mesh);
CondensedEigenSystem & eigen_system =
equation_systems.add_system<CondensedEigenSystem> ("Eigensystem");
eigen_system.add_variable("p", SECOND);
eigen_system.attach_assemble_function (assemble_matrices);
equation_systems.parameters.set<unsigned int>("eigenpairs") = n_evals;
equation_systems.parameters.set<unsigned int>("basis vectors") = n_evals*3;
equation_systems.parameters.set<Real>("linear solver tolerance") = pow(TOLERANCE, 5./3.);
equation_systems.parameters.set<unsigned int>
("linear solver maximum iterations") = 1000;
eigen_system.set_eigenproblem_type(GHEP);
eigen_system.eigen_solver->set_position_of_spectrum(SMALLEST_MAGNITUDE);
equation_systems.init();
equation_systems.print_info();
std::set<unsigned int> dirichlet_dof_ids;
get_dirichlet_dofs(equation_systems, "Eigensystem", dirichlet_dof_ids);
eigen_system.initialize_condensed_dofs(dirichlet_dof_ids);
eigen_system.solve();
unsigned int nconv = eigen_system.get_n_converged();
std::cout << "Number of converged eigenpairs: " << nconv
<< "\n" << std::endl;
if (plotting_index > n_evals)
{
std::cout << "WARNING: Solver did not converge for the requested eigenvector!" << std::endl;
}
std::ostringstream eigenvalue_output_name;
eigenvalue_output_name << mesh_name << "_evals.txt";
std::ofstream evals_file(eigenvalue_output_name.str().c_str());
for(unsigned int i=0; i<nconv; i++)
{
std::pair<Real,Real> eval = eigen_system.get_eigenpair(i);
libmesh_assert_less (eval.second, TOLERANCE);
evals_file << eval.first << std::endl;
if(i == plotting_index)
{
#ifdef LIBMESH_HAVE_EXODUS_API
std::ostringstream eigenvector_output_name;
eigenvector_output_name << mesh_name << "_evec.e";
ExodusII_IO (mesh).write_equation_systems (eigenvector_output_name.str(), equation_systems);
#endif // #ifdef LIBMESH_HAVE_EXODUS_API
}
}
evals_file.close();
#endif // LIBMESH_HAVE_SLEPC
return 0;
}
void assemble_matrices(EquationSystems& es,
const std::string& system_name)
{
libmesh_assert_equal_to (system_name, "Eigensystem");
#ifdef LIBMESH_HAVE_SLEPC
const MeshBase& mesh = es.get_mesh();
const unsigned int dim = mesh.mesh_dimension();
EigenSystem & eigen_system = es.get_system<EigenSystem> (system_name);
FEType fe_type = eigen_system.get_dof_map().variable_type(0);
SparseMatrix<Number>& matrix_A = *eigen_system.matrix_A;
SparseMatrix<Number>& matrix_B = *eigen_system.matrix_B;
AutoPtr<FEBase> fe (FEBase::build(dim, fe_type));
QGauss qrule (dim, fe_type.default_quadrature_order());
fe->attach_quadrature_rule (&qrule);
const std::vector<Real>& JxW = fe->get_JxW();
const std::vector<std::vector<Real> >& phi = fe->get_phi();
const std::vector<std::vector<RealGradient> >& dphi = fe->get_dphi();
const DofMap& dof_map = eigen_system.get_dof_map();
DenseMatrix<Number> Me;
DenseMatrix<Number> Ke;
std::vector<dof_id_type> dof_indices;
MeshBase::const_element_iterator el = mesh.active_local_elements_begin();
const MeshBase::const_element_iterator end_el = mesh.active_local_elements_end();
for ( ; el != end_el; ++el)
{
const Elem* elem = *el;
dof_map.dof_indices (elem, dof_indices);
fe->reinit (elem);
Ke.resize (dof_indices.size(), dof_indices.size());
Me.resize (dof_indices.size(), dof_indices.size());
for (unsigned int qp=0; qp<qrule.n_points(); qp++)
for (unsigned int i=0; i<phi.size(); i++)
for (unsigned int j=0; j<phi.size(); j++)
{
Me(i,j) += JxW[qp]*phi[i][qp]*phi[j][qp];
Ke(i,j) += JxW[qp]*(dphi[i][qp]*dphi[j][qp]);
}
matrix_A.add_matrix (Ke, dof_indices);
matrix_B.add_matrix (Me, dof_indices);
} // end of element loop
#endif // LIBMESH_HAVE_SLEPC
/**
* All done!
*/
return;
}
void get_dirichlet_dofs(EquationSystems& es,
const std::string& system_name,
std::set<unsigned int>& dirichlet_dof_ids)
{
#ifdef LIBMESH_HAVE_SLEPC
dirichlet_dof_ids.clear();
libmesh_assert_equal_to (system_name, "Eigensystem");
const MeshBase& mesh = es.get_mesh();
const unsigned int dim = mesh.mesh_dimension();
EigenSystem & eigen_system = es.get_system<EigenSystem> (system_name);
FEType fe_type = eigen_system.get_dof_map().variable_type(0);
const DofMap& dof_map = eigen_system.get_dof_map();
std::vector<unsigned int> dof_indices;
MeshBase::const_element_iterator el = mesh.active_local_elements_begin();
const MeshBase::const_element_iterator end_el = mesh.active_local_elements_end();
for ( ; el != end_el; ++el)
{
const Elem* elem = *el;
dof_map.dof_indices (elem, dof_indices);
{
for (unsigned int s=0; s<elem->n_sides(); s++)
if (elem->neighbor(s) == NULL)
{
std::vector<unsigned int> side_dofs;
FEInterface::dofs_on_side(elem, dim, fe_type,
s, side_dofs);
for(unsigned int ii=0; ii<side_dofs.size(); ii++)
dirichlet_dof_ids.insert(dof_indices[side_dofs[ii]]);
}
}
} // end of element loop
#endif // LIBMESH_HAVE_SLEPC
/**
* All done!
*/
return;
}
The console output of the program:
***************************************************************
* Running Example eigenproblems_ex3:
* mpirun -np 2 example-devel -n_evals 5 -mesh_name drum1 -plotting_index 2 -pc_type bjacobi -sub_pc_type ilu -sub_pc_factor_levels 4 -sub_pc_factor_zeropivot 0 -ksp_right_pc -log_summary
***************************************************************
Running /workspace/libmesh/examples/eigenproblems/eigenproblems_ex3/.libs/lt-example-devel -n_evals 5 -mesh_name drum1 -plotting_index 2 -pc_type bjacobi -sub_pc_type ilu -sub_pc_factor_levels 4 -sub_pc_factor_zeropivot 0 -ksp_right_pc -log_summary
Mesh Information:
mesh_dimension()=2
spatial_dimension()=3
n_nodes()=1157
n_local_nodes()=591
n_elem()=536
n_local_elem()=268
n_active_elem()=536
n_subdomains()=1
n_partitions()=2
n_processors()=2
n_threads()=1
processor_id()=0
EquationSystems
n_systems()=1
System #0, "Eigensystem"
Type "Eigen"
Variables="p"
Finite Element Types="LAGRANGE", "JACOBI_20_00"
Infinite Element Mapping="CARTESIAN"
Approximation Orders="SECOND", "THIRD"
n_dofs()=1157
n_local_dofs()=591
n_constrained_dofs()=0
n_local_constrained_dofs()=0
n_vectors()=0
n_matrices()=2
DofMap Sparsity
Average On-Processor Bandwidth <= 10.8349
Average Off-Processor Bandwidth <= 0.188418
Maximum On-Processor Bandwidth <= 22
Maximum Off-Processor Bandwidth <= 10
DofMap Constraints
Number of DoF Constraints = 0
Number of Node Constraints = 0
Number of converged eigenpairs: 5
************************************************************************************************************************
*** WIDEN YOUR WINDOW TO 120 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***
************************************************************************************************************************
---------------------------------------------- PETSc Performance Summary: ----------------------------------------------
/workspace/libmesh/examples/eigenproblems/eigenproblems_ex3/.libs/lt-example-devel on a intel-12. named hbar.ices.utexas.edu with 2 processors, by benkirk Tue Feb 5 13:40:46 2013
Using Petsc Release Version 3.3.0, Patch 2, Fri Jul 13 15:42:00 CDT 2012
Max Max/Min Avg Total
Time (sec): 1.002e+00 1.00000 1.002e+00
Objects: 1.020e+02 1.00000 1.020e+02
Flops: 1.232e+09 1.02165 1.219e+09 2.438e+09
Flops/sec: 1.230e+09 1.02165 1.217e+09 2.434e+09
MPI Messages: 4.092e+04 1.00000 4.092e+04 8.185e+04
MPI Message Lengths: 4.031e+07 1.00001 9.851e+02 8.063e+07
MPI Reductions: 3.291e+04 1.00000
Flop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)
e.g., VecAXPY() for real vectors of length N --> 2N flops
and VecAXPY() for complex vectors of length N --> 8N flops
Summary of Stages: ----- Time ------ ----- Flops ----- --- Messages --- -- Message Lengths -- -- Reductions --
Avg %Total Avg %Total counts %Total Avg %Total counts %Total
0: Main Stage: 1.0018e+00 100.0% 2.4384e+09 100.0% 8.185e+04 100.0% 9.851e+02 100.0% 3.291e+04 100.0%
------------------------------------------------------------------------------------------------------------------------
See the 'Profiling' chapter of the users' manual for details on interpreting output.
Phase summary info:
Count: number of times phase was executed
Time and Flops: Max - maximum over all processors
Ratio - ratio of maximum to minimum over all processors
Mess: number of messages sent
Avg. len: average message length
Reduct: number of global reductions
Global: entire computation
Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().
%T - percent time in this phase %f - percent flops in this phase
%M - percent messages in this phase %L - percent message lengths in this phase
%R - percent reductions in this phase
Total Mflop/s: 10e-6 * (sum of flops over all processors)/(max time over all processors)
------------------------------------------------------------------------------------------------------------------------
Event Count Time (sec) Flops --- Global --- --- Stage --- Total
Max Ratio Max Ratio Max Ratio Mess Avg len Reduct %T %f %M %L %R %T %f %M %L %R Mflop/s
------------------------------------------------------------------------------------------------------------------------
--- Event Stage 0: Main Stage
STSetUp 1 1.0 3.3860e-03 1.0 8.63e+05 1.0 1.8e+01 8.9e+03 2.9e+01 0 0 0 0 0 0 0 0 0 0 510
STApply 8184 1.0 4.3120e-01 1.0 6.52e+08 1.0 3.3e+04 2.1e+03 0.0e+00 43 53 40 85 0 43 53 40 85 0 3015
EPSSetUp 1 1.0 3.7670e-03 1.0 8.63e+05 1.0 1.8e+01 8.9e+03 6.5e+01 0 0 0 0 0 0 0 0 0 0 458
EPSSolve 1 1.0 8.4995e-01 1.0 1.23e+09 1.0 8.2e+04 9.8e+02 3.3e+04 85100100100 99 85100100100 99 2866
IPOrthogonalize 8179 1.0 3.8109e-01 1.0 5.66e+08 1.0 4.9e+04 2.4e+02 3.3e+04 38 46 60 15 99 38 46 60 15 99 2914
IPInnerProduct 81744 1.0 3.3768e-01 1.0 4.19e+08 1.0 4.9e+04 2.4e+02 3.3e+04 34 34 60 15 99 34 34 60 15 99 2433
IPApplyMatrix 24528 1.0 1.9872e-01 1.0 2.47e+08 1.0 4.9e+04 2.4e+02 0.0e+00 19 20 60 15 0 19 20 60 15 0 2439
DSSolve 630 1.0 1.9167e-02 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 2 0 0 0 0 2 0 0 0 0 0
DSVectors 635 1.0 1.8272e-03 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
DSOther 1260 1.0 5.7940e-03 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 1 0 0 0 0 1 0 0 0 0 0
UpdateVectors 630 1.0 6.2318e-03 1.1 9.60e+06 1.0 0.0e+00 0.0e+00 0.0e+00 1 1 0 0 0 1 1 0 0 0 3023
VecMAXPBY 16344 1.0 2.6446e-02 1.0 1.47e+08 1.0 0.0e+00 0.0e+00 0.0e+00 3 12 0 0 0 3 12 0 0 0 10923
VecScale 7554 1.0 3.3209e-03 1.0 3.81e+06 1.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 2250
VecCopy 15 1.0 1.0014e-05 1.2 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
VecSet 16384 1.0 6.3770e-03 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 1 0 0 0 0 1 0 0 0 0 0
VecAssemblyBegin 15 1.0 1.2684e-04 1.1 0.00e+00 0.0 0.0e+00 0.0e+00 4.5e+01 0 0 0 0 0 0 0 0 0 0 0
VecAssemblyEnd 15 1.0 1.9789e-05 1.1 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
VecScatterBegin 49085 1.0 3.9564e-02 1.0 0.00e+00 0.0 8.2e+04 9.8e+02 0.0e+00 4 0100100 0 4 0100100 0 0
VecScatterEnd 49085 1.0 3.6301e-02 1.1 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 4 0 0 0 0 4 0 0 0 0 0
VecReduceArith 40872 1.0 4.5499e-02 1.0 1.72e+08 1.0 0.0e+00 0.0e+00 0.0e+00 4 14 0 0 0 4 14 0 0 0 7408
VecReduceComm 32694 1.0 7.9679e-02 1.2 0.00e+00 0.0 0.0e+00 0.0e+00 3.3e+04 7 0 0 0 99 7 0 0 0 99 0
MatMult 32712 1.0 2.5767e-01 1.0 3.30e+08 1.0 6.5e+04 2.4e+02 0.0e+00 25 27 80 19 0 25 27 80 19 0 2508
MatSolve 8184 1.0 2.5151e-01 1.0 5.69e+08 1.0 0.0e+00 0.0e+00 0.0e+00 25 47 0 0 0 25 47 0 0 0 4525
MatLUFactorSym 1 1.0 7.1406e-04 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 3.0e+00 0 0 0 0 0 0 0 0 0 0 0
MatLUFactorNum 1 1.0 9.4199e-04 1.2 8.63e+05 1.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 1832
MatAssemblyBegin 11 1.0 1.8907e-04 1.7 0.00e+00 0.0 6.0e+00 1.4e+03 1.6e+01 0 0 0 0 0 0 0 0 0 0 0
MatAssemblyEnd 11 1.0 6.3562e-04 1.0 0.00e+00 0.0 1.6e+01 6.5e+01 3.2e+01 0 0 0 0 0 0 0 0 0 0 0
MatGetRowIJ 1 1.0 6.7949e-05 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
MatGetOrdering 1 1.0 6.2013e-04 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 2.0e+00 0 0 0 0 0 0 0 0 0 0 0
MatZeroEntries 4 1.0 4.1962e-05 1.9 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
MatGetRedundant 1 1.0 3.9887e-04 1.0 0.00e+00 0.0 6.0e+00 2.2e+04 4.0e+00 0 0 0 0 0 0 0 0 0 0 0
KSPSetUp 2 1.0 1.9073e-06 2.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
KSPSolve 8184 1.0 3.3327e-01 1.0 5.69e+08 1.0 1.6e+04 4.0e+03 0.0e+00 33 47 20 80 0 33 47 20 80 0 3415
PCSetUp 1 1.0 3.3329e-03 1.0 8.63e+05 1.0 1.8e+01 8.9e+03 2.7e+01 0 0 0 0 0 0 0 0 0 0 518
PCApply 8184 1.0 3.2016e-01 1.0 5.69e+08 1.0 1.6e+04 4.0e+03 0.0e+00 32 47 20 80 0 32 47 20 80 0 3555
------------------------------------------------------------------------------------------------------------------------
Memory usage is given in bytes:
Object Type Creations Destructions Memory Descendants' Mem.
Reports information only for process 0.
--- Event Stage 0: Main Stage
Container 3 3 1644 0
Spectral Transform 1 1 736 0
Eigenproblem Solver 1 1 1680 0
Inner product 1 1 624 0
Direct solver 1 1 9408 0
Vector 42 42 133800 0
Vector Scatter 7 7 7252 0
Index Set 23 23 33860 0
IS L to G Mapping 1 1 564 0
Matrix 16 16 1239624 0
PetscRandom 1 1 608 0
Krylov Solver 2 2 2144 0
Preconditioner 2 2 1800 0
Viewer 1 0 0 0
========================================================================================================================
Average time to get PetscTime(): 9.53674e-08
Average time for MPI_Barrier(): 1.19209e-06
Average time for zero size MPI_Send(): 1.50204e-05
#PETSc Option Table entries:
-ksp_right_pc
-log_summary
-mesh_name drum1
-n_evals 5
-pc_type bjacobi
-plotting_index 2
-sub_pc_factor_levels 4
-sub_pc_factor_zeropivot 0
-sub_pc_type ilu
#End of PETSc Option Table entries
Compiled without FORTRAN kernels
Compiled with full precision matrices (default)
sizeof(short) 2 sizeof(int) 4 sizeof(long) 8 sizeof(void*) 8 sizeof(PetscScalar) 8 sizeof(PetscInt) 4
Configure run at: Thu Nov 8 11:21:02 2012
Configure options: --with-debugging=false --COPTFLAGS=-O3 --CXXOPTFLAGS=-O3 --FOPTFLAGS=-O3 --with-clanguage=C++ --with-shared-libraries=1 --with-mpi-dir=/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1 --with-mumps=true --download-mumps=1 --with-metis=true --download-metis=1 --with-parmetis=true --download-parmetis=1 --with-superlu=true --download-superlu=1 --with-superludir=true --download-superlu_dist=1 --with-blacs=true --download-blacs=1 --with-scalapack=true --download-scalapack=1 --with-hypre=true --download-hypre=1 --with-blas-lib="[/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_intel_lp64.so,/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_sequential.so,/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_core.so]" --with-lapack-lib="[/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_lapack95_lp64.a]"
-----------------------------------------
Libraries compiled on Thu Nov 8 11:21:02 2012 on daedalus.ices.utexas.edu
Machine characteristics: Linux-2.6.32-279.1.1.el6.x86_64-x86_64-with-redhat-6.3-Carbon
Using PETSc directory: /opt/apps/ossw/libraries/petsc/petsc-3.3-p2
Using PETSc arch: intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt
-----------------------------------------
Using C compiler: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpicxx -wd1572 -O3 -fPIC ${COPTFLAGS} ${CFLAGS}
Using Fortran compiler: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpif90 -fPIC -O3 ${FOPTFLAGS} ${FFLAGS}
-----------------------------------------
Using include paths: -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/include -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/include -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/include -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/include -I/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/include
-----------------------------------------
Using C linker: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpicxx
Using Fortran linker: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpif90
Using libraries: -Wl,-rpath,/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -L/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -lpetsc -lX11 -Wl,-rpath,/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -L/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -lcmumps -ldmumps -lsmumps -lzmumps -lmumps_common -lpord -lHYPRE -lpthread -lsuperlu_dist_3.0 -lparmetis -lmetis -lscalapack -lblacs -lsuperlu_4.3 -Wl,-rpath,/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64 -L/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64 -lmkl_lapack95_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,-rpath,/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/lib -L/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/lib -Wl,-rpath,/opt/apps/sysnet/intel/12.1/composer_xe_2011_sp1.7.256/compiler/lib/intel64 -L/opt/apps/sysnet/intel/12.1/composer_xe_2011_sp1.7.256/compiler/lib/intel64 -Wl,-rpath,/usr/lib/gcc/x86_64-redhat-linux/4.4.6 -L/usr/lib/gcc/x86_64-redhat-linux/4.4.6 -lmpichf90 -lifport -lifcore -lm -lm -lmpichcxx -ldl -lmpich -lopa -lmpl -lrt -lpthread -limf -lsvml -lipgo -ldecimal -lcilkrts -lstdc++ -lgcc_s -lirc -lirc_s -ldl
-----------------------------------------
----------------------------------------------------------------------------------------------------------------------
| Processor id: 0 |
| Num Processors: 2 |
| Time: Tue Feb 5 13:40:46 2013 |
| OS: Linux |
| HostName: hbar.ices.utexas.edu |
| OS Release: 2.6.32-279.1.1.el6.x86_64 |
| OS Version: #1 SMP Tue Jul 10 11:24:23 CDT 2012 |
| Machine: x86_64 |
| Username: benkirk |
| Configuration: ./configure '--enable-everything' |
| '--prefix=/workspace/libmesh/install' |
| 'CXX=mpicxx' |
| 'CC=mpicc' |
| 'F77=mpif77' |
| 'FC=mpif90' |
| 'PETSC_DIR=/opt/apps/ossw/libraries/petsc/petsc-3.3-p2' |
| 'PETSC_ARCH=intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt' |
| 'SLEPC_DIR=/opt/apps/ossw/libraries/slepc/slepc-3.3-p2-petsc-3.3-p2-cxx-opt' |
| 'TRILINOS_DIR=/opt/apps/ossw/libraries/trilinos/trilinos-10.12.2/sl6/intel-12.1/mpich2-1.4.1p1/mkl-intel-10.3.12.361'|
| 'VTK_DIR=/opt/apps/ossw/libraries/vtk/vtk-5.10.0/sl6/intel-12.1' |
----------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------
| libMesh Performance: Alive time=1.01288, Active time=0.989436 |
----------------------------------------------------------------------------------------------------------------
| Event nCalls Total Time Avg Time Total Time Avg Time % of Active Time |
| w/o Sub w/o Sub With Sub With Sub w/o S With S |
|----------------------------------------------------------------------------------------------------------------|
| |
| |
| CondensedEigenSystem |
| get_eigenpair() 5 0.0011 0.000213 0.0011 0.000227 0.11 0.11 |
| solve() 1 0.0024 0.002390 0.8798 0.879813 0.24 88.92 |
| |
| DofMap |
| add_neighbors_to_send_list() 1 0.0089 0.008892 0.0101 0.010140 0.90 1.02 |
| build_sparsity() 1 0.0062 0.006190 0.0172 0.017214 0.63 1.74 |
| create_dof_constraints() 1 0.0005 0.000501 0.0005 0.000501 0.05 0.05 |
| distribute_dofs() 1 0.0110 0.011036 0.0248 0.024807 1.12 2.51 |
| dof_indices() 1104 0.0429 0.000039 0.0429 0.000039 4.33 4.33 |
| prepare_send_list() 1 0.0000 0.000036 0.0000 0.000036 0.00 0.00 |
| reinit() 1 0.0135 0.013529 0.0135 0.013529 1.37 1.37 |
| |
| EquationSystems |
| build_solution_vector() 1 0.0013 0.001267 0.0118 0.011835 0.13 1.20 |
| |
| ExodusII_IO |
| write_nodal_data() 1 0.0029 0.002927 0.0029 0.002927 0.30 0.30 |
| |
| FE |
| compute_shape_functions() 268 0.0029 0.000011 0.0029 0.000011 0.30 0.30 |
| init_shape_functions() 1 0.0001 0.000110 0.0001 0.000110 0.01 0.01 |
| |
| FEMap |
| compute_affine_map() 268 0.0019 0.000007 0.0019 0.000007 0.19 0.19 |
| init_reference_to_physical_map() 1 0.0001 0.000076 0.0001 0.000076 0.01 0.01 |
| |
| Mesh |
| find_neighbors() 1 0.0046 0.004572 0.0046 0.004636 0.46 0.47 |
| read() 1 0.0035 0.003461 0.0035 0.003461 0.35 0.35 |
| renumber_nodes_and_elem() 2 0.0004 0.000176 0.0004 0.000176 0.04 0.04 |
| |
| MeshCommunication |
| broadcast() 1 0.0035 0.003532 0.0068 0.006842 0.36 0.69 |
| compute_hilbert_indices() 2 0.0047 0.002364 0.0047 0.002364 0.48 0.48 |
| find_global_indices() 2 0.0019 0.000927 0.0078 0.003880 0.19 0.78 |
| parallel_sort() 2 0.0008 0.000420 0.0009 0.000457 0.08 0.09 |
| |
| MeshOutput |
| write_equation_systems() 1 0.0001 0.000092 0.0149 0.014942 0.01 1.51 |
| |
| MetisPartitioner |
| partition() 1 0.0052 0.005221 0.0088 0.008810 0.53 0.89 |
| |
| Parallel |
| allgather() 9 0.0002 0.000019 0.0002 0.000021 0.02 0.02 |
| broadcast() 6 0.0001 0.000008 0.0001 0.000008 0.01 0.01 |
| max(bool) 2 0.0000 0.000002 0.0000 0.000002 0.00 0.00 |
| max(scalar) 120 0.0003 0.000003 0.0003 0.000003 0.03 0.03 |
| max(vector) 27 0.0002 0.000006 0.0004 0.000013 0.02 0.04 |
| min(bool) 138 0.0003 0.000002 0.0003 0.000002 0.03 0.03 |
| min(scalar) 113 0.0041 0.000036 0.0041 0.000036 0.41 0.41 |
| min(vector) 27 0.0002 0.000008 0.0004 0.000016 0.02 0.04 |
| probe() 12 0.0002 0.000018 0.0002 0.000018 0.02 0.02 |
| receive() 12 0.0001 0.000007 0.0003 0.000025 0.01 0.03 |
| send() 12 0.0000 0.000004 0.0000 0.000004 0.00 0.00 |
| send_receive() 16 0.0002 0.000011 0.0006 0.000035 0.02 0.06 |
| sum() 25 0.0002 0.000009 0.0003 0.000012 0.02 0.03 |
| |
| Parallel::Request |
| wait() 12 0.0000 0.000003 0.0000 0.000003 0.00 0.00 |
| |
| Partitioner |
| set_node_processor_ids() 1 0.0007 0.000744 0.0008 0.000819 0.08 0.08 |
| set_parent_processor_ids() 1 0.0004 0.000425 0.0004 0.000425 0.04 0.04 |
| |
| SlepcEigenSolver |
| solve_generalized() 1 0.8546 0.854612 0.8546 0.854612 86.37 86.37 |
| |
| System |
| assemble() 1 0.0072 0.007185 0.0228 0.022810 0.73 2.31 |
----------------------------------------------------------------------------------------------------------------
| Totals: 2204 0.9894 100.00 |
----------------------------------------------------------------------------------------------------------------
***************************************************************
* Done Running Example eigenproblems_ex3:
* mpirun -np 2 example-devel -n_evals 5 -mesh_name drum1 -plotting_index 2 -pc_type bjacobi -sub_pc_type ilu -sub_pc_factor_levels 4 -sub_pc_factor_zeropivot 0 -ksp_right_pc -log_summary
***************************************************************
***************************************************************
* Running Example eigenproblems_ex3:
* mpirun -np 2 example-devel -n_evals 5 -mesh_name drum2 -plotting_index 2 -pc_type bjacobi -sub_pc_type ilu -sub_pc_factor_levels 4 -sub_pc_factor_zeropivot 0 -ksp_right_pc -log_summary
***************************************************************
Running /workspace/libmesh/examples/eigenproblems/eigenproblems_ex3/.libs/lt-example-devel -n_evals 5 -mesh_name drum2 -plotting_index 2 -pc_type bjacobi -sub_pc_type ilu -sub_pc_factor_levels 4 -sub_pc_factor_zeropivot 0 -ksp_right_pc -log_summary
Mesh Information:
mesh_dimension()=2
spatial_dimension()=3
n_nodes()=1189
n_local_nodes()=611
n_elem()=552
n_local_elem()=276
n_active_elem()=552
n_subdomains()=1
n_partitions()=2
n_processors()=2
n_threads()=1
processor_id()=0
EquationSystems
n_systems()=1
System #0, "Eigensystem"
Type "Eigen"
Variables="p"
Finite Element Types="LAGRANGE", "JACOBI_20_00"
Infinite Element Mapping="CARTESIAN"
Approximation Orders="SECOND", "THIRD"
n_dofs()=1189
n_local_dofs()=611
n_constrained_dofs()=0
n_local_constrained_dofs()=0
n_vectors()=0
n_matrices()=2
DofMap Sparsity
Average On-Processor Bandwidth <= 10.7973
Average Off-Processor Bandwidth <= 0.272498
Maximum On-Processor Bandwidth <= 22
Maximum Off-Processor Bandwidth <= 10
DofMap Constraints
Number of DoF Constraints = 0
Number of Node Constraints = 0
Number of converged eigenpairs: 5
************************************************************************************************************************
*** WIDEN YOUR WINDOW TO 120 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***
************************************************************************************************************************
---------------------------------------------- PETSc Performance Summary: ----------------------------------------------
/workspace/libmesh/examples/eigenproblems/eigenproblems_ex3/.libs/lt-example-devel on a intel-12. named hbar.ices.utexas.edu with 2 processors, by benkirk Tue Feb 5 13:40:48 2013
Using Petsc Release Version 3.3.0, Patch 2, Fri Jul 13 15:42:00 CDT 2012
Max Max/Min Avg Total
Time (sec): 1.056e+00 1.00000 1.056e+00
Objects: 1.020e+02 1.00000 1.020e+02
Flops: 1.206e+09 1.03861 1.183e+09 2.367e+09
Flops/sec: 1.142e+09 1.03861 1.121e+09 2.241e+09
MPI Messages: 3.811e+04 1.00000 3.811e+04 7.622e+04
MPI Message Lengths: 4.231e+07 1.00001 1.110e+03 8.461e+07
MPI Reductions: 3.066e+04 1.00000
Flop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)
e.g., VecAXPY() for real vectors of length N --> 2N flops
and VecAXPY() for complex vectors of length N --> 8N flops
Summary of Stages: ----- Time ------ ----- Flops ----- --- Messages --- -- Message Lengths -- -- Reductions --
Avg %Total Avg %Total counts %Total Avg %Total counts %Total
0: Main Stage: 1.0560e+00 100.0% 2.3667e+09 100.0% 7.622e+04 100.0% 1.110e+03 100.0% 3.066e+04 100.0%
------------------------------------------------------------------------------------------------------------------------
See the 'Profiling' chapter of the users' manual for details on interpreting output.
Phase summary info:
Count: number of times phase was executed
Time and Flops: Max - maximum over all processors
Ratio - ratio of maximum to minimum over all processors
Mess: number of messages sent
Avg. len: average message length
Reduct: number of global reductions
Global: entire computation
Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().
%T - percent time in this phase %f - percent flops in this phase
%M - percent messages in this phase %L - percent message lengths in this phase
%R - percent reductions in this phase
Total Mflop/s: 10e-6 * (sum of flops over all processors)/(max time over all processors)
------------------------------------------------------------------------------------------------------------------------
Event Count Time (sec) Flops --- Global --- --- Stage --- Total
Max Ratio Max Ratio Max Ratio Mess Avg len Reduct %T %f %M %L %R %T %f %M %L %R Mflop/s
------------------------------------------------------------------------------------------------------------------------
--- Event Stage 0: Main Stage
STSetUp 1 1.0 3.5388e-03 1.0 9.51e+05 1.0 1.8e+01 9.2e+03 2.9e+01 0 0 0 0 0 0 0 0 0 0 537
STApply 7621 1.0 4.1804e-01 1.0 6.41e+08 1.0 3.0e+04 2.2e+03 0.0e+00 40 54 40 80 0 40 54 40 80 0 3053
EPSSetUp 1 1.0 3.9270e-03 1.0 9.51e+05 1.0 1.8e+01 9.2e+03 6.5e+01 0 0 0 0 0 0 0 0 0 0 484
EPSSolve 1 1.0 8.1881e-01 1.0 1.20e+09 1.0 7.6e+04 1.1e+03 3.0e+04 78100100100 99 78100100100 99 2887
IPOrthogonalize 7616 1.0 3.6392e-01 1.0 5.51e+08 1.1 4.6e+04 3.6e+02 3.0e+04 34 45 60 20 99 34 45 60 20 99 2922
IPInnerProduct 76122 1.0 3.2315e-01 1.0 4.09e+08 1.1 4.6e+04 3.6e+02 3.0e+04 31 33 60 20 99 31 33 60 20 99 2440
IPApplyMatrix 22841 1.0 1.9264e-01 1.1 2.43e+08 1.1 4.6e+04 3.6e+02 0.0e+00 18 20 60 20 0 18 20 60 20 0 2423
DSSolve 580 1.0 1.7831e-02 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 2 0 0 0 0 2 0 0 0 0 0
DSVectors 585 1.0 1.7123e-03 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
DSOther 1160 1.0 4.9644e-03 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
UpdateVectors 580 1.0 6.3157e-03 1.1 9.23e+06 1.1 0.0e+00 0.0e+00 0.0e+00 1 1 0 0 0 1 1 0 0 0 2837
VecMAXPBY 15220 1.0 2.5096e-02 1.0 1.42e+08 1.1 0.0e+00 0.0e+00 0.0e+00 2 12 0 0 0 2 12 0 0 0 10970
VecScale 7041 1.0 3.1388e-03 1.0 3.70e+06 1.1 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 2290
VecCopy 15 1.0 7.8678e-06 1.1 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
VecSet 15258 1.0 6.1514e-03 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 1 0 0 0 0 1 0 0 0 0 0
VecAssemblyBegin 15 1.0 1.3375e-04 1.2 0.00e+00 0.0 0.0e+00 0.0e+00 4.5e+01 0 0 0 0 0 0 0 0 0 0 0
VecAssemblyEnd 15 1.0 2.1935e-05 1.2 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
VecScatterBegin 45709 1.0 3.8941e-02 1.0 0.00e+00 0.0 7.6e+04 1.1e+03 0.0e+00 4 0100100 0 4 0100100 0 0
VecScatterEnd 45709 1.0 3.5058e-02 1.1 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 3 0 0 0 0 3 0 0 0 0 0
VecReduceArith 38061 1.0 4.3163e-02 1.0 1.66e+08 1.1 0.0e+00 0.0e+00 0.0e+00 4 14 0 0 0 4 14 0 0 0 7452
VecReduceComm 30446 1.0 7.8247e-02 1.3 0.00e+00 0.0 0.0e+00 0.0e+00 3.0e+04 7 0 0 0 99 7 0 0 0 99 0
MatMult 30462 1.0 2.5039e-01 1.1 3.24e+08 1.1 6.1e+04 3.6e+02 0.0e+00 23 26 80 26 0 23 26 80 26 0 2486
MatSolve 7621 1.0 2.4718e-01 1.0 5.60e+08 1.0 0.0e+00 0.0e+00 0.0e+00 23 47 0 0 0 23 47 0 0 0 4533
MatLUFactorSym 1 1.0 8.8906e-04 1.2 0.00e+00 0.0 0.0e+00 0.0e+00 3.0e+00 0 0 0 0 0 0 0 0 0 0 0
MatLUFactorNum 1 1.0 8.5402e-04 1.0 9.51e+05 1.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 2227
MatAssemblyBegin 11 1.0 2.4319e-04 2.3 0.00e+00 0.0 6.0e+00 2.0e+03 1.6e+01 0 0 0 0 0 0 0 0 0 0 0
MatAssemblyEnd 11 1.0 6.8712e-04 1.0 0.00e+00 0.0 1.6e+01 9.7e+01 3.2e+01 0 0 0 0 0 0 0 0 0 0 0
MatGetRowIJ 1 1.0 7.2956e-05 1.1 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
MatGetOrdering 1 1.0 6.6304e-04 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 2.0e+00 0 0 0 0 0 0 0 0 0 0 0
MatZeroEntries 4 1.0 5.9843e-05 1.5 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
MatGetRedundant 1 1.0 4.0007e-04 1.0 0.00e+00 0.0 6.0e+00 2.3e+04 4.0e+00 0 0 0 0 0 0 0 0 0 0 0
KSPSetUp 2 1.0 9.5367e-07 0.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
KSPSolve 7621 1.0 3.2546e-01 1.0 5.60e+08 1.0 1.5e+04 4.1e+03 0.0e+00 31 47 20 74 0 31 47 20 74 0 3442
PCSetUp 1 1.0 3.4831e-03 1.0 9.51e+05 1.0 1.8e+01 9.2e+03 2.7e+01 0 0 0 0 0 0 0 0 0 0 546
PCApply 7621 1.0 3.1310e-01 1.0 5.60e+08 1.0 1.5e+04 4.1e+03 0.0e+00 30 47 20 74 0 30 47 20 74 0 3578
------------------------------------------------------------------------------------------------------------------------
Memory usage is given in bytes:
Object Type Creations Destructions Memory Descendants' Mem.
Reports information only for process 0.
--- Event Stage 0: Main Stage
Container 3 3 1644 0
Spectral Transform 1 1 736 0
Eigenproblem Solver 1 1 1680 0
Inner product 1 1 624 0
Direct solver 1 1 9408 0
Vector 42 42 137456 0
Vector Scatter 7 7 7252 0
Index Set 23 23 34912 0
IS L to G Mapping 1 1 564 0
Matrix 16 16 1297232 0
PetscRandom 1 1 608 0
Krylov Solver 2 2 2144 0
Preconditioner 2 2 1800 0
Viewer 1 0 0 0
========================================================================================================================
Average time to get PetscTime(): 0
Average time for MPI_Barrier(): 1.19209e-06
Average time for zero size MPI_Send(): 1.14441e-05
#PETSc Option Table entries:
-ksp_right_pc
-log_summary
-mesh_name drum2
-n_evals 5
-pc_type bjacobi
-plotting_index 2
-sub_pc_factor_levels 4
-sub_pc_factor_zeropivot 0
-sub_pc_type ilu
#End of PETSc Option Table entries
Compiled without FORTRAN kernels
Compiled with full precision matrices (default)
sizeof(short) 2 sizeof(int) 4 sizeof(long) 8 sizeof(void*) 8 sizeof(PetscScalar) 8 sizeof(PetscInt) 4
Configure run at: Thu Nov 8 11:21:02 2012
Configure options: --with-debugging=false --COPTFLAGS=-O3 --CXXOPTFLAGS=-O3 --FOPTFLAGS=-O3 --with-clanguage=C++ --with-shared-libraries=1 --with-mpi-dir=/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1 --with-mumps=true --download-mumps=1 --with-metis=true --download-metis=1 --with-parmetis=true --download-parmetis=1 --with-superlu=true --download-superlu=1 --with-superludir=true --download-superlu_dist=1 --with-blacs=true --download-blacs=1 --with-scalapack=true --download-scalapack=1 --with-hypre=true --download-hypre=1 --with-blas-lib="[/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_intel_lp64.so,/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_sequential.so,/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_core.so]" --with-lapack-lib="[/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_lapack95_lp64.a]"
-----------------------------------------
Libraries compiled on Thu Nov 8 11:21:02 2012 on daedalus.ices.utexas.edu
Machine characteristics: Linux-2.6.32-279.1.1.el6.x86_64-x86_64-with-redhat-6.3-Carbon
Using PETSc directory: /opt/apps/ossw/libraries/petsc/petsc-3.3-p2
Using PETSc arch: intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt
-----------------------------------------
Using C compiler: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpicxx -wd1572 -O3 -fPIC ${COPTFLAGS} ${CFLAGS}
Using Fortran compiler: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpif90 -fPIC -O3 ${FOPTFLAGS} ${FFLAGS}
-----------------------------------------
Using include paths: -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/include -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/include -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/include -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/include -I/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/include
-----------------------------------------
Using C linker: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpicxx
Using Fortran linker: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpif90
Using libraries: -Wl,-rpath,/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -L/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -lpetsc -lX11 -Wl,-rpath,/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -L/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -lcmumps -ldmumps -lsmumps -lzmumps -lmumps_common -lpord -lHYPRE -lpthread -lsuperlu_dist_3.0 -lparmetis -lmetis -lscalapack -lblacs -lsuperlu_4.3 -Wl,-rpath,/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64 -L/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64 -lmkl_lapack95_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,-rpath,/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/lib -L/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/lib -Wl,-rpath,/opt/apps/sysnet/intel/12.1/composer_xe_2011_sp1.7.256/compiler/lib/intel64 -L/opt/apps/sysnet/intel/12.1/composer_xe_2011_sp1.7.256/compiler/lib/intel64 -Wl,-rpath,/usr/lib/gcc/x86_64-redhat-linux/4.4.6 -L/usr/lib/gcc/x86_64-redhat-linux/4.4.6 -lmpichf90 -lifport -lifcore -lm -lm -lmpichcxx -ldl -lmpich -lopa -lmpl -lrt -lpthread -limf -lsvml -lipgo -ldecimal -lcilkrts -lstdc++ -lgcc_s -lirc -lirc_s -ldl
-----------------------------------------
----------------------------------------------------------------------------------------------------------------------
| Processor id: 0 |
| Num Processors: 2 |
| Time: Tue Feb 5 13:40:48 2013 |
| OS: Linux |
| HostName: hbar.ices.utexas.edu |
| OS Release: 2.6.32-279.1.1.el6.x86_64 |
| OS Version: #1 SMP Tue Jul 10 11:24:23 CDT 2012 |
| Machine: x86_64 |
| Username: benkirk |
| Configuration: ./configure '--enable-everything' |
| '--prefix=/workspace/libmesh/install' |
| 'CXX=mpicxx' |
| 'CC=mpicc' |
| 'F77=mpif77' |
| 'FC=mpif90' |
| 'PETSC_DIR=/opt/apps/ossw/libraries/petsc/petsc-3.3-p2' |
| 'PETSC_ARCH=intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt' |
| 'SLEPC_DIR=/opt/apps/ossw/libraries/slepc/slepc-3.3-p2-petsc-3.3-p2-cxx-opt' |
| 'TRILINOS_DIR=/opt/apps/ossw/libraries/trilinos/trilinos-10.12.2/sl6/intel-12.1/mpich2-1.4.1p1/mkl-intel-10.3.12.361'|
| 'VTK_DIR=/opt/apps/ossw/libraries/vtk/vtk-5.10.0/sl6/intel-12.1' |
----------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------
| libMesh Performance: Alive time=1.06731, Active time=1.03784 |
----------------------------------------------------------------------------------------------------------------
| Event nCalls Total Time Avg Time Total Time Avg Time % of Active Time |
| w/o Sub w/o Sub With Sub With Sub w/o S With S |
|----------------------------------------------------------------------------------------------------------------|
| |
| |
| CondensedEigenSystem |
| get_eigenpair() 5 0.0011 0.000224 0.0012 0.000238 0.11 0.11 |
| solve() 1 0.0024 0.002404 0.8520 0.851968 0.23 82.09 |
| |
| DofMap |
| add_neighbors_to_send_list() 1 0.0164 0.016401 0.0197 0.019665 1.58 1.89 |
| build_sparsity() 1 0.0112 0.011167 0.0305 0.030466 1.08 2.94 |
| create_dof_constraints() 1 0.0008 0.000815 0.0008 0.000815 0.08 0.08 |
| distribute_dofs() 1 0.0177 0.017727 0.0445 0.044543 1.71 4.29 |
| dof_indices() 1152 0.0635 0.000055 0.0635 0.000055 6.11 6.11 |
| prepare_send_list() 1 0.0001 0.000067 0.0001 0.000067 0.01 0.01 |
| reinit() 1 0.0262 0.026217 0.0262 0.026217 2.53 2.53 |
| |
| EquationSystems |
| build_solution_vector() 1 0.0013 0.001270 0.0121 0.012144 0.12 1.17 |
| |
| ExodusII_IO |
| write_nodal_data() 1 0.0030 0.002957 0.0030 0.002957 0.28 0.28 |
| |
| FE |
| compute_shape_functions() 276 0.0034 0.000012 0.0034 0.000012 0.33 0.33 |
| init_shape_functions() 1 0.0001 0.000126 0.0001 0.000126 0.01 0.01 |
| |
| FEMap |
| compute_affine_map() 276 0.0021 0.000008 0.0021 0.000008 0.21 0.21 |
| init_reference_to_physical_map() 1 0.0001 0.000085 0.0001 0.000085 0.01 0.01 |
| |
| Mesh |
| find_neighbors() 1 0.0083 0.008294 0.0084 0.008367 0.80 0.81 |
| read() 1 0.0057 0.005747 0.0057 0.005747 0.55 0.55 |
| renumber_nodes_and_elem() 2 0.0006 0.000317 0.0006 0.000317 0.06 0.06 |
| |
| MeshCommunication |
| broadcast() 1 0.0078 0.007792 0.0141 0.014133 0.75 1.36 |
| compute_hilbert_indices() 2 0.0084 0.004216 0.0084 0.004216 0.81 0.81 |
| find_global_indices() 2 0.0034 0.001711 0.0135 0.006758 0.33 1.30 |
| parallel_sort() 2 0.0012 0.000590 0.0013 0.000651 0.11 0.13 |
| |
| MeshOutput |
| write_equation_systems() 1 0.0001 0.000093 0.0153 0.015283 0.01 1.47 |
| |
| MetisPartitioner |
| partition() 1 0.0090 0.009046 0.0154 0.015431 0.87 1.49 |
| |
| Parallel |
| allgather() 9 0.0005 0.000056 0.0005 0.000058 0.05 0.05 |
| broadcast() 6 0.0001 0.000011 0.0001 0.000011 0.01 0.01 |
| max(bool) 2 0.0000 0.000004 0.0000 0.000004 0.00 0.00 |
| max(scalar) 120 0.0004 0.000003 0.0004 0.000003 0.04 0.04 |
| max(vector) 27 0.0002 0.000009 0.0005 0.000018 0.02 0.05 |
| min(bool) 138 0.0004 0.000003 0.0004 0.000003 0.04 0.04 |
| min(scalar) 113 0.0070 0.000062 0.0070 0.000062 0.67 0.67 |
| min(vector) 27 0.0003 0.000011 0.0006 0.000020 0.03 0.05 |
| probe() 12 0.0003 0.000026 0.0003 0.000026 0.03 0.03 |
| receive() 12 0.0001 0.000009 0.0004 0.000035 0.01 0.04 |
| send() 12 0.0001 0.000005 0.0001 0.000005 0.01 0.01 |
| send_receive() 16 0.0003 0.000016 0.0008 0.000050 0.03 0.08 |
| sum() 25 0.0002 0.000009 0.0004 0.000015 0.02 0.04 |
| |
| Parallel::Request |
| wait() 12 0.0001 0.000004 0.0001 0.000004 0.00 0.00 |
| |
| Partitioner |
| set_node_processor_ids() 1 0.0013 0.001346 0.0015 0.001452 0.13 0.14 |
| set_parent_processor_ids() 1 0.0009 0.000940 0.0009 0.000940 0.09 0.09 |
| |
| SlepcEigenSolver |
| solve_generalized() 1 0.8236 0.823630 0.8236 0.823630 79.36 79.36 |
| |
| System |
| assemble() 1 0.0080 0.008037 0.0259 0.025934 0.77 2.50 |
----------------------------------------------------------------------------------------------------------------
| Totals: 2268 1.0378 100.00 |
----------------------------------------------------------------------------------------------------------------
***************************************************************
* Done Running Example eigenproblems_ex3:
* mpirun -np 2 example-devel -n_evals 5 -mesh_name drum2 -plotting_index 2 -pc_type bjacobi -sub_pc_type ilu -sub_pc_factor_levels 4 -sub_pc_factor_zeropivot 0 -ksp_right_pc -log_summary
***************************************************************