The source file miscellaneous_ex4.C with comments:
#include <iostream>
#include <algorithm>
#include <cstdlib> // *must* precede <cmath> for proper std:abs() on PGI, Sun Studio CC
#include <cmath>
Basic include file needed for the mesh functionality.
#include "libmesh/libmesh.h"
#include "libmesh/mesh.h"
#include "libmesh/mesh_refinement.h"
#include "libmesh/vtk_io.h"
#include "libmesh/equation_systems.h"
#include "libmesh/fe.h"
#include "libmesh/quadrature_gauss.h"
#include "libmesh/dof_map.h"
#include "libmesh/sparse_matrix.h"
#include "libmesh/numeric_vector.h"
#include "libmesh/dense_matrix.h"
#include "libmesh/dense_vector.h"
#include "libmesh/mesh_generation.h"
#include "libmesh/sum_shell_matrix.h"
#include "libmesh/tensor_shell_matrix.h"
#include "libmesh/sparse_shell_matrix.h"
#include "libmesh/mesh_refinement.h"
#include "libmesh/getpot.h"
This example will solve a linear transient system,
so we need to include the \p TransientLinearImplicitSystem definition.
#include "libmesh/transient_system.h"
#include "libmesh/linear_implicit_system.h"
#include "libmesh/vector_value.h"
The definition of a geometric element
#include "libmesh/elem.h"
Bring in everything from the libMesh namespace
using namespace libMesh;
Function prototype. This function will assemble the system matrix
and right-hand-side.
void assemble (EquationSystems& es,
const std::string& system_name);
Begin the main program. Note that the first
statement in the program throws an error if
you are in complex number mode, since this
example is only intended to work with real
numbers.
int main (int argc, char** argv)
{
Initialize libMesh.
LibMeshInit init (argc, argv);
#if !defined(LIBMESH_ENABLE_AMR)
libmesh_example_assert(false, "--enable-amr");
#else
libmesh_example_assert(libMesh::default_solver_package() == PETSC_SOLVERS, "--enable-petsc");
Brief message to the user regarding the program name
and command line arguments.
std::cout << "Running: " << argv[0];
for (int i=1; i<argc; i++)
std::cout << " " << argv[i];
std::cout << std::endl << std::endl;
Skip this 2D example if libMesh was compiled as 1D-only.
libmesh_example_assert(2 <= LIBMESH_DIM, "2D support");
Create a mesh
Mesh mesh;
Create an equation systems object.
EquationSystems equation_systems (mesh);
MeshTools::Generation::build_square (mesh,
16,
16,
-1., 1.,
-1., 1.,
QUAD4);
LinearImplicitSystem & system =
equation_systems.add_system<LinearImplicitSystem>
("System");
Adds the variable "u" to "System". "u"
will be approximated using first-order approximation.
system.add_variable ("u", FIRST);
Also, we need to add two vectors. The tensor matrix v*w^T of
these two vectors will be part of the system matrix.
system.add_vector("v",false);
system.add_vector("w",false);
We need an additional matrix to be used for preconditioning since
a shell matrix is not suitable for that.
system.add_matrix("Preconditioner");
Give the system a pointer to the matrix assembly function.
system.attach_assemble_function (assemble);
Initialize the data structures for the equation system.
equation_systems.init ();
Prints information about the system to the screen.
equation_systems.print_info();
equation_systems.parameters.set<unsigned int>
("linear solver maximum iterations") = 250;
equation_systems.parameters.set<Real>
("linear solver tolerance") = TOLERANCE;
Refine arbitrarily some elements.
for(unsigned int i=0; i<2; i++)
{
MeshRefinement mesh_refinement(mesh);
MeshBase::element_iterator elem_it = mesh.elements_begin();
const MeshBase::element_iterator elem_end = mesh.elements_end();
for (; elem_it != elem_end; ++elem_it)
{
Elem* elem = *elem_it;
if(elem->active())
{
if((elem->id()%20)>8)
{
elem->set_refinement_flag(Elem::REFINE);
}
else
{
elem->set_refinement_flag(Elem::DO_NOTHING);
}
}
else
{
elem->set_refinement_flag(Elem::INACTIVE);
}
}
mesh_refinement.refine_elements();
equation_systems.reinit();
}
Prints information about the system to the screen.
equation_systems.print_info();
Before the assemblation of the matrix, we have to clear the two
vectors that form the tensor matrix (since this is not performed
automatically).
system.get_vector("v").init(system.n_dofs(), system.n_local_dofs());
system.get_vector("w").init(system.n_dofs(), system.n_local_dofs());
We need a shell matrix to solve. There is currently no way to
store the shell matrix in the system. We just create it locally
here (a shell matrix does not occupy much memory).
SumShellMatrix<Number> shellMatrix;
TensorShellMatrix<Number> shellMatrix0(system.get_vector("v"),system.get_vector("w"));
shellMatrix.matrices.push_back(&shellMatrix0);
SparseShellMatrix<Number> shellMatrix1(*system.matrix);
shellMatrix.matrices.push_back(&shellMatrix1);
Attach that to the system.
system.attach_shell_matrix(&shellMatrix);
Reset the preconditioning matrix to zero (for the system matrix,
the same thing is done automatically).
system.get_matrix("Preconditioner").zero();
Assemble & solve the linear system
system.solve();
Detach the shell matrix from the system since it will go out of
scope. Nobody should solve the system outside this function.
system.detach_shell_matrix();
Print a nice message.
std::cout << "Solved linear system in " << system.n_linear_iterations() << " iterations, residual norm is " << system.final_linear_residual() << "." << std::endl;
#if defined(LIBMESH_HAVE_VTK) && !defined(LIBMESH_ENABLE_PARMESH)
Write result to file.
VTKIO(mesh).write_equation_systems ("out.pvtu", equation_systems);
#endif // #ifdef LIBMESH_HAVE_VTK
#endif // #ifndef LIBMESH_ENABLE_AMR
return 0;
}
This function defines the assembly routine. It is responsible for
computing the proper matrix entries for the element stiffness
matrices and right-hand sides.
void assemble (EquationSystems& es,
const std::string& system_name)
{
#ifdef LIBMESH_ENABLE_AMR
It is a good idea to make sure we are assembling
the proper system.
libmesh_assert_equal_to (system_name, "System");
Get a constant reference to the mesh object.
const MeshBase& mesh = es.get_mesh();
The dimension that we are running
const unsigned int dim = mesh.mesh_dimension();
Get a reference to the Convection-Diffusion system object.
LinearImplicitSystem & system =
es.get_system<LinearImplicitSystem> ("System");
Get the Finite Element type for the first (and only)
variable in the system.
FEType fe_type = system.variable_type(0);
Build a Finite Element object of the specified type. Since the
\p FEBase::build() member dynamically creates memory we will
store the object as an \p AutoPtr. This can be thought
of as a pointer that will clean up after itself.
AutoPtr<FEBase> fe (FEBase::build(dim, fe_type));
AutoPtr<FEBase> fe_face (FEBase::build(dim, fe_type));
A Gauss quadrature rule for numerical integration.
Let the \p FEType object decide what order rule is appropriate.
QGauss qrule (dim, fe_type.default_quadrature_order());
QGauss qface (dim-1, fe_type.default_quadrature_order());
Tell the finite element object to use our quadrature rule.
fe->attach_quadrature_rule (&qrule);
fe_face->attach_quadrature_rule (&qface);
Here we define some references to cell-specific data that
will be used to assemble the linear system. We will start
with the element Jacobian * quadrature weight at each integration point.
const std::vector<Real>& JxW = fe->get_JxW();
const std::vector<Real>& JxW_face = fe_face->get_JxW();
The element shape functions evaluated at the quadrature points.
const std::vector<std::vector<Real> >& phi = fe->get_phi();
const std::vector<std::vector<Real> >& psi = fe_face->get_phi();
The element shape function gradients evaluated at the quadrature
points.
const std::vector<std::vector<RealGradient> >& dphi = fe->get_dphi();
The XY locations of the quadrature points used for face integration
const std::vector& qface_points = fe_face->get_xyz();
A reference to the \p DofMap object for this system. The \p DofMap object handles the index translation from node and element numbers to degree of freedom numbers. We will talk more about the \p DofMap in future examples.
A reference to the \p DofMap object for this system. The \p DofMap object handles the index translation from node and element numbers to degree of freedom numbers. We will talk more about the \p DofMap in future examples.
const DofMap& dof_map = system.get_dof_map();
Define data structures to contain the element matrix
and right-hand-side vector contribution. Following
basic finite element terminology we will denote these
"Ke" and "Fe".
DenseMatrix<Number> Ke;
DenseVector<Number> Fe;
Analogous data structures for thw two vectors v and w that form
the tensor shell matrix.
DenseVector<Number> Ve;
DenseVector<Number> We;
This vector will hold the degree of freedom indices for
the element. These define where in the global system
the element degrees of freedom get mapped.
std::vector<dof_id_type> dof_indices;
Now we will loop over all the elements in the mesh that
live on the local processor. We will compute the element
matrix and right-hand-side contribution. Since the mesh
will be refined we want to only consider the ACTIVE elements,
hence we use a variant of the \p active_elem_iterator.
MeshBase::const_element_iterator el = mesh.active_local_elements_begin();
const MeshBase::const_element_iterator end_el = mesh.active_local_elements_end();
for ( ; el != end_el; ++el)
{
Store a pointer to the element we are currently
working on. This allows for nicer syntax later.
const Elem* elem = *el;
Get the degree of freedom indices for the
current element. These define where in the global
matrix and right-hand-side this element will
contribute to.
dof_map.dof_indices (elem, dof_indices);
Compute the element-specific data for the current
element. This involves computing the location of the
quadrature points (q_point) and the shape functions
(phi, dphi) for the current element.
fe->reinit (elem);
Zero the element matrix and right-hand side before
summing them. We use the resize member here because
the number of degrees of freedom might have changed from
the last element. Note that this will be the case if the
element type is different (i.e. the last element was a
triangle, now we are on a quadrilateral).
Ke.resize (dof_indices.size(),
dof_indices.size());
Fe.resize (dof_indices.size());
Ve.resize (dof_indices.size());
We.resize (dof_indices.size());
Now we will build the element matrix and right-hand-side.
Constructing the RHS requires the solution and its
gradient from the previous timestep. This myst be
calculated at each quadrature point by summing the
solution degree-of-freedom values by the appropriate
weight functions.
for (unsigned int qp=0; qp<qrule.n_points(); qp++)
{
Now compute the element matrix and RHS contributions.
for (unsigned int i=0; i<phi.size(); i++)
{
The RHS contribution
Fe(i) += JxW[qp]*(
phi[i][qp]
);
for (unsigned int j=0; j<phi.size(); j++)
{
The matrix contribution
Ke(i,j) += JxW[qp]*(
Stiffness matrix
(dphi[i][qp]*dphi[j][qp])
);
}
V and W are the same for this example.
Ve(i) += JxW[qp]*(
phi[i][qp]
);
We(i) += JxW[qp]*(
phi[i][qp]
);
}
}
At this point the interior element integration has
been completed. However, we have not yet addressed
boundary conditions. For this example we will only
consider simple Dirichlet boundary conditions imposed
via the penalty method.
The following loops over the sides of the element. If the element has no neighbor on a side then that side MUST live on a boundary of the domain.
The following loops over the sides of the element. If the element has no neighbor on a side then that side MUST live on a boundary of the domain.
{
The penalty value.
const Real penalty = 1.e10;
The following loops over the sides of the element.
If the element has no neighbor on a side then that
side MUST live on a boundary of the domain.
for (unsigned int s=0; s<elem->n_sides(); s++)
if (elem->neighbor(s) == NULL)
{
fe_face->reinit(elem,s);
for (unsigned int qp=0; qp<qface.n_points(); qp++)
{
Matrix contribution
for (unsigned int i=0; i<psi.size(); i++)
for (unsigned int j=0; j<psi.size(); j++)
Ke(i,j) += penalty*JxW_face[qp]*psi[i][qp]*psi[j][qp];
}
}
}
We have now built the element matrix and RHS vector in terms
of the element degrees of freedom. However, it is possible
that some of the element DOFs are constrained to enforce
solution continuity, i.e. they are not really "free". We need
to constrain those DOFs in terms of non-constrained DOFs to
ensure a continuous solution. The
\p DofMap::constrain_element_matrix_and_vector() method does
just that.
However, constraining both the sparse matrix (and right hand side) plus the rank 1 matrix is tricky. The dof_indices vector has to be backuped for that because the constraining functions modify it.
However, constraining both the sparse matrix (and right hand side) plus the rank 1 matrix is tricky. The dof_indices vector has to be backuped for that because the constraining functions modify it.
std::vector<dof_id_type> dof_indices_backup(dof_indices);
dof_map.constrain_element_matrix_and_vector (Ke, Fe, dof_indices);
dof_indices = dof_indices_backup;
dof_map.constrain_element_dyad_matrix(Ve,We,dof_indices);
The element matrix and right-hand-side are now built
for this element. Add them to the global matrix and
right-hand-side vector. The \p SparseMatrix::add_matrix()
and \p NumericVector::add_vector() members do this for us.
system.matrix->add_matrix (Ke, dof_indices);
system.get_matrix("Preconditioner").add_matrix (Ke, dof_indices);
system.rhs->add_vector (Fe, dof_indices);
system.get_vector("v").add_vector(Ve,dof_indices);
system.get_vector("w").add_vector(We,dof_indices);
}
Finished computing the sytem matrix and right-hand side.
Matrices and vectors must be closed manually. This is necessary because the matrix is not directly used as the system matrix (in which case the solver closes it) but as a part of a shell matrix.
Matrices and vectors must be closed manually. This is necessary because the matrix is not directly used as the system matrix (in which case the solver closes it) but as a part of a shell matrix.
system.matrix->close();
system.get_matrix("Preconditioner").close();
system.rhs->close();
system.get_vector("v").close();
system.get_vector("w").close();
#endif // #ifdef LIBMESH_ENABLE_AMR
}
The source file miscellaneous_ex4.C without comments:
#include <iostream>
#include <algorithm>
#include <cstdlib> // *must* precede <cmath> for proper std:abs() on PGI, Sun Studio CC
#include <cmath>
#include "libmesh/libmesh.h"
#include "libmesh/mesh.h"
#include "libmesh/mesh_refinement.h"
#include "libmesh/vtk_io.h"
#include "libmesh/equation_systems.h"
#include "libmesh/fe.h"
#include "libmesh/quadrature_gauss.h"
#include "libmesh/dof_map.h"
#include "libmesh/sparse_matrix.h"
#include "libmesh/numeric_vector.h"
#include "libmesh/dense_matrix.h"
#include "libmesh/dense_vector.h"
#include "libmesh/mesh_generation.h"
#include "libmesh/sum_shell_matrix.h"
#include "libmesh/tensor_shell_matrix.h"
#include "libmesh/sparse_shell_matrix.h"
#include "libmesh/mesh_refinement.h"
#include "libmesh/getpot.h"
#include "libmesh/transient_system.h"
#include "libmesh/linear_implicit_system.h"
#include "libmesh/vector_value.h"
#include "libmesh/elem.h"
using namespace libMesh;
void assemble (EquationSystems& es,
const std::string& system_name);
int main (int argc, char** argv)
{
LibMeshInit init (argc, argv);
#if !defined(LIBMESH_ENABLE_AMR)
libmesh_example_assert(false, "--enable-amr");
#else
libmesh_example_assert(libMesh::default_solver_package() == PETSC_SOLVERS, "--enable-petsc");
std::cout << "Running: " << argv[0];
for (int i=1; i<argc; i++)
std::cout << " " << argv[i];
std::cout << std::endl << std::endl;
libmesh_example_assert(2 <= LIBMESH_DIM, "2D support");
Mesh mesh;
EquationSystems equation_systems (mesh);
MeshTools::Generation::build_square (mesh,
16,
16,
-1., 1.,
-1., 1.,
QUAD4);
LinearImplicitSystem & system =
equation_systems.add_system<LinearImplicitSystem>
("System");
system.add_variable ("u", FIRST);
system.add_vector("v",false);
system.add_vector("w",false);
system.add_matrix("Preconditioner");
system.attach_assemble_function (assemble);
equation_systems.init ();
equation_systems.print_info();
equation_systems.parameters.set<unsigned int>
("linear solver maximum iterations") = 250;
equation_systems.parameters.set<Real>
("linear solver tolerance") = TOLERANCE;
for(unsigned int i=0; i<2; i++)
{
MeshRefinement mesh_refinement(mesh);
MeshBase::element_iterator elem_it = mesh.elements_begin();
const MeshBase::element_iterator elem_end = mesh.elements_end();
for (; elem_it != elem_end; ++elem_it)
{
Elem* elem = *elem_it;
if(elem->active())
{
if((elem->id()%20)>8)
{
elem->set_refinement_flag(Elem::REFINE);
}
else
{
elem->set_refinement_flag(Elem::DO_NOTHING);
}
}
else
{
elem->set_refinement_flag(Elem::INACTIVE);
}
}
mesh_refinement.refine_elements();
equation_systems.reinit();
}
equation_systems.print_info();
system.get_vector("v").init(system.n_dofs(), system.n_local_dofs());
system.get_vector("w").init(system.n_dofs(), system.n_local_dofs());
SumShellMatrix<Number> shellMatrix;
TensorShellMatrix<Number> shellMatrix0(system.get_vector("v"),system.get_vector("w"));
shellMatrix.matrices.push_back(&shellMatrix0);
SparseShellMatrix<Number> shellMatrix1(*system.matrix);
shellMatrix.matrices.push_back(&shellMatrix1);
system.attach_shell_matrix(&shellMatrix);
system.get_matrix("Preconditioner").zero();
system.solve();
system.detach_shell_matrix();
std::cout << "Solved linear system in " << system.n_linear_iterations() << " iterations, residual norm is " << system.final_linear_residual() << "." << std::endl;
#if defined(LIBMESH_HAVE_VTK) && !defined(LIBMESH_ENABLE_PARMESH)
VTKIO(mesh).write_equation_systems ("out.pvtu", equation_systems);
#endif // #ifdef LIBMESH_HAVE_VTK
#endif // #ifndef LIBMESH_ENABLE_AMR
return 0;
}
void assemble (EquationSystems& es,
const std::string& system_name)
{
#ifdef LIBMESH_ENABLE_AMR
libmesh_assert_equal_to (system_name, "System");
const MeshBase& mesh = es.get_mesh();
const unsigned int dim = mesh.mesh_dimension();
LinearImplicitSystem & system =
es.get_system<LinearImplicitSystem> ("System");
FEType fe_type = system.variable_type(0);
AutoPtr<FEBase> fe (FEBase::build(dim, fe_type));
AutoPtr<FEBase> fe_face (FEBase::build(dim, fe_type));
QGauss qrule (dim, fe_type.default_quadrature_order());
QGauss qface (dim-1, fe_type.default_quadrature_order());
fe->attach_quadrature_rule (&qrule);
fe_face->attach_quadrature_rule (&qface);
const std::vector<Real>& JxW = fe->get_JxW();
const std::vector<Real>& JxW_face = fe_face->get_JxW();
const std::vector<std::vector<Real> >& phi = fe->get_phi();
const std::vector<std::vector<Real> >& psi = fe_face->get_phi();
const std::vector<std::vector<RealGradient> >& dphi = fe->get_dphi();
const DofMap& dof_map = system.get_dof_map();
DenseMatrix<Number> Ke;
DenseVector<Number> Fe;
DenseVector<Number> Ve;
DenseVector<Number> We;
std::vector<dof_id_type> dof_indices;
MeshBase::const_element_iterator el = mesh.active_local_elements_begin();
const MeshBase::const_element_iterator end_el = mesh.active_local_elements_end();
for ( ; el != end_el; ++el)
{
const Elem* elem = *el;
dof_map.dof_indices (elem, dof_indices);
fe->reinit (elem);
Ke.resize (dof_indices.size(),
dof_indices.size());
Fe.resize (dof_indices.size());
Ve.resize (dof_indices.size());
We.resize (dof_indices.size());
for (unsigned int qp=0; qp<qrule.n_points(); qp++)
{
for (unsigned int i=0; i<phi.size(); i++)
{
Fe(i) += JxW[qp]*(
phi[i][qp]
);
for (unsigned int j=0; j<phi.size(); j++)
{
Ke(i,j) += JxW[qp]*(
(dphi[i][qp]*dphi[j][qp])
);
}
Ve(i) += JxW[qp]*(
phi[i][qp]
);
We(i) += JxW[qp]*(
phi[i][qp]
);
}
}
{
const Real penalty = 1.e10;
for (unsigned int s=0; s<elem->n_sides(); s++)
if (elem->neighbor(s) == NULL)
{
fe_face->reinit(elem,s);
for (unsigned int qp=0; qp<qface.n_points(); qp++)
{
for (unsigned int i=0; i<psi.size(); i++)
for (unsigned int j=0; j<psi.size(); j++)
Ke(i,j) += penalty*JxW_face[qp]*psi[i][qp]*psi[j][qp];
}
}
}
std::vector<dof_id_type> dof_indices_backup(dof_indices);
dof_map.constrain_element_matrix_and_vector (Ke, Fe, dof_indices);
dof_indices = dof_indices_backup;
dof_map.constrain_element_dyad_matrix(Ve,We,dof_indices);
system.matrix->add_matrix (Ke, dof_indices);
system.get_matrix("Preconditioner").add_matrix (Ke, dof_indices);
system.rhs->add_vector (Fe, dof_indices);
system.get_vector("v").add_vector(Ve,dof_indices);
system.get_vector("w").add_vector(We,dof_indices);
}
system.matrix->close();
system.get_matrix("Preconditioner").close();
system.rhs->close();
system.get_vector("v").close();
system.get_vector("w").close();
#endif // #ifdef LIBMESH_ENABLE_AMR
}
The console output of the program:
***************************************************************
* Running Example miscellaneous_ex4:
* mpirun -np 2 example-devel -pc_type bjacobi -sub_pc_type ilu -sub_pc_factor_levels 4 -sub_pc_factor_zeropivot 0 -ksp_right_pc -log_summary
***************************************************************
Running: /workspace/libmesh/examples/miscellaneous/miscellaneous_ex4/.libs/lt-example-devel -pc_type bjacobi -sub_pc_type ilu -sub_pc_factor_levels 4 -sub_pc_factor_zeropivot 0 -ksp_right_pc -log_summary
EquationSystems
n_systems()=1
System #0, "System"
Type "LinearImplicit"
Variables="u"
Finite Element Types="LAGRANGE", "JACOBI_20_00"
Infinite Element Mapping="CARTESIAN"
Approximation Orders="FIRST", "THIRD"
n_dofs()=289
n_local_dofs()=154
n_constrained_dofs()=0
n_local_constrained_dofs()=0
n_vectors()=3
n_matrices()=2
DofMap Sparsity
Average On-Processor Bandwidth <= 8.14533
Average Off-Processor Bandwidth <= 0.352941
Maximum On-Processor Bandwidth <= 11
Maximum Off-Processor Bandwidth <= 4
DofMap Constraints
Number of DoF Constraints = 0
Number of Node Constraints = 0
EquationSystems
n_systems()=1
System #0, "System"
Type "LinearImplicit"
Variables="u"
Finite Element Types="LAGRANGE", "JACOBI_20_00"
Infinite Element Mapping="CARTESIAN"
Approximation Orders="FIRST", "THIRD"
n_dofs()=2093
n_local_dofs()=1065
n_constrained_dofs()=458
n_local_constrained_dofs()=230
n_vectors()=3
n_matrices()=2
DofMap Sparsity
Average On-Processor Bandwidth <= 9.50741
Average Off-Processor Bandwidth <= 0.128046
Maximum On-Processor Bandwidth <= 21
Maximum Off-Processor Bandwidth <= 7
DofMap Constraints
Number of DoF Constraints = 458
Average DoF Constraint Length= 2
Number of Node Constraints = 909
Maximum Node Constraint Length= 5
Average Node Constraint Length= 2.51155
Solved linear system in 20 iterations, residual norm is 8.65289e-07.
*** Warning, This code is untested, experimental, or likely to see future API changes: ../../../include/libmesh/vtk_io.h, line 178, compiled Feb 5 2013 at 13:35:53 ***
************************************************************************************************************************
*** WIDEN YOUR WINDOW TO 120 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***
************************************************************************************************************************
---------------------------------------------- PETSc Performance Summary: ----------------------------------------------
/workspace/libmesh/examples/miscellaneous/miscellaneous_ex4/.libs/lt-example-devel on a intel-12. named hbar.ices.utexas.edu with 2 processors, by benkirk Tue Feb 5 13:41:16 2013
Using Petsc Release Version 3.3.0, Patch 2, Fri Jul 13 15:42:00 CDT 2012
Max Max/Min Avg Total
Time (sec): 7.189e-01 1.00000 7.189e-01
Objects: 1.160e+02 1.00000 1.160e+02
Flops: 7.656e+06 1.01364 7.604e+06 1.521e+07
Flops/sec: 1.065e+07 1.01364 1.058e+07 2.116e+07
MPI Messages: 5.400e+01 1.00000 5.400e+01 1.080e+02
MPI Message Lengths: 4.127e+04 1.00000 7.643e+02 8.254e+04
MPI Reductions: 4.170e+02 1.00000
Flop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)
e.g., VecAXPY() for real vectors of length N --> 2N flops
and VecAXPY() for complex vectors of length N --> 8N flops
Summary of Stages: ----- Time ------ ----- Flops ----- --- Messages --- -- Message Lengths -- -- Reductions --
Avg %Total Avg %Total counts %Total Avg %Total counts %Total
0: Main Stage: 7.1883e-01 100.0% 1.5208e+07 100.0% 1.080e+02 100.0% 7.643e+02 100.0% 4.160e+02 99.8%
------------------------------------------------------------------------------------------------------------------------
See the 'Profiling' chapter of the users' manual for details on interpreting output.
Phase summary info:
Count: number of times phase was executed
Time and Flops: Max - maximum over all processors
Ratio - ratio of maximum to minimum over all processors
Mess: number of messages sent
Avg. len: average message length
Reduct: number of global reductions
Global: entire computation
Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().
%T - percent time in this phase %f - percent flops in this phase
%M - percent messages in this phase %L - percent message lengths in this phase
%R - percent reductions in this phase
Total Mflop/s: 10e-6 * (sum of flops over all processors)/(max time over all processors)
------------------------------------------------------------------------------------------------------------------------
Event Count Time (sec) Flops --- Global --- --- Stage --- Total
Max Ratio Max Ratio Max Ratio Mess Avg len Reduct %T %f %M %L %R %T %f %M %L %R Mflop/s
------------------------------------------------------------------------------------------------------------------------
--- Event Stage 0: Main Stage
VecDot 21 1.0 1.3399e-04 1.4 4.47e+04 1.0 0.0e+00 0.0e+00 2.1e+01 0 1 0 0 5 0 1 0 0 5 656
VecMDot 20 1.0 2.9850e-04 1.8 4.47e+05 1.0 0.0e+00 0.0e+00 2.0e+01 0 6 0 0 5 0 6 0 0 5 2944
VecNorm 22 1.0 6.4850e-05 1.1 4.69e+04 1.0 0.0e+00 0.0e+00 2.2e+01 0 1 0 0 5 0 1 0 0 5 1420
VecScale 21 1.0 2.5511e-05 1.1 2.24e+04 1.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 1723
VecCopy 8 1.0 9.5367e-06 1.2 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
VecSet 74 1.0 4.4823e-05 1.3 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
VecAXPY 23 1.0 3.2187e-05 1.0 4.69e+04 1.0 0.0e+00 0.0e+00 0.0e+00 0 1 0 0 0 0 1 0 0 0 2861
VecMAXPY 21 1.0 1.3113e-04 1.1 4.90e+05 1.0 0.0e+00 0.0e+00 0.0e+00 0 6 0 0 0 0 6 0 0 0 7342
VecAssemblyBegin 66 1.0 3.0675e-03 7.9 0.00e+00 0.0 6.0e+00 5.5e+02 1.8e+02 0 0 6 4 43 0 0 6 4 43 0
VecAssemblyEnd 66 1.0 4.1723e-05 1.1 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
VecScatterBegin 30 1.0 1.4758e-04 1.0 0.00e+00 0.0 6.0e+01 8.4e+02 0.0e+00 0 0 56 61 0 0 0 56 61 0 0
VecScatterEnd 30 1.0 5.0306e-05 1.1 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
VecNormalize 21 1.0 1.1730e-04 1.0 6.71e+04 1.0 0.0e+00 0.0e+00 2.1e+01 0 1 0 0 5 0 1 0 0 5 1124
MatMult 21 1.0 1.8368e-03 1.2 5.17e+05 1.0 4.2e+01 4.2e+02 1.9e+02 0 7 39 21 45 0 7 39 21 45 552
MatMultAdd 21 1.0 3.6120e-04 1.1 4.30e+05 1.0 4.2e+01 4.2e+02 0.0e+00 0 6 39 21 0 0 6 39 21 0 2330
MatSolve 22 1.0 1.4317e-03 1.1 3.33e+06 1.0 0.0e+00 0.0e+00 0.0e+00 0 43 0 0 0 0 43 0 0 0 4577
MatLUFactorNum 1 1.0 2.3191e-03 1.1 2.85e+06 1.0 0.0e+00 0.0e+00 0.0e+00 0 37 0 0 0 0 37 0 0 0 2439
MatILUFactorSym 1 1.0 5.2722e-03 1.1 0.00e+00 0.0 0.0e+00 0.0e+00 3.0e+00 1 0 0 0 1 1 0 0 0 1 0
MatAssemblyBegin 23 1.0 5.1284e-04 3.2 0.00e+00 0.0 6.0e+00 2.1e+03 4.6e+01 0 0 6 16 11 0 0 6 16 11 0
MatAssemblyEnd 23 1.0 7.2908e-04 1.0 0.00e+00 0.0 8.0e+00 1.1e+02 1.6e+01 0 0 7 1 4 0 0 7 1 4 0
MatGetRowIJ 1 1.0 1.1921e-06 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
MatGetOrdering 1 1.0 4.8876e-05 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 2.0e+00 0 0 0 0 0 0 0 0 0 0 0
MatZeroEntries 14 1.0 7.9393e-05 1.2 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
KSPGMRESOrthog 20 1.0 4.4966e-04 1.4 8.94e+05 1.0 0.0e+00 0.0e+00 2.0e+01 0 12 0 0 5 0 12 0 0 5 3909
KSPSetUp 2 1.0 4.0054e-05 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
KSPSolve 1 1.0 1.1839e-02 1.0 7.66e+06 1.0 4.2e+01 4.2e+02 2.4e+02 2100 39 21 57 2100 39 21 57 1285
PCSetUp 2 1.0 7.8142e-03 1.0 2.85e+06 1.0 0.0e+00 0.0e+00 7.0e+00 1 37 0 0 2 1 37 0 0 2 724
PCSetUpOnBlocks 1 1.0 7.5550e-03 1.0 2.85e+06 1.0 0.0e+00 0.0e+00 5.0e+00 1 37 0 0 1 1 37 0 0 1 749
PCApply 22 1.0 1.6296e-03 1.1 3.33e+06 1.0 0.0e+00 0.0e+00 0.0e+00 0 43 0 0 0 0 43 0 0 0 4021
------------------------------------------------------------------------------------------------------------------------
Memory usage is given in bytes:
Object Type Creations Destructions Memory Descendants' Mem.
Reports information only for process 0.
--- Event Stage 0: Main Stage
Vector 62 62 468072 0
Vector Scatter 9 9 9324 0
Index Set 17 17 17780 0
IS L to G Mapping 3 3 1692 0
Matrix 20 20 1430692 0
Krylov Solver 2 2 19360 0
Preconditioner 2 2 1784 0
Viewer 1 0 0 0
========================================================================================================================
Average time to get PetscTime(): 9.53674e-08
Average time for MPI_Barrier(): 8.10623e-07
Average time for zero size MPI_Send(): 1.50204e-05
#PETSc Option Table entries:
-ksp_right_pc
-log_summary
-pc_type bjacobi
-sub_pc_factor_levels 4
-sub_pc_factor_zeropivot 0
-sub_pc_type ilu
#End of PETSc Option Table entries
Compiled without FORTRAN kernels
Compiled with full precision matrices (default)
sizeof(short) 2 sizeof(int) 4 sizeof(long) 8 sizeof(void*) 8 sizeof(PetscScalar) 8 sizeof(PetscInt) 4
Configure run at: Thu Nov 8 11:21:02 2012
Configure options: --with-debugging=false --COPTFLAGS=-O3 --CXXOPTFLAGS=-O3 --FOPTFLAGS=-O3 --with-clanguage=C++ --with-shared-libraries=1 --with-mpi-dir=/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1 --with-mumps=true --download-mumps=1 --with-metis=true --download-metis=1 --with-parmetis=true --download-parmetis=1 --with-superlu=true --download-superlu=1 --with-superludir=true --download-superlu_dist=1 --with-blacs=true --download-blacs=1 --with-scalapack=true --download-scalapack=1 --with-hypre=true --download-hypre=1 --with-blas-lib="[/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_intel_lp64.so,/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_sequential.so,/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_core.so]" --with-lapack-lib="[/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_lapack95_lp64.a]"
-----------------------------------------
Libraries compiled on Thu Nov 8 11:21:02 2012 on daedalus.ices.utexas.edu
Machine characteristics: Linux-2.6.32-279.1.1.el6.x86_64-x86_64-with-redhat-6.3-Carbon
Using PETSc directory: /opt/apps/ossw/libraries/petsc/petsc-3.3-p2
Using PETSc arch: intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt
-----------------------------------------
Using C compiler: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpicxx -wd1572 -O3 -fPIC ${COPTFLAGS} ${CFLAGS}
Using Fortran compiler: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpif90 -fPIC -O3 ${FOPTFLAGS} ${FFLAGS}
-----------------------------------------
Using include paths: -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/include -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/include -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/include -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/include -I/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/include
-----------------------------------------
Using C linker: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpicxx
Using Fortran linker: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpif90
Using libraries: -Wl,-rpath,/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -L/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -lpetsc -lX11 -Wl,-rpath,/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -L/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -lcmumps -ldmumps -lsmumps -lzmumps -lmumps_common -lpord -lHYPRE -lpthread -lsuperlu_dist_3.0 -lparmetis -lmetis -lscalapack -lblacs -lsuperlu_4.3 -Wl,-rpath,/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64 -L/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64 -lmkl_lapack95_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,-rpath,/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/lib -L/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/lib -Wl,-rpath,/opt/apps/sysnet/intel/12.1/composer_xe_2011_sp1.7.256/compiler/lib/intel64 -L/opt/apps/sysnet/intel/12.1/composer_xe_2011_sp1.7.256/compiler/lib/intel64 -Wl,-rpath,/usr/lib/gcc/x86_64-redhat-linux/4.4.6 -L/usr/lib/gcc/x86_64-redhat-linux/4.4.6 -lmpichf90 -lifport -lifcore -lm -lm -lmpichcxx -ldl -lmpich -lopa -lmpl -lrt -lpthread -limf -lsvml -lipgo -ldecimal -lcilkrts -lstdc++ -lgcc_s -lirc -lirc_s -ldl
-----------------------------------------
----------------------------------------------------------------------------------------------------------------------
| Processor id: 0 |
| Num Processors: 2 |
| Time: Tue Feb 5 13:41:16 2013 |
| OS: Linux |
| HostName: hbar.ices.utexas.edu |
| OS Release: 2.6.32-279.1.1.el6.x86_64 |
| OS Version: #1 SMP Tue Jul 10 11:24:23 CDT 2012 |
| Machine: x86_64 |
| Username: benkirk |
| Configuration: ./configure '--enable-everything' |
| '--prefix=/workspace/libmesh/install' |
| 'CXX=mpicxx' |
| 'CC=mpicc' |
| 'F77=mpif77' |
| 'FC=mpif90' |
| 'PETSC_DIR=/opt/apps/ossw/libraries/petsc/petsc-3.3-p2' |
| 'PETSC_ARCH=intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt' |
| 'SLEPC_DIR=/opt/apps/ossw/libraries/slepc/slepc-3.3-p2-petsc-3.3-p2-cxx-opt' |
| 'TRILINOS_DIR=/opt/apps/ossw/libraries/trilinos/trilinos-10.12.2/sl6/intel-12.1/mpich2-1.4.1p1/mkl-intel-10.3.12.361'|
| 'VTK_DIR=/opt/apps/ossw/libraries/vtk/vtk-5.10.0/sl6/intel-12.1' |
----------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------
| libMesh Performance: Alive time=0.729458, Active time=0.69487 |
----------------------------------------------------------------------------------------------------------------
| Event nCalls Total Time Avg Time Total Time Avg Time % of Active Time |
| w/o Sub w/o Sub With Sub With Sub w/o S With S |
|----------------------------------------------------------------------------------------------------------------|
| |
| |
| DofMap |
| add_neighbors_to_send_list() 3 0.0314 0.010477 0.0362 0.012065 4.52 5.21 |
| build_constraint_matrix() 1794 0.0137 0.000008 0.0137 0.000008 1.97 1.97 |
| build_sparsity() 3 0.0260 0.008654 0.0642 0.021386 3.74 9.23 |
| cnstrn_elem_dyad_mat() 897 0.0027 0.000003 0.0027 0.000003 0.40 0.40 |
| cnstrn_elem_mat_vec() 897 0.0115 0.000013 0.0115 0.000013 1.66 1.66 |
| create_dof_constraints() 3 0.0314 0.010456 0.0722 0.024078 4.51 10.40 |
| distribute_dofs() 3 0.0314 0.010451 0.0801 0.026701 4.51 11.53 |
| dof_indices() 6868 0.1595 0.000023 0.1595 0.000023 22.96 22.96 |
| enforce_constraints_exactly() 2 0.0005 0.000273 0.0005 0.000273 0.08 0.08 |
| old_dof_indices() 2466 0.0672 0.000027 0.0672 0.000027 9.67 9.67 |
| prepare_send_list() 3 0.0001 0.000029 0.0001 0.000029 0.01 0.01 |
| reinit() 3 0.0480 0.015997 0.0480 0.015997 6.91 6.91 |
| |
| EquationSystems |
| build_solution_vector() 1 0.0035 0.003538 0.0282 0.028186 0.51 4.06 |
| |
| FE |
| compute_shape_functions() 961 0.0069 0.000007 0.0069 0.000007 1.00 1.00 |
| init_shape_functions() 65 0.0003 0.000004 0.0003 0.000004 0.04 0.04 |
| inverse_map() 4116 0.0124 0.000003 0.0124 0.000003 1.79 1.79 |
| |
| FEMap |
| compute_affine_map() 961 0.0056 0.000006 0.0056 0.000006 0.81 0.81 |
| compute_face_map() 64 0.0006 0.000010 0.0013 0.000020 0.09 0.19 |
| init_face_shape_functions() 1 0.0000 0.000011 0.0000 0.000011 0.00 0.00 |
| init_reference_to_physical_map() 65 0.0005 0.000008 0.0005 0.000008 0.08 0.08 |
| |
| LocationMap |
| find() 6156 0.0164 0.000003 0.0164 0.000003 2.35 2.35 |
| init() 4 0.0039 0.000972 0.0039 0.000972 0.56 0.56 |
| |
| Mesh |
| contract() 2 0.0005 0.000244 0.0011 0.000548 0.07 0.16 |
| find_neighbors() 3 0.0383 0.012769 0.0385 0.012825 5.51 5.54 |
| renumber_nodes_and_elem() 8 0.0020 0.000254 0.0020 0.000254 0.29 0.29 |
| |
| MeshCommunication |
| compute_hilbert_indices() 4 0.0125 0.003129 0.0125 0.003129 1.80 1.80 |
| find_global_indices() 4 0.0053 0.001328 0.0199 0.004987 0.76 2.87 |
| parallel_sort() 4 0.0016 0.000389 0.0017 0.000425 0.22 0.24 |
| |
| MeshOutput |
| write_equation_systems() 1 0.0149 0.014872 0.0431 0.043112 2.14 6.20 |
| |
| MeshRefinement |
| _coarsen_elements() 2 0.0004 0.000185 0.0004 0.000210 0.05 0.06 |
| _refine_elements() 4 0.0269 0.006730 0.0618 0.015444 3.87 8.89 |
| add_point() 6156 0.0164 0.000003 0.0336 0.000005 2.36 4.83 |
| make_coarsening_compatible() 4 0.0049 0.001213 0.0049 0.001213 0.70 0.70 |
| make_refinement_compatible() 9 0.0012 0.000133 0.0012 0.000137 0.17 0.18 |
| |
| MeshTools::Generation |
| build_cube() 1 0.0009 0.000902 0.0009 0.000902 0.13 0.13 |
| |
| MetisPartitioner |
| partition() 3 0.0334 0.011147 0.0511 0.017024 4.81 7.35 |
| |
| Parallel |
| allgather() 17 0.0001 0.000009 0.0002 0.000011 0.02 0.03 |
| max(bool) 19 0.0006 0.000031 0.0006 0.000031 0.08 0.08 |
| max(scalar) 393 0.0009 0.000002 0.0009 0.000002 0.14 0.14 |
| max(vector) 94 0.0005 0.000006 0.0012 0.000013 0.08 0.17 |
| min(bool) 482 0.0011 0.000002 0.0011 0.000002 0.16 0.16 |
| min(scalar) 379 0.0015 0.000004 0.0015 0.000004 0.22 0.22 |
| min(vector) 94 0.0006 0.000007 0.0013 0.000014 0.09 0.19 |
| probe() 32 0.0006 0.000019 0.0006 0.000019 0.09 0.09 |
| receive() 32 0.0002 0.000007 0.0008 0.000026 0.03 0.12 |
| send() 32 0.0001 0.000003 0.0001 0.000003 0.01 0.01 |
| send_receive() 40 0.0004 0.000010 0.0014 0.000034 0.05 0.20 |
| sum() 39 0.0002 0.000006 0.0003 0.000009 0.03 0.05 |
| |
| Parallel::Request |
| wait() 32 0.0001 0.000002 0.0001 0.000002 0.01 0.01 |
| |
| Partitioner |
| set_node_processor_ids() 3 0.0034 0.001142 0.0038 0.001257 0.49 0.54 |
| set_parent_processor_ids() 3 0.0027 0.000898 0.0027 0.000898 0.39 0.39 |
| |
| PetscLinearSolver |
| solve() 1 0.0125 0.012468 0.0125 0.012468 1.79 1.79 |
| |
| ProjectVector |
| operator() 2 0.0092 0.004580 0.0830 0.041502 1.32 11.95 |
| |
| System |
| assemble() 1 0.0238 0.023798 0.0931 0.093150 3.42 13.41 |
| project_vector() 2 0.0035 0.001730 0.1203 0.060128 0.50 17.31 |
----------------------------------------------------------------------------------------------------------------
| Totals: 33238 0.6949 100.00 |
----------------------------------------------------------------------------------------------------------------
***************************************************************
* Done Running Example miscellaneous_ex4:
* mpirun -np 2 example-devel -pc_type bjacobi -sub_pc_type ilu -sub_pc_factor_levels 4 -sub_pc_factor_zeropivot 0 -ksp_right_pc -log_summary
***************************************************************
Miscellaneous Example 4 - Using a shell matrix
This example solves the equation
\f$-\Delta u+\int u = 1\f$
with homogeneous Dirichlet boundary conditions. This system has a full system matrix which can be written as the sum of of sparse matrix and a rank 1 matrix. The shell matrix concept is used to solve this problem.
The problem is solved in parallel on a non-uniform grid in order to demonstrate all the techniques that are required for this. The grid is fixed, however, i.e. no adaptive mesh refinement is used, so that the example remains simple.
The example is 2d; extension to 3d is straight forward.
C++ include files that we need