The source file assembly.h with comments:
#ifndef __assembly_h__
#define __assembly_h__
#include "libmesh/sparse_matrix.h"
#include "libmesh/numeric_vector.h"
#include "libmesh/dense_matrix.h"
#include "libmesh/dense_vector.h"
#include "libmesh/fe.h"
#include "libmesh/fe_interface.h"
#include "libmesh/fe_base.h"
#include "libmesh/elem_assembly.h"
#include "libmesh/quadrature_gauss.h"
rbOOmit includes
#include "libmesh/transient_rb_theta_expansion.h"
#include "libmesh/transient_rb_assembly_expansion.h"
Bring in bits from the libMesh namespace.
Just the bits we're using, since this is a header.
using libMesh::AutoPtr;
using libMesh::DirichletBoundary;
using libMesh::ElemAssembly;
using libMesh::FEInterface;
using libMesh::FEMContext;
using libMesh::Number;
using libMesh::Point;
using libMesh::RBParameters;
using libMesh::RBTheta;
using libMesh::Real;
using libMesh::RealGradient;
using libMesh::TransientRBThetaExpansion;
using libMesh::TransientRBAssemblyExpansion;
Functors for the parameter-dependent part of the affine decomposition of the PDE
The RHS and outputs just require a constant value of 1, so use a default RBTheta object there
struct ThetaA0 : RBTheta { virtual Number evaluate(const RBParameters& ) { return 0.05; } };
struct ThetaA1 : RBTheta { virtual Number evaluate(const RBParameters& mu) { return mu.get_value("x_vel"); } };
struct ThetaA2 : RBTheta { virtual Number evaluate(const RBParameters& mu) { return mu.get_value("y_vel"); } };
struct M0 : ElemAssembly
{
L2 matrix
virtual void interior_assembly(FEMContext &c)
{
const unsigned int u_var = 0;
const std::vector<Real> &JxW =
c.element_fe_var[u_var]->get_JxW();
const std::vector<std::vector<Real> >& phi =
c.element_fe_var[u_var]->get_phi();
The number of local degrees of freedom in each variable
const unsigned int n_u_dofs = c.dof_indices_var[u_var].size();
Now we will build the affine operator
unsigned int n_qpoints = (c.get_element_qrule())->n_points();
for (unsigned int qp=0; qp != n_qpoints; qp++)
for (unsigned int i=0; i != n_u_dofs; i++)
for (unsigned int j=0; j != n_u_dofs; j++)
c.get_elem_jacobian()(i,j) += JxW[qp] *phi[j][qp]*phi[i][qp];
}
};
struct A0 : ElemAssembly
{
Assemble the Laplacian operator
virtual void interior_assembly(FEMContext &c)
{
const unsigned int u_var = 0;
const std::vector<Real> &JxW =
c.element_fe_var[u_var]->get_JxW();
The velocity shape function gradients at interior
quadrature points.
const std::vector<std::vector<RealGradient> >& dphi =
c.element_fe_var[u_var]->get_dphi();
The number of local degrees of freedom in each variable
const unsigned int n_u_dofs = c.dof_indices_var[u_var].size();
Now we will build the affine operator
unsigned int n_qpoints = (c.get_element_qrule())->n_points();
for (unsigned int qp=0; qp != n_qpoints; qp++)
for (unsigned int i=0; i != n_u_dofs; i++)
for (unsigned int j=0; j != n_u_dofs; j++)
c.get_elem_jacobian()(i,j) += JxW[qp] * dphi[j][qp]*dphi[i][qp];
}
};
struct A1 : ElemAssembly
{
Convection in the x-direction
virtual void interior_assembly(FEMContext &c)
{
const unsigned int u_var = 0;
const std::vector<Real> &JxW =
c.element_fe_var[u_var]->get_JxW();
const std::vector<std::vector<Real> >& phi =
c.element_fe_var[u_var]->get_phi();
const std::vector<std::vector<RealGradient> >& dphi =
c.element_fe_var[u_var]->get_dphi();
The number of local degrees of freedom in each variable
const unsigned int n_u_dofs = c.dof_indices_var[u_var].size();
Now we will build the affine operator
unsigned int n_qpoints = (c.get_element_qrule())->n_points();
for (unsigned int qp=0; qp != n_qpoints; qp++)
for (unsigned int i=0; i != n_u_dofs; i++)
for (unsigned int j=0; j != n_u_dofs; j++)
c.get_elem_jacobian()(i,j) += JxW[qp] *dphi[j][qp](0)*phi[i][qp];
}
};
struct A2 : ElemAssembly
{
Convection in the y-direction
virtual void interior_assembly(FEMContext &c)
{
const unsigned int u_var = 0;
const std::vector<Real> &JxW =
c.element_fe_var[u_var]->get_JxW();
const std::vector<std::vector<Real> >& phi =
c.element_fe_var[u_var]->get_phi();
const std::vector<std::vector<RealGradient> >& dphi =
c.element_fe_var[u_var]->get_dphi();
The number of local degrees of freedom in each variable
const unsigned int n_u_dofs = c.dof_indices_var[u_var].size();
Now we will build the affine operator
unsigned int n_qpoints = (c.get_element_qrule())->n_points();
for (unsigned int qp=0; qp != n_qpoints; qp++)
for (unsigned int i=0; i != n_u_dofs; i++)
for (unsigned int j=0; j != n_u_dofs; j++)
c.get_elem_jacobian()(i,j) += JxW[qp] *dphi[j][qp](1)*phi[i][qp];
}
};
struct F0 : ElemAssembly
{
Source term, 1 throughout the domain
virtual void interior_assembly(FEMContext &c)
{
const unsigned int u_var = 0;
const std::vector<Real> &JxW =
c.element_fe_var[u_var]->get_JxW();
const std::vector<std::vector<Real> >& phi =
c.element_fe_var[u_var]->get_phi();
The number of local degrees of freedom in each variable
const unsigned int n_u_dofs = c.dof_indices_var[u_var].size();
Now we will build the affine operator
unsigned int n_qpoints = (c.get_element_qrule())->n_points();
for (unsigned int qp=0; qp != n_qpoints; qp++)
for (unsigned int i=0; i != n_u_dofs; i++)
c.get_elem_residual()(i) += JxW[qp] * ( 1.*phi[i][qp] );
}
};
struct OutputAssembly : ElemAssembly
{
OutputAssembly(Real min_x_in, Real max_x_in,
Real min_y_in, Real max_y_in)
:
min_x(min_x_in),
max_x(max_x_in),
min_y(min_y_in),
max_y(max_y_in)
{}
Output: Average value over the region [min_x,max_x]x[min_y,max_y]
virtual void interior_assembly(FEMContext &c)
{
const unsigned int u_var = 0;
const std::vector<Real> &JxW =
c.element_fe_var[u_var]->get_JxW();
const std::vector<std::vector<Real> >& phi =
c.element_fe_var[u_var]->get_phi();
The number of local degrees of freedom in each variable
const unsigned int n_u_dofs = c.dof_indices_var[u_var].size();
Now we will build the affine operator
unsigned int n_qpoints = (c.get_element_qrule())->n_points();
Real output_area = (max_x-min_x) * (max_y-min_y);
Point centroid = c.elem->centroid();
if( (min_x <= centroid(0)) && (centroid(0) <= max_x) &&
(min_y <= centroid(1)) && (centroid(1) <= max_y) )
for (unsigned int qp=0; qp != n_qpoints; qp++)
for (unsigned int i=0; i != n_u_dofs; i++)
c.get_elem_residual()(i) += JxW[qp] * ( 1.*phi[i][qp] ) / output_area;
}
Member variables that define the output region in 2D
Real min_x, max_x, min_y, max_y;
};
Define an RBThetaExpansion class for this PDE
struct CDRBThetaExpansion : TransientRBThetaExpansion
{
/**
* Constructor.
*/
CDRBThetaExpansion()
{
set up the RBThetaExpansion object
attach_M_theta(&rb_theta); // Attach the time-derivative theta
attach_A_theta(&theta_a_0); // Attach the lhs theta
attach_A_theta(&theta_a_1);
attach_A_theta(&theta_a_2);
attach_F_theta(&rb_theta); // Attach the rhs theta
attach_output_theta(&rb_theta); // Attach output 0 theta
attach_output_theta(&rb_theta); // Attach output 1 theta
attach_output_theta(&rb_theta); // Attach output 2 theta
attach_output_theta(&rb_theta); // Attach output 3 theta
}
The RBTheta member variables
ThetaA0 theta_a_0;
ThetaA1 theta_a_1;
ThetaA2 theta_a_2;
RBTheta rb_theta; // Default RBTheta object, just returns 1.
};
Define an RBAssemblyExpansion class for this PDE
struct CDRBAssemblyExpansion : TransientRBAssemblyExpansion
{
/**
* Constructor.
*/
CDRBAssemblyExpansion()
:
L0(0.72,0.88,0.72,0.88), // We make sure these output regions conform to the mesh
L1(0.12,0.28,0.72,0.88),
L2(0.12,0.28,0.12,0.28),
L3(0.72,0.88,0.12,0.28)
{
And set up the RBAssemblyExpansion object
attach_M_assembly(&M0_assembly); // Attach the time-derivative assembly
attach_A_assembly(&A0_assembly); // Attach the lhs assembly
attach_A_assembly(&A1_assembly);
attach_A_assembly(&A2_assembly);
attach_F_assembly(&F0_assembly); // Attach the rhs assembly
attach_output_assembly(&L0); // Attach output 0 assembly
attach_output_assembly(&L1); // Attach output 1 assembly
attach_output_assembly(&L2); // Attach output 2 assembly
attach_output_assembly(&L3); // Attach output 3 assembly
}
The ElemAssembly objects
M0 M0_assembly;
A0 A0_assembly;
A1 A1_assembly;
A2 A2_assembly;
F0 F0_assembly;
OutputAssembly L0;
OutputAssembly L1;
OutputAssembly L2;
OutputAssembly L3;
};
#endif
The source file rb_classes.h with comments:
rbOOmit is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#ifndef __rb_classes_h__
#define __rb_classes_h__
#include "libmesh/transient_rb_construction.h"
#include "libmesh/fe_base.h"
local include
#include "assembly.h"
Bring in bits from the libMesh namespace.
Just the bits we're using, since this is a header.
using libMesh::EquationSystems;
using libMesh::FEMContext;
using libMesh::RBConstruction;
using libMesh::RBEvaluation;
using libMesh::Real;
using libMesh::TransientRBEvaluation;
using libMesh::TransientRBConstruction;
A simple subclass of RBEvaluation, which just needs to specify
(a lower bound for) the coercivity constant for this problem.
For this simple convection-diffusion problem, we can set the
coercivity constant lower bound to 0.05.
class SimpleRBEvaluation : public TransientRBEvaluation
{
public:
/**
* Constructor. Just set the theta expansion.
*/
SimpleRBEvaluation()
{
set_rb_theta_expansion(cd_rb_theta_expansion);
}
/**
* The coercivity constant is bounded below by 0.05.
*/
virtual Real get_stability_lower_bound() { return 0.05; }
/**
* The object that stores the "theta" expansion of the parameter dependent PDE,
* i.e. the set of parameter-dependent functions in the affine expansion of the PDE.
*/
CDRBThetaExpansion cd_rb_theta_expansion;
};
A simple subclass of Construction, which just needs to override build_rb_evaluation
in order to build a SimpleRBEvaluation object, rather than an RBEvaluation object.
class SimpleRBConstruction : public TransientRBConstruction
{
public:
SimpleRBConstruction (EquationSystems& es,
const std::string& name_in,
const unsigned int number_in)
: Parent(es, name_in, number_in),
dirichlet_bc(AutoPtr<DirichletBoundary>(NULL))
{}
/**
* Destructor.
*/
virtual ~SimpleRBConstruction () {}
/**
* The type of system.
*/
typedef SimpleRBConstruction sys_type;
/**
* The type of the parent.
*/
typedef TransientRBConstruction Parent;
/**
* Initialize data structures.
*/
virtual void init_data()
{
u_var = this->add_variable ("u", FIRST);
Generate a DirichletBoundary object
dirichlet_bc = build_zero_dirichlet_boundary_object();
Set the Dirichet boundary IDs
and the Dirichlet boundary variable numbers
dirichlet_bc->b.insert(0);
dirichlet_bc->b.insert(1);
dirichlet_bc->b.insert(2);
dirichlet_bc->b.insert(3);
dirichlet_bc->variables.push_back(u_var);
Attach dirichlet_bc (must do this _before_ Parent::init_data)
get_dof_map().add_dirichlet_boundary(*dirichlet_bc);
Parent::init_data();
Set the rb_assembly_expansion for this Construction object.
set_rb_assembly_expansion(cd_rb_assembly_expansion);
We need to define an inner product matrix for this problem
set_inner_product_assembly(cd_rb_assembly_expansion.A0_assembly);
We also need an "L2 matrix" in order to compute the time-dependent error bound
set_L2_assembly(cd_rb_assembly_expansion.M0_assembly);
}
/**
* Pre-request all relevant element data.
*/
virtual void init_context(FEMContext &c)
{
For efficiency, we should prerequest all
the data we will need to build the
linear system before doing an element loop.
c.element_fe_var[u_var]->get_JxW();
c.element_fe_var[u_var]->get_phi();
c.element_fe_var[u_var]->get_dphi();
}
/**
* Variable number for u.
*/
unsigned int u_var;
/**
* The object that stores the "assembly" expansion of the parameter dependent PDE,
* i.e. the objects that define how to assemble the set of parameter-independent
* operators in the affine expansion of the PDE.
*/
CDRBAssemblyExpansion cd_rb_assembly_expansion;
/**
* The object that defines which degrees of freedom are on a Dirichlet boundary.
*/
AutoPtr<DirichletBoundary> dirichlet_bc;
};
#endif
The source file reduced_basis_ex3.C with comments:
/* rbOOmit is distributed in the hope that it will be useful, */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
/* Lesser General Public License for more details. */
/* You should have received a copy of the GNU Lesser General Public */
/* License along with this library; if not, write to the Free Software */
/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
Reduced Basis Example 3 - Transient Reduced Basis Problem
In this example problem we use the Certified Reduced Basis method to solve a transient convection-diffusion problem on the unit square. The PDE is similar to reduced_basis_ex1, except there is a time-derivative in this case.
Basic include file needed for the mesh functionality.
#include "libmesh/libmesh.h"
#include "libmesh/mesh.h"
#include "libmesh/mesh_generation.h"
#include "libmesh/exodusII_io.h"
#include "libmesh/equation_systems.h"
#include "libmesh/dof_map.h"
#include "libmesh/getpot.h"
#include "libmesh/elem.h"
local includes
#include "rb_classes.h"
#include "assembly.h"
Bring in everything from the libMesh namespace
using namespace libMesh;
The main program.
int main (int argc, char** argv)
{
Initialize libMesh.
LibMeshInit init (argc, argv);
This example requires SLEPc
#if !defined(LIBMESH_HAVE_SLEPC)
libmesh_example_assert(false, "--enable-slepc");
#else
#if !defined(LIBMESH_HAVE_XDR)
We need XDR support to write out reduced bases
libmesh_example_assert(false, "--enable-xdr");
#elif defined(LIBMESH_DEFAULT_SINGLE_PRECISION)
XDR binary support requires double precision
libmesh_example_assert(false, "--disable-singleprecision");
#endif
FIXME: This example currently segfaults with Trilinos?
libmesh_example_assert(libMesh::default_solver_package() == PETSC_SOLVERS, "--enable-petsc");
Skip this 2D example if libMesh was compiled as 1D-only.
libmesh_example_assert(2 <= LIBMESH_DIM, "2D support");
Parse the input file (reduced_basis_ex3.in) using GetPot
std::string parameters_filename = "reduced_basis_ex3.in";
GetPot infile(parameters_filename);
unsigned int n_elem = infile("n_elem", 1); // Determines the number of elements in the "truth" mesh
const unsigned int dim = 2; // The number of spatial dimensions
bool store_basis_functions = infile("store_basis_functions", true); // Do we write the RB basis functions to disk?
Read the "online_mode" flag from the command line
GetPot command_line (argc, argv);
int online_mode = 0;
if ( command_line.search(1, "-online_mode") )
online_mode = command_line.next(online_mode);
Build a mesh.
Mesh mesh (dim);
MeshTools::Generation::build_square (mesh,
n_elem, n_elem,
0., 1.,
0., 1.,
QUAD4);
Create an equation systems object.
EquationSystems equation_systems (mesh);
We override RBConstruction with SimpleRBConstruction in order to
specialize a few functions for this particular problem.
SimpleRBConstruction & rb_con =
equation_systems.add_system<SimpleRBConstruction> ("RBConvectionDiffusion");
Initialize the data structures for the equation system.
equation_systems.init ();
Print out some information about the "truth" discretization
equation_systems.print_info();
mesh.print_info();
Build a new RBEvaluation object which will be used to perform
Reduced Basis calculations. This is required in both the
"Offline" and "Online" stages.
SimpleRBEvaluation rb_eval;
Finally, we need to give the RBConstruction object a pointer to
our RBEvaluation object
rb_con.set_rb_evaluation(rb_eval);
if(!online_mode) // Perform the Offline stage of the RB method
{
Read in the data that defines this problem from the specified text file
rb_con.process_parameters_file(parameters_filename);
Print out info that describes the current setup of rb_con
rb_con.print_info();
Prepare rb_con for the Construction stage of the RB method.
This sets up the necessary data structures and performs
initial assembly of the "truth" affine expansion of the PDE.
rb_con.initialize_rb_construction();
Compute the reduced basis space by computing "snapshots", i.e.
"truth" solves, at well-chosen parameter values and employing
these snapshots as basis functions.
rb_con.train_reduced_basis();
Write out the data that will subsequently be required for the Evaluation stage
rb_con.get_rb_evaluation().write_offline_data_to_files();
If requested, write out the RB basis functions for visualization purposes
if(store_basis_functions)
{
Write out the basis functions
rb_con.get_rb_evaluation().write_out_basis_functions(rb_con);
}
}
else // Perform the Online stage of the RB method
{
Read in the reduced basis data
rb_eval.read_offline_data_from_files();
Read in online_N and initialize online parameters
unsigned int online_N = infile("online_N",1);
Real online_x_vel = infile("online_x_vel", 0.);
Real online_y_vel = infile("online_y_vel", 0.);
RBParameters online_mu;
online_mu.set_value("x_vel", online_x_vel);
online_mu.set_value("y_vel", online_y_vel);
rb_eval.set_parameters(online_mu);
rb_eval.print_parameters();
Now do the Online solve using the precomputed reduced basis
Real error_bound_final_time = rb_eval.rb_solve(online_N);
libMesh::out << "Error bound (absolute) at the final time is "
<< error_bound_final_time << std::endl << std::endl;
if(store_basis_functions)
{
Read in the basis functions
rb_eval.read_in_basis_functions(rb_con);
Plot the solution at the final time level
rb_con.pull_temporal_discretization_data( rb_eval );
rb_con.set_time_step(rb_con.get_n_time_steps());
rb_con.load_rb_solution();
#ifdef LIBMESH_HAVE_EXODUS_API
ExodusII_IO(mesh).write_equation_systems ("RB_sol.e",equation_systems);
#endif
Plot the first basis function that was generated from the train_reduced_basis
call in the Offline stage
rb_con.load_basis_function(0);
#ifdef LIBMESH_HAVE_EXODUS_API
ExodusII_IO(mesh).write_equation_systems ("bf0.e",equation_systems);
#endif
}
}
return 0;
#endif // LIBMESH_HAVE_SLEPC
}
The source file assembly.h without comments:
#ifndef __assembly_h__
#define __assembly_h__
#include "libmesh/sparse_matrix.h"
#include "libmesh/numeric_vector.h"
#include "libmesh/dense_matrix.h"
#include "libmesh/dense_vector.h"
#include "libmesh/fe.h"
#include "libmesh/fe_interface.h"
#include "libmesh/fe_base.h"
#include "libmesh/elem_assembly.h"
#include "libmesh/quadrature_gauss.h"
#include "libmesh/transient_rb_theta_expansion.h"
#include "libmesh/transient_rb_assembly_expansion.h"
using libMesh::AutoPtr;
using libMesh::DirichletBoundary;
using libMesh::ElemAssembly;
using libMesh::FEInterface;
using libMesh::FEMContext;
using libMesh::Number;
using libMesh::Point;
using libMesh::RBParameters;
using libMesh::RBTheta;
using libMesh::Real;
using libMesh::RealGradient;
using libMesh::TransientRBThetaExpansion;
using libMesh::TransientRBAssemblyExpansion;
struct ThetaA0 : RBTheta { virtual Number evaluate(const RBParameters& ) { return 0.05; } };
struct ThetaA1 : RBTheta { virtual Number evaluate(const RBParameters& mu) { return mu.get_value("x_vel"); } };
struct ThetaA2 : RBTheta { virtual Number evaluate(const RBParameters& mu) { return mu.get_value("y_vel"); } };
struct M0 : ElemAssembly
{
virtual void interior_assembly(FEMContext &c)
{
const unsigned int u_var = 0;
const std::vector<Real> &JxW =
c.element_fe_var[u_var]->get_JxW();
const std::vector<std::vector<Real> >& phi =
c.element_fe_var[u_var]->get_phi();
const unsigned int n_u_dofs = c.dof_indices_var[u_var].size();
unsigned int n_qpoints = (c.get_element_qrule())->n_points();
for (unsigned int qp=0; qp != n_qpoints; qp++)
for (unsigned int i=0; i != n_u_dofs; i++)
for (unsigned int j=0; j != n_u_dofs; j++)
c.get_elem_jacobian()(i,j) += JxW[qp] *phi[j][qp]*phi[i][qp];
}
};
struct A0 : ElemAssembly
{
virtual void interior_assembly(FEMContext &c)
{
const unsigned int u_var = 0;
const std::vector<Real> &JxW =
c.element_fe_var[u_var]->get_JxW();
const std::vector<std::vector<RealGradient> >& dphi =
c.element_fe_var[u_var]->get_dphi();
const unsigned int n_u_dofs = c.dof_indices_var[u_var].size();
unsigned int n_qpoints = (c.get_element_qrule())->n_points();
for (unsigned int qp=0; qp != n_qpoints; qp++)
for (unsigned int i=0; i != n_u_dofs; i++)
for (unsigned int j=0; j != n_u_dofs; j++)
c.get_elem_jacobian()(i,j) += JxW[qp] * dphi[j][qp]*dphi[i][qp];
}
};
struct A1 : ElemAssembly
{
virtual void interior_assembly(FEMContext &c)
{
const unsigned int u_var = 0;
const std::vector<Real> &JxW =
c.element_fe_var[u_var]->get_JxW();
const std::vector<std::vector<Real> >& phi =
c.element_fe_var[u_var]->get_phi();
const std::vector<std::vector<RealGradient> >& dphi =
c.element_fe_var[u_var]->get_dphi();
const unsigned int n_u_dofs = c.dof_indices_var[u_var].size();
unsigned int n_qpoints = (c.get_element_qrule())->n_points();
for (unsigned int qp=0; qp != n_qpoints; qp++)
for (unsigned int i=0; i != n_u_dofs; i++)
for (unsigned int j=0; j != n_u_dofs; j++)
c.get_elem_jacobian()(i,j) += JxW[qp] *dphi[j][qp](0)*phi[i][qp];
}
};
struct A2 : ElemAssembly
{
virtual void interior_assembly(FEMContext &c)
{
const unsigned int u_var = 0;
const std::vector<Real> &JxW =
c.element_fe_var[u_var]->get_JxW();
const std::vector<std::vector<Real> >& phi =
c.element_fe_var[u_var]->get_phi();
const std::vector<std::vector<RealGradient> >& dphi =
c.element_fe_var[u_var]->get_dphi();
const unsigned int n_u_dofs = c.dof_indices_var[u_var].size();
unsigned int n_qpoints = (c.get_element_qrule())->n_points();
for (unsigned int qp=0; qp != n_qpoints; qp++)
for (unsigned int i=0; i != n_u_dofs; i++)
for (unsigned int j=0; j != n_u_dofs; j++)
c.get_elem_jacobian()(i,j) += JxW[qp] *dphi[j][qp](1)*phi[i][qp];
}
};
struct F0 : ElemAssembly
{
virtual void interior_assembly(FEMContext &c)
{
const unsigned int u_var = 0;
const std::vector<Real> &JxW =
c.element_fe_var[u_var]->get_JxW();
const std::vector<std::vector<Real> >& phi =
c.element_fe_var[u_var]->get_phi();
const unsigned int n_u_dofs = c.dof_indices_var[u_var].size();
unsigned int n_qpoints = (c.get_element_qrule())->n_points();
for (unsigned int qp=0; qp != n_qpoints; qp++)
for (unsigned int i=0; i != n_u_dofs; i++)
c.get_elem_residual()(i) += JxW[qp] * ( 1.*phi[i][qp] );
}
};
struct OutputAssembly : ElemAssembly
{
OutputAssembly(Real min_x_in, Real max_x_in,
Real min_y_in, Real max_y_in)
:
min_x(min_x_in),
max_x(max_x_in),
min_y(min_y_in),
max_y(max_y_in)
{}
virtual void interior_assembly(FEMContext &c)
{
const unsigned int u_var = 0;
const std::vector<Real> &JxW =
c.element_fe_var[u_var]->get_JxW();
const std::vector<std::vector<Real> >& phi =
c.element_fe_var[u_var]->get_phi();
const unsigned int n_u_dofs = c.dof_indices_var[u_var].size();
unsigned int n_qpoints = (c.get_element_qrule())->n_points();
Real output_area = (max_x-min_x) * (max_y-min_y);
Point centroid = c.elem->centroid();
if( (min_x <= centroid(0)) && (centroid(0) <= max_x) &&
(min_y <= centroid(1)) && (centroid(1) <= max_y) )
for (unsigned int qp=0; qp != n_qpoints; qp++)
for (unsigned int i=0; i != n_u_dofs; i++)
c.get_elem_residual()(i) += JxW[qp] * ( 1.*phi[i][qp] ) / output_area;
}
Real min_x, max_x, min_y, max_y;
};
struct CDRBThetaExpansion : TransientRBThetaExpansion
{
/**
* Constructor.
*/
CDRBThetaExpansion()
{
attach_M_theta(&rb_theta); // Attach the time-derivative theta
attach_A_theta(&theta_a_0); // Attach the lhs theta
attach_A_theta(&theta_a_1);
attach_A_theta(&theta_a_2);
attach_F_theta(&rb_theta); // Attach the rhs theta
attach_output_theta(&rb_theta); // Attach output 0 theta
attach_output_theta(&rb_theta); // Attach output 1 theta
attach_output_theta(&rb_theta); // Attach output 2 theta
attach_output_theta(&rb_theta); // Attach output 3 theta
}
ThetaA0 theta_a_0;
ThetaA1 theta_a_1;
ThetaA2 theta_a_2;
RBTheta rb_theta; // Default RBTheta object, just returns 1.
};
struct CDRBAssemblyExpansion : TransientRBAssemblyExpansion
{
/**
* Constructor.
*/
CDRBAssemblyExpansion()
:
L0(0.72,0.88,0.72,0.88), // We make sure these output regions conform to the mesh
L1(0.12,0.28,0.72,0.88),
L2(0.12,0.28,0.12,0.28),
L3(0.72,0.88,0.12,0.28)
{
attach_M_assembly(&M0_assembly); // Attach the time-derivative assembly
attach_A_assembly(&A0_assembly); // Attach the lhs assembly
attach_A_assembly(&A1_assembly);
attach_A_assembly(&A2_assembly);
attach_F_assembly(&F0_assembly); // Attach the rhs assembly
attach_output_assembly(&L0); // Attach output 0 assembly
attach_output_assembly(&L1); // Attach output 1 assembly
attach_output_assembly(&L2); // Attach output 2 assembly
attach_output_assembly(&L3); // Attach output 3 assembly
}
M0 M0_assembly;
A0 A0_assembly;
A1 A1_assembly;
A2 A2_assembly;
F0 F0_assembly;
OutputAssembly L0;
OutputAssembly L1;
OutputAssembly L2;
OutputAssembly L3;
};
#endif
The source file rb_classes.h without comments:
#ifndef __rb_classes_h__
#define __rb_classes_h__
#include "libmesh/transient_rb_construction.h"
#include "libmesh/fe_base.h"
#include "assembly.h"
using libMesh::EquationSystems;
using libMesh::FEMContext;
using libMesh::RBConstruction;
using libMesh::RBEvaluation;
using libMesh::Real;
using libMesh::TransientRBEvaluation;
using libMesh::TransientRBConstruction;
class SimpleRBEvaluation : public TransientRBEvaluation
{
public:
/**
* Constructor. Just set the theta expansion.
*/
SimpleRBEvaluation()
{
set_rb_theta_expansion(cd_rb_theta_expansion);
}
/**
* The coercivity constant is bounded below by 0.05.
*/
virtual Real get_stability_lower_bound() { return 0.05; }
/**
* The object that stores the "theta" expansion of the parameter dependent PDE,
* i.e. the set of parameter-dependent functions in the affine expansion of the PDE.
*/
CDRBThetaExpansion cd_rb_theta_expansion;
};
class SimpleRBConstruction : public TransientRBConstruction
{
public:
SimpleRBConstruction (EquationSystems& es,
const std::string& name_in,
const unsigned int number_in)
: Parent(es, name_in, number_in),
dirichlet_bc(AutoPtr<DirichletBoundary>(NULL))
{}
/**
* Destructor.
*/
virtual ~SimpleRBConstruction () {}
/**
* The type of system.
*/
typedef SimpleRBConstruction sys_type;
/**
* The type of the parent.
*/
typedef TransientRBConstruction Parent;
/**
* Initialize data structures.
*/
virtual void init_data()
{
u_var = this->add_variable ("u", FIRST);
dirichlet_bc = build_zero_dirichlet_boundary_object();
dirichlet_bc->b.insert(0);
dirichlet_bc->b.insert(1);
dirichlet_bc->b.insert(2);
dirichlet_bc->b.insert(3);
dirichlet_bc->variables.push_back(u_var);
get_dof_map().add_dirichlet_boundary(*dirichlet_bc);
Parent::init_data();
set_rb_assembly_expansion(cd_rb_assembly_expansion);
set_inner_product_assembly(cd_rb_assembly_expansion.A0_assembly);
set_L2_assembly(cd_rb_assembly_expansion.M0_assembly);
}
/**
* Pre-request all relevant element data.
*/
virtual void init_context(FEMContext &c)
{
c.element_fe_var[u_var]->get_JxW();
c.element_fe_var[u_var]->get_phi();
c.element_fe_var[u_var]->get_dphi();
}
/**
* Variable number for u.
*/
unsigned int u_var;
/**
* The object that stores the "assembly" expansion of the parameter dependent PDE,
* i.e. the objects that define how to assemble the set of parameter-independent
* operators in the affine expansion of the PDE.
*/
CDRBAssemblyExpansion cd_rb_assembly_expansion;
/**
* The object that defines which degrees of freedom are on a Dirichlet boundary.
*/
AutoPtr<DirichletBoundary> dirichlet_bc;
};
#endif
The source file reduced_basis_ex3.C without comments:
/* rbOOmit is distributed in the hope that it will be useful, */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
/* Lesser General Public License for more details. */
/* You should have received a copy of the GNU Lesser General Public */
/* License along with this library; if not, write to the Free Software */
/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include "libmesh/libmesh.h"
#include "libmesh/mesh.h"
#include "libmesh/mesh_generation.h"
#include "libmesh/exodusII_io.h"
#include "libmesh/equation_systems.h"
#include "libmesh/dof_map.h"
#include "libmesh/getpot.h"
#include "libmesh/elem.h"
#include "rb_classes.h"
#include "assembly.h"
using namespace libMesh;
int main (int argc, char** argv)
{
LibMeshInit init (argc, argv);
#if !defined(LIBMESH_HAVE_SLEPC)
libmesh_example_assert(false, "--enable-slepc");
#else
#if !defined(LIBMESH_HAVE_XDR)
libmesh_example_assert(false, "--enable-xdr");
#elif defined(LIBMESH_DEFAULT_SINGLE_PRECISION)
libmesh_example_assert(false, "--disable-singleprecision");
#endif
libmesh_example_assert(libMesh::default_solver_package() == PETSC_SOLVERS, "--enable-petsc");
libmesh_example_assert(2 <= LIBMESH_DIM, "2D support");
std::string parameters_filename = "reduced_basis_ex3.in";
GetPot infile(parameters_filename);
unsigned int n_elem = infile("n_elem", 1); // Determines the number of elements in the "truth" mesh
const unsigned int dim = 2; // The number of spatial dimensions
bool store_basis_functions = infile("store_basis_functions", true); // Do we write the RB basis functions to disk?
GetPot command_line (argc, argv);
int online_mode = 0;
if ( command_line.search(1, "-online_mode") )
online_mode = command_line.next(online_mode);
Mesh mesh (dim);
MeshTools::Generation::build_square (mesh,
n_elem, n_elem,
0., 1.,
0., 1.,
QUAD4);
EquationSystems equation_systems (mesh);
SimpleRBConstruction & rb_con =
equation_systems.add_system<SimpleRBConstruction> ("RBConvectionDiffusion");
equation_systems.init ();
equation_systems.print_info();
mesh.print_info();
SimpleRBEvaluation rb_eval;
rb_con.set_rb_evaluation(rb_eval);
if(!online_mode) // Perform the Offline stage of the RB method
{
rb_con.process_parameters_file(parameters_filename);
rb_con.print_info();
rb_con.initialize_rb_construction();
rb_con.train_reduced_basis();
rb_con.get_rb_evaluation().write_offline_data_to_files();
if(store_basis_functions)
{
rb_con.get_rb_evaluation().write_out_basis_functions(rb_con);
}
}
else // Perform the Online stage of the RB method
{
rb_eval.read_offline_data_from_files();
unsigned int online_N = infile("online_N",1);
Real online_x_vel = infile("online_x_vel", 0.);
Real online_y_vel = infile("online_y_vel", 0.);
RBParameters online_mu;
online_mu.set_value("x_vel", online_x_vel);
online_mu.set_value("y_vel", online_y_vel);
rb_eval.set_parameters(online_mu);
rb_eval.print_parameters();
Real error_bound_final_time = rb_eval.rb_solve(online_N);
libMesh::out << "Error bound (absolute) at the final time is "
<< error_bound_final_time << std::endl << std::endl;
if(store_basis_functions)
{
rb_eval.read_in_basis_functions(rb_con);
rb_con.pull_temporal_discretization_data( rb_eval );
rb_con.set_time_step(rb_con.get_n_time_steps());
rb_con.load_rb_solution();
#ifdef LIBMESH_HAVE_EXODUS_API
ExodusII_IO(mesh).write_equation_systems ("RB_sol.e",equation_systems);
#endif
rb_con.load_basis_function(0);
#ifdef LIBMESH_HAVE_EXODUS_API
ExodusII_IO(mesh).write_equation_systems ("bf0.e",equation_systems);
#endif
}
}
return 0;
#endif // LIBMESH_HAVE_SLEPC
}
The console output of the program:
***************************************************************
* Running Example reduced_basis_ex3:
* mpirun -np 2 example-devel -online_mode 0 -pc_type bjacobi -sub_pc_type ilu -sub_pc_factor_levels 4 -sub_pc_factor_zeropivot 0 -ksp_right_pc -log_summary
***************************************************************
*** Warning, This code is untested, experimental, or likely to see future API changes: src/reduced_basis/rb_parametrized.C, line 41, compiled Feb 5 2013 at 13:19:15 ***
EquationSystems
n_systems()=1
System #0, "RBConvectionDiffusion"
Type "TransientRBConstruction"
Variables="u"
Finite Element Types="LAGRANGE", "JACOBI_20_00"
Infinite Element Mapping="CARTESIAN"
Approximation Orders="FIRST", "THIRD"
n_dofs()=676
n_local_dofs()=354
n_constrained_dofs()=100
n_local_constrained_dofs()=50
n_vectors()=3
n_matrices()=1
DofMap Sparsity
Average On-Processor Bandwidth <= 8.44083
Average Off-Processor Bandwidth <= 0.242604
Maximum On-Processor Bandwidth <= 11
Maximum Off-Processor Bandwidth <= 4
DofMap Constraints
Number of DoF Constraints = 100
Average DoF Constraint Length= 0
Number of Node Constraints = 0
Mesh Information:
mesh_dimension()=2
spatial_dimension()=3
n_nodes()=676
n_local_nodes()=354
n_elem()=625
n_local_elem()=313
n_active_elem()=625
n_subdomains()=1
n_partitions()=2
n_processors()=2
n_threads()=1
processor_id()=0
Initializing training parameters with deterministic training set...
Parameter x_vel: log scaling = 0
Parameter y_vel: log scaling = 0
RBConstruction parameters:
system name: RBConvectionDiffusion
constrained_problem: 0
Nmax: 20
Aq operators attached: 3
Fq functions attached: 1
n_outputs: 4
output 0, Q_l = 1
output 1, Q_l = 1
output 2, Q_l = 1
output 3, Q_l = 1
Number of parameters: 2
Parameter x_vel: Min = -2, Max = 2, value = 1
Parameter y_vel: Min = -2, Max = 2, value = 1
n_training_samples: 100
single-matrix mode? 0
reuse preconditioner? 1
use a relative error bound in greedy? 1
write out data during basis training? 0
quiet mode? 1
TransientRBConstruction parameters:
Q_m: 1
Number of time-steps: 100
dt: 0.01
euler_theta (time discretization parameter): 1
delta_N (number of basis functions to add each POD-Greedy step): 1
Using zero initial condition
Compute output dual inner products
output_dual_innerprods[0][0] = 33.6538
output_dual_innerprods[1][0] = 33.6538
output_dual_innerprods[2][0] = 33.6538
output_dual_innerprods[3][0] = 33.6538
---- Performing Greedy basis enrichment ----
---- Basis dimension: 0 ----
Performing RB solves on training set
Maximum (relative) error bound is inf
Performing truth solve at parameter:
x_vel: -2.000000e+00
y_vel: -2.000000e+00
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 2.19193
eigenvalue 1 = 0.0116709
eigenvalue 2 = 0.00106473
...
last eigenvalue = -2.04984e-16
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 1 ----
Performing RB solves on training set
Maximum (relative) error bound is 6.81811
Performing truth solve at parameter:
x_vel: 2.000000e+00
y_vel: 2.000000e+00
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 2.14617
eigenvalue 1 = 0.0106903
eigenvalue 2 = 0.000908038
...
last eigenvalue = -4.5872e-16
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 2 ----
Performing RB solves on training set
Maximum (relative) error bound is 3.15659
Performing truth solve at parameter:
x_vel: -2.000000e+00
y_vel: 2.000000e+00
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 1.6024
eigenvalue 1 = 0.00595727
eigenvalue 2 = 0.000820407
...
last eigenvalue = -7.08102e-17
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 3 ----
Performing RB solves on training set
Maximum (relative) error bound is 3.95604
Performing truth solve at parameter:
x_vel: 2.000000e+00
y_vel: -2.000000e+00
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 1.55558
eigenvalue 1 = 0.0058283
eigenvalue 2 = 0.00079201
...
last eigenvalue = -1.90415e-16
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 4 ----
Performing RB solves on training set
Maximum (relative) error bound is 1.3165
Performing truth solve at parameter:
x_vel: 2.222222e-01
y_vel: -2.222222e-01
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 1.02449
eigenvalue 1 = 0.0148056
eigenvalue 2 = 0.000593638
...
last eigenvalue = -2.15693e-16
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 5 ----
Performing RB solves on training set
Maximum (relative) error bound is 1.05097
Performing truth solve at parameter:
x_vel: -2.222222e-01
y_vel: -2.000000e+00
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 0.365693
eigenvalue 1 = 0.00367148
eigenvalue 2 = 0.000675947
...
last eigenvalue = -3.46945e-18
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 6 ----
Performing RB solves on training set
Maximum (relative) error bound is 1.0171
Performing truth solve at parameter:
x_vel: -2.000000e+00
y_vel: -2.222222e-01
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 0.285715
eigenvalue 1 = 0.00305589
eigenvalue 2 = 0.000804404
...
last eigenvalue = -3.24774e-18
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 7 ----
Performing RB solves on training set
Maximum (relative) error bound is 1.14302
Performing truth solve at parameter:
x_vel: 2.000000e+00
y_vel: -2.222222e-01
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 0.256366
eigenvalue 1 = 0.00345875
eigenvalue 2 = 0.000587394
...
last eigenvalue = -1.12757e-17
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 8 ----
Performing RB solves on training set
Maximum (relative) error bound is 1.21494
Performing truth solve at parameter:
x_vel: -2.222222e-01
y_vel: 2.000000e+00
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 0.260659
eigenvalue 1 = 0.00277678
eigenvalue 2 = 0.000773864
...
last eigenvalue = -4.16334e-17
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 9 ----
Performing RB solves on training set
Maximum (relative) error bound is 0.43786
Performing truth solve at parameter:
x_vel: 1.111111e+00
y_vel: 1.111111e+00
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 0.0933151
eigenvalue 1 = 0.00402501
eigenvalue 2 = 0.00138016
...
last eigenvalue = -9.17712e-18
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 10 ----
Performing RB solves on training set
Maximum (relative) error bound is 0.433787
Performing truth solve at parameter:
x_vel: -1.111111e+00
y_vel: 6.666667e-01
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 0.134913
eigenvalue 1 = 0.00436435
eigenvalue 2 = 0.00137045
...
last eigenvalue = -9.41969e-18
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 11 ----
Performing RB solves on training set
Maximum (relative) error bound is 0.319326
Performing truth solve at parameter:
x_vel: -1.111111e+00
y_vel: -1.111111e+00
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 0.0629291
eigenvalue 1 = 0.00315345
eigenvalue 2 = 0.00120446
...
last eigenvalue = -2.1684e-18
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 12 ----
Performing RB solves on training set
Maximum (relative) error bound is 0.287715
Performing truth solve at parameter:
x_vel: 1.111111e+00
y_vel: -1.555556e+00
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 0.0278364
eigenvalue 1 = 0.00277066
eigenvalue 2 = 0.000760673
...
last eigenvalue = -1.35153e-18
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 13 ----
Performing RB solves on training set
Maximum (relative) error bound is 0.228017
Performing truth solve at parameter:
x_vel: -2.000000e+00
y_vel: 1.111111e+00
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 0.00863589
eigenvalue 1 = 0.00176423
eigenvalue 2 = 0.000525486
...
last eigenvalue = -4.79885e-19
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 14 ----
Performing RB solves on training set
Maximum (relative) error bound is 0.227118
Performing truth solve at parameter:
x_vel: 1.111111e+00
y_vel: 2.000000e+00
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 0.0117352
eigenvalue 1 = 0.00206357
eigenvalue 2 = 0.000546886
...
last eigenvalue = -1.87851e-18
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 15 ----
Performing RB solves on training set
Maximum (relative) error bound is 0.231987
Performing truth solve at parameter:
x_vel: 2.000000e+00
y_vel: 6.666667e-01
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 0.00898933
eigenvalue 1 = 0.0022199
eigenvalue 2 = 0.000417343
...
last eigenvalue = -4.46554e-19
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 16 ----
Performing RB solves on training set
Maximum (relative) error bound is 0.199807
Performing truth solve at parameter:
x_vel: 6.666667e-01
y_vel: -2.000000e+00
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 0.0065467
eigenvalue 1 = 0.00176326
eigenvalue 2 = 0.000231436
...
last eigenvalue = -5.69206e-19
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 17 ----
Performing RB solves on training set
Maximum (relative) error bound is 0.193551
Performing truth solve at parameter:
x_vel: 2.000000e+00
y_vel: -1.111111e+00
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 0.00390004
eigenvalue 1 = 0.00190681
eigenvalue 2 = 0.000456244
...
last eigenvalue = -1.35696e-18
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 18 ----
Performing RB solves on training set
Maximum (relative) error bound is 0.181413
Performing truth solve at parameter:
x_vel: -2.000000e+00
y_vel: -1.111111e+00
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 0.00391101
eigenvalue 1 = 0.00171004
eigenvalue 2 = 0.000346167
...
last eigenvalue = -9.62489e-20
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 19 ----
Performing RB solves on training set
Maximum (relative) error bound is 0.174248
Performing truth solve at parameter:
x_vel: -2.222222e-01
y_vel: 1.111111e+00
Enriching the RB space
POD Eigenvalues:
eigenvalue 0 = 0.0287899
eigenvalue 1 = 0.00198823
eigenvalue 2 = 0.000531507
...
last eigenvalue = -3.91067e-18
Updating RB matrices
Updating RB residual terms
Updating RB initial conditions
---- Basis dimension: 20 ----
Performing RB solves on training set
Maximum (relative) error bound is 0.145574
Maximum number of basis functions reached: Nmax = 20
************************************************************************************************************************
*** WIDEN YOUR WINDOW TO 120 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***
************************************************************************************************************************
---------------------------------------------- PETSc Performance Summary: ----------------------------------------------
/workspace/libmesh/examples/reduced_basis/reduced_basis_ex3/.libs/lt-example-devel on a intel-12. named hbar.ices.utexas.edu with 2 processors, by benkirk Tue Feb 5 13:47:25 2013
Using Petsc Release Version 3.3.0, Patch 2, Fri Jul 13 15:42:00 CDT 2012
Max Max/Min Avg Total
Time (sec): 9.592e+00 1.00000 9.592e+00
Objects: 7.293e+04 1.00000 7.293e+04
Flops: 8.934e+08 1.12114 8.451e+08 1.690e+09
Flops/sec: 9.314e+07 1.12114 8.810e+07 1.762e+08
MPI Messages: 6.148e+04 1.00000 6.148e+04 1.230e+05
MPI Message Lengths: 1.322e+07 1.00000 2.150e+02 2.643e+07
MPI Reductions: 4.155e+05 1.00000
Flop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)
e.g., VecAXPY() for real vectors of length N --> 2N flops
and VecAXPY() for complex vectors of length N --> 8N flops
Summary of Stages: ----- Time ------ ----- Flops ----- --- Messages --- -- Message Lengths -- -- Reductions --
Avg %Total Avg %Total counts %Total Avg %Total counts %Total
0: Main Stage: 9.5920e+00 100.0% 1.6902e+09 100.0% 1.230e+05 100.0% 2.150e+02 100.0% 4.155e+05 100.0%
------------------------------------------------------------------------------------------------------------------------
See the 'Profiling' chapter of the users' manual for details on interpreting output.
Phase summary info:
Count: number of times phase was executed
Time and Flops: Max - maximum over all processors
Ratio - ratio of maximum to minimum over all processors
Mess: number of messages sent
Avg. len: average message length
Reduct: number of global reductions
Global: entire computation
Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().
%T - percent time in this phase %f - percent flops in this phase
%M - percent messages in this phase %L - percent message lengths in this phase
%R - percent reductions in this phase
Total Mflop/s: 10e-6 * (sum of flops over all processors)/(max time over all processors)
------------------------------------------------------------------------------------------------------------------------
Event Count Time (sec) Flops --- Global --- --- Stage --- Total
Max Ratio Max Ratio Max Ratio Mess Avg len Reduct %T %f %M %L %R %T %f %M %L %R Mflop/s
------------------------------------------------------------------------------------------------------------------------
--- Event Stage 0: Main Stage
VecDot 138405 1.0 1.9198e-01 1.1 9.79e+07 1.1 0.0e+00 0.0e+00 1.4e+05 2 11 0 0 33 2 11 0 0 33 973
VecMDot 19152 1.0 8.9699e-02 1.4 8.10e+07 1.1 0.0e+00 0.0e+00 1.9e+04 1 9 0 0 5 1 9 0 0 5 1724
VecNorm 23322 1.0 5.3804e-02 1.1 1.65e+07 1.1 0.0e+00 0.0e+00 2.3e+04 1 2 0 0 6 1 2 0 0 6 586
VecScale 29317 1.0 1.0624e-02 1.0 9.67e+06 1.1 0.0e+00 0.0e+00 0.0e+00 0 1 0 0 0 0 1 0 0 0 1738
VecCopy 10214 1.0 4.8056e-03 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
VecSet 67792 1.0 1.8877e-02 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
VecAXPY 37635 1.0 1.5563e-02 1.0 2.63e+07 1.1 0.0e+00 0.0e+00 0.0e+00 0 3 0 0 0 0 3 0 0 0 3233
VecMAXPY 21237 1.0 2.8751e-02 1.1 9.46e+07 1.1 0.0e+00 0.0e+00 0.0e+00 0 11 0 0 0 0 11 0 0 0 6286
VecAssemblyBegin 14344 1.0 8.0609e-02 1.0 0.00e+00 0.0 1.0e+01 3.7e+02 4.3e+04 1 0 0 0 10 1 0 0 0 10 0
VecAssemblyEnd 14344 1.0 4.7829e-03 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
VecScatterBegin 45356 1.0 5.0679e-02 1.0 0.00e+00 0.0 9.1e+04 2.7e+02 0.0e+00 1 0 74 92 0 1 0 74 92 0 0
VecScatterEnd 45356 1.0 4.3029e-02 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
VecNormalize 21237 1.0 6.0364e-02 1.1 2.26e+07 1.1 0.0e+00 0.0e+00 2.1e+04 1 3 0 0 5 1 3 0 0 5 713
MatMult 21237 1.0 1.3616e-01 1.1 1.21e+08 1.1 4.2e+04 2.6e+02 0.0e+00 1 14 35 41 0 1 14 35 41 0 1696
MatMultAdd 19994 1.0 1.3141e-01 1.1 1.21e+08 1.1 4.0e+04 2.6e+02 0.0e+00 1 14 33 39 0 1 14 33 39 0 1758
MatSolve 23322 1.0 1.5447e-01 1.1 3.22e+08 1.2 0.0e+00 0.0e+00 0.0e+00 2 36 0 0 0 2 36 0 0 0 3891
MatLUFactorNum 42 1.0 4.1273e-03 1.2 2.60e+06 1.2 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 1142
MatILUFactorSym 42 1.0 9.0315e-03 1.2 0.00e+00 0.0 0.0e+00 0.0e+00 1.3e+02 0 0 0 0 0 0 0 0 0 0 0
MatAssemblyBegin 36214 1.0 2.8633e-01 1.5 0.00e+00 0.0 1.8e+01 1.3e+03 7.2e+04 2 0 0 0 17 2 0 0 0 17 0
MatAssemblyEnd 36214 1.0 7.3855e-01 1.0 0.00e+00 0.0 3.2e+04 6.6e+01 6.4e+04 8 0 26 8 15 8 0 26 8 15 0
MatGetRow 5693736 1.1 4.6851e-01 1.1 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 5 0 0 0 0 5 0 0 0 0 0
MatGetRowIJ 42 1.0 7.1526e-06 1.2 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
MatGetOrdering 42 1.0 8.2779e-04 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 8.4e+01 0 0 0 0 0 0 0 0 0 0 0
MatZeroEntries 2062 1.0 2.4259e-03 1.1 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
MatAXPY 8042 1.0 3.6987e+00 1.0 0.00e+00 0.0 3.2e+04 6.6e+01 1.3e+05 39 0 26 8 31 39 0 26 8 31 0
KSPGMRESOrthog 19152 1.0 1.2193e-01 1.2 1.62e+08 1.1 0.0e+00 0.0e+00 1.9e+04 1 18 0 0 5 1 18 0 0 5 2538
KSPSetUp 2127 1.0 4.3910e-03 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
KSPSolve 2085 1.0 6.4861e-01 1.0 6.49e+08 1.1 4.2e+04 2.6e+02 4.3e+04 7 72 35 41 10 7 72 35 41 10 1886
PCSetUp 84 1.0 1.8326e-02 1.1 2.60e+06 1.2 0.0e+00 0.0e+00 2.1e+02 0 0 0 0 0 0 0 0 0 0 257
PCSetUpOnBlocks 2085 1.0 1.7525e-02 1.1 2.60e+06 1.2 0.0e+00 0.0e+00 2.1e+02 0 0 0 0 0 0 0 0 0 0 269
PCApply 23322 1.0 2.7042e-01 1.1 3.22e+08 1.2 0.0e+00 0.0e+00 0.0e+00 3 36 0 0 0 3 36 0 0 0 2223
------------------------------------------------------------------------------------------------------------------------
Memory usage is given in bytes:
Object Type Creations Destructions Memory Descendants' Mem.
Reports information only for process 0.
--- Event Stage 0: Main Stage
Vector 24441 24441 61712408 0
Vector Scatter 8054 8054 8343944 0
Index Set 16234 16234 13103920 0
IS L to G Mapping 5 5 2820 0
Matrix 24189 24189 449505780 0
Krylov Solver 2 2 19360 0
Preconditioner 2 2 1784 0
Viewer 1 0 0 0
========================================================================================================================
Average time to get PetscTime(): 0
Average time for MPI_Barrier(): 6.19888e-07
Average time for zero size MPI_Send(): 1.29938e-05
#PETSc Option Table entries:
-ksp_right_pc
-log_summary
-online_mode 0
-pc_type bjacobi
-sub_pc_factor_levels 4
-sub_pc_factor_zeropivot 0
-sub_pc_type ilu
#End of PETSc Option Table entries
Compiled without FORTRAN kernels
Compiled with full precision matrices (default)
sizeof(short) 2 sizeof(int) 4 sizeof(long) 8 sizeof(void*) 8 sizeof(PetscScalar) 8 sizeof(PetscInt) 4
Configure run at: Thu Nov 8 11:21:02 2012
Configure options: --with-debugging=false --COPTFLAGS=-O3 --CXXOPTFLAGS=-O3 --FOPTFLAGS=-O3 --with-clanguage=C++ --with-shared-libraries=1 --with-mpi-dir=/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1 --with-mumps=true --download-mumps=1 --with-metis=true --download-metis=1 --with-parmetis=true --download-parmetis=1 --with-superlu=true --download-superlu=1 --with-superludir=true --download-superlu_dist=1 --with-blacs=true --download-blacs=1 --with-scalapack=true --download-scalapack=1 --with-hypre=true --download-hypre=1 --with-blas-lib="[/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_intel_lp64.so,/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_sequential.so,/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_core.so]" --with-lapack-lib="[/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_lapack95_lp64.a]"
-----------------------------------------
Libraries compiled on Thu Nov 8 11:21:02 2012 on daedalus.ices.utexas.edu
Machine characteristics: Linux-2.6.32-279.1.1.el6.x86_64-x86_64-with-redhat-6.3-Carbon
Using PETSc directory: /opt/apps/ossw/libraries/petsc/petsc-3.3-p2
Using PETSc arch: intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt
-----------------------------------------
Using C compiler: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpicxx -wd1572 -O3 -fPIC ${COPTFLAGS} ${CFLAGS}
Using Fortran compiler: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpif90 -fPIC -O3 ${FOPTFLAGS} ${FFLAGS}
-----------------------------------------
Using include paths: -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/include -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/include -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/include -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/include -I/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/include
-----------------------------------------
Using C linker: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpicxx
Using Fortran linker: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpif90
Using libraries: -Wl,-rpath,/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -L/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -lpetsc -lX11 -Wl,-rpath,/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -L/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -lcmumps -ldmumps -lsmumps -lzmumps -lmumps_common -lpord -lHYPRE -lpthread -lsuperlu_dist_3.0 -lparmetis -lmetis -lscalapack -lblacs -lsuperlu_4.3 -Wl,-rpath,/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64 -L/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64 -lmkl_lapack95_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,-rpath,/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/lib -L/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/lib -Wl,-rpath,/opt/apps/sysnet/intel/12.1/composer_xe_2011_sp1.7.256/compiler/lib/intel64 -L/opt/apps/sysnet/intel/12.1/composer_xe_2011_sp1.7.256/compiler/lib/intel64 -Wl,-rpath,/usr/lib/gcc/x86_64-redhat-linux/4.4.6 -L/usr/lib/gcc/x86_64-redhat-linux/4.4.6 -lmpichf90 -lifport -lifcore -lm -lm -lmpichcxx -ldl -lmpich -lopa -lmpl -lrt -lpthread -limf -lsvml -lipgo -ldecimal -lcilkrts -lstdc++ -lgcc_s -lirc -lirc_s -ldl
-----------------------------------------
----------------------------------------------------------------------------------------------------------------------
| Processor id: 0 |
| Num Processors: 2 |
| Time: Tue Feb 5 13:47:25 2013 |
| OS: Linux |
| HostName: hbar.ices.utexas.edu |
| OS Release: 2.6.32-279.1.1.el6.x86_64 |
| OS Version: #1 SMP Tue Jul 10 11:24:23 CDT 2012 |
| Machine: x86_64 |
| Username: benkirk |
| Configuration: ./configure '--enable-everything' |
| '--prefix=/workspace/libmesh/install' |
| 'CXX=mpicxx' |
| 'CC=mpicc' |
| 'F77=mpif77' |
| 'FC=mpif90' |
| 'PETSC_DIR=/opt/apps/ossw/libraries/petsc/petsc-3.3-p2' |
| 'PETSC_ARCH=intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt' |
| 'SLEPC_DIR=/opt/apps/ossw/libraries/slepc/slepc-3.3-p2-petsc-3.3-p2-cxx-opt' |
| 'TRILINOS_DIR=/opt/apps/ossw/libraries/trilinos/trilinos-10.12.2/sl6/intel-12.1/mpich2-1.4.1p1/mkl-intel-10.3.12.361'|
| 'VTK_DIR=/opt/apps/ossw/libraries/vtk/vtk-5.10.0/sl6/intel-12.1' |
----------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------
| libMesh Performance: Alive time=9.60086, Active time=9.55216 |
-------------------------------------------------------------------------------------------------------------------
| Event nCalls Total Time Avg Time Total Time Avg Time % of Active Time |
| w/o Sub w/o Sub With Sub With Sub w/o S With S |
|-------------------------------------------------------------------------------------------------------------------|
| |
| |
| DofMap |
| add_neighbors_to_send_list() 1 0.0072 0.007151 0.0089 0.008866 0.07 0.09 |
| build_constraint_matrix() 3443 0.0159 0.000005 0.0159 0.000005 0.17 0.17 |
| build_sparsity() 1 0.0049 0.004923 0.0141 0.014092 0.05 0.15 |
| cnstrn_elem_mat_vec() 3443 0.0247 0.000007 0.0247 0.000007 0.26 0.26 |
| create_dof_constraints() 1 0.0063 0.006256 0.0234 0.023363 0.07 0.24 |
| distribute_dofs() 1 0.0068 0.006816 0.0182 0.018196 0.07 0.19 |
| dof_indices() 7886 0.2386 0.000030 0.2386 0.000030 2.50 2.50 |
| prepare_send_list() 1 0.0000 0.000033 0.0000 0.000033 0.00 0.00 |
| reinit() 1 0.0111 0.011094 0.0111 0.011094 0.12 0.12 |
| |
| FE |
| compute_shape_functions() 7964 0.1093 0.000014 0.1093 0.000014 1.14 1.14 |
| init_shape_functions() 1100 0.0149 0.000014 0.0149 0.000014 0.16 0.16 |
| inverse_map() 2156 0.0111 0.000005 0.0111 0.000005 0.12 0.12 |
| |
| FEMap |
| compute_affine_map() 7964 0.0428 0.000005 0.0428 0.000005 0.45 0.45 |
| compute_face_map() 1078 0.0109 0.000010 0.0222 0.000021 0.11 0.23 |
| init_face_shape_functions() 22 0.0002 0.000009 0.0002 0.000009 0.00 0.00 |
| init_reference_to_physical_map() 1100 0.0087 0.000008 0.0087 0.000008 0.09 0.09 |
| |
| Mesh |
| find_neighbors() 1 0.0075 0.007506 0.0076 0.007559 0.08 0.08 |
| renumber_nodes_and_elem() 2 0.0003 0.000152 0.0003 0.000152 0.00 0.00 |
| |
| MeshCommunication |
| assign_global_indices() 1 0.0165 0.016504 0.0169 0.016905 0.17 0.18 |
| compute_hilbert_indices() 2 0.0057 0.002849 0.0057 0.002849 0.06 0.06 |
| find_global_indices() 2 0.0021 0.001064 0.0091 0.004543 0.02 0.10 |
| parallel_sort() 2 0.0009 0.000431 0.0009 0.000473 0.01 0.01 |
| |
| MeshTools::Generation |
| build_cube() 1 0.0016 0.001586 0.0016 0.001586 0.02 0.02 |
| |
| MetisPartitioner |
| partition() 1 0.0064 0.006357 0.0106 0.010588 0.07 0.11 |
| |
| Parallel |
| allgather() 14 0.0003 0.000018 0.0003 0.000020 0.00 0.00 |
| barrier() 1 0.0000 0.000015 0.0000 0.000015 0.00 0.00 |
| broadcast() 44 0.0002 0.000003 0.0002 0.000003 0.00 0.00 |
| max(bool) 7 0.0000 0.000003 0.0000 0.000003 0.00 0.00 |
| max(scalar) 127 0.0003 0.000003 0.0003 0.000003 0.00 0.00 |
| max(vector) 30 0.0002 0.000006 0.0005 0.000015 0.00 0.00 |
| maxloc(scalar) 21 0.0006 0.000027 0.0006 0.000027 0.01 0.01 |
| min(bool) 148 0.0005 0.000003 0.0005 0.000003 0.00 0.00 |
| min(scalar) 121 0.0012 0.000010 0.0012 0.000010 0.01 0.01 |
| min(vector) 30 0.0002 0.000008 0.0005 0.000017 0.00 0.01 |
| probe() 32 0.0002 0.000007 0.0002 0.000007 0.00 0.00 |
| receive() 24 0.0002 0.000010 0.0004 0.000017 0.00 0.00 |
| send() 20 0.0001 0.000004 0.0001 0.000004 0.00 0.00 |
| send_receive() 24 0.0003 0.000011 0.0007 0.000028 0.00 0.01 |
| sum() 44 0.0003 0.000007 0.0004 0.000009 0.00 0.00 |
| |
| Parallel::Request |
| wait() 20 0.0000 0.000002 0.0000 0.000002 0.00 0.00 |
| |
| Partitioner |
| set_node_processor_ids() 1 0.0008 0.000789 0.0009 0.000902 0.01 0.01 |
| set_parent_processor_ids() 1 0.0005 0.000499 0.0005 0.000499 0.01 0.01 |
| |
| PetscLinearSolver |
| solve() 2085 0.7470 0.000358 0.7470 0.000358 7.82 7.82 |
| |
| RBConstruction |
| add_scaled_matrix_and_vector() 11 0.1102 0.010015 0.5651 0.051371 1.15 5.92 |
| clear() 3 0.0007 0.000233 0.0007 0.000233 0.01 0.01 |
| compute_Fq_representor_innerprods() 1 0.0006 0.000577 0.0019 0.001882 0.01 0.02 |
| compute_max_error_bound() 21 0.0226 0.001078 3.0985 0.147550 0.24 32.44 |
| compute_output_dual_innerprods() 1 0.0018 0.001819 0.0050 0.004978 0.02 0.05 |
| train_reduced_basis() 1 0.0023 0.002297 8.8167 8.816660 0.02 92.30 |
| update_RB_system_matrices() 20 0.0341 0.001703 0.0341 0.001703 0.36 0.36 |
| update_residual_terms() 20 0.0676 0.003381 0.1289 0.006444 0.71 1.35 |
| |
| RBEvaluation |
| clear() 2 0.0003 0.000145 0.0003 0.000145 0.00 0.00 |
| resize_data_structures() 1 0.0003 0.000307 0.0003 0.000307 0.00 0.00 |
| write_offline_data_to_files() 1 0.0012 0.001169 0.0012 0.001169 0.01 0.01 |
| write_out_basis_functions() 1 0.0000 0.000033 0.0980 0.097952 0.00 1.03 |
| write_out_vectors() 1 0.0808 0.080752 0.0979 0.097919 0.85 1.03 |
| |
| TransientRBConstruction |
| enrich_RB_space() 20 0.2423 0.012114 0.2423 0.012114 2.54 2.54 |
| mass_matrix_scaled_matvec() 2000 0.1068 0.000053 0.1068 0.000053 1.12 1.12 |
| set_error_temporal_data() 2020 0.2189 0.000108 0.2189 0.000108 2.29 2.29 |
| truth_assembly() 2000 4.0563 0.002028 4.1636 0.002082 42.46 43.59 |
| truth_solve() 20 0.1748 0.008738 5.2224 0.261118 1.83 54.67 |
| update_RB_initial_condition_all_N() 20 0.0063 0.000315 0.0063 0.000315 0.07 0.07 |
| update_RB_system_matrices() 20 0.0106 0.000528 0.0446 0.002232 0.11 0.47 |
| update_residual_terms() 20 0.0464 0.002320 0.1929 0.009644 0.49 2.02 |
| |
| TransientRBEvaluation |
| cache_online_residual_terms() 1050 0.0442 0.000042 0.0442 0.000042 0.46 0.46 |
| compute_residual_dual_norm() 105000 1.7132 0.000016 1.7132 0.000016 17.94 17.94 |
| rb_solve() 1050 1.3024 0.001240 3.0749 0.002928 13.63 32.19 |
| resize_data_structures() 1 0.0002 0.000151 0.0005 0.000458 0.00 0.00 |
| write_offline_data_to_files() 1 0.0003 0.000329 0.0015 0.001498 0.00 0.02 |
-------------------------------------------------------------------------------------------------------------------
| Totals: 152274 9.5522 100.00 |
-------------------------------------------------------------------------------------------------------------------
***************************************************************
* Done Running Example reduced_basis_ex3:
* mpirun -np 2 example-devel -online_mode 0 -pc_type bjacobi -sub_pc_type ilu -sub_pc_factor_levels 4 -sub_pc_factor_zeropivot 0 -ksp_right_pc -log_summary
***************************************************************
***************************************************************
* Running Example reduced_basis_ex3:
* mpirun -np 2 example-devel -online_mode 1 -pc_type bjacobi -sub_pc_type ilu -sub_pc_factor_levels 4 -sub_pc_factor_zeropivot 0 -ksp_right_pc -log_summary
***************************************************************
*** Warning, This code is untested, experimental, or likely to see future API changes: src/reduced_basis/rb_parametrized.C, line 41, compiled Feb 5 2013 at 13:19:15 ***
EquationSystems
n_systems()=1
System #0, "RBConvectionDiffusion"
Type "TransientRBConstruction"
Variables="u"
Finite Element Types="LAGRANGE", "JACOBI_20_00"
Infinite Element Mapping="CARTESIAN"
Approximation Orders="FIRST", "THIRD"
n_dofs()=676
n_local_dofs()=354
n_constrained_dofs()=100
n_local_constrained_dofs()=50
n_vectors()=3
n_matrices()=1
DofMap Sparsity
Average On-Processor Bandwidth <= 8.44083
Average Off-Processor Bandwidth <= 0.242604
Maximum On-Processor Bandwidth <= 11
Maximum Off-Processor Bandwidth <= 4
DofMap Constraints
Number of DoF Constraints = 100
Average DoF Constraint Length= 0
Number of Node Constraints = 0
Mesh Information:
mesh_dimension()=2
spatial_dimension()=3
n_nodes()=676
n_local_nodes()=354
n_elem()=625
n_local_elem()=313
n_active_elem()=625
n_subdomains()=1
n_partitions()=2
n_processors()=2
n_threads()=1
processor_id()=0
x_vel: 1.000000e+00
y_vel: 1.000000e+00
Error bound (absolute) at the final time is 0.0167619
************************************************************************************************************************
*** WIDEN YOUR WINDOW TO 120 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***
************************************************************************************************************************
---------------------------------------------- PETSc Performance Summary: ----------------------------------------------
/workspace/libmesh/examples/reduced_basis/reduced_basis_ex3/.libs/lt-example-devel on a intel-12. named hbar.ices.utexas.edu with 2 processors, by benkirk Tue Feb 5 13:47:25 2013
Using Petsc Release Version 3.3.0, Patch 2, Fri Jul 13 15:42:00 CDT 2012
Max Max/Min Avg Total
Time (sec): 2.082e-01 1.00000 2.082e-01
Objects: 5.700e+01 1.00000 5.700e+01
Flops: 1.416e+04 1.09938 1.352e+04 2.704e+04
Flops/sec: 6.801e+04 1.09937 6.494e+04 1.299e+05
MPI Messages: 1.200e+01 1.00000 1.200e+01 2.400e+01
MPI Message Lengths: 1.748e+03 1.00000 1.457e+02 3.496e+03
MPI Reductions: 1.390e+02 1.00000
Flop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)
e.g., VecAXPY() for real vectors of length N --> 2N flops
and VecAXPY() for complex vectors of length N --> 8N flops
Summary of Stages: ----- Time ------ ----- Flops ----- --- Messages --- -- Message Lengths -- -- Reductions --
Avg %Total Avg %Total counts %Total Avg %Total counts %Total
0: Main Stage: 2.0816e-01 100.0% 2.7040e+04 100.0% 2.400e+01 100.0% 1.457e+02 100.0% 1.380e+02 99.3%
------------------------------------------------------------------------------------------------------------------------
See the 'Profiling' chapter of the users' manual for details on interpreting output.
Phase summary info:
Count: number of times phase was executed
Time and Flops: Max - maximum over all processors
Ratio - ratio of maximum to minimum over all processors
Mess: number of messages sent
Avg. len: average message length
Reduct: number of global reductions
Global: entire computation
Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().
%T - percent time in this phase %f - percent flops in this phase
%M - percent messages in this phase %L - percent message lengths in this phase
%R - percent reductions in this phase
Total Mflop/s: 10e-6 * (sum of flops over all processors)/(max time over all processors)
------------------------------------------------------------------------------------------------------------------------
Event Count Time (sec) Flops --- Global --- --- Stage --- Total
Max Ratio Max Ratio Max Ratio Mess Avg len Reduct %T %f %M %L %R %T %f %M %L %R Mflop/s
------------------------------------------------------------------------------------------------------------------------
--- Event Stage 0: Main Stage
VecCopy 3 1.0 3.0994e-06 1.1 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
VecSet 29 1.0 1.0967e-05 1.4 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
VecAXPY 20 1.0 1.6212e-05 1.5 1.42e+04 1.1 0.0e+00 0.0e+00 0.0e+00 0100 0 0 0 0100 0 0 0 1668
VecAssemblyBegin 23 1.0 3.7539e-0327.9 0.00e+00 0.0 0.0e+00 0.0e+00 6.9e+01 1 0 0 0 50 1 0 0 0 50 0
VecAssemblyEnd 23 1.0 2.3127e-05 1.2 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
VecScatterBegin 2 1.0 3.4094e-05 1.0 0.00e+00 0.0 4.0e+00 3.8e+02 0.0e+00 0 0 17 44 0 0 0 17 44 0 0
VecScatterEnd 2 1.0 7.8678e-06 1.3 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
MatZeroEntries 2 1.0 1.3113e-05 1.9 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0
------------------------------------------------------------------------------------------------------------------------
Memory usage is given in bytes:
Object Type Creations Destructions Memory Descendants' Mem.
Reports information only for process 0.
--- Event Stage 0: Main Stage
Vector 33 33 129624 0
Vector Scatter 5 5 5180 0
Index Set 10 10 8040 0
IS L to G Mapping 5 5 2820 0
Matrix 3 3 53552 0
Viewer 1 0 0 0
========================================================================================================================
Average time to get PetscTime(): 0
Average time for MPI_Barrier(): 1.19209e-06
Average time for zero size MPI_Send(): 1.20401e-05
#PETSc Option Table entries:
-ksp_right_pc
-log_summary
-online_mode 1
-pc_type bjacobi
-sub_pc_factor_levels 4
-sub_pc_factor_zeropivot 0
-sub_pc_type ilu
#End of PETSc Option Table entries
Compiled without FORTRAN kernels
Compiled with full precision matrices (default)
sizeof(short) 2 sizeof(int) 4 sizeof(long) 8 sizeof(void*) 8 sizeof(PetscScalar) 8 sizeof(PetscInt) 4
Configure run at: Thu Nov 8 11:21:02 2012
Configure options: --with-debugging=false --COPTFLAGS=-O3 --CXXOPTFLAGS=-O3 --FOPTFLAGS=-O3 --with-clanguage=C++ --with-shared-libraries=1 --with-mpi-dir=/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1 --with-mumps=true --download-mumps=1 --with-metis=true --download-metis=1 --with-parmetis=true --download-parmetis=1 --with-superlu=true --download-superlu=1 --with-superludir=true --download-superlu_dist=1 --with-blacs=true --download-blacs=1 --with-scalapack=true --download-scalapack=1 --with-hypre=true --download-hypre=1 --with-blas-lib="[/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_intel_lp64.so,/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_sequential.so,/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_core.so]" --with-lapack-lib="[/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64/libmkl_lapack95_lp64.a]"
-----------------------------------------
Libraries compiled on Thu Nov 8 11:21:02 2012 on daedalus.ices.utexas.edu
Machine characteristics: Linux-2.6.32-279.1.1.el6.x86_64-x86_64-with-redhat-6.3-Carbon
Using PETSc directory: /opt/apps/ossw/libraries/petsc/petsc-3.3-p2
Using PETSc arch: intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt
-----------------------------------------
Using C compiler: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpicxx -wd1572 -O3 -fPIC ${COPTFLAGS} ${CFLAGS}
Using Fortran compiler: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpif90 -fPIC -O3 ${FOPTFLAGS} ${FFLAGS}
-----------------------------------------
Using include paths: -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/include -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/include -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/include -I/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/include -I/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/include
-----------------------------------------
Using C linker: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpicxx
Using Fortran linker: /opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/bin/mpif90
Using libraries: -Wl,-rpath,/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -L/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -lpetsc -lX11 -Wl,-rpath,/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -L/opt/apps/ossw/libraries/petsc/petsc-3.3-p2/intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt/lib -lcmumps -ldmumps -lsmumps -lzmumps -lmumps_common -lpord -lHYPRE -lpthread -lsuperlu_dist_3.0 -lparmetis -lmetis -lscalapack -lblacs -lsuperlu_4.3 -Wl,-rpath,/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64 -L/opt/apps/sysnet/intel/12.1/mkl/10.3.12.361/lib/intel64 -lmkl_lapack95_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,-rpath,/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/lib -L/opt/apps/ossw/libraries/mpich2/mpich2-1.4.1p1/sl6/intel-12.1/lib -Wl,-rpath,/opt/apps/sysnet/intel/12.1/composer_xe_2011_sp1.7.256/compiler/lib/intel64 -L/opt/apps/sysnet/intel/12.1/composer_xe_2011_sp1.7.256/compiler/lib/intel64 -Wl,-rpath,/usr/lib/gcc/x86_64-redhat-linux/4.4.6 -L/usr/lib/gcc/x86_64-redhat-linux/4.4.6 -lmpichf90 -lifport -lifcore -lm -lm -lmpichcxx -ldl -lmpich -lopa -lmpl -lrt -lpthread -limf -lsvml -lipgo -ldecimal -lcilkrts -lstdc++ -lgcc_s -lirc -lirc_s -ldl
-----------------------------------------
----------------------------------------------------------------------------------------------------------------------
| Processor id: 0 |
| Num Processors: 2 |
| Time: Tue Feb 5 13:47:25 2013 |
| OS: Linux |
| HostName: hbar.ices.utexas.edu |
| OS Release: 2.6.32-279.1.1.el6.x86_64 |
| OS Version: #1 SMP Tue Jul 10 11:24:23 CDT 2012 |
| Machine: x86_64 |
| Username: benkirk |
| Configuration: ./configure '--enable-everything' |
| '--prefix=/workspace/libmesh/install' |
| 'CXX=mpicxx' |
| 'CC=mpicc' |
| 'F77=mpif77' |
| 'FC=mpif90' |
| 'PETSC_DIR=/opt/apps/ossw/libraries/petsc/petsc-3.3-p2' |
| 'PETSC_ARCH=intel-12.1-mkl-intel-10.3.12.361-mpich2-1.4.1p1-cxx-opt' |
| 'SLEPC_DIR=/opt/apps/ossw/libraries/slepc/slepc-3.3-p2-petsc-3.3-p2-cxx-opt' |
| 'TRILINOS_DIR=/opt/apps/ossw/libraries/trilinos/trilinos-10.12.2/sl6/intel-12.1/mpich2-1.4.1p1/mkl-intel-10.3.12.361'|
| 'VTK_DIR=/opt/apps/ossw/libraries/vtk/vtk-5.10.0/sl6/intel-12.1' |
----------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------
| libMesh Performance: Alive time=0.218097, Active time=0.196257 |
--------------------------------------------------------------------------------------------------------------
| Event nCalls Total Time Avg Time Total Time Avg Time % of Active Time |
| w/o Sub w/o Sub With Sub With Sub w/o S With S |
|--------------------------------------------------------------------------------------------------------------|
| |
| |
| DofMap |
| add_neighbors_to_send_list() 1 0.0073 0.007285 0.0090 0.008973 3.71 4.57 |
| build_sparsity() 1 0.0050 0.004987 0.0141 0.014085 2.54 7.18 |
| create_dof_constraints() 1 0.0063 0.006292 0.0236 0.023557 3.21 12.00 |
| distribute_dofs() 1 0.0068 0.006830 0.0181 0.018090 3.48 9.22 |
| dof_indices() 1626 0.0444 0.000027 0.0444 0.000027 22.62 22.62 |
| prepare_send_list() 1 0.0000 0.000034 0.0000 0.000034 0.02 0.02 |
| reinit() 1 0.0108 0.010822 0.0108 0.010822 5.51 5.51 |
| |
| EquationSystems |
| build_solution_vector() 2 0.0026 0.001300 0.0202 0.010077 1.32 10.27 |
| |
| ExodusII_IO |
| write_nodal_data() 2 0.0067 0.003338 0.0067 0.003338 3.40 3.40 |
| |
| Mesh |
| find_neighbors() 1 0.0072 0.007197 0.0076 0.007572 3.67 3.86 |
| renumber_nodes_and_elem() 2 0.0003 0.000159 0.0003 0.000159 0.16 0.16 |
| |
| MeshCommunication |
| assign_global_indices() 1 0.0164 0.016410 0.0169 0.016860 8.36 8.59 |
| compute_hilbert_indices() 2 0.0058 0.002891 0.0058 0.002891 2.95 2.95 |
| find_global_indices() 2 0.0021 0.001048 0.0092 0.004588 1.07 4.68 |
| parallel_sort() 2 0.0009 0.000447 0.0010 0.000497 0.46 0.51 |
| |
| MeshOutput |
| write_equation_systems() 2 0.0001 0.000047 0.0270 0.013513 0.05 13.77 |
| |
| MeshTools::Generation |
| build_cube() 1 0.0015 0.001498 0.0015 0.001498 0.76 0.76 |
| |
| MetisPartitioner |
| partition() 1 0.0065 0.006481 0.0107 0.010712 3.30 5.46 |
| |
| Parallel |
| allgather() 14 0.0005 0.000038 0.0006 0.000040 0.27 0.28 |
| barrier() 1 0.0009 0.000865 0.0009 0.000865 0.44 0.44 |
| broadcast() 13 0.0002 0.000015 0.0002 0.000013 0.10 0.08 |
| max(bool) 1 0.0000 0.000003 0.0000 0.000003 0.00 0.00 |
| max(scalar) 135 0.0004 0.000003 0.0004 0.000003 0.18 0.18 |
| max(vector) 30 0.0002 0.000007 0.0004 0.000014 0.10 0.21 |
| min(bool) 156 0.0003 0.000002 0.0003 0.000002 0.18 0.18 |
| min(scalar) 129 0.0014 0.000011 0.0014 0.000011 0.70 0.70 |
| min(vector) 30 0.0002 0.000008 0.0005 0.000016 0.12 0.24 |
| probe() 24 0.0002 0.000007 0.0002 0.000007 0.09 0.09 |
| receive() 22 0.0002 0.000008 0.0003 0.000015 0.09 0.17 |
| send() 22 0.0001 0.000005 0.0001 0.000005 0.06 0.06 |
| send_receive() 24 0.0002 0.000010 0.0006 0.000026 0.13 0.32 |
| sum() 31 0.0003 0.000008 0.0007 0.000021 0.13 0.33 |
| |
| Parallel::Request |
| wait() 22 0.0001 0.000002 0.0001 0.000002 0.03 0.03 |
| |
| Partitioner |
| set_node_processor_ids() 1 0.0008 0.000800 0.0009 0.000913 0.41 0.47 |
| set_parent_processor_ids() 1 0.0005 0.000519 0.0005 0.000519 0.26 0.26 |
| |
| RBConstruction |
| clear() 3 0.0003 0.000101 0.0003 0.000101 0.15 0.15 |
| load_basis_function() 1 0.0000 0.000046 0.0000 0.000046 0.02 0.02 |
| |
| RBEvaluation |
| clear() 2 0.0000 0.000021 0.0000 0.000021 0.02 0.02 |
| read_in_basis_functions() 1 0.0000 0.000039 0.0589 0.058923 0.02 30.02 |
| read_in_vectors() 1 0.0407 0.040738 0.0589 0.058884 20.76 30.00 |
| read_offline_data_from_files() 1 0.0009 0.000889 0.0014 0.001447 0.45 0.74 |
| resize_data_structures() 1 0.0004 0.000384 0.0004 0.000384 0.20 0.20 |
| |
| TransientRBConstruction |
| load_rb_solution() 1 0.0002 0.000154 0.0002 0.000154 0.08 0.08 |
| |
| TransientRBEvaluation |
| cache_online_residual_terms() 1 0.0001 0.000129 0.0001 0.000129 0.07 0.07 |
| compute_residual_dual_norm() 100 0.0052 0.000052 0.0052 0.000052 2.68 2.68 |
| rb_solve() 1 0.0107 0.010710 0.0161 0.016109 5.46 8.21 |
| read_offline_data_from_files() 1 0.0003 0.000269 0.0017 0.001717 0.14 0.87 |
| resize_data_structures() 1 0.0002 0.000173 0.0006 0.000557 0.09 0.28 |
--------------------------------------------------------------------------------------------------------------
| Totals: 2421 0.1963 100.00 |
--------------------------------------------------------------------------------------------------------------
***************************************************************
* Done Running Example reduced_basis_ex3:
* mpirun -np 2 example-devel -online_mode 1 -pc_type bjacobi -sub_pc_type ilu -sub_pc_factor_levels 4 -sub_pc_factor_zeropivot 0 -ksp_right_pc -log_summary
***************************************************************