diff --git a/include/graphblas/algorithms/amg/amg.hpp b/include/graphblas/algorithms/amg/amg.hpp new file mode 100644 index 000000000..660b0e63a --- /dev/null +++ b/include/graphblas/algorithms/amg/amg.hpp @@ -0,0 +1,251 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Algebraic Multi-grid algorithm relying on level_ matrices from AMGCL. + * @file amg.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * @author Denis Jelovina (denis.jelovina@huawei.com) + * @date 2022-08-10 + */ + +#ifndef _H_GRB_ALGORITHMS_AMG +#define _H_GRB_ALGORITHMS_AMG +#ifdef _DEBUG + #define AMG_PRINT_STEPS +#endif + +#include +#include "amg_data.hpp" +#include "multigrid_v_cycle.hpp" +#include + +namespace grb { + + namespace algorithms { + + /** + * Algebraic Multi-grid algorithm relying on level_ matrices from AMGCL. + * + * Finds the solution x of an \f$ A x = b \f$ algebraic system by running + * the AMG algorithm. AMG implementation (as the standard one) couples a + * standard CG algorithm with a V-cycle multi-grid solver to initially + * refine the tentative solution. This refinement step depends on the + * availability of coarsening information, which should be stored inside + * \p data; otherwise, the refinement is not performed and only the CG + * algorithm is run. + * + * This implementation assumes that the vectors and matrices inside \p data + * are all correctly initialized and populated with the proper values; + * in particular + * - amg_data#x with the initial tentative solution + * (iterative solutions are also stored here) + * - amg_data#A with the system matrix + * - amg_data#b with the right-hand side vector \f$ b \f$ + * - amg_data#A_diagonal with the diagonal values of the matrix + * - amg_data#coarser_level with the information for + * the coarser multi-grid run (if any) + * The other vectors are assumed to be inizialized + * (via the usual grb::Vector#Vector(size_t) constructor) + * but not necessarily populated with values, as they are internally populated + * when needed; hence, any previous values are overwritten. + * + * Failuers of GraphBLAS operations are handled by immediately stopping the + * execution and by returning the failure code. + * + * @tparam IOType type of result and intermediate vectors used during computation + * @tparam ResidualType type of the residual norm + * @tparam NonzeroType type of matrix values + * @tparam InputType type of values of the right-hand side vector b + * @tparam Ring the ring of algebraic operators zero-values + * @tparam Minus the minus operator for subtractions + * + * @param[in,out] data \ref amg_data object storing inputs, outputs and temporary + * vectors used for the computation, as long as the information + * for the recursive multi-grid runs + * @param[in] with_preconditioning whether to use pre-conditioning, + * i.e. to perform multi-grid runs + * @param[in] presmoother_steps number of pre-smoother steps, for multi-grid runs + * @param[in] postsmoother_steps nomber of post-smoother steps, + * for multi-grid runs + * @param[in] max_iterations maximum number if iterations the simulation may run + * for; once reached, the simulation stops even if the + * residual norm is above \p tolerance + * @param[in] tolerance the tolerance over the residual norm, i.e. the value of + * the residual norm to stop the simulation at + * @param[out] iterations numbers of iterations performed + * @param[out] norm_residual norm of the final residual + * @param[in] ring the ring to perform the operations on + * @param[in] minus the \f$ - \f$ operator for vector subtractions + * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error + * code of the first unsuccessful operation otherwise + */ + template< typename IOType, + typename ResidualType, + typename NonzeroType, + typename InputType, + class Ring = Semiring< + grb::operators::add< IOType >, + grb::operators::mul< IOType >, + grb::identities::zero, + grb::identities::one + >, + class Minus = operators::subtract< IOType > > + grb::RC amg( + amg_data< IOType, NonzeroType, InputType > &data, + bool with_preconditioning, + const size_t presmoother_steps, + const size_t postsmoother_steps, + const size_t max_iterations, + const ResidualType tolerance, + size_t &iterations, + ResidualType &norm_residual, + const Ring &ring = Ring(), + const Minus &minus = Minus() + ) { + ResidualType alpha; + const grb::Matrix< NonzeroType > &A = data.A; + grb::Vector< IOType > &x = data.x; + const grb::Vector< InputType > &b = data.b; + grb::Vector< IOType > &r = data.r; // residual vector + grb::Vector< IOType > &p = data.p; // direction vector + grb::Vector< IOType > &Ap = data.u; // temp vector + grb::Vector< IOType > &z = data.z; // pre-conditioned residual vector + grb::RC ret = SUCCESS; + + ret = ret ? ret : grb::set( Ap, 0 ); + ret = ret ? ret : grb::set( r, 0 ); + ret = ret ? ret : grb::set( p, 0 ); + + ret = ret ? ret : grb::set( p, x ); + ret = ret ? ret : grb::mxv( Ap, A, x, ring ); // Ap = A * x + assert( ret == SUCCESS ); + + ret = ret ? ret : grb::eWiseApply( r, b, Ap, minus ); // r = b - Ap; + assert( ret == SUCCESS ); + + norm_residual = ring.template getZero< ResidualType >(); + ret = ret ? ret : grb::dot( norm_residual, r, r, ring ); // norm_residual = r' * r; + assert( ret == SUCCESS ); + + // compute sqrt to avoid underflow + norm_residual = std::sqrt( norm_residual ); + + // initial norm of residual + const ResidualType norm_residual_initial = norm_residual; + ResidualType old_r_dot_z = 0.0; + ResidualType r_dot_z = 0.0; + ResidualType beta = 0.0; + size_t iter = 0; + +#ifdef AMG_PRINT_STEPS + print_norm( p, "start p" ); + print_norm( Ap, "start Ap" ); + print_norm( r, "start r" ); +#endif + + do { +#ifdef AMG_PRINT_STEPS + DBG_println( "========= iteration " << iter << " =========" ); +#endif + if( with_preconditioning ) { + //ret = ret ? ret : grb::set( z, r ); // z = r; + ret = ret ? ret : internal::multi_grid( + data, data.coarser_level, presmoother_steps, postsmoother_steps, ring, minus + ); + //ret = ret ? ret : grb::set( x, z ); + assert( ret == SUCCESS ); + } else { + ret = ret ? ret : grb::set( z, r ); // z = r; + assert( ret == SUCCESS ); + } +#ifdef AMG_PRINT_STEPS + print_norm( z, "initial z" ); +#endif + + ResidualType pAp; + + if( iter == 0 ) { + ret = ret ? ret : grb::set( p, z ); // p = z; + assert( ret == SUCCESS ); + + ret = ret ? ret : grb::dot( r_dot_z, r, z, ring ); // r_dot_z = r' * z; + assert( ret == SUCCESS ); + } else { + old_r_dot_z = r_dot_z; + + r_dot_z = ring.template getZero< ResidualType >(); + ret = ret ? ret : grb::dot( r_dot_z, r, z, ring ); // r_dot_z = r' * z; + assert( ret == SUCCESS ); + + beta = r_dot_z / old_r_dot_z; + ret = ret ? ret : grb::clear( Ap ); // Ap = 0; + ret = ret ? ret : grb::eWiseMulAdd( Ap, beta, p, z, ring ); // Ap += beta * p + z; + std::swap( Ap, p ); // p = Ap; + assert( ret == SUCCESS ); + } +#ifdef AMG_PRINT_STEPS + print_norm( p, "middle p" ); +#endif + ret = ret ? ret : grb::set( Ap, 0 ); + ret = ret ? ret : grb::mxv( Ap, A, p, ring ); // Ap = A * p; + + assert( ret == SUCCESS ); +#ifdef AMG_PRINT_STEPS + print_norm( Ap, "middle Ap" ); +#endif + pAp = static_cast< ResidualType >( 0.0 ); + ret = ret ? ret : grb::dot( pAp, Ap, p, ring ); // pAp = p' * Ap + assert( ret == SUCCESS ); + + alpha = r_dot_z / pAp; + + ret = ret ? ret : grb::eWiseMul( x, alpha, p, ring ); // x += alpha * p; + assert( ret == SUCCESS ); +#ifdef AMG_PRINT_STEPS + print_norm( x, "end x" ); +#endif + ret = ret ? ret : grb::eWiseMul( r, -alpha, Ap, ring ); // r += - alpha * Ap; + assert( ret == SUCCESS ); +#ifdef AMG_PRINT_STEPS + print_norm( r, "end r" ); +#endif + + norm_residual = static_cast< ResidualType >( 0.0 ); + ret = ret ? ret : grb::dot( norm_residual, r, r, ring ); // residual = r' * r; + assert( ret == SUCCESS ); + + norm_residual = std::sqrt( norm_residual ); + +#ifdef AMG_PRINT_STEPS + std::cout << " ---> norm_residual=" << norm_residual << "\n"; +#endif + ++iter; + } while( + iter < max_iterations && + norm_residual / norm_residual_initial > tolerance && + ret == SUCCESS + ); + iterations = iter; + return ret; + } + + } // namespace algorithms + +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_AMG diff --git a/include/graphblas/algorithms/amg/amg_data.hpp b/include/graphblas/algorithms/amg/amg_data.hpp new file mode 100644 index 000000000..2be2524db --- /dev/null +++ b/include/graphblas/algorithms/amg/amg_data.hpp @@ -0,0 +1,201 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Data structures to store AMG input/output data. + * @file amg_data.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * @author Denis Jelovina (denis.jelovina@huawei.com) + * @date 2022-10-08 + */ + +#ifndef _H_GRB_ALGORITHMS_AMG_DATA +#define _H_GRB_ALGORITHMS_AMG_DATA + +#include +#include +#include + +namespace grb { + + namespace algorithms { + + /** + * basic data container for the AMG algorithm, storing \b only the + * data in common between the full CG run and the V-cycle multi-grid solver. + * Additional data are stored in inheriting data structures. + * + * @tparam IOType type of values of the vectors for intermediate results + * @tparam NonzeroType type of the values stored inside the system matrix #A + */ + template< typename IOType, typename NonzeroType > + struct system_data { + const std::size_t system_size; ///< size of the system, i.e. side of the #A + grb::Matrix< NonzeroType > A; ///< system matrix + grb::Vector< IOType > A_diagonal; ///< vector with the diagonal of #A + grb::Vector< IOType > z; ///< multi-grid solution + grb::Vector< IOType > r; ///< residual + grb::Vector< IOType > smoother_temp; ///< for smoother's intermediate results + std::vector< grb::Vector< bool > > color_masks; ///< for color masks + + /** + * Constructor building all the stored vectors and matrices. + * + * Stored vectors and matrices are constructed according to + * \p sys_size but \b not initialized + * to any value internally, as initialization is up to users's code. + * + * @param[in] sys_size the size of the underlying physical system, + * i.e. the size of vectors and the number + * of rows and columns of the #A matrix. + */ + system_data( std::size_t sys_size ) : + system_size( sys_size ), A( sys_size, sys_size ), A_diagonal( sys_size ), z( sys_size ), r( sys_size ), + smoother_temp( sys_size ) {} + // for safety, disable copy semantics + system_data( const system_data & o ) = delete; + system_data & operator=( const system_data & ) = delete; + }; + + /** + * Data container for all multi-grid inputs and outputs. + * + * @tparam IOType Type of values of the vectors for intermediate results + * @tparam NonzeroType Type of the values stored inside the system matrix \p A + * and the coarsening matrix #Ax_finer + * + * This data structure stores information for a full multi-grid V cycle, i.e. + * - input and output vectors for solution, residual and temporary vectors + * - coarsening information, in particular the #coarsening_matrix that + * coarsens a larger system of size #finer_size to the current system + * of size #system_size + * - the next level of coarsening, pointed to by #coarser_level, + * possibly being \c nullptr if no further coarsening is desired; + * note that this information is automatically + * destructed on object destruction (if any) + * + * Vectors stored here refer to the \b coarsened system + * (with the exception of #Ax_finer), + * thus having size #system_size; this also holds for the system matrix #A, + * while #coarsening_matrix has size #system_size \f$ \times \f$ #finer_size. + * Hence, the typical usage of this data structure is to coarsen \b external + * vectors, e.g. vectors coming from another + * \code multi_grid_data + * \endcode object whose #system_size equals + * \code this-> \endcode #fines_size, via + * \code this-> \endcode #coarsening_matrix and store the coarsened + * vectors internally. Mimicing the recursive behavior of standard multi-grid + * simulations, the information for a further coarsening is stored inside + * #coarser_level, so that the hierarchy of coarsened levels is reflected + * inside this data structure. + * + * As for \ref system_data, internal vectors and matrices are initialized to the + * proper size, but their values are \b not initialized. + */ + template< typename IOType, typename NonzeroType > + struct multi_grid_data : public system_data< IOType, NonzeroType > { + + const std::size_t finer_size; ///< ssize of the finer system to coarse from; + grb::Vector< IOType > Ax_finer; ///< finer vector for intermediate computations, of size #finer_size + grb::Matrix< NonzeroType > coarsening_matrix; ///< matrix of size #system_size \f$ \times \f$ #finer_size + ///< to coarsen an input vector of size #finer_size + ///< into a vector of size #system_size + struct multi_grid_data< IOType, NonzeroType > *coarser_level; ///< pointer to next coarsening level, for recursive + ///< multi-grid V cycle implementations + + /** + * Construct a new \c multi_grid_data_object by initializing internal + * data structures and setting #coarser_level to \c nullptr. + * @param[in] coarser_size size of the current system, + * i.e. size \b after coarsening + * @param[in] _finer_size size of the finer system, + * i.e. size of external objects \b before coarsening + */ + multi_grid_data( std::size_t coarser_size, std::size_t _finer_size ) : + system_data< IOType, NonzeroType >( coarser_size ), finer_size( _finer_size ), + Ax_finer( finer_size ), coarsening_matrix( coarser_size, finer_size ) { + coarser_level = nullptr; + } + + /** + * @brief Destroys the \c multi_grid_data_object object + * by destroying #coarser_level. + */ + virtual ~multi_grid_data() { + if( coarser_level != nullptr ) { + delete coarser_level; + } + } + }; + + /** + * @brief Data stucture to store the data for a full AMG run: + * system vectors and matrix, coarsening information and temporary vectors. + * + * This data structures contains all the needed vectors and matrices to + * solve a linear system \f$ A x = b \f$. As for \ref system_data, + * internal elements are built and their sizes properly initialized + * to #system_size, but internal values are \b not initialized, as + * they are left to user's logic. Similarly, the coarsening information + * in #coarser_level is to be initialized by users by properly + * building a \code multi_grid_data \endcode object + * and storing its pointer into #coarser_level; on destruction, #coarser_level + * will also be properly destroyed without user's intervention. + * + * @tparam IOType type of values of the vectors for intermediate results + * @tparam NonzeroType type of the values stored inside the system matrix #A + * @tparam InputType type of the values of the right-hand side vector #b + */ + template< typename IOType, typename NonzeroType, typename InputType > + struct amg_data : public system_data< IOType, NonzeroType > { + + grb::Vector< InputType > b; ///< right-side vector of known values + grb::Vector< IOType > u; ///< temporary vectors (typically for CG exploration directions) + grb::Vector< IOType > p; ///< temporary vector (typically for x refinements coming from the multi-grid run) + grb::Vector< IOType > x; // system solution being refined over the iterations: it us up to the user + ///< to set the initial solution value + + struct multi_grid_data< IOType, NonzeroType > *coarser_level; ///< information about the coarser system, for + ///< the multi-grid run + + /** + * Construct a new \c amg_data object by building vectors and matrices + * and by setting #coarser_level to \c nullptr (i.e. no coarser level is assumed). + * + * @param[in] sys_size the size of the simulated system, + * i.e. of all the internal vectors and matrices + */ + amg_data( std::size_t sys_size ) : system_data< IOType, NonzeroType >( sys_size ), b( sys_size ), u( sys_size ), p( sys_size ), x( sys_size ) { + coarser_level = nullptr; + } + + /** + * Destroy the \c amg_data object by destroying the #coarser_level informartion, + * if any. + */ + virtual ~amg_data() { + if( coarser_level != nullptr ) { + delete coarser_level; + } + } + }; + + } // namespace algorithms + +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_AMG_DATA diff --git a/include/graphblas/algorithms/amg/multigrid_v_cycle.hpp b/include/graphblas/algorithms/amg/multigrid_v_cycle.hpp new file mode 100644 index 000000000..e19da1026 --- /dev/null +++ b/include/graphblas/algorithms/amg/multigrid_v_cycle.hpp @@ -0,0 +1,327 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * + * This file contains the routines for multi-grid solution refinement, + * including the main routine and those for coarsening and refinement + * of the tentative solution. + * @file multigrid_v_cycle.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * @author Denis Jelovina (denis.jelovina@huawei.com) + * @date 2022-08-10 + */ + +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_V_CYCLE +#define _H_GRB_ALGORITHMS_MULTIGRID_V_CYCLE + +#include +#include +#include +#include "amg_data.hpp" +#include + +#define DBG_println( args ) std::cout << args << std::endl; + +namespace grb { + + namespace algorithms { + + /** + * Namespace for interfaces that should not be used outside of the algorithm + * namespace. + */ + namespace internal { + + /** + * computes the coarser residual vector \p coarsening_data.r by coarsening + * \p coarsening_data.Ax_finer - \p r_fine via + * \p coarsening_data.coarsening_matrix. + * + * The coarsening information are stored inside \p coarsening_data. + * + * @tparam IOType type of result and intermediate vectors used during + * computation + * @tparam NonzeroType type of matrix values + * @tparam Ring the ring of algebraic operators zero-values + * @tparam Minus the minus operator for subtractions + * + * @param[in] r_fine fine residual vector + * @param[in,out] coarsening_data \ref multi_grid_data data structure storing + * the information for coarsening + * @param[in] ring the ring to perform the operations on + * @param[in] minus the \f$ - \f$ operator for vector subtractions + * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the + * error code of the first unsuccessful operation otherwise + */ + template< typename IOType, + typename NonzeroType, + class Ring, + class Minus > + grb::RC compute_coarsening( + const grb::Vector< IOType > &r_fine, // fine residual + struct multi_grid_data< IOType, NonzeroType > &coarsening_data, + const Ring &ring, + const Minus &minus + ) { + RC ret = SUCCESS; + ret = ret ? ret : grb::eWiseApply( + coarsening_data.Ax_finer, + r_fine, coarsening_data.Ax_finer, + minus + ); // Ax_finer = r_fine - Ax_finer + assert( ret == SUCCESS ); + + ret = ret ? ret : grb::set( coarsening_data.r, 0 ); + ret = ret ? ret : grb::mxv( + coarsening_data.r, coarsening_data.coarsening_matrix, + coarsening_data.Ax_finer, ring + ); // r = coarsening_matrix * Ax_finer + return ret; + } + + /** + * computes the prolongation of the coarser solution \p coarsening_data.z and + * stores it into \p x_fine. + * + * For prolongation, this function uses the matrix + * \p coarsening_data.coarsening_matrix by transposing it. + * + * @tparam IOType type of result and intermediate vectors used + * during computation + * @tparam NonzeroType type of matrix values + * @tparam Ring the ring of algebraic operators zero-values + * + * @param[out] x_fine the solution vector to store the + * prolonged solution into + * @param[in,out] coarsening_data information for coarsening + * @param[in] ring the ring to perform the operations on + * @return grb::RC::SUCCESS if the algorithm could correctly terminate, + * the error code of the first unsuccessful operation otherwise + */ + template< typename IOType, + typename NonzeroType, + class Ring > + grb::RC compute_prolongation( + grb::Vector< IOType > &x_fine, // fine residual + struct multi_grid_data< IOType, NonzeroType > &coarsening_data, + const Ring &ring + ) { + RC ret = SUCCESS; + ret = ret ? ret : set( coarsening_data.Ax_finer, 0 ); + + ret = ret ? ret : grb::mxv< grb::descriptors::transpose_matrix >( + coarsening_data.Ax_finer, + coarsening_data.coarsening_matrix, + coarsening_data.z, + ring + ); + assert( ret == SUCCESS ); + // x_fine += Ax_finer; + ret = ret ? ret : grb::foldl( x_fine, coarsening_data.Ax_finer, ring.getAdditiveMonoid() ); + assert( ret == SUCCESS ); + return ret; + } + + template< typename IOType, typename NonzeroType, class Ring > + grb::RC spai0_smoother( system_data< IOType, NonzeroType > &data, const Ring &ring ) { + RC ret = SUCCESS; + + NonzeroType alpha = 1.; + ret = ret ? ret : grb::eWiseMulAdd( data.z, alpha, data.A_diagonal, data.r, ring ); + assert( ret == SUCCESS ); + return ret; + } + + /** + * Runs \p smoother_steps iteration of the SPAI0 smoother, + * with inputs and outputs stored inside \p data. + * + * @tparam IOType type of result and intermediate vectors used during computation + * @tparam NonzeroType type of matrix values + * @tparam Ring the ring of algebraic operators zero-values + * + * @param[in,out] data \ref system_data data structure with relevant inpus + * and outputs: system matrix, initial solution, residual, + * system matrix colors, temporary vectors + * @param[in] smoother_steps how many smoothing steps to run + * @param[in] ring the ring to perform the operations on + * @return grb::RC::SUCCESS if the algorithm could correctly terminate, + * the error code of the first unsuccessful operation otherwise + */ + template< typename IOType, typename NonzeroType, class Ring, class Minus > + grb::RC run_spai0_smoother( system_data< IOType, NonzeroType > &data, + const size_t smoother_steps, const Ring &ring, const Minus &minus ) { + RC ret = SUCCESS; + + for( size_t i = 0; i < smoother_steps && ret == SUCCESS; i++ ) { + ret = ret ? ret : grb::set( data.smoother_temp, 0 ); + ret = ret ? ret : grb::mxv( data.smoother_temp, data.A, data.z, ring ); + ret = ret ? ret : grb::eWiseApply( data.smoother_temp, data.r, data.smoother_temp, + minus ); + +#ifdef HPCG_PRINT_STEPS + std::cout << " data.A(spai0): " << nrows(data.A) << " x " << ncols(data.A) << " \n"; + print_norm( data.A_diagonal, " data.A_diagonal" ); + print_norm( data.smoother_temp, " data.smoother_temp" ); + print_norm( data.z, " data.z" ); +#endif + + ret = ret ? ret : + grb::eWiseLambda( + [ &data ]( const size_t i ) { +#ifdef HPCG_PRINT_STEPS + if( i < 10 || i + 10 > size( data.A_diagonal ) ) { + std::cout << " i= " << i + << " data.z[i]= " << data.z[ i ] + << " data.A_diagonal[i]= " << data.A_diagonal[ i ] + << " data.smoother_temp[i]= " << data.smoother_temp[ i ] + << "\n"; + } + std::cout << "\n"; +#endif + data.z[ i ] += data.A_diagonal[ i ] * data.smoother_temp[ i ]; + }, + data.A_diagonal, data.z, data.smoother_temp ); + + assert( ret == SUCCESS ); + } + + return ret; + } + + /** + * @brief Multi-grid V cycle implementation to refine a given solution. + * + * A full multi-grid run goes through the following steps: + * -# if \p presmoother_steps \f$ > 0 \f$, \p presmoother_steps of the SPAI0 + * smoother, run to improve on the initial solution stored into \p data.z + * -# the coarsening of \f$ r - A*z \f$ is computed to find the + * coarser residual vector + * -# a multi-grid run is recursively performed on the coarser system + * -# the tentative solution from the coarser multi-grid run is prolonged + * and added to the current tentative solution into \p data.z + * -# this solution is further smoothed for \p postsmoother_steps steps + * + * If coarsening information is not available, the multi-grid run + * consists in a single smmothing run. + * + * Failuers of GraphBLAS operations are handled by immediately + * stopping the execution and by returning the failure code. + * + * @tparam IOType type of result and intermediate vectors used during computation + * @tparam NonzeroType type of matrix values + * @tparam Ring the ring of algebraic operators zero-values + * @tparam Minus the minus operator for subtractions + * + * @param[in,out] data \ref multi_grid_data object storing the relevant data + * for the multi-grid run of the current clevel + * @param[in,out] coarsening_data pointer to information for the + * coarsening/refinement operations and + * for the recursive multi-grid run on the coarsened system; + * if \c nullptr, no coarsening/refinement occurs + * and only smoothing occurs on the current solution + * @param[in] presmoother_steps number of pre-smoother steps + * @param[in] postsmoother_steps number of post-smoother steps + * @param[in] ring the ring to perform the operations on + * @param[in] minus the \f$ - \f$ operator for vector subtractions + * @return grb::RC::SUCCESS if the algorithm could correctly terminate, + * the error code of the first unsuccessful operation otherwise + */ + template< typename IOType, typename NonzeroType, class Ring, class Minus > + grb::RC multi_grid( system_data< IOType, NonzeroType > &data, + struct multi_grid_data< IOType, NonzeroType > *const coarsening_data, + const size_t presmoother_steps, + const size_t postsmoother_steps, + const Ring &ring, + const Minus &minus ) { + RC ret = SUCCESS; + +#ifdef HPCG_PRINT_STEPS + DBG_println( "mg BEGINNING {" ); +#endif + + // clean destination vector + //ret = ret ? ret : grb::set( data.z, data.r ); + ret = ret ? ret : grb::set( data.z, 0 ); + +#ifdef HPCG_PRINT_STEPS + print_norm( data.z, "first print smoothed z" ); + print_norm( data.r, "initial r" ); +#endif + if( coarsening_data == nullptr ) { + //compute one round of smoother + ret = ret ? ret : run_spai0_smoother( data, 1, ring, minus ); + assert( ret == SUCCESS ); +#ifdef HPCG_PRINT_STEPS + print_norm( data.z, "smoothed z" ); + DBG_println( "} mg END" ); +#endif + return ret; + } + + struct multi_grid_data< IOType, NonzeroType > &cd = *coarsening_data; + + // pre-smoother + ret = ret ? ret : run_spai0_smoother( data, presmoother_steps, ring, minus ); + assert( ret == SUCCESS ); +#ifdef HPCG_PRINT_STEPS + print_norm( data.z, "pre-smoothed z" ); +#endif + ret = ret ? ret : grb::set( cd.Ax_finer, 0 ); + ret = ret ? ret : grb::mxv( cd.Ax_finer, data.A, data.z, ring ); + assert( ret == SUCCESS ); + +#ifdef HPCG_PRINT_STEPS + std::cout << " data.A: " << nrows(data.A) << " x " << ncols(data.A) << " \n"; + print_norm( cd.r, "before coarse cd.r" ); +#endif + ret = ret ? ret : compute_coarsening( data.r, cd, ring, minus ); + assert( ret == SUCCESS ); + +#ifdef HPCG_PRINT_STEPS + print_norm( cd.z, "after cd.coarse z" ); + print_norm( cd.r, "after cd.coarse r" ); +#endif + ret = ret ? ret : multi_grid( cd, cd.coarser_level, presmoother_steps, postsmoother_steps, ring, minus ); + assert( ret == SUCCESS ); + + ret = ret ? ret : compute_prolongation( data.z, cd, ring ); + assert( ret == SUCCESS ); + +#ifdef HPCG_PRINT_STEPS + print_norm( data.z, "prolonged z" ); +#endif + // post-smoother + ret = ret ? ret : run_spai0_smoother( data, postsmoother_steps, ring, minus ); + assert( ret == SUCCESS ); + +#ifdef HPCG_PRINT_STEPS + print_norm( data.z, "post-smoothed z" ); + DBG_println( "} mg END" ); +#endif + + return ret; + } + + } // namespace internal + + } // namespace algorithms + +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_MULTIGRID_V_CYCLE diff --git a/include/graphblas/algorithms/amg/system_building_utils.hpp b/include/graphblas/algorithms/amg/system_building_utils.hpp new file mode 100644 index 000000000..f2a431ec9 --- /dev/null +++ b/include/graphblas/algorithms/amg/system_building_utils.hpp @@ -0,0 +1,193 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Utilities to build an antire system for AMG simulations. + * @file amg_system_building_utils.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * @author Denis Jelovina (denis.jelovina@huawei.com) + * @date 2022-08-10 + */ + +#ifndef _H_GRB_ALGORITHMS_SYSTEM_BUILDING_UTILS +#define _H_GRB_ALGORITHMS_SYSTEM_BUILDING_UTILS + +#include +#include +#include +#include +#include +#include +#include "amg_data.hpp" + +namespace grb { + + namespace algorithms { + + /** + * Generates an entire AMG problem, storing it in \p holder. + * + * @tparam DIMS dimensions of the system + * @tparam T type of matrix values + * @param holder std::unique_ptr to store the AMG problem into + * @param params parameters container to build the AMG problem + * @return grb::SUCCESS if every GraphBLAS operation (to generate vectors + * and matrices) succeeded, otherwise the first unsuccessful return value + */ + template< typename T = double, typename SYSINP > + grb::RC build_amg_system( + std::unique_ptr< grb::algorithms::amg_data< T, T, T > > &holder, + const std::size_t max_levels, + SYSINP &in + ) { + grb::RC rc = grb::SUCCESS; + std::size_t coarsening_level = 0UL; + const size_t n_A = in.matAbuffer[ coarsening_level ].get_n(); + grb::algorithms::amg_data< T, T, T > *data = new grb::algorithms::amg_data< T, T, T >( n_A ); + rc = buildMatrixUnique( + data->A, + in.matAbuffer[ coarsening_level ].i_data, + in.matAbuffer[ coarsening_level ].j_data, + in.matAbuffer[ coarsening_level ].v_data, + in.matAbuffer[ coarsening_level ].size(), + SEQUENTIAL + ); + + if( rc != SUCCESS ) { + std::cerr << "Failure: call to buildMatrixUnique did not succeed " + << "(" << toString( rc ) << ")." << std::endl; + return rc; + } +#ifdef AMG_PRINT_STEPS + std::cout << " buildMatrixUnique: constructed data->A " << nrows( data->A ) + << " x " << ncols( data->A ) << " matrix \n"; +#endif + assert( ! holder ); // should be empty + holder = std::unique_ptr< grb::algorithms::amg_data< T, T, T > >( data ); + + { + RC rc = grb::buildVector( + data->A_diagonal, + in.matMbuffer[ coarsening_level ].v_data, + in.matMbuffer[ coarsening_level ].v_data + in.matMbuffer[ coarsening_level ].size(), + SEQUENTIAL + ); + if ( rc != SUCCESS ) { + std::cerr << " buildVector failed!\n "; + return rc; + } +#ifdef AMG_PRINT_STEPS + std::cout << " buildVector: data->A_diagonal " + << size( data->A_diagonal ) << " vector \n"; +#endif + } + + std::size_t coarser_size; + std::size_t previous_size = n_A; + + // initialize coarsening with additional pointers and + // dimensions copies to iterate and divide + grb::algorithms::multi_grid_data< T, T > **coarser = &data->coarser_level; + assert( *coarser == nullptr ); + + // generate linked list of hierarchical coarseners + while( coarsening_level < max_levels ) { + assert( *coarser == nullptr ); + + coarser_size = in.matAbuffer[ coarsening_level + 1 ].get_n(); + + // build data structures for new level + grb::algorithms::multi_grid_data< double, double > *new_coarser = + new grb::algorithms::multi_grid_data< double, double >( coarser_size, previous_size ); + + // install coarser level immediately to cleanup in case of build error + *coarser = new_coarser; + + // initialize coarsener matrix, system matrix and + // diagonal vector for the coarser level + { + rc = buildMatrixUnique( + new_coarser->coarsening_matrix, + in.matRbuffer[ coarsening_level ].i_data, + in.matRbuffer[ coarsening_level ].j_data, + in.matRbuffer[ coarsening_level ].v_data, + in.matRbuffer[ coarsening_level ].size(), + SEQUENTIAL + ); + + if( rc != SUCCESS ) { + std::cerr << "Failure: call to buildMatrixUnique did not succeed " + << "(" << toString( rc ) << ")." << std::endl; + return rc; + } +#ifdef AMG_PRINT_STEPS + std::cout << " buildMatrixUnique: constructed new_coarser->coarsening_matrix " + << nrows(new_coarser->coarsening_matrix) << " x " + << ncols(new_coarser->coarsening_matrix) << " matrix \n"; +#endif + } + { + rc = buildMatrixUnique( + new_coarser->A, + in.matAbuffer[ coarsening_level + 1 ].i_data, + in.matAbuffer[ coarsening_level + 1 ].j_data, + in.matAbuffer[ coarsening_level + 1 ].v_data, + in.matAbuffer[ coarsening_level + 1 ].size(), + SEQUENTIAL + ); + + if( rc != SUCCESS ) { + std::cerr << "Failure: call to buildMatrixUnique did not succeed " + << "(" << toString( rc ) << ")." << std::endl; + return rc; + } +#ifdef AMG_PRINT_STEPS + std::cout << " buildMatrixUnique: constructed new_coarser->A " + << nrows(new_coarser->A) << " x " << ncols(new_coarser->A) << " matrix \n"; +#endif + } + + RC rc = grb::buildVector( + new_coarser->A_diagonal, + in.matMbuffer[ coarsening_level + 1 ].v_data, + in.matMbuffer[ coarsening_level + 1 ].v_data + in.matMbuffer[ coarsening_level + 1 ].size(), + SEQUENTIAL + ); + + if ( rc != SUCCESS ) { + std::cerr << " buildVector failed!\n "; + return rc; + } +#ifdef AMG_PRINT_STEPS + std::cout << " buildVector: new_coarser->A_diagonal " + << size(new_coarser->A_diagonal) << " vector \n"; +#endif + + // prepare for new iteration + coarser = &new_coarser->coarser_level; + previous_size = coarser_size; + coarsening_level++; + } + + return rc; + } + + } // namespace algorithms + +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_SYSTEM_BUILDING_UTILS diff --git a/include/graphblas/reference/io.hpp b/include/graphblas/reference/io.hpp index dbd34c199..01bb250ce 100644 --- a/include/graphblas/reference/io.hpp +++ b/include/graphblas/reference/io.hpp @@ -334,6 +334,9 @@ namespace grb { } // namespace internal + extern "C" void foo(); + + /** * Sets all elements of a vector to the given value. * diff --git a/tests/smoke/CMakeLists.txt b/tests/smoke/CMakeLists.txt index fac6ceee8..21f97586a 100644 --- a/tests/smoke/CMakeLists.txt +++ b/tests/smoke/CMakeLists.txt @@ -98,7 +98,11 @@ add_grb_executables( knn knn.cpp ../unit/parser.cpp ) add_grb_executables( hpcg hpcg.cpp + BACKENDS reference reference_omp bsp1d hybrid + ADDITIONAL_LINK_LIBRARIES test_utils +) +add_grb_executables( amg amg.cpp BACKENDS reference reference_omp bsp1d hybrid ADDITIONAL_LINK_LIBRARIES test_utils ) diff --git a/tests/smoke/amg.cpp b/tests/smoke/amg.cpp new file mode 100644 index 000000000..380e614f8 --- /dev/null +++ b/tests/smoke/amg.cpp @@ -0,0 +1,713 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Test for AMG solver, levels provided by AMGCL. + * @file amg.cpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * @author Denis Jelovina (denis.jelovina@huawei.com) + * + * + * @date 2022-10-08 + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// forward declaration for the tracing facility +template< typename T, + class Ring = grb::Semiring< + grb::operators::add< T >, + grb::operators::mul< T >, + grb::identities::zero, + grb::identities::one + > +> +void print_norm( const grb::Vector< T > &r, const char * head, const Ring &ring = Ring() ); + +#ifdef AMG_PRINT_STEPS + +// AMG_PRINT_STEPS requires defining the following symbols + +/** + * simply prints \p args on a dedicated line. + */ +#define DBG_println( args ) std::cout << args << std::endl; + +/** + * prints \p head and the norm of \p r. + */ +#define DBG_print_norm( vec, head ) print_norm( vec, head ) +#endif + +//========== MAIN PROBLEM PARAMETERS ========= +// values modifiable via cmd line args: default set as in reference AMG +constexpr size_t DEF_COARSENING_LEVELS{ 1U }; +constexpr size_t MAX_COARSENING_LEVELS{ 4U }; +constexpr size_t MAX_ITERATIONS_DEF{ 56UL }; +constexpr size_t SMOOTHER_STEPS_DEF{ 1 }; +//============================================ + +constexpr double MAX_NORM { 4.0e-14 }; + +using namespace grb; +using namespace algorithms; + +static const char * const TEXT_HIGHLIGHT = "===> "; +#define thcout ( std::cout << TEXT_HIGHLIGHT ) +#define thcerr ( std::cerr << TEXT_HIGHLIGHT ) + +/** + * Container to store matrices loaded from a file. + */ +template< typename T = double > +struct mat_data { + size_t *i_data; + size_t *j_data; + T *v_data; + size_t nz, n, m; + + void resize( size_t innz, size_t inn, size_t inm ){ + nz=innz; + n=inn; + m=inm; + i_data = new size_t [ nz ]; + j_data = new size_t [ nz ]; + v_data = new T [ nz ]; + }; + + mat_data( ){ + nz = 0; + }; + + mat_data( size_t in ){ + resize( in ); + }; + + size_t size(){ + return nz; + }; + + size_t get_n(){ + return n; + }; + + size_t get_m(){ + return m; + }; + + ~mat_data(){ + delete [] i_data; + delete [] i_data; + delete [] v_data; + }; +}; + +/** + * Container to store vectors loaded from a file. + */ +template< typename T = double > +struct vec_data { + T *v_data; + size_t n; + + void resize( size_t in ){ + n=in; + v_data = new T [ n ]; + }; + + vec_data( ){ + n = 0; + }; + + vec_data( size_t in ){ + resize( in ); + }; + + size_t size(){ + return n; + }; + + ~vec_data(){ + delete [] v_data; + }; +}; + +static bool matloaded = false; + +/** + * Container for the parameters for the AMG simulation. + */ +struct simulation_input { + size_t max_coarsening_levels; + size_t test_repetitions; + size_t max_iterations; + size_t smoother_steps; + const char * matAfile_c_str; + // vectors of input matrix filenames + std::vector< std::string > matAfiles; + std::vector< std::string > matMfiles; + std::vector< std::string > matPfiles; + std::vector< std::string > matRfiles; + bool evaluation_run; + bool no_preconditioning; +}; + +template > +class vectorwrapbuf : + public std::basic_streambuf { +public: + vectorwrapbuf( std::vector &vec) { + this->setg( vec.data(), vec.data(), vec.data() + vec.size() ); + } +}; +std::istream& operator>>(std::istream& is, std::string& s){ + std::getline(is, s); + return is; +} + +/** + * @brief Container to store all data for AMG hierarchy. + */ +class preloaded_matrices : public simulation_input { + +public : + size_t nzAmt, nzMmt, nzPmt, nzRmt; + mat_data< double > *matAbuffer; + vec_data< double > *matMbuffer; + mat_data< double > *matPbuffer; + mat_data< double > *matRbuffer; + + grb::RC read_matrix( + std::vector< std::string > &fname, + mat_data *data + ) { + grb::RC rc = SUCCESS; + for ( size_t i = 0; i < fname.size(); i++ ) { + grb::utils::MatrixFileReader< + double, std::conditional< + ( sizeof( grb::config::RowIndexType ) > sizeof( grb::config::ColIndexType ) ), + grb::config::RowIndexType, + grb::config::ColIndexType >::type + > parser_mat( fname[ i ].c_str(), true ); +#ifdef DEBUG + std::cout << " ---> parser_mat.filename()=" << parser_mat.filename() << "\n"; + std::cout << " ---> parser_mat.nz()=" << parser_mat.nz() << "\n"; + std::cout << " ---> parser_mat.n()=" << parser_mat.n() << "\n"; + std::cout << " ---> parser_mat.m()=" << parser_mat.m() << "\n"; + std::cout << " ---> parser_mat.entries()=" << parser_mat.entries() << "\n"; +#endif + // very poor choice and temp solution size = 2 x nz + data[i].resize( parser_mat.entries()*2, parser_mat.n(), parser_mat.m() ); + std::ifstream inFile( fname[ i ], std::ios::binary | std::ios::ate ); + if( ! inFile.is_open() ) { + std::cout << " Cannot open "<< fname[ i ].c_str() << "\n"; + return( PANIC ); + } + std::streamsize size = inFile.tellg(); + inFile.seekg( 0, std::ios::beg ); + std::vector< char > buffer( size ); + if ( inFile.read( buffer.data(), size ) ) { + // all fine + } + else { + std::cout << " Cannot read "<< fname[ i ].c_str() << "\n"; + return( PANIC ); + } + inFile.close(); + vectorwrapbuf< char > databuf( buffer ); + std::istream isdata( &databuf ); + std::string headder; + isdata >> headder; + while( headder.at( 0 ) == '%' ) { + isdata >> headder; + } + std::stringstream ss( headder ); + size_t n, m, nz; + ss >> n >> m >> nz ; + size_t k = 0; + for ( size_t j = 0; j < nz; j++ ) { + size_t itmp, jtmp; + double vtmp; + isdata >> itmp >> jtmp >> vtmp; + data[ i ].i_data[ k ] = itmp - 1; + data[ i ].j_data[ k ] = jtmp - 1; + data[ i ].v_data[ k ] = vtmp; + k += 1; + } + data[ i ].nz = k; + } + return ( rc ); + } + + grb::RC read_vector( std::vector< std::string > &fname, + vec_data< double > *data ) { + grb::RC rc = SUCCESS; + for ( size_t i = 0; i < fname.size(); i++ ){ +#ifdef DEBUG + std::cout << " Reading " << fname[ i ].c_str() << ".\n"; +#endif + std::ifstream inFile( fname[ i ], std::ios::binary | std::ios::ate ); + if( ! inFile.is_open() ) { + std::cout << " Cannot open "<< fname[ i ].c_str() << "\n"; + return( PANIC ); + } + std::streamsize size = inFile.tellg(); + inFile.seekg( 0, std::ios::beg ); + std::vector< char > buffer( size ); + if ( inFile.read( buffer.data(), size ) ) { + // all fine + } + else { + std::cout << " Cannot read "<< fname[ i ].c_str() << "\n"; + return( PANIC ); + } + inFile.close(); + vectorwrapbuf< char > databuf( buffer ); + std::istream isdata( &databuf ); + std::string headder; + size_t n, m; + std::getline( isdata, headder ); // skip the first line + while( headder.at( 0 ) == '%' ) { + std::getline( isdata, headder ); + } + std::stringstream ss( headder ); + ss >> n >> m; + std::cout << " Reading from" << fname[ i ] << " dense matrix with dimensions: " + << " n x m = " << n << " x " << m << "\n"; + data[ i ].resize( n ); + for ( size_t j = 0; j < n; j++ ) { + isdata >> data[ i ].v_data[ j ]; + } + } + return( rc ); + } + + grb::RC read_vec_matrics(){ + grb::RC rc = SUCCESS; + + nzAmt = matAfiles.size() ; + nzMmt = matMfiles.size() ; + nzPmt = matPfiles.size() ; + nzRmt = matRfiles.size() ; + + matAbuffer = new mat_data< double > [ nzAmt ]; + matMbuffer = new vec_data< double > [ nzMmt ]; + matPbuffer = new mat_data< double > [ nzPmt ]; + matRbuffer = new mat_data< double > [ nzRmt ]; + + rc = rc ? rc : read_matrix( matAfiles, matAbuffer ); + rc = rc ? rc : read_matrix( matRfiles, matRbuffer ); + // rc = rc ? rc : read_matrix( matPfiles, matPbuffer ); + rc = rc ? rc : read_vector( matMfiles, matMbuffer ); + + return rc; + } +}; + + + +preloaded_matrices inputData; + +/** + * Containers for test outputs. + */ +struct output { + RC error_code; + size_t test_repetitions; + size_t performed_iterations; + double residual; + grb::utils::TimerResults times; + std::unique_ptr< PinnedVector< double > > pinnedVector; + double square_norm_diff; + + output() { + error_code = SUCCESS; + test_repetitions = 0; + performed_iterations = 0; + residual = 0.0; + } +}; + +#ifdef AMG_PRINT_SYSTEM +static void print_system( const amg_data< double, double, double > & data ) { + print_matrix( data.A, 70, "A" ); + multi_grid_data< double, double > * coarser = data.coarser_level; + while( coarser != nullptr ) { + print_matrix( coarser->coarsening_matrix, 50, "COARSENING MATRIX" ); + print_matrix( coarser->A, 50, "COARSER SYSTEM MATRIX" ); + coarser = coarser->coarser_level; + } +} +#endif + +#ifdef AMG_PRINT_STEPS +template< + typename T, + class Ring = Semiring< + grb::operators::add< T >, + grb::operators::mul< T >, + grb::identities::zero, + grb::identities::one > + > +void print_norm( const grb::Vector< T > &r, const char *head, const Ring &ring ) { + T norm = 0; + RC ret = grb::dot( norm, r, r, ring ); // residual = r' * r; + (void)ret; + assert( ret == SUCCESS ); + std::cout << ">>> "; + if( head != nullptr ) { + std::cout << head << ": "; + } + std::cout << norm << std::endl; +} +#endif + +/** + * Main test, building an AMG problem and running the simulation closely + * following the parameters in the reference AMG test. + */ +void grbProgram( const simulation_input &in, struct output &out ) { + grb::utils::Timer timer; + timer.reset(); + + // get user process ID + assert( spmd<>::pid() < spmd<>::nprocs() ); + + // assume successful run + out.error_code = SUCCESS; + RC rc { SUCCESS }; + + if( ! matloaded ) { + //preloaded_matrices inputData; + inputData.matAfiles = in.matAfiles; + inputData.matMfiles = in.matMfiles; + inputData.matPfiles = in.matPfiles; + inputData.matRfiles = in.matRfiles; + + rc = inputData.read_vec_matrics(); + if( rc != SUCCESS ) { + std::cerr << "Failure to read data" << std::endl; + } + matloaded = true ; + } + + out.times.io = timer.time(); + timer.reset(); + + // wrap amg_data inside a unique_ptr to forget about cleaning chores + std::unique_ptr< amg_data< double, double, double > > amg_state; + rc = build_amg_system< double >( amg_state, in.max_coarsening_levels, inputData ); + + if( rc != SUCCESS ) { + std::cerr << "Failure to generate the system (" << toString( rc ) << ")." << std::endl; + out.error_code = rc; + return; + } +#ifdef AMG_PRINT_SYSTEM + if( spmd<>::pid() == 0 ) { + print_system( *amg_state ); + } +#endif + + Matrix< double > &A = amg_state->A; + Vector< double > &x = amg_state->x; + Vector< double > &b = amg_state->b; + + // set vectors as from standard AMG benchmark + set( x, 1.0 ); + set( b, 0.0 ); + rc = grb::mxv( b, A, x, + grb::Semiring< grb::operators::add< double >, + grb::operators::mul< double >, + grb::identities::zero, + grb::identities::one >() ); + set( x, 0.0 ); + + double norm_b = 0; + rc = grb::dot( norm_b, b, b, + grb::Semiring< grb::operators::add< double >, + grb::operators::mul< double >, + grb::identities::zero, + grb::identities::one >() ); + (void)rc; + assert( rc == SUCCESS ); + +#ifdef AMG_PRINT_SYSTEM + if( spmd<>::pid() == 0 ) { + print_vector( x, 50, " ---> X(1)" ); + print_vector( b, 50, " ---> B(1)" ); + } +#endif + + out.times.preamble = timer.time(); + timer.reset(); + + const bool with_preconditioning = ! in.no_preconditioning; + out.test_repetitions = 0; + if( in.evaluation_run ) { + double single_time_start = timer.time(); + rc = amg( *amg_state, with_preconditioning, in.smoother_steps, + in.smoother_steps, in.max_iterations, 0.0, out.performed_iterations, out.residual ); + double single_time = timer.time() - single_time_start; + if( rc == SUCCESS ) { + rc = collectives<>::reduce( single_time, 0, operators::max< double >() ); + } + out.times.useful = single_time; + out.test_repetitions = static_cast< size_t >( 1000.0 / single_time ) + 1; + } else { + // do benchmark + double time_start = timer.time(); + for( size_t i = 0; i < in.test_repetitions && rc == SUCCESS; ++i ) { + rc = set( x, 0.0 ); + assert( rc == SUCCESS ); + rc = amg( *amg_state, with_preconditioning, in.smoother_steps, + in.smoother_steps, in.max_iterations, 0.0, out.performed_iterations, out.residual ); + + out.test_repetitions++; + if( rc != SUCCESS ) { + break; + } + + } + double time_taken = timer.time() - time_start; + rc = rc ? rc : collectives<>::reduce( time_taken, 0, operators::max< double >() ); + out.times.useful = time_taken / static_cast< double >( out.test_repetitions ); + +#ifdef AMG_PRINT_SYSTEM + rc = rc ? rc : grb::eWiseLambda( [ &x ]( const size_t i ) { x[ i ] -= 1;}, x ); + print_norm (x, " norm(x)"); +#endif + // sleep( 1 ); + } + timer.reset(); + +#ifdef AMG_PRINT_SYSTEM + if( spmd<>::pid() == 0 ) { + print_vector( x, 50, " x(first 50 elements)" ); + print_vector( b, 50, " b(first 50 elements)" ); + } +#endif + + if( spmd<>::pid() == 0 ) { + if( rc == SUCCESS ) { + if( in.evaluation_run ) { + std::cout << "Info: cold AMG completed within " << out.performed_iterations + << " iterations. Last computed residual is " << out.residual + << ". Time taken was " << out.times.useful + << " ms. Deduced inner repetitions parameter of " << out.test_repetitions + << " to take 1 second or more per inner benchmark." << std::endl; + } else { + std::cout << "Final residual= "<< out.residual << " relative error= " + << out.residual/sqrt(norm_b) << "\n"; + std::cout << "Average time taken for each of " << out.test_repetitions + << " AMG calls (hot start): " << out.times.useful << std::endl; + } + } else { + std::cerr << "Failure: call to AMG did not succeed (" << toString( rc ) << ")." << std::endl; + } + } + + // start postamble + timer.reset(); + // set error code + out.error_code = rc; + + Semiring< grb::operators::add< double >, + grb::operators::mul< double >, + grb::identities::zero, + grb::identities::one > ring; + grb::set( b, 1.0 ); + out.square_norm_diff = 0.0; + grb::eWiseMul( b, -1.0, x, ring ); + grb::dot( out.square_norm_diff, b, b, ring ); + + // output + out.pinnedVector = std::unique_ptr< PinnedVector< double > >( new PinnedVector< double >( x, SEQUENTIAL ) ); + // finish timing + const double time_taken { timer.time() }; + out.times.postamble = time_taken; +} + +/** + * Parser the command-line arguments to extract the simulation information + * and checks they are valid. + */ +static void parse_arguments( simulation_input &, size_t &, double &, int, char ** ); + +int main( int argc, char ** argv ) { + simulation_input sim_in; + size_t test_outer_iterations; + double max_residual_norm; + + parse_arguments( sim_in, test_outer_iterations, max_residual_norm, argc, argv ); + thcout << "System max coarsening levels " << sim_in.max_coarsening_levels << std::endl; + thcout << "Test repetitions: " << sim_in.test_repetitions << std::endl; + thcout << "Max iterations: " << sim_in.max_iterations << std::endl; + thcout << "Direct launch: " << std::boolalpha << sim_in.evaluation_run << std::noboolalpha << std::endl; + thcout << "No conditioning: " << std::boolalpha << sim_in.no_preconditioning << std::noboolalpha << std::endl; + thcout << "Smoother steps: " << sim_in.smoother_steps << std::endl; + thcout << "Test outer iterations: " << test_outer_iterations << std::endl; + thcout << "Maximum norm for residual: " << max_residual_norm << std::endl; + + // the output struct + struct output out; + + // set standard exit code + grb::RC rc = SUCCESS; + + // launch estimator (if requested) + if( sim_in.evaluation_run ) { + grb::Launcher< AUTOMATIC > launcher; + rc = launcher.exec( &grbProgram, sim_in, out, true ); + if( rc == SUCCESS ) { + sim_in.test_repetitions = out.test_repetitions; + } else { + thcout << "launcher.exec returns with non-SUCCESS error code " << grb::toString( rc ) << std::endl; + std::exit( -1 ); + } + } + + // launch full benchmark + grb::Benchmarker< AUTOMATIC > benchmarker; + rc = benchmarker.exec( &grbProgram, sim_in, out, 1, test_outer_iterations, true ); + ASSERT_RC_SUCCESS( rc ); + thcout << "Benchmark completed successfully and took " << out.performed_iterations + << " iterations to converge with residual " << out.residual << std::endl; + + if( ! out.pinnedVector ) { + thcerr << "no output vector to inspect" << std::endl; + } else { + const PinnedVector< double > &solution = *out.pinnedVector; + thcout << "Size of x is " << solution.size() << std::endl; + if( solution.size() > 0 ) { + print_vector( solution, 30, "SOLUTION" ); + } else { + thcerr << "ERROR: solution contains no values" << std::endl; + } + } + + ASSERT_RC_SUCCESS( out.error_code ); + + double residual_norm = sqrt( out.square_norm_diff ); + thcout << "Residual norm: " << residual_norm << std::endl; + + ASSERT_LT( residual_norm, max_residual_norm ); + + thcout << "Test OK" << std::endl; + return 0; +} + +static void parse_arguments( simulation_input &sim_in, size_t &outer_iterations, + double &max_residual_norm, int argc, char **argv ) { + + argument_parser parser; + parser.add_optional_argument( "--max_coarse-levels", sim_in.max_coarsening_levels, DEF_COARSENING_LEVELS, + "maximum level for coarsening; 0 means no coarsening; note: actual " + "level may be limited by the minimum system dimension" ) + .add_optional_argument( "--mat_files_pattern", sim_in.matAfile_c_str, + "file pattern for files contining matrices A, M_diag, P, R " + "i.e. '--mat_a_file_names /path/to/dir/level_ --max_coarse-levels 2' will read " + "/path/to/dir/level_0_A.mtx, /path/to/dir/level_1_A.mtx, /path/to/dir/level_2_A.mtx ... " ) + .add_optional_argument( "--test-rep", sim_in.test_repetitions, grb::config::BENCHMARKING::inner(), + "consecutive test repetitions before benchmarking" ) + .add_optional_argument( "--init-iter", outer_iterations, grb::config::BENCHMARKING::outer(), + "test repetitions with complete initialization" ) + .add_optional_argument( "--max_iter", sim_in.max_iterations, MAX_ITERATIONS_DEF, + "maximum number of AMG iterations" ) + .add_optional_argument( "--max-residual-norm", max_residual_norm, MAX_NORM, + "maximum norm for the residual to be acceptable (does NOT limit " + "the execution of the algorithm)" ) + .add_optional_argument( "--smoother-steps", sim_in.smoother_steps, + SMOOTHER_STEPS_DEF, "number of pre/post-smoother steps; 0 disables smoothing" ) + .add_option( "--evaluation-run", sim_in.evaluation_run, false, + "launch single run directly, without benchmarker (ignore repetitions)" ) + .add_option( "--no-preconditioning", sim_in.no_preconditioning, false, + "do not apply pre-conditioning via multi-grid V cycle" ); + + parser.parse( argc, argv ); + + std::string matAfile = sim_in.matAfile_c_str; + if ( ! matAfile.empty() ) { + std::cout << "Using <<" << matAfile << ">> pattern to read matrices " << std::endl; + for ( size_t i = 0 ; i < sim_in.max_coarsening_levels ; i++ ){ + std::string fnamebase = matAfile + std::to_string( i ); + std::string fnameA = fnamebase + "_A.mtx"; + std::string fnameM = fnamebase + "_M_diag.mtx"; + std::string fnameP = fnamebase + "_P.mtx"; + std::string fnameR = fnamebase + "_R.mtx"; + sim_in.matAfiles.push_back( fnameA ); + sim_in.matMfiles.push_back( fnameM ); + sim_in.matPfiles.push_back( fnameP ); + sim_in.matRfiles.push_back( fnameR ); + } + { + std::string fnamebase = matAfile + std::to_string( sim_in.max_coarsening_levels ); + std::string fnameA = fnamebase + "_A.mtx"; + sim_in.matAfiles.push_back (fnameA); + std::string fnameM = fnamebase + "_M_diag.mtx"; + sim_in.matMfiles.push_back (fnameM); + } + std::cout << "files to read matrices: " << std::endl; + for ( std::string fname: sim_in.matAfiles ) { + std::cout << fname << " \n"; + } + + for ( std::string fname: sim_in.matMfiles ) { + std::cout << fname << " \n"; + } + + for ( std::string fname: sim_in.matPfiles ) { + std::cout << fname << " \n"; + } + + for ( std::string fname: sim_in.matRfiles ) { + std::cout << fname << " \n"; + } + + } + else { + std::cout << "No pattern to read matrices provided" << std::endl; + } + + // check for valid values + if( sim_in.max_coarsening_levels > MAX_COARSENING_LEVELS ) { + std::cout << "Setting max coarsening level to " << MAX_COARSENING_LEVELS + << " instead of " << sim_in.max_coarsening_levels << std::endl; + sim_in.max_coarsening_levels = MAX_COARSENING_LEVELS; + } + if( sim_in.test_repetitions == 0 ) { + std::cerr << "ERROR no test runs selected: set \"--test-rep >0\"" << std::endl; + std::exit( -1 ); + } + if( sim_in.max_iterations == 0 ) { + std::cout << "Setting number of iterations to 1" << std::endl; + sim_in.max_iterations = 1; + } +}