From f67b46346f6bf1aea3682239c53efc6e1dcf6275 Mon Sep 17 00:00:00 2001 From: Aleksa Milisavljevic Date: Wed, 21 Feb 2024 14:52:16 +0100 Subject: [PATCH 01/39] masked set matrix to matrix --- include/graphblas/reference/io.hpp | 200 +++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) diff --git a/include/graphblas/reference/io.hpp b/include/graphblas/reference/io.hpp index f4f1b0709..b3792528e 100644 --- a/include/graphblas/reference/io.hpp +++ b/include/graphblas/reference/io.hpp @@ -2028,6 +2028,206 @@ namespace grb { } } + template< + Descriptor descr = descriptors::no_operation, + typename OutputType, typename MaskType, typename InputType, + typename RIT1, typename CIT1, typename NIT1, + typename RIT2, typename CIT2, typename NIT2 + > + RC set( + Matrix< OutputType, reference, RIT1, CIT1, NIT1 > &C, + const Matrix< MaskType, reference, RIT2, CIT2, NIT2 > &Mask, + const Matrix< InputType, reference, RIT2, CIT2, NIT2 > &A, + const Phase &phase = EXECUTE + ) noexcept { + static_assert( !std::is_void< OutputType >::value, + "grb::set (masked set to matrix): cannot have a pattern " + "matrix as output" ); + static_assert( std::is_convertible< InputType, OutputType >::value, + "grb::set (masked set to matrix): input type cannot be " + "converted to output type" + ); + static_assert( + ! ( descr & descriptors::structural && descr & descriptors::invert_mask ), + "grb::set can not be called with both descriptors::structural " + "and descriptors::invert_mask in the masked variant" + ); +#ifdef _DEBUG + std::cout << "Called grb::set (matrix-to-matrix-masked, reference)\n"; +#endif + // static checks + NO_CAST_ASSERT( ( !(descr & descriptors::no_casting) || + std::is_same< InputType, OutputType >::value + ), "grb::set", + "called with non-matching value types" + ); + + // dynamic checks + assert( phase != TRY ); + + const size_t nrows = grb::nrows( C ); + const size_t ncols = grb::ncols( C ); + + const size_t m = grb::nrows( Mask ); + const size_t n = grb::ncols( Mask ); + + /*grb::Monoid< + grb::operators::left_assign< OutputType >, + grb::identities::zero + > dummyMonoid;*/ + + if( m == 0 || n == 0 ) { + // If the mask has a null size, it will be ignored + return set< descr >( C, A, phase ); + } + + if( nrows != grb::nrows( A ) || nrows != m ) { + return MISMATCH; + } + + if( ncols != grb::ncols( A ) || ncols != n ) { + return MISMATCH; + } + + const auto &mask_raw = internal::getCRS( Mask ); + + const auto &A_raw = internal::getCRS( A ); + + size_t nzc = 0; + + char * mask_arr = nullptr; + char * mask_buf = nullptr; + OutputType * mask_valbuf = nullptr; + internal::getMatrixBuffers( mask_arr, mask_buf, mask_valbuf, 1, Mask ); + + internal::Coordinates< reference > mask_coors; + mask_coors.set( mask_arr, false, mask_buf, ncols ); + + for( size_t i = 0; i < nrows; ++i ) { + mask_coors.clear(); + for( auto k = mask_raw.col_start[ i ]; k < mask_raw.col_start[ i + 1 ]; ++k ) { + const size_t k_col = mask_raw.row_index[ k ]; + if( utils::interpretMask< descr, MaskType >( true, mask_raw.values, k ) ) { + mask_coors.assign( k_col ); + } + } + for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) { + const size_t k_col = A_raw.row_index[ k ]; + if( mask_coors.assigned( k_col ) ) { + nzc++; + } + } + } + + if( phase == RESIZE ) { + return resize( C, nzc ); + } + + assert( phase == EXECUTE ); + + if( capacity( C ) < nzc ) { +#ifdef _DEBUG + std::cout << "\t insufficient capacity to complete " + "requested masked set matrix to matrix computation\n"; +#endif + const RC clear_rc = clear( C ); + if( clear_rc != SUCCESS ) { + return PANIC; + } else { + return FAILED; + } + } + + auto &CRS_raw = internal::getCRS( C ); + auto &CCS_raw = internal::getCCS( C ); + + config::NonzeroIndexType * C_col_index = internal::template + getReferenceBuffer< typename config::NonzeroIndexType >( ncols + 1 ); + + CRS_raw.col_start[ 0 ] = 0; + +#ifdef _H_GRB_REFERENCE_OMP_BLAS3 + #pragma omp parallel for simd +#endif + for( size_t j = 0; j <= ncols; ++j ) { + CCS_raw.col_start[ j ] = 0; + } + + + nzc = 0; + for( size_t i = 0; i < nrows; ++i ) { + mask_coors.clear(); + for( auto k = mask_raw.col_start[ i ]; k < mask_raw.col_start[ i + 1 ]; ++k ) { + const size_t k_col = mask_raw.row_index[ k ]; + if( utils::interpretMask< descr, MaskType >( true, mask_raw.values, k ) ) { + mask_coors.assign( k_col ); + } + } + for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) { + const size_t k_col = A_raw.row_index[ k ]; + if( mask_coors.assigned( k_col ) ) { + nzc++; + CCS_raw.col_start[ k_col + 1 ]++; + } + } + CRS_raw.col_start[ i + 1 ] = nzc; + } + + for( size_t j = 1; j < ncols; ++j ) { + CCS_raw.col_start[ j + 1 ] += CCS_raw.col_start[ j ]; + } + +#ifdef _H_GRB_REFERENCE_OMP_BLAS3 + #pragma omp parallel for simd +#endif + for( size_t j = 0; j < ncols; ++j ) { + C_col_index[ j ] = 0; + } + + + const size_t old_nzc = nzc; + + // use previously computed CCS offset array to update CCS during the + // computational phase + nzc = 0; + for( size_t i = 0; i < nrows; ++i ) { + mask_coors.clear(); + for( auto k = mask_raw.col_start[ i ]; k < mask_raw.col_start[ i + 1 ]; ++k ) { + const size_t k_col = mask_raw.row_index[ k ]; + if( utils::interpretMask< descr, MaskType >( true, mask_raw.values, k ) ) { + mask_coors.assign( k_col ); + } + } + for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) { + const size_t k_col = A_raw.row_index[ k ]; + if( mask_coors.assigned( k_col ) ) { + OutputType val = A_raw.getValue( k, *( (OutputType*) nullptr ) ); + CRS_raw.row_index[ nzc ] = k_col; + CRS_raw.setValue( nzc, val ); + const size_t CCS_index = C_col_index[ k_col ]++ + CCS_raw.col_start[ k_col ]; + CCS_raw.row_index[ CCS_index ] = i; + CCS_raw.setValue( CCS_index, val ); + nzc++; + } + } + } + + for( size_t j = 0; j < ncols; ++j ) { + assert( CCS_raw.col_start[ j + 1 ] - CCS_raw.col_start[ j ] == + C_col_index[ j ] ); + } + assert( old_nzc == nzc ); + + internal::setCurrentNonzeroes( C, nzc ); + + return SUCCESS; + } + + + + + + /** * Ingests raw data into a GraphBLAS vector. * From 969ddd9c25e0c15d7da4e6e6c4017e98bf656ba3 Mon Sep 17 00:00:00 2001 From: Aleksa Milisavljevic Date: Wed, 21 Feb 2024 14:52:25 +0100 Subject: [PATCH 02/39] tests --- tests/unit/matrixSet.cpp | 120 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/tests/unit/matrixSet.cpp b/tests/unit/matrixSet.cpp index c6ab6739d..02eecfee2 100644 --- a/tests/unit/matrixSet.cpp +++ b/tests/unit/matrixSet.cpp @@ -224,6 +224,126 @@ void grb_program( const size_t &n, grb::RC &rc ) { } } if( rc != SUCCESS ) { return; } + + //check masked matrix set + grb::Matrix< int > Mask( n, n ); + grb::Matrix< int > Output( n, n ); + grb::Matrix< int > Input( n, n ); + size_t I[ 2 * n - 1 ], J[ 2 * n - 1 ]; + int mask_vals [ 2 * n - 1 ]; + int input_vals [ 2 * n - 1 ]; + + for( size_t k = 0; k < n; ++k ) { + I[ k ] = J[ k ] = k; + mask_vals[ k ] = 1; + input_vals[ k ] = k; + if( k < n - 1 ) { + I[ n + k ] = k; + J[ n + k ] = k + 1; + mask_vals [ n + k ] = 0; + input_vals[ n + k ] = k; + } + } + + rc = grb::buildMatrixUnique( Mask, I, J, mask_vals, 2 * n - 1, SEQUENTIAL ); + if( rc != SUCCESS ) { + std::cerr << "\t buildMatrixUnique of mask matrix FAILED\n"; + return; + } + + rc = grb::buildMatrixUnique( Input, I, J, input_vals, 2 * n - 1, SEQUENTIAL ); + if( rc != SUCCESS ) { + std::cerr << "\t buildMatrixUnique of input matrix FAILED\n"; + return; + } + + rc = grb::resize(Output, 2 * n - 1 ); + if( rc != SUCCESS ) { + std::cerr << "\t grb::resize of output matrix for the matrix to matrix masked set FAILED\n"; + return; + } + + rc = grb::set< descriptors::structural >( Output, Mask, Input ); + if( rc != SUCCESS ) { + std::cerr << "\t grb::set structural (matrix to matrix masked) FAILED\n"; + return; + } + + if( grb::nnz( Output ) != 2 * n - 1 ) { + std::cerr << "\t unexpected number of output elements ( " << grb::nnz( Output ) << " ), expected " << 2 * n - 1 <<".\n"; + rc = FAILED; + } + + for( const auto & triplet : Output ) { + if( triplet.first.first != triplet.first.second && triplet.first.first != triplet.first.second - 1 ) { + std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ), value " << triplet.second << ".\n"; + rc = FAILED; + } if( triplet.first.first != triplet.second ) { + std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ) with value " << triplet.second; + std::cerr << ", expected value "<< triplet.first.first <<".\n"; + rc = FAILED; + } + } + + if( rc != SUCCESS ) { + return; + } + + + + rc = grb::set( Output, Mask, Input ); + if( rc != SUCCESS ) { + std::cerr << "\t grb::set (matrix to matrix masked) FAILED\n"; + return; + } + + if( grb::nnz( Output ) != n ) { + std::cerr << "\t unexpected number of output elements ( " << grb::nnz( Output ) << " ), expected " << n <<".\n"; + rc = FAILED; + } + + for( const auto & triplet : Output ) { + if( triplet.first.first != triplet.first.second ) { + std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ), value " << triplet.second << ".\n"; + rc = FAILED; + } if( triplet.first.first != triplet.second ) { + std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ) with value " << triplet.second; + std::cerr << ", expected value "<< triplet.first.first <<".\n"; + rc = FAILED; + } + } + + if( rc != SUCCESS ) { + return; + } + + + + rc = grb::set< descriptors::invert_mask >( Output, Mask, Input ); + if( rc != SUCCESS ) { + std::cerr << "\t grb::set invert mask (matrix to matrix masked) FAILED\n"; + return; + } + + if( grb::nnz( Output ) != n - 1 ) { + std::cerr << "\t unexpected number of output elements ( " << grb::nnz( Output ) << " ), expected " << n - 1 <<".\n"; + rc = FAILED; + } + + for( const auto & triplet : Output ) { + if( triplet.first.first != triplet.first.second - 1 ) { + std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ), value " << triplet.second << ".\n"; + rc = FAILED; + } if( triplet.first.first != triplet.second ) { + std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ) with value " << triplet.second; + std::cerr << ", expected value "<< triplet.first.first <<".\n"; + rc = FAILED; + } + } + + if( rc != SUCCESS ) { + return; + } } int main( int argc, char ** argv ) { From c7cb4f5a682d003c44c89873bfff0f44e7a67959 Mon Sep 17 00:00:00 2001 From: Aleksa Milisavljevic Date: Wed, 21 Feb 2024 15:14:13 +0100 Subject: [PATCH 03/39] tests refactor --- tests/unit/matrixSet.cpp | 80 ++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/tests/unit/matrixSet.cpp b/tests/unit/matrixSet.cpp index 02eecfee2..f66600729 100644 --- a/tests/unit/matrixSet.cpp +++ b/tests/unit/matrixSet.cpp @@ -43,6 +43,12 @@ void grb_program( const size_t &n, grb::RC &rc ) { grb::Matrix< void > C( n, n ); grb::Matrix< void > D( n, n ); grb::Matrix< unsigned int > E( n, n ); + grb::Matrix< int > Mask( n, n ); + grb::Matrix< int > Output( n, n ); + grb::Matrix< int > Input( n, n ); + + + rc = grb::resize( A, 15 ); if( rc == SUCCESS ) { rc = grb::buildMatrixUnique( A, I, J, data1, 15, SEQUENTIAL ); @@ -64,6 +70,35 @@ void grb_program( const size_t &n, grb::RC &rc ) { } } } + + size_t I_mask[ 2 * n - 1 ], J_mask[ 2 * n - 1 ]; + int mask_vals [ 2 * n - 1 ]; + int input_vals [ 2 * n - 1 ]; + + for( size_t k = 0; k < n; ++k ) { + I_mask[ k ] = J_mask[ k ] = k; + mask_vals[ k ] = 1; + input_vals[ k ] = k; + if( k < n - 1 ) { + I_mask[ n + k ] = k; + J_mask[ n + k ] = k + 1; + mask_vals [ n + k ] = 0; + input_vals[ n + k ] = k; + } + } + + rc = grb::buildMatrixUnique( Mask, I_mask, J_mask, mask_vals, 2 * n - 1, SEQUENTIAL ); + if( rc != SUCCESS ) { + std::cerr << "\t buildMatrixUnique of mask matrix FAILED\n"; + return; + } + + rc = grb::buildMatrixUnique( Input, I_mask, J_mask, input_vals, 2 * n - 1, SEQUENTIAL ); + if( rc != SUCCESS ) { + std::cerr << "\t buildMatrixUnique of input matrix FAILED\n"; + return; + } + if( rc == SUCCESS ) { rc = grb::resize( B, 15 ); } @@ -76,6 +111,9 @@ void grb_program( const size_t &n, grb::RC &rc ) { if( rc == SUCCESS ) { rc = grb::resize( E, 15 ); } + if( rc == SUCCESS ) { + rc = grb::resize(Output, 2 * n - 1 ); + } if( rc != SUCCESS || grb::nnz( A ) != 15 ) { std::cerr << "\tinitialisation FAILED\n"; return; @@ -226,42 +264,6 @@ void grb_program( const size_t &n, grb::RC &rc ) { if( rc != SUCCESS ) { return; } //check masked matrix set - grb::Matrix< int > Mask( n, n ); - grb::Matrix< int > Output( n, n ); - grb::Matrix< int > Input( n, n ); - size_t I[ 2 * n - 1 ], J[ 2 * n - 1 ]; - int mask_vals [ 2 * n - 1 ]; - int input_vals [ 2 * n - 1 ]; - - for( size_t k = 0; k < n; ++k ) { - I[ k ] = J[ k ] = k; - mask_vals[ k ] = 1; - input_vals[ k ] = k; - if( k < n - 1 ) { - I[ n + k ] = k; - J[ n + k ] = k + 1; - mask_vals [ n + k ] = 0; - input_vals[ n + k ] = k; - } - } - - rc = grb::buildMatrixUnique( Mask, I, J, mask_vals, 2 * n - 1, SEQUENTIAL ); - if( rc != SUCCESS ) { - std::cerr << "\t buildMatrixUnique of mask matrix FAILED\n"; - return; - } - - rc = grb::buildMatrixUnique( Input, I, J, input_vals, 2 * n - 1, SEQUENTIAL ); - if( rc != SUCCESS ) { - std::cerr << "\t buildMatrixUnique of input matrix FAILED\n"; - return; - } - - rc = grb::resize(Output, 2 * n - 1 ); - if( rc != SUCCESS ) { - std::cerr << "\t grb::resize of output matrix for the matrix to matrix masked set FAILED\n"; - return; - } rc = grb::set< descriptors::structural >( Output, Mask, Input ); if( rc != SUCCESS ) { @@ -278,7 +280,7 @@ void grb_program( const size_t &n, grb::RC &rc ) { if( triplet.first.first != triplet.first.second && triplet.first.first != triplet.first.second - 1 ) { std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ), value " << triplet.second << ".\n"; rc = FAILED; - } if( triplet.first.first != triplet.second ) { + } if( (int) triplet.first.first != triplet.second ) { std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ) with value " << triplet.second; std::cerr << ", expected value "<< triplet.first.first <<".\n"; rc = FAILED; @@ -306,7 +308,7 @@ void grb_program( const size_t &n, grb::RC &rc ) { if( triplet.first.first != triplet.first.second ) { std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ), value " << triplet.second << ".\n"; rc = FAILED; - } if( triplet.first.first != triplet.second ) { + } if( (int) triplet.first.first != triplet.second ) { std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ) with value " << triplet.second; std::cerr << ", expected value "<< triplet.first.first <<".\n"; rc = FAILED; @@ -334,7 +336,7 @@ void grb_program( const size_t &n, grb::RC &rc ) { if( triplet.first.first != triplet.first.second - 1 ) { std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ), value " << triplet.second << ".\n"; rc = FAILED; - } if( triplet.first.first != triplet.second ) { + } if( (int) triplet.first.first != triplet.second ) { std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ) with value " << triplet.second; std::cerr << ", expected value "<< triplet.first.first <<".\n"; rc = FAILED; From 34af750117f3d7579ad3ab82e95bc86e1025a9a4 Mon Sep 17 00:00:00 2001 From: Aleksa Milisavljevic Date: Wed, 21 Feb 2024 15:14:42 +0100 Subject: [PATCH 04/39] nonblocking implementation --- include/graphblas/nonblocking/io.hpp | 35 ++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/include/graphblas/nonblocking/io.hpp b/include/graphblas/nonblocking/io.hpp index 0831fc564..1246e7c37 100644 --- a/include/graphblas/nonblocking/io.hpp +++ b/include/graphblas/nonblocking/io.hpp @@ -1227,6 +1227,41 @@ namespace grb { internal::getRefMatrix( C ), internal::getRefMatrix( A ), val, phase ); } + + template< + Descriptor descr = descriptors::no_operation, + typename OutputType, typename MaskType, typename InputType, + typename RIT1, typename CIT1, typename NIT1, + typename RIT2, typename CIT2, typename NIT2 + > + RC set( + Matrix< OutputType, nonblocking, RIT1, CIT1, NIT1 > &C, + const Matrix< MaskType, nonblocking, RIT2, CIT2, NIT2 > &Mask, + const Matrix< InputType, nonblocking, RIT2, CIT2, NIT2 > &A, + const Phase &phase = EXECUTE + ) noexcept { + if( internal::NONBLOCKING::warn_if_not_native && + config::PIPELINE::warn_if_not_native + ) { + std::cerr << "Warning: masked set (nonblocking) currently delegates " + << "to a blocking implementation.\n" + << " Further similar such warnings will be suppressed.\n"; + internal::NONBLOCKING::warn_if_not_native = false; + } + + // nonblocking execution is not supported + // first, execute any computation that is not completed + internal::le.execution(); + + // second, delegate to the reference backend + return set< descr >( + internal::getRefMatrix( C ), + internal::getRefMatrix( Mask ), + internal::getRefMatrix( A ), + phase + ); + } + template< Descriptor descr = descriptors::no_operation, typename InputType, From 1611bc4c7dbc6f3a36e5cbf08cb3c82450747bd4 Mon Sep 17 00:00:00 2001 From: Aleksa Milisavljevic Date: Wed, 21 Feb 2024 15:15:09 +0100 Subject: [PATCH 05/39] hyperdags implementation --- include/graphblas/hyperdags/hyperdags.hpp | 4 ++- include/graphblas/hyperdags/io.hpp | 38 +++++++++++++++++++++++ src/graphblas/hyperdags/hyperdags.cpp | 3 ++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/include/graphblas/hyperdags/hyperdags.hpp b/include/graphblas/hyperdags/hyperdags.hpp index 2ae951565..b4a650c0e 100644 --- a/include/graphblas/hyperdags/hyperdags.hpp +++ b/include/graphblas/hyperdags/hyperdags.hpp @@ -381,6 +381,8 @@ namespace grb { SET_MATRIX_MATRIX_INPUT2, + SET_MATRIX_MATRIX_MASKED, + MXM_MATRIX_MATRIX_MATRIX_SEMIRING, MXM_MATRIX_MATRIX_MATRIX_MONOID, @@ -503,7 +505,7 @@ namespace grb { }; /** \internal How many operation vertex types exist. */ - const constexpr size_t numOperationVertexTypes = 111; + const constexpr size_t numOperationVertexTypes = 112; /** \internal An array of all operation vertex types. */ const constexpr enum OperationVertexType diff --git a/include/graphblas/hyperdags/io.hpp b/include/graphblas/hyperdags/io.hpp index 4ec81917b..c9347adfd 100644 --- a/include/graphblas/hyperdags/io.hpp +++ b/include/graphblas/hyperdags/io.hpp @@ -440,6 +440,44 @@ namespace grb { return ret; } + /** + * This function inherits the performance semantics of the underlying backend. + */ + template< + Descriptor descr = descriptors::no_operation, + typename OutputType, typename MaskType, typename InputType, + typename RIT1, typename CIT1, typename NIT1, + typename RIT2, typename CIT2, typename NIT2 + > + RC set( + Matrix< OutputType, hyperdags, RIT1, CIT1, NIT1 > &C, + const Matrix< MaskType, hyperdags, RIT2, CIT2, NIT2 > &Mask, + const Matrix< InputType, hyperdags, RIT2, CIT2, NIT2 > &A, + const Phase &phase = EXECUTE + ) { + const RC ret = set< descr >( + internal::getMatrix( C ), internal::getMatrix( Mask ), + internal::getMatrix( A ), phase + ); + if( ret != SUCCESS ) { return ret; } + if( phase != EXECUTE ) { return ret; } + if( nrows( A ) == 0 || ncols( A ) == 0 ) { return ret; } + std::array< const void *, 0 > sourcesP{}; + std::array< uintptr_t, 3 > sourcesC{ + getID( internal::getMatrix(A) ), + getID( internal::getMatrix(Mask) ), + getID( internal::getMatrix(C) ) + }; + std::array< uintptr_t, 1 > destinations{ getID( internal::getMatrix(C) ) }; + internal::hyperdags::generator.addOperation( + internal::hyperdags::SET_MATRIX_MATRIX_MASKED, + sourcesP.begin(), sourcesP.end(), + sourcesC.begin(), sourcesC.end(), + destinations.begin(), destinations.end() + ); + return ret; + } + /** * This function inherits the performance semantics of the underlying backend. */ diff --git a/src/graphblas/hyperdags/hyperdags.cpp b/src/graphblas/hyperdags/hyperdags.cpp index 256b31fba..320b522f5 100644 --- a/src/graphblas/hyperdags/hyperdags.cpp +++ b/src/graphblas/hyperdags/hyperdags.cpp @@ -215,6 +215,9 @@ std::string grb::internal::hyperdags::toString( case SET_MATRIX_MATRIX_INPUT2: return "set( matrix, matrix, scalar )"; + case SET_MATRIX_MATRIX_MASKED: + return "set( matrix, matrix, matrix )"; + case MXM_MATRIX_MATRIX_MATRIX_MONOID: return "mxm( matrix, matrix, matrix, monoid, scalar, scalar )"; From d250c5463f38962e8ee3b4da48d3b6d2a72baf87 Mon Sep 17 00:00:00 2001 From: Aleksa Milisavljevic Date: Mon, 26 Feb 2024 12:25:56 +0100 Subject: [PATCH 06/39] rename --- include/graphblas/hyperdags/io.hpp | 6 ++--- include/graphblas/nonblocking/io.hpp | 4 ++-- include/graphblas/reference/io.hpp | 13 ++++------ tests/unit/matrixSet.cpp | 36 ++++++++++++++-------------- 4 files changed, 28 insertions(+), 31 deletions(-) diff --git a/include/graphblas/hyperdags/io.hpp b/include/graphblas/hyperdags/io.hpp index c9347adfd..6c93044b4 100644 --- a/include/graphblas/hyperdags/io.hpp +++ b/include/graphblas/hyperdags/io.hpp @@ -451,12 +451,12 @@ namespace grb { > RC set( Matrix< OutputType, hyperdags, RIT1, CIT1, NIT1 > &C, - const Matrix< MaskType, hyperdags, RIT2, CIT2, NIT2 > &Mask, + const Matrix< MaskType, hyperdags, RIT2, CIT2, NIT2 > &M, const Matrix< InputType, hyperdags, RIT2, CIT2, NIT2 > &A, const Phase &phase = EXECUTE ) { const RC ret = set< descr >( - internal::getMatrix( C ), internal::getMatrix( Mask ), + internal::getMatrix( C ), internal::getMatrix( M ), internal::getMatrix( A ), phase ); if( ret != SUCCESS ) { return ret; } @@ -465,7 +465,7 @@ namespace grb { std::array< const void *, 0 > sourcesP{}; std::array< uintptr_t, 3 > sourcesC{ getID( internal::getMatrix(A) ), - getID( internal::getMatrix(Mask) ), + getID( internal::getMatrix(M) ), getID( internal::getMatrix(C) ) }; std::array< uintptr_t, 1 > destinations{ getID( internal::getMatrix(C) ) }; diff --git a/include/graphblas/nonblocking/io.hpp b/include/graphblas/nonblocking/io.hpp index 1246e7c37..022a50901 100644 --- a/include/graphblas/nonblocking/io.hpp +++ b/include/graphblas/nonblocking/io.hpp @@ -1236,7 +1236,7 @@ namespace grb { > RC set( Matrix< OutputType, nonblocking, RIT1, CIT1, NIT1 > &C, - const Matrix< MaskType, nonblocking, RIT2, CIT2, NIT2 > &Mask, + const Matrix< MaskType, nonblocking, RIT2, CIT2, NIT2 > &M, const Matrix< InputType, nonblocking, RIT2, CIT2, NIT2 > &A, const Phase &phase = EXECUTE ) noexcept { @@ -1256,7 +1256,7 @@ namespace grb { // second, delegate to the reference backend return set< descr >( internal::getRefMatrix( C ), - internal::getRefMatrix( Mask ), + internal::getRefMatrix( M ), internal::getRefMatrix( A ), phase ); diff --git a/include/graphblas/reference/io.hpp b/include/graphblas/reference/io.hpp index b3792528e..b0402cb05 100644 --- a/include/graphblas/reference/io.hpp +++ b/include/graphblas/reference/io.hpp @@ -2036,7 +2036,7 @@ namespace grb { > RC set( Matrix< OutputType, reference, RIT1, CIT1, NIT1 > &C, - const Matrix< MaskType, reference, RIT2, CIT2, NIT2 > &Mask, + const Matrix< MaskType, reference, RIT2, CIT2, NIT2 > &M, const Matrix< InputType, reference, RIT2, CIT2, NIT2 > &A, const Phase &phase = EXECUTE ) noexcept { @@ -2068,8 +2068,8 @@ namespace grb { const size_t nrows = grb::nrows( C ); const size_t ncols = grb::ncols( C ); - const size_t m = grb::nrows( Mask ); - const size_t n = grb::ncols( Mask ); + const size_t m = grb::nrows( M ); + const size_t n = grb::ncols( M ); /*grb::Monoid< grb::operators::left_assign< OutputType >, @@ -2089,7 +2089,7 @@ namespace grb { return MISMATCH; } - const auto &mask_raw = internal::getCRS( Mask ); + const auto &mask_raw = internal::getCRS( M ); const auto &A_raw = internal::getCRS( A ); @@ -2098,7 +2098,7 @@ namespace grb { char * mask_arr = nullptr; char * mask_buf = nullptr; OutputType * mask_valbuf = nullptr; - internal::getMatrixBuffers( mask_arr, mask_buf, mask_valbuf, 1, Mask ); + internal::getMatrixBuffers( mask_arr, mask_buf, mask_valbuf, 1, M ); internal::Coordinates< reference > mask_coors; mask_coors.set( mask_arr, false, mask_buf, ncols ); @@ -2185,8 +2185,6 @@ namespace grb { } - const size_t old_nzc = nzc; - // use previously computed CCS offset array to update CCS during the // computational phase nzc = 0; @@ -2216,7 +2214,6 @@ namespace grb { assert( CCS_raw.col_start[ j + 1 ] - CCS_raw.col_start[ j ] == C_col_index[ j ] ); } - assert( old_nzc == nzc ); internal::setCurrentNonzeroes( C, nzc ); diff --git a/tests/unit/matrixSet.cpp b/tests/unit/matrixSet.cpp index f66600729..6088a5927 100644 --- a/tests/unit/matrixSet.cpp +++ b/tests/unit/matrixSet.cpp @@ -43,9 +43,9 @@ void grb_program( const size_t &n, grb::RC &rc ) { grb::Matrix< void > C( n, n ); grb::Matrix< void > D( n, n ); grb::Matrix< unsigned int > E( n, n ); - grb::Matrix< int > Mask( n, n ); - grb::Matrix< int > Output( n, n ); - grb::Matrix< int > Input( n, n ); + grb::Matrix< int > mask( n, n ); + grb::Matrix< int > output( n, n ); + grb::Matrix< int > input( n, n ); @@ -87,13 +87,13 @@ void grb_program( const size_t &n, grb::RC &rc ) { } } - rc = grb::buildMatrixUnique( Mask, I_mask, J_mask, mask_vals, 2 * n - 1, SEQUENTIAL ); + rc = grb::buildMatrixUnique( mask, I_mask, J_mask, mask_vals, 2 * n - 1, SEQUENTIAL ); if( rc != SUCCESS ) { std::cerr << "\t buildMatrixUnique of mask matrix FAILED\n"; return; } - rc = grb::buildMatrixUnique( Input, I_mask, J_mask, input_vals, 2 * n - 1, SEQUENTIAL ); + rc = grb::buildMatrixUnique( input, I_mask, J_mask, input_vals, 2 * n - 1, SEQUENTIAL ); if( rc != SUCCESS ) { std::cerr << "\t buildMatrixUnique of input matrix FAILED\n"; return; @@ -112,7 +112,7 @@ void grb_program( const size_t &n, grb::RC &rc ) { rc = grb::resize( E, 15 ); } if( rc == SUCCESS ) { - rc = grb::resize(Output, 2 * n - 1 ); + rc = grb::resize(output, 2 * n - 1 ); } if( rc != SUCCESS || grb::nnz( A ) != 15 ) { std::cerr << "\tinitialisation FAILED\n"; @@ -265,18 +265,18 @@ void grb_program( const size_t &n, grb::RC &rc ) { //check masked matrix set - rc = grb::set< descriptors::structural >( Output, Mask, Input ); + rc = grb::set< descriptors::structural >( output, mask, input ); if( rc != SUCCESS ) { std::cerr << "\t grb::set structural (matrix to matrix masked) FAILED\n"; return; } - if( grb::nnz( Output ) != 2 * n - 1 ) { - std::cerr << "\t unexpected number of output elements ( " << grb::nnz( Output ) << " ), expected " << 2 * n - 1 <<".\n"; + if( grb::nnz( output ) != 2 * n - 1 ) { + std::cerr << "\t unexpected number of output elements ( " << grb::nnz( output ) << " ), expected " << 2 * n - 1 <<".\n"; rc = FAILED; } - for( const auto & triplet : Output ) { + for( const auto & triplet : output ) { if( triplet.first.first != triplet.first.second && triplet.first.first != triplet.first.second - 1 ) { std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ), value " << triplet.second << ".\n"; rc = FAILED; @@ -293,18 +293,18 @@ void grb_program( const size_t &n, grb::RC &rc ) { - rc = grb::set( Output, Mask, Input ); + rc = grb::set( output, mask, input ); if( rc != SUCCESS ) { std::cerr << "\t grb::set (matrix to matrix masked) FAILED\n"; return; } - if( grb::nnz( Output ) != n ) { - std::cerr << "\t unexpected number of output elements ( " << grb::nnz( Output ) << " ), expected " << n <<".\n"; + if( grb::nnz( output ) != n ) { + std::cerr << "\t unexpected number of output elements ( " << grb::nnz( output ) << " ), expected " << n <<".\n"; rc = FAILED; } - for( const auto & triplet : Output ) { + for( const auto & triplet : output ) { if( triplet.first.first != triplet.first.second ) { std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ), value " << triplet.second << ".\n"; rc = FAILED; @@ -321,18 +321,18 @@ void grb_program( const size_t &n, grb::RC &rc ) { - rc = grb::set< descriptors::invert_mask >( Output, Mask, Input ); + rc = grb::set< descriptors::invert_mask >( output, mask, input ); if( rc != SUCCESS ) { std::cerr << "\t grb::set invert mask (matrix to matrix masked) FAILED\n"; return; } - if( grb::nnz( Output ) != n - 1 ) { - std::cerr << "\t unexpected number of output elements ( " << grb::nnz( Output ) << " ), expected " << n - 1 <<".\n"; + if( grb::nnz( output ) != n - 1 ) { + std::cerr << "\t unexpected number of output elements ( " << grb::nnz( output ) << " ), expected " << n - 1 <<".\n"; rc = FAILED; } - for( const auto & triplet : Output ) { + for( const auto & triplet : output ) { if( triplet.first.first != triplet.first.second - 1 ) { std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ), value " << triplet.second << ".\n"; rc = FAILED; From cc7368960e6a921bd7a038c66f9f3c7f2591878f Mon Sep 17 00:00:00 2001 From: Aleksa Milisavljevic Date: Tue, 27 Feb 2024 10:44:51 +0100 Subject: [PATCH 07/39] code cleanup --- include/graphblas/reference/io.hpp | 28 ++++++++++++++------------ include/graphblas/utils.hpp | 32 ++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 13 deletions(-) diff --git a/include/graphblas/reference/io.hpp b/include/graphblas/reference/io.hpp index b0402cb05..2033b31ee 100644 --- a/include/graphblas/reference/io.hpp +++ b/include/graphblas/reference/io.hpp @@ -2097,7 +2097,7 @@ namespace grb { char * mask_arr = nullptr; char * mask_buf = nullptr; - OutputType * mask_valbuf = nullptr; + MaskType * mask_valbuf = nullptr; internal::getMatrixBuffers( mask_arr, mask_buf, mask_valbuf, 1, M ); internal::Coordinates< reference > mask_coors; @@ -2106,13 +2106,13 @@ namespace grb { for( size_t i = 0; i < nrows; ++i ) { mask_coors.clear(); for( auto k = mask_raw.col_start[ i ]; k < mask_raw.col_start[ i + 1 ]; ++k ) { - const size_t k_col = mask_raw.row_index[ k ]; - if( utils::interpretMask< descr, MaskType >( true, mask_raw.values, k ) ) { + const auto k_col = mask_raw.row_index[ k ]; + if( utils::interpretMatrixMask< descr, MaskType >( true, mask_raw.getValues(), k ) ) { mask_coors.assign( k_col ); } } for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) { - const size_t k_col = A_raw.row_index[ k ]; + const auto k_col = A_raw.row_index[ k ]; if( mask_coors.assigned( k_col ) ) { nzc++; } @@ -2147,7 +2147,7 @@ namespace grb { CRS_raw.col_start[ 0 ] = 0; #ifdef _H_GRB_REFERENCE_OMP_BLAS3 - #pragma omp parallel for simd + #pragma omp parallel for simd #endif for( size_t j = 0; j <= ncols; ++j ) { CCS_raw.col_start[ j ] = 0; @@ -2158,13 +2158,13 @@ namespace grb { for( size_t i = 0; i < nrows; ++i ) { mask_coors.clear(); for( auto k = mask_raw.col_start[ i ]; k < mask_raw.col_start[ i + 1 ]; ++k ) { - const size_t k_col = mask_raw.row_index[ k ]; - if( utils::interpretMask< descr, MaskType >( true, mask_raw.values, k ) ) { + const auto k_col = mask_raw.row_index[ k ]; + if( utils::interpretMask< descr, MaskType >( true, mask_raw.getValues(), k ) ) { mask_coors.assign( k_col ); } } for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) { - const size_t k_col = A_raw.row_index[ k ]; + const auto k_col = A_raw.row_index[ k ]; if( mask_coors.assigned( k_col ) ) { nzc++; CCS_raw.col_start[ k_col + 1 ]++; @@ -2191,29 +2191,31 @@ namespace grb { for( size_t i = 0; i < nrows; ++i ) { mask_coors.clear(); for( auto k = mask_raw.col_start[ i ]; k < mask_raw.col_start[ i + 1 ]; ++k ) { - const size_t k_col = mask_raw.row_index[ k ]; - if( utils::interpretMask< descr, MaskType >( true, mask_raw.values, k ) ) { + const auto k_col = mask_raw.row_index[ k ]; + if( utils::interpretMatrixMask< descr, MaskType >( true, mask_raw.getValues(), k ) ) { mask_coors.assign( k_col ); } } for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) { - const size_t k_col = A_raw.row_index[ k ]; + const auto k_col = A_raw.row_index[ k ]; if( mask_coors.assigned( k_col ) ) { OutputType val = A_raw.getValue( k, *( (OutputType*) nullptr ) ); CRS_raw.row_index[ nzc ] = k_col; CRS_raw.setValue( nzc, val ); - const size_t CCS_index = C_col_index[ k_col ]++ + CCS_raw.col_start[ k_col ]; + const size_t CCS_index = C_col_index[ k_col ] + CCS_raw.col_start[ k_col ]; + C_col_index[ k_col ]++; CCS_raw.row_index[ CCS_index ] = i; CCS_raw.setValue( CCS_index, val ); nzc++; } } } - +#ifndef NDEBUG for( size_t j = 0; j < ncols; ++j ) { assert( CCS_raw.col_start[ j + 1 ] - CCS_raw.col_start[ j ] == C_col_index[ j ] ); } +#endif internal::setCurrentNonzeroes( C, nzc ); diff --git a/include/graphblas/utils.hpp b/include/graphblas/utils.hpp index 3ab508eec..09c8d8b71 100644 --- a/include/graphblas/utils.hpp +++ b/include/graphblas/utils.hpp @@ -384,6 +384,38 @@ namespace grb { return ret; } } + /** Specialisation for void-valued matrice's masks */ + template< Descriptor descriptor, typename MatrixDataType, typename ValuesType > + static bool interpretMatrixMask( + const bool &assigned, + const ValuesType * const values, + const size_t k, + typename std::enable_if< + !std::is_void< MatrixDataType >::value + >::type * = nullptr + ) { + return interpretMask< descriptor, ValuesType >( assigned, values, k ); + } + + /** Specialisation for void-valued matrice's masks */ + template< Descriptor descriptor, typename MatrixDataType, typename ValuesType > + static bool interpretMatrixMask( + const bool &assigned, + const ValuesType * const, + const size_t, + typename std::enable_if< + std::is_void< MatrixDataType >::value + >::type * = nullptr + ) { + // set default mask to false + bool ret = assigned; + // check whether we should return the inverted value + if( descriptor & descriptors::invert_mask ) { + return !ret; + } else { + return ret; + } + } /** Specialisation for void-valued matrice's masks */ template< Descriptor descriptor, typename MatrixDataType, typename ValuesType > From 3e33029504a73d27dcc2f4b570082cbbc71ab7fa Mon Sep 17 00:00:00 2001 From: Aleksa Milisavljevic Date: Tue, 27 Feb 2024 10:58:39 +0100 Subject: [PATCH 08/39] omp reductions --- include/graphblas/reference/io.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/graphblas/reference/io.hpp b/include/graphblas/reference/io.hpp index 2033b31ee..032027fb2 100644 --- a/include/graphblas/reference/io.hpp +++ b/include/graphblas/reference/io.hpp @@ -2111,6 +2111,9 @@ namespace grb { mask_coors.assign( k_col ); } } +#ifdef _H_GRB_REFERENCE_OMP_BLAS3 + #pragma omp parallel for reduction(+:nzc) +#endif for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) { const auto k_col = A_raw.row_index[ k ]; if( mask_coors.assigned( k_col ) ) { From 36849bdb5a75ee6a76812361f41909e953b78ba7 Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sun, 17 Aug 2025 16:51:20 +0200 Subject: [PATCH 09/39] Fixes after rebase --- include/graphblas/base/io.hpp | 31 +++++++++++++++ include/graphblas/utils.hpp | 43 ++++----------------- tests/unit/matrixSet.cpp | 71 ++++++++++++++++------------------- 3 files changed, 72 insertions(+), 73 deletions(-) diff --git a/include/graphblas/base/io.hpp b/include/graphblas/base/io.hpp index 493dfcfea..4e493332b 100644 --- a/include/graphblas/base/io.hpp +++ b/include/graphblas/base/io.hpp @@ -1320,6 +1320,37 @@ namespace grb { return UNSUPPORTED; } + /** TODO add specification */ + template< + Descriptor descr = descriptors::no_operation, + typename OutputType, typename MaskType, typename ValueType, + typename RIT1, typename CIT1, typename NIT1, + typename RIT2, typename CIT2, typename NIT2, + typename RIT3, typename CIT3, typename NIT3, + Backend backend + > + RC set( + Matrix< OutputType, backend, RIT1, CIT1, NIT1 > &C, + const Matrix< MaskType, backend, RIT2, CIT2, NIT2 > &mask, + const Matrix< ValueType, backend, RIT3, CIT3, NIT3 > &A, + const Phase &phase = EXECUTE, + const typename std::enable_if< + !grb::is_object< OutputType >::value && + !grb::is_object< ValueType >::value && + !grb::is_object< MaskType >::value, + void >::type * const = nullptr + ) noexcept { +#ifndef NDEBUG + const bool should_not_call_base_matrix_masked_matrix_set = false; + assert( should_not_call_base_matrix_masked_matrix_set ); +#endif + (void) C; + (void) mask; + (void) A; + (void) phase; + return UNSUPPORTED; + } + /** * Sets the element of a given vector at a given position to a given value. * diff --git a/include/graphblas/utils.hpp b/include/graphblas/utils.hpp index 09c8d8b71..440a835f0 100644 --- a/include/graphblas/utils.hpp +++ b/include/graphblas/utils.hpp @@ -385,7 +385,10 @@ namespace grb { } } /** Specialisation for void-valued matrice's masks */ - template< Descriptor descriptor, typename MatrixDataType, typename ValuesType > + template< + Descriptor descriptor, + typename MatrixDataType, typename ValuesType + > static bool interpretMatrixMask( const bool &assigned, const ValuesType * const values, @@ -398,40 +401,10 @@ namespace grb { } /** Specialisation for void-valued matrice's masks */ - template< Descriptor descriptor, typename MatrixDataType, typename ValuesType > - static bool interpretMatrixMask( - const bool &assigned, - const ValuesType * const, - const size_t, - typename std::enable_if< - std::is_void< MatrixDataType >::value - >::type * = nullptr - ) { - // set default mask to false - bool ret = assigned; - // check whether we should return the inverted value - if( descriptor & descriptors::invert_mask ) { - return !ret; - } else { - return ret; - } - } - - /** Specialisation for void-valued matrice's masks */ - template< Descriptor descriptor, typename MatrixDataType, typename ValuesType > - static bool interpretMatrixMask( - const bool &assigned, - const ValuesType * const values, - const size_t k, - typename std::enable_if< - !std::is_void< MatrixDataType >::value - >::type * = nullptr - ) { - return interpretMask< descriptor, ValuesType >( assigned, values, k ); - } - - /** Specialisation for void-valued matrice's masks */ - template< Descriptor descriptor, typename MatrixDataType, typename ValuesType > + template< + Descriptor descriptor, + typename MatrixDataType, typename ValuesType + > static bool interpretMatrixMask( const bool &assigned, const ValuesType * const, diff --git a/tests/unit/matrixSet.cpp b/tests/unit/matrixSet.cpp index 6088a5927..617d4af8a 100644 --- a/tests/unit/matrixSet.cpp +++ b/tests/unit/matrixSet.cpp @@ -270,82 +270,77 @@ void grb_program( const size_t &n, grb::RC &rc ) { std::cerr << "\t grb::set structural (matrix to matrix masked) FAILED\n"; return; } - if( grb::nnz( output ) != 2 * n - 1 ) { - std::cerr << "\t unexpected number of output elements ( " << grb::nnz( output ) << " ), expected " << 2 * n - 1 <<".\n"; + std::cerr << "\t unexpected number of output elements ( " + << grb::nnz( output ) << " ), expected " << 2 * n - 1 <<".\n"; rc = FAILED; } - - for( const auto & triplet : output ) { - if( triplet.first.first != triplet.first.second && triplet.first.first != triplet.first.second - 1 ) { - std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ), value " << triplet.second << ".\n"; + for( const auto &triplet : output ) { + if( + triplet.first.first != triplet.first.second && + triplet.first.first != triplet.first.second - 1 + ) { + std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " + << triplet.first.second << " ), value " << triplet.second << ".\n"; rc = FAILED; - } if( (int) triplet.first.first != triplet.second ) { - std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ) with value " << triplet.second; + } if( triplet.first.first != static_cast< size_t >(triplet.second) ) { + std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " + << triplet.first.second << " ) with value " << triplet.second; std::cerr << ", expected value "<< triplet.first.first <<".\n"; rc = FAILED; } } - - if( rc != SUCCESS ) { - return; - } - - + if( rc != SUCCESS ) { return; } rc = grb::set( output, mask, input ); if( rc != SUCCESS ) { std::cerr << "\t grb::set (matrix to matrix masked) FAILED\n"; return; } - if( grb::nnz( output ) != n ) { - std::cerr << "\t unexpected number of output elements ( " << grb::nnz( output ) << " ), expected " << n <<".\n"; + std::cerr << "\t unexpected number of output elements ( " + << grb::nnz( output ) << " ), expected " << n <<".\n"; rc = FAILED; } - - for( const auto & triplet : output ) { + for( const auto &triplet : output ) { if( triplet.first.first != triplet.first.second ) { - std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ), value " << triplet.second << ".\n"; + std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " + << triplet.first.second << " ), value " << triplet.second << ".\n"; rc = FAILED; - } if( (int) triplet.first.first != triplet.second ) { - std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ) with value " << triplet.second; + } if( triplet.first.first != static_cast< size_t >(triplet.second) ) { + std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " + << triplet.first.second << " ) with value " << triplet.second; std::cerr << ", expected value "<< triplet.first.first <<".\n"; rc = FAILED; } } + if( rc != SUCCESS ) { return; } - if( rc != SUCCESS ) { - return; - } - - - + /* TODO check: what are the GraphBLAS semantics for this operation? rc = grb::set< descriptors::invert_mask >( output, mask, input ); if( rc != SUCCESS ) { std::cerr << "\t grb::set invert mask (matrix to matrix masked) FAILED\n"; return; } - if( grb::nnz( output ) != n - 1 ) { - std::cerr << "\t unexpected number of output elements ( " << grb::nnz( output ) << " ), expected " << n - 1 <<".\n"; + std::cerr << "\t unexpected number of output elements ( " + << grb::nnz( output ) << " ), expected " << n - 1 <<".\n"; rc = FAILED; } - - for( const auto & triplet : output ) { + for( const auto &triplet : output ) { if( triplet.first.first != triplet.first.second - 1 ) { - std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ), value " << triplet.second << ".\n"; + std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " + << triplet.first.second << " ), value " << triplet.second << ".\n"; rc = FAILED; - } if( (int) triplet.first.first != triplet.second ) { - std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ) with value " << triplet.second; + } if( triplet.first.first != triplet.second ) { + std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " + << triplet.first.second << " ) with value " << triplet.second; std::cerr << ", expected value "<< triplet.first.first <<".\n"; rc = FAILED; } } - - if( rc != SUCCESS ) { - return; - } + if( rc != SUCCESS ) { return; } + */ } int main( int argc, char ** argv ) { From 969ce5c80565f6b6844361626dceae726315a9e3 Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sun, 17 Aug 2025 19:52:49 +0200 Subject: [PATCH 10/39] The code in the unit test was correct re invert_mask, even though presently it trips an internal static_assert. Fixing that would be a first TODO --- include/graphblas/descriptors.hpp | 24 +++++++++++++++++++++++- tests/unit/matrixSet.cpp | 23 ++++++++++++----------- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/include/graphblas/descriptors.hpp b/include/graphblas/descriptors.hpp index cd0a5ab25..2c0a793ae 100644 --- a/include/graphblas/descriptors.hpp +++ b/include/graphblas/descriptors.hpp @@ -62,7 +62,20 @@ namespace grb { */ static constexpr Descriptor no_operation = 0; - /** Inverts the mask prior to applying it. */ + /** + * Inverts the mask prior to applying it. + * + * Applying this descriptor to a sparse mask may still only generate output + * where the mask has elements. To decide whether an output will be generated + * at a given element in the mask, its value will be inverted. + * + * If instead the structural complement is to be taken as a mask, this + * descriptor must be combined with #grb::descriptors::structural. + * ALP/GraphBLAS forbids taking the structural inverse of matrix masks + * (because then either the output matrix or the mask matrix has + * \f$ \mathcal{mn} \f$ values, which defeats any useful application of + * GraphBLAS as this signifies one of the containers is, in fact, not sparse). + */ static constexpr Descriptor invert_mask = 1; /** @@ -98,6 +111,11 @@ namespace grb { * i-th index, regardless of how that value evaluates. It evaluates false * if there was no value assigned. * + * These semantics are inverted when this descriptor is combined with + * #grb::descriptors::invert_mask: in that case, the mask evaluates true at + * index \f$ i \f$ if the mask had no value at that index; and evaluates false + * otherwise. + * * @see structural_complement */ static constexpr Descriptor structural = 8; @@ -113,6 +131,10 @@ namespace grb { * This ignores the actual values of the mask argument. The i-th element of * the mask now evaluates true if the mask has \em no value assigned to its * i-th index, and evaluates false otherwise. + * + * The application of this descriptor is forbidden for matrix mask arguments, + * as otherwise either the output or the mask is dense or is (too) close to + * being dense. */ static constexpr Descriptor structural_complement = structural | invert_mask; diff --git a/tests/unit/matrixSet.cpp b/tests/unit/matrixSet.cpp index 617d4af8a..891b91c85 100644 --- a/tests/unit/matrixSet.cpp +++ b/tests/unit/matrixSet.cpp @@ -47,8 +47,6 @@ void grb_program( const size_t &n, grb::RC &rc ) { grb::Matrix< int > output( n, n ); grb::Matrix< int > input( n, n ); - - rc = grb::resize( A, 15 ); if( rc == SUCCESS ) { rc = grb::buildMatrixUnique( A, I, J, data1, 15, SEQUENTIAL ); @@ -71,14 +69,19 @@ void grb_program( const size_t &n, grb::RC &rc ) { } } + // initialise data for masked-set tests + // - mask will be an n by n matrix with its diagonal set to 1 and its + // superdiagonal set to 0. + // - input will be an n by n matrix with each element on its diagonal and + // superdiagonal set to its row index (meaning the entries on row 0 have + // value 0). size_t I_mask[ 2 * n - 1 ], J_mask[ 2 * n - 1 ]; int mask_vals [ 2 * n - 1 ]; int input_vals [ 2 * n - 1 ]; - for( size_t k = 0; k < n; ++k ) { I_mask[ k ] = J_mask[ k ] = k; mask_vals[ k ] = 1; - input_vals[ k ] = k; + input_vals[ k ] = static_cast< int >( k ); if( k < n - 1 ) { I_mask[ n + k ] = k; J_mask[ n + k ] = k + 1; @@ -86,14 +89,14 @@ void grb_program( const size_t &n, grb::RC &rc ) { input_vals[ n + k ] = k; } } - - rc = grb::buildMatrixUnique( mask, I_mask, J_mask, mask_vals, 2 * n - 1, SEQUENTIAL ); + rc = grb::buildMatrixUnique( mask, I_mask, J_mask, mask_vals, 2 * n - 1, + SEQUENTIAL ); if( rc != SUCCESS ) { std::cerr << "\t buildMatrixUnique of mask matrix FAILED\n"; return; } - - rc = grb::buildMatrixUnique( input, I_mask, J_mask, input_vals, 2 * n - 1, SEQUENTIAL ); + rc = grb::buildMatrixUnique( input, I_mask, J_mask, input_vals, 2 * n - 1, + SEQUENTIAL ); if( rc != SUCCESS ) { std::cerr << "\t buildMatrixUnique of input matrix FAILED\n"; return; @@ -316,7 +319,6 @@ void grb_program( const size_t &n, grb::RC &rc ) { } if( rc != SUCCESS ) { return; } - /* TODO check: what are the GraphBLAS semantics for this operation? rc = grb::set< descriptors::invert_mask >( output, mask, input ); if( rc != SUCCESS ) { std::cerr << "\t grb::set invert mask (matrix to matrix masked) FAILED\n"; @@ -332,7 +334,7 @@ void grb_program( const size_t &n, grb::RC &rc ) { std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ), value " << triplet.second << ".\n"; rc = FAILED; - } if( triplet.first.first != triplet.second ) { + } if( triplet.first.first != static_cast< size_t >(triplet.second) ) { std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ) with value " << triplet.second; std::cerr << ", expected value "<< triplet.first.first <<".\n"; @@ -340,7 +342,6 @@ void grb_program( const size_t &n, grb::RC &rc ) { } } if( rc != SUCCESS ) { return; } - */ } int main( int argc, char ** argv ) { From 17868d8995c2e75cc0044a24234de23db2eb174a Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sun, 17 Aug 2025 21:08:09 +0200 Subject: [PATCH 11/39] Code review on implementations for indirect backends --- include/graphblas/hyperdags/io.hpp | 5 +++-- include/graphblas/nonblocking/io.hpp | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/graphblas/hyperdags/io.hpp b/include/graphblas/hyperdags/io.hpp index 6c93044b4..4ac5b4635 100644 --- a/include/graphblas/hyperdags/io.hpp +++ b/include/graphblas/hyperdags/io.hpp @@ -447,12 +447,13 @@ namespace grb { Descriptor descr = descriptors::no_operation, typename OutputType, typename MaskType, typename InputType, typename RIT1, typename CIT1, typename NIT1, - typename RIT2, typename CIT2, typename NIT2 + typename RIT2, typename CIT2, typename NIT2, + typename RIT3, typename CIT3, typename NIT3 > RC set( Matrix< OutputType, hyperdags, RIT1, CIT1, NIT1 > &C, const Matrix< MaskType, hyperdags, RIT2, CIT2, NIT2 > &M, - const Matrix< InputType, hyperdags, RIT2, CIT2, NIT2 > &A, + const Matrix< InputType, hyperdags, RIT3, CIT3, NIT3 > &A, const Phase &phase = EXECUTE ) { const RC ret = set< descr >( diff --git a/include/graphblas/nonblocking/io.hpp b/include/graphblas/nonblocking/io.hpp index 022a50901..963846251 100644 --- a/include/graphblas/nonblocking/io.hpp +++ b/include/graphblas/nonblocking/io.hpp @@ -1232,12 +1232,13 @@ namespace grb { Descriptor descr = descriptors::no_operation, typename OutputType, typename MaskType, typename InputType, typename RIT1, typename CIT1, typename NIT1, - typename RIT2, typename CIT2, typename NIT2 + typename RIT2, typename CIT2, typename NIT2, + typename RIT3, typename CIT3, typename NIT3 > RC set( Matrix< OutputType, nonblocking, RIT1, CIT1, NIT1 > &C, const Matrix< MaskType, nonblocking, RIT2, CIT2, NIT2 > &M, - const Matrix< InputType, nonblocking, RIT2, CIT2, NIT2 > &A, + const Matrix< InputType, nonblocking, RIT3, CIT3, NIT3 > &A, const Phase &phase = EXECUTE ) noexcept { if( internal::NONBLOCKING::warn_if_not_native && From 6c7d9db84dcda4e2d56e06a4f2970af5f7c67854 Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sun, 17 Aug 2025 21:10:15 +0200 Subject: [PATCH 12/39] Code review utils --- include/graphblas/utils.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/graphblas/utils.hpp b/include/graphblas/utils.hpp index 440a835f0..4a2235501 100644 --- a/include/graphblas/utils.hpp +++ b/include/graphblas/utils.hpp @@ -384,7 +384,7 @@ namespace grb { return ret; } } - /** Specialisation for void-valued matrice's masks */ + /** Specialisation for void-valued matrix masks */ template< Descriptor descriptor, typename MatrixDataType, typename ValuesType @@ -400,7 +400,7 @@ namespace grb { return interpretMask< descriptor, ValuesType >( assigned, values, k ); } - /** Specialisation for void-valued matrice's masks */ + /** Specialisation for void-valued matrix masks */ template< Descriptor descriptor, typename MatrixDataType, typename ValuesType From 009ce240fd02507ccff216ffb98f57dc171aedb2 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Mon, 18 Aug 2025 13:15:17 +0200 Subject: [PATCH 13/39] Wrote spec for set(matrix,matrix,matrix) --- include/graphblas/base/io.hpp | 136 ++++++++++++++++++++++++++++------ 1 file changed, 114 insertions(+), 22 deletions(-) diff --git a/include/graphblas/base/io.hpp b/include/graphblas/base/io.hpp index 4e493332b..c904570fc 100644 --- a/include/graphblas/base/io.hpp +++ b/include/graphblas/base/io.hpp @@ -1218,15 +1218,17 @@ namespace grb { * -# #grb::descriptors::invert_mask, and * -# #grb::descriptors::structural. * - * However, and differently from most ALP primtivies, the + * However, and differently from most ALP/GraphBLAS primitivies, the * #grb::descriptors::invert_mask and #grb::descriptors::structural are * mutually exclusive for this primitive. * \endparblock * * @tparam OutputType The type of each element in the given matrix. * @tparam MaskType The type of each element in the given mask. - * @tparam ValueType The type of the given value. Should be convertible - * to \a OutputType. + * @tparam ValueType The type of the given value. + * + * The given \a ValueType must be convertible to \a OutputType. + * * @tparam RIT The integer type for encoding row indices. * @tparam CIT The integer type for encoding column indices. * @tparam NIT The integer type for encoding nonzero indices. @@ -1244,51 +1246,50 @@ namespace grb { * the default is #grb::EXECUTE. * * In #grb::RESIZE mode: - * @returns #grb::SUCCESS When the capacity of \a C has been (made) sufficient - * to store the requested output. + * + * @returns #grb::SUCCESS When the capacity of \a C (has been made or already + * was) sufficient to store the requested output. * @returns #grb::OUTOFMEM When out-of-memory conditions have been met while - * executing. If this error code is returned, \a C + * resizing \a C. If this error code is returned, \a C * shall be unmodified compared to its state at * function entry. * * In #grb::EXECUTE mode: - * @returns #grb::SUCCESS When the call completes successfully. + * + * @returns #grb::SUCCESS When the computation has completed or will execute + * successfully. * @returns #grb::ILLEGAL When \a C did not have enough capacity to store the * output of the requested computation. * * Either mode may additionally return: - * @returns #grb::ILLEGAL In case the given \a mask was empty. + * * @returns #grb::MISMATCH In case \a C and \a mask have mismatching sizes. * @returns #grb::PANIC In case an unmitigable error was encountered. The - * caller is suggested to exit gracefully, and in any - * case to not make any further calls to ALP. + * caller, when encountering this return code, is + * suggested to exit gracefully and to not make any + * further calls to ALP. * * When \a descr includes #grb::descriptors::no_casting then code shall not * compile if one of the following conditions are met: * -# \a ValueType does not match \a OutputType; or * -# \a MaskType does not match bool. * - * In these cases, the code shall not compile: implementations must throw - * a static assertion failure in this case. - * * Similarly, it is forbidden to call this function with both following * descriptors simultaneously: * - #grb::descriptors::invert_mask \em and #grb::descriptors::structural. * * The use of the #grb::descriptors::structural_complement descriptor hence is - * is forbidden also. Implementations shall throw a static assertion failure - * if the user nonetheless asks for structural mask inversion. + * is forbidden also. These conditions, when encountered, should lead to + * compile-time errors also. + * + * \note One vehicle to ensure compilation does not occur in these cases is via + * static_assert. * * \parblock * \par Performance semantics * Each backend must define performance semantics for this primitive. * * @see perfSemantics - * - * \warning Generally, if \a mask equals \a C and the mask is non-structural, - * then optimised implementations will assign higher costs than when - * \a mask does not equal \a C. This is because the nonzero structure - * update cannot be done in-place. * \endparblock */ template< @@ -1320,7 +1321,98 @@ namespace grb { return UNSUPPORTED; } - /** TODO add specification */ + /** + * Sets all values of a matrix to that of a given source matrix, if and only if + * the corresponding value coordinates evaluate true at the given mask + * matrix. + * + * @tparam descr The descriptor used for this operation. + * + * \parblock + * \par Accepted descriptors + * -# #grb::descriptors::no_operation, + * -# #grb::descriptors::no_casting, + * -# #grb::descriptors::invert_mask, + * -# #grb::descriptors::structural + * + * However, and differently from most ALP primtivies, the + * #grb::descriptors::invert_mask and #grb::descriptors::structural are + * mutually exclusive for this primitive. + * \endparblock + * + * @tparam OutputType The type of each element in the destination matrix. + * @tparam MaskType The type of each element in the output mask. + * @tparam ValueType The type of each element in the source matrix. + * + * The given \a ValueType must be convertible to \a OutputType. + * + * \internal + * @tparam RIT1 The integer type for encoding row indices in \a C. + * @tparam CIT1 The integer type for encoding column indices in \a C. + * @tparam NIT1 The integer type for encoding nonzero indices in \a C. + * @tparam RIT2 The integer type for encoding row indices in \a mask. + * @tparam CIT2 The integer type for encoding column indices in \a mask. + * @tparam NIT2 The integer type for encoding nonzero indices in \a mask. + * @tparam RIT3 The integer type for encoding row indices in \a A. + * @tparam CIT3 The integer type for encoding column indices in \a A. + * @tparam NIT3 The integer type for encoding nonzero indices in \a A. + * @tparam backend The backend selected for executing this primitive. + * \endinternal + * + * @param[out] C The matrix that will be a masked copy of \a A. + * @param[in] mask Matrix that acts as output mask on \a C. + * @param[in] A The source matrix which will (partially) be copied to + * \a A. + * @param[in] phase Which #grb::Phase of the operation is requested. Optional; + * the default is #grb::EXECUTE. + * + * In #grb::RESIZE mode: + * + * @returns #grb::SUCCESS When the capacity of \a C (has been made or already + * was) sufficient to store the requested output. + * @returns #grb::OUTOFMEM When out-of-memory conditions were met while + * resizing \a C. If this error code is returned, \a C + * shall be left unmodified compared to its state at + * function entry. + * + * In #grb::EXECUTE mode: + * + * @returns #grb::SUCCESS When the computation has completed or will execute + * successfully. + * @returns #grb::ILLEGAL When \a C did not have enough capacity to store the + * output of the requested computation. + * + * Either mode may additionally return: + * + * @returns #grb::MISMATCH When \a A and \a C have mismatching sizes. + * @returns #grb::MISMATCH When \a C and \a mask have mismatching sizes. + * @returns #grb::PANIC In case an unmitigable error was encountered. The + * caller, when encountering this return code, is + * suggested to exit the program gracefully and to not + * make any further calls to ALP. + * + * When \a descr includes #grb::descriptors::no_casting, then code shall not + * comile if one of the following conditions are met: + * -# \a ValueType does not match \a OutputType; or + * -# \a MaskType does not match bool. + * + * Similarly, it is forbidden to call this function with both following + * descriptors simultaneously: + * - #grb::descriptors::invert_mask \em and #grb::descriptors::structural. + * + * The use of the #grb::descriptors::structural_complement descriptor hence is + * is forbidden also. These conditions should lead to compile-time errors also. + * + * \note One vehicle to ensure compilation does not occur in these cases is via + * static_assert. + * + * \parblock + * \par Performance semantics + * Each backend must define performance semantics for this primitive. + * + * @see perfSemantics + * \endparblock + */ template< Descriptor descr = descriptors::no_operation, typename OutputType, typename MaskType, typename ValueType, @@ -1794,7 +1886,7 @@ namespace grb { * successfully, then a call to this function shall return #grb::SUCCESS. * * The are several other cases in which the computation of nonblocking - * primtives is forced: + * primitives is forced: * -# whenever an output iterator of an output container of any of the non- * blocking primitives is requested; and * -# whenever an output container of any of the non-blocking primitives is From 473e70018daed8661c0a5cafbad947aba2e7ff4e Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Mon, 18 Aug 2025 14:02:18 +0200 Subject: [PATCH 14/39] Initial code review. Main TODO: parallelise this operation using the same strategy as for the mxm, i.e., get as many SPAs as possible and use those to parallelise --- include/graphblas/phase.hpp | 2 +- include/graphblas/reference/io.hpp | 114 +++++++++++++++-------------- 2 files changed, 59 insertions(+), 57 deletions(-) diff --git a/include/graphblas/phase.hpp b/include/graphblas/phase.hpp index 1f12b8f05..044395ac9 100644 --- a/include/graphblas/phase.hpp +++ b/include/graphblas/phase.hpp @@ -236,7 +236,7 @@ namespace grb { * execute phase. * * If, instead, the output container capacity was found to be insufficient, - * then the requested operation may return #grb::FAILED, in which case the + * then the requested operation may return #grb::ILLEGAL, in which case the * contents of output containers shall be cleared. * * \note That on failure a primitive called using the execute phase may diff --git a/include/graphblas/reference/io.hpp b/include/graphblas/reference/io.hpp index 032027fb2..447a2d9b9 100644 --- a/include/graphblas/reference/io.hpp +++ b/include/graphblas/reference/io.hpp @@ -2032,30 +2032,32 @@ namespace grb { Descriptor descr = descriptors::no_operation, typename OutputType, typename MaskType, typename InputType, typename RIT1, typename CIT1, typename NIT1, - typename RIT2, typename CIT2, typename NIT2 + typename RIT2, typename CIT2, typename NIT2, + typename RIT3, typename CIT3, typename NIT3 > RC set( Matrix< OutputType, reference, RIT1, CIT1, NIT1 > &C, const Matrix< MaskType, reference, RIT2, CIT2, NIT2 > &M, - const Matrix< InputType, reference, RIT2, CIT2, NIT2 > &A, + const Matrix< InputType, reference, RIT3, CIT3, NIT3 > &A, const Phase &phase = EXECUTE ) noexcept { - static_assert( !std::is_void< OutputType >::value, - "grb::set (masked set to matrix): cannot have a pattern " - "matrix as output" ); - static_assert( std::is_convertible< InputType, OutputType >::value, + // static checks + static_assert( + !std::is_void< InputType >::value || + std::is_void< OutputType >::value, "grb::set( masked set to matrix ): " + "cannot have a pattern matrix as input unless the output is also a pattern " + "matrix" + ); + static_assert( + std::is_convertible< InputType, OutputType >::value, "grb::set (masked set to matrix): input type cannot be " "converted to output type" ); static_assert( - ! ( descr & descriptors::structural && descr & descriptors::invert_mask ), - "grb::set can not be called with both descriptors::structural " - "and descriptors::invert_mask in the masked variant" + !(descr & descriptors::structural && (descr & descriptors::invert_mask)), + "grb::set (masked set to matrix) may not be called with both the structural " + "and invert_mask descriptors set" ); -#ifdef _DEBUG - std::cout << "Called grb::set (matrix-to-matrix-masked, reference)\n"; -#endif - // static checks NO_CAST_ASSERT( ( !(descr & descriptors::no_casting) || std::is_same< InputType, OutputType >::value ), "grb::set", @@ -2063,24 +2065,23 @@ namespace grb { ); // dynamic checks +#ifdef _DEBUG + std::cout << "Called grb::set (matrix-to-matrix-masked, reference)\n"; +#endif assert( phase != TRY ); - const size_t nrows = grb::nrows( C ); const size_t ncols = grb::ncols( C ); - const size_t m = grb::nrows( M ); const size_t n = grb::ncols( M ); - /*grb::Monoid< - grb::operators::left_assign< OutputType >, - grb::identities::zero - > dummyMonoid;*/ - + // check for trivial dispatch first (otherwise the below checks fail when they + // should not) if( m == 0 || n == 0 ) { - // If the mask has a null size, it will be ignored + // If the mask is empty, ignore it return set< descr >( C, A, phase ); } + // dynamic checks, continued if( nrows != grb::nrows( A ) || nrows != m ) { return MISMATCH; } @@ -2089,20 +2090,21 @@ namespace grb { return MISMATCH; } - const auto &mask_raw = internal::getCRS( M ); - - const auto &A_raw = internal::getCRS( A ); - + // go for implementation, preliminaries: size_t nzc = 0; - char * mask_arr = nullptr; char * mask_buf = nullptr; MaskType * mask_valbuf = nullptr; - internal::getMatrixBuffers( mask_arr, mask_buf, mask_valbuf, 1, M ); - + const auto &A_raw = internal::getCRS( A ); + const auto &mask_raw = internal::getCRS( M ); internal::Coordinates< reference > mask_coors; + internal::getMatrixBuffers( mask_arr, mask_buf, mask_valbuf, 1, M ); mask_coors.set( mask_arr, false, mask_buf, ncols ); + // we now have one (guaranteed) SPA, which is mask_coors. We now are going to + // check how many more SPAs ideally we would like (for reference_omp), and + // then go about trying to get those. If, finally, we get just this one SPA, + // we will go into this mostly-sequential code (essentially, big-Omega nrows): for( size_t i = 0; i < nrows; ++i ) { mask_coors.clear(); for( auto k = mask_raw.col_start[ i ]; k < mask_raw.col_start[ i + 1 ]; ++k ) { @@ -2112,22 +2114,24 @@ namespace grb { } } #ifdef _H_GRB_REFERENCE_OMP_BLAS3 - #pragma omp parallel for reduction(+:nzc) + #pragma omp parallel for reduction( +: nzc ) \ + schedule( dynamic, config::CACHE_LINE_SIZE::value() ) #endif for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) { const auto k_col = A_raw.row_index[ k ]; if( mask_coors.assigned( k_col ) ) { - nzc++; + (void) nzc++; } } } + // we now have a count. If we're in the resize phase that means we're done: if( phase == RESIZE ) { return resize( C, nzc ); } + // otherwise, we now compute the output. We start with checking capacity assert( phase == EXECUTE ); - if( capacity( C ) < nzc ) { #ifdef _DEBUG std::cout << "\t insufficient capacity to complete " @@ -2137,26 +2141,29 @@ namespace grb { if( clear_rc != SUCCESS ) { return PANIC; } else { - return FAILED; + return ILLEGAL; } } + // get output CRS and CCS structures + // TODO: check for crs_only descriptor auto &CRS_raw = internal::getCRS( C ); auto &CCS_raw = internal::getCCS( C ); - config::NonzeroIndexType * C_col_index = internal::template getReferenceBuffer< typename config::NonzeroIndexType >( ncols + 1 ); - CRS_raw.col_start[ 0 ] = 0; #ifdef _H_GRB_REFERENCE_OMP_BLAS3 + // TODO ALPify the below #pragma omp parallel for simd #endif for( size_t j = 0; j <= ncols; ++j ) { CCS_raw.col_start[ j ] = 0; + C_col_index[ j ] = 0; } - + // do counting sort, phase 1 -- also this loop should employ the same + // parallelisation strategy during counting nzc = 0; for( size_t i = 0; i < nrows; ++i ) { mask_coors.clear(); @@ -2169,33 +2176,30 @@ namespace grb { for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) { const auto k_col = A_raw.row_index[ k ]; if( mask_coors.assigned( k_col ) ) { - nzc++; - CCS_raw.col_start[ k_col + 1 ]++; + (void) nzc++; + (void) (CCS_raw.col_start[ k_col + 1 ])++; } } CRS_raw.col_start[ i + 1 ] = nzc; } + // TODO this is a prefix sum -- use the OMP utility function here to + // parallelise it for( size_t j = 1; j < ncols; ++j ) { CCS_raw.col_start[ j + 1 ] += CCS_raw.col_start[ j ]; } -#ifdef _H_GRB_REFERENCE_OMP_BLAS3 - #pragma omp parallel for simd -#endif - for( size_t j = 0; j < ncols; ++j ) { - C_col_index[ j ] = 0; - } - - - // use previously computed CCS offset array to update CCS during the - // computational phase + // do counting sort, phase 2 -- use previously computed CCS offset array to + // update CCS during the computational phase. Also this loop should employ + // the same (multiple-SPA) parallelisation strategy as above nzc = 0; for( size_t i = 0; i < nrows; ++i ) { mask_coors.clear(); for( auto k = mask_raw.col_start[ i ]; k < mask_raw.col_start[ i + 1 ]; ++k ) { const auto k_col = mask_raw.row_index[ k ]; - if( utils::interpretMatrixMask< descr, MaskType >( true, mask_raw.getValues(), k ) ) { + if( utils::interpretMatrixMask< descr, MaskType >( + true, mask_raw.getValues(), k ) + ) { mask_coors.assign( k_col ); } } @@ -2206,30 +2210,28 @@ namespace grb { CRS_raw.row_index[ nzc ] = k_col; CRS_raw.setValue( nzc, val ); const size_t CCS_index = C_col_index[ k_col ] + CCS_raw.col_start[ k_col ]; - C_col_index[ k_col ]++; + (void) C_col_index[ k_col ]++; CCS_raw.row_index[ CCS_index ] = i; CCS_raw.setValue( CCS_index, val ); - nzc++; + (void) nzc++; } } } #ifndef NDEBUG + #ifdef _H_GRB_REFERENCE_OMP_BLAS3 + #pragma omp parallel schedule( static, config::CACHE_LINE_SIZE::value() ) for( size_t j = 0; j < ncols; ++j ) { assert( CCS_raw.col_start[ j + 1 ] - CCS_raw.col_start[ j ] == C_col_index[ j ] ); } + #endif #endif - internal::setCurrentNonzeroes( C, nzc ); + // done return SUCCESS; } - - - - - /** * Ingests raw data into a GraphBLAS vector. * From 777f64018ec2816ad800f237ff434bcbb47b9a27 Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sat, 23 Aug 2025 14:23:22 +0200 Subject: [PATCH 15/39] Fix static error --- include/graphblas/reference/io.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/graphblas/reference/io.hpp b/include/graphblas/reference/io.hpp index 447a2d9b9..301829187 100644 --- a/include/graphblas/reference/io.hpp +++ b/include/graphblas/reference/io.hpp @@ -956,9 +956,9 @@ namespace grb { "internal error. Please submit a bug report." ); static_assert( - !(descr & descriptors::invert_mask), "internal::grb::set_copy called with " - "the invert_mask descriptor. This is an internal error; please submit a " - "bug report." + !A_is_mask || !(descr & descriptors::invert_mask), + "internal::grb::set_copy called with the invert_mask descriptor. This is " + "an internal error; please submit a bug report." ); // run-time checks From 3daf10a1650b139b130a58dc3c4cb1505233f9f4 Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sat, 23 Aug 2025 14:24:09 +0200 Subject: [PATCH 16/39] Fix silly snafu my side --- include/graphblas/reference/io.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/graphblas/reference/io.hpp b/include/graphblas/reference/io.hpp index 301829187..cab931989 100644 --- a/include/graphblas/reference/io.hpp +++ b/include/graphblas/reference/io.hpp @@ -2220,11 +2220,11 @@ namespace grb { #ifndef NDEBUG #ifdef _H_GRB_REFERENCE_OMP_BLAS3 #pragma omp parallel schedule( static, config::CACHE_LINE_SIZE::value() ) + #endif for( size_t j = 0; j < ncols; ++j ) { assert( CCS_raw.col_start[ j + 1 ] - CCS_raw.col_start[ j ] == C_col_index[ j ] ); } - #endif #endif internal::setCurrentNonzeroes( C, nzc ); From a0c297e1a32d921eb488a988a7708220447dd1be Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sat, 23 Aug 2025 15:39:03 +0200 Subject: [PATCH 17/39] Made masked tests generic, and add test for empty mask. Passes OK --- tests/unit/matrixSet.cpp | 338 +++++++++++++++++++++++++++------------ 1 file changed, 233 insertions(+), 105 deletions(-) diff --git a/tests/unit/matrixSet.cpp b/tests/unit/matrixSet.cpp index 891b91c85..27eeb75c9 100644 --- a/tests/unit/matrixSet.cpp +++ b/tests/unit/matrixSet.cpp @@ -19,6 +19,7 @@ #include #include +#include using namespace grb; @@ -27,36 +28,214 @@ static const int data1[ 15 ] = { 4, 7, 4, 6, 4, 7, 1, 7, 3, 6, 7, 5, 1, 8, 7 }; static const size_t I[ 15 ] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 8, 7, 6 }; static const size_t J[ 15 ] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 5, 7, 5, 1 }; -void grb_program( const size_t &n, grb::RC &rc ) { - // initialize test - int chk[ 10 ][ 10 ]; - for( size_t i = 0; i < 10; ++i ) { - for( size_t j = 0; j < 10; ++j ) { - chk[ i ][ j ] = 0; +/** Generic implementation of masked tests */ +template< typename Tout, typename Tmask, typename Tin > +RC masked_tests_generic_impl( + RC &rc, grb::Matrix< Tout > &output, + const grb::Matrix< Tmask > &mask, const grb::Matrix< Tin > &input, + const size_t n +) { + const bool emptyMask = grb::nrows( mask ) == 0 || grb::ncols( mask ) == 0; + + std::cout << "\t\t with structural descriptor\n"; + rc = grb::set< descriptors::structural >( output, mask, input ); + if( rc != SUCCESS ) { + std::cerr << "\t grb::set structural (matrix to matrix masked) FAILED\n"; + return rc; + } + if( grb::nnz( output ) != 2 * n - 1 ) { + std::cerr << "\t unexpected number of output elements ( " + << grb::nnz( output ) << " ), expected " << 2 * n - 1 <<".\n"; + rc = FAILED; + } + for( const auto &triplet : output ) { + if( + triplet.first.first != triplet.first.second && + triplet.first.first != triplet.first.second - 1 + ) { + std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", " + << triplet.first.second << " ), value " << triplet.second << ".\n"; + rc = FAILED; + } + if( triplet.first.first != static_cast< size_t >(triplet.second) ) { + std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", " + << triplet.first.second << " ) with value " << triplet.second; + std::cerr << ", expected value "<< triplet.first.first <<".\n"; + rc = FAILED; } } - for( size_t k = 0; k < 15; ++k ) { - chk[ I[ k ] ][ J[ k ] ] = data1[ k ]; + if( rc != SUCCESS ) { return rc; } + + std::cout << "\t\t without descriptor\n"; + rc = grb::set( output, mask, input ); + if( rc != SUCCESS ) { + std::cerr << "\t grb::set (matrix to matrix masked) FAILED\n"; + return rc; + } + if( emptyMask ) { + if( grb::nnz( output ) != 2 * n - 1 ) { + std::cerr << "\t unexpected number of output elements ( " + << grb::nnz( output ) << " ), expected " << 2 * n - 1 <<".\n"; + rc = FAILED; + } + } else { + if( grb::nnz( output ) != n ) { + std::cerr << "\t unexpected number of output elements ( " + << grb::nnz( output ) << " ), expected " << n <<".\n"; + rc = FAILED; + } + } + for( const auto &triplet : output ) { + if( emptyMask ) { + if( triplet.first.first != triplet.first.second && + triplet.first.first != triplet.first.second - 1 + ) { + std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", " + << triplet.first.second << " ), value " << triplet.second << ".\n"; + rc = FAILED; + } + } else { + if( triplet.first.first != triplet.first.second ) { + std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", " + << triplet.first.second << " ), value " << triplet.second << ".\n"; + rc = FAILED; + } + } + if( triplet.first.first != static_cast< size_t >(triplet.second) ) { + std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", " + << triplet.first.second << " ) with value " << triplet.second; + std::cerr << ", expected value "<< triplet.first.first <<".\n"; + rc = FAILED; + } } + if( rc != SUCCESS ) { return rc; } + + // done + return rc; +} + +/** Implementation of masked tests for non-void masks (nvm). */ +template< typename Tout, typename Tmask, typename Tin > +RC masked_tests_nvm_impl( + RC &rc, grb::Matrix< Tout > &output, + const grb::Matrix< Tmask > &mask, const grb::Matrix< Tin > &input, + const size_t n +) { + const bool emptyMask = grb::nrows( mask ) == 0 || grb::ncols( mask ) == 0; + + std::cout << "\t\t with invert_mask descriptor\n"; + rc = grb::set< descriptors::invert_mask >( output, mask, input ); + if( rc != SUCCESS ) { + std::cerr << "\t grb::set invert mask (matrix to matrix masked) FAILED\n"; + return rc; + } + if( emptyMask ) { + if( grb::nnz( output ) != 2 * n - 1 ) { + std::cerr << "\t unexpected number of output elements ( " + << grb::nnz( output ) << " ), expected " << 2 * n - 1 <<".\n"; + rc = FAILED; + } + } else { + if( grb::nnz( output ) != n - 1 ) { + std::cerr << "\t unexpected number of output elements ( " + << grb::nnz( output ) << " ), expected " << n - 1 <<".\n"; + rc = FAILED; + } + } + for( const auto &triplet : output ) { + if( emptyMask ) { + if( triplet.first.first != triplet.first.second && + triplet.first.first != triplet.first.second - 1 + ) { + std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", " + << triplet.first.second << " ), value " << triplet.second << ".\n"; + rc = FAILED; + } + } else { + if( triplet.first.first != triplet.first.second - 1 ) { + std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", " + << triplet.first.second << " ), value " << triplet.second << ".\n"; + rc = FAILED; + } + } + if( triplet.first.first != static_cast< size_t >(triplet.second) ) { + std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", " + << triplet.first.second << " ) with value " << triplet.second; + std::cerr << ", expected value "<< triplet.first.first <<".\n"; + rc = FAILED; + } + } + if( rc != SUCCESS ) { return rc; } + + // done + return rc; +} + +/** Dispatch for generic masked tests */ +template< typename Tout, typename Tmask, typename Tin > +RC masked_tests( + RC &rc, grb::Matrix< Tout > &output, + const grb::Matrix< Tmask > &mask, const grb::Matrix< Tin > &input, + const size_t n +) { + if( masked_tests_generic_impl( rc, output, mask, input, n ) != SUCCESS ) { + return rc; + } + return masked_tests_nvm_impl( rc, output, mask, input, n ); +} + +/** Specialised dispatched for masked tests with void masks */ +template< typename Tout, typename Tin > +RC masked_tests( + RC &rc, grb::Matrix< Tout > &output, + const grb::Matrix< void > &mask, const grb::Matrix< Tin > &input, + const size_t n +) { + const grb::RC ret = masked_tests_generic_impl( rc, output, mask, input, n ); + std::cout << "\t\t invert_mask descriptor SKIPPED\n"; + return ret; +} + +void grb_program( const size_t &n, grb::RC &rc ) { + // initialize non-masked test containers grb::Matrix< double > A( n, n ); grb::Matrix< double > B( n, n ); grb::Matrix< void > C( n, n ); grb::Matrix< void > D( n, n ); grb::Matrix< unsigned int > E( n, n ); + + // initalize masked test containers grb::Matrix< int > mask( n, n ); - grb::Matrix< int > output( n, n ); + grb::Matrix< bool > maskBool( n, n ); + grb::Matrix< void > maskVoid( n, n ); + grb::Matrix< double > maskEmpty( 0, 0 ); grb::Matrix< int > input( n, n ); + grb::Matrix< void > inputVoid( n, n ); + grb::Matrix< float > inputFloat( n, n ); + grb::Matrix< int > output( n, n ); + grb::Matrix< void > outputVoid( n, n ); + // initialise non-masked test data + int chk[ 10 ][ 10 ]; + for( size_t i = 0; i < 10; ++i ) { + for( size_t j = 0; j < 10; ++j ) { + chk[ i ][ j ] = 0; + } + } + for( size_t k = 0; k < 15; ++k ) { + chk[ I[ k ] ][ J[ k ] ] = data1[ k ]; + } + rc = grb::resize( A, 15 ); if( rc == SUCCESS ) { rc = grb::buildMatrixUnique( A, I, J, data1, 15, SEQUENTIAL ); for( const auto &triplet : A ) { if( triplet.first.first >= 10 || triplet.first.second >= 10 ) { - std::cerr << "\tunexpected entry at A( " << triplet.first.first << ", " + std::cerr << "\t unexpected entry at A( " << triplet.first.first << ", " << triplet.first.second << " ).\n"; rc = FAILED; } else if( chk[ triplet.first.first ][ triplet.first.second ] != triplet.second ) { - std::cerr << "\tunexpected entry at A( " << triplet.first.first << ", " + std::cerr << "\t unexpected entry at A( " << triplet.first.first << ", " << triplet.first.second << " ) with value " << triplet.second; if( chk[ triplet.first.first ][ triplet.first.second ] == 0 ) { std::cerr << ", expected no entry here.\n"; @@ -68,6 +247,15 @@ void grb_program( const size_t &n, grb::RC &rc ) { } } } + rc = rc ? rc : grb::resize( B, 15 ); + rc = rc ? rc : grb::resize( C, 15 ); + rc = rc ? rc : grb::resize( D, 15 ); + rc = rc ? rc : grb::resize( E, 15 ); + rc = rc ? rc : grb::resize(output, 2 * n - 1 ); + if( rc != SUCCESS || grb::nnz( A ) != 15 ) { + std::cerr << "\tinitialisation FAILED\n"; + return; + } // initialise data for masked-set tests // - mask will be an n by n matrix with its diagonal set to 1 and its @@ -95,32 +283,37 @@ void grb_program( const size_t &n, grb::RC &rc ) { std::cerr << "\t buildMatrixUnique of mask matrix FAILED\n"; return; } - rc = grb::buildMatrixUnique( input, I_mask, J_mask, input_vals, 2 * n - 1, + rc = grb::buildMatrixUnique( maskBool, I_mask, J_mask, mask_vals, 2 * n - 1, SEQUENTIAL ); if( rc != SUCCESS ) { - std::cerr << "\t buildMatrixUnique of input matrix FAILED\n"; + std::cerr << "\t buildMatrixUnique of maskBool matrix FAILED\n"; return; } - - if( rc == SUCCESS ) { - rc = grb::resize( B, 15 ); - } - if( rc == SUCCESS ) { - rc = grb::resize( C, 15 ); - } - if( rc == SUCCESS ) { - rc = grb::resize( D, 15 ); + try { + maskVoid = grb::algorithms::matrices< void >::identity( n ); + } catch( ... ) { + std::cerr << "\t constructing maskVoid FAILED\n"; + return; } - if( rc == SUCCESS ) { - rc = grb::resize( E, 15 ); + rc = grb::buildMatrixUnique( input, I_mask, J_mask, input_vals, 2 * n - 1, + SEQUENTIAL ); + if( rc != SUCCESS ) { + std::cerr << "\t buildMatrixUnique of input matrix FAILED\n"; + return; } - if( rc == SUCCESS ) { - rc = grb::resize(output, 2 * n - 1 ); + rc = grb::buildMatrixUnique( inputFloat, I_mask, J_mask, input_vals, 2 * n - 1, + SEQUENTIAL ); + if( rc != SUCCESS ) { + std::cerr << "\t buildMatrixUnique of inputFloat matrix FAILED\n"; + return; } - if( rc != SUCCESS || grb::nnz( A ) != 15 ) { - std::cerr << "\tinitialisation FAILED\n"; + try { + inputVoid = grb::algorithms::matrices< void >::identity( n ); + } catch( ... ) { + std::cerr << "\t constructing inputVoid FAILED\n"; return; } + std::cout << "\t test initialisation complete\n"; // check grb::set for non-voids @@ -137,11 +330,11 @@ void grb_program( const size_t &n, grb::RC &rc ) { } for( const auto &triplet : B ) { if( triplet.first.first >= 10 || triplet.first.second >= 10 ) { - std::cerr << "\tunexpected entry at B( " << triplet.first.first << ", " + std::cerr << "\t unexpected entry at B( " << triplet.first.first << ", " << triplet.first.second << " ).\n"; rc = FAILED; } else if( chk[ triplet.first.first ][ triplet.first.second ] != triplet.second ) { - std::cerr << "\tunexpected entry at B( " << triplet.first.first << ", " + std::cerr << "\t unexpected entry at B( " << triplet.first.first << ", " << triplet.first.second << " ) with value " << triplet.second; if( chk[ triplet.first.first ][ triplet.first.second ] == 0 ) { std::cerr << ", expected no entry here.\n"; @@ -216,14 +409,14 @@ void grb_program( const size_t &n, grb::RC &rc ) { } for( const auto &triplet : E ) { if( triplet.first.first >= 10 || triplet.first.second >= 10 ) { - std::cerr << "\tunexpected entry at E( " << triplet.first.first << ", " + std::cerr << "\t unexpected entry at E( " << triplet.first.first << ", " << triplet.first.second << " ), value " << triplet.second << ".\n"; rc = FAILED; } else if( static_cast< unsigned int >( chk[ triplet.first.first ][ triplet.first.second ] ) != triplet.second ) { - std::cerr << "\tunexpected entry at E( " << triplet.first.first << ", " + std::cerr << "\t unexpected entry at E( " << triplet.first.first << ", " << triplet.first.second << " ) with value " << triplet.second; if( chk[ triplet.first.first ][ triplet.first.second ] == 0 ) { std::cerr << ", expected no entry here.\n"; @@ -250,11 +443,11 @@ void grb_program( const size_t &n, grb::RC &rc ) { } for( const auto &triplet : E ) { if( triplet.first.first >= 10 || triplet.first.second >= 10 ) { - std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " + std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ), value " << triplet.second << ".\n"; rc = FAILED; } else if( 117 != triplet.second ) { - std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " + std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", " << triplet.first.second << " ) with value " << triplet.second; if( chk[ triplet.first.first ][ triplet.first.second ] == 0 ) { std::cerr << ", expected no entry here.\n"; @@ -268,80 +461,15 @@ void grb_program( const size_t &n, grb::RC &rc ) { //check masked matrix set - rc = grb::set< descriptors::structural >( output, mask, input ); - if( rc != SUCCESS ) { - std::cerr << "\t grb::set structural (matrix to matrix masked) FAILED\n"; + std::cout << "\t testing set( matrix, mask, matrix ), non-void, no-cast, " + << "empty mask\n"; + if( masked_tests( rc, output, maskEmpty, input, n ) != grb::SUCCESS ) { return; } - if( grb::nnz( output ) != 2 * n - 1 ) { - std::cerr << "\t unexpected number of output elements ( " - << grb::nnz( output ) << " ), expected " << 2 * n - 1 <<".\n"; - rc = FAILED; - } - for( const auto &triplet : output ) { - if( - triplet.first.first != triplet.first.second && - triplet.first.first != triplet.first.second - 1 - ) { - std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " - << triplet.first.second << " ), value " << triplet.second << ".\n"; - rc = FAILED; - } if( triplet.first.first != static_cast< size_t >(triplet.second) ) { - std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " - << triplet.first.second << " ) with value " << triplet.second; - std::cerr << ", expected value "<< triplet.first.first <<".\n"; - rc = FAILED; - } - } - if( rc != SUCCESS ) { return; } - - rc = grb::set( output, mask, input ); - if( rc != SUCCESS ) { - std::cerr << "\t grb::set (matrix to matrix masked) FAILED\n"; - return; - } - if( grb::nnz( output ) != n ) { - std::cerr << "\t unexpected number of output elements ( " - << grb::nnz( output ) << " ), expected " << n <<".\n"; - rc = FAILED; - } - for( const auto &triplet : output ) { - if( triplet.first.first != triplet.first.second ) { - std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " - << triplet.first.second << " ), value " << triplet.second << ".\n"; - rc = FAILED; - } if( triplet.first.first != static_cast< size_t >(triplet.second) ) { - std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " - << triplet.first.second << " ) with value " << triplet.second; - std::cerr << ", expected value "<< triplet.first.first <<".\n"; - rc = FAILED; - } - } - if( rc != SUCCESS ) { return; } - rc = grb::set< descriptors::invert_mask >( output, mask, input ); - if( rc != SUCCESS ) { - std::cerr << "\t grb::set invert mask (matrix to matrix masked) FAILED\n"; - return; - } - if( grb::nnz( output ) != n - 1 ) { - std::cerr << "\t unexpected number of output elements ( " - << grb::nnz( output ) << " ), expected " << n - 1 <<".\n"; - rc = FAILED; - } - for( const auto &triplet : output ) { - if( triplet.first.first != triplet.first.second - 1 ) { - std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " - << triplet.first.second << " ), value " << triplet.second << ".\n"; - rc = FAILED; - } if( triplet.first.first != static_cast< size_t >(triplet.second) ) { - std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", " - << triplet.first.second << " ) with value " << triplet.second; - std::cerr << ", expected value "<< triplet.first.first <<".\n"; - rc = FAILED; - } - } - if( rc != SUCCESS ) { return; } + std::cout << "\t testing set( matrix, mask, matrix ), non-void, no-cast, " + << "non-empty mask\n"; + if( masked_tests( rc, output, mask, input, n ) != grb::SUCCESS ) { return; } } int main( int argc, char ** argv ) { From 4f5fedd61039fe5b5bcca376e61de92319beb422 Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sat, 23 Aug 2025 15:46:46 +0200 Subject: [PATCH 18/39] Enable casting tests, use Boolean and non-Boolean masks --- tests/unit/matrixSet.cpp | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/unit/matrixSet.cpp b/tests/unit/matrixSet.cpp index 27eeb75c9..d1943d647 100644 --- a/tests/unit/matrixSet.cpp +++ b/tests/unit/matrixSet.cpp @@ -470,6 +470,30 @@ void grb_program( const size_t &n, grb::RC &rc ) { std::cout << "\t testing set( matrix, mask, matrix ), non-void, no-cast, " << "non-empty mask\n"; if( masked_tests( rc, output, mask, input, n ) != grb::SUCCESS ) { return; } + + std::cout << "\t testing set( matrix, mask, matrix ), non-void, no-cast, " + << "non-empty Boolean mask\n"; + if( masked_tests( rc, output, maskBool, input, n ) != grb::SUCCESS ) { + return; + } + + std::cout << "\t testing set( matrix, mask, matrix ), non-void, casting from " + << "float to int, empty mask\n"; + if( masked_tests( rc, output, maskEmpty, inputFloat, n ) != grb::SUCCESS ) { + return; + } + + std::cout << "\t testing set( matrix, mask, matrix ), non-void, casting from " + << "float to int, non-empty mask\n"; + if( masked_tests( rc, output, mask, inputFloat, n ) != grb::SUCCESS ) { + return; + } + + std::cout << "\t testing set( matrix, mask, matrix ), non-void, casting from " + << "float to int, non-empty Boolean mask\n"; + if( masked_tests( rc, output, maskBool, inputFloat, n ) != grb::SUCCESS ) { + return; + } } int main( int argc, char ** argv ) { From 47029c5fc8a544c402039982479c14bcb2b556b6 Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sat, 23 Aug 2025 16:12:04 +0200 Subject: [PATCH 19/39] Bugfix for no_casting set-matrix-to-matrix --- include/graphblas/reference/io.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/graphblas/reference/io.hpp b/include/graphblas/reference/io.hpp index cab931989..36a5c3ff6 100644 --- a/include/graphblas/reference/io.hpp +++ b/include/graphblas/reference/io.hpp @@ -951,7 +951,8 @@ namespace grb { ); static_assert( ( !(descr & descriptors::no_casting) || - ( A_is_mask && std::is_same< InputType2, OutputType >::value ) ), + ( A_is_mask && std::is_same< InputType2, OutputType >::value ) || + ( !A_is_mask && std::is_same< InputType1, OutputType >::value ) ), "grb::internal::set_copy called with non-matching value types. This is an " "internal error. Please submit a bug report." ); From dff5ec278e3ce1754b5e0168c5c08ed7eb4babbb Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sat, 23 Aug 2025 16:12:31 +0200 Subject: [PATCH 20/39] Add no-casting tests --- tests/unit/matrixSet.cpp | 43 ++++++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/tests/unit/matrixSet.cpp b/tests/unit/matrixSet.cpp index d1943d647..b1079a7db 100644 --- a/tests/unit/matrixSet.cpp +++ b/tests/unit/matrixSet.cpp @@ -29,7 +29,10 @@ static const size_t I[ 15 ] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 8, 7, 6 }; static const size_t J[ 15 ] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 5, 7, 5, 1 }; /** Generic implementation of masked tests */ -template< typename Tout, typename Tmask, typename Tin > +template< + Descriptor descr = descriptors::no_operation, + typename Tout, typename Tmask, typename Tin +> RC masked_tests_generic_impl( RC &rc, grb::Matrix< Tout > &output, const grb::Matrix< Tmask > &mask, const grb::Matrix< Tin > &input, @@ -38,7 +41,7 @@ RC masked_tests_generic_impl( const bool emptyMask = grb::nrows( mask ) == 0 || grb::ncols( mask ) == 0; std::cout << "\t\t with structural descriptor\n"; - rc = grb::set< descriptors::structural >( output, mask, input ); + rc = grb::set< descr | descriptors::structural >( output, mask, input ); if( rc != SUCCESS ) { std::cerr << "\t grb::set structural (matrix to matrix masked) FAILED\n"; return rc; @@ -67,7 +70,7 @@ RC masked_tests_generic_impl( if( rc != SUCCESS ) { return rc; } std::cout << "\t\t without descriptor\n"; - rc = grb::set( output, mask, input ); + rc = grb::set< descr >( output, mask, input ); if( rc != SUCCESS ) { std::cerr << "\t grb::set (matrix to matrix masked) FAILED\n"; return rc; @@ -115,7 +118,10 @@ RC masked_tests_generic_impl( } /** Implementation of masked tests for non-void masks (nvm). */ -template< typename Tout, typename Tmask, typename Tin > +template< + Descriptor descr = descriptors::no_operation, + typename Tout, typename Tmask, typename Tin +> RC masked_tests_nvm_impl( RC &rc, grb::Matrix< Tout > &output, const grb::Matrix< Tmask > &mask, const grb::Matrix< Tin > &input, @@ -124,7 +130,7 @@ RC masked_tests_nvm_impl( const bool emptyMask = grb::nrows( mask ) == 0 || grb::ncols( mask ) == 0; std::cout << "\t\t with invert_mask descriptor\n"; - rc = grb::set< descriptors::invert_mask >( output, mask, input ); + rc = grb::set< descr | descriptors::invert_mask >( output, mask, input ); if( rc != SUCCESS ) { std::cerr << "\t grb::set invert mask (matrix to matrix masked) FAILED\n"; return rc; @@ -184,7 +190,7 @@ RC masked_tests( return masked_tests_nvm_impl( rc, output, mask, input, n ); } -/** Specialised dispatched for masked tests with void masks */ +/** Specialised dispatch for masked tests with void masks */ template< typename Tout, typename Tin > RC masked_tests( RC &rc, grb::Matrix< Tout > &output, @@ -196,6 +202,31 @@ RC masked_tests( return ret; } +/** Specialised dispatch for masked tests with no-cast domains */ +template< typename T > +RC masked_tests( + RC &rc, grb::Matrix< T > &output, + const grb::Matrix< bool > &mask, const grb::Matrix< T > &input, + const size_t n +) { + if( masked_tests_generic_impl( rc, output, mask, input, n ) != SUCCESS ) { + return rc; + } + if( masked_tests_nvm_impl( rc, output, mask, input, n ) != SUCCESS ) { + return rc; + } + std::cout << "\t re-running previous tests with no_casting descriptor\n"; + if( + masked_tests_generic_impl< descriptors::no_casting >( + rc, output, mask, input, n + ) != SUCCESS + ) { + return rc; + } + return masked_tests_nvm_impl< descriptors::no_casting >( + rc, output, mask, input, n ); +} + void grb_program( const size_t &n, grb::RC &rc ) { // initialize non-masked test containers grb::Matrix< double > A( n, n ); From ebbd717c01fc4c312f21279e3d4d649cc4d6f83b Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sat, 23 Aug 2025 16:59:05 +0200 Subject: [PATCH 21/39] Bugfix --- include/graphblas/reference/io.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/graphblas/reference/io.hpp b/include/graphblas/reference/io.hpp index 36a5c3ff6..bcce52b34 100644 --- a/include/graphblas/reference/io.hpp +++ b/include/graphblas/reference/io.hpp @@ -2207,13 +2207,13 @@ namespace grb { for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) { const auto k_col = A_raw.row_index[ k ]; if( mask_coors.assigned( k_col ) ) { - OutputType val = A_raw.getValue( k, *( (OutputType*) nullptr ) ); + constexpr int zero = 0; CRS_raw.row_index[ nzc ] = k_col; - CRS_raw.setValue( nzc, val ); + CRS_raw.setValue( nzc, A_raw.getValue( k, zero ) ); const size_t CCS_index = C_col_index[ k_col ] + CCS_raw.col_start[ k_col ]; (void) C_col_index[ k_col ]++; CCS_raw.row_index[ CCS_index ] = i; - CCS_raw.setValue( CCS_index, val ); + CCS_raw.setValue( CCS_index, A_raw.getValue( k, zero ) ); (void) nzc++; } } From 1525dbe567c86e48eb58985efa57ed0bcc498145 Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sat, 23 Aug 2025 17:09:55 +0200 Subject: [PATCH 22/39] Better debug tracing --- include/graphblas/reference/io.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/graphblas/reference/io.hpp b/include/graphblas/reference/io.hpp index bcce52b34..f201daae4 100644 --- a/include/graphblas/reference/io.hpp +++ b/include/graphblas/reference/io.hpp @@ -2066,7 +2066,7 @@ namespace grb { ); // dynamic checks -#ifdef _DEBUG +#ifdef _DEBUG_REFERENCE_IO std::cout << "Called grb::set (matrix-to-matrix-masked, reference)\n"; #endif assert( phase != TRY ); @@ -2078,6 +2078,9 @@ namespace grb { // check for trivial dispatch first (otherwise the below checks fail when they // should not) if( m == 0 || n == 0 ) { +#ifdef _DEBUG_REFERENCE_IO + std::cout << "\t delegating to unmasked matrix-to-matrix set, reference\n"; +#endif // If the mask is empty, ignore it return set< descr >( C, A, phase ); } From 86b156bc51a986e1b3b693a3b8397439f64a09e8 Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sat, 23 Aug 2025 17:10:13 +0200 Subject: [PATCH 23/39] Enable masked void-to-void matrix sets --- tests/unit/matrixSet.cpp | 171 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 165 insertions(+), 6 deletions(-) diff --git a/tests/unit/matrixSet.cpp b/tests/unit/matrixSet.cpp index b1079a7db..7ab40f262 100644 --- a/tests/unit/matrixSet.cpp +++ b/tests/unit/matrixSet.cpp @@ -117,6 +117,83 @@ RC masked_tests_generic_impl( return rc; } +/** Specialisation for void output */ +template< + Descriptor descr = descriptors::no_operation, + typename Tmask, typename Tin +> +RC masked_tests_generic_impl( + RC &rc, grb::Matrix< void > &output, + const grb::Matrix< Tmask > &mask, const grb::Matrix< Tin > &input, + const size_t n +) { + const bool emptyMask = grb::nrows( mask ) == 0 || grb::ncols( mask ) == 0; + + std::cout << "\t\t with structural descriptor\n"; + rc = grb::set< descr | descriptors::structural >( output, mask, input ); + if( rc != SUCCESS ) { + std::cerr << "\t grb::set structural (matrix to matrix masked) FAILED\n"; + return rc; + } + if( grb::nnz( output ) != 2 * n - 1 ) { + std::cerr << "\t unexpected number of output elements ( " + << grb::nnz( output ) << " ), expected " << 2 * n - 1 <<".\n"; + rc = FAILED; + } + for( const auto &triplet : output ) { + if( + triplet.first != triplet.second && + triplet.first != triplet.second - 1 + ) { + std::cerr << "\t unexpected entry at ( " << triplet.first << ", " + << triplet.second << " ), no value (pattern matrix).\n"; + rc = FAILED; + } + } + if( rc != SUCCESS ) { return rc; } + + std::cout << "\t\t without descriptor\n"; + rc = grb::set< descr >( output, mask, input ); + if( rc != SUCCESS ) { + std::cerr << "\t grb::set (matrix to matrix masked) FAILED\n"; + return rc; + } + if( emptyMask ) { + if( grb::nnz( output ) != 2 * n - 1 ) { + std::cerr << "\t unexpected number of output elements ( " + << grb::nnz( output ) << " ), expected " << 2 * n - 1 <<".\n"; + rc = FAILED; + } + } else { + if( grb::nnz( output ) != n ) { + std::cerr << "\t unexpected number of output elements ( " + << grb::nnz( output ) << " ), expected " << n <<".\n"; + rc = FAILED; + } + } + for( const auto &triplet : output ) { + if( emptyMask ) { + if( triplet.first != triplet.second && + triplet.first != triplet.second - 1 + ) { + std::cerr << "\t unexpected entry at ( " << triplet.first << ", " + << triplet.second << " ), no value (pattern matrix).\n"; + rc = FAILED; + } + } else { + if( triplet.first != triplet.second ) { + std::cerr << "\t unexpected entry at ( " << triplet.first << ", " + << triplet.second << " ), no value (pattern matrix).\n"; + rc = FAILED; + } + } + } + if( rc != SUCCESS ) { return rc; } + + // done + return rc; +} + /** Implementation of masked tests for non-void masks (nvm). */ template< Descriptor descr = descriptors::no_operation, @@ -177,6 +254,60 @@ RC masked_tests_nvm_impl( return rc; } +/** Specialisation for void output */ +template< + Descriptor descr = descriptors::no_operation, + typename Tmask, typename Tin +> +RC masked_tests_nvm_impl( + RC &rc, grb::Matrix< void > &output, + const grb::Matrix< Tmask > &mask, const grb::Matrix< Tin > &input, + const size_t n +) { + const bool emptyMask = grb::nrows( mask ) == 0 || grb::ncols( mask ) == 0; + + std::cout << "\t\t with invert_mask descriptor\n"; + rc = grb::set< descr | descriptors::invert_mask >( output, mask, input ); + if( rc != SUCCESS ) { + std::cerr << "\t grb::set invert mask (matrix to matrix masked) FAILED\n"; + return rc; + } + if( emptyMask ) { + if( grb::nnz( output ) != 2 * n - 1 ) { + std::cerr << "\t unexpected number of output elements ( " + << grb::nnz( output ) << " ), expected " << 2 * n - 1 <<".\n"; + rc = FAILED; + } + } else { + if( grb::nnz( output ) != n - 1 ) { + std::cerr << "\t unexpected number of output elements ( " + << grb::nnz( output ) << " ), expected " << n - 1 <<".\n"; + rc = FAILED; + } + } + for( const auto &triplet : output ) { + if( emptyMask ) { + if( triplet.first != triplet.second && + triplet.first != triplet.second - 1 + ) { + std::cerr << "\t unexpected entry at ( " << triplet.first << ", " + << triplet.second << " ), no value (pattern matrix).\n"; + rc = FAILED; + } + } else { + if( triplet.first != triplet.second - 1 ) { + std::cerr << "\t unexpected entry at ( " << triplet.first << ", " + << triplet.second << " ), no value (pattern matrix).\n"; + rc = FAILED; + } + } + } + if( rc != SUCCESS ) { return rc; } + + // done + return rc; +} + /** Dispatch for generic masked tests */ template< typename Tout, typename Tmask, typename Tin > RC masked_tests( @@ -282,7 +413,7 @@ void grb_program( const size_t &n, grb::RC &rc ) { rc = rc ? rc : grb::resize( C, 15 ); rc = rc ? rc : grb::resize( D, 15 ); rc = rc ? rc : grb::resize( E, 15 ); - rc = rc ? rc : grb::resize(output, 2 * n - 1 ); + rc = rc ? rc : grb::resize( output, 15 ); if( rc != SUCCESS || grb::nnz( A ) != 15 ) { std::cerr << "\tinitialisation FAILED\n"; return; @@ -338,12 +469,14 @@ void grb_program( const size_t &n, grb::RC &rc ) { std::cerr << "\t buildMatrixUnique of inputFloat matrix FAILED\n"; return; } - try { - inputVoid = grb::algorithms::matrices< void >::identity( n ); - } catch( ... ) { - std::cerr << "\t constructing inputVoid FAILED\n"; + rc = grb::resize( inputVoid, 2 * n - 1 ); + rc = rc ? rc : grb::resize( outputVoid, 2 * n - 1 ); + if( rc != SUCCESS ) { + std::cerr << "\t error resizing matrices for masked tests\n"; return; } + // postpone materialisation of inputVoid since it relies on unmasked grb::set + // (which is itself unit-tested later) std::cout << "\t test initialisation complete\n"; @@ -490,7 +623,15 @@ void grb_program( const size_t &n, grb::RC &rc ) { } if( rc != SUCCESS ) { return; } - //check masked matrix set + // check masked matrix set + // first, finish initialisation + rc = grb::set( inputVoid, input ); + rc = rc ? rc : grb::resize( output, 2 * n - 1 ); + if( rc != SUCCESS || grb::nnz( inputVoid ) != 2 * n - 1 ) { + std::cerr << "\t error in inputVoid (an earlier test likely failed)\n"; + if( rc == SUCCESS ) { rc = FAILED; } + return; + } std::cout << "\t testing set( matrix, mask, matrix ), non-void, no-cast, " << "empty mask\n"; @@ -525,6 +666,24 @@ void grb_program( const size_t &n, grb::RC &rc ) { if( masked_tests( rc, output, maskBool, inputFloat, n ) != grb::SUCCESS ) { return; } + + std::cout << "\t testing set( matrix, mask, matrix ), void-to-void (no cast), " + << "empty mask\n"; + if( masked_tests( rc, outputVoid, maskEmpty, inputVoid, n ) != grb::SUCCESS ) { + return; + } + + std::cout << "\t testing set( matrix, mask, matrix ), void-to-void (no cast), " + << "non-empty mask\n"; + if( masked_tests( rc, outputVoid, mask, inputVoid, n ) != grb::SUCCESS ) { + return; + } + + std::cout << "\t testing set( matrix, mask, matrix ), void-to-void (no cast), " + << "non-empty Boolean mask\n"; + if( masked_tests( rc, outputVoid, maskBool, inputVoid, n ) != grb::SUCCESS ) { + return; + } } int main( int argc, char ** argv ) { From 7975e5626edaf2b5af2dd8c9b6be667d2b1e4bca Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sat, 23 Aug 2025 17:18:51 +0200 Subject: [PATCH 24/39] Bugfix --- include/graphblas/reference/io.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/graphblas/reference/io.hpp b/include/graphblas/reference/io.hpp index f201daae4..6ca510598 100644 --- a/include/graphblas/reference/io.hpp +++ b/include/graphblas/reference/io.hpp @@ -2050,7 +2050,8 @@ namespace grb { "matrix" ); static_assert( - std::is_convertible< InputType, OutputType >::value, + std::is_convertible< InputType, OutputType >::value || + std::is_void< OutputType >::value, "grb::set (masked set to matrix): input type cannot be " "converted to output type" ); From 25fdef4fc5eee0b5b0d1e75dbb02263a2cdb8f2c Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sat, 23 Aug 2025 17:19:12 +0200 Subject: [PATCH 25/39] Complete unit test by testing casting from non-voids to void --- tests/unit/matrixSet.cpp | 41 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/unit/matrixSet.cpp b/tests/unit/matrixSet.cpp index 7ab40f262..305f6f10d 100644 --- a/tests/unit/matrixSet.cpp +++ b/tests/unit/matrixSet.cpp @@ -684,6 +684,47 @@ void grb_program( const size_t &n, grb::RC &rc ) { if( masked_tests( rc, outputVoid, maskBool, inputVoid, n ) != grb::SUCCESS ) { return; } + + std::cout << "\t testing set( matrix, mask, matrix ), int-to-void, casting " + << "(sort of), empty mask\n"; + if( masked_tests( rc, outputVoid, maskEmpty, input, n ) != grb::SUCCESS ) { + return; + } + + std::cout << "\t testing set( matrix, mask, matrix ), int-to-void, casting " + << "(sort of), non-empty mask\n"; + if( masked_tests( rc, outputVoid, mask, input, n ) != grb::SUCCESS ) { + return; + } + + std::cout << "\t testing set( matrix, mask, matrix ), int-to-void, casting " + << "(sort of), Boolean mask\n"; + if( masked_tests( rc, outputVoid, maskBool, input, n ) != grb::SUCCESS ) { + return; + } + + std::cout << "\t testing set( matrix, mask, matrix ), float-to-void, casting " + << "(sort of), empty mask\n"; + if( + masked_tests( rc, outputVoid, maskEmpty, inputFloat, n ) != + grb::SUCCESS + ) { + return; + } + + std::cout << "\t testing set( matrix, mask, matrix ), float-to-void, casting " + << "(sort of), non-empty mask\n"; + if( masked_tests( rc, outputVoid, mask, inputFloat, n ) != grb::SUCCESS ) { + return; + } + + std::cout << "\t testing set( matrix, mask, matrix ), float-to-void, casting " + << "(sort of), Boolean mask\n"; + if( masked_tests( rc, outputVoid, maskBool, inputFloat, n ) != grb::SUCCESS ) { + return; + } + + // done } int main( int argc, char ** argv ) { From 72da7fd91dc3969095d557e44a62851a9743f2be Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sun, 24 Aug 2025 10:54:23 +0200 Subject: [PATCH 26/39] Unit test uncovered bug in hyperdags implementation, herewith fixed --- include/graphblas/hyperdags/io.hpp | 93 ++++++++++++++++++++++++++---- 1 file changed, 81 insertions(+), 12 deletions(-) diff --git a/include/graphblas/hyperdags/io.hpp b/include/graphblas/hyperdags/io.hpp index 4ac5b4635..99711f0bb 100644 --- a/include/graphblas/hyperdags/io.hpp +++ b/include/graphblas/hyperdags/io.hpp @@ -28,6 +28,10 @@ #include +#ifdef _DEBUG + #define _DEBUG_HYPERDAGS_IO +#endif + namespace grb { @@ -456,26 +460,91 @@ namespace grb { const Matrix< InputType, hyperdags, RIT3, CIT3, NIT3 > &A, const Phase &phase = EXECUTE ) { +#ifdef _DEBUG_HYPERDAGS_IO + std::cout << "Entering set(matrix, matrix, matrix), hyperdags backend\n"; +#endif + // first, check for dynamic errors + const size_t m = nrows( C ); + const size_t n = ncols( C ); + if( m != nrows( A ) || n != ncols( A ) ) { +#ifdef _DEBUG_HYPERDAGS_IO + std::cerr << "\t set( matrix, matrix, matrix ): dimension mismatch (I)\n"; +#endif + return MISMATCH; + } + if( m != nrows( M ) && nrows( M ) != 0 ) { +#ifdef _DEBUG_HYPERDAGS_IO + std::cerr << "\t set( matrix, matrix, matrix ): dimension mismatch (II)\n"; +#endif + return MISMATCH; + } + if( n != ncols( M ) && ncols( M ) != 0 ) { +#ifdef _DEBUG_HYPERDAGS_IO + std::cerr << "\t set( matrix, matrix, matrix ): dimension mismatch (III)\n"; +#endif + return MISMATCH; + } + // second, check for trivial op + if( m == 0 || n == 0 ) { +#ifdef _DEBUG_HYPERDAGS_IO + std::cerr << "\t WARNING set( matrix, matrix, matrix ), hyperdags: " + << "trivial op detected (all containers empty). No operation will be " + << "recorded\n"; +#endif + return SUCCESS; + } + // third, execute +#ifdef _DEBUG_HYPERDAGS_IO + std::cerr << "\t set( matrix, matrix, matrix ), hyperdags: forwarding to " + << "execution backend\n"; +#endif const RC ret = set< descr >( internal::getMatrix( C ), internal::getMatrix( M ), internal::getMatrix( A ), phase ); + // fourth, forward any errors if( ret != SUCCESS ) { return ret; } if( phase != EXECUTE ) { return ret; } - if( nrows( A ) == 0 || ncols( A ) == 0 ) { return ret; } + // fifth, record operation +#ifdef _DEBUG_HYPERDAGS_IO + std::cerr << "\t set( matrix, matrix, matrix ), hyperdags: execution had no " + << "error; recording operation\n"; +#endif std::array< const void *, 0 > sourcesP{}; - std::array< uintptr_t, 3 > sourcesC{ - getID( internal::getMatrix(A) ), - getID( internal::getMatrix(M) ), - getID( internal::getMatrix(C) ) - }; std::array< uintptr_t, 1 > destinations{ getID( internal::getMatrix(C) ) }; - internal::hyperdags::generator.addOperation( - internal::hyperdags::SET_MATRIX_MATRIX_MASKED, - sourcesP.begin(), sourcesP.end(), - sourcesC.begin(), sourcesC.end(), - destinations.begin(), destinations.end() - ); + if( nrows( M ) == 0 || ncols( M ) == 0 ) { +#ifdef _DEBUG_HYPERDAGS_IO + std::cerr << "\t WARNING set( matrix, matrix, matrix ), hyperdags: " + << "empty mask detected; hyperDAG will not (cannot) record it\n"; +#endif + std::array< uintptr_t, 2 > sourcesC{ + getID( internal::getMatrix(A) ), + getID( internal::getMatrix(C) ) + }; + internal::hyperdags::generator.addOperation( + internal::hyperdags::SET_MATRIX_MATRIX_MASKED, + sourcesP.begin(), sourcesP.end(), + sourcesC.begin(), sourcesC.end(), + destinations.begin(), destinations.end() + ); + } else { + std::array< uintptr_t, 3 > sourcesC{ + getID( internal::getMatrix(A) ), + getID( internal::getMatrix(M) ), + getID( internal::getMatrix(C) ) + }; + internal::hyperdags::generator.addOperation( + internal::hyperdags::SET_MATRIX_MATRIX_MASKED, + sourcesP.begin(), sourcesP.end(), + sourcesC.begin(), sourcesC.end(), + destinations.begin(), destinations.end() + ); + } + + // done +#ifdef _DEBUG_HYPERDAGS_IO + std::cerr << "\t set( matrix, matrix, matrix ), hyperdags: exiting\n"; +#endif return ret; } From bc63b593985e3d5d361f0695ccc88c3cd8e72b13 Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sun, 24 Aug 2025 10:54:40 +0200 Subject: [PATCH 27/39] Also execute matrixSet unit test for larger matrices --- tests/unit/unittests.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unit/unittests.sh b/tests/unit/unittests.sh index 0b49d2421..6abb78e21 100755 --- a/tests/unit/unittests.sh +++ b/tests/unit/unittests.sh @@ -484,6 +484,12 @@ for MODE in ${MODES}; do grep "Test OK" ${TEST_OUT_DIR}/matrixSet_${MODE}_${BACKEND}_${P}_${T}.log || echo "Test FAILED" echo " " + echo ">>> [x] [ ] Testing grb::set (matrices), size 10 000" + $runner ${TEST_BIN_DIR}/matrixSet_${MODE}_${BACKEND} 10000 2> ${TEST_OUT_DIR}/matrixSet_large_${MODE}_${BACKEND}_${P}_${T}.err 1> ${TEST_OUT_DIR}/matrixSet_large_${MODE}_${BACKEND}_${P}_${T}.log + head -1 ${TEST_OUT_DIR}/matrixSet_large_${MODE}_${BACKEND}_${P}_${T}.log + grep "Test OK" ${TEST_OUT_DIR}/matrixSet_large_${MODE}_${BACKEND}_${P}_${T}.log || echo "Test FAILED" + echo " " + echo ">>> [x] [ ] Testing grb::set (matrix, value)" $runner ${TEST_BIN_DIR}/setMatrixValue_${MODE}_${BACKEND} 2> ${TEST_OUT_DIR}/setMatrixValue_${MODE}_${BACKEND}_${P}_${T}.err 1> ${TEST_OUT_DIR}/setMatrixValue_${MODE}_${BACKEND}_${P}_${T}.log head -1 ${TEST_OUT_DIR}/setMatrixValue_${MODE}_${BACKEND}_${P}_${T}.log From c5b11a11f9ef1aa0610263aa35a30e7e221fb0a6 Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sun, 24 Aug 2025 13:14:19 +0200 Subject: [PATCH 28/39] BSP1D I/O implementation of grb::set(matrix,matrix,matrix). All unit tests pass. --- include/graphblas/bsp1d/io.hpp | 160 +++++++++++++++++++++++++++++++-- 1 file changed, 154 insertions(+), 6 deletions(-) diff --git a/include/graphblas/bsp1d/io.hpp b/include/graphblas/bsp1d/io.hpp index edada4191..cfe2844c6 100644 --- a/include/graphblas/bsp1d/io.hpp +++ b/include/graphblas/bsp1d/io.hpp @@ -32,7 +32,6 @@ #include #endif -#include "graphblas/blas1.hpp" // for grb::size #include "graphblas/nonzeroStorage.hpp" // the below transforms an std::vector iterator into an ALP/GraphBLAS-compatible @@ -976,20 +975,166 @@ namespace grb { #ifdef _BSP1D_IO_DEBUG std::cout << "Called grb::set( matrix, mask, value ) (BSP1D)\n"; #endif + // catch trivial cases const size_t m = nrows( C ); const size_t n = ncols( C ); + if( m == 0 || n == 0 ) { return SUCCESS; } - // dynamic checks (I) + // dynamic checks + if( nrows( mask ) == 0 || ncols( mask ) == 0 ) { + return ILLEGAL; + } if( m != nrows( mask ) || n != ncols( mask ) ) { return MISMATCH; } - // catch trivial case +#ifdef _BSP1D_IO_DEBUG + std::cout << "\t delegating to final backend\n"; +#endif + RC ret = SUCCESS; + // Take care that local matrices may be empty, even if the global matrix is + // not. Processes with empty local matrices will not delegate (no-op). + { + auto &local_C = internal::getLocal( C ); + const auto &local_mask = internal::getLocal( mask ); + const size_t local_m = nrows( local_C ); + const size_t local_n = ncols( local_C ); + assert( local_m == nrows( local_mask ) ); + assert( local_n == ncols( local_mask ) ); + if( local_m > 0 && local_n > 0 ) { + ret = set< descr >( local_C, local_mask, val, phase ); + } + } + + // in the self-masked case, there is no way an error could occur + if( (descr & descriptors::structural) && getID( C ) == getID( mask ) ) { +#ifdef _BSP1D_IO_DEBUG + std::cout << "\t structural self-masking detected, which allows trivial " + "exit\n"; // since the nnz nor capacity would never change +#endif + assert( ret == SUCCESS ); + return ret; + } + + // in all other cases, in either mode (resize or execute), we must check for + // errors +#ifdef _BSP1D_IO_DEBUG + std::cout << "\t all-reducing error code\n"; +#endif + if( collectives< BSP1D >::allreduce( ret, operators::any_or< RC >() ) + != SUCCESS + ) { + return PANIC; + } + +#ifdef _BSP1D_IO_DEBUG + std::cout << "\t all-reduced error code is " << toString( ret ) << "\n"; +#endif + if( phase == RESIZE ) { + if( ret == SUCCESS ) { +#ifdef _BSP1D_IO_DEBUG + std::cout << "\t resize phase detected -- synchronising capacity\n"; +#endif + ret = internal::updateCap( C ); + if( ret != SUCCESS ) { + std::cerr << "Error updating capacity: " << toString( ret ) << "\n"; + } + } + } else { + assert( phase == EXECUTE ); + if( ret == SUCCESS ) { +#ifdef _BSP1D_IO_DEBUG + std::cout << "\t execute phase detected -- synchronising nnz count\n"; +#endif + ret = internal::updateNnz( C ); + if( ret != SUCCESS ) { + std::cerr << "Error updating output number of nonzeroes: " + << toString( ret ) << "\n"; + } + } else if( ret == ILLEGAL ) { +#ifdef _BSP1D_IO_DEBUG + std::cout << "\t delegate returns ILLEGAL, clearing output\n"; +#endif + const RC clear_rc = clear( C ); + if( clear_rc != SUCCESS ) { + ret = PANIC; + } + } else { + if( ret != PANIC ) { + std::cerr << "Warning: unexpected error code in grb::set( matrix, mask, " + << "value ) (BSP1D). Please submit a bug report.\n"; + } + assert( ret == PANIC ); + } + } + +#ifdef _BSP1D_IO_DEBUG + std::cout << "\t done; returning " << toString( ret ) << "\n"; +#endif + + // done + return ret; + } + + /** + * The implementation can trivially rely on the final backend, however, the + * capacity or nonzero count of the output can in some cases differ. The below + * implementation mostly deals with that logic. + */ + template< + Descriptor descr = descriptors::no_operation, + typename DataType, typename RIT1, typename CIT1, typename NIT1, + typename MaskType, typename RIT2, typename CIT2, typename NIT2, + typename ValueType = DataType, typename RIT3, typename CIT3, typename NIT3 + > + RC set( + Matrix< DataType, BSP1D, RIT1, CIT1, NIT1 > &C, + const Matrix< MaskType, BSP1D, RIT2, CIT2, NIT2 > &mask, + const Matrix< ValueType, BSP1D, RIT3, CIT3, NIT3 > &A, + const Phase &phase = EXECUTE, + const typename std::enable_if< + !grb::is_object< DataType >::value && + !grb::is_object< ValueType >::value && + !grb::is_object< MaskType >::value + >::type * const = nullptr + ) noexcept { + // static checks + NO_CAST_ASSERT( ( !(descr & descriptors::no_casting) || + std::is_same< ValueType, DataType >::value + ), "grb::set( matrix, mask, matrix ) (BSP1D)", + "called with non-matching value types" + ); + NO_CAST_ASSERT( + ( !(descr & descriptors::no_casting) || + std::is_same< MaskType, bool >::value ), + "grb::set( matrix, mask, matrix ) (BSP1D)", + "called with non-Boolean mask value type" + ); + static_assert( !( (descr & descriptors::structural) && + (descr & descriptors::invert_mask) + ), "grb::set( matrix, mask, matrix ) (BSP1D): Primitives with matrix " + "outputs may not employ structurally inverted masking" + ); +#ifdef _BSP1D_IO_DEBUG + std::cout << "Called grb::set( matrix, mask, matrix ) (BSP1D)\n"; +#endif + const size_t m = nrows( C ); + const size_t n = ncols( C ); + + // dynamic checks (I) + if( m != nrows( A ) || n != ncols( A ) ) { + return MISMATCH; + } + + // catch trivial cases if( m == 0 || n == 0 ) { return SUCCESS; } + if( nrows( mask ) == 0 || ncols( mask ) == 0 ) { + return set< descr >( C, A, phase ); + } // dynamic checks (II) - if( nrows( mask ) == 0 || ncols( mask ) == 0 ) { - return ILLEGAL; + if( m != nrows( mask ) || n != ncols( mask ) ) { + return MISMATCH; } #ifdef _BSP1D_IO_DEBUG @@ -1000,13 +1145,16 @@ namespace grb { // not. Processes with empty local matrices will not delegate (no-op). { auto &local_C = internal::getLocal( C ); + const auto &local_A = internal::getLocal( A ); const auto &local_mask = internal::getLocal( mask ); const size_t local_m = nrows( local_C ); const size_t local_n = ncols( local_C ); assert( local_m == nrows( local_mask ) ); assert( local_n == ncols( local_mask ) ); + assert( local_m == nrows( local_A ) ); + assert( local_n == ncols( local_A ) ); if( local_m > 0 && local_n > 0 ) { - ret = set< descr >( local_C, local_mask, val, phase ); + ret = set< descr >( local_C, local_mask, local_A, phase ); } } From 0955f9f34f9a2255baf390929c4125bc16b9239d Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Mon, 22 Sep 2025 03:25:48 +0200 Subject: [PATCH 29/39] Remove redundant template info from constructor --- include/graphblas/reference/compressed_storage.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/graphblas/reference/compressed_storage.hpp b/include/graphblas/reference/compressed_storage.hpp index 95b67a903..2d40d18cc 100644 --- a/include/graphblas/reference/compressed_storage.hpp +++ b/include/graphblas/reference/compressed_storage.hpp @@ -1156,7 +1156,7 @@ namespace grb { {} /** Move constructor. */ - Compressed_Storage< void, IND, SIZE >( SelfType &&other ) { + Compressed_Storage( SelfType &&other ) { moveFromOther( other ); } From c69848914bcd7bbcabd8226e82034d201f597c14 Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Mon, 22 Sep 2025 03:37:03 +0200 Subject: [PATCH 30/39] Hotfix missing include --- include/graphblas/bsp1d/init.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/graphblas/bsp1d/init.hpp b/include/graphblas/bsp1d/init.hpp index 582479bf1..d5798d894 100644 --- a/include/graphblas/bsp1d/init.hpp +++ b/include/graphblas/bsp1d/init.hpp @@ -34,6 +34,7 @@ #include #include +#include //uintptr_t #include //assertions #include From 182c79e618c14869c7cce72fd28dd6fec56bb583 Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Mon, 22 Sep 2025 03:41:34 +0200 Subject: [PATCH 31/39] Hotfix bug in matrixFileReader --- include/graphblas/utils/parser/matrixFileReaderBase.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/graphblas/utils/parser/matrixFileReaderBase.hpp b/include/graphblas/utils/parser/matrixFileReaderBase.hpp index c5cdf8b7a..15aa3e4f8 100644 --- a/include/graphblas/utils/parser/matrixFileReaderBase.hpp +++ b/include/graphblas/utils/parser/matrixFileReaderBase.hpp @@ -280,7 +280,7 @@ namespace grb { properties._nz = nz; properties._entries = entries; properties._pattern = pattern; - properties._symmetric = symmetric; + properties._symmetric = symmetric ? Symmetric : General; properties._direct = direct; properties._symmetricmap = symmetricmap; // check for existance of file From 972069b707a0c750afe3191c546e91fe978ce79d Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Mon, 22 Sep 2025 03:55:46 +0200 Subject: [PATCH 32/39] Hotfix bug in BSP1D I/O --- include/graphblas/bsp1d/io.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/graphblas/bsp1d/io.hpp b/include/graphblas/bsp1d/io.hpp index cfe2844c6..b8181041a 100644 --- a/include/graphblas/bsp1d/io.hpp +++ b/include/graphblas/bsp1d/io.hpp @@ -1773,8 +1773,9 @@ namespace grb { // a pipeline depth of 2 is sufficient. constexpr size_t iteration_overlaps = 2; - const std::unique_ptr< size_t > first_nnz_per_thread( - new size_t[ num_threads * iteration_overlaps ]() + const std::unique_ptr< size_t, void(*)(size_t * const) > first_nnz_per_thread( + new size_t[ num_threads * iteration_overlaps ](), + [](size_t * const array){ delete [] array; } ); size_t * const first_nnz_per_thread_ptr = first_nnz_per_thread.get(); outgoing.resize( data.P ); From 66a19743091bf24d2c5599281ebd6d50a584357b Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Mon, 22 Sep 2025 03:56:40 +0200 Subject: [PATCH 33/39] Refactor SPA buffer management from blas3/mxm to matrix.hpp. This prepares for its use with grb::set(matrix,matrix,matrix) --- include/graphblas/reference/blas3.hpp | 432 +------------------------ include/graphblas/reference/matrix.hpp | 425 ++++++++++++++++++++++++ 2 files changed, 428 insertions(+), 429 deletions(-) diff --git a/include/graphblas/reference/blas3.hpp b/include/graphblas/reference/blas3.hpp index 054d945dd..2a37f51a8 100644 --- a/include/graphblas/reference/blas3.hpp +++ b/include/graphblas/reference/blas3.hpp @@ -579,432 +579,6 @@ namespace grb { return SUCCESS; } -#ifndef _H_GRB_REFERENCE_OMP_BLAS3 - /** - * Meta-data for global buffer management for use with #grb::mxm. - * - * This meta-data is the same for both the sequential (reference) and shared- - * memory parallel (reference_omp) backends. - * - * This class contains all meta-data necessary to interpret the global buffer - * as an array of sparse accumulators (SPAs). The length of the array is given - * by a call to #threads(), minus one. It is called that since a call to - * #threads() retrieves how many threads can be used to process the call to - * #grb::mxm. - * - * Each SPA has the layout (bitarray, stack, valueArray). These are packed in a - * padded byte array, such that each bit array, stack, and value array is - * aligned on sizeof(int) bytes. - * - * @tparam NIT The nonzero index type. - * @tparam ValueType The output matrix value type. - */ - template< typename NIT, typename ValueType > - class MXM_BufferMetaData { - - static_assert( sizeof(NIT) % sizeof(int) == 0, "Unsupported type for NIT; " - "please submit a bug report!" ); - - private: - - /** The size of the offset array */ - size_t m; - - /** The size of the SPA */ - size_t n; - - /** The number of threads supported during a call to #grb::mxm */ - size_t nthreads; - - /** The initial buffer offset */ - size_t bufferOffset; - - /** The size of a single SPA, including bytes needed for padding */ - size_t paddedSPASize; - - /** The number of bytes to pad the SPA array with */ - size_t arrayShift; - - /** The number of bytes to pad the SPA stack with */ - size_t stackShift; - - /** - * Given a number of used bytes of the buffer, calculate the available - * remainder buffer and return it. - * - * @param[in] osize The size of the buffer (in bytes) that is already - * in use. - * @param[out] remainder Pointer to any remainder buffer. - * @param[out] rsize The size of the remainder buffer. - * - * If no buffer space is left, \a remainder will be set to nullptr - * and \a size to 0. - */ - void retrieveRemainderBuffer( - const size_t osize, - void * &remainder, size_t &rsize - ) const noexcept { - const size_t size = internal::template getCurrentBufferSize< char >(); - char * rem = internal::template getReferenceBuffer< char >( size ); - size_t rsize_calc = size - osize; - rem += osize; - const size_t mod = reinterpret_cast< uintptr_t >(rem) % sizeof(int); - if( mod ) { - const size_t shift = sizeof(int) - mod; - if( rsize_calc >= shift ) { - rsize_calc -= shift; - rem += rsize; - } else { - rsize_calc = 0; - rem = nullptr; - } - } - assert( !(reinterpret_cast< uintptr_t >(rem) % sizeof(int)) ); - // write out - remainder = rem; - rsize = rsize_calc; - } - - - public: - - /** - * Base constructor. - * - * @param[in] _m The length of the offset array. - * @param[in] _n The length of the SPA. - * @param[in] max_threads The maximum number of threads. - * - * \note \a max_threads is a separate input since there might be a need to - * cap the maximum number of threads used based on some analytic - * performance model. Rather than putting such a performance model - * within this class, we make it an obligatory input parameter - * instead. - * - * \note It is always valid to pass config::OMP::threads(). - * - * \note This class \em will, however, cap the number of threads returned - * to \a _n. - */ - MXM_BufferMetaData( - const size_t _m, const size_t _n, - const size_t max_threads - ) : m( _m ), n( _n ), arrayShift( 0 ), stackShift( 0 ) { - #ifdef _DEBUG_REFERENCE_BLAS3 - #pragma omp critical - std::cout << "\t\t\t computing padded buffer size for a SPA of length " - << n << " while leaving space for an additional offset buffer of length " - << std::max( m, n ) << "...\n"; - #endif - // compute bufferOffset - bufferOffset = (std::max( m, n ) + 1) * sizeof( NIT ); - - // compute value buffer size - const size_t valBufSize = n * sizeof( ValueType ); - - #ifdef _DEBUG_REFERENCE_BLAS3 - std::cout << "\t\t\t\t bit-array size has byte-size " << - internal::Coordinates< reference >::arraySize( n ) << "\n"; - std::cout << "\t\t\t\t stack has byte-size " << - internal::Coordinates< reference >::stackSize( n ) << "\n"; - std::cout << "\t\t\t\t value buffer has byte-size " << valBufSize << "\n"; - #endif - - // compute paddedSPASize - paddedSPASize = - internal::Coordinates< reference >::arraySize( n ) + - internal::Coordinates< reference >::stackSize( n ) + - valBufSize; - size_t shift = - internal::Coordinates< reference >::arraySize( n ) % sizeof(int); - if( shift != 0 ) { - arrayShift = sizeof(int) - shift; - paddedSPASize += arrayShift; - } - shift = internal::Coordinates< reference >::stackSize( n ) % sizeof(int); - if( shift != 0 ) { - stackShift = sizeof(int) - shift; - paddedSPASize += stackShift; - } - shift = valBufSize % sizeof(int); - if( shift != 0 ) { - paddedSPASize += (sizeof(int) - shift); - } - - // pad bufferOffset - shift = bufferOffset % sizeof(int); - if( shift != 0 ) { - bufferOffset += (sizeof(int) - shift); - } - - // compute free buffer size - const size_t freeBufferSize = internal::getCurrentBufferSize< char >() - - bufferOffset; - - // compute max number of threads - nthreads = 1 + freeBufferSize / paddedSPASize; - #ifdef _DEBUG_REFERENCE_BLAS3 - #pragma omp critical - std::cout << "\t\t\t free buffer size: " << freeBufferSize - << ", (padded) SPA size: " << paddedSPASize - << " -> supported #threads: " << nthreads << ". " - << " The shifts for the bit-array and the stack are " << arrayShift - << ", respectively, " << stackShift << "." - << "\n"; - #endif - // cap the final number of selected threads - if( nthreads > max_threads ) { - nthreads = max_threads; - } - if( nthreads > n ) { - nthreads = n; - } - } - - /** @returns The maximum number of supported threads during #grb::mxm */ - size_t threads() const noexcept { - return nthreads; - } - - /** - * Requests and returns a global buffer required for a thread-local SPA. - * - * @param[in] t The thread ID. Must be larger than 0. - * - * \note Thread 0 employs the SPA allocated with the output matrix. - * - * @returns Pointer into the global buffer starting at the area reserved for - * the SPA of thread \a t. - */ - char * getSPABuffers( size_t t ) const noexcept { - assert( t > 0 ); - (void) --t; - char * raw = internal::template getReferenceBuffer< char >( - bufferOffset + nthreads * paddedSPASize ); - assert( reinterpret_cast< uintptr_t >(raw) % sizeof(int) == 0 ); - raw += bufferOffset; - assert( reinterpret_cast< uintptr_t >(raw) % sizeof(int) == 0 ); - raw += t * paddedSPASize; - return raw; - } - - /** - * Retrieves the column offset buffer. - * - * @param[out] remainder Returns any remainder buffer beyond that of the row - * offset buffer. - * @param[out] rsize The remainder buffer size \a remainder points to. - * - * If \a remainder is not a nullptr then neither should \a rsize, - * and vice versa. - * - * Retrieving any remainder buffer is optional. The default is to not ask - * for them. - * - * \warning If all buffer memory is used for the column offsets, it may be - * that \a remainder equals nullptr and rsize - * zero. - * - * \warning This buffer is only guaranteed exclusive if only the retrieved - * column buffer is used. In particular, if also requesting (and - * using) SPA buffers, the remainder buffer area is shared with - * those SPA buffers, and data races are likely to occur. In other - * words: be very careful with any use of these remainder buffers. - * - * @returns The column offset buffer. - * - * \warning This buffer overlaps with the CRS offset buffer. The caller - * must ensure to only ever use one at a time. - */ - NIT * getColOffsetBuffer( - void * * const remainder = nullptr, - size_t * const rsize = nullptr - ) const noexcept { - NIT * const ret = internal::template getReferenceBuffer< NIT >( n + 1 ); - if( remainder != nullptr || rsize != nullptr ) { - assert( remainder != nullptr && rsize != nullptr ); - retrieveRemainderBuffer( (m + 1) * sizeof(NIT), *remainder, *rsize ); - } - return ret; - } - - /** - * Retrieves the row offset buffer. - * - * @param[out] remainder Returns any remainder buffer beyond that of the row - * offset buffer. - * @param[out] rsize The remainder buffer size \a remainder points to. - * - * If \a remainder is not a nullptr then neither should \a rsize, - * and vice versa. - * - * Retrieving any remainder buffer is optional. The default is to not ask - * for them. - * - * \warning If all buffer memory is used for the row offsets, it may be that - * \a remainder equals nullptr and rsize zero. - * - * \warning This buffer is only guaranteed exclusive if only the retrieved - * row buffer is used. In particular, if also requesting (and - * using) SPA buffers, the remainder buffer area is shared with - * those SPA buffers, and data races are likely to occur. In other - * words: be very careful with any use of these remainder buffers. - * - * @returns The row offset buffer. - * - * \warning This buffer overlaps with the CCS offset buffer. The caller - * must ensure to only ever use one at a time. - */ - NIT * getRowOffsetBuffer( - void * * const remainder = nullptr, - size_t * const rsize = nullptr - ) const noexcept { - NIT * const ret = internal::template getReferenceBuffer< NIT >( m + 1 ); - if( remainder != nullptr || rsize != nullptr ) { - assert( remainder != nullptr && rsize != nullptr ); - retrieveRemainderBuffer( (m + 1) * sizeof(NIT), *remainder, *rsize ); - } - return ret; - } - - /** - * Shifts a pointer into the global buffer by the bit-array size and its - * padding. - * - * @param[in,out] raw On input: an aligned pointer into the global buffer. - * On output: an aligned pointer past the bit-array - * position. - */ - void applyArrayShift( char * &raw ) const noexcept { - const size_t totalShift = - internal::Coordinates< reference >::arraySize( n ) + - arrayShift; - #ifdef _DEBUG_REFERENCE_BLAS3 - std::cout << "\t\t\t shifting input pointer with " - << internal::Coordinates< reference >::arraySize( n ) << " + " - << arrayShift << " = " << totalShift << "bytes \n"; - #endif - raw += totalShift; - } - - /** - * Shifts a pointer into the global buffer by the stack size and its - * padding. - * - * @param[in,out] raw On input: an aligned pointer into the global buffer. - * On output: an aligned pointer past the stack position. - */ - void applyStackShift( char * &raw ) const noexcept { - const size_t totalShift = - internal::Coordinates< reference >::stackSize( n ) + - stackShift; - #ifdef _DEBUG_REFERENCE_BLAS3 - std::cout << "\t\t\t shifting input pointer with " - << internal::Coordinates< reference >::arraySize( n ) << " + " - << stackShift << " = " << totalShift << "bytes \n"; - #endif - raw += totalShift; - } - - }; -#endif - - /** - * Retrieves the SPA buffers for the calling thread. - * - * \warning This function must be called from within an OpenMP parallel - * section. - * - * @param[out] arr Where the bit-array may be located. - * @param[out] buf Where the stack may be located. - * @param[out] valbuf Where the value buffer may be located. - * - * All above pointers are aligned on sizeof(int) bytes. - * - * @param[in] md Meta-data for global buffer management. - * @param[in] C The output matrix. - * - * One thread uses the buffers pre-allocated with the matrix \a C, thus - * ensuring at least one thread may perform the #grb::mxm. Any remainder - * threads can only help process the #grb::mxm if there is enough global - * buffer memory available. - * - * - * \note The global memory has size \f$ \Omega( \mathit{nz} ) \f$, which may - * be several factors (or even asymptotically greater than) - * \f$ \max\{ m, n \} \f$. - * - * \note In case the application stores multiple matrices, the global buffer - * may additionally be greater than the above note indicates if at least - * one of the other matrices is significantly (or asymptotically) larger - * than the one involved with the #grb::mxm. - */ - template< - typename OutputType, - typename RIT, typename CIT, typename NIT - > - void mxm_ompPar_getSPABuffers( - char * &arr, char * &buf, OutputType * &valbuf, - const struct MXM_BufferMetaData< NIT, OutputType > &md, - Matrix< OutputType, reference, RIT, CIT, NIT > &C - ) { -#ifdef _H_GRB_REFERENCE_OMP_BLAS3 - // other threads use the global buffer to create additional SPAs - { - const size_t t = config::OMP::current_thread_ID(); - #ifndef NDEBUG - const size_t T = config::OMP::current_threads(); - assert( t < T ); - #endif - if( t > 0 ) { - #ifdef _DEBUG_REFERENCE_BLAS3 - #pragma omp critical - std::cout << "\t Thread " << t << " gets buffers from global buffer\n"; - #endif - char * rawBuffer = md.getSPABuffers( t ); - assert( reinterpret_cast< uintptr_t >(rawBuffer) % sizeof(int) == 0 ); - arr = rawBuffer; - #ifdef _DEBUG_REFERENCE_BLAS3 - #pragma omp critical - #endif - md.applyArrayShift( rawBuffer ); - assert( reinterpret_cast< uintptr_t >(rawBuffer) % sizeof(int) == 0 ); - buf = rawBuffer; - #ifdef _DEBUG_REFERENCE_BLAS3 - #pragma omp critical - #endif - md.applyStackShift( rawBuffer ); - assert( reinterpret_cast< uintptr_t >(rawBuffer) % sizeof(int) == 0 ); - assert( buf != arr ); - valbuf = reinterpret_cast< OutputType * >(rawBuffer); - assert( static_cast< void * >(valbuf) != static_cast< void * >(buf) ); - } else { - #ifdef _DEBUG_REFERENCE_BLAS3 - #pragma omp critical - std::cout << "\t Thread " << t << " gets buffers from matrix storage\n"; - #endif - // one thread uses the standard matrix buffer - internal::getMatrixBuffers( arr, buf, valbuf, 1, C ); - } - #ifdef _DEBUG_REFERENCE_BLAS3 - #pragma omp critical - { - std::cout << "\t Thread " << t << " has SPA array @ " - << static_cast< void * >( arr ) << " and SPA stack @ " - << static_cast< void * >( buf ) << " and SPA values @ " - << static_cast< void * >( valbuf ) << "\n"; - } - #endif - } -#else - #ifdef _DEBUG_REFERENCE_BLAS3 - std::cout << "\t Reference backend gets buffers from global buffer\n"; - #endif - internal::getMatrixBuffers( arr, buf, valbuf, 1, C ); - (void) md; -#endif - } - /** * Given a computed new row_start array, moves the old index and value arrays * to new offsets. This leaves precisely enough space for the mxm algorithm to @@ -1234,7 +808,7 @@ namespace grb { #endif } - MXM_BufferMetaData< NIT, OutputType > bufferMD( m, n, max_threads ); + SPA_BufferMetaData< NIT, OutputType > bufferMD( m, n, max_threads ); #ifdef _H_GRB_REFERENCE_OMP_BLAS3 // derive number of threads @@ -1255,7 +829,7 @@ namespace grb { char * arr = nullptr; char * buf = nullptr; OutputType * valbuf = nullptr; - mxm_ompPar_getSPABuffers( arr, buf, valbuf, bufferMD, C ); + spa_ompPar_getBuffers( arr, buf, valbuf, bufferMD, C ); // do count size_t local_nzc; @@ -1296,7 +870,7 @@ namespace grb { char * arr = nullptr; char * buf = nullptr; OutputType * valbuf = nullptr; - mxm_ompPar_getSPABuffers( arr, buf, valbuf, bufferMD, C ); + spa_ompPar_getBuffers( arr, buf, valbuf, bufferMD, C ); #ifdef _DEBUG_REFERENCE_BLAS3 #ifdef _H_GRB_REFERENCE_OMP_BLAS3 #pragma omp critical diff --git a/include/graphblas/reference/matrix.hpp b/include/graphblas/reference/matrix.hpp index c1b3a365d..9f7fa96c7 100644 --- a/include/graphblas/reference/matrix.hpp +++ b/include/graphblas/reference/matrix.hpp @@ -2364,7 +2364,432 @@ namespace grb { ); return ret; } + + /** + * Meta-data for global buffer management for use with #grb::mxm and the + * matrix-matrix-matrix variant of #grb::set. + * + * This class contains all meta-data necessary to interpret the global buffer + * as an array of sparse accumulators (SPAs). The length of the array is given + * by a call to #threads(), minus one. It is called that since a call to + * #threads() retrieves how many threads can be used to process the call to + * #grb::mxm. + * + * Each SPA has the layout (bitarray, stack, valueArray). These are packed in a + * padded byte array, such that each bit array, stack, and value array is + * aligned on sizeof(int) bytes. + * + * @tparam NIT The nonzero index type. + * @tparam ValueType The output matrix value type. + * + * This meta-data class applies to both the sequential (reference) and shared- + * memory parallel (reference_omp) backends. + */ + template< typename NIT, typename ValueType > + class SPA_BufferMetaData { + + static_assert( sizeof(NIT) % sizeof(int) == 0, "Unsupported type for NIT; " + "please submit a bug report!" ); + + private: + + /** The size of the offset array */ + size_t m; + + /** The size of the SPA */ + size_t n; + + /** The number of threads supported during a call to #grb::mxm */ + size_t nthreads; + + /** The initial buffer offset */ + size_t bufferOffset; + + /** The size of a single SPA, including bytes needed for padding */ + size_t paddedSPASize; + + /** The number of bytes to pad the SPA array with */ + size_t arrayShift; + + /** The number of bytes to pad the SPA stack with */ + size_t stackShift; + + /** + * Given a number of used bytes of the buffer, calculate the available + * remainder buffer and return it. + * + * @param[in] osize The size of the buffer (in bytes) that is already + * in use. + * @param[out] remainder Pointer to any remainder buffer. + * @param[out] rsize The size of the remainder buffer. + * + * If no buffer space is left, \a remainder will be set to nullptr + * and \a size to 0. + */ + void retrieveRemainderBuffer( + const size_t osize, + void * &remainder, size_t &rsize + ) const noexcept { + const size_t size = internal::template getCurrentBufferSize< char >(); + char * rem = internal::template getReferenceBuffer< char >( size ); + size_t rsize_calc = size - osize; + rem += osize; + const size_t mod = reinterpret_cast< uintptr_t >(rem) % sizeof(int); + if( mod ) { + const size_t shift = sizeof(int) - mod; + if( rsize_calc >= shift ) { + rsize_calc -= shift; + rem += rsize; + } else { + rsize_calc = 0; + rem = nullptr; + } + } + assert( !(reinterpret_cast< uintptr_t >(rem) % sizeof(int)) ); + // write out + remainder = rem; + rsize = rsize_calc; + } + + + public: + + /** + * Base constructor. + * + * @param[in] _m The length of the offset array. + * @param[in] _n The length of the SPA. + * @param[in] max_threads The maximum number of threads. + * + * \note \a max_threads is a separate input since there might be a need to + * cap the maximum number of threads used based on some analytic + * performance model. Rather than putting such a performance model + * within this class, we make it an obligatory input parameter + * instead. + * + * \note It is always valid to pass config::OMP::threads(). + * + * \note This class \em will, however, cap the number of threads returned + * to \a _n. + */ + SPA_BufferMetaData( + const size_t _m, const size_t _n, + const size_t max_threads + ) : m( _m ), n( _n ), arrayShift( 0 ), stackShift( 0 ) { + #ifdef _DEBUG_REFERENCE_MATRIX + #pragma omp critical + std::cout << "\t\t\t computing padded buffer size for a SPA of length " + << n << " while leaving space for an additional offset buffer of length " + << std::max( m, n ) << "...\n"; + #endif + // compute bufferOffset + bufferOffset = (std::max( m, n ) + 1) * sizeof( NIT ); + + // compute value buffer size + const size_t valBufSize = n * sizeof( ValueType ); + + #ifdef _DEBUG_REFERENCE_MATRIX + std::cout << "\t\t\t\t bit-array size has byte-size " << + internal::Coordinates< reference >::arraySize( n ) << "\n"; + std::cout << "\t\t\t\t stack has byte-size " << + internal::Coordinates< reference >::stackSize( n ) << "\n"; + std::cout << "\t\t\t\t value buffer has byte-size " << valBufSize << "\n"; + #endif + + // compute paddedSPASize + paddedSPASize = + internal::Coordinates< reference >::arraySize( n ) + + internal::Coordinates< reference >::stackSize( n ) + + valBufSize; + size_t shift = + internal::Coordinates< reference >::arraySize( n ) % sizeof(int); + if( shift != 0 ) { + arrayShift = sizeof(int) - shift; + paddedSPASize += arrayShift; + } + shift = internal::Coordinates< reference >::stackSize( n ) % sizeof(int); + if( shift != 0 ) { + stackShift = sizeof(int) - shift; + paddedSPASize += stackShift; + } + shift = valBufSize % sizeof(int); + if( shift != 0 ) { + paddedSPASize += (sizeof(int) - shift); + } + + // pad bufferOffset + shift = bufferOffset % sizeof(int); + if( shift != 0 ) { + bufferOffset += (sizeof(int) - shift); + } + + // compute free buffer size + const size_t freeBufferSize = internal::getCurrentBufferSize< char >() - + bufferOffset; + + // compute max number of threads + nthreads = 1 + freeBufferSize / paddedSPASize; + #ifdef _DEBUG_REFERENCE_MATRIX + #pragma omp critical + std::cout << "\t\t\t free buffer size: " << freeBufferSize + << ", (padded) SPA size: " << paddedSPASize + << " -> supported #threads: " << nthreads << ". " + << " The shifts for the bit-array and the stack are " << arrayShift + << ", respectively, " << stackShift << "." + << "\n"; + #endif + // cap the final number of selected threads + if( nthreads > max_threads ) { + nthreads = max_threads; + } + if( nthreads > n ) { + nthreads = n; + } + } + + /** @returns The maximum number of supported threads during #grb::mxm */ + size_t threads() const noexcept { + return nthreads; + } + + /** + * Requests and returns a global buffer required for a thread-local SPA. + * + * @param[in] t The thread ID. Must be larger than 0. + * + * \note Thread 0 employs the SPA allocated with the output matrix. + * + * @returns Pointer into the global buffer starting at the area reserved for + * the SPA of thread \a t. + */ + char * getSPABuffers( size_t t ) const noexcept { + assert( t > 0 ); + (void) --t; + char * raw = internal::template getReferenceBuffer< char >( + bufferOffset + nthreads * paddedSPASize ); + assert( reinterpret_cast< uintptr_t >(raw) % sizeof(int) == 0 ); + raw += bufferOffset; + assert( reinterpret_cast< uintptr_t >(raw) % sizeof(int) == 0 ); + raw += t * paddedSPASize; + return raw; + } + + /** + * Retrieves the column offset buffer. + * + * @param[out] remainder Returns any remainder buffer beyond that of the row + * offset buffer. + * @param[out] rsize The remainder buffer size \a remainder points to. + * + * If \a remainder is not a nullptr then neither should \a rsize, + * and vice versa. + * + * Retrieving any remainder buffer is optional. The default is to not ask + * for them. + * + * \warning If all buffer memory is used for the column offsets, it may be + * that \a remainder equals nullptr and rsize + * zero. + * + * \warning This buffer is only guaranteed exclusive if only the retrieved + * column buffer is used. In particular, if also requesting (and + * using) SPA buffers, the remainder buffer area is shared with + * those SPA buffers, and data races are likely to occur. In other + * words: be very careful with any use of these remainder buffers. + * + * @returns The column offset buffer. + * + * \warning This buffer overlaps with the CRS offset buffer. The caller + * must ensure to only ever use one at a time. + */ + NIT * getColOffsetBuffer( + void * * const remainder = nullptr, + size_t * const rsize = nullptr + ) const noexcept { + NIT * const ret = internal::template getReferenceBuffer< NIT >( n + 1 ); + if( remainder != nullptr || rsize != nullptr ) { + assert( remainder != nullptr && rsize != nullptr ); + retrieveRemainderBuffer( (m + 1) * sizeof(NIT), *remainder, *rsize ); + } + return ret; + } + + /** + * Retrieves the row offset buffer. + * + * @param[out] remainder Returns any remainder buffer beyond that of the row + * offset buffer. + * @param[out] rsize The remainder buffer size \a remainder points to. + * + * If \a remainder is not a nullptr then neither should \a rsize, + * and vice versa. + * + * Retrieving any remainder buffer is optional. The default is to not ask + * for them. + * + * \warning If all buffer memory is used for the row offsets, it may be that + * \a remainder equals nullptr and rsize zero. + * + * \warning This buffer is only guaranteed exclusive if only the retrieved + * row buffer is used. In particular, if also requesting (and + * using) SPA buffers, the remainder buffer area is shared with + * those SPA buffers, and data races are likely to occur. In other + * words: be very careful with any use of these remainder buffers. + * + * @returns The row offset buffer. + * + * \warning This buffer overlaps with the CCS offset buffer. The caller + * must ensure to only ever use one at a time. + */ + NIT * getRowOffsetBuffer( + void * * const remainder = nullptr, + size_t * const rsize = nullptr + ) const noexcept { + NIT * const ret = internal::template getReferenceBuffer< NIT >( m + 1 ); + if( remainder != nullptr || rsize != nullptr ) { + assert( remainder != nullptr && rsize != nullptr ); + retrieveRemainderBuffer( (m + 1) * sizeof(NIT), *remainder, *rsize ); + } + return ret; + } + + /** + * Shifts a pointer into the global buffer by the bit-array size and its + * padding. + * + * @param[in,out] raw On input: an aligned pointer into the global buffer. + * On output: an aligned pointer past the bit-array + * position. + */ + void applyArrayShift( char * &raw ) const noexcept { + const size_t totalShift = + internal::Coordinates< reference >::arraySize( n ) + + arrayShift; + #ifdef _DEBUG_REFERENCE_MATRIX + std::cout << "\t\t\t shifting input pointer with " + << internal::Coordinates< reference >::arraySize( n ) << " + " + << arrayShift << " = " << totalShift << "bytes \n"; + #endif + raw += totalShift; + } + + /** + * Shifts a pointer into the global buffer by the stack size and its + * padding. + * + * @param[in,out] raw On input: an aligned pointer into the global buffer. + * On output: an aligned pointer past the stack position. + */ + void applyStackShift( char * &raw ) const noexcept { + const size_t totalShift = + internal::Coordinates< reference >::stackSize( n ) + + stackShift; + #ifdef _DEBUG_REFERENCE_MATRIX + std::cout << "\t\t\t shifting input pointer with " + << internal::Coordinates< reference >::arraySize( n ) << " + " + << stackShift << " = " << totalShift << "bytes \n"; + #endif + raw += totalShift; + } + + }; +#endif + + /** + * Retrieves the SPA buffers for the calling thread. + * + * \warning This function must be called from within an OpenMP parallel + * section. + * + * @param[out] arr Where the bit-array may be located. + * @param[out] buf Where the stack may be located. + * @param[out] valbuf Where the value buffer may be located. + * + * All above pointers are aligned on sizeof(int) bytes. + * + * @param[in] md Meta-data for global buffer management. + * @param[in] C The output matrix. + * + * One thread uses the buffers pre-allocated with the matrix \a C, thus + * ensuring at least one thread may perform the #grb::mxm. Any remainder + * threads can only help process the #grb::mxm if there is enough global + * buffer memory available. + * + * + * \note The global memory has size \f$ \Omega( \mathit{nz} ) \f$, which may + * be several factors (or even asymptotically greater than) + * \f$ \max\{ m, n \} \f$. + * + * \note In case the application stores multiple matrices, the global buffer + * may additionally be greater than the above note indicates if at least + * one of the other matrices is significantly (or asymptotically) larger + * than the one involved with the #grb::mxm. + */ + template< + typename OutputType, + typename RIT, typename CIT, typename NIT + > + void spa_ompPar_getBuffers( + char * &arr, char * &buf, OutputType * &valbuf, + const struct SPA_BufferMetaData< NIT, OutputType > &md, + Matrix< OutputType, reference, RIT, CIT, NIT > &C + ) { +#ifdef _H_GRB_REFERENCE_OMP_MATRIX + // other threads use the global buffer to create additional SPAs + { + const size_t t = config::OMP::current_thread_ID(); + #ifndef NDEBUG + const size_t T = config::OMP::current_threads(); + assert( t < T ); + #endif + if( t > 0 ) { + #ifdef _DEBUG_REFERENCE_MATRIX + #pragma omp critical + std::cout << "\t Thread " << t << " gets buffers from global buffer\n"; + #endif + char * rawBuffer = md.getSPABuffers( t ); + assert( reinterpret_cast< uintptr_t >(rawBuffer) % sizeof(int) == 0 ); + arr = rawBuffer; + #ifdef _DEBUG_REFERENCE_MATRIX + #pragma omp critical + #endif + md.applyArrayShift( rawBuffer ); + assert( reinterpret_cast< uintptr_t >(rawBuffer) % sizeof(int) == 0 ); + buf = rawBuffer; + #ifdef _DEBUG_REFERENCE_MATRIX + #pragma omp critical + #endif + md.applyStackShift( rawBuffer ); + assert( reinterpret_cast< uintptr_t >(rawBuffer) % sizeof(int) == 0 ); + assert( buf != arr ); + valbuf = reinterpret_cast< OutputType * >(rawBuffer); + assert( static_cast< void * >(valbuf) != static_cast< void * >(buf) ); + } else { + #ifdef _DEBUG_REFERENCE_MATRIX + #pragma omp critical + std::cout << "\t Thread " << t << " gets buffers from matrix storage\n"; + #endif + // one thread uses the standard matrix buffer + internal::getMatrixBuffers( arr, buf, valbuf, 1, C ); + } + #ifdef _DEBUG_REFERENCE_MATRIX + #pragma omp critical + { + std::cout << "\t Thread " << t << " has SPA array @ " + << static_cast< void * >( arr ) << " and SPA stack @ " + << static_cast< void * >( buf ) << " and SPA values @ " + << static_cast< void * >( valbuf ) << "\n"; + } + #endif + } +#else + #ifdef _DEBUG_REFERENCE_MATRIX + std::cout << "\t Reference backend gets buffers from global buffer\n"; + #endif + internal::getMatrixBuffers( arr, buf, valbuf, 1, C ); + (void) md; #endif + } } // end namespace grb::internal From 958aa6823e862a8483eb1c0c0f055dfe5f05fbe3 Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sat, 27 Sep 2025 10:33:41 +0200 Subject: [PATCH 34/39] Metabugfix: use heap for dynamic arrays, not the stack --- tests/unit/matrixSet.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/unit/matrixSet.cpp b/tests/unit/matrixSet.cpp index 305f6f10d..da3d48f64 100644 --- a/tests/unit/matrixSet.cpp +++ b/tests/unit/matrixSet.cpp @@ -425,9 +425,16 @@ void grb_program( const size_t &n, grb::RC &rc ) { // - input will be an n by n matrix with each element on its diagonal and // superdiagonal set to its row index (meaning the entries on row 0 have // value 0). - size_t I_mask[ 2 * n - 1 ], J_mask[ 2 * n - 1 ]; - int mask_vals [ 2 * n - 1 ]; - int input_vals [ 2 * n - 1 ]; + size_t * const I_mask = new size_t[ 2 * n - 1 ]; + size_t * const J_mask = new size_t[ 2 * n - 1 ]; + int * const mask_vals = new int[ 2 * n - 1 ]; + int * const input_vals = new int[ 2 * n - 1 ]; + if( I_mask == nullptr || J_mask == nullptr || mask_vals == nullptr || + input_vals == nullptr + ) { + std::cerr << "\t initialisation FAILED\n"; + return; + } for( size_t k = 0; k < n; ++k ) { I_mask[ k ] = J_mask[ k ] = k; mask_vals[ k ] = 1; @@ -478,6 +485,10 @@ void grb_program( const size_t &n, grb::RC &rc ) { // postpone materialisation of inputVoid since it relies on unmasked grb::set // (which is itself unit-tested later) + delete [] I_mask; + delete [] J_mask; + delete [] mask_vals; + delete [] input_vals; std::cout << "\t test initialisation complete\n"; // check grb::set for non-voids From df2d41dece01aa467256d19db068834cc76982de Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sat, 27 Sep 2025 10:34:13 +0200 Subject: [PATCH 35/39] make SPA buffer retrieval compatible with void matrices --- include/graphblas/reference/matrix.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/graphblas/reference/matrix.hpp b/include/graphblas/reference/matrix.hpp index 9f7fa96c7..08227a673 100644 --- a/include/graphblas/reference/matrix.hpp +++ b/include/graphblas/reference/matrix.hpp @@ -2486,7 +2486,7 @@ namespace grb { bufferOffset = (std::max( m, n ) + 1) * sizeof( NIT ); // compute value buffer size - const size_t valBufSize = n * sizeof( ValueType ); + const size_t valBufSize = n * utils::SizeOf< ValueType >::value; #ifdef _DEBUG_REFERENCE_MATRIX std::cout << "\t\t\t\t bit-array size has byte-size " << From efe6f6a5f0a109927f36ab51bdffa8d7a8b14a52 Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sat, 27 Sep 2025 10:34:58 +0200 Subject: [PATCH 36/39] make SPA buffer retrieval compatible with void matrices --- include/graphblas/reference/io.hpp | 116 ++++++++++++++++++++++------- 1 file changed, 88 insertions(+), 28 deletions(-) diff --git a/include/graphblas/reference/io.hpp b/include/graphblas/reference/io.hpp index 6ca510598..b4af46eea 100644 --- a/include/graphblas/reference/io.hpp +++ b/include/graphblas/reference/io.hpp @@ -2097,37 +2097,89 @@ namespace grb { // go for implementation, preliminaries: size_t nzc = 0; - char * mask_arr = nullptr; - char * mask_buf = nullptr; - MaskType * mask_valbuf = nullptr; const auto &A_raw = internal::getCRS( A ); const auto &mask_raw = internal::getCRS( M ); - internal::Coordinates< reference > mask_coors; - internal::getMatrixBuffers( mask_arr, mask_buf, mask_valbuf, 1, M ); - mask_coors.set( mask_arr, false, mask_buf, ncols ); - // we now have one (guaranteed) SPA, which is mask_coors. We now are going to // check how many more SPAs ideally we would like (for reference_omp), and // then go about trying to get those. If, finally, we get just this one SPA, // we will go into this mostly-sequential code (essentially, big-Omega nrows): - for( size_t i = 0; i < nrows; ++i ) { - mask_coors.clear(); - for( auto k = mask_raw.col_start[ i ]; k < mask_raw.col_start[ i + 1 ]; ++k ) { - const auto k_col = mask_raw.row_index[ k ]; - if( utils::interpretMatrixMask< descr, MaskType >( true, mask_raw.getValues(), k ) ) { - mask_coors.assign( k_col ); +#ifdef _H_GRB_REFERENCE_OMP_IO + const size_t nnz_based_nthreads = std::max( config::OMP::threads(), + grb::nnz( A ) / config::CACHE_LINE_SIZE::value() ); + grb::internal::SPA_BufferMetaData< NIT1, OutputType > bufferMD( m, n, + nnz_based_nthreads ); + const size_t nthreads = bufferMD.threads(); + std::cout << "\t set( matrix, matrix, matrix ) will use " << nthreads + << " threads\n"; +#else + const size_t nthreads = 1; +#endif + if( nthreads == 1 ) { + char * arr = nullptr; + char * buf = nullptr; + OutputType * valbuf = nullptr; + internal::Coordinates< reference > coors; + internal::getMatrixBuffers( arr, buf, valbuf, 1, C ); + coors.set( arr, false, buf, ncols ); + for( size_t i = 0; i < nrows; ++i ) { + coors.clear(); + for( auto k = mask_raw.col_start[ i ]; k < mask_raw.col_start[ i + 1 ]; ++k ) { + const auto k_col = mask_raw.row_index[ k ]; + if( utils::interpretMatrixMask< descr, MaskType >( true, mask_raw.getValues(), k ) ) { + coors.assign( k_col ); + } } - } -#ifdef _H_GRB_REFERENCE_OMP_BLAS3 - #pragma omp parallel for reduction( +: nzc ) \ - schedule( dynamic, config::CACHE_LINE_SIZE::value() ) +#ifdef _H_GRB_REFERENCE_OMP_IO + #pragma omp parallel for reduction( +: nzc ) \ + schedule( dynamic, config::CACHE_LINE_SIZE::value() ) #endif - for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) { - const auto k_col = A_raw.row_index[ k ]; - if( mask_coors.assigned( k_col ) ) { - (void) nzc++; + for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) { + const auto k_col = A_raw.row_index[ k ]; + if( coors.assigned( k_col ) ) { + (void) ++nzc; + } + } + } + } else { +#ifdef _H_GRB_REFERENCE_OMP_IO + #pragma omp parallel num_threads( nthreads ) reduction( +: nzc ) + { + // get thread-local buffers + size_t local_nz = 0; + char * arr = nullptr; + char * buf = nullptr; + OutputType * valbuf = nullptr; + internal::spa_ompPar_getBuffers( arr, buf, valbuf, bufferMD, C ); + internal::Coordinates< reference > coors; + coors.set_seq( arr, false, buf, n ); + // follow dynamic schedule since we cannot predict sparsity structure + #pragma omp for schedule( dynamic, config::CACHE_LINE_SIZE::value() ) + for( size_t i = 0; i < nrows; ++i ) { + coors.clear(); + for( auto k = mask_raw.col_start[ i ]; k < mask_raw.col_start[ i + 1 ]; ++k ) { + const auto k_col = mask_raw.row_index[ k ]; + if( utils::interpretMatrixMask< descr, MaskType >( true, mask_raw.getValues(), k ) ) { + coors.assign( k_col ); + } + } + for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) { + const auto k_col = A_raw.row_index[ k ]; + if( coors.assigned( k_col ) ) { + (void) ++local_nz; + } + } } + nzc += local_nz; } +#else + const bool code_path_should_not_be_reached = false; + std::cerr << "\t logic error in grb::set( matrix, matrix, matrix ): " + << "code path should not reach here. Please submit a bug report\n"; + assert( code_path_should_not_be_reached ); + #ifdef NDEBUG + (void) code_path_should_not_be_reached; + #endif +#endif } // we now have a count. If we're in the resize phase that means we're done: @@ -2158,7 +2210,7 @@ namespace grb { getReferenceBuffer< typename config::NonzeroIndexType >( ncols + 1 ); CRS_raw.col_start[ 0 ] = 0; -#ifdef _H_GRB_REFERENCE_OMP_BLAS3 +#ifdef _H_GRB_REFERENCE_OMP_IO // TODO ALPify the below #pragma omp parallel for simd #endif @@ -2167,20 +2219,28 @@ namespace grb { C_col_index[ j ] = 0; } + //TODO: revise the below, WIP + char * arr = nullptr; + char * buf = nullptr; + MaskType * valbuf = nullptr; + internal::Coordinates< reference > coors; + internal::getMatrixBuffers( arr, buf, valbuf, 1, M ); + coors.set( arr, false, buf, ncols ); + // do counting sort, phase 1 -- also this loop should employ the same // parallelisation strategy during counting nzc = 0; for( size_t i = 0; i < nrows; ++i ) { - mask_coors.clear(); + coors.clear(); for( auto k = mask_raw.col_start[ i ]; k < mask_raw.col_start[ i + 1 ]; ++k ) { const auto k_col = mask_raw.row_index[ k ]; if( utils::interpretMask< descr, MaskType >( true, mask_raw.getValues(), k ) ) { - mask_coors.assign( k_col ); + coors.assign( k_col ); } } for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) { const auto k_col = A_raw.row_index[ k ]; - if( mask_coors.assigned( k_col ) ) { + if( coors.assigned( k_col ) ) { (void) nzc++; (void) (CCS_raw.col_start[ k_col + 1 ])++; } @@ -2199,18 +2259,18 @@ namespace grb { // the same (multiple-SPA) parallelisation strategy as above nzc = 0; for( size_t i = 0; i < nrows; ++i ) { - mask_coors.clear(); + coors.clear(); for( auto k = mask_raw.col_start[ i ]; k < mask_raw.col_start[ i + 1 ]; ++k ) { const auto k_col = mask_raw.row_index[ k ]; if( utils::interpretMatrixMask< descr, MaskType >( true, mask_raw.getValues(), k ) ) { - mask_coors.assign( k_col ); + coors.assign( k_col ); } } for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) { const auto k_col = A_raw.row_index[ k ]; - if( mask_coors.assigned( k_col ) ) { + if( coors.assigned( k_col ) ) { constexpr int zero = 0; CRS_raw.row_index[ nzc ] = k_col; CRS_raw.setValue( nzc, A_raw.getValue( k, zero ) ); From 7b1692586ab38482915c7d2586b548215125a150 Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sat, 27 Sep 2025 15:18:52 +0200 Subject: [PATCH 37/39] Always select at least one thread --- include/graphblas/reference/blas3.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/graphblas/reference/blas3.hpp b/include/graphblas/reference/blas3.hpp index 2a37f51a8..3cffb588a 100644 --- a/include/graphblas/reference/blas3.hpp +++ b/include/graphblas/reference/blas3.hpp @@ -789,6 +789,7 @@ namespace grb { // a basic analytic model based on the number of nonzeroes size_t max_threads = config::OMP::threads(); + assert( max_threads > 0 ); { size_t target_nnz = 0; if( phase == EXECUTE ) { @@ -800,7 +801,7 @@ namespace grb { const size_t nnz_based_nthreads = target_nnz / config::CACHE_LINE_SIZE::value(); if( nnz_based_nthreads < max_threads ) { - max_threads = nnz_based_nthreads; + max_threads = nnz_based_nthreads > 0 ? nnz_based_nthreads : 1; } #ifdef _DEBUG_REFERENCE_BLAS3 std::cout << "\t simple analytic model selects max threads of " From 6e2e38186efeb3da8acf410e75c2e50e33da40fc Mon Sep 17 00:00:00 2001 From: Albert-Jan Yzelman Date: Sat, 27 Sep 2025 15:19:27 +0200 Subject: [PATCH 38/39] Bugfix error in expected capacity calculation (metabug - did not trigger with NDEBUG) --- include/graphblas/reference/matrix.hpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/graphblas/reference/matrix.hpp b/include/graphblas/reference/matrix.hpp index 08227a673..84873a22c 100644 --- a/include/graphblas/reference/matrix.hpp +++ b/include/graphblas/reference/matrix.hpp @@ -1283,7 +1283,9 @@ namespace grb { "We hit here a configuration border case which the implementation does not " "handle at present. Please submit a bug report." ); - // compute and return + // compute and return the maximum of + // - row- and column-wise buffers. The added factor two is for padding + // - minimal buffer requirement for parallel buildMatrixUnique return std::max( (m + n + 2) * globalBufferUnitSize, #ifdef _H_GRB_REFERENCE_OMP_MATRIX config::OMP::threads() * config::CACHE_LINE_SIZE::value() * @@ -2533,6 +2535,7 @@ namespace grb { #pragma omp critical std::cout << "\t\t\t free buffer size: " << freeBufferSize << ", (padded) SPA size: " << paddedSPASize + << ", bufferOffset: " << bufferOffset << " -> supported #threads: " << nthreads << ". " << " The shifts for the bit-array and the stack are " << arrayShift << ", respectively, " << stackShift << "." @@ -2564,9 +2567,10 @@ namespace grb { */ char * getSPABuffers( size_t t ) const noexcept { assert( t > 0 ); + assert( nthreads > 1 ); (void) --t; char * raw = internal::template getReferenceBuffer< char >( - bufferOffset + nthreads * paddedSPASize ); + bufferOffset + (nthreads - 1) * paddedSPASize ); assert( reinterpret_cast< uintptr_t >(raw) % sizeof(int) == 0 ); raw += bufferOffset; assert( reinterpret_cast< uintptr_t >(raw) % sizeof(int) == 0 ); From a6ae18b896f447836584c0051ba01f0261be76cb Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Tue, 14 Oct 2025 14:55:55 +0200 Subject: [PATCH 39/39] fix typo --- tests/unit/id.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/id.cpp b/tests/unit/id.cpp index a7a7cdb0b..8711369ad 100644 --- a/tests/unit/id.cpp +++ b/tests/unit/id.cpp @@ -216,7 +216,7 @@ void grb_program2( const struct input &in, struct output &out ) { } /** - * Test for move assignement id cleanup. + * Test for move assignment id cleanup. * * Creating and performing move assignment on multiple new objects and check * for collisions.