Skip to content

Commit 5357549

Browse files
authored
664 missing assertions for threaded execution (#284)
Benjamin (@byjtew) noticed, once upon a time, that should users manually modify the number of threads available to OpenMP _while_ the application is running, the application could crash without a clear error message. This issue recently resurfaced and this MR fixes at least some instances of this by implicitly checking if thread-counts remain the same whenever the sparse accumulator (SPA) of vectors or sparse matrices is retrieved. This check, however, has non-trivial overhead and is only active in debug mode. The end-user advice hence, as always, is to enable debug mode should their application crash for unclear reasons. This MR also includes a performance improvement to advancing iterators over ALP/GraphBLAS vectors, which was done in the context of this MR as otherwise the debug-mode unit tests took too much time. However, this performance improvement should also benefit performance-mode code as it holistically speeds up iteration. It also includes minor code style fixes and internal documentation improvements. The implementation of the check is constrained in that it assumes thread counts will not change between allocating SPA buffer data and assigning that buffer to a SPA. While the ALP internals guarantee that no such change occurs from within ALP, it is conceivable that such a run-time thread-count change could be concurrently initiated by a user thread, which in turn could be timed unfortunately so that this assumption is indeed violated. We consider this a rather adversarial case that would not be worth the implementation effort to guard against, but do note this caveat here in the commit notes. Thanks also to @GiovaGa for recently revisiting this MR and again to Benjamin for flagging the issue and providing the initial fix, now finally merged.
1 parent adfdd1e commit 5357549

File tree

2 files changed

+169
-17
lines changed

2 files changed

+169
-17
lines changed

include/graphblas/reference/coordinates.hpp

Lines changed: 146 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,32 @@ namespace grb {
5959

6060
/**
6161
* This class encapsulates everything needed to store a sparse set of 1D
62-
* coordinates. Its use is internal via, e.g., grb::Vector< T, reference, C >.
63-
* All functions needed to rebuild or update sparsity information are
64-
* encapsulated here.
62+
* coordinates.
63+
*
64+
* Its use is internal via, e.g., grb::Vector< T, reference, C >. All
65+
* functions needed to rebuild or update sparsity information are encapsulated
66+
* here.
67+
*
68+
* An instance of this class should always be initialised using one of the
69+
* following functions:
70+
* -# set
71+
* -# set_seq
72+
* -# set_ompPar
73+
* -# setDense
74+
* The use of both the default constructor and that of setDense does not lead
75+
* to a fully initialised instance and may only be used in restricted use
76+
* cases.
77+
*
78+
* The coordinates are in essense traditional sparse accumulators. There are
79+
* three main arrays in which sparsity information is stored:
80+
* -# _assigned
81+
* -# _stack
82+
* -# _buffer
83+
*
84+
* In the case of shared-memory parallel backends, the size of the buffer
85+
* depends on the number of threads the instance needs to support. (Or
86+
* rather, this is the case for all presently-supported shared-memory parallel
87+
* backends).
6588
*/
6689
template<>
6790
class Coordinates< reference > {
@@ -104,6 +127,9 @@ namespace grb {
104127
/** Per-thread capacity for parallel stack updates. */
105128
size_t _buf;
106129

130+
/** Number of threads for which these coordinates have been initialised. */
131+
size_t _threads;
132+
107133
/**
108134
* Increments the number of nonzeroes in the current thread-local stack.
109135
*
@@ -163,6 +189,7 @@ namespace grb {
163189
_n = 0;
164190
_cap = 0;
165191
_buf = 0;
192+
_threads = 0;
166193
return;
167194
}
168195

@@ -182,6 +209,11 @@ namespace grb {
182209
// initialise
183210
_n = 0;
184211
_cap = dim;
212+
#ifdef _H_GRB_REFERENCE_OMP_COORDINATES
213+
_threads = config::OMP::threads();
214+
#else
215+
_threads = 1;
216+
#endif
185217
}
186218

187219
/**
@@ -370,7 +402,7 @@ namespace grb {
370402
/** Base constructor. Creates an empty coordinates list of dimension 0. */
371403
inline Coordinates() noexcept :
372404
_assigned( nullptr ), _stack( nullptr ), _buffer( nullptr ),
373-
_n( 0 ), _cap( 0 ), _buf( 0 )
405+
_n( 0 ), _cap( 0 ), _buf( 0 ), _threads( 0 )
374406
{}
375407

376408
/**
@@ -379,12 +411,12 @@ namespace grb {
379411
*/
380412
inline Coordinates( Coordinates &&x ) noexcept :
381413
_assigned( x._assigned ), _stack( x._stack ), _buffer( x._buffer ),
382-
_n( x._n ), _cap( x._cap ), _buf( x._buf )
414+
_n( x._n ), _cap( x._cap ), _buf( x._buf ), _threads( x._threads )
383415
{
384416
x._assigned = nullptr;
385417
x._stack = nullptr;
386418
x._buffer = nullptr;
387-
x._n = x._cap = x._buf = 0;
419+
x._n = x._cap = x._buf = x._threads = 0;
388420
}
389421

390422
/**
@@ -396,7 +428,7 @@ namespace grb {
396428
*/
397429
inline Coordinates( const Coordinates &x ) noexcept :
398430
_assigned( x._assigned ), _stack( x._stack ), _buffer( x._buffer ),
399-
_n( x._n ), _cap( x._cap ), _buf( x._buf )
431+
_n( x._n ), _cap( x._cap ), _buf( x._buf ), _threads( x._threads )
400432
{
401433
// self-assignment is a programming error
402434
assert( this != &x );
@@ -426,9 +458,10 @@ namespace grb {
426458
_n = x._n;
427459
_cap = x._cap;
428460
_buf = x._buf;
461+
_threads = x._threads;
429462
x._assigned = NULL;
430463
x._stack = x._buffer = NULL;
431-
x._n = x._cap = x._buf = 0;
464+
x._n = x._cap = x._buf = x._threads = 0;
432465
return *this;
433466
}
434467

@@ -440,6 +473,105 @@ namespace grb {
440473
// blocks are not managed by this class)
441474
}
442475

476+
/**
477+
* Checks whether a new OpenMP parallel section returns the same number of
478+
* threads that was given during initialisation of this instance.
479+
*
480+
* This variant automatically determines if it is in a sequential or
481+
* (OpenMP) parallel context.
482+
*
483+
* This is intended exclusively for use within a debug mode, as the test
484+
* has the significant overhead of opening up an OpenMP parallel section.
485+
*
486+
* @returns <tt>true</tt> if the number of threads matches that during
487+
* setup;
488+
* @returns <tt>false</tt> otherwise.
489+
*
490+
* An assertion will trip in debug mode instead of returning <tt>false</tt>,
491+
* however.
492+
*/
493+
bool checkNumThreads() const noexcept {
494+
if( omp_in_parallel() ) {
495+
return checkNumThreadsPar();
496+
} else {
497+
return checkNumThreadsSeq();
498+
}
499+
}
500+
501+
/**
502+
* Checks whether a new OpenMP parallel section returns the same number of
503+
* threads that was given during initialisation of this instance.
504+
*
505+
* This variant should be called from a sequential context.
506+
*
507+
* This is intended exclusively for use within a debug mode, as the test
508+
* has the significant overhead of opening up an OpenMP parallel section.
509+
*
510+
* @returns <tt>true</tt> if the number of threads matches that during
511+
* setup;
512+
* @returns <tt>false</tt> otherwise.
513+
*
514+
* An assertion will trip in debug mode instead of returning <tt>false</tt>,
515+
* however.
516+
*/
517+
bool checkNumThreadsSeq() const noexcept {
518+
// guard against (valid) use of non-initialised coordinates
519+
if( _threads == 0 ) {
520+
return true;
521+
}
522+
// then, get actual number of threads now active and compare
523+
const size_t actualThreads = config::OMP::threads();
524+
if( actualThreads != _threads ) {
525+
std::cerr << "\t Error: coordinates instance was set for " << _threads
526+
<< " threads, however, current OpenMP parallel region reports "
527+
<< actualThreads << " threads instead!\n";
528+
#ifndef NDEBUG
529+
const bool num_omp_threads_has_changed = false;
530+
assert( num_omp_threads_has_changed );
531+
#endif
532+
return false;
533+
}
534+
return true;
535+
}
536+
537+
/**
538+
* Checks whether a new OpenMP parallel section returns the same number of
539+
* threads that was given during initialisation of this instance.
540+
*
541+
* This variant should be called from a sequential context.
542+
*
543+
* This is intended for use within a debug mode, but could conceivably be
544+
* used defensively in performance mode as well (there is no significant
545+
* performance overhead for this variant).
546+
*
547+
* @returns <tt>true</tt> if the number of threads matches that during
548+
* setup;
549+
* @returns <tt>false</tt> otherwise.
550+
*
551+
* An assertion will trip in debug mode instead of returning <tt>false</tt>,
552+
* however.
553+
*/
554+
bool checkNumThreadsPar() const noexcept {
555+
// guard against (valid) use of non-initialised coordinates
556+
if( _threads == 0 ) {
557+
return true;
558+
}
559+
// then, get actual number of threads now active and compare
560+
const size_t actualThreads =
561+
static_cast< size_t >( omp_get_num_threads() );
562+
if( actualThreads != _threads ) {
563+
std::cerr << "\t Error: coordinates instance was set for " << _threads
564+
<< " threads, however, current OpenMP parallel region reports "
565+
<< actualThreads << " threads instead!\n";
566+
#ifndef NDEBUG
567+
const bool num_omp_threads_has_changed = false;
568+
assert( num_omp_threads_has_changed );
569+
#endif
570+
return false;
571+
}
572+
return true;
573+
}
574+
443575
/**
444576
* @returns An empty thread-local stack for new nonzeroes.
445577
*/
@@ -457,8 +589,9 @@ namespace grb {
457589
}
458590

459591
/**
460-
* Sets the data structure. A call to this function sets the number of
461-
* coordinates to zero.
592+
* Sets the data structure.
593+
*
594+
* A call to this function sets the number of coordinates to zero.
462595
*
463596
* @param[in] arr Pointer to an array of size #arraySize. This array is
464597
* is managed by a container outside this class (and thus
@@ -470,6 +603,8 @@ namespace grb {
470603
* managed by a container outside this class (and thus will
471604
* not be freed on destruction of this instance).
472605
* @param[in] dim Size (dimension) of this vector, in number of elements.
606+
* @param[in] threads The number of threads that \a buf has been constructed
607+
* for.
473608
*
474609
* The memory area \a raw will be reset to reflect an empty coordinate set.
475610
*
@@ -565,6 +700,7 @@ namespace grb {
565700
_n = dim;
566701
_cap = dim;
567702
_buf = 0;
703+
_threads = 0;
568704
}
569705

570706
/**

include/graphblas/reference/vector.hpp

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -653,6 +653,9 @@ namespace grb {
653653
/** The maximum value of #position. */
654654
size_t max;
655655

656+
/** The size of the vector (not necessarily equal to \a max). */
657+
size_t n;
658+
656659
/** The local process ID. */
657660
const size_t s;
658661

@@ -689,10 +692,14 @@ namespace grb {
689692
// if not, go to the next valid value:
690693
if( container->_coordinates.isEmpty() ) {
691694
max = 0;
692-
} else if( container->_coordinates.isDense() ) {
693-
max = container->_coordinates.size();
695+
n = 0;
694696
} else {
695-
max = container->_coordinates.nonzeroes();
697+
n = container->_coordinates.size();
698+
if( container->_coordinates.isDense() ) {
699+
max = n;
700+
} else {
701+
max = container->_coordinates.nonzeroes();
702+
}
696703
}
697704
if( position < max ) {
698705
setValue();
@@ -712,7 +719,7 @@ namespace grb {
712719
* \note If the \a other iterator is not derived from the same container
713720
* as this iterator, the result is undefined.
714721
*/
715-
bool equal( const ConstIterator & other ) const noexcept {
722+
bool equal( const ConstIterator &other ) const noexcept {
716723
return other.position == position;
717724
}
718725

@@ -729,7 +736,7 @@ namespace grb {
729736
}
730737
assert( container->_coordinates.assigned( index ) );
731738
const size_t global_index = ActiveDistribution::local_index_to_global(
732-
index, size( *container ), s, P );
739+
index, n, s, P );
733740
#ifdef _DEBUG
734741
std::cout << "\t ConstIterator at process " << s << " / " << P
735742
<< " translated index " << index << " to " << global_index << "\n";
@@ -742,7 +749,7 @@ namespace grb {
742749

743750
/** Default constructor. */
744751
ConstIterator() noexcept :
745-
container( nullptr ), position( 0 ), max( 0 ),
752+
container( nullptr ), position( 0 ), max( 0 ), n( 0 ),
746753
s( grb::spmd< spmd_backend >::pid() ),
747754
P( grb::spmd< spmd_backend >::nprocs() )
748755
{}
@@ -751,7 +758,7 @@ namespace grb {
751758
ConstIterator( const ConstIterator &other ) noexcept :
752759
container( other.container ),
753760
value( other.value ), position( other.position ),
754-
max( other.max ),
761+
max( other.max ), n( other.n ),
755762
s( other.s ), P( other.P )
756763
{}
757764

@@ -762,6 +769,7 @@ namespace grb {
762769
std::swap( value, other.value );
763770
std::swap( position, other.position );
764771
std::swap( max, other.max );
772+
std::swap( n, other.n );
765773
}
766774

767775
/** Copy assignment. */
@@ -770,6 +778,7 @@ namespace grb {
770778
value = other.value;
771779
position = other.position;
772780
max = other.max;
781+
n = other.n;
773782
assert( s == other.s );
774783
assert( P == other.P );
775784
return *this;
@@ -781,6 +790,7 @@ namespace grb {
781790
std::swap( value, other.value );
782791
std::swap( position, other.position );
783792
std::swap( max, other.max );
793+
std::swap( n, other.n );
784794
assert( s == other.s );
785795
assert( P == other.P );
786796
return *this;
@@ -1308,13 +1318,19 @@ namespace grb {
13081318

13091319
template< typename D, typename C >
13101320
inline C & getCoordinates( Vector< D, reference, C > &x ) noexcept {
1321+
#if defined(_H_GRB_REFERENCE_OMP_VECTOR) && !defined(NDEBUG)
1322+
(void) x._coordinates.checkNumThreads();
1323+
#endif
13111324
return x._coordinates;
13121325
}
13131326

13141327
template< typename D, typename C >
13151328
inline const C & getCoordinates(
13161329
const Vector< D, reference, C > &x
13171330
) noexcept {
1331+
#if defined(_H_GRB_REFERENCE_OMP_VECTOR) && !defined(NDEBUG)
1332+
(void) x._coordinates.checkNumThreads();
1333+
#endif
13181334
return x._coordinates;
13191335
}
13201336

0 commit comments

Comments
 (0)