@@ -584,6 +584,14 @@ tsk_strerror_internal(int err)
584584 ret = "Must have at least one allele when specifying an allele map. "
585585 "(TSK_ERR_ZERO_ALLELES)" ;
586586 break ;
587+ case TSK_ERR_BAD_ALLELE_LENGTH :
588+ ret = "Alleles used when decoding alignments must have length one. "
589+ "(TSK_ERR_BAD_ALLELE_LENGTH)" ;
590+ break ;
591+ case TSK_ERR_MISSING_CHAR_COLLISION :
592+ ret = "Alleles used when decoding alignments must not match the missing "
593+ "data character. (TSK_ERR_MISSING_CHAR_COLLISION)" ;
594+ break ;
587595
588596 /* Distance metric errors */
589597 case TSK_ERR_SAMPLE_SIZE_MISMATCH :
@@ -1260,16 +1268,16 @@ tsk_avl_tree_int_ordered_nodes(const tsk_avl_tree_int_t *self, tsk_avl_node_int_
12601268}
12611269
12621270// Bit Array implementation. Allows us to store unsigned integers in a compact manner.
1263- // Currently implemented as an array of 32-bit unsigned integers for ease of counting .
1271+ // Currently implemented as an array of 32-bit unsigned integers.
12641272
12651273int
1266- tsk_bit_array_init ( tsk_bit_array_t * self , tsk_size_t num_bits , tsk_size_t length )
1274+ tsk_bitset_init ( tsk_bitset_t * self , tsk_size_t num_bits , tsk_size_t length )
12671275{
12681276 int ret = 0 ;
12691277
1270- self -> size = (num_bits >> TSK_BIT_ARRAY_CHUNK )
1271- + ( num_bits % TSK_BIT_ARRAY_NUM_BITS ? 1 : 0 ) ;
1272- self -> data = tsk_calloc (self -> size * length , sizeof (* self -> data ));
1278+ self -> row_len = (num_bits / TSK_BITSET_BITS ) + ( num_bits % TSK_BITSET_BITS ? 1 : 0 );
1279+ self -> len = length ;
1280+ self -> data = tsk_calloc (self -> row_len * length , sizeof (* self -> data ));
12731281 if (self -> data == NULL ) {
12741282 ret = tsk_trace_error (TSK_ERR_NO_MEMORY );
12751283 goto out ;
@@ -1278,96 +1286,111 @@ tsk_bit_array_init(tsk_bit_array_t *self, tsk_size_t num_bits, tsk_size_t length
12781286 return ret ;
12791287}
12801288
1281- void
1282- tsk_bit_array_get_row (const tsk_bit_array_t * self , tsk_size_t row , tsk_bit_array_t * out )
1283- {
1284- out -> size = self -> size ;
1285- out -> data = self -> data + (row * self -> size );
1286- }
1289+ #define BITSET_DATA_ROW (bs , row ) ((bs)->data + (row) * (bs)->row_len)
12871290
12881291void
1289- tsk_bit_array_intersect (
1290- const tsk_bit_array_t * self , const tsk_bit_array_t * other , tsk_bit_array_t * out )
1292+ tsk_bitset_intersect ( const tsk_bitset_t * self , tsk_size_t self_row ,
1293+ const tsk_bitset_t * other , tsk_size_t other_row , tsk_bitset_t * out )
12911294{
1292- for (tsk_size_t i = 0 ; i < self -> size ; i ++ ) {
1293- out -> data [i ] = self -> data [i ] & other -> data [i ];
1295+ const tsk_bitset_val_t * restrict self_d = BITSET_DATA_ROW (self , self_row );
1296+ const tsk_bitset_val_t * restrict other_d = BITSET_DATA_ROW (other , other_row );
1297+ tsk_bitset_val_t * restrict out_d = out -> data ;
1298+ for (tsk_size_t i = 0 ; i < self -> row_len ; i ++ ) {
1299+ out_d [i ] = self_d [i ] & other_d [i ];
12941300 }
12951301}
12961302
12971303void
1298- tsk_bit_array_subtract (tsk_bit_array_t * self , const tsk_bit_array_t * other )
1304+ tsk_bitset_subtract (tsk_bitset_t * self , tsk_size_t self_row , const tsk_bitset_t * other ,
1305+ tsk_size_t other_row )
12991306{
1300- for (tsk_size_t i = 0 ; i < self -> size ; i ++ ) {
1301- self -> data [i ] &= ~(other -> data [i ]);
1307+ tsk_bitset_val_t * restrict self_d = BITSET_DATA_ROW (self , self_row );
1308+ const tsk_bitset_val_t * restrict other_d = BITSET_DATA_ROW (other , other_row );
1309+ for (tsk_size_t i = 0 ; i < self -> row_len ; i ++ ) {
1310+ self_d [i ] &= ~(other_d [i ]);
13021311 }
13031312}
13041313
13051314void
1306- tsk_bit_array_add (tsk_bit_array_t * self , const tsk_bit_array_t * other )
1315+ tsk_bitset_union (tsk_bitset_t * self , tsk_size_t self_row , const tsk_bitset_t * other ,
1316+ tsk_size_t other_row )
13071317{
1308- for (tsk_size_t i = 0 ; i < self -> size ; i ++ ) {
1309- self -> data [i ] |= other -> data [i ];
1318+ tsk_bitset_val_t * restrict self_d = BITSET_DATA_ROW (self , self_row );
1319+ const tsk_bitset_val_t * restrict other_d = BITSET_DATA_ROW (other , other_row );
1320+ for (tsk_size_t i = 0 ; i < self -> row_len ; i ++ ) {
1321+ self_d [i ] |= other_d [i ];
13101322 }
13111323}
13121324
13131325void
1314- tsk_bit_array_add_bit ( tsk_bit_array_t * self , const tsk_bit_array_value_t bit )
1326+ tsk_bitset_set_bit ( tsk_bitset_t * self , tsk_size_t row , const tsk_bitset_val_t bit )
13151327{
1316- tsk_bit_array_value_t i = bit >> TSK_BIT_ARRAY_CHUNK ;
1317- self -> data [i ] |= (tsk_bit_array_value_t ) 1 << (bit - (TSK_BIT_ARRAY_NUM_BITS * i ));
1328+ tsk_bitset_val_t i = (bit / TSK_BITSET_BITS );
1329+ * (BITSET_DATA_ROW (self , row ) + i ) |= (tsk_bitset_val_t ) 1
1330+ << (bit - (TSK_BITSET_BITS * i ));
13181331}
13191332
13201333bool
1321- tsk_bit_array_contains (const tsk_bit_array_t * self , const tsk_bit_array_value_t bit )
1334+ tsk_bitset_contains (const tsk_bitset_t * self , tsk_size_t row , const tsk_bitset_val_t bit )
13221335{
1323- tsk_bit_array_value_t i = bit >> TSK_BIT_ARRAY_CHUNK ;
1324- return self -> data [ i ]
1325- & ((tsk_bit_array_value_t ) 1 << (bit - (TSK_BIT_ARRAY_NUM_BITS * i )));
1336+ tsk_bitset_val_t i = ( bit / TSK_BITSET_BITS ) ;
1337+ return * ( BITSET_DATA_ROW ( self , row ) + i )
1338+ & ((tsk_bitset_val_t ) 1 << (bit - (TSK_BITSET_BITS * i )));
13261339}
13271340
1328- tsk_size_t
1329- tsk_bit_array_count ( const tsk_bit_array_t * self )
1341+ static inline uint32_t
1342+ popcount ( tsk_bitset_val_t v )
13301343{
1331- // Utilizes 12 operations per bit array . NB this only works on 32 bit integers.
1344+ // Utilizes 12 operations per chunk . NB this only works on 32 bit integers.
13321345 // Taken from:
13331346 // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
13341347 // There's a nice breakdown of this algorithm here:
13351348 // https://stackoverflow.com/a/109025
1336- // Could probably do better with explicit SIMD (instead of SWAR), but not as
1337- // portable: https://arxiv.org/pdf/1611.07612.pdf
13381349 //
1339- // There is one solution to explore further, which uses __builtin_popcountll.
1340- // This option is relatively simple, but requires a 64 bit bit array and also
1341- // involves some compiler flag plumbing (-mpopcnt)
1350+ // The gcc/clang compiler flag will -mpopcnt will convert this code to a
1351+ // popcnt instruction (most if not all modern CPUs will support this). The
1352+ // popcnt instruction will yield some speed improvements, which depend on
1353+ // the tree sequence.
1354+ //
1355+ // NB: 32bit counting is typically faster than 64bit counting for this task.
1356+ // (at least on x86-64)
1357+
1358+ v = v - ((v >> 1 ) & 0x55555555 );
1359+ v = (v & 0x33333333 ) + ((v >> 2 ) & 0x33333333 );
1360+ return (((v + (v >> 4 )) & 0xF0F0F0F ) * 0x1010101 ) >> 24 ;
1361+ }
13421362
1343- tsk_bit_array_value_t tmp ;
1344- tsk_size_t i , count = 0 ;
1363+ tsk_size_t
1364+ tsk_bitset_count (const tsk_bitset_t * self , tsk_size_t row )
1365+ {
1366+ tsk_size_t i = 0 ;
1367+ tsk_size_t count = 0 ;
1368+ const tsk_bitset_val_t * restrict self_d = BITSET_DATA_ROW (self , row );
13451369
1346- for (i = 0 ; i < self -> size ; i ++ ) {
1347- tmp = self -> data [i ] - ((self -> data [i ] >> 1 ) & 0x55555555 );
1348- tmp = (tmp & 0x33333333 ) + ((tmp >> 2 ) & 0x33333333 );
1349- count += (((tmp + (tmp >> 4 )) & 0xF0F0F0F ) * 0x1010101 ) >> 24 ;
1370+ for (i = 0 ; i < self -> row_len ; i ++ ) {
1371+ count += popcount (self_d [i ]);
13501372 }
13511373 return count ;
13521374}
13531375
13541376void
1355- tsk_bit_array_get_items (
1356- const tsk_bit_array_t * self , tsk_id_t * items , tsk_size_t * n_items )
1377+ tsk_bitset_get_items (
1378+ const tsk_bitset_t * self , tsk_size_t row , tsk_id_t * items , tsk_size_t * n_items )
13571379{
13581380 // Get the items stored in the row of a bitset.
1359- // Uses a de Bruijn sequence lookup table to determine the lowest bit set. See the
1360- // wikipedia article for more info: https://w.wiki/BYiF
1381+ // Uses a de Bruijn sequence lookup table to determine the lowest bit set.
1382+ // See the wikipedia article for more info: https://w.wiki/BYiF
13611383
13621384 tsk_size_t i , n , off ;
1363- tsk_bit_array_value_t v , lsb ; // least significant bit
1385+ tsk_bitset_val_t v , lsb ; // least significant bit
13641386 static const tsk_id_t lookup [32 ] = { 0 , 1 , 28 , 2 , 29 , 14 , 24 , 3 , 30 , 22 , 20 , 15 , 25 ,
13651387 17 , 4 , 8 , 31 , 27 , 13 , 23 , 21 , 19 , 16 , 7 , 26 , 12 , 18 , 6 , 11 , 5 , 10 , 9 };
1388+ const tsk_bitset_val_t * restrict self_d = BITSET_DATA_ROW (self , row );
13661389
13671390 n = 0 ;
1368- for (i = 0 ; i < self -> size ; i ++ ) {
1369- v = self -> data [i ];
1370- off = i * (( tsk_size_t ) TSK_BIT_ARRAY_NUM_BITS ) ;
1391+ for (i = 0 ; i < self -> row_len ; i ++ ) {
1392+ v = self_d [i ];
1393+ off = i * TSK_BITSET_BITS ;
13711394 if (v == 0 ) {
13721395 continue ;
13731396 }
@@ -1381,7 +1404,7 @@ tsk_bit_array_get_items(
13811404}
13821405
13831406void
1384- tsk_bit_array_free ( tsk_bit_array_t * self )
1407+ tsk_bitset_free ( tsk_bitset_t * self )
13851408{
13861409 tsk_safe_free (self -> data );
13871410}
0 commit comments