Skip to content

Commit 5090c3e

Browse files
committed
feat: working signed semi-global
Signed-off-by: Seth Stadick <[email protected]>
1 parent 5be7699 commit 5090c3e

File tree

15 files changed

+2433
-2164
lines changed

15 files changed

+2433
-2164
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11

22
# Next
33

4+
# v1.3.0 (07-21-2025)
5+
6+
- Switch to using signed ints instead of unsigned for semi-global
7+
- Fix alignment score check used to decide whether or not to do the reverse alignment for semi-global and local alignment
8+
- Allow specifying SIMD width via a flag at compile time
9+
- Fix / update the bench_aligner for current stable Mojo
10+
411
# v1.2.1 (06-25-2025)
512

613
- Fixed bug in logging module related to update from Mojo 24.3 to 24.4 replacing `write_args`.

benchmarking/ish_bench_aligner.mojo

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ struct ByteFastaRecord:
188188
seq_bytes = scoring_matrix.convert_ascii_to_encoding(seq_bytes)
189189
var rev = List[UInt8](capacity=len(seq_bytes))
190190
for s in reversed(seq_bytes):
191-
rev.append(s[])
191+
rev.append(s)
192192
self.name = name^
193193
self.seq = seq_bytes^
194194
self.rev = rev^
@@ -329,26 +329,26 @@ struct BenchmarkResults(ToDelimited):
329329
var gcups = 0.0
330330

331331
for result in results:
332-
total_query_seqs += result[].total_query_seqs
333-
total_target_seqs += result[].total_target_seqs
334-
runtime_secs += result[].runtime_secs
335-
cells_updated += result[].cells_updated
336-
gcups += result[].gcups
332+
total_query_seqs += result.total_query_seqs
333+
total_target_seqs += result.total_target_seqs
334+
runtime_secs += result.runtime_secs
335+
cells_updated += result.cells_updated
336+
gcups += result.gcups
337337

338-
if query_len != result[].query_len:
338+
if query_len != result.query_len:
339339
# TODO: may want to change this if we want to process multiple queries in one go.
340340
raise "Mismatching query len"
341-
if matrix != result[].matrix:
341+
if matrix != result.matrix:
342342
raise "Mismatching matrix"
343-
if gap_open != result[].gap_open:
343+
if gap_open != result.gap_open:
344344
raise "Mismatching gap open"
345-
if gap_extend != result[].gap_extend:
345+
if gap_extend != result.gap_extend:
346346
raise "Mismatching gap extend"
347-
if u8_width != result[].u8_width:
347+
if u8_width != result.u8_width:
348348
raise "Mismatching u8 width"
349-
if u16_width != result[].u16_width:
349+
if u16_width != result.u16_width:
350350
raise "Mismatching u16 width"
351-
if score_size != result[].score_size:
351+
if score_size != result.score_size:
352352
raise "Mismatching score size"
353353

354354
return Self(
@@ -518,7 +518,7 @@ fn bench_striped_local(
518518
)
519519
for q in queries:
520520
profiles.append(
521-
Profiles[SIMD_U8_WIDTH, SIMD_U16_WIDTH](q[], matrix, score_size)
521+
Profiles[SIMD_U8_WIDTH, SIMD_U16_WIDTH](q, matrix, score_size)
522522
)
523523
var prep_end = perf_counter()
524524
print("Setup Time:", prep_end - prep_start, file=stderr)
@@ -644,7 +644,7 @@ fn bench_striped_semi_global(
644644
for q in queries:
645645
profiles.append(
646646
SemiGlobalProfiles[SIMD_U8_WIDTH, SIMD_U16_WIDTH](
647-
q[], matrix, score_size
647+
q, matrix, score_size
648648
)
649649
)
650650
var prep_end = perf_counter()

ishlib/gpu/searcher_device.mojo

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,9 @@ struct SearcherDevice[func_type: AnyTrivialRegType, //, func: func_type]:
157157
max_devices: Int,
158158
) raises -> List[Self]:
159159
var ret = List[Self]()
160-
for i in range(0, min(DeviceContext.number_of_devices(), max_devices)):
160+
for i in range(
161+
0, min(DeviceContext.number_of_devices(), min(1, max_devices))
162+
):
161163
var device = DeviceContext(i)
162164
# Triple check it's a gpu
163165
if device.api() == "cuda" or device.api() == "hip":

ishlib/matcher/__init__.mojo

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,16 @@ fn simd_width_selector[dtype: DType]() -> Int:
1616
constrained[sizeof[Scalar[dtype]]() <= 16, "dytpe size too large."]()
1717
alias target = env_get_string["ISH_SIMD_TARGET", "none"]().lower()
1818
alias SSE_U8_SIZE = 16
19+
alias AVX_256_U8_SIZE = 32
20+
alias AVX_512_U8_SIZE = 64
1921

2022
@parameter
21-
if target == "baseline":
23+
if target == "baseline" or target == "sse":
2224
return SSE_U8_SIZE // sizeof[Scalar[dtype]]()
25+
elif target == "avx256":
26+
return AVX_256_U8_SIZE // sizeof[Scalar[dtype]]()
27+
elif target == "avx512":
28+
return AVX_512_U8_SIZE // sizeof[Scalar[dtype]]()
2329
else:
2430
return simdwidthof[dtype]()
2531

ishlib/matcher/alignment/__init__.mojo

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ struct AlignedMemory[dtype: DType, width: Int, alignment: Int](
5959
if zero_mem:
6060
memset_zero(self.ptr, self.length)
6161

62+
@always_inline
6263
fn __getitem__[
6364
I: Indexer
6465
](ref self, offset: I) -> ref [self] SIMD[dtype, width]:
@@ -78,9 +79,11 @@ struct AlignedMemory[dtype: DType, width: Int, alignment: Int](
7879
fn __del__(owned self):
7980
self.ptr.free()
8081

82+
@always_inline
8183
fn __len__(read self) -> Int:
8284
return self.length
8385

86+
@always_inline
8487
fn as_span(
8588
ref self,
8689
) -> Span[SIMD[self.dtype, self.width], __origin_of(self)]:

ishlib/matcher/alignment/local_aln/striped.mojo

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ fn ssw_align[
152152
gap_extension_penalty: UInt8 = 1,
153153
return_only_alignment_end: Bool = False,
154154
mask_length: Int32 = 15, # for second best score
155-
score_cutoff: Int32 = 0,
155+
score_cutoff: Float32 = 0.0,
156156
) -> Optional[Alignment]:
157157
# Find the alignment scores and ending positions
158158
var bests: AlignmentResult
@@ -212,7 +212,7 @@ fn ssw_align[
212212
Logger.warn("Failed to provide a valid query profile")
213213
return None
214214

215-
if bests.best.score <= score_cutoff:
215+
if Float32(bests.best.score) <= score_cutoff:
216216
Logger.debug("Worse than cutoff")
217217
return None
218218

@@ -294,7 +294,9 @@ fn sw[
294294
p_vecs.init_columns(len(reference))
295295
var max_score = UInt8(0).cast[dt]()
296296
var end_query: Int32 = query_len - 1
297-
var end_reference: Int32 = -1 # 0 based best alignment ending point; initialized as isn't aligned -1
297+
var end_reference: Int32 = (
298+
-1
299+
) # 0 based best alignment ending point; initialized as isn't aligned -1
298300
var segment_length = p_vecs.segment_length
299301

300302
# Note:

0 commit comments

Comments
 (0)