@@ -274,6 +274,32 @@ def test_tile_load_aligned_offset_unaligned_size(test, device):
274274 assert_np_equal (output_array .numpy ()[TILE_WIDTH :, :], np .zeros ((remaining_height , TILE_M )))
275275
276276
@wp.kernel
def test_tile_load_stride_unaligned_kernel(
    input: wp.array2d(dtype=wp.float32),
    output: wp.array2d(dtype=wp.float32),
):
    # Read a fixed 4x4 tile from `input`; no offset argument is passed, so the
    # library default applies.
    block = wp.tile_load(input, shape=(4, 4))
    # Write the tile to `output` unchanged so the host-side test can compare
    # it against the source data.
    wp.tile_store(output, block)
281+
282+
# Regression test for float4 aligned tiles that load from a source array with
# an incommensurate stride.
def test_tile_load_stride_unaligned(test, device):
    """Round-trip a 4x4 tile through a 5x5 source array and check the result.

    The source is a 5x5 identity matrix scaled by 2.0, so its row length (5)
    is not a multiple of the 4-wide tile that the kernel loads.

    Args:
        test: Test-case object supplied by the test harness (not used here).
        device: Device on which the arrays are allocated and the kernel runs.
    """
    DIM = 5

    source_np = np.eye(DIM) * 2.0
    source = wp.array(source_np, dtype=wp.float32, device=device)
    result = wp.zeros_like(source)

    wp.launch_tiled(
        test_tile_load_stride_unaligned_kernel,
        dim=(1, 1),
        inputs=[source],
        outputs=[result],
        block_dim=TILE_DIM,
        device=device,
    )

    # The expected output is the source with its last diagonal entry cleared:
    # that element is the only non-zero value outside the leading 4x4 region.
    expected_np = source_np.copy()
    expected_np[DIM - 1, DIM - 1] = 0.0

    assert_np_equal(result.numpy(), expected_np)
301+
302+
277303# ----------------------------------------------------------------------------------------
278304
# NOTE(review): presumably the tile extent used by the tests that follow this
# separator — the usages are not visible in this chunk; confirm.
TILE_SIZE = 4
@@ -485,6 +511,7 @@ class TestTileLoad(unittest.TestCase):
485511 test_tile_load_aligned_offset_unaligned_size ,
486512 devices = devices ,
487513)
# Register the stride-unaligned tile load regression test for every device.
add_function_test(
    TestTileLoad,
    "test_tile_load_stride_unaligned",
    test_tile_load_stride_unaligned,
    devices=devices,
)
488515
# Register the 1D and 2D tile-extract tests; each test function is produced by
# calling test_tile_extract with the kernel and a second positional argument
# (1 or 2).
add_function_test(
    TestTileLoad,
    "test_tile_extract_1d",
    test_tile_extract(tile_extract_1d_kernel, 1),
    devices=devices,
)
add_function_test(
    TestTileLoad,
    "test_tile_extract_2d",
    test_tile_extract(tile_extract_2d_kernel, 2),
    devices=devices,
)
0 commit comments