Skip to content

Commit 4cb62c3

Browse files
committed
[water] Support read/write lowering with MemRefType memory operands
After the ResolveDistributedAllocations pass converts WaveTensorType to MemRefType, read/write ops must still determine a dimension ordering for correct lowering. Because IndexExprsSpecified is a precondition for LowerWaveToMLIR, read/write ops are guaranteed to carry index expressions at this point. Since DictionaryAttr stores its entries as an ArrayRef<NamedAttribute>, the index dictionary keys have a deterministic order and can be used directly as the dimension ordering. Signed-off-by: tyb0807 <[email protected]>
1 parent 74441e3 commit 4cb62c3

File tree

3 files changed

+36
-7
lines changed

3 files changed

+36
-7
lines changed

water/lib/Dialect/Wave/Transforms/LowerReadWriteOps.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -432,15 +432,8 @@ static FailureOr<MemAccessInfo>
432432
createMemoryIndicesAndMask(ConversionPatternRewriter &rewriter,
433433
const TypeConverter *typeConverter, OpTy op,
434434
Type memoryTypeArg, VectorType vectorType) {
435-
auto memoryType = dyn_cast<wave::WaveTensorType>(memoryTypeArg);
436-
if (!memoryType)
437-
return rewriter.notifyMatchFailure(
438-
op, "lowering with MemRefType memory not yet implemented");
439-
440435
int64_t elementsPerThread = vectorType.getNumElements();
441436

442-
ArrayRef<wave::WaveSymbolAttr> orderedSyms = memoryType.getShape();
443-
444437
wave::WaveReadWriteBoundsAttr boundsDict = op.getBoundsAttr();
445438
wave::WaveHyperparameterAttr hyper =
446439
static_cast<const wave::WaveTypeConverter &>(*typeConverter)
@@ -459,6 +452,15 @@ createMemoryIndicesAndMask(ConversionPatternRewriter &rewriter,
459452
assert(llvm::hasSingleElement(indexArr.getValue()) &&
460453
"'index' must be an array with exactly one dictionary");
461454
DictionaryAttr indexDict = cast<DictionaryAttr>(indexArr[0]);
455+
456+
// Get ordered symbols from the index dictionary keys.
457+
// DictionaryAttr stores its entries as an ArrayRef<NamedAttribute>, so the
// keys have a deterministic order we can rely on for dimension ordering.
458+
SmallVector<wave::WaveSymbolAttr> orderedSymsStorage;
459+
orderedSymsStorage.reserve(indexDict.size());
460+
for (NamedAttribute namedAttr : indexDict)
461+
orderedSymsStorage.push_back(wave::WaveSymbolAttr::get(
462+
op.getContext(), namedAttr.getName().strref()));
463+
ArrayRef<wave::WaveSymbolAttr> orderedSyms = orderedSymsStorage;
462464
std::optional<int64_t> vectorizedDim =
463465
wave::getPositionOfVectorizedDim(orderedSyms, indexDict, hyper);
464466

water/lib/Dialect/Wave/Transforms/ResolveDistributedAllocations.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ struct ResolveDistributedAllocations
5555
return;
5656
}
5757

58+
// Update the result type in place.
5859
allocateOp.getResult().setType(memrefType);
5960
});
6061
return result;

water/test/Dialect/Wave/lower-wave-to-mlir.mlir

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -867,3 +867,29 @@ module attributes {wave.normal_form = #wave.normal_form<full_types,memory_only_t
867867
return
868868
}
869869
}
870+
871+
// -----
872+
873+
// Test read/write lowering with MemRefType memory operand.
874+
// This simulates the state after ResolveDistributedAllocations pass has run.
875+
// Dimension ordering is derived from the index dictionary keys
// (DictionaryAttr keys have a deterministic order).
876+
module attributes {wave.normal_form = #wave.normal_form<full_types,index_exprs,memory_only_types,resolved_allocations>} {
877+
// CHECK-LABEL: @lower_read_write_memref
878+
func.func @lower_read_write_memref(%mem: memref<64x64xf16, #gpu.address_space<workgroup>>)
879+
attributes {wave.hyperparameters = #wave.hyperparameters<{BLOCK_M = 64, BLOCK_N = 64}>} {
880+
// CHECK: %[[READ:.*]] = vector.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<64x64xf16, #gpu.address_space<workgroup>>, vector<8xf16>
881+
%0 = wave.read %mem index [{
882+
BLOCK_M : [#wave.index_symbol<T0>, #wave.symbol<"BLOCK_M">] -> (T0 mod 64, 1, 64),
883+
BLOCK_N : [#wave.index_symbol<T1>, #wave.symbol<"BLOCK_N">] -> (T1 * 8, 8, 1)
884+
}]
885+
: (memref<64x64xf16, #gpu.address_space<workgroup>>) -> vector<8xf16>
886+
887+
// CHECK: vector.store %[[READ]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<64x64xf16, #gpu.address_space<workgroup>>, vector<8xf16>
888+
wave.write %0, %mem index [{
889+
BLOCK_M : [#wave.index_symbol<T0>, #wave.symbol<"BLOCK_M">] -> (T0 mod 64, 1, 64),
890+
BLOCK_N : [#wave.index_symbol<T1>, #wave.symbol<"BLOCK_N">] -> (T1 * 8, 8, 1)
891+
}]
892+
: vector<8xf16>, memref<64x64xf16, #gpu.address_space<workgroup>>
893+
return
894+
}
895+
}

0 commit comments

Comments
 (0)