Commit 27ba1a3

add detailed tests & README for samples
1 parent b6b175b commit 27ba1a3

16 files changed: 323 additions & 41 deletions

lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp

Lines changed: 22 additions & 0 deletions
@@ -9,6 +9,7 @@
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "Conversion/ConversionPasses.h"
+#include "llvm/Support/raw_ostream.h"

namespace mlir {
namespace neura {
@@ -62,6 +63,26 @@ struct LlvmFAddToNeuraFAdd : public OpRewritePattern<mlir::LLVM::FAddOp> {
  }
};

+struct LlvmFSubToNeuraFSub : public OpRewritePattern<mlir::LLVM::FSubOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(mlir::LLVM::FSubOp op,
+                                PatternRewriter &rewriter) const override {
+    Value lhs = op->getOperand(0);
+    Value rhs = op.getOperand(1);
+    Type result_type = op->getResult(0).getType();
+
+    // Only matches scalar float.
+    if (!mlir::isa<FloatType>(result_type)) {
+      return failure();
+    }
+
+    // Optional predicate: default to 'none'.
+    rewriter.replaceOpWithNewOp<neura::FSubOp>(op, result_type, lhs, rhs, Value());
+    return success();
+  }
+};
+
struct LlvmOrToNeuraOr : public OpRewritePattern<mlir::LLVM::OrOp> {
  using OpRewritePattern::OpRewritePattern;

@@ -316,6 +337,7 @@ struct LowerLlvmToNeuraPass
    patterns.add<LlvmBrToNeuraBr>(&getContext());
    patterns.add<LlvmReturnToNeuraReturn>(&getContext());
    patterns.add<FuncReturnToNeuraReturn>(&getContext());
+    patterns.add<LlvmFSubToNeuraFSub>(&getContext());

    FrozenRewritePatternSet frozen(std::move(patterns));

lib/Conversion/LlvmToNeura/LlvmToNeuraPatterns.td

Lines changed: 5 additions & 4 deletions
@@ -4,8 +4,9 @@ include "mlir/Dialect/LLVMIR/LLVMOps.td"
include "NeuraDialect/NeuraOps.td"

// Floating point binary operations.
-def : Pat<
-  (LLVM_FSubOp $lhs, $rhs, $_fastmath),
-  (Neura_FSubOp $lhs, $rhs)
->;
+// Deprecated pattern: the predicate bit needs to start out as null, so this lowering is now handled by the C++ LlvmFSubToNeuraFSub pattern instead.
+// def : Pat<
+//   (LLVM_FSubOp $lhs, $rhs, $_fastmath),
+//   (Neura_FSubOp $lhs, $rhs)
+// >;

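For orientation, here is a minimal sketch of the rewrite the new C++ pattern performs. The input function is hypothetical, and the printed form of `neura.fsub` is assumed to mirror the two-operand `neura.fadd`/`neura.fmul` forms checked in the tests below; since the pattern passes an empty `Value()` for the predicate, no predicate operand appears in the result.

```mlir
// Hypothetical input in the LLVM dialect.
llvm.func @fsub_example(%a: f32, %b: f32) -> f32 {
  %0 = llvm.fsub %a, %b : f32
  llvm.return %0 : f32
}

// Expected shape of the op after --lower-llvm-to-neura (sketch only):
//   %0 = "neura.fsub"(%a, %b) : (f32, f32) -> f32
```

The `test/neura/llvm_sub.mlir` file listed in the README tree below presumably covers this case end to end.
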
test/README.md

Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
# Tests for Neura

The structure of the files in this folder is as follows:

```
.
├── affine2neura
│   └── bert
├── arith2neura
│   ├── add.mlir
│   └── Output
├── c2llvm2mlir
│   ├── kernel.cpp
│   ├── Output
│   └── test.mlir
├── lit.cfg
├── lit.cfg.in
├── neura
│   ├── arith_add.mlir
│   ├── ctrl
│   ├── fadd_fadd.mlir
│   ├── for_loop
│   ├── interpreter
│   ├── llvm_add.mlir
│   ├── llvm_sub.mlir
│   └── Output
├── Output
│   └── test.mlir.script
├── README.md
├── samples
│   ├── bert
│   └── lenet
└── test.mlir
```

All of the above content can be divided into three categories:

## 1 Conversion Tests
We convert other dialects to our `neura` dialect for compilation and optimization. To verify the correctness of these conversions, each conversion pass into the `neura` dialect needs its own tests.

For now, we have:
- `affine2neura`: tests for `--lower-affine-to-neura` [to be provided]
- `arith2neura`: tests for `--lower-arith-to-neura`
- `c2llvm2mlir`: tests for `--lower-llvm-to-neura`

## 2 Neura Compiler Tests
Tests for individual passes and pass pipelines at the `neura` dialect level.

## 3 Samples
A collection of real-world applications used to generate small unit tests.

For now, [BERT](https://github.com/codertimo/BERT-pytorch) and [LeNet](https://github.com/kuangliu/pytorch-cifar/blob/master/models/lenet.py) are included.

We generate the `linalg`-dialect form of these models via [Torch-MLIR](https://github.com/llvm/torch-mlir), which is then lowered to the `affine` dialect for further lowering.

Due to data dependencies between loops in the models, we are currently unable to automatically extract each single loop from the model IR as an individual test.

However, we can manually derive small unit tests from these sample IRs. For example, you can write C++ code for a loop from BERT by mimicking its corresponding `affine.for` operations, then use [Polygeist](https://github.com/llvm/Polygeist) to convert that C++ code into `affine` MLIR for further lowering. That is how the tests in `affine2neura/bert` were generated.
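
For illustration, a conversion test in this layout is a lit/FileCheck `.mlir` file whose RUN lines drive the lowering pipeline and whose CHECK lines pin down the expected `neura` ops. The sketch below is hypothetical (the kernel name and body are invented), but its RUN pipeline mirrors the one used by the `affine2neura/bert` tests updated in this commit:

```mlir
// RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir
// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura | FileCheck %s

// Hypothetical single-loop kernel of the kind extracted from a sample IR.
func.func @double_elements(%in: memref<128xf32>, %out: memref<128xf32>) {
  affine.for %i = 0 to 128 {
    %v = affine.load %in[%i] : memref<128xf32>
    %d = arith.addf %v, %v : f32
    affine.store %d, %out[%i] : memref<128xf32>
  }
  return
}

// After lowering, the floating-point add should show up as a neura op:
// CHECK: "neura.fadd"
```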

test/affine2neura/bert/bert_node0/bert_node0.mlir

Lines changed: 24 additions & 6 deletions
@@ -1,5 +1,5 @@
// RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir
-// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-llvm-to-neura | FileCheck %s
+// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura | FileCheck %s

module attributes {} {
  func.func @_Z10bert_node0PA128_KiPA128_b(%arg0: memref<?x128xi32>, %arg1: memref<?x128xi8>) attributes {} {
@@ -14,8 +14,26 @@ module attributes {} {
  }
}

-// CHECK-LABEL: func.func @_Z10bert_node0PA128_KiPA128_b
-// CHECK-NOT: arith.
-// CHECK-NOT: affine.
-// CHECK-NOT: llvm.
-
+// CHECK: func.func @_Z10bert_node0PA128_KiPA128_b(%arg0: memref<?x128xi32>, %arg1: memref<?x128xi8>) attributes {accelerator = "neura"} {
+// CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> index
+// CHECK-NEXT: %1 = "neura.constant"() <{value = 128 : index}> : () -> index
+// CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : i32}> : () -> i32
+// CHECK-NEXT: %3 = "neura.constant"() <{value = 0 : index}> : () -> index
+// CHECK-NEXT: %4 = builtin.unrealized_conversion_cast %3 : index to i64
+// CHECK-NEXT: llvm.br ^bb1(%4 : i64)
+// CHECK-NEXT: ^bb1(%5: i64): // 2 preds: ^bb0, ^bb2
+// CHECK-NEXT: %6 = builtin.unrealized_conversion_cast %5 : i64 to index
+// CHECK-NEXT: %7 = "neura.icmp"(%6, %1) <{cmpType = "slt"}> : (index, index) -> i1
+// CHECK-NEXT: llvm.cond_br %7, ^bb2, ^bb3
+// CHECK-NEXT: ^bb2: // pred: ^bb1
+// CHECK-NEXT: %8 = memref.load %arg0[%3, %6] : memref<?x128xi32>
+// CHECK-NEXT: %9 = "neura.icmp"(%8, %2) <{cmpType = "sgt"}> : (i32, i32) -> i1
+// CHECK-NEXT: %10 = "neura.cast"(%9) <{cast_type = "extui"}> : (i1) -> i8
+// CHECK-NEXT: memref.store %10, %arg1[%3, %6] : memref<?x128xi8>
+// CHECK-NEXT: %11 = "neura.add"(%6, %0) : (index, index) -> index
+// CHECK-NEXT: %12 = builtin.unrealized_conversion_cast %11 : index to i64
+// CHECK-NEXT: llvm.br ^bb1(%12 : i64)
+// CHECK-NEXT: ^bb3: // pred: ^bb1
+// CHECK-NEXT: return
+// CHECK-NEXT: }
+// CHECK-NEXT: }

test/affine2neura/bert/bert_node1/bert_node1.mlir

Lines changed: 31 additions & 5 deletions
@@ -1,5 +1,5 @@
// RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir
-// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-llvm-to-neura | FileCheck %s
+// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura | FileCheck %s
module attributes {} {
  func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref<?x1x1x1x1x128xi8>, %arg1: memref<?x1x128x1x1x128xi8>) attributes {} {
    affine.for %arg2 = 0 to 128 {
@@ -12,7 +12,33 @@ module attributes {} {
  }
}

-// CHECK-LABEL: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_
-// CHECK-NOT: arith.
-// CHECK-NOT: affine.
-// CHECK-NOT: llvm.
+// CHECK: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref<?x1x1x1x1x128xi8>, %arg1: memref<?x1x128x1x1x128xi8>) attributes {accelerator = "neura"} {
+// CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> index
+// CHECK-NEXT: %1 = "neura.constant"() <{value = 128 : index}> : () -> index
+// CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : index}> : () -> index
+// CHECK-NEXT: %3 = builtin.unrealized_conversion_cast %2 : index to i64
+// CHECK-NEXT: llvm.br ^bb1(%3 : i64)
+// CHECK-NEXT: ^bb1(%4: i64): // 2 preds: ^bb0, ^bb5
+// CHECK-NEXT: %5 = builtin.unrealized_conversion_cast %4 : i64 to index
+// CHECK-NEXT: %6 = "neura.icmp"(%5, %1) <{cmpType = "slt"}> : (index, index) -> i1
+// CHECK-NEXT: llvm.cond_br %6, ^bb2, ^bb6
+// CHECK-NEXT: ^bb2: // pred: ^bb1
+// CHECK-NEXT: %7 = builtin.unrealized_conversion_cast %2 : index to i64
+// CHECK-NEXT: llvm.br ^bb3(%7 : i64)
+// CHECK-NEXT: ^bb3(%8: i64): // 2 preds: ^bb2, ^bb4
+// CHECK-NEXT: %9 = builtin.unrealized_conversion_cast %8 : i64 to index
+// CHECK-NEXT: %10 = "neura.icmp"(%9, %1) <{cmpType = "slt"}> : (index, index) -> i1
+// CHECK-NEXT: llvm.cond_br %10, ^bb4, ^bb5
+// CHECK-NEXT: ^bb4: // pred: ^bb3
+// CHECK-NEXT: %11 = memref.load %arg0[%2, %2, %2, %2, %2, %9] : memref<?x1x1x1x1x128xi8>
+// CHECK-NEXT: memref.store %11, %arg1[%2, %2, %5, %2, %2, %9] : memref<?x1x128x1x1x128xi8>
+// CHECK-NEXT: %12 = "neura.add"(%9, %0) : (index, index) -> index
+// CHECK-NEXT: %13 = builtin.unrealized_conversion_cast %12 : index to i64
+// CHECK-NEXT: llvm.br ^bb3(%13 : i64)
+// CHECK-NEXT: ^bb5: // pred: ^bb3
+// CHECK-NEXT: %14 = "neura.add"(%5, %0) : (index, index) -> index
+// CHECK-NEXT: %15 = builtin.unrealized_conversion_cast %14 : index to i64
+// CHECK-NEXT: llvm.br ^bb1(%15 : i64)
+// CHECK-NEXT: ^bb6: // pred: ^bb1
+// CHECK-NEXT: return
+// CHECK-NEXT: }

test/affine2neura/bert/bert_node2/bert_node2.mlir

Lines changed: 50 additions & 5 deletions
@@ -1,5 +1,5 @@
// RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir
-// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-llvm-to-neura | FileCheck %s
+// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura | FileCheck %s
module attributes {} {
  func.func @_Z10bert_node2PA128_KiPA768_KfPA128_A768_f(%arg0: memref<?x128xi32>, %arg1: memref<?x768xf32>, %arg2: memref<?x128x768xf32>) attributes {} {
    %false = arith.constant false
@@ -27,7 +27,52 @@ module attributes {} {
  }
}

-// CHECK-LABEL: func.func @_Z10bert_node2PA128_KiPA768_KfPA128_A768_f
-// CHECK-NOT: arith.
-// CHECK-NOT: affine.
-// CHECK-NOT: llvm.
+// CHECK: func.func @_Z10bert_node2PA128_KiPA768_KfPA128_A768_f(%arg0: memref<?x128xi32>, %arg1: memref<?x768xf32>, %arg2: memref<?x128x768xf32>) attributes {accelerator = "neura"} {
+// CHECK-NEXT: %0 = "neura.constant"() <{value = 768 : index}> : () -> index
+// CHECK-NEXT: %1 = "neura.constant"() <{value = 1 : index}> : () -> index
+// CHECK-NEXT: %2 = "neura.constant"() <{value = 128 : index}> : () -> index
+// CHECK-NEXT: %3 = "neura.constant"() <{value = false}> : () -> i1
+// CHECK-NEXT: %4 = "neura.constant"() <{value = 30521 : i32}> : () -> i32
+// CHECK-NEXT: %5 = "neura.constant"() <{value = 0 : i32}> : () -> i32
+// CHECK-NEXT: %6 = "neura.constant"() <{value = 30522 : i32}> : () -> i32
+// CHECK-NEXT: %7 = "neura.constant"() <{value = 0 : index}> : () -> index
+// CHECK-NEXT: %8 = builtin.unrealized_conversion_cast %7 : index to i64
+// CHECK-NEXT: llvm.br ^bb1(%8 : i64)
+// CHECK-NEXT: ^bb1(%9: i64): // 2 preds: ^bb0, ^bb9
+// CHECK-NEXT: %10 = builtin.unrealized_conversion_cast %9 : i64 to index
+// CHECK-NEXT: %11 = "neura.icmp"(%10, %2) <{cmpType = "slt"}> : (index, index) -> i1
+// CHECK-NEXT: llvm.cond_br %11, ^bb2, ^bb10
+// CHECK-NEXT: ^bb2: // pred: ^bb1
+// CHECK-NEXT: %12 = builtin.unrealized_conversion_cast %7 : index to i64
+// CHECK-NEXT: llvm.br ^bb3(%12 : i64)
+// CHECK-NEXT: ^bb3(%13: i64): // 2 preds: ^bb2, ^bb8
+// CHECK-NEXT: %14 = builtin.unrealized_conversion_cast %13 : i64 to index
+// CHECK-NEXT: %15 = "neura.icmp"(%14, %0) <{cmpType = "slt"}> : (index, index) -> i1
+// CHECK-NEXT: llvm.cond_br %15, ^bb4, ^bb9
+// CHECK-NEXT: ^bb4: // pred: ^bb3
+// CHECK-NEXT: %16 = memref.load %arg0[%7, %10] : memref<?x128xi32>
+// CHECK-NEXT: %17 = "neura.icmp"(%16, %6) <{cmpType = "sge"}> : (i32, i32) -> i1
+// CHECK-NEXT: %18 = "neura.sel"(%4, %16, %17) : (i32, i32, i1) -> i32
+// CHECK-NEXT: llvm.cond_br %17, ^bb5, ^bb6
+// CHECK-NEXT: ^bb5: // pred: ^bb4
+// CHECK-NEXT: llvm.br ^bb7(%3 : i1)
+// CHECK-NEXT: ^bb6: // pred: ^bb4
+// CHECK-NEXT: %19 = "neura.icmp"(%16, %5) <{cmpType = "slt"}> : (i32, i32) -> i1
+// CHECK-NEXT: llvm.br ^bb7(%19 : i1)
+// CHECK-NEXT: ^bb7(%20: i1): // 2 preds: ^bb5, ^bb6
+// CHECK-NEXT: llvm.br ^bb8
+// CHECK-NEXT: ^bb8: // pred: ^bb7
+// CHECK-NEXT: %21 = "neura.sel"(%5, %18, %20) : (i32, i32, i1) -> i32
+// CHECK-NEXT: %22 = "neura.cast"(%21) <{cast_type = "indexCast"}> : (i32) -> index
+// CHECK-NEXT: %23 = memref.load %arg1[%22, %14] : memref<?x768xf32>
+// CHECK-NEXT: memref.store %23, %arg2[%7, %10, %14] : memref<?x128x768xf32>
+// CHECK-NEXT: %24 = "neura.add"(%14, %1) : (index, index) -> index
+// CHECK-NEXT: %25 = builtin.unrealized_conversion_cast %24 : index to i64
+// CHECK-NEXT: llvm.br ^bb3(%25 : i64)
+// CHECK-NEXT: ^bb9: // pred: ^bb3
+// CHECK-NEXT: %26 = "neura.add"(%10, %1) : (index, index) -> index
+// CHECK-NEXT: %27 = builtin.unrealized_conversion_cast %26 : index to i64
+// CHECK-NEXT: llvm.br ^bb1(%27 : i64)
+// CHECK-NEXT: ^bb10: // pred: ^bb1
+// CHECK-NEXT: return
+// CHECK-NEXT: }

test/affine2neura/bert/bert_node28/bert_node28.mlir

Lines changed: 46 additions & 5 deletions
@@ -1,5 +1,5 @@
// RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir
-// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-llvm-to-neura | FileCheck %s
+// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura | FileCheck %s
module attributes {} {
  func.func @_Z11bert_node28PA128_A768_KfPA768_S0_PA128_A768_f(%arg0: memref<?x128x768xf32>, %arg1: memref<?x768x768xf32>, %arg2: memref<?x128x768xf32>) attributes {} {
    affine.for %arg3 = 0 to 128 {
@@ -17,7 +17,48 @@ module attributes {} {
    return
  }
}
-// CHECK-LABEL: func.func @_Z11bert_node28PA128_A768_KfPA768_S0_PA128_A768_f
-// CHECK-NOT: arith.
-// CHECK-NOT: affine.
-// CHECK-NOT: llvm.
+// CHECK: func.func @_Z11bert_node28PA128_A768_KfPA768_S0_PA128_A768_f(%arg0: memref<?x128x768xf32>, %arg1: memref<?x768x768xf32>, %arg2: memref<?x128x768xf32>) attributes {accelerator = "neura"} {
+// CHECK-NEXT: %0 = "neura.constant"() <{value = 768 : index}> : () -> index
+// CHECK-NEXT: %1 = "neura.constant"() <{value = 1 : index}> : () -> index
+// CHECK-NEXT: %2 = "neura.constant"() <{value = 128 : index}> : () -> index
+// CHECK-NEXT: %3 = "neura.constant"() <{value = 0 : index}> : () -> index
+// CHECK-NEXT: %4 = builtin.unrealized_conversion_cast %3 : index to i64
+// CHECK-NEXT: llvm.br ^bb1(%4 : i64)
+// CHECK-NEXT: ^bb1(%5: i64): // 2 preds: ^bb0, ^bb8
+// CHECK-NEXT: %6 = builtin.unrealized_conversion_cast %5 : i64 to index
+// CHECK-NEXT: %7 = "neura.icmp"(%6, %2) <{cmpType = "slt"}> : (index, index) -> i1
+// CHECK-NEXT: llvm.cond_br %7, ^bb2, ^bb9
+// CHECK-NEXT: ^bb2: // pred: ^bb1
+// CHECK-NEXT: %8 = builtin.unrealized_conversion_cast %3 : index to i64
+// CHECK-NEXT: llvm.br ^bb3(%8 : i64)
+// CHECK-NEXT: ^bb3(%9: i64): // 2 preds: ^bb2, ^bb7
+// CHECK-NEXT: %10 = builtin.unrealized_conversion_cast %9 : i64 to index
+// CHECK-NEXT: %11 = "neura.icmp"(%10, %0) <{cmpType = "slt"}> : (index, index) -> i1
+// CHECK-NEXT: llvm.cond_br %11, ^bb4, ^bb8
+// CHECK-NEXT: ^bb4: // pred: ^bb3
+// CHECK-NEXT: %12 = builtin.unrealized_conversion_cast %3 : index to i64
+// CHECK-NEXT: llvm.br ^bb5(%12 : i64)
+// CHECK-NEXT: ^bb5(%13: i64): // 2 preds: ^bb4, ^bb6
+// CHECK-NEXT: %14 = builtin.unrealized_conversion_cast %13 : i64 to index
+// CHECK-NEXT: %15 = "neura.icmp"(%14, %0) <{cmpType = "slt"}> : (index, index) -> i1
+// CHECK-NEXT: llvm.cond_br %15, ^bb6, ^bb7
+// CHECK-NEXT: ^bb6: // pred: ^bb5
+// CHECK-NEXT: %16 = memref.load %arg0[%3, %6, %14] : memref<?x128x768xf32>
+// CHECK-NEXT: %17 = memref.load %arg1[%3, %14, %10] : memref<?x768x768xf32>
+// CHECK-NEXT: %18 = memref.load %arg2[%3, %6, %10] : memref<?x128x768xf32>
+// CHECK-NEXT: %19 = "neura.fmul"(%16, %17) : (f32, f32) -> f32
+// CHECK-NEXT: %20 = "neura.fadd"(%18, %19) : (f32, f32) -> f32
+// CHECK-NEXT: memref.store %20, %arg2[%3, %6, %10] : memref<?x128x768xf32>
+// CHECK-NEXT: %21 = "neura.add"(%14, %1) : (index, index) -> index
+// CHECK-NEXT: %22 = builtin.unrealized_conversion_cast %21 : index to i64
+// CHECK-NEXT: llvm.br ^bb5(%22 : i64)
+// CHECK-NEXT: ^bb7: // pred: ^bb5
+// CHECK-NEXT: %23 = "neura.add"(%10, %1) : (index, index) -> index
+// CHECK-NEXT: %24 = builtin.unrealized_conversion_cast %23 : index to i64
+// CHECK-NEXT: llvm.br ^bb3(%24 : i64)
+// CHECK-NEXT: ^bb8: // pred: ^bb3
+// CHECK-NEXT: %25 = "neura.add"(%6, %1) : (index, index) -> index
+// CHECK-NEXT: %26 = builtin.unrealized_conversion_cast %25 : index to i64
+// CHECK-NEXT: llvm.br ^bb1(%26 : i64)
+// CHECK-NEXT: ^bb9: // pred: ^bb1
+// CHECK-NEXT: return

test/affine2neura/bert/bert_node3/bert_node3.mlir

Lines changed: 33 additions & 5 deletions
@@ -1,5 +1,5 @@
// RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir
-// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-llvm-to-neura | FileCheck %s
+// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura | FileCheck %s
module attributes {} {
  func.func @_Z10bert_node3PA128_A768_KfS2_PA128_A768_f(%arg0: memref<?x128x768xf32>, %arg1: memref<?x128x768xf32>, %arg2: memref<?x128x768xf32>) attributes {} {
    affine.for %arg3 = 0 to 128 {
@@ -14,7 +14,35 @@ module attributes {} {
  }
}

-// CHECK-LABEL: func.func @_Z10bert_node3PA128_A768_KfS2_PA128_A768_f
-// CHECK-NOT: arith.
-// CHECK-NOT: affine.
-// CHECK-NOT: llvm.
+// CHECK: func.func @_Z10bert_node3PA128_A768_KfS2_PA128_A768_f(%arg0: memref<?x128x768xf32>, %arg1: memref<?x128x768xf32>, %arg2: memref<?x128x768xf32>) attributes {accelerator = "neura"} {
+// CHECK-NEXT: %0 = "neura.constant"() <{value = 768 : index}> : () -> index
+// CHECK-NEXT: %1 = "neura.constant"() <{value = 1 : index}> : () -> index
+// CHECK-NEXT: %2 = "neura.constant"() <{value = 128 : index}> : () -> index
+// CHECK-NEXT: %3 = "neura.constant"() <{value = 0 : index}> : () -> index
+// CHECK-NEXT: %4 = builtin.unrealized_conversion_cast %3 : index to i64
+// CHECK-NEXT: llvm.br ^bb1(%4 : i64)
+// CHECK-NEXT: ^bb1(%5: i64): // 2 preds: ^bb0, ^bb5
+// CHECK-NEXT: %6 = builtin.unrealized_conversion_cast %5 : i64 to index
+// CHECK-NEXT: %7 = "neura.icmp"(%6, %2) <{cmpType = "slt"}> : (index, index) -> i1
+// CHECK-NEXT: llvm.cond_br %7, ^bb2, ^bb6
+// CHECK-NEXT: ^bb2: // pred: ^bb1
+// CHECK-NEXT: %8 = builtin.unrealized_conversion_cast %3 : index to i64
+// CHECK-NEXT: llvm.br ^bb3(%8 : i64)
+// CHECK-NEXT: ^bb3(%9: i64): // 2 preds: ^bb2, ^bb4
+// CHECK-NEXT: %10 = builtin.unrealized_conversion_cast %9 : i64 to index
+// CHECK-NEXT: %11 = "neura.icmp"(%10, %0) <{cmpType = "slt"}> : (index, index) -> i1
+// CHECK-NEXT: llvm.cond_br %11, ^bb4, ^bb5
+// CHECK-NEXT: ^bb4: // pred: ^bb3
+// CHECK-NEXT: %12 = memref.load %arg0[%3, %6, %10] : memref<?x128x768xf32>
+// CHECK-NEXT: %13 = memref.load %arg1[%3, %6, %10] : memref<?x128x768xf32>
+// CHECK-NEXT: %14 = "neura.fadd"(%12, %13) : (f32, f32) -> f32
+// CHECK-NEXT: memref.store %14, %arg2[%3, %6, %10] : memref<?x128x768xf32>
+// CHECK-NEXT: %15 = "neura.add"(%10, %1) : (index, index) -> index
+// CHECK-NEXT: %16 = builtin.unrealized_conversion_cast %15 : index to i64
+// CHECK-NEXT: llvm.br ^bb3(%16 : i64)
+// CHECK-NEXT: ^bb5: // pred: ^bb3
+// CHECK-NEXT: %17 = "neura.add"(%6, %1) : (index, index) -> index
+// CHECK-NEXT: %18 = builtin.unrealized_conversion_cast %17 : index to i64
+// CHECK-NEXT: llvm.br ^bb1(%18 : i64)
+// CHECK-NEXT: ^bb6: // pred: ^bb1
+// CHECK-NEXT: return
