From 5976400418f566e5b1a4b714b72162c21f3d1b4b Mon Sep 17 00:00:00 2001 From: Yi Xu Date: Fri, 9 Jul 2021 13:59:27 +0800 Subject: [PATCH 01/24] Add compiler config 'packed'; Add 'shape' field to IndexExtractor --- taichi/analysis/bls_analyzer.cpp | 2 +- taichi/ir/scratch_pad.h | 3 +-- taichi/ir/snode.cpp | 13 ++++++++++++- taichi/ir/snode.h | 4 ++++ taichi/program/compile_config.cpp | 1 + taichi/program/compile_config.h | 1 + taichi/python/export_lang.cpp | 1 + 7 files changed, 21 insertions(+), 4 deletions(-) diff --git a/taichi/analysis/bls_analyzer.cpp b/taichi/analysis/bls_analyzer.cpp index d2105daca..78048e329 100644 --- a/taichi/analysis/bls_analyzer.cpp +++ b/taichi/analysis/bls_analyzer.cpp @@ -26,7 +26,7 @@ void BLSAnalyzer::generate_block_indices(SNode *snode, BlockIndices *indices) { for (int i = 0; i < snode->num_active_indices; i++) { auto j = snode->physical_index_position[i]; indices->push_back( - {/*low=*/0, /*high=*/(1 << snode->extractors[j].num_bits) - 1}); + {/*low=*/0, /*high=*/snode->extractors[j].shape - 1}); } } diff --git a/taichi/ir/scratch_pad.h b/taichi/ir/scratch_pad.h index 7081be117..7df97db99 100644 --- a/taichi/ir/scratch_pad.h +++ b/taichi/ir/scratch_pad.h @@ -103,8 +103,7 @@ class ScratchPad { block_size.resize(dim); for (int i = 0; i < dim; i++) { block_size[i] = - 1 << snode->parent->extractors[snode->physical_index_position[i]] - .num_bits; + snode->parent->extractors[snode->physical_index_position[i]].shape; TI_ASSERT(bounds[i].low != std::numeric_limits::max()); TI_ASSERT(bounds[i].high != std::numeric_limits::min()); } diff --git a/taichi/ir/snode.cpp b/taichi/ir/snode.cpp index 2bdf25156..3389d98e6 100644 --- a/taichi/ir/snode.cpp +++ b/taichi/ir/snode.cpp @@ -2,6 +2,7 @@ #include "taichi/ir/ir.h" #include "taichi/ir/statements.h" +#include "taichi/program/program.h" TLANG_NAMESPACE_BEGIN @@ -44,13 +45,23 @@ SNode &SNode::create_node(std::vector indices, s = promoted_s; } TI_ASSERT(bit::is_power_of_two(s)); - new_node.n *= s; + if (get_current_program().config.packed) { + new_node.n *= sizes[i]; + } else { + new_node.n *= s; + } } for (int i = 0; i < (int)indices.size(); i++) { auto &ind = indices[i]; new_node.extractors[ind.value].activate( bit::log2int(bit::least_pot_bound(sizes[i]))); new_node.extractors[ind.value].num_elements = sizes[i]; + if (get_current_program().config.packed) { + new_node.extractors[ind.value].shape = sizes[i]; + } else { + new_node.extractors[ind.value].shape = + 1 << new_node.extractors[ind.value].num_bits; + } } return new_node; } diff --git a/taichi/ir/snode.h b/taichi/ir/snode.h index 5740e0d27..8efd5544b 100644 --- a/taichi/ir/snode.h +++ b/taichi/ir/snode.h @@ -39,6 +39,10 @@ struct IndexExtractor { * This is the raw shape, *not* padded to power-of-two (POT). */ int num_elements{1}; + /** + * POT shape or packed shape according to the config. + */ + int shape{1}; /** * Number of bits needed to store the coordinate at this index. * diff --git a/taichi/program/compile_config.cpp b/taichi/program/compile_config.cpp index a06805f48..9e126b589 100644 --- a/taichi/program/compile_config.cpp +++ b/taichi/program/compile_config.cpp @@ -8,6 +8,7 @@ CompileConfig::CompileConfig() { arch = host_arch(); simd_width = default_simd_width(arch); external_optimization_level = 3; + packed = false; print_ir = false; print_accessor_ir = false; print_evaluator_ir = false; diff --git a/taichi/program/compile_config.h b/taichi/program/compile_config.h index e42d4e265..3c2d9b465 100644 --- a/taichi/program/compile_config.h +++ b/taichi/program/compile_config.h @@ -14,6 +14,7 @@ struct CompileConfig { bool lazy_compilation; int external_optimization_level; int max_vector_width; + bool packed; bool print_ir; bool print_accessor_ir; bool print_evaluator_ir; diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index ff94466ec..ed469c619 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -119,6 +119,7 @@ void export_lang(py::module &m) { py::class_(m, "CompileConfig") .def(py::init<>()) .def_readwrite("arch", &CompileConfig::arch) + .def_readwrite("packed", &CompileConfig::packed) .def_readwrite("print_ir", &CompileConfig::print_ir) .def_readwrite("debug", &CompileConfig::debug) .def_readwrite("cfg_optimization", &CompileConfig::cfg_optimization) From 995b47014388f83d945fbe29d4cc4003ff90e13b Mon Sep 17 00:00:00 2001 From: Yi Xu Date: Wed, 14 Jul 2021 15:06:35 +0800 Subject: [PATCH 02/24] Support packed mode for snode type generation and scalar_pointer_lowerer --- taichi/ir/snode.h | 4 +-- taichi/transforms/scalar_pointer_lowerer.cpp | 37 ++++++++++++++++---- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/taichi/ir/snode.h b/taichi/ir/snode.h index 8efd5544b..5287edc55 100644 --- a/taichi/ir/snode.h +++ b/taichi/ir/snode.h @@ -110,7 +110,7 @@ class SNode { int depth{0}; std::string name; - int64 n{0}; + int64 n{1}; int total_num_bits{0}; int total_bit_start{0}; int chunk_size{0}; @@ -287,7 +287,7 @@ class SNode { } int64 max_num_elements() const { - return int64(1) << total_num_bits; + return n; } int shape_along_axis(int i) const; diff --git a/taichi/transforms/scalar_pointer_lowerer.cpp b/taichi/transforms/scalar_pointer_lowerer.cpp index 9a922452e..3da212411 100644 --- a/taichi/transforms/scalar_pointer_lowerer.cpp +++ b/taichi/transforms/scalar_pointer_lowerer.cpp @@ -5,6 +5,7 @@ #include "taichi/ir/analysis.h" #include "taichi/ir/snode.h" #include "taichi/ir/statements.h" +#include "taichi/program/program.h" #include "taichi/transforms/scalar_pointer_lowerer.h" namespace taichi { @@ -45,6 +46,14 @@ void ScalarPointerLowerer::run() { start_bits[j] += s->extractors[j].num_bits; } } + // general - no dependence on POT + std::array total_shape; + total_shape.fill(1); + for (const auto *s : snodes_) { + for (int j = 0; j < taichi_max_num_indices; j++) { + total_shape[j] *= s->extractors[j].shape; + } + } if (path_length_ == 0) return; @@ -63,13 +72,27 @@ void ScalarPointerLowerer::run() { for (int k_ = 0; k_ < (int)indices_.size(); k_++) { for (int k = 0; k < taichi_max_num_indices; k++) { if (snode->physical_index_position[k_] == k) { - start_bits[k] -= snode->extractors[k].num_bits; - const int begin = start_bits[k]; - const int end = begin + snode->extractors[k].num_bits; - auto extracted = Stmt::make(indices_[k_], begin, end); - lowered_indices.push_back(extracted.get()); - lowered_->push_back(std::move(extracted)); - strides.push_back(1 << snode->extractors[k].num_bits); + if (get_current_program().config.packed) { + auto const_prev = Stmt::make(TypedConstant(total_shape[k])); + auto mod = Stmt::make(BinaryOpType::mod, indices_[k_], const_prev.get()); + total_shape[k] /= snode->extractors[k].shape; + auto const_next = Stmt::make(TypedConstant(total_shape[k])); + auto div = Stmt::make(BinaryOpType::div, mod.get(), const_next.get()); + lowered_indices.push_back(div.get()); + lowered_->push_back(std::move(const_prev)); + lowered_->push_back(std::move(mod)); + lowered_->push_back(std::move(const_next)); + lowered_->push_back(std::move(div)); + } else { + start_bits[k] -= snode->extractors[k].num_bits; + const int begin = start_bits[k]; + const int end = begin + snode->extractors[k].num_bits; + auto extracted = + Stmt::make(indices_[k_], begin, end); + lowered_indices.push_back(extracted.get()); + lowered_->push_back(std::move(extracted)); + } + strides.push_back(snode->extractors[k].shape); } } } From c192fb7e97324bbcbf54b766f07939c68c2767a6 Mon Sep 17 00:00:00 2001 From: Yi Xu Date: Thu, 15 Jul 2021 11:30:02 +0800 Subject: [PATCH 03/24] Refactor scalar_pointer_lowerer --- taichi/transforms/scalar_pointer_lowerer.cpp | 44 ++++++++------------ 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/taichi/transforms/scalar_pointer_lowerer.cpp b/taichi/transforms/scalar_pointer_lowerer.cpp index 3da212411..4ea3f40dc 100644 --- a/taichi/transforms/scalar_pointer_lowerer.cpp +++ b/taichi/transforms/scalar_pointer_lowerer.cpp @@ -46,7 +46,7 @@ void ScalarPointerLowerer::run() { start_bits[j] += s->extractors[j].num_bits; } } - // general - no dependence on POT + // general shape calculation - no dependence on POT std::array total_shape; total_shape.fill(1); for (const auto *s : snodes_) { @@ -68,33 +68,25 @@ void ScalarPointerLowerer::run() { } std::vector lowered_indices; std::vector strides; - // extract bits + // extract lowered indices for (int k_ = 0; k_ < (int)indices_.size(); k_++) { - for (int k = 0; k < taichi_max_num_indices; k++) { - if (snode->physical_index_position[k_] == k) { - if (get_current_program().config.packed) { - auto const_prev = Stmt::make(TypedConstant(total_shape[k])); - auto mod = Stmt::make(BinaryOpType::mod, indices_[k_], const_prev.get()); - total_shape[k] /= snode->extractors[k].shape; - auto const_next = Stmt::make(TypedConstant(total_shape[k])); - auto div = Stmt::make(BinaryOpType::div, mod.get(), const_next.get()); - lowered_indices.push_back(div.get()); - lowered_->push_back(std::move(const_prev)); - lowered_->push_back(std::move(mod)); - lowered_->push_back(std::move(const_next)); - lowered_->push_back(std::move(div)); - } else { - start_bits[k] -= snode->extractors[k].num_bits; - const int begin = start_bits[k]; - const int end = begin + snode->extractors[k].num_bits; - auto extracted = - Stmt::make(indices_[k_], begin, end); - lowered_indices.push_back(extracted.get()); - lowered_->push_back(std::move(extracted)); - } - strides.push_back(snode->extractors[k].shape); - } + int k = snode->physical_index_position[k_]; + if (k < 0) continue; + if (get_current_program().config.packed) { // no dependence on POT + auto prev = lowered_->push_back(TypedConstant(total_shape[k])); + total_shape[k] /= snode->extractors[k].shape; + auto next = lowered_->push_back(TypedConstant(total_shape[k])); + auto mod = lowered_->push_back(BinaryOpType::mod, indices_[k_], prev); + auto div = lowered_->push_back(BinaryOpType::div, mod, next); + lowered_indices.push_back(div); + } else { + const int end = start_bits[k]; + start_bits[k] -= snode->extractors[k].num_bits; + const int begin = start_bits[k]; + auto extracted = lowered_->push_back(indices_[k_], begin, end); + lowered_indices.push_back(extracted); } + strides.push_back(snode->extractors[k].shape); } // linearize auto *linearized = From fe51c4acec6e03f001a4472f6fc5e28f0f5d07a2 Mon Sep 17 00:00:00 2001 From: Yi Xu Date: Thu, 15 Jul 2021 15:04:51 +0800 Subject: [PATCH 04/24] Support packed mode for demote_dense_struct_for --- taichi/ir/snode.h | 4 + taichi/struct/struct.cpp | 5 + .../transforms/demote_dense_struct_fors.cpp | 110 ++++++++++++------ 3 files changed, 81 insertions(+), 38 deletions(-) diff --git a/taichi/ir/snode.h b/taichi/ir/snode.h index 5287edc55..c43ffb6ac 100644 --- a/taichi/ir/snode.h +++ b/taichi/ir/snode.h @@ -43,6 +43,10 @@ struct IndexExtractor { * POT shape or packed shape according to the config. */ int shape{1}; + /** + * Accumulated shape from the last activated index. + */ + int acc_shape{1}; /** * Number of bits needed to store the coordinate at this index. * diff --git a/taichi/struct/struct.cpp b/taichi/struct/struct.cpp index 01115c79b..033060077 100644 --- a/taichi/struct/struct.cpp +++ b/taichi/struct/struct.cpp @@ -49,6 +49,11 @@ void infer_snode_properties(SNode &snode) { snode.extractors[i].acc_offset = acc_offsets; acc_offsets += snode.extractors[i].num_bits; } + int acc_shape = 1; + for (int i = taichi_max_num_indices - 1; i >= 0; i--) { + snode.extractors[i].acc_shape = acc_shape; + acc_shape *= snode.extractors[i].shape; + } if (snode.type == SNodeType::dynamic) { int active_extractor_counder = 0; for (int i = 0; i < taichi_max_num_indices; i++) { diff --git a/taichi/transforms/demote_dense_struct_fors.cpp b/taichi/transforms/demote_dense_struct_fors.cpp index 23656aa74..4ea3baf60 100644 --- a/taichi/transforms/demote_dense_struct_fors.cpp +++ b/taichi/transforms/demote_dense_struct_fors.cpp @@ -27,10 +27,21 @@ void convert_to_range_for(OffloadedStmt *offloaded) { std::reverse(snodes.begin(), snodes.end()); TI_ASSERT(total_bits <= 30); + // general shape calculation - no dependence on POT + int total_n = 1; + std::array total_shape; + total_shape.fill(1); + for (const auto *s : snodes) { + for (int j = 0; j < taichi_max_num_indices; j++) { + total_shape[j] *= s->extractors[j].shape; + } + total_n *= s->n; + } + offloaded->const_begin = true; offloaded->const_end = true; offloaded->begin_value = 0; - offloaded->end_value = 1 << total_bits; + offloaded->end_value = total_n; ////// Begin core transformation auto body = std::move(offloaded->body); @@ -51,47 +62,70 @@ void convert_to_range_for(OffloadedStmt *offloaded) { auto main_loop_var = body_header.push_back(nullptr, 0); // We will set main_loop_var->loop later. - int offset = total_bits; - int start_bits[taichi_max_num_indices] = {0}; - std::copy(std::begin(start_bits_root), std::end(start_bits_root), - std::begin(start_bits)); Stmt *test = body_header.push_back(TypedConstant(-1)); bool has_test = false; - for (int i = 0; i < (int)snodes.size(); i++) { - auto snode = snodes[i]; - offset -= snode->total_num_bits; - for (int j = 0; j < (int)physical_indices.size(); j++) { - auto p = physical_indices[j]; - auto ext = snode->extractors[p]; - Stmt *delta = body_header.push_back( - main_loop_var, ext.acc_offset + offset, - ext.acc_offset + offset + ext.num_bits); - start_bits[p] -= ext.num_bits; - auto multiplier = - body_header.push_back(TypedConstant(1 << start_bits[p])); - delta = body_header.push_back(BinaryOpType::mul, delta, - multiplier); - new_loop_vars[j] = body_header.push_back( - BinaryOpType::add, new_loop_vars[j], delta); + if (get_current_program().config.packed) { // no dependence on POT + for (int i = 0; i < (int)snodes.size(); i++) { + auto snode = snodes[i]; + auto prev_n = body_header.push_back(TypedConstant(total_n)); + total_n /= snode->n; + auto next_n = body_header.push_back(TypedConstant(total_n)); + auto mod_n = body_header.push_back(BinaryOpType::mod, main_loop_var, prev_n); + auto div_n = body_header.push_back(BinaryOpType::div, mod_n, next_n); + for (int j = 0; j < (int)physical_indices.size(); j++) { + auto p = physical_indices[j]; + auto ext = snode->extractors[p]; + auto prev_acc_shape = body_header.push_back(TypedConstant(ext.acc_shape * ext.shape)); + auto next_acc_shape = body_header.push_back(TypedConstant(ext.acc_shape)); + auto mod_acc_shape = body_header.push_back(BinaryOpType::mod, div_n, prev_acc_shape); + auto div_acc_shape = body_header.push_back(BinaryOpType::div, mod_acc_shape, next_acc_shape); + total_shape[p] /= ext.shape; + auto multiplier = body_header.push_back(TypedConstant(total_shape[p])); + auto delta = body_header.push_back(BinaryOpType::mul, div_acc_shape, multiplier); + new_loop_vars[j] = body_header.push_back(BinaryOpType::add, new_loop_vars[j], delta); + } + } + } else { + int offset = total_bits; + int start_bits[taichi_max_num_indices] = {0}; + std::copy(std::begin(start_bits_root), std::end(start_bits_root), + std::begin(start_bits)); + for (int i = 0; i < (int)snodes.size(); i++) { + auto snode = snodes[i]; + offset -= snode->total_num_bits; + for (int j = 0; j < (int)physical_indices.size(); j++) { + auto p = physical_indices[j]; + auto ext = snode->extractors[p]; + Stmt *delta = body_header.push_back( + main_loop_var, ext.acc_offset + offset, + ext.acc_offset + offset + ext.num_bits); + start_bits[p] -= ext.num_bits; + auto multiplier = + body_header.push_back(TypedConstant(1 << start_bits[p])); + delta = body_header.push_back(BinaryOpType::mul, delta, + multiplier); + new_loop_vars[j] = body_header.push_back( + BinaryOpType::add, new_loop_vars[j], delta); + } } - } - std::copy(std::begin(start_bits_root), std::end(start_bits_root), - std::begin(start_bits)); - for (int i = 0; i < (int)snodes.size(); i++) { - auto snode = snodes[i]; - for (int j = 0; j < (int)physical_indices.size(); j++) { - auto p = physical_indices[j]; - start_bits[p] -= snode->extractors[p].num_bits; - auto num_elements = snode->extractors[p].num_elements << start_bits[p]; - if (!bit::is_power_of_two(num_elements)) { - has_test = true; - auto bound = - body_header.push_back(TypedConstant(num_elements)); - auto cmp = body_header.push_back(BinaryOpType::cmp_lt, - new_loop_vars[j], bound); - test = body_header.push_back(BinaryOpType::bit_and, test, - cmp); + std::copy(std::begin(start_bits_root), std::end(start_bits_root), + std::begin(start_bits)); + for (int i = 0; i < (int)snodes.size(); i++) { + auto snode = snodes[i]; + for (int j = 0; j < (int)physical_indices.size(); j++) { + auto p = physical_indices[j]; + start_bits[p] -= snode->extractors[p].num_bits; + auto num_elements = snode->extractors[p].num_elements << start_bits[p]; + if (!bit::is_power_of_two(num_elements)) { + has_test = true; + auto bound = + body_header.push_back(TypedConstant(num_elements)); + auto cmp = body_header.push_back( + BinaryOpType::cmp_lt, new_loop_vars[j], bound); + test = body_header.push_back(BinaryOpType::bit_and, + test, cmp); + } } } } From b8154e4e75dd33d15e3583e6dd92d630601eaaae Mon Sep 17 00:00:00 2001 From: Yi Xu Date: Fri, 16 Jul 2021 11:47:28 +0800 Subject: [PATCH 05/24] Support packed mode for generate_refine_coordinates --- taichi/codegen/codegen_llvm.cpp | 20 ++++++++------- taichi/struct/struct_llvm.cpp | 44 +++++++++++++++++++++++---------- 2 files changed, 42 insertions(+), 22 deletions(-) diff --git a/taichi/codegen/codegen_llvm.cpp b/taichi/codegen/codegen_llvm.cpp index 59d4b9335..20e477255 100644 --- a/taichi/codegen/codegen_llvm.cpp +++ b/taichi/codegen/codegen_llvm.cpp @@ -1673,15 +1673,17 @@ void CodeGenLLVM::create_offload_struct_for(OffloadedStmt *stmt, bool spmd) { auto coord_object = RuntimeObject(kLLVMPhysicalCoordinatesName, this, builder.get(), new_coordinates); - for (int i = 0; i < snode->num_active_indices; i++) { - auto j = snode->physical_index_position[i]; - if (!bit::is_power_of_two(snode->extractors[j].num_elements)) { - auto coord = coord_object.get("val", tlctx->get_constant(j)); - exec_cond = builder->CreateAnd( - exec_cond, - builder->CreateICmp( - llvm::CmpInst::ICMP_SLT, coord, - tlctx->get_constant(snode->extractors[j].num_elements))); + if (!get_current_program().config.packed) { + for (int i = 0; i < snode->num_active_indices; i++) { + auto j = snode->physical_index_position[i]; + if (!bit::is_power_of_two(snode->extractors[j].num_elements)) { + auto coord = coord_object.get("val", tlctx->get_constant(j)); + exec_cond = builder->CreateAnd( + exec_cond, + builder->CreateICmp( + llvm::CmpInst::ICMP_SLT, coord, + tlctx->get_constant(snode->extractors[j].num_elements))); + } } } diff --git a/taichi/struct/struct_llvm.cpp b/taichi/struct/struct_llvm.cpp index c8246271d..b290128ae 100644 --- a/taichi/struct/struct_llvm.cpp +++ b/taichi/struct/struct_llvm.cpp @@ -192,20 +192,38 @@ void StructCompilerLLVM::generate_refine_coordinates(SNode *snode) { auto outp_coords = args[1]; auto l = args[2]; - for (int i = 0; i < taichi_max_num_indices; i++) { - auto addition = tlctx_->get_constant(0); - if (snode->extractors[i].num_bits) { - auto mask = ((1 << snode->extractors[i].num_bits) - 1); - addition = builder.CreateAnd( - builder.CreateAShr(l, snode->extractors[i].acc_offset), mask); + if (get_current_program().config.packed) { // no dependence on POT + for (int i = 0; i < taichi_max_num_indices; i++) { + auto addition = tlctx_->get_constant(0); + if (snode->extractors[i].shape > 1) { + auto prev = tlctx_->get_constant(snode->extractors[i].acc_shape * snode->extractors[i].shape); + auto next = tlctx_->get_constant(snode->extractors[i].acc_shape); + addition = builder.CreateSDiv(builder.CreateSRem(l, prev), next); + } + auto in = call(&builder, "PhysicalCoordinates_get_val", inp_coords, + tlctx_->get_constant(i)); + in = builder.CreateMul(in, + tlctx_->get_constant(snode->extractors[i].shape)); + auto added = builder.CreateAdd(in, addition); + call(&builder, "PhysicalCoordinates_set_val", outp_coords, + tlctx_->get_constant(i), added); + } + } else { + for (int i = 0; i < taichi_max_num_indices; i++) { + auto addition = tlctx_->get_constant(0); + if (snode->extractors[i].num_bits) { + auto mask = ((1 << snode->extractors[i].num_bits) - 1); + addition = builder.CreateAnd( + builder.CreateAShr(l, snode->extractors[i].acc_offset), mask); + } + auto in = call(&builder, "PhysicalCoordinates_get_val", inp_coords, + tlctx_->get_constant(i)); + in = builder.CreateShl(in, + tlctx_->get_constant(snode->extractors[i].num_bits)); + auto added = builder.CreateOr(in, addition); + call(&builder, "PhysicalCoordinates_set_val", outp_coords, + tlctx_->get_constant(i), added); } - auto in = call(&builder, "PhysicalCoordinates_get_val", inp_coords, - tlctx_->get_constant(i)); - in = builder.CreateShl(in, - tlctx_->get_constant(snode->extractors[i].num_bits)); - auto added = builder.CreateOr(in, addition); - call(&builder, "PhysicalCoordinates_set_val", outp_coords, - tlctx_->get_constant(i), added); } builder.CreateRetVoid(); } From 183aaba0f057e1b530759c7bbade2668e88538da Mon Sep 17 00:00:00 2001 From: Yi Xu Date: Mon, 19 Jul 2021 10:19:10 +0800 Subject: [PATCH 06/24] Disable test_indices_assert in packed mode --- tests/python/test_indices_assert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/test_indices_assert.py b/tests/python/test_indices_assert.py index cfc2c1e46..ce6a0ce9a 100644 --- a/tests/python/test_indices_assert.py +++ b/tests/python/test_indices_assert.py @@ -7,7 +7,7 @@ @pytest.mark.skipif(platform.system() == 'Windows', reason="Too much virtual memory for github windows env.") -@ti.test(debug=True, gdb_trigger=False, arch=[ti.cpu]) +@ti.test(debug=True, gdb_trigger=False, packed=False, arch=[ti.cpu]) def test_indices_assert(): overflow = ti.field(ti.i32, (334, 334, 334, 2 * 10)) From d14b9391c97a0079289b0427b859c83010b3307e Mon Sep 17 00:00:00 2001 From: Yi Xu Date: Mon, 19 Jul 2021 14:42:20 +0800 Subject: [PATCH 07/24] Refactor --- taichi/transforms/demote_dense_struct_fors.cpp | 13 ++++--------- taichi/transforms/scalar_pointer_lowerer.cpp | 14 +++++++------- taichi/transforms/utils.cpp | 12 ++++++++++++ taichi/transforms/utils.h | 7 +++++++ 4 files changed, 30 insertions(+), 16 deletions(-) create mode 100644 taichi/transforms/utils.cpp create mode 100644 taichi/transforms/utils.h diff --git a/taichi/transforms/demote_dense_struct_fors.cpp b/taichi/transforms/demote_dense_struct_fors.cpp index 4ea3baf60..c58e9e72b 100644 --- a/taichi/transforms/demote_dense_struct_fors.cpp +++ b/taichi/transforms/demote_dense_struct_fors.cpp @@ -2,6 +2,7 @@ #include "taichi/ir/statements.h" #include "taichi/ir/transforms.h" #include "taichi/ir/visitors.h" +#include "taichi/transforms/utils.h" TLANG_NAMESPACE_BEGIN @@ -67,21 +68,15 @@ void convert_to_range_for(OffloadedStmt *offloaded) { if (get_current_program().config.packed) { // no dependence on POT for (int i = 0; i < (int)snodes.size(); i++) { auto snode = snodes[i]; - auto prev_n = body_header.push_back(TypedConstant(total_n)); + auto extracted = generate_mod_x_div_y(&body_header, main_loop_var, total_n, total_n / snode->n); total_n /= snode->n; - auto next_n = body_header.push_back(TypedConstant(total_n)); - auto mod_n = body_header.push_back(BinaryOpType::mod, main_loop_var, prev_n); - auto div_n = body_header.push_back(BinaryOpType::div, mod_n, next_n); for (int j = 0; j < (int)physical_indices.size(); j++) { auto p = physical_indices[j]; auto ext = snode->extractors[p]; - auto prev_acc_shape = body_header.push_back(TypedConstant(ext.acc_shape * ext.shape)); - auto next_acc_shape = body_header.push_back(TypedConstant(ext.acc_shape)); - auto mod_acc_shape = body_header.push_back(BinaryOpType::mod, div_n, prev_acc_shape); - auto div_acc_shape = body_header.push_back(BinaryOpType::div, mod_acc_shape, next_acc_shape); + auto index = generate_mod_x_div_y(&body_header, extracted, ext.acc_shape * ext.shape, ext.acc_shape); total_shape[p] /= ext.shape; auto multiplier = body_header.push_back(TypedConstant(total_shape[p])); - auto delta = body_header.push_back(BinaryOpType::mul, div_acc_shape, multiplier); + auto delta = body_header.push_back(BinaryOpType::mul, index, multiplier); new_loop_vars[j] = body_header.push_back(BinaryOpType::add, new_loop_vars[j], delta); } } diff --git a/taichi/transforms/scalar_pointer_lowerer.cpp b/taichi/transforms/scalar_pointer_lowerer.cpp index 4ea3f40dc..a2dcf4afa 100644 --- a/taichi/transforms/scalar_pointer_lowerer.cpp +++ b/taichi/transforms/scalar_pointer_lowerer.cpp @@ -7,6 +7,7 @@ #include "taichi/ir/statements.h" #include "taichi/program/program.h" #include "taichi/transforms/scalar_pointer_lowerer.h" +#include "taichi/transforms/utils.h" namespace taichi { namespace lang { @@ -72,20 +73,19 @@ void ScalarPointerLowerer::run() { for (int k_ = 0; k_ < (int)indices_.size(); k_++) { int k = snode->physical_index_position[k_]; if (k < 0) continue; + Stmt *extracted; if (get_current_program().config.packed) { // no dependence on POT - auto prev = lowered_->push_back(TypedConstant(total_shape[k])); + const int prev = total_shape[k]; total_shape[k] /= snode->extractors[k].shape; - auto next = lowered_->push_back(TypedConstant(total_shape[k])); - auto mod = lowered_->push_back(BinaryOpType::mod, indices_[k_], prev); - auto div = lowered_->push_back(BinaryOpType::div, mod, next); - lowered_indices.push_back(div); + const int next = total_shape[k]; + extracted = generate_mod_x_div_y(lowered_, indices_[k_], prev, next); } else { const int end = start_bits[k]; start_bits[k] -= snode->extractors[k].num_bits; const int begin = start_bits[k]; - auto extracted = lowered_->push_back(indices_[k_], begin, end); - lowered_indices.push_back(extracted); + extracted = lowered_->push_back(indices_[k_], begin, end); } + lowered_indices.push_back(extracted); strides.push_back(snode->extractors[k].shape); } // linearize diff --git a/taichi/transforms/utils.cpp b/taichi/transforms/utils.cpp new file mode 100644 index 000000000..f29e68b00 --- /dev/null +++ b/taichi/transforms/utils.cpp @@ -0,0 +1,12 @@ +#include "taichi/ir/statements.h" + +TLANG_NAMESPACE_BEGIN + +Stmt *generate_mod_x_div_y(VecStatement *stmts, Stmt *num, int x, int y) { + auto const_x = stmts->push_back(TypedConstant(x)); + auto mod_x = stmts->push_back(BinaryOpType::mod, num, const_x); + auto const_y = stmts->push_back(TypedConstant(y)); + return stmts->push_back(BinaryOpType::div, mod_x, const_y); +} + +TLANG_NAMESPACE_END diff --git a/taichi/transforms/utils.h b/taichi/transforms/utils.h new file mode 100644 index 000000000..c8280cc14 --- /dev/null +++ b/taichi/transforms/utils.h @@ -0,0 +1,7 @@ +#pragma once + +TLANG_NAMESPACE_BEGIN + +Stmt *generate_mod_x_div_y(VecStatement *stmts, Stmt *num, int x, int y); + +TLANG_NAMESPACE_END From eb59a93d0e2df1fc49761e2e565878b14f5ddbda Mon Sep 17 00:00:00 2001 From: Yi Xu Date: Mon, 19 Jul 2021 15:50:11 +0800 Subject: [PATCH 08/24] Add snode size test in packed mode --- tests/python/test_packed_size.py | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 tests/python/test_packed_size.py diff --git a/tests/python/test_packed_size.py b/tests/python/test_packed_size.py new file mode 100644 index 000000000..b546d280e --- /dev/null +++ b/tests/python/test_packed_size.py @@ -0,0 +1,8 @@ +import taichi as ti + + +@ti.test(packed=True) +def test_packed_size(): + x = ti.field(ti.i32) + ti.root.dense(ti.i, 20).dense(ti.ijk, 334).place(x) + assert x.snode.parent().parent().cell_size_bytes == 149038816 From 363520f948c1b7928ff14c48ff9fde01d3b3f8e9 Mon Sep 17 00:00:00 2001 From: Taichi Gardener Date: Mon, 19 Jul 2021 08:28:56 +0000 Subject: [PATCH 09/24] Auto Format --- taichi/analysis/bls_analyzer.cpp | 3 +-- taichi/struct/struct_llvm.cpp | 7 ++++--- taichi/transforms/demote_dense_struct_fors.cpp | 15 ++++++++++----- taichi/transforms/scalar_pointer_lowerer.cpp | 8 +++++--- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/taichi/analysis/bls_analyzer.cpp b/taichi/analysis/bls_analyzer.cpp index 78048e329..868df66ab 100644 --- a/taichi/analysis/bls_analyzer.cpp +++ b/taichi/analysis/bls_analyzer.cpp @@ -25,8 +25,7 @@ void BLSAnalyzer::generate_block_indices(SNode *snode, BlockIndices *indices) { // NOTE: Assuming not vectorized for (int i = 0; i < snode->num_active_indices; i++) { auto j = snode->physical_index_position[i]; - indices->push_back( - {/*low=*/0, /*high=*/snode->extractors[j].shape - 1}); + indices->push_back({/*low=*/0, /*high=*/snode->extractors[j].shape - 1}); } } diff --git a/taichi/struct/struct_llvm.cpp b/taichi/struct/struct_llvm.cpp index b290128ae..4faf531c3 100644 --- a/taichi/struct/struct_llvm.cpp +++ b/taichi/struct/struct_llvm.cpp @@ -196,7 +196,8 @@ void StructCompilerLLVM::generate_refine_coordinates(SNode *snode) { for (int i = 0; i < taichi_max_num_indices; i++) { auto addition = tlctx_->get_constant(0); if (snode->extractors[i].shape > 1) { - auto prev = tlctx_->get_constant(snode->extractors[i].acc_shape * snode->extractors[i].shape); + auto prev = tlctx_->get_constant(snode->extractors[i].acc_shape * + snode->extractors[i].shape); auto next = tlctx_->get_constant(snode->extractors[i].acc_shape); addition = builder.CreateSDiv(builder.CreateSRem(l, prev), next); } @@ -218,8 +219,8 @@ void StructCompilerLLVM::generate_refine_coordinates(SNode *snode) { } auto in = call(&builder, "PhysicalCoordinates_get_val", inp_coords, tlctx_->get_constant(i)); - in = builder.CreateShl(in, - tlctx_->get_constant(snode->extractors[i].num_bits)); + in = builder.CreateShl( + in, tlctx_->get_constant(snode->extractors[i].num_bits)); auto added = builder.CreateOr(in, addition); call(&builder, "PhysicalCoordinates_set_val", outp_coords, tlctx_->get_constant(i), added); diff --git a/taichi/transforms/demote_dense_struct_fors.cpp b/taichi/transforms/demote_dense_struct_fors.cpp index c58e9e72b..de6cfd6cb 100644 --- a/taichi/transforms/demote_dense_struct_fors.cpp +++ b/taichi/transforms/demote_dense_struct_fors.cpp @@ -68,16 +68,21 @@ void convert_to_range_for(OffloadedStmt *offloaded) { if (get_current_program().config.packed) { // no dependence on POT for (int i = 0; i < (int)snodes.size(); i++) { auto snode = snodes[i]; - auto extracted = generate_mod_x_div_y(&body_header, main_loop_var, total_n, total_n / snode->n); + auto extracted = generate_mod_x_div_y(&body_header, main_loop_var, + total_n, total_n / snode->n); total_n /= snode->n; for (int j = 0; j < (int)physical_indices.size(); j++) { auto p = physical_indices[j]; auto ext = snode->extractors[p]; - auto index = generate_mod_x_div_y(&body_header, extracted, ext.acc_shape * ext.shape, ext.acc_shape); + auto index = generate_mod_x_div_y( + &body_header, extracted, ext.acc_shape * ext.shape, ext.acc_shape); total_shape[p] /= ext.shape; - auto multiplier = body_header.push_back(TypedConstant(total_shape[p])); - auto delta = body_header.push_back(BinaryOpType::mul, index, multiplier); - new_loop_vars[j] = body_header.push_back(BinaryOpType::add, new_loop_vars[j], delta); + auto multiplier = + body_header.push_back(TypedConstant(total_shape[p])); + auto delta = body_header.push_back(BinaryOpType::mul, + index, multiplier); + new_loop_vars[j] = body_header.push_back( + BinaryOpType::add, new_loop_vars[j], delta); } } } else { diff --git a/taichi/transforms/scalar_pointer_lowerer.cpp b/taichi/transforms/scalar_pointer_lowerer.cpp index a2dcf4afa..10caf04ce 100644 --- a/taichi/transforms/scalar_pointer_lowerer.cpp +++ b/taichi/transforms/scalar_pointer_lowerer.cpp @@ -72,9 +72,10 @@ void ScalarPointerLowerer::run() { // extract lowered indices for (int k_ = 0; k_ < (int)indices_.size(); k_++) { int k = snode->physical_index_position[k_]; - if (k < 0) continue; + if (k < 0) + continue; Stmt *extracted; - if (get_current_program().config.packed) { // no dependence on POT + if (get_current_program().config.packed) { // no dependence on POT const int prev = total_shape[k]; total_shape[k] /= snode->extractors[k].shape; const int next = total_shape[k]; @@ -83,7 +84,8 @@ void ScalarPointerLowerer::run() { const int end = start_bits[k]; start_bits[k] -= snode->extractors[k].num_bits; const int begin = start_bits[k]; - extracted = lowered_->push_back(indices_[k_], begin, end); + extracted = + lowered_->push_back(indices_[k_], begin, end); } lowered_indices.push_back(extracted); strides.push_back(snode->extractors[k].shape); From 5427216f136c5acc4761981dd36dd3916251a1b1 Mon Sep 17 00:00:00 2001 From: Yi Xu Date: Mon, 19 Jul 2021 18:00:44 +0800 Subject: [PATCH 10/24] Update tests/python/test_packed_size.py Co-authored-by: Ye Kuang --- tests/python/test_packed_size.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/test_packed_size.py b/tests/python/test_packed_size.py index b546d280e..695f894bc 100644 --- a/tests/python/test_packed_size.py +++ b/tests/python/test_packed_size.py @@ -5,4 +5,4 @@ def test_packed_size(): x = ti.field(ti.i32) ti.root.dense(ti.i, 20).dense(ti.ijk, 334).place(x) - assert x.snode.parent().parent().cell_size_bytes == 149038816 + assert x.snode.parent().parent().cell_size_bytes == 4 * 334 ** 3 From b0efd4f0a2edfd165833cce60f265247dd7e547b Mon Sep 17 00:00:00 2001 From: Yi Xu Date: Tue, 20 Jul 2021 10:30:19 +0800 Subject: [PATCH 11/24] Remove TLANG_NAMESPACE macros --- taichi/transforms/utils.cpp | 6 ++++-- taichi/transforms/utils.h | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/taichi/transforms/utils.cpp b/taichi/transforms/utils.cpp index f29e68b00..d3dd28bcc 100644 --- a/taichi/transforms/utils.cpp +++ b/taichi/transforms/utils.cpp @@ -1,6 +1,7 @@ #include "taichi/ir/statements.h" -TLANG_NAMESPACE_BEGIN +namespace taichi { +namespace lang { Stmt *generate_mod_x_div_y(VecStatement *stmts, Stmt *num, int x, int y) { auto const_x = stmts->push_back(TypedConstant(x)); @@ -9,4 +10,5 @@ Stmt *generate_mod_x_div_y(VecStatement *stmts, Stmt *num, int x, int y) { return stmts->push_back(BinaryOpType::div, mod_x, const_y); } -TLANG_NAMESPACE_END +} // namespace lang +} // namespace taichi diff --git a/taichi/transforms/utils.h b/taichi/transforms/utils.h index c8280cc14..3440be031 100644 --- a/taichi/transforms/utils.h +++ b/taichi/transforms/utils.h @@ -1,7 +1,9 @@ #pragma once -TLANG_NAMESPACE_BEGIN +namespace taichi { +namespace lang { Stmt *generate_mod_x_div_y(VecStatement *stmts, Stmt *num, int x, int y); -TLANG_NAMESPACE_END +} // namespace lang +} // namespace taichi From d21a9cad3f7b39db79675783f49cbec975116f69 Mon Sep 17 00:00:00 2001 From: Yi Xu Date: Tue, 20 Jul 2021 14:20:36 +0800 Subject: [PATCH 12/24] Refactor --- taichi/backends/metal/struct_metal.cpp | 2 +- taichi/codegen/codegen_llvm.cpp | 6 ++-- taichi/ir/snode.cpp | 28 +++---------------- taichi/ir/snode.h | 20 ++++++------- taichi/ir/transforms.h | 2 +- taichi/program/program.cpp | 2 +- taichi/struct/snode_tree.cpp | 4 +-- taichi/struct/snode_tree.h | 2 +- taichi/struct/struct.cpp | 27 ++++++++++-------- taichi/struct/struct.h | 2 +- taichi/struct/struct_llvm.cpp | 2 +- taichi/transforms/compile_to_offloads.cpp | 2 +- .../transforms/demote_dense_struct_fors.cpp | 17 +++++------ taichi/transforms/lower_access.cpp | 14 ++++++---- taichi/transforms/scalar_pointer_lowerer.cpp | 9 +++--- taichi/transforms/scalar_pointer_lowerer.h | 4 ++- tests/cpp/codegen/refine_coordinates_test.cpp | 1 + tests/cpp/struct/fake_struct_compiler.h | 2 +- .../scalar_pointer_lowerer_test.cpp | 2 +- 19 files changed, 69 insertions(+), 79 deletions(-) diff --git a/taichi/backends/metal/struct_metal.cpp b/taichi/backends/metal/struct_metal.cpp index 8504e046b..f0c654693 100644 --- a/taichi/backends/metal/struct_metal.cpp +++ b/taichi/backends/metal/struct_metal.cpp @@ -337,7 +337,7 @@ class StructCompiler { } sn_desc.total_num_elems_from_root = 1; for (const auto &e : sn->extractors) { - sn_desc.total_num_elems_from_root *= e.num_elements; + sn_desc.total_num_elems_from_root *= e.num_elements_from_root; } TI_ASSERT(snode_descriptors_.find(sn->id) == snode_descriptors_.end()); diff --git a/taichi/codegen/codegen_llvm.cpp b/taichi/codegen/codegen_llvm.cpp index 20e477255..e4a11d9e7 100644 --- a/taichi/codegen/codegen_llvm.cpp +++ b/taichi/codegen/codegen_llvm.cpp @@ -1673,16 +1673,16 @@ void CodeGenLLVM::create_offload_struct_for(OffloadedStmt *stmt, bool spmd) { auto coord_object = RuntimeObject(kLLVMPhysicalCoordinatesName, this, builder.get(), new_coordinates); - if (!get_current_program().config.packed) { + if (!prog->config.packed) { for (int i = 0; i < snode->num_active_indices; i++) { auto j = snode->physical_index_position[i]; - if (!bit::is_power_of_two(snode->extractors[j].num_elements)) { + if (!bit::is_power_of_two(snode->extractors[j].num_elements_from_root)) { auto coord = coord_object.get("val", tlctx->get_constant(j)); exec_cond = builder->CreateAnd( exec_cond, builder->CreateICmp( llvm::CmpInst::ICMP_SLT, coord, - tlctx->get_constant(snode->extractors[j].num_elements))); + tlctx->get_constant(snode->extractors[j].num_elements_from_root))); } } } diff --git a/taichi/ir/snode.cpp b/taichi/ir/snode.cpp index 3389d98e6..60d6da07a 100644 --- a/taichi/ir/snode.cpp +++ b/taichi/ir/snode.cpp @@ -35,33 +35,13 @@ SNode &SNode::create_node(std::vector indices, "hashed node must be child of root due to initialization " "memset limitation."); auto &new_node = insert_children(type); - new_node.n = 1; - for (int i = 0; i < sizes.size(); i++) { - auto s = sizes[i]; - TI_ASSERT(sizes[i] > 0); - if (!bit::is_power_of_two(s)) { - auto promoted_s = bit::least_pot_bound(s); - TI_DEBUG("Non-power-of-two node size {} promoted to {}.", s, promoted_s); - s = promoted_s; - } - TI_ASSERT(bit::is_power_of_two(s)); - if (get_current_program().config.packed) { - new_node.n *= sizes[i]; - } else { - new_node.n *= s; - } - } for (int i = 0; i < (int)indices.size(); i++) { + TI_ASSERT(sizes[i] > 0); auto &ind = indices[i]; new_node.extractors[ind.value].activate( bit::log2int(bit::least_pot_bound(sizes[i]))); - new_node.extractors[ind.value].num_elements = sizes[i]; - if (get_current_program().config.packed) { - new_node.extractors[ind.value].shape = sizes[i]; - } else { - new_node.extractors[ind.value].shape = - 1 << new_node.extractors[ind.value].num_bits; - } + new_node.extractors[ind.value].shape = sizes[i]; + new_node.extractors[ind.value].num_elements_from_root = sizes[i]; } return new_node; } @@ -110,7 +90,7 @@ SNode *SNode::get_least_sparse_ancestor() const { int SNode::shape_along_axis(int i) const { const auto &extractor = extractors[physical_index_position[i]]; - return extractor.num_elements; + return extractor.num_elements_from_root; } SNode::SNode() : SNode(0, SNodeType::undefined) { diff --git a/taichi/ir/snode.h b/taichi/ir/snode.h index c43ffb6ac..060acc332 100644 --- a/taichi/ir/snode.h +++ b/taichi/ir/snode.h @@ -34,34 +34,34 @@ class Index { */ struct IndexExtractor { /** - * Shape at the given index. + * Number of elements from root at this index. * - * This is the raw shape, *not* padded to power-of-two (POT). + * This is the raw number, *not* padded to power-of-two (POT). */ - int num_elements{1}; + int num_elements_from_root{1}; /** - * POT shape or packed shape according to the config. + * Shape at this index (POT or packed) according to the config. */ int shape{1}; /** - * Accumulated shape from the last activated index. + * Accumulated shape from the last activated index to the first one. */ int acc_shape{1}; /** * Number of bits needed to store the coordinate at this index. * - * ceil(log2(num_elements)) + * ceil(log2(shape)) */ int num_bits{0}; /** * Accumulated offset from the last activated index to the first one. * - * This is the starting bit of this index in a linearized 1D coordiate. For + * This is the starting bit of this index in a linearized 1D coordinate. For * example, assuming an SNode of (ti.ijk, shape=(4, 8, 16)). ti.i takes 2 * bits, ti.j 3 bits and ti.k 4 bits. Then for a linearized coordinate: - * ti.k uses bits [0, 3), acc_offset=0 - * tk.j uses btis [3, 6), acc_offset=3 - * ti.i uses bits [6, 8), acc_offset=6 + * ti.k uses bits [0, 4), acc_offset=0 + * ti.j uses bits [4, 7), acc_offset=4 + * ti.i uses bits [7, 9), acc_offset=7 */ int acc_offset{0}; /** diff --git a/taichi/ir/transforms.h b/taichi/ir/transforms.h index 5b6059916..6b22cc475 100644 --- a/taichi/ir/transforms.h +++ b/taichi/ir/transforms.h @@ -106,7 +106,7 @@ bool replace_and_insert_statements( bool replace_statements(IRNode *root, std::function filter, std::function finder); -void demote_dense_struct_fors(IRNode *root); +void demote_dense_struct_fors(IRNode *root, bool packed); bool demote_atomics(IRNode *root, const CompileConfig &config); void reverse_segments(IRNode *root); // for autograd void detect_read_only(IRNode *root); diff --git a/taichi/program/program.cpp b/taichi/program/program.cpp index 0b11a2061..b8c88894e 100644 --- a/taichi/program/program.cpp +++ b/taichi/program/program.cpp @@ -429,7 +429,7 @@ void Program::initialize_llvm_runtime_snodes(const SNodeTree *tree, int Program::add_snode_tree(std::unique_ptr root) { const int id = snode_trees_.size(); - auto tree = std::make_unique(id, std::move(root)); + auto tree = std::make_unique(id, std::move(root), config.packed); tree->root()->set_snode_tree_id(id); materialize_snode_tree(tree.get()); snode_trees_.push_back(std::move(tree)); diff --git a/taichi/struct/snode_tree.cpp b/taichi/struct/snode_tree.cpp index f26a48543..4893b66ab 100644 --- a/taichi/struct/snode_tree.cpp +++ b/taichi/struct/snode_tree.cpp @@ -5,9 +5,9 @@ namespace taichi { namespace lang { -SNodeTree::SNodeTree(int id, std::unique_ptr root) +SNodeTree::SNodeTree(int id, std::unique_ptr root, bool packed) : id_(id), root_(std::move(root)) { - infer_snode_properties(*root_); + infer_snode_properties(*root_, packed); } } // namespace lang diff --git a/taichi/struct/snode_tree.h b/taichi/struct/snode_tree.h index 58f58f2d3..30ef34dc4 100644 --- a/taichi/struct/snode_tree.h +++ b/taichi/struct/snode_tree.h @@ -22,7 +22,7 @@ class SNodeTree { * @param id Id of the tree * @param root Root of the tree */ - explicit SNodeTree(int id, std::unique_ptr root); + explicit SNodeTree(int id, std::unique_ptr root, bool packed); int id() const { return id_; diff --git a/taichi/struct/struct.cpp b/taichi/struct/struct.cpp index 033060077..bfba1c013 100644 --- a/taichi/struct/struct.cpp +++ b/taichi/struct/struct.cpp @@ -9,12 +9,12 @@ namespace taichi { namespace lang { -void infer_snode_properties(SNode &snode) { +void infer_snode_properties(SNode &snode, bool packed) { for (int ch_id = 0; ch_id < (int)snode.ch.size(); ch_id++) { auto &ch = snode.ch[ch_id]; ch->parent = &snode; for (int i = 0; i < taichi_max_num_indices; i++) { - ch->extractors[i].num_elements *= snode.extractors[i].num_elements; + ch->extractors[i].num_elements_from_root *= snode.extractors[i].num_elements_from_root; bool found = false; for (int k = 0; k < taichi_max_num_indices; k++) { if (snode.physical_index_position[k] == i) { @@ -40,20 +40,27 @@ void infer_snode_properties(SNode &snode) { ch->is_bit_level = snode.is_bit_level; } - infer_snode_properties(*ch); + infer_snode_properties(*ch, packed); } // infer extractors - int acc_offsets = 0; - for (int i = taichi_max_num_indices - 1; i >= 0; i--) { - snode.extractors[i].acc_offset = acc_offsets; - acc_offsets += snode.extractors[i].num_bits; - } int acc_shape = 1; for (int i = taichi_max_num_indices - 1; i >= 0; i--) { + // if not in packed mode, pad shape to POT + if (!packed) { + snode.extractors[i].shape = 1 << snode.extractors[i].num_bits; + } snode.extractors[i].acc_shape = acc_shape; acc_shape *= snode.extractors[i].shape; } + snode.n = acc_shape; + // infer extractors (only for POT) + int acc_offsets = 0; + for (int i = taichi_max_num_indices - 1; i >= 0; i--) { + snode.extractors[i].acc_offset = acc_offsets; + acc_offsets += snode.extractors[i].num_bits; + } + snode.total_num_bits = acc_offsets; if (snode.type == SNodeType::dynamic) { int active_extractor_counder = 0; for (int i = 0; i < taichi_max_num_indices; i++) { @@ -72,10 +79,6 @@ void infer_snode_properties(SNode &snode) { "Dynamic SNode can have only one index extractor."); } - snode.total_num_bits = 0; - for (int i = 0; i < taichi_max_num_indices; i++) { - snode.total_num_bits += snode.extractors[i].num_bits; - } // The highest bit is for the sign. constexpr int kMaxTotalNumBits = 64; TI_ERROR_IF( diff --git a/taichi/struct/struct.h b/taichi/struct/struct.h index 7883fd12b..dd7be4af8 100644 --- a/taichi/struct/struct.h +++ b/taichi/struct/struct.h @@ -12,7 +12,7 @@ namespace lang { * * @param snode The root SNode to compute. */ -void infer_snode_properties(SNode &snode); +void infer_snode_properties(SNode &snode, bool packed); class StructCompiler { public: diff --git a/taichi/struct/struct_llvm.cpp b/taichi/struct/struct_llvm.cpp index 4faf531c3..5c32b8681 100644 --- a/taichi/struct/struct_llvm.cpp +++ b/taichi/struct/struct_llvm.cpp @@ -192,7 +192,7 @@ void StructCompilerLLVM::generate_refine_coordinates(SNode *snode) { auto outp_coords = args[1]; auto l = args[2]; - if (get_current_program().config.packed) { // no dependence on POT + if (config_->packed) { // no dependence on POT for (int i = 0; i < taichi_max_num_indices; i++) { auto addition = tlctx_->get_constant(0); if (snode->extractors[i].shape > 1) { diff --git a/taichi/transforms/compile_to_offloads.cpp b/taichi/transforms/compile_to_offloads.cpp index 367b7f2cb..24778f996 100644 --- a/taichi/transforms/compile_to_offloads.cpp +++ b/taichi/transforms/compile_to_offloads.cpp @@ -173,7 +173,7 @@ void offload_to_executable(IRNode *ir, irpass::analysis::verify(ir); if (config.demote_dense_struct_fors) { - irpass::demote_dense_struct_fors(ir); + irpass::demote_dense_struct_fors(ir, config.packed); irpass::type_check(ir, config); print("Dense struct-for demoted"); irpass::analysis::verify(ir); diff --git a/taichi/transforms/demote_dense_struct_fors.cpp b/taichi/transforms/demote_dense_struct_fors.cpp index de6cfd6cb..2bc995547 100644 --- a/taichi/transforms/demote_dense_struct_fors.cpp +++ b/taichi/transforms/demote_dense_struct_fors.cpp @@ -10,7 +10,7 @@ namespace { using TaskType = OffloadedStmt::TaskType; -void convert_to_range_for(OffloadedStmt *offloaded) { +void convert_to_range_for(OffloadedStmt *offloaded, bool packed) { TI_ASSERT(offloaded->task_type == TaskType::struct_for); std::vector snodes; @@ -65,7 +65,7 @@ void convert_to_range_for(OffloadedStmt *offloaded) { Stmt *test = body_header.push_back(TypedConstant(-1)); bool has_test = false; - if (get_current_program().config.packed) { // no dependence on POT + if (packed) { // no dependence on POT for (int i = 0; i < (int)snodes.size(); i++) { auto snode = snodes[i]; auto extracted = generate_mod_x_div_y(&body_header, main_loop_var, @@ -116,7 +116,8 @@ void convert_to_range_for(OffloadedStmt *offloaded) { for (int j = 0; j < (int)physical_indices.size(); j++) { auto p = physical_indices[j]; start_bits[p] -= snode->extractors[p].num_bits; - auto num_elements = snode->extractors[p].num_elements << start_bits[p]; + auto num_elements = snode->extractors[p].num_elements_from_root + << start_bits[p]; if (!bit::is_power_of_two(num_elements)) { has_test = true; auto bound = @@ -165,10 +166,10 @@ void convert_to_range_for(OffloadedStmt *offloaded) { offloaded->task_type = TaskType::range_for; } -void maybe_convert(OffloadedStmt *stmt) { +void maybe_convert(OffloadedStmt *stmt, bool packed) { if ((stmt->task_type == TaskType::struct_for) && stmt->snode->is_path_all_dense) { - convert_to_range_for(stmt); + convert_to_range_for(stmt, packed); } } @@ -176,15 +177,15 @@ void maybe_convert(OffloadedStmt *stmt) { namespace irpass { -void demote_dense_struct_fors(IRNode *root) { +void demote_dense_struct_fors(IRNode *root, bool packed) { if (auto *block = root->cast()) { for (auto &s_ : block->statements) { if (auto *s = s_->cast()) { - maybe_convert(s); + maybe_convert(s, packed); } } } else if (auto *s = root->cast()) { - maybe_convert(s); + maybe_convert(s, packed); } re_id(root); } diff --git a/taichi/transforms/lower_access.cpp b/taichi/transforms/lower_access.cpp index 299bfe53b..35f5c3adb 100644 --- a/taichi/transforms/lower_access.cpp +++ b/taichi/transforms/lower_access.cpp @@ -46,11 +46,12 @@ class LowerAccess : public IRVisitor { StructForStmt *current_struct_for; const std::vector &kernel_forces_no_activate; bool lower_atomic_ptr; + bool packed; LowerAccess(const std::vector &kernel_forces_no_activate, - bool lower_atomic_ptr) + bool lower_atomic_ptr, bool packed) : kernel_forces_no_activate(kernel_forces_no_activate), - lower_atomic_ptr(lower_atomic_ptr) { + lower_atomic_ptr(lower_atomic_ptr), packed(packed) { // TODO: change this to false allow_undefined_visitor = true; current_struct_for = nullptr; @@ -100,7 +101,7 @@ class LowerAccess : public IRVisitor { } PtrLowererImpl lowerer{leaf_snode, indices, snode_op, is_bit_vectorized, - lowered}; + lowered, packed}; lowerer.set_pointer_needs_activation(pointer_needs_activation); lowerer.set_lower_access(this); lowerer.run(); @@ -211,8 +212,9 @@ class LowerAccess : public IRVisitor { static bool run(IRNode *node, const std::vector &kernel_forces_no_activate, - bool lower_atomic) { - LowerAccess inst(kernel_forces_no_activate, lower_atomic); + bool lower_atomic, + bool packed) { + LowerAccess inst(kernel_forces_no_activate, lower_atomic, packed); bool modified = false; while (true) { node->accept(&inst); @@ -304,7 +306,7 @@ bool lower_access(IRNode *root, const CompileConfig &config, const LowerAccessPass::Args &args) { bool modified = - LowerAccess::run(root, args.kernel_forces_no_activate, args.lower_atomic); + LowerAccess::run(root, args.kernel_forces_no_activate, args.lower_atomic, config.packed); type_check(root, config); return modified; } diff --git a/taichi/transforms/scalar_pointer_lowerer.cpp b/taichi/transforms/scalar_pointer_lowerer.cpp index 10caf04ce..61cce2aa6 100644 --- a/taichi/transforms/scalar_pointer_lowerer.cpp +++ b/taichi/transforms/scalar_pointer_lowerer.cpp @@ -5,7 +5,6 @@ #include "taichi/ir/analysis.h" #include "taichi/ir/snode.h" #include "taichi/ir/statements.h" -#include "taichi/program/program.h" #include "taichi/transforms/scalar_pointer_lowerer.h" #include "taichi/transforms/utils.h" @@ -16,11 +15,13 @@ ScalarPointerLowerer::ScalarPointerLowerer(SNode *leaf_snode, const std::vector &indices, const SNodeOpType snode_op, const bool is_bit_vectorized, - VecStatement *lowered) + VecStatement *lowered, + const bool packed) : indices_(indices), snode_op_(snode_op), is_bit_vectorized_(is_bit_vectorized), - lowered_(lowered) { + lowered_(lowered), + packed_(packed) { for (auto *s = leaf_snode; s != nullptr; s = s->parent) { snodes_.push_back(s); } @@ -75,7 +76,7 @@ void ScalarPointerLowerer::run() { if (k < 0) continue; Stmt *extracted; - if (get_current_program().config.packed) { // no dependence on POT + if (packed_) { // no dependence on POT const int prev = total_shape[k]; total_shape[k] /= snode->extractors[k].shape; const int next = total_shape[k]; diff --git a/taichi/transforms/scalar_pointer_lowerer.h b/taichi/transforms/scalar_pointer_lowerer.h index 574e9c9e6..a763ad3a6 100644 --- a/taichi/transforms/scalar_pointer_lowerer.h +++ b/taichi/transforms/scalar_pointer_lowerer.h @@ -31,7 +31,8 @@ class ScalarPointerLowerer { const std::vector &indices, const SNodeOpType snode_op, const bool is_bit_vectorized, - VecStatement *lowered); + VecStatement *lowered, + const bool packed); virtual ~ScalarPointerLowerer() = default; /** @@ -67,6 +68,7 @@ class ScalarPointerLowerer { const SNodeOpType snode_op_; const bool is_bit_vectorized_; VecStatement *const lowered_; + const bool packed_; private: std::vector snodes_; diff --git a/tests/cpp/codegen/refine_coordinates_test.cpp b/tests/cpp/codegen/refine_coordinates_test.cpp index 5c04809bc..918ff832f 100644 --- a/tests/cpp/codegen/refine_coordinates_test.cpp +++ b/tests/cpp/codegen/refine_coordinates_test.cpp @@ -103,6 +103,7 @@ class RefineCoordinatesTest : public ::testing::Test { protected: void SetUp() override { arch_ = host_arch(); + config_.packed = false; config_.print_kernel_llvm_ir = false; prog_ = std::make_unique(arch_); tlctx_ = prog_->llvm_context_host.get(); diff --git a/tests/cpp/struct/fake_struct_compiler.h b/tests/cpp/struct/fake_struct_compiler.h index e409a01a1..05ab97f25 100644 --- a/tests/cpp/struct/fake_struct_compiler.h +++ b/tests/cpp/struct/fake_struct_compiler.h @@ -12,7 +12,7 @@ class FakeStructCompiler : public StructCompiler { } void run(SNode &root) override { - infer_snode_properties(root); + infer_snode_properties(root, false); } }; diff --git a/tests/cpp/transforms/scalar_pointer_lowerer_test.cpp b/tests/cpp/transforms/scalar_pointer_lowerer_test.cpp index 7de569a3c..fd9b8d6e3 100644 --- a/tests/cpp/transforms/scalar_pointer_lowerer_test.cpp +++ b/tests/cpp/transforms/scalar_pointer_lowerer_test.cpp @@ -62,7 +62,7 @@ TEST_F(ScalarPointerLowererTest, Basic) { LowererImpl lowerer{leaf_snode_, std::vector{builder.get_int32(loop_index)}, SNodeOpType::undefined, - /*is_bit_vectorized=*/false, &lowered}; + /*is_bit_vectorized=*/false, &lowered, /*packed=*/false}; lowerer.run(); // There are three linearized stmts: // 0: for root From e001e658cf39e78116e2f5fa41b4fa78cdf5db37 Mon Sep 17 00:00:00 2001 From: Yi Xu Date: Tue, 20 Jul 2021 17:32:57 +0800 Subject: [PATCH 13/24] Fix variable name in metal --- taichi/backends/metal/kernel_manager.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/taichi/backends/metal/kernel_manager.cpp b/taichi/backends/metal/kernel_manager.cpp index fc48b7f01..2316de42a 100644 --- a/taichi/backends/metal/kernel_manager.cpp +++ b/taichi/backends/metal/kernel_manager.cpp @@ -816,9 +816,9 @@ class KernelManager::Impl { const auto &ext = sn->extractors[j]; rtm_ext->extractors[j].num_bits = ext.num_bits; rtm_ext->extractors[j].acc_offset = ext.acc_offset; - rtm_ext->extractors[j].num_elements = ext.num_elements; - TI_DEBUG(" [{}] num_bits={} acc_offset={} num_elements={}", j, - ext.num_bits, ext.acc_offset, ext.num_elements); + rtm_ext->extractors[j].num_elements_from_root = ext.num_elements_from_root; + TI_DEBUG(" [{}] num_bits={} acc_offset={} num_elements_from_root={}", j, + ext.num_bits, ext.acc_offset, ext.num_elements_from_root); } TI_DEBUG(""); } From e8399dfc78c65e61d64eb2ca94c0fad455c8b5cd Mon Sep 17 00:00:00 2001 From: Taichi Gardener Date: Tue, 20 Jul 2021 06:25:11 +0000 Subject: [PATCH 14/24] Auto Format --- taichi/codegen/codegen_llvm.cpp | 11 ++++++----- taichi/struct/struct.cpp | 3 ++- taichi/transforms/lower_access.cpp | 14 ++++++++------ .../cpp/transforms/scalar_pointer_lowerer_test.cpp | 4 +++- tests/python/test_packed_size.py | 2 +- 5 files changed, 20 insertions(+), 14 deletions(-) diff --git a/taichi/codegen/codegen_llvm.cpp b/taichi/codegen/codegen_llvm.cpp index e4a11d9e7..3e1bd8c05 100644 --- a/taichi/codegen/codegen_llvm.cpp +++ b/taichi/codegen/codegen_llvm.cpp @@ -1676,13 +1676,14 @@ void CodeGenLLVM::create_offload_struct_for(OffloadedStmt *stmt, bool spmd) { if (!prog->config.packed) { for (int i = 0; i < snode->num_active_indices; i++) { auto j = snode->physical_index_position[i]; - if (!bit::is_power_of_two(snode->extractors[j].num_elements_from_root)) { + if (!bit::is_power_of_two( + snode->extractors[j].num_elements_from_root)) { auto coord = coord_object.get("val", tlctx->get_constant(j)); exec_cond = builder->CreateAnd( - exec_cond, - builder->CreateICmp( - llvm::CmpInst::ICMP_SLT, coord, - tlctx->get_constant(snode->extractors[j].num_elements_from_root))); + exec_cond, builder->CreateICmp( + llvm::CmpInst::ICMP_SLT, coord, + tlctx->get_constant( + snode->extractors[j].num_elements_from_root))); } } } diff --git a/taichi/struct/struct.cpp b/taichi/struct/struct.cpp index bfba1c013..d4b8355e7 100644 --- a/taichi/struct/struct.cpp +++ b/taichi/struct/struct.cpp @@ -14,7 +14,8 @@ void infer_snode_properties(SNode &snode, bool packed) { auto &ch = snode.ch[ch_id]; ch->parent = &snode; for (int i = 0; i < taichi_max_num_indices; i++) { - ch->extractors[i].num_elements_from_root *= snode.extractors[i].num_elements_from_root; + ch->extractors[i].num_elements_from_root *= + snode.extractors[i].num_elements_from_root; bool found = false; for (int k = 0; k < taichi_max_num_indices; k++) { if (snode.physical_index_position[k] == i) { diff --git a/taichi/transforms/lower_access.cpp b/taichi/transforms/lower_access.cpp index 35f5c3adb..c2c44562a 100644 --- a/taichi/transforms/lower_access.cpp +++ b/taichi/transforms/lower_access.cpp @@ -49,9 +49,11 @@ class LowerAccess : public IRVisitor { bool packed; LowerAccess(const std::vector &kernel_forces_no_activate, - bool lower_atomic_ptr, bool packed) + bool lower_atomic_ptr, + bool packed) : kernel_forces_no_activate(kernel_forces_no_activate), - lower_atomic_ptr(lower_atomic_ptr), packed(packed) { + lower_atomic_ptr(lower_atomic_ptr), + packed(packed) { // TODO: change this to false allow_undefined_visitor = true; current_struct_for = nullptr; @@ -100,8 +102,8 @@ class LowerAccess : public IRVisitor { TI_ASSERT(!pointer_needs_activation); } - PtrLowererImpl lowerer{leaf_snode, indices, snode_op, is_bit_vectorized, - lowered, packed}; + PtrLowererImpl lowerer{leaf_snode, indices, snode_op, + is_bit_vectorized, lowered, packed}; lowerer.set_pointer_needs_activation(pointer_needs_activation); lowerer.set_lower_access(this); lowerer.run(); @@ -305,8 +307,8 @@ namespace irpass { bool lower_access(IRNode *root, const CompileConfig &config, const LowerAccessPass::Args &args) { - bool modified = - LowerAccess::run(root, args.kernel_forces_no_activate, args.lower_atomic, config.packed); + bool modified = LowerAccess::run(root, args.kernel_forces_no_activate, + args.lower_atomic, config.packed); type_check(root, config); return modified; } diff --git a/tests/cpp/transforms/scalar_pointer_lowerer_test.cpp b/tests/cpp/transforms/scalar_pointer_lowerer_test.cpp index fd9b8d6e3..9a5d3a87e 100644 --- a/tests/cpp/transforms/scalar_pointer_lowerer_test.cpp +++ b/tests/cpp/transforms/scalar_pointer_lowerer_test.cpp @@ -62,7 +62,9 @@ TEST_F(ScalarPointerLowererTest, Basic) { LowererImpl lowerer{leaf_snode_, std::vector{builder.get_int32(loop_index)}, SNodeOpType::undefined, - /*is_bit_vectorized=*/false, &lowered, /*packed=*/false}; + /*is_bit_vectorized=*/false, + &lowered, + /*packed=*/false}; lowerer.run(); // There are three linearized stmts: // 0: for root diff --git a/tests/python/test_packed_size.py b/tests/python/test_packed_size.py index 695f894bc..19837ec4f 100644 --- a/tests/python/test_packed_size.py +++ b/tests/python/test_packed_size.py @@ -5,4 +5,4 @@ def test_packed_size(): x = ti.field(ti.i32) ti.root.dense(ti.i, 20).dense(ti.ijk, 334).place(x) - assert x.snode.parent().parent().cell_size_bytes == 4 * 334 ** 3 + assert x.snode.parent().parent().cell_size_bytes == 4 * 334**3 From 07d8c34bb8401e842dc08593210a91ccc815640f Mon Sep 17 00:00:00 2001 From: Yi Xu Date: Wed, 21 Jul 2021 19:13:50 +0800 Subject: [PATCH 15/24] Fix variable name in metal --- taichi/backends/metal/shaders/runtime_structs.metal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/taichi/backends/metal/shaders/runtime_structs.metal.h b/taichi/backends/metal/shaders/runtime_structs.metal.h index 171e8463f..9b3347809 100644 --- a/taichi/backends/metal/shaders/runtime_structs.metal.h +++ b/taichi/backends/metal/shaders/runtime_structs.metal.h @@ -117,7 +117,7 @@ STR( int32_t start = 0; int32_t num_bits = 0; int32_t acc_offset = 0; - int32_t num_elements = 0; + int32_t num_elements_from_root = 0; }; Extractor extractors[kTaichiMaxNumIndices]; From 825acd6d44f535d789bad935b6dcc4847ff4408a Mon Sep 17 00:00:00 2001 From: Yi Xu Date: Wed, 21 Jul 2021 22:06:06 +0800 Subject: [PATCH 16/24] Update presubmit.yml --- .github/workflows/presubmit.yml | 115 ++------------------------------ 1 file changed, 4 insertions(+), 111 deletions(-) diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 265de5bc2..d9b5c21c6 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -4,51 +4,9 @@ on: types: [opened, synchronize, reopened] jobs: - title_format: - name: Check PR Title - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - - name: Run PR Title Checker - run: | - pip install semver GitPython - python misc/ci_check_pr_title.py "$PR_TITLE" - env: - PR_TITLE: ${{ github.event.pull_request.title }} - - check_code_format: - name: Check Code Format - runs-on: ubuntu-latest - # This job will be required to pass before merging to master branch. - steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - name: Check code format - run: | - git config user.email "taichigardener@gmail.com" - git config user.name "Taichi Gardener" - git checkout -b _fake_squash - git remote add upstream https://github.com/taichi-dev/taichi.git - git fetch upstream master - sudo apt-get install clang-format - python3 -m pip install --user yapf gitpython colorama isort - python3 python/taichi/code_format.py - git checkout -b _enforced_format - git commit -am "enforce code format" || true - # exit with 1 if there were differences: - git diff _fake_squash _enforced_format --exit-code - build_and_test_cpu_required: # This job will be required to pass before merging to master branch. name: Required Build and Test (CPU) - needs: check_code_format strategy: matrix: include: @@ -72,7 +30,7 @@ jobs: - name: Build run: | - TAICHI_REPO_DIR=`pwd` + export TAICHI_REPO_DIR=`pwd` export PATH=$TAICHI_REPO_DIR/taichi-llvm/bin/:$PATH export CXX=clang++ python misc/ci_setup.py ci @@ -81,7 +39,7 @@ jobs: - name: Test run: | - TAICHI_REPO_DIR=`pwd` + export TAICHI_REPO_DIR=`pwd` export PATH=$TAICHI_REPO_DIR/taichi-llvm/bin/:$PATH # Note we only need this since we cannot write into system python package. export PATH=$PATH:$HOME/.local/bin @@ -95,14 +53,9 @@ jobs: build_and_test_cpu: name: Build and Test (CPU) - needs: build_and_test_cpu_required strategy: matrix: include: - - os: macos-latest - python: 3.7 - with_cc: OFF - with_cpp_tests: ON - os: ubuntu-latest python: 3.9 with_cc: OFF @@ -129,7 +82,7 @@ jobs: - name: Build run: | - TAICHI_REPO_DIR=`pwd` + export TAICHI_REPO_DIR=`pwd` export PATH=$TAICHI_REPO_DIR/taichi-llvm/bin/:$PATH export CXX=clang++ python misc/ci_setup.py ci @@ -144,7 +97,7 @@ jobs: - name: Test run: | - TAICHI_REPO_DIR=`pwd` + export TAICHI_REPO_DIR=`pwd` export PATH=$TAICHI_REPO_DIR/taichi-llvm/bin/:$PATH export PATH=$PATH:$HOME/.local/bin hash -r @@ -159,7 +112,6 @@ jobs: build_and_test_gpu_linux: name: Build and Test (GPU) - needs: check_code_format runs-on: [self-hosted, cuda, cn] steps: - uses: actions/checkout@v2 @@ -185,62 +137,3 @@ jobs: $PYTHON examples/algorithm/laplace.py ti diagnose ti test -vr2 -t2 - - build_and_test_windows: - name: Build and Test (Windows) - needs: check_code_format - runs-on: windows-latest - steps: - - name: Install 7Zip PowerShell - shell: powershell - run: Install-Module 7Zip4PowerShell -Force -Verbose - - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - with: - python-version: 3.7 - - - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v1.0.2 - - - name: Build - shell: powershell - run: | - $env:TAICHI_REPO_DIR = "D:\a\taichi\taichi" - $env:PYTHONPATH = "$env:TAICHI_REPO_DIR\python" - cd C:\ - Remove-item alias:curl - curl --retry 10 --retry-delay 5 https://github.com/taichi-dev/taichi_assets/releases/download/llvm10/taichi-llvm-10.0.0-msvc2019.zip -LO - 7z x taichi-llvm-10.0.0-msvc2019.zip -otaichi_llvm - curl --retry 10 --retry-delay 5 https://github.com/taichi-dev/taichi_assets/releases/download/llvm10/clang-10.0.0-win.zip -LO - 7z x clang-10.0.0-win.zip -otaichi_clang - $env:PATH += ";C:\taichi_llvm\bin" - $env:PATH += ";C:\taichi_clang\bin" - $env:PATH += ";$env:TAICHI_REPO_DIR\bin" - clang --version - cd D:\a\taichi\taichi - python -m pip install numpy - python -m pip install pybind11 - python misc/ci_setup.py ci - mkdir build - cd build - cmake .. -G"Visual Studio 16 2019" -A x64 -DPYTHON_EXECUTABLE="$env:PYTHON" -DLLVM_DIR="C:\taichi_llvm\lib\cmake\llvm" - msbuild /p:Configuration=RelWithDebInfo /p:Platform=x64 /m taichi.sln - cd .. - env: - PYTHON: C:\hostedtoolcache\windows\Python\3.7.9\x64\python.exe - - - name: Test - shell: powershell - run: | - $env:TAICHI_REPO_DIR = "D:\a\taichi\taichi" - $env:PYTHONPATH = "$env:TAICHI_REPO_DIR\python" - $env:PATH += ";C:\taichi_llvm\bin" - $env:PATH += ";C:\taichi_clang\bin" - $env:PATH += ";$env:TAICHI_REPO_DIR\bin" - python -c "import taichi" - python examples/algorithm/laplace.py - python bin/taichi diagnose - python bin/taichi test -Cvr2 -t2 - env: - PYTHON: C:\hostedtoolcache\windows\Python\3.7.9\x64\python.exe From c8923f1cd4ebfe6ad5774992e6c0ac16725c32ae Mon Sep 17 00:00:00 2001 From: Yi Xu Date: Wed, 21 Jul 2021 22:09:28 +0800 Subject: [PATCH 17/24] Update presubmit.yml --- .github/workflows/presubmit.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index d9b5c21c6..0110392e5 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -119,6 +119,7 @@ jobs: - name: Build run: | git --version + export TAICHI_REPO_DIR=`pwd` export PATH=/home/github/taichi-llvm/bin/:$PATH export CXX=clang++-8 export PYTHON=/usr/bin/python3 From c28570efdddff9d3297c4761709d581284cac25a Mon Sep 17 00:00:00 2001 From: Taichi Gardener Date: Wed, 21 Jul 2021 15:08:15 +0000 Subject: [PATCH 18/24] Auto Format --- taichi/backends/metal/kernel_manager.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/taichi/backends/metal/kernel_manager.cpp b/taichi/backends/metal/kernel_manager.cpp index 2316de42a..6b311564d 100644 --- a/taichi/backends/metal/kernel_manager.cpp +++ b/taichi/backends/metal/kernel_manager.cpp @@ -816,9 +816,10 @@ class KernelManager::Impl { const auto &ext = sn->extractors[j]; rtm_ext->extractors[j].num_bits = ext.num_bits; rtm_ext->extractors[j].acc_offset = ext.acc_offset; - rtm_ext->extractors[j].num_elements_from_root = ext.num_elements_from_root; - TI_DEBUG(" [{}] num_bits={} acc_offset={} num_elements_from_root={}", j, - ext.num_bits, ext.acc_offset, ext.num_elements_from_root); + rtm_ext->extractors[j].num_elements_from_root = + ext.num_elements_from_root; + TI_DEBUG(" [{}] num_bits={} acc_offset={} num_elements_from_root={}", + j, ext.num_bits, ext.acc_offset, ext.num_elements_from_root); } TI_DEBUG(""); } From f4393d1b404e40a367d9ebed112b7b993357a36a Mon Sep 17 00:00:00 2001 From: Yi Xu Date: Wed, 21 Jul 2021 23:36:44 +0800 Subject: [PATCH 19/24] Update presubmit.yml --- .github/workflows/presubmit.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 0110392e5..4b10426d0 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -56,6 +56,10 @@ jobs: strategy: matrix: include: + - os: macos-latest + python: 3.7 + with_cc: OFF + with_cpp_tests: ON - os: ubuntu-latest python: 3.9 with_cc: OFF From fded842f46824a5548ce5ae4ba59f5cbf379d60d Mon Sep 17 00:00:00 2001 From: Yu Fang Date: Thu, 22 Jul 2021 11:31:49 +0800 Subject: [PATCH 20/24] reproduce --- .github/workflows/presubmit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 4b10426d0..18fdecae1 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -86,7 +86,7 @@ jobs: - name: Build run: | - export TAICHI_REPO_DIR=`pwd` + TAICHI_REPO_DIR=`pwd` export PATH=$TAICHI_REPO_DIR/taichi-llvm/bin/:$PATH export CXX=clang++ python misc/ci_setup.py ci From df4998e8f4ce3f5a9df2b3d60f62c8d07b19a056 Mon Sep 17 00:00:00 2001 From: Yu Fang Date: Thu, 22 Jul 2021 12:27:40 +0800 Subject: [PATCH 21/24] update --- .github/workflows/presubmit.yml | 35 +++------------------------------ 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 18fdecae1..a451d6529 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -30,7 +30,7 @@ jobs: - name: Build run: | - export TAICHI_REPO_DIR=`pwd` + TAICHI_REPO_DIR=`pwd` export PATH=$TAICHI_REPO_DIR/taichi-llvm/bin/:$PATH export CXX=clang++ python misc/ci_setup.py ci @@ -39,7 +39,7 @@ jobs: - name: Test run: | - export TAICHI_REPO_DIR=`pwd` + TAICHI_REPO_DIR=`pwd` export PATH=$TAICHI_REPO_DIR/taichi-llvm/bin/:$PATH # Note we only need this since we cannot write into system python package. export PATH=$PATH:$HOME/.local/bin @@ -101,7 +101,7 @@ jobs: - name: Test run: | - export TAICHI_REPO_DIR=`pwd` + TAICHI_REPO_DIR=`pwd` export PATH=$TAICHI_REPO_DIR/taichi-llvm/bin/:$PATH export PATH=$PATH:$HOME/.local/bin hash -r @@ -113,32 +113,3 @@ jobs: ti test -vr2 -t2 env: RUN_CPP_TESTS: ${{ matrix.with_cpp_tests }} - - build_and_test_gpu_linux: - name: Build and Test (GPU) - runs-on: [self-hosted, cuda, cn] - steps: - - uses: actions/checkout@v2 - - - name: Build - run: | - git --version - export TAICHI_REPO_DIR=`pwd` - export PATH=/home/github/taichi-llvm/bin/:$PATH - export CXX=clang++-8 - export PYTHON=/usr/bin/python3 - $PYTHON misc/ci_setup.py ci - env: - CI_SETUP_CMAKE_ARGS: -DTI_WITH_OPENGL:BOOL=ON -DTI_WITH_CC:BOOL=OFF - - - name: Test - run: | - export PYTHON=/usr/bin/python3 - export PATH=/home/github/taichi-llvm/bin/:$PATH - export PATH=$PATH:$HOME/.local/bin - export DISPLAY=:1 - hash -r - glewinfo - $PYTHON examples/algorithm/laplace.py - ti diagnose - ti test -vr2 -t2 From e191bd959dabc36bac55e2c6a65181a25784e3d2 Mon Sep 17 00:00:00 2001 From: Yu Fang Date: Thu, 22 Jul 2021 13:33:53 +0800 Subject: [PATCH 22/24] update --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 181ed4b34..3d941f959 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,6 +96,9 @@ foreach(arch IN LISTS HOST_ARCH CUDA_ARCH) "generate_llvm_runtime_${arch}" COMMAND ${CLANG_EXECUTABLE} -S runtime.cpp -o runtime.ll -fno-exceptions -emit-llvm -std=c++17 -D "ARCH_${arch}" -I ${PROJECT_SOURCE_DIR}; COMMAND ${LLVM_AS_EXECUTABLE} runtime.ll -o "runtime_${arch}.bc" + COMMAND echo "==================================================================================" + COMMAND cat runtime.ll + COMMAND echo "==================================================================================" WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}/taichi/runtime/llvm" ) add_dependencies(${CORE_LIBRARY_NAME} "generate_llvm_runtime_${arch}") From 849c9c851c37ec6af13fc0d20f62f28071d6c2e6 Mon Sep 17 00:00:00 2001 From: Yu Fang Date: Thu, 22 Jul 2021 14:01:17 +0800 Subject: [PATCH 23/24] resolve write conflict --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d941f959..631336165 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -94,8 +94,8 @@ endif() foreach(arch IN LISTS HOST_ARCH CUDA_ARCH) add_custom_target( "generate_llvm_runtime_${arch}" - COMMAND ${CLANG_EXECUTABLE} -S runtime.cpp -o runtime.ll -fno-exceptions -emit-llvm -std=c++17 -D "ARCH_${arch}" -I ${PROJECT_SOURCE_DIR}; - COMMAND ${LLVM_AS_EXECUTABLE} runtime.ll -o "runtime_${arch}.bc" + COMMAND ${CLANG_EXECUTABLE} -S runtime.cpp -o runtime_${arch}.ll -fno-exceptions -emit-llvm -std=c++17 -D "ARCH_${arch}" -I ${PROJECT_SOURCE_DIR}; + COMMAND ${LLVM_AS_EXECUTABLE} runtime_${arch}.ll -o "runtime_${arch}.bc" COMMAND echo "==================================================================================" COMMAND cat runtime.ll COMMAND echo "==================================================================================" From dd3b0305c49b577102786eb1c24c590ef160bc30 Mon Sep 17 00:00:00 2001 From: Yu Fang Date: Thu, 22 Jul 2021 14:04:13 +0800 Subject: [PATCH 24/24] update --- CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 631336165..990ff3489 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,9 +96,6 @@ foreach(arch IN LISTS HOST_ARCH CUDA_ARCH) "generate_llvm_runtime_${arch}" COMMAND ${CLANG_EXECUTABLE} -S runtime.cpp -o runtime_${arch}.ll -fno-exceptions -emit-llvm -std=c++17 -D "ARCH_${arch}" -I ${PROJECT_SOURCE_DIR}; COMMAND ${LLVM_AS_EXECUTABLE} runtime_${arch}.ll -o "runtime_${arch}.bc" - COMMAND echo "==================================================================================" - COMMAND cat runtime.ll - COMMAND echo "==================================================================================" WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}/taichi/runtime/llvm" ) add_dependencies(${CORE_LIBRARY_NAME} "generate_llvm_runtime_${arch}")