|
| 1 | +#include <isl/interface/cpp.h> |
| 2 | + |
| 3 | +// |
| 4 | +// Low-level interface. |
| 5 | +// |
| 6 | + |
| 7 | +class Scop; |
| 8 | + |
| 9 | +// Syntactic identifiers for the three thread dimensions (x, y and z). |
| 10 | +enum class Thread { |
| 11 | + x = 0, |
| 12 | + y, |
| 13 | + z |
| 14 | +}; |
| 15 | + |
| 16 | +// Syntactic identifiers for the three block dimensions (x, y and z). |
| 17 | +enum class Block { |
| 18 | + x = 0, |
| 19 | + y, |
| 20 | + z |
| 21 | +}; |
| 22 | + |
| 23 | +// Mark the schedule node "node" as the first node in a GPU kernel by inserting |
| 24 | +// a mark node with a unique identifier, followed by an optional guard node and |
| 25 | +// a context node (the "kernel context" below). The kernel is expected to be |
| 26 | +// launched on a grid of thread blocks with sizes "grid" and "block" (note that |
| 27 | +// "block" comes first in the argument list). The grid configuration is stored |
| 28 | +// in the kernel context and used when mapping band members to blocks/threads. |
| 29 | +isl::schedule_node_mark initKernel(Scop& scop, isl::schedule_node node, |
| 30 | + const std::array<long, 3>& block, const std::array<long, 3>& grid); |
| 31 | + |
| 32 | +// Return true if the schedule node "node" is inside a kernel (see initKernel). |
| 33 | +bool isInKernel(isl::schedule_node node); |
| 34 | + |
| 35 | +// Return the id of the kernel to which "node" belongs (presumably that of its mark node). |
| 36 | +isl::id kernelId(isl::schedule_node node); |
| 37 | + |
| 38 | +// Return the grid sizes of the kernel to which "node" belongs, as three values. |
| 39 | +std::array<long, 3> kernelGrid(isl::schedule_node node); |
| 40 | + |
| 41 | +// Return the block sizes of the kernel to which "node" belongs, as three values. |
| 42 | +std::array<long, 3> kernelBlock(isl::schedule_node node); |
| 43 | + |
| 44 | +// Check whether it is valid to map the "dim"-th dimension of band node "node" |
| 45 | +// to thread "t" (block "b") in the schedule to which "node" belongs. The check |
| 46 | +// considers, e.g., whether children or ancestors of "node" already contain a |
| 47 | +// mapping to thread "t" (block "b"), whether the dimension is parallel, etc. |
| 48 | +bool canMapBandDim(isl::schedule_node_band node, int dim, Thread t); |
| 49 | +bool canMapBandDim(isl::schedule_node_band node, int dim, Block b); |
| 50 | + |
| 51 | +// Map the "dim"-th dimension of the band node "node" to thread (block) |
| 52 | +// dimension "t" ("b"). The mapping is assumed to be valid; in particular, |
| 53 | +// the band is assumed to be situated inside a kernel. The number of threads |
| 54 | +// (blocks) to map to is taken from the kernel context. Return the updated |
| 55 | +// band node; its parents may have changed due to the mapping. |
| 56 | +// |
| 57 | +// Note: while it may be possible to map subtrees to a different number of |
| 58 | +// blocks (threads), it is often dangerous in practice. For example, inserting |
| 59 | +// a __syncthreads call under a condition involving thread identifiers leads to |
| 60 | +// undefined behavior. Therefore, we prefer to have a fixed number of threads |
| 61 | +// (blocks) per kernel. If it is strictly necessary to use fewer threads, the |
| 62 | +// caller can first strip-mine the band so as to have the required number of |
| 63 | +// threads and then map the inner band. |
| 64 | +isl::schedule_node_band mapBandDim(isl::schedule_node_band node, int dim, |
| 65 | + Thread t); |
| 66 | +isl::schedule_node_band mapBandDim(isl::schedule_node_band node, int dim, |
| 67 | + Block b); |
| 68 | + |
| 69 | +// Map the subtree rooted at "node" to a single thread (block) along the thread |
| 70 | +// (block) dimension "t" ("b"). Return the updated node; its parents may have |
| 71 | +// changed due to the mapping. |
| 72 | +isl::schedule_node mapFixed(isl::schedule_node node, Thread t); |
| 73 | +isl::schedule_node mapFixed(isl::schedule_node node, Block b); |
| 74 | + |
| 75 | +// Get the mapping active at the subtree rooted at "node". In particular, |
| 76 | +// intersect all filters on the path from "node" to the root of the schedule |
| 77 | +// tree. Some of these filters perform the mapping, so the corresponding |
| 78 | +// constraints appear in the returned union set. |
| 79 | +// Any mapping below "node" is ignored. |
| 80 | +isl::union_set mapping(isl::schedule_node node); |
| 81 | + |
| 82 | +// Check whether the subtree rooted at "node" is mapped to thread (block) |
| 83 | +// dimension "t" ("b"). Only the mapping above "node" is taken into account. |
| 84 | +bool isMapped(isl::schedule_node node, Thread t); |
| 85 | +bool isMapped(isl::schedule_node node, Block b); |
| 86 | + |
| 87 | +// Tile the band node "band" with sizes "tiles" and return the updated node. NOTE(review): "unroll" is undocumented; presumably unrolls "band" up to "limit" -- confirm. |
| 88 | +isl::schedule_node_band tile(isl::schedule_node_band band, const std::vector<long>& tiles); |
| 89 | +isl::schedule_node_band unroll(isl::schedule_node_band band, long limit); |
| 90 | + |
| 91 | +// Finalize the mapping of the subtree rooted at the mark node "node", which |
| 92 | +// represents a kernel. In particular, ensure that all branches are mapped to |
| 93 | +// the same number of thread and block dimensions and that the appropriate |
| 94 | +// synchronizations are inserted. Optionally, change the kernel-level context |
| 95 | +// to only include the threads (blocks) that are actually used. |
| 96 | +isl::schedule_node finalizeKernel(isl::schedule_node_mark node); |
| 97 | + |
| 98 | +// An identifier of a group of references that must be promoted together |
| 99 | +// for validity reasons. All references are to the array identified by |
| 100 | +// "arrayId". Individual reference ids are stored in "refIds". |
| 101 | +// The promotion is scoped under a mark node identified by "markId", that |
| 102 | +// is, the extension subtree for copying to/from the promoted memory space is |
| 103 | +// inserted below the mark node. |
| 104 | +// |
| 105 | +// It is impossible to store an isl::schedule_node because a new schedule may |
| 106 | +// be created on each operation due to copy-on-write. |
| 107 | +// It is impossible to store the schedule depth if we want the promotion to be |
| 108 | +// scoped at a sequence node, that is, to keep the promoted elements in memory |
| 109 | +// for a group of sibling subtrees. |
| 110 | +// Note: we may need to artificially separate a sequence (set) node into a |
| 111 | +// nested structure of sequence (set) nodes to restrict the promotion scope. |
| 112 | +struct ScopedReferenceGroup { |
| 113 | + isl::id markId; |
| 114 | + isl::id arrayId; |
| 115 | + isl::id_list refIds; |
| 116 | + |
| 117 | + // Extra functions can be provided here, e.g. the size of the footprint in |
| 118 | + // promoted memory. NOTE(review): unordered_set use needs a hash and == (TODO). |
| 119 | +}; |
| 120 | + |
| 121 | +// Insert a mark node with a unique id, identifying a potential promotion |
| 122 | +// scope, immediately above "node" in the schedule tree. Return the inserted |
| 123 | +// mark node. |
| 124 | +isl::schedule_node_mark definePromotionScope(isl::schedule_node node); |
| 125 | + |
| 126 | +// For all promotion scopes defined in "scop", return the set of |
| 127 | +// ScopedReferenceGroups for which promotion to shared (private) memory is |
| 128 | +// legal given the current mapping, assuming that the copies are inserted below |
| 129 | +// the scoping mark node. |
| 130 | +std::unordered_set<ScopedReferenceGroup> promotableToShared(const Scop& scop); |
| 131 | +std::unordered_set<ScopedReferenceGroup> promotableToPrivate(const Scop& scop); |
| 132 | + |
| 133 | +// Promote the scoped reference group "group" to shared (private) memory within |
| 134 | +// its scope by modifying the schedule of "scop". This may additionally keep |
| 135 | +// track of the declarations required in each kernel inside "scop". |
| 136 | +void promoteToShared(Scop& scop, ScopedReferenceGroup group); |
| 137 | +void promoteToPrivate(Scop& scop, ScopedReferenceGroup group); |
0 commit comments