Skip to content

Commit aaba2a6

Browse files
committed
initial proposal based on TC needs
1 parent 842cba3 commit aaba2a6

File tree

2 files changed

+184
-0
lines changed

2 files changed

+184
-0
lines changed

high_level.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#include <isl/interface/cpp.h>
2+
3+
//
4+
// High-level interface.
5+
//
6+
7+
// Static control part.
8+
// Names of fields are self-explanatory.
9+
// Access relations are tagged, that is, they live in the spaces
10+
// [S1[...] -> __ref_ID[]] -> arrayID[...]
11+
// where __ref_ID are unique identifiers.
12+
//
13+
// External users have statement-specific information stored either
14+
// (a) separately in a map<isl::id, CustomStmtClass> or
15+
// (b) as user-pointers of statement ids using C interface (discouraged).
16+
// Plain aggregate describing a static control part: six public isl objects and
// no member functions.
struct Scop {
17+
isl::set context;
18+
isl::union_set domain;
19+
// The three union_map members below are presumably the tagged access
// relations described in the comment preceding this struct -- TODO confirm.
isl::union_map mayReads;
20+
isl::union_map mayWrites;
21+
isl::union_map mustWrites;
22+
isl::schedule schedule;
23+
};
24+
25+
// Map "scop" to a grid of GPU thread blocks described by "grid" and "block"
26+
// starting from schedule node "node" and using the end-to-end mapping
27+
// strategy. When tiling is performed, use sizes provided in "tiles".
28+
// The return value indicates whether the mapping was performed.
29+
// Mapping strategy is allowed to decrease the block and grid size to avoid
30+
// launching empty blocks or threads, in which case the values in "block" and
31+
// "grid" are updated with the new sizes.
32+
//
33+
// The underlying strategy may change at any time as long as the same types of
34+
// trees can be mapped.
35+
//
36+
// "node" is not necessarily a band node.
37+
//
38+
// Trailing ones in "block" and "grid" may be interpreted as not mapping to the
39+
// corresponding thread or block dimensions. All values are strictly positive.
40+
// Parameter passing as declared: "scop", "block" and "grid" are taken by
// non-const reference, "tiles" by const reference and "node" by value.
// Note the parameter order: "block" precedes "grid".
bool mapToGPU(
41+
Scop& scop,
42+
isl::schedule_node node,
43+
const std::vector<long>& tiles,
44+
std::array<long, 3>& block,
45+
std::array<long, 3>& grid);
46+
47+

low_level.h

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
#include <isl/interface/cpp.h>
2+
3+
//
4+
// Low-level interface.
5+
//
6+
7+
// Forward declaration only. high_level.h defines Scop with the "struct"
// class-key, so the same key is used here to avoid mismatched-tag diagnostics
// (and symbol-name mismatches on ABIs that encode the class-key).
struct Scop;
8+
9+
// Syntactic identifiers for threads.
10+
// Scoped enumeration with three enumerators. "x" is explicitly 0, so "y" and
// "z" take the implicit values 1 and 2.
enum class Thread {
11+
x = 0,
12+
y,
13+
z
14+
};
15+
16+
// Syntactic identifiers for blocks.
17+
// Scoped enumeration with three enumerators. "x" is explicitly 0, so "y" and
// "z" take the implicit values 1 and 2. It is a distinct type from Thread,
// which is what lets functions overload on Thread vs. Block.
enum class Block {
18+
x = 0,
19+
y,
20+
z
21+
};
22+
23+
// Mark the schedule node "node" as the first node in a GPU kernel by inserting
24+
// a mark node with a unique identifier, followed by an optional guard node and
25+
// a context node (referred to as kernel context below). The kernel is
26+
// expected to be launched on a grid of thread blocks described by "grid" and
27+
// "block" sizes. The grid configuration is stored in the kernel context. It
28+
// will be used when mapping band members to blocks/threads.
29+
// Declared parameter order is "block" first, then "grid"; both are read-only
// (const reference) here. "scop" is taken by non-const reference and "node"
// by value. The return type is a mark node; presumably the inserted one --
// TODO confirm.
isl::schedule_node_mark initKernel(Scop& scop, isl::schedule_node node,
30+
const std::array<long, 3>& block, const std::array<long, 3>& grid);
31+
32+
// Check whether the schedule node "node" is inside a kernel.
33+
// Takes the node by value; the answer is a plain bool.
bool isInKernel(isl::schedule_node node);
34+
35+
// Return the id of the kernel to which "node" belongs.
36+
// NOTE(review): the behavior when "node" is not inside a kernel is not
// specified in this header; callers presumably check isInKernel first --
// confirm.
isl::id kernelId(isl::schedule_node node);
37+
38+
// Return the grid of the kernel to which "node" belongs.
39+
// The result is returned by value and has the same std::array<long, 3> type
// as the "grid" parameter of initKernel.
// NOTE(review): the behavior when "node" is not inside a kernel is not
// specified in this header -- confirm.
std::array<long, 3> kernelGrid(isl::schedule_node node);
40+
41+
// Return the block size of the kernel to which "node" belongs.
42+
// The result is returned by value and has the same std::array<long, 3> type
// as the "block" parameter of initKernel.
// NOTE(review): the behavior when "node" is not inside a kernel is not
// specified in this header -- confirm.
std::array<long, 3> kernelBlock(isl::schedule_node node);
43+
44+
// Check whether it is valid to map "dim"-th dimension of the band node "node"
45+
// to thread "t" (block "b") in the context of schedule to which "node"
46+
// belongs. For example, check whether children or ancestors of "node" already
47+
// contain a mapping to thread "t" (block "b"), whether the dimension is parallel, etc.
48+
// Overload pair: the type of the last parameter (Thread vs. Block) selects
// whether a thread or a block mapping is checked.
// NOTE(review): "dim" is a plain int; presumably a zero-based index that must
// be smaller than the number of band members -- confirm.
bool canMapBandDim(isl::schedule_node_band node, int dim, Thread t);
49+
bool canMapBandDim(isl::schedule_node_band node, int dim, Block b);
50+
51+
// Map "dim"-th dimension of the band node "node" to thread (block) dimension
52+
// "t" ("b"). This mapping is assumed to be valid, in particular that the band
53+
// is situated inside a kernel. The number of threads (blocks) to map to is
54+
// taken from the kernel context. Return the updated band node, its parents may
55+
// have changed due to mapping.
56+
//
57+
// Note: while it may be possible to map subtrees to a different number of
58+
// blocks (threads), it is often dangerous in practice. For example, inserting
59+
// a __syncthreads call under a condition involving thread identifiers leads to
60+
// undefined behavior. Therefore, we prefer to have a fixed number of threads
61+
// (blocks) per kernel. If it is strictly necessary to use fewer threads, the
62+
// caller can first strip-mine the band so as to have the required number of
63+
// threads and then map the inner band.
64+
// Overload pair: the type of the last parameter (Thread vs. Block) selects
// whether the band member is mapped to a thread or to a block dimension.
// Both overloads take the band node by value and return a band node by value.
isl::schedule_node_band mapBandDim(isl::schedule_node_band node, int dim,
65+
Thread t);
66+
isl::schedule_node_band mapBandDim(isl::schedule_node_band node, int dim,
67+
Block b);
68+
69+
// Map the subtree rooted at "node" to a single thread (block) along the thread
70+
// (block) dimension "t" ("b"). Return the updated node, its parents may have
71+
// changed due to mapping.
72+
// Overload pair selected by the type of the last parameter (Thread vs. Block).
// Unlike mapBandDim, these accept and return a generic schedule node rather
// than a band node.
isl::schedule_node mapFixed(isl::schedule_node node, Thread t);
73+
isl::schedule_node mapFixed(isl::schedule_node node, Block b);
74+
75+
// Get the mapping active at the subtree rooted at "node". In particular,
76+
// intersect all filters on the path from "node" to the root of schedule tree.
77+
// Some of these filters perform the mapping and the corresponding constraints
78+
// will appear in the resulting union set.
79+
// Ignores any mapping below "node".
80+
// Takes the node by value and returns the resulting union set by value.
isl::union_set mapping(isl::schedule_node node);
81+
82+
// Check if the subtree rooted at "node" is mapped to thread (block) dimension
83+
// "t" ("b"). Only the mapping above "node" is taken into account.
84+
// Overload pair selected by the type of the last parameter (Thread vs. Block).
bool isMapped(isl::schedule_node node, Thread t);
85+
bool isMapped(isl::schedule_node node, Block b);
86+
87+
// Tile the band node "band" with sizes "tiles" and return the updated node.
88+
isl::schedule_node_band tile(isl::schedule_node_band band, const std::vector<long>& tiles);
89+
// NOTE(review): unroll has no description in this header. Presumably it
// requests unrolling of "band" subject to "limit" and returns the updated
// band node -- confirm the meaning and units of "limit".
isl::schedule_node_band unroll(isl::schedule_node_band band, long limit);
90+
91+
// Finalize the mapping of a subtree rooted at "node" that represents a kernel.
92+
// In particular, ensure that all branches are mapped to the same number of
93+
// thread and block dimensions and that the appropriate synchronizations are
94+
// inserted. Optionally, change the kernel-level context to only include the
95+
// actually used threads (blocks).
96+
// Accepts specifically a mark node (the node type that initKernel returns)
// and returns a generic schedule node.
isl::schedule_node finalizeKernel(isl::schedule_node_mark node);
97+
98+
// An identifier of a group of references that must be promoted together
99+
// for validity reasons. All references are to the array identifiable by
100+
// "arrayId". Individual reference IDs are stored in "refIds".
101+
// The promotion is scoped under a mark node identified by "markId", that
102+
// is, the extension subtree for copying to/from the promoted memory space is
103+
// inserted below the mark node.
104+
//
105+
// It is impossible to store an isl::schedule_node because a new schedule may
106+
// be created on each operation due to CoW.
107+
// It is impossible to store the schedule depth if we want the promotion to be
108+
// scoped at a sequence node, that is keep the promoted elements in memory for
109+
// a group of sibling subtrees.
110+
// Note: we may need to artificially separate a sequence (set) node into a
111+
// nested structure of sequence (set) nodes to restrict the promotion scope.
112+
// Aggregate of three isl handles; it declares no member functions and no
// comparison or hashing support of its own.
struct ScopedReferenceGroup {
113+
// Id of the mark node under which the promotion is scoped.
isl::id markId;
114+
// Id of the array that all references in the group refer to.
isl::id arrayId;
115+
// Ids of the individual references that make up the group.
isl::id_list refIds;
116+
117+
// Extra functions can be provided here, e.g. the size of the footprint in
118+
// promoted memory space.
119+
};
120+
121+
// Insert a mark node with a unique id that identifies a potential promotion
122+
// scope. The insertion takes place immediately above "node". Return the
123+
// inserted mark node.
124+
// Takes a generic schedule node by value; the return type is the mark-node
// type.
isl::schedule_node_mark definePromotionScope(isl::schedule_node node);
125+
126+
// For all promotion scopes defined in "scop", return the list of
127+
// ScopedReferenceGroup for which promotion to shared (private) memory is legal
128+
// given the current mapping and that the copies are inserted below the scoping
129+
// mark node.
130+
// NOTE(review): std::unordered_set needs a hash and an equality comparison for
// its element type, and none is declared for ScopedReferenceGroup in this
// header -- confirm they are provided elsewhere (or use another container).
// "scop" is taken by const reference in both functions.
std::unordered_set<ScopedReferenceGroup> promotableToShared(const Scop& scop);
131+
std::unordered_set<ScopedReferenceGroup> promotableToPrivate(const Scop& scop);
132+
133+
// Promote the scoped reference group "group" to shared (private) memory in its
134+
// scope by modifying the schedule of "scop". May additionally keep track of
135+
// required declarations in each kernel inside "scop".
136+
// Both functions return nothing; "scop" is taken by non-const reference and
// "group" by value.
void promoteToShared(Scop& scop, ScopedReferenceGroup group);
137+
void promoteToPrivate(Scop& scop, ScopedReferenceGroup group);

0 commit comments

Comments
 (0)