Skip to content

Commit e1ef7c0

Browse files
author
Bruce J Palmer
committed
Added aggressive checking for contiguous data transfers in strided calls.
1 parent c2af7ec commit e1ef7c0

File tree

2 files changed

+126
-0
lines changed

2 files changed

+126
-0
lines changed

cmx/src-common/cmx_alloc.cpp

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,12 @@ int Allocation::put(void *src, void *dst, int64_t bytes, int proc)
123123
int Allocation::puts(void *src, int64_t *src_stride, void *dst,
124124
int64_t *dst_stride, int64_t *count, int stride_levels, int proc)
125125
{
126+
if (checkContiguous(src_stride,dst_stride,count,stride_levels)) {
127+
int i;
128+
int64_t nbytes = 1;
129+
for (i=0; i<=stride_levels; i++) nbytes *= count[i];
130+
return p_allocation->put(src,dst,nbytes,proc);
131+
}
126132
return p_allocation->puts(src,src_stride,dst,dst_stride,count,
127133
stride_levels,proc);
128134
}
@@ -176,6 +182,12 @@ int Allocation::nbputs(void *src, int64_t *src_stride, void *dst,
176182
int64_t *dst_stride, int64_t *count, int stride_levels, int proc,
177183
cmx_request* req)
178184
{
185+
if (checkContiguous(src_stride,dst_stride,count,stride_levels)) {
186+
int i;
187+
int64_t nbytes = 1;
188+
for (i=0; i<=stride_levels; i++) nbytes *= count[i];
189+
return p_allocation->nbput(src,dst,nbytes,proc,req);
190+
}
179191
return p_allocation->nbputs(src,src_stride,dst,dst_stride,count,
180192
stride_levels,proc,req);
181193
}
@@ -235,6 +247,12 @@ int Allocation::accs(int op, void *scale, void *src, int64_t *src_stride,
235247
void *dst, int64_t *dst_stride, int64_t *count,
236248
int stride_levels, int proc)
237249
{
250+
if (checkContiguous(src_stride,dst_stride,count,stride_levels)) {
251+
int i;
252+
int64_t nbytes = 1;
253+
for (i=0; i<=stride_levels; i++) nbytes *= count[i];
254+
return p_allocation->acc(op,scale,src,dst,nbytes,proc);
255+
}
238256
return p_allocation->accs(op,scale,src,src_stride,dst,dst_stride,count,
239257
stride_levels,proc);
240258
}
@@ -294,6 +312,12 @@ int Allocation::nbaccs(int op, void *scale, void *src, int64_t *src_stride,
294312
void *dst, int64_t *dst_stride, int64_t *count,
295313
int stride_levels, int proc, cmx_request *req)
296314
{
315+
if (checkContiguous(src_stride,dst_stride,count,stride_levels)) {
316+
int i;
317+
int64_t nbytes = 1;
318+
for (i=0; i<=stride_levels; i++) nbytes *= count[i];
319+
return p_allocation->nbacc(op,scale,src,dst,nbytes,proc,req);
320+
}
297321
return p_allocation->nbaccs(op,scale,src,src_stride,dst,dst_stride,count,
298322
stride_levels,proc,req);
299323
}
@@ -347,6 +371,12 @@ int Allocation::get(void *src, void *dst, int64_t bytes, int proc)
347371
int Allocation::gets(void *src, int64_t *src_stride, void *dst,
348372
int64_t *dst_stride, int64_t *count, int stride_levels, int proc)
349373
{
374+
if (checkContiguous(src_stride,dst_stride,count,stride_levels)) {
375+
int i;
376+
int64_t nbytes = 1;
377+
for (i=0; i<=stride_levels; i++) nbytes *= count[i];
378+
return p_allocation->get(src,dst,nbytes,proc);
379+
}
350380
return p_allocation->gets(src,src_stride,dst,dst_stride,count,stride_levels,proc);
351381
}
352382

@@ -399,6 +429,12 @@ int Allocation::nbgets(void *src, int64_t *src_stride, void *dst,
399429
int64_t *dst_stride, int64_t *count, int stride_levels, int proc,
400430
cmx_request *req)
401431
{
432+
if (checkContiguous(src_stride,dst_stride,count,stride_levels)) {
433+
int i;
434+
int64_t nbytes = 1;
435+
for (i=0; i<=stride_levels; i++) nbytes *= count[i];
436+
return p_allocation->nbget(src,dst,nbytes,proc,req);
437+
}
402438
return p_allocation->nbgets(src,src_stride,dst,dst_stride,
403439
count,stride_levels,proc,req);
404440
}
@@ -514,4 +550,75 @@ int Allocation::waitProc(int proc)
514550
return CMX_SUCCESS;
515551
}
516552

553+
/**
554+
* This function checks to see if the data copy is contiguous for both the src
555+
* and destination buffers. If it is, then a contiguous operation can be used
556+
* instead of a strided operation. This function is intended for arrays of
557+
* dimension greater than 1 (contiguous operations can always be used for 1
558+
* dimensional arrays).
559+
*
560+
* The current implementation tries to identify all contiguous cases by using
561+
* all information from the stride and count arrays.
562+
*
563+
* src_stride: physical dimensions of source buffer
564+
* dst_stride: physical dimensions of destination buffer
565+
* count: number of elements being moved in each dimension
566+
* n_stride: number of strides (array dimension minus one)
567+
*/
568+
bool Allocation::checkContiguous(int64_t *src_stride, int64_t *dst_stride,
569+
int64_t *count, int n_stride)
570+
{
571+
int i;
572+
bool ret = true;
573+
int64_t stridelen = 1;
574+
bool gap = false;
575+
int64_t src_ld[7], dst_ld[7];
576+
/**
577+
* Calculate physical dimensions of buffers from stride arrays
578+
*/
579+
src_ld[0] = src_stride[0];
580+
dst_ld[0] = dst_stride[0];
581+
for (i=1; i<n_stride; i++) {
582+
src_ld[i] = src_stride[i]/src_stride[i-1];
583+
dst_ld[i] = dst_stride[i]/dst_stride[i-1];
584+
}
585+
/* NOTE: The count array contains the length of the final dimension and can
586+
* be used to evaluate some corner cases
587+
*/
588+
for (i=0; i<n_stride; i++) {
589+
/* check for overflow */
590+
int64_t tmp = stridelen * count[i];
591+
if (stridelen != 0 && tmp / stridelen != count[i]) {
592+
ret = false;
593+
break;
594+
}
595+
stridelen = tmp;
596+
if ((count[i] < src_ld[i] || count[i] < dst_ld[i])
597+
&& gap) {
598+
/* Data is definitely strided in memory */
599+
ret = false;
600+
break;
601+
} else if ((count[i] < src_ld[i] || count[i] < dst_ld[i]) &&
602+
!gap) {
603+
/* First dimension that doesn't match physical dimension */
604+
gap = true;
605+
} else if (count[i] != 1 && gap) {
606+
/* Found a mismatch between requested block and physical dimensions
607+
* indicating a possible stride in memory
608+
*/
609+
ret = false;
610+
break;
611+
}
612+
}
613+
/**
614+
* Everything looks good up to this point but need to verify that last
615+
* dimension is 1 if a mismatch between requested block and physical
616+
* array dimensions has been found previously
617+
*/
618+
if (gap && ret && n_stride > 0) {
619+
if (count[n_stride] != 1) ret = false;
620+
}
621+
return ret;
622+
}
623+
517624
}; // CMX namespace

cmx/src-common/cmx_alloc.hpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,25 @@ int waitAll();
405405
int waitProc(int proc);
406406

407407
private:
408+
409+
/**
410+
* This function checks to see if the data copy is contiguous for both the src
411+
* and destination buffers. If it is, then a contiguous operation can be used
412+
* instead of a strided operation. This function is intended for arrays of
413+
* dimension greater than 1 (contiguous operations can always be used for 1
414+
* dimensional arrays).
415+
*
416+
* The current implementation tries to identify all contiguous cases by using
417+
* all information from the stride and count arrays.
418+
*
419+
* src_stride: stride array for the source buffer; its physical dimensions are derived from consecutive entries
420+
* dst_stride: stride array for the destination buffer; its physical dimensions are derived from consecutive entries
421+
* count: number of elements being moved in each dimension
422+
* n_stride: number of strides (array dimension minus one)
423+
*/
424+
bool checkContiguous(int64_t *src_stride, int64_t *dst_stride,
425+
int64_t *count, int n_stride);
426+
408427
Group *p_group; // Group associated with allocation
409428

410429
Environment *p_environment;

0 commit comments

Comments
 (0)