@@ -123,6 +123,12 @@ int Allocation::put(void *src, void *dst, int64_t bytes, int proc)
123123int Allocation::puts (void *src, int64_t *src_stride, void *dst,
124124 int64_t *dst_stride, int64_t *count, int stride_levels, int proc)
125125{
126+ if (checkContiguous (src_stride,dst_stride,count,stride_levels)) {
127+ int i;
128+ int64_t nbytes = 1 ;
129+ for (i=0 ; i<=stride_levels; i++) nbytes *= count[i];
130+ return p_allocation->put (src,dst,nbytes,proc);
131+ }
126132 return p_allocation->puts (src,src_stride,dst,dst_stride,count,
127133 stride_levels,proc);
128134}
@@ -176,6 +182,12 @@ int Allocation::nbputs(void *src, int64_t *src_stride, void *dst,
176182 int64_t *dst_stride, int64_t *count, int stride_levels, int proc,
177183 cmx_request* req)
178184{
185+ if (checkContiguous (src_stride,dst_stride,count,stride_levels)) {
186+ int i;
187+ int64_t nbytes = 1 ;
188+ for (i=0 ; i<=stride_levels; i++) nbytes *= count[i];
189+ return p_allocation->nbput (src,dst,nbytes,proc,req);
190+ }
179191 return p_allocation->nbputs (src,src_stride,dst,dst_stride,count,
180192 stride_levels,proc,req);
181193}
@@ -235,6 +247,12 @@ int Allocation::accs(int op, void *scale, void *src, int64_t *src_stride,
235247 void *dst, int64_t *dst_stride, int64_t *count,
236248 int stride_levels, int proc)
237249{
250+ if (checkContiguous (src_stride,dst_stride,count,stride_levels)) {
251+ int i;
252+ int64_t nbytes = 1 ;
253+ for (i=0 ; i<=stride_levels; i++) nbytes *= count[i];
254+ return p_allocation->acc (op,scale,src,dst,nbytes,proc);
255+ }
238256 return p_allocation->accs (op,scale,src,src_stride,dst,dst_stride,count,
239257 stride_levels,proc);
240258}
@@ -294,6 +312,12 @@ int Allocation::nbaccs(int op, void *scale, void *src, int64_t *src_stride,
294312 void *dst, int64_t *dst_stride, int64_t *count,
295313 int stride_levels, int proc, cmx_request *req)
296314{
315+ if (checkContiguous (src_stride,dst_stride,count,stride_levels)) {
316+ int i;
317+ int64_t nbytes = 1 ;
318+ for (i=0 ; i<=stride_levels; i++) nbytes *= count[i];
319+ return p_allocation->nbacc (op,scale,src,dst,nbytes,proc,req);
320+ }
297321 return p_allocation->nbaccs (op,scale,src,src_stride,dst,dst_stride,count,
298322 stride_levels,proc,req);
299323}
@@ -347,6 +371,12 @@ int Allocation::get(void *src, void *dst, int64_t bytes, int proc)
347371int Allocation::gets (void *src, int64_t *src_stride, void *dst,
348372 int64_t *dst_stride, int64_t *count, int stride_levels, int proc)
349373{
374+ if (checkContiguous (src_stride,dst_stride,count,stride_levels)) {
375+ int i;
376+ int64_t nbytes = 1 ;
377+ for (i=0 ; i<=stride_levels; i++) nbytes *= count[i];
378+ return p_allocation->get (src,dst,nbytes,proc);
379+ }
350380 return p_allocation->gets (src,src_stride,dst,dst_stride,count,stride_levels,proc);
351381}
352382
@@ -399,6 +429,12 @@ int Allocation::nbgets(void *src, int64_t *src_stride, void *dst,
399429 int64_t *dst_stride, int64_t *count, int stride_levels, int proc,
400430 cmx_request *req)
401431{
432+ if (checkContiguous (src_stride,dst_stride,count,stride_levels)) {
433+ int i;
434+ int64_t nbytes = 1 ;
435+ for (i=0 ; i<=stride_levels; i++) nbytes *= count[i];
436+ return p_allocation->nbget (src,dst,nbytes,proc,req);
437+ }
402438 return p_allocation->nbgets (src,src_stride,dst,dst_stride,
403439 count,stride_levels,proc,req);
404440}
@@ -514,4 +550,75 @@ int Allocation::waitProc(int proc)
514550 return CMX_SUCCESS;
515551}
516552
553+ /* *
554+ * This function checks to see if the data copy is contiguous for both the src
555+ * and destination buffers. If it is, then a contiguous operation can be used
556+ * instead of a strided operation. This function is intended for arrays of
557+ * dimension greater than 1 (contiguous operations can always be used for 1
558+ * dimensional arrays).
559+ *
560+ * The current implementation tries to identify all contiguous cases by using
561+ * all information from the stride and count arrays.
562+ *
563+ * src_stride: physical dimensions of source buffer
564+ * dst_stride: physical dimensions of destination buffer
565+ * count: number of elements being moved in each dimension
566+ * n_stride: number of strides (array dimension minus one)
567+ */
568+ bool Allocation::checkContiguous (int64_t *src_stride, int64_t *dst_stride,
569+ int64_t *count, int n_stride)
570+ {
571+ int i;
572+ bool ret = true ;
573+ int64_t stridelen = 1 ;
574+ bool gap = false ;
575+ int64_t src_ld[7 ], dst_ld[7 ];
576+ /* *
577+ * Calculate physical dimensions of buffers from stride arrays
578+ */
579+ src_ld[0 ] = src_stride[0 ];
580+ dst_ld[0 ] = dst_stride[0 ];
581+ for (i=1 ; i<n_stride; i++) {
582+ src_ld[i] = src_stride[i]/src_stride[i-1 ];
583+ dst_ld[i] = dst_stride[i]/dst_stride[i-1 ];
584+ }
585+ /* NOTE: The count array contains the length of the final dimension and can
586+ * be used to evaluate some corner cases
587+ */
588+ for (i=0 ; i<n_stride; i++) {
589+ /* check for overflow */
590+ int64_t tmp = stridelen * count[i];
591+ if (stridelen != 0 && tmp / stridelen != count[i]) {
592+ ret = false ;
593+ break ;
594+ }
595+ stridelen = tmp;
596+ if ((count[i] < src_ld[i] || count[i] < dst_ld[i])
597+ && gap) {
598+ /* Data is definitely strided in memory */
599+ ret = false ;
600+ break ;
601+ } else if ((count[i] < src_ld[i] || count[i] < dst_ld[i]) &&
602+ !gap) {
603+ /* First dimension that doesn't match physical dimension */
604+ gap = true ;
605+ } else if (count[i] != 1 && gap) {
606+ /* Found a mismatch between requested block and physical dimensions
607+ * indicating a possible stride in memory
608+ */
609+ ret = false ;
610+ break ;
611+ }
612+ }
613+ /* *
614+ * Everything looks good up to this point but need to verify that last
615+ * dimension is 1 if a mismatch between requested block and physical
616+ * array dimensions has been found previously
617+ */
618+ if (gap && ret && n_stride > 0 ) {
619+ if (count[n_stride] != 1 ) ret = false ;
620+ }
621+ return ret;
622+ }
623+
517624}; // CMX namespace
0 commit comments