@@ -165,10 +165,10 @@ index 189e97534e..8f805afe8c 100644
165165
166166 void set(const phi::DenseTensor& tensor, const cudnnTensorFormat_t format) {
167167diff --git a/paddle/phi/backends/gpu/cuda/cudnn_helper.h b/paddle/phi/backends/gpu/cuda/cudnn_helper.h
168- index 8b293d3007..35245dddb0 100644
168+ index 9a8a3b7605..3186f37c20 100644
169169--- a/paddle/phi/backends/gpu/cuda/cudnn_helper.h
170170+++ b/paddle/phi/backends/gpu/cuda/cudnn_helper.h
171- @@ -124 ,7 +124 ,7 @@ class CudnnDataType<phi::dtype::float8_e4m3fn> {
171+ @@ -117 ,7 +117 ,7 @@ class CudnnDataType<phi::dtype::float8_e4m3fn> {
172172 #endif
173173
174174 // CUDNN_DATA_BFLOAT16 is not valid before cudnn8.1
@@ -392,7 +392,7 @@ index 9c9ab5dff9..ecf4e8f5e8 100644
392392 template <typename T>
393393 struct CudaLogFunctor : public BaseActivationFunctor<T> {
394394diff --git a/paddle/phi/kernels/funcs/affine_grid_utils.h b/paddle/phi/kernels/funcs/affine_grid_utils.h
395- index 70abf63a3d..af6f2136c5 100644
395+ index 149b5f3d76..eac6f4b942 100644
396396--- a/paddle/phi/kernels/funcs/affine_grid_utils.h
397397+++ b/paddle/phi/kernels/funcs/affine_grid_utils.h
398398@@ -16,7 +16,9 @@
@@ -644,10 +644,10 @@ index df4f214e66..e31b8eb1f6 100644
644644 } // namespace detail
645645 } // namespace funcs
646646diff --git a/paddle/phi/kernels/funcs/layer_norm_impl.cu.h b/paddle/phi/kernels/funcs/layer_norm_impl.cu.h
647- index 92dccf18ce..5cc9937d7a 100644
647+ index 274dc1fc5f..60251f0edd 100644
648648--- a/paddle/phi/kernels/funcs/layer_norm_impl.cu.h
649649+++ b/paddle/phi/kernels/funcs/layer_norm_impl.cu.h
650- @@ -37 ,11 +37 ,7 @@ using LayerNormParamType = typename CudnnDataType<T>::BatchNormParamType;
650+ @@ -36 ,11 +36 ,7 @@ using LayerNormParamType = typename CudnnDataType<T>::BatchNormParamType;
651651
652652 inline static int GetDesiredBlockDim(int64_t block_dim) {
653653 const int kMaxBlockDim = 512;
@@ -660,7 +660,7 @@ index 92dccf18ce..5cc9937d7a 100644
660660 }
661661
662662diff --git a/paddle/phi/kernels/funcs/reduce_function.h b/paddle/phi/kernels/funcs/reduce_function.h
663- index e89969e9dc..65e744f37d 100644
663+ index 1d863f875e..0268bef9b2 100644
664664--- a/paddle/phi/kernels/funcs/reduce_function.h
665665+++ b/paddle/phi/kernels/funcs/reduce_function.h
666666@@ -1131,7 +1131,7 @@ void ReduceKernel(const KPDevice& dev_ctx,
@@ -711,7 +711,7 @@ index 9f12293c0f..4e897bb433 100644
711711 namespace phi {
712712 namespace funcs {
713713diff --git a/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu
714- index 77e3537124..8f6022bc76 100644
714+ index d2545742f9..e1ecc5ef54 100644
715715--- a/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu
716716+++ b/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu
717717@@ -58,11 +58,7 @@ namespace fusion {
@@ -727,7 +727,7 @@ index 77e3537124..8f6022bc76 100644
727727 template <typename T>
728728 struct SumOp {
729729diff --git a/paddle/phi/kernels/fusion/gpu/fused_layernorm_residual_dropout_bias.h b/paddle/phi/kernels/fusion/gpu/fused_layernorm_residual_dropout_bias.h
730- index a28047c624..30832164f4 100644
730+ index 25c9fa597e..bb531b681f 100644
731731--- a/paddle/phi/kernels/fusion/gpu/fused_layernorm_residual_dropout_bias.h
732732+++ b/paddle/phi/kernels/fusion/gpu/fused_layernorm_residual_dropout_bias.h
733733@@ -24,11 +24,7 @@ namespace fusion {
@@ -780,7 +780,6 @@ index a28047c624..30832164f4 100644
780780 mask_vec[it][jt] = static_cast<MaskType>(rand[jt] >= dropout_prob);
781781 }
782782 }
783-
784783diff --git a/paddle/phi/kernels/gpu/elementwise_grad.h b/paddle/phi/kernels/gpu/elementwise_grad.h
785784index 411ee4510c..36c2f8fba7 100644
786785--- a/paddle/phi/kernels/gpu/elementwise_grad.h
@@ -797,6 +796,19 @@ index 411ee4510c..36c2f8fba7 100644
797796 dim3 block_size = dim3(PREDEFINED_BLOCK_SIZE, 1);
798797 dim3 grid_size =
799798 dim3(((size + vec_size - 1) / vec_size + PREDEFINED_BLOCK_SIZE - 1) /
799+ diff --git a/paddle/phi/kernels/gpu/fused_rms_norm_quant_grad_kernel.cu b/paddle/phi/kernels/gpu/fused_rms_norm_quant_grad_kernel.cu
800+ index 6e336a7ec4..d377494b50 100644
801+ --- a/paddle/phi/kernels/gpu/fused_rms_norm_quant_grad_kernel.cu
802+ +++ b/paddle/phi/kernels/gpu/fused_rms_norm_quant_grad_kernel.cu
803+ @@ -212,7 +212,7 @@ PD_REGISTER_KERNEL(fused_rms_norm_quant_grad,
804+ float,
805+ phi::float16) {}
806+
807+ - #elif CUDNN_VERSION_MIN(8, 1, 0)
808+ + #elif defined(PADDLE_WITH_COREX)
809+
810+ PD_REGISTER_KERNEL(fused_rms_norm_quant_grad,
811+ GPU,
800812diff --git a/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu b/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu
801813index 701c19a56f..b5d3ba7846 100644
802814--- a/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu
@@ -825,10 +837,10 @@ index 701c19a56f..b5d3ba7846 100644
825837 funcs::BlockReduceMax(top_right_index, FINAL_MASK);
826838 int64_t in_bot_max_index =
827839diff --git a/paddle/phi/kernels/gpu/layer_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/layer_norm_grad_kernel.cu
828- index 8280e95065..dd50aa08bf 100644
840+ index f921cd75fe..141d65bc6e 100644
829841--- a/paddle/phi/kernels/gpu/layer_norm_grad_kernel.cu
830842+++ b/paddle/phi/kernels/gpu/layer_norm_grad_kernel.cu
831- @@ -232 ,7 +232 ,7 @@ PD_REGISTER_KERNEL(layer_norm_grad,
843+ @@ -149 ,7 +149 ,7 @@ PD_REGISTER_KERNEL(layer_norm_grad,
832844 kernel->OutputAt(2).SetDataType(phi::DataType::FLOAT32);
833845 }
834846 }
@@ -838,10 +850,10 @@ index 8280e95065..dd50aa08bf 100644
838850 GPU,
839851 ALL_LAYOUT,
840852diff --git a/paddle/phi/kernels/gpu/layer_norm_kernel.cu b/paddle/phi/kernels/gpu/layer_norm_kernel.cu
841- index 892525493c..854f13a239 100644
853+ index 259f24b0d2..da48ec561c 100644
842854--- a/paddle/phi/kernels/gpu/layer_norm_kernel.cu
843855+++ b/paddle/phi/kernels/gpu/layer_norm_kernel.cu
844- @@ -765 ,7 +765 ,7 @@ PD_REGISTER_KERNEL(
856+ @@ -707 ,7 +707 ,7 @@ PD_REGISTER_KERNEL(
845857 kernel->OutputAt(1).SetDataType(phi::DataType::UNDEFINED);
846858 kernel->OutputAt(2).SetDataType(phi::DataType::UNDEFINED);
847859 }
0 commit comments