Skip to content

Commit 0bfd9af

Browse files
authored
Update Paddle and fix fused_rms_norm (#2307)
* update paddle to 1225
* fix fused_rms_norm
1 parent 37eab0b commit 0bfd9af

File tree

2 files changed

+26
-14
lines changed

2 files changed

+26
-14
lines changed

Paddle

Submodule Paddle updated 202 files

backends/iluvatar_gpu/patches/paddle-corex.patch

Lines changed: 25 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -165,10 +165,10 @@ index 189e97534e..8f805afe8c 100644
165165

166166
void set(const phi::DenseTensor& tensor, const cudnnTensorFormat_t format) {
167167
diff --git a/paddle/phi/backends/gpu/cuda/cudnn_helper.h b/paddle/phi/backends/gpu/cuda/cudnn_helper.h
168-
index 8b293d3007..35245dddb0 100644
168+
index 9a8a3b7605..3186f37c20 100644
169169
--- a/paddle/phi/backends/gpu/cuda/cudnn_helper.h
170170
+++ b/paddle/phi/backends/gpu/cuda/cudnn_helper.h
171-
@@ -124,7 +124,7 @@ class CudnnDataType<phi::dtype::float8_e4m3fn> {
171+
@@ -117,7 +117,7 @@ class CudnnDataType<phi::dtype::float8_e4m3fn> {
172172
#endif
173173

174174
// CUDNN_DATA_BFLOAT16 is not valid before cudnn8.1
@@ -392,7 +392,7 @@ index 9c9ab5dff9..ecf4e8f5e8 100644
392392
template <typename T>
393393
struct CudaLogFunctor : public BaseActivationFunctor<T> {
394394
diff --git a/paddle/phi/kernels/funcs/affine_grid_utils.h b/paddle/phi/kernels/funcs/affine_grid_utils.h
395-
index 70abf63a3d..af6f2136c5 100644
395+
index 149b5f3d76..eac6f4b942 100644
396396
--- a/paddle/phi/kernels/funcs/affine_grid_utils.h
397397
+++ b/paddle/phi/kernels/funcs/affine_grid_utils.h
398398
@@ -16,7 +16,9 @@
@@ -644,10 +644,10 @@ index df4f214e66..e31b8eb1f6 100644
644644
} // namespace detail
645645
} // namespace funcs
646646
diff --git a/paddle/phi/kernels/funcs/layer_norm_impl.cu.h b/paddle/phi/kernels/funcs/layer_norm_impl.cu.h
647-
index 92dccf18ce..5cc9937d7a 100644
647+
index 274dc1fc5f..60251f0edd 100644
648648
--- a/paddle/phi/kernels/funcs/layer_norm_impl.cu.h
649649
+++ b/paddle/phi/kernels/funcs/layer_norm_impl.cu.h
650-
@@ -37,11 +37,7 @@ using LayerNormParamType = typename CudnnDataType<T>::BatchNormParamType;
650+
@@ -36,11 +36,7 @@ using LayerNormParamType = typename CudnnDataType<T>::BatchNormParamType;
651651

652652
inline static int GetDesiredBlockDim(int64_t block_dim) {
653653
const int kMaxBlockDim = 512;
@@ -660,7 +660,7 @@ index 92dccf18ce..5cc9937d7a 100644
660660
}
661661

662662
diff --git a/paddle/phi/kernels/funcs/reduce_function.h b/paddle/phi/kernels/funcs/reduce_function.h
663-
index e89969e9dc..65e744f37d 100644
663+
index 1d863f875e..0268bef9b2 100644
664664
--- a/paddle/phi/kernels/funcs/reduce_function.h
665665
+++ b/paddle/phi/kernels/funcs/reduce_function.h
666666
@@ -1131,7 +1131,7 @@ void ReduceKernel(const KPDevice& dev_ctx,
@@ -711,7 +711,7 @@ index 9f12293c0f..4e897bb433 100644
711711
namespace phi {
712712
namespace funcs {
713713
diff --git a/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu
714-
index 77e3537124..8f6022bc76 100644
714+
index d2545742f9..e1ecc5ef54 100644
715715
--- a/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu
716716
+++ b/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu
717717
@@ -58,11 +58,7 @@ namespace fusion {
@@ -727,7 +727,7 @@ index 77e3537124..8f6022bc76 100644
727727
template <typename T>
728728
struct SumOp {
729729
diff --git a/paddle/phi/kernels/fusion/gpu/fused_layernorm_residual_dropout_bias.h b/paddle/phi/kernels/fusion/gpu/fused_layernorm_residual_dropout_bias.h
730-
index a28047c624..30832164f4 100644
730+
index 25c9fa597e..bb531b681f 100644
731731
--- a/paddle/phi/kernels/fusion/gpu/fused_layernorm_residual_dropout_bias.h
732732
+++ b/paddle/phi/kernels/fusion/gpu/fused_layernorm_residual_dropout_bias.h
733733
@@ -24,11 +24,7 @@ namespace fusion {
@@ -780,7 +780,6 @@ index a28047c624..30832164f4 100644
780780
mask_vec[it][jt] = static_cast<MaskType>(rand[jt] >= dropout_prob);
781781
}
782782
}
783-
784783
diff --git a/paddle/phi/kernels/gpu/elementwise_grad.h b/paddle/phi/kernels/gpu/elementwise_grad.h
785784
index 411ee4510c..36c2f8fba7 100644
786785
--- a/paddle/phi/kernels/gpu/elementwise_grad.h
@@ -797,6 +796,19 @@ index 411ee4510c..36c2f8fba7 100644
797796
dim3 block_size = dim3(PREDEFINED_BLOCK_SIZE, 1);
798797
dim3 grid_size =
799798
dim3(((size + vec_size - 1) / vec_size + PREDEFINED_BLOCK_SIZE - 1) /
799+
diff --git a/paddle/phi/kernels/gpu/fused_rms_norm_quant_grad_kernel.cu b/paddle/phi/kernels/gpu/fused_rms_norm_quant_grad_kernel.cu
800+
index 6e336a7ec4..d377494b50 100644
801+
--- a/paddle/phi/kernels/gpu/fused_rms_norm_quant_grad_kernel.cu
802+
+++ b/paddle/phi/kernels/gpu/fused_rms_norm_quant_grad_kernel.cu
803+
@@ -212,7 +212,7 @@ PD_REGISTER_KERNEL(fused_rms_norm_quant_grad,
804+
float,
805+
phi::float16) {}
806+
807+
-#elif CUDNN_VERSION_MIN(8, 1, 0)
808+
+#elif defined(PADDLE_WITH_COREX)
809+
810+
PD_REGISTER_KERNEL(fused_rms_norm_quant_grad,
811+
GPU,
800812
diff --git a/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu b/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu
801813
index 701c19a56f..b5d3ba7846 100644
802814
--- a/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu
@@ -825,10 +837,10 @@ index 701c19a56f..b5d3ba7846 100644
825837
funcs::BlockReduceMax(top_right_index, FINAL_MASK);
826838
int64_t in_bot_max_index =
827839
diff --git a/paddle/phi/kernels/gpu/layer_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/layer_norm_grad_kernel.cu
828-
index 8280e95065..dd50aa08bf 100644
840+
index f921cd75fe..141d65bc6e 100644
829841
--- a/paddle/phi/kernels/gpu/layer_norm_grad_kernel.cu
830842
+++ b/paddle/phi/kernels/gpu/layer_norm_grad_kernel.cu
831-
@@ -232,7 +232,7 @@ PD_REGISTER_KERNEL(layer_norm_grad,
843+
@@ -149,7 +149,7 @@ PD_REGISTER_KERNEL(layer_norm_grad,
832844
kernel->OutputAt(2).SetDataType(phi::DataType::FLOAT32);
833845
}
834846
}
@@ -838,10 +850,10 @@ index 8280e95065..dd50aa08bf 100644
838850
GPU,
839851
ALL_LAYOUT,
840852
diff --git a/paddle/phi/kernels/gpu/layer_norm_kernel.cu b/paddle/phi/kernels/gpu/layer_norm_kernel.cu
841-
index 892525493c..854f13a239 100644
853+
index 259f24b0d2..da48ec561c 100644
842854
--- a/paddle/phi/kernels/gpu/layer_norm_kernel.cu
843855
+++ b/paddle/phi/kernels/gpu/layer_norm_kernel.cu
844-
@@ -765,7 +765,7 @@ PD_REGISTER_KERNEL(
856+
@@ -707,7 +707,7 @@ PD_REGISTER_KERNEL(
845857
kernel->OutputAt(1).SetDataType(phi::DataType::UNDEFINED);
846858
kernel->OutputAt(2).SetDataType(phi::DataType::UNDEFINED);
847859
}

0 commit comments

Comments
 (0)