Skip to content

Commit 94ec3be

Browse files
authored
Update Paddle to 0114 and Fix Iluvatar patch (#2347)
* add retry for paddle installation
1 parent d6f30d6 commit 94ec3be

File tree

3 files changed

+39
-7
lines changed

3 files changed

+39
-7
lines changed

.github/workflows/_IXUCA.yml

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,24 @@ jobs:
9090
curl -o PaddleCustomDevice/Paddle/third_party/mklml/Linux/csrmm_mklml_lnx_2019.0.5.tgz http://paddlepaddledeps.bj.bcebos.com/csrmm_mklml_lnx_2019.0.5.tgz
9191
tar xf PaddleCustomDevice/Paddle/third_party/mklml/Linux/csrmm_mklml_lnx_2019.0.5.tgz
9292
pip uninstall -y paddlepaddle
93-
python3 -m pip install --pre paddlepaddle -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/
93+
# Retry paddlepaddle installation up to 3 times with delays
94+
retry_count=0
95+
max_retries=3
96+
while [ $retry_count -lt $max_retries ]; do
97+
if python3 -m pip install --pre paddlepaddle -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/; then
98+
echo "PaddlePaddle installation successful"
99+
break
100+
else
101+
retry_count=$((retry_count + 1))
102+
if [ $retry_count -lt $max_retries ]; then
103+
echo "PaddlePaddle installation failed, retrying in 60 seconds... (Attempt $retry_count/$max_retries)"
104+
sleep 60
105+
else
106+
echo "PaddlePaddle installation failed after $max_retries attempts. Please try rerunning this job."
107+
exit 1
108+
fi
109+
fi
110+
done
94111
pip show paddlepaddle
95112
python3 -m pip install parameterized
96113
export PADDLE_VERSION=0.0.0

Paddle

Submodule Paddle updated 390 files

backends/iluvatar_gpu/patches/paddle-corex.patch

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -125,16 +125,16 @@ index dfd3945e9a..08eda4978c 100644
125125
return CUDA_R_16BF;
126126
#endif
127127
diff --git a/paddle/phi/backends/gpu/cuda/cudnn_desc.h b/paddle/phi/backends/gpu/cuda/cudnn_desc.h
128-
index 189e97534e..8f805afe8c 100644
128+
index ce038ecec9..c78e2e999f 100644
129129
--- a/paddle/phi/backends/gpu/cuda/cudnn_desc.h
130130
+++ b/paddle/phi/backends/gpu/cuda/cudnn_desc.h
131-
@@ -77,7 +77,7 @@ inline cudnnDataType_t ToCudnnDataType(const phi::DataType& t) {
131+
@@ -77,7 +77,7 @@ inline cudnnDataType_t ToCudnnDataType(const DataType& t) {
132132
type = CUDNN_DATA_FP8_E5M2;
133133
break;
134134
#endif
135135
-#if CUDNN_VERSION_MIN(8, 1, 0)
136136
+#if CUDNN_VERSION_MIN(8, 1, 0) || defined(PADDLE_WITH_COREX)
137-
case phi::DataType::BFLOAT16:
137+
case DataType::BFLOAT16:
138138
type = CUDNN_DATA_BFLOAT16;
139139
break;
140140
@@ -167,12 +167,26 @@ class TensorDescriptor {
@@ -160,10 +160,10 @@ index 189e97534e..8f805afe8c 100644
160160
dtype,
161161
transformed_dims.size(),
162162
transformed_dims.data()));
163-
+#endif
163+
+#endif
164164
}
165165

166-
void set(const phi::DenseTensor& tensor, const cudnnTensorFormat_t format) {
166+
void set(const DenseTensor& tensor, const cudnnTensorFormat_t format) {
167167
diff --git a/paddle/phi/backends/gpu/cuda/cudnn_helper.h b/paddle/phi/backends/gpu/cuda/cudnn_helper.h
168168
index 9a8a3b7605..3186f37c20 100644
169169
--- a/paddle/phi/backends/gpu/cuda/cudnn_helper.h
@@ -1022,3 +1022,18 @@ index ffdf995ece..4a7e03f4ad 100644
10221022
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
10231023
PD_REGISTER_KERNEL(unsqueeze,
10241024
GPU,
1025+
diff --git a/paddle/phi/kernels/legacy/gpu/fp8_quant_blockwise_kernel.cu b/paddle/phi/kernels/legacy/gpu/fp8_quant_blockwise_kernel.cu
1026+
index 1b3393ceab..6bbf4f661b 100644
1027+
--- a/paddle/phi/kernels/legacy/gpu/fp8_quant_blockwise_kernel.cu
1028+
+++ b/paddle/phi/kernels/legacy/gpu/fp8_quant_blockwise_kernel.cu
1029+
@@ -671,8 +671,8 @@ void FP8QuantBlockWiseKernelImpl(const Context &dev_ctx,
1030+
const int sm_count = phi::backends::gpu::GetGPUMultiProcessors(device_id);
1031+
const size_t min_grid_x = sm_count * 8;
1032+
const size_t min_block_x = 1024;
1033+
- const size_t gridx = min(min_grid_x, src_rows);
1034+
- const size_t blockx = min(min_block_x, src_cols / 128 * 32);
1035+
+ const size_t gridx = std::min(min_grid_x, src_rows);
1036+
+ const size_t blockx = std::min(min_block_x, src_cols / 128 * 32);
1037+
1038+
bool use_finegrained_range = false;
1039+
char *env_var = getenv("PER_TOKEN_QUANT_FP8_USE_FINEGRAINED_RANGE");

0 commit comments

Comments
 (0)