Skip to content

Commit 7c5c762

Browse files
committed
Flag services with WARMUP_ENABLED=true so they hit the warmup endpoint; modified the warmup script to read this flag accordingly
1 parent afdd708 commit 7c5c762

File tree

2 files changed

+33
-18
lines changed

2 files changed

+33
-18
lines changed

docker-compose.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ services:
4444
restart: "no"
4545
environment:
4646
- TORCH_DEVICE=cuda
47+
- WARMUP_ENABLED=true
4748
labels:
4849
ca.mcgill.a11y.image.cacheTimeout: 3600
4950
deploy:
@@ -59,6 +60,7 @@ services:
5960
restart: "no"
6061
environment:
6162
- TORCH_DEVICE=cuda
63+
- WARMUP_ENABLED=true
6264
labels:
6365
ca.mcgill.a11y.image.cacheTimeout: 3600
6466
deploy:
@@ -80,6 +82,8 @@ services:
8082
devices:
8183
- driver: nvidia
8284
capabilities: ["gpu", "compute", "utility"]
85+
environment:
86+
- WARMUP_ENABLED=true
8387

8488
autour-preprocessor:
8589
profiles: [production, test, default]
@@ -110,6 +114,7 @@ services:
110114
./config/ollama.env
111115
environment:
112116
- PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED}
117+
- WARMUP_ENABLED=true
113118

114119
graphic-caption:
115120
profiles: [production, test, default]
@@ -125,6 +130,7 @@ services:
125130
./config/ollama.env
126131
environment:
127132
- PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED}
133+
- WARMUP_ENABLED=true
128134

129135
text-followup:
130136
profiles: [production, test, default]
@@ -134,6 +140,7 @@ services:
134140
- MAX_HISTORY_LENGTH=100
135141
- HISTORY_EXPIRY=3600
136142
- PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED}
143+
- WARMUP_ENABLED=true
137144
labels:
138145
ca.mcgill.a11y.image.preprocessor: 1
139146
ca.mcgill.a11y.image.port: 5000
@@ -179,6 +186,7 @@ services:
179186
- PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED}
180187
- YOLO_MODEL_PATH=/usr/src/app/models/yolo11x.pt
181188
- CONF_THRESHOLD=0.75
189+
- WARMUP_ENABLED=true
182190

183191
multistage-diagram-segmentation:
184192
profiles: [production, test, default]
@@ -201,6 +209,7 @@ services:
201209
- SAM_MODEL_PATH=/usr/src/app/models/sam2.1_l.pt
202210
- GEMINI_MODEL=gemini-2.5-pro-preview-06-05
203211
- BASE_SCHEMA=/usr/src/app/base_schema.json
212+
- WARMUP_ENABLED=true
204213
env_file:
205214
./config/gemini.env
206215

@@ -263,6 +272,7 @@ services:
263272
ca.mcgill.a11y.image.optional_dependencies: "content-categoriser,graphic-tagger"
264273
environment:
265274
- PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED}
275+
- WARMUP_ENABLED=true
266276

267277
supercollider:
268278
profiles: [production, test, default]
@@ -376,6 +386,7 @@ services:
376386
ca.mcgill.a11y.image.optional_dependencies: ""
377387
environment:
378388
- PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED}
389+
- WARMUP_ENABLED=true
379390

380391
svg-depth-map:
381392
profiles: [production, test, default]

scripts/warmup

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,43 +3,47 @@
33
# Locate this script's directory
44
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
55

6-
# Load env file
7-
ENV_FILE="$SCRIPT_DIR/../config/warmup.env"
8-
if [ -f "$ENV_FILE" ]; then
9-
source "$ENV_FILE"
10-
else
11-
echo "Missing $ENV_FILE" >&2
12-
exit 1
13-
fi
14-
15-
# Setup log file (with date)
6+
WARMUP_LOG_DIR="/var/docker/image/testing/warmup"
167
timestamp=$(date +"%Y%m%d_%H%M%S")
178
logfile="${WARMUP_LOG_DIR}/warmup_${timestamp}.log"
189
mkdir -p "$WARMUP_LOG_DIR"
1910

2011
echo "[Warmup] $(date) Starting warmup..." | tee -a "$logfile"
2112

22-
# Wait for health and warm up
23-
for container in "${!WARMUP_TARGETS[@]}"; do
24-
endpoint="${WARMUP_TARGETS[$container]}"
13+
# Get all running containers
14+
containers=$(docker ps --format '{{.Names}}')
2515

26-
if ! docker inspect "$container" &>/dev/null; then
27-
echo "[Warmup] Container $container not found. Skipping." | tee -a "$logfile"
16+
for container in $containers; do
17+
# Check if WARMUP_ENABLED=true is present in the environment
18+
if ! docker inspect -f '{{range .Config.Env}}{{println .}}{{end}}' "$container" | grep -q "^WARMUP_ENABLED=true$"; then
2819
continue
2920
fi
3021

22+
# Get EXPOSED port (assume first one is the correct one)
23+
exposed_port=$(docker inspect -f '{{range $p, $_ := .Config.ExposedPorts}}{{println $p}}{{end}}' "$container" | head -n1 | cut -d'/' -f1)
24+
if [ -z "$exposed_port" ]; then
25+
echo "[Warmup] $container has no EXPOSEd port. Skipping." | tee -a "$logfile"
26+
continue
27+
fi
28+
29+
endpoint="http://localhost:${exposed_port}/warmup"
30+
3131
echo "[Warmup] Waiting for $container to be healthy..." | tee -a "$logfile"
3232
until [[ "$(docker inspect -f '{{.State.Health.Status}}' "$container")" == "healthy" ]]; do
33-
sleep 5
33+
sleep 2
3434
done
3535

3636
echo "[Warmup] $container marked healthy. Waiting 10s before hitting warmup..." | tee -a "$logfile"
3737
# Wait briefly after container is marked healthy to ensure internal models are fully initialized before warmup.
3838
# This prevents race conditions where the healthcheck passes but the model isn't ready yet.
39-
sleep 10
4039

41-
echo "[Warmup] Hitting warmup endpoint on $container..." | tee -a "$logfile"
40+
# add random jitter to stagger warmups (addresses potential resource spike if all hit at once)
41+
jitter=$((RANDOM % 5))
42+
sleep $((10 + jitter))
43+
44+
echo "[Warmup] Hitting warmup endpoint at $endpoint..." | tee -a "$logfile"
4245
resp=$(docker exec "$container" curl -s -w "%{http_code}" -o /tmp/warmup_resp.txt "$endpoint")
46+
4347
if [[ "$resp" == "200" ]]; then
4448
echo "[Warmup] $container warmed successfully." | tee -a "$logfile"
4549
else

0 commit comments

Comments
 (0)