2 changes: 2 additions & 0 deletions docker-compose.yml
@@ -82,6 +82,8 @@ services:
devices:
- driver: nvidia
capabilities: ["gpu", "compute", "utility"]
environment:
- WARMUP_ENABLED=true

autour-preprocessor:
profiles: [production, test, default]
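Note: this hunk sets the flag, but the diff does not show where WARMUP_ENABLED is read. A minimal sketch of one plausible consumer, assuming the Flask service in translate.py and a `from utils import Translator` import path (both assumptions, not shown in the PR):

```python
# Illustrative only: the PR does not show the consumer of WARMUP_ENABLED.
import os

def maybe_warmup():
    """At process start, run one dummy translation if WARMUP_ENABLED is truthy."""
    if os.environ.get("WARMUP_ENABLED", "false").lower() == "true":
        from utils import Translator  # assumed module layout
        Translator("en", "fr").translate("warmup")
```

Calling this once at startup would make the compose flag take effect without waiting for the first request to /warmup.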
31 changes: 22 additions & 9 deletions services/multilang-support/Dockerfile
@@ -1,20 +1,33 @@
FROM pytorch/pytorch:latest
ARG PYTORCH="2.7.1"
ARG CUDA="11.8"
ARG CUDNN="9"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime

# See README.md#docker-image for visualisation of the directory structure
WORKDIR /app
RUN apt-get update && apt-get install python3 python3-pip git git-lfs curl -y
RUN apt-get update && apt-get install -y --no-install-recommends \
python3 python3-pip git git-lfs curl \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

# Copy requirements
# Install Python dependencies
COPY services/multilang-support/requirements.txt /app/

# Upgrade pip and install requirements
RUN pip install --upgrade pip
RUN pip install -r requirements.txt
RUN pip install --upgrade pip \
&& pip install --no-cache-dir -r requirements.txt \
&& pip install --no-cache-dir "transformers[sentencepiece]"

# Download the English -> French model into the image
RUN python3 -c "\
from transformers import MarianMTModel, MarianTokenizer; \
model = MarianMTModel.from_pretrained('Helsinki-NLP/opus-mt-en-fr'); \
tokenizer = MarianTokenizer.from_pretrained('Helsinki-NLP/opus-mt-en-fr'); \
model.save_pretrained('/app/models/opus-mt-en-fr'); \
tokenizer.save_pretrained('/app/models/opus-mt-en-fr')"

# Clone the model
RUN git lfs install
# RUN git lfs install

# English -> French
RUN git clone https://huggingface.co/Helsinki-NLP/opus-mt-en-fr ./models/opus-mt-en-fr
# RUN git clone https://huggingface.co/Helsinki-NLP/opus-mt-en-fr ./models/opus-mt-en-fr

# English -> German
# RUN git clone https://huggingface.co/Helsinki-NLP/opus-mt-en-de ./models/opus-mt-en-de
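Baking the checkpoint into the image with save_pretrained() replaces the git-lfs clone, so the container needs no network access at runtime. A smoke test for the built image, illustrative and not part of the PR, assuming the path written by the RUN step above:

```python
# Verify the baked-in checkpoint loads fully offline (illustrative).
from transformers import MarianMTModel, MarianTokenizer

MODEL_DIR = "/app/models/opus-mt-en-fr"  # path used in the Dockerfile

tokenizer = MarianTokenizer.from_pretrained(MODEL_DIR, local_files_only=True)
model = MarianMTModel.from_pretrained(MODEL_DIR, local_files_only=True)

batch = tokenizer(["Hello, world!"], return_tensors="pt")
print(tokenizer.batch_decode(model.generate(**batch), skip_special_tokens=True))
```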
6 changes: 4 additions & 2 deletions services/multilang-support/requirements.txt
@@ -1,6 +1,8 @@
Flask>=2.3.0
transformers>=4.29.0
transformers[sentencepiece]>=4.29.0
sentencepiece>=0.1.99
gunicorn>=20.1.0
jsonschema>=4.17.3
sacremoses>=0.0.53
torch>=2.6.0
protobuf>=3.20.0 # to avoid tokenizer compatibility issues
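Marian tokenizers are sentencepiece-based, so the [sentencepiece] extra plus the explicit pin avoids a load-time ImportError, and the protobuf floor works around the tokenizer compatibility issue noted in the comment. A quick environment check, illustrative only:

```python
# Confirm the pinned stack imports together (illustrative check).
import google.protobuf
import sentencepiece
import torch
import transformers

for name, mod in [("transformers", transformers), ("sentencepiece", sentencepiece),
                  ("torch", torch), ("protobuf", google.protobuf)]:
    print(name, mod.__version__)
```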
20 changes: 20 additions & 0 deletions services/multilang-support/src/translate.py
@@ -94,6 +94,26 @@ def translate_request():
return jsonify(response), 200


@app.route("/warmup", methods=["GET"])
def warmup():
"""
Trigger a dummy translation to warm up the Hugging Face model.
"""
try:
LOGGER.info("[WARMUP] Warmup endpoint triggered.")

# Instantiate a dummy translator (e.g., English to French)
dummy_translator = Translator("en", "fr")
_ = dummy_translator.translate(
"Internet Multimodal Access to Graphical Exploration")

LOGGER.info("[WARMUP] Model warmed successfully.")
return jsonify({"status": "warmed"}), 200
except Exception as e:
LOGGER.exception("[WARMUP] Warmup failed.")
return jsonify({"status": "error", "message": str(e)}), 500


@app.route("/health", methods=["GET"])
def health():
"""
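Hitting the endpoint once after container start pays the model-load cost up front instead of on the first user request. A standard-library client example, assuming the service listens on localhost:5000 (host and port are not specified in the diff):

```python
# Illustrative warmup call; host and port are assumptions.
import json
import urllib.request

with urllib.request.urlopen("http://localhost:5000/warmup", timeout=300) as resp:
    print(json.loads(resp.read()))  # expected on success: {"status": "warmed"}
```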
29 changes: 10 additions & 19 deletions services/multilang-support/src/utils.py
@@ -42,33 +42,24 @@ def __init__(self, src_lang: str, tgt_lang: str) -> None:
- tgt_lang: the target language in ISO 639-1 code
"""
# Getting model checkpoint from downloaded folder (see Dockerfile)
self.CHECKPOINT = f"/app/models/opus-mt-{src_lang}-{tgt_lang}"
# self.CHECKPOINT = f"/app/models/opus-mt-{src_lang}-{tgt_lang}"
self.NAME = f"Translator({src_lang}, {tgt_lang})"
try:
try:
self.TOKENIZER = AutoTokenizer.from_pretrained(self.CHECKPOINT)
self.MODEL = AutoModelForSeq2SeqLM.from_pretrained(
self.CHECKPOINT)
except Exception:
LOGGER.warning(
f"Cannot find {self.CHECKPOINT}, is it pre-downloaded?")
LOGGER.info("Downloading model from HuggingFace model hub...")
self.TOKENIZER = AutoTokenizer.from_pretrained(
f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}")
self.MODEL = AutoModelForSeq2SeqLM.from_pretrained(
f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}")
model_path = f"/app/models/opus-mt-{src_lang}-{tgt_lang}"
LOGGER.info(f"Loading model from local path: {model_path}")
self.TOKENIZER = AutoTokenizer.from_pretrained(
model_path, local_files_only=True)
self.MODEL = AutoModelForSeq2SeqLM.from_pretrained(
model_path, local_files_only=True)

LOGGER.info(f"{self.NAME} instantiated!")
# set GPU/CPU device
self.set_model_device()
LOGGER.info(
f"{self.NAME} running on {self.DEVICE_NAME}")
LOGGER.info(f"{self.NAME} running on {self.DEVICE_NAME}")
Translator.Translators.append(self)
except Exception as e:
LOGGER.error(e)
LOGGER.info(
f"Failed to instantiate {self.NAME}!")
LOGGER.debug(f"Failed to start model: {self.CHECKPOINT}")
LOGGER.info(f"Failed to instantiate {self.NAME}!")
LOGGER.debug(f"Expected model path: {model_path}")

def set_model_device(self):
num_gpus = torch.cuda.device_count()
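With local_files_only=True there is no silent fallback to the Hugging Face hub, so a missing checkpoint now fails fast instead of triggering a download inside the container. A sketch of the new failure mode for a language pair that was not baked into the image (the en-de path is hypothetical):

```python
# Illustrative: local_files_only=True fails instead of downloading.
import os

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_path = "/app/models/opus-mt-en-de"  # hypothetical pair, not baked in

if not os.path.isdir(model_path):
    raise FileNotFoundError(
        f"{model_path} missing; add a save_pretrained() step to the Dockerfile")

tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path, local_files_only=True)
```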