diff --git a/docker-compose.yml b/docker-compose.yml
index ca60b75ec..139dcbac1 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -82,6 +82,8 @@ services:
           devices:
             - driver: nvidia
               capabilities: ["gpu", "compute", "utility"]
+    environment:
+      - WARMUP_ENABLED=true
 
   autour-preprocessor:
     profiles: [production, test, default]
diff --git a/services/multilang-support/Dockerfile b/services/multilang-support/Dockerfile
index ea73b1495..28168e5f3 100644
--- a/services/multilang-support/Dockerfile
+++ b/services/multilang-support/Dockerfile
@@ -1,20 +1,33 @@
-FROM pytorch/pytorch:latest
+ARG PYTORCH="2.7.1"
+ARG CUDA="11.8"
+ARG CUDNN="9"
+FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime
 
 # See README.md#docker-image for visualisation of the directory structure
 WORKDIR /app
 
-RUN apt-get update && apt-get install python3 python3-pip git git-lfs curl -y
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    python3 python3-pip git git-lfs curl \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
 
-# Copy requirements
+# Install Python dependencies
 COPY services/multilang-support/requirements.txt /app/
-
-# Upgrade pip and install requirements
-RUN pip install --upgrade pip
-RUN pip install -r requirements.txt
+RUN pip install --upgrade pip \
+    && pip install --no-cache-dir -r requirements.txt \
+    && pip install --no-cache-dir "transformers[sentencepiece]"
+
+# Download English -> French model into the image
+RUN python3 -c "\
+from transformers import MarianMTModel, MarianTokenizer; \
+model = MarianMTModel.from_pretrained('Helsinki-NLP/opus-mt-en-fr'); \
+tokenizer = MarianTokenizer.from_pretrained('Helsinki-NLP/opus-mt-en-fr'); \
+model.save_pretrained('/app/models/opus-mt-en-fr'); \
+tokenizer.save_pretrained('/app/models/opus-mt-en-fr')"
 
 # Clone the model
-RUN git lfs install
+# RUN git lfs install
+
 # English -> French
-RUN git clone https://huggingface.co/Helsinki-NLP/opus-mt-en-fr ./models/opus-mt-en-fr
+# RUN git clone https://huggingface.co/Helsinki-NLP/opus-mt-en-fr ./models/opus-mt-en-fr
 # English -> German
 # RUN git clone https://huggingface.co/Helsinki-NLP/opus-mt-en-de ./models/opus-mt-en-de
diff --git a/services/multilang-support/requirements.txt b/services/multilang-support/requirements.txt
index 872912318..c6c9cc30b 100644
--- a/services/multilang-support/requirements.txt
+++ b/services/multilang-support/requirements.txt
@@ -1,6 +1,8 @@
 Flask>=2.3.0
-transformers>=4.29.0
+transformers[sentencepiece]>=4.29.0
 sentencepiece>=0.1.99
 gunicorn>=20.1.0
 jsonschema>=4.17.3
-sacremoses>=0.0.53
\ No newline at end of file
+sacremoses>=0.0.53
+torch>=2.6.0
+protobuf>=3.20.0 # to avoid tokenizer compatibility issues
\ No newline at end of file
diff --git a/services/multilang-support/src/translate.py b/services/multilang-support/src/translate.py
index 21fb207f2..9ec6e35e9 100644
--- a/services/multilang-support/src/translate.py
+++ b/services/multilang-support/src/translate.py
@@ -94,6 +94,26 @@ def translate_request():
     return jsonify(response), 200
 
 
+@app.route("/warmup", methods=["GET"])
+def warmup():
+    """
+    Trigger a dummy translation to warm up the Hugging Face model.
+    """
+    try:
+        LOGGER.info("[WARMUP] Warmup endpoint triggered.")
+
+        # Instantiate a dummy translator (e.g., English to French)
+        dummy_translator = Translator("en", "fr")
+        _ = dummy_translator.translate(
+            "Internet Multimodal Access to Graphical Exploration")
+
+        LOGGER.info("[WARMUP] Model warmed successfully.")
+        return jsonify({"status": "warmed"}), 200
+    except Exception as e:
+        LOGGER.exception("[WARMUP] Warmup failed.")
+        return jsonify({"status": "error", "message": str(e)}), 500
+
+
 @app.route("/health", methods=["GET"])
 def health():
     """
diff --git a/services/multilang-support/src/utils.py b/services/multilang-support/src/utils.py
index 3b1d57c64..03660980f 100644
--- a/services/multilang-support/src/utils.py
+++ b/services/multilang-support/src/utils.py
@@ -42,33 +42,24 @@ def __init__(self, src_lang: str, tgt_lang: str) -> None:
         - tgt_lang: the target language in ISO 639-1 code
         """
         # Getting model checkpoint from downloaded folder (see Dockerfile)
-        self.CHECKPOINT = f"/app/models/opus-mt-{src_lang}-{tgt_lang}"
+        # self.CHECKPOINT = f"/app/models/opus-mt-{src_lang}-{tgt_lang}"
         self.NAME = f"Translator({src_lang}, {tgt_lang})"
 
         try:
-            try:
-                self.TOKENIZER = AutoTokenizer.from_pretrained(self.CHECKPOINT)
-                self.MODEL = AutoModelForSeq2SeqLM.from_pretrained(
-                    self.CHECKPOINT)
-            except Exception:
-                LOGGER.warning(
-                    f"Cannot find {self.CHECKPOINT}, is it pre-downloaded?")
-                LOGGER.info("Downloading model from HuggingFace model hub...")
-                self.TOKENIZER = AutoTokenizer.from_pretrained(
-                    f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}")
-                self.MODEL = AutoModelForSeq2SeqLM.from_pretrained(
-                    f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}")
+            model_path = f"/app/models/opus-mt-{src_lang}-{tgt_lang}"
+            LOGGER.info(f"Loading model from local path: {model_path}")
+            self.TOKENIZER = AutoTokenizer.from_pretrained(
+                model_path, local_files_only=True)
+            self.MODEL = AutoModelForSeq2SeqLM.from_pretrained(
+                model_path, local_files_only=True)
             LOGGER.info(f"{self.NAME} instantiated!")
-            # set GPU/CPU device
             self.set_model_device()
-            LOGGER.info(
-                f"{self.NAME} running on {self.DEVICE_NAME}")
+            LOGGER.info(f"{self.NAME} running on {self.DEVICE_NAME}")
             Translator.Translators.append(self)
         except Exception as e:
             LOGGER.error(e)
-            LOGGER.info(
-                f"Failed to instantiate {self.NAME}!")
+            LOGGER.info(f"Failed to instantiate {self.NAME}!")
-            LOGGER.debug(f"Failed to start model: {self.CHECKPOINT}")
+            LOGGER.debug(f"Expected model path: {model_path}")
 
     def set_model_device(self):
         num_gpus = torch.cuda.device_count()