From 2a73b2ea92bd3babb96247f00b5886f9c58104ee Mon Sep 17 00:00:00 2001
From: Shahd Yousef
Date: Thu, 19 Jun 2025 15:57:25 -0400
Subject: [PATCH 1/3] Switch to an explicit PyTorch base image, consolidate
 Dockerfile commands, pin versions in requirements.txt, and simplify the
 model-loading logic in utils.py

---
 services/multilang-support/Dockerfile       | 18 ++++++++++------
 services/multilang-support/requirements.txt |  5 +++--
 services/multilang-support/src/utils.py     | 23 +++++++--------------
 3 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/services/multilang-support/Dockerfile b/services/multilang-support/Dockerfile
index ea73b1495..c2f4e671a 100644
--- a/services/multilang-support/Dockerfile
+++ b/services/multilang-support/Dockerfile
@@ -1,20 +1,26 @@
-FROM pytorch/pytorch:latest
+ARG PYTORCH="2.7.1"
+ARG CUDA="11.8"
+ARG CUDNN="9"
+FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime
 
 # See README.md#docker-image for visualisation of the directory structure
 WORKDIR /app
 
-RUN apt-get update && apt-get install python3 python3-pip git git-lfs curl -y
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    python3 python3-pip git git-lfs curl \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
 
 # Copy requirements
 COPY services/multilang-support/requirements.txt /app/
 
 # Upgrade pip and install requirements
-RUN pip install --upgrade pip
-RUN pip install -r requirements.txt
+RUN pip install --upgrade pip \
+    && pip install --no-cache-dir -r requirements.txt
 
 # Clone the model
-RUN git lfs install
+# RUN git lfs install
+
 # English -> French
-RUN git clone https://huggingface.co/Helsinki-NLP/opus-mt-en-fr ./models/opus-mt-en-fr
+# RUN git clone https://huggingface.co/Helsinki-NLP/opus-mt-en-fr ./models/opus-mt-en-fr
 
 # English -> German
 # RUN git clone https://huggingface.co/Helsinki-NLP/opus-mt-en-de ./models/opus-mt-en-de
diff --git a/services/multilang-support/requirements.txt b/services/multilang-support/requirements.txt
index 872912318..e3087bead 100644
--- a/services/multilang-support/requirements.txt
+++ b/services/multilang-support/requirements.txt
@@ -1,6 +1,7 @@
 Flask>=2.3.0
-transformers>=4.29.0
+transformers[sentencepiece]>=4.29.0
 sentencepiece>=0.1.99
 gunicorn>=20.1.0
 jsonschema>=4.17.3
-sacremoses>=0.0.53
\ No newline at end of file
+sacremoses>=0.0.53
+torch>=2.6.0
\ No newline at end of file
diff --git a/services/multilang-support/src/utils.py b/services/multilang-support/src/utils.py
index 3b1d57c64..c71ef5c9c 100644
--- a/services/multilang-support/src/utils.py
+++ b/services/multilang-support/src/utils.py
@@ -42,21 +42,13 @@ def __init__(self, src_lang: str, tgt_lang: str) -> None:
         - tgt_lang: the target language in ISO 639-1 code
         """
         # Getting model checkpoint from downloaded folder (see Dockerfile)
-        self.CHECKPOINT = f"/app/models/opus-mt-{src_lang}-{tgt_lang}"
+        # self.CHECKPOINT = f"/app/models/opus-mt-{src_lang}-{tgt_lang}"
         self.NAME = f"Translator({src_lang}, {tgt_lang})"
         try:
-            try:
-                self.TOKENIZER = AutoTokenizer.from_pretrained(self.CHECKPOINT)
-                self.MODEL = AutoModelForSeq2SeqLM.from_pretrained(
-                    self.CHECKPOINT)
-            except Exception:
-                LOGGER.warning(
-                    f"Cannot find {self.CHECKPOINT}, is it pre-downloaded?")
-                LOGGER.info("Downloading model from HuggingFace model hub...")
-                self.TOKENIZER = AutoTokenizer.from_pretrained(
-                    f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}")
-                self.MODEL = AutoModelForSeq2SeqLM.from_pretrained(
-                    f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}")
+            model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
LOGGER.info(f"Loading model from Hugging Face: {model_name}") + self.TOKENIZER = AutoTokenizer.from_pretrained(model_name) + self.MODEL = AutoModelForSeq2SeqLM.from_pretrained(model_name) LOGGER.info(f"{self.NAME} instantiated!") # set GPU/CPU device @@ -66,9 +58,8 @@ def __init__(self, src_lang: str, tgt_lang: str) -> None: Translator.Translators.append(self) except Exception as e: LOGGER.error(e) - LOGGER.info( - f"Failed to instantiate {self.NAME}!") - LOGGER.debug(f"Failed to start model: {self.CHECKPOINT}") + LOGGER.info(f"Failed to instantiate {self.NAME}!") + LOGGER.debug(f"Failed to start model: {model_name}") def set_model_device(self): num_gpus = torch.cuda.device_count() From 57a58cb1a8e2a12fe5b4594a6d8fa82defcdbd4b Mon Sep 17 00:00:00 2001 From: Shahd Yousef Date: Thu, 19 Jun 2025 16:15:43 -0400 Subject: [PATCH 2/3] adding warmup endpoint + warmup flag --- docker-compose.yml | 2 ++ services/multilang-support/requirements.txt | 3 ++- services/multilang-support/src/translate.py | 20 ++++++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index ca60b75ec..139dcbac1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -82,6 +82,8 @@ services: devices: - driver: nvidia capabilities: ["gpu", "compute", "utility"] + environment: + - WARMUP_ENABLED=true autour-preprocessor: profiles: [production, test, default] diff --git a/services/multilang-support/requirements.txt b/services/multilang-support/requirements.txt index e3087bead..c6c9cc30b 100644 --- a/services/multilang-support/requirements.txt +++ b/services/multilang-support/requirements.txt @@ -4,4 +4,5 @@ sentencepiece>=0.1.99 gunicorn>=20.1.0 jsonschema>=4.17.3 sacremoses>=0.0.53 -torch>=2.6.0 \ No newline at end of file +torch>=2.6.0 +protobuf>=3.20.0 # to avoid tokenizer compatibility issues \ No newline at end of file diff --git a/services/multilang-support/src/translate.py b/services/multilang-support/src/translate.py index 21fb207f2..9ec6e35e9 100644 --- a/services/multilang-support/src/translate.py +++ b/services/multilang-support/src/translate.py @@ -94,6 +94,26 @@ def translate_request(): return jsonify(response), 200 +@app.route("/warmup", methods=["GET"]) +def warmup(): + """ + Trigger a dummy translation to warm up the Hugging Face model. 
+    """
+    try:
+        LOGGER.info("[WARMUP] Warmup endpoint triggered.")
+
+        # Instantiate a dummy translator (e.g., English to French)
+        dummy_translator = Translator("en", "fr")
+        _ = dummy_translator.translate(
+            "Internet Multimodal Access to Graphical Exploration")
+
+        LOGGER.info("[WARMUP] Model warmed successfully.")
+        return jsonify({"status": "warmed"}), 200
+    except Exception as e:
+        LOGGER.exception("[WARMUP] Warmup failed.")
+        return jsonify({"status": "error", "message": str(e)}), 500
+
+
 @app.route("/health", methods=["GET"])
 def health():
     """

From c2056d209531cf425960a55fba399b13c3aae6fe Mon Sep 17 00:00:00 2001
From: Shahd Yousef
Date: Thu, 19 Jun 2025 17:54:38 -0400
Subject: [PATCH 3/3] Download the English -> French model into the image

---
 services/multilang-support/Dockerfile   | 15 +++++++++++----
 services/multilang-support/src/utils.py | 16 ++++++++--------
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/services/multilang-support/Dockerfile b/services/multilang-support/Dockerfile
index c2f4e671a..28168e5f3 100644
--- a/services/multilang-support/Dockerfile
+++ b/services/multilang-support/Dockerfile
@@ -9,12 +9,19 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     python3 python3-pip git git-lfs curl \
     && apt-get clean && rm -rf /var/lib/apt/lists/*
 
-# Copy requirements
+# Install Python dependencies
 COPY services/multilang-support/requirements.txt /app/
-
-# Upgrade pip and install requirements
 RUN pip install --upgrade pip \
-    && pip install --no-cache-dir -r requirements.txt
+    && pip install --no-cache-dir -r requirements.txt \
+    && pip install --no-cache-dir "transformers[sentencepiece]"
+
+# Download the English -> French model into the image
+RUN python3 -c "\
+from transformers import MarianMTModel, MarianTokenizer; \
+model = MarianMTModel.from_pretrained('Helsinki-NLP/opus-mt-en-fr'); \
+tokenizer = MarianTokenizer.from_pretrained('Helsinki-NLP/opus-mt-en-fr'); \
+model.save_pretrained('/app/models/opus-mt-en-fr'); \
+tokenizer.save_pretrained('/app/models/opus-mt-en-fr')"
 
 # Clone the model
 # RUN git lfs install
diff --git a/services/multilang-support/src/utils.py b/services/multilang-support/src/utils.py
index c71ef5c9c..03660980f 100644
--- a/services/multilang-support/src/utils.py
+++ b/services/multilang-support/src/utils.py
@@ -45,21 +45,21 @@ def __init__(self, src_lang: str, tgt_lang: str) -> None:
         # self.CHECKPOINT = f"/app/models/opus-mt-{src_lang}-{tgt_lang}"
         self.NAME = f"Translator({src_lang}, {tgt_lang})"
         try:
-            model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
-            LOGGER.info(f"Loading model from Hugging Face: {model_name}")
-            self.TOKENIZER = AutoTokenizer.from_pretrained(model_name)
-            self.MODEL = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+            model_path = f"/app/models/opus-mt-{src_lang}-{tgt_lang}"
+            LOGGER.info(f"Loading model from local path: {model_path}")
+            self.TOKENIZER = AutoTokenizer.from_pretrained(
+                model_path, local_files_only=True)
+            self.MODEL = AutoModelForSeq2SeqLM.from_pretrained(
+                model_path, local_files_only=True)
 
             LOGGER.info(f"{self.NAME} instantiated!")
-            # set GPU/CPU device
             self.set_model_device()
-            LOGGER.info(
-                f"{self.NAME} running on {self.DEVICE_NAME}")
+            LOGGER.info(f"{self.NAME} running on {self.DEVICE_NAME}")
             Translator.Translators.append(self)
         except Exception as e:
             LOGGER.error(e)
             LOGGER.info(f"Failed to instantiate {self.NAME}!")
-            LOGGER.debug(f"Failed to start model: {model_name}")
+            LOGGER.debug(f"Expected model path: {model_path}")
 
     def set_model_device(self):
         num_gpus = torch.cuda.device_count()
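
For reviewers, a minimal smoke test of the checkpoint that PATCH 3/3 bakes
into the image. This is an illustrative sketch, not part of the patches: it
assumes it runs inside the built container (so /app/models/opus-mt-en-fr
exists), mirrors the local_files_only=True loading in utils.py, and reuses
the warmup sentence from the /warmup endpoint in translate.py.

# Illustrative only: assumes the image built from PATCH 3/3, where the
# checkpoint was saved to /app/models/opus-mt-en-fr.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

MODEL_PATH = "/app/models/opus-mt-en-fr"

# local_files_only=True mirrors utils.py: fail fast if the model was not
# baked into the image instead of silently downloading it from the Hub.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, local_files_only=True)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH, local_files_only=True)

# Same sentence the /warmup endpoint uses for its dummy translation.
inputs = tokenizer("Internet Multimodal Access to Graphical Exploration",
                   return_tensors="pt")
outputs = model.generate(**inputs)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Once the service is running, a GET to /warmup exercises the same
load-and-translate path end to end through the Translator class.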