2 changes: 2 additions & 0 deletions docker-compose.yml
@@ -82,6 +82,8 @@ services:
devices:
- driver: nvidia
capabilities: ["gpu", "compute", "utility"]
environment:
- WARMUP_ENABLED=true

autour-preprocessor:
profiles: [production, test, default]
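Note: this hunk sets the flag, but the diff does not show where WARMUP_ENABLED is read. A minimal sketch of one plausible consumer, assuming the Flask service in translate.py and a `from utils import Translator` import path (both assumptions, not shown in the PR):

```python
# Illustrative only: the PR does not show the consumer of WARMUP_ENABLED.
import os

def maybe_warmup():
    """At process start, run one dummy translation if WARMUP_ENABLED is truthy."""
    if os.environ.get("WARMUP_ENABLED", "false").lower() == "true":
        from utils import Translator  # assumed module layout
        Translator("en", "fr").translate("warmup")
```

Calling this once at startup would make the compose flag take effect without waiting for the first request to /warmup.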
31 changes: 22 additions & 9 deletions services/multilang-support/Dockerfile
@@ -1,20 +1,33 @@
FROM pytorch/pytorch:latest
ARG PYTORCH="2.7.1"
ARG CUDA="11.8"
ARG CUDNN="9"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime

# See README.md#docker-image for visualisation of the directory structure
WORKDIR /app
RUN apt-get update && apt-get install python3 python3-pip git git-lfs curl -y
RUN apt-get update && apt-get install -y --no-install-recommends \
python3 python3-pip git git-lfs curl \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

# Copy requirements
# Install Python dependencies
COPY services/multilang-support/requirements.txt /app/

# Upgrade pip and install requirements
RUN pip install --upgrade pip
RUN pip install -r requirements.txt
RUN pip install --upgrade pip \
&& pip install --no-cache-dir -r requirements.txt \
&& pip install --no-cache-dir "transformers[sentencepiece]"

# Download the English -> French model into the image
RUN python3 -c "\
from transformers import MarianMTModel, MarianTokenizer; \
model = MarianMTModel.from_pretrained('Helsinki-NLP/opus-mt-en-fr'); \
tokenizer = MarianTokenizer.from_pretrained('Helsinki-NLP/opus-mt-en-fr'); \
model.save_pretrained('/app/models/opus-mt-en-fr'); \
tokenizer.save_pretrained('/app/models/opus-mt-en-fr')"

# Clone the model
RUN git lfs install
# RUN git lfs install

# English -> French
RUN git clone https://huggingface.co/Helsinki-NLP/opus-mt-en-fr ./models/opus-mt-en-fr
# RUN git clone https://huggingface.co/Helsinki-NLP/opus-mt-en-fr ./models/opus-mt-en-fr

# English -> German
# RUN git clone https://huggingface.co/Helsinki-NLP/opus-mt-en-de ./models/opus-mt-en-de
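Baking the checkpoint into the image with save_pretrained() replaces the git-lfs clone, so the container needs no network access at runtime. A smoke test for the built image, illustrative and not part of the PR, assuming the path written by the RUN step above:

```python
# Verify the baked-in checkpoint loads fully offline (illustrative).
from transformers import MarianMTModel, MarianTokenizer

MODEL_DIR = "/app/models/opus-mt-en-fr"  # path used in the Dockerfile

tokenizer = MarianTokenizer.from_pretrained(MODEL_DIR, local_files_only=True)
model = MarianMTModel.from_pretrained(MODEL_DIR, local_files_only=True)

batch = tokenizer(["Hello, world!"], return_tensors="pt")
print(tokenizer.batch_decode(model.generate(**batch), skip_special_tokens=True))
```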
6 changes: 4 additions & 2 deletions services/multilang-support/requirements.txt
@@ -1,6 +1,8 @@
Flask>=2.3.0
transformers>=4.29.0
transformers[sentencepiece]>=4.29.0
sentencepiece>=0.1.99
gunicorn>=20.1.0
jsonschema>=4.17.3
sacremoses>=0.0.53
torch>=2.6.0
protobuf>=3.20.0 # to avoid tokenizer compatibility issues
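Marian tokenizers are sentencepiece-based, so the [sentencepiece] extra plus the explicit pin avoids a load-time ImportError, and the protobuf floor works around the tokenizer compatibility issue noted in the comment. A quick environment check, illustrative only:

```python
# Confirm the pinned stack imports together (illustrative check).
import google.protobuf
import sentencepiece
import torch
import transformers

for name, mod in [("transformers", transformers), ("sentencepiece", sentencepiece),
                  ("torch", torch), ("protobuf", google.protobuf)]:
    print(name, mod.__version__)
```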
20 changes: 20 additions & 0 deletions services/multilang-support/src/translate.py
@@ -94,6 +94,26 @@ def translate_request():
return jsonify(response), 200


@app.route("/warmup", methods=["GET"])
def warmup():
"""
Trigger a dummy translation to warm up the Hugging Face model.
"""
try:
LOGGER.info("[WARMUP] Warmup endpoint triggered.")

# Instantiate a dummy translator (e.g., English to French)
dummy_translator = Translator("en", "fr")
_ = dummy_translator.translate(
"Internet Multimodal Access to Graphical Exploration")

LOGGER.info("[WARMUP] Model warmed successfully.")
return jsonify({"status": "warmed"}), 200
except Exception as e:
LOGGER.exception("[WARMUP] Warmup failed.")
return jsonify({"status": "error", "message": str(e)}), 500


@app.route("/health", methods=["GET"])
def health():
"""
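Hitting the endpoint once after container start pays the model-load cost up front instead of on the first user request. A standard-library client example, assuming the service listens on localhost:5000 (host and port are not specified in the diff):

```python
# Illustrative warmup call; host and port are assumptions.
import json
import urllib.request

with urllib.request.urlopen("http://localhost:5000/warmup", timeout=300) as resp:
    print(json.loads(resp.read()))  # expected on success: {"status": "warmed"}
```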
29 changes: 10 additions & 19 deletions services/multilang-support/src/utils.py
@@ -42,33 +42,24 @@ def __init__(self, src_lang: str, tgt_lang: str) -> None:
- tgt_lang: the target language in ISO 639-1 code
"""
# Getting model checkpoint from downloaded folder (see Dockerfile)
self.CHECKPOINT = f"/app/models/opus-mt-{src_lang}-{tgt_lang}"
# self.CHECKPOINT = f"/app/models/opus-mt-{src_lang}-{tgt_lang}"
self.NAME = f"Translator({src_lang}, {tgt_lang})"
try:
try:
self.TOKENIZER = AutoTokenizer.from_pretrained(self.CHECKPOINT)
self.MODEL = AutoModelForSeq2SeqLM.from_pretrained(
self.CHECKPOINT)
except Exception:
LOGGER.warning(
f"Cannot find {self.CHECKPOINT}, is it pre-downloaded?")
LOGGER.info("Downloading model from HuggingFace model hub...")
self.TOKENIZER = AutoTokenizer.from_pretrained(
f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}")
self.MODEL = AutoModelForSeq2SeqLM.from_pretrained(
f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}")
model_path = f"/app/models/opus-mt-{src_lang}-{tgt_lang}"
LOGGER.info(f"Loading model from local path: {model_path}")
self.TOKENIZER = AutoTokenizer.from_pretrained(
model_path, local_files_only=True)
self.MODEL = AutoModelForSeq2SeqLM.from_pretrained(
model_path, local_files_only=True)

LOGGER.info(f"{self.NAME} instantiated!")
# set GPU/CPU device
self.set_model_device()
LOGGER.info(
f"{self.NAME} running on {self.DEVICE_NAME}")
LOGGER.info(f"{self.NAME} running on {self.DEVICE_NAME}")
Translator.Translators.append(self)
except Exception as e:
LOGGER.error(e)
LOGGER.info(
f"Failed to instantiate {self.NAME}!")
LOGGER.debug(f"Failed to start model: {self.CHECKPOINT}")
LOGGER.info(f"Failed to instantiate {self.NAME}!")
LOGGER.debug(f"Expected model path: {model_path}")

def set_model_device(self):
num_gpus = torch.cuda.device_count()
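With local_files_only=True there is no silent fallback to the Hugging Face hub, so a missing checkpoint now fails fast instead of triggering a download inside the container. A sketch of the new failure mode for a language pair that was not baked into the image (the en-de path is hypothetical):

```python
# Illustrative: local_files_only=True fails instead of downloading.
import os

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_path = "/app/models/opus-mt-en-de"  # hypothetical pair, not baked in

if not os.path.isdir(model_path):
    raise FileNotFoundError(
        f"{model_path} missing; add a save_pretrained() step to the Dockerfile")

tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path, local_files_only=True)
```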