Shared-Reality-Lab · JRegimbal · Aug 7, 2023 · Jul 19, 2023 · Jul 24, 2023 · Jul 26, 2023
diff --git a/services/espnet-tts-fr/requirements.txt b/services/espnet-tts-fr/requirements.txt
@@ -5,4 +5,5 @@ gunicorn==20.1.0
 jsonschema==4.4.0
 parallel_wavegan==0.5.5
 phonemizer
-typeguard==2.13.3
+typeguard==2.13.3
+num2words>=0.5.12
diff --git a/services/espnet-tts-fr/src/app.py b/services/espnet-tts-fr/src/app.py
@@ -26,11 +26,17 @@
 from jsonschema import validate
 from torch.cuda import empty_cache
 from werkzeug.wsgi import FileWrapper
+from num2words import num2words
+import re  # for regular expression processing
 
 logging.basicConfig(format="%(asctime)s %(message)s")
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)
 
+# A number with an optional `,`/`.` decimal part; a range is two of them around `-`
+numRegex = r'\d+(?:[,.]\d+)?'
+rangeRegex = numRegex + r'\s*-\s*' + numRegex
+
 with open("segment.request.json", "r") as f:
     segment_request = json.load(f)
 
@@ -40,6 +46,44 @@
 app = Flask(__name__)
 
 
+def frenchNum(num):
+    '''
+    Spell out a number in French words via `num2words(..., lang='fr')`.
+    - @param: `num` - str or int; it is coerced to str before processing
+    - A decimal comma (e.g. `12,5`) is rewritten as `12.5` for parsing
+    '''
+    text = str(num)  # the regex search below needs a str, even for int input
+    if re.search(r'\d+,\d+', text):
+        text = text.replace(",", ".")
+    return num2words(text, lang='fr')
+
+
+def processRange(match):
+    '''
+    Rewrite a matched number range (e.g. 12-24) as French "de X à Y".
+    Returns the matched text unchanged if it does not hold 2 numbers:
+    a `None` return would make `re.sub` drop the matched text instead.
+    '''
+    phrase = match.group()
+    numRange = re.findall(numRegex, phrase)  # num range will have 2 elements
+    if len(numRange) != 2:
+        logger.debug(
+            f"Error: processing range, but having {len(numRange)} numbers")
+        return phrase
+    return f"de {frenchNum(numRange[0])} à {frenchNum(numRange[1])}"
+
+
+def processSegment(s):
+    '''Spell out in French the number or number range starting token `s`.
+    Text after the number (e.g. `,` in `12,`) is kept; others pass through.'''
+    if re.match(rangeRegex, s):
+        return re.sub(rangeRegex, processRange, s)
+    m = re.match(numRegex, s)
+    if m and not any(c.isalnum() for c in s[m.end():]):
+        return frenchNum(m.group()) + s[m.end():]
+    return s
+
+
 @app.route("/service/tts/simple", methods=["POST"])
 def perform_tts():
     logger.debug("Received request")
@@ -82,7 +126,20 @@ def segment_tts():
     try:
         totalWav = None
         durations = []
-        wavs = [tts(segment) for segment in data["segments"]]
+        wavs = []
+        for segment in data["segments"]:
+            # detect numerical in segments
+            segment_new = []
+            for s in segment.split():
+                try:
+                    segment_new.append(processSegment(s))
+                except Exception as e:
+                    # logger.error(f"ERROR processing {s}")
+                    logger.error(e)
+                    segment_new.append(s)
+
+            segment_new = " ".join(str(s) for s in segment_new)
+            wavs.append(tts(segment_new))
         for wav in wavs:
             if totalWav is not None:
                 totalWav = np.append(totalWav, wav)

diff --git a/services/multilang-support/src/utils.py b/services/multilang-support/src/utils.py
@@ -149,7 +149,7 @@ def translate(self, segment: list):
             output_query = self.decode_generated_tensor(output_tensor)
 
             # 4. Translated query -> result
-            LOGGER.info(f'Translated: "{input_query}" --> "{output_query}"')
+            # LOGGER.info(f'Translated: "{input_query}" --> "{output_query}"')
             translations.append(output_query)
         finish_translate = time.time()
         translate_time = int((finish_translate - start_translate)*1000)
@@ -185,7 +185,7 @@ def instantiate():
     instantiate()
     ready_message = "Translation service is instantiated and ready!"
     LOGGER.info(ready_message)
-    # Dummy translation to test the service
+    # Dummy translation to test if the service is ready
     for lang in SUPPORTED_LANGS:
         LOGGER.info(Translator.get_translator("en", lang)
                     .translate([ready_message])[0].pop())