From f5776f2896ac4cfb669be8754e35152514b05a2e Mon Sep 17 00:00:00 2001 From: Antoine Date: Wed, 19 Jul 2023 19:00:54 -0400 Subject: [PATCH 1/8] Adding num2words to requirements and first attempt on helping French TTS to pronounce numbers of all kinds --- services/espnet-tts-fr/requirements.txt | 3 +- services/espnet-tts-fr/src/app.py | 40 ++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/services/espnet-tts-fr/requirements.txt b/services/espnet-tts-fr/requirements.txt index a1ff82cfb..8893bf8de 100644 --- a/services/espnet-tts-fr/requirements.txt +++ b/services/espnet-tts-fr/requirements.txt @@ -5,4 +5,5 @@ gunicorn==20.1.0 jsonschema==4.4.0 parallel_wavegan==0.5.5 phonemizer -typeguard==2.13.3 \ No newline at end of file +typeguard==2.13.3 +num2words>=0.5.12 \ No newline at end of file diff --git a/services/espnet-tts-fr/src/app.py b/services/espnet-tts-fr/src/app.py index bca2e36a4..fa3c0b562 100644 --- a/services/espnet-tts-fr/src/app.py +++ b/services/espnet-tts-fr/src/app.py @@ -26,6 +26,7 @@ from jsonschema import validate from torch.cuda import empty_cache from werkzeug.wsgi import FileWrapper +from num2words import num2words logging.basicConfig(format="%(asctime)s %(message)s") logger = logging.getLogger(__name__) @@ -40,6 +41,33 @@ app = Flask(__name__) +def isfloat(num): + try: + float(num) + return True + except ValueError: + return False + + +def processSegment(s): + if s.isdigit() or isfloat(s): + return num2words(s, lang='fr') + if "." in s: + temps = s.replace(".", "") + if temps.isnumeric(): + temps = int(temps) + s = num2words(int(temps), lang='fr') + if "," in s: + tempns = s.replace(",", ".") + if isfloat(tempns): + s = num2words(tempns, lang='fr') + if "-" in s: + num_in_ns = s.split("-") + s = " ".join(["de", num2words(num_in_ns[0], lang='fr'), + "à", num2words(num_in_ns[1], lang='fr')]) + return s + + @app.route("/service/tts/simple", methods=["POST"]) def perform_tts(): logger.debug("Received request") @@ -82,7 +110,17 @@ def segment_tts(): try: totalWav = None durations = [] - wavs = [tts(segment) for segment in data["segments"]] + wavs = [] + for segment in data["segments"]: + # detect numerical in segments + segment_new = [] + logger.debug(segment.split()) + for s in segment.split(): + logger.debug(f'Performing on: {s}') + segment_new.append(processSegment(s)) + segment_new = " ".join(segment_new) + logger.debug(f'New Segment: {segment_new}') + wavs.append(tts(segment_new)) for wav in wavs: if totalWav is not None: totalWav = np.append(totalWav, wav) From 18cd12c8eb7f7125d0a5e2afb9e58f11d8c8f312 Mon Sep 17 00:00:00 2001 From: Antoine Date: Mon, 24 Jul 2023 14:36:46 -0400 Subject: [PATCH 2/8] Finalize changes and removed unnecessary cases --- services/espnet-tts-fr/src/app.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/services/espnet-tts-fr/src/app.py b/services/espnet-tts-fr/src/app.py index fa3c0b562..95283f298 100644 --- a/services/espnet-tts-fr/src/app.py +++ b/services/espnet-tts-fr/src/app.py @@ -50,22 +50,19 @@ def isfloat(num): def processSegment(s): - if s.isdigit() or isfloat(s): + if s.isnumeric() or isfloat(s): + logger.debug(f"Case: '{s}' is numeric or float.") return num2words(s, lang='fr') - if "." in s: - temps = s.replace(".", "") - if temps.isnumeric(): - temps = int(temps) - s = num2words(int(temps), lang='fr') - if "," in s: + elif "," in s: + logger.debug(f"Case: '{s}' has , as separator") tempns = s.replace(",", ".") if isfloat(tempns): - s = num2words(tempns, lang='fr') - if "-" in s: + return num2words(float(tempns), lang='fr') + elif "-" in s: + logger.debug("case 3: -") num_in_ns = s.split("-") - s = " ".join(["de", num2words(num_in_ns[0], lang='fr'), - "à", num2words(num_in_ns[1], lang='fr')]) - return s + return " ".join(["de", num2words(num_in_ns[0], lang='fr'), + "à", num2words(num_in_ns[1], lang='fr')]) @app.route("/service/tts/simple", methods=["POST"]) @@ -116,8 +113,14 @@ def segment_tts(): segment_new = [] logger.debug(segment.split()) for s in segment.split(): - logger.debug(f'Performing on: {s}') - segment_new.append(processSegment(s)) + try: + logger.debug(f'Performing on: "{s}"') + segment_new.append(processSegment(s)) + except Exception as e: + logger.error("ERROR processing") + logger.error(e) + segment_new.append(s) + segment_new = " ".join(segment_new) logger.debug(f'New Segment: {segment_new}') wavs.append(tts(segment_new)) From 97040309a1be5e75f59b6e948cf59eeb3c9328cd Mon Sep 17 00:00:00 2001 From: Antoine Date: Wed, 26 Jul 2023 12:55:38 -0400 Subject: [PATCH 3/8] Commented out translation logging --- services/multilang-support/src/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/multilang-support/src/utils.py b/services/multilang-support/src/utils.py index a16a50e1d..327f422a8 100644 --- a/services/multilang-support/src/utils.py +++ b/services/multilang-support/src/utils.py @@ -149,7 +149,7 @@ def translate(self, segment: list): output_query = self.decode_generated_tensor(output_tensor) # 4. Translated query -> result - LOGGER.info(f'Translated: "{input_query}" --> "{output_query}"') + # LOGGER.info(f'Translated: "{input_query}" --> "{output_query}"') translations.append(output_query) finish_translate = time.time() translate_time = int((finish_translate - start_translate)*1000) @@ -185,7 +185,7 @@ def instantiate(): instantiate() ready_message = "Translation service is instantiated and ready!" LOGGER.info(ready_message) - # Dummy translation to test the service + # Dummy translation to test if the service is ready for lang in SUPPORTED_LANGS: LOGGER.info(Translator.get_translator("en", lang) .translate([ready_message])[0].pop()) From 06aeb9636a19908c996a13a080fe16a008b18624 Mon Sep 17 00:00:00 2001 From: Antoine Date: Wed, 26 Jul 2023 13:35:08 -0400 Subject: [PATCH 4/8] Commented out TTS logging + added edge case handling for case hyphen --- services/espnet-tts-fr/src/app.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/services/espnet-tts-fr/src/app.py b/services/espnet-tts-fr/src/app.py index 95283f298..6b9a82136 100644 --- a/services/espnet-tts-fr/src/app.py +++ b/services/espnet-tts-fr/src/app.py @@ -51,18 +51,26 @@ def isfloat(num): def processSegment(s): if s.isnumeric() or isfloat(s): - logger.debug(f"Case: '{s}' is numeric or float.") - return num2words(s, lang='fr') + logger.debug("Case: numeric or float.") + s = num2words(s, lang='fr') elif "," in s: - logger.debug(f"Case: '{s}' has , as separator") + logger.debug("Case: has a , as separator") tempns = s.replace(",", ".") if isfloat(tempns): - return num2words(float(tempns), lang='fr') + s = num2words(float(tempns), lang='fr') elif "-" in s: - logger.debug("case 3: -") + logger.debug("Case: has a - as separator") num_in_ns = s.split("-") - return " ".join(["de", num2words(num_in_ns[0], lang='fr'), - "à", num2words(num_in_ns[1], lang='fr')]) + ableToConvert = True + # making sure parts of segment are numbers (cast-able) + for num in num_in_ns: + if not isfloat(num): + ableToConvert = False + if ableToConvert: + s = " ".join(["de", num2words(num_in_ns[0], lang='fr'), + "à", num2words(num_in_ns[1], lang='fr')]) + # without any abnormal syntax, return unchanged s + return s @app.route("/service/tts/simple", methods=["POST"]) @@ -111,17 +119,16 @@ def segment_tts(): for segment in data["segments"]: # detect numerical in segments segment_new = [] - logger.debug(segment.split()) for s in segment.split(): try: - logger.debug(f'Performing on: "{s}"') + # logger.debug(f'Performing on: "{s}"') segment_new.append(processSegment(s)) except Exception as e: logger.error("ERROR processing") logger.error(e) segment_new.append(s) - segment_new = " ".join(segment_new) + segment_new = " ".join(str(s) for s in segment_new) logger.debug(f'New Segment: {segment_new}') wavs.append(tts(segment_new)) for wav in wavs: From 4799560ac3deca121f217a393470c8a96d76ba07 Mon Sep 17 00:00:00 2001 From: Antoine Date: Wed, 26 Jul 2023 13:35:32 -0400 Subject: [PATCH 5/8] Deleted new segment logging --- services/espnet-tts-fr/src/app.py | 1 - 1 file changed, 1 deletion(-) diff --git a/services/espnet-tts-fr/src/app.py b/services/espnet-tts-fr/src/app.py index 6b9a82136..4369e0a19 100644 --- a/services/espnet-tts-fr/src/app.py +++ b/services/espnet-tts-fr/src/app.py @@ -129,7 +129,6 @@ def segment_tts(): segment_new.append(s) segment_new = " ".join(str(s) for s in segment_new) - logger.debug(f'New Segment: {segment_new}') wavs.append(tts(segment_new)) for wav in wavs: if totalWav is not None: From 4093b98c6765d3519558464ac935a88e4f439d45 Mon Sep 17 00:00:00 2001 From: Antoine Date: Fri, 4 Aug 2023 14:37:17 -0400 Subject: [PATCH 6/8] Using regex to process numbers and ranges --- services/espnet-tts-fr/src/app.py | 69 ++++++++++++++++++------------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/services/espnet-tts-fr/src/app.py b/services/espnet-tts-fr/src/app.py index 4369e0a19..682c925e5 100644 --- a/services/espnet-tts-fr/src/app.py +++ b/services/espnet-tts-fr/src/app.py @@ -27,11 +27,16 @@ from torch.cuda import empty_cache from werkzeug.wsgi import FileWrapper from num2words import num2words +import re # for regular expression processing logging.basicConfig(format="%(asctime)s %(message)s") logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) +# Define regular expression (regex) +numRegex = r'\d+(?:[,.]\d+)?' +rangeRegex = r'\d+(?:[,.]\d+)?\s*-\s*\d+(?:[,.]\d+)?' + with open("segment.request.json", "r") as f: segment_request = json.load(f) @@ -41,36 +46,42 @@ app = Flask(__name__) -def isfloat(num): - try: - float(num) - return True - except ValueError: - return False +def frenchNum(num): + ''' + Convert numeric number to French words + - @param: `num` - could be str or int type + - The function can also process `,` separated numbers + ''' + commaSeparateRegex = re.compile(r'\d+\,\d+') + if commaSeparateRegex.search(num): + num = num.replace(",", ".") + return num2words(num, lang='fr') + + +def processRange(match): + ''' + Processing a number range (having a hyphen as a separator) + Cases: age, year, statistic ranges + E.g.: 12-24, 12,56 - 25,67 + ''' + phrase = match.group() + numRange = re.findall(numRegex, phrase) # num range will have 2 elements + if len(numRange) != 2: + logger.debug( + f"Error: processing range, but having {len(numRange)} numbers") + return + return f"de {frenchNum(numRange[0])} à {frenchNum(numRange[1])}" def processSegment(s): - if s.isnumeric() or isfloat(s): - logger.debug("Case: numeric or float.") - s = num2words(s, lang='fr') - elif "," in s: - logger.debug("Case: has a , as separator") - tempns = s.replace(",", ".") - if isfloat(tempns): - s = num2words(float(tempns), lang='fr') - elif "-" in s: - logger.debug("Case: has a - as separator") - num_in_ns = s.split("-") - ableToConvert = True - # making sure parts of segment are numbers (cast-able) - for num in num_in_ns: - if not isfloat(num): - ableToConvert = False - if ableToConvert: - s = " ".join(["de", num2words(num_in_ns[0], lang='fr'), - "à", num2words(num_in_ns[1], lang='fr')]) - # without any abnormal syntax, return unchanged s - return s + if re.match(rangeRegex, s): + # If the match is a range type + return re.sub(rangeRegex, processRange, s) + elif re.match(numRegex, s): + # If the match is a standalone number + return frenchNum(s) + else: + return s @app.route("/service/tts/simple", methods=["POST"]) @@ -121,14 +132,14 @@ def segment_tts(): segment_new = [] for s in segment.split(): try: - # logger.debug(f'Performing on: "{s}"') segment_new.append(processSegment(s)) except Exception as e: - logger.error("ERROR processing") + logger.error(f"ERROR processing {s}") logger.error(e) segment_new.append(s) segment_new = " ".join(str(s) for s in segment_new) + logger.debug(f"New: {segment_new}") wavs.append(tts(segment_new)) for wav in wavs: if totalWav is not None: From 33331cba405016020f0805a09a21e7242479e86d Mon Sep 17 00:00:00 2001 From: Antoine Date: Fri, 4 Aug 2023 14:38:33 -0400 Subject: [PATCH 7/8] Commented out error logging --- services/espnet-tts-fr/src/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/espnet-tts-fr/src/app.py b/services/espnet-tts-fr/src/app.py index 682c925e5..eed29586e 100644 --- a/services/espnet-tts-fr/src/app.py +++ b/services/espnet-tts-fr/src/app.py @@ -134,7 +134,7 @@ def segment_tts(): try: segment_new.append(processSegment(s)) except Exception as e: - logger.error(f"ERROR processing {s}") + # logger.error(f"ERROR processing {s}") logger.error(e) segment_new.append(s) From 81549671360b3c3452f902038ba1f172dd045ca4 Mon Sep 17 00:00:00 2001 From: Antoine Date: Fri, 4 Aug 2023 14:40:25 -0400 Subject: [PATCH 8/8] Removed tts logging the new segment for privacy matters --- services/espnet-tts-fr/src/app.py | 1 - 1 file changed, 1 deletion(-) diff --git a/services/espnet-tts-fr/src/app.py b/services/espnet-tts-fr/src/app.py index eed29586e..ce9efdb44 100644 --- a/services/espnet-tts-fr/src/app.py +++ b/services/espnet-tts-fr/src/app.py @@ -139,7 +139,6 @@ def segment_tts(): segment_new.append(s) segment_new = " ".join(str(s) for s in segment_new) - logger.debug(f"New: {segment_new}") wavs.append(tts(segment_new)) for wav in wavs: if totalWav is not None: