@@ -51,27 +51,6 @@ namespace mediapipe {
5151
// Tag string constant for the TTS session side packet.
// NOTE(review): presumably used to look up the TTS node resources passed to the
// calculator; the lookup is not visible in this excerpt - confirm against usage.
5252const std::string TTS_SESSION_SIDE_PACKET_TAG = " TTS_NODE_RESOURCES" ;
5353
54- ov::Tensor read_speaker_embedding (const std::filesystem::path& file_path) {
55- std::ifstream input (file_path, std::ios::binary);
56- OPENVINO_ASSERT (input, " Failed to open file: " + file_path.string ());
57-
58- // Get file size
59- input.seekg (0 , std::ios::end);
60- size_t buffer_size = static_cast <size_t >(input.tellg ());
61- input.seekg (0 , std::ios::beg);
62-
63- // Check size is multiple of float
64- OPENVINO_ASSERT (buffer_size % sizeof (float ) == 0 , " File size is not a multiple of float size." );
65- size_t num_floats = buffer_size / sizeof (float );
66- OPENVINO_ASSERT (num_floats == 512 , " File must contain speaker embedding including 512 32-bit floats." );
67-
68- OPENVINO_ASSERT (input, " Failed to read all data from file." );
69- ov::Tensor floats_tensor (ov::element::f32 , ov::Shape{1 , num_floats});
70- input.read (reinterpret_cast <char *>(floats_tensor.data ()), buffer_size);
71-
72- return floats_tensor;
73- }
74-
7554class T2sCalculator : public CalculatorBase {
7655 static const std::string INPUT_TAG_NAME;
7756 static const std::string OUTPUT_TAG_NAME;
@@ -129,20 +108,15 @@ class T2sCalculator : public CalculatorBase {
129108 if (voiceIt != payload.parsedJson ->MemberEnd () && voiceIt->value .IsString ()) {
130109 voice = voiceIt->value .GetString ();
131110 }
132- std::string voiceEmbeddingsPath;
133111 if (voice.has_value ()){
134112 if (pipe->voices .find (voice.value ()) == pipe->voices .end ())
135113 return absl::InvalidArgumentError (absl::StrCat (" Requested voice not available: " , payload.uri ));
136- if (!std::filesystem::exists (pipe->voices [voice.value ()]))
137- return absl::InvalidArgumentError (absl::StrCat (" Requested voice speaker embeddings file does not exist: " , pipe->voices [voice.value ()]));
138- voiceEmbeddingsPath = pipe->voices [voice.value ()];
139114 }
140115 ov::genai::Text2SpeechDecodedResults generatedSpeech;
141116 std::unique_lock lock (pipe->ttsPipelineMutex );
142117
143118 if (voice.has_value ()){
144- auto speakerEmbedding = read_speaker_embedding (voiceEmbeddingsPath);
145- generatedSpeech = pipe->ttsPipeline ->generate (inputIt->value .GetString (), speakerEmbedding);
119+ generatedSpeech = pipe->ttsPipeline ->generate (inputIt->value .GetString (), pipe->voices [voice.value ()]);
146120 }
147121 else {
148122 generatedSpeech = pipe->ttsPipeline ->generate (inputIt->value .GetString ());
0 commit comments