Implement AudioFrameGenerator::Create instead of Initialize.

jwcullen · jwcullen · commit df22b95d687f · 2025-11-06T10:08:04.000-05:00
- Reduces the data held by `AudioFrameGenerator`.
  - Existing callsites use `std::unique_ptr`, either for reference stability (b/390150766) or for delayed initialization.
    - Make the interface `std::unique_ptr`-based, and switch over some `std::optional` holders.
  - Use `ABSL_CHECK` in some places since the inner pointer is `absl_nonnull`.

PiperOrigin-RevId: 828507760
diff --git a/iamf/cli/iamf_encoder.cc b/iamf/cli/iamf_encoder.cc
@@ -361,11 +361,14 @@ absl::StatusOr<std::unique_ptr<IamfEncoder>> IamfEncoder::Create(
     return demixing_module.status();
   }
 
-  auto audio_frame_generator = std::make_unique<AudioFrameGenerator>(
+  auto audio_frame_generator = AudioFrameGenerator::Create(
       user_metadata.audio_frame_metadata(),
       user_metadata.codec_config_metadata(), *audio_elements, *demixing_module,
       **parameters_manager, *global_timing_module);
-  RETURN_IF_NOT_OK(audio_frame_generator->Initialize());
+  if (!audio_frame_generator.ok()) {
+    return audio_frame_generator.status();
+  }
+  ABSL_CHECK_NE(*audio_frame_generator, nullptr);
 
   // Initialize the audio frame decoder. It is needed to determine the recon
   // gain parameters and measure the loudness of the mixes.
@@ -407,7 +410,7 @@ absl::StatusOr<std::unique_ptr<IamfEncoder>> IamfEncoder::Create(
       std::move(timestamp_to_arbitrary_obus),
       std::move(param_definition_variants),
       std::move(parameter_block_generator), std::move(*parameters_manager),
-      *demixing_module, std::move(audio_frame_generator),
+      *demixing_module, *std::move(audio_frame_generator),
       std::move(audio_frame_decoder), std::move(global_timing_module),
       std::move(*mix_presentation_finalizer), std::move(obu_sequencers),
       std::move(streaming_obu_sequencer)));
diff --git a/iamf/cli/proto_conversion/proto_to_obu/BUILD b/iamf/cli/proto_conversion/proto_to_obu/BUILD
@@ -82,6 +82,7 @@ cc_library(
         "//iamf/obu:parameter_data",
         "//iamf/obu:types",
         "@abseil-cpp//absl/base:core_headers",
+        "@abseil-cpp//absl/base:nullability",
         "@abseil-cpp//absl/container:flat_hash_map",
         "@abseil-cpp//absl/container:flat_hash_set",
         "@abseil-cpp//absl/log:absl_check",
diff --git a/iamf/cli/proto_conversion/proto_to_obu/audio_frame_generator.cc b/iamf/cli/proto_conversion/proto_to_obu/audio_frame_generator.cc
@@ -24,6 +24,7 @@
 #include <utility>
 #include <vector>
 
+#include "absl/base/nullability.h"
 #include "absl/container/flat_hash_map.h"
 #include "absl/container/flat_hash_set.h"
 #include "absl/log/absl_check.h"
@@ -625,58 +626,35 @@ absl::Status ValidateAndApplyUserTrimming(
 
 }  // namespace
 
-AudioFrameGenerator::AudioFrameGenerator(
+absl::StatusOr<std::unique_ptr<AudioFrameGenerator> absl_nonnull>
+AudioFrameGenerator::Create(
     const ::google::protobuf::RepeatedPtrField<
-        iamf_tools_cli_proto::AudioFrameObuMetadata>& audio_frame_metadata,
+        iamf_tools_cli_proto::AudioFrameObuMetadata>& audio_frame_metadatas,
     const ::google::protobuf::RepeatedPtrField<
-        iamf_tools_cli_proto::CodecConfigObuMetadata>& codec_config_metadata,
+        iamf_tools_cli_proto::CodecConfigObuMetadata>& codec_config_metadatas,
     const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
         audio_elements,
     const DemixingModule& demixing_module,
     ParametersManager& parameters_manager,
-    GlobalTimingModule& global_timing_module)
-    : audio_elements_(audio_elements),
-      demixing_module_(demixing_module),
-      parameters_manager_(parameters_manager),
-      global_timing_module_(global_timing_module),
-      // Set to a state NOT taking samples at first; may be changed to
-      // `kTakingSamples` once `Initialize()` is called.
-      state_(kFlushingRemaining) {
-  for (const auto& audio_frame_obu_metadata : audio_frame_metadata) {
-    audio_frame_metadata_[audio_frame_obu_metadata.audio_element_id()] =
-        audio_frame_obu_metadata;
-  }
-
-  for (const auto& codec_config_obu_metadata : codec_config_metadata) {
-    codec_config_metadata_[codec_config_obu_metadata.codec_config_id()] =
+    GlobalTimingModule& global_timing_module) {
+  if (audio_frame_metadatas.empty()) {
+    // Ok, nothing will be generated. This state helps clients handle trivial IA
+    // Sequences.
+    return absl::WrapUnique(
+        new AudioFrameGenerator({}, {}, demixing_module, parameters_manager,
+                                global_timing_module, {}, {}, {}));
+  }
+
+  // Mapping from Codec Config ID to additional codec config metadata used
+  // to configure encoders.
+  absl::flat_hash_map<DecodedUleb128, iamf_tools_cli_proto::CodecConfig>
+      codec_config_metadata;
+  for (const auto& codec_config_obu_metadata : codec_config_metadatas) {
+    codec_config_metadata[codec_config_obu_metadata.codec_config_id()] =
         codec_config_obu_metadata.codec_config();
   }
-}
-
-absl::StatusOr<uint32_t> AudioFrameGenerator::GetNumberOfSamplesToDelayAtStart(
-    const iamf_tools_cli_proto::CodecConfig& codec_config_metadata,
-    const CodecConfigObu& codec_config) {
-  // This function is useful when querying what the codec delay should be. We
-  // don't want it to fail if the user-provided codec delay is wrong.
-  constexpr bool kDontValidateCodecDelay = false;
-
-  std::unique_ptr<EncoderBase> encoder;
-  RETURN_IF_NOT_OK(InitializeEncoder(codec_config_metadata, codec_config,
-                                     /*num_channels=*/1, encoder,
-                                     kDontValidateCodecDelay));
-  if (encoder == nullptr) {
-    return absl::InvalidArgumentError("Failed to initialize encoder");
-  }
-  return encoder->GetNumberOfSamplesToDelayAtStart();
-}
 
-absl::Status AudioFrameGenerator::Initialize() {
-  absl::MutexLock lock(&mutex_);
-  if (audio_frame_metadata_.empty()) {
-    return absl::OkStatus();
-  }
-  const auto& first_audio_frame_metadata =
-      audio_frame_metadata_.begin()->second;
+  const auto& first_audio_frame_metadata = *audio_frame_metadatas.begin();
   const int64_t common_samples_to_trim_at_start = static_cast<int64_t>(
       first_audio_frame_metadata.samples_to_trim_at_start());
   const int64_t common_samples_to_trim_at_end =
@@ -687,15 +665,23 @@ absl::Status AudioFrameGenerator::Initialize() {
       first_audio_frame_metadata
           .samples_to_trim_at_start_includes_codec_delay();
 
-  for (const auto& [audio_element_id, audio_frame_metadata] :
-       audio_frame_metadata_) {
+  absl::flat_hash_map<DecodedUleb128, absl::flat_hash_set<ChannelLabel::Label>>
+      audio_element_id_to_labels;
+  absl::flat_hash_map<uint32_t, std::unique_ptr<EncoderBase>>
+      substream_id_to_encoder;
+  absl::flat_hash_map<uint32_t, SubstreamData> substream_id_to_substream_data;
+  absl::flat_hash_map<uint32_t, TrimmingState> substream_id_to_trimming_state;
+  for (const auto& audio_frame_metadata : audio_frame_metadatas) {
+    const DecodedUleb128 audio_element_id =
+        audio_frame_metadata.audio_element_id();
+
     // Precompute the `ChannelLabel::Label` for each channel label string.
     RETURN_IF_NOT_OK(ChannelLabelUtils::SelectConvertAndFillLabels(
-        audio_frame_metadata, audio_element_id_to_labels_[audio_element_id]));
+        audio_frame_metadata, audio_element_id_to_labels[audio_element_id]));
 
     // Find the Codec Config OBU for this mono or coupled stereo substream.
-    const auto audio_elements_iter = audio_elements_.find(audio_element_id);
-    if (audio_elements_iter == audio_elements_.end()) {
+    const auto audio_elements_iter = audio_elements.find(audio_element_id);
+    if (audio_elements_iter == audio_elements.end()) {
       return absl::InvalidArgumentError(absl::StrCat(
           "Audio Element with ID= ", audio_element_id, " not found"));
     }
@@ -710,25 +696,25 @@ absl::Status AudioFrameGenerator::Initialize() {
           "The spec disallows trimming multiple frames from the end.");
     }
     RETURN_IF_NOT_OK(GetEncodingDataAndInitializeEncoders(
-        codec_config_metadata_, audio_element_with_data,
-        substream_id_to_encoder_));
+        codec_config_metadata, audio_element_with_data,
+        substream_id_to_encoder));
 
     // Intermediate data for all substreams belonging to an Audio Element.
     RETURN_IF_NOT_OK(InitializeSubstreamData(
-        audio_element_with_data.substream_id_to_labels,
-        substream_id_to_encoder_, num_samples_per_frame,
+        audio_element_with_data.substream_id_to_labels, substream_id_to_encoder,
+        num_samples_per_frame,
         audio_frame_metadata.samples_to_trim_at_start_includes_codec_delay(),
         audio_frame_metadata.samples_to_trim_at_start(),
-        substream_id_to_substream_data_));
+        substream_id_to_substream_data));
 
     // Validate that a `DemixingParamDefinition` is available if down-mixing
     // is needed.
     absl::StatusOr<const std::list<Demixer>*> down_mixers =
-        demixing_module_.GetDownMixers(audio_element_id);
+        demixing_module.GetDownMixers(audio_element_id);
     if (!down_mixers.ok()) {
       return down_mixers.status();
     }
-    if (!parameters_manager_.DemixingParamDefinitionAvailable(
+    if (!parameters_manager.DemixingParamDefinitionAvailable(
             audio_element_id) &&
         !(*down_mixers)->empty()) {
       return absl::InvalidArgumentError(
@@ -749,9 +735,9 @@ absl::Status AudioFrameGenerator::Initialize() {
       const int64_t additional_samples_to_trim_at_start =
           common_samples_to_trim_at_start_includes_codec_delay
               ? 0
-              : substream_id_to_encoder_[substream_id]
+              : substream_id_to_encoder[substream_id]
                     ->GetNumberOfSamplesToDelayAtStart();
-      substream_id_to_trimming_state_[substream_id] = {
+      substream_id_to_trimming_state[substream_id] = {
           .increment_samples_to_trim_at_end_by_padding =
               !audio_frame_metadata.samples_to_trim_at_end_includes_padding(),
           .user_samples_left_to_trim_at_end = common_samples_to_trim_at_end,
@@ -762,13 +748,29 @@ absl::Status AudioFrameGenerator::Initialize() {
     }
   }
 
-  // If `substream_id_to_substream_data_` is not empty, meaning this generator
-  // is expecting audio substreams and is ready to take audio samples.
-  if (!substream_id_to_substream_data_.empty()) {
-    state_ = kTakingSamples;
-  }
+  return absl::WrapUnique(new AudioFrameGenerator(
+      audio_element_id_to_labels, audio_elements, demixing_module,
+      parameters_manager, global_timing_module,
+      std::move(substream_id_to_encoder),
+      std::move(substream_id_to_substream_data),
+      std::move(substream_id_to_trimming_state)));
+}
 
-  return absl::OkStatus();
+absl::StatusOr<uint32_t> AudioFrameGenerator::GetNumberOfSamplesToDelayAtStart(
+    const iamf_tools_cli_proto::CodecConfig& codec_config_metadata,
+    const CodecConfigObu& codec_config) {
+  // This function is useful when querying what the codec delay should be. We
+  // don't want it to fail if the user-provided codec delay is wrong.
+  constexpr bool kDontValidateCodecDelay = false;
+
+  std::unique_ptr<EncoderBase> encoder;
+  RETURN_IF_NOT_OK(InitializeEncoder(codec_config_metadata, codec_config,
+                                     /*num_channels=*/1, encoder,
+                                     kDontValidateCodecDelay));
+  if (encoder == nullptr) {
+    return absl::InvalidArgumentError("Failed to initialize encoder");
+  }
+  return encoder->GetNumberOfSamplesToDelayAtStart();
 }
 
 bool AudioFrameGenerator::TakingSamples() const {
diff --git a/iamf/cli/proto_conversion/proto_to_obu/audio_frame_generator.h b/iamf/cli/proto_conversion/proto_to_obu/audio_frame_generator.h
@@ -16,7 +16,9 @@
 #include <cstdint>
 #include <list>
 #include <memory>
+#include <utility>
 
+#include "absl/base/nullability.h"
 #include "absl/base/thread_annotations.h"
 #include "absl/container/flat_hash_map.h"
 #include "absl/container/flat_hash_set.h"
@@ -55,8 +57,7 @@ namespace iamf_tools {
  *
  * The use pattern of this class is:
  *
- *   - Initialize (`Initialize()`).
- *     - (This puts the generator in the `kTakingSamples` state.)
+ *   - Call `Create()`.
  *
  *   Thread 1:
  *   - Repeat until no new sample to add (by checking `TakingSamples()`):
@@ -84,7 +85,7 @@ class AudioFrameGenerator {
     int64_t user_samples_left_to_trim_at_start;
   };
 
-  /*!\brief Constructor.
+  /*!\brief Factory function to create an `AudioFrameGenerator`.
    *
    * \param audio_frame_metadata Input audio frame metadata.
    * \param codec_config_metadata Input codec config metadata.
@@ -93,7 +94,8 @@ class AudioFrameGenerator {
    * \param parameters_manager Manager of parameters.
    * \param global_timing_module Global Timing Module.
    */
-  AudioFrameGenerator(
+  static absl::StatusOr<std::unique_ptr<AudioFrameGenerator> absl_nonnull>
+  Create(
       const ::google::protobuf::RepeatedPtrField<
           iamf_tools_cli_proto::AudioFrameObuMetadata>& audio_frame_metadata,
       const ::google::protobuf::RepeatedPtrField<
@@ -118,12 +120,6 @@ class AudioFrameGenerator {
       const iamf_tools_cli_proto::CodecConfig& codec_config_metadata,
       const CodecConfigObu& codec_config);
 
-  /*!\brief Initializes encoders and relevant data structures.
-   *
-   * \return `absl::OkStatus()` on success. A specific status on failure.
-   */
-  absl::Status Initialize();
-
   /*!\brief Returns whether the generator is still taking audio samples.
    *
    * \return True if the generator is still taking audio samples.
@@ -183,24 +179,57 @@ class AudioFrameGenerator {
     kFlushingRemaining,
   };
 
-  // Mapping from Audio Element ID to audio frame metadata.
-  absl::flat_hash_map<DecodedUleb128,
-                      iamf_tools_cli_proto::AudioFrameObuMetadata>
-      audio_frame_metadata_;
+  /*!\brief Private constructor.
+   *
+   * \param audio_element_id_to_labels Mapping from Audio Element ID to labels.
+   * \param audio_elements Mapping from Audio Element ID to audio element data.
+   * \param demixing_module Demixing module.
+   * \param parameters_manager Manager of parameters.
+   * \param global_timing_module Global Timing Module.
+   * \param substream_id_to_encoder Mapping from audio substream IDs to
+   *        encoders.
+   * \param substream_id_to_substream_data Mapping from substream IDs to
+   *        substream data.
+   * \param substream_id_to_trimming_state Mapping from substream IDs to
+   *        trimming states.
+   */
+  AudioFrameGenerator(
+      absl::flat_hash_map<DecodedUleb128,
+                          absl::flat_hash_set<ChannelLabel::Label>>
+          audio_element_id_to_labels,
+      const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
+          audio_elements,
+      const DemixingModule& demixing_module,
+      ParametersManager& parameters_manager,
+      GlobalTimingModule& global_timing_module,
+      absl::flat_hash_map<uint32_t, std::unique_ptr<EncoderBase>>
+          substream_id_to_encoder,
+      absl::flat_hash_map<uint32_t, SubstreamData>
+          substream_id_to_substream_data,
+      absl::flat_hash_map<uint32_t, TrimmingState>
+          substream_id_to_trimming_state)
+      : audio_element_id_to_labels_(std::move(audio_element_id_to_labels)),
+        audio_elements_(audio_elements),
+        substream_id_to_encoder_(std::move(substream_id_to_encoder)),
+        substream_id_to_substream_data_(
+            std::move(substream_id_to_substream_data)),
+        substream_id_to_trimming_state_(
+            std::move(substream_id_to_trimming_state)),
+        demixing_module_(demixing_module),
+        parameters_manager_(parameters_manager),
+        global_timing_module_(global_timing_module),
+        state_(substream_id_to_encoder_.empty() ? kFlushingRemaining
+                                                : kTakingSamples) {}
 
   // Mapping from Audio Element ID to labels.
-  absl::flat_hash_map<DecodedUleb128, absl::flat_hash_set<ChannelLabel::Label>>
+  const absl::flat_hash_map<DecodedUleb128,
+                            absl::flat_hash_set<ChannelLabel::Label>>
       audio_element_id_to_labels_;
 
   // Mapping from Audio Element ID to audio element data.
   const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
       audio_elements_;
 
-  // Mapping from Codec Config ID to additional codec config metadata used
-  // to configure encoders.
-  absl::flat_hash_map<DecodedUleb128, iamf_tools_cli_proto::CodecConfig>
-      codec_config_metadata_;
-
   // Mapping from audio substream IDs to encoders.
   absl::flat_hash_map<uint32_t, std::unique_ptr<EncoderBase>>
       substream_id_to_encoder_ ABSL_GUARDED_BY(mutex_);
diff --git a/iamf/cli/proto_conversion/proto_to_obu/tests/audio_frame_generator_benchmark.cc b/iamf/cli/proto_conversion/proto_to_obu/tests/audio_frame_generator_benchmark.cc
@@ -12,7 +12,6 @@
 
 #include <cstdint>
 #include <memory>
-#include <optional>
 #include <utility>
 #include <vector>
 
@@ -80,7 +79,7 @@ void InitializeAudioFrameGenerator(
     absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& audio_elements,
     std::unique_ptr<GlobalTimingModule>& global_timing_module,
     std::unique_ptr<ParametersManager>& parameters_manager,
-    std::optional<AudioFrameGenerator>& audio_frame_generator) {
+    std::unique_ptr<AudioFrameGenerator>& audio_frame_generator) {
   // Initialize pre-requisite OBUs and the global timing module. This is all
   // derived from the `user_metadata`.
   CodecConfigGenerator codec_config_generator(
@@ -104,14 +103,12 @@ void InitializeAudioFrameGenerator(
   parameters_manager = *std::move(temp_parameters_manager);
 
   // Create an audio frame generator.
-  audio_frame_generator.emplace(user_metadata.audio_frame_metadata(),
-                                user_metadata.codec_config_metadata(),
-                                audio_elements, *demixing_module,
-                                *parameters_manager, *global_timing_module);
-  ABSL_CHECK(audio_frame_generator.has_value());
-
-  // Initialize.
-  ABSL_CHECK_OK(audio_frame_generator->Initialize());
+  auto temp_audio_frame_generator = AudioFrameGenerator::Create(
+      user_metadata.audio_frame_metadata(),
+      user_metadata.codec_config_metadata(), audio_elements, *demixing_module,
+      *parameters_manager, *global_timing_module);
+  ABSL_CHECK_OK(temp_audio_frame_generator);
+  audio_frame_generator = std::move(*temp_audio_frame_generator);
 }
 
 static void BM_AddSamples(benchmark::State& state) {
@@ -133,7 +130,7 @@ static void BM_AddSamples(benchmark::State& state) {
   absl::flat_hash_map<uint32_t, AudioElementWithData> audio_elements = {};
   std::unique_ptr<GlobalTimingModule> global_timing_module;
   std::unique_ptr<ParametersManager> parameters_manager;
-  std::optional<AudioFrameGenerator> audio_frame_generator;
+  std::unique_ptr<AudioFrameGenerator> audio_frame_generator;
   InitializeAudioFrameGenerator(
       user_metadata, param_definitions, codec_config_obus, audio_elements,
       global_timing_module, parameters_manager, audio_frame_generator);
diff --git a/iamf/cli/proto_conversion/proto_to_obu/tests/audio_frame_generator_test.cc b/iamf/cli/proto_conversion/proto_to_obu/tests/audio_frame_generator_test.cc