Batch processing audio files (#208)

Bebra777228 · web-flow · commit 1bcb04159a85 · 2025-04-27T16:45:05.000-04:00
diff --git a/README.md b/README.md
@@ -5,7 +5,7 @@
 [![Docker pulls](https://img.shields.io/docker/pulls/beveradb/audio-separator.svg)](https://hub.docker.com/r/beveradb/audio-separator/tags)
 [![codecov](https://codecov.io/gh/karaokenerds/python-audio-separator/graph/badge.svg?token=N7YK4ET5JP)](https://codecov.io/gh/karaokenerds/python-audio-separator)
 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1gSlmSmna7f7fH6OjsiMEDLl-aJ9kGPkY?usp=sharing)
-[![Open In Huggingface](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-md.svg)](https://huggingface.co/spaces/theneos/audio-separator)
+[![Open In Huggingface](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-sm.svg)](https://huggingface.co/spaces/theneos/audio-separator)
 
 **Summary:** Easy to use audio stem separation from the command line or as a dependency in your own Python project, using the amazing MDX-Net, VR Arch, Demucs and MDXC models available in UVR by @Anjok07 & @aufr33.
 
@@ -247,7 +247,7 @@ usage: audio-separator [-h] [-v] [-d] [-e] [-l] [--log_level LOG_LEVEL] [--list_
 Separate audio file into different stems.
 
 positional arguments:
-  audio_files                                            The audio file paths to separate, in any common format.
+  audio_files                                            The audio file paths or directory to separate, in any common format.
 
 options:
   -h, --help                                             show this help message and exit
@@ -338,24 +338,34 @@ You only need to load a model when choosing or changing models. See example belo
 ```python
 from audio_separator.separator import Separator
 
-# Initialize the Separator with other configuration properties, below
+# Initialize the Separator class (with optional configuration properties, below)
 separator = Separator()
 
 # Load a model
 separator.load_model(model_filename='UVR-MDX-NET-Inst_HQ_3.onnx')
 
 # Separate multiple audio files without reloading the model
-output_file_paths_1 = separator.separate('audio1.wav')
-output_file_paths_2 = separator.separate('audio2.wav')
-output_file_paths_3 = separator.separate('audio3.wav')
+output_files = separator.separate(['audio1.wav', 'audio2.wav', 'audio3.wav'])
 
 # Load a different model
 separator.load_model(model_filename='UVR_MDXNET_KARA_2.onnx')
 
 # Separate the same files with the new model
-output_file_paths_4 = separator.separate('audio1.wav')
-output_file_paths_5 = separator.separate('audio2.wav')
-output_file_paths_6 = separator.separate('audio3.wav')
+output_files = separator.separate(['audio1.wav', 'audio2.wav', 'audio3.wav'])
+```
+
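+You can also pass the path to a folder instead of listing each file individually; the folder is searched recursively for files ending in `.wav`, `.flac`, `.mp3`, `.ogg`, `.opus`, `.m4a`, `.aiff` or `.ac3` (the extension match is case-sensitive):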
+```python
+from audio_separator.separator import Separator
+
+# Initialize the Separator class (with optional configuration properties, below)
+separator = Separator()
+
+# Load a model
+separator.load_model(model_filename='UVR-MDX-NET-Inst_HQ_3.onnx')
+
+# Separate all audio files located in a folder
+output_files = separator.separate('path/to/audio_directory')
 ```
 
 #### Renaming Stems
diff --git a/audio_separator/separator/separator.py b/audio_separator/separator/separator.py
@@ -753,30 +753,77 @@ def load_model(self, model_filename="model_mel_band_roformer_ep_3005_sdr_11.4360
 
     def separate(self, audio_file_path, custom_output_names=None):
         """
-        Separates the audio file into different stems (e.g., vocals, instruments) using the loaded model.
+        Separates the audio file(s) into different stems (e.g., vocals, instruments) using the loaded model.
 
-        This method takes the path to an audio file, processes it through the loaded separation model, and returns
-        the paths to the output files containing the separated audio stems. It handles the entire flow from loading
-        the audio, running the separation, clearing up resources, and logging the process.
+        This method takes the path to an audio file or a directory containing audio files, processes them through
+        the loaded separation model, and returns the paths to the output files containing the separated audio stems.
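+        It handles the entire flow: loading the audio, running the separation, clearing up resources, and logging the process. A file that raises during processing is logged as an error and skipped, so the returned list only covers the files that succeeded.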
 
         Parameters:
-        - audio_file_path (str): The path to the audio file to be separated.
+        - audio_file_path (str or list): Path to an audio file or to a directory (searched recursively for audio files), or a list of such paths.
         - custom_output_names (dict, optional): Custom names for the output files. Defaults to None.
 
         Returns:
         - output_files (list of str): A list containing the paths to the separated audio stem files.
         """
+        # Check if the model and device are properly initialized
         if not (self.torch_device and self.model_instance):
             raise ValueError("Initialization failed or model not loaded. Please load a model before attempting to separate.")
 
-        # Starting the separation process
+        # If audio_file_path is a string, convert it to a list for uniform processing
+        if isinstance(audio_file_path, str):
+            audio_file_path = [audio_file_path]
+
+        # Initialize a list to store paths of all output files
+        output_files = []
+
+        # Process each path in the list
+        for path in audio_file_path:
+            if os.path.isdir(path):
+                # If the path is a directory, recursively search for all audio files
+                for root, dirs, files in os.walk(path):
+                    for file in files:
+                        # Check the file extension to ensure it's an audio file
+                        if file.endswith((".wav", ".flac", ".mp3", ".ogg", ".opus", ".m4a", ".aiff", ".ac3")):  # Add other formats if needed
+                            full_path = os.path.join(root, file)
+                            self.logger.info(f"Processing file: {full_path}")
+                            try:
+                                # Perform separation for each file
+                                files_output = self._separate_file(full_path, custom_output_names)
+                                output_files.extend(files_output)
+                            except Exception as e:
+                                self.logger.error(f"Failed to process file {full_path}: {e}")
+            else:
+                # If the path is a file, process it directly
+                self.logger.info(f"Processing file: {path}")
+                try:
+                    files_output = self._separate_file(path, custom_output_names)
+                    output_files.extend(files_output)
+                except Exception as e:
+                    self.logger.error(f"Failed to process file {path}: {e}")
+
+        return output_files
+
+    def _separate_file(self, audio_file_path, custom_output_names=None):
+        """
+        Internal method to handle separation for a single audio file.
+        This method performs the actual separation process for a single audio file. It logs the start and end of the process,
+        handles autocast if enabled, and ensures GPU cache is cleared after processing.
+        Parameters:
+        - audio_file_path (str): The path to the audio file.
+        - custom_output_names (dict, optional): Custom names for the output files. Defaults to None.
+        Returns:
+        - output_files (list of str): A list containing the paths to the separated audio stem files.
+        """
+        # Log the start of the separation process
         self.logger.info(f"Starting separation process for audio_file_path: {audio_file_path}")
         separate_start_time = time.perf_counter()
 
+        # Log normalization and amplification thresholds
         self.logger.debug(f"Normalization threshold set to {self.normalization_threshold}, waveform will be lowered to this max amplitude to avoid clipping.")
         self.logger.debug(f"Amplification threshold set to {self.amplification_threshold}, waveform will be scaled up to this max amplitude if below it.")
 
-        # Run separation method for the loaded model with autocast enabled if supported by the device.
+        # Run separation method for the loaded model with autocast enabled if supported by the device
         output_files = None
         if self.use_autocast and autocast_mode.is_autocast_available(self.torch_device.type):
             self.logger.debug("Autocast available.")
@@ -789,7 +836,7 @@ def separate(self, audio_file_path, custom_output_names=None):
         # Clear GPU cache to free up memory
         self.model_instance.clear_gpu_cache()
 
-        # Unset more separation params to prevent accidentally re-using the wrong source files or output paths
+        # Unset separation parameters to prevent accidentally re-using the wrong source files or output paths
         self.model_instance.clear_file_specific_paths()
 
         # Remind the user one more time if they used a VIP model, so the message doesn't get lost in the logs
diff --git a/audio_separator/utils/cli.py b/audio_separator/utils/cli.py
@@ -3,8 +3,8 @@
 import logging
 import json
 import sys
+import os
 from importlib import metadata
-from typing import Optional
 
 
 def main():
@@ -17,7 +17,7 @@ def main():
 
     parser = argparse.ArgumentParser(description="Separate audio file into different stems.", formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, max_help_position=60))
 
-    parser.add_argument("audio_files", nargs="*", help="The audio file paths to separate, in any common format.", default=argparse.SUPPRESS)
+    parser.add_argument("audio_files", nargs="*", help="The audio file paths or directory to separate, in any common format.", default=argparse.SUPPRESS)
 
     package_version = metadata.distribution("audio-separator").version
 
@@ -183,7 +183,26 @@ def main():
         parser.print_help()
         sys.exit(1)
 
-    logger.info(f"Separator version {package_version} beginning with input file(s): {', '.join(args.audio_files)}")
+    # Path processing: if a directory is specified, collect all audio files from it
+    audio_files = []
+    for path in args.audio_files:
+        if os.path.isdir(path):
+            # If the path is a directory, recursively search for all audio files
+            for root, dirs, files in os.walk(path):
+                for file in files:
+                    # Check the file extension to ensure it's an audio file
+                    if file.endswith((".wav", ".flac", ".mp3", ".ogg", ".opus", ".m4a", ".aiff", ".ac3")):  # Add other formats if needed
+                        audio_files.append(os.path.join(root, file))
+        else:
+            # If the path is a file, add it to the list
+            audio_files.append(path)
+
+    # If no audio files are found, log an error and exit the program
+    if not audio_files:
+        logger.error("No valid audio files found in the specified path(s).")
+        sys.exit(1)
+
+    logger.info(f"Separator version {package_version} beginning with input file(s): {', '.join(audio_files)}")
 
     separator = Separator(
         log_formatter=log_formatter,
@@ -227,6 +246,6 @@ def main():
 
     separator.load_model(model_filename=args.model_filename)
 
-    for audio_file in args.audio_files:
+    for audio_file in audio_files:
         output_files = separator.separate(audio_file, custom_output_names=args.custom_output_names)
         logger.info(f"Separation complete! Output file(s): {' '.join(output_files)}")