Skip to content

Commit 1bcb041

Browse files
authored
Batch processing audio files (#208)
1 parent ab5058f commit 1bcb041

File tree

3 files changed

+97
-21
lines changed

3 files changed

+97
-21
lines changed

README.md

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
[![Docker pulls](https://img.shields.io/docker/pulls/beveradb/audio-separator.svg)](https://hub.docker.com/r/beveradb/audio-separator/tags)
66
[![codecov](https://codecov.io/gh/karaokenerds/python-audio-separator/graph/badge.svg?token=N7YK4ET5JP)](https://codecov.io/gh/karaokenerds/python-audio-separator)
77
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1gSlmSmna7f7fH6OjsiMEDLl-aJ9kGPkY?usp=sharing)
8-
[![Open In Huggingface](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-md.svg)](https://huggingface.co/spaces/theneos/audio-separator)
8+
[![Open In Huggingface](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-sm.svg)](https://huggingface.co/spaces/theneos/audio-separator)
99

1010
**Summary:** Easy to use audio stem separation from the command line or as a dependency in your own Python project, using the amazing MDX-Net, VR Arch, Demucs and MDXC models available in UVR by @Anjok07 & @aufr33.
1111

@@ -247,7 +247,7 @@ usage: audio-separator [-h] [-v] [-d] [-e] [-l] [--log_level LOG_LEVEL] [--list_
247247
Separate audio file into different stems.
248248

249249
positional arguments:
250-
audio_files The audio file paths to separate, in any common format.
250+
audio_files The audio file paths or directory to separate, in any common format.
251251

252252
options:
253253
-h, --help show this help message and exit
@@ -338,24 +338,34 @@ You only need to load a model when choosing or changing models. See example belo
338338
```python
339339
from audio_separator.separator import Separator
340340
341-
# Initialize the Separator with other configuration properties, below
341+
# Initialize the Separator class (with optional configuration properties, below)
342342
separator = Separator()
343343
344344
# Load a model
345345
separator.load_model(model_filename='UVR-MDX-NET-Inst_HQ_3.onnx')
346346
347347
# Separate multiple audio files without reloading the model
348-
output_file_paths_1 = separator.separate('audio1.wav')
349-
output_file_paths_2 = separator.separate('audio2.wav')
350-
output_file_paths_3 = separator.separate('audio3.wav')
348+
output_files = separator.separate(['audio1.wav', 'audio2.wav', 'audio3.wav'])
351349
352350
# Load a different model
353351
separator.load_model(model_filename='UVR_MDXNET_KARA_2.onnx')
354352
355353
# Separate the same files with the new model
356-
output_file_paths_4 = separator.separate('audio1.wav')
357-
output_file_paths_5 = separator.separate('audio2.wav')
358-
output_file_paths_6 = separator.separate('audio3.wav')
354+
output_files = separator.separate(['audio1.wav', 'audio2.wav', 'audio3.wav'])
355+
```
356+
357+
You can also specify the path to a folder containing audio files instead of listing the full paths to each of them:
358+
```python
359+
from audio_separator.separator import Separator
360+
361+
# Initialize the Separator class (with optional configuration properties, below)
362+
separator = Separator()
363+
364+
# Load a model
365+
separator.load_model(model_filename='UVR-MDX-NET-Inst_HQ_3.onnx')
366+
367+
# Separate all audio files located in a folder
368+
output_files = separator.separate('path/to/audio_directory')
359369
```
360370
361371
#### Renaming Stems

audio_separator/separator/separator.py

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -753,30 +753,77 @@ def load_model(self, model_filename="model_mel_band_roformer_ep_3005_sdr_11.4360
753753

754754
def separate(self, audio_file_path, custom_output_names=None):
    """
    Separates the audio file(s) into different stems (e.g., vocals, instruments) using the loaded model.

    Each entry of audio_file_path is either treated as a single audio file, or, when it is an existing
    directory, searched recursively for audio files (matched by file extension, case-insensitively).
    Every file found is passed to self._separate_file and the resulting output paths are collected.
    A failure on one file is logged and does not stop the remaining files from being processed.

    Parameters:
    - audio_file_path (str, os.PathLike or list): The path to the audio file or directory, or a list of such paths.
    - custom_output_names (dict, optional): Custom names for the output files. Defaults to None.

    Returns:
    - output_files (list of str): A list containing the paths to the separated audio stem files,
      in processing order. Files that failed to process contribute nothing to the list.

    Raises:
    - ValueError: If the torch device or the model instance has not been initialised.
    """
    # Check if the model and device are properly initialized
    if not (self.torch_device and self.model_instance):
        raise ValueError("Initialization failed or model not loaded. Please load a model before attempting to separate.")

    # A single path (str or path-like object) is wrapped in a list for uniform processing.
    # Checking os.PathLike as well means a pathlib.Path is not mistaken for an iterable of paths.
    if isinstance(audio_file_path, (str, os.PathLike)):
        audio_file_path = [audio_file_path]

    # Extensions recognised as audio when scanning a directory; add other formats if needed
    audio_extensions = (".wav", ".flac", ".mp3", ".ogg", ".opus", ".m4a", ".aiff", ".ac3")

    # Paths of all output files produced across every input
    output_files = []

    for path in audio_file_path:
        # Normalise path-like objects to plain strings so downstream code always receives str
        if isinstance(path, os.PathLike):
            path = os.fspath(path)

        if os.path.isdir(path):
            # Collect every matching file BEFORE separating anything, so the walk never sees
            # files created during processing.
            # NOTE(review): presumably relevant when the output directory lies below the input directory - confirm.
            candidates = []
            for root, dirs, files in os.walk(path):
                # Sorting in place makes the traversal order deterministic across filesystems
                dirs.sort()
                for file_name in sorted(files):
                    # Compare lower-cased names so "SONG.WAV" is matched as well as "song.wav"
                    if file_name.lower().endswith(audio_extensions):
                        candidates.append(os.path.join(root, file_name))
        else:
            # Anything that is not a directory is handed over as a single file
            candidates = [path]

        for candidate in candidates:
            self.logger.info(f"Processing file: {candidate}")
            try:
                output_files.extend(self._separate_file(candidate, custom_output_names))
            except Exception as e:
                # Deliberately best-effort: one bad file must not abort the whole batch
                self.logger.error(f"Failed to process file {candidate}: {e}")

    return output_files
806+
807+
def _separate_file(self, audio_file_path, custom_output_names=None):
808+
"""
809+
Internal method to handle separation for a single audio file.
810+
This method performs the actual separation process for a single audio file. It logs the start and end of the process,
811+
handles autocast if enabled, and ensures GPU cache is cleared after processing.
812+
Parameters:
813+
- audio_file_path (str): The path to the audio file.
814+
- custom_output_names (dict, optional): Custom names for the output files. Defaults to None.
815+
Returns:
816+
- output_files (list of str): A list containing the paths to the separated audio stem files.
817+
"""
818+
# Log the start of the separation process
773819
self.logger.info(f"Starting separation process for audio_file_path: {audio_file_path}")
774820
separate_start_time = time.perf_counter()
775821

822+
# Log normalization and amplification thresholds
776823
self.logger.debug(f"Normalization threshold set to {self.normalization_threshold}, waveform will be lowered to this max amplitude to avoid clipping.")
777824
self.logger.debug(f"Amplification threshold set to {self.amplification_threshold}, waveform will be scaled up to this max amplitude if below it.")
778825

779-
# Run separation method for the loaded model with autocast enabled if supported by the device.
826+
# Run separation method for the loaded model with autocast enabled if supported by the device
780827
output_files = None
781828
if self.use_autocast and autocast_mode.is_autocast_available(self.torch_device.type):
782829
self.logger.debug("Autocast available.")
@@ -789,7 +836,7 @@ def separate(self, audio_file_path, custom_output_names=None):
789836
# Clear GPU cache to free up memory
790837
self.model_instance.clear_gpu_cache()
791838

792-
# Unset more separation params to prevent accidentally re-using the wrong source files or output paths
839+
# Unset separation parameters to prevent accidentally re-using the wrong source files or output paths
793840
self.model_instance.clear_file_specific_paths()
794841

795842
# Remind the user one more time if they used a VIP model, so the message doesn't get lost in the logs

audio_separator/utils/cli.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
import logging
44
import json
55
import sys
6+
import os
67
from importlib import metadata
7-
from typing import Optional
88

99

1010
def main():
@@ -17,7 +17,7 @@ def main():
1717

1818
parser = argparse.ArgumentParser(description="Separate audio file into different stems.", formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, max_help_position=60))
1919

20-
parser.add_argument("audio_files", nargs="*", help="The audio file paths to separate, in any common format.", default=argparse.SUPPRESS)
20+
parser.add_argument("audio_files", nargs="*", help="The audio file paths or directory to separate, in any common format.", default=argparse.SUPPRESS)
2121

2222
package_version = metadata.distribution("audio-separator").version
2323

@@ -183,7 +183,26 @@ def main():
183183
parser.print_help()
184184
sys.exit(1)
185185

186-
logger.info(f"Separator version {package_version} beginning with input file(s): {', '.join(args.audio_files)}")
186+
# Path processing: if a directory is specified, collect all audio files from it
187+
audio_files = []
188+
for path in args.audio_files:
189+
if os.path.isdir(path):
190+
# If the path is a directory, recursively search for all audio files
191+
for root, dirs, files in os.walk(path):
192+
for file in files:
193+
# Check the file extension to ensure it's an audio file
194+
if file.endswith((".wav", ".flac", ".mp3", ".ogg", ".opus", ".m4a", ".aiff", ".ac3")): # Add other formats if needed
195+
audio_files.append(os.path.join(root, file))
196+
else:
197+
# If the path is a file, add it to the list
198+
audio_files.append(path)
199+
200+
# If no audio files are found, log an error and exit the program
201+
if not audio_files:
202+
logger.error("No valid audio files found in the specified path(s).")
203+
sys.exit(1)
204+
205+
logger.info(f"Separator version {package_version} beginning with input file(s): {', '.join(audio_files)}")
187206

188207
separator = Separator(
189208
log_formatter=log_formatter,
@@ -227,6 +246,6 @@ def main():
227246

228247
separator.load_model(model_filename=args.model_filename)
229248

230-
for audio_file in args.audio_files:
249+
for audio_file in audio_files:
231250
output_files = separator.separate(audio_file, custom_output_names=args.custom_output_names)
232251
logger.info(f"Separation complete! Output file(s): {' '.join(output_files)}")

0 commit comments

Comments
 (0)