
Commit 10fa098

Add file-level chunking for large audio files (fixes #158) (#256)

* Add file-level chunking for large audio files (#158)
  - Add AudioChunker class for splitting/merging audio files
  - Add chunk_duration parameter to Separator class
  - Implement _process_with_chunking() for chunk-based processing
  - Add --chunk_duration CLI option
  - Chunks are processed sequentially with simple concatenation
  - GPU cache cleared between chunks to manage memory

  This implementation follows the reference approach from issue #44 without overlap/crossfade (can be added in a future PR if needed).

* Add comprehensive unit tests for AudioChunker
  - 17 test cases covering initialization, splitting, merging
  - Tests for edge cases (short files, exact multiples, boundaries)
  - Integration tests with actual audio segments
  - Mock tests for error handling
  - All tests passing

* docs: Add documentation for large file processing with chunking
  Updated README.md with a comprehensive section on using the --chunk_duration option for processing large audio files. Documents the split-process-merge workflow, benefits, recommendations, and the limitation (simple concatenation). Relates to #158

* fix: Ensure chunk files are saved to temp directory and cleaned up properly
  Fixed two issues in chunk processing:
  1. Convert relative output file paths to absolute paths when collecting chunk results
  2. Temporarily change both Separator and model_instance output_dir to the temp directory during chunk processing

  This ensures chunk files are saved to the temp directory and automatically cleaned up, keeping the output directory clean with only the final merged files. Relates to #158

* refactor: Simplify docstrings and remove redundant comments
  - Simplified the audio_chunking.py module docstring to match the existing codebase style
  - Removed self-explanatory inline comments in _process_with_chunking
  - Kept important comments for non-obvious operations (GPU cache clearing)
  - All unit tests passing (17/17)

  Relates to #158

* fix: Adjust log levels for internal operations
  - Changed 'Loading audio file' to debug (internal AudioChunker operation)
  - Changed 'Created temporary directory' to debug (low-priority detail)
  - Removed the 'File-level chunking enabled' log from __init__ (unnecessary, already logged when actually used)
  - INFO level now shows only user-relevant information: splitting/merging progress, chunk processing status, completion messages

  Relates to #158

* refactor: Remove redundant 'Successfully split' log message
  The completion message after splitting is redundant since we already log the splitting operation. Follows the existing codebase pattern where 'completed' messages are typically debug level. Relates to #158

* Fix multi-stem model support in chunking
  - Replace hardcoded primary/secondary lists with a dynamic stem dictionary
  - Extract stem names from chunk filenames using a regex pattern
  - Support 2-stem, 4-stem, and 6-stem models (MDX, Demucs, Demucs 6s)
  - Add the re module import for stem name extraction
  - Update README to document multi-stem support

  Previously, only the first 2 stems were preserved when using chunking with 4-stem or 6-stem models. This fix ensures all stems are correctly processed and merged. The chunking feature now supports:
  - 2-stem models (e.g., MDX): Vocals + Instrumental
  - 4-stem models (e.g., Demucs): Drums, Bass, Other, Vocals
  - 6-stem models (e.g., Demucs 6s): Bass, Drums, Other, Vocals, Guitar, Piano

* Add comprehensive unit tests for chunking functionality
  Add 15 new unit tests for Separator chunking logic, covering:

  **Basic Functionality (6 tests):**
  - 2-stem model compatibility (Vocals, Instrumental)
  - 4-stem Demucs model (Drums, Bass, Other, Vocals)
  - 6-stem Demucs model (all 6 stems)
  - Stem name extraction from filenames with regex
  - Fallback handling for non-matching patterns
  - Sorted stem order in merged output

  **Internal Logic & State Management (6 tests):**
  - State restoration after chunking (chunk_duration, output_dir)
  - GPU cache clearing between chunks
  - Temporary directory cleanup verification
  - State restoration on error (exception handling)
  - AudioChunker initialization with correct parameters
  - custom_output_names parameter handling

  **Edge Cases (3 tests):**
  - Empty output handling (no stems produced)
  - Inconsistent stem counts across chunks
  - Filename pattern match failure with fallback naming

  Total test coverage: 32 tests (17 AudioChunker + 15 Separator chunking). All tests passing.

* Fix test_cli.py to include chunk_duration parameter
  The common_expected_args fixture was missing the new chunk_duration parameter added to Separator.__init__(), causing all CLI tests to fail in CI. Also corrected the parameter order to match the actual constructor (use_soundfile before use_autocast).
  Fixes:
  - Added chunk_duration: None to the expected args
  - Reordered use_soundfile/use_autocast to match Separator.__init__()

  All tests/unit/test_cli.py tests now pass (13 passed, 2 skipped).

* Fix ruff lint errors in test_audio_chunking.py
  Addressed CodeRabbit feedback:
  1. Renamed the unused mock parameter mock_makedirs to _mock_makedirs (ARG002)
  2. Renamed the unused lambda parameter key to _ (ARG005)
  3. Replaced hardcoded /tmp paths with the pytest tmp_path fixture (S108)

  All 17 tests still pass after these changes.
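
As a rough sketch of what the commit message describes, the new `chunk_duration` parameter would be used from Python roughly like this, assuming the library's usual `load_model()` / `separate()` workflow; the model filename and input file below are placeholders:

```python
from audio_separator.separator import Separator

# Files longer than chunk_duration (seconds) are split, processed
# chunk-by-chunk, and merged back together; shorter files are untouched.
separator = Separator(chunk_duration=600)

# Placeholder model filename; any supported model should behave the same way.
separator.load_model(model_filename="UVR-MDX-NET-Inst_HQ_3.onnx")

# Returns the paths of the final merged stem files.
output_files = separator.separate("long_podcast.wav")
print(output_files)
```
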
1 parent bfa7380 commit 10fa098

File tree

7 files changed: +1263 -1 lines changed


README.md

Lines changed: 40 additions & 0 deletions
@@ -278,6 +278,46 @@ For programmatic use, you can output the model list in JSON format:
audio-separator -l --list_format=json
```

### Processing Large Files

For very long audio files (>1 hour), you may encounter out-of-memory errors. The `--chunk_duration` option automatically splits large files into smaller chunks, processes them separately, and merges the results:

```sh
# Process an 8-hour podcast in 10-minute chunks
audio-separator long_podcast.wav --chunk_duration 600

# Adjust chunk size based on available memory
audio-separator very_long_audio.wav --chunk_duration 300  # 5-minute chunks
```

#### How It Works

1. **Split**: The input file is split into fixed-duration chunks (e.g., 10 minutes)
2. **Process**: Each chunk is processed separately, reducing peak memory usage
3. **Merge**: The results are merged back together with simple concatenation

The chunking feature supports all model types:
- **2-stem models** (e.g., MDX): Vocals + Instrumental
- **4-stem models** (e.g., Demucs): Drums, Bass, Other, Vocals
- **6-stem models** (e.g., Demucs 6s): Bass, Drums, Other, Vocals, Guitar, Piano

#### Benefits

- **Prevents OOM errors**: Process files of any length without running out of memory
- **Predictable memory usage**: Memory usage stays bounded regardless of file length
- **No quality loss**: Each chunk is fully processed with the selected model
- **Multi-stem support**: Works seamlessly with 2, 4, and 6-stem models

#### Recommendations

- **Files > 1 hour**: Use `--chunk_duration 600` (10 minutes)
- **Limited memory systems**: Use smaller chunks (300-600 seconds)
- **Ample memory**: You may not need chunking at all

#### Note on Audio Quality

Chunks are concatenated without crossfading, which may result in minor artifacts at chunk boundaries in rare cases. For most use cases, these are not noticeable. The simple concatenation approach keeps processing time minimal while solving out-of-memory issues.

### Full command-line interface options
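
To make the recommendations above concrete, here is a small back-of-the-envelope helper (illustrative only, not part of the library) showing how many chunks a given `--chunk_duration` yields:

```python
import math

def estimated_chunks(total_seconds: float, chunk_duration_seconds: float) -> int:
    """Rough chunk count: the splitter uses ceiling division over the file length."""
    return math.ceil(total_seconds / chunk_duration_seconds)

print(estimated_chunks(8 * 3600, 600))  # 8-hour podcast, 10-minute chunks -> 48
print(estimated_chunks(8 * 3600, 300))  # same file, 5-minute chunks -> 96
print(estimated_chunks(45 * 60, 600))   # 45-minute file -> 5 (little need to chunk)
```
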
audio_separator/separator/audio_chunking.py

Lines changed: 141 additions & 0 deletions
@@ -0,0 +1,141 @@
"""Audio chunking utilities for processing large audio files to prevent OOM errors."""

import os
import logging
from typing import List
from pydub import AudioSegment


class AudioChunker:
    """
    Handles splitting and merging of large audio files.

    This class provides utilities to:
    - Split large audio files into fixed-duration chunks
    - Merge processed chunks back together with simple concatenation
    - Determine if a file should be chunked based on its duration

    Example:
        >>> chunker = AudioChunker(chunk_duration_seconds=600)  # 10-minute chunks
        >>> chunk_paths = chunker.split_audio("long_audio.wav", "/tmp/chunks")
        >>> # Process each chunk...
        >>> output_path = chunker.merge_chunks(processed_chunks, "output.wav")
    """

    def __init__(self, chunk_duration_seconds: float, logger: logging.Logger = None):
        """
        Initialize the AudioChunker.

        Args:
            chunk_duration_seconds: Duration of each chunk in seconds
            logger: Optional logger instance for logging operations
        """
        self.chunk_duration_ms = int(chunk_duration_seconds * 1000)
        self.logger = logger or logging.getLogger(__name__)

    def split_audio(self, input_path: str, output_dir: str) -> List[str]:
        """
        Split audio file into fixed-size chunks.

        Args:
            input_path: Path to the input audio file
            output_dir: Directory where chunk files will be saved

        Returns:
            List of paths to the created chunk files

        Raises:
            FileNotFoundError: If input file doesn't exist
            IOError: If there's an error reading or writing audio files
        """
        if not os.path.exists(input_path):
            raise FileNotFoundError(f"Input file not found: {input_path}")

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        self.logger.debug(f"Loading audio file: {input_path}")
        audio = AudioSegment.from_file(input_path)

        total_duration_ms = len(audio)
        chunk_paths = []

        # Calculate number of chunks
        num_chunks = (total_duration_ms + self.chunk_duration_ms - 1) // self.chunk_duration_ms
        self.logger.info(f"Splitting {total_duration_ms / 1000:.1f}s audio into {num_chunks} chunks of {self.chunk_duration_ms / 1000:.1f}s each")

        # Get file extension from input
        _, ext = os.path.splitext(input_path)
        if not ext:
            ext = ".wav"  # Default to WAV if no extension

        # Split into chunks
        for i in range(num_chunks):
            start_ms = i * self.chunk_duration_ms
            end_ms = min(start_ms + self.chunk_duration_ms, total_duration_ms)

            chunk = audio[start_ms:end_ms]
            chunk_filename = f"chunk_{i:04d}{ext}"
            chunk_path = os.path.join(output_dir, chunk_filename)

            self.logger.debug(f"Exporting chunk {i + 1}/{num_chunks}: {start_ms / 1000:.1f}s - {end_ms / 1000:.1f}s to {chunk_path}")
            chunk.export(chunk_path, format=ext.lstrip('.'))
            chunk_paths.append(chunk_path)

        return chunk_paths

    def merge_chunks(self, chunk_paths: List[str], output_path: str) -> str:
        """
        Merge processed chunks with simple concatenation.

        Args:
            chunk_paths: List of paths to chunk files to merge
            output_path: Path where the merged output will be saved

        Returns:
            Path to the merged output file

        Raises:
            ValueError: If chunk_paths is empty
            FileNotFoundError: If any chunk file doesn't exist
            IOError: If there's an error reading or writing audio files
        """
        if not chunk_paths:
            raise ValueError("Cannot merge empty list of chunks")

        # Verify all chunks exist
        for chunk_path in chunk_paths:
            if not os.path.exists(chunk_path):
                raise FileNotFoundError(f"Chunk file not found: {chunk_path}")

        self.logger.info(f"Merging {len(chunk_paths)} chunks into {output_path}")

        # Start with empty audio segment
        combined = AudioSegment.empty()

        # Concatenate all chunks
        for i, chunk_path in enumerate(chunk_paths):
            self.logger.debug(f"Loading chunk {i + 1}/{len(chunk_paths)}: {chunk_path}")
            chunk = AudioSegment.from_file(chunk_path)
            combined += chunk  # Simple concatenation

        # Get output format from file extension
        _, ext = os.path.splitext(output_path)
        output_format = ext.lstrip('.') if ext else 'wav'

        self.logger.info(f"Exporting merged audio ({len(combined) / 1000:.1f}s) to {output_path}")
        combined.export(output_path, format=output_format)

        return output_path

    def should_chunk(self, audio_duration_seconds: float) -> bool:
        """
        Determine if file is large enough to benefit from chunking.

        Args:
            audio_duration_seconds: Duration of the audio file in seconds

        Returns:
            True if the file should be chunked, False otherwise
        """
        return audio_duration_seconds > (self.chunk_duration_ms / 1000)
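
A quick round-trip sketch of the class above, using a synthetic silent clip so it runs without a real recording (assumes pydub is installed, which the module already requires):

```python
import tempfile
from pydub import AudioSegment
from audio_separator.separator.audio_chunking import AudioChunker

# 25 seconds of silence stands in for a real recording.
audio = AudioSegment.silent(duration=25_000)

with tempfile.TemporaryDirectory() as tmp:
    src = f"{tmp}/example.wav"
    audio.export(src, format="wav")

    chunker = AudioChunker(chunk_duration_seconds=10)  # tiny chunks for the demo
    print(chunker.should_chunk(25))   # True: file is longer than one chunk

    chunk_paths = chunker.split_audio(src, f"{tmp}/chunks")
    print(len(chunk_paths))           # 3 chunks: 10s + 10s + 5s

    merged = chunker.merge_chunks(chunk_paths, f"{tmp}/merged.wav")
    print(len(AudioSegment.from_file(merged)) / 1000)  # 25.0 seconds again
```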

audio_separator/separator/separator.py

Lines changed: 130 additions & 0 deletions
@@ -10,6 +10,7 @@
import warnings
import importlib
import io
import re
from typing import Optional

import hashlib

@@ -94,6 +95,7 @@ def __init__(
        use_soundfile=False,
        use_autocast=False,
        use_directml=False,
        chunk_duration=None,
        mdx_params={"hop_length": 1024, "segment_size": 256, "overlap": 0.25, "batch_size": 1, "enable_denoise": False},
        vr_params={"batch_size": 1, "window_size": 512, "aggression": 5, "enable_tta": False, "enable_post_process": False, "post_process_threshold": 0.2, "high_end_process": False},
        demucs_params={"segment_size": "Default", "shifts": 2, "overlap": 0.25, "segments_enabled": True},

@@ -182,6 +184,11 @@ def __init__(
        self.use_autocast = use_autocast
        self.use_directml = use_directml

        self.chunk_duration = chunk_duration
        if chunk_duration is not None:
            if chunk_duration <= 0:
                raise ValueError("chunk_duration must be greater than 0")

        # These are parameters which users may want to configure so we expose them to the top-level Separator class,
        # even though they are specific to a single model architecture
        self.arch_specific_params = {"MDX": mdx_params, "VR": vr_params, "Demucs": demucs_params, "MDXC": mdxc_params}

@@ -866,6 +873,18 @@ def _separate_file(self, audio_file_path, custom_output_names=None):
        Returns:
        - output_files (list of str): A list containing the paths to the separated audio stem files.
        """
        # Check if chunking is enabled and file is large enough
        if self.chunk_duration is not None:
            import librosa
            duration = librosa.get_duration(path=audio_file_path)

            from audio_separator.separator.audio_chunking import AudioChunker
            chunker = AudioChunker(self.chunk_duration, self.logger)

            if chunker.should_chunk(duration):
                self.logger.info(f"File duration {duration:.1f}s exceeds chunk size {self.chunk_duration}s, using chunked processing")
                return self._process_with_chunking(audio_file_path, custom_output_names)

        # Log the start of the separation process
        self.logger.info(f"Starting separation process for audio_file_path: {audio_file_path}")
        separate_start_time = time.perf_counter()

@@ -899,6 +918,117 @@ def _separate_file(self, audio_file_path, custom_output_names=None):

        return output_files

    def _process_with_chunking(self, audio_file_path, custom_output_names=None):
        """
        Process large file by splitting into chunks.

        This method splits a large audio file into smaller chunks, processes each chunk
        separately, and merges the results back together. This helps prevent out-of-memory
        errors when processing very long audio files.

        Parameters:
        - audio_file_path (str): The path to the audio file.
        - custom_output_names (dict, optional): Custom names for the output files. Defaults to None.

        Returns:
        - output_files (list of str): A list containing the paths to the separated audio stem files.
        """
        import tempfile
        import shutil
        from audio_separator.separator.audio_chunking import AudioChunker

        # Create temporary directory for chunks
        temp_dir = tempfile.mkdtemp(prefix="audio-separator-chunks-")
        self.logger.debug(f"Created temporary directory for chunks: {temp_dir}")

        try:
            # Split audio into chunks
            chunker = AudioChunker(self.chunk_duration, self.logger)
            chunk_paths = chunker.split_audio(audio_file_path, temp_dir)

            # Process each chunk
            processed_chunks_by_stem = {}

            for i, chunk_path in enumerate(chunk_paths):
                self.logger.info(f"Processing chunk {i+1}/{len(chunk_paths)}: {chunk_path}")

                original_chunk_duration = self.chunk_duration
                original_output_dir = self.output_dir
                self.chunk_duration = None
                self.output_dir = temp_dir

                if self.model_instance:
                    original_model_output_dir = self.model_instance.output_dir
                    self.model_instance.output_dir = temp_dir

                try:
                    output_files = self._separate_file(chunk_path, custom_output_names)

                    # Dynamically group chunks by stem name
                    for stem_path in output_files:
                        # Extract stem name from filename: "chunk_0000_(Vocals).wav" → "Vocals"
                        filename = os.path.basename(stem_path)
                        match = re.search(r'_\(([^)]+)\)', filename)
                        if match:
                            stem_name = match.group(1)
                        else:
                            # Fallback: use index-based name if pattern not found
                            stem_index = len([k for k in processed_chunks_by_stem.keys() if k.startswith('stem_')])
                            stem_name = f"stem_{stem_index}"
                            self.logger.warning(f"Could not extract stem name from {filename}, using {stem_name}")

                        if stem_name not in processed_chunks_by_stem:
                            processed_chunks_by_stem[stem_name] = []

                        # Ensure absolute path
                        abs_path = stem_path if os.path.isabs(stem_path) else os.path.join(temp_dir, stem_path)
                        processed_chunks_by_stem[stem_name].append(abs_path)

                    if not output_files:
                        self.logger.warning(f"Chunk {i+1} produced no output files")

                finally:
                    self.chunk_duration = original_chunk_duration
                    self.output_dir = original_output_dir
                    if self.model_instance:
                        self.model_instance.output_dir = original_model_output_dir

                # Clear GPU cache between chunks
                if self.model_instance:
                    self.model_instance.clear_gpu_cache()

            # Merge chunks for each stem dynamically
            base_name = os.path.splitext(os.path.basename(audio_file_path))[0]
            output_files = []

            for stem_name in sorted(processed_chunks_by_stem.keys()):
                chunk_paths_for_stem = processed_chunks_by_stem[stem_name]

                if not chunk_paths_for_stem:
                    self.logger.warning(f"No chunks found for stem: {stem_name}")
                    continue

                # Determine output filename
                if custom_output_names and stem_name in custom_output_names:
                    output_filename = custom_output_names[stem_name]
                else:
                    output_filename = f"{base_name}_({stem_name})"

                output_path = os.path.join(self.output_dir, f"{output_filename}.{self.output_format.lower()}")

                self.logger.info(f"Merging {len(chunk_paths_for_stem)} chunks for stem: {stem_name}")
                chunker.merge_chunks(chunk_paths_for_stem, output_path)
                output_files.append(output_path)

            self.logger.info(f"Chunked processing completed. Output files: {output_files}")
            return output_files

        finally:
            # Clean up temporary directory
            if os.path.exists(temp_dir):
                self.logger.debug(f"Cleaning up temporary directory: {temp_dir}")
                shutil.rmtree(temp_dir, ignore_errors=True)

    def download_model_and_data(self, model_filename):
        """
        Downloads the model file without loading it into memory.
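
The stem grouping in `_process_with_chunking()` hinges on a single regex over the chunk output filenames. A standalone sketch of just that extraction step (the helper name is hypothetical; the filenames are illustrative examples of the `chunk_NNNN_(Stem)` pattern the chunked run produces):

```python
import re
from typing import Optional

def extract_stem_name(filename: str) -> Optional[str]:
    """Mirror of the grouping step above: 'chunk_0000_(Vocals).wav' -> 'Vocals'."""
    match = re.search(r'_\(([^)]+)\)', filename)
    return match.group(1) if match else None

print(extract_stem_name("chunk_0000_(Vocals).wav"))         # Vocals
print(extract_stem_name("chunk_0003_(Instrumental).flac"))  # Instrumental
print(extract_stem_name("chunk_0007_(Drums).wav"))          # Drums
print(extract_stem_name("unexpected_name.wav"))             # None -> caller falls back to stem_<index>
```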

audio_separator/utils/cli.py

Lines changed: 3 additions & 0 deletions
@@ -59,6 +59,7 @@ def main():
    sample_rate_help = "Modify the sample rate of the output audio (default: %(default)s). Example: --sample_rate=44100"
    use_soundfile_help = "Use soundfile to write audio output (default: %(default)s). Example: --use_soundfile"
    use_autocast_help = "Use PyTorch autocast for faster inference (default: %(default)s). Do not use for CPU inference. Example: --use_autocast"
    chunk_duration_help = "Split audio into chunks of this duration in seconds (default: %(default)s = no chunking). Useful for processing very long audio files on systems with limited memory. Recommended: 600 (10 minutes) for files >1 hour. Chunks are concatenated without overlap/crossfade. Example: --chunk_duration=600"
    custom_output_names_help = 'Custom names for all output files in JSON format (default: %(default)s). Example: --custom_output_names=\'{"Vocals": "vocals_output", "Drums": "drums_output"}\''

    common_params = parser.add_argument_group("Common Separation Parameters")

@@ -69,6 +70,7 @@ def main():
    common_params.add_argument("--sample_rate", type=int, default=44100, help=sample_rate_help)
    common_params.add_argument("--use_soundfile", action="store_true", help=use_soundfile_help)
    common_params.add_argument("--use_autocast", action="store_true", help=use_autocast_help)
    common_params.add_argument("--chunk_duration", type=float, default=None, help=chunk_duration_help)
    common_params.add_argument("--custom_output_names", type=json.loads, default=None, help=custom_output_names_help)

    mdx_segment_size_help = "Larger consumes more resources, but may give better results (default: %(default)s). Example: --mdx_segment_size=256"

@@ -200,6 +202,7 @@ def main():
        sample_rate=args.sample_rate,
        use_soundfile=args.use_soundfile,
        use_autocast=args.use_autocast,
        chunk_duration=args.chunk_duration,
        mdx_params={
            "hop_length": args.mdx_hop_length,
            "segment_size": args.mdx_segment_size,
