From 6666e7661a9a9fde4b4a851fbf8ad423a1a569e9 Mon Sep 17 00:00:00 2001
From: Pierre Vannier
Date: Tue, 20 May 2025 11:34:36 +0200
Subject: [PATCH] Expand text bubble utility

---
 README.md              |  41 ++++++-
 overlay_text_bubble.py | 268 +++++++++++++++++++++++++++++++++++++++++
 pyproject.toml         |   3 +-
 3 files changed, 310 insertions(+), 2 deletions(-)
 create mode 100644 overlay_text_bubble.py

diff --git a/README.md b/README.md
index ced37ea..0ab9a37 100644
--- a/README.md
+++ b/README.md
@@ -252,6 +252,45 @@ This cuts `input.mp4` to the 10–20 second range and overlays `anim.gif` at
 the coordinates (100, 200) from two seconds into the clip until the eight-second
 mark.
 
+### Overlaying Text Bubbles onto a Video
+
+Another utility in this repository, `overlay_text_bubble.py`, can draw a
+speech bubble with [`drawsvg`](https://github.com/cduck/drawsvg) or overlay an
+animated bubble GIF. The script also supports trimming the input video just like
+`overlay_gif.py`.
+
+Install the required libraries (MoviePy 2.x and drawsvg's `raster` extra):
+
+```bash
+pip install "moviepy>=2" "drawsvg[raster]"
+```
+
+Basic usage drawing a bubble:
+
+```bash
+python overlay_text_bubble.py --video input.mp4 --text "Hello!" \
+    --start 3 --end 8 --position center --output output.mp4
+```
+
+This shows a speech bubble containing *Hello!* between the third and eighth
+second of the video.
+
+The video can be trimmed and a GIF bubble used instead:
+
+```bash
+python overlay_text_bubble.py --video input.mp4 --gif bubble.gif \
+    --clip-start 10 --clip-end 20 --start 1 --end 5 \
+    --position "50,200" --output clipped.mp4
+```
+
+You can also customise the bubble size when drawing it:
+
+```bash
+python overlay_text_bubble.py --video input.mp4 --text "Look" \
+    --bubble-width 400 --bubble-height 120 --position "50,200" \
+    --output custom.mp4
+```
+
 ### Customizing the Model
 
 You can use different Parakeet models with the `--model` parameter:
@@ -269,4 +308,4 @@ This project is available under the MIT License. See the LICENSE file for more d
 
 - [Parakeet MLX](https://github.com/senstella/parakeet-mlx) for the excellent speech recognition model
 - [MLX](https://github.com/ml-explore/mlx) for the machine learning framework optimized for Apple Silicon
-- [Sounddevice](https://github.com/spatialaudio/python-sounddevice) for audio capture functionality
\ No newline at end of file
+- [Sounddevice](https://github.com/spatialaudio/python-sounddevice) for audio capture functionality
diff --git a/overlay_text_bubble.py b/overlay_text_bubble.py
new file mode 100644
index 0000000..5ab801a
--- /dev/null
+++ b/overlay_text_bubble.py
@@ -0,0 +1,268 @@
+#!/usr/bin/env python3
+"""Overlay a text bubble onto an MP4 video using drawsvg and moviepy."""
+
+from __future__ import annotations
+
+import argparse
+import os
+import tempfile
+from typing import Sequence, Tuple, Union
+
+try:
+    import drawsvg as draw
+except ImportError as exc:  # pragma: no cover - depends on the environment
+    raise ImportError(
+        "The drawsvg package is required for this script."
+        " Install it with 'pip install \"drawsvg[raster]\"'."
+    ) from exc
+
+try:
+    # MoviePy 2.x removed ``moviepy.editor``; the clip classes are exported
+    # from the top-level package instead.
+    from moviepy import CompositeVideoClip, ImageClip, VideoFileClip
+except ImportError as exc:  # pragma: no cover - depends on the environment
+    raise ImportError(
+        "MoviePy 2.x is required for this script."
+        " Install it with 'pip install \"moviepy>=2\"'."
+    ) from exc
+
+# A position is a MoviePy keyword ("center", "top", ...) or an (x, y) tuple.
+Position = Union[str, Tuple[int, int]]
+
+# Size in pixels of the pointer drawn below the bubble body.
+TAIL_HEIGHT = 20
+TAIL_WIDTH = 20
+
+
+def create_text_bubble(text: str, width: int, height: int) -> str:
+    """Render a speech bubble containing ``text`` to a temporary PNG file.
+
+    Args:
+        text: Text drawn centred inside the bubble body.
+        width: Total image width in pixels.
+        height: Total image height in pixels, including the pointer drawn
+            below the bubble body.
+
+    Returns:
+        Path of the PNG file. The caller is responsible for deleting it.
+
+    Raises:
+        ValueError: If the size leaves no room for the bubble body.
+    """
+    if width <= 0 or height <= TAIL_HEIGHT:
+        raise ValueError(
+            f"bubble needs a positive width and a height above {TAIL_HEIGHT}px"
+        )
+
+    drawing = draw.Drawing(width, height, origin=(0, 0))
+    rect_height = height - TAIL_HEIGHT
+
+    # Rounded body of the bubble.
+    drawing.append(
+        draw.Rectangle(
+            0,
+            0,
+            width,
+            rect_height,
+            rx=15,
+            ry=15,
+            fill="white",
+            stroke="black",
+            stroke_width=2,
+        )
+    )
+
+    # Triangular pointer hanging from the bottom edge of the body.
+    tail_left = width * 0.2
+    drawing.append(
+        draw.Lines(
+            tail_left,
+            rect_height,
+            tail_left + TAIL_WIDTH,
+            rect_height,
+            tail_left + TAIL_WIDTH / 2,
+            height,
+            close=True,
+            fill="white",
+            stroke="black",
+            stroke_width=2,
+        )
+    )
+
+    drawing.append(
+        draw.Text(
+            text,
+            20,
+            width / 2,
+            rect_height / 2,
+            center=True,
+            valign="middle",
+            fill="black",
+        )
+    )
+
+    # Close the descriptor right away: drawsvg writes to the path itself, and
+    # a handle left open would leak for the lifetime of the process.
+    fd, png_path = tempfile.mkstemp(suffix=".png")
+    os.close(fd)
+    try:
+        drawing.save_png(png_path)
+    except Exception:
+        # Do not leave an empty file behind when rasterisation fails.
+        os.unlink(png_path)
+        raise
+    return png_path
+
+
+def overlay_text_bubble_on_video(
+    video_path: str,
+    output_path: str,
+    text: str | None = None,
+    bubble_gif: str | None = None,
+    clip_start: float = 0.0,
+    clip_end: float | None = None,
+    start: float = 0.0,
+    end: float | None = None,
+    position: Position = "center",
+    width: int = 300,
+    height: int = 100,
+) -> None:
+    """Overlay a drawn speech bubble or a bubble GIF onto ``video_path``.
+
+    Args:
+        video_path: Input video file.
+        output_path: Where the composited video is written.
+        text: Text for the drawn bubble; ignored when ``bubble_gif`` is given.
+        bubble_gif: Animated GIF to overlay instead of drawing a bubble.
+        clip_start: Start of the section of the input video to keep.
+        clip_end: End of that section, or ``None`` for the end of the video.
+        start: Time, relative to the trimmed video, when the bubble appears.
+        end: Time when the bubble disappears. ``None`` keeps a drawn bubble
+            until the end of the video and lets a GIF play through once.
+        position: MoviePy position keyword or ``(x, y)`` pixel coordinates.
+        width: Width of the drawn bubble in pixels.
+        height: Height of the drawn bubble in pixels.
+
+    Raises:
+        ValueError: If the bubble timing is empty or outside the video.
+    """
+    if end is not None and end <= start:
+        raise ValueError("end must be greater than start")
+
+    bubble_png: str | None = None
+    opened = []  # clips that hold file handles and must be closed
+    try:
+        source_clip = VideoFileClip(video_path)
+        opened.append(source_clip)
+        video_clip = source_clip
+        if clip_start != 0.0 or clip_end is not None:
+            video_clip = source_clip.subclipped(clip_start, clip_end)
+
+        if start >= video_clip.duration:
+            raise ValueError("start must fall inside the (trimmed) video")
+
+        if bubble_gif is not None:
+            bubble_clip = VideoFileClip(bubble_gif)
+            opened.append(bubble_clip)
+        else:
+            bubble_png = create_text_bubble(text or "", width, height)
+            bubble_clip = ImageClip(bubble_png)
+        bubble_clip = bubble_clip.with_start(start).with_position(position)
+
+        # A still image has no duration of its own, and a composite with an
+        # open-ended layer cannot be written, so always give the bubble an
+        # explicit end and never let it outlast the video.
+        if end is not None:
+            bubble_end = end
+        elif bubble_clip.end is not None:
+            bubble_end = bubble_clip.end
+        else:
+            bubble_end = video_clip.duration
+        bubble_end = min(bubble_end, video_clip.duration)
+        bubble_clip = bubble_clip.with_end(bubble_end)
+
+        final_clip = CompositeVideoClip([video_clip, bubble_clip])
+        final_clip.write_videofile(
+            output_path, codec="libx264", audio_codec="aac"
+        )
+    finally:
+        # Runs on failure too, so no reader or temporary PNG is left behind.
+        for clip in opened:
+            clip.close()
+        if bubble_png is not None:
+            os.unlink(bubble_png)
+
+
+def _parse_position(value: str) -> Position:
+    """Turn a ``--position`` value into a keyword or an ``(x, y)`` tuple.
+
+    ``"x,y"``, optionally wrapped in parentheses, becomes integer pixel
+    coordinates; a value without a comma is returned unchanged as a keyword.
+    """
+    if "," not in value:
+        return value
+    x_str, y_str = value.strip().strip("()").split(",", maxsplit=1)
+    try:
+        return int(x_str), int(y_str)
+    except ValueError:
+        raise argparse.ArgumentTypeError(
+            f"invalid position {value!r}: expected integer 'x,y' coordinates"
+        ) from None
+
+
+def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
+    """Parse the command-line options.
+
+    Args:
+        argv: Arguments to parse; ``None`` means ``sys.argv[1:]``.
+
+    Returns:
+        The parsed namespace; ``position`` is already converted.
+    """
+    parser = argparse.ArgumentParser(description="Overlay text bubble onto MP4 video")
+    parser.add_argument("--video", required=True, help="Input MP4 video path")
+    parser.add_argument("--output", required=True, help="Output MP4 path")
+    parser.add_argument("--text", help="Text to display in bubble")
+    parser.add_argument("--gif", help="Animated bubble GIF to overlay instead of drawn bubble")
+    parser.add_argument("--clip-start", type=float, default=0.0, help="Start time of the video clip")
+    parser.add_argument("--clip-end", type=float, default=None, help="End time of the video clip")
+    parser.add_argument("--start", type=float, default=0.0, help="Time when bubble appears")
+    parser.add_argument("--end", type=float, default=None, help="Time when bubble disappears")
+    parser.add_argument(
+        "--position",
+        type=_parse_position,
+        default="center",
+        help="Position of bubble: 'x,y' in pixels or keywords like 'center', 'top', etc.",
+    )
+    parser.add_argument("--bubble-width", type=int, default=300, help="Bubble width in pixels")
+    parser.add_argument("--bubble-height", type=int, default=100, help="Bubble height in pixels")
+    args = parser.parse_args(argv)
+    if args.gif is None and args.text is None:
+        parser.error("either --text or --gif must be provided")
+    return args
+
+
+def main() -> None:
+    """Run the command-line tool."""
+    args = parse_args()
+    try:
+        overlay_text_bubble_on_video(
+            video_path=args.video,
+            output_path=args.output,
+            text=args.text,
+            bubble_gif=args.gif,
+            clip_start=args.clip_start,
+            clip_end=args.clip_end,
+            start=args.start,
+            end=args.end,
+            position=args.position,
+            width=args.bubble_width,
+            height=args.bubble_height,
+        )
+    except ValueError as exc:
+        # Report invalid timing or size options without a traceback.
+        raise SystemExit(f"error: {exc}") from exc
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index d535ba7..a566745 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,5 +9,6 @@ dependencies = [
     "numpy>=2.2.5",
     "parakeet-mlx>=0.2.6",
     "sounddevice>=0.5.1",
-    "moviepy>=1.0.3",
+    "moviepy>=2.0.0",
+    "drawsvg[raster]>=2.0.0",
 ]