Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
205 changes: 205 additions & 0 deletions sdk/guides/browser-session-recording.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
---
title: Browser Session Recording
description: Record and replay your agent's browser sessions using rrweb.
---

import RunExampleCode from "/sdk/shared-snippets/how-to-run-example.mdx";

> A ready-to-run example is available [here](#ready-to-run-example)!

The browser session recording feature allows you to capture your agent's browser interactions and replay them later using [rrweb](https://github.com/rrweb-io/rrweb). This is useful for debugging, auditing, and understanding how your agent interacts with web pages.

## How It Works

The recording feature uses rrweb to capture DOM mutations, mouse movements, scrolling, and other browser events. The recordings are saved as JSON files that can be replayed using rrweb-player or the online viewer.

The [ready-to-run example](#ready-to-run-example) demonstrates:

1. **Starting a recording**: Use `browser_start_recording` to begin capturing browser events
2. **Browsing and interacting**: Navigate to websites and perform actions while recording
3. **Stopping the recording**: Use `browser_stop_recording` to stop and save the recording

The recording files are automatically saved to the `observations` folder inside the conversation's persistence directory when the recording is stopped.

## Replaying Recordings

After recording a session, you can replay it using:

- **rrweb-player**: A standalone player component - [GitHub](https://github.com/rrweb-io/rrweb/tree/master/packages/rrweb-player)
- **Online viewer**: Upload your recording at [rrweb.io/demo](https://www.rrweb.io/demo/)

## Ready-to-run Example

<Note>
This example is available on GitHub: [examples/01_standalone_sdk/38_browser_session_recording.py](https://github.com/OpenHands/software-agent-sdk/blob/main/examples/01_standalone_sdk/38_browser_session_recording.py)
</Note>

```python icon="python" expandable examples/01_standalone_sdk/38_browser_session_recording.py
"""Browser Session Recording Example

This example demonstrates how to use the browser session recording feature
to capture and save a recording of the agent's browser interactions using rrweb.

The recording can be replayed later using rrweb-player to visualize the agent's
browsing session.

The recording will be automatically saved to the persistence directory when
browser_stop_recording is called. You can replay it with:
- rrweb-player: https://github.com/rrweb-io/rrweb/tree/master/packages/rrweb-player
- Online viewer: https://www.rrweb.io/demo/
"""

import json
import os

from pydantic import SecretStr

from openhands.sdk import (
LLM,
Agent,
Conversation,
Event,
LLMConvertibleEvent,
get_logger,
)
from openhands.sdk.tool import Tool
from openhands.tools.browser_use import BrowserToolSet


logger = get_logger(__name__)

# Configure LLM
# The API key is validated with an explicit check instead of `assert`:
# assertions are stripped when Python runs with -O, which would let None
# flow into SecretStr below.
api_key = os.getenv("LLM_API_KEY")
if api_key is None:
    raise RuntimeError("LLM_API_KEY environment variable is not set.")
model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929")
base_url = os.getenv("LLM_BASE_URL")  # None when LLM_BASE_URL is unset
llm = LLM(
    usage_id="agent",
    model=model,
    base_url=base_url,
    api_key=SecretStr(api_key),
)

# Tools - the browser tool set, which includes the recording capability
cwd = os.getcwd()  # passed to the conversation as its workspace
tools = [Tool(name=BrowserToolSet.name)]

# Agent
agent = Agent(llm=llm, tools=tools)

# Raw LLM messages, appended to by the conversation callback
llm_messages = []


def conversation_callback(event: Event):
    """Record the LLM message of every LLM-convertible conversation event.

    Events that are not ``LLMConvertibleEvent`` instances are ignored.
    """
    if not isinstance(event, LLMConvertibleEvent):
        return
    llm_messages.append(event.to_llm_message())


# Create the conversation; persistence_dir is set so that the browser
# recordings are saved
conversation = Conversation(
    agent=agent,
    workspace=cwd,
    persistence_dir="./.conversations",
    callbacks=[conversation_callback],
)

# The prompt instructs the agent to:
# 1. Start recording the browser session (browser_start_recording)
# 2. Browse two documentation pages, reading the content and scrolling on each
# 3. Stop recording (browser_stop_recording), which auto-saves the events
PROMPT = """
Please complete the following task to demonstrate browser session recording:

1. First, use `browser_start_recording` to begin recording the browser session.

2. Then navigate to https://docs.openhands.dev/ and:
- Get the page content
- Scroll down the page
- Get the browser state to see interactive elements

3. Next, navigate to https://docs.openhands.dev/openhands/usage/cli/installation and:
- Get the page content
- Scroll down to see more content

4. Finally, use `browser_stop_recording` to stop the recording.
Events are automatically saved.
"""

separator = "=" * 80

# Opening banner
print(separator)
print("Browser Session Recording Example")
print(separator)
print("\nTask: Record an agent's browser session and save it for replay")
print("\nStarting conversation with agent...\n")

# Send the task to the agent and run the conversation
conversation.send_message(PROMPT)
conversation.run()

# Closing banner
print("\n" + separator)
print("Conversation finished!")
print(separator)

persistence_dir = conversation.state.persistence_dir
# Internal invariant rather than input validation: a persistence_dir was
# passed explicitly when the conversation was created.
assert persistence_dir

# Check if the recording files were created
observations_dir = os.path.join(persistence_dir, "observations")
# isdir (not exists) so a stray file named "observations" cannot reach listdir
if os.path.isdir(observations_dir):
    json_files = sorted(
        name for name in os.listdir(observations_dir) if name.endswith(".json")
    )

    if json_files:
        print(f"\n✓ Recording saved to: {observations_dir}")
        print(f"✓ Number of files: {len(json_files)}")

        # Totals aggregated across all recording files
        total_events = 0
        total_size = 0
        all_event_types = {}

        for json_file in json_files:
            filepath = os.path.join(observations_dir, json_file)
            file_size = os.path.getsize(filepath)
            total_size += file_size

            # Read as UTF-8 explicitly instead of relying on the platform's
            # default text encoding
            with open(filepath, encoding="utf-8") as f:
                payload = json.load(f)

            # Events are stored as a list in each file; any other top-level
            # JSON value is counted as zero events instead of raising
            events = payload if isinstance(payload, list) else []
            total_events += len(events)
            for event in events:
                # Entries that are not JSON objects have no "type" key to read
                if isinstance(event, dict):
                    event_type = event.get("type", "unknown")
                else:
                    event_type = "unknown"
                all_event_types[event_type] = all_event_types.get(event_type, 0) + 1

            print(f" - {json_file}: {len(events)} events, {file_size} bytes")

        print(f"✓ Total events: {total_events}")
        print(f"✓ Total size: {total_size} bytes")
        if all_event_types:
            print(f"✓ Event types: {all_event_types}")

        print("\nTo replay this recording, you can use:")
        print(
            " - rrweb-player: https://github.com/rrweb-io/rrweb/tree/master/packages/rrweb-player"
        )
    else:
        print(f"\n✗ No recording files found in: {observations_dir}")
        print(" The agent may not have completed the recording task.")
else:
    print(f"\n✗ Observations directory not found: {observations_dir}")
    print(" The agent may not have completed the recording task.")

closing_rule = "=" * 100

# Final summary of the run
print("\n" + closing_rule)
print("Conversation finished.")
print(f"Total LLM messages: {len(llm_messages)}")
print(closing_rule)

# Report cost
combined_metrics = conversation.conversation_stats.get_combined_metrics()
cost = combined_metrics.accumulated_cost
print(f"Conversation ID: {conversation.id}")
print(f"EXAMPLE_COST: {cost}")
```

<RunExampleCode />