Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions openhands-sdk/openhands/sdk/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,30 @@ def _extract_security_risk(
security_risk = risk.SecurityRisk(raw)
return security_risk

def _extract_summary(self, tool_name: str, arguments: dict) -> str:
    """Remove the ``summary`` entry from the tool arguments and return a summary.

    The ``summary`` key is popped from ``arguments`` in place, so it is no
    longer present in the dictionary after this call. When the popped value
    is a string containing non-whitespace text it is returned unchanged;
    otherwise a fallback of the form ``"<tool_name>: <JSON of the remaining
    arguments>"`` is built.

    Args:
        tool_name: Name of the tool being called.
        arguments: Tool arguments supplied by the LLM (mutated: ``summary``
            is removed).

    Returns:
        The LLM-provided summary, or the generated fallback string.
    """
    llm_summary = arguments.pop("summary", None)

    # Anything that is not a non-blank string (missing key, wrong type,
    # whitespace only) triggers the generated fallback.
    if not isinstance(llm_summary, str) or not llm_summary.strip():
        # The "summary" key has already been removed, so only the real
        # tool arguments end up in the serialized text.
        args_str = json.dumps(arguments)
        return f"{tool_name}: {args_str}"

    return llm_summary

def _get_action_event(
self,
tool_call: MessageToolCall,
Expand Down Expand Up @@ -423,6 +447,8 @@ def _get_action_event(
"Unexpected 'security_risk' key found in tool arguments"
)

summary = self._extract_summary(tool.name, arguments)

action: Action = tool.action_from_arguments(arguments)
except (json.JSONDecodeError, ValidationError, ValueError) as e:
err = (
Expand Down Expand Up @@ -462,6 +488,7 @@ def _get_action_event(
tool_call=tool_call,
llm_response_id=llm_response_id,
security_risk=security_risk,
summary=summary,
)
on_event(action_event)
return action_event
Expand Down
3 changes: 3 additions & 0 deletions openhands-sdk/openhands/sdk/agent/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ def make_llm_completion(
configured. This allows weaker models to omit risk field and bypass
validation requirements when analyzer is disabled. For detailed logic,
see `_extract_security_risk` method in agent.py.

Summary field is always added to tool schemas for transparency and
explainability of agent actions.
"""
if llm.uses_responses_api():
return llm.responses(
Expand Down
20 changes: 20 additions & 0 deletions openhands-sdk/openhands/sdk/event/llm_convertible/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,20 @@ class ActionEvent(LLMConvertibleEvent):
description="The LLM's assessment of the safety risk of this action.",
)

summary: str | None = Field(
default=None,
description=(
"A concise summary (approximately 10 words) of what this action does, "
"provided by the LLM for explainability and debugging. "
"Examples of good summaries: "
"'editing configuration file for deployment settings' | "
"'searching codebase for authentication function definitions' | "
"'installing required dependencies from package manifest' | "
"'running tests to verify bug fix' | "
"'viewing directory structure to locate source files'"
),
)

@property
def visualize(self) -> Text:
"""Return Rich Text representation of this action event."""
Expand All @@ -73,6 +87,12 @@ def visualize(self) -> Text:
if self.security_risk != risk.SecurityRisk.UNKNOWN:
content.append(self.security_risk.visualize)

# Display summary if available
if self.summary:
content.append("Summary: ", style="bold cyan")
content.append(self.summary)
content.append("\n\n")

# Display reasoning content first if available
if self.reasoning_content:
content.append("Reasoning:\n", style="bold")
Expand Down
30 changes: 28 additions & 2 deletions openhands-sdk/openhands/sdk/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,9 +483,21 @@ def completion(
This is the method for getting responses from the model via Completion API.
It handles message formatting, tool calling, and response processing.

Args:
messages: List of conversation messages
tools: Optional list of tools available to the model
_return_metrics: Whether to return usage metrics
add_security_risk_prediction: Add security_risk field to tool schemas
on_token: Optional callback for streaming tokens
**kwargs: Additional arguments passed to the LLM API

Returns:
LLMResponse containing the model's response and metadata.

Note:
Summary field is always added to tool schemas for transparency and
explainability of agent actions.

Raises:
ValueError: If streaming is requested (not supported).

Expand Down Expand Up @@ -513,7 +525,7 @@ def completion(
if tools:
cc_tools = [
t.to_openai_tool(
add_security_risk_prediction=add_security_risk_prediction
add_security_risk_prediction=add_security_risk_prediction,
)
for t in tools
]
Expand Down Expand Up @@ -630,6 +642,20 @@ def responses(
"""Alternative invocation path using OpenAI Responses API via LiteLLM.

Maps Message[] -> (instructions, input[]) and returns LLMResponse.

Args:
messages: List of conversation messages
tools: Optional list of tools available to the model
include: Optional list of fields to include in response
store: Whether to store the conversation
_return_metrics: Whether to return usage metrics
add_security_risk_prediction: Add security_risk field to tool schemas
on_token: Optional callback for streaming tokens (not yet supported)
**kwargs: Additional arguments passed to the API

Note:
Summary field is always added to tool schemas for transparency and
explainability of agent actions.
"""
# Streaming not yet supported
if kwargs.get("stream", False) or self.stream or on_token is not None:
Expand All @@ -643,7 +669,7 @@ def responses(
resp_tools = (
[
t.to_responses_tool(
add_security_risk_prediction=add_security_risk_prediction
add_security_risk_prediction=add_security_risk_prediction,
)
for t in tools
]
Expand Down
12 changes: 12 additions & 0 deletions openhands-sdk/openhands/sdk/llm/router/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,18 @@ def completion(
"""
This method intercepts completion calls and routes them to the appropriate
underlying LLM based on the routing logic implemented in select_llm().

Args:
messages: List of conversation messages
tools: Optional list of tools available to the model
return_metrics: Whether to return usage metrics
add_security_risk_prediction: Add security_risk field to tool schemas
on_token: Optional callback for streaming tokens
**kwargs: Additional arguments passed to the LLM API

Note:
Summary field is always added to tool schemas for transparency and
explainability of agent actions.
"""
# Select appropriate LLM
selected_model = self.select_llm(messages)
Expand Down
69 changes: 60 additions & 9 deletions openhands-sdk/openhands/sdk/tool/tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
# Type variables constrained to the Action / Observation base types.
ActionT = TypeVar("ActionT", bound=Action)
ObservationT = TypeVar("ObservationT", bound=Observation)
# Cache mapping an original action type to the derived type built by
# create_action_type_with_risk, so each derived type is created only once.
_action_types_with_risk: dict[type, type] = {}
# Cache mapping an original action type to the derived type built by
# _create_action_type_with_summary, so each derived type is created only once.
_action_types_with_summary: dict[type, type] = {}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice with the caching!



def _camel_to_snake(name: str) -> str:
Expand Down Expand Up @@ -364,17 +365,18 @@ def _get_tool_schema(
action_type: type[Schema] | None = None,
) -> dict[str, Any]:
action_type = action_type or self.action_type
action_type_with_risk = create_action_type_with_risk(action_type)

# Apply security risk enhancement if enabled
add_security_risk_prediction = add_security_risk_prediction and (
self.annotations is None or (not self.annotations.readOnlyHint)
)
schema = (
action_type_with_risk.to_mcp_schema()
if add_security_risk_prediction
else action_type.to_mcp_schema()
)
return schema
if add_security_risk_prediction:
action_type = create_action_type_with_risk(action_type)

# Always add summary field for transparency and explainability
action_type = _create_action_type_with_summary(action_type)

return action_type.to_mcp_schema()

def to_openai_tool(
self,
Expand All @@ -391,14 +393,19 @@ def to_openai_tool(
action_type: Optionally override the action_type to use for the schema.
This is useful for MCPTool to use a dynamically created action type
based on the tool's input schema.

Note:
Summary field is always added to the schema for transparency and
explainability of agent actions.
"""
return ChatCompletionToolParam(
type="function",
function=ChatCompletionToolParamFunctionChunk(
name=self.name,
description=self.description,
parameters=self._get_tool_schema(
add_security_risk_prediction, action_type
add_security_risk_prediction,
action_type,
),
),
)
Expand All @@ -412,14 +419,23 @@ def to_responses_tool(

For Responses API, function tools expect top-level keys:
{ "type": "function", "name": ..., "description": ..., "parameters": ... }

Args:
add_security_risk_prediction: Whether to add a `security_risk` field
action_type: Optional override for the action type

Note:
Summary field is always added to the schema for transparency and
explainability of agent actions.
"""

return {
"type": "function",
"name": self.name,
"description": self.description,
"parameters": self._get_tool_schema(
add_security_risk_prediction, action_type
add_security_risk_prediction,
action_type,
),
"strict": False,
}
Expand Down Expand Up @@ -479,3 +495,38 @@ def create_action_type_with_risk(action_type: type[Schema]) -> type[Schema]:
)
_action_types_with_risk[action_type] = action_type_with_risk
return action_type_with_risk


def _create_action_type_with_summary(action_type: type[Schema]) -> type[Schema]:
    """Return a subclass of ``action_type`` carrying an extra ``summary`` field.

    The derived class is named ``<OriginalName>WithSummary`` and declares an
    optional ``summary: str | None`` field (default ``None``) whose
    description asks the LLM for a short explanation of the action. Derived
    classes are memoized in ``_action_types_with_summary``, so repeated calls
    with the same ``action_type`` return the same class object.

    Args:
        action_type: The original action type to extend.

    Returns:
        The cached or newly created type that includes the summary field.
    """
    cached = _action_types_with_summary.get(action_type)
    if cached:
        return cached

    summary_field = Field(
        default=None,
        description=(
            "A concise summary (approximately 10 words) describing what "
            "this specific action does. Focus on the key operation and target. "
            "Example: 'List all Python files in current directory'"
        ),
    )
    # Class namespace for the dynamically built subclass: the field default
    # plus the annotation that declares its type.
    namespace = {
        "summary": summary_field,
        "__annotations__": {"summary": str | None},
    }
    derived = type(f"{action_type.__name__}WithSummary", (action_type,), namespace)

    _action_types_with_summary[action_type] = derived
    return derived
32 changes: 32 additions & 0 deletions tests/integration/behavior_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,35 @@ def get_conversation_summary(
def _matches_pattern(path: str, pattern: str) -> bool:
    """Return True if ``path`` matches ``pattern`` as a glob or contains it.

    A path matches when ``fnmatch.fnmatch`` accepts it against the pattern,
    or when the pattern text occurs anywhere inside the path as a plain
    substring.
    """
    if fnmatch.fnmatch(path, pattern):
        return True
    # Fall back to a plain substring test for patterns that are not globs.
    return pattern in path


def verify_all_actions_have_summary(collected_events: list[Event]) -> tuple[bool, str]:
    """
    Check that every ActionEvent in the collected events has a usable summary.

    Only ``ActionEvent`` instances are inspected; other events are ignored.
    A summary counts as missing when it is falsy or contains only whitespace.

    Args:
        collected_events: List of events collected from conversation

    Returns:
        Tuple of (success, reason). ``success`` is True when there are no
        action events or when all of them have summaries; otherwise it is
        False and ``reason`` lists the offending actions by 1-based position
        and tool name.
    """
    from openhands.sdk.event import ActionEvent

    actions = [ev for ev in collected_events if isinstance(ev, ActionEvent)]
    if not actions:
        return True, "No action events found"

    # Positions are counted among action events only, starting at 1.
    lacking = [
        f"Action {position}: {action.tool_name}"
        for position, action in enumerate(actions, start=1)
        if not (action.summary and action.summary.strip())
    ]
    if lacking:
        return False, f"Actions missing summaries: {', '.join(lacking)}"

    return True, f"All {len(actions)} actions have summaries"
Loading
Loading