Skip to content

Commit 1220936

Browse files
Add action summary feature for tool calls (#1339)
Co-authored-by: openhands <[email protected]>
1 parent d1baf30 commit 1220936

File tree

15 files changed

+438
-19
lines changed

15 files changed

+438
-19
lines changed

openhands-sdk/openhands/sdk/agent/agent.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,30 @@ def _extract_security_risk(
362362
security_risk = risk.SecurityRisk(raw)
363363
return security_risk
364364

365+
def _extract_summary(self, tool_name: str, arguments: dict) -> str:
366+
"""Extract and validate the summary field from tool arguments.
367+
368+
Summary field is always requested but optional - if LLM doesn't provide
369+
it or provides invalid data, we generate a default summary using the
370+
tool name and arguments.
371+
372+
Args:
373+
tool_name: Name of the tool being called
374+
arguments: Dictionary of tool arguments from LLM
375+
376+
Returns:
377+
The summary string - either from LLM or a default generated one
378+
"""
379+
summary = arguments.pop("summary", None)
380+
381+
# If valid summary provided by LLM, use it
382+
if summary is not None and isinstance(summary, str) and summary.strip():
383+
return summary
384+
385+
# Generate default summary: {tool_name}: {arguments}
386+
args_str = json.dumps(arguments)
387+
return f"{tool_name}: {args_str}"
388+
365389
def _get_action_event(
366390
self,
367391
tool_call: MessageToolCall,
@@ -423,6 +447,8 @@ def _get_action_event(
423447
"Unexpected 'security_risk' key found in tool arguments"
424448
)
425449

450+
summary = self._extract_summary(tool.name, arguments)
451+
426452
action: Action = tool.action_from_arguments(arguments)
427453
except (json.JSONDecodeError, ValidationError, ValueError) as e:
428454
err = (
@@ -462,6 +488,7 @@ def _get_action_event(
462488
tool_call=tool_call,
463489
llm_response_id=llm_response_id,
464490
security_risk=security_risk,
491+
summary=summary,
465492
)
466493
on_event(action_event)
467494
return action_event

openhands-sdk/openhands/sdk/agent/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,9 @@ def make_llm_completion(
209209
configured. This allows weaker models to omit risk field and bypass
210210
validation requirements when analyzer is disabled. For detailed logic,
211211
see `_extract_security_risk` method in agent.py.
212+
213+
Summary field is always added to tool schemas for transparency and
214+
explainability of agent actions.
212215
"""
213216
if llm.uses_responses_api():
214217
return llm.responses(

openhands-sdk/openhands/sdk/event/llm_convertible/action.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,20 @@ class ActionEvent(LLMConvertibleEvent):
6565
description="The LLM's assessment of the safety risk of this action.",
6666
)
6767

68+
summary: str | None = Field(
69+
default=None,
70+
description=(
71+
"A concise summary (approximately 10 words) of what this action does, "
72+
"provided by the LLM for explainability and debugging. "
73+
"Examples of good summaries: "
74+
"'editing configuration file for deployment settings' | "
75+
"'searching codebase for authentication function definitions' | "
76+
"'installing required dependencies from package manifest' | "
77+
"'running tests to verify bug fix' | "
78+
"'viewing directory structure to locate source files'"
79+
),
80+
)
81+
6882
@property
6983
def visualize(self) -> Text:
7084
"""Return Rich Text representation of this action event."""
@@ -73,6 +87,12 @@ def visualize(self) -> Text:
7387
if self.security_risk != risk.SecurityRisk.UNKNOWN:
7488
content.append(self.security_risk.visualize)
7589

90+
# Display summary if available
91+
if self.summary:
92+
content.append("Summary: ", style="bold cyan")
93+
content.append(self.summary)
94+
content.append("\n\n")
95+
7696
# Display reasoning content first if available
7797
if self.reasoning_content:
7898
content.append("Reasoning:\n", style="bold")

openhands-sdk/openhands/sdk/llm/llm.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -483,9 +483,21 @@ def completion(
483483
This is the method for getting responses from the model via Completion API.
484484
It handles message formatting, tool calling, and response processing.
485485
486+
Args:
487+
messages: List of conversation messages
488+
tools: Optional list of tools available to the model
489+
_return_metrics: Whether to return usage metrics
490+
add_security_risk_prediction: Add security_risk field to tool schemas
491+
on_token: Optional callback for streaming tokens
492+
**kwargs: Additional arguments passed to the LLM API
493+
486494
Returns:
487495
LLMResponse containing the model's response and metadata.
488496
497+
Note:
498+
Summary field is always added to tool schemas for transparency and
499+
explainability of agent actions.
500+
489501
Raises:
490502
ValueError: If streaming is requested (not supported).
491503
@@ -513,7 +525,7 @@ def completion(
513525
if tools:
514526
cc_tools = [
515527
t.to_openai_tool(
516-
add_security_risk_prediction=add_security_risk_prediction
528+
add_security_risk_prediction=add_security_risk_prediction,
517529
)
518530
for t in tools
519531
]
@@ -630,6 +642,20 @@ def responses(
630642
"""Alternative invocation path using OpenAI Responses API via LiteLLM.
631643
632644
Maps Message[] -> (instructions, input[]) and returns LLMResponse.
645+
646+
Args:
647+
messages: List of conversation messages
648+
tools: Optional list of tools available to the model
649+
include: Optional list of fields to include in response
650+
store: Whether to store the conversation
651+
_return_metrics: Whether to return usage metrics
652+
add_security_risk_prediction: Add security_risk field to tool schemas
653+
on_token: Optional callback for streaming tokens (not yet supported)
654+
**kwargs: Additional arguments passed to the API
655+
656+
Note:
657+
Summary field is always added to tool schemas for transparency and
658+
explainability of agent actions.
633659
"""
634660
# Streaming not yet supported
635661
if kwargs.get("stream", False) or self.stream or on_token is not None:
@@ -643,7 +669,7 @@ def responses(
643669
resp_tools = (
644670
[
645671
t.to_responses_tool(
646-
add_security_risk_prediction=add_security_risk_prediction
672+
add_security_risk_prediction=add_security_risk_prediction,
647673
)
648674
for t in tools
649675
]

openhands-sdk/openhands/sdk/llm/router/base.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,18 @@ def completion(
5959
"""
6060
This method intercepts completion calls and routes them to the appropriate
6161
underlying LLM based on the routing logic implemented in select_llm().
62+
63+
Args:
64+
messages: List of conversation messages
65+
tools: Optional list of tools available to the model
66+
return_metrics: Whether to return usage metrics
67+
add_security_risk_prediction: Add security_risk field to tool schemas
68+
on_token: Optional callback for streaming tokens
69+
**kwargs: Additional arguments passed to the LLM API
70+
71+
Note:
72+
Summary field is always added to tool schemas for transparency and
73+
explainability of agent actions.
6274
"""
6375
# Select appropriate LLM
6476
selected_model = self.select_llm(messages)

openhands-sdk/openhands/sdk/tool/tool.py

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
ActionT = TypeVar("ActionT", bound=Action)
4242
ObservationT = TypeVar("ObservationT", bound=Observation)
4343
_action_types_with_risk: dict[type, type] = {}
44+
_action_types_with_summary: dict[type, type] = {}
4445

4546

4647
def _camel_to_snake(name: str) -> str:
@@ -364,17 +365,18 @@ def _get_tool_schema(
364365
action_type: type[Schema] | None = None,
365366
) -> dict[str, Any]:
366367
action_type = action_type or self.action_type
367-
action_type_with_risk = create_action_type_with_risk(action_type)
368368

369+
# Apply security risk enhancement if enabled
369370
add_security_risk_prediction = add_security_risk_prediction and (
370371
self.annotations is None or (not self.annotations.readOnlyHint)
371372
)
372-
schema = (
373-
action_type_with_risk.to_mcp_schema()
374-
if add_security_risk_prediction
375-
else action_type.to_mcp_schema()
376-
)
377-
return schema
373+
if add_security_risk_prediction:
374+
action_type = create_action_type_with_risk(action_type)
375+
376+
# Always add summary field for transparency and explainability
377+
action_type = _create_action_type_with_summary(action_type)
378+
379+
return action_type.to_mcp_schema()
378380

379381
def to_openai_tool(
380382
self,
@@ -391,14 +393,19 @@ def to_openai_tool(
391393
action_type: Optionally override the action_type to use for the schema.
392394
This is useful for MCPTool to use a dynamically created action type
393395
based on the tool's input schema.
396+
397+
Note:
398+
Summary field is always added to the schema for transparency and
399+
explainability of agent actions.
394400
"""
395401
return ChatCompletionToolParam(
396402
type="function",
397403
function=ChatCompletionToolParamFunctionChunk(
398404
name=self.name,
399405
description=self.description,
400406
parameters=self._get_tool_schema(
401-
add_security_risk_prediction, action_type
407+
add_security_risk_prediction,
408+
action_type,
402409
),
403410
),
404411
)
@@ -412,14 +419,23 @@ def to_responses_tool(
412419
413420
For Responses API, function tools expect top-level keys:
414421
{ "type": "function", "name": ..., "description": ..., "parameters": ... }
422+
423+
Args:
424+
add_security_risk_prediction: Whether to add a `security_risk` field
425+
action_type: Optional override for the action type
426+
427+
Note:
428+
Summary field is always added to the schema for transparency and
429+
explainability of agent actions.
415430
"""
416431

417432
return {
418433
"type": "function",
419434
"name": self.name,
420435
"description": self.description,
421436
"parameters": self._get_tool_schema(
422-
add_security_risk_prediction, action_type
437+
add_security_risk_prediction,
438+
action_type,
423439
),
424440
"strict": False,
425441
}
@@ -479,3 +495,38 @@ def create_action_type_with_risk(action_type: type[Schema]) -> type[Schema]:
479495
)
480496
_action_types_with_risk[action_type] = action_type_with_risk
481497
return action_type_with_risk
498+
499+
500+
def _create_action_type_with_summary(action_type: type[Schema]) -> type[Schema]:
    """Return a subclass of ``action_type`` that adds a ``summary`` field.

    The extra field lets the LLM attach a brief explanation of what each
    action does. Generated types are stored in the module-level
    ``_action_types_with_summary`` mapping, keyed by the original type, and
    reused on later calls.

    Args:
        action_type: The original action type to enhance.

    Returns:
        A type named ``<OriginalName>WithSummary`` that derives from
        ``action_type`` and declares ``summary: str | None``.
    """
    cached = _action_types_with_summary.get(action_type)
    if cached:
        return cached

    summary_description = (
        "A concise summary (approximately 10 words) describing what "
        "this specific action does. Focus on the key operation and target. "
        "Example: 'List all Python files in current directory'"
    )
    # Class namespace for the dynamically built subclass: the field default
    # plus the annotation that declares its type.
    namespace = {
        "summary": Field(default=None, description=summary_description),
        "__annotations__": {"summary": str | None},
    }
    enhanced = type(f"{action_type.__name__}WithSummary", (action_type,), namespace)
    _action_types_with_summary[action_type] = enhanced
    return enhanced

tests/integration/behavior_utils.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,3 +179,35 @@ def get_conversation_summary(
179179
def _matches_pattern(path: str, pattern: str) -> bool:
180180
"""Helper to match file paths against patterns."""
181181
return fnmatch.fnmatch(path, pattern) or pattern in path
182+
183+
184+
def verify_all_actions_have_summary(collected_events: list[Event]) -> tuple[bool, str]:
    """
    Check that every ActionEvent among the collected events has a summary.

    Events that are not ActionEvent instances are ignored. A summary counts
    as missing when it is None, empty, or whitespace only.

    Args:
        collected_events: List of events collected from conversation

    Returns:
        Tuple of (success, reason). success is True when there are no action
        events or all of them have summaries; reason is a human-readable
        message that lists the offending actions on failure.
    """
    from openhands.sdk.event import ActionEvent

    actions = [ev for ev in collected_events if isinstance(ev, ActionEvent)]
    if not actions:
        return True, "No action events found"

    # Positions are 1-based and count action events only.
    missing = [
        f"Action {position}: {action.tool_name}"
        for position, action in enumerate(actions, start=1)
        if not (action.summary and action.summary.strip())
    ]
    if missing:
        return False, f"Actions missing summaries: {', '.join(missing)}"

    return True, f"All {len(actions)} actions have summaries"

0 commit comments

Comments
 (0)