|
67 | 67 | logger = get_logger(__name__) |
68 | 68 | maybe_init_laminar() |
69 | 69 |
|
| 70 | +# Maximum number of events to scan during init_state defensive checks. |
| 71 | +# SystemPromptEvent must appear within this prefix (at index 0 or 1). |
| 72 | +INIT_STATE_PREFIX_SCAN_WINDOW = 3 |
| 73 | + |
70 | 74 |
|
71 | 75 | class Agent(AgentBase): |
72 | 76 | """Main agent implementation for OpenHands. |
@@ -102,53 +106,82 @@ def init_state( |
102 | 106 | state: ConversationState, |
103 | 107 | on_event: ConversationCallbackType, |
104 | 108 | ) -> None: |
| 109 | + """Initialize conversation state. |
| 110 | +
|
| 111 | + Invariants enforced by this method: |
| 112 | + - If a SystemPromptEvent is already present, it must be within the first 3 |
| 113 | + events (index 0 or 1 in practice; index 2 is included in the scan window |
| 114 | + to detect a user message appearing before the system prompt). |
| 115 | + - A user MessageEvent should not appear before the SystemPromptEvent. |
| 116 | +
|
| 117 | + These invariants keep event ordering predictable for downstream components |
| 118 | + (condenser, UI, etc.) and also prevent accidentally materializing the full |
| 119 | + event history during initialization. |
| 120 | + """ |
105 | 121 | super().init_state(state, on_event=on_event) |
106 | | - # TODO(openhands): we should add test to test this init_state will actually |
107 | | - # modify state in-place |
108 | 122 |
|
109 | 123 | # Defensive check: Analyze state to detect unexpected initialization scenarios |
110 | 124 | # These checks help diagnose issues related to lazy loading and event ordering |
111 | 125 | # See: https://github.com/OpenHands/software-agent-sdk/issues/1785 |
112 | | - events = list(state.events) |
113 | | - has_system_prompt = any(isinstance(e, SystemPromptEvent) for e in events) |
| 126 | + # |
| 127 | + # NOTE: len() is O(1) for EventLog (file-backed implementation). |
| 128 | + event_count = len(state.events) |
| 129 | + |
| 130 | + # NOTE: state.events is intentionally an EventsListBase (Sequence-like), not |
| 131 | + # a plain list. Avoid materializing the full history via list(state.events) |
| 132 | + # here (conversations can reach 30k+ events). |
| 133 | + # |
| 134 | + # Invariant: when init_state is called, SystemPromptEvent (if present) must be |
| 135 | + # at index 0 or 1. |
| 136 | + # |
| 137 | + # Rationale: |
| 138 | + # - Local conversations start empty and init_state is responsible for adding |
| 139 | + # the SystemPromptEvent as the first event. |
| 140 | + # - Remote conversations may receive an initial ConversationStateUpdateEvent |
| 141 | + # from the agent-server immediately after subscription. In a typical remote |
| 142 | + # session prefix you may see: |
| 143 | + # [ConversationStateUpdateEvent, SystemPromptEvent, MessageEvent, ...] |
| 144 | + # |
| 145 | + # We intentionally only inspect the first few events (cheap for both local and |
| 146 | + # remote) to enforce this invariant. |
| 147 | + prefix_events = state.events[:INIT_STATE_PREFIX_SCAN_WINDOW] |
| 148 | + |
| 149 | + has_system_prompt = any(isinstance(e, SystemPromptEvent) for e in prefix_events) |
114 | 150 | has_user_message = any( |
115 | | - isinstance(e, MessageEvent) and e.source == "user" for e in events |
| 151 | + isinstance(e, MessageEvent) and e.source == "user" for e in prefix_events |
116 | 152 | ) |
117 | | - has_any_llm_event = any(isinstance(e, LLMConvertibleEvent) for e in events) |
118 | | - |
119 | 153 | # Log state for debugging initialization order issues |
120 | 154 | logger.debug( |
121 | 155 | f"init_state called: conversation_id={state.id}, " |
122 | | - f"event_count={len(events)}, " |
| 156 | + f"event_count={event_count}, " |
123 | 157 | f"has_system_prompt={has_system_prompt}, " |
124 | | - f"has_user_message={has_user_message}, " |
125 | | - f"has_any_llm_event={has_any_llm_event}" |
| 158 | + f"has_user_message={has_user_message}" |
126 | 159 | ) |
127 | 160 |
|
128 | 161 | if has_system_prompt: |
129 | | - # SystemPromptEvent already exists - this is unexpected during normal flow |
130 | | - # but could happen in persistence/resume scenarios |
131 | | - logger.warning( |
132 | | - f"init_state called but SystemPromptEvent already exists. " |
133 | | - f"conversation_id={state.id}, event_count={len(events)}. " |
134 | | - f"This may indicate double initialization or a resume scenario." |
| 162 | + # Restoring/resuming conversations is normal: a system prompt already |
| 163 | + # present means this conversation was initialized previously. |
| 164 | + logger.debug( |
| 165 | + "init_state: SystemPromptEvent already present; skipping init. " |
| 166 | + f"conversation_id={state.id}, event_count={event_count}." |
135 | 167 | ) |
136 | 168 | return |
137 | 169 |
|
138 | | - # Assert: If there are user messages but no system prompt, something is wrong |
139 | | - # The system prompt should always be added before any user messages |
| 170 | + # Assert: A user message should never appear before the system prompt. |
| 171 | + # |
| 172 | + # NOTE: This is a best-effort check based on the first few events only. |
| 173 | + # Remote conversations can include a ConversationStateUpdateEvent near the |
| 174 | + # start, so we scan a small prefix window. |
140 | 175 | if has_user_message: |
141 | | - event_types = [type(e).__name__ for e in events] |
| 176 | + event_types = [type(e).__name__ for e in prefix_events] |
142 | 177 | logger.error( |
143 | | - f"init_state: User message exists without SystemPromptEvent! " |
144 | | - f"conversation_id={state.id}, events={event_types}" |
| 178 | + f"init_state: User message found in prefix before SystemPromptEvent! " |
| 179 | + f"conversation_id={state.id}, prefix_events={event_types}" |
145 | 180 | ) |
146 | | - assert not has_user_message, ( |
147 | | - f"Unexpected state: User message exists before SystemPromptEvent. " |
148 | | - f"conversation_id={state.id}, event_count={len(events)}, " |
149 | | - f"event_types={event_types}. " |
150 | | - f"This indicates an initialization order bug - init_state should be " |
151 | | - f"called before any user messages are added to the conversation." |
| 181 | + raise AssertionError( |
| 182 | + "Unexpected state: user message exists before SystemPromptEvent. " |
| 183 | + f"conversation_id={state.id}, event_count={event_count}, " |
| 184 | + f"prefix_event_types={event_types}." |
152 | 185 | ) |
153 | 186 |
|
154 | 187 | # Prepare system message |
|
0 commit comments