Commits (19)
ec4b0fb
fix: restore post-run reconcile call to prevent event loss
openhands-agent Jan 26, 2026
25aa320
fix: add efficient reconcile_recent() to prevent event loss
openhands-agent Jan 26, 2026
0b407d8
fix: wait for WebSocket terminal status event before returning from r…
openhands-agent Jan 26, 2026
ffff022
Merge branch 'main' into fix/restore-post-run-reconcile
enyst Jan 28, 2026
13ad596
refactor: remove reconcile_recent, rely solely on _run_complete_event
openhands-agent Jan 30, 2026
bd8e311
refactor: add is_terminal() method to ConversationExecutionStatus
openhands-agent Jan 30, 2026
83261dd
Merge branch 'main' into fix/restore-post-run-reconcile
xingyaoww Jan 30, 2026
5287639
chore: increase WebSocket wait timeout to 5*poll_interval
openhands-agent Jan 30, 2026
939f74b
docs: add comment explaining why we don't return on polling terminal …
openhands-agent Jan 30, 2026
69610ab
Apply suggestion from @xingyaoww
xingyaoww Jan 30, 2026
35ebdd1
Merge branch 'main' into fix/restore-post-run-reconcile
xingyaoww Jan 30, 2026
802a991
Merge branch 'main' into fix/restore-post-run-reconcile
enyst Jan 31, 2026
622b711
fix: address critical issues in _wait_for_run_completion
openhands-agent Feb 2, 2026
d03112e
Merge branch 'main' into fix/restore-post-run-reconcile
openhands-agent Feb 2, 2026
3fa5173
fix: remove IDLE from terminal states and clear event at run() start
openhands-agent Feb 2, 2026
f054895
fix: address code review issues for WebSocket terminal status handling
openhands-agent Feb 3, 2026
cc6c1be
fix: add thread safety for terminal status state and document TERMINA…
openhands-agent Feb 3, 2026
6e354e6
refactor: simplify synchronization using queue.Queue
openhands-agent Feb 4, 2026
cd3346e
Merge branch 'main' into fix/restore-post-run-reconcile
xingyaoww Feb 4, 2026
103 changes: 91 additions & 12 deletions openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py
@@ -6,6 +6,7 @@
import time
import uuid
from collections.abc import Mapping
from queue import Empty, Queue
from typing import SupportsIndex, overload
from urllib.parse import urlparse

@@ -555,6 +556,7 @@ class RemoteConversation(BaseConversation):
_client: httpx.Client
_hook_processor: HookEventProcessor | None
_cleanup_initiated: bool
_terminal_status_queue: Queue[str] # Thread-safe queue for terminal status from WS
delete_on_close: bool = False

def __init__(
@@ -609,6 +611,7 @@ def __init__(
self._client = workspace.client
self._hook_processor = None
self._cleanup_initiated = False
self._terminal_status_queue: Queue[str] = Queue()

should_create = conversation_id is None
if conversation_id is not None:
@@ -708,8 +711,21 @@ def __init__(
# No visualization (visualizer is None)
self._visualizer = None

# Add a callback that signals when run completes via WebSocket
# This ensures we wait for all events to be delivered before run() returns
def run_complete_callback(event: Event) -> None:
    if isinstance(event, ConversationStateUpdateEvent):
        if event.key == "execution_status":
            try:
                status = ConversationExecutionStatus(event.value)
                if status.is_terminal():
                    self._terminal_status_queue.put(event.value)
            except ValueError:
Collaborator review comment:
🟠 Important: This fix relies on a critical assumption that WebSocket events are processed sequentially in the callback.

Why this matters:

  • If ConversationStateUpdateEvent(finished) is processed and put in the queue BEFORE ActionEvent(finish) is fully processed and added to state, the race condition still exists
  • The fix works because callbacks execute sequentially, ensuring all prior events are processed when the terminal status arrives

Suggestion:
Add a comment documenting this critical assumption:

# CRITICAL: This callback must execute sequentially with other event callbacks
# to ensure all events preceding the terminal status have been fully processed
# and added to state when we signal run completion.
def run_complete_callback(event: Event) -> None:

Consider adding a test that validates sequential execution order.
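A minimal sketch of such a test, using stand-in callbacks and a plain list in place of the SDK's real event pipeline and conversation state (record_event, signal_terminal, and dispatch are hypothetical names):

import queue


def test_terminal_status_signaled_after_prior_events():
    state_events: list[str] = []                     # stand-in for conversation state
    terminal_queue: queue.Queue[str] = queue.Queue()

    def record_event(event: str) -> None:
        state_events.append(event)

    def signal_terminal(event: str) -> None:
        if event == "execution_status:finished":
            terminal_queue.put(event)

    callbacks = [record_event, signal_terminal]

    def dispatch(event: str) -> None:
        # Sequential dispatch is the property under test: every callback sees
        # each event before the next event is delivered.
        for cb in callbacks:
            cb(event)

    for event in ("action:finish", "observation:done", "execution_status:finished"):
        dispatch(event)

    # If the terminal status is in the queue, all earlier events must already
    # have been recorded in state.
    assert terminal_queue.get_nowait() == "execution_status:finished"
    assert state_events[:2] == ["action:finish", "observation:done"]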

                pass  # Unknown status value, ignore

# Compose all callbacks into a single callback
composed_callback = BaseConversation.compose_callbacks(self._callbacks)
all_callbacks = self._callbacks + [run_complete_callback]
composed_callback = BaseConversation.compose_callbacks(all_callbacks)

# Initialize WebSocket client for callbacks
self._ws_client = WebSocketCallbackClient(
@@ -862,6 +878,14 @@ def run(
Raises:
ConversationRunError: If the run fails or times out.
"""
# Drain any stale terminal status events from previous runs.
# This prevents stale events from causing early returns.
while True:
    try:
        self._terminal_status_queue.get_nowait()
    except Empty:
        break

# Trigger a run on the server using the dedicated run endpoint.
# Let the server tell us if it's already running (409), avoiding an extra GET.
try:
@@ -889,10 +913,20 @@ def _wait_for_run_completion(
poll_interval: float = 1.0,
timeout: float = 1800.0,
) -> None:
"""Poll the server until the conversation is no longer running.
"""Wait for the conversation run to complete.

This method waits for the run to complete by listening for the terminal
status event via WebSocket. This ensures all events are delivered before
returning, avoiding the race condition where polling sees "finished"
status before WebSocket delivers the final events.

As a fallback, it also polls the server periodically. If the WebSocket
is delayed or disconnected, we return after multiple consecutive polls
show a terminal status, and reconcile events to catch any that were
missed via WebSocket.

Args:
poll_interval: Time in seconds between status polls.
poll_interval: Time in seconds between status polls (fallback).
timeout: Maximum time in seconds to wait.

Raises:
@@ -901,6 +935,14 @@
responses are retried until timeout.
"""
start_time = time.monotonic()
consecutive_terminal_polls = 0
# Return after this many consecutive terminal polls (fallback for WS issues).
# We use 3 polls to balance latency vs reliability:
# - 1 poll could be a transient state during shutdown
# - 2 polls might still catch a race condition
# - 3 polls (with default 1s interval = 3s total) provides high confidence
# that the run is truly complete while keeping fallback latency reasonable
TERMINAL_POLL_THRESHOLD = 3

while True:
elapsed = time.monotonic() - start_time
@@ -913,20 +955,57 @@
),
)

# Wait for either:
# 1. WebSocket delivers terminal status event (preferred)
# 2. Poll interval expires (fallback - check status via REST)
try:
Collaborator review comment:
🟡 Suggestion: Minor timeout edge case - if elapsed time is close to timeout, we still wait for the full poll_interval, which could exceed the intended timeout.

Example:

  • timeout = 1800s, poll_interval = 1s
  • At iteration where elapsed = 1799.5s, we pass the timeout check
  • Then we wait up to 1s for the queue, potentially reaching 1800.5s total

Suggested fix:

remaining = timeout - elapsed
wait_time = min(poll_interval, remaining)
try:
    ws_status = self._terminal_status_queue.get(timeout=wait_time)

This ensures we respect the timeout more precisely, though the current behavior is probably acceptable for most use cases.
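A self-contained sketch of that bounded wait, following the reviewer's suggestion and assuming the queue and timing values shown above (the helper name bounded_queue_wait is hypothetical):

import queue
import time


def bounded_queue_wait(
    q: queue.Queue, poll_interval: float, timeout: float, start: float
) -> str | None:
    """Wait on the queue without overshooting the overall timeout."""
    remaining = timeout - (time.monotonic() - start)
    if remaining <= 0:
        raise TimeoutError("run did not complete within timeout")
    try:
        return q.get(timeout=min(poll_interval, remaining))
    except queue.Empty:
        return None  # fall through to REST polling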

    ws_status = self._terminal_status_queue.get(timeout=poll_interval)
    # Handle ERROR/STUCK states - raises ConversationRunError
    self._handle_conversation_status(ws_status)

    logger.info(
        "Run completed via WebSocket notification "
        "(status: %s, elapsed: %.1fs)",
        ws_status,
        elapsed,
    )
    return
except Empty:
    pass  # Queue.get() timed out, fall through to REST polling

# Poll the server for status as a health check and fallback.
# This catches ERROR/STUCK states that need immediate attention,
# and provides a fallback if WebSocket is delayed/disconnected.
try:
status = self._poll_status_once()
except Exception as exc:
self._handle_poll_exception(exc)
consecutive_terminal_polls = 0 # Reset on error
else:
if self._handle_conversation_status(status):
logger.info(
"Run completed with status: %s (elapsed: %.1fs)",
status,
elapsed,
)
return

time.sleep(poll_interval)
# Raises ConversationRunError for ERROR/STUCK states
self._handle_conversation_status(status)

# Track consecutive terminal polls as a fallback for WS issues.
# If WebSocket is delayed/disconnected, we return after multiple
# consecutive polls confirm the terminal status.
if status and ConversationExecutionStatus(status).is_terminal():
consecutive_terminal_polls += 1
if consecutive_terminal_polls >= TERMINAL_POLL_THRESHOLD:
logger.info(
"Run completed via REST fallback after %d consecutive "
"terminal polls (status: %s, elapsed: %.1fs). "
"Reconciling events...",
consecutive_terminal_polls,
status,
elapsed,
)
# Reconcile events to catch any that were missed via WS.
# This is only called in the fallback path, so it doesn't
# add overhead in the common case where WS works.
self._state.events.reconcile()
return
else:
consecutive_terminal_polls = 0
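Read end to end, the new control flow amounts to roughly the following condensed sketch; poll_status and is_terminal are stand-ins for _poll_status_once and ConversationExecutionStatus.is_terminal, and the ERROR/STUCK handling and event reconciliation are omitted:

import queue
import time
from collections.abc import Callable


def wait_for_completion(
    terminal_queue: queue.Queue,
    poll_status: Callable[[], str | None],
    is_terminal: Callable[[str], bool],
    poll_interval: float = 1.0,
    timeout: float = 1800.0,
    threshold: int = 3,
) -> str:
    start = time.monotonic()
    consecutive = 0
    while time.monotonic() - start < timeout:
        try:
            # Preferred path: the WebSocket callback put a terminal status on
            # the queue, so every prior event has already been processed.
            return terminal_queue.get(timeout=poll_interval)
        except queue.Empty:
            pass
        status = poll_status()  # REST fallback / health check
        if status is not None and is_terminal(status):
            consecutive += 1
            if consecutive >= threshold:
                return status  # caller reconciles events in this path
        else:
            consecutive = 0
    raise TimeoutError("run did not complete within timeout")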

def _poll_status_once(self) -> str | None:
"""Fetch the current execution status from the remote conversation."""
19 changes: 19 additions & 0 deletions openhands-sdk/openhands/sdk/conversation/state.py
@@ -45,6 +45,25 @@ class ConversationExecutionStatus(str, Enum):
STUCK = "stuck" # Conversation is stuck in a loop or unable to proceed
DELETING = "deleting" # Conversation is in the process of being deleted

def is_terminal(self) -> bool:
    """Check if this status represents a terminal state.

    Terminal states indicate the run has completed and the agent is no longer
    actively processing. These are: FINISHED, ERROR, STUCK.

    Note: IDLE is NOT a terminal state - it's the initial state of a conversation
    before any run has started. Including IDLE would cause false positives when
    the WebSocket delivers the initial state update during connection.

    Returns:
        True if this is a terminal status, False otherwise.
    """
    return self in (
        ConversationExecutionStatus.FINISHED,
        ConversationExecutionStatus.ERROR,
        ConversationExecutionStatus.STUCK,
    )
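A short usage sketch, assuming the module path matches the file shown above and that the enum defines FINISHED and IDLE members with the obvious string values:

from openhands.sdk.conversation.state import ConversationExecutionStatus

# A status parsed from a state-update event value.
status = ConversationExecutionStatus("finished")
assert status.is_terminal()

# IDLE is deliberately excluded, so the initial state update delivered on
# WebSocket connection does not look like run completion.
assert not ConversationExecutionStatus.IDLE.is_terminal()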


class ConversationState(OpenHandsModel):
# ===== Public, validated fields =====