From 8a879be576a30ace73a2e7c93a0e0460892e7eab Mon Sep 17 00:00:00 2001 From: Avi Cohen Date: Thu, 22 Jan 2026 15:13:12 +0200 Subject: [PATCH] starknet_os_runner: add is_alive/is_ready endpoints Add liveness/readiness endpoints with structured status checks: - GET /gateway/is_alive checks bootloader file availability - GET /gateway/is_ready checks RPC availability and chain id match --- crates/starknet_os_runner/src/runner.rs | 4 + .../src/server/http_server.rs | 99 ++++++++++++++++++- .../src/virtual_snos_prover.rs | 8 ++ 3 files changed, 109 insertions(+), 2 deletions(-) diff --git a/crates/starknet_os_runner/src/runner.rs b/crates/starknet_os_runner/src/runner.rs index a3c15b40cff..59247c3484f 100644 --- a/crates/starknet_os_runner/src/runner.rs +++ b/crates/starknet_os_runner/src/runner.rs @@ -348,4 +348,8 @@ impl RpcRunnerFactory { block_id, ) } + + pub(crate) fn node_url(&self) -> &Url { + &self.node_url + } } diff --git a/crates/starknet_os_runner/src/server/http_server.rs b/crates/starknet_os_runner/src/server/http_server.rs index b939d247c60..54919e22765 100644 --- a/crates/starknet_os_runner/src/server/http_server.rs +++ b/crates/starknet_os_runner/src/server/http_server.rs @@ -9,15 +9,18 @@ use std::net::SocketAddr; use axum::extract::State; use axum::http::StatusCode; use axum::response::{IntoResponse, Response}; -use axum::routing::post; +use axum::routing::{get, post}; use axum::{Json, Router}; use blockifier_reexecution::state_reader::rpc_objects::BlockId; use serde::{Deserialize, Serialize}; use starknet_api::rpc_transaction::RpcTransaction; use starknet_api::transaction::fields::{Proof, ProofFacts}; use starknet_api::transaction::MessageToL1; +use starknet_rust::providers::jsonrpc::HttpTransport; +use starknet_rust::providers::{JsonRpcClient, Provider}; use tracing::{info, instrument}; +use crate::proving::prover::{resolve_resource_path, BOOTLOADER_FILE}; use crate::server::config::ServiceConfig; use crate::virtual_snos_prover::{VirtualSnosProver, 
VirtualSnosProverError}; @@ -97,6 +100,19 @@ pub struct AppState { pub(crate) prover: VirtualSnosProver, } +#[derive(Debug, Serialize)] +struct CheckStatus { + name: &'static str, + ok: bool, + message: Option<String>, +} + +#[derive(Debug, Serialize)] +struct ServiceStatusResponse { + status: &'static str, + checks: Vec<CheckStatus>, +} + /// Handler for the prove_transaction endpoint. #[instrument(skip(app_state), fields(block_id))] async fn prove_transaction( @@ -116,9 +132,88 @@ async fn prove_transaction( Ok(Json(response)) } +/// Handler for the is_alive (liveness) endpoint. +/// +/// Returns 200 OK if local resources required for proving are available. +/// This avoids marking the service healthy when required files are missing. +async fn is_alive() -> impl IntoResponse { + let checks = vec![check_bootloader_file()]; + build_status_response(checks, StatusCode::INTERNAL_SERVER_ERROR) +} + +/// Handler for the is_ready (readiness) endpoint. +/// +/// Returns 200 OK if the server is ready to accept requests. +/// This checks external dependencies needed for serving requests. 
+async fn is_ready(State(app_state): State<AppState>) -> impl IntoResponse { + let mut checks = vec![check_bootloader_file()]; + checks.extend(check_rpc_checks(&app_state.prover).await); + build_status_response(checks, StatusCode::SERVICE_UNAVAILABLE) +} + +fn check_bootloader_file() -> CheckStatus { + match resolve_resource_path(BOOTLOADER_FILE) { + Ok(_) => CheckStatus { name: "bootloader_file", ok: true, message: None }, + Err(err) => CheckStatus { + name: "bootloader_file", + ok: false, + message: Some(format!("Bootloader file check failed: {err}")), + }, + } +} + +async fn check_rpc_checks(prover: &VirtualSnosProver) -> Vec<CheckStatus> { + let client = JsonRpcClient::new(HttpTransport::new(prover.rpc_url().clone())); + match client.chain_id().await { + Ok(chain_id) => { + let availability_check = CheckStatus { name: "rpc_available", ok: true, message: None }; + let expected = prover.chain_id().as_hex().to_lowercase(); + let actual = chain_id.to_hex_string().to_lowercase(); + let chain_id_check = if actual == expected { + CheckStatus { name: "rpc_chain_id", ok: true, message: None } + } else { + CheckStatus { + name: "rpc_chain_id", + ok: false, + message: Some(format!( + "RPC chain id {actual} does not match expected {expected}" + )), + } + }; + vec![availability_check, chain_id_check] + } + Err(err) => vec![ + CheckStatus { + name: "rpc_available", + ok: false, + message: Some(format!("RPC unavailable: {err}")), + }, + CheckStatus { + name: "rpc_chain_id", + ok: false, + message: Some(format!("RPC chain id check skipped: RPC unavailable: {err}")), + }, + ], + } +} + +fn build_status_response( + checks: Vec<CheckStatus>, + failure_status: StatusCode, +) -> (StatusCode, Json<ServiceStatusResponse>) { + let all_ok = checks.iter().all(|check| check.ok); + let status_code = if all_ok { StatusCode::OK } else { failure_status }; + let status = if all_ok { "ok" } else { "error" }; + (status_code, Json(ServiceStatusResponse { status, checks })) +} + /// Creates the router with all endpoints. 
pub fn create_router(app_state: AppState) -> Router { - Router::new().route("/prove_transaction", post(prove_transaction)).with_state(app_state) + Router::new() + .route("/prove_transaction", post(prove_transaction)) + .route("/gateway/is_alive", get(is_alive)) + .route("/gateway/is_ready", get(is_ready)) + .with_state(app_state) } /// The HTTP proving server. diff --git a/crates/starknet_os_runner/src/virtual_snos_prover.rs b/crates/starknet_os_runner/src/virtual_snos_prover.rs index afa1b96db69..cc8034daf15 100644 --- a/crates/starknet_os_runner/src/virtual_snos_prover.rs +++ b/crates/starknet_os_runner/src/virtual_snos_prover.rs @@ -146,6 +146,14 @@ impl VirtualSnosProver { total_duration, }) } + + pub(crate) fn rpc_url(&self) -> &Url { + self.runner_factory.node_url() + } + + pub(crate) fn chain_id(&self) -> &ChainId { + &self.chain_id + } } /// Validates that the transaction is an Invoke transaction and extracts it.