From a6a95cac25384ddd4edf35eea18b2fb02a935e32 Mon Sep 17 00:00:00 2001 From: SantiagoPittella Date: Tue, 16 Jun 2026 18:38:17 -0300 Subject: [PATCH] fix(block-producer): add request timeout to validator sign_block call --- bin/node/src/commands/modes.rs | 20 ++++++++++++++++++-- crates/block-producer/src/lib.rs | 6 ++++++ crates/block-producer/src/server/mod.rs | 5 ++++- crates/block-producer/src/server/tests.rs | 2 ++ crates/block-producer/src/validator/mod.rs | 10 ++++++++-- 5 files changed, 38 insertions(+), 5 deletions(-) diff --git a/bin/node/src/commands/modes.rs b/bin/node/src/commands/modes.rs index 12f2ffc81..0381afde3 100644 --- a/bin/node/src/commands/modes.rs +++ b/bin/node/src/commands/modes.rs @@ -1,10 +1,12 @@ use std::sync::Arc; +use std::time::Duration; use anyhow::Context; -use miden_node_block_producer::Sequencer; +use miden_node_block_producer::{DEFAULT_VALIDATOR_TIMEOUT, Sequencer}; use miden_node_proto::clients::{Builder, NtxBuilderClient, RpcClient, ValidatorClient}; use miden_node_rpc::{Rpc, RpcMode}; use miden_node_store::State; +use miden_node_utils::clap::duration_to_human_readable_string; use miden_node_utils::tasks::Tasks; use tokio::net::TcpListener; use url::Url; @@ -43,6 +45,7 @@ impl SequencerCommand { let sequencer = Sequencer { store: Arc::clone(&state), validator_url: self.external_services.validator_url.clone(), + validator_timeout: self.external_services.validator_timeout, batch_prover_url: self.block_producer.batch.prover_url, block_prover_url: self.block_producer.block_prover.url, batch_interval: self.block_producer.batch.interval, @@ -79,6 +82,19 @@ pub struct SequencerExternalServiceOptions { #[arg(long = "validator.url", env = "MIDEN_NODE_VALIDATOR_URL", value_name = "URL")] pub validator_url: Url, + /// Request timeout for calls to the validator service. + /// + /// Bounds the sequencer's `sign_block` call so a dropped validator connection fails fast and + /// retries, rather than stalling block production until the OS-level TCP timeout. + #[arg( + long = "validator.timeout", + env = "MIDEN_NODE_VALIDATOR_TIMEOUT", + default_value = duration_to_human_readable_string(DEFAULT_VALIDATOR_TIMEOUT), + value_parser = humantime::parse_duration, + value_name = "DURATION" + )] + pub validator_timeout: Duration, + /// The network transaction builder service gRPC URL. #[arg(long = "ntx-builder.url", env = "MIDEN_NODE_NTX_BUILDER_URL", value_name = "URL")] pub ntx_builder_url: Url, @@ -88,7 +104,7 @@ impl SequencerExternalServiceOptions { fn validator_client(&self) -> anyhow::Result { Ok(Builder::new(self.validator_url.clone()) .with_tls()? - .without_timeout() + .with_timeout(self.validator_timeout) .without_metadata_version() .without_metadata_genesis() .with_otel_context_injection() diff --git a/crates/block-producer/src/lib.rs b/crates/block-producer/src/lib.rs index e35100236..6559f5f57 100644 --- a/crates/block-producer/src/lib.rs +++ b/crates/block-producer/src/lib.rs @@ -69,6 +69,12 @@ pub const DEFAULT_BLOCK_INTERVAL: Duration = Duration::from_secs(3); /// How often a batch is created. pub const DEFAULT_BATCH_INTERVAL: Duration = Duration::from_secs(1); +/// The request timeout for the sequencer's `sign_block` call to the validator. +/// +/// This bounds the wait to a fast, retryable error while leaving ample headroom above normal +/// `sign_block` latency. +pub const DEFAULT_VALIDATOR_TIMEOUT: Duration = Duration::from_secs(30); + /// The default transaction capacity of the mempool. /// /// The value is selected such that all transactions should approximately be processed within one diff --git a/crates/block-producer/src/server/mod.rs b/crates/block-producer/src/server/mod.rs index fccfda103..39c827d25 100644 --- a/crates/block-producer/src/server/mod.rs +++ b/crates/block-producer/src/server/mod.rs @@ -74,6 +74,8 @@ pub struct Sequencer { pub store: Arc, /// The address of the validator component. pub validator_url: Url, + /// The request timeout for calls to the validator component. + pub validator_timeout: Duration, /// The address of the batch prover component. pub batch_prover_url: Option, /// The address of the block prover component. @@ -101,7 +103,8 @@ impl Sequencer { pub async fn spawn(self) -> Result { info!(target: COMPONENT, "Initializing sequencer"); let store = self.store; - let validator = BlockProducerValidatorClient::new(self.validator_url.clone())?; + let validator = + BlockProducerValidatorClient::new(self.validator_url.clone(), self.validator_timeout)?; let chain_tip = store.chain_tip(Finality::Committed).await; info!(target: COMPONENT, "Sequencer initialized"); diff --git a/crates/block-producer/src/server/tests.rs b/crates/block-producer/src/server/tests.rs index da9a50db1..b3fd30928 100644 --- a/crates/block-producer/src/server/tests.rs +++ b/crates/block-producer/src/server/tests.rs @@ -14,6 +14,7 @@ use crate::{ DEFAULT_MAX_BATCHES_PER_BLOCK, DEFAULT_MAX_CONCURRENT_PROOFS, DEFAULT_MAX_TXS_PER_BATCH, + DEFAULT_VALIDATOR_TIMEOUT, Sequencer, }; @@ -26,6 +27,7 @@ async fn block_producer_starts_with_store_state() { let block_producer = Sequencer { store, validator_url: Url::parse("http://127.0.0.1:0").unwrap(), + validator_timeout: DEFAULT_VALIDATOR_TIMEOUT, batch_prover_url: None, block_prover_url: None, batch_interval: Duration::from_secs(3600), diff --git a/crates/block-producer/src/validator/mod.rs b/crates/block-producer/src/validator/mod.rs index 030ae99a4..8c5814207 100644 --- a/crates/block-producer/src/validator/mod.rs +++ b/crates/block-producer/src/validator/mod.rs @@ -1,3 +1,5 @@ +use std::time::Duration; + use miden_node_proto::clients::{Builder, ValidatorClient}; use miden_node_proto::errors::ConversionError; use miden_node_proto::generated as proto; @@ -35,12 +37,16 @@ pub struct BlockProducerValidatorClient { impl BlockProducerValidatorClient { /// Creates a new validator client with a lazy connection. - pub fn new(validator_url: Url) -> anyhow::Result { + /// + /// `timeout` bounds each request (notably `sign_block`) so that a silently dropped validator + /// connection surfaces as a fast, retryable error instead of hanging on the OS-level TCP + /// timeout and halting block production. + pub fn new(validator_url: Url, timeout: Duration) -> anyhow::Result { info!(target: COMPONENT, validator_endpoint = %validator_url, "Initializing validator client"); let validator = Builder::new(validator_url) .with_tls()? - .without_timeout() + .with_timeout(timeout) .without_metadata_version() .without_metadata_genesis() .with_otel_context_injection()