diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8ad3a688..89165678 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,3 +49,11 @@ jobs: - name: Run fixture-based tests uses: ./.github/actions/run-fixture-tests + + # Reuses the release build from the test step; validates the benchmark + # harness end-to-end and its JSON output contract in a few seconds. + - name: Benchmark smoke (mock crypto) + run: | + cargo run --release --bin ethlambda -- benchmark synthetic --mock-crypto \ + --num-validators 4 --warmup-slots 4 --iterations 3 --format json \ + | jq -e '.schema_version == 1 and (.samples | length == 3)' diff --git a/Cargo.lock b/Cargo.lock index c57423fa..6ec93afe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2012,6 +2012,7 @@ dependencies = [ "clap", "ethlambda-blockchain", "ethlambda-crypto", + "ethlambda-metrics", "ethlambda-network-api", "ethlambda-p2p", "ethlambda-rpc", @@ -2024,6 +2025,7 @@ dependencies = [ "libssz-types", "reqwest", "serde", + "serde_json", "serde_yaml_ng", "thiserror 2.0.18", "tikv-jemallocator", diff --git a/Makefile b/Makefile index d28dc505..6d3399bf 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help fmt lint docker-build shadow-build shadow-docker-build run-devnet test docs docs-deps docs-serve +.PHONY: help fmt lint bench docker-build shadow-build shadow-docker-build run-devnet test docs docs-deps docs-serve help: ## πŸ“š Show help for each of the Makefile recipes @grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' @@ -13,6 +13,11 @@ test: leanSpec/fixtures ## πŸ§ͺ Run all tests # Tests need to be run on release to avoid stack overflows during signature verification/aggregation cargo test --workspace --release +BENCH_ARGS ?= synthetic --mock-crypto + +bench: ## 🏁 Benchmark block building offline (override BENCH_ARGS to customize) + cargo run --release --bin ethlambda -- benchmark 
$(BENCH_ARGS) + GIT_COMMIT=$(shell git rev-parse HEAD) GIT_BRANCH=$(shell git rev-parse --abbrev-ref HEAD) DOCKER_TAG?=local diff --git a/bin/ethlambda/Cargo.toml b/bin/ethlambda/Cargo.toml index 3b9e5582..0711920f 100644 --- a/bin/ethlambda/Cargo.toml +++ b/bin/ethlambda/Cargo.toml @@ -21,6 +21,7 @@ shadow-integration = ["ethlambda-crypto/shadow-integration"] [dependencies] ethlambda-blockchain.workspace = true ethlambda-crypto.workspace = true +ethlambda-metrics.workspace = true ethlambda-network-api.workspace = true ethlambda-p2p.workspace = true ethlambda-types.workspace = true @@ -37,6 +38,7 @@ tracing.workspace = true tracing-subscriber = "0.3" serde.workspace = true +serde_json.workspace = true serde_yaml_ng.workspace = true hex.workspace = true diff --git a/bin/ethlambda/build.rs b/bin/ethlambda/build.rs index ad4184ed..bb2a9e87 100644 --- a/bin/ethlambda/build.rs +++ b/bin/ethlambda/build.rs @@ -1,3 +1,5 @@ +use std::path::PathBuf; + use vergen_git2::{Emitter, Git2Builder, RustcBuilder}; fn main() -> Result<(), Box> { @@ -12,5 +14,45 @@ fn main() -> Result<(), Box> { .add_instructions(&git2)? .emit()?; + emit_leansig_rev(); + Ok(()) } + +/// Embed the resolved leansig git revision from the workspace Cargo.lock. +/// +/// leansig is pinned to a moving branch, so a `cargo update` changes the +/// measured crypto with zero ethlambda diff; benchmark reports embed this +/// revision to keep results interpretable across lock bumps. 
/// Export the resolved leansig revision to the crate as the compile-time
/// environment variable `ETHLAMBDA_LEANSIG_REV`, and ask Cargo to re-run this
/// build script whenever the workspace lockfile changes.
///
/// The value is the literal `unknown` when the revision cannot be resolved
/// (lockfile unreadable, no `leansig` package, or no `#<rev>` fragment on its
/// `source` line).
fn emit_leansig_rev() {
    let rev = leansig_rev_from_lockfile().unwrap_or_else(|| "unknown".to_string());
    println!("cargo:rustc-env=ETHLAMBDA_LEANSIG_REV={rev}");
    if let Some(lockfile) = workspace_lockfile() {
        println!("cargo:rerun-if-changed={}", lockfile.display());
    }
}

/// Path of the workspace `Cargo.lock`, two directories above this crate's
/// manifest directory. Returns `None` when `CARGO_MANIFEST_DIR` is unset.
fn workspace_lockfile() -> Option<PathBuf> {
    // `var_os` rather than `var`: a checkout path that is not valid UTF-8 is
    // still a usable path and must not silently degrade the revision to
    // `unknown`.
    let manifest_dir = std::env::var_os("CARGO_MANIFEST_DIR")?;
    Some(PathBuf::from(manifest_dir).join("../../Cargo.lock"))
}

/// Read the workspace lockfile and extract the leansig git revision from it.
///
/// Returns `None` when the lockfile cannot be located or read, or when
/// [`leansig_rev_from_lock_contents`] finds no revision.
fn leansig_rev_from_lockfile() -> Option<String> {
    let lockfile = std::fs::read_to_string(workspace_lockfile()?).ok()?;
    leansig_rev_from_lock_contents(&lockfile)
}

/// Extract the git revision of the `leansig` package from lockfile text.
///
/// Scans line by line: a `[[package]]` header ends the current package, a
/// `name = "leansig"` line starts the package of interest, and the first
/// `source = "...#<rev>"` line inside it supplies the revision. Returns `None`
/// when there is no such package, when its `source` has no `#` fragment (for
/// example a registry source), or when the fragment is empty. Callers then
/// fall back to `unknown` instead of embedding a URL as the revision.
fn leansig_rev_from_lock_contents(lock_contents: &str) -> Option<String> {
    let mut in_leansig_package = false;
    for line in lock_contents.lines().map(str::trim) {
        match line {
            "[[package]]" => in_leansig_package = false,
            "name = \"leansig\"" => in_leansig_package = true,
            _ if in_leansig_package => {
                // source = "git+https://github.com/leanEthereum/leanSig?branch=devnet4#<rev>"
                if let Some(source) = line.strip_prefix("source = ") {
                    let (_, rev) = source.trim_matches('"').rsplit_once('#')?;
                    return (!rev.is_empty()).then(|| rev.to_string());
                }
            }
            _ => {}
        }
    }
    None
}
+const GENESIS_TIME: u64 = 1_700_000_000; + +pub(crate) struct SyntheticCorpus { + num_validators: u64, + proofs_per_data: u64, +} + +impl SyntheticCorpus { + pub(crate) fn new(num_validators: u64, proofs_per_data: u64) -> Self { + Self { + num_validators, + proofs_per_data, + } + } + + /// Build a genesis store over an in-memory backend with `num_validators` + /// seed-derived validators. + /// + /// Pubkeys are deterministic placeholder bytes: in mock-crypto mode no code + /// path decodes them (signature verification is skipped and best-proof + /// compaction never resolves pubkeys). + pub(crate) fn genesis_store(&self, seed: u64) -> Store { + let mut rng_state = seed; + let validators = (0..self.num_validators) + .map(|index| Validator { + attestation_pubkey: synthetic_pubkey(&mut rng_state), + proposal_pubkey: synthetic_pubkey(&mut rng_state), + index, + }) + .collect(); + let genesis_state = State::from_genesis(GENESIS_TIME, validators); + Store::from_anchor_state(Arc::new(InMemoryBackend::new()), genesis_state) + } + + /// Seed the pending ("new") pool with the full validator set's attestations + /// for `attestation_slot`, split into `proofs_per_data` disjoint aggregates. + /// + /// Mirrors what committee aggregators gossip during a slot: several + /// aggregates for the same `AttestationData`, each covering a validator + /// subset. The proposal tick then promotes them to the known pool, exactly + /// as on a live node. Entries are inserted in a fixed order because pool + /// insertion order pins within-entry proof choice during selection. 
+ pub(crate) fn seed_pool(&self, store: &mut Store, attestation_slot: u64) { + let data = produce_attestation_data(store, attestation_slot); + let entries = participant_groups(self.num_validators, self.proofs_per_data) + .into_iter() + .map(|participants| { + ( + HashedAttestationData::new(data.clone()), + SingleMessageAggregate::empty(participants), + ) + }) + .collect(); + store.insert_new_aggregated_payloads_batch(entries); + } +} + +/// Partition validators 0..num_validators into `groups` disjoint bitfields, +/// assigning validator `i` to group `i % groups`. Every group is non-empty +/// (groups is capped at the validator count) and the union covers every +/// validator exactly once. +fn participant_groups(num_validators: u64, groups: u64) -> Vec { + let groups = groups.clamp(1, num_validators); + (0..groups) + .map(|group| { + let mut bits = AggregationBits::with_length(num_validators as usize) + .expect("validator count is within the bitlist limit"); + for index in (group..num_validators).step_by(groups as usize) { + bits.set(index as usize, true) + .expect("index is within the bitlist length"); + } + bits + }) + .collect() +} + +/// splitmix64: tiny deterministic generator for placeholder pubkey bytes, +/// avoiding a rand dependency. 
+fn splitmix64(state: &mut u64) -> u64 { + *state = state.wrapping_add(0x9e37_79b9_7f4a_7c15); + let mut z = *state; + z = (z ^ (z >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); + z = (z ^ (z >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); + z ^ (z >> 31) +} + +fn synthetic_pubkey(rng_state: &mut u64) -> ValidatorPubkeyBytes { + let mut bytes = [0u8; 52]; + for chunk in bytes.chunks_mut(8) { + let word = splitmix64(rng_state).to_le_bytes(); + chunk.copy_from_slice(&word[..chunk.len()]); + } + bytes +} + +#[cfg(test)] +mod tests { + use super::*; + use ethlambda_types::attestation::validator_indices; + + #[test] + fn participant_groups_partition_all_validators() { + for (validators, groups) in [(8u64, 2u64), (8, 3), (5, 8), (1, 1), (4096, 4)] { + let partition = participant_groups(validators, groups); + assert_eq!(partition.len() as u64, groups.min(validators)); + let mut seen = vec![0u32; validators as usize]; + for bits in &partition { + let indices: Vec = validator_indices(bits).collect(); + assert!(!indices.is_empty(), "every group must be non-empty"); + for index in indices { + seen[index as usize] += 1; + } + } + assert!( + seen.iter().all(|&count| count == 1), + "every validator must appear in exactly one group: {seen:?}" + ); + } + } + + #[test] + fn synthetic_pubkeys_are_deterministic() { + let mut a = 42u64; + let mut b = 42u64; + assert_eq!(synthetic_pubkey(&mut a), synthetic_pubkey(&mut b)); + let mut c = 43u64; + assert_ne!(synthetic_pubkey(&mut a), synthetic_pubkey(&mut c)); + } +} diff --git a/bin/ethlambda/src/benchmark/mod.rs b/bin/ethlambda/src/benchmark/mod.rs new file mode 100644 index 00000000..2e079dc4 --- /dev/null +++ b/bin/ethlambda/src/benchmark/mod.rs @@ -0,0 +1,285 @@ +//! Offline block-building benchmark (`ethlambda benchmark`). +//! +//! Drives the exact production proposer path β€” `produce_block_with_signatures`, +//! the same entry `BlockChainServer::propose_block` uses β€” against a synthetic +//! 
in-memory chain, and reports per-phase timing distributions. Gossip publish +//! and the slot-alignment sleep are outside the measured span, matching the +//! node's own `lean_block_building_time_seconds` boundary. +//! +//! See docs/plans/block-building-benchmark.md for the design and roadmap +//! (real-crypto pools and replay-from-datadir land in later milestones). + +mod corpus; +mod report; + +use std::collections::{BTreeMap, HashMap}; +use std::path::PathBuf; +use std::time::Instant; + +use ethlambda_blockchain::block_builder::ProposerConfig; +use ethlambda_blockchain::metrics::BLOCK_PROPOSAL_ATTESTATION_BUILD_PHASES; +use ethlambda_blockchain::store::{on_block_without_verification, produce_block_with_signatures}; +use ethlambda_storage::NEW_PAYLOAD_CAP; +use ethlambda_types::block::{MultiMessageAggregate, SignedBlock}; +use ethlambda_types::primitives::HashTreeRoot as _; +use eyre::WrapErr as _; + +use report::{Environment, Params, Report, Sample}; + +#[derive(Debug, clap::Args)] +pub(crate) struct BenchmarkOptions { + #[command(subcommand)] + workload: Workload, +} + +#[derive(Debug, clap::Subcommand)] +enum Workload { + /// Benchmark block building on a synthetic in-memory chain. + Synthetic(SyntheticOptions), +} + +#[derive(Debug, clap::Args)] +struct SyntheticOptions { + /// Number of validators in the synthetic genesis. + #[arg(long, default_value = "8", value_parser = clap::value_parser!(u64).range(1..=4096))] + num_validators: u64, + /// Unmeasured chain-advancement slots before measuring. Builds and imports + /// one block per slot so the measured builds run on a state with + /// representative historical roots and justifications, and warms the state + /// cache. + #[arg(long, default_value = "8")] + warmup_slots: u64, + /// Aggregate proofs seeded per AttestationData, mimicking committee + /// aggregators covering disjoint validator subsets. 
The default of 1 (one + /// full-coverage proof per data) keeps justification/finalization + /// advancing every slot. Higher values exercise multi-proof selection and + /// same-data collapse, but without --enable-proposer-aggregation the block + /// then carries only the best partial proof (< 2/3 coverage), so + /// justification stalls β€” the real coverage cost of disabling proposer + /// aggregation. + #[arg(long, default_value = "1", value_parser = clap::value_parser!(u64).range(1..))] + proofs_per_data: u64, + /// Deterministic seed for the synthetic validator set. Two runs with the + /// same seed and parameters produce identical per-iteration block roots. + #[arg(long, default_value = "42")] + seed: u64, + #[command(flatten)] + common: CommonOptions, +} + +#[derive(Debug, clap::Args)] +struct CommonOptions { + /// Measured iterations (one built block each), after warmup. + #[arg(long, default_value = "10", value_parser = clap::value_parser!(u64).range(1..))] + iterations: u64, + /// Seed pools with empty placeholder proofs instead of real XMSS/leanVM + /// crypto. Measures selection + best-proof compaction + state transition + /// only; runs in seconds. Conflicts with --enable-proposer-aggregation, + /// whose recursive aggregation needs real proof bytes. + #[arg(long, conflicts_with = "enable_proposer_aggregation")] + mock_crypto: bool, + /// Mirrors the node flag: collapse same-data proofs via recursive leanVM + /// aggregation instead of keeping the single best-coverage proof. + #[arg(long)] + enable_proposer_aggregation: bool, + /// Mirrors the node flag: distinct AttestationData cap per built block. + #[arg(long, default_value = "3")] + max_attestations_per_block: usize, + /// Report format printed to stdout. Logs go to stderr, so JSON output can + /// be piped directly (e.g. into jq). + #[arg(long, value_enum, default_value_t = OutputFormat::Human)] + format: OutputFormat, + /// Also write the JSON report to this file. 
+ #[arg(long)] + output: Option, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)] +enum OutputFormat { + Human, + Json, +} + +pub(crate) fn run(options: BenchmarkOptions) -> eyre::Result<()> { + let Workload::Synthetic(synthetic) = options.workload; + run_synthetic(synthetic) +} + +fn run_synthetic(options: SyntheticOptions) -> eyre::Result<()> { + let common = &options.common; + eyre::ensure!( + common.mock_crypto, + "real-crypto benchmarking is not implemented yet; rerun with --mock-crypto" + ); + // The pending pool evicts whole data-root entries FIFO once its proof cap + // is exceeded, so a single slot's batch larger than the cap would silently + // seed nothing and every measured block would be empty. + eyre::ensure!( + options.proofs_per_data as usize <= NEW_PAYLOAD_CAP, + "--proofs-per-data {} exceeds the pending-pool capacity ({NEW_PAYLOAD_CAP}); \ + one slot's batch would be evicted whole and every measured block would be empty", + options.proofs_per_data + ); + + let proposer_config = ProposerConfig { + enable_proposer_aggregation: common.enable_proposer_aggregation, + max_attestations_per_block: common.max_attestations_per_block, + }; + let corpus = corpus::SyntheticCorpus::new(options.num_validators, options.proofs_per_data); + let mut store = corpus.genesis_store(options.seed); + + let total_slots = options + .warmup_slots + .checked_add(common.iterations) + .ok_or_else(|| eyre::eyre!("--warmup-slots plus --iterations overflows u64"))?; + let mut samples = Vec::with_capacity(common.iterations as usize); + for slot in 1..=total_slots { + // Seed the pending pool with the previous slot's attestations, exactly + // where gossip aggregates would sit before the proposal tick promotes + // them to the known pool. Entries from earlier slots stay in the known + // pool, as they would on a live node. 
+ corpus.seed_pool(&mut store, slot - 1); + eyre::ensure!( + store.new_aggregated_payloads_count() > 0, + "seeded attestations were evicted from the pending pool at slot {slot}; \ + the measured workload would not match the requested parameters" + ); + let pool_entries = + store.new_aggregated_payloads_count() + store.known_aggregated_payloads_count(); + + // Round-robin proposer, matching `is_proposer`. + let proposer = slot % options.num_validators; + + let before = phase_snapshot(); + let build_start = Instant::now(); + let (block, aggregates, _checkpoints) = + produce_block_with_signatures(&mut store, slot, proposer, proposer_config) + .wrap_err_with(|| format!("block build failed at slot {slot}"))?; + let wall_seconds = build_start.elapsed().as_secs_f64(); + let phases = phase_deltas(&before, &phase_snapshot())?; + + let block_root = block.hash_tree_root(); + let attestations_packed = block.body.attestations.len(); + let aggregates_count = aggregates.len(); + + // Import the built block (outside the measured span) so the next + // iteration builds one slot ahead of head, like a live proposer; + // building repeatedly on a fixed head would make `process_slots` cost + // grow with the iteration index. 
+ let signed_block = SignedBlock { + message: block, + proof: MultiMessageAggregate::default(), + }; + on_block_without_verification(&mut store, signed_block) + .wrap_err_with(|| format!("importing the built block failed at slot {slot}"))?; + + let measured = slot > options.warmup_slots; + let label = if measured { "measured" } else { "warmup" }; + eprintln!( + "[{slot}/{total_slots}] {label}: built block in {:.3}ms \ + (attestations={attestations_packed}, pool_entries={pool_entries})", + wall_seconds * 1e3, + ); + + if measured { + let overhead_seconds = wall_seconds - phases.values().sum::(); + samples.push(Sample { + iteration: slot - options.warmup_slots, + slot, + proposer, + block_root: format!("0x{}", hex::encode(block_root.0)), + wall_seconds, + phases, + overhead_seconds, + attestations_packed, + aggregates: aggregates_count, + pool_entries, + }); + } + } + + eyre::ensure!( + samples.len() as u64 == common.iterations, + "collected {} samples but expected {}; the measured-slot accounting drifted", + samples.len(), + common.iterations + ); + + let params = Params { + mode: "synthetic", + mock_crypto: common.mock_crypto, + num_validators: options.num_validators, + warmup_slots: options.warmup_slots, + proofs_per_data: options.proofs_per_data, + seed: options.seed, + iterations: common.iterations, + enable_proposer_aggregation: common.enable_proposer_aggregation, + max_attestations_per_block: common.max_attestations_per_block, + }; + let report = Report::new(Environment::collect(), params, samples); + + match common.format { + OutputFormat::Human => println!("{}", report.human_table()), + OutputFormat::Json => println!("{}", report.to_json()?), + } + if let Some(path) = &common.output { + std::fs::write(path, report.to_json()?) 
+ .wrap_err_with(|| format!("failed to write report to {}", path.display()))?; + eprintln!("report written to {}", path.display()); + } + + Ok(()) +} + +const PHASE_HISTOGRAM: &str = "lean_block_proposal_attestation_build_phase_seconds"; + +/// Per-phase (sample_sum, sample_count) snapshot of the block-proposal phase +/// histogram, read from the default prometheus registry. +type PhaseSnapshot = HashMap; + +fn phase_snapshot() -> PhaseSnapshot { + ethlambda_metrics::gather() + .iter() + .filter(|family| family.name() == PHASE_HISTOGRAM) + .flat_map(|family| family.get_metric()) + .filter_map(|metric| { + let phase = metric + .get_label() + .iter() + .find(|label| label.name() == "phase")? + .value() + .to_string(); + let histogram = metric.get_histogram(); + Some(( + phase, + (histogram.get_sample_sum(), histogram.get_sample_count()), + )) + }) + .collect() +} + +/// Exact per-iteration phase durations from two snapshots around one build. +/// +/// Histogram sums accumulate the raw f64 seconds of every observation, so the +/// sum delta IS the build's phase time β€” bucket boundaries play no role. The +/// count must advance by exactly 1 per phase (each phase observes once per +/// `build_block` in this single-threaded process); anything else means the +/// accounting drifted and attribution would be wrong, so it is a hard error. 
+fn phase_deltas( + before: &PhaseSnapshot, + after: &PhaseSnapshot, +) -> eyre::Result> { + let mut deltas = BTreeMap::new(); + for &phase in BLOCK_PROPOSAL_ATTESTATION_BUILD_PHASES { + let (sum_before, count_before) = before.get(phase).copied().unwrap_or((0.0, 0)); + let (sum_after, count_after) = after.get(phase).copied().unwrap_or((0.0, 0)); + let observations = count_after.saturating_sub(count_before); + eyre::ensure!( + observations == 1, + "phase '{phase}' was observed {observations} times during one build (expected 1); \ + phase attribution would be wrong" + ); + deltas.insert(phase.to_string(), sum_after - sum_before); + } + Ok(deltas) +} diff --git a/bin/ethlambda/src/benchmark/report.rs b/bin/ethlambda/src/benchmark/report.rs new file mode 100644 index 00000000..1583f0d9 --- /dev/null +++ b/bin/ethlambda/src/benchmark/report.rs @@ -0,0 +1,303 @@ +//! Statistics and report emission for the block-building benchmark. +//! +//! Raw per-iteration samples are always included in the JSON report: outliers +//! are never discarded (XMSS signing and OTS window advancement produce +//! legitimate heavy tails worth inspecting), and per-iteration block roots let +//! a baseline-vs-optimized diff prove an optimization changed only speed, not +//! which attestations get selected. + +use std::collections::BTreeMap; +use std::fmt::Write as _; + +use serde::Serialize; + +use crate::version; + +/// Coefficient-of-variation threshold above which wall-time results are +/// flagged as too noisy to compare, per the benchmarking workflow standard. +const CV_WARN_THRESHOLD: f64 = 0.10; + +#[derive(Debug, Serialize)] +pub(crate) struct Sample { + pub iteration: u64, + pub slot: u64, + pub proposer: u64, + /// Determinism checksum: same seed + params must reproduce the same roots. + pub block_root: String, + pub wall_seconds: f64, + /// Per-phase seconds from histogram sum deltas. 
+ pub phases: BTreeMap, + /// Wall time not attributed to any phase: the `produce_block_with_signatures` + /// preamble (tick advance, pool promotion, fork-choice head update, pool + /// deep-clone, block-roots scan) plus measurement slack. + pub overhead_seconds: f64, + pub attestations_packed: usize, + pub aggregates: usize, + /// Pool entries (new + known) visible to this build; reported so pool + /// growth across iterations is visible in the samples. + pub pool_entries: usize, +} + +#[derive(Debug, Serialize)] +pub(crate) struct Environment { + pub client_version: &'static str, + /// Resolved leansig git revision from Cargo.lock. leansig is pinned to a + /// moving branch, so results are not comparable across revisions. + pub leansig_rev: &'static str, + pub os: &'static str, + pub arch: &'static str, + pub available_parallelism: usize, +} + +impl Environment { + pub(crate) fn collect() -> Self { + Self { + client_version: version::CLIENT_VERSION, + leansig_rev: env!("ETHLAMBDA_LEANSIG_REV"), + os: std::env::consts::OS, + arch: std::env::consts::ARCH, + available_parallelism: std::thread::available_parallelism() + .map(|n| n.get()) + .unwrap_or(0), + } + } +} + +#[derive(Debug, Serialize)] +pub(crate) struct Params { + pub mode: &'static str, + pub mock_crypto: bool, + pub num_validators: u64, + pub warmup_slots: u64, + pub proofs_per_data: u64, + pub seed: u64, + pub iterations: u64, + pub enable_proposer_aggregation: bool, + pub max_attestations_per_block: usize, +} + +#[derive(Debug, Serialize)] +pub(crate) struct Stats { + pub count: usize, + pub min_seconds: f64, + pub mean_seconds: f64, + pub p50_seconds: f64, + pub p90_seconds: f64, + pub max_seconds: f64, + /// Coefficient of variation (stddev / mean); NaN-free (0 when mean is 0). 
+ pub cv: f64, +} + +#[derive(Debug, Serialize)] +pub(crate) struct Summary { + pub phases: BTreeMap, + pub overhead: Stats, + pub wall: Stats, +} + +#[derive(Debug, Serialize)] +pub(crate) struct Report { + pub schema_version: u32, + pub environment: Environment, + pub params: Params, + pub samples: Vec, + pub summary: Summary, +} + +impl Report { + pub(crate) fn new(environment: Environment, params: Params, samples: Vec) -> Self { + let mut phases: BTreeMap = BTreeMap::new(); + if let Some(first) = samples.first() { + for phase in first.phases.keys() { + let values: Vec = samples + .iter() + .filter_map(|sample| sample.phases.get(phase).copied()) + .collect(); + phases.insert(phase.clone(), stats(&values)); + } + } + let overhead = stats( + &samples + .iter() + .map(|sample| sample.overhead_seconds) + .collect::>(), + ); + let wall = stats( + &samples + .iter() + .map(|sample| sample.wall_seconds) + .collect::>(), + ); + + if wall.cv > CV_WARN_THRESHOLD { + eprintln!( + "warning: wall-time coefficient of variation is {:.1}% (>{:.0}%); \ + results are noisy β€” check for background load or increase --iterations", + wall.cv * 100.0, + CV_WARN_THRESHOLD * 100.0 + ); + } + + Self { + schema_version: 1, + environment, + params, + samples, + summary: Summary { + phases, + overhead, + wall, + }, + } + } + + pub(crate) fn to_json(&self) -> eyre::Result { + serde_json::to_string_pretty(self).map_err(Into::into) + } + + pub(crate) fn human_table(&self) -> String { + let mut out = String::new(); + let params = &self.params; + let env = &self.environment; + let crypto = if params.mock_crypto { "mock" } else { "real" }; + let _ = writeln!( + out, + "Block-building benchmark β€” {} workload ({crypto} crypto)", + params.mode + ); + let _ = writeln!( + out, + " validators={} warmup_slots={} iterations={} proofs_per_data={} seed={}", + params.num_validators, + params.warmup_slots, + params.iterations, + params.proofs_per_data, + params.seed + ); + let _ = writeln!( + out, + " 
enable_proposer_aggregation={} max_attestations_per_block={}", + params.enable_proposer_aggregation, params.max_attestations_per_block + ); + let _ = writeln!( + out, + " {} leansig={} os={} arch={} threads={}", + env.client_version, env.leansig_rev, env.os, env.arch, env.available_parallelism + ); + let _ = writeln!(out); + let _ = writeln!( + out, + " {:<18} {:>5} {:>10} {:>10} {:>10} {:>10} {:>10}", + "phase", "count", "min", "mean", "p50", "p90", "max" + ); + for (phase, stats) in &self.summary.phases { + let _ = writeln!(out, "{}", stats_row(phase, stats)); + } + let _ = writeln!(out, "{}", stats_row("overhead", &self.summary.overhead)); + let _ = writeln!(out, "{}", stats_row("wall", &self.summary.wall)); + out + } +} + +fn stats_row(name: &str, stats: &Stats) -> String { + format!( + " {:<18} {:>5} {:>10} {:>10} {:>10} {:>10} {:>10}", + name, + stats.count, + format_ms(stats.min_seconds), + format_ms(stats.mean_seconds), + format_ms(stats.p50_seconds), + format_ms(stats.p90_seconds), + format_ms(stats.max_seconds), + ) +} + +fn format_ms(seconds: f64) -> String { + format!("{:.3}ms", seconds * 1e3) +} + +fn stats(values: &[f64]) -> Stats { + if values.is_empty() { + return Stats { + count: 0, + min_seconds: 0.0, + mean_seconds: 0.0, + p50_seconds: 0.0, + p90_seconds: 0.0, + max_seconds: 0.0, + cv: 0.0, + }; + } + let mut sorted = values.to_vec(); + sorted.sort_by(|a, b| a.total_cmp(b)); + let count = sorted.len(); + let mean = sorted.iter().sum::() / count as f64; + let variance = sorted + .iter() + .map(|value| (value - mean).powi(2)) + .sum::() + / count as f64; + let cv = if mean > 0.0 { + variance.sqrt() / mean + } else { + 0.0 + }; + Stats { + count, + min_seconds: sorted[0], + mean_seconds: mean, + p50_seconds: percentile(&sorted, 0.50), + p90_seconds: percentile(&sorted, 0.90), + max_seconds: sorted[count - 1], + cv, + } +} + +/// Nearest-rank percentile over a sorted slice (no interpolation; sample +/// counts are small so exact sample values are 
preferable to blends). +fn percentile(sorted: &[f64], q: f64) -> f64 { + let index = ((sorted.len() - 1) as f64 * q).round() as usize; + sorted[index] +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn percentile_handles_single_sample() { + let sorted = [7.0]; + assert_eq!(percentile(&sorted, 0.0), 7.0); + assert_eq!(percentile(&sorted, 0.5), 7.0); + assert_eq!(percentile(&sorted, 1.0), 7.0); + } + + #[test] + fn percentile_odd_and_even_lengths() { + let odd = [1.0, 2.0, 3.0, 4.0, 5.0]; + assert_eq!(percentile(&odd, 0.5), 3.0); + assert_eq!(percentile(&odd, 1.0), 5.0); + let even = [1.0, 2.0, 3.0, 4.0]; + assert_eq!(percentile(&even, 0.5), 3.0); + assert_eq!(percentile(&even, 0.0), 1.0); + } + + #[test] + fn stats_on_known_values() { + let stats = stats(&[2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]); + assert_eq!(stats.count, 8); + assert_eq!(stats.min_seconds, 2.0); + assert_eq!(stats.max_seconds, 9.0); + assert_eq!(stats.mean_seconds, 5.0); + // population stddev of this classic set is 2.0 => cv = 0.4 + assert!((stats.cv - 0.4).abs() < 1e-12); + } + + #[test] + fn stats_on_empty_input_is_zeroed() { + let stats = stats(&[]); + assert_eq!(stats.count, 0); + assert_eq!(stats.mean_seconds, 0.0); + assert_eq!(stats.cv, 0.0); + } +} diff --git a/bin/ethlambda/src/cli.rs b/bin/ethlambda/src/cli.rs index 4bbe1683..9f00c827 100644 --- a/bin/ethlambda/src/cli.rs +++ b/bin/ethlambda/src/cli.rs @@ -5,24 +5,46 @@ use std::path::PathBuf; use crate::version; +// Node options plus optional sub-commands. +// +// The seven node-required arguments are declared `Option` with +// `required = true`: together with `subcommand_negates_reqs`, clap keeps +// enforcing them (with its native missing-argument errors) for the flat node +// invocation while letting sub-commands parse without any of them. Plain +// non-`Option` fields would make sub-command invocations fail during derive +// extraction even though validation was negated. 
+// `args_conflicts_with_subcommands` rejects mixed invocations +// (e.g. `--genesis x benchmark`) instead of silently ignoring the node flags. +// +// NOT a doc comment: clap derive turns struct doc comments into the +// `long_about` shown by `--help`, and this note is for maintainers, not users. #[derive(Debug, clap::Parser)] -#[command(name = "ethlambda", author = "LambdaClass", version = version::CLIENT_VERSION, about = "ethlambda consensus client")] +#[command( + name = "ethlambda", + author = "LambdaClass", + version = version::CLIENT_VERSION, + about = "ethlambda consensus client", + subcommand_negates_reqs = true, + args_conflicts_with_subcommands = true +)] pub(crate) struct CliOptions { + #[command(subcommand)] + pub(crate) command: Option, /// Path to the chain genesis config (e.g., config.yaml). - #[arg(long)] - pub(crate) genesis: PathBuf, + #[arg(long, required = true)] + pub(crate) genesis: Option, /// Path to the validator registry (e.g., annotated_validators.yaml). - #[arg(long)] - pub(crate) validators: PathBuf, + #[arg(long, required = true)] + pub(crate) validators: Option, /// Path to the bootnode list (e.g., nodes.yaml). - #[arg(long)] - pub(crate) bootnodes: PathBuf, + #[arg(long, required = true)] + pub(crate) bootnodes: Option, /// Path to validator-config.yaml (validator name registry for metrics labels). - #[arg(long)] - pub(crate) validator_config: PathBuf, + #[arg(long, required = true)] + pub(crate) validator_config: Option, /// Directory containing per-validator XMSS keys (e.g., hash-sig-keys/). 
- #[arg(long)] - pub(crate) hash_sig_keys_dir: PathBuf, + #[arg(long, required = true)] + pub(crate) hash_sig_keys_dir: Option, #[arg(long, default_value = "9000")] pub(crate) gossipsub_port: u16, #[arg(long, default_value = "127.0.0.1")] @@ -31,11 +53,11 @@ pub(crate) struct CliOptions { pub(crate) api_port: u16, #[arg(long, default_value = "5054")] pub(crate) metrics_port: u16, - #[arg(long)] - pub(crate) node_key: PathBuf, + #[arg(long, required = true)] + pub(crate) node_key: Option, /// The node ID to look up in annotated_validators.yaml (e.g., "ethlambda_0") - #[arg(long)] - pub(crate) node_id: String, + #[arg(long, required = true)] + pub(crate) node_id: Option, /// Base URL(s) of checkpoint-sync peer API servers (e.g., http://peer:5052). /// When set, skips genesis initialization and fetches the finalized state /// and block from each peer's `/lean/v0/states/finalized` and @@ -150,3 +172,118 @@ pub(crate) struct ShadowOptions { )] pub(crate) shadow_xmss_fake_proof_size: u64, } + +#[derive(Debug, clap::Subcommand)] +pub(crate) enum Command { + /// Benchmark block building offline against a controlled workload. + Benchmark(crate::benchmark::BenchmarkOptions), +} + +#[cfg(test)] +mod tests { + use super::*; + use clap::Parser as _; + use clap::error::ErrorKind; + + /// The flat node invocation shape used by lean-quickstart, the Dockerfile, + /// and the devnet skills. It must keep parsing unchanged. 
+ const FLAT_INVOCATION: &[&str] = &[ + "ethlambda", + "--genesis", + "config.yaml", + "--validators", + "annotated_validators.yaml", + "--bootnodes", + "nodes.yaml", + "--validator-config", + "validator-config.yaml", + "--hash-sig-keys-dir", + "hash-sig-keys/", + "--node-key", + "node.key", + "--node-id", + "ethlambda_0", + "--gossipsub-port", + "9001", + "--is-aggregator", + ]; + + #[test] + fn flat_node_invocation_parses_unchanged() { + let options = CliOptions::try_parse_from(FLAT_INVOCATION).expect("flat invocation parses"); + assert!(options.command.is_none()); + assert_eq!(options.genesis.as_deref(), Some("config.yaml".as_ref())); + assert_eq!(options.node_id.as_deref(), Some("ethlambda_0")); + assert_eq!(options.gossipsub_port, 9001); + assert!(options.is_aggregator); + } + + #[test] + fn missing_required_node_flag_keeps_clap_error() { + let without_genesis: Vec<&str> = FLAT_INVOCATION + .iter() + .enumerate() + .filter(|(i, _)| *i != 1 && *i != 2) + .map(|(_, arg)| *arg) + .collect(); + let err = CliOptions::try_parse_from(without_genesis) + .expect_err("missing --genesis must still error"); + assert_eq!(err.kind(), ErrorKind::MissingRequiredArgument); + } + + #[test] + fn benchmark_subcommand_parses_without_node_args() { + let options = CliOptions::try_parse_from([ + "ethlambda", + "benchmark", + "synthetic", + "--mock-crypto", + "--iterations", + "3", + ]) + .expect("benchmark subcommand parses without node args"); + assert!(matches!(options.command, Some(Command::Benchmark(_)))); + assert!(options.genesis.is_none()); + assert!(options.node_id.is_none()); + } + + #[test] + fn node_flags_mixed_with_subcommand_are_rejected() { + let err = CliOptions::try_parse_from([ + "ethlambda", + "--genesis", + "config.yaml", + "benchmark", + "synthetic", + ]) + .expect_err("mixing node flags with a subcommand must be rejected"); + assert_eq!(err.kind(), ErrorKind::ArgumentConflict); + } + + #[test] + fn mock_crypto_conflicts_with_proposer_aggregation() { + let err = 
CliOptions::try_parse_from([ + "ethlambda", + "benchmark", + "synthetic", + "--mock-crypto", + "--enable-proposer-aggregation", + ]) + .expect_err("--mock-crypto cannot drive real leanVM aggregation"); + assert_eq!(err.kind(), ErrorKind::ArgumentConflict); + } + + #[test] + fn node_id_value_named_benchmark_is_not_a_subcommand() { + let mut args: Vec<&str> = FLAT_INVOCATION.to_vec(); + let node_id_position = args + .iter() + .position(|arg| *arg == "ethlambda_0") + .expect("node id value present"); + args[node_id_position] = "benchmark"; + let options = + CliOptions::try_parse_from(args).expect("flag values must not become subcommands"); + assert!(options.command.is_none()); + assert_eq!(options.node_id.as_deref(), Some("benchmark")); + } +} diff --git a/bin/ethlambda/src/main.rs b/bin/ethlambda/src/main.rs index b933b928..c5d2c64d 100644 --- a/bin/ethlambda/src/main.rs +++ b/bin/ethlambda/src/main.rs @@ -1,3 +1,4 @@ +mod benchmark; mod checkpoint_sync; mod cli; mod fd_limit; @@ -71,15 +72,31 @@ const ASCII_ART: &str = r#" #[cfg_attr(not(feature = "shadow-integration"), tokio::main)] #[cfg_attr(feature = "shadow-integration", tokio::main(flavor = "current_thread"))] async fn main() -> eyre::Result<()> { - let filter = EnvFilter::builder() - .with_default_directive(tracing::Level::INFO.into()) - .from_env_lossy(); - let subscriber = Registry::default().with(tracing_subscriber::fmt::layer().with_filter(filter)); - tracing::subscriber::set_global_default(subscriber) - .wrap_err("failed to set global tracing subscriber")?; - let options = CliOptions::parse(); + // Benchmark mode logs to stderr (default WARN) so the report on stdout + // stays pipe-clean; the node path keeps its stdout INFO logging. 
+ if options.command.is_some() { + let filter = EnvFilter::builder() + .with_default_directive(tracing::Level::WARN.into()) + .from_env_lossy(); + let subscriber = Registry::default().with( + tracing_subscriber::fmt::layer() + .with_writer(std::io::stderr) + .with_filter(filter), + ); + tracing::subscriber::set_global_default(subscriber) + .wrap_err("failed to set global tracing subscriber")?; + } else { + let filter = EnvFilter::builder() + .with_default_directive(tracing::Level::INFO.into()) + .from_env_lossy(); + let subscriber = + Registry::default().with(tracing_subscriber::fmt::layer().with_filter(filter)); + tracing::subscriber::set_global_default(subscriber) + .wrap_err("failed to set global tracing subscriber")?; + } + #[cfg(feature = "shadow-integration")] init_shadow_cost(&options.shadow); @@ -88,6 +105,10 @@ async fn main() -> eyre::Result<()> { ethlambda_blockchain::metrics::set_node_info("ethlambda", version::CLIENT_VERSION); ethlambda_blockchain::metrics::set_node_start_time(); + if let Some(cli::Command::Benchmark(benchmark_options)) = options.command { + return benchmark::run(benchmark_options); + } + let rpc_config = RpcConfig { http_address: options.http_address, api_port: options.api_port, @@ -117,12 +138,19 @@ async fn main() -> eyre::Result<()> { return run_test_driver(rpc_config).await; } - let node_p2p_key = read_hex_file_bytes(&options.node_key).wrap_err_with(|| { - format!( - "failed to load node key from {}", - options.node_key.display() - ) - })?; + // clap enforces the node-required arguments when no sub-command is given + // (`subcommand_negates_reqs` only lifts them for sub-commands, which + // returned above), so these unwraps cannot fail on the node path. 
+ let config_path = require_arg(options.genesis, "--genesis")?; + let validators_path = require_arg(options.validators, "--validators")?; + let bootnodes_path = require_arg(options.bootnodes, "--bootnodes")?; + let validator_config = require_arg(options.validator_config, "--validator-config")?; + let validator_keys_dir = require_arg(options.hash_sig_keys_dir, "--hash-sig-keys-dir")?; + let node_key_path = require_arg(options.node_key, "--node-key")?; + let node_id = require_arg(options.node_id, "--node-id")?; + + let node_p2p_key = read_hex_file_bytes(&node_key_path) + .wrap_err_with(|| format!("failed to load node key from {}", node_key_path.display()))?; let p2p_socket = SocketAddr::new(IpAddr::from([0, 0, 0, 0]), options.gossipsub_port); #[cfg(all(not(target_env = "msvc"), feature = "jemalloc"))] @@ -130,13 +158,7 @@ async fn main() -> eyre::Result<()> { #[cfg(any(target_env = "msvc", not(feature = "jemalloc")))] info!("Using system allocator"); - info!(node_key=?options.node_key, "got node key"); - - let config_path = options.genesis; - let bootnodes_path = options.bootnodes; - let validators_path = options.validators; - let validator_config = options.validator_config; - let validator_keys_dir = options.hash_sig_keys_dir; + info!(node_key=?node_key_path, "got node key"); let config_yaml = std::fs::read_to_string(&config_path).wrap_err_with(|| { format!( @@ -180,9 +202,8 @@ async fn main() -> eyre::Result<()> { let bootnodes = read_bootnodes(&bootnodes_path)?; - let validator_keys = - read_validator_keys(&validators_path, &validator_keys_dir, &options.node_id) - .wrap_err("failed to load validator keys")?; + let validator_keys = read_validator_keys(&validators_path, &validator_keys_dir, &node_id) + .wrap_err("failed to load validator keys")?; let data_dir = std::path::absolute(&options.data_dir).unwrap_or_else(|_| options.data_dir.clone()); @@ -589,6 +610,14 @@ fn read_validator_keys( Ok(validator_keys) } +/// Unwrap a node-required CLI argument. 
+/// +/// clap's `required = true` guarantees presence whenever no sub-command is +/// given, so a failure here means the CLI definition and the node path drifted. +fn require_arg(value: Option, flag: &str) -> eyre::Result { + value.ok_or_else(|| eyre::eyre!("missing required argument {flag}")) +} + fn read_hex_file_bytes(path: impl AsRef) -> eyre::Result> { let path = path.as_ref(); let file_content = std::fs::read_to_string(path) diff --git a/crates/blockchain/src/block_builder.rs b/crates/blockchain/src/block_builder.rs index 919a7719..b25d8c7c 100644 --- a/crates/blockchain/src/block_builder.rs +++ b/crates/blockchain/src/block_builder.rs @@ -737,10 +737,14 @@ fn extend_proofs_greedily( } let mut covered: HashSet = HashSet::new(); - let mut remaining_indices: HashSet = (0..proofs.len()).collect(); + let mut remaining_indices: Vec = (0..proofs.len()).collect(); while !remaining_indices.is_empty() { - // Pick proof covering the most uncovered validators (count only, no allocation) + // Pick proof covering the most uncovered validators (count only, no + // allocation). Coverage ties break to the lowest index (pool insertion + // order): a HashSet here would let hash-iteration order pick an + // arbitrary equal-coverage winner, making the built block's + // aggregation bits differ from run to run. 
let best = remaining_indices .iter() .map(|&idx| { @@ -750,7 +754,7 @@ fn extend_proofs_greedily( .count(); (idx, count) }) - .max_by_key(|&(_, count)| count); + .max_by_key(|&(idx, count)| (count, Reverse(idx))); let Some((best_idx, best_count)) = best else { break; @@ -777,7 +781,7 @@ fn extend_proofs_greedily( covered.extend(new_covered); selected.push((att, proof.clone())); - remaining_indices.remove(&best_idx); + remaining_indices.retain(|&idx| idx != best_idx); } } diff --git a/crates/storage/src/lib.rs b/crates/storage/src/lib.rs index 9b21dc85..6b9a89a0 100644 --- a/crates/storage/src/lib.rs +++ b/crates/storage/src/lib.rs @@ -5,4 +5,6 @@ mod state_diff; mod store; pub use api::{ALL_TABLES, StorageBackend, StorageReadView, StorageWriteBatch, Table}; -pub use store::{ForkCheckpoints, GetForkchoiceStoreError, MAX_RESUMABLE_DB_STATE_AGE, Store}; +pub use store::{ + ForkCheckpoints, GetForkchoiceStoreError, MAX_RESUMABLE_DB_STATE_AGE, NEW_PAYLOAD_CAP, Store, +}; diff --git a/crates/storage/src/store.rs b/crates/storage/src/store.rs index c916bfca..f0a181b4 100644 --- a/crates/storage/src/store.rs +++ b/crates/storage/src/store.rs @@ -120,7 +120,9 @@ const AGGREGATED_PAYLOAD_CAP: usize = 512; /// Hard cap for the new (pending) aggregated payload buffer. /// Smaller than known since new payloads are drained every interval (~4s). -const NEW_PAYLOAD_CAP: usize = 64; +/// Public so pool-seeding callers (the block-building benchmark) can reject +/// workloads that a single insertion batch would silently evict. +pub const NEW_PAYLOAD_CAP: usize = 64; /// Hard cap for the gossip signature buffer (individual signatures, not distinct data_roots). /// With 4 validators and 4-second slots, 2048 signatures covers ~512 slots (~34 min). 
diff --git a/docs/plans/block-building-benchmark.md b/docs/plans/block-building-benchmark.md new file mode 100644 index 00000000..9c3ec9b4 --- /dev/null +++ b/docs/plans/block-building-benchmark.md @@ -0,0 +1,145 @@ +# Plan: `ethlambda benchmark` — offline block-building benchmark sub-command + +## Context + +The README roadmap lists **"Optimize block building" (issue #465)** as the top near-term +priority, but block building is only observable today through Prometheus histograms on a +live devnet — there is no reproducible, offline way to measure it or to compare an +optimization against a baseline. This adds an `ethlambda benchmark` sub-command that +drives the exact production proposer code path against controlled workloads. + +Fixed scope decisions: offline harness; synthetic **and** replay-from-datadir workloads; +real XMSS/leanVM crypto by default with a mock fast mode. + +## What gets measured + +The proposer pipeline as executed at interval 4, entered through the same functions the +actor calls: + +``` +produce_block_with_signatures (crates/blockchain/src/store.rs:788) ← already public + ├─ preamble: on_tick → interval 0, promote attestations, + │ fork-choice head, pool deep-clone → reported as derived "build_overhead" + └─ build_block: select_payloads → compact → stf_simulate +seal_block (extracted from crates/blockchain/src/lib.rs:504-631, see refactor) + └─ sign → wrap_proposer_type1 (leanVM) → merge_type_2 (leanVM) +``` + +**Excluded** (same boundary as the node's own `time_block_building` metric): gossip +publish, slot-alignment sleep, block import. + +**Phase capture with zero hot-path changes**: the existing +`lean_block_proposal_attestation_build_phase_seconds` HistogramVec accumulates exact f64 +sums, observed exactly once per phase per build — the harness deltas per-label sums +between iterations (prometheus 0.14 exposes `get_sample_sum()`, readable in-process).
+Guards: assert per-phase count advanced by exactly 1, and warn if `wall − Σphases` +exceeds 2%. + +**Statistics**: warmup 3 + 10 iterations (defaults, configurable); min/mean/p50/p90/max + +CV>10% warning per phase; raw samples always exported; outliers never auto-discarded +(XMSS rejection-sampling and OTS window advancement produce legitimate tails). Each +iteration records `block.hash_tree_root()` — diffing root sequences between baseline and +optimized runs proves an optimization changed only speed, not attestation selection. + +## CLI (verified on clap 4.6.1) + +Every existing flat invocation (devnet skills, Dockerfile, lean-quickstart) parses +byte-for-byte unchanged. + +- `cli.rs`: add `#[command(subcommand_negates_reqs = true, + args_conflicts_with_subcommands = true)]` + `command: Option<Command>`; the 7 required + args become `Option<T>` with `#[arg(long, required = true)]` (mandatory — non-Option + fields break `ethlambda benchmark` even with `negates_reqs`). Missing-arg error + messages for the node path are unchanged; mixed `--genesis x benchmark` is rejected. +- `main.rs`: early branch right after parse (mirrors the HIVE test-driver early-return); + node path unwraps the Options in one helper. + +``` +ethlambda benchmark synthetic --num-validators 8 --warmup-slots 8 + --proofs-per-data 1 --seed 42 [--key-cache-dir <dir>] # cache: M2 +ethlambda benchmark replay --data-dir <path> --genesis config.yaml [--no-copy] + [--validators … --hash-sig-keys-dir … --node-id …] # enables seal +common: --iterations 10 --mock-crypto --enable-proposer-aggregation + --max-attestations-per-block 3 --format human|json --output <file> +``` + +Implementation refinements (M1): there is no `--pool-datas` knob — the pool +accumulates one distinct `AttestationData` per elapsed slot naturally, exactly +as on a live node, and per-sample `pool_entries` makes the growth visible.
+`--proofs-per-data` defaults to 1 (a single full-coverage aggregate per data, +what a committee aggregator emits) so justification/finalization advance every +slot; higher values exercise multi-proof selection but stall justification +without proposer aggregation — the real coverage cost of that node flag. +Warmup slots double as chain advancement, so there is no separate warmup- +iterations knob. + +Known pre-existing issue (unrelated): `lean-quickstart/client-cmds/ethlambda-cmd.sh` +still uses `--custom-network-config-dir`, removed in #321 — needs an upstream fix. + +## Harness design (`bin/ethlambda/src/benchmark/{mod,keys,corpus,report}.rs`) + +- **Iteration model**: slots advance monotonically, proposer rotates `slot % N` (matches + round-robin `is_proposer`); each built block is imported via + `on_block_without_verification` so the empty-slot gap stays constant; the pool is + re-seeded per iteration in fixed seeded order (insertion order pins proof choice). +- **Keys**: seeded in-process keygen, cached on disk keyed by (leansig rev, seed, index, + role). Minimal-window keygen costs ~1s/key in release (verified empirically; the window + floors at 131,072 epochs — ample for thousands of bench slots; the 2^32 lifetime is + fixed in the type and unaffected). Arbitrary N, no Docker, no fixture download. +- **Synthetic corpus**: `State::from_genesis` + `InMemoryBackend`; K warmup blocks; pool + = attestations from the last `--pool-datas` slots × `--proofs-per-data` real type-1 + proofs via `aggregate_signatures` (built outside the timed span, progress on stderr). +- **`--mock-crypto`**: empty proofs, forces the `keep_best` path (clap `conflicts_with + --enable-proposer-aggregation`, since `compact` invokes the real prover), seal skipped + and reported as null-not-zero. Runs in seconds → CI smoke test.
+- **Replay (v1 scope)**: copies the datadir before opening (mandatory — `on_tick`/head + updates write Metadata per interval and RocksDB has no read-only mode; `--no-copy` + opt-out with a warning). Loads via `Store::from_db_state`, builds at head+1. Pools are + in-memory-only and unrecoverable from disk, so v1 replay measures selection + STF + + state-root realism on real deep states; supplying the node's key trio additionally + enables the seal phases. Type-2 splitting / pool recording = deferred future work. +- **Report**: human table + `--format json` (stdout pipe-clean, logs to stderr) with + `schema_version`, environment (CPU model, cores, OS, ethlambda rev via vergen, leansig + lock rev via a small `build.rs` Cargo.lock parse — leansig tracks the moving `devnet4` + branch), full params + seed, per-iteration raw samples. One configuration per process + invocation (global cumulative histograms, rayon/prover state). + +## The one library refactor + +Extract `crates/blockchain/src/lib.rs:504-631` (proposer sign → type-1 wrap → pubkey +resolution → type-2 merge) into `pub fn seal_block(...) -> Result` in the blockchain crate; `propose_block` calls it. Justified: the +benchmark cannot reach these phases otherwise (a bin-side copy would drift), it collapses +six repeated error-return-with-metric blocks into one `match` (net-negative LOC), and +adding `sign`/`wrap_proposer_type1`/`merge_type_2` labels to the existing phase histogram +gives production dashboards the currently-untimed expensive steps issue #465 targets. +Verbatim move, own commit, devnet smoke before merge. `build_block` stays `pub(crate)`. + +## Milestones + +| | Deliverable | Files | +|---|---|---| +| **M1** — CLI + mock end-to-end | `ethlambda benchmark synthetic --mock-crypto` runs in seconds; table + JSON; flat-invocation compat tests; `make bench`; CI smoke step in the existing Test job.
 Includes one small library fix found by the determinism gate: `extend_proofs_greedily` kept its candidate set in a `HashSet`, so equal-coverage proof ties were broken by randomized hash order and block contents differed run to run — ties now break to the lowest pool index | `cli.rs`, `main.rs`, `benchmark/{mod,corpus,report}.rs`, `build.rs` (leansig rev), `Makefile`, `ci.yml`, `block_builder.rs` (tie-break) | +| **M2** — real crypto | `seal_block` extraction (first commit) + 3 new phase labels; seeded keygen + cache; real type-1 pools; all 7 phases measured; first baseline JSON recorded | `crates/blockchain/src/{seal.rs,lib.rs,metrics.rs}`, `benchmark/keys.rs`, `types/src/signature.rs` (keygen wrapper) | +| **M3** — replay + docs | replay mode against a devnet-runner datadir; `docs/benchmarking.md` + `SUMMARY.md` + README roadmap line | `benchmark/corpus.rs`, docs | + +One PR per milestone; `make fmt/lint/test` before each; M2 additionally gated by a devnet +smoke via `test-branch.sh`. + +## Verification + +- clap `try_parse_from` tests: flat invocation parses, missing-arg errors preserved, + `benchmark` parses without node args, mixed invocation rejected. +- Determinism: two same-seed runs produce identical per-iteration block-root sequences. +- Accounting: Σphases ≥ 98% of wall per iteration, per-phase count deltas == 1. +- CI mock smoke: `benchmark synthetic --mock-crypto --num-validators 4 --iterations 3 + --format json | jq -e '.schema_version == 1'`. + +## Main risks + +- Real-mode setup cost: iterations × pool proofs of leanVM proving → default real run + takes minutes (mitigated: mock mode, small defaults, ETA logging, key cache). +- `seal_block` extraction touches consensus-critical `propose_block` — verbatim + extraction, careful review of the six error branches, devnet smoke. +- Cross-run comparability: rayon-parallel proving is machine/load-sensitive and leansig + is a moving branch — the env block in every report is the guard, not a fix.