Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,11 @@ jobs:

- name: Run fixture-based tests
uses: ./.github/actions/run-fixture-tests

# Reuses the release build from the test step; validates the benchmark
# harness end-to-end and its JSON output contract in a few seconds.
# `jq -e` derives its exit status from the filter result, so the step fails
# unless the report has `schema_version` 1 and exactly 3 samples (presumably
# one sample per `--iterations`; confirm against the benchmark report code).
- name: Benchmark smoke (mock crypto)
run: |
cargo run --release --bin ethlambda -- benchmark synthetic --mock-crypto \
--num-validators 4 --warmup-slots 4 --iterations 3 --format json \
| jq -e '.schema_version == 1 and (.samples | length == 3)'
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: help fmt lint docker-build shadow-build shadow-docker-build run-devnet test docs docs-deps docs-serve
.PHONY: help fmt lint bench docker-build shadow-build shadow-docker-build run-devnet test docs docs-deps docs-serve

help: ## 📚 Show help for each of the Makefile recipes
@grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
Expand All @@ -13,6 +13,11 @@ test: leanSpec/fixtures ## 🧪 Run all tests
# Tests need to be run on release to avoid stack overflows during signature verification/aggregation
cargo test --workspace --release

# Arguments passed to `ethlambda benchmark` by the `bench` target. `?=` only
# assigns when BENCH_ARGS is not already defined, so a value given on the
# command line (`make bench BENCH_ARGS="..."`) or in the environment wins.
BENCH_ARGS ?= synthetic --mock-crypto

bench: ## 🏁 Benchmark block building offline (override BENCH_ARGS to customize)
cargo run --release --bin ethlambda -- benchmark $(BENCH_ARGS)

GIT_COMMIT=$(shell git rev-parse HEAD)
GIT_BRANCH=$(shell git rev-parse --abbrev-ref HEAD)
DOCKER_TAG?=local
Expand Down
2 changes: 2 additions & 0 deletions bin/ethlambda/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ shadow-integration = ["ethlambda-crypto/shadow-integration"]
[dependencies]
ethlambda-blockchain.workspace = true
ethlambda-crypto.workspace = true
ethlambda-metrics.workspace = true
ethlambda-network-api.workspace = true
ethlambda-p2p.workspace = true
ethlambda-types.workspace = true
Expand All @@ -37,6 +38,7 @@ tracing.workspace = true
tracing-subscriber = "0.3"

serde.workspace = true
serde_json.workspace = true
serde_yaml_ng.workspace = true
hex.workspace = true

Expand Down
42 changes: 42 additions & 0 deletions bin/ethlambda/build.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::path::PathBuf;

use vergen_git2::{Emitter, Git2Builder, RustcBuilder};

fn main() -> Result<(), Box<dyn std::error::Error>> {
Expand All @@ -12,5 +14,45 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.add_instructions(&git2)?
.emit()?;

emit_leansig_rev();

Ok(())
}

/// Exposes the leansig git revision recorded in the workspace Cargo.lock to
/// the crate as the compile-time environment variable `ETHLAMBDA_LEANSIG_REV`.
///
/// leansig tracks a moving branch, so a plain `cargo update` can change the
/// crypto being measured without any ethlambda diff. Benchmark reports embed
/// this revision so results remain interpretable across lock bumps.
///
/// When the revision cannot be determined the variable is set to the literal
/// `unknown` instead of failing the build.
fn emit_leansig_rev() {
    let rev = match leansig_rev_from_lockfile() {
        Some(rev) => rev,
        None => String::from("unknown"),
    };
    println!("cargo:rustc-env=ETHLAMBDA_LEANSIG_REV={rev}");

    // Ask cargo to re-run this script when the lockfile changes, so the
    // embedded revision follows lock bumps.
    let Some(lockfile) = workspace_lockfile() else {
        return;
    };
    println!("cargo:rerun-if-changed={}", lockfile.display());
}

/// Returns the path of the workspace `Cargo.lock`, two directory levels above
/// this crate's manifest directory.
///
/// Yields `None` when `CARGO_MANIFEST_DIR` is unset or not valid Unicode. The
/// path is not canonicalized and its existence is not checked.
fn workspace_lockfile() -> Option<PathBuf> {
    std::env::var("CARGO_MANIFEST_DIR")
        .ok()
        .map(|manifest_dir| PathBuf::from(manifest_dir).join("../../Cargo.lock"))
}

/// Reads the workspace `Cargo.lock` and returns the git revision that the
/// `leansig` package is locked to.
///
/// Returns `None` when the lockfile cannot be located or read, when it has no
/// `leansig` entry, or when that entry's source carries no `#<rev>` fragment.
fn leansig_rev_from_lockfile() -> Option<String> {
    let lockfile = std::fs::read_to_string(workspace_lockfile()?).ok()?;
    parse_leansig_rev(&lockfile)
}

/// Extracts the `leansig` revision from the text of a `Cargo.lock`.
///
/// `name` and `source` are collected per table, so the result does not depend
/// on the order in which the two keys appear. The revision is the fragment
/// after the last `#` of the source, e.g.
/// `git+https://github.com/leanEthereum/leanSig?branch=devnet4#<rev>`.
/// A source without a fragment (such as a registry source) yields `None`
/// rather than the whole source string.
fn parse_leansig_rev(lockfile: &str) -> Option<String> {
    let mut is_leansig = false;
    let mut source: Option<&str> = None;

    for line in lockfile.lines().map(str::trim) {
        if line.starts_with('[') {
            // A table header starts a new entry: forget the previous one so its
            // fields can never be paired with this entry's fields.
            is_leansig = false;
            source = None;
        } else if line == "name = \"leansig\"" {
            is_leansig = true;
        } else if let Some(value) = line.strip_prefix("source = ") {
            source = Some(value.trim_matches('"'));
        }

        if let (true, Some(source)) = (is_leansig, source) {
            let (_, rev) = source.rsplit_once('#')?;
            return (!rev.is_empty()).then(|| rev.to_string());
        }
    }
    None
}
Comment on lines +40 to +58

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 The parser sets in_leansig_package = true only after seeing name = "leansig", then looks for source = ... in subsequent lines. TOML does not mandate field order within a table, so a future cargo version or lock-file reformatter that emits source before name would silently produce "unknown" for ETHLAMBDA_LEANSIG_REV. The failure is graceful but invisible — a benchmark report with "leansig_rev": "unknown" is still compared against other reports, potentially leading to false equivalence. A two-pass parse (collect both fields per package block before extracting the rev) would be more robust.

Suggested change
fn leansig_rev_from_lockfile() -> Option<String> {
let lockfile = std::fs::read_to_string(workspace_lockfile()?).ok()?;
let mut in_leansig_package = false;
for line in lockfile.lines() {
let line = line.trim();
if line == "[[package]]" {
in_leansig_package = false;
} else if line == "name = \"leansig\"" {
in_leansig_package = true;
} else if in_leansig_package {
// source = "git+https://github.com/leanEthereum/leanSig?branch=devnet4#<rev>"
if let Some(source) = line.strip_prefix("source = ") {
let rev = source.trim_matches('"').rsplit('#').next()?;
return Some(rev.to_string());
}
}
}
None
}
fn leansig_rev_from_lockfile() -> Option<String> {
let lockfile = std::fs::read_to_string(workspace_lockfile()?).ok()?;
// Collect both fields per [[package]] block before extracting the rev,
// so the result is independent of TOML field order within a table.
let mut pending_name: Option<String> = None;
let mut pending_source: Option<String> = None;
for line in lockfile.lines() {
let line = line.trim();
if line == "[[package]]" {
pending_name = None;
pending_source = None;
} else if let Some(name) = line.strip_prefix("name = ") {
pending_name = Some(name.trim_matches('"').to_string());
} else if let Some(source) = line.strip_prefix("source = ") {
pending_source = Some(source.trim_matches('"').to_string());
}
if pending_name.as_deref() == Some("leansig") {
if let Some(ref src) = pending_source {
// source = "git+https://github.com/leanEthereum/leanSig?branch=devnet4#<rev>"
let rev = src.rsplit('#').next()?;
return Some(rev.to_string());
}
}
}
None
}
Prompt To Fix With AI
This is a comment left during a code review.
Path: bin/ethlambda/build.rs
Line: 40-58

Comment:
The parser sets `in_leansig_package = true` only after seeing `name = "leansig"`, then looks for `source = ...` in subsequent lines. TOML does not mandate field order within a table, so a future `cargo` version or lock-file reformatter that emits `source` before `name` would silently produce "unknown" for `ETHLAMBDA_LEANSIG_REV`. The failure is graceful but invisible — a benchmark report with `"leansig_rev": "unknown"` is still compared against other reports, potentially leading to false equivalence. A two-pass parse (collect both fields per package block before extracting the rev) would be more robust.

```suggestion
fn leansig_rev_from_lockfile() -> Option<String> {
    let lockfile = std::fs::read_to_string(workspace_lockfile()?).ok()?;
    // Collect both fields per [[package]] block before extracting the rev,
    // so the result is independent of TOML field order within a table.
    let mut pending_name: Option<String> = None;
    let mut pending_source: Option<String> = None;
    for line in lockfile.lines() {
        let line = line.trim();
        if line == "[[package]]" {
            pending_name = None;
            pending_source = None;
        } else if let Some(name) = line.strip_prefix("name = ") {
            pending_name = Some(name.trim_matches('"').to_string());
        } else if let Some(source) = line.strip_prefix("source = ") {
            pending_source = Some(source.trim_matches('"').to_string());
        }
        if pending_name.as_deref() == Some("leansig") {
            if let Some(ref src) = pending_source {
                // source = "git+https://github.com/leanEthereum/leanSig?branch=devnet4#<rev>"
                let rev = src.rsplit('#').next()?;
                return Some(rev.to_string());
            }
        }
    }
    None
}
```

How can I resolve this? If you propose a fix, please make it concise.

145 changes: 145 additions & 0 deletions bin/ethlambda/src/benchmark/corpus.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
//! Synthetic benchmark corpus: deterministic validators, a genesis store, and
//! per-slot attestation-pool seeding.

use std::sync::Arc;

use ethlambda_blockchain::store::produce_attestation_data;
use ethlambda_storage::{Store, backend::InMemoryBackend};
use ethlambda_types::{
attestation::{AggregationBits, HashedAttestationData},
block::SingleMessageAggregate,
state::{State, Validator, ValidatorPubkeyBytes},
};

/// Fixed genesis time for synthetic runs. The harness derives every tick
/// timestamp from slot numbers relative to this value and never reads the wall
/// clock, so runs are reproducible at any time of day.
const GENESIS_TIME: u64 = 1_700_000_000;

/// Parameters of the synthetic benchmark corpus.
///
/// The value only holds sizing parameters; stores and attestation pools are
/// built on demand by its methods.
#[derive(Debug, Clone)]
pub(crate) struct SyntheticCorpus {
    /// Size of the validator set; validators are indexed `0..num_validators`.
    num_validators: u64,
    /// Number of disjoint aggregates each slot's attestations are split into
    /// when seeding the pool.
    proofs_per_data: u64,
}

impl SyntheticCorpus {
    /// Describes a corpus of `num_validators` validators whose per-slot
    /// attestations are split into `proofs_per_data` aggregates.
    pub(crate) fn new(num_validators: u64, proofs_per_data: u64) -> Self {
        Self {
            num_validators,
            proofs_per_data,
        }
    }

    /// Creates a genesis store backed by an in-memory backend, populated with
    /// `num_validators` validators derived from `seed`.
    ///
    /// The pubkeys are deterministic placeholder bytes. In mock-crypto mode no
    /// code path decodes them: signature verification is skipped and
    /// best-proof compaction never resolves pubkeys.
    pub(crate) fn genesis_store(&self, seed: u64) -> Store {
        let mut rng_state = seed;
        let validators = (0..self.num_validators)
            .map(|index| {
                // Draw order is part of the output: the attestation key is
                // generated before the proposal key for every validator.
                let attestation_pubkey = synthetic_pubkey(&mut rng_state);
                let proposal_pubkey = synthetic_pubkey(&mut rng_state);
                Validator {
                    attestation_pubkey,
                    proposal_pubkey,
                    index,
                }
            })
            .collect();

        let genesis_state = State::from_genesis(GENESIS_TIME, validators);
        let backend = Arc::new(InMemoryBackend::new());
        Store::from_anchor_state(backend, genesis_state)
    }

    /// Fills the pending ("new") pool with attestations from the whole
    /// validator set for `attestation_slot`, divided into `proofs_per_data`
    /// disjoint aggregates.
    ///
    /// This mirrors what committee aggregators gossip during a slot: several
    /// aggregates for the same `AttestationData`, each covering a subset of
    /// validators. The proposal tick then promotes them to the known pool,
    /// exactly as on a live node. Entries are inserted in a fixed order
    /// because pool insertion order pins within-entry proof choice during
    /// selection.
    pub(crate) fn seed_pool(&self, store: &mut Store, attestation_slot: u64) {
        let data = produce_attestation_data(store, attestation_slot);
        let groups = participant_groups(self.num_validators, self.proofs_per_data);
        let entries = groups
            .into_iter()
            .map(|participants| {
                let hashed = HashedAttestationData::new(data.clone());
                let aggregate = SingleMessageAggregate::empty(participants);
                (hashed, aggregate)
            })
            .collect();
        store.insert_new_aggregated_payloads_batch(entries);
    }
}

/// Partition validators `0..num_validators` into `groups` disjoint bitfields,
/// assigning validator `i` to group `i % groups`.
///
/// `groups` is clamped to `1..=num_validators`, so every returned group is
/// non-empty and the union covers every validator exactly once. Each bitfield
/// has length `num_validators`.
///
/// An empty validator set yields an empty partition: there is nothing to
/// assign, and clamping to an empty range would otherwise panic.
///
/// # Panics
///
/// Panics if `num_validators` exceeds the bitlist limit of `AggregationBits`.
fn participant_groups(num_validators: u64, groups: u64) -> Vec<AggregationBits> {
    // `clamp(1, 0)` asserts `min <= max`, so handle the empty set up front.
    if num_validators == 0 {
        return Vec::new();
    }
    let groups = groups.clamp(1, num_validators);
    (0..groups)
        .map(|group| {
            let mut bits = AggregationBits::with_length(num_validators as usize)
                .expect("validator count is within the bitlist limit");
            // Members of this group: group, group + groups, group + 2 * groups, ...
            for index in (group..num_validators).step_by(groups as usize) {
                bits.set(index as usize, true)
                    .expect("index is within the bitlist length");
            }
            bits
        })
        .collect()
}

/// splitmix64 step: advances `state` by a fixed odd increment and returns a
/// mixed 64-bit word derived from the new state.
///
/// Used as a small deterministic source of placeholder pubkey bytes so the
/// benchmark does not need a rand dependency. All arithmetic wraps.
fn splitmix64(state: &mut u64) -> u64 {
    const INCREMENT: u64 = 0x9e37_79b9_7f4a_7c15;
    const MULTIPLIER_1: u64 = 0xbf58_476d_1ce4_e5b9;
    const MULTIPLIER_2: u64 = 0x94d0_49bb_1331_11eb;

    *state = state.wrapping_add(INCREMENT);
    let advanced = *state;
    let first_mix = (advanced ^ (advanced >> 30)).wrapping_mul(MULTIPLIER_1);
    let second_mix = (first_mix ^ (first_mix >> 27)).wrapping_mul(MULTIPLIER_2);
    second_mix ^ (second_mix >> 31)
}

/// Produces 52 deterministic placeholder pubkey bytes from `rng_state`.
///
/// The buffer is filled front to back with the little-endian bytes of
/// successive `splitmix64` words; the final word contributes only its first
/// four bytes. `rng_state` is advanced once per word drawn.
fn synthetic_pubkey(rng_state: &mut u64) -> ValidatorPubkeyBytes {
    let mut pubkey = [0u8; 52];
    let mut offset = 0;
    while offset < pubkey.len() {
        let take = (pubkey.len() - offset).min(8);
        let word = splitmix64(rng_state).to_le_bytes();
        pubkey[offset..offset + take].copy_from_slice(&word[..take]);
        offset += take;
    }
    pubkey
}

#[cfg(test)]
mod tests {
    use super::*;
    use ethlambda_types::attestation::validator_indices;

    /// For several validator/group combinations (including more groups than
    /// validators), the partition has `min(groups, validators)` non-empty
    /// groups and every validator appears in exactly one of them.
    #[test]
    fn participant_groups_partition_all_validators() {
        for (validators, groups) in [(8u64, 2u64), (8, 3), (5, 8), (1, 1), (4096, 4)] {
            let partition = participant_groups(validators, groups);
            assert_eq!(partition.len() as u64, groups.min(validators));
            // Membership count per validator index across all groups.
            let mut seen = vec![0u32; validators as usize];
            for bits in &partition {
                let indices: Vec<u64> = validator_indices(bits).collect();
                assert!(!indices.is_empty(), "every group must be non-empty");
                for index in indices {
                    seen[index as usize] += 1;
                }
            }
            assert!(
                seen.iter().all(|&count| count == 1),
                "every validator must appear in exactly one group: {seen:?}"
            );
        }
    }

    /// Equal seeds reproduce the same key; advancing the stream or changing
    /// the seed produces a different one.
    #[test]
    fn synthetic_pubkeys_are_deterministic() {
        let mut a = 42u64;
        let mut b = 42u64;
        let first = synthetic_pubkey(&mut a);
        assert_eq!(first, synthetic_pubkey(&mut b));

        // Successive draws from the same stream must not repeat.
        assert_ne!(first, synthetic_pubkey(&mut a));

        // A different seed must differ at the same stream position, so the
        // comparison isolates the seed rather than the stream offset.
        let mut c = 43u64;
        assert_ne!(first, synthetic_pubkey(&mut c));
    }
}
Loading
Loading