Initial benchmark implementation

2026-03-30 15:17:33 +00:00
commit fe686214d3
18 changed files with 2474 additions and 0 deletions
--- a/.codex
+++ b/.codex
--- a/.env
+++ b/.env
@@ -0,0 +1,46 @@
 # Benchmark universe (values 0..=MAX_VALUE) and set sizes, each given as a percentage of that universe.
 MAX_VALUE=100000000
 SPARSE_SET_PERCENT=0.004
 SEMI_SPARSE_SET_PERCENT=0.04
 NORMAL_SET_PERCENT=0.4
 DENSE_SET_PERCENT=4
 # Requested overlap percentages for the smaller set.
 LOW_OVERLAP_PERCENT=10
 MEDIUM_OVERLAP_PERCENT=50
 HIGH_OVERLAP_PERCENT=80
 # Select which density scenarios are included.
 ENABLE_SPARSE_SCENARIO=true
 ENABLE_SEMI_SPARSE_SCENARIO=true
 ENABLE_NORMAL_SCENARIO=true
 ENABLE_DENSE_SCENARIO=true
 # Select which overlap scenarios are included.
 ENABLE_LOW_OVERLAP=false
 ENABLE_MEDIUM_OVERLAP=true
 ENABLE_HIGH_OVERLAP=false
 # Benchmark execution controls.
 BENCHMARK_MIN_SAMPLES=2
 BENCHMARK_MAX_SAMPLES=5
 BENCHMARK_TARGET_TOTAL_MS=800
 # Select which algorithms are included in the benchmark run.
 ENABLE_BITSET=true
 ENABLE_SIMD_BITSET=false
 ENABLE_STD_HASH=true
 ENABLE_CUSTOM_HASH=true
 ENABLE_SORTED_MERGE=true
 # Select which benchmark phases are emitted.
 ENABLE_PREPARE_PHASE=true
 ENABLE_INTERSECTION_PHASE=true
 # Select how benchmark output is rendered.
 OUTPUT_FORMAT=markdown
 # Select which extra harness steps are counted inside each timed sample.
 TIME_PREPARE_INCLUDE_INPUT_GENERATION=false
 TIME_INTERSECTION_INCLUDE_OUTPUT_CLEAR=false
 TIME_INTERSECTION_INCLUDE_RESULT_COUNT=false
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
 /target
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -0,0 +1,7 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
 version = 4
 [[package]]
 name = "intersection_benchmark"
 version = "0.1.0"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -0,0 +1,6 @@
 [package]
 name = "intersection_benchmark"
 version = "0.1.0"
 edition = "2024"
 [dependencies]
--- a/README.md
+++ b/README.md
@@ -0,0 +1,175 @@
 # Intersection Benchmark
 This project benchmarks several set-intersection strategies in Rust over the same generated input scenarios.
 The benchmark output is split into two timed phases:
 - `prepare`
  - Measures the conversion from the benchmark's raw input format, a plain slice of `u32` values, into the algorithm's prepared internal representation.
  - This phase does not measure the later intersection itself.
  - By default, raw input generation is not included in this time.
  - If `TIME_PREPARE_INCLUDE_INPUT_GENERATION=true` in `.env`, raw input generation is included too.
 - `native` (the intersection phase; toggled by `ENABLE_INTERSECTION_PHASE` in `.env`)
  - Measures only the intersection step on already prepared inputs.
  - The result is written into the algorithm's native output representation.
  - This phase does not measure preparation from the raw input arrays.
  - This phase does not measure converting the native result into a plain array of numbers.
  - By default, output clearing and result counting are not included in this time.
  - If enabled in `.env`, `TIME_INTERSECTION_INCLUDE_OUTPUT_CLEAR` and `TIME_INTERSECTION_INCLUDE_RESULT_COUNT` move those extra steps into the timed window.
 Important notes about the timing model:
 - Output storage is created before timed `native` samples begin and then reused across samples.
 - Warmup runs are performed before measured samples and are not included in the reported timings.
 - Printing, formatting, statistics aggregation, and scenario planning are not part of the reported algorithm timings.
 In short:
 - `prepare` answers: how long does it take to build the algorithm's working representation?
 - `native` answers: how long does it take to compute the intersection into the algorithm's own output format?
 # Intersection benchmark suite
 - Scenarios: 8
 - Universe: `0..=100000000` (`100000001` values)
 - Set populations: sparse=`4000` (0.0040%) | semi-sparse=`40000` (0.0400%) | normal=`400000` (0.4000%) | dense=`4000000` (4.000%)
 - Overlap targets: low=10.0% | medium=50.0% | high=80.0%
 - Enabled densities: sparse=`true` | semi-sparse=`true` | normal=`true` | dense=`true`
 - Enabled overlaps: low=`false` | medium=`true` | high=`false`
 - Sampling: min=`2` | max=`5` | target total=`800ms`
 - Enabled algorithms: bitset=`true` | bitset-simd=`false` | std-hash=`true` | splitmix-hash=`true` | sorted-merge=`true`
 - Enabled phases: prepare=`true` | intersection=`true`
 - Timed extras: prepare input generation=`false` | intersection output clear=`false` | intersection result count=`false`
 - Phases per algorithm: prepare=`true` | native output intersection=`true`
 ## Scenario: ordered input | sparse set population = 0.0040% of universe | medium overlap percentage = 50.0% of each set
 - Set population: `4000` / `100000001` values
 - Overlap: requested=50.0% | actual=50.0% | shared=`2000/4000`
 | algorithm | phase | samples | mean | median | min | max |
 | --- | --- | ---: | ---: | ---: | ---: | ---: |
 | bitset | prepare | 5 | 411.335us | 401.487us | 393.202us | 444.185us |
 | bitset | native | 5 | 950.881us | 940.646us | 840.562us | 1.065ms |
 | std-hash | prepare | 5 | 80.875us | 80.946us | 79.932us | 81.558us |
 | std-hash | native | 5 | 76.875us | 75.985us | 74.445us | 81.305us |
 | splitmix-hash | prepare | 5 | 36.794us | 36.825us | 36.478us | 37.109us |
 | splitmix-hash | native | 5 | 35.065us | 33.227us | 30.197us | 43.628us |
 | sorted-merge | prepare | 5 | 350ns | 303ns | 295ns | 548ns |
 | sorted-merge | native | 5 | 3.568us | 3.541us | 3.533us | 3.646us |
 ## Scenario: ordered input | semi-sparse set population = 0.0400% of universe | medium overlap percentage = 50.0% of each set
 - Set population: `40000` / `100000001` values
 - Overlap: requested=50.0% | actual=50.0% | shared=`20000/40000`
 | algorithm | phase | samples | mean | median | min | max |
 | --- | --- | ---: | ---: | ---: | ---: | ---: |
 | bitset | prepare | 5 | 3.955ms | 3.951ms | 3.927ms | 4.014ms |
 | bitset | native | 5 | 1.428ms | 1.449ms | 1.356ms | 1.507ms |
 | std-hash | prepare | 5 | 864.919us | 860.986us | 854.337us | 880.726us |
 | std-hash | native | 5 | 905.199us | 902.901us | 898.793us | 913.503us |
 | splitmix-hash | prepare | 5 | 413.275us | 410.172us | 408.810us | 423.680us |
 | splitmix-hash | native | 5 | 469.869us | 467.963us | 465.287us | 477.299us |
 | sorted-merge | prepare | 5 | 6.172us | 6.124us | 5.921us | 6.359us |
 | sorted-merge | native | 5 | 36.202us | 36.045us | 36.023us | 36.815us |
 ## Scenario: ordered input | normal set population = 0.4000% of universe | medium overlap percentage = 50.0% of each set
 - Set population: `400000` / `100000001` values
 - Overlap: requested=50.0% | actual=50.0% | shared=`200000/400000`
 | algorithm | phase | samples | mean | median | min | max |
 | --- | --- | ---: | ---: | ---: | ---: | ---: |
 | bitset | prepare | 5 | 7.272ms | 7.200ms | 6.820ms | 7.893ms |
 | bitset | native | 5 | 1.830ms | 1.821ms | 1.815ms | 1.868ms |
 | std-hash | prepare | 5 | 11.484ms | 11.262ms | 10.767ms | 12.441ms |
 | std-hash | native | 5 | 15.501ms | 15.262ms | 14.570ms | 17.524ms |
 | splitmix-hash | prepare | 5 | 5.993ms | 6.124ms | 5.715ms | 6.193ms |
 | splitmix-hash | native | 5 | 5.363ms | 5.381ms | 5.333ms | 5.383ms |
 | sorted-merge | prepare | 5 | 309.850us | 265.482us | 248.158us | 507.857us |
 | sorted-merge | native | 5 | 567.072us | 543.143us | 527.883us | 649.634us |
 ## Scenario: ordered input | dense set population = 4.000% of universe | medium overlap percentage = 50.0% of each set
 - Set population: `4000000` / `100000001` values
 - Overlap: requested=50.0% | actual=50.0% | shared=`2000000/4000000`
 | algorithm | phase | samples | mean | median | min | max |
 | --- | --- | ---: | ---: | ---: | ---: | ---: |
 | bitset | prepare | 5 | 14.176ms | 13.606ms | 12.950ms | 17.112ms |
 | bitset | native | 5 | 1.765ms | 1.731ms | 1.674ms | 1.860ms |
 | std-hash | prepare | 2 | 441.875ms | 441.875ms | 436.380ms | 447.370ms |
 | std-hash | native | 2 | 445.366ms | 445.366ms | 442.813ms | 447.919ms |
 | splitmix-hash | prepare | 4 | 243.982ms | 239.375ms | 237.612ms | 259.564ms |
 | splitmix-hash | native | 5 | 51.967ms | 49.131ms | 48.321ms | 56.781ms |
 | sorted-merge | prepare | 5 | 11.852ms | 11.638ms | 11.416ms | 12.418ms |
 | sorted-merge | native | 5 | 5.537ms | 5.530ms | 5.517ms | 5.582ms |
 ## Scenario: unordered input | sparse set population = 0.0040% of universe | medium overlap percentage = 50.0% of each set
 - Set population: `4000` / `100000001` values
 - Overlap: requested=50.0% | actual=50.0% | shared=`2000/4000`
 | algorithm | phase | samples | mean | median | min | max |
 | --- | --- | ---: | ---: | ---: | ---: | ---: |
 | bitset | prepare | 5 | 2.264ms | 882.303us | 845.089us | 7.840ms |
 | bitset | native | 5 | 1.776ms | 1.778ms | 1.695ms | 1.861ms |
 | std-hash | prepare | 5 | 83.110us | 80.293us | 79.781us | 93.585us |
 | std-hash | native | 5 | 77.430us | 77.762us | 74.848us | 79.988us |
 | splitmix-hash | prepare | 5 | 36.017us | 35.957us | 35.943us | 36.129us |
 | splitmix-hash | native | 5 | 33.200us | 31.326us | 28.101us | 42.169us |
 | sorted-merge | prepare | 5 | 61.613us | 60.791us | 55.215us | 69.401us |
 | sorted-merge | native | 5 | 3.617us | 3.533us | 3.528us | 3.943us |
 ## Scenario: unordered input | semi-sparse set population = 0.0400% of universe | medium overlap percentage = 50.0% of each set
 - Set population: `40000` / `100000001` values
 - Overlap: requested=50.0% | actual=50.0% | shared=`20000/40000`
 | algorithm | phase | samples | mean | median | min | max |
 | --- | --- | ---: | ---: | ---: | ---: | ---: |
 | bitset | prepare | 5 | 2.221ms | 1.596ms | 1.463ms | 3.909ms |
 | bitset | native | 5 | 1.770ms | 1.761ms | 1.715ms | 1.829ms |
 | std-hash | prepare | 5 | 882.778us | 869.598us | 865.722us | 910.316us |
 | std-hash | native | 5 | 917.268us | 915.333us | 900.514us | 935.037us |
 | splitmix-hash | prepare | 5 | 417.845us | 420.083us | 411.847us | 422.302us |
 | splitmix-hash | native | 5 | 475.443us | 473.060us | 466.552us | 486.611us |
 | sorted-merge | prepare | 5 | 866.901us | 867.193us | 857.034us | 877.401us |
 | sorted-merge | native | 5 | 49.398us | 48.383us | 48.283us | 53.209us |
 ## Scenario: unordered input | normal set population = 0.4000% of universe | medium overlap percentage = 50.0% of each set
 - Set population: `400000` / `100000001` values
 - Overlap: requested=50.0% | actual=50.0% | shared=`200000/400000`
 | algorithm | phase | samples | mean | median | min | max |
 | --- | --- | ---: | ---: | ---: | ---: | ---: |
 | bitset | prepare | 5 | 4.712ms | 4.803ms | 4.476ms | 4.920ms |
 | bitset | native | 5 | 1.759ms | 1.756ms | 1.687ms | 1.844ms |
 | std-hash | prepare | 5 | 10.839ms | 10.826ms | 10.524ms | 11.178ms |
 | std-hash | native | 5 | 15.163ms | 14.614ms | 14.440ms | 17.563ms |
 | splitmix-hash | prepare | 5 | 5.632ms | 5.632ms | 5.585ms | 5.679ms |
 | splitmix-hash | native | 5 | 5.483ms | 5.432ms | 5.233ms | 5.969ms |
 | sorted-merge | prepare | 5 | 10.528ms | 10.457ms | 10.445ms | 10.814ms |
 | sorted-merge | native | 5 | 544.184us | 534.490us | 530.891us | 581.634us |
 ## Scenario: unordered input | dense set population = 4.000% of universe | medium overlap percentage = 50.0% of each set
 - Set population: `4000000` / `100000001` values
 - Overlap: requested=50.0% | actual=50.0% | shared=`2000000/4000000`
 | algorithm | phase | samples | mean | median | min | max |
 | --- | --- | ---: | ---: | ---: | ---: | ---: |
 | bitset | prepare | 5 | 58.153ms | 57.261ms | 53.448ms | 63.278ms |
 | bitset | native | 5 | 1.805ms | 1.782ms | 1.652ms | 1.985ms |
 | std-hash | prepare | 2 | 461.857ms | 461.857ms | 444.440ms | 479.274ms |
 | std-hash | native | 2 | 438.653ms | 438.653ms | 435.414ms | 441.891ms |
 | splitmix-hash | prepare | 4 | 252.147ms | 250.242ms | 243.913ms | 264.189ms |
 | splitmix-hash | native | 5 | 50.829ms | 49.904ms | 49.156ms | 55.716ms |
 | sorted-merge | prepare | 5 | 130.853ms | 130.469ms | 129.970ms | 132.748ms |
 | sorted-merge | native | 5 | 6.016ms | 5.942ms | 5.877ms | 6.351ms |
--- a/src/algorithms/bitset.rs
+++ b/src/algorithms/bitset.rs
@@ -0,0 +1,106 @@
 use crate::algorithms::IntersectionAlgorithm;
 use crate::data::Order;
 /// Marker type for the scalar bitset strategy; its behaviour lives in the
 /// `IntersectionAlgorithm` implementation below.
 pub struct BitSetAlgorithm;
 /// Prepared input of the bitset strategy: one membership bit per universe value.
 #[derive(Clone, Debug)]
 pub struct BitSetSet {
    /// Packed membership bits; value `v` maps to bit `v % 64` of `words[v / 64]`.
    words: Vec<u64>,
    /// Size of the universe; `prepare` rejects any value that is not below it.
    universe_len: usize,
 }
 /// Native result of the bitset strategy: the bitwise AND of two prepared bitsets.
 #[derive(Clone, Debug)]
 pub struct BitSetIntersectionOutput {
    /// Packed result bits, same layout and word count as the prepared inputs.
    words: Vec<u64>,
    /// Universe size copied from the inputs; used to filter values when decoding.
    universe_len: usize,
 }
 /// Bitset strategy: membership is one bit per universe value and the
 /// intersection is a word-by-word bitwise AND.
 impl IntersectionAlgorithm for BitSetAlgorithm {
    type Prepared = BitSetSet;
    type Output = BitSetIntersectionOutput;
    const NAME: &'static str = "bitset";

    /// Builds a zeroed bitset spanning `universe_len` bits and sets one bit per input value.
    ///
    /// # Panics
    /// Panics when a value is not strictly below `universe_len`.
    fn prepare(input: &[u32], universe_len: usize, _order: Order) -> Self::Prepared {
        let bits_per_word = u64::BITS as usize;
        let mut prepared = BitSetSet {
            words: vec![0_u64; universe_len.div_ceil(bits_per_word)],
            universe_len,
        };
        input.iter().for_each(|&value| {
            let position = value as usize;
            assert!(
                position < universe_len,
                "value {value} is outside the universe"
            );
            let mask = 1_u64 << (position % bits_per_word);
            prepared.words[position / bits_per_word] |= mask;
        });
        prepared
    }

    /// Allocates a zeroed result bitset with the same word count as `left`.
    ///
    /// # Panics
    /// Panics when the two inputs were prepared for different universes.
    fn create_output(left: &Self::Prepared, right: &Self::Prepared) -> Self::Output {
        assert_eq!(left.universe_len, right.universe_len);
        let universe_len = left.universe_len;
        let words = vec![0_u64; left.words.len()];
        BitSetIntersectionOutput {
            words,
            universe_len,
        }
    }

    /// Nothing to reset: `intersect_into` overwrites the output words it visits.
    fn clear_output(_output: &mut Self::Output) {}

    /// Stores `left & right` word by word into `output`.
    ///
    /// # Panics
    /// Panics when the universes of `left`, `right` and `output` differ.
    fn intersect_into(left: &Self::Prepared, right: &Self::Prepared, output: &mut Self::Output) {
        assert_eq!(left.universe_len, right.universe_len);
        assert_eq!(left.universe_len, output.universe_len);
        let operands = left.words.iter().zip(right.words.iter());
        for (slot, (lhs, rhs)) in output.words.iter_mut().zip(operands) {
            *slot = lhs & rhs;
        }
    }

    /// Counts the set bits of the result, i.e. the number of shared values.
    fn output_len(output: &Self::Output) -> usize {
        let mut total = 0_usize;
        for word in &output.words {
            total += word.count_ones() as usize;
        }
        total
    }

    /// Decodes the result bitset into values, in ascending order.
    fn output_values(output: &Self::Output) -> Vec<u32> {
        let expected = Self::output_len(output);
        let mut values = Vec::with_capacity(expected);
        output
            .words
            .iter()
            .copied()
            .enumerate()
            .for_each(|(word_index, shared_word)| {
                push_shared_word(&mut values, shared_word, word_index, output.universe_len);
            });
        values
    }
 }
 /// Appends the value of every set bit in `shared` to `output`, lowest bit first.
 ///
 /// A bit at position `b` of word `word_index` stands for value
 /// `word_index * 64 + b`; values that are not below `universe_len` are skipped.
 fn push_shared_word(
    output: &mut Vec<u32>,
    mut shared: u64,
    word_index: usize,
    universe_len: usize,
 ) {
    let bits_per_word = u64::BITS as usize;
    loop {
        if shared == 0 {
            break;
        }
        let lowest = shared.trailing_zeros();
        let candidate = word_index * bits_per_word + lowest as usize;
        if candidate < universe_len {
            output.push(candidate as u32);
        }
        // Drop the bit that was just handled.
        shared &= !(1_u64 << lowest);
    }
 }
--- a/src/algorithms/custom_hash.rs
+++ b/src/algorithms/custom_hash.rs
@@ -0,0 +1,119 @@
 use std::collections::HashSet;
 use std::hash::{BuildHasherDefault, Hasher};
 use crate::algorithms::IntersectionAlgorithm;
 use crate::data::Order;
 /// Marker type for the hash-set strategy that uses the SplitMix64-based hasher.
 pub struct CustomHashAlgorithm;
 /// Hasher factory that hands out default-initialised `SplitMix64Hasher` values.
 type SplitMixBuildHasher = BuildHasherDefault<SplitMix64Hasher>;
 /// Prepared input of the SplitMix hash strategy.
 #[derive(Clone, Debug)]
 pub struct CustomHashSet {
    /// The input values stored in a hash set that uses `SplitMixBuildHasher`.
    values: HashSet<u32, SplitMixBuildHasher>,
 }
 /// Native result of the SplitMix hash strategy.
 #[derive(Clone, Debug)]
 pub struct CustomHashIntersectionOutput {
    /// Values found in both inputs.
    values: HashSet<u32, SplitMixBuildHasher>,
 }
 /// Hash-set strategy backed by the SplitMix64 hasher: probes the larger set
 /// with every element of the smaller one.
 impl IntersectionAlgorithm for CustomHashAlgorithm {
    type Prepared = CustomHashSet;
    type Output = CustomHashIntersectionOutput;
    const NAME: &'static str = "splitmix-hash";

    /// Copies `input` into a hash set pre-sized for `input.len()` entries.
    fn prepare(input: &[u32], _universe_len: usize, _order: Order) -> Self::Prepared {
        let hasher = SplitMixBuildHasher::default();
        let mut prepared = CustomHashSet {
            values: HashSet::with_capacity_and_hasher(input.len(), hasher),
        };
        prepared.values.extend(input.iter().copied());
        prepared
    }

    /// Creates an empty result set sized for the smaller of the two inputs.
    fn create_output(left: &Self::Prepared, right: &Self::Prepared) -> Self::Output {
        let capacity = left.values.len().min(right.values.len());
        let values = HashSet::with_capacity_and_hasher(capacity, SplitMixBuildHasher::default());
        CustomHashIntersectionOutput { values }
    }

    /// Empties the result set.
    fn clear_output(output: &mut Self::Output) {
        output.values.clear();
    }

    /// Inserts into `output` every value of the smaller set that the larger set contains.
    fn intersect_into(left: &Self::Prepared, right: &Self::Prepared, output: &mut Self::Output) {
        let (probe, lookup) = ordered_sets(&left.values, &right.values);
        probe
            .iter()
            .filter(|value| lookup.contains(*value))
            .for_each(|&value| {
                output.values.insert(value);
            });
    }

    /// Number of values currently held by the result set.
    fn output_len(output: &Self::Output) -> usize {
        output.values.len()
    }

    /// Copies the result into a vector, in the set's iteration order.
    fn output_values(output: &Self::Output) -> Vec<u32> {
        let mut collected = Vec::with_capacity(output.values.len());
        collected.extend(output.values.iter().copied());
        collected
    }
 }
 /// Returns the two sets as `(smaller, larger)`; on equal sizes `left` comes first.
 fn ordered_sets<'a>(
    left: &'a HashSet<u32, SplitMixBuildHasher>,
    right: &'a HashSet<u32, SplitMixBuildHasher>,
 ) -> (
    &'a HashSet<u32, SplitMixBuildHasher>,
    &'a HashSet<u32, SplitMixBuildHasher>,
 ) {
    match left.len().cmp(&right.len()) {
        std::cmp::Ordering::Greater => (right, left),
        std::cmp::Ordering::Less | std::cmp::Ordering::Equal => (left, right),
    }
 }
 /// Non-cryptographic [`Hasher`] that scrambles its input with the SplitMix64 mixer.
 ///
 /// Every write folds the new data into the running state, so keys that issue
 /// several writes (tuples, structs, strings) depend on all of their parts.
 /// Starting from the default state (`0`), a single integer write yields
 /// `splitmix64(value)`, so hashes of plain integer keys are unaffected by the
 /// state folding.
 #[derive(Clone, Debug, Default)]
 pub struct SplitMix64Hasher {
    /// Running hash state; also the value returned by `finish`.
    state: u64,
 }
 impl Hasher for SplitMix64Hasher {
    /// Returns the current state as the hash value.
    fn finish(&self) -> u64 {
        self.state
    }
    /// Absorbs an arbitrary byte string: seeds from the running state and the
    /// length, mixes in each byte, then applies the SplitMix64 mixer.
    fn write(&mut self, bytes: &[u8]) {
        // XOR-ing the previous state keeps earlier writes relevant; with the
        // default state of 0 this is the plain length-derived seed.
        let mut state = self.state ^ (bytes.len() as u64).wrapping_mul(0x9E37_79B9_7F4A_7C15);
        for &byte in bytes {
            state ^= byte as u64;
            state = state.rotate_left(7).wrapping_mul(0xBF58_476D_1CE4_E5B9);
        }
        self.state = splitmix64(state);
    }
    /// Absorbs a `u32` by mixing it with the running state.
    fn write_u32(&mut self, value: u32) {
        self.state = splitmix64(self.state ^ u64::from(value));
    }
    /// Absorbs a `u64` by mixing it with the running state.
    fn write_u64(&mut self, value: u64) {
        self.state = splitmix64(self.state ^ value);
    }
    /// Absorbs a `usize` by mixing it with the running state.
    fn write_usize(&mut self, value: usize) {
        self.state = splitmix64(self.state ^ value as u64);
    }
 }
 /// SplitMix64 output function: adds the golden-ratio increment, then applies
 /// two xor-shift-multiply rounds and a final xor-shift.
 fn splitmix64(mut value: u64) -> u64 {
    value = value.wrapping_add(0x9E37_79B9_7F4A_7C15);
    value = (value ^ (value >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
    value = (value ^ (value >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
    value ^ (value >> 31)
 }
--- a/src/algorithms/mod.rs
+++ b/src/algorithms/mod.rs
@@ -0,0 +1,21 @@
 pub mod bitset;
 pub mod custom_hash;
 pub mod simd_bitset;
 pub mod sorted_merge;
 pub mod std_hash;
 use crate::data::Order;
 /// Common interface of the set-intersection strategies declared in this module.
 ///
 /// A strategy converts a raw `&[u32]` input into its own `Prepared` form and
 /// intersects two prepared sets into a reusable `Output` value.
 pub trait IntersectionAlgorithm {
    /// Algorithm-specific representation produced by `prepare`.
    type Prepared;
    /// Algorithm-specific container that receives the intersection result.
    type Output;
    /// Short, human-readable label of the algorithm (for example `"bitset"`).
    const NAME: &'static str;
    /// Builds the prepared representation of `input`.
    ///
    /// `universe_len` bounds the admissible values (the bitset implementations
    /// panic on a value that is not below it); `order` states whether `input`
    /// is already ordered, which the sorted-merge implementation uses to skip
    /// its sort.
    fn prepare(input: &[u32], universe_len: usize, order: Order) -> Self::Prepared;
    /// Allocates an output container suitable for intersecting `left` and `right`.
    fn create_output(left: &Self::Prepared, right: &Self::Prepared) -> Self::Output;
    /// Resets `output` before it is reused. The bitset implementations make this
    /// a no-op; the hash and sorted-merge implementations empty their container.
    fn clear_output(output: &mut Self::Output);
    /// Writes the intersection of `left` and `right` into `output`.
    ///
    /// The sorted-merge implementation appends to `output`, so it needs
    /// `clear_output` to have run since the previous result was written.
    fn intersect_into(left: &Self::Prepared, right: &Self::Prepared, output: &mut Self::Output);
    /// Number of values currently stored in `output`.
    fn output_len(output: &Self::Output) -> usize;
    /// Converts `output` into a plain vector of values.
    fn output_values(output: &Self::Output) -> Vec<u32>;
 }
--- a/src/algorithms/simd_bitset.rs
+++ b/src/algorithms/simd_bitset.rs
@@ -0,0 +1,230 @@
 use crate::algorithms::IntersectionAlgorithm;
 use crate::data::Order;
 #[cfg(target_arch = "aarch64")]
 use std::arch::aarch64::{uint64x2_t, vandq_u64, vld1q_u64, vst1q_u64};
 #[cfg(target_arch = "x86")]
 use std::arch::x86::{
    __m128i, __m256i, _mm_and_si128, _mm_loadu_si128, _mm_storeu_si128, _mm256_and_si256,
    _mm256_loadu_si256, _mm256_storeu_si256,
 };
 #[cfg(target_arch = "x86_64")]
 use std::arch::x86_64::{
    __m128i, __m256i, _mm_and_si128, _mm_loadu_si128, _mm_storeu_si128, _mm256_and_si256,
    _mm256_loadu_si256, _mm256_storeu_si256,
 };
 /// Marker type for the bitset strategy whose AND step uses explicit SIMD kernels.
 pub struct SimdBitSetAlgorithm;
 /// Prepared input of the SIMD bitset strategy: one membership bit per universe value.
 #[derive(Clone, Debug)]
 pub struct SimdBitSetSet {
    /// Packed membership bits; value `v` maps to bit `v % 64` of `words[v / 64]`.
    words: Vec<u64>,
    /// Size of the universe; `prepare` rejects any value that is not below it.
    universe_len: usize,
 }
 /// Native result of the SIMD bitset strategy: the bitwise AND of two prepared bitsets.
 #[derive(Clone, Debug)]
 pub struct SimdBitSetIntersectionOutput {
    /// Packed result bits, same layout and word count as the prepared inputs.
    words: Vec<u64>,
    /// Universe size copied from the inputs; used to filter values when decoding.
    universe_len: usize,
 }
 /// SIMD bitset strategy: same representation as the scalar bitset, with the
 /// AND step delegated to `intersect_words`.
 impl IntersectionAlgorithm for SimdBitSetAlgorithm {
    type Prepared = SimdBitSetSet;
    type Output = SimdBitSetIntersectionOutput;
    const NAME: &'static str = "bitset-simd";

    /// Builds a zeroed bitset spanning `universe_len` bits and sets one bit per input value.
    ///
    /// # Panics
    /// Panics when a value is not strictly below `universe_len`.
    fn prepare(input: &[u32], universe_len: usize, _order: Order) -> Self::Prepared {
        const WORD_BITS: usize = u64::BITS as usize;
        let mut words = vec![0_u64; universe_len.div_ceil(WORD_BITS)];
        for value in input.iter().copied() {
            let bit = value as usize;
            assert!(
                bit < universe_len,
                "value {value} is outside the universe"
            );
            let (word, offset) = (bit / WORD_BITS, bit % WORD_BITS);
            words[word] |= 1_u64 << offset;
        }
        SimdBitSetSet {
            words,
            universe_len,
        }
    }

    /// Allocates a zeroed result bitset with the same word count as `left`.
    ///
    /// # Panics
    /// Panics when the two inputs were prepared for different universes.
    fn create_output(left: &Self::Prepared, right: &Self::Prepared) -> Self::Output {
        assert_eq!(left.universe_len, right.universe_len);
        let zeroed = vec![0_u64; left.words.len()];
        SimdBitSetIntersectionOutput {
            words: zeroed,
            universe_len: left.universe_len,
        }
    }

    /// Nothing to reset: `intersect_into` overwrites the output words it visits.
    fn clear_output(_output: &mut Self::Output) {}

    /// Stores `left & right` into `output` via the architecture-specific kernel.
    ///
    /// # Panics
    /// Panics when the universes of `left`, `right` and `output` differ.
    fn intersect_into(left: &Self::Prepared, right: &Self::Prepared, output: &mut Self::Output) {
        assert_eq!(left.universe_len, right.universe_len);
        assert_eq!(left.universe_len, output.universe_len);
        let (lhs, rhs) = (left.words.as_slice(), right.words.as_slice());
        intersect_words(lhs, rhs, output.words.as_mut_slice());
    }

    /// Counts the set bits of the result, i.e. the number of shared values.
    fn output_len(output: &Self::Output) -> usize {
        let mut population = 0_usize;
        for word in output.words.iter() {
            population += word.count_ones() as usize;
        }
        population
    }

    /// Decodes the result bitset into values, in ascending order.
    fn output_values(output: &Self::Output) -> Vec<u32> {
        let universe_len = output.universe_len;
        let mut values = Vec::with_capacity(Self::output_len(output));
        output
            .words
            .iter()
            .enumerate()
            .for_each(|(word_index, &shared_word)| {
                push_shared_word(&mut values, shared_word, word_index, universe_len);
            });
        values
    }
 }
 /// x86 dispatcher: picks the widest kernel the running CPU supports
 /// (AVX2, then SSE2, then the scalar loop).
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 fn intersect_words(left: &[u64], right: &[u64], output: &mut [u64]) {
    if std::is_x86_feature_detected!("avx2") {
        // SAFETY: the runtime check above confirmed AVX2, the target feature the
        // kernel is compiled for.
        unsafe { intersect_avx2(left, right, output) }
    } else if std::is_x86_feature_detected!("sse2") {
        // SAFETY: the runtime check above confirmed SSE2, the target feature the
        // kernel is compiled for.
        unsafe { intersect_sse2(left, right, output) }
    } else {
        intersect_scalar(left, right, output)
    }
 }
 /// AArch64 dispatcher: always forwards to the NEON kernel.
 #[cfg(target_arch = "aarch64")]
 fn intersect_words(left: &[u64], right: &[u64], output: &mut [u64]) {
    // NOTE(review): unlike the x86 dispatcher there is no runtime feature check
    // here — presumably NEON is treated as baseline on aarch64; confirm.
    unsafe {
        intersect_neon(left, right, output);
    }
 }
 /// Fallback dispatcher for architectures without a dedicated kernel: uses the scalar loop.
 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
 fn intersect_words(left: &[u64], right: &[u64], output: &mut [u64]) {
    intersect_scalar(left, right, output);
 }
 /// AVX2 kernel: ANDs `left` and `right` into `output`, four 64-bit words per step.
 ///
 /// Only the common prefix of the three slices is processed, which is what the
 /// scalar kernel's `zip` does as well; a shorter `right` or `output` therefore
 /// can no longer cause out-of-bounds loads or stores. Words past the last full
 /// group of four are handled by `intersect_scalar`.
 ///
 /// # Safety
 /// The running CPU must support AVX2.
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 #[target_feature(enable = "avx2")]
 unsafe fn intersect_avx2(left: &[u64], right: &[u64], output: &mut [u64]) {
    const LANES: usize = 4;
    // Clamp every slice to the shared length so the raw-pointer accesses below
    // are bounded by all three buffers, not just by `left`.
    let len = left.len().min(right.len()).min(output.len());
    let (left, right, output) = (&left[..len], &right[..len], &mut output[..len]);
    let simd_end = len / LANES * LANES;
    let mut word_index = 0;
    while word_index < simd_end {
        // SAFETY: `word_index + LANES <= simd_end <= len` and all three slices
        // hold exactly `len` words, so each 256-bit access stays in bounds. The
        // `loadu`/`storeu` intrinsics have no alignment requirement.
        unsafe {
            let left_vector = _mm256_loadu_si256(left.as_ptr().add(word_index) as *const __m256i);
            let right_vector =
                _mm256_loadu_si256(right.as_ptr().add(word_index) as *const __m256i);
            let shared_vector = _mm256_and_si256(left_vector, right_vector);
            _mm256_storeu_si256(
                output.as_mut_ptr().add(word_index) as *mut __m256i,
                shared_vector,
            );
        }
        word_index += LANES;
    }
    intersect_scalar(
        &left[simd_end..],
        &right[simd_end..],
        &mut output[simd_end..],
    );
 }
 /// SSE2 kernel: ANDs `left` and `right` into `output`, two 64-bit words per step.
 ///
 /// Only the common prefix of the three slices is processed, which is what the
 /// scalar kernel's `zip` does as well; a shorter `right` or `output` therefore
 /// can no longer cause out-of-bounds loads or stores. A trailing odd word is
 /// handled by `intersect_scalar`.
 ///
 /// # Safety
 /// The running CPU must support SSE2.
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 #[target_feature(enable = "sse2")]
 unsafe fn intersect_sse2(left: &[u64], right: &[u64], output: &mut [u64]) {
    const LANES: usize = 2;
    // Clamp every slice to the shared length so the raw-pointer accesses below
    // are bounded by all three buffers, not just by `left`.
    let len = left.len().min(right.len()).min(output.len());
    let (left, right, output) = (&left[..len], &right[..len], &mut output[..len]);
    let simd_end = len / LANES * LANES;
    let mut word_index = 0;
    while word_index < simd_end {
        // SAFETY: `word_index + LANES <= simd_end <= len` and all three slices
        // hold exactly `len` words, so each 128-bit access stays in bounds. The
        // `loadu`/`storeu` intrinsics have no alignment requirement.
        unsafe {
            let left_vector = _mm_loadu_si128(left.as_ptr().add(word_index) as *const __m128i);
            let right_vector = _mm_loadu_si128(right.as_ptr().add(word_index) as *const __m128i);
            let shared_vector = _mm_and_si128(left_vector, right_vector);
            _mm_storeu_si128(
                output.as_mut_ptr().add(word_index) as *mut __m128i,
                shared_vector,
            );
        }
        word_index += LANES;
    }
    intersect_scalar(
        &left[simd_end..],
        &right[simd_end..],
        &mut output[simd_end..],
    );
 }
 /// NEON kernel: ANDs `left` and `right` into `output`, two 64-bit words per step.
 ///
 /// Only the common prefix of the three slices is processed, which is what the
 /// scalar kernel's `zip` does as well; a shorter `right` or `output` therefore
 /// can no longer cause out-of-bounds loads or stores. A trailing odd word is
 /// handled by `intersect_scalar`.
 ///
 /// # Safety
 /// The running CPU must support NEON.
 #[cfg(target_arch = "aarch64")]
 unsafe fn intersect_neon(left: &[u64], right: &[u64], output: &mut [u64]) {
    const LANES: usize = 2;
    // Clamp every slice to the shared length so the raw-pointer accesses below
    // are bounded by all three buffers, not just by `left`.
    let len = left.len().min(right.len()).min(output.len());
    let (left, right, output) = (&left[..len], &right[..len], &mut output[..len]);
    let simd_end = len / LANES * LANES;
    let mut word_index = 0;
    while word_index < simd_end {
        // SAFETY: `word_index + LANES <= simd_end <= len` and all three slices
        // hold exactly `len` words, so each two-word access stays in bounds.
        unsafe {
            let left_vector: uint64x2_t = vld1q_u64(left.as_ptr().add(word_index));
            let right_vector: uint64x2_t = vld1q_u64(right.as_ptr().add(word_index));
            let shared_vector = vandq_u64(left_vector, right_vector);
            vst1q_u64(output.as_mut_ptr().add(word_index), shared_vector);
        }
        word_index += LANES;
    }
    intersect_scalar(
        &left[simd_end..],
        &right[simd_end..],
        &mut output[simd_end..],
    );
 }
 /// Portable kernel: writes `left[i] & right[i]` to `output[i]` for every index
 /// present in all three slices; any remaining output words are left untouched.
 fn intersect_scalar(left: &[u64], right: &[u64], output: &mut [u64]) {
    let operands = left.iter().zip(right.iter());
    output
        .iter_mut()
        .zip(operands)
        .for_each(|(slot, (lhs, rhs))| *slot = lhs & rhs);
 }
 /// Decodes one result word: pushes `word_index * 64 + bit` for each set bit of
 /// `shared`, lowest bit first, skipping values that are not below `universe_len`.
 fn push_shared_word(
    output: &mut Vec<u32>,
    mut shared: u64,
    word_index: usize,
    universe_len: usize,
 ) {
    let word_bits = u64::BITS as usize;
    loop {
        if shared == 0 {
            return;
        }
        let bit = shared.trailing_zeros();
        let decoded = word_index * word_bits + bit as usize;
        if decoded < universe_len {
            output.push(decoded as u32);
        }
        // Remove the bit that was just decoded.
        shared ^= 1_u64 << bit;
    }
 }
--- a/src/algorithms/sorted_merge.rs
+++ b/src/algorithms/sorted_merge.rs
@@ -0,0 +1,66 @@
 use crate::algorithms::IntersectionAlgorithm;
 use crate::data::Order;
 /// Marker type for the strategy that merges two sorted vectors.
 pub struct SortedMergeAlgorithm;
 /// Prepared input of the sorted-merge strategy.
 #[derive(Clone, Debug)]
 pub struct SortedVecSet {
    /// Copy of the input values; sorted by `prepare` unless the input was declared ordered.
    values: Vec<u32>,
 }
 /// Sorted-merge strategy: keeps each set as a sorted vector and intersects by
 /// walking both vectors in lockstep.
 impl IntersectionAlgorithm for SortedMergeAlgorithm {
    type Prepared = SortedVecSet;
    type Output = Vec<u32>;
    const NAME: &'static str = "sorted-merge";

    /// Copies `input`; sorts the copy only when the caller declares it unordered.
    fn prepare(input: &[u32], _universe_len: usize, order: Order) -> Self::Prepared {
        let mut values = input.to_vec();
        match order {
            // Input declared ordered is trusted as-is.
            Order::Ordered => {}
            Order::Unordered => values.sort_unstable(),
        }
        SortedVecSet { values }
    }

    /// Creates an empty vector with room for the smaller input's length.
    fn create_output(left: &Self::Prepared, right: &Self::Prepared) -> Self::Output {
        let capacity = left.values.len().min(right.values.len());
        Vec::with_capacity(capacity)
    }

    /// Empties the result vector while keeping its allocation.
    fn clear_output(output: &mut Self::Output) {
        output.truncate(0);
    }

    /// Appends the values common to both sorted inputs to `output`.
    fn intersect_into(left: &Self::Prepared, right: &Self::Prepared, output: &mut Self::Output) {
        let (lhs, rhs) = (left.values.as_slice(), right.values.as_slice());
        intersect_impl(lhs, rhs, output);
    }

    /// Number of values currently in the result vector.
    fn output_len(output: &Self::Output) -> usize {
        output.len()
    }

    /// Returns a copy of the result vector.
    fn output_values(output: &Self::Output) -> Vec<u32> {
        output.to_vec()
    }
 }
 /// Two-pointer merge over two sorted slices: advances whichever side holds the
 /// smaller value and appends a value to `output` when both sides agree.
 ///
 /// `output` is appended to, not cleared.
 fn intersect_impl(left: &[u32], right: &[u32], output: &mut Vec<u32>) {
    let (mut l, mut r) = (0_usize, 0_usize);
    while let (Some(&lhs), Some(&rhs)) = (left.get(l), right.get(r)) {
        if lhs < rhs {
            l += 1;
        } else if lhs > rhs {
            r += 1;
        } else {
            output.push(lhs);
            l += 1;
            r += 1;
        }
    }
 }
--- a/src/algorithms/std_hash.rs
+++ b/src/algorithms/std_hash.rs
@@ -0,0 +1,68 @@
 use std::collections::HashSet;
 use crate::algorithms::IntersectionAlgorithm;
 use crate::data::Order;
 /// Marker type for the strategy built on `std::collections::HashSet` with its default hasher.
 pub struct StdHashAlgorithm;
 /// Prepared input of the std hash strategy.
 #[derive(Clone, Debug)]
 pub struct StdHashSet {
    /// The input values stored in a standard hash set.
    values: HashSet<u32>,
 }
 /// Native result of the std hash strategy.
 #[derive(Clone, Debug)]
 pub struct StdHashIntersectionOutput {
    /// Values found in both inputs.
    values: HashSet<u32>,
 }
 /// Standard hash-set strategy: probes the larger set with every element of the
 /// smaller one.
 impl IntersectionAlgorithm for StdHashAlgorithm {
    type Prepared = StdHashSet;
    type Output = StdHashIntersectionOutput;
    const NAME: &'static str = "std-hash";

    /// Copies `input` into a hash set pre-sized for `input.len()` entries.
    fn prepare(input: &[u32], _universe_len: usize, _order: Order) -> Self::Prepared {
        let mut prepared = StdHashSet {
            values: HashSet::with_capacity(input.len()),
        };
        prepared.values.extend(input.iter().copied());
        prepared
    }

    /// Creates an empty result set sized for the smaller of the two inputs.
    fn create_output(left: &Self::Prepared, right: &Self::Prepared) -> Self::Output {
        let capacity = left.values.len().min(right.values.len());
        let values = HashSet::with_capacity(capacity);
        StdHashIntersectionOutput { values }
    }

    /// Empties the result set.
    fn clear_output(output: &mut Self::Output) {
        output.values.clear();
    }

    /// Inserts into `output` every value of the smaller set that the larger set contains.
    fn intersect_into(left: &Self::Prepared, right: &Self::Prepared, output: &mut Self::Output) {
        let (probe, lookup) = ordered_sets(&left.values, &right.values);
        probe
            .iter()
            .filter(|value| lookup.contains(*value))
            .for_each(|&value| {
                output.values.insert(value);
            });
    }

    /// Number of values currently held by the result set.
    fn output_len(output: &Self::Output) -> usize {
        output.values.len()
    }

    /// Copies the result into a vector, in the set's iteration order.
    fn output_values(output: &Self::Output) -> Vec<u32> {
        let mut collected = Vec::with_capacity(output.values.len());
        collected.extend(output.values.iter().copied());
        collected
    }
 }
 /// Returns the two sets as `(smaller, larger)`; on equal sizes `left` comes first.
 fn ordered_sets<'a>(
    left: &'a HashSet<u32>,
    right: &'a HashSet<u32>,
 ) -> (&'a HashSet<u32>, &'a HashSet<u32>) {
    let right_is_smaller = right.len() < left.len();
    if right_is_smaller {
        return (right, left);
    }
    (left, right)
 }
--- a/src/benchmark.rs
+++ b/src/benchmark.rs
@@ -0,0 +1,699 @@
 use std::hint::black_box;
 use std::time::{Duration, Instant};
 use crate::algorithms::IntersectionAlgorithm;
 use crate::algorithms::bitset::BitSetAlgorithm;
 use crate::algorithms::custom_hash::CustomHashAlgorithm;
 use crate::algorithms::simd_bitset::SimdBitSetAlgorithm;
 use crate::algorithms::sorted_merge::SortedMergeAlgorithm;
 use crate::algorithms::std_hash::StdHashAlgorithm;
 use crate::data::{DatasetConfig, DatasetPlan, Density, Order, Overlap, Scenario};
 use crate::settings::{OutputFormat, settings};
 /// Controls how many samples are collected and which harness steps are timed.
 #[derive(Clone, Debug)]
 pub struct MeasurementOptions {
    /// Untimed runs executed before sampling starts.
    pub warmup_runs: usize,
    /// Sampling always continues until at least this many samples exist.
    pub min_samples: usize,
    /// Past `min_samples`, sampling stops once this many samples exist.
    pub max_samples: usize,
    /// Past `min_samples`, sampling stops once the summed sample time reaches this.
    pub target_total: Duration,
    /// Whether a prepare sample also times raw input generation.
    pub include_prepare_input_generation: bool,
    /// Whether an intersection sample also times clearing the output.
    pub include_intersection_output_clear: bool,
    /// Whether an intersection sample also times reading the result count.
    pub include_intersection_result_count: bool,
 }
 impl Default for MeasurementOptions {
    /// Reads sampling limits and timing switches from the runtime settings,
    /// with one warmup run.
    fn default() -> Self {
        let runtime = settings();
        let target_total = Duration::from_millis(runtime.benchmark_target_total_ms);
        Self {
            warmup_runs: 1,
            min_samples: runtime.benchmark_min_samples,
            max_samples: runtime.benchmark_max_samples,
            target_total,
            include_prepare_input_generation: runtime.time_prepare_include_input_generation,
            include_intersection_output_clear: runtime.time_intersection_include_output_clear,
            include_intersection_result_count: runtime.time_intersection_include_result_count,
        }
    }
 }
 impl MeasurementOptions {
    /// Minimal options: no warmup, exactly one sample, no timed extras.
    pub fn smoke() -> Self {
        let single_sample = 1;
        Self {
            warmup_runs: 0,
            min_samples: single_sample,
            max_samples: single_sample,
            target_total: Duration::ZERO,
            include_prepare_input_generation: false,
            include_intersection_output_clear: false,
            include_intersection_result_count: false,
        }
    }
 }
 /// Complete benchmark configuration: which datasets to build and how to time them.
 ///
 /// `Default` is derived: it uses `DatasetConfig::default()` and
 /// `MeasurementOptions::default()`, exactly as the previous hand-written impl did.
 #[derive(Clone, Debug, Default)]
 pub struct BenchmarkConfig {
    /// Universe size, set sizes and overlap ratios used to plan each scenario.
    pub dataset: DatasetConfig,
    /// Sampling limits and timing switches.
    pub measurement: MeasurementOptions,
 }
 impl BenchmarkConfig {
    /// Small configuration pairing `DatasetConfig::smoke()` with
    /// `MeasurementOptions::smoke()`.
    pub fn smoke() -> Self {
        Self {
            dataset: DatasetConfig::smoke(),
            measurement: MeasurementOptions::smoke(),
        }
    }
 }
 /// The benchmark stage a record describes.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub enum BenchmarkPhase {
    /// Building the prepared representation from raw input.
    Prepare,
    /// Intersecting two prepared sets into the algorithm's own output type.
    IntersectNativeOutput,
 }
 impl BenchmarkPhase {
    /// Short name shown in the `phase` column of the reports.
    fn label(self) -> &'static str {
        let text = match self {
            BenchmarkPhase::Prepare => "prepare",
            BenchmarkPhase::IntersectNativeOutput => "native",
        };
        text
    }
 }
 /// Summary statistics over the timed samples of one benchmark phase.
 #[derive(Clone, Debug)]
 pub struct MeasurementStats {
    /// Number of samples summarised.
    pub samples: usize,
    /// Arithmetic mean, truncated to whole nanoseconds.
    pub mean: Duration,
    /// Middle sample; for an even count, the truncated mean of the two middle samples.
    pub median: Duration,
    /// Shortest sample.
    pub min: Duration,
    /// Longest sample.
    pub max: Duration,
 }
 impl MeasurementStats {
    /// Summarises `samples`.
    ///
    /// The vector is owned, so it is sorted in place rather than cloned first;
    /// the sum used for the mean does not depend on order.
    ///
    /// # Panics
    /// Panics if `samples` is empty.
    fn from_samples(mut samples: Vec<Duration>) -> Self {
        assert!(!samples.is_empty(), "at least one sample is required");
        samples.sort_unstable();
        let count = samples.len();
        // Summing in u128 nanoseconds cannot overflow for any realistic sample count.
        let total_nanos: u128 = samples.iter().map(Duration::as_nanos).sum();
        let mean = duration_from_nanos(total_nanos / count as u128);
        let middle = count / 2;
        let median = if count % 2 == 1 {
            samples[middle]
        } else {
            duration_from_nanos((samples[middle - 1].as_nanos() + samples[middle].as_nanos()) / 2)
        };
        Self {
            samples: count,
            mean,
            median,
            min: samples[0],
            max: samples[count - 1],
        }
    }
 }
 /// One row of benchmark output: a scenario, an algorithm, a phase and its timings.
 #[derive(Clone, Debug)]
 pub struct BenchmarkRecord {
    // Scenario (order, density, overlap) the measurement belongs to.
    pub scenario: Scenario,
    // Size of the value universe the sets were drawn from.
    pub universe_len: usize,
    // Number of values in each of the two sets.
    pub set_len: usize,
    // `set_len` as a percentage of `universe_len`.
    pub set_population_percent: f64,
    // Shared-value count derived from the configured overlap ratio.
    pub requested_overlap: usize,
    // Shared-value count actually planned; may differ from the request.
    pub actual_overlap: usize,
    // Configured overlap ratio expressed as a percentage.
    pub target_overlap_percent: f64,
    // `actual_overlap` as a percentage of `set_len`.
    pub actual_overlap_percent: f64,
    // `NAME` of the algorithm that was measured.
    pub algorithm: &'static str,
    // Which stage of the algorithm was timed.
    pub phase: BenchmarkPhase,
    // Summary of the collected samples.
    pub stats: MeasurementStats,
 }
 /// Runs the benchmark suite with the default configuration and prints the report.
 pub fn run() {
    let config = BenchmarkConfig::default();
    run_with_config(config);
 }
 /// Collects results for `config` and prints them in the configured output format.
 /// When nothing was measured, a short hint is printed instead of a report.
 pub fn run_with_config(config: BenchmarkConfig) {
    let results = collect_results(&config);
    let format = settings().output_format;
    if results.is_empty() {
        print_no_results(format);
        return;
    }
    match format {
        OutputFormat::Normal => print_normal_report(&config, &results),
        OutputFormat::Markdown => print_markdown_report(&config, &results),
    }
 }
 /// Benchmarks every enabled algorithm on every enabled scenario.
 ///
 /// Records are appended scenario by scenario, and within a scenario in the fixed
 /// algorithm order bitset, SIMD bitset, std hash, custom hash, sorted merge.
 pub fn collect_results(config: &BenchmarkConfig) -> Vec<BenchmarkRecord> {
    let runtime = settings();
    let scenarios = Scenario::all();
    let expected_records =
        scenarios.len() * runtime.enabled_algorithm_count() * runtime.enabled_phase_count();
    let mut results = Vec::with_capacity(expected_records);
    let measurement = &config.measurement;
    for scenario in scenarios {
        let plan = config.dataset.plan(scenario);
        if runtime.enable_bitset {
            benchmark_algorithm::<BitSetAlgorithm>(&plan, measurement, &mut results);
        }
        if runtime.enable_simd_bitset {
            benchmark_algorithm::<SimdBitSetAlgorithm>(&plan, measurement, &mut results);
        }
        if runtime.enable_std_hash {
            benchmark_algorithm::<StdHashAlgorithm>(&plan, measurement, &mut results);
        }
        if runtime.enable_custom_hash {
            benchmark_algorithm::<CustomHashAlgorithm>(&plan, measurement, &mut results);
        }
        if runtime.enable_sorted_merge {
            benchmark_algorithm::<SortedMergeAlgorithm>(&plan, measurement, &mut results);
        }
    }
    results
 }
 /// Measures the enabled phases of algorithm `A` on `plan` and appends one record
 /// per phase to `output` (prepare first, then intersection).
 ///
 /// The prepared input pair is built only when the intersection phase is enabled,
 /// so a prepare-only run no longer generates and prepares data it never uses.
 fn benchmark_algorithm<A>(
    plan: &DatasetPlan,
    measurement: &MeasurementOptions,
    output: &mut Vec<BenchmarkRecord>,
 ) where
    A: IntersectionAlgorithm,
 {
    let runtime = settings();
    if runtime.enable_prepare_phase {
        let prepare_stats = measure_prepare::<A>(plan, measurement);
        output.push(build_record::<A>(
            plan,
            BenchmarkPhase::Prepare,
            prepare_stats,
        ));
    }
    if runtime.enable_intersection_phase {
        // Untimed setup: the intersection measurement works on already-prepared inputs.
        let (left, right) = prepare_pair::<A>(plan);
        let materialized_stats = measure_native_output::<A>(&left, &right, measurement);
        output.push(build_record::<A>(
            plan,
            BenchmarkPhase::IntersectNativeOutput,
            materialized_stats,
        ));
    }
 }
 /// Combines the plan's dataset facts, algorithm `A`'s name, the phase and its
 /// statistics into a `BenchmarkRecord`.
 fn build_record<A>(
    plan: &DatasetPlan,
    phase: BenchmarkPhase,
    stats: MeasurementStats,
 ) -> BenchmarkRecord
 where
    A: IntersectionAlgorithm,
 {
    let set_population_percent = population_percent(plan.set_len, plan.universe_len);
    let actual_overlap_percent = plan.actual_overlap_percent();
    BenchmarkRecord {
        algorithm: A::NAME,
        phase,
        stats,
        scenario: plan.scenario,
        universe_len: plan.universe_len,
        set_len: plan.set_len,
        set_population_percent,
        requested_overlap: plan.requested_overlap,
        actual_overlap: plan.actual_overlap,
        target_overlap_percent: plan.target_overlap_percent,
        actual_overlap_percent,
    }
 }
 /// Times how long algorithm `A` takes to prepare both inputs of `plan`.
 ///
 /// Runs `measurement.warmup_runs` untimed preparations, then collects samples until
 /// `min_samples` is reached and either `max_samples` or `target_total` is hit.
 /// Each sample covers preparing the left and the right input.
 fn measure_prepare<A>(plan: &DatasetPlan, measurement: &MeasurementOptions) -> MeasurementStats
 where
    A: IntersectionAlgorithm,
 {
    // Warmup: same work as a sample, but not timed.
    for _ in 0..measurement.warmup_runs {
        let (left, right) = prepare_pair::<A>(plan);
        black_box(&left);
        black_box(&right);
        drop(left);
        drop(right);
    }
    let mut samples = Vec::new();
    let mut total = Duration::ZERO;
    // `min_samples` is always honoured; past it, stop at `max_samples` or `target_total`.
    while samples.len() < measurement.min_samples
        || (samples.len() < measurement.max_samples && total < measurement.target_total)
    {
        let elapsed = if measurement.include_prepare_input_generation {
            // One timer around generation + preparation of both sides.
            let start = Instant::now();
            let (left, right) = prepare_pair::<A>(plan);
            let elapsed = start.elapsed();
            black_box(&left);
            black_box(&right);
            // Dropping happens after the timer was read, so it is not measured.
            drop(left);
            drop(right);
            elapsed
        } else {
            // Generation stays outside the timers; only the two `A::prepare` calls are timed.
            let left_raw = plan.generate_left();
            let left_start = Instant::now();
            let left = A::prepare(&left_raw, plan.universe_len, plan.scenario.order);
            let left_elapsed = left_start.elapsed();
            black_box(&left);
            // Release the raw left input before the right one is generated.
            drop(left_raw);
            let right_raw = plan.generate_right();
            let right_start = Instant::now();
            let right = A::prepare(&right_raw, plan.universe_len, plan.scenario.order);
            let right_elapsed = right_start.elapsed();
            black_box(&right);
            drop(right_raw);
            let elapsed = left_elapsed + right_elapsed;
            drop(left);
            drop(right);
            elapsed
        };
        total += elapsed;
        samples.push(elapsed);
    }
    MeasurementStats::from_samples(samples)
 }
 /// Generates and prepares the left input, then the right input, for algorithm `A`.
 /// Each raw vector is released before the next one is generated.
 fn prepare_pair<A>(plan: &DatasetPlan) -> (A::Prepared, A::Prepared)
 where
    A: IntersectionAlgorithm,
 {
    let order = plan.scenario.order;
    let left = {
        let raw = plan.generate_left();
        A::prepare(&raw, plan.universe_len, order)
    };
    let right = {
        let raw = plan.generate_right();
        A::prepare(&raw, plan.universe_len, order)
    };
    (left, right)
 }
 /// Times algorithm `A` intersecting `left` and `right` into its own output type.
 ///
 /// One output value is created up front and reused for every warmup run and sample.
 /// Sampling continues until `min_samples` is reached and either `max_samples` or
 /// `target_total` is hit.
 fn measure_native_output<A>(
    left: &A::Prepared,
    right: &A::Prepared,
    measurement: &MeasurementOptions,
 ) -> MeasurementStats
 where
    A: IntersectionAlgorithm,
 {
    let mut output = A::create_output(left, right);
    // Warmup: clear, intersect and count, all untimed.
    for _ in 0..measurement.warmup_runs {
        A::clear_output(&mut output);
        A::intersect_into(left, right, &mut output);
        let count = A::output_len(&output);
        black_box(&output);
        black_box(count);
    }
    let mut samples = Vec::new();
    let mut total = Duration::ZERO;
    while samples.len() < measurement.min_samples
        || (samples.len() < measurement.max_samples && total < measurement.target_total)
    {
        // The output is cleared exactly once per sample: before the timer starts
        // unless the clear is configured to be part of the timed region.
        if !measurement.include_intersection_output_clear {
            A::clear_output(&mut output);
        }
        let start = Instant::now();
        if measurement.include_intersection_output_clear {
            A::clear_output(&mut output);
        }
        A::intersect_into(left, right, &mut output);
        // The result count is read exactly once per sample too: before the timer
        // is read when it should be timed, otherwise after.
        let elapsed = if measurement.include_intersection_result_count {
            let count = A::output_len(&output);
            black_box(count);
            start.elapsed()
        } else {
            start.elapsed()
        };
        black_box(&output);
        if !measurement.include_intersection_result_count {
            let count = A::output_len(&output);
            black_box(count);
        }
        total += elapsed;
        samples.push(elapsed);
    }
    MeasurementStats::from_samples(samples)
 }
 /// Prints a hint that no records were produced; the markdown variant wraps
 /// `.env` in backticks.
 fn print_no_results(output_format: OutputFormat) {
    let message = match output_format {
        OutputFormat::Normal => {
            "No benchmark records were generated. Enable at least one algorithm and one phase in .env."
        }
        OutputFormat::Markdown => {
            "No benchmark records were generated. Enable at least one algorithm and one phase in `.env`."
        }
    };
    println!("{message}");
 }
 /// Prints the plain-text report: a configuration header followed by the
 /// per-scenario result tables.
 ///
 /// The universe line is taken from `config.dataset`, the same source as the set
 /// populations below it, so the header stays consistent when a non-default
 /// configuration (for example `BenchmarkConfig::smoke()`) is used. Overlap targets
 /// and the enable flags are still read from the runtime settings.
 fn print_normal_report(config: &BenchmarkConfig, results: &[BenchmarkRecord]) {
    let runtime = settings();
    let dataset = &config.dataset;
    let measurement = &config.measurement;
    // Share of the configured universe taken by a set of the given size.
    let share_of_universe =
        |set_len: usize| format_percent(population_percent(set_len, dataset.universe_len));
    println!("Intersection benchmark suite");
    println!("Scenarios: {}", Scenario::all().len());
    println!(
        "Universe: 0..={} ({} values)",
        // Generated values are reduced modulo `universe_len`, so the largest is one below it.
        dataset.universe_len.saturating_sub(1),
        dataset.universe_len
    );
    println!(
        "Set populations: sparse={} ({}) semi-sparse={} ({}) normal={} ({}) dense={} ({})",
        dataset.sparse_size,
        share_of_universe(dataset.sparse_size),
        dataset.semi_sparse_size,
        share_of_universe(dataset.semi_sparse_size),
        dataset.normal_size,
        share_of_universe(dataset.normal_size),
        dataset.dense_size,
        share_of_universe(dataset.dense_size)
    );
    println!(
        "Overlap targets: low={} medium={} high={}",
        format_percent(runtime.low_overlap_percent as f64),
        format_percent(runtime.medium_overlap_percent as f64),
        format_percent(runtime.high_overlap_percent as f64)
    );
    println!(
        "Enabled densities: sparse={} semi-sparse={} normal={} dense={}",
        runtime.enable_sparse_scenario,
        runtime.enable_semi_sparse_scenario,
        runtime.enable_normal_scenario,
        runtime.enable_dense_scenario
    );
    println!(
        "Enabled overlaps: low={} medium={} high={}",
        runtime.enable_low_overlap, runtime.enable_medium_overlap, runtime.enable_high_overlap
    );
    println!(
        "Sampling: min={} max={} target_total={}ms",
        measurement.min_samples,
        measurement.max_samples,
        measurement.target_total.as_millis()
    );
    println!(
        "Enabled algorithms: bitset={} bitset-simd={} std-hash={} splitmix-hash={} sorted-merge={}",
        runtime.enable_bitset,
        runtime.enable_simd_bitset,
        runtime.enable_std_hash,
        runtime.enable_custom_hash,
        runtime.enable_sorted_merge
    );
    println!(
        "Enabled phases: prepare={} intersection={}",
        runtime.enable_prepare_phase, runtime.enable_intersection_phase
    );
    println!(
        "Timed extras: prepare_input_generation={} intersection_output_clear={} intersection_result_count={}",
        measurement.include_prepare_input_generation,
        measurement.include_intersection_output_clear,
        measurement.include_intersection_result_count
    );
    println!(
        "Phases per algorithm: prepare={} native_output_intersection={}",
        runtime.enable_prepare_phase, runtime.enable_intersection_phase
    );
    println!();
    print_normal_results(results);
 }
 /// Prints the plain-text result rows, emitting a scenario banner and column
 /// header each time the scenario changes between consecutive records.
 fn print_normal_results(results: &[BenchmarkRecord]) {
    let divider = "-".repeat(96);
    let mut previous = None;
    for record in results {
        let scenario = record.scenario;
        if previous != Some(scenario) {
            // Blank line between scenario sections, but not before the first one.
            if previous.is_some() {
                println!();
            }
            previous = Some(scenario);
            let adjustment = if record.actual_overlap == record.requested_overlap {
                ""
            } else {
                " adjusted-for-universe"
            };
            println!("{divider}");
            println!(
                "Scenario: {} | {} = {} of universe | {} = {} of each set",
                describe_order(scenario.order),
                describe_density(scenario.density),
                format_percent(record.set_population_percent),
                describe_overlap(scenario.overlap),
                format_percent(record.target_overlap_percent)
            );
            println!(
                "          set population: {} / {} values",
                record.set_len, record.universe_len
            );
            println!(
                "          overlap: requested={} actual={} shared={}/{}{}",
                format_percent(record.target_overlap_percent),
                format_percent(record.actual_overlap_percent),
                record.actual_overlap,
                record.set_len,
                adjustment
            );
            println!(
                "{:<14} {:<10} {:>7} {:>12} {:>12} {:>12} {:>12}",
                "algorithm", "phase", "samples", "mean", "median", "min", "max"
            );
        }
        let stats = &record.stats;
        println!(
            "{:<14} {:<10} {:>7} {:>12} {:>12} {:>12} {:>12}",
            record.algorithm,
            record.phase.label(),
            stats.samples,
            format_duration(stats.mean),
            format_duration(stats.median),
            format_duration(stats.min),
            format_duration(stats.max)
        );
    }
 }
 /// Prints the markdown report: a bullet-list configuration header followed by one
 /// section and table per scenario.
 ///
 /// The universe line is taken from `config.dataset`, the same source as the set
 /// populations below it, so the header stays consistent when a non-default
 /// configuration (for example `BenchmarkConfig::smoke()`) is used. Overlap targets
 /// and the enable flags are still read from the runtime settings.
 fn print_markdown_report(config: &BenchmarkConfig, results: &[BenchmarkRecord]) {
    let runtime = settings();
    let dataset = &config.dataset;
    let measurement = &config.measurement;
    // Share of the configured universe taken by a set of the given size.
    let share_of_universe =
        |set_len: usize| format_percent(population_percent(set_len, dataset.universe_len));
    println!("# Intersection benchmark suite");
    println!();
    println!("- Scenarios: {}", Scenario::all().len());
    println!(
        "- Universe: `0..={}` (`{}` values)",
        // Generated values are reduced modulo `universe_len`, so the largest is one below it.
        dataset.universe_len.saturating_sub(1),
        dataset.universe_len
    );
    println!(
        "- Set populations: sparse=`{}` ({}) | semi-sparse=`{}` ({}) | normal=`{}` ({}) | dense=`{}` ({})",
        dataset.sparse_size,
        share_of_universe(dataset.sparse_size),
        dataset.semi_sparse_size,
        share_of_universe(dataset.semi_sparse_size),
        dataset.normal_size,
        share_of_universe(dataset.normal_size),
        dataset.dense_size,
        share_of_universe(dataset.dense_size)
    );
    println!(
        "- Overlap targets: low={} | medium={} | high={}",
        format_percent(runtime.low_overlap_percent as f64),
        format_percent(runtime.medium_overlap_percent as f64),
        format_percent(runtime.high_overlap_percent as f64)
    );
    println!(
        "- Enabled densities: sparse=`{}` | semi-sparse=`{}` | normal=`{}` | dense=`{}`",
        runtime.enable_sparse_scenario,
        runtime.enable_semi_sparse_scenario,
        runtime.enable_normal_scenario,
        runtime.enable_dense_scenario
    );
    println!(
        "- Enabled overlaps: low=`{}` | medium=`{}` | high=`{}`",
        runtime.enable_low_overlap, runtime.enable_medium_overlap, runtime.enable_high_overlap
    );
    println!(
        "- Sampling: min=`{}` | max=`{}` | target total=`{}ms`",
        measurement.min_samples,
        measurement.max_samples,
        measurement.target_total.as_millis()
    );
    println!(
        "- Enabled algorithms: bitset=`{}` | bitset-simd=`{}` | std-hash=`{}` | splitmix-hash=`{}` | sorted-merge=`{}`",
        runtime.enable_bitset,
        runtime.enable_simd_bitset,
        runtime.enable_std_hash,
        runtime.enable_custom_hash,
        runtime.enable_sorted_merge
    );
    println!(
        "- Enabled phases: prepare=`{}` | intersection=`{}`",
        runtime.enable_prepare_phase, runtime.enable_intersection_phase
    );
    println!(
        "- Timed extras: prepare input generation=`{}` | intersection output clear=`{}` | intersection result count=`{}`",
        measurement.include_prepare_input_generation,
        measurement.include_intersection_output_clear,
        measurement.include_intersection_result_count
    );
    println!(
        "- Phases per algorithm: prepare=`{}` | native output intersection=`{}`",
        runtime.enable_prepare_phase, runtime.enable_intersection_phase
    );
    println!();
    let mut current_scenario = None;
    for record in results {
        // A new section and table header start whenever the scenario changes.
        if current_scenario != Some(record.scenario) {
            if current_scenario.is_some() {
                println!();
            }
            current_scenario = Some(record.scenario);
            println!(
                "## Scenario: {} | {} = {} of universe | {} = {} of each set",
                describe_order(record.scenario.order),
                describe_density(record.scenario.density),
                format_percent(record.set_population_percent),
                describe_overlap(record.scenario.overlap),
                format_percent(record.target_overlap_percent)
            );
            println!();
            println!(
                "- Set population: `{}` / `{}` values",
                record.set_len, record.universe_len
            );
            println!(
                "- Overlap: requested={} | actual={} | shared=`{}/{}`{}",
                format_percent(record.target_overlap_percent),
                format_percent(record.actual_overlap_percent),
                record.actual_overlap,
                record.set_len,
                if record.actual_overlap != record.requested_overlap {
                    " | adjusted for universe"
                } else {
                    ""
                }
            );
            println!();
            println!("| algorithm | phase | samples | mean | median | min | max |");
            println!("| --- | --- | ---: | ---: | ---: | ---: | ---: |");
        }
        println!(
            "| {} | {} | {} | {} | {} | {} | {} |",
            record.algorithm,
            record.phase.label(),
            record.stats.samples,
            format_duration(record.stats.mean),
            format_duration(record.stats.median),
            format_duration(record.stats.min),
            format_duration(record.stats.max)
        );
    }
 }
 /// Human-readable phrase for an input order, used in scenario headings.
 fn describe_order(order: Order) -> &'static str {
    let phrase = match order {
        Order::Unordered => "unordered input",
        Order::Ordered => "ordered input",
    };
    phrase
 }
 /// Human-readable phrase for a density level, used in scenario headings.
 fn describe_density(density: Density) -> &'static str {
    let phrase = match density {
        Density::Dense => "dense set population",
        Density::Normal => "normal set population",
        Density::SemiSparse => "semi-sparse set population",
        Density::Sparse => "sparse set population",
    };
    phrase
 }
 /// Human-readable phrase for an overlap level, used in scenario headings.
 fn describe_overlap(overlap: Overlap) -> &'static str {
    let phrase = match overlap {
        Overlap::High => "high overlap percentage",
        Overlap::Medium => "medium overlap percentage",
        Overlap::Low => "low overlap percentage",
    };
    phrase
 }
 /// Returns `set_len` as a percentage of `universe_len`, or `0.0` for an empty universe.
 fn population_percent(set_len: usize, universe_len: usize) -> f64 {
    if universe_len == 0 {
        return 0.0;
    }
    let fraction = set_len as f64 / universe_len as f64;
    fraction * 100.0
 }
 /// Converts a nanosecond count into a `Duration`, split into whole seconds and
 /// the sub-second remainder. The seconds part is narrowed to `u64` with `as`.
 fn duration_from_nanos(nanos: u128) -> Duration {
    const NANOS_PER_SECOND: u128 = 1_000_000_000;
    let whole_seconds = (nanos / NANOS_PER_SECOND) as u64;
    let remainder = (nanos % NANOS_PER_SECOND) as u32;
    Duration::new(whole_seconds, remainder)
 }
 /// Formats a percentage with more decimals the smaller it is: one decimal from
 /// 10 upwards, three from 1 upwards, four otherwise.
 fn format_percent(value: f64) -> String {
    match value {
        large if large >= 10.0 => format!("{large:.1}%"),
        medium if medium >= 1.0 => format!("{medium:.3}%"),
        small => format!("{small:.4}%"),
    }
 }
 /// Formats a duration in the largest unit (s, ms, us, ns) in which it is at least 1.
 /// Seconds, milliseconds and microseconds get three decimals; nanoseconds get none.
 fn format_duration(duration: Duration) -> String {
    match duration.as_secs_f64() {
        seconds if seconds >= 1.0 => format!("{seconds:.3}s"),
        seconds if seconds >= 0.001 => format!("{:.3}ms", seconds * 1_000.0),
        seconds if seconds >= 0.000_001 => format!("{:.3}us", seconds * 1_000_000.0),
        seconds => format!("{:.0}ns", seconds * 1_000_000_000.0),
    }
 }
--- a/src/data.rs
+++ b/src/data.rs
@@ -0,0 +1,402 @@
 use std::fmt;
 use crate::settings::settings;
 /// Whether the generated input vectors are sorted or shuffled.
 #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
 pub enum Order {
    /// Input values are sorted ascending.
    Ordered,
    /// Input values are shuffled.
    Unordered,
 }
 impl Order {
    /// Every variant, in declaration order.
    pub const ALL: [Self; 2] = [Self::Ordered, Self::Unordered];
 }
 impl fmt::Display for Order {
    /// Writes the lowercase name of the variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Ordered => "ordered",
            Self::Unordered => "unordered",
        };
        f.write_str(name)
    }
 }
 /// How large each generated set is relative to the universe.
 #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
 pub enum Density {
    Sparse,
    SemiSparse,
    Normal,
    Dense,
 }
 impl Density {
    /// Every variant, in declaration order.
    pub const ALL: [Self; 4] = [Self::Sparse, Self::SemiSparse, Self::Normal, Self::Dense];
 }
 impl fmt::Display for Density {
    /// Writes the lowercase, hyphenated name of the variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Sparse => "sparse",
            Self::SemiSparse => "semi-sparse",
            Self::Normal => "normal",
            Self::Dense => "dense",
        };
        f.write_str(name)
    }
 }
 /// How much of each set is shared with the other set.
 #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
 pub enum Overlap {
    Low,
    Medium,
    High,
 }
 impl Overlap {
    /// Every variant, in declaration order.
    pub const ALL: [Self; 3] = [Self::Low, Self::Medium, Self::High];
 }
 impl fmt::Display for Overlap {
    /// Writes the lowercase name of the variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Low => "low",
            Self::Medium => "medium",
            Self::High => "high",
        };
        f.write_str(name)
    }
 }
 /// One benchmark scenario: a combination of input order, set density and overlap.
 #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
 pub struct Scenario {
    pub order: Order,
    pub density: Density,
    pub overlap: Overlap,
 }
 impl Scenario {
    /// Lists every scenario whose density and overlap are enabled in the runtime
    /// settings, for both input orders. Order varies slowest, overlap fastest.
    pub fn all() -> Vec<Self> {
        let runtime = settings();
        let upper_bound = Order::ALL.len() * Density::ALL.len() * Overlap::ALL.len();
        let mut scenarios = Vec::with_capacity(upper_bound);
        for order in Order::ALL {
            let densities = Density::ALL
                .into_iter()
                .filter(|&density| runtime.density_enabled(density));
            for density in densities {
                let overlaps = Overlap::ALL
                    .into_iter()
                    .filter(|&overlap| runtime.overlap_enabled(overlap));
                for overlap in overlaps {
                    scenarios.push(Self {
                        order,
                        density,
                        overlap,
                    });
                }
            }
        }
        scenarios
    }
 }
 impl fmt::Display for Scenario {
    /// Writes the scenario as `order/density/overlap`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            order,
            density,
            overlap,
        } = self;
        write!(f, "{order}/{density}/{overlap}")
    }
 }
 /// The two raw input vectors generated for one scenario.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct RawPair {
    // Values of the left set.
    pub left: Vec<u32>,
    // Values of the right set.
    pub right: Vec<u32>,
 }
 /// A fraction `numerator / denominator` kept in integer form.
 #[derive(Clone, Copy, Debug)]
 struct Ratio {
    numerator: usize,
    denominator: usize,
 }
 impl Ratio {
    /// Builds the ratio `numerator / denominator`.
    const fn new(numerator: usize, denominator: usize) -> Self {
        Ratio {
            numerator,
            denominator,
        }
    }
    /// Builds the ratio `percent / 100`.
    fn from_percent(percent: usize) -> Self {
        Self {
            numerator: percent,
            denominator: 100,
        }
    }
    /// Scales `value` by the ratio using integer arithmetic: the multiplication
    /// saturates and the division truncates.
    fn apply(self, value: usize) -> usize {
        let Self {
            numerator,
            denominator,
        } = self;
        value.saturating_mul(numerator) / denominator
    }
    /// Returns the ratio as a floating-point percentage.
    fn as_percentage(self) -> f64 {
        let fraction = self.numerator as f64 / self.denominator as f64;
        fraction * 100.0
    }
 }
 /// Universe size, per-density set sizes and per-level overlap ratios from which
 /// scenario plans are derived.
 #[derive(Clone, Debug)]
 pub struct DatasetConfig {
    // Number of distinct values available to the generated sets.
    pub universe_len: usize,
    // Set size used for `Density::Sparse`.
    pub sparse_size: usize,
    // Set size used for `Density::SemiSparse`.
    pub semi_sparse_size: usize,
    // Set size used for `Density::Normal`.
    pub normal_size: usize,
    // Set size used for `Density::Dense`.
    pub dense_size: usize,
    // Share of a set that is requested to overlap, per overlap level.
    low_overlap: Ratio,
    medium_overlap: Ratio,
    high_overlap: Ratio,
 }
 impl Default for DatasetConfig {
    /// Derives the configuration from the runtime settings: each set size is the
    /// configured percentage of the universe (rounded down) and each overlap ratio
    /// is the configured whole-number percentage.
    fn default() -> Self {
        let runtime = settings();
        let universe_len = runtime.universe_len();
        let sized = |percent: f64| percentage_of(universe_len, percent);
        Self {
            universe_len,
            sparse_size: sized(runtime.sparse_set_percent),
            semi_sparse_size: sized(runtime.semi_sparse_set_percent),
            normal_size: sized(runtime.normal_set_percent),
            dense_size: sized(runtime.dense_set_percent),
            low_overlap: Ratio::from_percent(runtime.low_overlap_percent),
            medium_overlap: Ratio::from_percent(runtime.medium_overlap_percent),
            high_overlap: Ratio::from_percent(runtime.high_overlap_percent),
        }
    }
 }
 impl DatasetConfig {
    /// Tiny fixed configuration: a universe of 101 values with set sizes of
    /// 10, 25, 50 and 90, and overlap ratios of 1/10, 1/2 and 9/10.
    pub fn smoke() -> Self {
        Self {
            universe_len: 101,
            sparse_size: 10,
            semi_sparse_size: 25,
            normal_size: 50,
            dense_size: 90,
            low_overlap: Ratio::new(1, 10),
            medium_overlap: Ratio::new(1, 2),
            high_overlap: Ratio::new(9, 10),
        }
    }
    /// Derives the concrete dataset layout for `scenario`: set size, overlap counts
    /// and the parameters of the index permutation and shuffles.
    ///
    /// The requested overlap is raised when two sets of this size cannot fit in the
    /// universe with fewer shared values, and capped at the set size.
    ///
    /// # Panics
    /// Panics if the set size for the scenario's density exceeds `universe_len`.
    pub fn plan(&self, scenario: Scenario) -> DatasetPlan {
        let set_len = self.set_size_for(scenario.density);
        assert!(
            set_len <= self.universe_len,
            "set size {set_len} exceeds universe {}",
            self.universe_len
        );
        let overlap_ratio = self.overlap_ratio_for(scenario.overlap);
        let requested_overlap = overlap_ratio.apply(set_len);
        // Two sets of `set_len` values inside `universe_len` values must share at
        // least `2 * set_len - universe_len` of them.
        let minimum_overlap = set_len.saturating_mul(2).saturating_sub(self.universe_len);
        let actual_overlap = requested_overlap.max(minimum_overlap).min(set_len);
        let left_only = set_len - actual_overlap;
        let right_only = set_len - actual_overlap;
        let total_unique = actual_overlap + left_only + right_only;
        assert!(
            total_unique <= self.universe_len,
            "scenario {scenario} cannot fit inside the configured universe"
        );
        let id = scenario_id(scenario);
        let modulus = self.universe_len as u64;
        let multiplier = choose_coprime_multiplier(modulus, id);
        let addend = if modulus == 0 {
            0
        } else {
            // The whole mix wraps: a plain `+ 17` after `wrapping_mul` could still
            // overflow and panic in builds with overflow checks enabled.
            id.wrapping_mul(0x9E37_79B9_7F4A_7C15).wrapping_add(17) % modulus
        };
        DatasetPlan {
            scenario,
            universe_len: self.universe_len,
            set_len,
            requested_overlap,
            actual_overlap,
            left_only,
            right_only,
            target_overlap_percent: overlap_ratio.as_percentage(),
            multiplier,
            addend,
            left_shuffle_seed: id ^ 0xA5A5_A5A5_DEAD_BEEF,
            right_shuffle_seed: id ^ 0x5A5A_5A5A_CAFE_BABE,
        }
    }
    /// Set size configured for `density`.
    fn set_size_for(&self, density: Density) -> usize {
        match density {
            Density::Sparse => self.sparse_size,
            Density::SemiSparse => self.semi_sparse_size,
            Density::Normal => self.normal_size,
            Density::Dense => self.dense_size,
        }
    }
    /// Overlap ratio configured for `overlap`.
    fn overlap_ratio_for(&self, overlap: Overlap) -> Ratio {
        match overlap {
            Overlap::Low => self.low_overlap,
            Overlap::Medium => self.medium_overlap,
            Overlap::High => self.high_overlap,
        }
    }
 }
 /// Concrete layout of one scenario's two input sets, from which the raw vectors
 /// can be generated on demand.
 #[derive(Clone, Debug)]
 pub struct DatasetPlan {
    // Scenario this plan was derived for.
    pub scenario: Scenario,
    // Size of the value universe; generated values are reduced modulo this.
    pub universe_len: usize,
    // Number of values in each set.
    pub set_len: usize,
    // Shared-value count derived from the configured overlap ratio.
    pub requested_overlap: usize,
    // Shared-value count actually used when generating the sets.
    pub actual_overlap: usize,
    // Number of values that appear only in the left set.
    pub left_only: usize,
    // Number of values that appear only in the right set.
    pub right_only: usize,
    // Configured overlap ratio expressed as a percentage.
    pub target_overlap_percent: f64,
    // Index `i` maps to the value `(multiplier * i + addend) % universe_len`.
    multiplier: u64,
    addend: u64,
    // Seeds for shuffling each side when the scenario is unordered.
    left_shuffle_seed: u64,
    right_shuffle_seed: u64,
 }
 impl DatasetPlan {
    /// Generates the left input: the shared segment followed by the left-only
    /// segment, then sorted or shuffled according to the scenario's order.
    pub fn generate_left(&self) -> Vec<u32> {
        let mut values = Vec::with_capacity(self.set_len);
        let left_only_start = self.actual_overlap;
        self.extend_segment(&mut values, 0, self.actual_overlap);
        self.extend_segment(&mut values, left_only_start, self.left_only);
        self.finish(values, self.left_shuffle_seed)
    }
    /// Generates the right input: the shared segment followed by the right-only
    /// segment (which starts after the left-only indices), then sorted or shuffled.
    pub fn generate_right(&self) -> Vec<u32> {
        let mut values = Vec::with_capacity(self.set_len);
        let right_only_start = self.actual_overlap + self.left_only;
        self.extend_segment(&mut values, 0, self.actual_overlap);
        self.extend_segment(&mut values, right_only_start, self.right_only);
        self.finish(values, self.right_shuffle_seed)
    }
    /// Generates both inputs, left first.
    pub fn generate_pair(&self) -> RawPair {
        let left = self.generate_left();
        let right = self.generate_right();
        RawPair { left, right }
    }
    /// `actual_overlap` as a percentage of `set_len`, or `0.0` for empty sets.
    pub fn actual_overlap_percent(&self) -> f64 {
        if self.set_len == 0 {
            return 0.0;
        }
        let fraction = self.actual_overlap as f64 / self.set_len as f64;
        fraction * 100.0
    }
    /// Whether the planned overlap differs from the requested one.
    pub fn overlap_was_adjusted(&self) -> bool {
        self.requested_overlap != self.actual_overlap
    }
    /// Appends the permuted values for indices `start..start + len`.
    fn extend_segment(&self, values: &mut Vec<u32>, start: usize, len: usize) {
        let indices = start..start + len;
        values.extend(indices.map(|index| self.permute_index(index)));
    }
    /// Maps an index to `(multiplier * index + addend) % universe_len`.
    fn permute_index(&self, index: usize) -> u32 {
        let modulus = self.universe_len as u64;
        let scaled = self.multiplier * index as u64;
        ((scaled + self.addend) % modulus) as u32
    }
    /// Sorts the values for ordered scenarios, or shuffles them with `seed` otherwise.
    fn finish(&self, mut values: Vec<u32>, seed: u64) -> Vec<u32> {
        match self.scenario.order {
            Order::Unordered => shuffle(&mut values, seed),
            Order::Ordered => values.sort_unstable(),
        }
        values
    }
 }
 /// Derives a numeric id for a scenario from fixed per-variant codes:
 /// `order * 1000 + density * 100 + overlap`.
 fn scenario_id(scenario: Scenario) -> u64 {
    let order_code: u64 = match scenario.order {
        Order::Ordered => 1,
        Order::Unordered => 2,
    };
    let density_code: u64 = match scenario.density {
        Density::Sparse => 3,
        Density::SemiSparse => 5,
        Density::Normal => 7,
        Density::Dense => 11,
    };
    let overlap_code: u64 = match scenario.overlap {
        Overlap::Low => 11,
        Overlap::Medium => 13,
        Overlap::High => 17,
    };
    let order_part = order_code * 1_000;
    let density_part = density_code * 100;
    order_part + density_part + overlap_code
 }
 /// Picks a multiplier in `1..modulus` that is coprime to `modulus`, starting the
 /// search at a position derived from `scenario_id`. Returns 1 when `modulus <= 1`.
 fn choose_coprime_multiplier(modulus: u64, scenario_id: u64) -> u64 {
    if modulus <= 1 {
        return 1;
    }
    // The start position lies in `1..=modulus - 1`.
    let mut candidate = scenario_id % (modulus - 1) + 1;
    loop {
        if gcd(candidate, modulus) == 1 {
            return candidate;
        }
        candidate += 1;
        // Wrap back to 1, which is coprime to everything.
        if candidate >= modulus {
            candidate = 1;
        }
    }
 }
 /// Greatest common divisor by the Euclidean algorithm; `gcd(x, 0)` is `x`.
 fn gcd(mut left: u64, mut right: u64) -> u64 {
    while right != 0 {
        (left, right) = (right, left % right);
    }
    left
 }
 /// Shuffles `values` in place, walking from the last index down to 1 and swapping
 /// each position with a generator-chosen index at or below it. The same seed
 /// always yields the same order.
 fn shuffle(values: &mut [u32], seed: u64) {
    let mut rng = SplitMix64::new(seed);
    let mut index = values.len();
    while index > 1 {
        index -= 1;
        let choices = index as u64 + 1;
        let swap_index = (rng.next_u64() % choices) as usize;
        values.swap(index, swap_index);
    }
 }
 /// Returns `percent` percent of `total`, rounded down.
 fn percentage_of(total: usize, percent: f64) -> usize {
    let fraction = percent / 100.0;
    let scaled = (total as f64) * fraction;
    scaled.floor() as usize
 }
 /// Small deterministic 64-bit generator used for seeded shuffling.
 #[derive(Clone, Debug)]
 struct SplitMix64 {
    state: u64,
 }
 impl SplitMix64 {
    /// Creates a generator whose initial state is `seed`.
    fn new(seed: u64) -> Self {
        SplitMix64 { state: seed }
    }
    /// Advances the state by a fixed increment and returns a mixed copy of it.
    fn next_u64(&mut self) -> u64 {
        const INCREMENT: u64 = 0x9E37_79B9_7F4A_7C15;
        const FIRST_MULTIPLIER: u64 = 0xBF58_476D_1CE4_E5B9;
        const SECOND_MULTIPLIER: u64 = 0x94D0_49BB_1331_11EB;
        self.state = self.state.wrapping_add(INCREMENT);
        let mixed = self.state;
        let mixed = (mixed ^ (mixed >> 30)).wrapping_mul(FIRST_MULTIPLIER);
        let mixed = (mixed ^ (mixed >> 27)).wrapping_mul(SECOND_MULTIPLIER);
        mixed ^ (mixed >> 31)
    }
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -0,0 +1,7 @@
 // Crate root: declares the public `algorithms`, `benchmark`, `data` and
 // `settings` modules.
 pub mod algorithms;
 pub mod benchmark;
 pub mod data;
 pub mod settings;
 // Unit tests live in `tests` and are compiled only for test builds.
 #[cfg(test)]
 mod tests;
--- a/src/main.rs
+++ b/src/main.rs
@@ -0,0 +1,3 @@
 /// Binary entry point: hands control to the library's `benchmark::run`.
 fn main() {
    intersection_benchmark::benchmark::run();
 }
--- a/src/settings.rs
+++ b/src/settings.rs
@@ -0,0 +1,268 @@
 use std::collections::HashMap;
 use std::env;
 use std::fs;
 use std::path::Path;
 use std::sync::OnceLock;
 /// Process-wide cache of the loaded settings; populated on the first call
 /// to `settings()` and never replaced afterwards.
 static SETTINGS: OnceLock<BenchmarkSettings> = OnceLock::new();
 /// Output rendering mode, read from the `OUTPUT_FORMAT` setting.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub enum OutputFormat {
    Normal,
    Markdown,
 }
 /// Parses `normal` or `markdown`, ignoring ASCII case and surrounding
 /// whitespace. Any other input is rejected with a fixed error message.
 impl std::str::FromStr for OutputFormat {
    type Err = String;
    fn from_str(value: &str) -> Result<Self, Self::Err> {
        let candidate = value.trim();
        if candidate.eq_ignore_ascii_case("normal") {
            Ok(Self::Normal)
        } else if candidate.eq_ignore_ascii_case("markdown") {
            Ok(Self::Markdown)
        } else {
            Err(String::from("expected normal or markdown"))
        }
    }
 }
 /// Every benchmark setting, one field per key read by `BenchmarkSettings::load`
 /// (the key is the field name in upper case, e.g. `max_value` <- `MAX_VALUE`).
 #[derive(Clone, Debug)]
 pub struct BenchmarkSettings {
    // Benchmark universe and derived scenario sizes.
    /// `universe_len()` is this value plus one; validated to be at least 1.
    pub max_value: u32,
    /// Set-size percentages per density; each is validated to lie in 0..=100.
    pub sparse_set_percent: f64,
    pub semi_sparse_set_percent: f64,
    pub normal_set_percent: f64,
    pub dense_set_percent: f64,
    // Requested overlap percentages for the smaller set; each validated to be <= 100.
    pub low_overlap_percent: usize,
    pub medium_overlap_percent: usize,
    pub high_overlap_percent: usize,
    // Which density scenarios are included; at least one must be enabled.
    pub enable_sparse_scenario: bool,
    pub enable_semi_sparse_scenario: bool,
    pub enable_normal_scenario: bool,
    pub enable_dense_scenario: bool,
    // Which overlap scenarios are included; at least one must be enabled.
    pub enable_low_overlap: bool,
    pub enable_medium_overlap: bool,
    pub enable_high_overlap: bool,
    // Benchmark execution controls; `min` must be >= 1 and `max` >= `min`.
    pub benchmark_min_samples: usize,
    pub benchmark_max_samples: usize,
    pub benchmark_target_total_ms: u64,
    // Which algorithms are included in the benchmark run.
    pub enable_bitset: bool,
    pub enable_simd_bitset: bool,
    pub enable_std_hash: bool,
    pub enable_custom_hash: bool,
    pub enable_sorted_merge: bool,
    // Which benchmark phases are emitted.
    pub enable_prepare_phase: bool,
    pub enable_intersection_phase: bool,
    /// How benchmark output is rendered.
    pub output_format: OutputFormat,
    // Which extra harness steps are counted inside each timed sample.
    pub time_prepare_include_input_generation: bool,
    pub time_intersection_include_output_clear: bool,
    pub time_intersection_include_result_count: bool,
 }
 impl BenchmarkSettings {
    /// Number of values in the universe: `max_value + 1`, widened to `usize`
    /// before the addition.
    pub fn universe_len(&self) -> usize {
        self.max_value as usize + 1
    }
    /// Counts how many of the five `enable_*` algorithm switches are on.
    pub fn enabled_algorithm_count(&self) -> usize {
        let switches = [
            self.enable_bitset,
            self.enable_simd_bitset,
            self.enable_std_hash,
            self.enable_custom_hash,
            self.enable_sorted_merge,
        ];
        switches.iter().map(|&enabled| usize::from(enabled)).sum()
    }
    /// Counts how many of the two benchmark phases (prepare, intersection) are on.
    pub fn enabled_phase_count(&self) -> usize {
        usize::from(self.enable_prepare_phase) + usize::from(self.enable_intersection_phase)
    }
    pub fn density_enabled(&self, density: crate::data::Density) -> bool {
        match density {
            crate::data::Density::Sparse => self.enable_sparse_scenario,
            crate::data::Density::SemiSparse => self.enable_semi_sparse_scenario,
            crate::data::Density::Normal => self.enable_normal_scenario,
            crate::data::Density::Dense => self.enable_dense_scenario,
        }
    }
    pub fn overlap_enabled(&self, overlap: crate::data::Overlap) -> bool {
        match overlap {
            crate::data::Overlap::Low => self.enable_low_overlap,
            crate::data::Overlap::Medium => self.enable_medium_overlap,
            crate::data::Overlap::High => self.enable_high_overlap,
        }
    }
    /// Panics with a descriptive message if any setting is out of range.
    ///
    /// Checks run in a fixed order (universe, sample counts, scenario
    /// switches, set percentages, overlap percentages), so the first violated
    /// rule decides which message is reported.
    fn validate(&self) {
        assert!(
            self.max_value > 0,
            "MAX_VALUE must be at least 1 so the benchmark universe is non-trivial"
        );
        assert!(
            self.benchmark_min_samples != 0,
            "BENCHMARK_MIN_SAMPLES must be at least 1"
        );
        assert!(
            self.benchmark_min_samples <= self.benchmark_max_samples,
            "BENCHMARK_MAX_SAMPLES must be greater than or equal to BENCHMARK_MIN_SAMPLES"
        );

        let density_switches = [
            self.enable_sparse_scenario,
            self.enable_semi_sparse_scenario,
            self.enable_normal_scenario,
            self.enable_dense_scenario,
        ];
        assert!(
            density_switches.contains(&true),
            "Enable at least one density scenario in .env"
        );

        let overlap_switches = [
            self.enable_low_overlap,
            self.enable_medium_overlap,
            self.enable_high_overlap,
        ];
        assert!(
            overlap_switches.contains(&true),
            "Enable at least one overlap scenario in .env"
        );

        let set_percents = [
            ("SPARSE_SET_PERCENT", self.sparse_set_percent),
            ("SEMI_SPARSE_SET_PERCENT", self.semi_sparse_set_percent),
            ("NORMAL_SET_PERCENT", self.normal_set_percent),
            ("DENSE_SET_PERCENT", self.dense_set_percent),
        ];
        for (name, percent) in set_percents {
            let in_range = percent.is_finite() && (0.0..=100.0).contains(&percent);
            assert!(in_range, "{name} must be between 0 and 100");
        }

        let overlap_percents = [
            ("LOW_OVERLAP_PERCENT", self.low_overlap_percent),
            ("MEDIUM_OVERLAP_PERCENT", self.medium_overlap_percent),
            ("HIGH_OVERLAP_PERCENT", self.high_overlap_percent),
        ];
        for (name, percent) in overlap_percents {
            assert!(percent <= 100, "{name} must be between 0 and 100");
        }
    }
    fn load() -> Self {
        let env_file_values = read_env_file(".env");
        let settings = Self {
            max_value: read_required_parsed("MAX_VALUE", &env_file_values),
            sparse_set_percent: read_required_parsed("SPARSE_SET_PERCENT", &env_file_values),
            semi_sparse_set_percent: read_required_parsed(
                "SEMI_SPARSE_SET_PERCENT",
                &env_file_values,
            ),
            normal_set_percent: read_required_parsed("NORMAL_SET_PERCENT", &env_file_values),
            dense_set_percent: read_required_parsed("DENSE_SET_PERCENT", &env_file_values),
            low_overlap_percent: read_required_parsed("LOW_OVERLAP_PERCENT", &env_file_values),
            medium_overlap_percent: read_required_parsed(
                "MEDIUM_OVERLAP_PERCENT",
                &env_file_values,
            ),
            high_overlap_percent: read_required_parsed("HIGH_OVERLAP_PERCENT", &env_file_values),
            enable_sparse_scenario: read_required_bool("ENABLE_SPARSE_SCENARIO", &env_file_values),
            enable_semi_sparse_scenario: read_required_bool(
                "ENABLE_SEMI_SPARSE_SCENARIO",
                &env_file_values,
            ),
            enable_normal_scenario: read_required_bool("ENABLE_NORMAL_SCENARIO", &env_file_values),
            enable_dense_scenario: read_required_bool("ENABLE_DENSE_SCENARIO", &env_file_values),
            enable_low_overlap: read_required_bool("ENABLE_LOW_OVERLAP", &env_file_values),
            enable_medium_overlap: read_required_bool("ENABLE_MEDIUM_OVERLAP", &env_file_values),
            enable_high_overlap: read_required_bool("ENABLE_HIGH_OVERLAP", &env_file_values),
            benchmark_min_samples: read_required_parsed("BENCHMARK_MIN_SAMPLES", &env_file_values),
            benchmark_max_samples: read_required_parsed("BENCHMARK_MAX_SAMPLES", &env_file_values),
            benchmark_target_total_ms: read_required_parsed(
                "BENCHMARK_TARGET_TOTAL_MS",
                &env_file_values,
            ),
            enable_bitset: read_required_bool("ENABLE_BITSET", &env_file_values),
            enable_simd_bitset: read_required_bool("ENABLE_SIMD_BITSET", &env_file_values),
            enable_std_hash: read_required_bool("ENABLE_STD_HASH", &env_file_values),
            enable_custom_hash: read_required_bool("ENABLE_CUSTOM_HASH", &env_file_values),
            enable_sorted_merge: read_required_bool("ENABLE_SORTED_MERGE", &env_file_values),
            enable_prepare_phase: read_required_bool("ENABLE_PREPARE_PHASE", &env_file_values),
            enable_intersection_phase: read_required_bool(
                "ENABLE_INTERSECTION_PHASE",
                &env_file_values,
            ),
            output_format: read_required_parsed("OUTPUT_FORMAT", &env_file_values),
            time_prepare_include_input_generation: read_required_bool(
                "TIME_PREPARE_INCLUDE_INPUT_GENERATION",
                &env_file_values,
            ),
            time_intersection_include_output_clear: read_required_bool(
                "TIME_INTERSECTION_INCLUDE_OUTPUT_CLEAR",
                &env_file_values,
            ),
            time_intersection_include_result_count: read_required_bool(
                "TIME_INTERSECTION_INCLUDE_RESULT_COUNT",
                &env_file_values,
            ),
        };
        settings.validate();
        settings
    }
 }
 /// Returns the shared settings, loading and validating them on first use.
 ///
 /// Later calls return the same cached instance. The first call panics if
 /// `BenchmarkSettings::load` rejects the configuration.
 pub fn settings() -> &'static BenchmarkSettings {
    SETTINGS.get_or_init(BenchmarkSettings::load)
 }
 /// Reads `KEY=VALUE` pairs from the file at `path`.
 ///
 /// Blank lines, lines starting with `#`, and lines without `=` are skipped.
 /// Keys and values are trimmed; the split happens at the first `=`, and a
 /// repeated key keeps its last value. An unreadable file yields an empty
 /// map rather than an error.
 fn read_env_file(path: impl AsRef<Path>) -> HashMap<String, String> {
    let mut entries = HashMap::new();
    let contents = match fs::read_to_string(path) {
        Ok(text) => text,
        Err(_) => return entries,
    };
    for raw_line in contents.lines() {
        let line = raw_line.trim();
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        if let Some((key, value)) = line.split_once('=') {
            entries.insert(key.trim().to_string(), value.trim().to_string());
        }
    }
    entries
 }
 /// Reads the required setting `key` and parses it into `T`.
 ///
 /// The process environment takes precedence over `env_file_values` (the
 /// contents of `.env`). Surrounding whitespace is trimmed before parsing,
 /// matching how boolean settings and `OutputFormat` are handled, so a value
 /// such as `" 100 "` from the environment parses as a number.
 ///
 /// # Panics
 ///
 /// Panics if the key is defined in neither source, or if the value cannot
 /// be parsed as `T`.
 fn read_required_parsed<T>(key: &str, env_file_values: &HashMap<String, String>) -> T
 where
    T: std::str::FromStr,
    T::Err: std::fmt::Display,
 {
    // One parse path for both sources so they are trimmed and reported alike.
    let parse_from = |value: &str, source: &str| -> T {
        value
            .trim()
            .parse()
            .unwrap_or_else(|error| panic!("failed to parse {key} from {source}: {error}"))
    };
    if let Ok(value) = env::var(key) {
        return parse_from(&value, "environment");
    }
    if let Some(value) = env_file_values.get(key) {
        return parse_from(value, ".env");
    }
    panic!("missing required setting {key}; define it in .env or the environment");
 }
 /// Reads the required boolean setting `key`.
 ///
 /// The process environment takes precedence over `env_file_values` (the
 /// contents of `.env`). Panics if the key is defined in neither source or
 /// if `parse_bool` rejects the value.
 fn read_required_bool(key: &str, env_file_values: &HashMap<String, String>) -> bool {
    match (env::var(key), env_file_values.get(key)) {
        (Ok(value), _) => parse_bool(key, &value, "environment"),
        (Err(_), Some(value)) => parse_bool(key, value, ".env"),
        (Err(_), None) => {
            panic!("missing required setting {key}; define it in .env or the environment")
        }
    }
 }
 /// Interprets `value` as a boolean, ignoring ASCII case and surrounding
 /// whitespace.
 ///
 /// Accepts `true`/`1`/`yes`/`on` and `false`/`0`/`no`/`off`. Anything else
 /// panics with a message naming `key` and `source`.
 fn parse_bool(key: &str, value: &str, source: &str) -> bool {
    const TRUTHY: [&str; 4] = ["true", "1", "yes", "on"];
    const FALSY: [&str; 4] = ["false", "0", "no", "off"];

    let normalized = value.trim().to_ascii_lowercase();
    if TRUTHY.contains(&normalized.as_str()) {
        true
    } else if FALSY.contains(&normalized.as_str()) {
        false
    } else {
        panic!("failed to parse {key} from {source}: expected true/false")
    }
 }
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -0,0 +1,250 @@
 use std::collections::BTreeSet;
 use crate::algorithms::IntersectionAlgorithm;
 use crate::algorithms::bitset::BitSetAlgorithm;
 use crate::algorithms::custom_hash::CustomHashAlgorithm;
 use crate::algorithms::simd_bitset::SimdBitSetAlgorithm;
 use crate::algorithms::sorted_merge::SortedMergeAlgorithm;
 use crate::algorithms::std_hash::StdHashAlgorithm;
 use crate::benchmark::{BenchmarkConfig, collect_results};
 use crate::data::{DatasetConfig, Density, Order, Overlap, Scenario};
 use crate::settings::settings;
 /// Two empty inputs must produce an empty intersection for every algorithm.
 #[test]
 fn algorithms_handle_empty_sets() {
    assert_case_for_all_algorithms(&[], &[], &[]);
 }
 /// Inputs with no common value must produce an empty intersection.
 #[test]
 fn algorithms_handle_disjoint_sets() {
    assert_case_for_all_algorithms(&[1, 3, 5], &[2, 4, 6], &[]);
 }
 /// Identical inputs must produce that same set as the intersection.
 #[test]
 fn algorithms_handle_full_overlap() {
    assert_case_for_all_algorithms(&[1, 2, 3], &[1, 2, 3], &[1, 2, 3]);
 }
 /// Inputs sharing exactly one value (20) must produce just that value.
 #[test]
 fn algorithms_handle_single_shared_value() {
    assert_case_for_all_algorithms(&[10, 20, 30], &[5, 20, 25], &[20]);
 }
 /// Exercises the extremes of the value range: 0 and the configured
 /// `max_value` appear in both inputs and must both be reported.
 #[test]
 fn algorithms_handle_boundary_values() {
    // Reads the real settings, so this depends on `.env` / the environment.
    let max_value = settings().max_value;
    assert_case_for_all_algorithms(&[0, 1, max_value], &[0, max_value], &[0, max_value]);
 }
 /// For the ordered / normal / medium smoke scenario, both generated sets
 /// must have the planned length, share exactly `plan.actual_overlap`
 /// values, and be in non-decreasing order.
 #[test]
 fn generator_produces_exact_sizes_and_overlap() {
    let config = DatasetConfig::smoke();
    let plan = config.plan(Scenario {
        order: Order::Ordered,
        density: Density::Normal,
        overlap: Overlap::Medium,
    });
    let pair = plan.generate_pair();

    assert_eq!(pair.left.len(), plan.set_len);
    assert_eq!(pair.right.len(), plan.set_len);

    let shared = intersection_size(&pair.left, &pair.right);
    assert_eq!(shared, plan.actual_overlap);

    assert!(pair.left.windows(2).all(|window| window[0] <= window[1]));
    assert!(pair.right.windows(2).all(|window| window[0] <= window[1]));
 }
 /// For the unordered / semi-sparse / high smoke scenario, each generated set
 /// must contain no duplicates and must not be sorted.
 #[test]
 fn generator_preserves_uniqueness_and_unordered_shape() {
    /// Number of distinct values in `values`.
    fn distinct_len(values: &[u32]) -> usize {
        values.iter().copied().collect::<BTreeSet<_>>().len()
    }

    let config = DatasetConfig::smoke();
    let plan = config.plan(Scenario {
        order: Order::Unordered,
        density: Density::SemiSparse,
        overlap: Overlap::High,
    });
    let pair = plan.generate_pair();

    assert_eq!(distinct_len(&pair.left), pair.left.len());
    assert_eq!(distinct_len(&pair.right), pair.right.len());

    assert!(!pair.left.windows(2).all(|window| window[0] <= window[1]));
    assert!(!pair.right.windows(2).all(|window| window[0] <= window[1]));
 }
 /// For the ordered / dense / low smoke scenario the plan is expected to
 /// report an adjusted overlap: the requested tenth of the set size, raised
 /// to the minimum forced by two sets of `dense_size` sharing one universe.
 #[test]
 fn generator_adjusts_impossible_overlap_levels() {
    let config = DatasetConfig::smoke();
    let plan = config.plan(Scenario {
        order: Order::Ordered,
        density: Density::Dense,
        overlap: Overlap::Low,
    });

    let set_len = config.dense_size;
    let requested_overlap = set_len / 10;
    // Two sets of `set_len` values must share at least `2 * set_len - universe_len`.
    let doubled_len = set_len.saturating_mul(2);
    let minimum_overlap = doubled_len.saturating_sub(config.universe_len);
    let expected_overlap = requested_overlap.max(minimum_overlap);

    assert!(plan.overlap_was_adjusted());
    assert_eq!(plan.requested_overlap, requested_overlap);
    assert_eq!(plan.actual_overlap, expected_overlap);
 }
 /// Generates the smoke dataset for every scenario and checks each of the
 /// five algorithms against a `BTreeSet`-based reference intersection.
 #[test]
 fn all_algorithms_match_on_every_smoke_scenario() {
    let config = DatasetConfig::smoke();
    for scenario in Scenario::all() {
        let plan = config.plan(scenario);
        let pair = plan.generate_pair();
        let left: &[u32] = &pair.left;
        let right: &[u32] = &pair.right;
        let expected = normalized_intersection(left, right);
        let count = expected.len();
        let universe = plan.universe_len;

        assert_algorithm_matches::<BitSetAlgorithm>(left, right, universe, &expected, count);
        assert_algorithm_matches::<SimdBitSetAlgorithm>(left, right, universe, &expected, count);
        assert_algorithm_matches::<StdHashAlgorithm>(left, right, universe, &expected, count);
        assert_algorithm_matches::<CustomHashAlgorithm>(left, right, universe, &expected, count);
        assert_algorithm_matches::<SortedMergeAlgorithm>(left, right, universe, &expected, count);
    }
 }
 /// The smoke benchmark run must return one result per combination of
 /// scenario, enabled algorithm and enabled phase.
 #[test]
 fn benchmark_runner_smoke_test_returns_every_result_group() {
    let results = collect_results(&BenchmarkConfig::smoke());
    let runtime = settings();

    let scenario_count = Scenario::all().len();
    let algorithm_count = runtime.enabled_algorithm_count();
    let phase_count = runtime.enabled_phase_count();

    assert_eq!(results.len(), scenario_count * algorithm_count * phase_count);
 }
 /// Runs one hand-written case through all five algorithms.
 ///
 /// `expected` may be given in any order; it is sorted before comparison.
 /// The universe length is the largest input value plus one, or 1 when both
 /// inputs are empty.
 fn assert_case_for_all_algorithms(left: &[u32], right: &[u32], expected: &[u32]) {
    let mut wanted = expected.to_vec();
    wanted.sort_unstable();
    let count = wanted.len();

    let universe = left
        .iter()
        .chain(right)
        .copied()
        .max()
        .map_or(1, |largest| largest as usize + 1);

    assert_algorithm_matches::<BitSetAlgorithm>(left, right, universe, &wanted, count);
    assert_algorithm_matches::<SimdBitSetAlgorithm>(left, right, universe, &wanted, count);
    assert_algorithm_matches::<StdHashAlgorithm>(left, right, universe, &wanted, count);
    assert_algorithm_matches::<CustomHashAlgorithm>(left, right, universe, &wanted, count);
    assert_algorithm_matches::<SortedMergeAlgorithm>(left, right, universe, &wanted, count);
 }
 /// Runs algorithm `A` on `left` and `right` and asserts that it reports
 /// `expected_count` values equal (after sorting) to `expected_values`.
 ///
 /// `expected_values` must already be sorted, because the algorithm's output
 /// is sorted here before the comparison.
 fn assert_algorithm_matches<A>(
    left: &[u32],
    right: &[u32],
    universe_len: usize,
    expected_values: &[u32],
    expected_count: usize,
 ) where
    A: IntersectionAlgorithm,
 {
    // Each input is prepared with the order inferred from its actual contents.
    let prepared_left = A::prepare(left, universe_len, infer_order(left));
    let prepared_right = A::prepare(right, universe_len, infer_order(right));
    let mut output = A::create_output(&prepared_left, &prepared_right);
    // Clear before intersecting — presumably mirrors the benchmark harness
    // sequence; confirm against `benchmark`.
    A::clear_output(&mut output);
    A::intersect_into(&prepared_left, &prepared_right, &mut output);
    assert_eq!(A::output_len(&output), expected_count);
    // Sorted so the comparison does not depend on the algorithm's output order.
    let mut actual_values = A::output_values(&output);
    actual_values.sort_unstable();
    assert_eq!(actual_values, expected_values);
 }
 /// Reference intersection: the values of `left` that also occur in `right`,
 /// in ascending order.
 ///
 /// A value repeated in `left` is repeated in the result.
 fn normalized_intersection(left: &[u32], right: &[u32]) -> Vec<u32> {
    let lookup: BTreeSet<u32> = right.iter().copied().collect();
    let mut shared = Vec::new();
    for &candidate in left {
        if lookup.contains(&candidate) {
            shared.push(candidate);
        }
    }
    shared.sort_unstable();
    shared
 }
 /// Number of elements of `left` that also occur in `right`, as counted by
 /// `normalized_intersection`.
 fn intersection_size(left: &[u32], right: &[u32]) -> usize {
    normalized_intersection(left, right).len()
 }
 /// Classifies `values` as `Order::Ordered` when they are non-decreasing
 /// (including empty and single-element slices), otherwise `Order::Unordered`.
 fn infer_order(values: &[u32]) -> Order {
    let has_descent = values.windows(2).any(|pair| pair[0] > pair[1]);
    if has_descent {
        Order::Unordered
    } else {
        Order::Ordered
    }
 }