From fe686214d38450316a519ea2676212e43276af89 Mon Sep 17 00:00:00 2001 From: AI Assistant Date: Mon, 30 Mar 2026 15:17:33 +0000 Subject: [PATCH] Initial benchmark implementation --- .codex | 0 .env | 46 +++ .gitignore | 1 + Cargo.lock | 7 + Cargo.toml | 6 + README.md | 175 +++++++++ src/algorithms/bitset.rs | 106 +++++ src/algorithms/custom_hash.rs | 119 ++++++ src/algorithms/mod.rs | 21 + src/algorithms/simd_bitset.rs | 230 +++++++++++ src/algorithms/sorted_merge.rs | 66 ++++ src/algorithms/std_hash.rs | 68 ++++ src/benchmark.rs | 699 +++++++++++++++++++++++++++++++++ src/data.rs | 402 +++++++++++++++++++ src/lib.rs | 7 + src/main.rs | 3 + src/settings.rs | 268 +++++++++++++ src/tests.rs | 250 ++++++++++++ 18 files changed, 2474 insertions(+) create mode 100644 .codex create mode 100644 .env create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 src/algorithms/bitset.rs create mode 100644 src/algorithms/custom_hash.rs create mode 100644 src/algorithms/mod.rs create mode 100644 src/algorithms/simd_bitset.rs create mode 100644 src/algorithms/sorted_merge.rs create mode 100644 src/algorithms/std_hash.rs create mode 100644 src/benchmark.rs create mode 100644 src/data.rs create mode 100644 src/lib.rs create mode 100644 src/main.rs create mode 100644 src/settings.rs create mode 100644 src/tests.rs diff --git a/.codex b/.codex new file mode 100644 index 0000000..e69de29 diff --git a/.env b/.env new file mode 100644 index 0000000..721e3a9 --- /dev/null +++ b/.env @@ -0,0 +1,46 @@ +# Benchmark universe and derived scenario sizes. +MAX_VALUE=100000000 +SPARSE_SET_PERCENT=0.004 +SEMI_SPARSE_SET_PERCENT=0.04 +NORMAL_SET_PERCENT=0.4 +DENSE_SET_PERCENT=4 + +# Requested overlap percentages for the smaller set. +LOW_OVERLAP_PERCENT=10 +MEDIUM_OVERLAP_PERCENT=50 +HIGH_OVERLAP_PERCENT=80 + +# Select which density scenarios are included. 
+ENABLE_SPARSE_SCENARIO=true +ENABLE_SEMI_SPARSE_SCENARIO=true +ENABLE_NORMAL_SCENARIO=true +ENABLE_DENSE_SCENARIO=true + +# Select which overlap scenarios are included. +ENABLE_LOW_OVERLAP=false +ENABLE_MEDIUM_OVERLAP=true +ENABLE_HIGH_OVERLAP=false + +# Benchmark execution controls. +BENCHMARK_MIN_SAMPLES=2 +BENCHMARK_MAX_SAMPLES=5 +BENCHMARK_TARGET_TOTAL_MS=800 + +# Select which algorithms are included in the benchmark run. +ENABLE_BITSET=true +ENABLE_SIMD_BITSET=false +ENABLE_STD_HASH=true +ENABLE_CUSTOM_HASH=true +ENABLE_SORTED_MERGE=true + +# Select which benchmark phases are emitted. +ENABLE_PREPARE_PHASE=true +ENABLE_INTERSECTION_PHASE=true + +# Select how benchmark output is rendered. +OUTPUT_FORMAT=markdown + +# Select which extra harness steps are counted inside each timed sample. +TIME_PREPARE_INCLUDE_INPUT_GENERATION=false +TIME_INTERSECTION_INCLUDE_OUTPUT_CLEAR=false +TIME_INTERSECTION_INCLUDE_RESULT_COUNT=false diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..13c1a31 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "intersection_benchmark" +version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..d71ef49 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "intersection_benchmark" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/README.md b/README.md new file mode 100644 index 0000000..ae8557f --- /dev/null +++ b/README.md @@ -0,0 +1,175 @@ +# Intersection Benchmark + +This project benchmarks several set-intersection strategies in Rust over the same generated input scenarios. 
+ +The benchmark output is split into two timed phases: + +- `prepare` + - Measures the conversion from the benchmark's raw input format, a normal array of numbers, into the algorithm's prepared internal representation. + - This phase does not measure the later intersection itself. + - By default, raw input generation is not included in this time. + - If `TIME_PREPARE_INCLUDE_INPUT_GENERATION=true` in `.env`, raw input generation is included too. + +- `native` + - Measures only the intersection step on already prepared inputs. + - The result is written into the algorithm's native output representation. + - This phase does not measure preparation from the raw input arrays. + - This phase does not measure converting the native result into a plain array of numbers. + - By default, output clearing and result counting are not included in this time. + - If enabled in `.env`, `TIME_INTERSECTION_INCLUDE_OUTPUT_CLEAR` and `TIME_INTERSECTION_INCLUDE_RESULT_COUNT` move those extra steps into the timed window. + +Important notes about the timing model: + +- Output storage is created before timed `native` samples begin and then reused across samples. +- Warmup runs are performed before measured samples and are not included in the reported timings. +- Printing, formatting, statistics aggregation, and scenario planning are not part of the reported algorithm timings. + +In short: + +- `prepare` answers: how long does it take to build the algorithm's working representation? +- `native` answers: how long does it take to compute the intersection into the algorithm's own output format? 
+ +# Intersection benchmark suite + +- Scenarios: 8 +- Universe: `0..=100000000` (`100000001` values) +- Set populations: sparse=`4000` (0.0040%) | semi-sparse=`40000` (0.0400%) | normal=`400000` (0.4000%) | dense=`4000000` (4.000%) +- Overlap targets: low=10.0% | medium=50.0% | high=80.0% +- Enabled densities: sparse=`true` | semi-sparse=`true` | normal=`true` | dense=`true` +- Enabled overlaps: low=`false` | medium=`true` | high=`false` +- Sampling: min=`2` | max=`5` | target total=`800ms` +- Enabled algorithms: bitset=`true` | bitset-simd=`false` | std-hash=`true` | splitmix-hash=`true` | sorted-merge=`true` +- Enabled phases: prepare=`true` | intersection=`true` +- Timed extras: prepare input generation=`false` | intersection output clear=`false` | intersection result count=`false` +- Phases per algorithm: prepare=`true` | native output intersection=`true` + +## Scenario: ordered input | sparse set population = 0.0040% of universe | medium overlap percentage = 50.0% of each set + +- Set population: `4000` / `100000001` values +- Overlap: requested=50.0% | actual=50.0% | shared=`2000/4000` + +| algorithm | phase | samples | mean | median | min | max | +| --- | --- | ---: | ---: | ---: | ---: | ---: | +| bitset | prepare | 5 | 411.335us | 401.487us | 393.202us | 444.185us | +| bitset | native | 5 | 950.881us | 940.646us | 840.562us | 1.065ms | +| std-hash | prepare | 5 | 80.875us | 80.946us | 79.932us | 81.558us | +| std-hash | native | 5 | 76.875us | 75.985us | 74.445us | 81.305us | +| splitmix-hash | prepare | 5 | 36.794us | 36.825us | 36.478us | 37.109us | +| splitmix-hash | native | 5 | 35.065us | 33.227us | 30.197us | 43.628us | +| sorted-merge | prepare | 5 | 350ns | 303ns | 295ns | 548ns | +| sorted-merge | native | 5 | 3.568us | 3.541us | 3.533us | 3.646us | + +## Scenario: ordered input | semi-sparse set population = 0.0400% of universe | medium overlap percentage = 50.0% of each set + +- Set population: `40000` / `100000001` values +- Overlap: 
requested=50.0% | actual=50.0% | shared=`20000/40000` + +| algorithm | phase | samples | mean | median | min | max | +| --- | --- | ---: | ---: | ---: | ---: | ---: | +| bitset | prepare | 5 | 3.955ms | 3.951ms | 3.927ms | 4.014ms | +| bitset | native | 5 | 1.428ms | 1.449ms | 1.356ms | 1.507ms | +| std-hash | prepare | 5 | 864.919us | 860.986us | 854.337us | 880.726us | +| std-hash | native | 5 | 905.199us | 902.901us | 898.793us | 913.503us | +| splitmix-hash | prepare | 5 | 413.275us | 410.172us | 408.810us | 423.680us | +| splitmix-hash | native | 5 | 469.869us | 467.963us | 465.287us | 477.299us | +| sorted-merge | prepare | 5 | 6.172us | 6.124us | 5.921us | 6.359us | +| sorted-merge | native | 5 | 36.202us | 36.045us | 36.023us | 36.815us | + +## Scenario: ordered input | normal set population = 0.4000% of universe | medium overlap percentage = 50.0% of each set + +- Set population: `400000` / `100000001` values +- Overlap: requested=50.0% | actual=50.0% | shared=`200000/400000` + +| algorithm | phase | samples | mean | median | min | max | +| --- | --- | ---: | ---: | ---: | ---: | ---: | +| bitset | prepare | 5 | 7.272ms | 7.200ms | 6.820ms | 7.893ms | +| bitset | native | 5 | 1.830ms | 1.821ms | 1.815ms | 1.868ms | +| std-hash | prepare | 5 | 11.484ms | 11.262ms | 10.767ms | 12.441ms | +| std-hash | native | 5 | 15.501ms | 15.262ms | 14.570ms | 17.524ms | +| splitmix-hash | prepare | 5 | 5.993ms | 6.124ms | 5.715ms | 6.193ms | +| splitmix-hash | native | 5 | 5.363ms | 5.381ms | 5.333ms | 5.383ms | +| sorted-merge | prepare | 5 | 309.850us | 265.482us | 248.158us | 507.857us | +| sorted-merge | native | 5 | 567.072us | 543.143us | 527.883us | 649.634us | + +## Scenario: ordered input | dense set population = 4.000% of universe | medium overlap percentage = 50.0% of each set + +- Set population: `4000000` / `100000001` values +- Overlap: requested=50.0% | actual=50.0% | shared=`2000000/4000000` + +| algorithm | phase | samples | mean | median | min | max | 
+| --- | --- | ---: | ---: | ---: | ---: | ---: | +| bitset | prepare | 5 | 14.176ms | 13.606ms | 12.950ms | 17.112ms | +| bitset | native | 5 | 1.765ms | 1.731ms | 1.674ms | 1.860ms | +| std-hash | prepare | 2 | 441.875ms | 441.875ms | 436.380ms | 447.370ms | +| std-hash | native | 2 | 445.366ms | 445.366ms | 442.813ms | 447.919ms | +| splitmix-hash | prepare | 4 | 243.982ms | 239.375ms | 237.612ms | 259.564ms | +| splitmix-hash | native | 5 | 51.967ms | 49.131ms | 48.321ms | 56.781ms | +| sorted-merge | prepare | 5 | 11.852ms | 11.638ms | 11.416ms | 12.418ms | +| sorted-merge | native | 5 | 5.537ms | 5.530ms | 5.517ms | 5.582ms | + +## Scenario: unordered input | sparse set population = 0.0040% of universe | medium overlap percentage = 50.0% of each set + +- Set population: `4000` / `100000001` values +- Overlap: requested=50.0% | actual=50.0% | shared=`2000/4000` + +| algorithm | phase | samples | mean | median | min | max | +| --- | --- | ---: | ---: | ---: | ---: | ---: | +| bitset | prepare | 5 | 2.264ms | 882.303us | 845.089us | 7.840ms | +| bitset | native | 5 | 1.776ms | 1.778ms | 1.695ms | 1.861ms | +| std-hash | prepare | 5 | 83.110us | 80.293us | 79.781us | 93.585us | +| std-hash | native | 5 | 77.430us | 77.762us | 74.848us | 79.988us | +| splitmix-hash | prepare | 5 | 36.017us | 35.957us | 35.943us | 36.129us | +| splitmix-hash | native | 5 | 33.200us | 31.326us | 28.101us | 42.169us | +| sorted-merge | prepare | 5 | 61.613us | 60.791us | 55.215us | 69.401us | +| sorted-merge | native | 5 | 3.617us | 3.533us | 3.528us | 3.943us | + +## Scenario: unordered input | semi-sparse set population = 0.0400% of universe | medium overlap percentage = 50.0% of each set + +- Set population: `40000` / `100000001` values +- Overlap: requested=50.0% | actual=50.0% | shared=`20000/40000` + +| algorithm | phase | samples | mean | median | min | max | +| --- | --- | ---: | ---: | ---: | ---: | ---: | +| bitset | prepare | 5 | 2.221ms | 1.596ms | 1.463ms | 3.909ms | +| 
bitset | native | 5 | 1.770ms | 1.761ms | 1.715ms | 1.829ms | +| std-hash | prepare | 5 | 882.778us | 869.598us | 865.722us | 910.316us | +| std-hash | native | 5 | 917.268us | 915.333us | 900.514us | 935.037us | +| splitmix-hash | prepare | 5 | 417.845us | 420.083us | 411.847us | 422.302us | +| splitmix-hash | native | 5 | 475.443us | 473.060us | 466.552us | 486.611us | +| sorted-merge | prepare | 5 | 866.901us | 867.193us | 857.034us | 877.401us | +| sorted-merge | native | 5 | 49.398us | 48.383us | 48.283us | 53.209us | + +## Scenario: unordered input | normal set population = 0.4000% of universe | medium overlap percentage = 50.0% of each set + +- Set population: `400000` / `100000001` values +- Overlap: requested=50.0% | actual=50.0% | shared=`200000/400000` + +| algorithm | phase | samples | mean | median | min | max | +| --- | --- | ---: | ---: | ---: | ---: | ---: | +| bitset | prepare | 5 | 4.712ms | 4.803ms | 4.476ms | 4.920ms | +| bitset | native | 5 | 1.759ms | 1.756ms | 1.687ms | 1.844ms | +| std-hash | prepare | 5 | 10.839ms | 10.826ms | 10.524ms | 11.178ms | +| std-hash | native | 5 | 15.163ms | 14.614ms | 14.440ms | 17.563ms | +| splitmix-hash | prepare | 5 | 5.632ms | 5.632ms | 5.585ms | 5.679ms | +| splitmix-hash | native | 5 | 5.483ms | 5.432ms | 5.233ms | 5.969ms | +| sorted-merge | prepare | 5 | 10.528ms | 10.457ms | 10.445ms | 10.814ms | +| sorted-merge | native | 5 | 544.184us | 534.490us | 530.891us | 581.634us | + +## Scenario: unordered input | dense set population = 4.000% of universe | medium overlap percentage = 50.0% of each set + +- Set population: `4000000` / `100000001` values +- Overlap: requested=50.0% | actual=50.0% | shared=`2000000/4000000` + +| algorithm | phase | samples | mean | median | min | max | +| --- | --- | ---: | ---: | ---: | ---: | ---: | +| bitset | prepare | 5 | 58.153ms | 57.261ms | 53.448ms | 63.278ms | +| bitset | native | 5 | 1.805ms | 1.782ms | 1.652ms | 1.985ms | +| std-hash | prepare | 2 | 461.857ms | 
461.857ms | 444.440ms | 479.274ms | +| std-hash | native | 2 | 438.653ms | 438.653ms | 435.414ms | 441.891ms | +| splitmix-hash | prepare | 4 | 252.147ms | 250.242ms | 243.913ms | 264.189ms | +| splitmix-hash | native | 5 | 50.829ms | 49.904ms | 49.156ms | 55.716ms | +| sorted-merge | prepare | 5 | 130.853ms | 130.469ms | 129.970ms | 132.748ms | +| sorted-merge | native | 5 | 6.016ms | 5.942ms | 5.877ms | 6.351ms | + + + diff --git a/src/algorithms/bitset.rs b/src/algorithms/bitset.rs new file mode 100644 index 0000000..ab527a6 --- /dev/null +++ b/src/algorithms/bitset.rs @@ -0,0 +1,106 @@ +use crate::algorithms::IntersectionAlgorithm; +use crate::data::Order; + +pub struct BitSetAlgorithm; + +#[derive(Clone, Debug)] +pub struct BitSetSet { + words: Vec, + universe_len: usize, +} + +#[derive(Clone, Debug)] +pub struct BitSetIntersectionOutput { + words: Vec, + universe_len: usize, +} + +impl IntersectionAlgorithm for BitSetAlgorithm { + type Prepared = BitSetSet; + type Output = BitSetIntersectionOutput; + + const NAME: &'static str = "bitset"; + + fn prepare(input: &[u32], universe_len: usize, _order: Order) -> Self::Prepared { + let word_count = universe_len.div_ceil(u64::BITS as usize); + let mut words = vec![0_u64; word_count]; + + for &value in input { + let index = value as usize; + assert!( + index < universe_len, + "value {value} is outside the universe" + ); + + let word_index = index / u64::BITS as usize; + let bit_index = index % u64::BITS as usize; + words[word_index] |= 1_u64 << bit_index; + } + + BitSetSet { + words, + universe_len, + } + } + + fn create_output(left: &Self::Prepared, right: &Self::Prepared) -> Self::Output { + assert_eq!(left.universe_len, right.universe_len); + + BitSetIntersectionOutput { + words: vec![0_u64; left.words.len()], + universe_len: left.universe_len, + } + } + + fn clear_output(_output: &mut Self::Output) {} + + fn intersect_into(left: &Self::Prepared, right: &Self::Prepared, output: &mut Self::Output) { + 
/// Appends the universe values encoded by the set bits of one bitset word.
///
/// `shared` is the word's bit pattern and `word_index` its position in the
/// bitset, so bit `b` stands for the value `word_index * 64 + b`. Values are
/// pushed in ascending order; any value at or beyond `universe_len` is skipped.
fn push_shared_word(
    output: &mut Vec<u32>,
    shared: u64,
    word_index: usize,
    universe_len: usize,
) {
    // Walk the word by repeatedly clearing its lowest set bit; the sequence
    // ends as soon as no bits are left.
    let remaining_bits = std::iter::successors((shared != 0).then_some(shared), |&bits| {
        let rest = bits & (bits - 1);
        (rest != 0).then_some(rest)
    });

    for bits in remaining_bits {
        let value = word_index * u64::BITS as usize + bits.trailing_zeros() as usize;

        if value < universe_len {
            output.push(value as u32);
        }
    }
}
/// Hasher that scrambles its input with the SplitMix64 finalizer.
///
/// Every `write*` call is folded into the running state, so keys that feed the
/// hasher more than once (tuples, structs) depend on all of their fields. A
/// single write on a freshly created hasher still yields `splitmix64(value)`.
#[derive(Clone, Debug, Default)]
pub struct SplitMix64Hasher {
    state: u64,
}

impl SplitMix64Hasher {
    /// Mixes one 64-bit word into the running state.
    ///
    /// The state starts at zero, so the first absorbed word hashes exactly as
    /// `splitmix64(word)`; later words are combined with what came before
    /// instead of replacing it.
    fn absorb(&mut self, word: u64) {
        self.state = splitmix64(self.state ^ word);
    }
}

impl Hasher for SplitMix64Hasher {
    /// Returns the state accumulated so far.
    fn finish(&self) -> u64 {
        self.state
    }

    /// Folds an arbitrary byte string into one word (seeded by its length) and
    /// absorbs that word.
    fn write(&mut self, bytes: &[u8]) {
        let mut folded = (bytes.len() as u64).wrapping_mul(0x9E37_79B9_7F4A_7C15);

        for &byte in bytes {
            folded ^= u64::from(byte);
            folded = folded.rotate_left(7).wrapping_mul(0xBF58_476D_1CE4_E5B9);
        }

        self.absorb(folded);
    }

    fn write_u32(&mut self, value: u32) {
        self.absorb(u64::from(value));
    }

    fn write_u64(&mut self, value: u64) {
        self.absorb(value);
    }

    fn write_usize(&mut self, value: usize) {
        self.absorb(value as u64);
    }
}

/// SplitMix64 output function: adds the golden-ratio increment, then applies
/// two xor-shift-multiply rounds and a final xor-shift.
fn splitmix64(mut value: u64) -> u64 {
    value = value.wrapping_add(0x9E37_79B9_7F4A_7C15);
    value = (value ^ (value >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
    value = (value ^ (value >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
    value ^ (value >> 31)
}
/// Writes `left[i] & right[i]` into `output[i]` for every index present in all
/// three slices, using the widest vector kernel the running CPU supports.
///
/// Words of `output` beyond the shortest slice are left untouched.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn intersect_words(left: &[u64], right: &[u64], output: &mut [u64]) {
    if std::is_x86_feature_detected!("avx2") {
        // SAFETY: AVX2 support was confirmed at runtime on the line above.
        unsafe { intersect_avx2(left, right, output) };
    } else if std::is_x86_feature_detected!("sse2") {
        // SAFETY: SSE2 support was confirmed at runtime on the line above.
        unsafe { intersect_sse2(left, right, output) };
    } else {
        intersect_scalar(left, right, output);
    }
}

/// Writes `left[i] & right[i]` into `output[i]` for every index present in all
/// three slices, using the 128-bit NEON kernel.
#[cfg(target_arch = "aarch64")]
fn intersect_words(left: &[u64], right: &[u64], output: &mut [u64]) {
    // SAFETY: the kernel bounds every access by the shortest slice.
    unsafe { intersect_neon(left, right, output) };
}

/// Writes `left[i] & right[i]` into `output[i]` for every index present in all
/// three slices; portable fallback for targets without a vector kernel here.
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
fn intersect_words(left: &[u64], right: &[u64], output: &mut [u64]) {
    intersect_scalar(left, right, output);
}

/// 256-bit kernel: ANDs four words per iteration, then finishes the tail with
/// the scalar loop.
///
/// # Safety
///
/// The CPU must support AVX2. Slice lengths may differ; only the common prefix
/// is processed.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn intersect_avx2(left: &[u64], right: &[u64], output: &mut [u64]) {
    const LANES: usize = 4;
    // Bound the vector loop by the shortest slice so the raw-pointer accesses
    // below can never leave any of the three buffers.
    let len = left.len().min(right.len()).min(output.len());
    let simd_end = len - len % LANES;
    let mut word_index = 0;

    while word_index < simd_end {
        // SAFETY: `word_index + LANES <= simd_end <= len`, and `len` does not
        // exceed the length of any slice; the unaligned load/store intrinsics
        // have no alignment requirement.
        unsafe {
            let left_vector = _mm256_loadu_si256(left.as_ptr().add(word_index) as *const __m256i);
            let right_vector =
                _mm256_loadu_si256(right.as_ptr().add(word_index) as *const __m256i);
            let shared_vector = _mm256_and_si256(left_vector, right_vector);
            _mm256_storeu_si256(
                output.as_mut_ptr().add(word_index) as *mut __m256i,
                shared_vector,
            );
        }

        word_index += LANES;
    }

    intersect_scalar(
        &left[simd_end..],
        &right[simd_end..],
        &mut output[simd_end..],
    );
}

/// 128-bit kernel: ANDs two words per iteration, then finishes the tail with
/// the scalar loop.
///
/// # Safety
///
/// The CPU must support SSE2. Slice lengths may differ; only the common prefix
/// is processed.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse2")]
unsafe fn intersect_sse2(left: &[u64], right: &[u64], output: &mut [u64]) {
    const LANES: usize = 2;
    let len = left.len().min(right.len()).min(output.len());
    let simd_end = len - len % LANES;
    let mut word_index = 0;

    while word_index < simd_end {
        // SAFETY: `word_index + LANES <= simd_end <= len`, and `len` does not
        // exceed the length of any slice; the unaligned load/store intrinsics
        // have no alignment requirement.
        unsafe {
            let left_vector = _mm_loadu_si128(left.as_ptr().add(word_index) as *const __m128i);
            let right_vector = _mm_loadu_si128(right.as_ptr().add(word_index) as *const __m128i);
            let shared_vector = _mm_and_si128(left_vector, right_vector);
            _mm_storeu_si128(
                output.as_mut_ptr().add(word_index) as *mut __m128i,
                shared_vector,
            );
        }

        word_index += LANES;
    }

    intersect_scalar(
        &left[simd_end..],
        &right[simd_end..],
        &mut output[simd_end..],
    );
}

/// 128-bit NEON kernel: ANDs two words per iteration, then finishes the tail
/// with the scalar loop.
///
/// # Safety
///
/// Slice lengths may differ; only the common prefix is processed.
#[cfg(target_arch = "aarch64")]
unsafe fn intersect_neon(left: &[u64], right: &[u64], output: &mut [u64]) {
    const LANES: usize = 2;
    let len = left.len().min(right.len()).min(output.len());
    let simd_end = len - len % LANES;
    let mut word_index = 0;

    while word_index < simd_end {
        // SAFETY: `word_index + LANES <= simd_end <= len`, and `len` does not
        // exceed the length of any slice.
        unsafe {
            let left_vector: uint64x2_t = vld1q_u64(left.as_ptr().add(word_index));
            let right_vector: uint64x2_t = vld1q_u64(right.as_ptr().add(word_index));
            let shared_vector = vandq_u64(left_vector, right_vector);
            vst1q_u64(output.as_mut_ptr().add(word_index), shared_vector);
        }

        word_index += LANES;
    }

    intersect_scalar(
        &left[simd_end..],
        &right[simd_end..],
        &mut output[simd_end..],
    );
}

/// Portable word-by-word AND; stops at the end of the shortest slice.
fn intersect_scalar(left: &[u64], right: &[u64], output: &mut [u64]) {
    for ((left_word, right_word), output_word) in left.iter().zip(right).zip(output.iter_mut()) {
        *output_word = left_word & right_word;
    }
}
/// Merge-intersects two ascending slices, appending every common value to
/// `output`.
///
/// Existing contents of `output` are kept; matches are pushed after them in
/// ascending order. Both inputs are expected to be sorted already.
fn intersect_impl(left: &[u32], right: &[u32], output: &mut Vec<u32>) {
    let mut left_rest = left;
    let mut right_rest = right;

    // Peel the heads off both slices until either side runs dry.
    while let (Some((&left_head, left_tail)), Some((&right_head, right_tail))) =
        (left_rest.split_first(), right_rest.split_first())
    {
        match left_head.cmp(&right_head) {
            // The smaller head cannot appear on the other side any more.
            std::cmp::Ordering::Less => left_rest = left_tail,
            std::cmp::Ordering::Greater => right_rest = right_tail,
            std::cmp::Ordering::Equal => {
                output.push(left_head);
                left_rest = left_tail;
                right_rest = right_tail;
            }
        }
    }
}
/// Returns the two sets as `(smaller, larger)` by element count.
///
/// When both sets have the same size the original order is kept, i.e.
/// `left` is reported as the smaller one.
fn ordered_sets<'a>(
    left: &'a HashSet<u32>,
    right: &'a HashSet<u32>,
) -> (&'a HashSet<u32>, &'a HashSet<u32>) {
    // Only swap when `right` is strictly smaller, so ties keep `left` first.
    let right_is_smaller = right.len() < left.len();
    if right_is_smaller {
        return (right, left);
    }

    (left, right)
}
max_samples: runtime.benchmark_max_samples, + target_total: Duration::from_millis(runtime.benchmark_target_total_ms), + include_prepare_input_generation: runtime.time_prepare_include_input_generation, + include_intersection_output_clear: runtime.time_intersection_include_output_clear, + include_intersection_result_count: runtime.time_intersection_include_result_count, + } + } +} + +impl MeasurementOptions { + pub fn smoke() -> Self { + Self { + warmup_runs: 0, + min_samples: 1, + max_samples: 1, + target_total: Duration::ZERO, + include_prepare_input_generation: false, + include_intersection_output_clear: false, + include_intersection_result_count: false, + } + } +} + +#[derive(Clone, Debug)] +pub struct BenchmarkConfig { + pub dataset: DatasetConfig, + pub measurement: MeasurementOptions, +} + +impl Default for BenchmarkConfig { + fn default() -> Self { + Self { + dataset: DatasetConfig::default(), + measurement: MeasurementOptions::default(), + } + } +} + +impl BenchmarkConfig { + pub fn smoke() -> Self { + Self { + dataset: DatasetConfig::smoke(), + measurement: MeasurementOptions::smoke(), + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum BenchmarkPhase { + Prepare, + IntersectNativeOutput, +} + +impl BenchmarkPhase { + fn label(self) -> &'static str { + match self { + Self::Prepare => "prepare", + Self::IntersectNativeOutput => "native", + } + } +} + +#[derive(Clone, Debug)] +pub struct MeasurementStats { + pub samples: usize, + pub mean: Duration, + pub median: Duration, + pub min: Duration, + pub max: Duration, +} + +impl MeasurementStats { + fn from_samples(samples: Vec) -> Self { + assert!(!samples.is_empty(), "at least one sample is required"); + + let mut sorted = samples.clone(); + sorted.sort_unstable(); + + let total_nanos: u128 = samples.iter().map(|sample| sample.as_nanos()).sum(); + let mean = duration_from_nanos(total_nanos / samples.len() as u128); + let median = if sorted.len() % 2 == 1 { + sorted[sorted.len() / 2] + } else { 
+ let middle = sorted.len() / 2; + duration_from_nanos((sorted[middle - 1].as_nanos() + sorted[middle].as_nanos()) / 2) + }; + + Self { + samples: samples.len(), + mean, + median, + min: sorted[0], + max: *sorted.last().expect("sorted is non-empty"), + } + } +} + +#[derive(Clone, Debug)] +pub struct BenchmarkRecord { + pub scenario: Scenario, + pub universe_len: usize, + pub set_len: usize, + pub set_population_percent: f64, + pub requested_overlap: usize, + pub actual_overlap: usize, + pub target_overlap_percent: f64, + pub actual_overlap_percent: f64, + pub algorithm: &'static str, + pub phase: BenchmarkPhase, + pub stats: MeasurementStats, +} + +pub fn run() { + run_with_config(BenchmarkConfig::default()); +} + +pub fn run_with_config(config: BenchmarkConfig) { + let results = collect_results(&config); + if results.is_empty() { + print_no_results(settings().output_format); + return; + } + match settings().output_format { + OutputFormat::Normal => print_normal_report(&config, &results), + OutputFormat::Markdown => print_markdown_report(&config, &results), + } +} + +pub fn collect_results(config: &BenchmarkConfig) -> Vec { + let runtime = settings(); + let mut results = Vec::with_capacity( + Scenario::all().len() * runtime.enabled_algorithm_count() * runtime.enabled_phase_count(), + ); + + for scenario in Scenario::all() { + let plan = config.dataset.plan(scenario); + + if runtime.enable_bitset { + benchmark_algorithm::(&plan, &config.measurement, &mut results); + } + if runtime.enable_simd_bitset { + benchmark_algorithm::(&plan, &config.measurement, &mut results); + } + if runtime.enable_std_hash { + benchmark_algorithm::(&plan, &config.measurement, &mut results); + } + if runtime.enable_custom_hash { + benchmark_algorithm::(&plan, &config.measurement, &mut results); + } + if runtime.enable_sorted_merge { + benchmark_algorithm::(&plan, &config.measurement, &mut results); + } + } + + results +} + +fn benchmark_algorithm( + plan: &DatasetPlan, + measurement: 
&MeasurementOptions, + output: &mut Vec, +) where + A: IntersectionAlgorithm, +{ + if settings().enable_prepare_phase { + let prepare_stats = measure_prepare::(plan, measurement); + output.push(build_record::( + plan, + BenchmarkPhase::Prepare, + prepare_stats, + )); + } + + let (left, right) = prepare_pair::(plan); + + if settings().enable_intersection_phase { + let materialized_stats = measure_native_output::(&left, &right, measurement); + output.push(build_record::( + plan, + BenchmarkPhase::IntersectNativeOutput, + materialized_stats, + )); + } +} + +fn build_record( + plan: &DatasetPlan, + phase: BenchmarkPhase, + stats: MeasurementStats, +) -> BenchmarkRecord +where + A: IntersectionAlgorithm, +{ + BenchmarkRecord { + scenario: plan.scenario, + universe_len: plan.universe_len, + set_len: plan.set_len, + set_population_percent: population_percent(plan.set_len, plan.universe_len), + requested_overlap: plan.requested_overlap, + actual_overlap: plan.actual_overlap, + target_overlap_percent: plan.target_overlap_percent, + actual_overlap_percent: plan.actual_overlap_percent(), + algorithm: A::NAME, + phase, + stats, + } +} + +fn measure_prepare(plan: &DatasetPlan, measurement: &MeasurementOptions) -> MeasurementStats +where + A: IntersectionAlgorithm, +{ + for _ in 0..measurement.warmup_runs { + let (left, right) = prepare_pair::(plan); + black_box(&left); + black_box(&right); + drop(left); + drop(right); + } + + let mut samples = Vec::new(); + let mut total = Duration::ZERO; + + while samples.len() < measurement.min_samples + || (samples.len() < measurement.max_samples && total < measurement.target_total) + { + let elapsed = if measurement.include_prepare_input_generation { + let start = Instant::now(); + let (left, right) = prepare_pair::(plan); + let elapsed = start.elapsed(); + black_box(&left); + black_box(&right); + drop(left); + drop(right); + elapsed + } else { + let left_raw = plan.generate_left(); + let left_start = Instant::now(); + let left = 
A::prepare(&left_raw, plan.universe_len, plan.scenario.order); + let left_elapsed = left_start.elapsed(); + black_box(&left); + drop(left_raw); + + let right_raw = plan.generate_right(); + let right_start = Instant::now(); + let right = A::prepare(&right_raw, plan.universe_len, plan.scenario.order); + let right_elapsed = right_start.elapsed(); + black_box(&right); + drop(right_raw); + + let elapsed = left_elapsed + right_elapsed; + drop(left); + drop(right); + elapsed + }; + + total += elapsed; + samples.push(elapsed); + } + + MeasurementStats::from_samples(samples) +} + +fn prepare_pair(plan: &DatasetPlan) -> (A::Prepared, A::Prepared) +where + A: IntersectionAlgorithm, +{ + let left_raw = plan.generate_left(); + let left = A::prepare(&left_raw, plan.universe_len, plan.scenario.order); + drop(left_raw); + + let right_raw = plan.generate_right(); + let right = A::prepare(&right_raw, plan.universe_len, plan.scenario.order); + drop(right_raw); + + (left, right) +} + +fn measure_native_output( + left: &A::Prepared, + right: &A::Prepared, + measurement: &MeasurementOptions, +) -> MeasurementStats +where + A: IntersectionAlgorithm, +{ + let mut output = A::create_output(left, right); + + for _ in 0..measurement.warmup_runs { + A::clear_output(&mut output); + A::intersect_into(left, right, &mut output); + let count = A::output_len(&output); + black_box(&output); + black_box(count); + } + + let mut samples = Vec::new(); + let mut total = Duration::ZERO; + + while samples.len() < measurement.min_samples + || (samples.len() < measurement.max_samples && total < measurement.target_total) + { + if !measurement.include_intersection_output_clear { + A::clear_output(&mut output); + } + let start = Instant::now(); + if measurement.include_intersection_output_clear { + A::clear_output(&mut output); + } + A::intersect_into(left, right, &mut output); + let elapsed = if measurement.include_intersection_result_count { + let count = A::output_len(&output); + black_box(count); + 
start.elapsed() + } else { + start.elapsed() + }; + + black_box(&output); + if !measurement.include_intersection_result_count { + let count = A::output_len(&output); + black_box(count); + } + total += elapsed; + samples.push(elapsed); + } + + MeasurementStats::from_samples(samples) +} + +fn print_no_results(output_format: OutputFormat) { + match output_format { + OutputFormat::Normal => println!( + "No benchmark records were generated. Enable at least one algorithm and one phase in .env." + ), + OutputFormat::Markdown => println!( + "No benchmark records were generated. Enable at least one algorithm and one phase in `.env`." + ), + } +} + +fn print_normal_report(config: &BenchmarkConfig, results: &[BenchmarkRecord]) { + let runtime = settings(); + + println!("Intersection benchmark suite"); + println!("Scenarios: {}", Scenario::all().len()); + println!( + "Universe: 0..={} ({} values)", + runtime.max_value, + runtime.universe_len() + ); + println!( + "Set populations: sparse={} ({}) semi-sparse={} ({}) normal={} ({}) dense={} ({})", + config.dataset.sparse_size, + format_percent(population_percent( + config.dataset.sparse_size, + config.dataset.universe_len, + )), + config.dataset.semi_sparse_size, + format_percent(population_percent( + config.dataset.semi_sparse_size, + config.dataset.universe_len, + )), + config.dataset.normal_size, + format_percent(population_percent( + config.dataset.normal_size, + config.dataset.universe_len, + )), + config.dataset.dense_size, + format_percent(population_percent( + config.dataset.dense_size, + config.dataset.universe_len, + )) + ); + println!( + "Overlap targets: low={} medium={} high={}", + format_percent(runtime.low_overlap_percent as f64), + format_percent(runtime.medium_overlap_percent as f64), + format_percent(runtime.high_overlap_percent as f64) + ); + println!( + "Enabled densities: sparse={} semi-sparse={} normal={} dense={}", + runtime.enable_sparse_scenario, + runtime.enable_semi_sparse_scenario, + 
runtime.enable_normal_scenario, + runtime.enable_dense_scenario + ); + println!( + "Enabled overlaps: low={} medium={} high={}", + runtime.enable_low_overlap, runtime.enable_medium_overlap, runtime.enable_high_overlap + ); + println!( + "Sampling: min={} max={} target_total={}ms", + config.measurement.min_samples, + config.measurement.max_samples, + config.measurement.target_total.as_millis() + ); + println!( + "Enabled algorithms: bitset={} bitset-simd={} std-hash={} splitmix-hash={} sorted-merge={}", + runtime.enable_bitset, + runtime.enable_simd_bitset, + runtime.enable_std_hash, + runtime.enable_custom_hash, + runtime.enable_sorted_merge + ); + println!( + "Enabled phases: prepare={} intersection={}", + runtime.enable_prepare_phase, runtime.enable_intersection_phase + ); + println!( + "Timed extras: prepare_input_generation={} intersection_output_clear={} intersection_result_count={}", + config.measurement.include_prepare_input_generation, + config.measurement.include_intersection_output_clear, + config.measurement.include_intersection_result_count + ); + println!( + "Phases per algorithm: prepare={} native_output_intersection={}", + runtime.enable_prepare_phase, runtime.enable_intersection_phase + ); + println!(); + print_normal_results(results); +} + +fn print_normal_results(results: &[BenchmarkRecord]) { + let mut current_scenario = None; + + for record in results { + if current_scenario != Some(record.scenario) { + if current_scenario.is_some() { + println!(); + } + current_scenario = Some(record.scenario); + println!("{}", "-".repeat(96)); + println!( + "Scenario: {} | {} = {} of universe | {} = {} of each set", + describe_order(record.scenario.order), + describe_density(record.scenario.density), + format_percent(record.set_population_percent), + describe_overlap(record.scenario.overlap), + format_percent(record.target_overlap_percent) + ); + println!( + " set population: {} / {} values", + record.set_len, record.universe_len + ); + println!( + " overlap: 
requested={} actual={} shared={}/{}{}", + format_percent(record.target_overlap_percent), + format_percent(record.actual_overlap_percent), + record.actual_overlap, + record.set_len, + if record.actual_overlap != record.requested_overlap { + " adjusted-for-universe" + } else { + "" + } + ); + println!( + "{:<14} {:<10} {:>7} {:>12} {:>12} {:>12} {:>12}", + "algorithm", "phase", "samples", "mean", "median", "min", "max" + ); + } + + println!( + "{:<14} {:<10} {:>7} {:>12} {:>12} {:>12} {:>12}", + record.algorithm, + record.phase.label(), + record.stats.samples, + format_duration(record.stats.mean), + format_duration(record.stats.median), + format_duration(record.stats.min), + format_duration(record.stats.max) + ); + } +} + +fn print_markdown_report(config: &BenchmarkConfig, results: &[BenchmarkRecord]) { + let runtime = settings(); + + println!("# Intersection benchmark suite"); + println!(); + println!("- Scenarios: {}", Scenario::all().len()); + println!( + "- Universe: `0..={}` (`{}` values)", + runtime.max_value, + runtime.universe_len() + ); + println!( + "- Set populations: sparse=`{}` ({}) | semi-sparse=`{}` ({}) | normal=`{}` ({}) | dense=`{}` ({})", + config.dataset.sparse_size, + format_percent(population_percent( + config.dataset.sparse_size, + config.dataset.universe_len, + )), + config.dataset.semi_sparse_size, + format_percent(population_percent( + config.dataset.semi_sparse_size, + config.dataset.universe_len, + )), + config.dataset.normal_size, + format_percent(population_percent( + config.dataset.normal_size, + config.dataset.universe_len, + )), + config.dataset.dense_size, + format_percent(population_percent( + config.dataset.dense_size, + config.dataset.universe_len, + )) + ); + println!( + "- Overlap targets: low={} | medium={} | high={}", + format_percent(runtime.low_overlap_percent as f64), + format_percent(runtime.medium_overlap_percent as f64), + format_percent(runtime.high_overlap_percent as f64) + ); + println!( + "- Enabled densities: 
sparse=`{}` | semi-sparse=`{}` | normal=`{}` | dense=`{}`", + runtime.enable_sparse_scenario, + runtime.enable_semi_sparse_scenario, + runtime.enable_normal_scenario, + runtime.enable_dense_scenario + ); + println!( + "- Enabled overlaps: low=`{}` | medium=`{}` | high=`{}`", + runtime.enable_low_overlap, runtime.enable_medium_overlap, runtime.enable_high_overlap + ); + println!( + "- Sampling: min=`{}` | max=`{}` | target total=`{}ms`", + config.measurement.min_samples, + config.measurement.max_samples, + config.measurement.target_total.as_millis() + ); + println!( + "- Enabled algorithms: bitset=`{}` | bitset-simd=`{}` | std-hash=`{}` | splitmix-hash=`{}` | sorted-merge=`{}`", + runtime.enable_bitset, + runtime.enable_simd_bitset, + runtime.enable_std_hash, + runtime.enable_custom_hash, + runtime.enable_sorted_merge + ); + println!( + "- Enabled phases: prepare=`{}` | intersection=`{}`", + runtime.enable_prepare_phase, runtime.enable_intersection_phase + ); + println!( + "- Timed extras: prepare input generation=`{}` | intersection output clear=`{}` | intersection result count=`{}`", + config.measurement.include_prepare_input_generation, + config.measurement.include_intersection_output_clear, + config.measurement.include_intersection_result_count + ); + println!( + "- Phases per algorithm: prepare=`{}` | native output intersection=`{}`", + runtime.enable_prepare_phase, runtime.enable_intersection_phase + ); + println!(); + + let mut current_scenario = None; + + for record in results { + if current_scenario != Some(record.scenario) { + if current_scenario.is_some() { + println!(); + } + current_scenario = Some(record.scenario); + println!( + "## Scenario: {} | {} = {} of universe | {} = {} of each set", + describe_order(record.scenario.order), + describe_density(record.scenario.density), + format_percent(record.set_population_percent), + describe_overlap(record.scenario.overlap), + format_percent(record.target_overlap_percent) + ); + println!(); + println!( + "- 
/// Converts a nanosecond count into a [`Duration`].
///
/// A count whose whole-second part does not fit in a `u64` saturates to
/// [`Duration::MAX`] instead of being silently truncated by a narrowing cast.
fn duration_from_nanos(nanos: u128) -> Duration {
    const NANOS_PER_SECOND: u128 = 1_000_000_000;

    // The remainder is always below one billion, so narrowing it to `u32` is lossless.
    let subsec_nanos = (nanos % NANOS_PER_SECOND) as u32;

    match u64::try_from(nanos / NANOS_PER_SECOND) {
        Ok(seconds) => Duration::new(seconds, subsec_nanos),
        Err(_) => Duration::MAX,
    }
}
/// Renders a duration with a unit chosen by magnitude: seconds, milliseconds and
/// microseconds with three decimals, or whole nanoseconds below one microsecond.
fn format_duration(duration: Duration) -> String {
    let seconds = duration.as_secs_f64();

    // Millisecond output is rounded to three decimals, so values from 999.9995ms upward
    // would be printed as "1000.000ms". Route them to the seconds branch, where the same
    // rounding yields "1.000s". The lower unit boundaries do not need this: `Duration` has
    // nanosecond resolution, which three microsecond decimals already show exactly.
    if seconds >= 0.999_999_5 {
        format!("{seconds:.3}s")
    } else if seconds >= 0.001 {
        format!("{:.3}ms", seconds * 1_000.0)
    } else if seconds >= 0.000_001 {
        format!("{:.3}us", seconds * 1_000_000.0)
    } else {
        format!("{:.0}ns", seconds * 1_000_000_000.0)
    }
}
/// A fraction `numerator / denominator` used to scale set sizes.
#[derive(Clone, Copy, Debug)]
struct Ratio {
    numerator: usize,
    denominator: usize,
}

impl Ratio {
    /// Creates the ratio `numerator / denominator`.
    const fn new(numerator: usize, denominator: usize) -> Self {
        Self {
            numerator,
            denominator,
        }
    }

    /// Creates the ratio `percent / 100`.
    fn from_percent(percent: usize) -> Self {
        Self::new(percent, 100)
    }

    /// Returns `value * numerator / denominator`, rounded down.
    ///
    /// The product is formed in `u128`, so it cannot overflow and the quotient is exact;
    /// a quotient that does not fit in `usize` saturates to `usize::MAX`.
    ///
    /// # Panics
    ///
    /// Panics if the denominator is zero.
    fn apply(self, value: usize) -> usize {
        let scaled = (value as u128 * self.numerator as u128) / self.denominator as u128;
        usize::try_from(scaled).unwrap_or(usize::MAX)
    }

    /// Returns the ratio expressed as a percentage.
    fn as_percentage(self) -> f64 {
        (self.numerator as f64 / self.denominator as f64) * 100.0
    }
}
dense_size: percentage_of(universe_len, settings.dense_set_percent), + low_overlap: Ratio::from_percent(settings.low_overlap_percent), + medium_overlap: Ratio::from_percent(settings.medium_overlap_percent), + high_overlap: Ratio::from_percent(settings.high_overlap_percent), + } + } +} + +impl DatasetConfig { + pub fn smoke() -> Self { + Self { + universe_len: 101, + sparse_size: 10, + semi_sparse_size: 25, + normal_size: 50, + dense_size: 90, + low_overlap: Ratio::new(1, 10), + medium_overlap: Ratio::new(1, 2), + high_overlap: Ratio::new(9, 10), + } + } + + pub fn plan(&self, scenario: Scenario) -> DatasetPlan { + let set_len = self.set_size_for(scenario.density); + assert!( + set_len <= self.universe_len, + "set size {set_len} exceeds universe {}", + self.universe_len + ); + + let requested_overlap = self.overlap_ratio_for(scenario.overlap).apply(set_len); + let minimum_overlap = set_len.saturating_mul(2).saturating_sub(self.universe_len); + let actual_overlap = requested_overlap.max(minimum_overlap).min(set_len); + let left_only = set_len - actual_overlap; + let right_only = set_len - actual_overlap; + let total_unique = actual_overlap + left_only + right_only; + + assert!( + total_unique <= self.universe_len, + "scenario {scenario} cannot fit inside the configured universe" + ); + + let scenario_id = scenario_id(scenario); + let modulus = self.universe_len as u64; + let multiplier = choose_coprime_multiplier(modulus, scenario_id); + let addend = if modulus == 0 { + 0 + } else { + (scenario_id.wrapping_mul(0x9E37_79B9_7F4A_7C15) + 17) % modulus + }; + + DatasetPlan { + scenario, + universe_len: self.universe_len, + set_len, + requested_overlap, + actual_overlap, + left_only, + right_only, + target_overlap_percent: self.overlap_ratio_for(scenario.overlap).as_percentage(), + multiplier, + addend, + left_shuffle_seed: scenario_id ^ 0xA5A5_A5A5_DEAD_BEEF, + right_shuffle_seed: scenario_id ^ 0x5A5A_5A5A_CAFE_BABE, + } + } + + fn set_size_for(&self, density: Density) 
-> usize { + match density { + Density::Sparse => self.sparse_size, + Density::SemiSparse => self.semi_sparse_size, + Density::Normal => self.normal_size, + Density::Dense => self.dense_size, + } + } + + fn overlap_ratio_for(&self, overlap: Overlap) -> Ratio { + match overlap { + Overlap::Low => self.low_overlap, + Overlap::Medium => self.medium_overlap, + Overlap::High => self.high_overlap, + } + } +} + +#[derive(Clone, Debug)] +pub struct DatasetPlan { + pub scenario: Scenario, + pub universe_len: usize, + pub set_len: usize, + pub requested_overlap: usize, + pub actual_overlap: usize, + pub left_only: usize, + pub right_only: usize, + pub target_overlap_percent: f64, + multiplier: u64, + addend: u64, + left_shuffle_seed: u64, + right_shuffle_seed: u64, +} + +impl DatasetPlan { + pub fn generate_left(&self) -> Vec { + let mut values = Vec::with_capacity(self.set_len); + self.extend_segment(&mut values, 0, self.actual_overlap); + self.extend_segment(&mut values, self.actual_overlap, self.left_only); + self.finish(values, self.left_shuffle_seed) + } + + pub fn generate_right(&self) -> Vec { + let mut values = Vec::with_capacity(self.set_len); + self.extend_segment(&mut values, 0, self.actual_overlap); + self.extend_segment( + &mut values, + self.actual_overlap + self.left_only, + self.right_only, + ); + self.finish(values, self.right_shuffle_seed) + } + + pub fn generate_pair(&self) -> RawPair { + RawPair { + left: self.generate_left(), + right: self.generate_right(), + } + } + + pub fn actual_overlap_percent(&self) -> f64 { + if self.set_len == 0 { + 0.0 + } else { + (self.actual_overlap as f64 / self.set_len as f64) * 100.0 + } + } + + pub fn overlap_was_adjusted(&self) -> bool { + self.actual_overlap != self.requested_overlap + } + + fn extend_segment(&self, values: &mut Vec, start: usize, len: usize) { + for index in start..start + len { + values.push(self.permute_index(index)); + } + } + + fn permute_index(&self, index: usize) -> u32 { + let modulus = 
/// Returns `percent` percent of `total`, rounded down to a whole count.
///
/// Non-finite or negative results come out as 0 through the saturating float-to-integer
/// cast.
fn percentage_of(total: usize, percent: f64) -> usize {
    // Multiply before dividing: `percent / 100.0` is rarely exact in binary, and flooring
    // afterwards can lose a whole element (1000 * (0.7 / 100.0) evaluates just below 7).
    let scaled = (total as f64) * percent / 100.0;

    // Results that miss a whole number only by floating-point noise are snapped to it;
    // everything else is rounded down.
    let nearest = scaled.round();
    let tolerance = nearest.abs().max(1.0) * 1e-12;
    if (scaled - nearest).abs() <= tolerance {
        nearest as usize
    } else {
        scaled.floor() as usize
    }
}
/// How the benchmark report is rendered.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum OutputFormat {
    /// Plain-text report.
    Normal,
    /// Markdown report.
    Markdown,
}

impl std::str::FromStr for OutputFormat {
    type Err = String;

    /// Parses `normal` or `markdown`, ignoring ASCII case and surrounding whitespace.
    fn from_str(value: &str) -> Result<Self, Self::Err> {
        let name = value.trim();

        // Case-insensitive comparison avoids building a lowercase copy of the input.
        if name.eq_ignore_ascii_case("normal") {
            Ok(Self::Normal)
        } else if name.eq_ignore_ascii_case("markdown") {
            Ok(Self::Markdown)
        } else {
            Err("expected normal or markdown".to_string())
        }
    }
}
enable_sorted_merge: bool, + pub enable_prepare_phase: bool, + pub enable_intersection_phase: bool, + pub output_format: OutputFormat, + pub time_prepare_include_input_generation: bool, + pub time_intersection_include_output_clear: bool, + pub time_intersection_include_result_count: bool, +} + +impl BenchmarkSettings { + pub fn universe_len(&self) -> usize { + self.max_value as usize + 1 + } + + pub fn enabled_algorithm_count(&self) -> usize { + [ + self.enable_bitset, + self.enable_simd_bitset, + self.enable_std_hash, + self.enable_custom_hash, + self.enable_sorted_merge, + ] + .into_iter() + .filter(|enabled| *enabled) + .count() + } + + pub fn enabled_phase_count(&self) -> usize { + [self.enable_prepare_phase, self.enable_intersection_phase] + .into_iter() + .filter(|enabled| *enabled) + .count() + } + + pub fn density_enabled(&self, density: crate::data::Density) -> bool { + match density { + crate::data::Density::Sparse => self.enable_sparse_scenario, + crate::data::Density::SemiSparse => self.enable_semi_sparse_scenario, + crate::data::Density::Normal => self.enable_normal_scenario, + crate::data::Density::Dense => self.enable_dense_scenario, + } + } + + pub fn overlap_enabled(&self, overlap: crate::data::Overlap) -> bool { + match overlap { + crate::data::Overlap::Low => self.enable_low_overlap, + crate::data::Overlap::Medium => self.enable_medium_overlap, + crate::data::Overlap::High => self.enable_high_overlap, + } + } + + fn validate(&self) { + assert!( + self.max_value >= 1, + "MAX_VALUE must be at least 1 so the benchmark universe is non-trivial" + ); + assert!( + self.benchmark_min_samples >= 1, + "BENCHMARK_MIN_SAMPLES must be at least 1" + ); + assert!( + self.benchmark_max_samples >= self.benchmark_min_samples, + "BENCHMARK_MAX_SAMPLES must be greater than or equal to BENCHMARK_MIN_SAMPLES" + ); + assert!( + self.enable_sparse_scenario + || self.enable_semi_sparse_scenario + || self.enable_normal_scenario + || self.enable_dense_scenario, + "Enable 
at least one density scenario in .env" + ); + assert!( + self.enable_low_overlap || self.enable_medium_overlap || self.enable_high_overlap, + "Enable at least one overlap scenario in .env" + ); + + for (name, percent) in [ + ("SPARSE_SET_PERCENT", self.sparse_set_percent), + ("SEMI_SPARSE_SET_PERCENT", self.semi_sparse_set_percent), + ("NORMAL_SET_PERCENT", self.normal_set_percent), + ("DENSE_SET_PERCENT", self.dense_set_percent), + ] { + assert!( + percent.is_finite() && (0.0..=100.0).contains(&percent), + "{name} must be between 0 and 100" + ); + } + + for (name, percent) in [ + ("LOW_OVERLAP_PERCENT", self.low_overlap_percent), + ("MEDIUM_OVERLAP_PERCENT", self.medium_overlap_percent), + ("HIGH_OVERLAP_PERCENT", self.high_overlap_percent), + ] { + assert!(percent <= 100, "{name} must be between 0 and 100"); + } + } + + fn load() -> Self { + let env_file_values = read_env_file(".env"); + + let settings = Self { + max_value: read_required_parsed("MAX_VALUE", &env_file_values), + sparse_set_percent: read_required_parsed("SPARSE_SET_PERCENT", &env_file_values), + semi_sparse_set_percent: read_required_parsed( + "SEMI_SPARSE_SET_PERCENT", + &env_file_values, + ), + normal_set_percent: read_required_parsed("NORMAL_SET_PERCENT", &env_file_values), + dense_set_percent: read_required_parsed("DENSE_SET_PERCENT", &env_file_values), + low_overlap_percent: read_required_parsed("LOW_OVERLAP_PERCENT", &env_file_values), + medium_overlap_percent: read_required_parsed( + "MEDIUM_OVERLAP_PERCENT", + &env_file_values, + ), + high_overlap_percent: read_required_parsed("HIGH_OVERLAP_PERCENT", &env_file_values), + enable_sparse_scenario: read_required_bool("ENABLE_SPARSE_SCENARIO", &env_file_values), + enable_semi_sparse_scenario: read_required_bool( + "ENABLE_SEMI_SPARSE_SCENARIO", + &env_file_values, + ), + enable_normal_scenario: read_required_bool("ENABLE_NORMAL_SCENARIO", &env_file_values), + enable_dense_scenario: read_required_bool("ENABLE_DENSE_SCENARIO", &env_file_values), + 
/// Loads `KEY=VALUE` pairs from the dotenv-style file at `path`.
///
/// Every line is trimmed first. Empty lines, lines that start with `#`, and
/// lines without an `=` are skipped. The text before the first `=` becomes the
/// key and everything after it becomes the value, both trimmed. When a key
/// appears more than once, the last occurrence wins.
///
/// A file that cannot be read (for example a missing `.env`) produces an empty
/// map instead of an error, so settings can still be supplied through the
/// process environment alone.
fn read_env_file(path: impl AsRef<std::path::Path>) -> HashMap<String, String> {
    let Ok(contents) = fs::read_to_string(path) else {
        return HashMap::new();
    };

    contents
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty() && !line.starts_with('#'))
        .filter_map(|line| line.split_once('='))
        .map(|(key, value)| (key.trim().to_string(), value.trim().to_string()))
        .collect()
}

/// Resolves the required setting `key` and parses it into `T`.
///
/// The process environment takes precedence over `env_file_values`. The raw
/// text is trimmed before parsing so that surrounding whitespace is tolerated
/// here exactly as it is for boolean settings.
///
/// `env::var` also reports an error for values that are not valid Unicode;
/// such a variable is treated like an unset one and the lookup falls through
/// to `env_file_values`.
///
/// # Panics
///
/// Panics when the setting is defined in neither place, or when its value
/// cannot be parsed as `T`.
fn read_required_parsed<T>(key: &str, env_file_values: &HashMap<String, String>) -> T
where
    T: std::str::FromStr,
    T::Err: std::fmt::Display,
{
    if let Ok(value) = env::var(key) {
        return value
            .trim()
            .parse()
            .unwrap_or_else(|error| panic!("failed to parse {key} from environment: {error}"));
    }

    if let Some(value) = env_file_values.get(key) {
        return value
            .trim()
            .parse()
            .unwrap_or_else(|error| panic!("failed to parse {key} from .env: {error}"));
    }

    panic!("missing required setting {key}; define it in .env or the environment");
}

/// Resolves the required boolean setting `key`.
///
/// The process environment takes precedence over `env_file_values`; the text
/// that is found is interpreted by `parse_bool`.
///
/// # Panics
///
/// Panics when the setting is defined in neither place, or when its value is
/// not one of the spellings `parse_bool` accepts.
fn read_required_bool(key: &str, env_file_values: &HashMap<String, String>) -> bool {
    if let Ok(value) = env::var(key) {
        return parse_bool(key, &value, "environment");
    }

    if let Some(value) = env_file_values.get(key) {
        return parse_bool(key, value, ".env");
    }

    panic!("missing required setting {key}; define it in .env or the environment");
}

/// Interprets `value` as a boolean, ignoring ASCII case and surrounding
/// whitespace.
///
/// Accepted spellings are `true`/`1`/`yes`/`on` and `false`/`0`/`no`/`off`.
/// `key` and `source` are only used to build the panic message.
///
/// # Panics
///
/// Panics when `value` is none of the accepted spellings.
fn parse_bool(key: &str, value: &str, source: &str) -> bool {
    match value.trim().to_ascii_lowercase().as_str() {
        "true" | "1" | "yes" | "on" => true,
        "false" | "0" | "no" | "off" => false,
        _ => panic!("failed to parse {key} from {source}: expected true/false"),
    }
}
/// Inputs without any common value must produce an empty intersection.
#[test]
fn algorithms_handle_disjoint_sets() {
    assert_case_for_all_algorithms(&[1, 3, 5], &[2, 4, 6], &[]);
}

/// Identical inputs must produce the input itself.
#[test]
fn algorithms_handle_full_overlap() {
    assert_case_for_all_algorithms(&[1, 2, 3], &[1, 2, 3], &[1, 2, 3]);
}

/// Exactly one common value must be reported on its own.
#[test]
fn algorithms_handle_single_shared_value() {
    assert_case_for_all_algorithms(&[10, 20, 30], &[5, 20, 25], &[20]);
}

/// The smallest value (0) and the configured `MAX_VALUE` must both be
/// representable and reported when shared.
#[test]
fn algorithms_handle_boundary_values() {
    let max_value = settings().max_value;
    assert_case_for_all_algorithms(&[0, 1, max_value], &[0, max_value], &[0, max_value]);
}

/// An ordered smoke dataset must have the planned length on both sides, the
/// planned overlap, and non-decreasing values.
#[test]
fn generator_produces_exact_sizes_and_overlap() {
    let config = DatasetConfig::smoke();
    let scenario = Scenario {
        order: Order::Ordered,
        density: Density::Normal,
        overlap: Overlap::Medium,
    };
    let plan = config.plan(scenario);
    let pair = plan.generate_pair();

    assert_eq!(pair.left.len(), plan.set_len);
    assert_eq!(pair.right.len(), plan.set_len);
    assert_eq!(
        intersection_size(&pair.left, &pair.right),
        plan.actual_overlap
    );
    assert!(pair.left.windows(2).all(|window| window[0] <= window[1]));
    assert!(pair.right.windows(2).all(|window| window[0] <= window[1]));
}

/// An unordered smoke dataset must contain no duplicate values and must not
/// come out sorted.
#[test]
fn generator_preserves_uniqueness_and_unordered_shape() {
    let config = DatasetConfig::smoke();
    let scenario = Scenario {
        order: Order::Unordered,
        density: Density::SemiSparse,
        overlap: Overlap::High,
    };
    let plan = config.plan(scenario);
    let pair = plan.generate_pair();

    // Collecting into a set drops duplicates, so equal lengths prove uniqueness.
    assert_eq!(
        pair.left.iter().copied().collect::<BTreeSet<_>>().len(),
        pair.left.len()
    );
    assert_eq!(
        pair.right.iter().copied().collect::<BTreeSet<_>>().len(),
        pair.right.len()
    );
    assert!(!pair.left.windows(2).all(|window| window[0] <= window[1]));
    assert!(!pair.right.windows(2).all(|window| window[0] <= window[1]));
}
/// Checks every algorithm against a reference intersection for each smoke
/// scenario returned by `Scenario::all()`.
#[test]
fn all_algorithms_match_on_every_smoke_scenario() {
    let config = DatasetConfig::smoke();

    for scenario in Scenario::all() {
        let plan = config.plan(scenario);
        let pair = plan.generate_pair();
        let expected_values = normalized_intersection(&pair.left, &pair.right);
        let expected_count = expected_values.len();

        // NOTE(review): one call per algorithm type imported by this module;
        // the order of the five calls is assumed — confirm against the
        // original file if the order of failures matters.
        assert_algorithm_matches::<BitSetAlgorithm>(
            &pair.left,
            &pair.right,
            plan.universe_len,
            &expected_values,
            expected_count,
        );
        assert_algorithm_matches::<SimdBitSetAlgorithm>(
            &pair.left,
            &pair.right,
            plan.universe_len,
            &expected_values,
            expected_count,
        );
        assert_algorithm_matches::<StdHashAlgorithm>(
            &pair.left,
            &pair.right,
            plan.universe_len,
            &expected_values,
            expected_count,
        );
        assert_algorithm_matches::<CustomHashAlgorithm>(
            &pair.left,
            &pair.right,
            plan.universe_len,
            &expected_values,
            expected_count,
        );
        assert_algorithm_matches::<SortedMergeAlgorithm>(
            &pair.left,
            &pair.right,
            plan.universe_len,
            &expected_values,
            expected_count,
        );
    }
}

/// The smoke benchmark run must yield one result for every combination of
/// scenario, enabled algorithm, and enabled phase.
#[test]
fn benchmark_runner_smoke_test_returns_every_result_group() {
    let results = collect_results(&BenchmarkConfig::smoke());
    let runtime = settings();
    let expected =
        Scenario::all().len() * runtime.enabled_algorithm_count() * runtime.enabled_phase_count();

    assert_eq!(results.len(), expected);
}
/// Reference intersection used to check the algorithms under test.
///
/// Returns, in ascending order, every element of `left` that also occurs in
/// `right`. Membership is tested against a set built from `right`, so
/// duplicates in `right` have no effect; a value that occurs several times in
/// `left` is kept once per occurrence.
fn normalized_intersection(left: &[u32], right: &[u32]) -> Vec<u32> {
    let right_values = right.iter().copied().collect::<BTreeSet<u32>>();
    let mut values = left
        .iter()
        .copied()
        .filter(|value| right_values.contains(value))
        .collect::<Vec<u32>>();
    values.sort_unstable();
    values
}

/// Number of elements `normalized_intersection` reports for the two inputs.
fn intersection_size(left: &[u32], right: &[u32]) -> usize {
    normalized_intersection(left, right).len()
}