use tracing::{debug, info};
use crate::ssi_validation::DiscoveredEdge;
use crate::witness_objects::KeySummary;
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct VoiMetrics {
pub c_b: f64,
pub fp_b: f64,
pub delta_fp_b: f64,
pub l_abort: f64,
pub cost_refine_b: f64,
}
impl VoiMetrics {
#[must_use]
pub fn benefit(&self) -> f64 {
self.c_b * self.delta_fp_b * self.l_abort
}
#[must_use]
pub fn voi(&self) -> f64 {
self.benefit() - self.cost_refine_b
}
#[must_use]
pub fn should_invest(&self) -> bool {
self.voi() > 0.0
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RefinementBudget {
pub max_bytes: usize,
pub max_buckets: usize,
}
impl RefinementBudget {
#[must_use]
pub const fn new(max_bytes: usize, max_buckets: usize) -> Self {
Self {
max_bytes,
max_buckets,
}
}
#[must_use]
pub const fn v1_default() -> Self {
Self {
max_bytes: 4096,
max_buckets: 16,
}
}
}
impl Default for RefinementBudget {
fn default() -> Self {
Self::v1_default()
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum RefinementPriority {
CellBitmap = 4,
ByteRangeList = 3,
HashedKeySet = 2,
ExactKeys = 1,
}
#[derive(Debug, Clone)]
pub struct RefinementDecision {
pub range_prefix: u32,
pub voi_score: f64,
pub refinement_type: RefinementPriority,
pub key_summary: KeySummary,
pub bytes_used: usize,
}
#[derive(Debug, Clone)]
pub struct RefinementResult {
pub confirmed_edges: Vec<DiscoveredEdge>,
pub eliminated_edges: Vec<DiscoveredEdge>,
pub decisions: Vec<RefinementDecision>,
pub bytes_used: usize,
pub buckets_refined: usize,
}
pub fn refine_edges(
in_edges: Vec<DiscoveredEdge>,
out_edges: Vec<DiscoveredEdge>,
refinements: &[(u32, KeySummary)],
budget: &RefinementBudget,
) -> RefinementResult {
let mut confirmed_in = Vec::new();
let mut confirmed_out = Vec::new();
let mut eliminated = Vec::new();
let mut decisions = Vec::new();
let mut bytes_used = 0_usize;
let mut buckets_refined = 0_usize;
for edge in in_edges {
if buckets_refined >= budget.max_buckets || bytes_used >= budget.max_bytes {
confirmed_in.push(edge);
continue;
}
let page = crate::ssi_validation::witness_key_page(&edge.overlap_key)
.map(|p| p.get())
.unwrap_or(0);
if let Some((_, summary)) = refinements.iter().find(|(p, _)| *p == page) {
let estimated_bytes = estimate_summary_bytes(summary);
if bytes_used + estimated_bytes > budget.max_bytes {
confirmed_in.push(edge);
continue;
}
if summary.may_overlap(&edge.overlap_key) {
confirmed_in.push(edge);
} else {
debug!(
bead_id = "bd-1oxe",
from = ?edge.from,
to = ?edge.to,
key = ?edge.overlap_key,
"refinement eliminated false positive incoming edge"
);
eliminated.push(edge);
}
bytes_used += estimated_bytes;
buckets_refined += 1;
decisions.push(RefinementDecision {
range_prefix: page,
voi_score: 0.0, refinement_type: summary_to_priority(summary),
key_summary: summary.clone(),
bytes_used: estimated_bytes,
});
} else {
confirmed_in.push(edge);
}
}
for edge in out_edges {
if buckets_refined >= budget.max_buckets || bytes_used >= budget.max_bytes {
confirmed_out.push(edge);
continue;
}
let page = crate::ssi_validation::witness_key_page(&edge.overlap_key)
.map(|p| p.get())
.unwrap_or(0);
if let Some((_, summary)) = refinements.iter().find(|(p, _)| *p == page) {
let estimated_bytes = estimate_summary_bytes(summary);
if bytes_used + estimated_bytes > budget.max_bytes {
confirmed_out.push(edge);
continue;
}
if summary.may_overlap(&edge.overlap_key) {
confirmed_out.push(edge);
} else {
debug!(
bead_id = "bd-1oxe",
from = ?edge.from,
to = ?edge.to,
key = ?edge.overlap_key,
"refinement eliminated false positive outgoing edge"
);
eliminated.push(edge);
}
bytes_used += estimated_bytes;
buckets_refined += 1;
decisions.push(RefinementDecision {
range_prefix: page,
voi_score: 0.0,
refinement_type: summary_to_priority(summary),
key_summary: summary.clone(),
bytes_used: estimated_bytes,
});
} else {
confirmed_out.push(edge);
}
}
if !eliminated.is_empty() {
info!(
bead_id = "bd-1oxe",
eliminated = eliminated.len(),
confirmed_in = confirmed_in.len(),
confirmed_out = confirmed_out.len(),
bytes_used,
buckets_refined,
"refinement complete"
);
}
let mut confirmed_edges = confirmed_in;
confirmed_edges.extend(confirmed_out);
RefinementResult {
confirmed_edges,
eliminated_edges: eliminated,
decisions,
bytes_used,
buckets_refined,
}
}
fn estimate_summary_bytes(summary: &KeySummary) -> usize {
match summary {
KeySummary::ExactKeys(keys) => keys.len() * 16,
KeySummary::HashedKeySet(hashes) => hashes.len() * 8,
KeySummary::PageBitmap(pages) => pages.len() * 4,
KeySummary::CellBitmap(cells) => cells.len() * 8,
KeySummary::ByteRangeList(ranges) => ranges.len() * 8,
KeySummary::Chunked(chunks) => chunks
.iter()
.map(|c| estimate_summary_bytes(&c.summary) + 4)
.sum(),
}
}
fn summary_to_priority(summary: &KeySummary) -> RefinementPriority {
match summary {
KeySummary::CellBitmap(_) => RefinementPriority::CellBitmap,
KeySummary::ByteRangeList(_) => RefinementPriority::ByteRangeList,
KeySummary::HashedKeySet(_) | KeySummary::PageBitmap(_) | KeySummary::Chunked(_) => {
RefinementPriority::HashedKeySet
}
KeySummary::ExactKeys(_) => RefinementPriority::ExactKeys,
}
}
#[cfg(test)]
mod tests {
use super::*;
use fsqlite_types::{PageNumber, TxnEpoch, TxnId, TxnToken, WitnessKey};
use std::collections::BTreeSet;
fn test_token(id: u64) -> TxnToken {
TxnToken::new(TxnId::new(id).unwrap(), TxnEpoch::new(0))
}
fn page_key(pgno: u32) -> WitnessKey {
WitnessKey::Page(PageNumber::new(pgno).unwrap())
}
fn make_edge(from_id: u64, to_id: u64, pgno: u32) -> DiscoveredEdge {
DiscoveredEdge {
from: test_token(from_id),
to: test_token(to_id),
overlap_key: page_key(pgno),
source_is_active: true,
source_has_in_rw: false,
}
}
#[test]
fn test_page_level_catches_true_conflict() {
let in_edges = vec![make_edge(1, 2, 5)];
let result = refine_edges(
in_edges,
Vec::new(),
&[], &RefinementBudget::v1_default(),
);
assert_eq!(
result.confirmed_edges.len(),
1,
"without refinement, page-level edge must pass through"
);
assert!(result.eliminated_edges.is_empty());
}
#[test]
fn test_cell_level_reduces_false_positives() {
let in_edges_fp = vec![make_edge(1, 2, 10)];
let refinements = vec![(
10_u32,
KeySummary::CellBitmap(BTreeSet::from([(5_u64 << 32) | 0x2a])), )];
let result = refine_edges(
in_edges_fp,
Vec::new(),
&refinements,
&RefinementBudget::v1_default(),
);
assert_eq!(
result.eliminated_edges.len(),
1,
"cell refinement should eliminate false positive"
);
assert!(result.confirmed_edges.is_empty());
let in_edges_no_refine = vec![make_edge(1, 2, 10)];
let result_no_refine = refine_edges(
in_edges_no_refine,
Vec::new(),
&[], &RefinementBudget::v1_default(),
);
assert_eq!(
result_no_refine.confirmed_edges.len(),
1,
"without refinement, edge passes through"
);
}
#[test]
fn test_cell_witness_reduces_false_positives() {
test_cell_level_reduces_false_positives();
}
#[test]
fn test_refinement_budget_respected() {
let in_edges = vec![make_edge(1, 2, 5), make_edge(3, 4, 10), make_edge(5, 6, 15)];
let refinements = vec![
(5_u32, KeySummary::ExactKeys(vec![page_key(99)])), (10_u32, KeySummary::ExactKeys(vec![page_key(99)])), (15_u32, KeySummary::ExactKeys(vec![page_key(99)])), ];
let budget = RefinementBudget::new(4096, 1);
let result = refine_edges(in_edges, Vec::new(), &refinements, &budget);
assert_eq!(result.buckets_refined, 1);
assert_eq!(result.eliminated_edges.len(), 1);
assert_eq!(
result.confirmed_edges.len(),
2,
"budget-exceeded edges pass through"
);
}
#[test]
fn test_byte_range_witness_finer_than_page() {
let budget = RefinementBudget::v1_default();
let in_edges = vec![make_edge(1, 2, 10)];
let non_overlap = refine_edges(
in_edges,
Vec::new(),
&[(
10_u32,
KeySummary::ByteRangeList(vec![(11_u32, 0_u16, 64_u16)]),
)],
&budget,
);
assert_eq!(non_overlap.eliminated_edges.len(), 1);
assert!(non_overlap.confirmed_edges.is_empty());
let overlap = refine_edges(
vec![make_edge(1, 2, 10)],
Vec::new(),
&[(
10_u32,
KeySummary::ByteRangeList(vec![(10_u32, 32_u16, 64_u16)]),
)],
&budget,
);
assert_eq!(overlap.confirmed_edges.len(), 1);
assert!(overlap.eliminated_edges.is_empty());
}
#[test]
fn test_voi_metric_computation() {
let metrics = VoiMetrics {
c_b: 10.0, fp_b: 0.8, delta_fp_b: 0.7, l_abort: 100.0, cost_refine_b: 50.0, };
let benefit = metrics.benefit();
assert!(
(benefit - 700.0).abs() < 1e-10,
"benefit = c_b * delta_fp_b * L_abort"
);
let voi = metrics.voi();
assert!((voi - 650.0).abs() < 1e-10, "VOI = benefit - cost");
assert!(voi > 0.0, "positive VOI means refinement is cost-effective");
assert!(metrics.should_invest());
let expensive = VoiMetrics {
c_b: 0.1,
fp_b: 0.1,
delta_fp_b: 0.05,
l_abort: 10.0,
cost_refine_b: 100.0,
};
assert!(
expensive.voi() < 0.0,
"negative VOI means refinement not worth it"
);
assert!(!expensive.should_invest());
}
#[test]
fn test_voi_framework_computes_actionable_score() {
let invest = VoiMetrics {
c_b: 8.0,
fp_b: 0.6,
delta_fp_b: 0.5,
l_abort: 120.0,
cost_refine_b: 100.0,
};
let skip = VoiMetrics {
c_b: 0.2,
fp_b: 0.1,
delta_fp_b: 0.05,
l_abort: 10.0,
cost_refine_b: 50.0,
};
assert!(invest.should_invest(), "VOI>0 should recommend refine");
assert!(!skip.should_invest(), "VOI<=0 should recommend skip");
}
#[test]
fn test_disabling_refinement_is_sound() {
let in_edges = vec![make_edge(1, 2, 5), make_edge(3, 4, 10)];
let out_edges = vec![make_edge(2, 3, 7)];
let result = refine_edges(in_edges, out_edges, &[], &RefinementBudget::v1_default());
assert_eq!(result.confirmed_edges.len(), 3, "all edges pass through");
assert!(result.eliminated_edges.is_empty(), "no edges eliminated");
assert_eq!(result.buckets_refined, 0);
}
#[test]
fn test_refinement_preserves_no_false_negatives() {
test_disabling_refinement_is_sound();
}
#[test]
fn test_byte_budget_enforcement() {
let in_edges = vec![make_edge(1, 2, 5), make_edge(3, 4, 10)];
let refinements = vec![
(5_u32, KeySummary::ExactKeys(vec![page_key(99)])),
(10_u32, KeySummary::ExactKeys(vec![page_key(99)])),
];
let budget = RefinementBudget::new(16, 100);
let result = refine_edges(in_edges, Vec::new(), &refinements, &budget);
assert_eq!(result.buckets_refined, 1);
assert!(result.bytes_used <= 16);
}
}