use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::io;
use std::path::Path;
/// File name of the JSON manifest inside a caller-supplied directory.
///
/// `SegmentManifest::save_to` writes to `dir.join(MANIFEST_FILE)` and
/// `SegmentManifest::load_from` reads from the same joined path.
pub const MANIFEST_FILE: &str = "seg_manifest.json";
/// Value summary stored per indexed property in
/// `SegmentSummary::indexed_prop_ranges`.
///
/// Used as a conservative filter: `might_contain_numeric` returning `false`
/// means the queried value lies outside the recorded bounds.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum PropRange {
/// Numeric interval; `might_contain_numeric` treats both `min` and `max`
/// as inclusive bounds.
NumericMinMax { min: f64, max: f64 },
/// Data-less variant for which `might_contain_numeric` always answers
/// `true`, so it never rules anything out.
/// NOTE(review): the name suggests a stand-in for a future string bloom
/// filter — confirm intent with the author.
StringBloomPlaceholder,
}
#[allow(dead_code)]
impl PropRange {
    /// Builds a degenerate numeric range whose lower and upper bounds are
    /// both `value`.
    pub fn numeric(value: f64) -> Self {
        PropRange::NumericMinMax {
            min: value,
            max: value,
        }
    }

    /// Widens a numeric range so that it also covers `value`.
    ///
    /// Does nothing for [`PropRange::StringBloomPlaceholder`].
    ///
    /// NaN handling: a NaN `value` never replaces a real bound (every ordered
    /// comparison with NaN is false). A bound that is *already* NaN — e.g.
    /// because the range was seeded with `numeric(f64::NAN)` — is replaced by
    /// the incoming value. Without that, the range would stay `NaN..NaN`
    /// forever and `might_contain_numeric` would wrongly report `false` for
    /// values that were added afterwards.
    pub fn expand_with(&mut self, value: f64) {
        if let PropRange::NumericMinMax { min, max } = self {
            // `is_nan()` first: a NaN bound cannot be beaten by `<` / `>`.
            if min.is_nan() || value < *min {
                *min = value;
            }
            if max.is_nan() || value > *max {
                *max = value;
            }
        }
    }

    /// Returns `false` only when `value` is provably outside this range.
    ///
    /// * `NumericMinMax`: `true` iff `min <= value <= max` (both inclusive).
    ///   A NaN `value`, or NaN bounds, therefore yield `false`.
    /// * `StringBloomPlaceholder`: always `true`.
    pub fn might_contain_numeric(&self, value: f64) -> bool {
        match self {
            PropRange::NumericMinMax { min, max } => *min <= value && value <= *max,
            PropRange::StringBloomPlaceholder => true,
        }
    }
}
/// Per-segment summary record; `SegmentManifest` holds a list of these.
///
/// The methods on this type answer "could this segment be relevant?"
/// questions from the summary fields alone.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct SegmentSummary {
/// Identifier of the segment this summary describes.
pub segment_id: u32,
/// Inclusive lower bound of the node-id range (see `covers_node`).
pub node_id_lo: u32,
/// Exclusive upper bound of the node-id range (see `covers_node`).
/// `new` initialises it equal to `node_id_lo`, i.e. an empty range.
pub node_id_hi: u32,
/// NOTE(review): presumably the number of edges stored in the segment; no
/// method visible in this file reads it — confirm with the writer side.
pub edge_count: u64,
/// Hashes tested by `has_conn_type`. `#[serde(default)]`: an absent field
/// deserializes as an empty set.
#[serde(default)]
pub conn_types: HashSet<u64>,
/// Map keyed by node-type hash; `has_node_type` only checks key presence.
/// NOTE(review): the `u32` value is presumably a per-type node count —
/// confirm. `#[serde(default)]`: an absent field deserializes as empty.
#[serde(default)]
pub node_type_counts: HashMap<u64, u32>,
/// `(node_type_hash, prop_hash, range)` triples, searched linearly by
/// `find_indexed_range`. `#[serde(default)]`: an absent field deserializes
/// as an empty list.
#[serde(default)]
pub indexed_prop_ranges: Vec<(u64, u64, PropRange)>,
}
impl SegmentSummary {
    /// Creates a summary for `segment_id` whose node-id range starts (and,
    /// initially, also ends) at `node_id_lo`, with zero edges and no recorded
    /// types or property ranges.
    pub fn new(segment_id: u32, node_id_lo: u32) -> Self {
        // hi == lo: the half-open range starts out empty.
        let node_id_hi = node_id_lo;
        Self {
            segment_id,
            node_id_lo,
            node_id_hi,
            edge_count: 0,
            conn_types: HashSet::default(),
            node_type_counts: HashMap::default(),
            indexed_prop_ranges: Vec::default(),
        }
    }

    /// Looks up the first recorded range for the `(node_type_hash, prop_hash)`
    /// pair, or `None` when that pair has no entry.
    #[allow(dead_code)]
    pub fn find_indexed_range(&self, node_type_hash: u64, prop_hash: u64) -> Option<&PropRange> {
        self.indexed_prop_ranges
            .iter()
            .find_map(|(entry_type, entry_prop, range)| {
                let is_match = *entry_type == node_type_hash && *entry_prop == prop_hash;
                is_match.then_some(range)
            })
    }

    /// Whether `node_id` lies in the half-open range
    /// `[node_id_lo, node_id_hi)`.
    #[inline]
    #[allow(dead_code)]
    pub fn covers_node(&self, node_id: u32) -> bool {
        (self.node_id_lo..self.node_id_hi).contains(&node_id)
    }

    /// Whether `conn_type_hash` is present in `conn_types`.
    #[inline]
    #[allow(dead_code)]
    pub fn has_conn_type(&self, conn_type_hash: u64) -> bool {
        let known = &self.conn_types;
        known.contains(&conn_type_hash)
    }

    /// Whether `node_type_hash` is a key of `node_type_counts` (the stored
    /// count itself is not inspected).
    #[inline]
    #[allow(dead_code)]
    pub fn has_node_type(&self, node_type_hash: u64) -> bool {
        let counts = &self.node_type_counts;
        counts.contains_key(&node_type_hash)
    }

    /// Conservative check for "a node of this type with `prop == value` may
    /// live in this segment".
    ///
    /// Returns `false` when the node type is absent, or when a range is
    /// recorded for the pair and excludes `value`. Returns `true` otherwise,
    /// including when no range is recorded for the pair.
    #[allow(dead_code)]
    pub fn might_match_numeric_prop(
        &self,
        node_type_hash: u64,
        prop_hash: u64,
        value: f64,
    ) -> bool {
        self.has_node_type(node_type_hash)
            && self
                .find_indexed_range(node_type_hash, prop_hash)
                // No recorded range means nothing can be ruled out.
                .map_or(true, |range| range.might_contain_numeric(value))
    }
}
/// Ordered list of `SegmentSummary` records, persisted as JSON via
/// `save_to` / `load_from`.
///
/// `append` pushes to the end and returns the new entry's index, so indices
/// handed out earlier stay valid as long as `segments` is only appended to.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct SegmentManifest {
/// Summaries in insertion order; position is the index used by `get`.
pub segments: Vec<SegmentSummary>,
}
impl SegmentManifest {
    /// Creates a manifest with no segments.
    pub fn new() -> Self {
        Self::default()
    }

    /// Number of segment summaries held.
    #[allow(dead_code)]
    pub fn len(&self) -> usize {
        self.segments.len()
    }

    /// `true` when no segment summaries are held.
    pub fn is_empty(&self) -> bool {
        self.segments.is_empty()
    }

    /// Appends `summary` and returns the index it was stored at.
    pub fn append(&mut self, summary: SegmentSummary) -> usize {
        let idx = self.segments.len();
        self.segments.push(summary);
        idx
    }

    /// Returns the summary at `index`, or `None` when out of bounds.
    #[allow(dead_code)]
    pub fn get(&self, index: usize) -> Option<&SegmentSummary> {
        self.segments.get(index)
    }

    /// Iterates over segments whose `[node_id_lo, node_id_hi)` range overlaps
    /// the half-open query range `[lo, hi)`.
    ///
    /// The filter is exactly `node_id_hi > lo && node_id_lo < hi`; empty
    /// ranges (on either side) get no special treatment.
    #[allow(dead_code)]
    pub fn candidates_for_node_range<'a>(
        &'a self,
        lo: u32,
        hi: u32,
    ) -> impl Iterator<Item = &'a SegmentSummary> + 'a {
        self.segments
            .iter()
            .filter(move |s| s.node_id_hi > lo && s.node_id_lo < hi)
    }

    /// Iterates over segments that record `conn_type_hash` in `conn_types`.
    #[allow(dead_code)]
    pub fn candidates_for_conn_type<'a>(
        &'a self,
        conn_type_hash: u64,
    ) -> impl Iterator<Item = &'a SegmentSummary> + 'a {
        self.segments
            .iter()
            .filter(move |s| s.has_conn_type(conn_type_hash))
    }

    /// Iterates over segments that record `node_type_hash` in
    /// `node_type_counts`.
    #[allow(dead_code)]
    pub fn candidates_for_node_type<'a>(
        &'a self,
        node_type_hash: u64,
    ) -> impl Iterator<Item = &'a SegmentSummary> + 'a {
        self.segments
            .iter()
            .filter(move |s| s.has_node_type(node_type_hash))
    }

    /// Serializes the manifest as pretty-printed JSON into
    /// `dir/MANIFEST_FILE`, replacing any existing file.
    ///
    /// The data is written straight to the final path (no temp-file-and-rename
    /// step).
    ///
    /// # Errors
    ///
    /// Returns the serialization failure wrapped with `io::Error::other`, or
    /// whatever `std::fs::write` reports.
    pub fn save_to(&self, dir: &Path) -> io::Result<()> {
        let json = serde_json::to_string_pretty(self).map_err(io::Error::other)?;
        std::fs::write(dir.join(MANIFEST_FILE), json)
    }

    /// Loads the manifest from `dir/MANIFEST_FILE`.
    ///
    /// A manifest file that does not exist is not an error: an empty manifest
    /// is returned instead.
    ///
    /// # Errors
    ///
    /// Any read failure other than `ErrorKind::NotFound` (for example a
    /// permission problem) is returned as-is. A JSON parse failure is wrapped
    /// with `io::Error::other`.
    pub fn load_from(dir: &Path) -> io::Result<Self> {
        let path = dir.join(MANIFEST_FILE);
        // Read directly instead of probing with `Path::exists()` first:
        // `exists()` answers `false` for *any* metadata error, which would
        // make an unreadable manifest look like "no segments yet", and the
        // separate probe also races with concurrent file removal.
        let json = match std::fs::read_to_string(&path) {
            Ok(json) => json,
            Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Self::new()),
            Err(err) => return Err(err),
        };
        serde_json::from_str(&json).map_err(io::Error::other)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a summary with the node-id range `[lo, hi)` and nothing else
    /// recorded.
    fn summary_spanning(segment_id: u32, lo: u32, hi: u32) -> SegmentSummary {
        let mut summary = SegmentSummary::new(segment_id, lo);
        summary.node_id_hi = hi;
        summary
    }

    #[test]
    fn prop_range_numeric_expands_and_contains() {
        let mut range = PropRange::numeric(5.0);
        for sample in [10.0, 2.5] {
            range.expand_with(sample);
        }
        assert_eq!(
            range,
            PropRange::NumericMinMax {
                min: 2.5,
                max: 10.0
            }
        );

        // Both bounds are inclusive.
        for inside in [5.0, 2.5, 10.0] {
            assert!(range.might_contain_numeric(inside));
        }
        for outside in [11.0, 2.0] {
            assert!(!range.might_contain_numeric(outside));
        }
    }

    #[test]
    fn prop_range_placeholder_never_prunes() {
        let placeholder = PropRange::StringBloomPlaceholder;
        for probe in [0.0, 1e18] {
            assert!(placeholder.might_contain_numeric(probe));
        }
    }

    #[test]
    fn segment_summary_tracks_range() {
        let summary = summary_spanning(0, 100, 200);

        // Lower bound inclusive, upper bound exclusive.
        assert!(!summary.covers_node(99));
        assert!(summary.covers_node(100));
        assert!(summary.covers_node(199));
        assert!(!summary.covers_node(200));
    }

    #[test]
    fn manifest_append_and_filter() {
        let mut first = summary_spanning(0, 0, 100);
        first.conn_types.insert(42);
        first.node_type_counts.insert(7, 50);

        let mut second = summary_spanning(1, 100, 200);
        second.conn_types.insert(99);
        second.node_type_counts.insert(7, 75);

        let mut manifest = SegmentManifest::new();
        manifest.append(first);
        manifest.append(second);
        assert_eq!(manifest.len(), 2);

        // A query straddling the boundary at 100 touches both segments.
        assert_eq!(manifest.candidates_for_node_range(50, 120).count(), 2);

        let in_second: Vec<u32> = manifest
            .candidates_for_node_range(150, 180)
            .map(|s| s.segment_id)
            .collect();
        assert_eq!(in_second, vec![1]);

        let with_conn_42: Vec<u32> = manifest
            .candidates_for_conn_type(42)
            .map(|s| s.segment_id)
            .collect();
        assert_eq!(with_conn_42, vec![0]);

        assert_eq!(manifest.candidates_for_node_type(7).count(), 2);
        assert_eq!(manifest.candidates_for_node_type(1234).count(), 0);
    }

    #[test]
    fn indexed_prop_prunes_out_of_range() {
        let mut summary = summary_spanning(0, 0, 100);
        summary.node_type_counts.insert(7, 10);
        let recorded = PropRange::NumericMinMax {
            min: 10.0,
            max: 20.0,
        };
        summary.indexed_prop_ranges.push((7, 99, recorded));

        // Inside the recorded range.
        assert!(summary.might_match_numeric_prop(7, 99, 15.0));
        // Outside the recorded range.
        assert!(!summary.might_match_numeric_prop(7, 99, 25.0));
        // Unknown node type.
        assert!(!summary.might_match_numeric_prop(123, 99, 15.0));
        // Known node type, property without a recorded range.
        assert!(summary.might_match_numeric_prop(7, 77, 15.0));
    }

    #[test]
    fn save_and_load_round_trip() {
        let dir = TempDir::new().unwrap();

        let mut summary = summary_spanning(0, 0, 1000);
        summary.edge_count = 5000;
        summary.conn_types.insert(42);
        summary.node_type_counts.insert(7, 500);
        let recorded = PropRange::NumericMinMax {
            min: 1.0,
            max: 99.0,
        };
        summary.indexed_prop_ranges.push((7, 99, recorded));

        let mut manifest = SegmentManifest::new();
        manifest.append(summary);
        manifest.save_to(dir.path()).unwrap();

        let reloaded = SegmentManifest::load_from(dir.path()).unwrap();
        assert_eq!(reloaded.len(), 1);

        let restored = &reloaded.segments[0];
        assert_eq!(restored.node_id_hi, 1000);
        assert_eq!(restored.edge_count, 5000);
        assert!(restored.has_conn_type(42));
        assert_eq!(restored.node_type_counts.get(&7), Some(&500));
        assert!(restored.find_indexed_range(7, 99).is_some());
    }

    #[test]
    fn missing_file_loads_as_empty() {
        let dir = TempDir::new().unwrap();
        let manifest = SegmentManifest::load_from(dir.path()).unwrap();
        assert!(manifest.is_empty());
    }
}