use crate::ContentHash;
use std::cmp::Ordering;
pub struct SpanIndex {
roots: Vec<Entry>,
}
struct Entry {
start: u64,
end: u64,
value: ContentHash,
children: Option<Box<Node>>,
}
struct Node {
entries: Vec<Entry>,
}
impl SpanIndex {
pub fn build(input: Vec<(u64, u64, ContentHash)>) -> Self {
let mut sorted = input;
sorted.sort_by(|a, b| a.0.cmp(&b.0).then(b.1.cmp(&a.1)));
let entries = Self::build_level(&sorted, 0, sorted.len());
SpanIndex { roots: entries }
}
fn build_level(
spans: &[(u64, u64, ContentHash)],
from: usize,
to: usize,
) -> Vec<Entry> {
if from >= to {
return Vec::new();
}
let mut entries = Vec::new();
let mut i = from;
while i < to {
let (start, length, ref data) = spans[i];
let end = start + length;
let mut j = i + 1;
while j < to {
let (child_start, child_length, _) = spans[j];
if child_start + child_length <= end && child_start >= start {
j += 1;
} else {
break;
}
}
let children = if j > i + 1 {
let child_entries = Self::build_level(spans, i + 1, j);
if child_entries.is_empty() {
None
} else {
Some(Box::new(Node {
entries: child_entries,
}))
}
} else {
None
};
entries.push(Entry {
start,
end,
value: *data,
children,
});
i = j;
}
entries
}
pub fn query(&self, point: u64) -> Option<ContentHash> {
Self::search_entries(&self.roots, point).copied()
}
fn search_entries(entries: &[Entry], point: u64) -> Option<&ContentHash> {
let result = entries.binary_search_by(|e| {
if point < e.start {
Ordering::Greater
} else if point >= e.end {
Ordering::Less
} else {
Ordering::Equal
}
});
let Ok(idx) = result else {
return None;
};
let entry = &entries[idx];
if let Some(ref children) = entry.children
&& let Some(v) = Self::search_entries(&children.entries, point)
{
return Some(v);
}
Some(&entry.value)
}
pub fn len(&self) -> usize {
self.count_entries(&self.roots)
}
pub fn is_empty(&self) -> bool {
self.roots.is_empty()
}
fn count_entries(&self, entries: &[Entry]) -> usize {
let mut count = entries.len();
for entry in entries {
if let Some(ref children) = entry.children {
count += self.count_entries(&children.entries);
}
}
count
}
}
#[cfg(test)]
mod tests {
use super::*;
fn hash(n: u8) -> ContentHash {
ContentHash::new(&[n, 0, 0, 0])
}
#[test]
fn build_empty() {
let index = SpanIndex::build(vec![]);
assert!(index.is_empty());
assert_eq!(index.len(), 0);
assert_eq!(index.query(0), None);
}
#[test]
fn single_span() {
let index = SpanIndex::build(vec![(10, 5, hash(1))]);
assert_eq!(index.len(), 1);
assert_eq!(index.query(9), None);
assert_eq!(index.query(10), Some(hash(1)));
assert_eq!(index.query(14), Some(hash(1)));
assert_eq!(index.query(15), None);
}
#[test]
fn non_overlapping_siblings() {
let index = SpanIndex::build(vec![
(0, 5, hash(1)),
(10, 5, hash(2)),
(20, 5, hash(3)),
]);
assert_eq!(index.len(), 3);
assert_eq!(index.query(2), Some(hash(1)));
assert_eq!(index.query(12), Some(hash(2)));
assert_eq!(index.query(22), Some(hash(3)));
assert_eq!(index.query(7), None);
}
#[test]
fn nested_parent_child() {
let index = SpanIndex::build(vec![
(0, 20, hash(1)),
(5, 5, hash(2)),
]);
assert_eq!(index.len(), 2);
assert_eq!(index.query(0), Some(hash(1)));
assert_eq!(index.query(5), Some(hash(2)));
assert_eq!(index.query(9), Some(hash(2)));
assert_eq!(index.query(10), Some(hash(1)));
assert_eq!(index.query(19), Some(hash(1)));
assert_eq!(index.query(20), None);
}
#[test]
fn deeply_nested() {
let index = SpanIndex::build(vec![
(0, 100, hash(1)),
(10, 40, hash(2)),
(20, 10, hash(3)),
]);
assert_eq!(index.len(), 3);
assert_eq!(index.query(5), Some(hash(1)));
assert_eq!(index.query(15), Some(hash(2)));
assert_eq!(index.query(25), Some(hash(3)));
assert_eq!(index.query(35), Some(hash(2)));
assert_eq!(index.query(60), Some(hash(1)));
}
#[test]
fn query_at_boundaries() {
let index = SpanIndex::build(vec![(0, 10, hash(1))]);
assert_eq!(index.query(0), Some(hash(1)));
assert_eq!(index.query(9), Some(hash(1)));
assert_eq!(index.query(10), None);
}
#[test]
fn gaps_between_spans() {
let index = SpanIndex::build(vec![
(0, 10, hash(1)),
(20, 10, hash(2)),
]);
assert_eq!(index.query(5), Some(hash(1)));
assert_eq!(index.query(10), None);
assert_eq!(index.query(15), None);
assert_eq!(index.query(25), Some(hash(2)));
}
#[test]
fn multiple_children_of_one_parent() {
let index = SpanIndex::build(vec![
(0, 100, hash(1)),
(10, 10, hash(2)),
(30, 10, hash(3)),
(50, 10, hash(4)),
]);
assert_eq!(index.len(), 4);
assert_eq!(index.query(5), Some(hash(1)));
assert_eq!(index.query(15), Some(hash(2)));
assert_eq!(index.query(25), Some(hash(1)));
assert_eq!(index.query(35), Some(hash(3)));
assert_eq!(index.query(45), Some(hash(1)));
assert_eq!(index.query(55), Some(hash(4)));
assert_eq!(index.query(65), Some(hash(1)));
}
#[test]
fn unsorted_input_is_handled() {
let index = SpanIndex::build(vec![
(50, 10, hash(3)),
(0, 100, hash(1)),
(10, 10, hash(2)),
]);
assert_eq!(index.len(), 3);
assert_eq!(index.query(5), Some(hash(1)));
assert_eq!(index.query(15), Some(hash(2)));
assert_eq!(index.query(55), Some(hash(3)));
assert_eq!(index.query(70), Some(hash(1)));
}
#[test]
fn adjacent_trees_with_children() {
let index = SpanIndex::build(vec![
(0, 50, hash(1)),
(5, 10, hash(2)),
(100, 50, hash(3)),
(110, 10, hash(4)),
]);
assert_eq!(index.len(), 4);
assert_eq!(index.query(2), Some(hash(1)));
assert_eq!(index.query(8), Some(hash(2)));
assert_eq!(index.query(60), None);
assert_eq!(index.query(105), Some(hash(3)));
assert_eq!(index.query(115), Some(hash(4)));
}
#[test]
fn child_at_parent_start() {
let index = SpanIndex::build(vec![
(0, 100, hash(1)),
(0, 20, hash(2)),
]);
assert_eq!(index.query(10), Some(hash(2)));
assert_eq!(index.query(50), Some(hash(1)));
}
#[test]
fn child_at_parent_end() {
let index = SpanIndex::build(vec![
(0, 100, hash(1)),
(80, 20, hash(2)),
]);
assert_eq!(index.query(50), Some(hash(1)));
assert_eq!(index.query(90), Some(hash(2)));
assert_eq!(index.query(99), Some(hash(2)));
assert_eq!(index.query(100), None);
}
#[test]
fn zero_length_span_is_invisible() {
let index = SpanIndex::build(vec![(10, 0, hash(1))]);
assert_eq!(index.query(10), None);
}
#[test]
fn single_byte_span() {
let index = SpanIndex::build(vec![(10, 1, hash(1))]);
assert_eq!(index.query(9), None);
assert_eq!(index.query(10), Some(hash(1)));
assert_eq!(index.query(11), None);
}
#[test]
fn realistic_ast_structure() {
let index = SpanIndex::build(vec![
(0, 200, hash(1)), (10, 180, hash(2)), (20, 80, hash(3)), (30, 20, hash(4)), (55, 20, hash(5)), (110, 60, hash(6)), (120, 30, hash(7)), ]);
assert_eq!(index.len(), 7);
assert_eq!(index.query(5), Some(hash(1))); assert_eq!(index.query(15), Some(hash(2))); assert_eq!(index.query(25), Some(hash(3))); assert_eq!(index.query(35), Some(hash(4))); assert_eq!(index.query(60), Some(hash(5))); assert_eq!(index.query(115), Some(hash(6))); assert_eq!(index.query(130), Some(hash(7))); assert_eq!(index.query(200), None); }
#[test]
fn four_levels_deep() {
let index = SpanIndex::build(vec![
(0, 1000, hash(1)),
(100, 800, hash(2)),
(200, 600, hash(3)),
(300, 400, hash(4)),
]);
assert_eq!(index.len(), 4);
assert_eq!(index.query(50), Some(hash(1)));
assert_eq!(index.query(150), Some(hash(2)));
assert_eq!(index.query(250), Some(hash(3)));
assert_eq!(index.query(500), Some(hash(4)));
assert_eq!(index.query(750), Some(hash(3)));
assert_eq!(index.query(850), Some(hash(2)));
assert_eq!(index.query(950), Some(hash(1)));
}
}