laburnum 1.17.0

An LSP framework for building language servers and compilers, powered by an incremental query tree with content-addressed storage, task-based dataflow, and parallel queries.
Documentation
// Copyright Two Neutron Stars Incorporated and contributors
// SPDX-License-Identifier: BlueOak-1.0.0

use crate::ContentHash;
use std::cmp::Ordering;

/// A tree-structured span index that mirrors AST nesting structure.
///
/// Each node contains non-overlapping sibling spans, each of which may
/// have children representing nested spans. Queries find the innermost
/// span containing a given byte offset via binary search at each level.
pub struct SpanIndex {
  roots: Vec<Entry>,
}

struct Entry {
  start: u64,
  end: u64,
  value: ContentHash,
  children: Option<Box<Node>>,
}

struct Node {
  entries: Vec<Entry>,
}

impl SpanIndex {
  /// Build from a list of `(start, length, content_hash)` spans.
  ///
  /// Spans are sorted by `(start ASC, length DESC)` so parents come
  /// before children, then recursively partitioned into a nested tree.
  pub fn build(input: Vec<(u64, u64, ContentHash)>) -> Self {
    let mut sorted = input;
    sorted.sort_by(|a, b| a.0.cmp(&b.0).then(b.1.cmp(&a.1)));

    let entries = Self::build_level(&sorted, 0, sorted.len());
    SpanIndex { roots: entries }
  }

  fn build_level(
    spans: &[(u64, u64, ContentHash)],
    from: usize,
    to: usize,
  ) -> Vec<Entry> {
    if from >= to {
      return Vec::new();
    }

    let mut entries = Vec::new();
    let mut i = from;

    while i < to {
      let (start, length, ref data) = spans[i];
      let end = start + length;

      // Find all spans that are children of this one (contained within it)
      let mut j = i + 1;
      while j < to {
        let (child_start, child_length, _) = spans[j];
        if child_start + child_length <= end && child_start >= start {
          j += 1;
        } else {
          break;
        }
      }

      let children = if j > i + 1 {
        let child_entries = Self::build_level(spans, i + 1, j);
        if child_entries.is_empty() {
          None
        } else {
          Some(Box::new(Node {
            entries: child_entries,
          }))
        }
      } else {
        None
      };

      entries.push(Entry {
        start,
        end,
        value: *data,
        children,
      });

      i = j;
    }

    entries
  }

  /// Find the innermost span containing `point`, returning its content hash.
  pub fn query(&self, point: u64) -> Option<ContentHash> {
    Self::search_entries(&self.roots, point).copied()
  }

  fn search_entries(entries: &[Entry], point: u64) -> Option<&ContentHash> {
    // Binary search for the entry containing point
    let result = entries.binary_search_by(|e| {
      if point < e.start {
        Ordering::Greater
      } else if point >= e.end {
        Ordering::Less
      } else {
        Ordering::Equal
      }
    });

    let Ok(idx) = result else {
      return None;
    };

    let entry = &entries[idx];

    // Try to find a more specific (deeper) match in children
    if let Some(ref children) = entry.children
      && let Some(v) = Self::search_entries(&children.entries, point)
    {
      return Some(v);
    }

    Some(&entry.value)
  }

  /// Total number of spans in the index.
  pub fn len(&self) -> usize {
    self.count_entries(&self.roots)
  }

  /// Returns `true` if the index contains no spans.
  pub fn is_empty(&self) -> bool {
    self.roots.is_empty()
  }

  fn count_entries(&self, entries: &[Entry]) -> usize {
    let mut count = entries.len();
    for entry in entries {
      if let Some(ref children) = entry.children {
        count += self.count_entries(&children.entries);
      }
    }
    count
  }
}

#[cfg(test)]
mod tests {
  use super::*;

  fn hash(n: u8) -> ContentHash {
    ContentHash::new(&[n, 0, 0, 0])
  }

  #[test]
  fn build_empty() {
    let index = SpanIndex::build(vec![]);
    assert!(index.is_empty());
    assert_eq!(index.len(), 0);
    assert_eq!(index.query(0), None);
  }

  #[test]
  fn single_span() {
    let index = SpanIndex::build(vec![(10, 5, hash(1))]);
    assert_eq!(index.len(), 1);
    assert_eq!(index.query(9), None);
    assert_eq!(index.query(10), Some(hash(1)));
    assert_eq!(index.query(14), Some(hash(1)));
    assert_eq!(index.query(15), None);
  }

  #[test]
  fn non_overlapping_siblings() {
    let index = SpanIndex::build(vec![
      (0, 5, hash(1)),
      (10, 5, hash(2)),
      (20, 5, hash(3)),
    ]);
    assert_eq!(index.len(), 3);
    assert_eq!(index.query(2), Some(hash(1)));
    assert_eq!(index.query(12), Some(hash(2)));
    assert_eq!(index.query(22), Some(hash(3)));
    assert_eq!(index.query(7), None);
  }

  #[test]
  fn nested_parent_child() {
    // Parent [0..20), child [5..10)
    let index = SpanIndex::build(vec![
      (0, 20, hash(1)),
      (5, 5, hash(2)),
    ]);
    assert_eq!(index.len(), 2);
    assert_eq!(index.query(0), Some(hash(1)));
    assert_eq!(index.query(5), Some(hash(2)));
    assert_eq!(index.query(9), Some(hash(2)));
    assert_eq!(index.query(10), Some(hash(1)));
    assert_eq!(index.query(19), Some(hash(1)));
    assert_eq!(index.query(20), None);
  }

  #[test]
  fn deeply_nested() {
    // Outer [0..100), middle [10..50), inner [20..30)
    let index = SpanIndex::build(vec![
      (0, 100, hash(1)),
      (10, 40, hash(2)),
      (20, 10, hash(3)),
    ]);
    assert_eq!(index.len(), 3);
    assert_eq!(index.query(5), Some(hash(1)));
    assert_eq!(index.query(15), Some(hash(2)));
    assert_eq!(index.query(25), Some(hash(3)));
    assert_eq!(index.query(35), Some(hash(2)));
    assert_eq!(index.query(60), Some(hash(1)));
  }

  #[test]
  fn query_at_boundaries() {
    let index = SpanIndex::build(vec![(0, 10, hash(1))]);
    assert_eq!(index.query(0), Some(hash(1)));
    assert_eq!(index.query(9), Some(hash(1)));
    assert_eq!(index.query(10), None);
  }

  #[test]
  fn gaps_between_spans() {
    // [aaa]  gap  [bbb]
    let index = SpanIndex::build(vec![
      (0, 10, hash(1)),
      (20, 10, hash(2)),
    ]);
    assert_eq!(index.query(5), Some(hash(1)));
    assert_eq!(index.query(10), None);
    assert_eq!(index.query(15), None);
    assert_eq!(index.query(25), Some(hash(2)));
  }

  #[test]
  fn multiple_children_of_one_parent() {
    // [       parent        ]
    //   [c1]  [c2]  [c3]
    let index = SpanIndex::build(vec![
      (0, 100, hash(1)),
      (10, 10, hash(2)),
      (30, 10, hash(3)),
      (50, 10, hash(4)),
    ]);
    assert_eq!(index.len(), 4);
    assert_eq!(index.query(5), Some(hash(1)));
    assert_eq!(index.query(15), Some(hash(2)));
    assert_eq!(index.query(25), Some(hash(1)));
    assert_eq!(index.query(35), Some(hash(3)));
    assert_eq!(index.query(45), Some(hash(1)));
    assert_eq!(index.query(55), Some(hash(4)));
    assert_eq!(index.query(65), Some(hash(1)));
  }

  #[test]
  fn unsorted_input_is_handled() {
    // Input deliberately out of order — build should sort
    let index = SpanIndex::build(vec![
      (50, 10, hash(3)),
      (0, 100, hash(1)),
      (10, 10, hash(2)),
    ]);
    assert_eq!(index.len(), 3);
    assert_eq!(index.query(5), Some(hash(1)));
    assert_eq!(index.query(15), Some(hash(2)));
    assert_eq!(index.query(55), Some(hash(3)));
    assert_eq!(index.query(70), Some(hash(1)));
  }

  #[test]
  fn adjacent_trees_with_children() {
    // Two separate top-level trees, each with a child
    // [  t1  ]          [  t2  ]
    //  [c1]              [c2]
    let index = SpanIndex::build(vec![
      (0, 50, hash(1)),
      (5, 10, hash(2)),
      (100, 50, hash(3)),
      (110, 10, hash(4)),
    ]);
    assert_eq!(index.len(), 4);
    assert_eq!(index.query(2), Some(hash(1)));
    assert_eq!(index.query(8), Some(hash(2)));
    assert_eq!(index.query(60), None);
    assert_eq!(index.query(105), Some(hash(3)));
    assert_eq!(index.query(115), Some(hash(4)));
  }

  #[test]
  fn child_at_parent_start() {
    // Child starts at exactly the same offset as parent
    // [parent     ]
    // [child]
    let index = SpanIndex::build(vec![
      (0, 100, hash(1)),
      (0, 20, hash(2)),
    ]);
    assert_eq!(index.query(10), Some(hash(2)));
    assert_eq!(index.query(50), Some(hash(1)));
  }

  #[test]
  fn child_at_parent_end() {
    // Child ends at exactly the same offset as parent
    // [     parent]
    //       [child]
    let index = SpanIndex::build(vec![
      (0, 100, hash(1)),
      (80, 20, hash(2)),
    ]);
    assert_eq!(index.query(50), Some(hash(1)));
    assert_eq!(index.query(90), Some(hash(2)));
    assert_eq!(index.query(99), Some(hash(2)));
    assert_eq!(index.query(100), None);
  }

  #[test]
  fn zero_length_span_is_invisible() {
    let index = SpanIndex::build(vec![(10, 0, hash(1))]);
    assert_eq!(index.query(10), None);
  }

  #[test]
  fn single_byte_span() {
    let index = SpanIndex::build(vec![(10, 1, hash(1))]);
    assert_eq!(index.query(9), None);
    assert_eq!(index.query(10), Some(hash(1)));
    assert_eq!(index.query(11), None);
  }

  #[test]
  fn realistic_ast_structure() {
    // fn foo() { if x { a; b; } else { c; } }
    let index = SpanIndex::build(vec![
      (0, 200, hash(1)),    // fn
      (10, 180, hash(2)),   // body
      (20, 80, hash(3)),    // if
      (30, 20, hash(4)),    // stmt_a
      (55, 20, hash(5)),    // stmt_b
      (110, 60, hash(6)),   // else
      (120, 30, hash(7)),   // stmt_c
    ]);
    assert_eq!(index.len(), 7);
    assert_eq!(index.query(5), Some(hash(1)));   // in fn, outside body
    assert_eq!(index.query(15), Some(hash(2)));  // in body, outside if/else
    assert_eq!(index.query(25), Some(hash(3)));  // in if, outside stmts
    assert_eq!(index.query(35), Some(hash(4)));  // in stmt_a
    assert_eq!(index.query(60), Some(hash(5)));  // in stmt_b
    assert_eq!(index.query(115), Some(hash(6))); // in else, outside stmt_c
    assert_eq!(index.query(130), Some(hash(7))); // in stmt_c
    assert_eq!(index.query(200), None);          // past fn
  }

  #[test]
  fn four_levels_deep() {
    // root [0..1000), l1 [100..900), l2 [200..800), l3 [300..700)
    let index = SpanIndex::build(vec![
      (0, 1000, hash(1)),
      (100, 800, hash(2)),
      (200, 600, hash(3)),
      (300, 400, hash(4)),
    ]);
    assert_eq!(index.len(), 4);
    assert_eq!(index.query(50), Some(hash(1)));
    assert_eq!(index.query(150), Some(hash(2)));
    assert_eq!(index.query(250), Some(hash(3)));
    assert_eq!(index.query(500), Some(hash(4)));
    assert_eq!(index.query(750), Some(hash(3)));
    assert_eq!(index.query(850), Some(hash(2)));
    assert_eq!(index.query(950), Some(hash(1)));
  }
}