Skip to main content

lex_core/lex/wire/
range.rs

1//! `Range` / `Position` conversion between lex-core and `lex_extension`.
2//!
3//! Forward path: drops `span` (byte offsets) and `origin_path`
4//! (`origin_path` is lifted to the wire node's `origin` field by the
5//! caller). Reverse path reconstructs `span = 0..0` since byte offsets
6//! are advisory in spliced content; when callers thread the wire
7//! `origin` string through [`range_from_wire_with_origin`], the
8//! `origin_path` round-trips back into `Range.origin_path` so spliced
9//! nodes carry the correct origin downstream.
10//!
11//! [`OriginInterner`] dedupes the `Arc<PathBuf>` allocations: large
12//! payloads with thousands of nodes share a handful of distinct
13//! origin strings (typically just the loaded file's path), so we
14//! keep one `Arc<PathBuf>` per unique string and clone the `Arc`
15//! into each node's `Range.origin_path` rather than allocating a
16//! fresh `PathBuf` per node.
17
18use std::collections::HashMap;
19use std::path::PathBuf;
20use std::sync::Arc;
21
22use crate::lex::ast::range::{Position as CorePosition, Range as CoreRange};
23use lex_extension::wire::{Position as WirePosition, Range as WireRange};
24
/// Pool of origin paths shared across the nodes of one decoded wire payload.
///
/// Most nodes in a payload tend to carry the same `origin` string (usually
/// the path of the file that was loaded). Building a new `Arc<PathBuf>` for
/// every node would cost one allocation per node, so the pool instead keeps
/// a single `Arc<PathBuf>` per distinct origin string and hands out cheap
/// `Arc` clones of it.
#[derive(Default)]
pub(crate) struct OriginInterner {
    /// Maps each origin string seen so far to its shared path allocation.
    cache: HashMap<String, Arc<PathBuf>>,
}

impl OriginInterner {
    /// Creates an empty pool.
    pub(crate) fn new() -> Self {
        Self {
            cache: HashMap::new(),
        }
    }

    /// Looks up the shared `Arc<PathBuf>` for `s`, building and caching it
    /// the first time `s` is seen.
    ///
    /// A cache hit only bumps the reference count; a miss allocates the
    /// `PathBuf` plus an owned `String` key for the map.
    pub(crate) fn intern(&mut self, s: &str) -> Arc<PathBuf> {
        match self.cache.get(s) {
            Some(existing) => Arc::clone(existing),
            None => {
                let fresh = Arc::new(PathBuf::from(s));
                // The map keeps its own handle so later lookups can share it.
                self.cache.insert(s.to_owned(), Arc::clone(&fresh));
                fresh
            }
        }
    }
}
55
56pub(crate) fn position_to_wire(p: &CorePosition) -> WirePosition {
57    // Wire format pins line/column to u32. lex-core stores them as
58    // usize because they index bytes within typical 64-bit address
59    // space; values that exceed u32::MAX would mean a single document
60    // containing >4 billion lines, which doesn't happen in practice.
61    // Saturate-and-debug-assert so a future regression surfaces in
62    // dev rather than producing a wrapped value silently.
63    let line = u32::try_from(p.line).unwrap_or_else(|_| {
64        debug_assert!(false, "position line {} exceeds u32::MAX", p.line);
65        u32::MAX
66    });
67    let column = u32::try_from(p.column).unwrap_or_else(|_| {
68        debug_assert!(false, "position column {} exceeds u32::MAX", p.column);
69        u32::MAX
70    });
71    WirePosition::new(line, column)
72}
73
74pub(crate) fn position_from_wire(p: &WirePosition) -> CorePosition {
75    CorePosition::new(p.line() as usize, p.column() as usize)
76}
77
78pub fn range_to_wire(r: &CoreRange) -> WireRange {
79    WireRange::new(position_to_wire(&r.start), position_to_wire(&r.end))
80}
81
82pub(crate) fn range_from_wire(r: &WireRange) -> CoreRange {
83    CoreRange::new(
84        0..0,
85        position_from_wire(&r.start),
86        position_from_wire(&r.end),
87    )
88}
89
90/// Like [`range_from_wire`] but also restores `origin_path` from the
91/// wire `origin` string, sharing the underlying `Arc<PathBuf>` with
92/// every other node that has the same origin via `interner`.
93pub(crate) fn range_from_wire_with_origin(
94    r: &WireRange,
95    origin: Option<&str>,
96    interner: &mut OriginInterner,
97) -> CoreRange {
98    let mut range = range_from_wire(r);
99    if let Some(s) = origin {
100        range.origin_path = Some(interner.intern(s));
101    }
102    range
103}
104
105/// Lift a lex-core `Range`'s `origin_path` to the wire `origin` string.
106pub fn origin_string(r: &CoreRange) -> Option<String> {
107    r.origin_path
108        .as_ref()
109        .map(|p| p.to_string_lossy().into_owned())
110}
111
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a zero-width wire range located at column 0 of `line`.
    fn wire_point(line: u32) -> WireRange {
        WireRange::new(WirePosition::new(line, 0), WirePosition::new(line, 0))
    }

    /// Line and column survive a core -> wire -> core trip unchanged.
    #[test]
    fn position_round_trip() {
        let original = CorePosition::new(12, 34);

        let encoded = position_to_wire(&original);
        assert_eq!(encoded, WirePosition::new(12, 34));

        let decoded = position_from_wire(&encoded);
        assert_eq!(decoded.line, 12);
        assert_eq!(decoded.column, 34);
    }

    /// The byte span is not transported, but both endpoints are.
    #[test]
    fn range_round_trip() {
        let original = CoreRange::new(10..20, CorePosition::new(1, 2), CorePosition::new(1, 12));

        let decoded = range_from_wire(&range_to_wire(&original));

        assert_eq!(decoded.start.line, 1);
        assert_eq!(decoded.start.column, 2);
        assert_eq!(decoded.end.line, 1);
        assert_eq!(decoded.end.column, 12);
    }

    /// Decoding two ranges with the same origin string must hand back the
    /// very same `Arc` allocation (checked with `Arc::ptr_eq`), not two
    /// equal-but-separate `PathBuf`s.
    #[test]
    fn interner_shares_arc_for_repeated_origins() {
        let mut pool = OriginInterner::new();
        let first_wire = wire_point(0);
        let second_wire = wire_point(1);

        let first = range_from_wire_with_origin(&first_wire, Some("/repo/file.lex"), &mut pool);
        let second = range_from_wire_with_origin(&second_wire, Some("/repo/file.lex"), &mut pool);

        let first_origin = first.origin_path.expect("a has origin");
        let second_origin = second.origin_path.expect("b has origin");
        assert!(
            Arc::ptr_eq(&first_origin, &second_origin),
            "interner must share Arc<PathBuf> for identical origin strings"
        );
    }

    /// Different origin strings must not collapse onto one allocation.
    #[test]
    fn interner_keeps_distinct_arcs_for_different_origins() {
        let mut pool = OriginInterner::new();
        let wire = wire_point(0);

        let from_a = range_from_wire_with_origin(&wire, Some("/repo/a.lex"), &mut pool);
        let from_b = range_from_wire_with_origin(&wire, Some("/repo/b.lex"), &mut pool);

        let left = from_a.origin_path.as_ref().unwrap();
        let right = from_b.origin_path.as_ref().unwrap();
        assert!(
            !Arc::ptr_eq(left, right),
            "different origin strings must keep separate Arcs"
        );
    }

    /// No origin on the wire means no origin on the decoded range.
    #[test]
    fn interner_treats_none_origin_as_unstamped() {
        let mut pool = OriginInterner::new();
        let wire = wire_point(0);

        let decoded = range_from_wire_with_origin(&wire, None, &mut pool);

        assert!(decoded.origin_path.is_none());
    }
}