Skip to main content

orrery_parser/
source_map.rs

1//! Source map for multi-file span tracking.
2//!
3//! This module provides a virtual byte address space that maps spans from
4//! multiple source files into a single continuous address space. Each file
5//! is placed at a unique offset range, with 1-byte gaps between files to
6//! ensure no span can accidentally straddle a file boundary.
7//!
8//! This follows the pattern used by `rustc` and `rust-analyzer`.
9//!
10//! # Virtual Address Space Layout
11//!
12//! ```text
13//! File A (100 bytes): offsets 0..100
14//! gap: 1 byte (offset 100)
15//! File B (80 bytes):  offsets 101..181
16//! gap: 1 byte (offset 181)
17//! File C (50 bytes):  offsets 182..232
18//! ```
19//!
20//! # Example
21//!
22//! ```text
23//! # use orrery_parser::source_map::SourceMap;
24//! let mut map = SourceMap::new();
25//!
26//! let offset_a = map.add_file("a.orr", "hello", None);
27//! assert_eq!(offset_a, 0);
28//!
29//! let offset_b = map.add_file("b.orr", "world", None);
30//! assert_eq!(offset_b, 6); // 5 bytes + 1-byte gap
31//! ```
32
33use crate::span::Span;
34
35/// A single source file registered in the source map.
36///
37/// Each file occupies a contiguous range `[start_offset, end_offset)` in the
38/// virtual address space. The `end_offset` is exclusive — valid byte positions
39/// within this file are `start_offset..end_offset`.
40#[derive(Debug, Clone)]
41pub struct SourceFile<'a> {
42    /// Human-readable name, typically a file path.
43    name: String,
44    /// The full source text of the file.
45    source: &'a str,
46    /// Byte offset where this file starts in the virtual address space.
47    start_offset: usize,
48    /// Byte offset where this file ends (exclusive) in the virtual address space.
49    end_offset: usize,
50    /// Span of the `import "path";` declaration that first imported this file.
51    /// `None` for the root/entry file.
52    first_imported_at: Option<Span>,
53}
54
55impl SourceFile<'_> {
56    /// Returns the human-readable name of this file.
57    pub fn name(&self) -> &str {
58        &self.name
59    }
60
61    /// Returns the source text of this file.
62    pub fn source(&self) -> &str {
63        self.source
64    }
65
66    /// Returns the start offset in the virtual address space.
67    pub fn start_offset(&self) -> usize {
68        self.start_offset
69    }
70
71    /// Returns the end offset (exclusive) in the virtual address space.
72    pub fn end_offset(&self) -> usize {
73        self.end_offset
74    }
75
76    /// Returns the span where this file was first imported, if any.
77    ///
78    /// Returns `None` for the root/entry file.
79    pub fn first_imported_at(&self) -> Option<Span> {
80        self.first_imported_at
81    }
82
83    /// Returns the length of the source text in bytes.
84    pub fn len(&self) -> usize {
85        self.source.len()
86    }
87
88    /// Returns `true` if the source text is empty.
89    pub fn is_empty(&self) -> bool {
90        self.source.is_empty()
91    }
92}
93
94/// Maps virtual byte offsets to source files.
95///
96/// `SourceMap` places all source files into a virtual byte address space,
97/// enabling [`Span`] values from different files to coexist without
98/// modification. Files are separated by 1-byte gaps to prevent spans
99/// from accidentally straddling file boundaries.
100///
101/// # Example
102///
103/// ```text
104/// # use orrery_parser::source_map::SourceMap;
105/// let mut map = SourceMap::new();
106///
107/// let base = map.add_file("main.orr", "diagram component;\nbox: Rectangle;", None);
108///
109/// let file = map.lookup_file(base).unwrap();
110/// assert_eq!(file.name(), "main.orr");
111/// ```
112#[derive(Debug, Default)]
113pub struct SourceMap<'a> {
114    files: Vec<SourceFile<'a>>,
115    /// Byte offset where the next added file will start.
116    next_offset: usize,
117}
118
119impl<'a> SourceMap<'a> {
120    /// Create a new, empty source map.
121    pub fn new() -> Self {
122        Self::default()
123    }
124
125    /// Registers a source file in the virtual address space.
126    ///
127    /// The file is placed at the next available offset, separated from the
128    /// previous file by a 1-byte gap.
129    ///
130    /// # Arguments
131    ///
132    /// * `name` - Human-readable name for the file (typically its path).
133    /// * `source` - The full source text.
134    /// * `imported_at` - The span of the `import` declaration that triggered
135    ///   loading this file, or `None` for the root file.
136    ///
137    /// # Returns
138    ///
139    /// The `base_offset` at which this file starts in the virtual address space.
140    pub fn add_file(
141        &mut self,
142        name: impl Into<String>,
143        source: &'a str,
144        imported_at: Option<Span>,
145    ) -> usize {
146        let start_offset = self.next_offset;
147        let end_offset = start_offset + source.len();
148
149        self.files.push(SourceFile {
150            name: name.into(),
151            source,
152            start_offset,
153            end_offset,
154            first_imported_at: imported_at,
155        });
156
157        // Next file starts after a 1-byte gap.
158        self.next_offset = end_offset + 1;
159
160        start_offset
161    }
162
163    /// Looks up the source file containing the given virtual offset.
164    ///
165    /// Uses binary search for O(log n) performance.
166    ///
167    /// # Arguments
168    ///
169    /// * `offset` - A byte offset in the virtual address space.
170    ///
171    /// # Returns
172    ///
173    /// The [`SourceFile`] containing `offset`, or `None` if the offset falls
174    /// in a gap between files or is out of range.
175    pub fn lookup_file(&self, offset: usize) -> Option<&SourceFile<'a>> {
176        // `partition_point` returns the count of files whose `start_offset <= offset`.
177        let idx = self.files.partition_point(|f| f.start_offset <= offset);
178        if idx == 0 {
179            return None;
180        }
181        let file = &self.files[idx - 1];
182        if offset < file.end_offset {
183            Some(file)
184        } else {
185            None // offset is in a gap or past the last file
186        }
187    }
188
189    /// Looks up the source file containing the given [`Span`].
190    ///
191    /// This verifies that the *entire* span (not just its start offset)
192    /// falls within a single file.
193    ///
194    /// # Arguments
195    ///
196    /// * `span` - A [`Span`] in the virtual address space.
197    ///
198    /// # Returns
199    ///
200    /// The [`SourceFile`] containing the full span, or `None` if the span
201    /// crosses a file boundary, falls in a gap, or is out of range.
202    pub fn lookup_file_by_span(&self, span: Span) -> Option<&SourceFile<'a>> {
203        let file = self.lookup_file(span.start())?;
204        // Verify the entire span stays within this file.
205        if span.end() <= file.end_offset {
206            Some(file)
207        } else {
208            None
209        }
210    }
211
212    /// Extracts the source text corresponding to a span.
213    ///
214    /// # Arguments
215    ///
216    /// * `span` - A [`Span`] in the virtual address space.
217    ///
218    /// # Returns
219    ///
220    /// The source text slice, or `None` if the span crosses a file boundary,
221    /// falls in a gap, or is out of range.
222    pub fn source_slice(&self, span: Span) -> Option<&str> {
223        let file = self.lookup_file_by_span(span)?;
224        let local_start = span.start() - file.start_offset;
225        let local_end = span.end() - file.start_offset;
226        Some(&file.source[local_start..local_end])
227    }
228
229    /// Returns the number of registered source files.
230    pub fn file_count(&self) -> usize {
231        self.files.len()
232    }
233
234    /// Returns all registered source files.
235    pub fn files(&self) -> &[SourceFile<'a>] {
236        &self.files
237    }
238}
239
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: `a.orr` = "hello" at 0..5 and `b.orr` = "world" at 6..11.
    fn hello_world_map() -> SourceMap<'static> {
        let mut sources = SourceMap::new();
        sources.add_file("a.orr", "hello", None);
        sources.add_file("b.orr", "world", None);
        sources
    }

    /// The first registered file always lands at virtual offset zero.
    #[test]
    fn add_single_file_returns_zero_offset() {
        let mut sources = SourceMap::new();
        let base = sources.add_file("a.orr", "hello", None);
        assert_eq!(base, 0);
        assert_eq!(sources.file_count(), 1);
    }

    /// Each subsequent file starts one byte past the previous file's end.
    #[test]
    fn add_multiple_files_with_gaps() {
        // (name, text, expected base offset):
        //   a.orr: 5 bytes at 0..5
        //   b.orr: 5 bytes at 6..11 (gap at 5)
        //   c.orr: 3 bytes at 12..15 (gap at 11)
        let plan = [("a.orr", "hello", 0), ("b.orr", "world", 6), ("c.orr", "foo", 12)];

        let mut sources = SourceMap::new();
        for &(name, text, expected_base) in plan.iter() {
            assert_eq!(sources.add_file(name, text, None), expected_base);
        }

        assert_eq!(sources.file_count(), 3);
    }

    /// A zero-length file still reserves its 1-byte gap.
    #[test]
    fn add_empty_file() {
        let mut sources = SourceMap::new();

        let empty_base = sources.add_file("empty.orr", "", None);
        assert_eq!(empty_base, 0);

        // 0 bytes of content + 1 gap byte => the next file begins at 1.
        let next_base = sources.add_file("b.orr", "x", None);
        assert_eq!(next_base, 1);
    }

    /// The import span passed to `add_file` is stored on the file record.
    #[test]
    fn add_file_records_imported_at_span() {
        let import_site = Span::new(10..25);
        let mut sources = SourceMap::new();

        sources.add_file("root.orr", "root", None);
        sources.add_file("lib.orr", "lib", Some(import_site));

        let registered = sources.files();
        assert!(registered[0].first_imported_at().is_none());
        assert_eq!(registered[1].first_imported_at(), Some(import_site));
    }

    /// Every accessor reflects what was registered.
    #[test]
    fn source_file_accessors() {
        let mut sources = SourceMap::new();
        sources.add_file("test.orr", "abc", None);

        let registered = &sources.files()[0];
        assert_eq!(registered.name(), "test.orr");
        assert_eq!(registered.source(), "abc");
        assert_eq!(registered.start_offset(), 0);
        assert_eq!(registered.end_offset(), 3);
        assert_eq!(registered.len(), 3);
        assert!(!registered.is_empty());
    }

    /// An empty file has zero length and a degenerate offset range.
    #[test]
    fn empty_source_file() {
        let mut sources = SourceMap::new();
        sources.add_file("empty.orr", "", None);

        let registered = &sources.files()[0];
        assert_eq!(registered.len(), 0);
        assert!(registered.is_empty());
        assert_eq!(registered.start_offset(), registered.end_offset());
    }

    /// All in-file offsets resolve; the end offset and beyond do not.
    #[test]
    fn lookup_file_single() {
        let mut sources = SourceMap::new();
        sources.add_file("a.orr", "hello", None);

        // Offsets 0 through 4 are the file's valid byte positions.
        for i in 0..5 {
            let hit = sources.lookup_file(i);
            assert!(hit.is_some(), "offset {i} should resolve");
            assert_eq!(hit.unwrap().name(), "a.orr");
        }

        // The exclusive end and anything further out miss.
        for &beyond in [5, 100].iter() {
            assert!(sources.lookup_file(beyond).is_none());
        }
    }

    /// Offsets resolve to the right file; gap bytes resolve to nothing.
    #[test]
    fn lookup_file_multiple() {
        let mut sources = SourceMap::new();
        sources.add_file("a.orr", "hello", None); // 0..5
        sources.add_file("b.orr", "world", None); // 6..11
        sources.add_file("c.orr", "foo", None); // 12..15

        // (offset, owning file name; `None` for a gap or past the end)
        let expectations: [(usize, Option<&str>); 9] = [
            (0, Some("a.orr")),
            (4, Some("a.orr")),
            (5, None), // gap
            (6, Some("b.orr")),
            (10, Some("b.orr")),
            (11, None), // gap
            (12, Some("c.orr")),
            (14, Some("c.orr")),
            (15, None), // past end
        ];

        for &(offset, owner) in expectations.iter() {
            assert_eq!(sources.lookup_file(offset).map(|file| file.name()), owner);
        }
    }

    /// Nothing resolves in a map with no files.
    #[test]
    fn lookup_file_empty_map() {
        let sources = SourceMap::new();
        let hit = sources.lookup_file(0);
        assert!(hit.is_none());
    }

    /// An empty file occupies 0..0 and therefore contains no offset.
    #[test]
    fn lookup_file_empty_file_returns_none() {
        let mut sources = SourceMap::new();
        sources.add_file("empty.orr", "", None); // 0..0

        let hit = sources.lookup_file(0);
        assert!(hit.is_none());
    }

    /// Sub-ranges of a single file come back as the matching text.
    #[test]
    fn source_slice_within_file() {
        let mut sources = SourceMap::new();
        sources.add_file("a.orr", "hello world", None);

        let first_word = sources.source_slice(Span::new(0..5)).unwrap();
        assert_eq!(first_word, "hello");

        let second_word = sources.source_slice(Span::new(6..11)).unwrap();
        assert_eq!(second_word, "world");
    }

    /// A span covering the whole file returns the whole text.
    #[test]
    fn source_slice_entire_file() {
        let mut sources = SourceMap::new();
        sources.add_file("a.orr", "hello", None);

        let everything = sources.source_slice(Span::new(0..5)).unwrap();
        assert_eq!(everything, "hello");
    }

    /// An empty span inside a file yields the empty string.
    #[test]
    fn source_slice_empty_span() {
        let mut sources = SourceMap::new();
        sources.add_file("a.orr", "hello", None);

        let nothing = sources.source_slice(Span::new(2..2)).unwrap();
        assert_eq!(nothing, "");
    }

    /// Virtual offsets are translated to file-local ones for later files.
    #[test]
    fn source_slice_second_file() {
        let sources = hello_world_map();

        let whole = sources.source_slice(Span::new(6..11)).unwrap();
        assert_eq!(whole, "world");

        let tail = sources.source_slice(Span::new(8..11)).unwrap();
        assert_eq!(tail, "rld");
    }

    /// A span may not leave the file it starts in.
    #[test]
    fn source_slice_crossing_files_returns_none() {
        let sources = hello_world_map();

        // Starts in file A, ends in the gap byte.
        let into_gap = sources.source_slice(Span::new(3..6));
        assert!(into_gap.is_none());

        // Starts in file A, ends inside file B.
        let into_next_file = sources.source_slice(Span::new(3..8));
        assert!(into_next_file.is_none());
    }

    /// A span that begins on a gap byte belongs to no file.
    #[test]
    fn source_slice_in_gap_returns_none() {
        let sources = hello_world_map();

        let from_gap = sources.source_slice(Span::new(5..6));
        assert!(from_gap.is_none());
    }

    /// A span far beyond every registered file resolves to nothing.
    #[test]
    fn source_slice_out_of_range_returns_none() {
        let mut sources = SourceMap::new();
        sources.add_file("a.orr", "hello", None);

        let far_away = sources.source_slice(Span::new(100..105));
        assert!(far_away.is_none());
    }

    /// Reproduces the layout table from the module documentation.
    #[test]
    fn virtual_address_space_layout() {
        // Owned texts are created first so they outlive the map borrowing them.
        let texts = ["x".repeat(100), "y".repeat(80), "z".repeat(50)];
        let mut sources = SourceMap::new();

        // (name, expected start, expected exclusive end):
        //   a.orr: 100 bytes -> 0..100
        //   b.orr:  80 bytes -> 101..181 (gap at 100)
        //   c.orr:  50 bytes -> 182..232 (gap at 181)
        let layout = [("a.orr", 0, 100), ("b.orr", 101, 181), ("c.orr", 182, 232)];
        for (index, &(name, start, end)) in layout.iter().enumerate() {
            let base = sources.add_file(name, &texts[index], None);
            assert_eq!(base, start);
            assert_eq!(sources.files()[index].start_offset(), start);
            assert_eq!(sources.files()[index].end_offset(), end);
        }

        // The two gap bytes belong to no file.
        for &gap in [100, 181].iter() {
            assert!(sources.lookup_file(gap).is_none());
        }

        // The final byte of each file resolves to that file.
        for &(last_byte, owner) in [(99, "a.orr"), (180, "b.orr"), (231, "c.orr")].iter() {
            assert_eq!(sources.lookup_file(last_byte).unwrap().name(), owner);
        }

        // One past the last file is out of range.
        assert!(sources.lookup_file(232).is_none());
    }
}