Skip to main content

lex_core/lex/ast/
range.rs

1//! Position and location tracking for source code locations
2//!
3//! This module defines the data structures for representing positions and locations in source code,
4//! as well as utilities for converting byte offsets to line/column positions.
5//!
6//! ## Types
7//!
8//! - [`Position`] - A line:column position in source code
9//! - [`Range`] - A source code range with start/end positions and byte span
10//! - [`SourceLocation`] - Utility for converting byte offsets to positions
11//!
12//! ## Key Design
13//!
14//! - Mandatory locations: All AST nodes have required `location: Range` fields
15//! - No null locations: Default position is (0, 0) to (0, 0), never None
16//! - Byte ranges preserved: Stores both byte spans and line:column positions
17//! - Unicode-aware: Handles multi-byte UTF-8 characters correctly via `char_indices()`
18//! - Efficient conversion: O(log n) binary search for byte-to-position conversion
19//!
20//! ## Usage
21//!
22//! The typical flow is:
23//! 1. Lexer produces `(Token, std::ops::Range<usize>)` pairs (byte offsets)
24//! 2. Parser converts byte ranges to `Range` using `SourceLocation::byte_range_to_ast_range()`
25//! 3. AST nodes store these `Range` values for error reporting and tooling
26//!
27//! See `src/lex/building/location.rs` for the canonical location conversion and aggregation utilities.
28
29use serde::{Deserialize, Serialize};
30use std::fmt;
31use std::ops::Range as ByteRange;
32use std::path::{Path, PathBuf};
33use std::sync::Arc;
34
35/// Represents a position in source code (line and column)
36#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
37pub struct Position {
38    pub line: usize,
39    pub column: usize,
40}
41
42impl Position {
43    pub fn new(line: usize, column: usize) -> Self {
44        Self { line, column }
45    }
46}
47
48impl fmt::Display for Position {
49    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
50        write!(f, "{}:{}", self.line, self.column)
51    }
52}
53
54impl Default for Position {
55    fn default() -> Self {
56        Self::new(0, 0)
57    }
58}
59
60/// Represents a location in source code (start and end positions)
61///
62/// Carries an optional `origin_path` identifying the source file the range refers
63/// to. For ranges built directly by the parser this is `None`; ranges produced
64/// by include resolution (`lex_core::includes`) carry the canonical path of the
65/// file they came from. The field is metadata: it does not affect parsing,
66/// formatting, or any structural operation, but it is consulted by file-reference
67/// resolution and diagnostics so that information attached to nodes from an
68/// included file points at the authoring location, not the post-merge one.
69///
70/// `Range` is `#[non_exhaustive]`: external code must construct via `Range::new`
71/// (or builders such as `with_origin`) rather than struct literals, so future
72/// metadata fields can be added without a breaking API change.
73///
74/// Equality and hashing are *positional only* — `origin_path` is intentionally
75/// excluded from `PartialEq`/`Hash`. Two ranges with the same span and positions
76/// are equal regardless of which file they came from. This matches what is
77/// preserved through serde (`origin_path` is `#[serde(skip)]`), so a value can
78/// round-trip through JSON without breaking equality.
79#[derive(Debug, Clone, Serialize, Deserialize)]
80#[non_exhaustive]
81pub struct Range {
82    pub span: ByteRange<usize>,
83    pub start: Position,
84    pub end: Position,
85    /// Optional path of the file this range was authored in.
86    ///
87    /// `None` for ranges that have not been touched by include resolution.
88    /// Currently skipped from (de)serialization because `Arc<T>` deserialization
89    /// requires serde's opt-in `rc` feature; the field is metadata and is not
90    /// part of any wire format today. When a use case needs origin info on the
91    /// wire, switch to a custom (de)serialization that emits the path as a
92    /// string.
93    #[serde(skip)]
94    pub origin_path: Option<Arc<PathBuf>>,
95}
96
97impl PartialEq for Range {
98    fn eq(&self, other: &Self) -> bool {
99        // Positional equality only — see struct doc.
100        self.span == other.span && self.start == other.start && self.end == other.end
101    }
102}
103
104impl Eq for Range {}
105
106impl std::hash::Hash for Range {
107    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
108        // Positional hashing only — see struct doc.
109        self.span.hash(state);
110        self.start.hash(state);
111        self.end.hash(state);
112    }
113}
114
115impl Range {
116    pub fn new(span: ByteRange<usize>, start: Position, end: Position) -> Self {
117        Self {
118            span,
119            start,
120            end,
121            origin_path: None,
122        }
123    }
124
125    /// Builder: attach an origin path to this range.
126    ///
127    /// Intended use: the include resolver, after parsing each loaded file,
128    /// walks the resulting tree and stamps the file's canonical path on every
129    /// range. Direct parser output should leave this `None`.
130    pub fn with_origin(mut self, path: Arc<PathBuf>) -> Self {
131        self.origin_path = Some(path);
132        self
133    }
134
135    /// Borrow the origin path, if any.
136    pub fn origin(&self) -> Option<&Path> {
137        self.origin_path.as_deref().map(PathBuf::as_path)
138    }
139
140    /// Check if a position is contained within this location
141    pub fn contains(&self, pos: Position) -> bool {
142        (self.start.line < pos.line
143            || (self.start.line == pos.line && self.start.column <= pos.column))
144            && (self.end.line > pos.line
145                || (self.end.line == pos.line && self.end.column >= pos.column))
146    }
147
148    /// Check if another location overlaps with this location
149    pub fn overlaps(&self, other: &Range) -> bool {
150        self.contains(other.start)
151            || self.contains(other.end)
152            || other.contains(self.start)
153            || other.contains(self.end)
154    }
155
156    /// Build a bounding box that contains all provided ranges.
157    ///
158    /// The result's `origin_path` is `Some(p)` only when every input range
159    /// shares the same origin `p`; mixed-origin inputs (or any `None` mixed
160    /// with `Some`) yield `None`. This avoids reporting one file's origin on
161    /// coordinates that came from another file — relevant after include
162    /// resolution, when a parent node may aggregate children from the host
163    /// file and from spliced-in files.
164    pub fn bounding_box<'a, I>(mut ranges: I) -> Option<Range>
165    where
166        I: Iterator<Item = &'a Range>,
167    {
168        let first = ranges.next()?.clone();
169        let mut span_start = first.span.start;
170        let mut span_end = first.span.end;
171        let mut start_pos = first.start;
172        let mut end_pos = first.end;
173        let mut origin = first.origin_path.clone();
174
175        for range in ranges {
176            if range.start < start_pos {
177                start_pos = range.start;
178                span_start = range.span.start;
179            } else if range.start == start_pos {
180                span_start = span_start.min(range.span.start);
181            }
182
183            if range.end > end_pos {
184                end_pos = range.end;
185                span_end = range.span.end;
186            } else if range.end == end_pos {
187                span_end = span_end.max(range.span.end);
188            }
189
190            if origin != range.origin_path {
191                origin = None;
192            }
193        }
194
195        let mut bbox = Range::new(span_start..span_end, start_pos, end_pos);
196        bbox.origin_path = origin;
197        Some(bbox)
198    }
199}
200
201impl fmt::Display for Range {
202    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
203        write!(f, "{}..{}", self.start, self.end)
204    }
205}
206
207impl Default for Range {
208    fn default() -> Self {
209        Self::new(
210            ByteRange { start: 0, end: 0 },
211            Position::default(),
212            Position::default(),
213        )
214    }
215}
216
217/// Provides fast conversion from byte offsets to line/column positions
218pub struct SourceLocation {
219    /// Byte offsets where each line starts
220    line_starts: Vec<usize>,
221}
222
223impl SourceLocation {
224    /// Create a new SourceLocation from source code
225    pub fn new(source: &str) -> Self {
226        let mut line_starts = vec![0];
227
228        for (byte_pos, ch) in source.char_indices() {
229            if ch == '\n' {
230                line_starts.push(byte_pos + 1);
231            }
232        }
233
234        Self { line_starts }
235    }
236
237    /// Convert a byte offset to a line/column position
238    pub fn byte_to_position(&self, byte_offset: usize) -> Position {
239        let line = self
240            .line_starts
241            .binary_search(&byte_offset)
242            .unwrap_or_else(|i| i - 1);
243
244        let column = byte_offset - self.line_starts[line];
245
246        Position::new(line, column)
247    }
248
249    /// Convert a byte range to a location
250    pub fn byte_range_to_ast_range(&self, range: &ByteRange<usize>) -> Range {
251        Range::new(
252            range.clone(),
253            self.byte_to_position(range.start),
254            self.byte_to_position(range.end),
255        )
256    }
257
258    /// Get the total number of lines in the source
259    pub fn line_count(&self) -> usize {
260        self.line_starts.len()
261    }
262
263    /// Get the byte offset for the start of a line
264    pub fn line_start(&self, line: usize) -> Option<usize> {
265        self.line_starts.get(line).copied()
266    }
267}
268
269#[cfg(test)]
270mod tests {
271    use super::*;
272    // @audit: no_source
273
274    // @audit: no_source
275    #[test]
276    fn test_position_creation() {
277        let pos = Position::new(5, 10);
278        assert_eq!(pos.line, 5);
279        assert_eq!(pos.column, 10);
280    }
281
282    // @audit: no_source
283    #[test]
284    fn test_position_comparison() {
285        let pos1 = Position::new(1, 5);
286        let pos2 = Position::new(1, 5);
287        let pos3 = Position::new(2, 3);
288
289        assert_eq!(pos1, pos2);
290        assert_ne!(pos1, pos3);
291        assert!(pos1 < pos3);
292    }
293
294    // @audit: no_source
295    #[test]
296    fn test_location_creation() {
297        let start = Position::new(0, 0);
298        let end = Position::new(2, 5);
299        let location = Range::new(0..0, start, end);
300
301        assert_eq!(location.start, start);
302        assert_eq!(location.end, end);
303    }
304
305    // @audit: no_source
306    #[test]
307    fn test_location_contains_single_line() {
308        let location = Range::new(0..0, Position::new(0, 0), Position::new(0, 10));
309
310        assert!(location.contains(Position::new(0, 0)));
311        assert!(location.contains(Position::new(0, 5)));
312        assert!(location.contains(Position::new(0, 10)));
313
314        assert!(!location.contains(Position::new(0, 11)));
315        assert!(!location.contains(Position::new(1, 0)));
316    }
317
318    // @audit: no_source
319    #[test]
320    fn test_location_contains_multiline() {
321        let location = Range::new(0..0, Position::new(1, 5), Position::new(2, 10));
322
323        // Before location
324        assert!(!location.contains(Position::new(1, 4)));
325        assert!(!location.contains(Position::new(0, 5)));
326
327        // In location
328        assert!(location.contains(Position::new(1, 5)));
329        assert!(location.contains(Position::new(1, 10)));
330        assert!(location.contains(Position::new(2, 0)));
331        assert!(location.contains(Position::new(2, 10)));
332
333        // After location
334        assert!(!location.contains(Position::new(2, 11)));
335        assert!(!location.contains(Position::new(3, 0)));
336    }
337
338    // @audit: no_source
339    #[test]
340    fn test_location_overlaps() {
341        let location1 = Range::new(0..0, Position::new(0, 0), Position::new(1, 5));
342        let location2 = Range::new(0..0, Position::new(1, 0), Position::new(2, 5));
343        let location3 = Range::new(0..0, Position::new(3, 0), Position::new(4, 5));
344
345        assert!(location1.overlaps(&location2));
346        assert!(location2.overlaps(&location1));
347        assert!(!location1.overlaps(&location3));
348        assert!(!location3.overlaps(&location1));
349    }
350
351    #[test]
352    fn test_bounding_box_ranges() {
353        let ranges = [
354            Range::new(2..5, Position::new(0, 2), Position::new(0, 5)),
355            Range::new(10..20, Position::new(3, 0), Position::new(4, 3)),
356        ];
357
358        let bbox = Range::bounding_box(ranges.iter()).unwrap();
359        assert_eq!(bbox.span, 2..20);
360        assert_eq!(bbox.start, Position::new(0, 2));
361        assert_eq!(bbox.end, Position::new(4, 3));
362    }
363
364    #[test]
365    fn test_bounding_box_empty_iter() {
366        let iter = std::iter::empty::<&Range>();
367        assert!(Range::bounding_box(iter).is_none());
368    }
369
370    #[test]
371    fn test_origin_defaults_to_none() {
372        let range = Range::new(0..5, Position::new(0, 0), Position::new(0, 5));
373        assert!(range.origin_path.is_none());
374        assert!(range.origin().is_none());
375        assert!(Range::default().origin_path.is_none());
376    }
377
378    #[test]
379    fn test_with_origin_attaches_path() {
380        let path = Arc::new(PathBuf::from("/tmp/foo.lex"));
381        let range = Range::new(0..5, Position::new(0, 0), Position::new(0, 5))
382            .with_origin(Arc::clone(&path));
383        assert_eq!(range.origin(), Some(Path::new("/tmp/foo.lex")));
384        assert!(Arc::ptr_eq(&path, range.origin_path.as_ref().unwrap()));
385    }
386
387    #[test]
388    fn test_origin_does_not_affect_position_predicates() {
389        let with_origin = Range::new(0..5, Position::new(0, 0), Position::new(0, 5))
390            .with_origin(Arc::new(PathBuf::from("/x.lex")));
391        let without = Range::new(0..5, Position::new(0, 0), Position::new(0, 5));
392
393        // Position-based queries are unaffected by origin.
394        assert!(with_origin.contains(Position::new(0, 2)));
395        assert!(without.contains(Position::new(0, 2)));
396        assert!(with_origin.overlaps(&without));
397    }
398
399    #[test]
400    fn test_equality_ignores_origin() {
401        // Origin is metadata; two ranges at the same position are equal
402        // regardless of origin. This preserves equality across serde
403        // round-trips (origin_path is `#[serde(skip)]`).
404        let a = Range::new(0..5, Position::new(0, 0), Position::new(0, 5));
405        let b = Range::new(0..5, Position::new(0, 0), Position::new(0, 5))
406            .with_origin(Arc::new(PathBuf::from("/x.lex")));
407        let c = Range::new(0..5, Position::new(0, 0), Position::new(0, 5))
408            .with_origin(Arc::new(PathBuf::from("/y.lex")));
409        assert_eq!(a, b);
410        assert_eq!(b, c);
411        // Same hash too, otherwise PartialEq/Hash contract is violated.
412        use std::collections::hash_map::DefaultHasher;
413        use std::hash::{Hash, Hasher};
414        let h = |r: &Range| {
415            let mut s = DefaultHasher::new();
416            r.hash(&mut s);
417            s.finish()
418        };
419        assert_eq!(h(&a), h(&b));
420        assert_eq!(h(&b), h(&c));
421    }
422
423    #[test]
424    fn test_bounding_box_keeps_origin_when_all_match() {
425        let path = Arc::new(PathBuf::from("/included.lex"));
426        let ranges = [
427            Range::new(2..5, Position::new(0, 2), Position::new(0, 5))
428                .with_origin(Arc::clone(&path)),
429            Range::new(10..20, Position::new(3, 0), Position::new(4, 3))
430                .with_origin(Arc::clone(&path)),
431        ];
432        let bbox = Range::bounding_box(ranges.iter()).unwrap();
433        assert_eq!(bbox.origin(), Some(Path::new("/included.lex")));
434    }
435
436    #[test]
437    fn test_bounding_box_clears_origin_on_mixed_inputs() {
438        // Same-origin first, mixed later → None.
439        let ranges_some_then_none = [
440            Range::new(2..5, Position::new(0, 2), Position::new(0, 5))
441                .with_origin(Arc::new(PathBuf::from("/a.lex"))),
442            Range::new(10..20, Position::new(3, 0), Position::new(4, 3)),
443        ];
444        let bbox = Range::bounding_box(ranges_some_then_none.iter()).unwrap();
445        assert!(bbox.origin().is_none());
446
447        // Different Some origins → None.
448        let ranges_two_origins = [
449            Range::new(2..5, Position::new(0, 2), Position::new(0, 5))
450                .with_origin(Arc::new(PathBuf::from("/a.lex"))),
451            Range::new(10..20, Position::new(3, 0), Position::new(4, 3))
452                .with_origin(Arc::new(PathBuf::from("/b.lex"))),
453        ];
454        let bbox = Range::bounding_box(ranges_two_origins.iter()).unwrap();
455        assert!(bbox.origin().is_none());
456
457        // None first, Some later → still None (the previous behavior in
458        // the "first wins" implementation would also be None here, but
459        // for the wrong reason; this asserts the policy now is uniform).
460        let ranges_none_then_some = [
461            Range::new(2..5, Position::new(0, 2), Position::new(0, 5)),
462            Range::new(10..20, Position::new(3, 0), Position::new(4, 3))
463                .with_origin(Arc::new(PathBuf::from("/x.lex"))),
464        ];
465        let bbox = Range::bounding_box(ranges_none_then_some.iter()).unwrap();
466        assert!(bbox.origin().is_none());
467    }
468
469    #[test]
470    fn test_serialization_skips_origin_field() {
471        // origin_path is `#[serde(skip)]` — never appears in JSON in either
472        // direction. This keeps existing AST JSON output byte-identical and
473        // avoids the serde `rc` feature for Arc deserialization. When a wire
474        // format needs origin info, swap to a custom (de)serializer.
475        let none_range = Range::new(0..5, Position::new(0, 0), Position::new(0, 5));
476        let some_range = Range::new(0..5, Position::new(0, 0), Position::new(0, 5))
477            .with_origin(Arc::new(PathBuf::from("/x.lex")));
478        let json_none = serde_json::to_string(&none_range).unwrap();
479        let json_some = serde_json::to_string(&some_range).unwrap();
480        assert!(!json_none.contains("origin_path"));
481        assert!(!json_some.contains("origin_path"));
482        assert_eq!(json_none, json_some);
483    }
484
485    #[test]
486    fn test_deserialization_yields_none_origin() {
487        // Deserializing JSON that has no origin_path field gives None, even
488        // when the in-memory value originally had Some.
489        let original = Range::new(0..5, Position::new(0, 0), Position::new(0, 5))
490            .with_origin(Arc::new(PathBuf::from("/x.lex")));
491        let json = serde_json::to_string(&original).unwrap();
492        let restored: Range = serde_json::from_str(&json).unwrap();
493        assert!(restored.origin().is_none());
494    }
495
496    // @audit: no_source
497    #[test]
498    fn test_position_display() {
499        let pos = Position::new(5, 10);
500        assert_eq!(format!("{pos}"), "5:10");
501    }
502
503    // @audit: no_source
504    #[test]
505    fn test_location_display() {
506        let location = Range::new(0..0, Position::new(1, 0), Position::new(2, 5));
507        assert_eq!(format!("{location}"), "1:0..2:5");
508    }
509
510    // @audit: no_source
511    #[test]
512    fn test_byte_to_position_single_line() {
513        let loc = SourceLocation::new("Hello");
514        assert_eq!(loc.byte_to_position(0), Position::new(0, 0));
515        assert_eq!(loc.byte_to_position(1), Position::new(0, 1));
516        assert_eq!(loc.byte_to_position(4), Position::new(0, 4));
517    }
518
519    // @audit: no_source
520    #[test]
521    fn test_byte_to_position_multiline() {
522        let loc = SourceLocation::new("Hello\nworld\ntest");
523
524        // First line
525        assert_eq!(loc.byte_to_position(0), Position::new(0, 0));
526        assert_eq!(loc.byte_to_position(5), Position::new(0, 5));
527
528        // Second line
529        assert_eq!(loc.byte_to_position(6), Position::new(1, 0));
530        assert_eq!(loc.byte_to_position(10), Position::new(1, 4));
531
532        // Third line
533        assert_eq!(loc.byte_to_position(12), Position::new(2, 0));
534        assert_eq!(loc.byte_to_position(15), Position::new(2, 3));
535    }
536
537    // @audit: no_source
538    #[test]
539    fn test_byte_to_position_with_unicode() {
540        let loc = SourceLocation::new("Hello\nwörld");
541        // Unicode characters take multiple bytes
542        assert_eq!(loc.byte_to_position(6), Position::new(1, 0));
543        assert_eq!(loc.byte_to_position(7), Position::new(1, 1));
544    }
545
546    #[test]
547    fn test_range_to_location_single_line() {
548        let loc = SourceLocation::new("Hello World");
549        let location = loc.byte_range_to_ast_range(&(0..5));
550
551        assert_eq!(location.start, Position::new(0, 0));
552        assert_eq!(location.end, Position::new(0, 5));
553    }
554
555    #[test]
556    fn test_range_to_location_multiline() {
557        let loc = SourceLocation::new("Hello\nWorld\nTest");
558        let location = loc.byte_range_to_ast_range(&(6..12));
559
560        assert_eq!(location.start, Position::new(1, 0));
561        assert_eq!(location.end, Position::new(2, 0));
562    }
563
564    #[test]
565    fn test_line_count() {
566        assert_eq!(SourceLocation::new("single").line_count(), 1);
567        assert_eq!(SourceLocation::new("line1\nline2").line_count(), 2);
568        assert_eq!(SourceLocation::new("line1\nline2\nline3").line_count(), 3);
569    }
570
571    #[test]
572    fn test_line_start() {
573        let loc = SourceLocation::new("Hello\nWorld\nTest");
574
575        assert_eq!(loc.line_start(0), Some(0));
576        assert_eq!(loc.line_start(1), Some(6));
577        assert_eq!(loc.line_start(2), Some(12));
578        assert_eq!(loc.line_start(3), None);
579    }
580}
581
582#[cfg(test)]
583mod ast_integration_tests {
584    use crate::lex::ast::{
585        elements::Session,
586        range::{Position, Range},
587        traits::{AstNode, Container},
588    };
589
590    #[test]
591    fn test_start_position() {
592        let location = Range::new(0..0, Position::new(1, 0), Position::new(1, 10));
593        let session = Session::with_title("Title".to_string()).at(location);
594        assert_eq!(session.start_position(), Position::new(1, 0));
595    }
596
597    #[test]
598    fn test_find_nodes_at_position() {
599        use crate::lex::ast::elements::ContentItem;
600        use crate::lex::ast::elements::Document;
601        use crate::lex::ast::find_nodes_at_position;
602
603        let location1 = Range::new(0..0, Position::new(1, 0), Position::new(1, 10));
604        let location2 = Range::new(0..0, Position::new(2, 0), Position::new(2, 10));
605        let session1 = Session::with_title("Title1".to_string()).at(location1);
606        let session2 = Session::with_title("Title2".to_string()).at(location2);
607        let document = Document::with_content(vec![
608            ContentItem::Session(session1),
609            ContentItem::Session(session2),
610        ]);
611        let nodes = find_nodes_at_position(&document, Position::new(1, 5));
612        assert_eq!(nodes.len(), 1);
613        assert_eq!(nodes[0].node_type(), "Session");
614        assert_eq!(nodes[0].display_label(), "Title1");
615    }
616
617    #[test]
618    fn test_find_nested_nodes_at_position() {
619        use crate::lex::ast::elements::{ContentItem, Document, Paragraph};
620        use crate::lex::ast::find_nodes_at_position;
621
622        let para_location = Range::new(0..0, Position::new(2, 0), Position::new(2, 10));
623        let paragraph = Paragraph::from_line("Nested".to_string()).at(para_location);
624        let session_location = Range::new(0..0, Position::new(1, 0), Position::new(3, 0));
625        let mut session = Session::with_title("Title".to_string()).at(session_location);
626        session
627            .children_mut()
628            .push(ContentItem::Paragraph(paragraph));
629        let document = Document::with_content(vec![ContentItem::Session(session)]);
630        let nodes = find_nodes_at_position(&document, Position::new(2, 5));
631        // Now we get only the deepest element: TextLine
632        assert_eq!(nodes.len(), 1);
633        assert_eq!(nodes[0].node_type(), "TextLine");
634    }
635}