Skip to main content

lex_core/lex/ast/
range.rs

1//! Position and location tracking for source code locations
2//!
3//! This module defines the data structures for representing positions and locations in source code,
4//! as well as utilities for converting byte offsets to line/column positions.
5//!
6//! ## Types
7//!
8//! - [`Position`] - A line:column position in source code
9//! - [`Range`] - A source code range with start/end positions and byte span
10//! - [`SourceLocation`] - Utility for converting byte offsets to positions
11//!
12//! ## Key Design
13//!
14//! - Mandatory locations: All AST nodes have required `location: Range` fields
15//! - No null locations: Default position is (0, 0) to (0, 0), never None
16//! - Byte ranges preserved: Stores both byte spans and line:column positions
17//! - Unicode-aware: Handles multi-byte UTF-8 characters correctly via `char_indices()`
18//! - Efficient conversion: O(log n) binary search for byte-to-position conversion
19//!
20//! ## Usage
21//!
22//! The typical flow is:
23//! 1. Lexer produces `(Token, std::ops::Range<usize>)` pairs (byte offsets)
24//! 2. Parser converts byte ranges to `Range` using `SourceLocation::byte_range_to_ast_range()`
25//! 3. AST nodes store these `Range` values for error reporting and tooling
26//!
27//! See `src/lex/building/location.rs` for the canonical location conversion and aggregation utilities.
28
29use serde::{Deserialize, Serialize};
30use std::fmt;
31use std::ops::Range as ByteRange;
32
33/// Represents a position in source code (line and column)
34#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
35pub struct Position {
36    pub line: usize,
37    pub column: usize,
38}
39
40impl Position {
41    pub fn new(line: usize, column: usize) -> Self {
42        Self { line, column }
43    }
44}
45
46impl fmt::Display for Position {
47    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
48        write!(f, "{}:{}", self.line, self.column)
49    }
50}
51
52impl Default for Position {
53    fn default() -> Self {
54        Self::new(0, 0)
55    }
56}
57
58/// Represents a location in source code (start and end positions)
59#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
60pub struct Range {
61    pub span: ByteRange<usize>,
62    pub start: Position,
63    pub end: Position,
64}
65
66impl Range {
67    pub fn new(span: ByteRange<usize>, start: Position, end: Position) -> Self {
68        Self { span, start, end }
69    }
70
71    /// Check if a position is contained within this location
72    pub fn contains(&self, pos: Position) -> bool {
73        (self.start.line < pos.line
74            || (self.start.line == pos.line && self.start.column <= pos.column))
75            && (self.end.line > pos.line
76                || (self.end.line == pos.line && self.end.column >= pos.column))
77    }
78
79    /// Check if another location overlaps with this location
80    pub fn overlaps(&self, other: &Range) -> bool {
81        self.contains(other.start)
82            || self.contains(other.end)
83            || other.contains(self.start)
84            || other.contains(self.end)
85    }
86
87    /// Build a bounding box that contains all provided ranges.
88    pub fn bounding_box<'a, I>(mut ranges: I) -> Option<Range>
89    where
90        I: Iterator<Item = &'a Range>,
91    {
92        let first = ranges.next()?.clone();
93        let mut span_start = first.span.start;
94        let mut span_end = first.span.end;
95        let mut start_pos = first.start;
96        let mut end_pos = first.end;
97
98        for range in ranges {
99            if range.start < start_pos {
100                start_pos = range.start;
101                span_start = range.span.start;
102            } else if range.start == start_pos {
103                span_start = span_start.min(range.span.start);
104            }
105
106            if range.end > end_pos {
107                end_pos = range.end;
108                span_end = range.span.end;
109            } else if range.end == end_pos {
110                span_end = span_end.max(range.span.end);
111            }
112        }
113
114        Some(Range::new(span_start..span_end, start_pos, end_pos))
115    }
116}
117
118impl fmt::Display for Range {
119    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
120        write!(f, "{}..{}", self.start, self.end)
121    }
122}
123
124impl Default for Range {
125    fn default() -> Self {
126        Self::new(
127            ByteRange { start: 0, end: 0 },
128            Position::default(),
129            Position::default(),
130        )
131    }
132}
133
134/// Provides fast conversion from byte offsets to line/column positions
135pub struct SourceLocation {
136    /// Byte offsets where each line starts
137    line_starts: Vec<usize>,
138}
139
140impl SourceLocation {
141    /// Create a new SourceLocation from source code
142    pub fn new(source: &str) -> Self {
143        let mut line_starts = vec![0];
144
145        for (byte_pos, ch) in source.char_indices() {
146            if ch == '\n' {
147                line_starts.push(byte_pos + 1);
148            }
149        }
150
151        Self { line_starts }
152    }
153
154    /// Convert a byte offset to a line/column position
155    pub fn byte_to_position(&self, byte_offset: usize) -> Position {
156        let line = self
157            .line_starts
158            .binary_search(&byte_offset)
159            .unwrap_or_else(|i| i - 1);
160
161        let column = byte_offset - self.line_starts[line];
162
163        Position::new(line, column)
164    }
165
166    /// Convert a byte range to a location
167    pub fn byte_range_to_ast_range(&self, range: &ByteRange<usize>) -> Range {
168        Range::new(
169            range.clone(),
170            self.byte_to_position(range.start),
171            self.byte_to_position(range.end),
172        )
173    }
174
175    /// Get the total number of lines in the source
176    pub fn line_count(&self) -> usize {
177        self.line_starts.len()
178    }
179
180    /// Get the byte offset for the start of a line
181    pub fn line_start(&self, line: usize) -> Option<usize> {
182        self.line_starts.get(line).copied()
183    }
184}
185
186#[cfg(test)]
187mod tests {
188    use super::*;
189    // @audit: no_source
190
191    // @audit: no_source
192    #[test]
193    fn test_position_creation() {
194        let pos = Position::new(5, 10);
195        assert_eq!(pos.line, 5);
196        assert_eq!(pos.column, 10);
197    }
198
199    // @audit: no_source
200    #[test]
201    fn test_position_comparison() {
202        let pos1 = Position::new(1, 5);
203        let pos2 = Position::new(1, 5);
204        let pos3 = Position::new(2, 3);
205
206        assert_eq!(pos1, pos2);
207        assert_ne!(pos1, pos3);
208        assert!(pos1 < pos3);
209    }
210
211    // @audit: no_source
212    #[test]
213    fn test_location_creation() {
214        let start = Position::new(0, 0);
215        let end = Position::new(2, 5);
216        let location = Range::new(0..0, start, end);
217
218        assert_eq!(location.start, start);
219        assert_eq!(location.end, end);
220    }
221
222    // @audit: no_source
223    #[test]
224    fn test_location_contains_single_line() {
225        let location = Range::new(0..0, Position::new(0, 0), Position::new(0, 10));
226
227        assert!(location.contains(Position::new(0, 0)));
228        assert!(location.contains(Position::new(0, 5)));
229        assert!(location.contains(Position::new(0, 10)));
230
231        assert!(!location.contains(Position::new(0, 11)));
232        assert!(!location.contains(Position::new(1, 0)));
233    }
234
235    // @audit: no_source
236    #[test]
237    fn test_location_contains_multiline() {
238        let location = Range::new(0..0, Position::new(1, 5), Position::new(2, 10));
239
240        // Before location
241        assert!(!location.contains(Position::new(1, 4)));
242        assert!(!location.contains(Position::new(0, 5)));
243
244        // In location
245        assert!(location.contains(Position::new(1, 5)));
246        assert!(location.contains(Position::new(1, 10)));
247        assert!(location.contains(Position::new(2, 0)));
248        assert!(location.contains(Position::new(2, 10)));
249
250        // After location
251        assert!(!location.contains(Position::new(2, 11)));
252        assert!(!location.contains(Position::new(3, 0)));
253    }
254
255    // @audit: no_source
256    #[test]
257    fn test_location_overlaps() {
258        let location1 = Range::new(0..0, Position::new(0, 0), Position::new(1, 5));
259        let location2 = Range::new(0..0, Position::new(1, 0), Position::new(2, 5));
260        let location3 = Range::new(0..0, Position::new(3, 0), Position::new(4, 5));
261
262        assert!(location1.overlaps(&location2));
263        assert!(location2.overlaps(&location1));
264        assert!(!location1.overlaps(&location3));
265        assert!(!location3.overlaps(&location1));
266    }
267
268    #[test]
269    fn test_bounding_box_ranges() {
270        let ranges = [
271            Range::new(2..5, Position::new(0, 2), Position::new(0, 5)),
272            Range::new(10..20, Position::new(3, 0), Position::new(4, 3)),
273        ];
274
275        let bbox = Range::bounding_box(ranges.iter()).unwrap();
276        assert_eq!(bbox.span, 2..20);
277        assert_eq!(bbox.start, Position::new(0, 2));
278        assert_eq!(bbox.end, Position::new(4, 3));
279    }
280
281    #[test]
282    fn test_bounding_box_empty_iter() {
283        let iter = std::iter::empty::<&Range>();
284        assert!(Range::bounding_box(iter).is_none());
285    }
286
287    // @audit: no_source
288    #[test]
289    fn test_position_display() {
290        let pos = Position::new(5, 10);
291        assert_eq!(format!("{pos}"), "5:10");
292    }
293
294    // @audit: no_source
295    #[test]
296    fn test_location_display() {
297        let location = Range::new(0..0, Position::new(1, 0), Position::new(2, 5));
298        assert_eq!(format!("{location}"), "1:0..2:5");
299    }
300
301    // @audit: no_source
302    #[test]
303    fn test_byte_to_position_single_line() {
304        let loc = SourceLocation::new("Hello");
305        assert_eq!(loc.byte_to_position(0), Position::new(0, 0));
306        assert_eq!(loc.byte_to_position(1), Position::new(0, 1));
307        assert_eq!(loc.byte_to_position(4), Position::new(0, 4));
308    }
309
310    // @audit: no_source
311    #[test]
312    fn test_byte_to_position_multiline() {
313        let loc = SourceLocation::new("Hello\nworld\ntest");
314
315        // First line
316        assert_eq!(loc.byte_to_position(0), Position::new(0, 0));
317        assert_eq!(loc.byte_to_position(5), Position::new(0, 5));
318
319        // Second line
320        assert_eq!(loc.byte_to_position(6), Position::new(1, 0));
321        assert_eq!(loc.byte_to_position(10), Position::new(1, 4));
322
323        // Third line
324        assert_eq!(loc.byte_to_position(12), Position::new(2, 0));
325        assert_eq!(loc.byte_to_position(15), Position::new(2, 3));
326    }
327
328    // @audit: no_source
329    #[test]
330    fn test_byte_to_position_with_unicode() {
331        let loc = SourceLocation::new("Hello\nwörld");
332        // Unicode characters take multiple bytes
333        assert_eq!(loc.byte_to_position(6), Position::new(1, 0));
334        assert_eq!(loc.byte_to_position(7), Position::new(1, 1));
335    }
336
337    #[test]
338    fn test_range_to_location_single_line() {
339        let loc = SourceLocation::new("Hello World");
340        let location = loc.byte_range_to_ast_range(&(0..5));
341
342        assert_eq!(location.start, Position::new(0, 0));
343        assert_eq!(location.end, Position::new(0, 5));
344    }
345
346    #[test]
347    fn test_range_to_location_multiline() {
348        let loc = SourceLocation::new("Hello\nWorld\nTest");
349        let location = loc.byte_range_to_ast_range(&(6..12));
350
351        assert_eq!(location.start, Position::new(1, 0));
352        assert_eq!(location.end, Position::new(2, 0));
353    }
354
355    #[test]
356    fn test_line_count() {
357        assert_eq!(SourceLocation::new("single").line_count(), 1);
358        assert_eq!(SourceLocation::new("line1\nline2").line_count(), 2);
359        assert_eq!(SourceLocation::new("line1\nline2\nline3").line_count(), 3);
360    }
361
362    #[test]
363    fn test_line_start() {
364        let loc = SourceLocation::new("Hello\nWorld\nTest");
365
366        assert_eq!(loc.line_start(0), Some(0));
367        assert_eq!(loc.line_start(1), Some(6));
368        assert_eq!(loc.line_start(2), Some(12));
369        assert_eq!(loc.line_start(3), None);
370    }
371}
372
373#[cfg(test)]
374mod ast_integration_tests {
375    use crate::lex::ast::{
376        elements::Session,
377        range::{Position, Range},
378        traits::{AstNode, Container},
379    };
380
381    #[test]
382    fn test_start_position() {
383        let location = Range::new(0..0, Position::new(1, 0), Position::new(1, 10));
384        let session = Session::with_title("Title".to_string()).at(location);
385        assert_eq!(session.start_position(), Position::new(1, 0));
386    }
387
388    #[test]
389    fn test_find_nodes_at_position() {
390        use crate::lex::ast::elements::ContentItem;
391        use crate::lex::ast::elements::Document;
392        use crate::lex::ast::find_nodes_at_position;
393
394        let location1 = Range::new(0..0, Position::new(1, 0), Position::new(1, 10));
395        let location2 = Range::new(0..0, Position::new(2, 0), Position::new(2, 10));
396        let session1 = Session::with_title("Title1".to_string()).at(location1);
397        let session2 = Session::with_title("Title2".to_string()).at(location2);
398        let document = Document::with_content(vec![
399            ContentItem::Session(session1),
400            ContentItem::Session(session2),
401        ]);
402        let nodes = find_nodes_at_position(&document, Position::new(1, 5));
403        assert_eq!(nodes.len(), 1);
404        assert_eq!(nodes[0].node_type(), "Session");
405        assert_eq!(nodes[0].display_label(), "Title1");
406    }
407
408    #[test]
409    fn test_find_nested_nodes_at_position() {
410        use crate::lex::ast::elements::{ContentItem, Document, Paragraph};
411        use crate::lex::ast::find_nodes_at_position;
412
413        let para_location = Range::new(0..0, Position::new(2, 0), Position::new(2, 10));
414        let paragraph = Paragraph::from_line("Nested".to_string()).at(para_location);
415        let session_location = Range::new(0..0, Position::new(1, 0), Position::new(3, 0));
416        let mut session = Session::with_title("Title".to_string()).at(session_location);
417        session
418            .children_mut()
419            .push(ContentItem::Paragraph(paragraph));
420        let document = Document::with_content(vec![ContentItem::Session(session)]);
421        let nodes = find_nodes_at_position(&document, Position::new(2, 5));
422        // Now we get only the deepest element: TextLine
423        assert_eq!(nodes.len(), 1);
424        assert_eq!(nodes[0].node_type(), "TextLine");
425    }
426}