Skip to main content

shuck_ast/
span.rs

//! Source location tracking for error messages and `$LINENO`.
//!
//! Provides position and span types for tracking source locations through
//! lexing, parsing, and execution.
5
/// A location within source text.
///
/// Note that [`Position::new`] starts at line 1, column 1, offset 0, whereas
/// the derived `Default` produces a position whose fields are all zero.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Position {
    /// 1-based line number
    pub line: usize,
    /// 1-based column number; [`Position::advance`] bumps it by one per
    /// `char`, so multi-byte characters count as a single column
    pub column: usize,
    /// 0-based byte offset from start of input
    pub offset: usize,
}

impl Position {
    /// Build the position of the very first character: line 1, column 1,
    /// byte offset 0.
    pub fn new() -> Self {
        Self {
            line: 1,
            column: 1,
            offset: 0,
        }
    }

    /// Step past a single character.
    ///
    /// The byte offset grows by the character's UTF-8 length. A `'\n'` moves
    /// to column 1 of the next line; any other character moves one column to
    /// the right.
    pub fn advance(&mut self, ch: char) {
        self.offset += ch.len_utf8();
        match ch {
            '\n' => {
                self.line += 1;
                self.column = 1;
            }
            _ => self.column += 1,
        }
    }

    /// Return the position reached after stepping past all of `text`.
    ///
    /// The result equals calling [`Position::advance`] for every character of
    /// `text`; pure-ASCII input is handled arithmetically instead.
    pub fn advanced_by(mut self, text: &str) -> Self {
        if !text.is_ascii() {
            // Columns count characters, not bytes, so multi-byte text has to
            // be walked one `char` at a time.
            text.chars().for_each(|ch| self.advance(ch));
            return self;
        }

        // ASCII only: every byte is exactly one character and one column.
        let raw = text.as_bytes();
        let total = raw.len();
        let line_breaks = raw.iter().filter(|&&b| b == b'\n').count();

        self.column = match raw.iter().rposition(|&b| b == b'\n') {
            // The column restarts at 1 right after the last newline.
            Some(at) => total - at,
            None => self.column + total,
        };
        self.line += line_breaks;
        self.offset += total;
        self
    }

    /// Translate a position measured inside a nested source so that it is
    /// expressed relative to `base`.
    ///
    /// Lines and byte offsets are shifted by `base`. The column is shifted by
    /// `base.column` only while still on the nested source's first line
    /// (`line <= 1`); on later lines it is kept as is.
    pub fn rebased(self, base: Position) -> Self {
        let on_first_line = self.line <= 1;
        let column = if on_first_line {
            base.column + self.column.saturating_sub(1)
        } else {
            self.column
        };

        Self {
            line: base.line + self.line.saturating_sub(1),
            column,
            offset: base.offset + self.offset,
        }
    }
}
80
81impl std::fmt::Display for Position {
82    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83        write!(f, "{}:{}", self.line, self.column)
84    }
85}
86
87/// A span of source code (start to end position).
88#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
89pub struct Span {
90    /// Start position (inclusive)
91    pub start: Position,
92    /// End position (exclusive)
93    pub end: Position,
94}
95
96impl Span {
97    /// Create an empty span at the default position.
98    pub fn new() -> Self {
99        Self::default()
100    }
101
102    /// Create a span from start to end positions.
103    pub fn from_positions(start: Position, end: Position) -> Self {
104        Self { start, end }
105    }
106
107    /// Create a span covering a single position.
108    pub fn at(pos: Position) -> Self {
109        Self {
110            start: pos,
111            end: pos,
112        }
113    }
114
115    /// Merge two spans into one covering both.
116    pub fn merge(self, other: Span) -> Self {
117        let start = if self.start.offset <= other.start.offset {
118            self.start
119        } else {
120            other.start
121        };
122        let end = if self.end.offset >= other.end.offset {
123            self.end
124        } else {
125            other.end
126        };
127        Self { start, end }
128    }
129
130    /// Rebase a span from a nested source onto an absolute base position.
131    pub fn rebased(self, base: Position) -> Self {
132        Self {
133            start: self.start.rebased(base),
134            end: self.end.rebased(base),
135        }
136    }
137
138    /// Slice the source text covered by this span.
139    pub fn slice<'a>(&self, source: &'a str) -> &'a str {
140        slice_with_byte_offsets(source, self.start.offset, self.end.offset)
141    }
142
143    /// Convert this span to a [`TextRange`] using only the byte offsets.
144    pub fn to_range(self) -> TextRange {
145        TextRange::new(
146            TextSize::new(self.start.offset as u32),
147            TextSize::new(self.end.offset as u32),
148        )
149    }
150
151    /// Get the starting line number.
152    pub fn line(&self) -> usize {
153        self.start.line
154    }
155}
156
157impl std::fmt::Display for Span {
158    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
159        if self.start.line == self.end.line {
160            write!(f, "line {}", self.start.line)
161        } else {
162            write!(f, "lines {}-{}", self.start.line, self.end.line)
163        }
164    }
165}
166
/// A byte offset in source text, analogous to ruff's `TextSize`.
///
/// Thin wrapper around a `u32`; ordering and equality follow the wrapped value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub struct TextSize(u32);

impl TextSize {
    /// Wrap a raw `u32` byte offset.
    pub const fn new(raw: u32) -> Self {
        TextSize(raw)
    }

    /// Unwrap back to the raw `u32` byte offset.
    pub const fn to_u32(self) -> u32 {
        let TextSize(raw) = self;
        raw
    }
}
182
183impl From<u32> for TextSize {
184    fn from(raw: u32) -> Self {
185        Self(raw)
186    }
187}
188
189impl From<TextSize> for usize {
190    fn from(size: TextSize) -> Self {
191        size.0 as usize
192    }
193}
194
195impl std::ops::Add for TextSize {
196    type Output = Self;
197    fn add(self, rhs: Self) -> Self {
198        Self(self.0 + rhs.0)
199    }
200}
201
202impl std::ops::Sub for TextSize {
203    type Output = Self;
204    fn sub(self, rhs: Self) -> Self {
205        Self(self.0 - rhs.0)
206    }
207}
208
209/// A half-open byte range in source text, analogous to ruff's `TextRange`.
210#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
211pub struct TextRange {
212    start: TextSize,
213    end: TextSize,
214}
215
216impl TextRange {
217    /// Create a new range from start (inclusive) to end (exclusive).
218    pub const fn new(start: TextSize, end: TextSize) -> Self {
219        Self { start, end }
220    }
221
222    /// Start offset (inclusive).
223    pub const fn start(self) -> TextSize {
224        self.start
225    }
226
227    /// End offset (exclusive).
228    pub const fn end(self) -> TextSize {
229        self.end
230    }
231
232    /// Length in bytes.
233    pub const fn len(self) -> TextSize {
234        TextSize(self.end.0 - self.start.0)
235    }
236
237    /// Whether the range is empty.
238    pub const fn is_empty(self) -> bool {
239        self.start.0 == self.end.0
240    }
241
242    /// Slice the source text covered by this range.
243    pub fn slice<'a>(&self, source: &'a str) -> &'a str {
244        slice_with_byte_offsets(source, usize::from(self.start), usize::from(self.end))
245    }
246
247    /// Shift the range by adding a base offset to both start and end.
248    pub fn offset_by(self, base: TextSize) -> Self {
249        Self {
250            start: self.start + base,
251            end: self.end + base,
252        }
253    }
254}
255
256fn slice_with_byte_offsets(source: &str, start: usize, end: usize) -> &str {
257    if start > end || end > source.len() {
258        return "";
259    }
260
261    if let Some(slice) = source.get(start..end) {
262        return slice;
263    }
264
265    let start = floor_char_boundary(source, start);
266    let end = ceil_char_boundary(source, end);
267    source.get(start..end).unwrap_or("")
268}
269
/// Largest character boundary of `source` that is `<= offset`.
///
/// `offset` is first capped at `source.len()`. Offset 0 is always a boundary,
/// so the backwards search cannot fail.
fn floor_char_boundary(source: &str, offset: usize) -> usize {
    let capped = offset.min(source.len());
    (0..=capped)
        .rev()
        .find(|&idx| source.is_char_boundary(idx))
        .unwrap_or(0)
}
277
/// Smallest character boundary of `source` that is `>= offset`.
///
/// `offset` is first capped at `source.len()`, which is itself always a
/// boundary and serves as the fallback.
fn ceil_char_boundary(source: &str, offset: usize) -> usize {
    let limit = source.len();
    (offset.min(limit)..limit)
        .find(|&idx| source.is_char_boundary(idx))
        .unwrap_or(limit)
}
285
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a `Position` from its three fields.
    fn pos(line: usize, column: usize, offset: usize) -> Position {
        Position {
            line,
            column,
            offset,
        }
    }

    /// `advance` tracks line, column and byte offset across a newline.
    #[test]
    fn test_position_advance() {
        let mut cursor = Position::new();
        assert_eq!(cursor, pos(1, 1, 0));

        cursor.advance('a');
        assert_eq!(cursor, pos(1, 2, 1));

        cursor.advance('\n');
        assert_eq!(cursor, pos(2, 1, 2));

        cursor.advance('b');
        assert_eq!(cursor, pos(2, 2, 3));
    }

    /// A position prints as `line:column`.
    #[test]
    fn test_position_display() {
        let rendered = pos(5, 10, 50).to_string();
        assert_eq!(rendered, "5:10");
    }

    /// Merging two disjoint spans covers both of them.
    #[test]
    fn test_span_merge() {
        let left = Span::from_positions(pos(1, 1, 0), pos(1, 5, 4));
        let right = Span::from_positions(pos(1, 10, 9), pos(2, 3, 15));

        let merged = left.merge(right);

        assert_eq!(merged.start.offset, 0);
        assert_eq!(merged.end.offset, 15);
    }

    /// Single-line and multi-line spans print differently.
    #[test]
    fn test_span_display() {
        let single_line = Span::from_positions(pos(3, 1, 0), pos(3, 10, 9));
        assert_eq!(single_line.to_string(), "line 3");

        let multi_line = Span::from_positions(pos(1, 1, 0), pos(5, 1, 50));
        assert_eq!(multi_line.to_string(), "lines 1-5");
    }

    /// An end offset inside a multi-byte character is widened, not rejected.
    #[test]
    fn span_slice_handles_non_char_boundaries() {
        let source = "a─b";
        let span = Span::from_positions(pos(1, 2, 1), pos(1, 3, 3));

        assert_eq!(span.slice(source), "─");
    }

    /// Same widening behaviour when slicing through a `TextRange`.
    #[test]
    fn text_range_slice_handles_non_char_boundaries() {
        let source = "x🔉y";
        let (start, end) = (TextSize::new(1), TextSize::new(4));
        let range = TextRange::new(start, end);

        assert_eq!(range.slice(source), "🔉");
    }
}
411}