Skip to main content

shuck_ast/
span.rs

//! Source location tracking for error messages and `$LINENO`.
//!
//! Provides position and span types for tracking source locations through
//! lexing, parsing, and execution.
5
/// A location within source text.
///
/// The derived `Default` is the all-zero position; [`Position::new`] is the
/// conventional start of input (line 1, column 1, offset 0).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Position {
    /// 1-based line number.
    pub line: usize,
    /// 1-based column number. [`Position::advance`] bumps it once per `char`
    /// consumed on the current line.
    pub column: usize,
    /// 0-based byte offset from the start of the input.
    pub offset: usize,
}

impl Position {
    /// Build the position of the first character of an input:
    /// line 1, column 1, byte offset 0.
    pub fn new() -> Self {
        Position {
            line: 1,
            column: 1,
            offset: 0,
        }
    }

    /// Step past `ch`.
    ///
    /// The byte offset grows by the UTF-8 length of `ch`. A newline moves to
    /// column 1 of the next line; any other character moves one column right.
    pub fn advance(&mut self, ch: char) {
        self.offset += ch.len_utf8();
        match ch {
            '\n' => {
                self.line += 1;
                self.column = 1;
            }
            _ => self.column += 1,
        }
    }

    /// Return the position reached after stepping past every character of
    /// `text`, starting from `self`.
    pub fn advanced_by(self, text: &str) -> Self {
        text.chars().fold(self, |mut pos, ch| {
            pos.advance(ch);
            pos
        })
    }

    /// Translate a position measured inside a nested source into the
    /// coordinates of the enclosing source, where the nested source begins at
    /// `base`.
    ///
    /// Lines and byte offsets are shifted by `base`. The column is shifted
    /// only while still on the nested source's first line (`line <= 1`);
    /// on later lines the column is kept as-is.
    pub fn rebased(self, base: Position) -> Self {
        let line = base.line + self.line.saturating_sub(1);
        let still_on_first_line = self.line <= 1;
        let column = if still_on_first_line {
            base.column + self.column.saturating_sub(1)
        } else {
            self.column
        };
        let offset = base.offset + self.offset;

        Position {
            line,
            column,
            offset,
        }
    }
}

/// Formats the position as `line:column`.
impl std::fmt::Display for Position {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Position { line, column, .. } = *self;
        write!(f, "{}:{}", line, column)
    }
}
65
66/// A span of source code (start to end position).
67#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
68pub struct Span {
69    /// Start position (inclusive)
70    pub start: Position,
71    /// End position (exclusive)
72    pub end: Position,
73}
74
75impl Span {
76    /// Create an empty span at the default position.
77    pub fn new() -> Self {
78        Self::default()
79    }
80
81    /// Create a span from start to end positions.
82    pub fn from_positions(start: Position, end: Position) -> Self {
83        Self { start, end }
84    }
85
86    /// Create a span covering a single position.
87    pub fn at(pos: Position) -> Self {
88        Self {
89            start: pos,
90            end: pos,
91        }
92    }
93
94    /// Merge two spans into one covering both.
95    pub fn merge(self, other: Span) -> Self {
96        let start = if self.start.offset <= other.start.offset {
97            self.start
98        } else {
99            other.start
100        };
101        let end = if self.end.offset >= other.end.offset {
102            self.end
103        } else {
104            other.end
105        };
106        Self { start, end }
107    }
108
109    /// Rebase a span from a nested source onto an absolute base position.
110    pub fn rebased(self, base: Position) -> Self {
111        Self {
112            start: self.start.rebased(base),
113            end: self.end.rebased(base),
114        }
115    }
116
117    /// Slice the source text covered by this span.
118    pub fn slice<'a>(&self, source: &'a str) -> &'a str {
119        slice_with_byte_offsets(source, self.start.offset, self.end.offset)
120    }
121
122    /// Convert this span to a [`TextRange`] using only the byte offsets.
123    pub fn to_range(self) -> TextRange {
124        TextRange::new(
125            TextSize::new(self.start.offset as u32),
126            TextSize::new(self.end.offset as u32),
127        )
128    }
129
130    /// Get the starting line number.
131    pub fn line(&self) -> usize {
132        self.start.line
133    }
134}
135
136impl std::fmt::Display for Span {
137    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
138        if self.start.line == self.end.line {
139            write!(f, "line {}", self.start.line)
140        } else {
141            write!(f, "lines {}-{}", self.start.line, self.end.line)
142        }
143    }
144}
145
/// A byte offset in source text, analogous to ruff's `TextSize`.
///
/// Thin wrapper around a `u32`; ordering, equality and hashing follow the
/// wrapped value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub struct TextSize(u32);

impl TextSize {
    /// Wrap a raw `u32` byte offset.
    pub const fn new(raw: u32) -> Self {
        TextSize(raw)
    }

    /// Unwrap to the raw `u32` byte offset.
    pub const fn to_u32(self) -> u32 {
        let TextSize(raw) = self;
        raw
    }
}

/// Same as [`TextSize::new`].
impl From<u32> for TextSize {
    fn from(raw: u32) -> Self {
        TextSize::new(raw)
    }
}

/// Widens the offset to `usize`, e.g. for indexing into a string.
impl From<TextSize> for usize {
    fn from(size: TextSize) -> Self {
        size.to_u32() as usize
    }
}

/// Adds the two offsets using plain `u32` addition.
impl std::ops::Add for TextSize {
    type Output = Self;

    fn add(self, rhs: Self) -> Self {
        let (lhs, rhs) = (self.to_u32(), rhs.to_u32());
        TextSize::new(lhs + rhs)
    }
}

/// Subtracts `rhs` from `self` using plain `u32` subtraction.
impl std::ops::Sub for TextSize {
    type Output = Self;

    fn sub(self, rhs: Self) -> Self {
        let (lhs, rhs) = (self.to_u32(), rhs.to_u32());
        TextSize::new(lhs - rhs)
    }
}
187
188/// A half-open byte range in source text, analogous to ruff's `TextRange`.
189#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
190pub struct TextRange {
191    start: TextSize,
192    end: TextSize,
193}
194
195impl TextRange {
196    /// Create a new range from start (inclusive) to end (exclusive).
197    pub const fn new(start: TextSize, end: TextSize) -> Self {
198        Self { start, end }
199    }
200
201    /// Start offset (inclusive).
202    pub const fn start(self) -> TextSize {
203        self.start
204    }
205
206    /// End offset (exclusive).
207    pub const fn end(self) -> TextSize {
208        self.end
209    }
210
211    /// Length in bytes.
212    pub const fn len(self) -> TextSize {
213        TextSize(self.end.0 - self.start.0)
214    }
215
216    /// Whether the range is empty.
217    pub const fn is_empty(self) -> bool {
218        self.start.0 == self.end.0
219    }
220
221    /// Slice the source text covered by this range.
222    pub fn slice<'a>(&self, source: &'a str) -> &'a str {
223        slice_with_byte_offsets(source, usize::from(self.start), usize::from(self.end))
224    }
225
226    /// Shift the range by adding a base offset to both start and end.
227    pub fn offset_by(self, base: TextSize) -> Self {
228        Self {
229            start: self.start + base,
230            end: self.end + base,
231        }
232    }
233}
234
235fn slice_with_byte_offsets(source: &str, start: usize, end: usize) -> &str {
236    if start > end || end > source.len() {
237        return "";
238    }
239
240    if let Some(slice) = source.get(start..end) {
241        return slice;
242    }
243
244    let start = floor_char_boundary(source, start);
245    let end = ceil_char_boundary(source, end);
246    source.get(start..end).unwrap_or("")
247}
248
/// Round `offset` down to the closest character boundary of `source`.
///
/// Offsets beyond the end of `source` are first capped to `source.len()`.
fn floor_char_boundary(source: &str, offset: usize) -> usize {
    let capped = offset.min(source.len());
    // Offset 0 is always a boundary, so the search finds a match; the
    // fallback only exists to avoid an `unwrap`.
    (0..=capped)
        .rev()
        .find(|&candidate| source.is_char_boundary(candidate))
        .unwrap_or(0)
}
256
/// Round `offset` up to the closest character boundary of `source`.
///
/// Offsets beyond the end of `source` are first capped to `source.len()`,
/// which is also the result when no earlier boundary is found.
fn ceil_char_boundary(source: &str, offset: usize) -> usize {
    let len = source.len();
    let capped = offset.min(len);
    (capped..len)
        .find(|&candidate| source.is_char_boundary(candidate))
        .unwrap_or(len)
}
264
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for spelling out a [`Position`] field by field.
    fn position(line: usize, column: usize, offset: usize) -> Position {
        Position {
            line,
            column,
            offset,
        }
    }

    /// Stepping over plain characters and a newline updates all three fields.
    #[test]
    fn test_position_advance() {
        let mut pos = Position::new();
        assert_eq!(pos, position(1, 1, 0));

        pos.advance('a');
        assert_eq!(pos, position(1, 2, 1));

        pos.advance('\n');
        assert_eq!(pos, position(2, 1, 2));

        pos.advance('b');
        assert_eq!(pos, position(2, 2, 3));
    }

    /// A position renders as `line:column`.
    #[test]
    fn test_position_display() {
        let pos = position(5, 10, 50);
        assert_eq!(pos.to_string(), "5:10");
    }

    /// Merging keeps the earliest start and the latest end.
    #[test]
    fn test_span_merge() {
        let span1 = Span::from_positions(position(1, 1, 0), position(1, 5, 4));
        let span2 = Span::from_positions(position(1, 10, 9), position(2, 3, 15));

        let merged = span1.merge(span2);

        assert_eq!(merged.start.offset, 0);
        assert_eq!(merged.end.offset, 15);
    }

    /// Single-line and multi-line spans use different wording.
    #[test]
    fn test_span_display() {
        let single_line = Span::from_positions(position(3, 1, 0), position(3, 10, 9));
        assert_eq!(single_line.to_string(), "line 3");

        let multi_line = Span::from_positions(position(1, 1, 0), position(5, 1, 50));
        assert_eq!(multi_line.to_string(), "lines 1-5");
    }

    /// An end offset inside a multi-byte character is widened, not rejected.
    #[test]
    fn span_slice_handles_non_char_boundaries() {
        let source = "a─b";
        let span = Span::from_positions(position(1, 2, 1), position(1, 3, 3));

        assert_eq!(span.slice(source), "─");
    }

    /// Same widening behaviour when slicing through a [`TextRange`].
    #[test]
    fn text_range_slice_handles_non_char_boundaries() {
        let source = "x🔉y";
        let (start, end) = (TextSize::new(1), TextSize::new(4));
        let range = TextRange::new(start, end);

        assert_eq!(range.slice(source), "🔉");
    }
}