lisbeth_error/
span.rs

1//! Some span data structures
2//!
3//! # Description
4//!
5//! This module contains the [`Span`] and [`SpannedStr`] data structures. The
6//! difference between them is that [`SpannedStr`] contains the inner text while
7//! [`Span`] contains only its position. Consequently, [`SpannedStr`] is used
8//! during the lexing and parsing steps, but the AST generated *should* contain
9//! only [`Span`].
10//!
11//! # A consistency note
12//!
//! Inconsistent results may occur when [`Span`] and [`SpannedStr`] coming from
//! different places are used together. This is fine for most use-cases, in
//! which a single process is invoked for a single input unit.
16
17use std::cmp::{Ord, Ordering};
18
/// Represents a position in the input data.
///
/// Positions are 0-indexed, meaning that the first character of each line has
/// 0 as column number. The same goes for the line number.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Position {
    /// 0-based line number; incremented each time a `'\n'` is consumed.
    line: u32,
    /// 0-based column, counted in characters (not bytes) since the last `'\n'`.
    col: u32,
    /// Distance from the beginning of the input, counted in bytes.
    offset: u32,
}
29
30impl Position {
31    const BEGINNING: Position = Position {
32        line: 0,
33        col: 0,
34        offset: 0,
35    };
36
37    fn advance_with(self, s: &str) -> Position {
38        let Position {
39            mut line,
40            mut col,
41            mut offset,
42        } = self;
43
44        s.chars().for_each(|c| {
45            if c == '\n' {
46                line += 1;
47                col = 0
48            } else {
49                col += 1;
50            }
51        });
52
53        offset += s.len() as u32;
54
55        Position { line, col, offset }
56    }
57
58    /// Returns the position's line.
59    #[inline]
60    pub const fn line(self) -> u32 {
61        self.line
62    }
63
64    /// Returns the position's column.
65    #[inline]
66    pub const fn col(self) -> u32 {
67        self.col
68    }
69
70    /// Returns the position's offset from the beginning of the file.
71    #[inline]
72    pub const fn offset(self) -> u32 {
73        self.offset
74    }
75}
76
77// Note: when the following documentation is modified, remember to update the
78// doc for Position::Ord accordingly.
79/// # Warning
80///
81/// Positions can be compared toghether only if they come from the same input
82/// unit. If they do not, then inconsistencies may occur.
83///
84/// # Panics
85///
86/// In debug mode, this function may panic if the two positions are not from the
87/// same input unit. In release mode, this function does not panic.
88impl PartialOrd for Position {
89    fn partial_cmp(&self, other: &Position) -> Option<Ordering> {
90        Some(self.cmp(other))
91    }
92}
93
94// Note: when the following documentation is modified, remember to update the
95// doc for Position::PartialOrd accordingly.
96/// # Warning
97///
98/// Positions can be compared toghether only if they come from the same input
99/// unit. If they do not, then inconsistencies may occur.
100///
101/// # Panics
102///
103/// In debug mode, this function may panic if the two positions are not from the
104/// same input unit. In release mode, this function does not panic.
105impl Ord for Position {
106    #[cfg(debug)]
107    fn cmp(&self, other: &Position) -> Ordering {
108        let offset_provided = self.offset.cmp(&other.offset);
109
110        let lc_provided = match self.line.cmp(&other.line) {
111            Ordering::Equal => self.col.cmp(&other.col),
112            any => any,
113        };
114
115        assert!(
116            offset_provided != lc_provided,
117            "Attempt to perform an inconsistent span comparaison",
118        );
119
120        offset_provided
121    }
122
123    #[cfg(not(debug))]
124    fn cmp(&self, other: &Position) -> Ordering {
125        self.offset.cmp(&other.offset)
126    }
127}
128
/// Represents the position of a piece of code in the input file.
///
/// A `Span` is represented as the start and end position. Every character that
/// is between these two positions is considered as *inside* the span.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct Span {
    /// Position of the first character inside the span.
    start: Position,
    /// Position right after the last character inside the span (exclusive).
    end: Position,
}
138
139impl Span {
140    /// Returns the span's starting position.
141    #[inline]
142    pub const fn start(self) -> Position {
143        self.start
144    }
145
146    /// Returns the span's ending position.
147    ///
148    /// The position ends on the next non-spanned part:
149    ///
150    /// ```rust
151    /// use lisbeth_error::span::SpannedStr;
152    ///
153    /// let s = SpannedStr::input_file("hello");
154    /// assert_eq!(s.span().end().col(), 5);
155    /// ```
156    #[inline]
157    pub const fn end(self) -> Position {
158        self.end
159    }
160
161    #[inline]
162    const fn split_with(self, mid: Position) -> (Span, Span) {
163        let Span { start, end } = self;
164
165        let left = Span { start, end: mid };
166        let right = Span { start: mid, end };
167
168        (left, right)
169    }
170
171    pub(crate) fn of_file(input: &str) -> Span {
172        let start = Position::BEGINNING;
173        let end = start.advance_with(input);
174
175        Span { start, end }
176    }
177}
178
/// Represents a portion of input file.
///
/// This is represented the same way as [`Span`], but with an additional
/// content field.
///
/// It is initially created with the [`input_file`] function, and can then be
/// split at the desired index. Its content and span can be accessed with the
/// [`content`] and [`span`] methods.
///
/// # Example
///
/// The following code shows how to extract a sequence of numbers separated by
/// non-digit characters.
///
/// ```rust
/// use lisbeth_error::span::{Span, SpannedStr};
///
/// #[derive(Debug)]
/// struct Number(u32, Span);
///
/// // Parses the number located at the beginning of input. Panics if the
/// // numeric prefix of input cannot be parsed as a `u32`.
/// fn extract_number<'a>(input: SpannedStr<'a>) -> (Number, SpannedStr<'a>) {
///     let (matched, tail) = input.take_while(char::is_numeric);
///
///     let value = matched.content().parse().unwrap();
///     let number = Number(value, matched.span());
///     (number, tail)
/// }
///
/// let input = SpannedStr::input_file("42 or nothing");
/// let (number, tail) = extract_number(input);
///
/// assert_eq!(number.0, 42);
/// assert_eq!(tail.content(), " or nothing");
/// ```
///
/// [`input_file`]: SpannedStr::input_file
/// [`content`]: SpannedStr::content
/// [`span`]: SpannedStr::span
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct SpannedStr<'a> {
    /// Location of `content` in the input data.
    span: Span,
    /// Text covered by `span`.
    content: &'a str,
}
223
224impl<'a> SpannedStr<'a> {
225    /// Creates a new [`SpannedStr`] from an input file.
226    ///
227    /// This returned spanned string can then be splitted at various places
228    /// during the parsing step.
229    ///
230    /// # Example
231    ///
232    /// ```rust
233    /// use lisbeth_error::span::SpannedStr;
234    ///
235    /// let file_content = "fn main() { println!(\"Hello, world!\"); }";
236    ///
237    /// let whole_file = SpannedStr::input_file(file_content);
238    /// ```
239    pub fn input_file(content: &'a str) -> SpannedStr<'a> {
240        let span = Span::of_file(content);
241
242        SpannedStr { span, content }
243    }
244
245    // Note: span must represent the same source as content, otherwise
246    // inconsistent results may occur.
247    //
248    // In debug mode, it is ensured that:
249    //   - span.start == Position::BEGINNING,
250    //   - span.end.offset == content.len().
251    pub(crate) fn assemble(content: &'a str, span: Span) -> SpannedStr<'a> {
252        debug_assert_eq!(
253            span.start,
254            Position::BEGINNING,
255            "Attempt to create a SpannedStr that does not start at the beginning of the file",
256        );
257        debug_assert_eq!(
258            span.end.offset as usize,
259            content.len(),
260            "Attempt to create a SpannedStr with an incorrect length",
261        );
262
263        SpannedStr { content, span }
264    }
265
266    /// Returns the contained [`Span`].
267    ///
268    /// The span contains the position at which the content is located in the
269    /// input data.
270    ///
271    /// # Example
272    ///
273    /// ```rust
274    /// use lisbeth_error::span::SpannedStr;
275    ///
276    /// let a = SpannedStr::input_file("foo bar");
277    /// let b = SpannedStr::input_file("baz qux");
278    ///
279    /// // a and b have the same length and the same starting point, so they
280    /// // have the same span.
281    /// assert_eq!(a.span(), b.span());
282    /// ```
283    pub const fn span(self) -> Span {
284        self.span
285    }
286
287    /// Returns the span content.
288    ///
289    /// # Example
290    ///
291    /// ```rust
292    /// use lisbeth_error::span:: SpannedStr;
293    ///
294    /// let a = SpannedStr::input_file("hello");
295    /// assert_eq!(a.content(), "hello");
296    /// ```
297    pub const fn content(self) -> &'a str {
298        self.content
299    }
300
301    /// Splits the spanned string at a given byte index.
302    ///
303    /// This method works the same way as [str::split_at], but updates the span
304    /// so that it is still correct.
305    ///
306    /// # Panics
307    ///
308    /// This method panics when one of the condition listed in [`str::split_at`]
309    /// is met.
310    ///
311    /// # Example
312    ///
313    /// ```rust
314    /// use lisbeth_error::span::SpannedStr;
315    ///
316    /// let input = SpannedStr::input_file("helloworld");
317    /// let (left, right) = input.split_at(5);
318    ///
319    /// assert_eq!(left.content(), "hello");
320    /// assert_eq!(right.content(), "world");
321    /// ```
322    pub fn split_at(self, idx: usize) -> (SpannedStr<'a>, SpannedStr<'a>) {
323        let (left_content, right_content) = self.content.split_at(idx);
324
325        let mid = self.span.start.advance_with(left_content);
326        let (left_span, right_span) = self.span.split_with(mid);
327
328        let left_sstr = SpannedStr {
329            span: left_span,
330            content: left_content,
331        };
332
333        let right_sstr = SpannedStr {
334            span: right_span,
335            content: right_content,
336        };
337
338        (left_sstr, right_sstr)
339    }
340
341    /// Returns the longest prefix of input that match a given a condition.
342    ///
343    /// # Example
344    ///
345    /// ```rust
346    /// use lisbeth_error::span::SpannedStr;
347    ///
348    /// let i = SpannedStr::input_file("42 101");
349    /// let (number, tail) = i.take_while(char::is_numeric);
350    ///
351    /// assert_eq!(number.content(), "42");
352    /// assert_eq!(tail.content(), " 101");
353    /// ```
354    pub fn take_while<F>(self, mut f: F) -> (SpannedStr<'a>, SpannedStr<'a>)
355    where
356        F: FnMut(char) -> bool,
357    {
358        let idx = self
359            .content
360            .char_indices()
361            .find(|(_, chr)| !f(*chr))
362            .map(|(idx, _)| idx)
363            .unwrap_or_else(|| self.content.len());
364
365        self.split_at(idx)
366    }
367}
368
#[cfg(test)]
mod tests {
    use super::*;

    // Tests for `Position`: advancing through text and ordering.
    mod position {
        use super::*;

        #[test]
        fn advance_with_no_line_return() {
            let p = Position::BEGINNING.advance_with("hello, world");

            assert_eq!(p.line, 0);
            assert_eq!(p.col, 12);
            assert_eq!(p.offset, 12);
        }

        #[test]
        fn advance_with_line_return() {
            let p = Position::BEGINNING.advance_with("\n\n\n");

            assert_eq!(p.line, 3);
            assert_eq!(p.col, 0);
            assert_eq!(p.offset, 3);
        }

        #[test]
        fn advance_with_mixed() {
            let p = Position::BEGINNING.advance_with("Hello,\nworld");

            assert_eq!(p.line, 1);
            assert_eq!(p.col, 5);
            assert_eq!(p.offset, 12);
        }

        #[test]
        fn advance_with_empty() {
            let p = Position::BEGINNING.advance_with("");
            assert_eq!(p, Position::BEGINNING);
        }

        #[test]
        fn advance_with_two_times() {
            let p = Position::BEGINNING.advance_with("foo bar");
            let p = p.advance_with(" baz");

            assert_eq!(p.line, 0);
            assert_eq!(p.col, 11);
            assert_eq!(p.offset, 11);
        }

        #[test]
        fn ord_simple() {
            let p = Position::BEGINNING.advance_with("hello, world!");
            let q = p.advance_with(" How are you?");

            assert!(p < q);
        }

        // This is part of the inconsistency paragraph in the module
        // documentation.
        //
        // The two positions below cannot come from the same input unit (the
        // line/column order contradicts the offset order). The `# Panics`
        // section of the `Ord` documentation allows such a comparison to
        // panic in debug mode, so the offset-only behavior can only be
        // relied upon when debug assertions are disabled.
        #[test]
        #[cfg(not(debug_assertions))]
        fn ord_only_cares_about_offset() {
            let p = Position {
                line: 10,
                col: 20,
                offset: 1000,
            };

            let q = Position {
                line: 100,
                col: 25,
                offset: 10,
            };

            assert!(p > q);
        }
    }

    // Tests for `Span` construction.
    mod span {
        use super::*;

        #[test]
        fn of_file() {
            let i = "hello, world";
            let left = Span::of_file(i);

            let start = Position::BEGINNING;
            let end = start.advance_with(i);
            let right = Span { start, end };

            assert_eq!(left, right);
        }
    }

    // Tests for `SpannedStr`: construction, accessors and splitting.
    mod spanned_str {
        use super::*;

        #[test]
        fn input_file_simple() {
            let sstr = SpannedStr::input_file("hello\nworld");

            assert_eq!(sstr.span.start, Position::BEGINNING);
            assert_eq!(sstr.span.end.line, 1);
            assert_eq!(sstr.span.end.col, 5);
        }

        #[test]
        fn span_and_content() {
            // The span is deliberately unrelated to the content: the
            // accessors must return the fields untouched.
            let span = Span {
                start: Position {
                    line: 10,
                    col: 0,
                    offset: 100,
                },
                end: Position {
                    line: 15,
                    col: 10,
                    offset: 150,
                },
            };

            let content = "hello, world";

            let sstr = SpannedStr { content, span };

            assert_eq!(sstr.span(), span);
            assert_eq!(sstr.content(), content);
        }

        #[test]
        fn split_at_working() {
            let input = SpannedStr::input_file("foobar");
            let (left, right) = input.split_at(3);

            assert_eq!(left.content, "foo");
            assert_eq!(right.content, "bar");

            let left_span = Span {
                start: Position {
                    line: 0,
                    col: 0,
                    offset: 0,
                },
                end: Position {
                    line: 0,
                    col: 3,
                    offset: 3,
                },
            };

            let right_span = Span {
                start: Position {
                    line: 0,
                    col: 3,
                    offset: 3,
                },
                end: Position {
                    line: 0,
                    col: 6,
                    offset: 6,
                },
            };

            assert_eq!(left.span, left_span);
            assert_eq!(right.span, right_span);
        }

        #[test]
        #[should_panic(expected = "byte index 15 is out of bounds of `hello, world`")]
        fn split_at_out_of_bounds() {
            let f = SpannedStr::input_file("hello, world");
            f.split_at(15);
        }

        #[test]
        #[should_panic(
            expected = "byte index 2 is not a char boundary; it is inside \'é\' (bytes 1..3) of `Vélo`"
        )]
        fn split_at_non_boundary() {
            let f = SpannedStr::input_file("Vélo");
            f.split_at(2);
        }

        #[test]
        fn take_while() {
            let (left, right) = SpannedStr::input_file("foo bar").take_while(|c| c != ' ');

            assert_eq!(left.content, "foo");
            assert_eq!(right.content, " bar");
        }

        #[test]
        fn take_while_empty_string() {
            let input = SpannedStr::input_file("");
            let (left, right) = input.take_while(|_| true);

            assert_eq!(left, input);
            assert_eq!(right, input);
        }

        #[test]
        fn take_while_non_ascii() {
            let (left, right) = SpannedStr::input_file("éêè").take_while(|c| c != 'è');

            assert_eq!(left.content, "éê");
            assert_eq!(right.content, "è");
        }
    }
}
576}