lisbeth_error/span.rs
//! Some span data structures
//!
//! # Description
//!
//! This module contains the [`Span`] and [`SpannedStr`] data structures. The
//! difference between them is that [`SpannedStr`] contains the inner text while
//! [`Span`] contains only its position. Consequently, [`SpannedStr`] is used
//! during the lexing and parsing steps, but the AST generated *should* contain
//! only [`Span`].
//!
//! # A consistency note
//!
//! Inconsistent results may occur when [`Span`] and [`SpannedStr`] coming from
//! different places are used together. This is fine for most use-cases, in
//! which a single process is invoked for a single input unit.
16
17use std::cmp::{Ord, Ordering};
18
/// Represents a position in the input data.
///
/// Positions are 0-indexed, meaning that the first character of each line has
/// 0 as column number. The same goes for the line number.
///
/// The column is counted in `char`s, whereas the offset is counted in bytes
/// from the beginning of the input.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Position {
    line: u32,
    col: u32,
    offset: u32,
}

impl Position {
    /// The position located at the very beginning of an input: line 0,
    /// column 0, offset 0.
    const BEGINNING: Position = Position {
        line: 0,
        col: 0,
        offset: 0,
    };

    /// Returns the position reached once `s` has been read, starting from
    /// `self`.
    ///
    /// Each `'\n'` moves to the next line and resets the column to 0; any
    /// other `char` moves one column forward. The offset moves forward by the
    /// byte length of `s`.
    fn advance_with(self, s: &str) -> Position {
        let Position {
            mut line,
            mut col,
            mut offset,
        } = self;

        for c in s.chars() {
            if c == '\n' {
                line += 1;
                col = 0;
            } else {
                col += 1;
            }
        }

        // NOTE: this cast silently truncates when `s` is longer than
        // `u32::MAX` bytes.
        offset += s.len() as u32;

        Position { line, col, offset }
    }

    /// Returns the position's line.
    #[inline]
    pub const fn line(self) -> u32 {
        self.line
    }

    /// Returns the position's column.
    #[inline]
    pub const fn col(self) -> u32 {
        self.col
    }

    /// Returns the position's offset from the beginning of the file.
    #[inline]
    pub const fn offset(self) -> u32 {
        self.offset
    }
}

// Note: when the following documentation is modified, remember to update the
// doc for Position::Ord accordingly.
/// # Warning
///
/// Positions can be compared together only if they come from the same input
/// unit. If they do not, then inconsistencies may occur.
///
/// # Panics
///
/// When the crate is compiled with the custom `debug` cfg (`--cfg debug`), the
/// comparison panics if the order of the offsets disagrees with the order of
/// the (line, column) pairs, which happens when the two positions are not
/// from the same input unit. Without that cfg, the comparison never panics.
impl PartialOrd for Position {
    fn partial_cmp(&self, other: &Position) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

// Note: when the following documentation is modified, remember to update the
// doc for Position::PartialOrd accordingly.
/// # Warning
///
/// Positions can be compared together only if they come from the same input
/// unit. If they do not, then inconsistencies may occur.
///
/// # Panics
///
/// When the crate is compiled with the custom `debug` cfg (`--cfg debug`), the
/// comparison panics if the order of the offsets disagrees with the order of
/// the (line, column) pairs, which happens when the two positions are not
/// from the same input unit. Without that cfg, the comparison never panics.
impl Ord for Position {
    // NOTE(review): `debug` is a custom cfg. Cargo profiles only set
    // `debug_assertions`, so this checked variant is opt-in. Switching to
    // `debug_assertions` would make the `ord_only_cares_about_offset` unit
    // test panic in debug test builds, so it is deliberately left as is.
    #[cfg(debug)]
    fn cmp(&self, other: &Position) -> Ordering {
        let offset_provided = self.offset.cmp(&other.offset);

        let lc_provided = self
            .line
            .cmp(&other.line)
            .then_with(|| self.col.cmp(&other.col));

        // Positions coming from the same input unit are ordered identically
        // by offset and by (line, column); anything else is an inconsistency.
        assert!(
            offset_provided == lc_provided,
            "Attempt to perform an inconsistent span comparaison",
        );

        offset_provided
    }

    #[cfg(not(debug))]
    fn cmp(&self, other: &Position) -> Ordering {
        self.offset.cmp(&other.offset)
    }
}
128
129/// Represents the position of a piece of code in the input file.
130///
131/// A `Span` is represented as the start and end position. Every character that
132/// is between these two position is considered as *inside* the span.
133#[derive(Copy, Clone, Debug, PartialEq)]
134pub struct Span {
135 start: Position,
136 end: Position,
137}
138
139impl Span {
140 /// Returns the span's starting position.
141 #[inline]
142 pub const fn start(self) -> Position {
143 self.start
144 }
145
146 /// Returns the span's ending position.
147 ///
148 /// The position ends on the next non-spanned part:
149 ///
150 /// ```rust
151 /// use lisbeth_error::span::SpannedStr;
152 ///
153 /// let s = SpannedStr::input_file("hello");
154 /// assert_eq!(s.span().end().col(), 5);
155 /// ```
156 #[inline]
157 pub const fn end(self) -> Position {
158 self.end
159 }
160
161 #[inline]
162 const fn split_with(self, mid: Position) -> (Span, Span) {
163 let Span { start, end } = self;
164
165 let left = Span { start, end: mid };
166 let right = Span { start: mid, end };
167
168 (left, right)
169 }
170
171 pub(crate) fn of_file(input: &str) -> Span {
172 let start = Position::BEGINNING;
173 let end = start.advance_with(input);
174
175 Span { start, end }
176 }
177}
178
179/// Represents a portion of input file.
180///
181/// This is represented the same way as [`Span`], but with an additionnal
182/// content field.
183///
184/// It is initially created with the [`input_file`] function, and can then be
185/// splitted at desired index. Its content and span can be accessed with the
186/// [`content`] and [`span`] methods.
187///
188/// # Example
189///
190/// The following code shows how to extract a sequence of numbers separated by
191/// non-digit characters.
192///
193/// ```rust
194/// use lisbeth_error::span::{Span, SpannedStr};
195///
196/// #[derive(Debug)]
197/// struct Number(u32, Span);
198///
199/// // Parses a number from input, if any failure occurs, returns None
200/// fn extract_number<'a>(input: SpannedStr<'a>) -> (Number, SpannedStr<'a>) {
201/// let (matched, tail) = input.take_while(char::is_numeric);
202///
203/// let value = matched.content().parse().unwrap();
204/// let number = Number(value, matched.span());
205/// (number, tail)
206/// }
207///
208/// let input = SpannedStr::input_file("42 or nothing");
209/// let (number, tail) = extract_number(input);
210///
211/// assert_eq!(number.0, 42);
212/// assert_eq!(tail.content(), " or nothing");
213/// ```
214///
215/// [`input_file`]: SpannedStr::input_file
216/// [`content`]: SpannedStr::content
217/// [`span`]: SpannedStr::span
218#[derive(Copy, Clone, Debug, PartialEq)]
219pub struct SpannedStr<'a> {
220 span: Span,
221 content: &'a str,
222}
223
224impl<'a> SpannedStr<'a> {
225 /// Creates a new [`SpannedStr`] from an input file.
226 ///
227 /// This returned spanned string can then be splitted at various places
228 /// during the parsing step.
229 ///
230 /// # Example
231 ///
232 /// ```rust
233 /// use lisbeth_error::span::SpannedStr;
234 ///
235 /// let file_content = "fn main() { println!(\"Hello, world!\"); }";
236 ///
237 /// let whole_file = SpannedStr::input_file(file_content);
238 /// ```
239 pub fn input_file(content: &'a str) -> SpannedStr<'a> {
240 let span = Span::of_file(content);
241
242 SpannedStr { span, content }
243 }
244
245 // Note: span must represent the same source as content, otherwise
246 // inconsistent results may occur.
247 //
248 // In debug mode, it is ensured that:
249 // - span.start == Position::BEGINNING,
250 // - span.end.offset == content.len().
251 pub(crate) fn assemble(content: &'a str, span: Span) -> SpannedStr<'a> {
252 debug_assert_eq!(
253 span.start,
254 Position::BEGINNING,
255 "Attempt to create a SpannedStr that does not start at the beginning of the file",
256 );
257 debug_assert_eq!(
258 span.end.offset as usize,
259 content.len(),
260 "Attempt to create a SpannedStr with an incorrect length",
261 );
262
263 SpannedStr { content, span }
264 }
265
266 /// Returns the contained [`Span`].
267 ///
268 /// The span contains the position at which the content is located in the
269 /// input data.
270 ///
271 /// # Example
272 ///
273 /// ```rust
274 /// use lisbeth_error::span::SpannedStr;
275 ///
276 /// let a = SpannedStr::input_file("foo bar");
277 /// let b = SpannedStr::input_file("baz qux");
278 ///
279 /// // a and b have the same length and the same starting point, so they
280 /// // have the same span.
281 /// assert_eq!(a.span(), b.span());
282 /// ```
283 pub const fn span(self) -> Span {
284 self.span
285 }
286
287 /// Returns the span content.
288 ///
289 /// # Example
290 ///
291 /// ```rust
292 /// use lisbeth_error::span:: SpannedStr;
293 ///
294 /// let a = SpannedStr::input_file("hello");
295 /// assert_eq!(a.content(), "hello");
296 /// ```
297 pub const fn content(self) -> &'a str {
298 self.content
299 }
300
301 /// Splits the spanned string at a given byte index.
302 ///
303 /// This method works the same way as [str::split_at], but updates the span
304 /// so that it is still correct.
305 ///
306 /// # Panics
307 ///
308 /// This method panics when one of the condition listed in [`str::split_at`]
309 /// is met.
310 ///
311 /// # Example
312 ///
313 /// ```rust
314 /// use lisbeth_error::span::SpannedStr;
315 ///
316 /// let input = SpannedStr::input_file("helloworld");
317 /// let (left, right) = input.split_at(5);
318 ///
319 /// assert_eq!(left.content(), "hello");
320 /// assert_eq!(right.content(), "world");
321 /// ```
322 pub fn split_at(self, idx: usize) -> (SpannedStr<'a>, SpannedStr<'a>) {
323 let (left_content, right_content) = self.content.split_at(idx);
324
325 let mid = self.span.start.advance_with(left_content);
326 let (left_span, right_span) = self.span.split_with(mid);
327
328 let left_sstr = SpannedStr {
329 span: left_span,
330 content: left_content,
331 };
332
333 let right_sstr = SpannedStr {
334 span: right_span,
335 content: right_content,
336 };
337
338 (left_sstr, right_sstr)
339 }
340
341 /// Returns the longest prefix of input that match a given a condition.
342 ///
343 /// # Example
344 ///
345 /// ```rust
346 /// use lisbeth_error::span::SpannedStr;
347 ///
348 /// let i = SpannedStr::input_file("42 101");
349 /// let (number, tail) = i.take_while(char::is_numeric);
350 ///
351 /// assert_eq!(number.content(), "42");
352 /// assert_eq!(tail.content(), " 101");
353 /// ```
354 pub fn take_while<F>(self, mut f: F) -> (SpannedStr<'a>, SpannedStr<'a>)
355 where
356 F: FnMut(char) -> bool,
357 {
358 let idx = self
359 .content
360 .char_indices()
361 .find(|(_, chr)| !f(*chr))
362 .map(|(idx, _)| idx)
363 .unwrap_or_else(|| self.content.len());
364
365 self.split_at(idx)
366 }
367}
368
#[cfg(test)]
mod tests {
    use super::*;

    // Builds a `Position` from its raw components.
    fn pos(line: u32, col: u32, offset: u32) -> Position {
        Position { line, col, offset }
    }

    mod position {
        use super::*;

        #[test]
        fn advance_with_no_line_return() {
            let reached = Position::BEGINNING.advance_with("hello, world");

            assert_eq!(reached, pos(0, 12, 12));
        }

        #[test]
        fn advance_with_line_return() {
            let reached = Position::BEGINNING.advance_with("\n\n\n");

            assert_eq!(reached, pos(3, 0, 3));
        }

        #[test]
        fn advance_with_mixed() {
            let reached = Position::BEGINNING.advance_with("Hello,\nworld");

            assert_eq!(reached, pos(1, 5, 12));
        }

        #[test]
        fn advance_with_empty() {
            assert_eq!(Position::BEGINNING.advance_with(""), Position::BEGINNING);
        }

        #[test]
        fn advance_with_two_times() {
            let reached = Position::BEGINNING
                .advance_with("foo bar")
                .advance_with(" baz");

            assert_eq!(reached, pos(0, 11, 11));
        }

        #[test]
        fn ord_simple() {
            let earlier = Position::BEGINNING.advance_with("hello, world!");
            let later = earlier.advance_with(" How are you?");

            assert!(earlier < later);
        }

        #[test]
        fn ord_only_cares_about_offset() {
            // This is part of the inconsistency paragraph in the module
            // documentation: line and column say `p < q`, the offset says
            // otherwise, and the offset wins.
            let p = pos(10, 20, 1000);
            let q = pos(100, 25, 10);

            assert!(p > q);
        }
    }

    mod span {
        use super::*;

        #[test]
        fn of_file() {
            let input = "hello, world";

            let expected = Span {
                start: Position::BEGINNING,
                end: Position::BEGINNING.advance_with(input),
            };

            assert_eq!(Span::of_file(input), expected);
        }
    }

    mod spanned_str {
        use super::*;

        #[test]
        fn input_file_simple() {
            let Span { start, end } = SpannedStr::input_file("hello\nworld").span;

            assert_eq!(start, Position::BEGINNING);
            assert_eq!(end.line, 1);
            assert_eq!(end.col, 5);
        }

        #[test]
        fn span_and_content() {
            let span = Span {
                start: pos(10, 0, 100),
                end: pos(15, 10, 150),
            };
            let content = "hello, world";

            let sstr = SpannedStr { content, span };

            assert_eq!(sstr.span(), span);
            assert_eq!(sstr.content(), content);
        }

        #[test]
        fn split_at_working() {
            let (left, right) = SpannedStr::input_file("foobar").split_at(3);

            assert_eq!(left.content, "foo");
            assert_eq!(right.content, "bar");

            let expected_left = Span {
                start: pos(0, 0, 0),
                end: pos(0, 3, 3),
            };
            let expected_right = Span {
                start: pos(0, 3, 3),
                end: pos(0, 6, 6),
            };

            assert_eq!(left.span, expected_left);
            assert_eq!(right.span, expected_right);
        }

        #[test]
        #[should_panic(expected = "byte index 15 is out of bounds of `hello, world`")]
        fn split_at_out_of_bounds() {
            let whole = SpannedStr::input_file("hello, world");
            whole.split_at(15);
        }

        #[test]
        #[should_panic(
            expected = "byte index 2 is not a char boundary; it is inside \'é\' (bytes 1..3) of `Vélo`"
        )]
        fn split_at_non_boundary() {
            let whole = SpannedStr::input_file("Vélo");
            whole.split_at(2);
        }

        #[test]
        fn take_while() {
            let whole = SpannedStr::input_file("foo bar");
            let (left, right) = whole.take_while(|c| c != ' ');

            assert_eq!(left.content, "foo");
            assert_eq!(right.content, " bar");
        }

        #[test]
        fn take_while_empty_string() {
            let empty = SpannedStr::input_file("");
            let (left, right) = empty.take_while(|_| true);

            assert_eq!(left, empty);
            assert_eq!(right, empty);
        }

        #[test]
        fn take_while_non_ascii() {
            let whole = SpannedStr::input_file("éêè");
            let (left, right) = whole.take_while(|c| c != 'è');

            assert_eq!(left.content, "éê");
            assert_eq!(right.content, "è");
        }
    }
}