1use std::{fmt::Display, iter};
2
3use lalrpop_util::lexer::Token;
4use thiserror::Error;
5
/// A single position inside a source string.
///
/// When produced by this module's `byte_pos_to_src_location`, `line` and
/// `column` are zero-based and `column` is measured in bytes (not characters)
/// from the start of the line.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Location {
    /// Index of the line containing the position.
    pub line: usize,
    /// Offset of the position from the start of its line.
    pub column: usize,
    /// Offset of the position from the start of the whole source.
    pub absolute: usize,
}
12
13#[derive(Debug, Default)]
14pub struct Span {
15    pub start: Option<Location>,
16    pub end: Option<Location>,
17}
18
19impl Display for Span {
20    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
21        match (&self.start, &self.end) {
22            (Some(start), Some(end)) if start == end => {
23                write!(f, "[{}:{}]", start.line, start.column)
24            }
25            (Some(location), None) | (None, Some(location)) => {
26                write!(f, "[{}:{}]", location.line, location.column)
27            }
28            (Some(start), Some(end)) => write!(
29                f,
30                "[{}:{}]->[{}:{}]",
31                start.line, start.column, end.line, end.column
32            ),
33            (None, None) => write!(f, "?",),
34        }
35    }
36}
37
38impl Span {
39    fn single_location(location: Location) -> Span {
40        Span {
41            start: Some(location.clone()),
42            end: Some(location),
43        }
44    }
45}
46
47#[derive(Error, Debug)]
48#[error("Error parsing: {msg} at {span}")]
49pub struct ParseError {
50    pub msg: String,
51    pub expected: Vec<String>,
52    pub span: Span,
53}
54
55impl ParseError {
56    pub(super) fn from_lalrpop(
57        src_str: &str,
58        err: lalrpop_util::ParseError<usize, Token<'_>, &str>,
59    ) -> Self {
60        use lalrpop_util::ParseError::*;
61
62        match err {
63            InvalidToken { location } => ParseError {
64                span: byte_pos_to_src_location(src_str, location)
65                    .map(Span::single_location)
66                    .unwrap_or_default(),
67                expected: Vec::new(),
68                msg: "invalid token".into(),
69            },
70            UnrecognizedEof { location, expected } => ParseError {
71                msg: "unrecognized eof".into(),
72                span: byte_pos_to_src_location(src_str, location)
73                    .map(Span::single_location)
74                    .unwrap_or_default(),
75                expected,
76            },
77            UnrecognizedToken {
78                token: (start, token, end),
79                expected,
80            } => ParseError {
81                msg: format!("unrecognized token: '{}'", token),
82                span: Span {
83                    start: byte_pos_to_src_location(src_str, start),
84                    end: byte_pos_to_src_location(src_str, end),
85                },
86                expected,
87            },
88            ExtraToken {
89                token: (start, token, end),
90            } => ParseError {
91                msg: format!("extra token: '{}'", token),
92                span: Span {
93                    start: byte_pos_to_src_location(src_str, start),
94                    end: byte_pos_to_src_location(src_str, end),
95                },
96                expected: Vec::new(),
97            },
98            User { error } => ParseError {
99                msg: error.into(),
100                expected: Vec::new(),
101                span: Span::default(),
102            },
103        }
104    }
105}
106
107fn byte_pos_to_src_location(src_str: &str, byte_pos: usize) -> Option<Location> {
110    let src_bytes = src_str.as_bytes();
111    let total_len = src_bytes.len();
112
113    let line_indices: Vec<usize> = {
114        let input_indices = src_bytes
115            .iter()
116            .enumerate()
117            .filter(|&(_, b)| *b == b'\n')
118            .map(|(i, _)| i + 1); iter::once(0).chain(input_indices).collect()
121    };
122
123    if byte_pos <= total_len {
124        let num_lines = line_indices.len();
125
126        let line_index = (0..num_lines)
127            .find(|&i| line_indices[i] > byte_pos)
128            .map(|i| i - 1)
129            .unwrap_or(num_lines - 1);
130
131        let line_byte_pos = line_indices[line_index];
132        Some(Location {
133            line: line_index,
134            column: byte_pos - line_byte_pos,
135            absolute: byte_pos,
136        })
137    } else {
138        None
139    }
140}