// textparse/parse.rs

1use crate::{Position, Span};
2use std::fmt::Write;
3use std::{
4    any::{Any, TypeId},
5    borrow::{Borrow, Cow},
6    cmp::Ordering,
7    collections::{BTreeMap, HashMap},
8    error::Error,
9    path::{Path, PathBuf},
10};
11
12pub use textparse_derive::Parse;
13
14/// This trait allows for parsing an item from text.
15pub trait Parse: 'static + Span + Clone + Sized {
16    /// Parses an item.
17    ///
18    /// `None` means parse failure.
19    fn parse(parser: &mut Parser) -> Option<Self>;
20
21    /// Name of the item to be parsed.
22    fn name() -> Option<fn() -> String> {
23        None
24    }
25}
26
27impl<T: Parse> Parse for Box<T> {
28    fn parse(parser: &mut Parser) -> Option<Self> {
29        parser.parse().map(Box::new)
30    }
31
32    fn name() -> Option<fn() -> String> {
33        T::name()
34    }
35}
36
37impl<T0: Parse, T1: Parse> Parse for (T0, T1) {
38    fn parse(parser: &mut Parser) -> Option<Self> {
39        Some((parser.parse()?, parser.parse()?))
40    }
41}
42
43impl<T0: Parse, T1: Parse, T2: Parse> Parse for (T0, T1, T2) {
44    fn parse(parser: &mut Parser) -> Option<Self> {
45        Some((parser.parse()?, parser.parse()?, parser.parse()?))
46    }
47}
48
49impl<T0: Parse, T1: Parse, T2: Parse, T3: Parse> Parse for (T0, T1, T2, T3) {
50    fn parse(parser: &mut Parser) -> Option<Self> {
51        Some((
52            parser.parse()?,
53            parser.parse()?,
54            parser.parse()?,
55            parser.parse()?,
56        ))
57    }
58}
59
60impl<T0: Parse, T1: Parse, T2: Parse, T3: Parse, T4: Parse> Parse for (T0, T1, T2, T3, T4) {
61    fn parse(parser: &mut Parser) -> Option<Self> {
62        Some((
63            parser.parse()?,
64            parser.parse()?,
65            parser.parse()?,
66            parser.parse()?,
67            parser.parse()?,
68        ))
69    }
70}
71
72impl<T0: Parse, T1: Parse, T2: Parse, T3: Parse, T4: Parse, T5: Parse> Parse
73    for (T0, T1, T2, T3, T4, T5)
74{
75    fn parse(parser: &mut Parser) -> Option<Self> {
76        Some((
77            parser.parse()?,
78            parser.parse()?,
79            parser.parse()?,
80            parser.parse()?,
81            parser.parse()?,
82            parser.parse()?,
83        ))
84    }
85}
86
/// Parser.
///
/// Holds the input text, the current read position, a memo of earlier parse
/// results, and the bookkeeping needed to build a [`ParseError`] afterwards.
#[derive(Debug)]
pub struct Parser<'a> {
    // Input text: borrowed when created by `new`, owned after `into_owned`.
    text: Cow<'a, str>,
    // Current read position; a byte offset into `text`.
    position: Position,
    // Number of named items whose `Parse::parse` call is currently in progress.
    level: usize,
    // Named items recorded for error reporting (see `update_expected`).
    expected: Expected,
    // Memoized outcomes keyed by item type, then by start position.
    // `None` means the parse failed there or is still in progress.
    memo: HashMap<TypeId, BTreeMap<Position, Option<Box<dyn Any>>>>,
}
96
97impl<'a> Parser<'a> {
98    /// Makes a new [`Parser`] instance.
99    pub fn new(text: &'a str) -> Self {
100        Self {
101            text: Cow::Borrowed(text),
102            position: Position::default(),
103            level: 0,
104            expected: Expected::default(),
105            memo: HashMap::default(),
106        }
107    }
108
109    /// Returns the current position.
110    pub fn current_position(&self) -> Position {
111        self.position
112    }
113
114    /// Returns `true` if the parser has reached EOS, otherwise `false`.
115    pub fn is_eos(&self) -> bool {
116        self.text.len() == self.position.get()
117    }
118
119    /// Returns the full text.
120    pub fn text(&self) -> &str {
121        self.text.borrow()
122    }
123
124    /// Returns the remaining, un-parsed text.
125    pub fn remaining_text(&self) -> &str {
126        &self.text[self.position.get()..]
127    }
128
129    /// Peeks the next character.
130    pub fn peek_char(&self) -> Option<char> {
131        self.remaining_text().chars().next()
132    }
133
134    /// Reads the next character.
135    pub fn read_char(&mut self) -> Option<char> {
136        if let Some(c) = self.peek_char() {
137            self.position = Position::new(self.position.get() + c.len_utf8());
138            Some(c)
139        } else {
140            None
141        }
142    }
143
144    /// Parses an item.
145    pub fn parse<T: Parse>(&mut self) -> Option<T> {
146        if let Some(result) = self.get_parse_result::<T>(self.position) {
147            let result = result.cloned();
148            if let Some(t) = &result {
149                self.position = t.end_position();
150            }
151            return result;
152        }
153
154        let start = self.position;
155
156        let has_name = if let Some(name) = T::name() {
157            self.update_expected::<T>(name);
158            true
159        } else {
160            false
161        };
162        self.set_parse_result_if_absent::<T>(start, None);
163        if has_name {
164            self.level += 1;
165        }
166        let result = T::parse(self);
167        if has_name {
168            self.level -= 1;
169        }
170
171        self.set_parse_result(start, result.clone());
172
173        if result.is_none() {
174            self.position = start;
175        }
176        result
177    }
178
179    /// Returns parsed items of which type is `T`.
180    pub fn parsed_items<T: Parse>(&self) -> impl Iterator<Item = (Position, &T)> {
181        self.memo
182            .get(&TypeId::of::<T>())
183            .into_iter()
184            .flat_map(|map| {
185                map.iter().filter_map(|(position, result)| {
186                    result
187                        .as_ref()
188                        .map(|item| (*position, item.downcast_ref::<T>().expect("unreachable")))
189                })
190            })
191    }
192
193    /// Converts [`Parser`] into [`ParseError`].
194    ///
195    /// You should call this method only when `Parser::parse()` returned `None`.
196    pub fn into_parse_error(self) -> ParseError {
197        ParseError::new(self.into_owned())
198    }
199
200    fn into_owned(self) -> Parser<'static> {
201        Parser {
202            text: Cow::Owned(self.text.into_owned()),
203            position: self.position,
204            level: self.level,
205            expected: self.expected,
206            memo: self.memo,
207        }
208    }
209
210    fn update_expected<T: Parse>(&mut self, name: fn() -> String) {
211        match (
212            self.expected.position.cmp(&self.position),
213            self.expected.level.cmp(&self.level),
214        ) {
215            (Ordering::Equal, Ordering::Equal) => {
216                self.expected.add_item::<T>(name);
217            }
218            (Ordering::Less, _) | (Ordering::Equal, Ordering::Greater) => {
219                self.expected = Expected::new::<T>(self.position, self.level, name);
220            }
221            _ => {}
222        }
223    }
224
225    fn set_parse_result<T: Parse>(&mut self, position: Position, result: Option<T>) {
226        self.memo
227            .entry(TypeId::of::<T>())
228            .or_default()
229            .insert(position, result.map(|t| Box::new(t) as Box<dyn Any>));
230    }
231
232    fn set_parse_result_if_absent<T: Parse>(&mut self, position: Position, result: Option<T>) {
233        self.memo
234            .entry(TypeId::of::<T>())
235            .or_default()
236            .entry(position)
237            .or_insert_with(|| result.map(|t| Box::new(t) as Box<dyn Any>));
238    }
239
240    fn get_parse_result<T: Parse>(&self, position: Position) -> Option<Option<&T>> {
241        self.memo
242            .get(&TypeId::of::<T>())
243            .and_then(|map| map.get(&position))
244            .map(|result| {
245                result
246                    .as_ref()
247                    .map(|item| item.downcast_ref::<T>().expect("unreachable"))
248            })
249    }
250}
251
// Named items that were attempted at one position and nesting level;
// this is the data behind the "expected ..." part of a `ParseError`.
#[derive(Debug, Default)]
struct Expected {
    // Position at which the items below were attempted.
    position: Position,
    // Parser nesting level at which the items below were attempted.
    level: usize,
    // Name-producing function of each attempted item, keyed by the item's type.
    expected_items: HashMap<TypeId, fn() -> String>,
}
258
259impl Expected {
260    fn new<T: Parse>(position: Position, level: usize, name: fn() -> String) -> Self {
261        let mut this = Self {
262            position,
263            level,
264            expected_items: Default::default(),
265        };
266        this.add_item::<T>(name);
267        this
268    }
269
270    fn add_item<T: Parse>(&mut self, name: fn() -> String) {
271        self.expected_items.insert(TypeId::of::<T>(), name);
272    }
273
274    fn items(&self) -> impl '_ + Iterator<Item = String> {
275        self.expected_items.values().map(|f| f())
276    }
277}
278
/// Parse error.
///
/// Created by [`Parser::into_parse_error`]. It keeps the whole parser, with
/// the text owned, so the error message can quote the offending source line.
pub struct ParseError {
    // The parser that failed, with its text converted to an owned copy.
    parser: Parser<'static>,
    // Path shown in the error message; `<UNKNOWN>` unless set via `file_path`.
    file_path: PathBuf,
}
284
285impl ParseError {
286    fn new(parser: Parser<'static>) -> Self {
287        Self {
288            parser,
289            file_path: PathBuf::from("<UNKNOWN>"),
290        }
291    }
292
293    /// Sets the file path of the parse target text.
294    ///
295    /// The default value is `<UNKNOWN>`.
296    pub fn file_path<P: AsRef<Path>>(mut self, file_path: P) -> Self {
297        self.file_path = file_path.as_ref().to_path_buf();
298        self
299    }
300
301    fn error_reason(&self) -> Result<String, std::fmt::Error> {
302        let mut s = String::new();
303        let mut expected_items = self.parser.expected.items().collect::<Vec<_>>();
304        expected_items.sort();
305        match expected_items.len() {
306            0 => {}
307            1 => {
308                write!(s, "expected {}", expected_items[0])?;
309            }
310            n => {
311                write!(s, "expected one of {}", expected_items[0])?;
312                for (i, item) in expected_items.iter().enumerate().skip(1) {
313                    if i + 1 == n {
314                        write!(s, ", or {item}")?;
315                    } else {
316                        write!(s, ", {item}")?;
317                    }
318                }
319            }
320        }
321        Ok(s)
322    }
323}
324
// Marks `ParseError` as a standard error type; only the trait's default methods are used.
impl Error for ParseError {}
326
327impl std::fmt::Debug for ParseError {
328    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
329        write!(f, "{self}")
330    }
331}
332
333impl std::fmt::Display for ParseError {
334    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
335        let offset = self.parser.expected.position.get();
336        let (line, column) = self
337            .parser
338            .expected
339            .position
340            .line_and_column(&self.parser.text);
341        let reason = self.error_reason()?;
342        write!(f, "{reason}")?;
343
344        if offset == self.parser.text.len() {
345            write!(f, ", reached EOS")?;
346        }
347        writeln!(f)?;
348
349        writeln!(
350            f,
351            "  --> {}:{line}:{column}",
352            self.file_path.to_string_lossy()
353        )?;
354
355        let line_len = format!("{line}").len();
356        writeln!(f, "{:line_len$} |", ' ')?;
357        writeln!(
358            f,
359            "{line} | {}",
360            self.parser.text[offset + 1 - column..]
361                .lines()
362                .next()
363                .unwrap_or("")
364        )?;
365        writeln!(f, "{:line_len$} | {:>column$} {reason}", ' ', '^')?;
366        Ok(())
367    }
368}