flexi_parse/
scanner.rs

1use crate::error::Error;
2use crate::error::ErrorKind;
3use crate::token::CarriageReturn;
4use crate::token::Ident;
5use crate::token::LitStrDoubleQuote;
6use crate::token::LitStrSingleQuote;
7use crate::token::NewLine;
8use crate::token::PunctKind;
9use crate::token::SingleCharPunct;
10use crate::token::Space2;
11use crate::token::Spacing;
12use crate::token::Tab;
13use crate::token::WhiteSpace;
14use crate::Entry;
15use crate::Result;
16use crate::SourceFile;
17use crate::Span;
18use crate::TokenStream;
19
20use std::sync::Arc;
21
/// Reports whether `c` holds a character that may appear in an identifier.
///
/// Identifier characters are the underscore and anything for which
/// [`char::is_alphanumeric`] is true. `None` is never a valid identifier
/// character.
fn valid_ident_char(c: Option<char>) -> bool {
    matches!(c, Some(ch) if ch == '_' || ch.is_alphanumeric())
}
25
26#[derive(Debug)]
27struct Scanner {
28    current: usize,
29    end: usize,
30    errors: Error,
31    source: Arc<SourceFile>,
32}
33
34impl Scanner {
35    fn scan(mut self) -> (TokenStream, Option<Error>) {
36        let mut tokens = vec![];
37
38        while !self.is_at_end() {
39            match self.scan_token() {
40                Ok(Some(token)) => tokens.push(token),
41                Ok(None) => {}
42                Err(err) => {
43                    self.errors.add(err);
44                    tokens.push(Entry::Error(Span::new(0, 0, Arc::clone(&self.source))));
45                    break;
46                }
47            }
48        }
49
50        let errors = if self.errors.is_empty() {
51            None
52        } else {
53            Some(self.errors)
54        };
55
56        (TokenStream::new(tokens, Some(self.source)), errors)
57    }
58
59    fn scan_token(&mut self) -> Result<Option<Entry>> {
60        let token = match self.peek(0)? {
61            #[cfg(feature = "scan-strings")]
62            '"' => {
63                let start = self.current;
64                let mut buf = String::new();
65                self.current += 1;
66                while self.peek(0)? != '"' {
67                    buf.push(self.peek(0)?);
68                    self.current += 1;
69                }
70                self.current += 1;
71
72                let span = Span::new(start, self.current, Arc::clone(&self.source));
73
74                Entry::LitStrDoubleQuote(LitStrDoubleQuote::new(buf, span))
75            }
76            #[cfg(feature = "scan-strings")]
77            '\'' => {
78                let start = self.current;
79                let mut buf = String::new();
80                self.current += 1;
81                while self.peek(0)? != '\'' {
82                    buf.push(self.peek(0)?);
83                    self.current += 1;
84                }
85                self.current += 1;
86
87                let span = Span::new(start, self.current, Arc::clone(&self.source));
88
89                Entry::LitStrSingleQuote(LitStrSingleQuote::new(buf, span))
90            }
91            c if PunctKind::try_from(c).is_ok() => {
92                let kind = c.try_into().unwrap();
93                let span = Span::new(self.current, self.current + 1, Arc::clone(&self.source));
94                self.current += 1;
95                let spacing = if self.peek(0).is_ok_and(|c| PunctKind::try_from(c).is_ok()) {
96                    Spacing::Joint
97                } else {
98                    Spacing::Alone
99                };
100
101                Entry::Punct(SingleCharPunct {
102                    kind,
103                    spacing,
104                    span,
105                })
106            }
107            c if c.is_alphanumeric() || c == '_' => {
108                let start = self.current;
109                while valid_ident_char(self.peek(0).ok()) {
110                    self.current += 1;
111                }
112                let string = self.source.contents[start..self.current].to_string();
113                let span = Span::new(start, self.current, Arc::clone(&self.source));
114
115                Entry::Ident(Ident { string, span })
116            }
117            ' ' if self.peek(1).is_ok_and(|c| c == ' ') => {
118                self.current += 2;
119                Entry::WhiteSpace(WhiteSpace::Space2(Space2 {
120                    span: Span::new(self.current - 2, self.current, Arc::clone(&self.source)),
121                }))
122            }
123            ' ' => {
124                self.current += 1;
125                return Ok(None);
126            }
127            '\t' => {
128                let span = Span::new(self.current, self.current + 1, Arc::clone(&self.source));
129                self.current += 1;
130                Entry::WhiteSpace(WhiteSpace::Tab(Tab { span }))
131            }
132            '\n' => {
133                let span = Span::new(self.current, self.current + 1, Arc::clone(&self.source));
134                self.current += 1;
135                Entry::WhiteSpace(WhiteSpace::NewLine(NewLine { span }))
136            }
137            '\u{000D}' => {
138                let span = Span::new(self.current, self.current + 1, Arc::clone(&self.source));
139                self.current += 1;
140                Entry::WhiteSpace(WhiteSpace::CarriageReturn(CarriageReturn { span }))
141            }
142            _ => {
143                self.current += 1;
144                return Err(Error::new(
145                    Arc::clone(&self.source),
146                    ErrorKind::UnknownCharacter(Span::new(
147                        self.current,
148                        self.current + 1,
149                        Arc::clone(&self.source),
150                    )),
151                ));
152            }
153        };
154
155        Ok(Some(token))
156    }
157
158    fn peek(&mut self, offset: usize) -> Result<char> {
159        if self.current + offset >= self.source.contents.len() {
160            Err(Error::new(
161                Arc::clone(&self.source),
162                ErrorKind::EndOfFile(self.source.contents.len()),
163            ))
164        } else {
165            Ok(
166                self.source.contents[self.current + offset..=self.current + offset]
167                    .chars()
168                    .next()
169                    .unwrap(),
170            )
171        }
172    }
173
174    fn is_at_end(&mut self) -> bool {
175        self.current >= self.end
176    }
177}
178
179pub(crate) fn scan(
180    source: Arc<SourceFile>,
181    start: usize,
182    end: Option<usize>,
183) -> (TokenStream, Option<Error>) {
184    let (tokens, errors) = Scanner {
185        current: start,
186        end: end.unwrap_or(source.contents.len()),
187        errors: Error::empty(),
188        source,
189    }
190    .scan();
191    (tokens, errors)
192}