1use crate::error::Error;
2use crate::error::ErrorKind;
3use crate::token::CarriageReturn;
4use crate::token::Ident;
5use crate::token::LitStrDoubleQuote;
6use crate::token::LitStrSingleQuote;
7use crate::token::NewLine;
8use crate::token::PunctKind;
9use crate::token::SingleCharPunct;
10use crate::token::Space2;
11use crate::token::Spacing;
12use crate::token::Tab;
13use crate::token::WhiteSpace;
14use crate::Entry;
15use crate::Result;
16use crate::SourceFile;
17use crate::Span;
18use crate::TokenStream;
19
20use std::sync::Arc;
21
/// Reports whether `c` holds a character that may appear in an identifier.
///
/// A character qualifies when it is alphanumeric (per `char::is_alphanumeric`)
/// or an underscore. `None` never qualifies.
fn valid_ident_char(c: Option<char>) -> bool {
    matches!(c, Some(ch) if ch == '_' || ch.is_alphanumeric())
}
25
26#[derive(Debug)]
27struct Scanner {
28 current: usize,
29 end: usize,
30 errors: Error,
31 source: Arc<SourceFile>,
32}
33
34impl Scanner {
35 fn scan(mut self) -> (TokenStream, Option<Error>) {
36 let mut tokens = vec![];
37
38 while !self.is_at_end() {
39 match self.scan_token() {
40 Ok(Some(token)) => tokens.push(token),
41 Ok(None) => {}
42 Err(err) => {
43 self.errors.add(err);
44 tokens.push(Entry::Error(Span::new(0, 0, Arc::clone(&self.source))));
45 break;
46 }
47 }
48 }
49
50 let errors = if self.errors.is_empty() {
51 None
52 } else {
53 Some(self.errors)
54 };
55
56 (TokenStream::new(tokens, Some(self.source)), errors)
57 }
58
59 fn scan_token(&mut self) -> Result<Option<Entry>> {
60 let token = match self.peek(0)? {
61 #[cfg(feature = "scan-strings")]
62 '"' => {
63 let start = self.current;
64 let mut buf = String::new();
65 self.current += 1;
66 while self.peek(0)? != '"' {
67 buf.push(self.peek(0)?);
68 self.current += 1;
69 }
70 self.current += 1;
71
72 let span = Span::new(start, self.current, Arc::clone(&self.source));
73
74 Entry::LitStrDoubleQuote(LitStrDoubleQuote::new(buf, span))
75 }
76 #[cfg(feature = "scan-strings")]
77 '\'' => {
78 let start = self.current;
79 let mut buf = String::new();
80 self.current += 1;
81 while self.peek(0)? != '\'' {
82 buf.push(self.peek(0)?);
83 self.current += 1;
84 }
85 self.current += 1;
86
87 let span = Span::new(start, self.current, Arc::clone(&self.source));
88
89 Entry::LitStrSingleQuote(LitStrSingleQuote::new(buf, span))
90 }
91 c if PunctKind::try_from(c).is_ok() => {
92 let kind = c.try_into().unwrap();
93 let span = Span::new(self.current, self.current + 1, Arc::clone(&self.source));
94 self.current += 1;
95 let spacing = if self.peek(0).is_ok_and(|c| PunctKind::try_from(c).is_ok()) {
96 Spacing::Joint
97 } else {
98 Spacing::Alone
99 };
100
101 Entry::Punct(SingleCharPunct {
102 kind,
103 spacing,
104 span,
105 })
106 }
107 c if c.is_alphanumeric() || c == '_' => {
108 let start = self.current;
109 while valid_ident_char(self.peek(0).ok()) {
110 self.current += 1;
111 }
112 let string = self.source.contents[start..self.current].to_string();
113 let span = Span::new(start, self.current, Arc::clone(&self.source));
114
115 Entry::Ident(Ident { string, span })
116 }
117 ' ' if self.peek(1).is_ok_and(|c| c == ' ') => {
118 self.current += 2;
119 Entry::WhiteSpace(WhiteSpace::Space2(Space2 {
120 span: Span::new(self.current - 2, self.current, Arc::clone(&self.source)),
121 }))
122 }
123 ' ' => {
124 self.current += 1;
125 return Ok(None);
126 }
127 '\t' => {
128 let span = Span::new(self.current, self.current + 1, Arc::clone(&self.source));
129 self.current += 1;
130 Entry::WhiteSpace(WhiteSpace::Tab(Tab { span }))
131 }
132 '\n' => {
133 let span = Span::new(self.current, self.current + 1, Arc::clone(&self.source));
134 self.current += 1;
135 Entry::WhiteSpace(WhiteSpace::NewLine(NewLine { span }))
136 }
137 '\u{000D}' => {
138 let span = Span::new(self.current, self.current + 1, Arc::clone(&self.source));
139 self.current += 1;
140 Entry::WhiteSpace(WhiteSpace::CarriageReturn(CarriageReturn { span }))
141 }
142 _ => {
143 self.current += 1;
144 return Err(Error::new(
145 Arc::clone(&self.source),
146 ErrorKind::UnknownCharacter(Span::new(
147 self.current,
148 self.current + 1,
149 Arc::clone(&self.source),
150 )),
151 ));
152 }
153 };
154
155 Ok(Some(token))
156 }
157
158 fn peek(&mut self, offset: usize) -> Result<char> {
159 if self.current + offset >= self.source.contents.len() {
160 Err(Error::new(
161 Arc::clone(&self.source),
162 ErrorKind::EndOfFile(self.source.contents.len()),
163 ))
164 } else {
165 Ok(
166 self.source.contents[self.current + offset..=self.current + offset]
167 .chars()
168 .next()
169 .unwrap(),
170 )
171 }
172 }
173
174 fn is_at_end(&mut self) -> bool {
175 self.current >= self.end
176 }
177}
178
179pub(crate) fn scan(
180 source: Arc<SourceFile>,
181 start: usize,
182 end: Option<usize>,
183) -> (TokenStream, Option<Error>) {
184 let (tokens, errors) = Scanner {
185 current: start,
186 end: end.unwrap_or(source.contents.len()),
187 errors: Error::empty(),
188 source,
189 }
190 .scan();
191 (tokens, errors)
192}