1use b2c2_common::*;
5use std::io::{self, BufRead};
6use std::result;
7
8#[cfg(test)]
9mod test;
10
/// Outcome of tokenizing one source line: on success, the line number paired with
/// that line's `(position, token)` list; on failure, a `SyntaxError`.
type Result = result::Result<(usize, Vec<(usize, Token)>), SyntaxError>;
12
/// Line-by-line tokenizer over the source text supplied by `reader`.
pub struct Tokenizer<R> {
    /// Number of the line most recently requested from `reader` (0 before the first read).
    line_number: usize,
    /// Source that lines are read from.
    reader: R,
}
17
18impl<R> Tokenizer<R> {
19 pub fn new(reader: R) -> Self {
20 Self {
21 line_number: 0,
22 reader,
23 }
24 }
25}
26
27impl<R: BufRead> Iterator for Tokenizer<R> {
28 type Item = io::Result<Result>;
29 fn next(&mut self) -> Option<Self::Item> {
30 self.line_number += 1;
31 let mut line = String::new();
32 match self.reader.read_line(&mut line) {
33 Err(error) => Some(Err(error)),
34 Ok(0) => None,
35 Ok(_) => Some(Ok(self.parse_line(&line))),
36 }
37 }
38}
39
40impl<R> Tokenizer<R> {
41 fn parse_line(&self, src: &str) -> Result {
42 let mut line = src.trim_start();
43 let mut ret: Vec<(usize, Token)> = vec![];
44 while !line.is_empty() && !is_comment(line, ret.is_empty()) {
45 let pos = src.len() - line.len() + 1;
46 match take_token(line) {
47 Some((Token::Keyword(Keyword::Mid), rest)) if !ret.is_empty() => {
48 ret.push((pos, Token::Function(Function::Mid)));
49 line = rest.trim_start();
50 }
51 Some((token, rest)) => {
52 ret.push((pos, token));
53 line = rest.trim_start();
54 }
55 None => {
56 return Err(SyntaxError::new(
57 self.line_number,
58 pos,
59 format!("不正なトークンです: {}", line),
60 ))
61 }
62 }
63 }
64 Ok((self.line_number, ret))
65 }
66}
67
68fn is_comment(line: &str, is_toplevel: bool) -> bool {
69 line.starts_with('\'')
70 || (is_toplevel
71 && take_word(line)
72 .filter(|(word, _)| "Rem".eq_ignore_ascii_case(word))
73 .is_some())
74}
75
76fn take_token(line: &str) -> Option<(Token, &str)> {
77 [
78 take_word_token,
79 take_hex_integer_token,
80 take_integer_token,
81 take_char_token,
82 take_string_token,
83 take_operator_token,
84 ]
85 .iter()
86 .find_map(|f| f(line))
87}
88
89fn take_hex_integer_token(s: &str) -> Option<(Token, &str)> {
90 let mut char_indices = s.char_indices();
91 char_indices.next().filter(|(_, ch)| *ch == '&')?;
92 char_indices
93 .next()
94 .filter(|(_, ch)| 'H'.eq_ignore_ascii_case(ch))?;
95 let mut char_indices = char_indices.peekable();
96 let &(prefix_position, _) = char_indices.peek()?;
97 let split_position = char_indices
98 .find(|(_, ch)| !ch.is_ascii_hexdigit())
99 .map_or(s.len(), |(p, _)| p);
100 let (num, rest) = s.split_at(split_position);
101 let (_prefix, num) = num.split_at(prefix_position);
102 u16::from_str_radix(num, 16)
103 .ok()
104 .map(|n| (Token::Integer(n as i16 as i32), rest))
105}
106
107fn take_integer_token(s: &str) -> Option<(Token, &str)> {
108 let split_position = s
109 .char_indices()
110 .find(|(_, ch)| !ch.is_ascii_digit())
111 .map_or(s.len(), |(p, _)| p);
112 let (number, rest) = s.split_at(split_position);
113 number
114 .parse::<i32>()
115 .ok()
116 .filter(|n| *n <= (i16::MIN as i32).abs())
117 .map(|n| (Token::Integer(n), rest))
118}
119
120fn take_char_token(s: &str) -> Option<(Token, &str)> {
121 let mut char_indices = s.char_indices();
122 char_indices.next().filter(|(_, ch)| *ch == '"')?;
123 let mut quotation = false;
124 let mut split_position = s.len();
125 let mut text: Option<char> = None;
126 for (p, ch) in char_indices {
127 if quotation {
128 if ch == '"' {
129 if text.is_some() {
130 return None;
131 }
132 quotation = false;
133 text = Some('"');
134 } else if text.is_some() {
135 split_position = p;
136 break;
137 } else {
138 return None;
139 }
140 } else if ch == '"' {
141 quotation = true;
142 } else if text.is_none() {
143 text = Some(ch);
144 } else {
145 return None;
146 }
147 }
148 if !quotation {
149 return None;
150 }
151 let ch = text.take()?;
152 let (_, rest) = s.split_at(split_position);
153 let (suffix, rest) = take_word(rest)?;
154 if !"c".eq_ignore_ascii_case(suffix) {
155 return None;
156 }
157 Some((Token::Character(ch), rest))
158}
159
160fn take_string_token(s: &str) -> Option<(Token, &str)> {
161 let mut char_indices = s.char_indices();
162 char_indices.next().filter(|(_, ch)| *ch == '"')?;
163 let mut quotation = false;
164 let mut split_position = s.len();
165 let mut text = String::new();
166 for (p, ch) in char_indices {
167 if quotation {
168 if ch == '"' {
169 quotation = false;
170 text.push('"');
171 } else {
172 split_position = p;
173 break;
174 }
175 } else if ch == '"' {
176 quotation = true;
177 } else {
178 text.push(ch);
179 }
180 }
181 if !quotation || text.chars().count() > 256 {
182 return None;
183 }
184 let (_, rest) = s.split_at(split_position);
185 Some((Token::String(text), rest))
186}
187
188fn take_operator_token(s: &str) -> Option<(Token, &str)> {
189 let mut char_indices = s.char_indices().take(5);
190 char_indices
191 .next()
192 .filter(|(_, ch)| !ch.is_ascii_alphanumeric())?;
193 char_indices
194 .chain(vec![(s.len(), '\n')])
195 .filter_map(|(p, _)| {
196 let (word, rest) = s.split_at(p);
197 Operator::parse(word).map(|token| (token, rest))
198 })
199 .last()
200}
201
/// Splits a leading identifier-like word off `s`.
///
/// A word starts with an ASCII letter and continues with ASCII letters,
/// digits or underscores. Returns `(word, rest)`, or `None` when `s` does not
/// start with a letter or the word is longer than 30 characters.
fn take_word(s: &str) -> Option<(&str, &str)> {
    let head = s.chars().next()?;
    if !head.is_ascii_alphabetic() {
        return None;
    }
    // The head is itself a word character, so scanning from the start finds
    // the same end as scanning from the second character.
    let word_len = s
        .find(|ch: char| !(ch.is_ascii_alphanumeric() || ch == '_'))
        .unwrap_or(s.len());
    let (word, rest) = s.split_at(word_len);
    // A word is pure ASCII, so its byte length equals its character count.
    if word.len() > 30 {
        return None;
    }
    Some((word, rest))
}
212
213fn parse_boolean(token: &str) -> Option<Token> {
214 token
215 .to_ascii_lowercase()
216 .parse::<bool>()
217 .map(Token::Boolean)
218 .ok()
219}
220
221fn take_word_token(s: &str) -> Option<(Token, &str)> {
222 take_word(s).and_then(|(word, rest)| {
223 [
224 parse_boolean,
225 Keyword::parse,
226 TypeName::parse,
227 Function::parse,
228 Operator::parse,
229 ]
230 .iter()
231 .find_map(|f| f(word))
232 .or_else(|| Some(Token::Name(word.into())))
233 .map(|token| (token, rest))
234 })
235}
236
/// A single lexical token produced by the tokenizer.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum Token {
    /// Word that matched no boolean literal, keyword, type name, function or word operator.
    Name(String),
    /// Word listed in `Keyword`.
    Keyword(Keyword),
    /// Word listed in `Function`.
    Function(Function),
    /// Word listed in `TypeName`.
    TypeName(TypeName),
    /// Operator or punctuation listed in `Operator`.
    Operator(Operator),
    /// Content of a double-quoted literal, with each `""` collapsed to `"`.
    String(String),
    /// Value of a decimal or `&H` hexadecimal integer literal.
    Integer(i32),
    /// `true` / `false` literal (matched ASCII case-insensitively).
    Boolean(bool),
    /// Character literal: a one-character string followed by the word `c`.
    Character(char),
}
249
// Defines a `Copy` enum of token words together with its lookup machinery.
//
// Forms:
// * `enumdef!(Name; TABLE; A, B, ...)` generates everything below and a
//   `to_str` that returns each variant's own identifier as its spelling.
// * `enumdef!(Name; TABLE;; A, B, ...)` generates the same items except
//   `to_str`, which the caller must supply as `fn to_str(self) -> &'static str`.
//
// Generated items: the enum `Name`, a static `TABLE` holding every variant in
// declaration order, `TryFrom<&str>` (ASCII case-insensitive match against
// `to_str`), `From<Name> for Token` (wrapping in the `Token` variant of the
// same name), `Name::parse` and a `Display` impl that forwards to `to_str`.
macro_rules! enumdef {
    // Counting rules: expand to the number of comma-terminated identifiers.
    ($v:ident,) => (1);
    ($v:ident, $($vs:ident,)*) => (1 + enumdef!($($vs,)*));
    // Core rule (`;;` separator): everything except `to_str`.
    ($name:ident; $array:ident;; $($value:ident,)* ) => {
        #[derive(PartialEq, Eq, Clone, Copy, Debug)]
        pub enum $name {
            $($value,)*
        }
        // Every variant, in declaration order; scanned linearly by `try_from`.
        static $array: [$name; enumdef!($($value,)*)] = [
            $($name::$value,)*
        ];
        impl std::convert::TryFrom<&str> for $name {
            type Error = ();
            fn try_from(token: &str) -> std::result::Result<Self, Self::Error> {
                for candidate in $array.iter() {
                    if candidate.to_str().eq_ignore_ascii_case(token) {
                        return Ok(*candidate);
                    }
                }
                Err(())
            }
        }
        impl From<$name> for Token {
            fn from(v: $name) -> Token {
                Token::$name(v)
            }
        }
        impl $name {
            fn parse(token: &str) -> Option<Token> {
                use std::convert::*;
                $name::try_from(token).ok().map(Token::from)
            }
        }
        impl std::fmt::Display for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                self.to_str().fmt(f)
            }
        }
    };
    // Convenience rule (`;` separator): core items plus an identifier-based `to_str`.
    ($name:ident; $array:ident; $($value:ident,)* ) => {
        enumdef!($name; $array;; $($value,)*);
        impl $name {
            fn to_str(self) -> &'static str {
                match self {
                    $($name::$value => stringify!($value),)*
                }
            }
        }
    };
}
299
// Keyword words. `enumdef!` generates the `Keyword` enum, the `KEYWORDS`
// table and an ASCII case-insensitive lookup keyed on each variant's own
// identifier.
enumdef!(
    Keyword;
    KEYWORDS;
    Argument,
    As,
    ByRef,
    ByVal,
    Call,
    Case,
    Continue,
    Else,
    ElseIf,
    End,
    Exit,
    Extern,
    Dim,
    Do,
    Fill,
    For,
    From,
    If,
    Input,
    Loop,
    Mid,
    Next,
    Option,
    Print,
    Rem,
    Select,
    Step,
    Sub,
    Then,
    To,
    Until,
    While,
    With,
);
337
// Function-name words. `enumdef!` generates the `Function` enum, the
// `FUNCTIONS` table and an ASCII case-insensitive lookup keyed on each
// variant's own identifier.
//
// `Mid` is also a keyword and `String` is also a type name. `take_word_token`
// tries keywords and type names before functions, so those two words never
// come out of it as functions; `parse_line` turns a non-leading `Mid` keyword
// into `Function::Mid`.
enumdef!(
    Function;
    FUNCTIONS;
    Abs,
    Array,
    Asc,
    CArray,
    CBool,
    Chr,
    CInt,
    CStr,
    Eof,
    Len,
    Max,
    Mid,
    Min,
    Space,
    String,
    SubArray,
);
358
// Type-name words. `enumdef!` generates the `TypeName` enum, the `TYPE_NAMES`
// table and an ASCII case-insensitive lookup keyed on each variant's own
// identifier.
enumdef!(
    TypeName;
    TYPE_NAMES;
    Boolean,
    Integer,
    String,
);
366
// Operators and punctuation. The `;;` form of `enumdef!` is used, so the
// macro does not generate `to_str`; the spellings come from the hand-written
// `Operator::to_str` elsewhere in this file.
// NOTE(review): `LessOrEequal` looks like a misspelling of `LessOrEqual`.
// Renaming it would change the public enum, so it is left as is.
enumdef!(
    Operator;
    OPERATORS;;
    Mod,
    Not,
    And,
    Xor,
    Or,
    NotEqual,
    LessOrEequal,
    GreaterOrEqual,
    ShiftLeftArithmetic,
    ShiftRightArithmetic,
    ShiftLeftLogical,
    ShiftRightLogical,
    AddInto,
    SubInto,
    Equal,
    LessThan,
    GreaterThan,
    Add,
    Sub,
    Mul,
    Div,
    Concat,
    OpenBracket,
    CloseBracket,
    Comma,
);
396
397impl Operator {
398 fn to_str(self) -> &'static str {
399 use Operator::*;
400 match self {
401 Mod => "Mod",
402 Not => "Not",
403 And => "And",
404 Xor => "Xor",
405 Or => "Or",
406 NotEqual => "<>",
407 LessOrEequal => "<=",
408 GreaterOrEqual => ">=",
409 ShiftLeftArithmetic => "<<",
410 ShiftRightArithmetic => ">>",
411 ShiftLeftLogical => "<<<",
412 ShiftRightLogical => ">>>",
413 AddInto => "+=",
414 SubInto => "-=",
415 Equal => "=",
416 LessThan => "<",
417 GreaterThan => ">",
418 Add => "+",
419 Sub => "-",
420 Mul => "*",
421 Div => "\\",
422 Concat => "&",
423 OpenBracket => "(",
424 CloseBracket => ")",
425 Comma => ",",
426 }
427 }
428}