1use crate::ast::*;
6use crate::output::Digits;
7use crate::types::{BigInt, BigRat, Numeric, TimeZone};
8use std::iter::Peekable;
9use std::str::Chars;
10
/// A lexical token produced by [`TokenIterator`].
#[derive(Debug, Clone)]
pub enum Token {
    /// A `\n` character.
    Newline,
    /// A `//` or `/* ... */` comment. The lexer stores `1` for a line comment and the
    /// number of newlines seen inside a block comment.
    Comment(usize),
    /// A bare word, a double-quoted name, or the single character named by a `\u` escape.
    Ident(String),
    /// A decimal literal: integer digits, optional fraction digits, optional exponent
    /// digits (the exponent may start with `-`).
    Decimal(String, Option<String>, Option<String>),
    /// A run of superscript digits, already converted to ASCII digits.
    Exponent(String),
    /// The digits of a `0x` literal, without the prefix.
    Hex(String),
    /// The digits of a `0o` literal, without the prefix.
    Oct(String),
    /// The digits of a `0b` literal, without the prefix.
    Bin(String),
    /// The contents of a single-quoted string, with escapes resolved.
    Quote(String),
    /// `/`, `÷` or the word `per`.
    Slash,
    /// `|`, U+2044 or U+2215.
    Pipe,
    /// `;`
    Semicolon,
    /// `=`
    Equals,
    /// `^` or `**`.
    Caret,
    /// End of input. The lexer keeps yielding this once the input is exhausted.
    Eof,
    /// `(`
    LPar,
    /// `)`
    RPar,
    /// `+`
    Plus,
    /// `-` or U+2212.
    Minus,
    /// `*`, `⋅` or `×`.
    Asterisk,
    /// `->`, `→`, or one of the words `to` / `in`.
    DashArrow,
    /// `:`
    Colon,
    /// `<<`
    DoubleLAngle,
    /// `>>`
    DoubleRAngle,
    /// The word `mod`.
    KeywordMod,
    /// The word `xor`.
    KeywordXor,
    /// The word `or`.
    KeywordOr,
    /// The word `and`.
    KeywordAnd,
    /// The tokenized contents of a `#...#` date literal.
    Date(Vec<DateToken>),
    /// `,`
    Comma,
    /// A temperature-scale word such as `degC` or `°F`.
    Degree(Degree),
    /// `%`
    Percent,
    /// A lexing failure, carrying a human-readable message.
    Error(String),
}
47
48fn describe(token: &Token) -> String {
49 match *token {
50 Token::Newline | Token::Comment(_) => "\\n".to_owned(),
51 Token::Ident(_) => "ident".to_owned(),
52 Token::Decimal(_, _, _) => "number".to_owned(),
53 Token::Exponent(_) => "exponent".to_owned(),
54 Token::Hex(_) => "hex".to_owned(),
55 Token::Oct(_) => "octal".to_owned(),
56 Token::Bin(_) => "binary".to_owned(),
57 Token::Quote(_) => "quote".to_owned(),
58 Token::Slash => "`/`".to_owned(),
59 Token::Pipe => "`|`".to_owned(),
60 Token::Semicolon => "`;`".to_owned(),
61 Token::Equals => "`=`".to_owned(),
62 Token::Caret => "`^`".to_owned(),
63 Token::Eof => "eof".to_owned(),
64 Token::LPar => "`(`".to_owned(),
65 Token::RPar => "`)`".to_owned(),
66 Token::Plus => "`+`".to_owned(),
67 Token::Minus => "`-`".to_owned(),
68 Token::Asterisk => "`*`".to_owned(),
69 Token::DashArrow => "`->`".to_owned(),
70 Token::Colon => "`:`".to_owned(),
71 Token::DoubleLAngle => "`<<`".to_owned(),
72 Token::DoubleRAngle => "`>>`".to_owned(),
73 Token::KeywordMod => "`mod`".to_owned(),
74 Token::KeywordXor => "`xor`".to_owned(),
75 Token::KeywordOr => "`or`".to_owned(),
76 Token::KeywordAnd => "`and`".to_owned(),
77 Token::Date(_) => "date literal".to_owned(),
78 Token::Comma => "`,`".to_owned(),
79 Token::Percent => "%".to_owned(),
80 Token::Degree(ref deg) => format!("`{}`", deg),
81 Token::Error(ref e) => format!("<{}>", e),
82 }
83}
84
/// Lexer over a query string.
///
/// Wraps a peekable character stream so that scanning can look one character ahead.
#[derive(Clone)]
pub struct TokenIterator<'a>(Peekable<Chars<'a>>);

impl<'a> TokenIterator<'a> {
    /// Creates a lexer positioned at the first character of `input`.
    pub fn new(input: &'a str) -> Self {
        let chars = input.chars();
        Self(chars.peekable())
    }
}
93
/// Returns `true` if `ch` is one of the currency signs enumerated below.
///
/// The lexer uses this to end a word before a digit when the preceding character is a
/// currency sign, so that input such as `$5` lexes as `$` followed by `5`.
///
/// Code points are spelled as escapes so that visually identical characters (for
/// example `$` and its fullwidth form U+FF04) cannot be confused or silently
/// normalised into duplicate, unreachable patterns.
/// NOTE(review): the list appears to follow Unicode's "Currency Symbol" (Sc) category —
/// confirm against the Unicode version the project targets.
fn is_currency(ch: char) -> bool {
    match ch {
        '\u{0024}' // dollar sign
        | '\u{00A2}'..='\u{00A5}' // cent, pound, generic currency, yen
        | '\u{058F}' // Armenian dram
        | '\u{060B}' // Afghani
        | '\u{07FE}'..='\u{07FF}' // NKo dorome, taman
        | '\u{09F2}'..='\u{09F3}' // Bengali rupee mark, rupee sign
        | '\u{09FB}' // Bengali ganda mark
        | '\u{0AF1}' // Gujarati rupee
        | '\u{0BF9}' // Tamil rupee
        | '\u{0E3F}' // Thai baht
        | '\u{17DB}' // Khmer riel
        | '\u{20A0}'..='\u{20C0}' // Currency Symbols block: euro-currency sign .. som sign
        | '\u{A838}' // North Indic rupee mark
        | '\u{FDFC}' // rial
        | '\u{FE69}' // small dollar sign
        | '\u{FF04}' // fullwidth dollar sign
        | '\u{FFE0}'..='\u{FFE1}' // fullwidth cent, pound
        | '\u{FFE5}'..='\u{FFE6}' // fullwidth yen, won
        | '\u{11FDD}'..='\u{11FE0}' // Tamil kaacu, panam, pon, varaakan
        | '\u{1E2FF}' // Wancho ngun
        | '\u{1ECB0}' // Indic Siyaq rupee mark
        => true,
        _ => false,
    }
}
106
/// Maps a superscript digit (`⁰`..`⁹`) to the matching ASCII digit.
///
/// Returns `None` for every other character, including ordinary ASCII digits.
fn digit_from_superscript(sup: char) -> Option<char> {
    // The two tables are position-aligned: the n-th superscript maps to the n-th digit.
    const SUPERSCRIPTS: &str = "⁰¹²³⁴⁵⁶⁷⁸⁹";
    const DIGITS: &str = "0123456789";

    SUPERSCRIPTS
        .chars()
        .zip(DIGITS.chars())
        .find(|&(candidate, _)| candidate == sup)
        .map(|(_, digit)| digit)
}
123
124impl<'a> Iterator for TokenIterator<'a> {
125 type Item = Token;
126
127 fn next(&mut self) -> Option<Token> {
128 if self.0.peek().is_none() {
129 return Some(Token::Eof);
130 }
131 let res = match self.0.next().unwrap() {
132 ' ' | '\t' => return self.next(),
133 '\n' => Token::Newline,
134 '(' => Token::LPar,
135 ')' => Token::RPar,
136 '+' => Token::Plus,
137 ';' => Token::Semicolon,
138 '%' => Token::Percent,
139 '=' => Token::Equals,
140 '^' => Token::Caret,
141 ',' => Token::Comma,
142 '|' | '\u{2044}' | '\u{2215}' => Token::Pipe,
145 ':' => Token::Colon,
146 '→' => Token::DashArrow,
147 '<' if self.0.peek().cloned() == Some('<') => {
148 self.0.next();
149 Token::DoubleLAngle
150 }
151 '>' if self.0.peek().cloned() == Some('>') => {
152 self.0.next();
153 Token::DoubleRAngle
154 }
155 '*' => {
156 if self.0.peek().cloned() == Some('*') {
157 self.0.next();
158 Token::Caret
159 } else {
160 Token::Asterisk
161 }
162 }
163 '⋅' | '×' => Token::Asterisk,
166 '-' => match self.0.peek().cloned() {
167 Some('>') => {
168 self.0.next();
169 Token::DashArrow
170 }
171 _ => Token::Minus,
172 },
173 '\u{2212}' => Token::Minus,
175 '÷' => Token::Slash,
177 '/' => match self.0.peek() {
178 Some(&'/') => loop {
179 match self.0.next() {
180 None | Some('\n') => return Some(Token::Comment(1)),
181 _ => (),
182 }
183 },
184 Some(&'*') => {
185 let mut lines = 0;
186 loop {
187 if let Some(&'\n') = self.0.peek() {
188 lines += 1;
189 }
190 if let Some('*') = self.0.next() {
191 if let Some(&'/') = self.0.peek() {
192 self.0.next();
193 return Some(Token::Comment(lines));
194 }
195 }
196 if self.0.peek() == None {
197 return Some(Token::Error("Expected `*/`, got EOF".to_string()));
198 }
199 }
200 }
201 _ => Token::Slash,
202 },
203 x @ '0'..='9' | x @ '.' => {
204 if x == '0' && self.0.peek() == Some(&'x') {
205 self.0.next();
206 let mut hex = String::new();
207
208 while let Some(c) = self.0.peek().cloned() {
209 match c {
210 '0'..='9' | 'a'..='f' | 'A'..='F' => hex.push(self.0.next().unwrap()),
211 '\u{2009}' | '_' => {
212 self.0.next();
213 }
214 _ => break,
215 }
216 }
217 if hex.is_empty() {
218 return Some(Token::Error(
219 "Malformed hexadecimal literal: No digits after 0x".to_owned(),
220 ));
221 }
222 return Some(Token::Hex(hex));
223 }
224
225 if x == '0' && self.0.peek() == Some(&'o') {
226 self.0.next();
227 let mut oct = String::new();
228
229 while let Some(c) = self.0.peek().cloned() {
230 match c {
231 '0'..='7' => oct.push(self.0.next().unwrap()),
232 '\u{2009}' | '_' => {
233 self.0.next();
234 }
235 _ => break,
236 }
237 }
238 if oct.is_empty() {
239 return Some(Token::Error(
240 "Malformed octal literal: No digits after 0o".to_owned(),
241 ));
242 }
243 return Some(Token::Oct(oct));
244 }
245
246 if x == '0' && self.0.peek() == Some(&'b') {
247 self.0.next();
248 let mut bin = String::new();
249
250 while let Some(c) = self.0.peek().cloned() {
251 match c {
252 '0' | '1' => bin.push(self.0.next().unwrap()),
253 '\u{2009}' | '_' => {
254 self.0.next();
255 }
256 _ => break,
257 }
258 }
259 if bin.is_empty() {
260 return Some(Token::Error(
261 "Malformed binary literal: No digits after 0b".to_owned(),
262 ));
263 }
264 return Some(Token::Bin(bin));
265 }
266
267 let mut integer = String::new();
268 let mut frac = None;
269 let mut exp = None;
270
271 if x != '.' {
273 integer.push(x);
274 while let Some(c) = self.0.peek().cloned() {
275 match c {
276 '0'..='9' => integer.push(self.0.next().unwrap()),
277 '\u{2009}' | '_' => {
278 self.0.next();
279 }
280 _ => break,
281 }
282 }
283 } else {
284 integer.push('0');
285 }
286 if x == '.' || Some('.') == self.0.peek().cloned() {
288 let mut buf = String::new();
289 if x != '.' {
290 self.0.next();
291 }
292 while let Some(c) = self.0.peek().cloned() {
293 match c {
294 '0'..='9' => buf.push(self.0.next().unwrap()),
295 '\u{2009}' | '_' => {
296 self.0.next();
297 }
298 _ => break,
299 }
300 }
301 if buf.is_empty() {
302 return Some(Token::Error(
303 "Malformed number literal: No digits after decimal point".to_owned(),
304 ));
305 }
306 frac = Some(buf)
307 }
308 if let Some('e') = self.0.peek().cloned().map(|x| x.to_ascii_lowercase()) {
310 let mut buf = String::new();
311 self.0.next();
312 if let Some('e') = self.0.peek().cloned().map(|x| x.to_ascii_lowercase()) {
313 self.0.next();
314 }
315 if let Some(c) = self.0.peek().cloned() {
316 match c {
317 '-' => {
318 buf.push(self.0.next().unwrap());
319 }
320 '+' => {
321 self.0.next();
322 }
323 _ => (),
324 }
325 }
326 while let Some(c) = self.0.peek().cloned() {
327 match c {
328 '0'..='9' => buf.push(self.0.next().unwrap()),
329 '\u{2009}' | '_' => {
331 self.0.next();
332 }
333 _ => break,
334 }
335 }
336 if buf.is_empty() {
337 return Some(Token::Error(
338 "Malformed number literal: No digits after exponent".to_owned(),
339 ));
340 }
341 exp = Some(buf)
342 }
343 Token::Decimal(integer, frac, exp)
344 }
345 x if digit_from_superscript(x).is_some() => {
346 let mut integer = String::new();
347 integer.push(digit_from_superscript(x).unwrap());
348 while let Some(c) = self.0.peek().cloned() {
349 if let Some(digit) = digit_from_superscript(c) {
350 self.0.next();
351 integer.push(digit);
352 } else {
353 break;
354 }
355 }
356 Token::Exponent(integer)
357 }
358 '\\' => match self.0.next() {
359 Some('u') => {
360 let mut buf = String::new();
361 while let Some(c) = self.0.peek().cloned() {
362 if c.is_digit(16) {
363 buf.push(self.0.next().unwrap());
364 } else {
365 break;
366 }
367 }
368 let v = u32::from_str_radix(&*buf, 16).unwrap();
369 if let Some(c) = ::std::char::from_u32(v) {
370 let mut buf = String::new();
371 buf.push(c);
372 Token::Ident(buf)
373 } else {
374 Token::Error(format!("Invalid unicode scalar: {:x}", v))
375 }
376 }
377 _ => Token::Error("Unexpected \\".to_string()),
378 },
379 '\'' => {
380 let mut buf = String::new();
381 loop {
382 match self.0.next() {
383 None | Some('\n') => {
384 return Some(Token::Error("Unexpected newline or EOF".to_string()))
385 }
386 Some('\\') => match self.0.next() {
387 Some('\'') => buf.push('\''),
388 Some('n') => buf.push('\n'),
389 Some('t') => buf.push('\t'),
390 Some(c) => {
391 return Some(Token::Error(format!(
392 "Invalid escape sequence \\{}",
393 c
394 )))
395 }
396 None => return Some(Token::Error("Unexpected EOF".to_string())),
397 },
398 Some('\'') => break,
399 Some(c) => buf.push(c),
400 }
401 }
402 Token::Quote(buf)
403 }
404 '#' => {
405 let mut toks = vec![];
406 while self.0.peek().is_some() {
407 let res = match self.0.next().unwrap() {
408 '#' => break,
409 ':' => DateToken::Colon,
410 '-' => DateToken::Dash,
411 '+' => DateToken::Plus,
412 x if x.is_whitespace() => {
413 while self.0.peek().map(|c| c.is_whitespace()).unwrap_or(false) {
414 self.0.next();
415 }
416 DateToken::Space
417 }
418 x if x.is_digit(10) => {
419 let mut integer = String::new();
420 integer.push(x);
421 while let Some(c) = self.0.peek().cloned() {
422 if c.is_digit(10) {
423 self.0.next();
424 integer.push(c);
425 } else {
426 break;
427 }
428 }
429 let frac = if let Some('.') = self.0.peek().cloned() {
430 let mut frac = String::new();
431 self.0.next();
432 while let Some(c) = self.0.peek().cloned() {
433 if c.is_digit(10) {
434 self.0.next();
435 frac.push(c);
436 } else {
437 break;
438 }
439 }
440 Some(frac)
441 } else {
442 None
443 };
444 DateToken::Number(integer, frac)
445 }
446 x => {
447 let mut buf = String::new();
448 buf.push(x);
449 while let Some(c) = self.0.peek().cloned() {
450 if !"#:-+ ".contains(c) && !c.is_digit(10) {
451 self.0.next();
452 buf.push(c);
453 } else {
454 break;
455 }
456 }
457 DateToken::Literal(buf)
458 } };
460 toks.push(res);
461 }
462 if let Some(&DateToken::Space) = toks.first() {
463 toks.remove(0);
464 }
465 if let Some(&DateToken::Space) = toks.last() {
466 toks.pop();
467 }
468 Token::Date(toks)
469 }
470 '"' => {
471 let mut buf = String::new();
472 while let Some(c) = self.0.next() {
473 if c == '\\' {
474 if let Some(c) = self.0.next() {
475 buf.push(c);
476 }
477 } else if c == '"' {
478 break;
479 } else {
480 buf.push(c);
481 }
482 }
483 Token::Ident(buf)
484 }
485 x => {
486 let mut buf = String::new();
487 let mut prev = x;
488 buf.push(x);
489 while let Some(c) = self.0.peek().cloned() {
490 if digit_from_superscript(c).is_some() {
491 break;
493 } else if c.is_digit(10) && is_currency(prev) {
494 break;
496 } else if c.is_alphanumeric() || c == '_' || c == '$' {
497 prev = self.0.next().unwrap();
498 buf.push(prev);
499 } else {
500 break;
501 }
502 }
503 match &*buf {
504 "degC" | "°C" | "celsius" | "℃" => Token::Degree(Degree::Celsius),
505 "degF" | "°F" | "fahrenheit" | "℉" => Token::Degree(Degree::Fahrenheit),
506 "degRé" | "°Ré" | "degRe" | "°Re" | "réaumur" | "reaumur" => {
507 Token::Degree(Degree::Reaumur)
508 }
509 "degRø" | "°Rø" | "degRo" | "°Ro" | "rømer" | "romer" => {
510 Token::Degree(Degree::Romer)
511 }
512 "degDe" | "°De" | "delisle" => Token::Degree(Degree::Delisle),
513 "degN" | "°N" | "degnewton" => Token::Degree(Degree::Newton),
514 "per" => Token::Slash,
515 "to" | "in" => Token::DashArrow,
516 "mod" => Token::KeywordMod,
517 "and" => Token::KeywordAnd,
518 "or" => Token::KeywordOr,
519 "xor" => Token::KeywordXor,
520 _ => Token::Ident(buf),
521 }
522 }
523 };
524 Some(res)
525 }
526}
527
/// Token stream with one-token lookahead, as consumed by the `parse_*` functions.
pub type Iter<'a> = Peekable<TokenIterator<'a>>;
529
/// Looks up the canonical unit-attribute prefix for `name`.
///
/// Several spellings can share one prefix (for example `imperial`, `british` and `UK`
/// all map to `br`). Matching is case-sensitive; unknown names yield `None`.
fn attr_from_name(name: &str) -> Option<&'static str> {
    // (accepted spelling, canonical prefix)
    const ATTRIBUTES: &[(&str, &str)] = &[
        ("int", "int"),
        ("international", "int"),
        ("UKSJJ", "UKSJJ"),
        ("UKB", "UKB"),
        ("UKC", "UKC"),
        ("UKK", "UKK"),
        ("imperial", "br"),
        ("british", "br"),
        ("UK", "br"),
        ("survey", "survey"),
        ("geodetic", "survey"),
        ("irish", "irish"),
        ("aust", "aust"),
        ("australian", "aust"),
        ("roman", "roman"),
        ("egyptian", "egyptian"),
        ("greek", "greek"),
        ("olympic", "olympic"),
    ];

    ATTRIBUTES
        .iter()
        .find(|&&(spelling, _)| spelling == name)
        .map(|&(_, prefix)| prefix)
}
548
549fn parse_function(iter: &mut Iter<'_>, func: Function) -> Expr {
550 let args = match iter.peek().cloned().unwrap() {
551 Token::LPar => {
552 iter.next();
553 let mut args = vec![];
554 loop {
555 if let Some(&Token::RPar) = iter.peek() {
556 iter.next();
557 break;
558 }
559 args.push(parse_expr(iter));
560 match iter.peek().cloned().unwrap() {
561 Token::Comma => {
562 iter.next();
563 }
564 Token::RPar => (),
565 x => {
566 return Expr::new_error(format!(
567 "Expected `,` or `)`, got {}",
568 describe(&x)
569 ))
570 }
571 }
572 }
573 args
574 }
575 _ => vec![parse_unary(iter)],
576 };
577 Expr::new_call(func, args)
578}
579
580fn parse_radix(num: &str, base: u32, description: &str) -> Expr {
581 BigInt::from_str_radix(num, base)
582 .map(|x| BigRat::ratio(&x, &BigInt::one()))
583 .map(Numeric::Rational)
584 .map(Expr::new_const)
585 .unwrap_or_else(|_| Expr::new_error(format!("Failed to parse {}", description)))
586}
587
588fn parse_term(iter: &mut Iter<'_>) -> Expr {
589 match iter.next().unwrap() {
590 Token::Ident(ref id) => {
591 if let Some(func) = Function::from_name(id) {
592 parse_function(iter, func)
593 } else if let Some(attr) = attr_from_name(id) {
594 match iter.peek().cloned().unwrap() {
595 Token::Ident(ref name) => {
596 iter.next();
597 Expr::new_unit(format!("{}{}", attr, name))
598 }
599 x => Expr::new_error(format!(
600 "Attribute must be followed by ident, got {}",
601 describe(&x)
602 )),
603 }
604 } else {
605 match iter.peek().cloned().unwrap() {
606 Token::Ident(ref s) if s == "of" => {
607 iter.next();
608 Expr::new_of(id, parse_juxt(iter))
609 }
610 _ => Expr::new_unit(id.to_string()),
611 }
612 }
613 }
614 Token::Quote(string) => Expr::Quote { string },
615 Token::Decimal(num, frac, exp) => Numeric::from_parts(
616 &*num,
617 frac.as_ref().map(|x| &**x),
618 exp.as_ref().map(|x| &**x),
619 )
620 .map(Expr::new_const)
621 .unwrap_or_else(Expr::new_error),
622 Token::Hex(num) => parse_radix(&*num, 16, "hex"),
623 Token::Oct(num) => parse_radix(&*num, 8, "octal"),
624 Token::Bin(num) => parse_radix(&*num, 2, "binary"),
625 Token::LPar => {
626 let res = parse_expr(iter);
627 match iter.next().unwrap() {
628 Token::RPar => res,
629 x => Expr::new_error(format!("Expected `)`, got {}", describe(&x))),
630 }
631 }
632 Token::Percent => Expr::new_unit("percent".to_owned()),
633 Token::Date(tokens) => Expr::Date { tokens },
634 Token::Comment(_) => parse_term(iter),
635 x => Expr::new_error(format!("Expected term, got {}", describe(&x))),
636 }
637}
638
639fn parse_suffix(iter: &mut Iter<'_>) -> Expr {
640 let left = parse_term(iter);
641 match *iter.peek().unwrap() {
642 Token::Percent => {
643 let mut left = left;
644 while let Some(&Token::Percent) = iter.peek() {
645 iter.next();
646 left = Expr::new_mul(vec![left, Expr::new_unit("percent".to_owned())]);
647 }
648 left
649 }
650 _ => left,
651 }
652}
653
654fn parse_pow(iter: &mut Iter<'_>) -> Expr {
655 let left = parse_suffix(iter);
656 match *iter.peek().unwrap() {
657 Token::Caret => {
658 iter.next();
659 let right = parse_pow(iter);
660 Expr::new_pow(left, right)
661 }
662 Token::Exponent(ref exp_str) => {
663 let res = Numeric::from_parts(&exp_str, None, None);
664 let exp = res.map(Expr::new_const).unwrap_or_else(Expr::new_error);
665 iter.next();
666 Expr::new_pow(left, exp)
667 }
668 _ => left,
669 }
670}
671
672fn parse_unary(iter: &mut Iter<'_>) -> Expr {
673 match *iter.peek().unwrap() {
674 Token::Plus => {
675 iter.next();
676 Expr::new_plus(parse_unary(iter))
677 }
678 Token::Minus => {
679 iter.next();
680 Expr::new_negate(parse_unary(iter))
681 }
682 _ => parse_pow(iter),
683 }
684}
685
686fn parse_frac(iter: &mut Iter<'_>) -> Expr {
687 let left = parse_unary(iter);
688 match *iter.peek().unwrap() {
689 Token::Pipe => {
690 iter.next();
691 let right = parse_unary(iter);
692 Expr::new_frac(left, right)
693 }
694 _ => left,
695 }
696}
697
698fn parse_juxt(iter: &mut Iter<'_>) -> Expr {
699 let mut terms = vec![parse_frac(iter)];
700 loop {
701 match iter.peek().cloned().unwrap() {
702 Token::Asterisk
703 | Token::Slash
704 | Token::Comma
705 | Token::Equals
706 | Token::Plus
707 | Token::Minus
708 | Token::DashArrow
709 | Token::RPar
710 | Token::Newline
711 | Token::DoubleLAngle
712 | Token::DoubleRAngle
713 | Token::KeywordMod
714 | Token::KeywordAnd
715 | Token::KeywordOr
716 | Token::KeywordXor
717 | Token::Comment(_)
718 | Token::Eof => break,
719 Token::Degree(deg) => {
720 iter.next();
721 terms = vec![Expr::new_suffix(deg, Expr::new_mul(terms))]
722 }
723 _ => terms.push(parse_frac(iter)),
724 }
725 }
726 if terms.len() == 1 {
727 terms.pop().unwrap()
728 } else {
729 Expr::new_mul(terms)
730 }
731}
732
733fn parse_div(iter: &mut Iter<'_>) -> Expr {
734 let mut terms = vec![parse_juxt(iter)];
735 loop {
736 match iter.peek().cloned().unwrap() {
737 Token::Slash => {
738 iter.next();
739 let left = Expr::new_mul(terms.drain(..).collect());
740 terms = vec![Expr::new_frac(left, parse_juxt(iter))];
741 }
742 Token::Asterisk => {
743 iter.next();
744 terms.push(parse_juxt(iter));
745 }
746 Token::DoubleLAngle => {
747 iter.next();
748 let left = Expr::new_mul(terms.drain(..).collect());
749 terms = vec![Expr::new_bin(BinOpType::ShiftL, left, parse_juxt(iter))];
750 }
751 Token::DoubleRAngle => {
752 iter.next();
753 let left = Expr::new_mul(terms.drain(..).collect());
754 terms = vec![Expr::new_bin(BinOpType::ShiftR, left, parse_juxt(iter))];
755 }
756 Token::KeywordMod => {
757 iter.next();
758 let left = Expr::new_mul(terms.drain(..).collect());
759 terms = vec![Expr::new_bin(BinOpType::Mod, left, parse_juxt(iter))];
760 }
761 Token::KeywordAnd => {
762 iter.next();
763 let left = Expr::new_mul(terms.drain(..).collect());
764 terms = vec![Expr::new_bin(BinOpType::And, left, parse_juxt(iter))];
765 }
766 Token::KeywordOr => {
767 iter.next();
768 let left = Expr::new_mul(terms.drain(..).collect());
769 terms = vec![Expr::new_bin(BinOpType::Or, left, parse_juxt(iter))];
770 }
771 Token::KeywordXor => {
772 iter.next();
773 let left = Expr::new_mul(terms.drain(..).collect());
774 terms = vec![Expr::new_bin(BinOpType::Xor, left, parse_juxt(iter))];
775 }
776 _ => break,
777 }
778 }
779 if terms.len() == 1 {
780 terms.pop().unwrap()
781 } else {
782 Expr::new_mul(terms)
783 }
784}
785
786fn parse_add(iter: &mut Iter<'_>) -> Expr {
787 let mut left = parse_div(iter);
788 loop {
789 match *iter.peek().unwrap() {
790 Token::Plus => {
791 iter.next();
792 let right = parse_div(iter);
793 left = Expr::new_add(left, right)
794 }
795 Token::Minus => {
796 iter.next();
797 let right = parse_div(iter);
798 left = Expr::new_sub(left, right)
799 }
800 _ => return left,
801 }
802 }
803}
804
805fn parse_eq(iter: &mut Iter<'_>) -> Expr {
806 let left = parse_add(iter);
807 match iter.peek().cloned().unwrap() {
808 Token::Equals => {
809 iter.next();
810 let right = parse_add(iter);
811 Expr::new_equals(left, right)
812 }
813 _ => left,
814 }
815}
816
/// Parses a complete expression from the token stream.
///
/// Public entry point of the expression grammar; it delegates to `parse_eq`, the
/// outermost precedence level.
pub fn parse_expr(iter: &mut Iter<'_>) -> Expr {
    parse_eq(iter)
}
820
821pub fn parse_unitlist(iter: &mut Iter<'_>) -> Option<Vec<String>> {
822 let mut expecting_term = true;
823 let mut res = vec![];
824 loop {
825 match iter.next().unwrap() {
826 Token::Ident(ref ident) if expecting_term => {
827 res.push(ident.clone());
828 expecting_term = false;
829 }
830 Token::Comma | Token::Semicolon if !expecting_term => {
831 expecting_term = true;
832 }
833 Token::Eof | Token::Newline | Token::Comment(_) if !expecting_term => break,
834 _ => return None,
835 }
836 }
837 if res.len() > 1 {
838 Some(res)
839 } else {
840 None
841 }
842}
843
844pub fn parse_offset(iter: &mut Iter<'_>) -> Option<i64> {
845 use std::str::FromStr;
846
847 let sign = match iter.next().unwrap() {
848 Token::Plus => 1,
849 Token::Minus => -1,
850 _ => return None,
851 };
852 let hour = match iter.next().unwrap() {
853 Token::Decimal(ref i, None, None) if i.len() == 2 => i.clone(),
854 _ => return None,
855 };
856 match iter.next().unwrap() {
857 Token::Colon => (),
858 _ => return None,
859 }
860 let min = match iter.next().unwrap() {
861 Token::Decimal(ref i, None, None) if i.len() == 2 => i.clone(),
862 _ => return None,
863 };
864 Some(sign * (i64::from_str(&*hour).unwrap() * 3600 + i64::from_str(&*min).unwrap() * 60))
865}
866
867pub fn parse_query(iter: &mut Iter<'_>) -> Query {
868 match iter.peek().cloned() {
869 Some(Token::Ident(ref s)) if s == "factorize" => {
870 iter.next();
871 return Query::Factorize(parse_eq(iter));
872 }
873 Some(Token::Ident(ref s)) if s == "units" => {
874 iter.next();
875 if let Some(Token::Ident(ref s)) = iter.peek().cloned() {
876 if s == "for" || s == "of" {
877 iter.next();
878 }
879 }
880 return Query::UnitsFor(parse_eq(iter));
881 }
882 Some(Token::Ident(ref s)) if s == "search" => {
883 iter.next();
884 if let Some(Token::Ident(ref s)) = iter.peek().cloned() {
885 return Query::Search(s.clone());
886 }
887 }
888 _ => (),
889 }
890 let left = parse_eq(iter);
891 match iter.peek().cloned().unwrap() {
892 Token::DashArrow => {
893 iter.next();
894 let mut copy = iter.clone();
895 if let Some(res) = parse_unitlist(&mut copy) {
896 *iter = copy;
897 return Query::Convert(left, Conversion::List(res), None, Digits::Default);
898 }
899 let digits = match iter.peek().cloned().unwrap() {
900 Token::Ident(ref s) if s == "digits" => {
901 iter.next();
902 match iter.peek().cloned() {
903 Some(Token::Decimal(int, None, None)) => {
904 iter.next();
905 match u64::from_str_radix(&*int, 10) {
906 Ok(v) => Digits::Digits(v),
907 Err(e) => {
908 return Query::Error(format!("Failed to parse digits: {}", e))
909 }
910 }
911 }
912 _ => Digits::FullInt,
913 }
914 }
915 Token::Ident(ref s) if s == "frac" || s == "fraction" || s == "ratio" => {
916 iter.next();
917 Digits::Fraction
918 }
919 Token::Ident(ref s) if s == "sci" || s == "scientific" => {
920 iter.next();
921 Digits::Scientific
922 }
923 Token::Ident(ref s) if s == "eng" || s == "engineering" => {
924 iter.next();
925 Digits::Engineering
926 }
927 _ => Digits::Default,
928 };
929 let base = match iter.peek().cloned().unwrap() {
930 Token::Ident(ref s) if s == "base" => {
931 iter.next();
932 match iter.next() {
933 Some(Token::Decimal(int, None, None)) => {
934 match u64::from_str_radix(&*int, 10) {
935 Ok(v @ 2..=36) => Some(v as u8),
936 Ok(v) => {
937 return Query::Error(format!(
938 "Unsupported base {}, must be from 2 to 36",
939 v
940 ))
941 }
942 Err(e) => {
943 return Query::Error(format!("Failed to parse base: {}", e))
944 }
945 }
946 }
947 Some(x) => {
948 return Query::Error(format!(
949 "Expected decimal base, got {}",
950 describe(&x)
951 ))
952 }
953 None => return Query::Error("Expected decimal base, got eof".to_string()),
954 }
955 }
956 Token::Ident(ref s) if s == "hex" || s == "hexadecimal" || s == "base16" => {
957 iter.next();
958 Some(16)
959 }
960 Token::Ident(ref s) if s == "oct" || s == "octal" || s == "base8" => {
961 iter.next();
962 Some(8)
963 }
964 Token::Ident(ref s) if s == "bin" || s == "binary" || s == "base2" => {
965 iter.next();
966 Some(2)
967 }
968 _ => None,
969 };
970 let right = match iter.peek().cloned().unwrap() {
971 Token::Eof => Conversion::None,
972 Token::Degree(deg) => Conversion::Degree(deg),
973 Token::Plus | Token::Minus => {
974 let mut old = iter.clone();
975 if let Some(off) = parse_offset(iter) {
976 Conversion::Offset(off)
977 } else {
978 Conversion::Expr(parse_eq(&mut old))
979 }
980 }
981 Token::Ident(ref s) if is_valid_timezone(s) => Conversion::Timezone(
982 TimeZone::get(s).expect("Running TimeZone::lookup a second time failed"),
983 ),
984 _ => Conversion::Expr(parse_eq(iter)),
985 };
986 Query::Convert(left, right, base, digits)
987 }
988 _ => Query::Expr(left),
989 }
990}
991
992fn is_valid_timezone(s: &String) -> bool {
993 s != "GB" && TimeZone::get(s).is_ok()
994}
995
// Unit tests for the lexer and the expression/query parser. Most tests compare the
// `to_string()` rendering of the parsed expression, which makes grouping visible.
#[cfg(test)]
mod test {
    use super::*;

    // Lexes and parses `input` as an expression and renders the result as text.
    fn parse(input: &str) -> String {
        parse_expr(&mut TokenIterator::new(input).peekable()).to_string()
    }

    // Pins how a chain of `+` and `-` is grouped in the rendered output.
    #[test]
    fn add_assoc() {
        assert_eq!(parse("a + b - c + d - e"), "((a + b) - c + d) - e");
    }

    // A lone `-` must yield a negated error expression.
    #[test]
    fn sub_crash_regression() {
        assert_eq!(parse("-"), "-<error: Expected term, got eof>");
    }

    // The Unicode multiplication signs behave like `*`.
    #[test]
    fn multiplication() {
        assert_eq!(parse("a⋅b"), parse("a*b"));
        assert_eq!(parse("a×b"), parse("a*b"));
    }

    // `|`, the Unicode fraction/division slashes and `÷` all render like `/`.
    #[test]
    fn division() {
        assert_eq!(parse("2|3"), parse("2/3"));
        assert_eq!(parse("2∕3"), parse("2/3"));
        assert_eq!(parse("2÷3"), parse("2/3"));
        assert_eq!(parse("2⁄3"), parse("2/3"));
    }

    // Superscript digits are equivalent to `^` followed by the same digits; a
    // superscript with nothing before it is an error.
    #[test]
    fn exponents() {
        assert_eq!(parse("2¹³⁶²⁷⁹⁸⁴¹−1"), parse("2^136279841−1"));
        assert_eq!(parse("e³"), parse("e^3"));
        assert_eq!(parse("1m/s²"), parse("1m/s^2"));
        assert_eq!(parse("1kg*m²/s²"), parse("1kg*m^2/s^2"));
        assert_eq!(parse("1V/m²"), parse("1V/m^2"));
        assert_eq!(parse("x¹²³⁴⁵⁶⁷⁸⁹⁰"), parse("x^1234567890"));
        assert_eq!(parse("¹"), "<error: Expected term, got exponent>");
    }

    // `*` and `/` group left to right; juxtaposition and `|` bind tighter than both.
    #[test]
    fn mul_assoc() {
        assert_eq!(
            parse("a b * c / d / e f * g h"),
            "((((a b) * c) / d) / e f) * (g h)"
        );
        assert_eq!(parse("a|b c / g e|f"), "((a / b) * c) / (g * (e / f))");
        assert_eq!(parse("a / b / c"), "(a / b) / c");
    }

    // Shift and keyword operators sit at the same level as `/`.
    #[test]
    fn parse_extra_ops() {
        assert_eq!(parse("a b mod c d"), "a b mod c d");
        assert_eq!(parse("a b << c d"), "a b << c d");
        assert_eq!(parse("a b >> c d"), "a b >> c d");
        assert_eq!(parse("a b and c d"), "a b and c d");
        assert_eq!(parse("a b or c d"), "a b or c d");
        assert_eq!(parse("a b xor c d"), "a b xor c d");
        assert_eq!(parse("a / b c mod d e / f"), "((a / b c) mod d e) / f");
    }

    // A temperature suffix applies to the juxtaposed product before it.
    #[test]
    fn suffix_prec() {
        assert_eq!(parse("a b °C + x y °F"), "a b °C + x y °F");
        assert_eq!(parse("a b °C c"), "(a b °C) * c");
        assert_eq!(parse("a °C / x"), "a °C / x");
        assert_eq!(parse("a °C * x"), "(a °C) * x");
    }

    // Incomplete numeric literals surface as lexer errors inside the parse error.
    #[test]
    fn number_lex() {
        assert_eq!(
            parse("1e"),
            "<error: Expected term, got <Malformed number literal: No digits after exponent>>"
        );
        assert_eq!(
            parse("1."),
            "<error: Expected term, got <Malformed number literal: No digits after decimal point>>"
        );
    }

    // A single conversion target is an expression, not a one-element unit list.
    #[test]
    fn mono_unit_list() {
        use crate::ast::*;
        match parse_query(&mut TokenIterator::new("foo -> bar").peekable()) {
            Query::Convert(_, Conversion::Expr(_), _, _) => (),
            x => panic!("Expected Convert(_, Expr(_), _), got {:?}", x),
        }
    }

    // `of` takes the following juxtaposition as its operand and stops at `/`.
    #[test]
    fn test_of() {
        assert_eq!(parse("foo of 1 abc def / 12"), "(foo of 1 abc def) / 12");
    }

    // A currency sign directly before a number is split off as its own unit; before
    // letters, or after letters, it stays part of the identifier.
    #[test]
    fn test_prefixed_currency() {
        assert_eq!(parse("$2.5"), "$ * 2.5");
        assert_eq!(parse("£3"), "£ * 3");
        assert_eq!(parse("$.01"), "$ * 0.01");
        assert_eq!(parse("$asdf"), "$asdf");
        assert_eq!(parse("C$"), "C$");
    }

    // Unary minus binds looser than exponentiation.
    #[test]
    fn test_pow_prec() {
        assert_eq!(parse("-2^5"), "-(2^5)");
    }
}