Skip to main content

rink_core/parsing/
text_query.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5use crate::ast::*;
6use crate::output::Digits;
7use crate::types::{BigInt, BigRat, Numeric, TimeZone};
8use std::iter::Peekable;
9use std::str::Chars;
10
/// A lexical token produced by [`TokenIterator`].
#[derive(Debug, Clone)]
pub enum Token {
    /// A line break (`\n`).
    Newline,
    /// A `//` or `/* */` comment. The payload is the line count reported by
    /// the lexer: always 1 for `//` comments, the number of newlines seen for
    /// `/* */` comments.
    Comment(usize),
    /// An identifier. Also produced for `"..."`-delimited text and for `\u`
    /// escapes; words recognized as keywords get their own variants instead.
    Ident(String),
    /// A decimal number: integer digits, optional fraction digits, and an
    /// optional exponent (which may start with `-`).
    Decimal(String, Option<String>, Option<String>),
    /// A run of Unicode superscript digits, stored as ASCII digits.
    Exponent(String),
    /// The digits of a `0x` literal, without the prefix.
    Hex(String),
    /// The digits of a `0o` literal, without the prefix.
    Oct(String),
    /// The digits of a `0b` literal, without the prefix.
    Bin(String),
    /// The contents of a `'...'` string with escape sequences resolved.
    Quote(String),
    /// `/`, `÷`, or the keyword `per`.
    Slash,
    /// `|`, U+2044 (fraction slash) or U+2215 (division slash).
    Pipe,
    /// `;`
    Semicolon,
    /// `=`
    Equals,
    /// `^` or `**`.
    Caret,
    /// End of input. The lexer keeps yielding this once the input is exhausted.
    Eof,
    /// `(`
    LPar,
    /// `)`
    RPar,
    /// `+`
    Plus,
    /// `-` or U+2212 (minus sign).
    Minus,
    /// `*`, `⋅` or `×`.
    Asterisk,
    /// `->`, `→`, or one of the keywords `to` / `in`.
    DashArrow,
    /// `:`
    Colon,
    /// `<<`
    DoubleLAngle,
    /// `>>`
    DoubleRAngle,
    /// The keyword `mod`.
    KeywordMod,
    /// The keyword `xor`.
    KeywordXor,
    /// The keyword `or`.
    KeywordOr,
    /// The keyword `and`.
    KeywordAnd,
    /// The tokens found between the `#` delimiters of a date literal.
    Date(Vec<DateToken>),
    /// `,`
    Comma,
    /// A temperature scale name such as `degC` or `fahrenheit`.
    Degree(Degree),
    /// `%`
    Percent,
    /// A lexing failure, with a human-readable message.
    Error(String),
}
47
48fn describe(token: &Token) -> String {
49    match *token {
50        Token::Newline | Token::Comment(_) => "\\n".to_owned(),
51        Token::Ident(_) => "ident".to_owned(),
52        Token::Decimal(_, _, _) => "number".to_owned(),
53        Token::Exponent(_) => "exponent".to_owned(),
54        Token::Hex(_) => "hex".to_owned(),
55        Token::Oct(_) => "octal".to_owned(),
56        Token::Bin(_) => "binary".to_owned(),
57        Token::Quote(_) => "quote".to_owned(),
58        Token::Slash => "`/`".to_owned(),
59        Token::Pipe => "`|`".to_owned(),
60        Token::Semicolon => "`;`".to_owned(),
61        Token::Equals => "`=`".to_owned(),
62        Token::Caret => "`^`".to_owned(),
63        Token::Eof => "eof".to_owned(),
64        Token::LPar => "`(`".to_owned(),
65        Token::RPar => "`)`".to_owned(),
66        Token::Plus => "`+`".to_owned(),
67        Token::Minus => "`-`".to_owned(),
68        Token::Asterisk => "`*`".to_owned(),
69        Token::DashArrow => "`->`".to_owned(),
70        Token::Colon => "`:`".to_owned(),
71        Token::DoubleLAngle => "`<<`".to_owned(),
72        Token::DoubleRAngle => "`>>`".to_owned(),
73        Token::KeywordMod => "`mod`".to_owned(),
74        Token::KeywordXor => "`xor`".to_owned(),
75        Token::KeywordOr => "`or`".to_owned(),
76        Token::KeywordAnd => "`and`".to_owned(),
77        Token::Date(_) => "date literal".to_owned(),
78        Token::Comma => "`,`".to_owned(),
79        Token::Percent => "%".to_owned(),
80        Token::Degree(ref deg) => format!("`{}`", deg),
81        Token::Error(ref e) => format!("<{}>", e),
82    }
83}
84
/// Lexer over a query string.
///
/// Wraps the input's character stream with one character of lookahead.
#[derive(Clone)]
pub struct TokenIterator<'a>(Peekable<Chars<'a>>);

impl<'a> TokenIterator<'a> {
    /// Creates a lexer positioned at the start of `input`.
    pub fn new(input: &'a str) -> Self {
        let chars = input.chars();
        TokenIterator(chars.peekable())
    }
}
93
/// Returns `true` if `ch` is a currency sign such as `$`.
///
/// The set is the Unicode "Currency Symbol" (Sc) general category. Code
/// points are written as escapes so that visually similar characters (for
/// example the fullwidth forms) cannot be silently folded into their ASCII
/// or Latin-1 look-alikes by an editor or normalizing tool.
fn is_currency(ch: char) -> bool {
    match ch {
        '$'
        | '\u{00A2}'..='\u{00A5}' // cent, pound, generic currency, yen
        | '\u{058F}' // Armenian dram
        | '\u{060B}' // afghani
        | '\u{07FE}' | '\u{07FF}' // NKo dorome, NKo taman
        | '\u{09F2}' | '\u{09F3}' | '\u{09FB}' // Bengali rupee mark, rupee sign, ganda mark
        | '\u{0AF1}' // Gujarati rupee
        | '\u{0BF9}' // Tamil rupee
        | '\u{0E3F}' // Thai baht
        | '\u{17DB}' // Khmer riel
        | '\u{20A0}'..='\u{20C0}' // the "Currency Symbols" block (euro, rupee, bitcoin, ...)
        | '\u{A838}' // North Indic rupee mark
        | '\u{FDFC}' // rial
        | '\u{FE69}' // small dollar sign
        | '\u{FF04}' // fullwidth dollar sign
        | '\u{FFE0}' | '\u{FFE1}' // fullwidth cent, fullwidth pound
        | '\u{FFE5}' | '\u{FFE6}' // fullwidth yen, fullwidth won
        | '\u{11FDD}'..='\u{11FE0}' // Tamil kaacu, panam, pon, varaakan
        | '\u{1E2FF}' // Wancho ngun
        | '\u{1ECB0}' // Indic Siyaq rupee mark
        => true,
        _ => false,
    }
}
106
/// Maps a Unicode superscript digit to the corresponding ASCII digit.
///
/// Returns `None` for any character that is not one of the ten superscript
/// digits.
fn digit_from_superscript(sup: char) -> Option<char> {
    // Index `i` holds the superscript form of the digit `i`. Superscript
    // 1, 2 and 3 are U+00B9, U+00B2 and U+00B3; the others come from the
    // "Superscripts and Subscripts" block (U+2070 and U+2074..U+2079).
    const SUPERSCRIPT_DIGITS: [char; 10] = [
        '\u{2070}', '\u{00B9}', '\u{00B2}', '\u{00B3}', '\u{2074}', '\u{2075}', '\u{2076}',
        '\u{2077}', '\u{2078}', '\u{2079}',
    ];
    SUPERSCRIPT_DIGITS
        .iter()
        .position(|&candidate| candidate == sup)
        .and_then(|value| std::char::from_digit(value as u32, 10))
}
123
124impl<'a> Iterator for TokenIterator<'a> {
125    type Item = Token;
126
127    fn next(&mut self) -> Option<Token> {
128        if self.0.peek().is_none() {
129            return Some(Token::Eof);
130        }
131        let res = match self.0.next().unwrap() {
132            ' ' | '\t' => return self.next(),
133            '\n' => Token::Newline,
134            '(' => Token::LPar,
135            ')' => Token::RPar,
136            '+' => Token::Plus,
137            ';' => Token::Semicolon,
138            '%' => Token::Percent,
139            '=' => Token::Equals,
140            '^' => Token::Caret,
141            ',' => Token::Comma,
142            // U+2044 fraction slash '⁄'
143            // U+2215 division slash '∕'
144            '|' | '\u{2044}' | '\u{2215}' => Token::Pipe,
145            ':' => Token::Colon,
146            '→' => Token::DashArrow,
147            '<' if self.0.peek().cloned() == Some('<') => {
148                self.0.next();
149                Token::DoubleLAngle
150            }
151            '>' if self.0.peek().cloned() == Some('>') => {
152                self.0.next();
153                Token::DoubleRAngle
154            }
155            '*' => {
156                if self.0.peek().cloned() == Some('*') {
157                    self.0.next();
158                    Token::Caret
159                } else {
160                    Token::Asterisk
161                }
162            }
163            // U+22C5 dot operator '⋅'
164            // U+00D7 multiplication sign '×'
165            '⋅' | '×' => Token::Asterisk,
166            '-' => match self.0.peek().cloned() {
167                Some('>') => {
168                    self.0.next();
169                    Token::DashArrow
170                }
171                _ => Token::Minus,
172            },
173            // U+2212 minus sign '−'
174            '\u{2212}' => Token::Minus,
175            // U+00F7 division sign '÷'
176            '÷' => Token::Slash,
177            '/' => match self.0.peek() {
178                Some(&'/') => loop {
179                    match self.0.next() {
180                        None | Some('\n') => return Some(Token::Comment(1)),
181                        _ => (),
182                    }
183                },
184                Some(&'*') => {
185                    let mut lines = 0;
186                    loop {
187                        if let Some(&'\n') = self.0.peek() {
188                            lines += 1;
189                        }
190                        if let Some('*') = self.0.next() {
191                            if let Some(&'/') = self.0.peek() {
192                                self.0.next();
193                                return Some(Token::Comment(lines));
194                            }
195                        }
196                        if self.0.peek() == None {
197                            return Some(Token::Error("Expected `*/`, got EOF".to_string()));
198                        }
199                    }
200                }
201                _ => Token::Slash,
202            },
203            x @ '0'..='9' | x @ '.' => {
204                if x == '0' && self.0.peek() == Some(&'x') {
205                    self.0.next();
206                    let mut hex = String::new();
207
208                    while let Some(c) = self.0.peek().cloned() {
209                        match c {
210                            '0'..='9' | 'a'..='f' | 'A'..='F' => hex.push(self.0.next().unwrap()),
211                            '\u{2009}' | '_' => {
212                                self.0.next();
213                            }
214                            _ => break,
215                        }
216                    }
217                    if hex.is_empty() {
218                        return Some(Token::Error(
219                            "Malformed hexadecimal literal: No digits after 0x".to_owned(),
220                        ));
221                    }
222                    return Some(Token::Hex(hex));
223                }
224
225                if x == '0' && self.0.peek() == Some(&'o') {
226                    self.0.next();
227                    let mut oct = String::new();
228
229                    while let Some(c) = self.0.peek().cloned() {
230                        match c {
231                            '0'..='7' => oct.push(self.0.next().unwrap()),
232                            '\u{2009}' | '_' => {
233                                self.0.next();
234                            }
235                            _ => break,
236                        }
237                    }
238                    if oct.is_empty() {
239                        return Some(Token::Error(
240                            "Malformed octal literal: No digits after 0o".to_owned(),
241                        ));
242                    }
243                    return Some(Token::Oct(oct));
244                }
245
246                if x == '0' && self.0.peek() == Some(&'b') {
247                    self.0.next();
248                    let mut bin = String::new();
249
250                    while let Some(c) = self.0.peek().cloned() {
251                        match c {
252                            '0' | '1' => bin.push(self.0.next().unwrap()),
253                            '\u{2009}' | '_' => {
254                                self.0.next();
255                            }
256                            _ => break,
257                        }
258                    }
259                    if bin.is_empty() {
260                        return Some(Token::Error(
261                            "Malformed binary literal: No digits after 0b".to_owned(),
262                        ));
263                    }
264                    return Some(Token::Bin(bin));
265                }
266
267                let mut integer = String::new();
268                let mut frac = None;
269                let mut exp = None;
270
271                // integer component
272                if x != '.' {
273                    integer.push(x);
274                    while let Some(c) = self.0.peek().cloned() {
275                        match c {
276                            '0'..='9' => integer.push(self.0.next().unwrap()),
277                            '\u{2009}' | '_' => {
278                                self.0.next();
279                            }
280                            _ => break,
281                        }
282                    }
283                } else {
284                    integer.push('0');
285                }
286                // fractional component
287                if x == '.' || Some('.') == self.0.peek().cloned() {
288                    let mut buf = String::new();
289                    if x != '.' {
290                        self.0.next();
291                    }
292                    while let Some(c) = self.0.peek().cloned() {
293                        match c {
294                            '0'..='9' => buf.push(self.0.next().unwrap()),
295                            '\u{2009}' | '_' => {
296                                self.0.next();
297                            }
298                            _ => break,
299                        }
300                    }
301                    if buf.is_empty() {
302                        return Some(Token::Error(
303                            "Malformed number literal: No digits after decimal point".to_owned(),
304                        ));
305                    }
306                    frac = Some(buf)
307                }
308                // exponent
309                if let Some('e') = self.0.peek().cloned().map(|x| x.to_ascii_lowercase()) {
310                    let mut buf = String::new();
311                    self.0.next();
312                    if let Some('e') = self.0.peek().cloned().map(|x| x.to_ascii_lowercase()) {
313                        self.0.next();
314                    }
315                    if let Some(c) = self.0.peek().cloned() {
316                        match c {
317                            '-' => {
318                                buf.push(self.0.next().unwrap());
319                            }
320                            '+' => {
321                                self.0.next();
322                            }
323                            _ => (),
324                        }
325                    }
326                    while let Some(c) = self.0.peek().cloned() {
327                        match c {
328                            '0'..='9' => buf.push(self.0.next().unwrap()),
329                            // U+2009 thin space ' '
330                            '\u{2009}' | '_' => {
331                                self.0.next();
332                            }
333                            _ => break,
334                        }
335                    }
336                    if buf.is_empty() {
337                        return Some(Token::Error(
338                            "Malformed number literal: No digits after exponent".to_owned(),
339                        ));
340                    }
341                    exp = Some(buf)
342                }
343                Token::Decimal(integer, frac, exp)
344            }
345            x if digit_from_superscript(x).is_some() => {
346                let mut integer = String::new();
347                integer.push(digit_from_superscript(x).unwrap());
348                while let Some(c) = self.0.peek().cloned() {
349                    if let Some(digit) = digit_from_superscript(c) {
350                        self.0.next();
351                        integer.push(digit);
352                    } else {
353                        break;
354                    }
355                }
356                Token::Exponent(integer)
357            }
358            '\\' => match self.0.next() {
359                Some('u') => {
360                    let mut buf = String::new();
361                    while let Some(c) = self.0.peek().cloned() {
362                        if c.is_digit(16) {
363                            buf.push(self.0.next().unwrap());
364                        } else {
365                            break;
366                        }
367                    }
368                    let v = u32::from_str_radix(&*buf, 16).unwrap();
369                    if let Some(c) = ::std::char::from_u32(v) {
370                        let mut buf = String::new();
371                        buf.push(c);
372                        Token::Ident(buf)
373                    } else {
374                        Token::Error(format!("Invalid unicode scalar: {:x}", v))
375                    }
376                }
377                _ => Token::Error("Unexpected \\".to_string()),
378            },
379            '\'' => {
380                let mut buf = String::new();
381                loop {
382                    match self.0.next() {
383                        None | Some('\n') => {
384                            return Some(Token::Error("Unexpected newline or EOF".to_string()))
385                        }
386                        Some('\\') => match self.0.next() {
387                            Some('\'') => buf.push('\''),
388                            Some('n') => buf.push('\n'),
389                            Some('t') => buf.push('\t'),
390                            Some(c) => {
391                                return Some(Token::Error(format!(
392                                    "Invalid escape sequence \\{}",
393                                    c
394                                )))
395                            }
396                            None => return Some(Token::Error("Unexpected EOF".to_string())),
397                        },
398                        Some('\'') => break,
399                        Some(c) => buf.push(c),
400                    }
401                }
402                Token::Quote(buf)
403            }
404            '#' => {
405                let mut toks = vec![];
406                while self.0.peek().is_some() {
407                    let res = match self.0.next().unwrap() {
408                        '#' => break,
409                        ':' => DateToken::Colon,
410                        '-' => DateToken::Dash,
411                        '+' => DateToken::Plus,
412                        x if x.is_whitespace() => {
413                            while self.0.peek().map(|c| c.is_whitespace()).unwrap_or(false) {
414                                self.0.next();
415                            }
416                            DateToken::Space
417                        }
418                        x if x.is_digit(10) => {
419                            let mut integer = String::new();
420                            integer.push(x);
421                            while let Some(c) = self.0.peek().cloned() {
422                                if c.is_digit(10) {
423                                    self.0.next();
424                                    integer.push(c);
425                                } else {
426                                    break;
427                                }
428                            }
429                            let frac = if let Some('.') = self.0.peek().cloned() {
430                                let mut frac = String::new();
431                                self.0.next();
432                                while let Some(c) = self.0.peek().cloned() {
433                                    if c.is_digit(10) {
434                                        self.0.next();
435                                        frac.push(c);
436                                    } else {
437                                        break;
438                                    }
439                                }
440                                Some(frac)
441                            } else {
442                                None
443                            };
444                            DateToken::Number(integer, frac)
445                        }
446                        x => {
447                            let mut buf = String::new();
448                            buf.push(x);
449                            while let Some(c) = self.0.peek().cloned() {
450                                if !"#:-+ ".contains(c) && !c.is_digit(10) {
451                                    self.0.next();
452                                    buf.push(c);
453                                } else {
454                                    break;
455                                }
456                            }
457                            DateToken::Literal(buf)
458                        } //x => DateToken::Error(format!("Unexpected character '{}'", x))
459                    };
460                    toks.push(res);
461                }
462                if let Some(&DateToken::Space) = toks.first() {
463                    toks.remove(0);
464                }
465                if let Some(&DateToken::Space) = toks.last() {
466                    toks.pop();
467                }
468                Token::Date(toks)
469            }
470            '"' => {
471                let mut buf = String::new();
472                while let Some(c) = self.0.next() {
473                    if c == '\\' {
474                        if let Some(c) = self.0.next() {
475                            buf.push(c);
476                        }
477                    } else if c == '"' {
478                        break;
479                    } else {
480                        buf.push(c);
481                    }
482                }
483                Token::Ident(buf)
484            }
485            x => {
486                let mut buf = String::new();
487                let mut prev = x;
488                buf.push(x);
489                while let Some(c) = self.0.peek().cloned() {
490                    if digit_from_superscript(c).is_some() {
491                        // split x² into Ident(x) + Exponent(2)
492                        break;
493                    } else if c.is_digit(10) && is_currency(prev) {
494                        // split $10 into Ident($) + Decimal(10)
495                        break;
496                    } else if c.is_alphanumeric() || c == '_' || c == '$' {
497                        prev = self.0.next().unwrap();
498                        buf.push(prev);
499                    } else {
500                        break;
501                    }
502                }
503                match &*buf {
504                    "degC" | "°C" | "celsius" | "℃" => Token::Degree(Degree::Celsius),
505                    "degF" | "°F" | "fahrenheit" | "℉" => Token::Degree(Degree::Fahrenheit),
506                    "degRé" | "°Ré" | "degRe" | "°Re" | "réaumur" | "reaumur" => {
507                        Token::Degree(Degree::Reaumur)
508                    }
509                    "degRø" | "°Rø" | "degRo" | "°Ro" | "rømer" | "romer" => {
510                        Token::Degree(Degree::Romer)
511                    }
512                    "degDe" | "°De" | "delisle" => Token::Degree(Degree::Delisle),
513                    "degN" | "°N" | "degnewton" => Token::Degree(Degree::Newton),
514                    "per" => Token::Slash,
515                    "to" | "in" => Token::DashArrow,
516                    "mod" => Token::KeywordMod,
517                    "and" => Token::KeywordAnd,
518                    "or" => Token::KeywordOr,
519                    "xor" => Token::KeywordXor,
520                    _ => Token::Ident(buf),
521                }
522            }
523        };
524        Some(res)
525    }
526}
527
/// The token stream consumed by the parsing functions in this file: a
/// [`TokenIterator`] wrapped in `Peekable` so the next token can be inspected
/// without consuming it.
pub type Iter<'a> = Peekable<TokenIterator<'a>>;
529
/// Looks up the canonical attribute prefix for an attribute word.
///
/// Several words may share one prefix (e.g. `imperial`, `british` and `UK`
/// all map to `br`). Returns `None` when `name` is not an attribute word.
/// Matching is case-sensitive.
fn attr_from_name(name: &str) -> Option<&'static str> {
    // (accepted spelling, canonical prefix)
    const ATTRIBUTES: &[(&str, &str)] = &[
        ("int", "int"),
        ("international", "int"),
        ("UKSJJ", "UKSJJ"),
        ("UKB", "UKB"),
        ("UKC", "UKC"),
        ("UKK", "UKK"),
        ("imperial", "br"),
        ("british", "br"),
        ("UK", "br"),
        ("survey", "survey"),
        ("geodetic", "survey"),
        ("irish", "irish"),
        ("aust", "aust"),
        ("australian", "aust"),
        ("roman", "roman"),
        ("egyptian", "egyptian"),
        ("greek", "greek"),
        ("olympic", "olympic"),
    ];
    ATTRIBUTES
        .iter()
        .find(|&&(spelling, _)| spelling == name)
        .map(|&(_, prefix)| prefix)
}
548
549fn parse_function(iter: &mut Iter<'_>, func: Function) -> Expr {
550    let args = match iter.peek().cloned().unwrap() {
551        Token::LPar => {
552            iter.next();
553            let mut args = vec![];
554            loop {
555                if let Some(&Token::RPar) = iter.peek() {
556                    iter.next();
557                    break;
558                }
559                args.push(parse_expr(iter));
560                match iter.peek().cloned().unwrap() {
561                    Token::Comma => {
562                        iter.next();
563                    }
564                    Token::RPar => (),
565                    x => {
566                        return Expr::new_error(format!(
567                            "Expected `,` or `)`, got {}",
568                            describe(&x)
569                        ))
570                    }
571                }
572            }
573            args
574        }
575        _ => vec![parse_unary(iter)],
576    };
577    Expr::new_call(func, args)
578}
579
580fn parse_radix(num: &str, base: u32, description: &str) -> Expr {
581    BigInt::from_str_radix(num, base)
582        .map(|x| BigRat::ratio(&x, &BigInt::one()))
583        .map(Numeric::Rational)
584        .map(Expr::new_const)
585        .unwrap_or_else(|_| Expr::new_error(format!("Failed to parse {}", description)))
586}
587
588fn parse_term(iter: &mut Iter<'_>) -> Expr {
589    match iter.next().unwrap() {
590        Token::Ident(ref id) => {
591            if let Some(func) = Function::from_name(id) {
592                parse_function(iter, func)
593            } else if let Some(attr) = attr_from_name(id) {
594                match iter.peek().cloned().unwrap() {
595                    Token::Ident(ref name) => {
596                        iter.next();
597                        Expr::new_unit(format!("{}{}", attr, name))
598                    }
599                    x => Expr::new_error(format!(
600                        "Attribute must be followed by ident, got {}",
601                        describe(&x)
602                    )),
603                }
604            } else {
605                match iter.peek().cloned().unwrap() {
606                    Token::Ident(ref s) if s == "of" => {
607                        iter.next();
608                        Expr::new_of(id, parse_juxt(iter))
609                    }
610                    _ => Expr::new_unit(id.to_string()),
611                }
612            }
613        }
614        Token::Quote(string) => Expr::Quote { string },
615        Token::Decimal(num, frac, exp) => Numeric::from_parts(
616            &*num,
617            frac.as_ref().map(|x| &**x),
618            exp.as_ref().map(|x| &**x),
619        )
620        .map(Expr::new_const)
621        .unwrap_or_else(Expr::new_error),
622        Token::Hex(num) => parse_radix(&*num, 16, "hex"),
623        Token::Oct(num) => parse_radix(&*num, 8, "octal"),
624        Token::Bin(num) => parse_radix(&*num, 2, "binary"),
625        Token::LPar => {
626            let res = parse_expr(iter);
627            match iter.next().unwrap() {
628                Token::RPar => res,
629                x => Expr::new_error(format!("Expected `)`, got {}", describe(&x))),
630            }
631        }
632        Token::Percent => Expr::new_unit("percent".to_owned()),
633        Token::Date(tokens) => Expr::Date { tokens },
634        Token::Comment(_) => parse_term(iter),
635        x => Expr::new_error(format!("Expected term, got {}", describe(&x))),
636    }
637}
638
639fn parse_suffix(iter: &mut Iter<'_>) -> Expr {
640    let left = parse_term(iter);
641    match *iter.peek().unwrap() {
642        Token::Percent => {
643            let mut left = left;
644            while let Some(&Token::Percent) = iter.peek() {
645                iter.next();
646                left = Expr::new_mul(vec![left, Expr::new_unit("percent".to_owned())]);
647            }
648            left
649        }
650        _ => left,
651    }
652}
653
654fn parse_pow(iter: &mut Iter<'_>) -> Expr {
655    let left = parse_suffix(iter);
656    match *iter.peek().unwrap() {
657        Token::Caret => {
658            iter.next();
659            let right = parse_pow(iter);
660            Expr::new_pow(left, right)
661        }
662        Token::Exponent(ref exp_str) => {
663            let res = Numeric::from_parts(&exp_str, None, None);
664            let exp = res.map(Expr::new_const).unwrap_or_else(Expr::new_error);
665            iter.next();
666            Expr::new_pow(left, exp)
667        }
668        _ => left,
669    }
670}
671
672fn parse_unary(iter: &mut Iter<'_>) -> Expr {
673    match *iter.peek().unwrap() {
674        Token::Plus => {
675            iter.next();
676            Expr::new_plus(parse_unary(iter))
677        }
678        Token::Minus => {
679            iter.next();
680            Expr::new_negate(parse_unary(iter))
681        }
682        _ => parse_pow(iter),
683    }
684}
685
686fn parse_frac(iter: &mut Iter<'_>) -> Expr {
687    let left = parse_unary(iter);
688    match *iter.peek().unwrap() {
689        Token::Pipe => {
690            iter.next();
691            let right = parse_unary(iter);
692            Expr::new_frac(left, right)
693        }
694        _ => left,
695    }
696}
697
698fn parse_juxt(iter: &mut Iter<'_>) -> Expr {
699    let mut terms = vec![parse_frac(iter)];
700    loop {
701        match iter.peek().cloned().unwrap() {
702            Token::Asterisk
703            | Token::Slash
704            | Token::Comma
705            | Token::Equals
706            | Token::Plus
707            | Token::Minus
708            | Token::DashArrow
709            | Token::RPar
710            | Token::Newline
711            | Token::DoubleLAngle
712            | Token::DoubleRAngle
713            | Token::KeywordMod
714            | Token::KeywordAnd
715            | Token::KeywordOr
716            | Token::KeywordXor
717            | Token::Comment(_)
718            | Token::Eof => break,
719            Token::Degree(deg) => {
720                iter.next();
721                terms = vec![Expr::new_suffix(deg, Expr::new_mul(terms))]
722            }
723            _ => terms.push(parse_frac(iter)),
724        }
725    }
726    if terms.len() == 1 {
727        terms.pop().unwrap()
728    } else {
729        Expr::new_mul(terms)
730    }
731}
732
733fn parse_div(iter: &mut Iter<'_>) -> Expr {
734    let mut terms = vec![parse_juxt(iter)];
735    loop {
736        match iter.peek().cloned().unwrap() {
737            Token::Slash => {
738                iter.next();
739                let left = Expr::new_mul(terms.drain(..).collect());
740                terms = vec![Expr::new_frac(left, parse_juxt(iter))];
741            }
742            Token::Asterisk => {
743                iter.next();
744                terms.push(parse_juxt(iter));
745            }
746            Token::DoubleLAngle => {
747                iter.next();
748                let left = Expr::new_mul(terms.drain(..).collect());
749                terms = vec![Expr::new_bin(BinOpType::ShiftL, left, parse_juxt(iter))];
750            }
751            Token::DoubleRAngle => {
752                iter.next();
753                let left = Expr::new_mul(terms.drain(..).collect());
754                terms = vec![Expr::new_bin(BinOpType::ShiftR, left, parse_juxt(iter))];
755            }
756            Token::KeywordMod => {
757                iter.next();
758                let left = Expr::new_mul(terms.drain(..).collect());
759                terms = vec![Expr::new_bin(BinOpType::Mod, left, parse_juxt(iter))];
760            }
761            Token::KeywordAnd => {
762                iter.next();
763                let left = Expr::new_mul(terms.drain(..).collect());
764                terms = vec![Expr::new_bin(BinOpType::And, left, parse_juxt(iter))];
765            }
766            Token::KeywordOr => {
767                iter.next();
768                let left = Expr::new_mul(terms.drain(..).collect());
769                terms = vec![Expr::new_bin(BinOpType::Or, left, parse_juxt(iter))];
770            }
771            Token::KeywordXor => {
772                iter.next();
773                let left = Expr::new_mul(terms.drain(..).collect());
774                terms = vec![Expr::new_bin(BinOpType::Xor, left, parse_juxt(iter))];
775            }
776            _ => break,
777        }
778    }
779    if terms.len() == 1 {
780        terms.pop().unwrap()
781    } else {
782        Expr::new_mul(terms)
783    }
784}
785
786fn parse_add(iter: &mut Iter<'_>) -> Expr {
787    let mut left = parse_div(iter);
788    loop {
789        match *iter.peek().unwrap() {
790            Token::Plus => {
791                iter.next();
792                let right = parse_div(iter);
793                left = Expr::new_add(left, right)
794            }
795            Token::Minus => {
796                iter.next();
797                let right = parse_div(iter);
798                left = Expr::new_sub(left, right)
799            }
800            _ => return left,
801        }
802    }
803}
804
805fn parse_eq(iter: &mut Iter<'_>) -> Expr {
806    let left = parse_add(iter);
807    match iter.peek().cloned().unwrap() {
808        Token::Equals => {
809            iter.next();
810            let right = parse_add(iter);
811            Expr::new_equals(left, right)
812        }
813        _ => left,
814    }
815}
816
/// Parses an expression from the token stream.
///
/// This is the public wrapper around the expression parser in this file: it
/// forwards directly to `parse_eq` and returns whatever expression that yields,
/// advancing `iter` past the tokens that were consumed.
817pub fn parse_expr(iter: &mut Iter<'_>) -> Expr {
818    parse_eq(iter)
819}
820
821pub fn parse_unitlist(iter: &mut Iter<'_>) -> Option<Vec<String>> {
822    let mut expecting_term = true;
823    let mut res = vec![];
824    loop {
825        match iter.next().unwrap() {
826            Token::Ident(ref ident) if expecting_term => {
827                res.push(ident.clone());
828                expecting_term = false;
829            }
830            Token::Comma | Token::Semicolon if !expecting_term => {
831                expecting_term = true;
832            }
833            Token::Eof | Token::Newline | Token::Comment(_) if !expecting_term => break,
834            _ => return None,
835        }
836    }
837    if res.len() > 1 {
838        Some(res)
839    } else {
840        None
841    }
842}
843
844pub fn parse_offset(iter: &mut Iter<'_>) -> Option<i64> {
845    use std::str::FromStr;
846
847    let sign = match iter.next().unwrap() {
848        Token::Plus => 1,
849        Token::Minus => -1,
850        _ => return None,
851    };
852    let hour = match iter.next().unwrap() {
853        Token::Decimal(ref i, None, None) if i.len() == 2 => i.clone(),
854        _ => return None,
855    };
856    match iter.next().unwrap() {
857        Token::Colon => (),
858        _ => return None,
859    }
860    let min = match iter.next().unwrap() {
861        Token::Decimal(ref i, None, None) if i.len() == 2 => i.clone(),
862        _ => return None,
863    };
864    Some(sign * (i64::from_str(&*hour).unwrap() * 3600 + i64::from_str(&*min).unwrap() * 60))
865}
866
867pub fn parse_query(iter: &mut Iter<'_>) -> Query {
868    match iter.peek().cloned() {
869        Some(Token::Ident(ref s)) if s == "factorize" => {
870            iter.next();
871            return Query::Factorize(parse_eq(iter));
872        }
873        Some(Token::Ident(ref s)) if s == "units" => {
874            iter.next();
875            if let Some(Token::Ident(ref s)) = iter.peek().cloned() {
876                if s == "for" || s == "of" {
877                    iter.next();
878                }
879            }
880            return Query::UnitsFor(parse_eq(iter));
881        }
882        Some(Token::Ident(ref s)) if s == "search" => {
883            iter.next();
884            if let Some(Token::Ident(ref s)) = iter.peek().cloned() {
885                return Query::Search(s.clone());
886            }
887        }
888        _ => (),
889    }
890    let left = parse_eq(iter);
891    match iter.peek().cloned().unwrap() {
892        Token::DashArrow => {
893            iter.next();
894            let mut copy = iter.clone();
895            if let Some(res) = parse_unitlist(&mut copy) {
896                *iter = copy;
897                return Query::Convert(left, Conversion::List(res), None, Digits::Default);
898            }
899            let digits = match iter.peek().cloned().unwrap() {
900                Token::Ident(ref s) if s == "digits" => {
901                    iter.next();
902                    match iter.peek().cloned() {
903                        Some(Token::Decimal(int, None, None)) => {
904                            iter.next();
905                            match u64::from_str_radix(&*int, 10) {
906                                Ok(v) => Digits::Digits(v),
907                                Err(e) => {
908                                    return Query::Error(format!("Failed to parse digits: {}", e))
909                                }
910                            }
911                        }
912                        _ => Digits::FullInt,
913                    }
914                }
915                Token::Ident(ref s) if s == "frac" || s == "fraction" || s == "ratio" => {
916                    iter.next();
917                    Digits::Fraction
918                }
919                Token::Ident(ref s) if s == "sci" || s == "scientific" => {
920                    iter.next();
921                    Digits::Scientific
922                }
923                Token::Ident(ref s) if s == "eng" || s == "engineering" => {
924                    iter.next();
925                    Digits::Engineering
926                }
927                _ => Digits::Default,
928            };
929            let base = match iter.peek().cloned().unwrap() {
930                Token::Ident(ref s) if s == "base" => {
931                    iter.next();
932                    match iter.next() {
933                        Some(Token::Decimal(int, None, None)) => {
934                            match u64::from_str_radix(&*int, 10) {
935                                Ok(v @ 2..=36) => Some(v as u8),
936                                Ok(v) => {
937                                    return Query::Error(format!(
938                                        "Unsupported base {}, must be from 2 to 36",
939                                        v
940                                    ))
941                                }
942                                Err(e) => {
943                                    return Query::Error(format!("Failed to parse base: {}", e))
944                                }
945                            }
946                        }
947                        Some(x) => {
948                            return Query::Error(format!(
949                                "Expected decimal base, got {}",
950                                describe(&x)
951                            ))
952                        }
953                        None => return Query::Error("Expected decimal base, got eof".to_string()),
954                    }
955                }
956                Token::Ident(ref s) if s == "hex" || s == "hexadecimal" || s == "base16" => {
957                    iter.next();
958                    Some(16)
959                }
960                Token::Ident(ref s) if s == "oct" || s == "octal" || s == "base8" => {
961                    iter.next();
962                    Some(8)
963                }
964                Token::Ident(ref s) if s == "bin" || s == "binary" || s == "base2" => {
965                    iter.next();
966                    Some(2)
967                }
968                _ => None,
969            };
970            let right = match iter.peek().cloned().unwrap() {
971                Token::Eof => Conversion::None,
972                Token::Degree(deg) => Conversion::Degree(deg),
973                Token::Plus | Token::Minus => {
974                    let mut old = iter.clone();
975                    if let Some(off) = parse_offset(iter) {
976                        Conversion::Offset(off)
977                    } else {
978                        Conversion::Expr(parse_eq(&mut old))
979                    }
980                }
981                Token::Ident(ref s) if is_valid_timezone(s) => Conversion::Timezone(
982                    TimeZone::get(s).expect("Running TimeZone::lookup a second time failed"),
983                ),
984                _ => Conversion::Expr(parse_eq(iter)),
985            };
986            Query::Convert(left, right, base, digits)
987        }
988        _ => Query::Expr(left),
989    }
990}
991
992fn is_valid_timezone(s: &String) -> bool {
993    s != "GB" && TimeZone::get(s).is_ok()
994}
995
// Unit tests for the expression and query parser. Most tests compare the
// printed form of the parsed AST, so the expected strings pin both grouping
// and formatting.
996#[cfg(test)]
997mod test {
998    use super::*;
999
    // Lexes and parses `input` as an expression, then renders the result with
    // `to_string` so it can be compared against an expected string.
1000    fn parse(input: &str) -> String {
1001        parse_expr(&mut TokenIterator::new(input).peekable()).to_string()
1002    }
1003
    // Pins the printed grouping of a mixed chain of `+` and `-`.
1004    #[test]
1005    fn add_assoc() {
1006        assert_eq!(parse("a + b - c + d - e"), "((a + b) - c + d) - e");
1007    }
1008
    // A lone `-` must produce an error term; the test name suggests this input
    // used to crash.
1009    #[test]
1010    fn sub_crash_regression() {
1011        assert_eq!(parse("-"), "-<error: Expected term, got eof>");
1012    }
1013
    // Unicode multiplication signs parse the same as `*`.
1014    #[test]
1015    fn multiplication() {
1016        assert_eq!(parse("a⋅b"), parse("a*b"));
1017        assert_eq!(parse("a×b"), parse("a*b"));
1018    }
1019
    // `|` and the Unicode division/fraction characters print the same as `/`.
1020    #[test]
1021    fn division() {
1022        assert_eq!(parse("2|3"), parse("2/3"));
1023        assert_eq!(parse("2∕3"), parse("2/3"));
1024        assert_eq!(parse("2÷3"), parse("2/3"));
1025        assert_eq!(parse("2⁄3"), parse("2/3"));
1026    }
1027
    // Superscript digits behave like `^` followed by the same digits; a
    // superscript with nothing before it is an error.
1028    #[test]
1029    fn exponents() {
1030        assert_eq!(parse("2¹³⁶²⁷⁹⁸⁴¹−1"), parse("2^136279841−1"));
1031        assert_eq!(parse("e³"), parse("e^3"));
1032        assert_eq!(parse("1m/s²"), parse("1m/s^2"));
1033        assert_eq!(parse("1kg*m²/s²"), parse("1kg*m^2/s^2"));
1034        assert_eq!(parse("1V/m²"), parse("1V/m^2"));
1035        assert_eq!(parse("x¹²³⁴⁵⁶⁷⁸⁹⁰"), parse("x^1234567890"));
1036        assert_eq!(parse("¹"), "<error: Expected term, got exponent>");
1037    }
1038
    // Grouping of juxtaposition, `*`, `/` and `|` when they are mixed.
1039    #[test]
1040    fn mul_assoc() {
1041        assert_eq!(
1042            parse("a b * c / d / e f * g h"),
1043            "((((a b) * c) / d) / e f) * (g h)"
1044        );
1045        assert_eq!(parse("a|b c / g e|f"), "((a / b) * c) / (g * (e / f))");
1046        assert_eq!(parse("a / b / c"), "(a / b) / c");
1047    }
1048
    // `mod`, shifts and the bitwise keywords take juxtaposed terms on both
    // sides and combine with `/` as shown in the last case.
1049    #[test]
1050    fn parse_extra_ops() {
1051        assert_eq!(parse("a b mod c d"), "a b mod c d");
1052        assert_eq!(parse("a b << c d"), "a b << c d");
1053        assert_eq!(parse("a b >> c d"), "a b >> c d");
1054        assert_eq!(parse("a b and c d"), "a b and c d");
1055        assert_eq!(parse("a b or c d"), "a b or c d");
1056        assert_eq!(parse("a b xor c d"), "a b xor c d");
1057        assert_eq!(parse("a / b c mod d e / f"), "((a / b c) mod d e) / f");
1058    }
1059
    // How a degree suffix groups with the terms before and after it.
1060    #[test]
1061    fn suffix_prec() {
1062        assert_eq!(parse("a b °C + x y °F"), "a b °C + x y °F");
1063        assert_eq!(parse("a b °C c"), "(a b °C) * c");
1064        assert_eq!(parse("a °C / x"), "a °C / x");
1065        assert_eq!(parse("a °C * x"), "(a °C) * x");
1066    }
1067
    // Malformed number literals surface as error terms carrying the lexer's
    // message.
1068    #[test]
1069    fn number_lex() {
1070        assert_eq!(
1071            parse("1e"),
1072            "<error: Expected term, got <Malformed number literal: No digits after exponent>>"
1073        );
1074        assert_eq!(
1075            parse("1."),
1076            "<error: Expected term, got <Malformed number literal: No digits after decimal point>>"
1077        );
1078    }
1079
    // A single identifier after `->` is an expression conversion, not a
    // one-element unit list.
1080    #[test]
1081    fn mono_unit_list() {
1082        use crate::ast::*;
1083        match parse_query(&mut TokenIterator::new("foo -> bar").peekable()) {
1084            Query::Convert(_, Conversion::Expr(_), _, _) => (),
1085            x => panic!("Expected Convert(_, Expr(_), _), got {:?}", x),
1086        }
1087    }
1088
    // Pins how `of` groups relative to a following `/`.
1089    #[test]
1090    fn test_of() {
1091        assert_eq!(parse("foo of 1 abc def / 12"), "(foo of 1 abc def) / 12");
1092    }
1093
    // A currency symbol directly before a number multiplies it; before or
    // after letters it is part of the identifier.
1094    #[test]
1095    fn test_prefixed_currency() {
1096        assert_eq!(parse("$2.5"), "$ * 2.5");
1097        assert_eq!(parse("£3"), "£ * 3");
1098        assert_eq!(parse("$.01"), "$ * 0.01");
1099        assert_eq!(parse("$asdf"), "$asdf");
1100        assert_eq!(parse("C$"), "C$");
1101    }
1102
    // Exponentiation binds tighter than unary minus.
1103    #[test]
1104    fn test_pow_prec() {
1105        assert_eq!(parse("-2^5"), "-(2^5)");
1106    }
1107}