kir/
parse.rs

1use std::{collections::HashMap, hash::Hash, ops::Range, str::FromStr};
2
3use indexmap::IndexMap;
4use logos::Logos;
5use num::{BigInt, BigUint};
6
7use crate::ir::*;
8
/// Lexical token kinds produced by the logos-generated lexer.
///
/// The token carries no payload; the parser pairs each kind with the matched
/// source slice.
#[derive(Logos, Debug, Clone, Copy, PartialEq)]
pub enum Token {
  /// An identifier-like word: a letter or `_`, then letters, digits or `_`,
  /// optionally ending in a single `?`.
  #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*[?]?")]
  Keyword,
  /// A value reference: `%` followed by zero or more letters, digits or `_`.
  #[regex(r"%[a-zA-Z0-9_]*")]
  ValueId,
  /// A double-quoted string whose body contains no `"` character.
  // NOTE(review): because the body excludes `"`, an escaped quote (`\"`)
  // cannot appear inside a String token — confirm whether that is intended.
  #[regex(r#""[^"]*""#)]
  String,
  /// An optionally negative decimal number with optional fraction and
  /// exponent.
  // A leading `+` is deliberately not accepted here: `+` is one of the
  // punctuation alternatives below and is lexed as `Punct`.
  #[regex(r"[-]?[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?")]
  Number,
  /// Operators, separators, brackets and braces. Note that `{{` and `}}` are
  /// listed as alternatives of their own in addition to `{` and `}`.
  #[regex(
        r";|\}\}|\{\{|,|:|=|->|<-|=>|<=|\+|-|\*|/|==|!=|<|>|<=|>=|\||&|\^|<<|>>|\.\.|\.|\[|\]|\{|\}"
    )]
  Punct,

  /// The opening parenthesis; kept separate from `Punct`.
  #[token("(")]
  LParen,
  /// The closing parenthesis; kept separate from `Punct`.
  #[token(")")]
  RParen,
  /// Text fenced by `###` on both sides; the body contains no `#`.
  #[regex(r#"###[^#]*###"#)]
  MultiLineStr,
  /// Runs of space, tab, newline and form feed, skipped via `logos::skip`.
  /// Carriage return is not part of the set.
  #[regex(r"[ \t\n\f]+", logos::skip)]
  Skip,
}
35
/// Maps textual value names to the `ValueId`s allocated for them.
#[derive(Default)]
pub struct ValueResolver {
  /// Storage for every value created through this resolver.
  pub values: ValueMap,
  /// Lookup from a value's name (without the leading `%`) to its id.
  pub resolver: HashMap<String, ValueId>,
}
/// Returns `true` when `s` consists solely of ASCII digits.
///
/// The empty string also yields `true`, since there is no offending character.
fn is_num1(s: &str) -> bool {
  // Every byte of a non-ASCII character is >= 0x80 and therefore never an
  // ASCII digit, so a byte-wise scan gives the same answer as a char-wise one.
  !s.bytes().any(|byte| !byte.is_ascii_digit())
}
44impl ValueResolver {
45  pub fn new() -> Self {
46    ValueResolver {
47      values: Default::default(),
48      resolver: HashMap::new(),
49    }
50  }
51  pub fn resolve(
52    &mut self,
53    name: &str,
54    ty: Option<Type>,
55  ) -> Result<ValueId, String> {
56    if name.chars().nth(0) != Some('%') {
57      return Err("Value name must start with %".to_string());
58    }
59    let name = &name[1..];
60    if let Some(value_id) = self.resolver.get(name) {
61      if self.values[*value_id].ty != ty && self.values[*value_id].ty.is_some()
62      {
63        return Err(format!(
64          "Type mismatch for {}: expected {:?}, found {:?}",
65          name, ty, self.values[*value_id].ty
66        ));
67      }
68      self.values[*value_id].ty = ty;
69      return Ok(*value_id);
70    } else {
71      let vname = if is_num1(name) {
72        None
73      } else {
74        Some(name.to_string())
75      };
76      let value_id = self.values.insert(Value { name: vname, ty });
77      self.resolver.insert(name.to_string(), value_id);
78      Ok(value_id)
79    }
80  }
81}
82
/// Expands backslash escapes in `s`.
///
/// `\n`, `\r` and `\t` become the corresponding control characters. Any other
/// escaped character (including `\\` and `\"`) is emitted as itself with the
/// backslash dropped. A lone backslash at the very end is kept verbatim.
fn unescape_str(s: &str) -> String {
  let mut out = String::with_capacity(s.len());
  // True while the previous character was an unconsumed backslash.
  let mut after_backslash = false;

  for ch in s.chars() {
    if after_backslash {
      after_backslash = false;
      out.push(match ch {
        'n' => '\n',
        'r' => '\r',
        't' => '\t',
        literal => literal,
      });
    } else if ch == '\\' {
      after_backslash = true;
    } else {
      out.push(ch);
    }
  }

  // A dangling backslash has nothing to escape; keep it as written.
  if after_backslash {
    out.push('\\');
  }
  out
}
103
/// Token-level parser over a source string borrowed for `'src`.
pub struct Parser<'src> {
  /// The underlying logos lexer; its span is used for error reporting.
  pub lexer: logos::Lexer<'src, Token>,
  /// One-token lookahead buffer: the slice and kind of a token that has been
  /// read from the lexer but not yet consumed.
  peeked: Option<(&'src str, Token)>,
  /// Resolver used to turn value names into `ValueId`s, when one is set.
  resolver: Option<ValueResolver>,
}
109impl<'src> Parser<'src> {
110  pub fn new(src: &'src str) -> Self {
111    Parser {
112      lexer: Token::lexer(src),
113      peeked: None,
114      resolver: None,
115    }
116  }
117  pub fn next(&mut self) -> Result<(&'src str, Token), String> {
118    if let Some(peeked) = self.peeked.take() {
119      Ok(peeked)
120    } else {
121      match self.lexer.next() {
122        Some(Ok(token)) => Ok((self.lexer.slice(), token)),
123        Some(Err(_)) => Err("Invalid token found".to_string()),
124        None => Err("Unexpected end of input".to_string()),
125      }
126    }
127  }
128  pub fn peek(&mut self) -> Result<(&'src str, Token), String> {
129    if let Some(peeked) = self.peeked {
130      Ok(peeked)
131    } else {
132      let peeked = self.next()?;
133      self.peeked = Some(peeked);
134      Ok(peeked)
135    }
136  }
137  pub fn peek_any(&mut self, expected: &[Token]) -> bool {
138    self
139      .peek()
140      .map(|(_, t)| expected.contains(&t))
141      .unwrap_or(false)
142  }
143  pub fn peek_fn(&mut self, f: impl FnOnce(&str, Token) -> bool) -> bool {
144    self.peek().map(|(s, t)| f(s, t)).unwrap_or(false)
145  }
146  pub fn expect(&mut self, expected: Token) -> Result<&'src str, String> {
147    let (slice, token) = self.next()?;
148    if token == expected {
149      Ok(slice)
150    } else {
151      Err(format!(
152        "Expected {:?}, found {:?} at {}; the rest of the input was: {}",
153        expected,
154        token,
155        slice,
156        self.lexer.remainder().to_string()
157      ))
158    }
159  }
160  pub fn expect_any(
161    &mut self,
162    expected: &[Token],
163  ) -> Result<&'src str, String> {
164    let (slice, token) = self.next()?;
165    if expected.contains(&token) {
166      Ok(slice)
167    } else {
168      Err(format!(
169        "Expected {:?}, found {:?} at {}",
170        expected, token, slice
171      ))
172    }
173  }
174  pub fn expect_str(
175    &mut self,
176    token: Token,
177    s: &str,
178  ) -> Result<&'src str, String> {
179    let slice = self.expect(token)?;
180    if slice == s {
181      Ok(slice)
182    } else {
183      Err(format!("Expected {}, found {}", s, slice))
184    }
185  }
186  pub fn parse_unescaped_string(&mut self) -> Result<String, String> {
187    let str = self.expect(Token::String)?;
188    Ok(unescape_str(&str[1..str.len() - 1]))
189  }
190  pub fn parse<T: Parse>(&mut self) -> Result<T, String> {
191    T::parse(self)
192  }
193  pub fn parse_value(&mut self) -> Result<ValueId, String> {
194    let vid = self.expect(Token::ValueId)?;
195    let ty = if self.peek_fn(|s, t| t == Token::Punct && s == ":") {
196      let _ = self.expect(Token::Punct)?;
197      Some(self.parse()?)
198    } else {
199      None
200    };
201    self.resolve_value(vid, ty)
202  }
203
204  pub fn parse_list_kw<T: Parse>(
205    &mut self,
206    kw: Option<&str>,
207    sep: &str,
208  ) -> Result<Vec<T>, String> {
209    let mut vec = Vec::new();
210    let left = self.expect_any(&[Token::Punct, Token::LParen])?;
211    let right = match left {
212      "[" => "]",
213      "(" => ")",
214      "{" => "}",
215      _ => return Err("Invalid paren".to_string()),
216    };
217
218    if let Some(kw) = kw {
219      let _ = self.expect_str(Token::Keyword, kw)?;
220    }
221
222    loop {
223      match self.peek()? {
224        (p, Token::Punct) if p == sep => {
225          self.next()?;
226        }
227        (p, Token::Punct) if p == right => {
228          self.next()?;
229          break;
230        }
231        (p, Token::RParen) if p == right => {
232          self.next()?;
233          break;
234        }
235        _ => vec.push(self.parse()?),
236      }
237    }
238    Ok(vec)
239  }
240
241  pub fn parse_list<T: Parse>(&mut self, sep: &str) -> Result<Vec<T>, String> {
242    self.parse_list_kw(None, sep)
243  }
244
245  // parse a scope, which is { ... }
246  pub fn parse_scope(&mut self) -> Result<String, String> {
247    let mut s = String::new();
248    s.push_str(self.expect_str(Token::Punct, "{")?);
249    let mut count = 1;
250    while count > 0 {
251      let (x, token) = self.next()?;
252      match token {
253        Token::Punct if x == "{" => count += 1,
254        Token::Punct if x == "}" => count -= 1,
255        _ => {}
256      }
257      s.push_str(x);
258    }
259
260    Ok(s)
261  }
262
263  pub fn set_resolver(
264    &mut self,
265    resolver: Option<ValueResolver>,
266  ) -> Option<ValueResolver> {
267    std::mem::replace(&mut self.resolver, resolver)
268  }
269  pub fn resolve_value(
270    &mut self,
271    name: &str,
272    ty: Option<Type>,
273  ) -> Result<ValueId, String> {
274    self.resolver.as_mut().unwrap().resolve(name, ty)
275  }
276  pub fn parse_or_report<T: Parse>(&mut self) -> T {
277    match self.parse() {
278      Ok(x) => x,
279      Err(msg) => {
280        self.report_error("stdin", &msg);
281        panic!("Parse failed");
282      }
283    }
284  }
285  pub fn report_error(&self, name: &str, msg: &str) {
286    use codespan_reporting::diagnostic::{Diagnostic, Label};
287    use codespan_reporting::files::SimpleFiles;
288    use codespan_reporting::term::{
289      self,
290      termcolor::{ColorChoice, StandardStream},
291    };
292    let mut files = SimpleFiles::new();
293    let file_id = files.add(name, self.lexer.source());
294    let diagnostic = Diagnostic::error()
295      .with_message(msg)
296      .with_labels(vec![Label::primary(file_id, self.lexer.span())]);
297    let writer = StandardStream::stderr(ColorChoice::Always);
298    let config = term::Config::default();
299    term::emit(&mut writer.lock(), &config, &files, &diagnostic).unwrap();
300  }
301}
302
/// Types that can be read from a [`Parser`]'s token stream.
pub trait Parse: Sized {
  /// Consumes tokens from `parser` and builds a value of the implementing
  /// type, or returns a human-readable error message.
  fn parse(parser: &mut Parser) -> Result<Self, String>;
}
306
307impl Parse for String {
308  fn parse(parser: &mut Parser) -> Result<Self, String> {
309    parser.parse_unescaped_string()
310  }
311}
312
313macro_rules! impl_parse_for_number {
314    ($($t:ty),*) => {
315        $(
316            impl Parse for $t {
317                fn parse(
318                    parser: &mut Parser,
319                ) -> Result<Self, String> {
320                    parser
321                        .expect(Token::Number)?
322                        .parse::<Self>()
323                        .map_err(|x| x.to_string())
324                }
325            }
326        )*
327    };
328}
329impl_parse_for_number!(
330  i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, isize, usize
331);
332impl_parse_for_number!(BigUint, BigInt);
333
334impl Parse for bool {
335  fn parse(parser: &mut Parser) -> Result<Self, String> {
336    let s = parser.expect(Token::Keyword)?;
337    match s {
338      "true" => Ok(true),
339      "false" => Ok(false),
340      _ => Err(format!("Invalid boolean: {}", s)),
341    }
342  }
343}
344
345impl Parse for ValueId {
346  fn parse(parser: &mut Parser) -> Result<Self, String> {
347    parser.parse_value()
348  }
349}
350
351impl Parse for Type {
352  fn parse(parser: &mut Parser) -> Result<Self, String> {
353    let s = parser.expect(Token::Keyword)?;
354    Type::from_str(s).map_err(|_| format!("Invalid type: {}", s))
355  }
356}
357
358impl<K: Parse, D: Parse> Parse for (K, D) {
359  fn parse(parser: &mut Parser) -> Result<Self, String> {
360    let k = parser.parse()?;
361    let _ = parser.expect_str(Token::Punct, ":")?;
362    Ok((k, parser.parse()?))
363  }
364}
365
366impl<T: Parse> Parse for Vec<T> {
367  fn parse(parser: &mut Parser) -> Result<Self, String> {
368    parser.parse_list(",")
369  }
370}
371impl<T: Parse, const N: usize> Parse for [T; N] {
372  fn parse(parser: &mut Parser) -> Result<Self, String> {
373    let res = parser.parse_list(",")?;
374    res.try_into().map_err(|_| format!("Expected {} values", N))
375  }
376}
377impl<T: Parse> Parse for Option<T> {
378  fn parse(parser: &mut Parser) -> Result<Self, String> {
379    if parser.peek_fn(|s, _| s == "_") {
380      parser.next()?;
381      Ok(None)
382    } else {
383      Ok(Some(parser.parse()?))
384    }
385  }
386}
387impl<T: Parse> Parse for Box<T> {
388  fn parse(parser: &mut Parser) -> Result<Self, String> {
389    Ok(Box::new(parser.parse()?))
390  }
391}
392impl<T: Parse> Parse for Range<T> {
393  fn parse(parser: &mut Parser) -> Result<Self, String> {
394    let start = parser.parse()?;
395    parser.expect_str(Token::Punct, "..")?;
396    let end = parser.parse()?;
397    Ok(start..end)
398  }
399}
400
401impl<K: Parse + Eq + Hash, D: Parse> Parse for IndexMap<K, D> {
402  fn parse(parser: &mut Parser) -> Result<Self, String> {
403    let kd_pair_vec: Vec<(K, D)> = parser.parse_list(",")?;
404    Ok(kd_pair_vec.into_iter().collect())
405  }
406}
407
408impl Parse for json::object::Object {
409  fn parse(parser: &mut Parser) -> Result<Self, String> {
410    let s = parser.parse_scope()?;
411    let parsed = json::parse(&s).map_err(|e| e.to_string())?;
412    match parsed {
413      json::JsonValue::Object(obj) => Ok(obj),
414      _ => Err(format!("Expected object, but found {}", parsed)),
415    }
416  }
417}