kindelia_lang/
parser.rs

1#![allow(clippy::style)]
2
3use std::str::FromStr;
4use serde::{Deserialize, Serialize};
5
6use crate::ast::{Func, Oper, Rule, Statement, Term};
7use kindelia_common::{crypto, Name, U120};
8use thiserror::Error;
9
10pub type ParseResult<'a, A> = Result<(&'a str, A), ParseErr>;
11
12#[derive(Error, Debug, Clone, Serialize, Deserialize)]
13#[error("Parser error")]
14pub struct ParseErr {
15  pub code: String,
16  pub erro: String,
17}
18
19impl ParseErr {
20  fn new<C, E>(code: C, erro: E) -> Self
21  where
22    C: Into<String>,
23    E: Into<String>,
24  {
25    ParseErr { code: code.into(), erro: erro.into() }
26  }
27}
28
29fn head(code: &str) -> char {
30  return code.chars().take(1).last().unwrap_or('\0');
31}
32
33fn tail(code: &str) -> &str {
34  if code.len() > 0 {
35    return &code[head(code).len_utf8()..];
36  } else {
37    return "";
38  }
39}
40
41fn drop(code: &str, amount: u128) -> &str {
42  let mut code = code;
43  for _ in 0..amount {
44    code = tail(code);
45  }
46  return code;
47}
48
49fn nth(code: &str, index: u128) -> char {
50  return head(drop(code, index));
51}
52
53fn skip(code: &str) -> &str {
54  let mut code = code;
55  loop {
56    if " \n\r\t".contains(head(code)) {
57      while " \n\r\t".contains(head(code)) {
58        code = tail(code);
59      }
60      continue;
61    }
62    if head(code) == '/' && nth(code, 1) == '/' {
63      while head(code) != '\n' && head(code) != '\0' {
64        code = tail(code);
65      }
66      continue;
67    }
68    break;
69  }
70  return code;
71}
72
73// fn hash(name: &str) -> u128 {
74//   let mut hasher = hash_map::DefaultHasher::new();
75//   name.hash(&mut hasher);
76//   hasher.finish() as u128
77// }
78
79fn is_name_char(chr: char) -> bool {
80  return chr == '_'
81    || chr == '.'
82    || chr >= 'a' && chr <= 'z'
83    || chr >= 'A' && chr <= 'Z'
84    || chr >= '0' && chr <= '9';
85}
86
87pub fn parse_char(code: &str, chr: char) -> ParseResult<()> {
88  let code = skip(code);
89  if head(code) == chr {
90    Ok((tail(code), ()))
91  } else {
92    Err(ParseErr {
93      erro: format!("Expected '{}', found '{}'. Context:\n\x1b[2m{}\x1b[0m", chr, head(code), code.chars().take(64).collect::<String>()),
94      code: code.to_string(),
95    })
96  }
97}
98
99pub fn parse_numb<T>(code: &str) -> ParseResult<T>
100where
101  T: TryFrom<u128, Error = String>,
102{
103  let mut code = skip(code);
104  let (code, numb) = {
105    if head(code) == 'x' {
106      code = tail(code);
107      let mut numb = 0;
108      let mut code = code;
109      let mut digits = 0; // this will be used to prevent number overflow panicking
110      loop {
111        if head(code) >= '0' && head(code) <= '9' {
112          numb = numb * 16 + head(code) as u128 - 0x30;
113          code = tail(code);
114        } else if head(code) >= 'a' && head(code) <= 'f' {
115          numb = numb * 16 + head(code) as u128 - 0x61 + 10;
116          code = tail(code);
117        } else if head(code) >= 'A' && head(code) <= 'F' {
118          numb = numb * 16 + head(code) as u128 - 0x41 + 10;
119          code = tail(code);
120        } else {
121          break;
122        }
123        digits += 1;
124        if digits > 30 {
125          return Err(ParseErr::new(code, "Hexadecimal number with more than 30 digits"))
126        }
127      }
128      (code, numb)
129    } else {
130      let mut numb = 0;
131      while head(code) >= '0' && head(code) <= '9' {
132        numb = numb * 10 + head(code) as u128 - 0x30;
133        code = tail(code);
134      }
135      (code, numb)
136    }
137  };
138  let numb: T = numb.try_into().map_err(|err| ParseErr::new(code, err))?;
139  Ok((code, numb))
140}
141
142pub fn parse_name(code: &str) -> ParseResult<Name> {
143  let code = skip(code);
144  let mut name = String::new();
145  if head(code) == '~' {
146    return Ok((tail(code), Name::NONE));
147  } else {
148    let mut code = code;
149    while is_name_char(head(code)) {
150      name.push(head(code));
151      code = tail(code);
152    }
153    if name.is_empty() {
154      return Err(ParseErr {
155        code: code.to_string(),
156        erro: format!("Expected identifier, found `{}`.", head(code)),
157      });
158    }
159    if name == "ask" || name == "dup" || name == "let" {
160      return Err(ParseErr {
161        code: code.to_string(),
162        erro: format!("Use of the keyword {} as a name for a term in `{}`.", name, code)
163      });
164    }
165    if ('0'..='9').contains(&name.chars().nth(0).unwrap_or(' ')) {
166      // In most cases, an user writing 0-9 probably wants to make a number, not a name, so, to
167      // avoid mistakes, we disable this syntax by default. But since names CAN start with 0-9 on
168      // Kindelia, we must create an alternative, explicit way to parse "numeric names".
169      return Err(ParseErr {
170        code: code.to_string(),
171        erro: format!("Number must start with #, but '{}' doesn't.", name),
172      });
173    }
174    let name = Name::from_str(&name);
175    let name =
176      match name {
177        Ok(name) => name,
178        Err(msg) => {
179          return Err(ParseErr {
180            code: code.to_string(),
181            erro: format!("Identifier too long: {}", msg),
182          });
183        }
184      };
185    return Ok((code, name));
186  }
187}
188
189// Like parse_name, but we're sure it's an actual name and not something else
190pub fn parse_strict_name(code: &str) -> ParseResult<Name> {
191  let code = skip(code);
192  let mut name = String::new();
193  let mut code = code;
194  while is_name_char(head(code)) {
195    name.push(head(code));
196    code = tail(code);
197  }
198  if name.is_empty() {
199    return Err(ParseErr {
200      code: code.to_string(),
201      erro: format!("Expected identifier, found `{}`.", head(code)),
202    });
203  }
204  let name = Name::from_str(&name);
205  let name =
206    match name {
207      Ok(name) => name,
208      Err(msg) => {
209        return Err(ParseErr {
210          code: code.to_string(),
211          erro: format!("Identifier too long: {}", msg),
212        });
213      }
214    };
215  return Ok((code, name));
216}
217
218pub fn parse_hex(code: &str) -> ParseResult<Vec<u8>> {
219  let mut data: Vec<u8> = Vec::new();
220  let mut code = skip(code);
221  while nth(code,0).is_ascii_hexdigit() && nth(code,1).is_ascii_hexdigit() {
222    data.append(&mut hex::decode(&String::from_iter([nth(code,0),nth(code,1)])).unwrap());
223    code = drop(code, 2);
224    code = skip(code);
225  }
226  return Ok((code, data));
227}
228
229pub fn parse_until<A>(code: &str, stop: char, read: fn(&str) -> ParseResult<A>) -> ParseResult<Vec<A>> {
230  let mut elems = Vec::new();
231  let mut code = code;
232  while code.len() > 0 && head(skip(code)) != stop {
233    let (new_code, elem) = read(code)?;
234    code = new_code;
235    elems.push(elem);
236  }
237  code = tail(skip(code));
238  return Ok((code, elems));
239}
240
241pub fn parse_term(code: &str) -> ParseResult<Term> {
242  let code = skip(code);
243  match head(code) {
244    // Lambda definition
245    '@' => {
246      let code         = tail(code);
247      let (code, name) = parse_name(code)?;
248      let (code, body) = parse_term(code)?;
249      let term = Term::lam(name, Box::new(body));
250      return Ok((code, term));
251    }
252    // Redex
253    '(' => {
254      let code = skip(tail(code));
255      let (code, oper) = parse_oper(code);
256      // Native number operation
257      if let Some(oper) = oper {
258        let (code, val0) = parse_term(code)?;
259        let (code, val1) = parse_term(code)?;
260        let (code, _unit) = parse_char(code, ')')?;
261        let term = Term::op2(oper, Box::new(val0), Box::new(val1));
262        return Ok((code, term));
263      }
264      // Lambda application
265      else if head(code) == '!' {
266        let code = tail(code);
267        let (code, func) = parse_term(code)?;
268        let (code, argm) = parse_term(code)?;
269        let (code, _unit) = parse_char(code, ')')?;
270        let term = Term::app(Box::new(func), Box::new(argm));
271        return Ok((code, term));
272      }
273      // Function application
274      else {
275        let (code, name) = parse_strict_name(code)?;
276        let (code, args) = parse_until(code, ')', parse_term)?;
277        let term = Term::fun(name, args);
278        return Ok((code, term));
279      }
280    }
281    // Constructor
282    '{' => {
283      let code = tail(code);
284      let (code, name) = parse_strict_name(code)?;
285      let (code, args) = parse_until(code, '}', parse_term)?;
286      let term = Term::ctr(name, args);
287      return Ok((code, term));
288    }
289    // Tuple sugar
290    '[' => {
291      let code = tail(code);
292      let (code, vals) = parse_until(code, ']', parse_term)?;
293      let num_vals = vals.len();
294      if num_vals <= 12 {
295        let name = Name::from_str(&format!("T{}", num_vals)).unwrap();
296        let term = Term::ctr(name, vals);
297        return Ok((code, term));
298      } else {
299        return Err(ParseErr { code: code.to_string(), erro: "Tuple too long".to_string() });
300      }
301    }
302    // Number
303    '#' => {
304      let code = tail(code);
305      let (code, numb) = parse_numb(code)?;
306      let term = Term::num(numb);
307      return Ok((code, term));
308    }
309    // Name to number sugar
310    '\'' => {
311      let code = tail(code);
312      let (code, name) = parse_strict_name(code)?;
313      let (code, _unit) = parse_char(code, '\'')?;
314      let numb = name.into();
315      let term = Term::num(numb);
316      return Ok((code, term));
317    }
318    _ => {
319      if let ('d','u','p',' ') = (nth(code,0), nth(code,1), nth(code,2), nth(code,3)) {
320        let code = drop(code,3);
321        let (code, nam0) = parse_name(code)?;
322        let (code, nam1) = parse_name(code)?;
323        let (code, _unit) = parse_char(code, '=')?;
324        let (code, expr) = parse_term(code)?;
325        let (code, _unit) = parse_char(code, ';')?;
326        let (code, body) = parse_term(code)?;
327        let term = Term::dup(nam0, nam1, Box::new(expr), Box::new(body));
328        return Ok((code, term));
329      // let x = y; z
330      // ------------
331      // (@x z y)
332      } else if let ('l','e','t',' ') = (nth(code,0), nth(code,1), nth(code,2), nth(code,3)) {
333        let code = drop(code,3);
334        let (code, name) = parse_name(code)?;
335        let (code, _unit) = parse_char(code, '=')?;
336        let (code, expr) = parse_term(code)?;
337        let (code, _unit) = parse_char(code, ';')?;
338        let (code, body) = parse_term(code)?;
339        let lam = Term::lam(name, Box::new(body));
340        let app = Term::app(Box::new(lam), Box::new(expr));
341        return Ok((code, app));
342      // ask x = y; z
343      // ------------
344      // (y @x z)
345      } else if let ('a','s','k',' ') = (nth(code,0), nth(code,1), nth(code,2), nth(code,3)) {
346        let code = skip(drop(code,3));
347        if nth(code,0) == '(' {
348          let (code, expr) = parse_term(code)?;
349          let (code, _unit) = parse_char(code, ';')?;
350          let (code, body) = parse_term(code)?;
351          let argm = Term::lam(Name::NONE, Box::new(body));
352          let term = Term::app(Box::new(expr), Box::new(argm));
353          return Ok((code, term));
354        } else {
355          let (code, name) = parse_name(code)?;
356          let (code, _unit) = parse_char(code, '=')?;
357          let (code, expr) = parse_term(code)?;
358          let (code, _unit) = parse_char(code, ';')?;
359          let (code, body) = parse_term(code)?;
360          let argm = Term::lam(name, Box::new(body));
361          let term = Term::app(Box::new(expr), Box::new(argm));
362          return Ok((code, term));
363        }
364      } else {
365        let (code, name) = parse_name(code)?;
366        let term = Term::var(name);
367        return Ok((code, term));
368      }
369    }
370  }
371}
372
373pub fn parse_oper(in_code: &str) -> (&str, Option<Oper>) {
374  let code = skip(in_code);
375  match head(code) {
376    // Should not match with `~`
377    '+' => (tail(code), Some(Oper::Add)),
378    '-' => (tail(code), Some(Oper::Sub)),
379    '*' => (tail(code), Some(Oper::Mul)),
380    '/' => (tail(code), Some(Oper::Div)),
381    '%' => (tail(code), Some(Oper::Mod)),
382    '&' => (tail(code), Some(Oper::And)),
383    '|' => (tail(code), Some(Oper::Or)),
384    '^' => (tail(code), Some(Oper::Xor)),
385    '<' => match head(tail(code)) {
386      '=' => (tail(tail(code)), Some(Oper::Lte)),
387      '<' => (tail(tail(code)), Some(Oper::Shl)),
388      _   => (tail(code), Some(Oper::Ltn)),
389    },
390    '>' => match head(tail(code)) {
391      '=' => (tail(tail(code)), Some(Oper::Gte)),
392      '>' => (tail(tail(code)), Some(Oper::Shr)),
393      _   => (tail(code), Some(Oper::Gtn)),
394    },
395    '=' => match head(tail(code)) {
396      '=' => (tail(tail(code)), Some(Oper::Eql)),
397      _   => (code, None),
398    },
399    '!' => match head(tail(code)) {
400      '=' => (tail(tail(code)), Some(Oper::Neq)),
401      _   => (code, None),
402    },
403    _ => (code, None),
404  }
405}
406
407pub fn parse_rule(code: &str) -> ParseResult<Rule> {
408  // TODO: custom parser for lhs
409  let (code, lhs) = parse_term(code)?;
410  let (code, ())  = parse_char(code, '=')?;
411  let (code, rhs) = parse_term(code)?;
412  return Ok((code, Rule { lhs, rhs }));
413}
414
415pub fn parse_rules(code: &str) -> ParseResult<Vec<Rule>> {
416  let (code, rules) = parse_until(code, '\0', parse_rule)?;
417  return Ok((code, rules));
418}
419
420pub fn parse_sign(code: &str) -> ParseResult<Option<crypto::Signature>> {
421  let code = skip(code);
422  if let ('s','i','g','n') = (nth(code,0), nth(code,1), nth(code,2), nth(code,3)) {
423    let code = drop(code,4);
424    let (code, _unit) = parse_char(code, '{')?;
425    let (code, sign) = parse_hex(code)?;
426    let (code, _unit) = parse_char(code, '}')?;
427    if sign.len() == 65 {
428      return Ok((code, Some(crypto::Signature(sign.as_slice().try_into().unwrap())))); // TODO: remove unwrap
429    } else {
430      return Err(ParseErr { 
431        code: code.to_string(), 
432        erro: "Wrong signature size".to_string()
433      });
434    }
435  }
436  return Ok((code, None));
437}
438
439pub fn parse_statement(code: &str) -> ParseResult<Statement> {
440  let code = skip(code);
441  match (nth(code,0), nth(code,1), nth(code,2)) {
442    ('f','u','n') => {
443      let code = drop(code,3);
444      let (code, _unit) = parse_char(code, '(')?;
445      let (code, name) = parse_strict_name(code)?;
446      let (code, args) = parse_until(code, ')', parse_name)?;
447      let (code, _unit) = parse_char(code, '{')?;
448      let (code, ruls) = parse_until(code, '}', parse_rule)?;
449      let code = skip(code);
450      let (code, init) = if let ('w','i','t','h') = (nth(code,0), nth(code,1), nth(code,2), nth(code,3)) {
451        let code = drop(code,4);
452        let (code, _unit) = parse_char(code, '{')?;
453        let (code, init) = parse_term(code)?;
454        let (code, _unit) = parse_char(code, '}')?;
455        (code, Some(init))
456      } else {
457        (code, None)
458      };
459      let (code, sign) = parse_sign(code)?;
460      let func = Func { rules: ruls };
461      return Ok((code, Statement::Fun { name, args, func, init, sign }));
462    }
463    ('c','t','r') => {
464      let code = drop(code,3);
465      let (code, _unit) = parse_char(code, '{')?;
466      let (code, name) = parse_strict_name(code)?;
467      let (code, args) = parse_until(code, '}', parse_name)?;
468      let (code, sign) = parse_sign(code)?;
469      return Ok((code, Statement::Ctr { name, args, sign }));
470    }
471    ('r','u','n') => {
472      let code = drop(code,3);
473      let (code, _unit) = parse_char(code, '{')?;
474      let (code, expr) = parse_term(code)?;
475      let (code, _unit) = parse_char(code, '}')?;
476      let (code, sign) = parse_sign(code)?;
477      return Ok((code, Statement::Run { expr, sign  }));
478    }
479    // reg Foo.Bar { #x123456 } sign { signature }
480    ('r','e','g') => {
481      let code = skip(drop(code, 3));
482      let (code, name) =
483        if nth(code, 0) == '{' {
484          (code, Name::EMPTY)
485        } else {
486          parse_strict_name(code)?
487        };
488      let (code, _unit) = parse_char(code, '{')?;
489      let code = skip(code);
490      let (code, ownr) = match head(code) {
491        '#' => {
492          let code = tail(code);
493          parse_numb(code)?
494        }
495        '\'' => {
496          let code = tail(code);
497          let (code, name) = parse_strict_name(code)?;
498          let (code, _unit) = parse_char(code, '\'')?;
499          let numb: U120 = name.into();
500          (code, numb)
501        },
502        _ => return Err(ParseErr::new(code, "Expected a number representation"))
503      };
504      let (code, _unit) = parse_char(code, '}')?;
505      let (code, sign) = parse_sign(code)?;
506      return Ok((code, Statement::Reg { name, ownr, sign }));
507    }
508    _ => {
509      return Err(ParseErr { code: code.to_string(),  erro: "Expected statement.".to_string() });
510    }
511  }
512}
513
514pub fn parse_statements(code: &str) -> ParseResult<Vec<Statement>> {
515  parse_until(code, '\0', parse_statement)
516}
517
518pub fn parse_code(code: &str) -> Result<Vec<Statement>, String> {
519  let statements = parse_statements(code);
520  match statements {
521    Ok((code, statements)) => {
522      if code.is_empty() {
523        Ok(statements)
524      } else {
525        Err(format!("Your code was not parsed entirely: {}", code))
526      }
527    }
528    Err(ParseErr { erro, .. }) => Err(erro),
529  }
530}