rain_lang/parser/
mod.rs

1/*!
2A simple parser, AST and prettyprinter for a textual representation of `rain` programs
3*/
4use nom::{
5    IResult,
6    multi::{many0, many0_count, separated_list, separated_nonempty_list},
7    sequence::{preceded, terminated, delimited, tuple, separated_pair},
8    combinator::{opt, map, complete, recognize},
9    branch::alt,
10    bytes::complete::{tag, is_not, is_a},
11    bytes::streaming::take_until,
12    character::complete::{not_line_ending, digit1, hex_digit1, oct_digit1},
13    character::streaming::multispace0,
14};
15use num::BigUint;
16
17pub mod ast;
18use ast::{
19    Path, Expr, Pattern, Let, Scope, SimpleAssignment, Parametrized, Lambda, Pi, Phi, Gamma, Sexpr
20};
21use crate::value::primitive::{
22    logical::{LogicalOp, Binary, Unary, Bool},
23    binary::{Natural, BinaryDisplay}
24};
25pub mod symbol_table;
26pub mod builder;
27
28macro_rules! special_chars { () => (" \t\r\n(){}[]|:.=;#,'\"") }
29macro_rules! digits { () => ("0123456789") }
30
31const LET: &'static str = "let";
32const DEFEQ: &'static str = "=";
33const TERM: &'static str = ";";
34
35/// Parse a single line `rain` comment
36pub fn parse_single_comment(input: &str) -> IResult<&str, &str> {
37    preceded(tag("//"), not_line_ending)(input)
38}
39
40/// Parse a multi line `rain` comment
41pub fn parse_multi_comment(input: &str) -> IResult<&str, &str> {
42    preceded(
43        tag("/*"),
44        take_until("*/")
45    )(input)
46}
47
48/// Parse a `rain` comment. TODO: distinguish documentation comments, and don't ignore those
49pub fn parse_comment(input: &str) -> IResult<&str, &str> {
50    alt((
51        parse_single_comment,
52        parse_multi_comment
53    ))(input)
54}
55
56/// Parse whitespace, including ignored comments. Returns the count of comments.
57pub fn whitespace(input: &str) -> IResult<&str, usize> {
58    terminated(
59        many0_count(preceded(multispace0, parse_comment)),
60        multispace0
61    )(input)
62}
63
64/// Parse a `rain` identifier, which is composed of any non-special character.
65/// Does *not* accept whitespace before the identifier, and does *not* consume whitespace after it.
66pub fn parse_ident(input: &str) -> IResult<&str, &str> {
67    recognize(
68        tuple((is_not(concat!(special_chars!(), digits!())), opt(is_not(special_chars!()))))
69    )(input)
70}
71
72/// Parse a path separator
73pub fn path_separator(input: &str) -> IResult<&str, &str> { tag(".")(input) }
74
75/// Parse a `rain` path.
76/// Does *not* accept whitespace before the path and does *not* consume whitespace after it.
77pub fn parse_path(input: &str) -> IResult<&str, Path> {
78    map(
79        separated_nonempty_list(path_separator, parse_ident),
80        |names| { names.into() }
81    )(input)
82}
83
84/// Parse a binary logical operation
85pub fn parse_binary_logical_op(input: &str) -> IResult<&str, Binary> {
86    use Binary::*;
87    preceded(whitespace, alt((
88        map(tag(And.get_string()), |_| And),
89        map(tag(Or.get_string()), |_| Or),
90        map(tag(Xor.get_string()), |_| Xor),
91        map(tag(Nand.get_string()), |_| Nand),
92        map(tag(Nor.get_string()), |_| Nor),
93        map(tag(Eq.get_string()), |_| Eq),
94        map(tag(Implies.get_string()), |_| Implies),
95        map(tag(ImpliedBy.get_string()), |_| ImpliedBy)
96    )))(input)
97}
98
99/// Parse a unary logical operation
100pub fn parse_unary_logical_op(input: &str) -> IResult<&str, Unary> {
101    use Unary::*;
102    preceded(whitespace, alt((
103        map(tag(Id.get_string()), |_| Id),
104        map(tag(Not.get_string()), |_| Not),
105        map(tag(Constant(true).get_string()), |_| Constant(true)),
106        map(tag(Constant(false).get_string()), |_| Constant(false)),
107    )))(input)
108}
109
110/// Parse a logical operation
111pub fn parse_logical_op(input: &str) -> IResult<&str, LogicalOp> {
112    use LogicalOp::*;
113    alt((
114        map(parse_binary_logical_op, Binary),
115        map(parse_unary_logical_op, Unary)
116    ))(input)
117}
118
119/// Parse the Boolean type
120pub fn parse_bool_type(input: &str) -> IResult<&str, Bool> {
121    map(preceded(whitespace, tag("#bool")), |_| Bool)(input)
122}
123
124/// Parse a boolean
125pub fn parse_bool(input: &str) -> IResult<&str, bool> {
126    preceded(whitespace, alt((
127        map(tag("#true"), |_| true),
128        map(tag("#false"), |_| false)
129    )))(input)
130}
131
132/// Parse a natural number
133pub fn parse_natural(input: &str) -> IResult<&str, Natural> {
134    use BinaryDisplay::*;
135    alt((
136        map(
137            preceded(tag("0b"), is_a("01")),
138            |bytes: &str| Natural(BigUint::parse_bytes(bytes.as_bytes(), 2).unwrap(), Bin)
139        ),
140        map(
141            preceded(tag("0o"), oct_digit1),
142            |bytes: &str| Natural(BigUint::parse_bytes(bytes.as_bytes(), 8).unwrap(), Oct)
143        ),
144        map(
145            preceded(tag("0x"), hex_digit1),
146            |bytes: &str| Natural(BigUint::parse_bytes(bytes.as_bytes(), 2).unwrap(), Hex)
147        ),
148        map(
149            digit1,
150            |bytes: &str| Natural(BigUint::parse_bytes(bytes.as_bytes(), 10).unwrap(), Dec)
151        ),
152    ))(input)
153}
154
155/// Parse an atomic `rain` expression
156/// Does *not* accept whitespace before the expression, and does *not* consume whitespace after it.
157pub fn parse_atom(input: &str) -> IResult<&str, Expr> {
158    alt((
159        map(parse_natural, Expr::Natural), // Natural values
160        map(parse_bool, Expr::Bool), // Boolean values
161        map(parse_logical_op, Expr::LogicalOp), // Binary logical operations
162        map(parse_bool_type, Expr::BoolTy), // Parse the boolean type
163        map(parse_phi, Expr::Phi), // Phis
164        map(parse_lambda, Expr::Lambda), // Lambdas
165        map(parse_gamma, Expr::Gamma), // Match statements
166        map(parse_pi, Expr::Pi), // Pi types
167        map(parse_path, Expr::Path), // Paths
168        map(parse_scope, Expr::Scope), // Scopes
169        delimited(tag("("), parse_expr, preceded(whitespace, tag(")")))
170    ))(input)
171}
172
173/// Parse a `rain` expression, which is either an atom or an application of them
174/// Accepts whitespace before the expression
175pub fn parse_expr(input: &str) -> IResult<&str, Expr> {
176    map(
177        tuple((
178            preceded(whitespace, parse_atom),
179            many0(preceded(complete(whitespace), map(parse_atom, Box::new)))
180        )),
181        |(first, mut ops)| {
182            if ops.len() == 0 { first }
183            else { ops.reverse(); ops.push(Box::new(first)); Expr::Sexpr(Sexpr { ops })}
184        }
185    )(input)
186}
187
188/// Parse a type bound
189pub fn parse_type_bound(input: &str) -> IResult<&str, Expr> {
190    preceded(preceded(whitespace, tag(":")), parse_expr)(input)
191}
192
193/// Parse a simple assignment. Accepts whitespace before it
194pub fn parse_simple_assignment(input: &str) -> IResult<&str, SimpleAssignment> {
195    map(
196        tuple((
197            whitespace,
198            parse_ident,
199            opt(parse_type_bound)
200        )),
201        |(_, name, ty)| SimpleAssignment { name, ty }
202    )(input)
203}
204
205/// Parse a pattern for assignment
206/// Accepts whitespace before the pattern
207pub fn parse_pattern(input: &str) -> IResult<&str, Pattern> {
208    map(preceded(whitespace, parse_simple_assignment), Pattern::Simple)(input) //TODO: this
209}
210
211/// Parse an equality separator, optionally preceded by whitespace
212pub fn defeq(input: &str) -> IResult<&str, &str> { preceded(whitespace, tag(DEFEQ))(input) }
213
214/// Parse a terminator, optionally preceded by whitespace
215pub fn terminator(input: &str) -> IResult<&str, &str> { preceded(whitespace, tag(TERM))(input) }
216
217/// Parse a `let`-statement
218pub fn parse_statement(input: &str) -> IResult<&str, Let> {
219    delimited(
220        preceded(whitespace, tag(LET)),
221        map(
222            separated_pair(parse_pattern, defeq, parse_expr),
223            |(pattern, expr)| { Let { pattern, expr } }
224        ),
225        terminator
226    )(input)
227}
228
229/// Parse a scope, consuming all whitespace before it
230pub fn parse_scope(input: &str) -> IResult<&str, Scope> {
231    map(
232        delimited(
233            preceded(whitespace, tag("{")),
234            tuple((
235                many0(parse_statement),
236                opt(parse_expr)
237            )),
238            preceded(whitespace, tag("}"))
239        ),
240        |(definitions, value)| Scope { definitions, value: value.map(Box::new) }
241    )(input)
242}
243
244/// Parse a phi node, consuming all whitespace beforeit
245pub fn parse_phi(input: &str) -> IResult<&str, Phi> {
246    map(
247        preceded(
248            preceded(whitespace, tag("#phi")),
249            parse_scope
250        ),
251        Phi
252    )(input)
253}
254
255/// Parse an optional ident
256pub fn parse_opt_ident(input: &str) -> IResult<&str, Option<&str>> {
257    alt((
258        map(tag("_"), |_| None),
259        map(parse_ident, Some)
260    ))(input)
261}
262
263/// Parse a list of typed identifiers
264pub fn parse_typed_idents(input: &str) -> IResult<&str, Vec<(Option<&str>, Expr)>> {
265    separated_nonempty_list(
266        delimited(whitespace, tag(","), whitespace),
267        tuple((parse_opt_ident, parse_type_bound))
268    )(input)
269}
270
271/// Parse a list of typed arguments
272pub fn parse_typed_args(input: &str) -> IResult<&str, Vec<(Option<&str>, Expr)>> {
273    delimited(
274        preceded(whitespace, tag("|")),
275        parse_typed_idents,
276        preceded(whitespace, tag("|"))
277    )(input)
278}
279
280/// Parse a function type arrow
281pub fn parse_type_arrow(input: &str) -> IResult<&str, Expr> {
282    preceded(
283        delimited(whitespace, tag("=>"), whitespace),
284        parse_atom
285    )(input)
286}
287
288/// Parse a lambda function, consuming all whitespace before it
289pub fn parse_lambda(input: &str) -> IResult<&str, Lambda> {
290    map(
291        preceded(
292            preceded(whitespace, tag("#lambda")),
293            parse_parametrized
294        ),
295        |p| Lambda(p)
296    )(input)
297}
298
299/// Parse a pi type, consuming all whitespace before it
300pub fn parse_pi(input: &str) -> IResult<&str, Pi> {
301    map(
302        preceded(
303            preceded(whitespace, tag("#pi")),
304            parse_parametrized
305        ),
306        |p| Pi(p)
307    )(input)
308}
309
310/// Parse a parametrized expression
311pub fn parse_parametrized(input: &str) -> IResult<&str, Parametrized> {
312    map(
313        tuple((parse_typed_args, opt(parse_type_arrow), parse_expr)),
314        |(args, ret_ty, result)| Parametrized {
315            args, result: Box::new(result), ret_ty: ret_ty.map(|r| Box::new(r))
316        }
317    )(input)
318}
319
320/// Parse a gamma node, consuming all whitespace before it
321pub fn parse_gamma(input: &str) -> IResult<&str, Gamma> {
322    map(
323        preceded(
324            preceded(whitespace, tag("#match")),
325            parse_pattern_matches
326        ),
327        |branches| Gamma { branches }
328    )(input)
329}
330
331/// Parse a set of pattern matches
332pub fn parse_pattern_matches(input: &str) -> IResult<&str, Vec<(Pattern, Expr)>> {
333    delimited(
334        preceded(whitespace, tag("{")),
335        separated_list(
336            preceded(whitespace, tag(",")),
337            parse_pattern_match
338        ),
339        preceded(delimited(whitespace, opt(tag(",")), whitespace), tag("}"))
340    )(input)
341}
342
343/// Parse a pattern match
344pub fn parse_pattern_match(input: &str) -> IResult<&str, (Pattern, Expr)> {
345    separated_pair(parse_pattern, preceded(whitespace, tag("=>")), parse_expr)(input)
346}
347
348#[cfg(test)]
349mod tests {
350    use super::*;
351
352    macro_rules! parse_tester {
353        ($parser:expr, $string:expr, $correct:expr, $tail:expr) => {{
354            let parser = $parser;
355            let string = $string;
356            let correct = $correct;
357            let tail: Option<&str> = $tail;
358            let (rest, parsed) = match parser(string) {
359                Ok(result) => result,
360                Err(err) => {
361                    panic!("Error {:?} parsing input string {:?}", err, string)
362                }
363            };
364            if let Some(correct) = correct { assert_eq!(parsed, correct, "Input parses wrong!"); }
365            if let Some(tail) = tail { assert_eq!(rest, tail, "Invalid tail on input!"); }
366            let displayed = format!("{}", parsed);
367            let (rest, d_parsed) = match parser(&displayed) {
368                Ok(result) => result,
369                Err(err) => {
370                    panic!(
371                        "Error {:?} parsing display string {:?} (input = {:?})",
372                        err, displayed, string
373                    )
374                }
375            };
376            assert_eq!(
377                d_parsed, parsed,
378                "Display output parses to the wrong result! (input = {:?}, displayed = {:?})",
379                string, displayed
380            );
381            assert_eq!(
382                rest, "",
383                "Unparsed display output (input = {:?}, displayed = {:?})!", string, displayed
384            );
385            assert_eq!(displayed, format!("{}", d_parsed));
386        }}
387    }
388
389    macro_rules! assert_parses {
390        ($parser:expr, $string:expr) => { parse_tester!($parser, $string, None, Some("")) }
391    }
392
393    macro_rules! assert_parses_to {
394        ($parser:expr, $string:expr, $correct:expr, $tail:expr) => {
395            parse_tester!($parser, $string, Some($correct), Some($tail))
396        }
397    }
398
399    #[test]
400    fn idents_parse_properly() {
401        assert_parses_to!(parse_ident, "hello world", "hello", " world");
402        assert_parses_to!(parse_ident, "h3110 w0rld", "h3110", " w0rld");
403        assert_parses_to!(parse_ident, "helloworld", "helloworld", "");
404        assert_parses_to!(parse_ident, "hello() world", "hello", "() world");
405        assert!(parse_ident(" helloworld").is_err());
406        assert!(parse_ident(".helloworld").is_err());
407        assert!(parse_ident(".12345").is_err());
408        assert!(parse_ident("").is_err());
409    }
410
411    #[test]
412    fn nested_exprs_dont_merge_improperly() {
413        assert_eq!(
414            &(format!("{:?}", parse_expr("a (b c) (d e)").unwrap())),
415            "(\"\", (a (b c) (d e)))"
416        );
417    }
418
419    #[test]
420    fn paths_parse_properly() {
421        assert_parses_to!(parse_path, "hello world", Path::ident("hello"), " world");
422        assert_parses_to!(parse_path, "hello.world", Path::from(vec!["hello", "world"]), "");
423        assert_parses_to!(parse_path, "hello.", Path::ident("hello"), ".");
424        assert_parses_to!(parse_path, "h3110.w0rld", Path::from(vec!["h3110", "w0rld"]), "");
425        assert!(parse_path(" helloworld").is_err());
426        assert!(parse_path(".helloworld").is_err());
427        assert!(parse_path(".12345").is_err());
428        assert!(parse_path("").is_err());
429    }
430
431    #[test]
432    fn atoms_parse_properly() {
433        assert_parses_to!(parse_atom, "x y", Expr::ident("x"), " y");
434        assert_parses_to!(parse_atom, "x.y", Expr::Path(Path::from(vec!["x", "y"])), "");
435        assert_parses_to!(parse_atom, "(x) y", Expr::ident("x"), " y");
436        assert_parses_to!(parse_atom, "(x.y) z", Expr::Path(Path::from(vec!["x", "y"])), " z");
437    }
438
439    #[test]
440    fn simple_exprs_parse_properly() {
441        assert_parses_to!(parse_expr, "(x y)", Expr::Sexpr(vec![
442            Expr::ident("y").into(), Expr::ident("x").into()
443        ].into()), "");
444        assert_parses_to!(parse_expr, "(x.y)", Expr::Path(Path::from(vec!["x", "y"])), "");
445        assert_parses_to!(parse_expr, "((x) y)", Expr::Sexpr(vec![
446            Expr::ident("y").into(), Expr::ident("x").into()
447        ].into()), "");
448        assert_parses_to!(parse_expr, "((x.y) z)", Expr::Sexpr(vec![
449            Expr::ident("z").into(),
450            Expr::Path(Path::from(vec!["x", "y"])).into()
451        ].into()), "");
452    }
453
454    #[test]
455    fn nested_exprs_parse_properly() {
456        let yz = Box::new(
457            Expr::Sexpr(vec![Expr::ident("z").into(), Expr::ident("y").into()].into())
458        );
459        let xyz = Box::new(Expr::Sexpr(
460            vec![
461            Expr::Sexpr(vec![Expr::ident("z").into(), Expr::ident("y").into()].into()).into(),
462            Expr::ident("x").into()
463            ]
464        .into()));
465        assert_parses_to!(parse_expr, "(x (y z) (x (y z)) ((y z) w))", Expr::Sexpr(vec![
466            Expr::Sexpr(vec![Expr::ident("w").into(), yz.clone()].into()).into(),
467            xyz,
468            yz.clone(),
469            Expr::ident("x").into()
470        ].into()), "")
471    }
472
473    #[test]
474    fn simple_let_statements_parse_properly() {
475        let statements = [
476            "let x = y;",
477            "let x = x.y;",
478            "let hello = (world y) z;",
479            "let z = 43 (54 2);"
480        ];
481        for statement in statements.iter() { assert_parses!(parse_statement, statement) }
482    }
483
484    #[test]
485    fn simple_scopes_parse_properly() {
486        let scopes = [
487            "{}",
488            "{ /* my variable x*/ x }",
489            "{ let x = y; x }",
490            "{ let hello = (world y) z; let x = world hello; hello x }"
491        ];
492        for scope in scopes.iter() { assert_parses!(parse_scope, scope) }
493    }
494
495    #[test]
496    fn lambda_arguments_parse_properly() {
497        let args = [
498            "|x : A|",
499            "|x : A|",
500            "|y : B, z : C|",
501            "|world: F, y: C|"
502        ];
503        for arg in args.iter() {
504            assert!(parse_typed_args(arg).is_ok(), "Failed to parse {}", arg)
505        }
506    }
507
508    #[test]
509    fn type_arrows_parse_properly() {
510        let args = [
511            "=> x",
512        ];
513        for arg in args.iter() {
514            match parse_type_arrow(arg) {
515                Ok(_) => {},
516                Err(err) => panic!("Failed to parse {:?}, got {:?}", arg, err)
517            }
518        }
519    }
520
521    #[test]
522    fn simple_functions_parse_properly() {
523        let fns = [
524            "#lambda |x : A| {}",
525            "#lambda |x : A| x",
526            "#lambda |_ : A| x",
527            "#lambda |x : A| /*some comment*/ x",
528            "#lambda |x : A| { /* my variable x*/ x }",
529            "#lambda |y : B, z : C| { let x = y; x }",
530            "#lambda |world: F, y: C| { let hello = (world y) z; let x = world hello; hello x }",
531            "#lambda |world: F, y: C| => T { let hello = (world y) z; let x = world hello; hello x }"
532        ];
533        for func in fns.iter() { assert_parses!(parse_lambda, func) }
534    }
535
536    #[test]
537    fn phi_nodes_parse_properly() {
538        let phis = [
539            "#phi { let f = #lambda |x : A| { g x }; let g = #lambda |x : A| { f x }; }",
540            "#phi { let x = 6; let y = 43; let z = 341; }"
541        ];
542        for phi in phis.iter() { assert_parses!(parse_phi, phi) }
543    }
544
545    #[test]
546    fn gamma_nodes_parse_properly() {
547        let gammas = [
548            "#match {}",
549            "#match { x => y }"
550        ];
551        for gamma in gammas.iter() { assert_parses!(parse_gamma, gamma) }
552    }
553}