heron_rebuild_syntax/
parse.rs

1use anyhow::Result;
2
3#[derive(Debug, thiserror::Error)]
4#[error("ParseError on line '{line}': {msg}")]
5pub struct Error {
6    msg: String,
7    pos: usize,
8    line: String,
9}
10
11pub fn parse(text: &str) -> Result<Vec<crate::ast::Item<'_>>> {
12    use combine::EasyParser;
13    tapefile::items()
14        .easy_parse(text)
15        .map(|(items, _remainder)| {
16            // TODO do something w/ the remainder.
17            items
18        })
19        .map_err(|e| {
20            let pos = e.position.translate_position(text);
21            // isolate the line in question:
22            let before = &text[0..pos];
23            let after = &text[pos..text.len()];
24            let prefix: String = before.chars().rev().take_while(|&c| c != '\n').collect();
25            let prefix: String = prefix.chars().rev().collect();
26            let suffix: String = after.chars().take_while(|&c| c != '\n').collect();
27            let line = prefix + &suffix;
28            // since converting combine's errors is a lifetime nightmare,
29            // we just stringify the error before returning it.
30            Error {
31                pos,
32                line,
33                msg: format!("{}", e),
34            }
35            .into()
36        })
37}
38
39pub mod prelude {
40    pub use combine::parser::char::{char, string};
41    pub use combine::parser::range::recognize;
42    pub use combine::*;
43    // pub use crate::macros::{p, repeater, wrapper};
44}
45
46pub mod util {
47
48    use super::prelude::*;
49    use combine::parser::char::{alpha_num, letter, space};
50    // use combine::parser::sequence::skip;
51
52    p! {
53        ident_start() -> char, {
54            char('_').or(letter())
55        }
56    }
57
58    p! {
59        ident_rest() -> Vec<char>, {
60            many(char('_').or(alpha_num()))
61        }
62    }
63
64    p! {
65        branch_ident_parts() -> Vec<char>, {
66            many1(char('_').or(alpha_num()))
67        }
68    }
69
70    // unlike other idents, branch idents can start w/ a number.
71    p! {
72        branch_ident() -> &'a str, {
73            recognize(branch_ident_parts())
74        }
75    }
76
77    // TODO should idents be limited to ascii?
78    p! {
79        ident() -> &'a str, {
80            recognize(ident_start().and(ident_rest()))
81        }
82    }
83
84    p! {
85        comment() -> &'a str, {
86            recognize(
87                char('#')
88                .and(skip_many(none_of("\n".chars())))
89                .and(char('\n'))
90            )
91        }
92    }
93
94    p! {
95        whitespace() -> (), {
96            skip_many1(
97                space().map(|_| ()).or(comment().map(|_| ()))
98            )
99        }
100    }
101
102    wrapper! {
103        lex(parser), {
104            optional(whitespace()).with(parser).skip(optional(whitespace()))
105        }
106    }
107
108    p! {
109        line_internal_whitespace() -> (), {
110            skip_many1(satisfy(|c: char| c.is_whitespace() && c != '\n'))
111        }
112    }
113
114    wrapper! {
115        lex_inline(parser), {
116            optional(line_internal_whitespace())
117                .with(parser)
118                .skip(optional(line_internal_whitespace()))
119        }
120    }
121
122    // parser, followed by *mandatory* whitespace
123    wrapper! {
124        lex_word(parser), {
125            optional(whitespace()).with(parser).skip(whitespace())
126        }
127    }
128
129    // parser, followed by *mandatory* line-internal whitespace
130    wrapper! {
131        lex_word_inline(parser), {
132            optional(line_internal_whitespace()).with(parser).skip(line_internal_whitespace())
133        }
134    }
135
136    wrapper! {
137        parens(parser), {
138            char('(').with(parser).skip(char(')'))
139        }
140    }
141
142    wrapper! {
143        braces(parser), {
144            char('{').with(parser).skip(char('}'))
145        }
146    }
147
148    wrapper! {
149        brackets(parser), {
150            char('[').with(parser).skip(char(']'))
151        }
152    }
153
154    p! {
155        eol() -> (), {
156            eof().or(char('\n').and(optional(whitespace())).map(|_| ()))
157        }
158    }
159
160    wrapper! {
161        line(parser), {
162            lex_inline(parser).skip(eol())
163        }
164    }
165
166    repeater! {
167        comma_delim(parser), {
168            sep_by1(lex(parser), char(','))
169        }
170    }
171
172    #[cfg(test)]
173    mod test {
174        use anyhow::Result;
175        use combine::parser::char::char;
176        use combine::EasyParser;
177        #[test]
178        fn test_ident() -> Result<()> {
179            assert_eq!("my_name", super::ident().easy_parse("my_name").unwrap().0);
180            assert_eq!(
181                "_start_under123",
182                super::ident().easy_parse("_start_under123").unwrap().0
183            );
184            assert!(super::ident().easy_parse("1name").is_err());
185            Ok(())
186        }
187        #[test]
188        fn test_whitespace() -> Result<()> {
189            assert_eq!(
190                ((), "and more"),
191                super::whitespace().easy_parse(" and more").unwrap()
192            );
193            assert_eq!(
194                ((), "and text"),
195                super::whitespace().easy_parse(" \n    and text").unwrap()
196            );
197            assert!(super::whitespace().easy_parse("x").is_err());
198            Ok(())
199        }
200        #[test]
201        fn test_lex() -> Result<()> {
202            assert_eq!(
203                'x',
204                super::lex(combine::parser::char::char('x')).easy_parse("  x  ").unwrap().0
205            );
206            assert_eq!(
207                'x',
208                super::lex(combine::parser::char::char('x')).easy_parse(" \n x \n ").unwrap().0
209            );
210            Ok(())
211        }
212        #[test]
213        fn test_lex_inline() -> Result<()> {
214            assert_eq!(
215                'x',
216                super::lex_inline(char('x')).easy_parse("  x  ").unwrap().0
217            );
218            assert!(super::lex_inline(char('x')).easy_parse("\nx").is_err());
219            assert_eq!(
220                ('x', "\n"),
221                super::lex_inline(char('x')).easy_parse("x\n").unwrap()
222            );
223            Ok(())
224        }
225        #[test]
226        fn test_eol() -> Result<()> {
227            assert_eq!(
228                ((), "other stuff"),
229                super::eol().easy_parse("\n  \n   other stuff").unwrap()
230            );
231            assert_eq!(((), ""), super::eol().easy_parse("").unwrap());
232            Ok(())
233        }
234        #[test]
235        fn test_line() -> Result<()> {
236            assert_eq!('x', super::line(char('x')).easy_parse(" x").unwrap().0);
237            assert_eq!('x', super::line(char('x')).easy_parse(" x\n").unwrap().0);
238            Ok(())
239        }
240    }
241}
242
243mod literal {
244
245    use super::prelude::*;
246
247    const FORBID_UNQUOTED: [char; 11] = ['(', ')', '[', ']', '*', '@', '$', '+', '#', '"', '\''];
248
249    wrapper! {
250        double_quotes(parser), {
251            char('"').with(parser).skip(char('"'))
252        }
253    }
254
255    p! {
256        double_quoted_literal() -> &'a str, {
257            double_quotes(recognize(skip_many(none_of("\"".chars()))))
258        }
259    }
260
261    p! {
262        unquoted_literal_char() -> char, {
263            satisfy(|c: char|
264                !c.is_whitespace() && !FORBID_UNQUOTED.iter().any(|&forbidden| forbidden == c)
265            )
266        }
267    }
268
269    p! {
270        unquoted_literal() -> &'a str, {
271            recognize(skip_many1(unquoted_literal_char()))
272        }
273    }
274
275    p! {
276        literal() -> &'a str, {
277            double_quoted_literal().or(unquoted_literal())
278        }
279    }
280
281    p! {
282        interp_literal() -> (&'a str, Vec<&'a str>), {
283            super::interp::double_quoted_interp_string()
284                .or(unquoted_literal().map(|s| (s, Vec::with_capacity(0))))
285        }
286    }
287
288    #[cfg(test)]
289    mod test {
290        use anyhow::Result;
291        use combine::EasyParser;
292        #[test]
293        fn test_literal() -> Result<()> {
294            assert_eq!(
295                "just_ident",
296                super::literal().easy_parse("just_ident").unwrap().0
297            );
298            assert_eq!(
299                "quoted text",
300                super::literal().easy_parse("\"quoted text\"").unwrap().0
301            );
302            assert_eq!(
303                "not greedy",
304                super::literal().easy_parse("\"not greedy\" won't parse this").unwrap().0
305            );
306            assert_eq!(
307                "filenames.are.ok",
308                super::literal().easy_parse("filenames.are.ok").unwrap().0
309            );
310            Ok(())
311        }
312    }
313}
314
315mod interp {
316    use super::prelude::*;
317    use super::rhs::variable;
318    use combine::parser::range::recognize_with_value;
319
320    p! {
321        interp_variable() -> (&'a str, Vec<&'a str>), {
322            variable().map(|var| (var, vec![var]))
323        }
324    }
325
326    p! {
327        interp_content() -> (&'a str, Vec<&'a str>), {
328            recognize_with_value(
329                skip_many(none_of("$\"\\".chars()))
330                    .with(optional(variable().and(interp_content())))
331            ).map(|(full_text, parsed_suffix)| {
332                if let Some((var, (_, mut rest_vars))) = parsed_suffix {
333                    rest_vars.push(var);
334                    (full_text, rest_vars)
335                } else {
336                    (full_text, Vec::with_capacity(0))
337                }
338            })
339        }
340    }
341
342    p! {
343        double_quoted_interp_string() -> (&'a str, Vec<&'a str>), {
344            super::literal::double_quotes(interp_content())
345        }
346    }
347}
348
349mod graft {
350
351    use super::prelude::*;
352    use super::util::{brackets, branch_ident, comma_delim, ident, lex_inline};
353
354    p! {
355        branch_element() -> (&'a str, &'a str), {
356            ident().skip(char(':')).and(lex_inline(branch_ident()))
357        }
358    }
359
360    p! {
361        branch_graft() -> Vec<(&'a str, &'a str)>, {
362            brackets(comma_delim(branch_element()))
363        }
364    }
365
366    #[cfg(test)]
367    mod test {
368        use anyhow::Result;
369        use combine::EasyParser;
370        #[test]
371        fn test_branch_graft() -> Result<()> {
372            assert_eq!(
373                vec![("Branchpoint1", "val1"), ("Branchpoint2", "val2")],
374                super::branch_graft()
375                    .easy_parse("[Branchpoint1: val1, Branchpoint2: val2]")
376                    .unwrap()
377                    .0
378            );
379            // make sure newlines work:
380            assert_eq!(
381                vec![("Bp1", "val1"), ("Bp2", "val2"), ("Bp3", "val3")],
382                super::branch_graft()
383                    .easy_parse("[\n\tBp1: val1,\n\tBp2: val2 ,\nBp3: val3\n]")
384                    .unwrap()
385                    .0
386            );
387            Ok(())
388        }
389    }
390}
391
392mod rhs {
393
394    use super::graft::branch_graft;
395    use super::literal::{interp_literal, literal};
396    use super::prelude::*;
397    use super::util::{branch_ident, ident, lex_inline, parens, whitespace};
398    use crate::ast::Rhs;
399
400    p! {
401        shorthand_variable() -> char, {
402            // skip_count(1, char('@'))
403            char('@')
404        }
405    }
406
407    p! {
408        variable() -> &'a str, {
409            char('$').with(ident())
410        }
411    }
412
413    p! {
414        task_output() -> (&'a str, &'a str), {
415            variable().and(char('@').with(ident()))
416        }
417    }
418
419    p! {
420        shorthand_task_output() -> &'a str, {
421            char('@').with(ident())
422        }
423    }
424
425    p! {
426        grafted_variable() -> (&'a str, Vec<(&'a str, &'a str)>), {
427            variable().and(branch_graft())
428        }
429    }
430
431    p! {
432        grafted_task_output() -> ((&'a str, &'a str), Vec<(&'a str, &'a str)>), {
433            task_output().and(branch_graft())
434        }
435    }
436
437    p! {
438        shorthand_grafted_task_output() -> (&'a str, Vec<(&'a str, &'a str)>), {
439            shorthand_task_output().and(branch_graft())
440        }
441    }
442
443    p! {
444        branchpoint_assignment() -> (&'a str, Rhs<'a>), {
445            branch_ident().and(
446                choice!(
447                    attempt(lex_inline(char('=')).with(rhs())),
448                    produce(|| Rhs::Unbound)
449                )
450            )
451        }
452    }
453
454    p! {
455        branchpoint_assignments() -> Vec<(&'a str, Rhs<'a>)>, {
456            // TODO this is prob a dumb way to do this, but cdn't think of
457            // anything else - we try to sep_by1 a whitespace-separated list
458            // of branch assignments, and if that fails, we call sep_end_by1
459            // to catch the trailing whitespace.
460            attempt(
461                sep_by1(branchpoint_assignment(), whitespace())
462            ).or(
463                sep_end_by1(branchpoint_assignment(), whitespace())
464            )
465            // many1(branchpoint_assignment())
466        }
467    }
468
469    p! {
470        branchpoint_prefix() -> &'a str, {
471            ident().skip(lex_inline(char(':')))
472        }
473    }
474
475    p! {
476        branchpoint() -> (&'a str, Vec<(&'a str, Rhs<'a>)>), {
477            parens(
478                optional(whitespace())
479                    .with(branchpoint_prefix())
480                    .skip(optional(whitespace()))
481                    .and(branchpoint_assignments())
482                    .skip(optional(whitespace()))
483            )
484        }
485    }
486
487    p! {
488        rhs() -> Rhs<'a>, {
489            choice!(
490                branchpoint().map(|(branchpoint, vals)| Rhs::Branchpoint { branchpoint, vals }),
491                attempt(
492                    shorthand_grafted_task_output()
493                        .map(|(task, branch)| Rhs::ShorthandGraftedTaskOutput { task, branch })
494                ),
495                attempt(
496                    shorthand_task_output()
497                        .map(|task| Rhs::ShorthandTaskOutput { task })
498                ),
499                shorthand_variable().map(|_| Rhs::ShorthandVariable),
500                attempt(
501                    grafted_variable()
502                        .map(|(name, branch)| Rhs::GraftedVariable { name, branch })
503                ),
504                attempt(
505                    grafted_task_output()
506                        .map(|((output, task), branch)| Rhs::GraftedTaskOutput{output, task, branch}),
507                ),
508                attempt(
509                    task_output()
510                        .map(|(output, task)| Rhs::TaskOutput {output, task})
511                ),
512                attempt(
513                    interp_literal().map(|(text, vars)| {
514                        if vars.is_empty() {
515                            Rhs::Literal { val: text }
516                        } else {
517                            Rhs::Interp { text, vars }
518                        }
519                    })
520                ),
521                variable().map(|name| Rhs::Variable { name }),
522                // nb: with interp_literal enabled, this will never execute:
523                literal().map(|val| Rhs::Literal { val })
524            )
525        }
526    }
527
528    #[cfg(test)]
529    mod test {
530        use crate::ast::Rhs;
531        use anyhow::Result;
532        use combine::EasyParser;
533        #[test]
534        fn test_literal() -> Result<()> {
535            assert_eq!(Rhs::literal("hi"), super::rhs().easy_parse("hi").unwrap().0);
536            assert_eq!(
537                Rhs::literal("hi"),
538                super::rhs().easy_parse("\"hi\"").unwrap().0
539            );
540            Ok(())
541        }
542        #[test]
543        fn test_variable() -> Result<()> {
544            assert_eq!(
545                Rhs::ShorthandVariable,
546                super::rhs().easy_parse("@").unwrap().0
547            );
548            assert_eq!(
549                Rhs::variable("var"),
550                super::rhs().easy_parse("$var").unwrap().0
551            );
552            assert_eq!(
553                Rhs::grafted_variable("var", vec![("Bp1", "val1")]),
554                super::rhs().easy_parse("$var[Bp1: val1]").unwrap().0,
555            );
556            Ok(())
557        }
558        #[test]
559        fn test_task_output() -> Result<()> {
560            assert_eq!(
561                Rhs::shorthand_grafted_task_output("task", vec![("Bp1", "val1")]),
562                super::rhs().easy_parse("@task[Bp1:val1]").unwrap().0
563            );
564            assert_eq!(
565                Rhs::shorthand_task_output("task"),
566                super::rhs().easy_parse("@task").unwrap().0
567            );
568            assert_eq!(
569                Rhs::task_output("output", "task"),
570                super::rhs().easy_parse("$output@task").unwrap().0
571            );
572            assert_eq!(
573                Rhs::grafted_task_output("output", "task", vec![("Bp1", "val1")]),
574                super::rhs().easy_parse("$output@task[Bp1: val1]").unwrap().0
575            );
576            Ok(())
577        }
578        #[test]
579        fn test_branchpoint() -> Result<()> {
580            assert_eq!(
581                ("val1", Rhs::literal("yes")),
582                super::branchpoint_assignment().easy_parse("val1=yes").unwrap().0,
583            );
584            assert_eq!(
585                Rhs::branchpoint(
586                    "Bp1",
587                    vec![("val1", Rhs::literal("yes")), ("val2", Rhs::literal("no"))],
588                ),
589                super::rhs().easy_parse("(Bp1: val1=yes val2=no)").unwrap().0
590            );
591            // make sure we can deal with multiline branchpoint assignments:
592            assert_eq!(
593                Rhs::branchpoint(
594                    "Bp1",
595                    vec![("val1", Rhs::literal("yes")), ("val2", Rhs::literal("no"))],
596                ),
597                super::rhs().easy_parse("(\nBp1:\n  val1=yes\n  val2=no\n)").unwrap().0
598            );
599            assert_eq!(
600                Rhs::branchpoint("Bp1", vec![("a", Rhs::Unbound), ("b", Rhs::Unbound)],),
601                super::rhs().easy_parse("(Bp1: a b)").unwrap().0
602            );
603            assert_eq!(
604                Rhs::branchpoint("Bp1", vec![("a", Rhs::Unbound), ("b", Rhs::Unbound)],),
605                super::rhs().easy_parse("(Bp1:\n a b)").unwrap().0
606            );
607            // ending assignments w/ whitespace:
608            assert_eq!(
609                Rhs::branchpoint("Bp1", vec![("a", Rhs::Unbound), ("b", Rhs::Unbound)],),
610                super::rhs().easy_parse("(Bp1: a b )").unwrap().0
611            );
612            Ok(())
613        }
614    }
615}
616
617mod assignment {
618
619    use super::prelude::*;
620    use super::rhs::rhs;
621    use super::util::{ident, lex_inline, line_internal_whitespace};
622    use crate::ast::Rhs;
623
624    p! {
625        assignment() -> (&'a str, Rhs<'a>), {
626            ident().and(
627                choice!(
628                    attempt(lex_inline(char('=')).with(rhs())),
629                    optional(line_internal_whitespace()).map(|_| Rhs::Unbound)
630                )
631            )
632        }
633    }
634
635    p! {
636        dot_assignment() -> (&'a str, Rhs<'a>), {
637            char('.').with(ident()).and(
638                choice!(
639                    attempt(lex_inline(char('=')).with(rhs())),
640                    line_internal_whitespace().map(|()| Rhs::Unbound)
641                )
642            )
643        }
644    }
645
646    #[cfg(test)]
647    mod test {
648        use crate::ast::Rhs;
649        use anyhow::Result;
650        use combine::EasyParser;
651        #[test]
652        fn test_unbound() -> Result<()> {
653            assert_eq!(
654                ("var", Rhs::Unbound),
655                super::assignment().easy_parse("var  ").unwrap().0
656            );
657            Ok(())
658        }
659        #[test]
660        fn test_regular_bound_assignment() -> Result<()> {
661            assert_eq!(
662                ("var", Rhs::literal("value")),
663                super::assignment().easy_parse("var=value").unwrap().0
664            );
665            Ok(())
666        }
667        #[test]
668        fn test_dot_assignment() -> Result<()> {
669            assert_eq!(
670                ("param", Rhs::literal("value")),
671                super::dot_assignment().easy_parse(".param=value").unwrap().0
672            );
673            Ok(())
674        }
675        #[test]
676        fn test_branched() -> Result<()> {
677            assert_eq!(
678                (
679                    "var",
680                    Rhs::branchpoint(
681                        "Branchpt",
682                        vec![("a1", Rhs::literal("a")), ("b2", Rhs::literal("b"))]
683                    )
684                ),
685                super::assignment().easy_parse("var=(Branchpt: a1=a b2=b)").unwrap().0
686            );
687            Ok(())
688        }
689        #[test]
690        fn test_branched_shorthand() -> Result<()> {
691            assert_eq!(
692                (
693                    "var",
694                    Rhs::branchpoint("Branchpt", vec![("a", Rhs::Unbound), ("b", Rhs::Unbound)])
695                ),
696                super::assignment().easy_parse("var=(Branchpt: a b)").unwrap().0
697            );
698            Ok(())
699        }
700        // // in DT, I think a grafted glob produces a space-separated list,
701        // // but presumably it only works for a single branchpoint.
702        // #[test]
703        // fn test_graft_shorthand_glob() -> Result<()> {
704        //     assert_eq!(
705        //         (
706        //             "dataset_json",
707        //             Rhs::ShorthandGraftedTaskOutput {
708        //                 task: "DumpHFDataset",
709        //                 branch: vec![("Dataset", "*")],
710        //             }
711        //         ),
712        //         super::assignment()
713        //             .easy_parse("dataset_json=@DumpHFDataset[Dataset:*]")
714        //             .unwrap()
715        //             .0
716        //     );
717        //     Ok(())
718        // }
719    }
720}
721
722mod spec {
723
724    use super::assignment::{assignment, dot_assignment};
725    use super::prelude::*;
726    use super::util::{ident, lex, lex_inline};
727    use crate::ast::BlockSpec;
728
729    p! {
730        input_chunk() -> Vec<BlockSpec<'a>>, {
731            lex_inline(char('<')).with(many(
732                lex_inline(assignment()).map(|(lhs, rhs)| BlockSpec::Input{lhs, rhs})
733            ))
734        }
735    }
736
737    p! {
738        output_chunk() -> Vec<BlockSpec<'a>>, {
739            lex_inline(char('>')).with(many(
740                lex_inline(assignment()).map(|(lhs, rhs)| BlockSpec::Output{lhs, rhs})
741            ))
742        }
743    }
744
745    p! {
746        param_assignment() -> BlockSpec<'a>, {
747            // special case since params can start with '.':
748            choice! (
749                assignment().map(|(lhs, rhs)| BlockSpec::Param{lhs, rhs, dot: false}),
750                dot_assignment().map(|(lhs, rhs)| BlockSpec::Param{lhs, rhs, dot: true})
751            )
752        }
753    }
754
755    p! {
756        param_chunk() -> Vec<BlockSpec<'a>>, {
757            lex_inline(string("::"))
758                .with(many(lex_inline(param_assignment())))
759        }
760    }
761
762    // p! {
763    //     package_chunk() -> Vec<BlockSpec<'a>>, {
764    //         lex_inline(char(':')).with(many(
765    //             lex_inline(ident()).map(|name| BlockSpec::Package{name})
766    //         ))
767    //     }
768    // }
769
770    p! {
771        module_chunk() -> Vec<BlockSpec<'a>>, {
772            lex_inline(
773                char('@').with(ident())
774            ).map(|name| {
775                vec![BlockSpec::Module { name }]
776            })
777        }
778    }
779
780    p! {
781        spec_chunk() -> Vec<BlockSpec<'a>>, {
782            choice!(
783                attempt(input_chunk()),
784                attempt(output_chunk()),
785                attempt(param_chunk()),
786                module_chunk()
787                // package_chunk()
788            )
789        }
790    }
791
792    p! {
793        specs() -> Vec<BlockSpec<'a>>, {
794            many(lex(spec_chunk()))
795                .map(|mut vecs: Vec<Vec<BlockSpec<'a>>>| {
796                    // TODO there's gotta be a better way, but combine is confusin.
797                    let mut flattened = Vec::new();
798                    for vec in &mut vecs {
799                        flattened.append(vec);
800                    }
801                    flattened
802                })
803        }
804    }
805
806    #[cfg(test)]
807    mod test {
808        use crate::ast::{BlockSpec, Rhs};
809        use anyhow::Result;
810        use combine::EasyParser;
811        #[test]
812        fn test_specs() -> Result<()> {
813            assert_eq!(
814                vec![
815                    BlockSpec::output("output", Rhs::literal("filename.tgz")),
816                    BlockSpec::input("input1", Rhs::task_output("output", "task")),
817                    // BlockSpec::package("package_name"),
818                    BlockSpec::param("param1", Rhs::variable("var")),
819                    BlockSpec::dot_param("param2", Rhs::literal("value")),
820                ],
821                super::specs().easy_parse(
822                    "> output=filename.tgz < input1=$output@task \n:: param1=$var .param2=value"
823                ).unwrap().0
824            );
825            Ok(())
826        }
827        #[test]
828        fn test_params() -> Result<()> {
829            assert_eq!(
830                vec![BlockSpec::param("param1", Rhs::Unbound)],
831                super::param_chunk().easy_parse(":: param1").unwrap().0
832            );
833            assert_eq!(
834                vec![BlockSpec::param("param1", Rhs::Unbound)],
835                super::spec_chunk().easy_parse(":: param1").unwrap().0
836            );
837            Ok(())
838        }
839    }
840}
841
842mod tasklike {
843    use super::prelude::*;
844    use super::spec::specs;
845    use super::util::{braces, ident, lex_inline};
846    use crate::ast::{BlockType, TasklikeBlock};
847    use crate::bash::bash_code;
848
849    p! {
850        block_name(keyword: &'static str) -> &'a str, {
851            lex_inline(string(keyword)).with(ident())
852        }
853    }
854
855    p! {
856        tasklike_block(keyword: &'static str, subtype: BlockType) -> TasklikeBlock<'a>, {
857            block_name(keyword)
858                .and(specs())
859                .and(braces(bash_code()))
860                .map(|((name, specs), code)| {
861                    TasklikeBlock {
862                        name,
863                        subtype: *subtype,
864                        specs,
865                        code,
866                    }
867                })
868
869        }
870    }
871
872    p! {
873        task() -> TasklikeBlock<'a>, {
874            tasklike_block("task", BlockType::Task)
875        }
876    }
877
878    // p! {
879    //     package() -> TasklikeBlock<'a>, {
880    //         tasklike_block("package", BlockType::Package)
881    //     }
882    // }
883
884    #[cfg(test)]
885    mod test {
886        use anyhow::Result;
887        use combine::EasyParser;
888        // use crate::HashSet;
889        // use crate::ast::{TasklikeBlock, BlockSpec, BlockType, BashCode};
890        #[test]
891        fn test_task() -> Result<()> {
892            assert_eq!(
893                "task_name",
894                super::block_name("task").easy_parse("task task_name").unwrap().0
895            );
896            // assert_eq!(
897            //     TasklikeBlock {
898            //         name: "task_name",
899            //         subtype: BlockType::Task,
900            //         specs: vec![BlockSpec::package("package_name")],
901            //         code: BashCode {
902            //             code: "echo 'hi'",
903            //             vars: HashSet::default(),
904            //         }
905            //     },
906            //     super::task().easy_parse(
907            //         "task task_name\n  : package_name\n{\n  echo 'hi'\n}"
908            //     ).unwrap().0
909            // );
910            Ok(())
911        }
912    }
913}
914
915mod grouplike {
916    use super::prelude::*;
917    use super::spec::specs;
918    use super::tasklike::{block_name, tasklike_block};
919    use super::util::{braces, whitespace};
920    use crate::ast::{BlockType, GrouplikeBlock};
921
922    p! {
923        grouplike_block(
924            keyword: &'static str,
925            subtype: BlockType,
926            internal_keyword: &'static str,
927            internal_subtype: BlockType
928        ) -> GrouplikeBlock<'a>, {
929            block_name(keyword)
930                .and(specs())
931                .and(braces(
932                    sep_by(tasklike_block(internal_keyword, *internal_subtype), whitespace())
933                ))
934                .map(|((name, specs), blocks)| {
935                    GrouplikeBlock {
936                        name,
937                        subtype: *subtype,
938                        specs,
939                        blocks,
940                    }
941                })
942        }
943    }
944
945    // p! {
946    //     versioner() -> GrouplikeBlock<'a>, {
947    //         grouplike_block(
948    //             "versioner",
949    //             BlockType::Versioner,
950    //             "action",
951    //             BlockType::Action,
952    //         )
953    //     }
954    // }
955}
956
957mod config {
958    use super::assignment::assignment;
959    use super::prelude::*;
960    use super::util::{braces, lex, line, whitespace};
961    use crate::ast::Rhs;
962
963    p! {
964        global_config() -> Vec<(&'a str, Rhs<'a>)>, {
965            lex(string("global")).with(braces(
966                optional(whitespace()).with(
967                    many(line(assignment()))
968                )
969            ))
970        }
971    }
972}
973
974mod plan {
975    use super::prelude::*;
976    use super::util::{
977        braces, branch_ident, comma_delim, ident, lex, lex_inline, parens, whitespace,
978    };
979    use crate::ast::{Branches, CrossProduct, Plan};
980
981    p! {
982        branches() -> Branches<'a>, {
983            char('*').map(|_| Branches::Glob).or(
984                many1(lex(branch_ident()))
985                .map(Branches::Specified)
986            )
987        }
988    }
989
990    p! {
991        branch_selection() -> (&'a str, Branches<'a>), {
992            parens(
993                lex(ident()).skip(lex(char(':'))).and(branches())
994            )
995        }
996    }
997
998    p! {
999        branch_selections() -> Vec<(&'a str, Branches<'a>)>, {
1000            lex(string("via")).with(sep_by1(branch_selection(), attempt(lex(char('*')))))
1001        }
1002    }
1003
1004    p! {
1005        cross_product() -> CrossProduct<'a>, {
1006            lex(string("reach"))
1007                .with(comma_delim(ident()))
1008                .and(optional(branch_selections()))
1009                .map(|(goals, branches)| {
1010                    let branches = branches.unwrap_or_default();
1011                    CrossProduct { goals, branches }
1012                })
1013        }
1014    }
1015
1016    p! {
1017        plan() -> Plan<'a>, {
1018            lex_inline(string("plan")).with(ident())
1019                .skip(whitespace())
1020                .and(braces(
1021                    many(lex(cross_product()))
1022                ))
1023                .map(|(name, cross_products)| Plan { name, cross_products })
1024        }
1025    }
1026
1027    #[cfg(test)]
1028    mod test {
1029        // use anyhow::Result;
1030        use super::*;
1031        use combine::EasyParser;
1032        #[test]
1033        fn test_cross_product() {
1034            assert_eq!(
1035                CrossProduct {
1036                    goals: vec!["task"],
1037                    branches: vec![],
1038                },
1039                cross_product().easy_parse("reach task").unwrap().0
1040            );
1041        }
1042        #[test]
1043        fn test_plan() {
1044            assert_eq!(
1045                Plan {
1046                    name: "plan",
1047                    cross_products: vec![CrossProduct {
1048                        goals: vec!["task"],
1049                        branches: vec![],
1050                    }],
1051                },
1052                plan().easy_parse("plan plan {\n  reach task\n}").unwrap().0
1053            );
1054        }
1055        #[test]
1056        fn test_branches() {
1057            assert_eq!(Branches::Glob, branches().easy_parse("*").unwrap().0);
1058            assert_eq!(
1059                Branches::Specified(vec!["val"]),
1060                branches().easy_parse("val").unwrap().0
1061            );
1062            assert_eq!(
1063                Branches::Specified(vec!["v1", "v2"]),
1064                branches().easy_parse("v1 v2").unwrap().0
1065            );
1066            // TODO add more here to test full plan syntax
1067        }
1068    }
1069}
1070
1071mod misc {
1072    use super::assignment::assignment;
1073    use super::literal::literal;
1074    use super::prelude::*;
1075    use super::util::{lex_inline, line};
1076    use crate::ast::Rhs;
1077
1078    p! {
1079        import_statement() -> &'a str, {
1080            line(
1081                lex_inline(string("import")).with(literal())
1082            )
1083        }
1084    }
1085
1086    p! {
1087        module_statement() -> (&'a str, Rhs<'a>), {
1088            line(
1089                lex_inline(string("module")).with(assignment())
1090            )
1091        }
1092    }
1093
1094    #[cfg(test)]
1095    mod test {
1096        use anyhow::Result;
1097        use combine::EasyParser;
1098        // use crate::HashSet;
1099        // use crate::ast::Item;
1100        #[test]
1101        fn test_import() -> Result<()> {
1102            assert_eq!(
1103                "packages.tape",
1104                super::import_statement().easy_parse("import packages.tape\n ").unwrap().0
1105            );
1106            // assert_eq!(
1107            //     TasklikeBlock {
1108            //         name: "task_name",
1109            //         subtype: BlockType::Task,
1110            //         specs: vec![BlockSpec::package("package_name")],
1111            //         code: BashCode {
1112            //             code: "echo 'hi'",
1113            //             vars: HashSet::default(),
1114            //         }
1115            //     },
1116            //     super::task().easy_parse(
1117            //         "task task_name\n  : package_name\n{\n  echo 'hi'\n}"
1118            //     ).unwrap().0
1119            // );
1120            Ok(())
1121        }
1122    }
1123}
1124
1125mod tapefile {
1126    use super::{
1127        config::global_config,
1128        misc::{import_statement, module_statement},
1129        plan::plan,
1130        prelude::*,
1131        tasklike::task,
1132        util::lex,
1133    };
1134    use crate::ast::Item;
1135
1136    p! {
1137        item() -> Item<'a>, {
1138            choice!(
1139                //versioner().map(Item::Versioner),
1140                import_statement().map(Item::Import),
1141                module_statement().map(|(k, v)| Item::Module(k, v)),
1142                task().map(Item::Task),
1143                global_config().map(Item::GlobalConfig),
1144                plan().map(Item::Plan)
1145                // NB this wouldn't parse, b/c the "p" gets picked up by "plan":
1146                // package().map(Item::Package)
1147
1148            )
1149        }
1150    }
1151
1152    p! {
1153        items() -> Vec<Item<'a>>, {
1154            many(lex(item()))
1155        }
1156    }
1157}