timi/
frontend.rs

1//! Frontend of the interpreter. Tokenization & Parsing is handled here.
2#[warn(missing_docs)]
3extern crate ansi_term;
4extern crate rand;
5
6use std::fmt;
7use std::collections::HashMap;
8use std::cmp; //for max
9use ir::*;
10use std::str;
11
/// Represents a position in a file, both in terms of `(line, col)` and
/// in terms of seek index into the file.
#[derive(Debug, Clone, Copy)]
pub struct Point {
    /// Raw seek index: this point is the `index`-th element of the file viewed as a stream. 0 indexed
    pub index: usize,
    /// the line number of the point. 0 indexed
    pub line: usize,
    /// the column number of the point. 0 indexed
    pub col: usize
}
23
24impl Point {
25    pub fn new(index: usize, line: usize, col: usize) -> Point {
26        Point {
27            index: index,
28            line: line,
29            col: col
30        }
31    }
32    
33    /// convert the `Point` to a `Range` starting and ending at the same `Point`
34    pub fn as_range(&self) -> Range {
35        Range {
36            start: *self,
37            end: *self
38        }
39    }
40}
41
/// Represents a range in a file from the start point to the end point.
/// The Range is `[start, end]`, i.e. both endpoints are inclusive.
///
/// ### Use Case
///
/// Used to track regions in the source code from where tokens came from. Used
/// during error reporting to pretty-print source code blocks.
///
/// ### Note
///
/// `start = end` represents a range pointing to one character in a file.
#[derive(Debug, Clone, Copy)]
pub struct Range {
    /// first point of the range (inclusive)
    pub start: Point,
    /// last point of the range (inclusive)
    pub end: Point,
}
58
59
/// Kinds of parse errors that occur during tokenization & parsing.
pub enum ParseErrorKind {
    /// End of file was reached unexpectedly; carries a custom error string.
    EOF(String),
    /// An unexpected token was found.
    UnexpectedToken{
        /// List of tokens that would have been legal at this position
        expected: Vec<CoreToken>,
        /// Token that was actually found
        found: CoreToken,
        /// error message supplied by the caller
        error: String },
    /// Generic, free-form error message
    Generic(String)
}
75
76impl fmt::Debug for ParseErrorKind {
77    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
78        match self {
79            &ParseErrorKind::EOF(ref s) => {
80                write!(fmt, "EOF reached: {}", s)
81            }
82            &ParseErrorKind::UnexpectedToken{ref expected, ref found, ref error} => {
83                write!(fmt, "expected one of {:#?}\n\
84                       found: {:#?}\n\
85                       error: {}",
86                       expected, found, error)
87            }
88            &ParseErrorKind::Generic(ref err) => write!(fmt, "{}", err)
89        }
90    }
91}
92
#[derive(Debug)]
/// Represents a parse error.
/// Consists of a [`Range`](struct.Range.html) for the location of the error
/// and a [`ParseErrorKind`](enum.ParseErrorKind.html) for the error information.
pub struct ParseError(Range, ParseErrorKind);
98
99impl ParseError {
100    /// Helper to construct a `Err` with a [`ParseErrorKind::Generic`](enum.ParseErrorKind.html)
101    pub fn generic<T>(range: Range, s: String) -> Result<T, ParseError> {
102        ParseError(range, ParseErrorKind::Generic(s)).into()
103    }
104
105    /// Pretty print by using `program` and `Range` to show where in the source code
106    /// the error was found.
107    pub fn pretty_print(&self, program: &str) -> String {
108
109        let &ParseError(range, ref error_kind) = self;
110        let program_lines  : Vec<Vec<char>> = program
111                                              .split('\n')
112                                              .map(|s| s.chars().collect())
113                                              .collect();
114
115        let source_lines = {
116            if range.start.line == range.end.line {
117                ParseError::pretty_print_single_line(range.start.line,
118                                                     range.start.col,
119                                                     range.end.col, &program_lines[range.start.line])
120
121            } else {
122                ParseError::pretty_print_multiple_lines(range, &program_lines[range.start.line..range.end.line])
123     
124            }
125        };
126
127        format!("{}\n{:#?}", source_lines, error_kind)
128    }
129
130    /// pretty print the line number string
131    fn get_line_number_pretty(line: usize) -> String {
132        format!("{}| ", line + 1)
133    }
134
135    /// pretty print a single source line
136    fn pretty_print_single_line(line: usize, col_begin: usize, col_end: usize, line_str: &Vec<char>) -> String{
137        use std::iter;
138
139        use self::ansi_term::Colour::{Blue, Red};
140
141        let line_number_pretty = ParseError::get_line_number_pretty(line);
142
143        let error_pointer_line = iter::repeat(" ").take(line_number_pretty.chars().count() +  col_begin - 1).collect::<String>() +
144                                 "^" + 
145                                &iter::repeat("-").take(col_end - (col_begin )).collect::<String>();
146
147
148
149        let source_line = Blue.paint(line_number_pretty).to_string() +
150                          &line_str.iter().cloned().collect::<String>();
151
152
153        format!("{}\n{}", source_line, Red.paint(error_pointer_line))
154    }
155
156
157    /// Pretty print multiple source lines.
158    fn pretty_print_multiple_lines(range: Range, lines_str: &[Vec<char>]) -> String{
159        let mut out = String::new();
160        for (i, line)  in lines_str.iter().enumerate() {
161            out += &ParseError::get_line_number_pretty(range.start.line + i);
162            out += &line.iter().cloned().collect::<String>();
163            out += "\n";
164
165        }
166        out
167
168    }
169}
170
/// Allows any `ParseError` to become an `Err` result via `.into()`,
/// which is how the parsing helpers build their error returns.
///
/// NOTE(review): `Into` is implemented directly rather than the usually
/// preferred `From`, presumably because
/// `impl<T> From<ParseError> for Result<T, ParseError>` is rejected by the
/// orphan rules (the uncovered type parameter `T` precedes the local type) —
/// confirm before changing.
impl<T> Into<Result<T, ParseError>> for ParseError {
    fn into(self) -> Result<T, ParseError> {
        Err(self)
    }


}
178
179
180
#[derive(Clone, PartialEq, Eq, Hash)]
/// Token for the Core language grammar
pub enum CoreToken {
    /// `let` keyword
    Let,
    /// `in` keyword
    In,
    /// identifier (variable / supercombinator name)
    Ident(String),
    /// `=`
    Assignment,
    /// `;`
    Semicolon,
    /// `(`
    OpenRoundBracket,
    /// `)`
    CloseRoundBracket,
    /// `{`
    OpenCurlyBracket,
    /// `}`
    CloseCurlyBracket,
    /// `,`
    Comma,
    /// integer literal, kept as its raw source string until
    /// `parse_string_as_int` converts it
    Integer(String),
    /// boolean or, written `|` in the source
    Or,
    /// boolean and, written `&` in the source
    And,
    /// `<`
    L,
    /// `<=`
    LEQ,
    /// `>`
    G,
    /// `>=`
    GEQ,
    /// `==`
    EQ,
    /// `!=`
    NEQ,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Mul,
    /// `/`
    Div,
    /// `Pack` keyword
    Pack,
    // NOTE(review): the previous comment here ("when you call peek(), it
    // returns this token if the token stream is empty") is stale — no such
    // sentinel variant exists; `ParserCursor::peek` returns
    // `Option<CoreToken>` instead. Confirm and drop this note.
}
211
212impl fmt::Debug for CoreToken {
213    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
214        match self {
215            &CoreToken::Let => write!(fmt, "let"),
216            &CoreToken::In => write!(fmt, "in"),
217            &CoreToken::Ident(ref s) => write!(fmt, "{}", s),
218            &CoreToken::Assignment => write!(fmt, "="),
219            &CoreToken::Semicolon => write!(fmt, ";"),
220            &CoreToken::OpenRoundBracket => write!(fmt, "("),
221            &CoreToken::CloseRoundBracket => write!(fmt, ")"),
222            &CoreToken::OpenCurlyBracket => write!(fmt, "{{"),
223            &CoreToken::CloseCurlyBracket => write!(fmt, "}}"),
224            &CoreToken::Comma => write!(fmt, ","),
225            &CoreToken::Integer(ref s) => write!(fmt, "{}", s),
226            &CoreToken::Or => write!(fmt, "||"),
227            &CoreToken::And => write!(fmt, "&&"),
228            &CoreToken::L => write!(fmt, "<"),
229            &CoreToken::LEQ => write!(fmt, "<="),
230            &CoreToken::G => write!(fmt, ">"),
231            &CoreToken::GEQ => write!(fmt, ">="),
232            &CoreToken::EQ => write!(fmt, "=="),
233            &CoreToken::NEQ => write!(fmt, "!="),
234            &CoreToken::Plus => write!(fmt, "+"),
235            &CoreToken::Minus => write!(fmt, "-"),
236            &CoreToken::Mul => write!(fmt, "*"),
237            &CoreToken::Div => write!(fmt, "/"),
238            &CoreToken::Pack => write!(fmt, "Pack"),
239     }
240 }
241}
242
243
#[derive(Clone)]
/// Represents a location in the token stream.
/// Used to maintain parsing information.
struct ParserCursor {
    /// full token stream, each token paired with its source range
    tokens: Vec<(Range, CoreToken)>,
    /// index into `tokens` of the next token to consume
    pos: usize,
    /// range of the most recently consumed token (seeded with the first
    /// token's range); reported by `peek()` at end of stream
    cur_range: Range,
}
252
253impl ParserCursor {
254    fn new(tokens: Vec<(Range, CoreToken)>) -> ParserCursor {
255        let cur_range = match tokens.get(0) {
256            Some(&(r, _)) => r,
257            None => Point::new(0, 0, 0).as_range()
258        };
259
260        ParserCursor {
261            tokens: tokens,
262            pos: 0,
263            cur_range:  cur_range
264        }
265    }
266
267    /// Peek at the current token.
268    /// If a token exists, then return the token & its range.
269    /// If a token does not exist, return the range of the last seen token and `None`.
270    fn peek(&self) -> (Range, Option<CoreToken>) {
271        match self.tokens.get(self.pos) {
272            Some(&(r, ref t)) => (r, Some(t.clone())),
273            None => (self.cur_range, None)
274        }
275
276    }
277
278    /// Consume a token, moving the cursor one token forward in the input stream.
279    /// 
280    /// Returns the consumed token and its range if `consume()` succeeded.
281    /// Returns `ParseError::EOF` with `error` if no more tokens were found.
282    fn consume(&mut self, error: &str) -> Result<(Range, CoreToken), ParseError> {
283        match self.peek() {
284            (range, None) => ParseError(range, ParseErrorKind::EOF(error.to_string())).into(),
285            (range, Some(token)) => {
286                self.cur_range = range;
287                self.pos += 1;
288                Ok((range, token))
289            }
290        }
291
292    }
293
294    /// Expect the given token, returning a `ParseError` if the given token
295    /// was not found.
296    fn expect(&mut self, t: CoreToken, error: &str) -> Result<(), ParseError> {
297        match self.peek() {
298            (range, None) => return ParseError(range, ParseErrorKind::EOF(format!(
299                            "was expecting {:#?}, found EOF\n{}", t, error))).into(),
300            (range, Some(tok)) => {
301                if tok == t {
302                    self.cur_range = range;
303                    self.pos += 1; 
304                    Ok(())
305                }
306                else {
307                    return ParseError(range, ParseErrorKind::UnexpectedToken{
308                        expected: vec![t],
309                        found: tok,
310                        error: error.to_string()
311                    }).into()
312                }
313            }
314        }
315    }
316}
317
318
319
/// Represents an input stream with location information
///
/// This also maintains the current line number, column number, etc.
/// to be able to create [`Range`](struct.Range.html) for tokens.
struct TokenizerCursor {
    /// the whole program as a character buffer
    program: Vec<char>,
    /// current line number, 0 indexed
    line: usize,
    /// current column number, 0 indexed; reset to 0 after a newline
    col: usize,
    /// index into `program` of the next character to consume
    i: usize,
}
330
331impl TokenizerCursor {
332    fn new(program: &str) -> TokenizerCursor {
333        TokenizerCursor {
334            program: program.chars().collect(),
335            line: 0,
336            col: 0,
337            i: 0
338        }
339    }
340
341    /// Peek the input stream. If the stream is empty, return `None`.
342    /// Otherwise return a `Range` of the current character and the character.
343    fn peek(&self) -> Option<(Range, &char)> {
344        self.program.get(self.i).map(|c| (self.point().as_range(), c))
345    }
346
347
348    /// Return the Point corresponding to the current cursor location.
349    fn point(&self) -> Point {
350        Point {
351            index: self.i,
352            line: self.line,
353            col: self.col
354        }
355    }
356
357
358    /// Return a [`Range`](struct.Range.html) starting from `start_point`
359    /// till the current cursor location.
360    fn range_till_cur(&self, start_point: Point) -> Range {
361        Range {
362            start: start_point,
363            end: self.point(),
364        }
365
366    }
367
368    /// Consumes the longest match from the input stream from `matches`
369    /// if no match is found, returns a [`ParseError`](enum.ParseError.html)
370    fn consume_longest_match(&mut self, matches: Vec<&str>) -> Result<(Range, String), ParseError> {
371        let start = self.point();
372
373        let longest_len = matches.iter()
374        .map(|s| s.len())
375        .fold(0, cmp::max);
376
377
378        //take only enough to not cause an out of bounds error
379        let length_to_take = cmp::min(longest_len,
380                                      self.program.len() - self.i);
381
382        //take all lengths, starting from longest,
383        //ending at shortest
384        let mut longest_match : Option<String> = None;
385
386        for l in (1..length_to_take+1).rev() {
387            let possible_match : String = self.program[self.i..self.i + l].iter().cloned().collect();
388            if matches.contains(&possible_match.as_str()) {
389                longest_match = Some(possible_match.clone());
390
391                for _ in 0..l {
392                    try!(self.consume(&format!("consuming matching token: {}", possible_match)));
393                }
394                break;
395            }
396        }
397        //longest operator is tokenized
398        match longest_match {
399            Some(m) => Ok((self.range_till_cur(start), m)),
400            None => {
401                ParseError::generic(self.range_till_cur(start),
402                                    format!("expected one of {:#?}, found none to match", matches))
403            }
404        }
405
406    }
407
408    /// Consumes from the character stream as long as `pred` returns true.
409    ///
410    /// `pred` is given the string consumed so far and the current character. It
411    /// is expected to return whether the current character should be consumed
412    /// or not. 
413    ///
414    /// ### Note
415    ///
416    ///on finding `EOF`, this returns whatever has been consumed so far
417    fn consume_while<F>(&mut self, pred: F, error: &str) -> (Range, String)
418    where F: Fn(&String, &char) -> bool {
419        let start = self.point();
420        let mut accum = String::new();
421
422        loop {
423            let c = match self.peek() {
424                Some((_, c)) => c.clone(),
425                None => return (self.range_till_cur(start), accum)
426            };
427            
428            if pred(&accum, &c) {
429                let _ = self.consume(error);
430                accum.push(c.clone());
431            }
432            else {
433                break;
434            }
435        }
436        (self.range_till_cur(start), accum)
437    }
438
439
440    /// Consumes the current character, moving one character forward in the stream.
441    ///
442    /// Returns a `ParseError` if the stream is empty, with `error` as the error
443    /// string.
444    fn consume(&mut self, error: &str) -> Result<(Range, char), ParseError> {
445        let (range, c) = match self.peek() {
446            Some((range, c)) => (range, c.clone()),
447            None => return ParseError::generic(self.point().as_range(), error.to_string())
448        };
449
450        self.i += 1;
451        self.col += 1;
452
453        if c == '\n' {
454            self.col = 0;
455            self.line += 1;
456        };
457
458        Ok((range, c))
459    }
460
461
462}
463
464/// Convert a raw identifier string to the correct token
465/// 
466/// ### Use Case
467/// identifiers can also be tokens such as `let`, `pack`, `in`, etc.
468/// we use this to disambiguate between general identifiers and keywords
469/// in the language
470fn identifier_str_to_token(token_str: &str) -> CoreToken {
471    match token_str {
472        "let" => CoreToken::Let,
473        "in" => CoreToken::In,
474        "Pack" => CoreToken::Pack,
475        other @ _ => CoreToken::Ident(other.to_string())
476    }
477}
478
479
/// Returns whether the given character is a space
///
/// ### Use Case
/// The parser eats white space between tokens.
/// This is used to detect white space
fn is_char_space(c: &char) -> bool {
    // deliberately only these three characters, not full Unicode whitespace
    match *c {
        ' ' | '\n' | '\t' => true,
        _ => false
    }
}
488
489
490/// Tokenize a symbol from the input stream
491fn tokenize_symbol(cursor: &mut TokenizerCursor) -> Result<(Range, CoreToken), ParseError> {
492
493
494    let symbol_token_map: HashMap<&str, CoreToken> =
495        [("=", CoreToken::Assignment),
496    (";", CoreToken::Semicolon),
497
498    ("(", CoreToken::OpenRoundBracket),
499    (")", CoreToken::CloseRoundBracket),
500
501    ("{", CoreToken::OpenCurlyBracket),
502    ("}", CoreToken::CloseCurlyBracket),
503
504    (",", CoreToken::Comma),
505    ("|", CoreToken::Or),
506    ("&", CoreToken::And),
507    ("<", CoreToken::L),
508    ("<=", CoreToken::LEQ),
509    (">", CoreToken::G),
510    (">=", CoreToken::GEQ),
511
512    ("!=", CoreToken::NEQ),
513    ("==", CoreToken::EQ),
514    //arithmetic
515    ("+", CoreToken::Plus),
516    ("-", CoreToken::Minus),
517    ("*", CoreToken::Mul),
518    ("/", CoreToken::Div)]
519        .iter().cloned().collect();
520
521
522    let keys : Vec<&str> = symbol_token_map.clone()
523                                .keys()
524                                .map(|&s| s.clone())
525                                .collect();
526    let (range, op_str) = try!(cursor.consume_longest_match(keys));
527
528    let tok = symbol_token_map
529                    .get(&op_str.as_str())
530                    .expect(&format!("expected symbol for string: {}", op_str));
531    Ok((range, tok.clone()))
532}
533
534/// Eat whitespace from the input stream
535fn eat_whitespace(cursor: &mut TokenizerCursor) {
536    cursor.consume_while(|_, c| is_char_space(c), "eating whitespace");
537}
538
539
/// Return whether the character can be part of an identifier.
fn can_char_belong_identifier(c: &char) -> bool {
    // identifiers allow alphanumerics plus the three punctuation marks
    match *c {
        '?' | '-' | '_' => true,
        other => other.is_alphanumeric()
    }
}
544 
545
546/// Eats comments till the end of the line
547/// returns whether a comment was consumed or not
548/// this is used to restart tokenization for the next line
549/// if a comment was found
550fn eat_comment(cursor: &mut TokenizerCursor) -> bool {
551    match cursor.peek() {
552        Some((_, &'#')) => { 
553            cursor.consume_while(|_, c| *c != '\n',
554                                "eating comment line");
555            return true
556        }
557        Some(_) | None => return false
558    };
559}
560
561/// ```haskell`
562/// <ident> := [a-z, A-Z]([a-Z, A-Z, 0-9, ?, -, _]*)
563/// ```
564///
565/// An identifier must start with a character, and can then contain
566/// any number of characters, numbers, `_`, `-` and `?`
567fn parse_identifier(mut cursor: &mut TokenizerCursor) -> Result<(Range, CoreToken), ParseError> {
568    let mut id_string = String::new();
569    let start = cursor.point();
570
571    let (_, c) = try!(cursor.consume("consuming alphabet token"));
572
573    id_string.push(c);
574    let &(mut range, ref consumed_str) = &cursor.consume_while(|_, c| can_char_belong_identifier(c), "consuming identifier string");
575    range.start = start;
576
577    id_string += &consumed_str;
578    Ok((range, identifier_str_to_token(&id_string)))
579
580}
581
582/// Tokenize the given program string.
583/// Returns a vector of `(Range, CoreToken)` where `Range` represents the
584/// source code range of the token, and `CoreToken` is the token.
585/// Returns `ParseError` on failure.
586fn tokenize(program: &str) -> Result<Vec<(Range, CoreToken)>, ParseError> {
587
588    let mut cursor = TokenizerCursor::new(program);
589
590    let mut tokens : Vec<(Range, CoreToken)> = Vec::new();
591
592    loop {
593        //consume spaces
594        eat_whitespace(&mut cursor);
595
596        //if a comment was eaten, restart tokenization for the next line
597        if eat_comment(&mut cursor) {
598            continue;
599        }
600
601        //break out if we have exhausted the loop
602        let peek = match cursor.peek() {
603            None => break,
604            Some((_, &c)) => c
605        };
606
607        //alphabet: parse literal
608        if peek.is_alphabetic() {
609            tokens.push(try!(parse_identifier(&mut cursor)));
610        }
611        else if peek.is_numeric() {
612            //parse the number
613            //TODO: take care of floats
614            let (range, num_string) = cursor.consume_while(|_, c| c.is_numeric(),
615                                                           "trying to tokenize number");
616            tokens.push((range, CoreToken::Integer(num_string)));
617
618        }
619        else {
620            let symbol= try!(tokenize_symbol(&mut cursor));
621            tokens.push(symbol);
622        }
623    }
624
625    Ok(tokens)
626
627}
628
629/// Try to parse the given string as an integer.
630/// 
631/// ### Use Case
632/// Integers are stored as strings in the tokenizer to prevent loss of precision 
633/// during error reporting till the very last step.
634///
635/// ### NOTE
636/// This function can be extended to deal with hex literals `0x..`, etc. but this
637/// is not done for simplicity.
638/// TODO: provide support for `hex`, `oct` formats.
639fn parse_string_as_int(range: Range, num_str: String) -> Result<i32, ParseError> {
640    match i32::from_str_radix(&num_str, 10) {
641        Ok(num) => Ok(num),
642        Err(_) => ParseError::generic(range,
643                                        format!("unable to parse {} as int", num_str))
644    }
645}
646
647
648/// Returns if the given token is the start of an atomic expression.
649///
650/// ### Use Case
651/// Since our grammar is `LL(1)`, we require one lookahead to disambiguate between
652/// different parses of atomic expressions. So, we use this to tell us if
653/// we should parse an atomic expression or a function application.
654fn is_token_atomic_expr_start(t: &CoreToken) -> bool {
655    match t {
656        &CoreToken::Integer(_) => true,
657        &CoreToken::Ident(_) => true,
658        &CoreToken::OpenRoundBracket => true,
659        _ => false
660    }
661
662}
663
664/// ```haskell
665/// <atomic> := <num> | <ident> | "(" <expr> ")"
666/// ```
667fn parse_atomic_expr(mut c: &mut ParserCursor) ->
668Result<CoreExpr, ParseError> {
669    match try!(c.consume("parsing atomic expression")) {
670        (range, CoreToken::Integer(num_str)) => {
671            let num = try!(parse_string_as_int(range, num_str));
672            Ok(CoreExpr::Num(num))
673        },
674        (_, CoreToken::Ident(ident)) => {
675            Ok(CoreExpr::Variable(ident))
676        },
677        (_, CoreToken::OpenRoundBracket) => {
678            let inner_expr = try!(parse_expr(&mut c));
679            //TODO: create c.match() that prints errors about matching parens
680            try!(c.expect(CoreToken::CloseRoundBracket, "looking for ) for matching ("));
681            Ok(inner_expr)
682        },
683        (range, other) =>
684            return ParseError(range, ParseErrorKind::Generic(format!(
685                        "expected integer, identifier or (<expr>), found {:#?}",
686                        other))).into()
687    }
688
689}
690
691/// ```haskell
692/// <defn> := <ident> "=" <expr>
693/// ```
694fn parse_defn(mut c: &mut ParserCursor) ->
695Result<(Name, Box<CoreExpr>), ParseError> {
696
697    match try!(c.consume("looking for identifier of let binding LHS")) {
698        (_, CoreToken::Ident(name)) => {
699            try!(c.expect(CoreToken::Assignment, "expected = after name in let bindings"));
700            let rhs : CoreExpr = try!(parse_expr(&mut c));
701            Ok((name, Box::new(rhs)))
702        }
703        (range, other) =>  {
704            return  ParseError::generic(range, format!("expected LHS of let binding, found {:#?}", other))
705        }
706    }
707}
708
709/// ```haskell
710/// <let> := "let" <bindings> "in" <expr>;
711/// <bindings> := <defn> | <defn> ";" <bindings>
712/// ```
713fn parse_let(mut c: &mut ParserCursor) -> Result<CoreLet, ParseError> {
714    //<let>
715    try!(c.expect(CoreToken::Let, "expected let"));
716
717    let mut bindings : Vec<(Name, Box<CoreExpr>)> = Vec::new();
718
719    //<bindings>
720    loop {
721        let defn = try!(parse_defn(&mut c));
722        bindings.push(defn);
723
724        //check for ;
725        //If there is a ;, continue parsing
726        if let (_, Some(CoreToken::Semicolon)) = c.peek() {
727            try!(c.expect(CoreToken::Semicolon, "expected ; since peek() returned ;"));
728            continue;
729        }
730        else {
731            break;
732        }
733    }
734    //<in>
735    try!(c.expect(CoreToken::In, "expected <in> after let bindings"));
736
737    //<expr>
738    let rhs_expr = try!(parse_expr(c));
739
740    Ok(CoreLet {
741        bindings: bindings,
742        expr: Box::new(rhs_expr)
743    })
744}
745
746/// ```haskell
747/// <pack> := "Pack" "{" <tag : Int> "," <arity : Int> "}"
748/// ```
749fn parse_pack(mut c: &mut ParserCursor) -> Result<CoreExpr, ParseError> {
750    let pack_grammar = "pack := Pack \"{\" tag \",\" arity \"}\"";
751
752    try!(c.expect(CoreToken::Pack, pack_grammar));
753    try!(c.expect(CoreToken::OpenCurlyBracket, pack_grammar));
754
755    let tag : u32 = match try!(c.consume(pack_grammar)) {
756        (range, CoreToken::Integer(s)) => {
757            try!(parse_string_as_int(range, s)) as u32
758        }
759        (range, other) => 
760            return ParseError(range, ParseErrorKind::Generic(format!(
761                        "expected integer tag, found {:#?}", other))).into()
762    };
763
764    try!(c.expect(CoreToken::Comma, "expected , after <tag> in Pack"));
765
766    let arity : u32 = match try!(c.consume("expected arity after , in Pack")) {
767        (range, CoreToken::Integer(s)) => {
768            try!(parse_string_as_int(range, s)) as u32
769        }
770        (range, other) => 
771            return ParseError::generic(range, format!(
772                        "expected integer arity, found {:#?}", other))
773    };
774
775    //TODO: create cursor.match(..)
776    try!(c.expect(CoreToken::CloseCurlyBracket, "expecting closing curly bracket for Pack"));
777    Ok(CoreExpr::Pack{tag: tag, arity: arity })
778
779
780}
781
782
783/// ```haskell
784/// <aexpr> := <atomic> | Pack "{" <num> "," <num> "}";
785/// <application> := <aexpr> | <aexpr>+
786///```
787///
788/// Note that this parses either a standalone expression <aexpr>,
789/// or a collection of <aexpr> that become function application
790fn parse_application(mut cursor: &mut ParserCursor) -> 
791Result<CoreExpr, ParseError> {
792
793    let mut application_vec : Vec<CoreExpr> = Vec::new();
794    loop {
795
796        //we have a "pack" expression
797        match cursor.peek() {
798            (_, Some(CoreToken::Pack)) => {
799                let pack_expr = try!(parse_pack(&mut cursor));
800                application_vec.push(pack_expr);
801
802            }
803            (_, Some(other)) => {
804                if is_token_atomic_expr_start(&other) {
805                    let atomic_expr = try!(parse_atomic_expr(&mut cursor));
806                    application_vec.push(atomic_expr);
807                }
808                else {
809                    break;
810                }
811            }
812            _ => break
813        };
814    }
815
816    if application_vec.len() == 0 {
817        return ParseError::generic(cursor.cur_range,
818                concat!("wanted function application or expression").to_string())
819
820    }
821    else if application_vec.len() == 1 {
822        //just an atomic expr
823        return Ok(application_vec.remove(0))
824    }
825    else {
826
827        //function application
828        //convert f g x  y to
829        //((f g) x) y
830        let mut cur_ap_lhs = {
831            let ap_lhs = application_vec.remove(0);
832            let ap_rhs = application_vec.remove(0);
833            CoreExpr::Application(Box::new(ap_lhs), Box::new(ap_rhs))
834        };
835
836        //drop the first two and start folding
837        for ap_rhs in application_vec.into_iter() {
838            cur_ap_lhs = CoreExpr::Application(Box::new(cur_ap_lhs), Box::new(ap_rhs));
839        }
840
841        Ok(cur_ap_lhs)
842    }
843}
844
/// General function for top-down parsing of binary operators with one lookahead.
///
/// `lhs_parse_fn` is first used to parse the left hand side of the binary operator.
///
/// `variable_bindings` maps each `CoreToken` accepted at this precedence level
/// to the `CoreExpr::Variable` that the operator desugars to.
///
/// `rhs_parse_fn` is used to parse the right hand side of the binary operator.
/// If the lookahead token is not in `variable_bindings`, the left hand side is
/// returned unchanged, so each precedence level falls through cleanly.
fn parse_binop_at_precedence(mut cursor: &mut ParserCursor,
                             lhs_parse_fn: fn(&mut ParserCursor) -> Result<CoreExpr, ParseError>,
                             rhs_parse_fn: fn(&mut ParserCursor) -> Result<CoreExpr, ParseError>,
                             variable_bindings: HashMap<CoreToken, CoreExpr>) -> Result<CoreExpr, ParseError> {

    let lhs_expr : CoreExpr = try!(lhs_parse_fn(&mut cursor));

    // one-token lookahead; EOF means the LHS stands alone
    let c = match cursor.peek() {
        (_, None) => return Ok(lhs_expr),
        (_, Some(c)) => c
    };

    let (rhs_expr, operator_variable) = {
        if let Some(&CoreExpr::Variable(ref op_str)) = variable_bindings.get(&c) {
            let op_var = CoreExpr::Variable(op_str.clone());
            try!(cursor.expect(c, &format!("parsing binary operator {:#?}", op_var)));
            let rhs = try!(rhs_parse_fn(&mut cursor));

            (rhs, op_var)
        }
        else {
            // lookahead is not an operator at this precedence level
            return Ok(lhs_expr);
        }


    };

    // desugar `lhs op rhs` into the application `((op lhs) rhs)`
    let ap_inner =
        CoreExpr::Application(Box::new(operator_variable), Box::new(lhs_expr));

    Ok(CoreExpr::Application(Box::new(ap_inner),
                                     Box::new(rhs_expr)))


}
888
889
890/// ```haskell
891/// <mul_div_expr> := <application_expr> ("*" |  "/") <mul_div_expr>
892/// ```
893fn parse_mul_div(mut cursor: &mut ParserCursor) -> Result<CoreExpr, ParseError> {
894    parse_binop_at_precedence(cursor,
895                              parse_application,
896                              parse_mul_div,
897                              [(CoreToken::Mul, CoreExpr::Variable("*".to_string())),
898                              (CoreToken::Div, CoreExpr::Variable("/".to_string()))
899                              ].iter().cloned().collect())
900}
901
902
903/// ```haskell
904/// <add_sub_expr> := <mul_div_expr> ("+" |  "-") <add_sub_expr>
905/// ```
906fn parse_add_sub(mut cursor: &mut ParserCursor) -> Result<CoreExpr, ParseError> {
907    parse_binop_at_precedence(cursor,
908                              parse_mul_div,
909                              parse_add_sub,
910                              [(CoreToken::Plus, CoreExpr::Variable("+".to_string())),
911                              (CoreToken::Minus, CoreExpr::Variable("-".to_string()))
912                              ].iter().cloned().collect())
913}
914
915/// ```haskell
916/// <relop_expr> := <add_sub> | <add_sub> <relop> <relop_expr>;
917/// <relop> := "<" | "<=" | ">" | ">=" | "==" | "!="
918/// ``` 
919fn parse_relop(mut cursor: &mut ParserCursor) -> Result<CoreExpr, ParseError> {
920    parse_binop_at_precedence(cursor,
921                              parse_add_sub,
922                              parse_relop,
923                              [(CoreToken::L, CoreExpr::Variable("<".to_string())),
924                              (CoreToken::LEQ, CoreExpr::Variable("<=".to_string())),
925                              (CoreToken::G, CoreExpr::Variable(">".to_string())),
926                              (CoreToken::GEQ, CoreExpr::Variable(">=".to_string())),
927                              (CoreToken::EQ, CoreExpr::Variable("==".to_string())),
928                              (CoreToken::NEQ, CoreExpr::Variable("!=".to_string()))
929                              ].iter().cloned().collect())
930
931
932}
933
934
935/// ```haskell
936/// <and_expr> := <relop_expr> "&" <and_expr> | <relop_expr>
937/// ```
938fn parse_and(mut cursor: &mut ParserCursor) -> Result<CoreExpr, ParseError> {
939    parse_binop_at_precedence(cursor,
940                              parse_relop,
941                              parse_and,
942                              [(CoreToken::And, CoreExpr::Variable("&".to_string()))
943                              ].iter().cloned().collect())
944
945}
946
947/// ```haskell
948/// <or_expr> := <and_expr> "|" <or_expr> | <or_expr>
949/// ```
950fn parse_or(mut cursor: &mut ParserCursor) -> Result<CoreExpr, ParseError> {
951    parse_binop_at_precedence(cursor,
952                              parse_and,
953                              parse_or,
954                              [(CoreToken::And, CoreExpr::Variable("|".to_string()))
955                              ].iter().cloned().collect())
956}
957
958
959
960
961///```haskell
962/// <expr> := <or_expr> | "Let" let_expr
963///```
964fn parse_expr(mut c: &mut ParserCursor) ->
965Result<CoreExpr, ParseError> {
966    match c.peek() {
967        (_, Some(CoreToken::Let)) => parse_let(&mut c).map(|l| CoreExpr::Let(l)),
968        _ => parse_or(&mut c)
969    }
970}
971
972
///```haskell
/// supercombinator := <name: Ident> (<args : Ident>)* "=" <expr>
///```
///
/// Parses one supercombinator (top-level) definition: an identifier
/// naming it, zero or more argument identifiers, an `=`, and a body
/// expression. Does not consume any trailing `;` separator.
fn parse_supercombinator(mut c: &mut ParserCursor) ->
Result<SupercombDefn, ParseError> {

    // the first token must be the identifier naming the supercombinator
    let sc_name = match try!(c.consume("parsing supercombinator, looking for name")) {
        (_, CoreToken::Ident(name)) => name,
        (range, other)=> return ParseError::generic(range, format!(
                        "super combinator name expected, {:#?} encountered",
                        other))
    };

    let mut sc_args = Vec::new();

    //<args>* = <expr>
    // collect argument identifiers until the "=" token terminates the
    // argument list; any other token is a parse error
    loop {
        match try!(c.consume("looking for <args>* =")) {
            (_, CoreToken::Ident(sc_arg)) => {
                sc_args.push(sc_arg);
            }
            (_, CoreToken::Assignment) => { break; }
            (range, other) => {
                return ParseError::generic(range, format!(
                                    "identifier that is supercombinator argument or \"=\" expected, \
                                    {:#?} encountered",
                                    other)).into();
            }
        }
    }

    // everything after "=" is the body expression
    let sc_body = try!(parse_expr(&mut c));

    Ok(SupercombDefn{
        name: sc_name,
        args: sc_args,
        body: sc_body
    })

}
1013
1014/// Try to convert the given string to a [CoreExpr](../ir/enum.CoreExpr.html)
1015pub fn string_to_expr(string: &str) -> Result<CoreExpr, ParseError> {
1016    let tokens : Vec<(Range, CoreToken)> = try!(tokenize(string));
1017    let mut cursor: ParserCursor = ParserCursor::new(tokens);
1018
1019    parse_expr(&mut cursor)
1020}
1021
1022/// Try to convert the given string to a [SupercombDefn](../ir/struct.SupercombDefn.html)
1023pub fn string_to_sc_defn(string: &str) -> Result<SupercombDefn, ParseError> {
1024    let tokens : Vec<(Range, CoreToken)> = try!(tokenize(string));
1025    let mut cursor: ParserCursor = ParserCursor::new(tokens);
1026    parse_supercombinator(&mut cursor)    
1027}
1028
1029
1030
/// Try to convert the given string to a [CoreProgram](../ir/type.CoreProgram.html)
///
/// A program is a sequence of supercombinator definitions separated by
/// `;`, with no trailing separator: EOF must follow the last definition.
pub fn string_to_program(string: &str) -> Result<CoreProgram, ParseError> {

    let tokens : Vec<(Range, CoreToken)> = try!(tokenize(string));
    let mut cursor: ParserCursor = ParserCursor::new(tokens);

    let mut program : CoreProgram = Vec::new();

    loop {
        //we need an identifier that is the supercombinator name
        // peek (without consuming) so parse_supercombinator can
        // re-read the name token itself
        match cursor.peek() {
            (_, Some(CoreToken::Ident(_)))  => {}
            (range, Some(other)) => return ParseError(range, ParseErrorKind::Generic(format!(
                        "super combinator name expected, {:#?} encountered", other))).into(),
            (range, None) => return ParseError(range, ParseErrorKind::EOF("supercombinator name expected".to_string())).into()
        };

        let sc = try!(parse_supercombinator(&mut cursor));
        program.push(sc);

        // after each definition, decide whether to stop or continue
        match cursor.peek() {
            //we ran out of tokens, this is the last SC
            //break
            (_, None) => break,
            //we got a ;, more SCs to come
            (_, Some(CoreToken::Semicolon)) => {
                try!(cursor.expect(CoreToken::Semicolon, "parsing supercombinator"));
                continue
            },
            // any other token between definitions is malformed input
            (range, Some(other)) => {
                return ParseError::generic(range, format!(
                            "expected either ; or EOF, found {:#?}",
                            other));
            }
        }

    }
    Ok(program)
}