Skip to main content

logicaffeine_kernel/interface/
term_parser.rs

1//! Parser for Kernel Term syntax.
2//!
3//! Parses the following grammar:
4//! ```text
5//! term   ::= lambda | forall | fix | match | arrow
6//! arrow  ::= app ("->" arrow)?
7//! app    ::= atom+
8//! atom   ::= "(" term ")" | sort | ident
9//! sort   ::= "Prop" | "Type" | "Type" digit+
10//! lambda ::= "fun" ident ":" term "=>" term
11//! forall ::= "forall" ident ":" term "," term
12//! fix    ::= "fix" ident "=>" term
13//! match  ::= "match" term "return" term "with" ("|" ident+ "=>" term)+
14//! ```
15
16use crate::{Literal, Term, Universe};
17use super::error::ParseError;
18use std::collections::HashSet;
19
20/// Recursive descent parser for Term syntax.
21pub struct TermParser<'a> {
22    input: &'a str,
23    pos: usize,
24    /// Variables currently in scope (bound by lambda, forall, fix, or match case)
25    bound_vars: HashSet<String>,
26}
27
28impl<'a> TermParser<'a> {
29    /// Parse a term from input string.
30    pub fn parse(input: &'a str) -> Result<Term, ParseError> {
31        let mut parser = Self {
32            input,
33            pos: 0,
34            bound_vars: HashSet::new(),
35        };
36        let term = parser.parse_term()?;
37        parser.skip_whitespace();
38        Ok(term)
39    }
40
41    /// Parse a term (top-level).
42    fn parse_term(&mut self) -> Result<Term, ParseError> {
43        self.skip_whitespace();
44
45        if self.peek_keyword("fun") {
46            self.parse_lambda()
47        } else if self.peek_keyword("forall") {
48            self.parse_forall()
49        } else if self.peek_keyword("fix") {
50            self.parse_fix()
51        } else if self.peek_keyword("match") {
52            self.parse_match()
53        } else {
54            self.parse_arrow()
55        }
56    }
57
58    /// Parse arrow type: A -> B -> C (right associative)
59    fn parse_arrow(&mut self) -> Result<Term, ParseError> {
60        let left = self.parse_app()?;
61
62        self.skip_whitespace();
63        if self.try_consume("->") {
64            let right = self.parse_arrow()?;
65            // A -> B is sugar for Π(_:A). B
66            Ok(Term::Pi {
67                param: "_".to_string(),
68                param_type: Box::new(left),
69                body_type: Box::new(right),
70            })
71        } else {
72            Ok(left)
73        }
74    }
75
76    /// Parse application: f x y z (left associative)
77    fn parse_app(&mut self) -> Result<Term, ParseError> {
78        let mut func = self.parse_atom()?;
79
80        loop {
81            self.skip_whitespace();
82            // Check if we can parse another atom
83            // Stop at: ), ->, =>, ,, |, with, return, end, ., EOF, or keywords
84            if self.at_end()
85                || self.peek_char(')')
86                || self.peek_char('.')
87                || self.peek_char(',')
88                || self.peek_char('|')
89                || self.peek_str("->")
90                || self.peek_str("=>")
91                || self.peek_keyword("with")
92                || self.peek_keyword("return")
93                || self.peek_keyword("end")
94            {
95                break;
96            }
97
98            let arg = self.parse_atom()?;
99            func = Term::App(Box::new(func), Box::new(arg));
100        }
101
102        Ok(func)
103    }
104
105    /// Parse an atom: (term), number, string, sort, or identifier
106    fn parse_atom(&mut self) -> Result<Term, ParseError> {
107        self.skip_whitespace();
108
109        // Check for negative number
110        if self.peek_char('-') {
111            if let Some(lit) = self.try_parse_negative_number() {
112                return Ok(lit);
113            }
114        }
115
116        // Check for positive number
117        if let Some(c) = self.peek() {
118            if c.is_ascii_digit() {
119                return self.parse_number_literal();
120            }
121        }
122
123        // Check for string literal
124        if self.peek_char('"') {
125            return self.parse_string_literal();
126        }
127
128        if self.peek_char('(') {
129            self.parse_parens()
130        } else {
131            self.parse_ident_or_sort()
132        }
133    }
134
135    /// Parse a positive integer literal
136    fn parse_number_literal(&mut self) -> Result<Term, ParseError> {
137        let start = self.pos;
138
139        while let Some(c) = self.peek() {
140            if c.is_ascii_digit() {
141                self.advance();
142            } else {
143                break;
144            }
145        }
146
147        let num_str = &self.input[start..self.pos];
148        let value: i64 = num_str
149            .parse()
150            .map_err(|_| ParseError::InvalidNumber(num_str.to_string()))?;
151
152        Ok(Term::Lit(Literal::Int(value)))
153    }
154
155    /// Parse a string literal: "contents"
156    fn parse_string_literal(&mut self) -> Result<Term, ParseError> {
157        self.expect_char('"')?;
158
159        // Collect characters until closing quote
160        let mut content = String::new();
161        loop {
162            match self.peek() {
163                Some('"') => {
164                    self.advance();
165                    break;
166                }
167                Some('\\') => {
168                    // Handle escape sequences
169                    self.advance();
170                    match self.peek() {
171                        Some('n') => {
172                            content.push('\n');
173                            self.advance();
174                        }
175                        Some('t') => {
176                            content.push('\t');
177                            self.advance();
178                        }
179                        Some('\\') => {
180                            content.push('\\');
181                            self.advance();
182                        }
183                        Some('"') => {
184                            content.push('"');
185                            self.advance();
186                        }
187                        Some(c) => {
188                            content.push(c);
189                            self.advance();
190                        }
191                        None => return Err(ParseError::UnexpectedEof),
192                    }
193                }
194                Some(c) => {
195                    content.push(c);
196                    self.advance();
197                }
198                None => return Err(ParseError::UnexpectedEof),
199            }
200        }
201
202        Ok(Term::Lit(Literal::Text(content)))
203    }
204
205    /// Try to parse a negative number, returning None if not a number
206    fn try_parse_negative_number(&mut self) -> Option<Term> {
207        // Look ahead: - followed by digit
208        if !self.peek_char('-') {
209            return None;
210        }
211        let after_dash = self.pos + 1;
212        if after_dash >= self.input.len() {
213            return None;
214        }
215        let next = self.input[after_dash..].chars().next()?;
216        if !next.is_ascii_digit() {
217            return None;
218        }
219
220        // Consume the dash
221        self.advance();
222
223        // Parse the number
224        let start = self.pos;
225        while let Some(c) = self.peek() {
226            if c.is_ascii_digit() {
227                self.advance();
228            } else {
229                break;
230            }
231        }
232
233        let num_str = &self.input[start..self.pos];
234        let value: i64 = num_str.parse().ok()?;
235
236        Some(Term::Lit(Literal::Int(-value)))
237    }
238
239    /// Parse parenthesized term
240    fn parse_parens(&mut self) -> Result<Term, ParseError> {
241        self.expect_char('(')?;
242        let term = self.parse_term()?;
243        self.skip_whitespace();
244        self.expect_char(')')?;
245        Ok(term)
246    }
247
248    /// Parse identifier or sort (Prop, Type, Type0, etc.)
249    fn parse_ident_or_sort(&mut self) -> Result<Term, ParseError> {
250        let ident = self.parse_ident()?;
251
252        match ident.as_str() {
253            "Prop" => Ok(Term::Sort(Universe::Prop)),
254            "Type" => {
255                // Check for Type followed by a number
256                self.skip_whitespace();
257                if let Some(n) = self.try_parse_number() {
258                    Ok(Term::Sort(Universe::Type(n)))
259                } else {
260                    Ok(Term::Sort(Universe::Type(0)))
261                }
262            }
263            _ => {
264                // Check for TypeN (e.g., Type0, Type1)
265                if ident.starts_with("Type") {
266                    if let Ok(n) = ident[4..].parse::<u32>() {
267                        return Ok(Term::Sort(Universe::Type(n)));
268                    }
269                }
270                // Check if this is a bound variable
271                if self.bound_vars.contains(&ident) {
272                    Ok(Term::Var(ident))
273                } else {
274                    Ok(Term::Global(ident))
275                }
276            }
277        }
278    }
279
280    /// Parse lambda: fun x : T => body
281    fn parse_lambda(&mut self) -> Result<Term, ParseError> {
282        self.consume_keyword("fun")?;
283        self.skip_whitespace();
284        let param = self.parse_ident()?;
285        self.skip_whitespace();
286        self.expect_char(':')?;
287        let param_type = self.parse_term()?;
288        self.skip_whitespace();
289        self.expect_str("=>")?;
290
291        // Add param to bound variables, parse body, then remove
292        let was_bound = self.bound_vars.contains(&param);
293        self.bound_vars.insert(param.clone());
294        let body = self.parse_term()?;
295        if !was_bound {
296            self.bound_vars.remove(&param);
297        }
298
299        Ok(Term::Lambda {
300            param,
301            param_type: Box::new(param_type),
302            body: Box::new(body),
303        })
304    }
305
306    /// Parse forall: forall x : T, body
307    fn parse_forall(&mut self) -> Result<Term, ParseError> {
308        self.consume_keyword("forall")?;
309        self.skip_whitespace();
310        let param = self.parse_ident()?;
311        self.skip_whitespace();
312        self.expect_char(':')?;
313        let param_type = self.parse_term()?;
314        self.skip_whitespace();
315        self.expect_char(',')?;
316
317        // Add param to bound variables, parse body, then remove
318        let was_bound = self.bound_vars.contains(&param);
319        self.bound_vars.insert(param.clone());
320        let body_type = self.parse_term()?;
321        if !was_bound {
322            self.bound_vars.remove(&param);
323        }
324
325        Ok(Term::Pi {
326            param,
327            param_type: Box::new(param_type),
328            body_type: Box::new(body_type),
329        })
330    }
331
332    /// Parse fix: fix f => body
333    fn parse_fix(&mut self) -> Result<Term, ParseError> {
334        self.consume_keyword("fix")?;
335        self.skip_whitespace();
336        let name = self.parse_ident()?;
337        self.skip_whitespace();
338        self.expect_str("=>")?;
339
340        // Add name to bound variables, parse body, then remove
341        let was_bound = self.bound_vars.contains(&name);
342        self.bound_vars.insert(name.clone());
343        let body = self.parse_term()?;
344        if !was_bound {
345            self.bound_vars.remove(&name);
346        }
347
348        Ok(Term::Fix {
349            name,
350            body: Box::new(body),
351        })
352    }
353
354    /// Parse match: match e return M with | C1 x => t1 | C2 y z => t2 end
355    fn parse_match(&mut self) -> Result<Term, ParseError> {
356        self.consume_keyword("match")?;
357        self.skip_whitespace();
358        let discriminant = self.parse_app()?;
359        self.skip_whitespace();
360        self.consume_keyword("return")?;
361        self.skip_whitespace();
362        let motive = self.parse_app()?;
363        self.skip_whitespace();
364        self.consume_keyword("with")?;
365
366        let mut cases = Vec::new();
367        loop {
368            self.skip_whitespace();
369            if !self.try_consume("|") {
370                break;
371            }
372            self.skip_whitespace();
373
374            // Parse pattern: C x y z (constructor with binders)
375            let case_term = self.parse_case_body()?;
376            cases.push(case_term);
377        }
378
379        // Consume optional 'end' keyword
380        self.skip_whitespace();
381        let _ = self.try_consume_keyword("end");
382
383        Ok(Term::Match {
384            discriminant: Box::new(discriminant),
385            motive: Box::new(motive),
386            cases,
387        })
388    }
389
390    /// Parse a match case body: C x y => term becomes λx. λy. term
391    ///
392    /// The first identifier is the constructor name (skipped), the rest are binders.
393    fn parse_case_body(&mut self) -> Result<Term, ParseError> {
394        // First identifier is the constructor name (skip it)
395        self.skip_whitespace();
396        let _ctor_name = self.parse_ident()?;
397
398        // Collect binders until =>
399        let mut binders = Vec::new();
400        loop {
401            self.skip_whitespace();
402            if self.peek_str("=>") {
403                break;
404            }
405            let ident = self.parse_ident()?;
406            binders.push(ident);
407        }
408        self.expect_str("=>")?;
409
410        // Add all binders to bound vars
411        let mut previously_bound = Vec::new();
412        for binder in &binders {
413            previously_bound.push(self.bound_vars.contains(binder));
414            self.bound_vars.insert(binder.clone());
415        }
416
417        let mut body = self.parse_term()?;
418
419        // Remove binders that weren't previously bound
420        for (i, binder) in binders.iter().enumerate() {
421            if !previously_bound[i] {
422                self.bound_vars.remove(binder);
423            }
424        }
425
426        // Wrap in lambdas from right to left
427        // We don't know the types, so we use a placeholder
428        for binder in binders.into_iter().rev() {
429            body = Term::Lambda {
430                param: binder,
431                param_type: Box::new(Term::Global("_".to_string())), // Placeholder type
432                body: Box::new(body),
433            };
434        }
435
436        Ok(body)
437    }
438
439    // =========================================================================
440    // Low-level parsing utilities
441    // =========================================================================
442
443    /// Parse an identifier (alphanumeric + underscore, starting with letter/underscore)
444    fn parse_ident(&mut self) -> Result<String, ParseError> {
445        self.skip_whitespace();
446        let start = self.pos;
447
448        // First character must be letter or underscore
449        if let Some(c) = self.peek() {
450            if !c.is_alphabetic() && c != '_' {
451                return Err(ParseError::Expected {
452                    expected: "identifier".to_string(),
453                    found: format!("{}", c),
454                });
455            }
456        } else {
457            return Err(ParseError::UnexpectedEof);
458        }
459
460        // Consume alphanumeric and underscore
461        while let Some(c) = self.peek() {
462            if c.is_alphanumeric() || c == '_' {
463                self.advance();
464            } else {
465                break;
466            }
467        }
468
469        let ident = self.input[start..self.pos].to_string();
470        if ident.is_empty() {
471            Err(ParseError::InvalidIdent("empty".to_string()))
472        } else {
473            Ok(ident)
474        }
475    }
476
477    /// Try to parse a number, returning None if not present
478    fn try_parse_number(&mut self) -> Option<u32> {
479        self.skip_whitespace();
480        let start = self.pos;
481
482        while let Some(c) = self.peek() {
483            if c.is_ascii_digit() {
484                self.advance();
485            } else {
486                break;
487            }
488        }
489
490        if self.pos > start {
491            self.input[start..self.pos].parse().ok()
492        } else {
493            None
494        }
495    }
496
497    /// Skip whitespace
498    fn skip_whitespace(&mut self) {
499        while let Some(c) = self.peek() {
500            if c.is_whitespace() {
501                self.advance();
502            } else {
503                break;
504            }
505        }
506    }
507
508    /// Peek at current character
509    fn peek(&self) -> Option<char> {
510        self.input[self.pos..].chars().next()
511    }
512
513    /// Check if we're at a specific character
514    fn peek_char(&self, c: char) -> bool {
515        self.peek() == Some(c)
516    }
517
518    /// Check if input starts with a string at current position
519    fn peek_str(&self, s: &str) -> bool {
520        self.input[self.pos..].starts_with(s)
521    }
522
523    /// Check if input starts with a keyword (followed by non-alphanumeric)
524    fn peek_keyword(&self, keyword: &str) -> bool {
525        if !self.peek_str(keyword) {
526            return false;
527        }
528        // Check that keyword is not part of a longer identifier
529        let after = self.pos + keyword.len();
530        if after >= self.input.len() {
531            return true;
532        }
533        let next_char = self.input[after..].chars().next();
534        !next_char.map(|c| c.is_alphanumeric() || c == '_').unwrap_or(false)
535    }
536
537    /// Advance position by one character
538    fn advance(&mut self) {
539        if let Some(c) = self.peek() {
540            self.pos += c.len_utf8();
541        }
542    }
543
544    /// Check if at end of input
545    fn at_end(&self) -> bool {
546        self.pos >= self.input.len()
547    }
548
549    /// Try to consume a string, returning true if successful
550    fn try_consume(&mut self, s: &str) -> bool {
551        if self.peek_str(s) {
552            self.pos += s.len();
553            true
554        } else {
555            false
556        }
557    }
558
559    /// Try to consume a keyword, returning true if successful
560    fn try_consume_keyword(&mut self, keyword: &str) -> bool {
561        self.skip_whitespace();
562        if self.peek_keyword(keyword) {
563            self.pos += keyword.len();
564            true
565        } else {
566            false
567        }
568    }
569
570    /// Expect a specific character
571    fn expect_char(&mut self, expected: char) -> Result<(), ParseError> {
572        self.skip_whitespace();
573        if self.peek_char(expected) {
574            self.advance();
575            Ok(())
576        } else {
577            Err(ParseError::Expected {
578                expected: format!("'{}'", expected),
579                found: self.peek().map(|c| c.to_string()).unwrap_or("EOF".to_string()),
580            })
581        }
582    }
583
584    /// Expect a specific string
585    fn expect_str(&mut self, expected: &str) -> Result<(), ParseError> {
586        self.skip_whitespace();
587        if self.try_consume(expected) {
588            Ok(())
589        } else {
590            let found: String = self.input[self.pos..].chars().take(expected.len()).collect();
591            Err(ParseError::Expected {
592                expected: format!("'{}'", expected),
593                found: if found.is_empty() { "EOF".to_string() } else { found },
594            })
595        }
596    }
597
598    /// Consume a keyword (must be followed by non-alphanumeric)
599    fn consume_keyword(&mut self, keyword: &str) -> Result<(), ParseError> {
600        self.skip_whitespace();
601        if self.peek_keyword(keyword) {
602            self.pos += keyword.len();
603            Ok(())
604        } else {
605            Err(ParseError::Expected {
606                expected: keyword.to_string(),
607                found: self.peek().map(|c| c.to_string()).unwrap_or("EOF".to_string()),
608            })
609        }
610    }
611
612    /// Get remaining input (for debugging)
613    #[allow(dead_code)]
614    fn remaining(&self) -> &str {
615        &self.input[self.pos..]
616    }
617}
618
619#[cfg(test)]
620mod tests {
621    use super::*;
622
623    #[test]
624    fn test_parse_global() {
625        let term = TermParser::parse("Zero").unwrap();
626        assert!(matches!(term, Term::Global(ref s) if s == "Zero"));
627    }
628
629    #[test]
630    fn test_parse_sort() {
631        let term = TermParser::parse("Type").unwrap();
632        assert!(matches!(term, Term::Sort(Universe::Type(0))));
633
634        let term2 = TermParser::parse("Prop").unwrap();
635        assert!(matches!(term2, Term::Sort(Universe::Prop)));
636    }
637
638    #[test]
639    fn test_parse_app() {
640        let term = TermParser::parse("Succ Zero").unwrap();
641        if let Term::App(func, arg) = term {
642            assert!(matches!(*func, Term::Global(ref s) if s == "Succ"));
643            assert!(matches!(*arg, Term::Global(ref s) if s == "Zero"));
644        } else {
645            panic!("Expected App");
646        }
647    }
648
649    #[test]
650    fn test_parse_parens() {
651        let term = TermParser::parse("(Succ Zero)").unwrap();
652        assert!(matches!(term, Term::App(..)));
653    }
654
655    #[test]
656    fn test_parse_arrow() {
657        let term = TermParser::parse("Nat -> Nat").unwrap();
658        if let Term::Pi { param, param_type, body_type } = term {
659            assert_eq!(param, "_");
660            assert!(matches!(*param_type, Term::Global(ref s) if s == "Nat"));
661            assert!(matches!(*body_type, Term::Global(ref s) if s == "Nat"));
662        } else {
663            panic!("Expected Pi");
664        }
665    }
666
667    #[test]
668    fn test_parse_lambda() {
669        let term = TermParser::parse("fun x : Nat => Succ x").unwrap();
670        if let Term::Lambda { param, body, .. } = term {
671            assert_eq!(param, "x");
672            // Body should use Var for bound x
673            if let Term::App(_, arg) = *body {
674                assert!(matches!(*arg, Term::Var(ref s) if s == "x"));
675            } else {
676                panic!("Expected App in lambda body");
677            }
678        } else {
679            panic!("Expected Lambda");
680        }
681    }
682
683    #[test]
684    fn test_parse_lambda_bound_var() {
685        let term = TermParser::parse("fun n : Nat => Succ n").unwrap();
686        if let Term::Lambda { body, .. } = term {
687            if let Term::App(_, arg) = *body {
688                assert!(
689                    matches!(*arg, Term::Var(ref s) if s == "n"),
690                    "Expected Var(n), got {:?}",
691                    arg
692                );
693            } else {
694                panic!("Expected App in lambda body");
695            }
696        } else {
697            panic!("Expected Lambda");
698        }
699    }
700}