Skip to main content

slotted_egraphs/
parse.rs

1use crate::*;
2
3#[derive(Debug)]
4pub enum ParseError {
5    TokenState(String),
6    ParseState(Vec<Token>),
7    RemainingRest(Vec<Token>),
8    FromSyntaxFailed(Vec<SyntaxElem>),
9    ExpectedColonEquals(Vec<Token>),
10    ExpectedRBracket(Vec<Token>),
11}
12
13#[derive(Debug, Clone)]
14pub enum Token {
15    Slot(Slot),    // $42
16    Ident(String), // map, 15
17    PVar(String),  // ?x
18    ColonEquals,   // :=
19    LParen,        // (
20    RParen,        // )
21    LBracket,      // [
22    RBracket,      // ]
23}
24
25fn ident_char(c: char) -> bool {
26    if c.is_whitespace() {
27        return false;
28    }
29    if "()[]".contains(c) {
30        return false;
31    }
32    true
33}
34
35fn crop_ident(s: &str) -> Result<(/*ident*/ &str, /*rest*/ &str), ParseError> {
36    let out = if let Some((i, _)) = s.char_indices().find(|(_, x)| !ident_char(*x)) {
37        (&s[..i], &s[i..])
38    } else {
39        (s, "")
40    };
41
42    if out.0.is_empty() {
43        return Err(ParseError::TokenState(s.to_string()));
44    }
45
46    Ok(out)
47}
48
49fn tokenize(mut s: &str) -> Result<Vec<Token>, ParseError> {
50    let mut tokens = Vec::new();
51
52    loop {
53        s = s.trim_start();
54        if s.is_empty() {
55            break;
56        }
57
58        if s.starts_with('(') {
59            tokens.push(Token::LParen);
60            s = &s[1..];
61        } else if s.starts_with(')') {
62            tokens.push(Token::RParen);
63            s = &s[1..];
64        } else if s.starts_with('[') {
65            tokens.push(Token::LBracket);
66            s = &s[1..];
67        } else if s.starts_with(']') {
68            tokens.push(Token::RBracket);
69            s = &s[1..];
70        } else if s.starts_with(":=") {
71            tokens.push(Token::ColonEquals);
72            s = &s[2..];
73        } else if s.starts_with('?') {
74            let (op, rst) = crop_ident(&s[1..])?;
75            tokens.push(Token::PVar(op.to_string()));
76            s = rst;
77        } else if s.starts_with('$') {
78            let (op, rst) = crop_ident(&s[1..])?;
79            tokens.push(Token::Slot(Slot::named(op)));
80            s = rst;
81        } else {
82            let (op, rst) = crop_ident(s)?;
83            tokens.push(Token::Ident(op.to_string()));
84            s = rst;
85        }
86    }
87
88    Ok(tokens)
89}
90
91// parse:
92impl<L: Language> Pattern<L> {
93    pub fn parse(s: &str) -> Result<Self, ParseError> {
94        let tok = tokenize(s)?;
95        let (re, rest) = parse_pattern(&tok)?;
96
97        if !rest.is_empty() {
98            return Err(ParseError::RemainingRest(to_vec(rest)));
99        }
100
101        Ok(re)
102    }
103}
104
105impl<L: Language> RecExpr<L> {
106    pub fn parse(s: &str) -> Result<Self, ParseError> {
107        let pat = Pattern::parse(s)?;
108        Ok(pattern_to_re(&pat))
109    }
110}
111
112fn parse_pattern<L: Language>(tok: &[Token]) -> Result<(Pattern<L>, &[Token]), ParseError> {
113    let (mut pat, mut tok) = parse_pattern_nosubst(tok)?;
114    while let Some(Token::LBracket) = tok.get(0) {
115        tok = &tok[1..];
116        let (l, tok2) = parse_pattern(tok)?;
117        tok = tok2;
118
119        let Token::ColonEquals = &tok[0] else {
120            return Err(ParseError::ExpectedColonEquals(to_vec(tok)));
121        };
122        tok = &tok[1..];
123
124        let (r, tok2) = parse_pattern(tok)?;
125        tok = tok2;
126
127        let Token::RBracket = &tok[0] else {
128            return Err(ParseError::ExpectedRBracket(to_vec(tok)));
129        };
130        tok = &tok[1..];
131
132        pat = Pattern::Subst(Box::new(pat), Box::new(l), Box::new(r));
133    }
134    Ok((pat, tok))
135}
136
137fn parse_pattern_nosubst<L: Language>(
138    mut tok: &[Token],
139) -> Result<(Pattern<L>, &[Token]), ParseError> {
140    if let Token::PVar(p) = &tok[0] {
141        let pat = Pattern::PVar(p.to_string());
142        return Ok((pat, &tok[1..]));
143    }
144
145    if let Token::LParen = tok[0] {
146        tok = &tok[1..];
147
148        let Token::Ident(op) = &tok[0] else {
149            return Err(ParseError::ParseState(to_vec(tok)));
150        };
151        tok = &tok[1..];
152
153        let mut syntax_elems = vec![NestedSyntaxElem::String(op.to_string())];
154        loop {
155            if let Token::RParen = tok[0] {
156                break;
157            };
158
159            let (se, tok2) = parse_nested_syntax_elem(tok)?;
160            tok = tok2;
161            syntax_elems.push(se);
162        }
163        tok = &tok[1..];
164
165        let syntax_elems_mock: Vec<_> = syntax_elems
166            .iter()
167            .map(|x| match x {
168                NestedSyntaxElem::String(s) => SyntaxElem::String(s.clone()),
169                NestedSyntaxElem::Slot(s) => SyntaxElem::Slot(*s),
170                NestedSyntaxElem::Pattern(_) => SyntaxElem::AppliedId(AppliedId::null()),
171            })
172            .collect();
173        let node = L::from_syntax(&syntax_elems_mock)
174            .ok_or_else(|| ParseError::FromSyntaxFailed(syntax_elems_mock))?;
175        let syntax_elems = syntax_elems
176            .into_iter()
177            .filter_map(|x| match x {
178                NestedSyntaxElem::Pattern(pat) => Some(pat),
179                NestedSyntaxElem::String(_) => None,
180                NestedSyntaxElem::Slot(_) => None,
181            })
182            .collect();
183        let re = Pattern::ENode(node, syntax_elems);
184        Ok((re, tok))
185    } else {
186        let Token::Ident(op) = &tok[0] else {
187            return Err(ParseError::ParseState(to_vec(tok)));
188        };
189        tok = &tok[1..];
190
191        let elems = [SyntaxElem::String(op.to_string())];
192        let node =
193            L::from_syntax(&elems).ok_or_else(|| ParseError::FromSyntaxFailed(to_vec(&elems)))?;
194        let pat = Pattern::ENode(node, Vec::new());
195        Ok((pat, tok))
196    }
197}
198
199// Like SyntaxElem, but contains Pattern instead of AppliedId.
200enum NestedSyntaxElem<L: Language> {
201    Pattern(Pattern<L>),
202    Slot(Slot),
203    String(String),
204}
205
206fn parse_nested_syntax_elem<L: Language>(
207    tok: &[Token],
208) -> Result<(NestedSyntaxElem<L>, &[Token]), ParseError> {
209    if let Token::Slot(slot) = &tok[0] {
210        return Ok((NestedSyntaxElem::Slot(*slot), &tok[1..]));
211    }
212
213    parse_pattern::<L>(tok).map(|(x, rest)| (NestedSyntaxElem::Pattern(x), rest))
214}
215
216// print:
217impl<L: Language> std::fmt::Display for Pattern<L> {
218    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
219        match self {
220            Pattern::ENode(node, syntax_elems) => {
221                let l = node.to_syntax();
222                let n = l.len();
223
224                if n != 1 {
225                    write!(f, "(")?;
226                }
227                let mut se_idx = 0;
228                for (i, r) in l.into_iter().enumerate() {
229                    match r {
230                        SyntaxElem::AppliedId(_) => {
231                            write!(f, "{}", &syntax_elems[se_idx])?;
232                            se_idx += 1;
233                        }
234                        SyntaxElem::Slot(slot) => {
235                            write!(f, "{}", slot.to_string())?;
236                        }
237                        SyntaxElem::String(s) => {
238                            write!(f, "{}", s)?;
239                        }
240                    }
241                    if i != n - 1 {
242                        write!(f, " ")?;
243                    }
244                }
245                if n != 1 {
246                    write!(f, ")")?;
247                }
248                Ok(())
249            }
250            Pattern::PVar(p) => write!(f, "?{p}"),
251            Pattern::Subst(b, x, t) => write!(f, "{b}[{x} := {t}]"),
252        }
253    }
254}
255
256impl<L: Language> std::fmt::Debug for Pattern<L> {
257    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258        write!(f, "{}", self)
259    }
260}
261
262impl<L: Language> std::fmt::Display for RecExpr<L> {
263    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
264        write!(f, "{}", re_to_pattern(self))
265    }
266}
267
268impl<L: Language> std::fmt::Debug for RecExpr<L> {
269    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
270        write!(f, "{:?}", re_to_pattern(self))
271    }
272}
273
274fn to_vec<T: Clone>(t: &[T]) -> Vec<T> {
275    t.iter().cloned().collect()
276}