Skip to main content

kyu_parser/parser/
pattern.rs

1//! Node and relationship pattern parsers.
2
3use chumsky::prelude::*;
4use smol_str::SmolStr;
5
6use crate::ast::*;
7use crate::span::Spanned;
8use crate::token::Token;
9
10use super::expression::expression_parser;
11
12type ParserError = Simple<Token>;
13
14/// Parse an identifier (unquoted, escaped, or keyword used in identifier position).
15///
16/// Cypher allows most keywords to be used as identifiers in non-ambiguous contexts
17/// (variable names, labels, property names, map keys, relationship types).
18///
19/// Uses `filter_map` instead of `select!` to avoid monomorphization bloat from 70+ arms,
20/// which otherwise causes linker failures from excessively long symbol names.
21pub fn ident() -> impl Parser<Token, SmolStr, Error = ParserError> + Clone {
22    filter_map(|span, token: Token| match token_to_ident(&token) {
23        Some(name) => Ok(name),
24        None => Err(Simple::expected_input_found(span, [], Some(token))),
25    })
26}
27
28/// Convert a token to an identifier name if it can be used in identifier position.
29fn token_to_ident(token: &Token) -> Option<SmolStr> {
30    match token {
31        Token::Ident(name) | Token::EscapedIdent(name) => Some(name.clone()),
32        // DDL keywords
33        Token::Node => Some(SmolStr::new("NODE")),
34        Token::Rel => Some(SmolStr::new("REL")),
35        Token::Table => Some(SmolStr::new("TABLE")),
36        Token::Group => Some(SmolStr::new("GROUP")),
37        Token::Rdf => Some(SmolStr::new("RDF")),
38        Token::Graph => Some(SmolStr::new("GRAPH")),
39        Token::From => Some(SmolStr::new("FROM")),
40        Token::To => Some(SmolStr::new("TO")),
41        Token::Primary => Some(SmolStr::new("PRIMARY")),
42        Token::Key => Some(SmolStr::new("KEY")),
43        Token::Add => Some(SmolStr::new("ADD")),
44        Token::Column => Some(SmolStr::new("COLUMN")),
45        Token::Rename => Some(SmolStr::new("RENAME")),
46        Token::Comment => Some(SmolStr::new("COMMENT")),
47        Token::Default => Some(SmolStr::new("DEFAULT")),
48        Token::Copy => Some(SmolStr::new("COPY")),
49        Token::Load => Some(SmolStr::new("LOAD")),
50        Token::Attach => Some(SmolStr::new("ATTACH")),
51        Token::Use => Some(SmolStr::new("USE")),
52        Token::Database => Some(SmolStr::new("DATABASE")),
53        Token::Export => Some(SmolStr::new("EXPORT")),
54        Token::Import => Some(SmolStr::new("IMPORT")),
55        Token::Install => Some(SmolStr::new("INSTALL")),
56        Token::Extension => Some(SmolStr::new("EXTENSION")),
57        // Transaction keywords
58        Token::Begin => Some(SmolStr::new("BEGIN")),
59        Token::Commit => Some(SmolStr::new("COMMIT")),
60        Token::Rollback => Some(SmolStr::new("ROLLBACK")),
61        Token::Transaction => Some(SmolStr::new("TRANSACTION")),
62        Token::Read => Some(SmolStr::new("READ")),
63        Token::Write => Some(SmolStr::new("WRITE")),
64        Token::Only => Some(SmolStr::new("ONLY")),
65        // Type keywords
66        Token::ListType => Some(SmolStr::new("LIST")),
67        Token::MapType => Some(SmolStr::new("MAP")),
68        Token::StructType => Some(SmolStr::new("STRUCT")),
69        Token::UnionType => Some(SmolStr::new("UNION")),
70        Token::BoolType => Some(SmolStr::new("BOOL")),
71        Token::StringType => Some(SmolStr::new("STRING")),
72        Token::DateType => Some(SmolStr::new("DATE")),
73        Token::TimestampType => Some(SmolStr::new("TIMESTAMP")),
74        Token::IntervalType => Some(SmolStr::new("INTERVAL")),
75        Token::BlobType => Some(SmolStr::new("BLOB")),
76        Token::UuidType => Some(SmolStr::new("UUID")),
77        Token::SerialType => Some(SmolStr::new("SERIAL")),
78        Token::FloatType => Some(SmolStr::new("FLOAT")),
79        Token::DoubleType => Some(SmolStr::new("DOUBLE")),
80        Token::Int8Type => Some(SmolStr::new("INT8")),
81        Token::Int16Type => Some(SmolStr::new("INT16")),
82        Token::Int32Type => Some(SmolStr::new("INT32")),
83        Token::Int64Type => Some(SmolStr::new("INT64")),
84        Token::Int128Type => Some(SmolStr::new("INT128")),
85        Token::UInt8Type => Some(SmolStr::new("UINT8")),
86        Token::UInt16Type => Some(SmolStr::new("UINT16")),
87        Token::UInt32Type => Some(SmolStr::new("UINT32")),
88        Token::UInt64Type => Some(SmolStr::new("UINT64")),
89        // Cypher keywords usable as identifiers
90        Token::Count => Some(SmolStr::new("count")),
91        Token::Exists => Some(SmolStr::new("exists")),
92        Token::All => Some(SmolStr::new("all")),
93        Token::Any => Some(SmolStr::new("any")),
94        Token::Single => Some(SmolStr::new("single")),
95        Token::None => Some(SmolStr::new("none")),
96        Token::On => Some(SmolStr::new("ON")),
97        Token::Yield => Some(SmolStr::new("YIELD")),
98        Token::End => Some(SmolStr::new("END")),
99        Token::Call => Some(SmolStr::new("CALL")),
100        Token::If => Some(SmolStr::new("IF")),
101        Token::Macro => Some(SmolStr::new("MACRO")),
102        Token::Shortest => Some(SmolStr::new("SHORTEST")),
103        Token::Asc => Some(SmolStr::new("ASC")),
104        Token::Desc => Some(SmolStr::new("DESC")),
105        Token::In => Some(SmolStr::new("IN")),
106        Token::Is => Some(SmolStr::new("IS")),
107        Token::Contains => Some(SmolStr::new("CONTAINS")),
108        Token::Starts => Some(SmolStr::new("STARTS")),
109        Token::Ends => Some(SmolStr::new("ENDS")),
110        Token::Union => Some(SmolStr::new("UNION")),
111        Token::Drop => Some(SmolStr::new("DROP")),
112        Token::Alter => Some(SmolStr::new("ALTER")),
113        Token::Remove => Some(SmolStr::new("REMOVE")),
114        Token::Profile => Some(SmolStr::new("PROFILE")),
115        Token::Explain => Some(SmolStr::new("EXPLAIN")),
116        _ => Option::None,
117    }
118}
119
120/// Parse one or more node labels: `:Label1:Label2`
121fn node_labels() -> impl Parser<Token, Vec<Spanned<SmolStr>>, Error = ParserError> + Clone {
122    just(Token::Colon)
123        .ignore_then(ident().map_with_span(|n, s| (n, s)))
124        .repeated()
125        .at_least(1)
126}
127
128/// Parse map properties: `{key: expr, key: expr, ...}`
129pub fn map_properties()
130-> impl Parser<Token, Vec<(Spanned<SmolStr>, Spanned<Expression>)>, Error = ParserError> + Clone {
131    let entry = ident()
132        .map_with_span(|n, s| (n, s))
133        .then_ignore(just(Token::Colon))
134        .then(expression_parser());
135
136    entry
137        .separated_by(just(Token::Comma))
138        .allow_trailing()
139        .delimited_by(just(Token::LeftBrace), just(Token::RightBrace))
140}
141
142/// Parse a node pattern: `(variable:Label {props})`
143pub fn node_pattern() -> impl Parser<Token, NodePattern, Error = ParserError> + Clone {
144    let variable = ident().map_with_span(|n, s| (n, s)).or_not();
145    let labels = node_labels().or_not().map(|l| l.unwrap_or_default());
146    let props = map_properties().or_not();
147
148    variable
149        .then(labels)
150        .then(props)
151        .delimited_by(just(Token::LeftParen), just(Token::RightParen))
152        .map_with_span(|((variable, labels), properties), span| NodePattern {
153            variable,
154            labels,
155            properties,
156            span,
157        })
158        .labelled("node pattern")
159}
160
161/// Parse a relationship pattern including direction arrows.
162///
163/// Handles:
164/// - `-[r:TYPE]->` (right)
165/// - `<-[r:TYPE]-` (left)
166/// - `-[r:TYPE]-`  (both)
167type RelDetail = (
168    Option<Spanned<SmolStr>>,
169    Vec<Spanned<SmolStr>>,
170    Option<(Option<u32>, Option<u32>)>,
171    Option<Vec<(Spanned<SmolStr>, Spanned<Expression>)>>,
172);
173
174fn relationship_detail() -> impl Parser<Token, RelDetail, Error = ParserError> + Clone {
175    let variable = ident().map_with_span(|n, s| (n, s)).or_not();
176
177    // Relationship types: `:TYPE1|TYPE2` or `:TYPE1|:TYPE2` (TCK allows colon after pipe)
178    let rel_types = just(Token::Colon)
179        .ignore_then(
180            ident()
181                .map_with_span(|n, s| (n, s))
182                .then(
183                    just(Token::Pipe)
184                        .ignore_then(just(Token::Colon).or_not())
185                        .ignore_then(ident().map_with_span(|n, s| (n, s)))
186                        .repeated(),
187                )
188                .map(|(first, rest)| {
189                    let mut types = vec![first];
190                    types.extend(rest);
191                    types
192                }),
193        )
194        .or_not()
195        .map(|t| t.unwrap_or_default());
196
197    // Variable-length: *min..max
198    let range = just(Token::Star)
199        .ignore_then(
200            select! { Token::Integer(n) => n as u32 }.or_not().then(
201                just(Token::DoubleDot)
202                    .ignore_then(select! { Token::Integer(n) => n as u32 }.or_not())
203                    .or_not(),
204            ),
205        )
206        .map(|(min, max_opt)| match max_opt {
207            Some(max) => (min, max),
208            None => (min, min),
209        })
210        .or_not();
211
212    let props = map_properties().or_not();
213
214    variable
215        .then(rel_types)
216        .then(range)
217        .then(props)
218        .delimited_by(just(Token::LeftBracket), just(Token::RightBracket))
219        .map(|(((variable, rel_types), range), properties)| {
220            (variable, rel_types, range, properties)
221        })
222}
223
224/// Parse a complete relationship segment with arrows/dashes.
225fn relationship_pattern() -> impl Parser<Token, RelationshipPattern, Error = ParserError> + Clone {
226    // Case 1: <-[...]-  (left arrow)
227    let left = just(Token::LeftArrow)
228        .ignore_then(relationship_detail())
229        .then_ignore(just(Token::Dash))
230        .map_with_span(|detail: RelDetail, span| {
231            let (variable, rel_types, range, properties) = detail;
232            RelationshipPattern {
233                variable,
234                rel_types,
235                direction: Direction::Left,
236                range,
237                properties,
238                span,
239            }
240        });
241
242    // Case 2: -[...]->  (right arrow)
243    let right = just(Token::Dash)
244        .ignore_then(relationship_detail())
245        .then_ignore(just(Token::Arrow))
246        .map_with_span(|detail: RelDetail, span| {
247            let (variable, rel_types, range, properties) = detail;
248            RelationshipPattern {
249                variable,
250                rel_types,
251                direction: Direction::Right,
252                range,
253                properties,
254                span,
255            }
256        });
257
258    // Case 3: -[...]-  (both directions / undirected)
259    let both = just(Token::Dash)
260        .ignore_then(relationship_detail())
261        .then_ignore(just(Token::Dash))
262        .map_with_span(|detail: RelDetail, span| {
263            let (variable, rel_types, range, properties) = detail;
264            RelationshipPattern {
265                variable,
266                rel_types,
267                direction: Direction::Both,
268                range,
269                properties,
270                span,
271            }
272        });
273
274    // Case 4: simple dashes without brackets: --> or <-- or --
275    let simple_right = just(Token::Dash)
276        .then_ignore(just(Token::Arrow))
277        .map_with_span(|_, span| RelationshipPattern {
278            variable: None,
279            rel_types: vec![],
280            direction: Direction::Right,
281            range: None,
282            properties: None,
283            span,
284        });
285
286    let simple_left = just(Token::LeftArrow)
287        .then_ignore(just(Token::Dash))
288        .map_with_span(|_, span| RelationshipPattern {
289            variable: None,
290            rel_types: vec![],
291            direction: Direction::Left,
292            range: None,
293            properties: None,
294            span,
295        });
296
297    // Case 6: simple undirected `--` without brackets
298    let simple_both = just(Token::Dash)
299        .then_ignore(just(Token::Dash))
300        .map_with_span(|_, span| RelationshipPattern {
301            variable: None,
302            rel_types: vec![],
303            direction: Direction::Both,
304            range: None,
305            properties: None,
306            span,
307        });
308
309    // Case 7: `<-->` shorthand (left-arrow followed by right-arrow)
310    let simple_both_arrow = just(Token::LeftArrow)
311        .then_ignore(just(Token::Arrow))
312        .map_with_span(|_, span| RelationshipPattern {
313            variable: None,
314            rel_types: vec![],
315            direction: Direction::Both,
316            range: None,
317            properties: None,
318            span,
319        });
320
321    choice((
322        left,
323        right,
324        both,
325        simple_right,
326        simple_left,
327        simple_both_arrow,
328        simple_both,
329    ))
330    .labelled("relationship pattern")
331}
332
333/// Parse a chain of node-rel-node-rel-... pattern elements.
334pub fn pattern_element_chain()
335-> impl Parser<Token, Vec<PatternElement>, Error = ParserError> + Clone {
336    node_pattern()
337        .map(PatternElement::Node)
338        .then(
339            relationship_pattern()
340                .map(PatternElement::Relationship)
341                .then(node_pattern().map(PatternElement::Node))
342                .repeated(),
343        )
344        .map(|(first, rest)| {
345            let mut elements = vec![first];
346            for (rel, node) in rest {
347                elements.push(rel);
348                elements.push(node);
349            }
350            elements
351        })
352}
353
354/// Parse a full pattern with optional variable assignment: `p = (a)-[:R]->(b)`
355pub fn pattern_parser() -> impl Parser<Token, Pattern, Error = ParserError> + Clone {
356    let variable_assignment = ident()
357        .map_with_span(|n, s| (n, s))
358        .then_ignore(just(Token::Eq))
359        .or_not();
360
361    variable_assignment
362        .then(pattern_element_chain())
363        .map(|(variable, elements)| Pattern { variable, elements })
364        .labelled("pattern")
365}
366
367#[cfg(test)]
368mod tests {
369    use super::*;
370    use crate::lexer::Lexer;
371
372    fn parse_pattern_from(src: &str) -> Option<Pattern> {
373        let (tokens, lex_errors) = Lexer::new(src).lex();
374        assert!(lex_errors.is_empty(), "lex errors: {lex_errors:?}");
375
376        let len = src.len();
377        let stream = chumsky::Stream::from_iter(
378            len..len + 1,
379            tokens
380                .into_iter()
381                .filter(|(tok, _)| !matches!(tok, Token::Eof)),
382        );
383
384        let (result, errors) = pattern_parser().then_ignore(end()).parse_recovery(stream);
385        if !errors.is_empty() {
386            eprintln!("parse errors: {errors:?}");
387        }
388        result
389    }
390
391    #[test]
392    fn simple_node() {
393        let p = parse_pattern_from("(n)").unwrap();
394        assert_eq!(p.elements.len(), 1);
395        if let PatternElement::Node(node) = &p.elements[0] {
396            assert_eq!(node.variable.as_ref().unwrap().0.as_str(), "n");
397            assert!(node.labels.is_empty());
398        } else {
399            panic!("expected node");
400        }
401    }
402
403    #[test]
404    fn node_with_label() {
405        let p = parse_pattern_from("(n:Person)").unwrap();
406        if let PatternElement::Node(node) = &p.elements[0] {
407            assert_eq!(node.labels.len(), 1);
408            assert_eq!(node.labels[0].0.as_str(), "Person");
409        } else {
410            panic!("expected node");
411        }
412    }
413
414    #[test]
415    fn node_with_multiple_labels() {
416        let p = parse_pattern_from("(n:Person:Employee)").unwrap();
417        if let PatternElement::Node(node) = &p.elements[0] {
418            assert_eq!(node.labels.len(), 2);
419        } else {
420            panic!("expected node");
421        }
422    }
423
424    #[test]
425    fn node_with_properties() {
426        let p = parse_pattern_from("(n:Person {name: 'Alice', age: 30})").unwrap();
427        if let PatternElement::Node(node) = &p.elements[0] {
428            assert!(node.properties.is_some());
429            assert_eq!(node.properties.as_ref().unwrap().len(), 2);
430        } else {
431            panic!("expected node");
432        }
433    }
434
435    #[test]
436    fn right_relationship() {
437        let p = parse_pattern_from("(a)-[:KNOWS]->(b)").unwrap();
438        assert_eq!(p.elements.len(), 3);
439        if let PatternElement::Relationship(rel) = &p.elements[1] {
440            assert_eq!(rel.direction, Direction::Right);
441            assert_eq!(rel.rel_types.len(), 1);
442            assert_eq!(rel.rel_types[0].0.as_str(), "KNOWS");
443        } else {
444            panic!("expected relationship");
445        }
446    }
447
448    #[test]
449    fn left_relationship() {
450        let p = parse_pattern_from("(a)<-[:LIKES]-(b)").unwrap();
451        if let PatternElement::Relationship(rel) = &p.elements[1] {
452            assert_eq!(rel.direction, Direction::Left);
453        } else {
454            panic!("expected relationship");
455        }
456    }
457
458    #[test]
459    fn undirected_relationship() {
460        let p = parse_pattern_from("(a)-[:FRIENDS]-(b)").unwrap();
461        if let PatternElement::Relationship(rel) = &p.elements[1] {
462            assert_eq!(rel.direction, Direction::Both);
463        } else {
464            panic!("expected relationship");
465        }
466    }
467
468    #[test]
469    fn variable_on_relationship() {
470        let p = parse_pattern_from("(a)-[r:KNOWS]->(b)").unwrap();
471        if let PatternElement::Relationship(rel) = &p.elements[1] {
472            assert_eq!(rel.variable.as_ref().unwrap().0.as_str(), "r");
473        } else {
474            panic!("expected relationship");
475        }
476    }
477
478    #[test]
479    fn multi_hop() {
480        let p = parse_pattern_from("(a)-[:R1]->(b)-[:R2]->(c)").unwrap();
481        assert_eq!(p.elements.len(), 5); // node-rel-node-rel-node
482    }
483
484    #[test]
485    fn pattern_with_variable_assignment() {
486        let p = parse_pattern_from("p = (a)-[:R]->(b)").unwrap();
487        assert_eq!(p.variable.as_ref().unwrap().0.as_str(), "p");
488    }
489
490    #[test]
491    fn anonymous_node() {
492        let p = parse_pattern_from("()").unwrap();
493        if let PatternElement::Node(node) = &p.elements[0] {
494            assert!(node.variable.is_none());
495        } else {
496            panic!("expected node");
497        }
498    }
499}