// hypen_parser/parser.rs
1use chumsky::prelude::*;
2
3use crate::ast::*;
4
5/// Parse a line comment: // ... until end of line
6fn line_comment<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
7    just("//").then(none_of('\n').repeated()).ignored()
8}
9
10/// Parse a block comment: /* ... */ (supports nesting)
11fn block_comment<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
12    recursive(|nested_comment| {
13        just("/*")
14            .then(
15                nested_comment
16                    .ignored()
17                    .or(any()
18                        .and_is(just("*/").not())
19                        .and_is(just("/*").not())
20                        .ignored())
21                    .repeated(),
22            )
23            .then(just("*/"))
24            .ignored()
25    })
26}
27
28/// Parse any comment (line or block)
29fn comment<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
30    line_comment().or(block_comment())
31}
32
33/// Parse whitespace and comments (replaces .padded() for comment support)
34fn ws<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
35    // Match zero or more of: (whitespace | comment)
36    // This avoids generating confusing "expected '/'" errors
37    choice((text::whitespace().at_least(1).ignored(), comment()))
38        .repeated()
39        .ignored()
40}
41
42/// Extension trait to add comment-aware padding to parsers
43trait PaddedWithComments<'a, O>: Parser<'a, &'a str, O, extra::Err<Rich<'a, char>>> + Sized {
44    fn padded_with_comments(self) -> impl Parser<'a, &'a str, O, extra::Err<Rich<'a, char>>> + Clone
45    where
46        Self: Clone,
47    {
48        ws().ignore_then(self).then_ignore(ws())
49    }
50}
51
52impl<'a, O, P> PaddedWithComments<'a, O> for P where
53    P: Parser<'a, &'a str, O, extra::Err<Rich<'a, char>>>
54{
55}
56
57/// Parse a Hypen value (string, number, boolean, reference, list, map)
58fn value_parser<'a>() -> impl Parser<'a, &'a str, Value, extra::Err<Rich<'a, char>>> + Clone {
59    recursive(|value| {
60        // Double-quoted string with escape support: "hello \"world\""
61        let dq_char = just('\\').ignore_then(any()).or(none_of('"'));
62        let dq_string = just('"')
63            .then(dq_char.repeated().collect::<Vec<_>>())
64            .then(just('"').labelled("closing quote '\"'"))
65            .to_slice()
66            .map(|s: &str| Value::String(s.to_string()))
67            .labelled("double-quoted string");
68
69        // Single-quoted string: 'hello "world"'
70        let sq_char = just('\\').ignore_then(any()).or(none_of('\''));
71        let sq_string = just('\'')
72            .then(sq_char.repeated().collect::<Vec<_>>())
73            .then(just('\'').labelled("closing quote \"'\""))
74            .to_slice()
75            .map(|s: &str| Value::String(s.to_string()))
76            .labelled("single-quoted string");
77
78        let string = dq_string.or(sq_string).labelled("string literal");
79
80        // Boolean: true or false
81        let boolean = text::keyword("true")
82            .to(Value::Boolean(true))
83            .or(text::keyword("false").to(Value::Boolean(false)))
84            .labelled("boolean (true or false)");
85
86        // Number: 123, 123.45, -123, -123.45
87        let number = just('-')
88            .or_not()
89            .then(text::int(10))
90            .then(just('.').then(text::digits(10)).or_not())
91            .to_slice()
92            .try_map(|s: &str, span| {
93                let n: f64 = s.parse().unwrap();
94                if n.is_infinite() {
95                    Err(Rich::custom(
96                        span,
97                        format!("number literal '{}' is too large", s),
98                    ))
99                } else {
100                    Ok(Value::Number(n))
101                }
102            })
103            .labelled("number");
104
105        // Reference: @state.user, @actions.login
106        let reference = just('@')
107            .ignore_then(
108                text::ascii::ident()
109                    .labelled("reference path (e.g., state.user)")
110                    .then(just('.').ignore_then(text::ascii::ident()).repeated())
111                    .to_slice(),
112            )
113            .map(|s: &str| Value::Reference(s.to_string()))
114            .labelled("reference (@state.* or @actions.*)");
115
116        // Data source reference: $provider.path.segments
117        // e.g., $spacetime.messages, $firebase.user.name
118        let data_source_ref = just('$')
119            .ignore_then(
120                text::ascii::ident()
121                    .labelled("data source provider (e.g., spacetime)")
122                    .then(
123                        just('.')
124                            .ignore_then(text::ascii::ident())
125                            .repeated()
126                            .at_least(1),
127                    )
128                    .to_slice(),
129            )
130            .map(|s: &str| Value::DataSourceReference(s.to_string()))
131            .labelled("data source reference ($provider.path)");
132
133        // List: [item1, item2, item3]
134        let list = value
135            .clone()
136            .padded_with_comments()
137            .separated_by(just(','))
138            .allow_trailing()
139            .collect()
140            .delimited_by(just('['), just(']').labelled("closing bracket ']'"))
141            .map(Value::List)
142            .labelled("list [...]");
143
144        // Map: {key1: value1, key2: value2}
145        let map_entry = text::ascii::ident()
146            .labelled("map key")
147            .padded_with_comments()
148            .then_ignore(just(':').labelled("':' after map key"))
149            .then(value.clone().padded_with_comments().labelled("map value"));
150
151        let map = map_entry
152            .separated_by(just(','))
153            .allow_trailing()
154            .collect::<Vec<_>>()
155            .delimited_by(just('{'), just('}').labelled("closing brace '}'"))
156            .map(|entries: Vec<(&str, Value)>| {
157                Value::Map(
158                    entries
159                        .into_iter()
160                        .map(|(k, v)| (k.to_string(), v))
161                        .collect(),
162                )
163            })
164            .labelled("map {...}");
165
166        // Bare identifier (unquoted string for simple cases)
167        let identifier = text::ascii::ident()
168            .map(|s: &str| Value::String(s.to_string()))
169            .labelled("identifier");
170
171        choice((
172            string,
173            data_source_ref,
174            reference,
175            boolean,
176            number,
177            list,
178            map,
179            identifier,
180        ))
181        .labelled("value")
182    })
183}
184
185/// Parse a complete component with optional children
186pub fn component_parser<'a>(
187) -> impl Parser<'a, &'a str, ComponentSpecification, extra::Err<Rich<'a, char>>> + Clone {
188    recursive(|component| {
189        // Optional declaration keyword: "module" or "component"
190        let declaration_keyword = text::keyword("module")
191            .to(DeclarationType::Module)
192            .or(text::keyword("component").to(DeclarationType::ComponentKeyword))
193            .labelled("declaration keyword (module or component)")
194            .padded_with_comments()
195            .or_not();
196
197        // Component name (supports dot notation for applicators)
198        let name = just('.')
199            .or_not()
200            .then(text::ascii::ident())
201            .to_slice()
202            .map(|s: &str| s.to_string())
203            .labelled("component name")
204            .padded_with_comments();
205
206        // Parse single argument (named or positional)
207        let value = value_parser();
208
209        let arg = text::ascii::ident()
210            .then_ignore(just(':').padded_with_comments())
211            .then(value.clone())
212            .map(|(key, value)| (Some(key.to_string()), value))
213            .labelled("named argument (key: value)")
214            .or(value
215                .map(|value| (None, value))
216                .labelled("positional argument"));
217
218        // Arguments parser - only parses (arg1, arg2, ...) when present
219        let args_with_parens = arg
220            .clone()
221            .padded_with_comments()
222            .separated_by(just(',').labelled("',' between arguments"))
223            .allow_trailing()
224            .collect::<Vec<_>>()
225            .delimited_by(
226                just('(').labelled("'(' to start arguments"),
227                just(')').labelled("closing parenthesis ')'"),
228            )
229            .map(|args| {
230                let arguments = args
231                    .into_iter()
232                    .enumerate()
233                    .map(|(i, (key, value))| match key {
234                        Some(k) => Argument::Named { key: k, value },
235                        None => Argument::Positioned { position: i, value },
236                    })
237                    .collect();
238                ArgumentList::new(arguments)
239            })
240            .labelled("argument list (...)");
241
242        // Arguments are optional - either we have (...) or nothing.
243        // We use and_is(just('(').not()) on the empty case so that if a '(' is
244        // present, the parser must commit to parsing args_with_parens rather than
245        // silently falling through. This ensures errors from inside argument
246        // parsing (like unclosed strings) propagate correctly.
247        let args = args_with_parens.or(empty().and_is(just('(').not()).to(ArgumentList::empty()));
248
249        // Also keep arg_parser for applicators
250        let arg_parser = arg
251            .clone()
252            .padded_with_comments()
253            .separated_by(just(',').labelled("',' between arguments"))
254            .allow_trailing()
255            .collect::<Vec<_>>()
256            .delimited_by(just('('), just(')').labelled("closing parenthesis ')'"))
257            .map(|args| {
258                let arguments = args
259                    .into_iter()
260                    .enumerate()
261                    .map(|(i, (key, value))| match key {
262                        Some(k) => Argument::Named { key: k, value },
263                        None => Argument::Positioned { position: i, value },
264                    })
265                    .collect();
266                ArgumentList::new(arguments)
267            })
268            .or(empty().to(ArgumentList::empty()));
269
270        // Children block: { child1 child2 child3 }
271        // Parse explicitly to handle empty braces { }
272        let children_block = just('{')
273            .padded_with_comments()
274            .ignore_then(
275                component
276                    .clone()
277                    .padded_with_comments()
278                    .repeated()
279                    .collect::<Vec<_>>(),
280            )
281            .then_ignore(
282                just('}')
283                    .labelled("closing brace '}' for children block")
284                    .padded_with_comments(),
285            )
286            .labelled("children block {...}")
287            .or_not();
288
289        // Applicators: .applicator1() .applicator2(args)
290        let applicators = just('.')
291            .ignore_then(text::ascii::ident().labelled("applicator name"))
292            .then(arg_parser.clone())
293            .map(|(name, args)| ApplicatorSpecification {
294                name: name.to_string(),
295                arguments: args,
296                children: vec![],
297                internal_id: String::new(),
298            })
299            .labelled("applicator (.name(...))")
300            .padded_with_comments()
301            .repeated()
302            .collect::<Vec<_>>();
303
304        declaration_keyword
305            .then(name)
306            .then(args)
307            .then(children_block)
308            .then(applicators)
309            .map(|((((decl_type, name), args), children), applicators)| {
310                // Fold applicators into the component hierarchy
311                let base_component = ComponentSpecification::new(
312                    id_gen::NodeId::next().to_string(),
313                    name.clone(),
314                    args, // args is already an ArgumentList, not Option
315                    vec![],
316                    fold_applicators(children.unwrap_or_default()),
317                    MetaData {
318                        internal_id: String::new(),
319                        name_range: 0..0,
320                        block_range: None,
321                    },
322                )
323                .with_declaration_type(decl_type.unwrap_or(DeclarationType::Component));
324
325                // If there are applicators, add them to the component
326                if applicators.is_empty() {
327                    base_component
328                } else {
329                    ComponentSpecification {
330                        applicators,
331                        ..base_component
332                    }
333                }
334            })
335            .labelled("component")
336    })
337}
338
339/// Fold applicators into component hierarchy
340/// Components starting with '.' are treated as applicators of the previous component
341fn fold_applicators(components: Vec<ComponentSpecification>) -> Vec<ComponentSpecification> {
342    let mut result: Vec<ComponentSpecification> = Vec::new();
343
344    for component in components {
345        if component.name.starts_with('.') && !result.is_empty() {
346            // This is an applicator - attach it to the previous component
347            let mut owner: ComponentSpecification = result.pop().unwrap();
348            owner.applicators.push(component.to_applicator());
349            result.push(owner);
350        } else {
351            result.push(component);
352        }
353    }
354
355    result
356}
357
358/// Parse an import statement
359/// Syntax: import { Component1, Component2 } from "path"
360///     or: import Component from "path"
361pub fn import_parser<'a>(
362) -> impl Parser<'a, &'a str, ImportStatement, extra::Err<Rich<'a, char>>> + Clone {
363    // Parse import keyword
364    let import_keyword = text::keyword("import")
365        .labelled("'import' keyword")
366        .padded_with_comments();
367
368    // Parse a string literal for the source path (double or single quotes)
369    let dq_path = just('"')
370        .ignore_then(none_of('"').repeated().to_slice())
371        .then_ignore(just('"').labelled("closing quote '\"'"))
372        .map(|s: &str| s.to_string());
373    let sq_path = just('\'')
374        .ignore_then(none_of('\'').repeated().to_slice())
375        .then_ignore(just('\'').labelled("closing quote \"'\""))
376        .map(|s: &str| s.to_string());
377    let string_literal = dq_path.or(sq_path).labelled("import path string");
378
379    // Parse named imports: { Component1, Component2, ... }
380    let named_imports = text::ascii::ident()
381        .map(|s: &str| s.to_string())
382        .labelled("component name")
383        .padded_with_comments()
384        .separated_by(just(','))
385        .allow_trailing()
386        .collect::<Vec<String>>()
387        .delimited_by(
388            just('{').padded_with_comments(),
389            just('}')
390                .labelled("closing brace '}' for named imports")
391                .padded_with_comments(),
392        )
393        .map(ImportClause::Named)
394        .labelled("named imports { ... }");
395
396    // Parse default import: ComponentName
397    let default_import = text::ascii::ident()
398        .map(|s: &str| s.to_string())
399        .map(ImportClause::Default)
400        .labelled("default import name");
401
402    // Import clause can be either named or default
403    let import_clause = named_imports.or(default_import).padded_with_comments();
404
405    // Parse "from" keyword
406    let from_keyword = text::keyword("from")
407        .labelled("'from' keyword")
408        .padded_with_comments();
409
410    // Parse the full import statement
411    import_keyword
412        .ignore_then(import_clause)
413        .then_ignore(from_keyword)
414        .then(string_literal.padded_with_comments())
415        .map(|(clause, source_str)| {
416            // Determine if source is a URL or local path
417            let source = if source_str.starts_with("http://") || source_str.starts_with("https://")
418            {
419                ImportSource::Url(source_str)
420            } else {
421                ImportSource::Local(source_str)
422            };
423            ImportStatement::new(clause, source)
424        })
425        .labelled("import statement")
426}
427
428/// Parse a complete Hypen document with imports and components
429pub fn document_parser<'a>(
430) -> impl Parser<'a, &'a str, Document, extra::Err<Rich<'a, char>>> + Clone {
431    // Parse imports (zero or more)
432    let imports = import_parser()
433        .padded_with_comments()
434        .repeated()
435        .collect::<Vec<ImportStatement>>();
436
437    // Parse components (zero or more)
438    let components = component_parser()
439        .padded_with_comments()
440        .repeated()
441        .collect::<Vec<ComponentSpecification>>();
442
443    // Combine imports and components into a document
444    imports
445        .then(components)
446        .map(|(imports, components)| Document::new(imports, components))
447}
448
449/// Parse a list of components from text
450pub fn parse_components(input: &str) -> Result<Vec<ComponentSpecification>, Vec<Rich<'_, char>>> {
451    component_parser()
452        .padded_with_comments()
453        .repeated()
454        .collect()
455        .then_ignore(end())
456        .parse(input)
457        .into_result()
458}
459
460/// Parse a single component from text
461pub fn parse_component(input: &str) -> Result<ComponentSpecification, Vec<Rich<'_, char>>> {
462    component_parser()
463        .padded_with_comments()
464        .then_ignore(end())
465        .parse(input)
466        .into_result()
467}
468
469/// Parse a complete Hypen document (imports + components)
470pub fn parse_document(input: &str) -> Result<Document, Vec<Rich<'_, char>>> {
471    document_parser()
472        .padded_with_comments()
473        .then_ignore(end())
474        .parse(input)
475        .into_result()
476}
477
478/// Parse a single import statement
479pub fn parse_import(input: &str) -> Result<ImportStatement, Vec<Rich<'_, char>>> {
480    import_parser()
481        .padded_with_comments()
482        .then_ignore(end())
483        .parse(input)
484        .into_result()
485}
486
// Sequential ID source for AST nodes, backed by a single shared atomic counter.
mod id_gen {
    use std::fmt;
    use std::sync::atomic::{AtomicUsize, Ordering};

    // Next value to hand out; starts at zero and increases by one per call.
    static COUNTER: AtomicUsize = AtomicUsize::new(0);

    /// An identifier drawn from the shared counter; displays as `id-<n>`.
    pub struct NodeId(usize);

    impl NodeId {
        /// Take the current counter value as a new ID and advance the counter.
        pub fn next() -> Self {
            let value = COUNTER.fetch_add(1, Ordering::SeqCst);
            Self(value)
        }
    }

    impl fmt::Display for NodeId {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            write!(f, "id-{}", self.0)
        }
    }
}