Skip to main content

hypen_parser/
parser.rs

1use chumsky::prelude::*;
2
3use crate::ast::*;
4
5/// Parse a line comment: // ... until end of line
6fn line_comment<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
7    just("//").then(none_of('\n').repeated()).ignored()
8}
9
10/// Parse a block comment: /* ... */ (supports nesting)
11fn block_comment<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
12    recursive(|nested_comment| {
13        just("/*")
14            .then(
15                nested_comment
16                    .ignored()
17                    .or(any()
18                        .and_is(just("*/").not())
19                        .and_is(just("/*").not())
20                        .ignored())
21                    .repeated(),
22            )
23            .then(just("*/"))
24            .ignored()
25    })
26}
27
28/// Parse any comment (line or block)
29fn comment<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
30    line_comment().or(block_comment())
31}
32
33/// Parse whitespace and comments (replaces .padded() for comment support)
34fn ws<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
35    // Match zero or more of: (whitespace | comment)
36    // This avoids generating confusing "expected '/'" errors
37    choice((text::whitespace().at_least(1).ignored(), comment()))
38        .repeated()
39        .ignored()
40}
41
42/// Extension trait to add comment-aware padding to parsers
43trait PaddedWithComments<'a, O>: Parser<'a, &'a str, O, extra::Err<Rich<'a, char>>> + Sized {
44    fn padded_with_comments(self) -> impl Parser<'a, &'a str, O, extra::Err<Rich<'a, char>>> + Clone
45    where
46        Self: Clone,
47    {
48        ws().ignore_then(self).then_ignore(ws())
49    }
50}
51
52impl<'a, O, P> PaddedWithComments<'a, O> for P where
53    P: Parser<'a, &'a str, O, extra::Err<Rich<'a, char>>>
54{
55}
56
57/// Parse a Hypen value (string, number, boolean, reference, list, map)
58fn value_parser<'a>() -> impl Parser<'a, &'a str, Value, extra::Err<Rich<'a, char>>> + Clone {
59    recursive(|value| {
60        // Double-quoted string with escape support: "hello \"world\""
61        let dq_char = just('\\').ignore_then(any()).or(none_of('"'));
62        let dq_string = just('"')
63            .then(dq_char.repeated().collect::<Vec<_>>())
64            .then(just('"').labelled("closing quote '\"'"))
65            .to_slice()
66            .map(|s: &str| Value::String(s.to_string()))
67            .labelled("double-quoted string");
68
69        // Single-quoted string: 'hello "world"'
70        let sq_char = just('\\').ignore_then(any()).or(none_of('\''));
71        let sq_string = just('\'')
72            .then(sq_char.repeated().collect::<Vec<_>>())
73            .then(just('\'').labelled("closing quote \"'\""))
74            .to_slice()
75            .map(|s: &str| Value::String(s.to_string()))
76            .labelled("single-quoted string");
77
78        let string = dq_string.or(sq_string).labelled("string literal");
79
80        // Boolean: true or false
81        let boolean = text::keyword("true")
82            .to(Value::Boolean(true))
83            .or(text::keyword("false").to(Value::Boolean(false)))
84            .labelled("boolean (true or false)");
85
86        // Number: 123, 123.45, -123, -123.45
87        let number = just('-')
88            .or_not()
89            .then(text::int(10))
90            .then(just('.').then(text::digits(10)).or_not())
91            .to_slice()
92            .try_map(|s: &str, span| {
93                let n: f64 = s.parse().unwrap();
94                if n.is_infinite() {
95                    Err(Rich::custom(
96                        span,
97                        format!("number literal '{}' is too large", s),
98                    ))
99                } else {
100                    Ok(Value::Number(n))
101                }
102            })
103            .labelled("number");
104
105        // Identifier that allows hyphens (for resource names like "plus-square")
106        let hyphenated_ident = text::ascii::ident()
107            .then(just('-').then(text::ascii::ident()).repeated())
108            .to_slice();
109
110        // Reference: @state.user, @actions.login, @resources.plus-square
111        let reference = just('@')
112            .ignore_then(
113                text::ascii::ident()
114                    .labelled("reference path (e.g., state.user)")
115                    .then(just('.').ignore_then(hyphenated_ident).repeated())
116                    .to_slice(),
117            )
118            .map(|s: &str| Value::Reference(s.to_string()))
119            .labelled("reference (@state.*, @actions.*, @resources.*, @provider.*)");
120
121        // Data source references now use @ prefix like everything else.
122        // e.g., @spacetime.messages, @firebase.user.name
123        // They are parsed as Reference and the engine disambiguates based on
124        // known prefixes (state, item, actions, resources) vs data source providers.
125
126        // List: [item1, item2, item3]
127        let list = value
128            .clone()
129            .padded_with_comments()
130            .separated_by(just(','))
131            .allow_trailing()
132            .collect()
133            .delimited_by(just('['), just(']').labelled("closing bracket ']'"))
134            .map(Value::List)
135            .labelled("list [...]");
136
137        // Map: {key1: value1, key2: value2}
138        let map_entry = text::ascii::ident()
139            .labelled("map key")
140            .padded_with_comments()
141            .then_ignore(just(':').labelled("':' after map key"))
142            .then(value.clone().padded_with_comments().labelled("map value"));
143
144        let map = map_entry
145            .separated_by(just(','))
146            .allow_trailing()
147            .collect::<Vec<_>>()
148            .delimited_by(just('{'), just('}').labelled("closing brace '}'"))
149            .map(|entries: Vec<(&str, Value)>| {
150                Value::Map(
151                    entries
152                        .into_iter()
153                        .map(|(k, v)| (k.to_string(), v))
154                        .collect(),
155                )
156            })
157            .labelled("map {...}");
158
159        // Bare identifier (unquoted string for simple cases)
160        let identifier = text::ascii::ident()
161            .map(|s: &str| Value::String(s.to_string()))
162            .labelled("identifier");
163
164        choice((
165            string,
166            reference,
167            boolean,
168            number,
169            list,
170            map,
171            identifier,
172        ))
173        .labelled("value")
174    })
175}
176
177/// Parse a complete component with optional children
178pub fn component_parser<'a>(
179) -> impl Parser<'a, &'a str, ComponentSpecification, extra::Err<Rich<'a, char>>> + Clone {
180    recursive(|component| {
181        // Optional declaration keyword: "module" or "component"
182        let declaration_keyword = text::keyword("module")
183            .to(DeclarationType::Module)
184            .or(text::keyword("component").to(DeclarationType::ComponentKeyword))
185            .labelled("declaration keyword (module or component)")
186            .padded_with_comments()
187            .or_not();
188
189        // Component name (supports dot notation for applicators)
190        let name = just('.')
191            .or_not()
192            .then(text::ascii::ident())
193            .to_slice()
194            .map(|s: &str| s.to_string())
195            .labelled("component name")
196            .padded_with_comments();
197
198        // Parse single argument (named or positional)
199        let value = value_parser();
200
201        let arg = text::ascii::ident()
202            .then_ignore(just(':').padded_with_comments())
203            .then(value.clone())
204            .map(|(key, value)| (Some(key.to_string()), value))
205            .labelled("named argument (key: value)")
206            .or(value
207                .map(|value| (None, value))
208                .labelled("positional argument"));
209
210        // Arguments parser - only parses (arg1, arg2, ...) when present
211        let args_with_parens = arg
212            .clone()
213            .padded_with_comments()
214            .separated_by(just(',').labelled("',' between arguments"))
215            .allow_trailing()
216            .collect::<Vec<_>>()
217            .delimited_by(
218                just('(').labelled("'(' to start arguments"),
219                just(')').labelled("closing parenthesis ')'"),
220            )
221            .map(|args| {
222                let arguments = args
223                    .into_iter()
224                    .enumerate()
225                    .map(|(i, (key, value))| match key {
226                        Some(k) => Argument::Named { key: k, value },
227                        None => Argument::Positioned { position: i, value },
228                    })
229                    .collect();
230                ArgumentList::new(arguments)
231            })
232            .labelled("argument list (...)");
233
234        // Arguments are optional - either we have (...) or nothing.
235        // We use and_is(just('(').not()) on the empty case so that if a '(' is
236        // present, the parser must commit to parsing args_with_parens rather than
237        // silently falling through. This ensures errors from inside argument
238        // parsing (like unclosed strings) propagate correctly.
239        let args = args_with_parens.or(empty().and_is(just('(').not()).to(ArgumentList::empty()));
240
241        // Also keep arg_parser for applicators
242        let arg_parser = arg
243            .clone()
244            .padded_with_comments()
245            .separated_by(just(',').labelled("',' between arguments"))
246            .allow_trailing()
247            .collect::<Vec<_>>()
248            .delimited_by(just('('), just(')').labelled("closing parenthesis ')'"))
249            .map(|args| {
250                let arguments = args
251                    .into_iter()
252                    .enumerate()
253                    .map(|(i, (key, value))| match key {
254                        Some(k) => Argument::Named { key: k, value },
255                        None => Argument::Positioned { position: i, value },
256                    })
257                    .collect();
258                ArgumentList::new(arguments)
259            })
260            .or(empty().to(ArgumentList::empty()));
261
262        // Children block: { child1 child2 child3 }
263        // Parse explicitly to handle empty braces { }
264        let children_block = just('{')
265            .padded_with_comments()
266            .ignore_then(
267                component
268                    .clone()
269                    .padded_with_comments()
270                    .repeated()
271                    .collect::<Vec<_>>(),
272            )
273            .then_ignore(
274                just('}')
275                    .labelled("closing brace '}' for children block")
276                    .padded_with_comments(),
277            )
278            .labelled("children block {...}")
279            .or_not();
280
281        // Applicators: .applicator1() .applicator2(args)
282        let applicators = just('.')
283            .ignore_then(text::ascii::ident().labelled("applicator name"))
284            .then(arg_parser.clone())
285            .map(|(name, args)| ApplicatorSpecification {
286                name: name.to_string(),
287                arguments: args,
288                children: vec![],
289                internal_id: String::new(),
290            })
291            .labelled("applicator (.name(...))")
292            .padded_with_comments()
293            .repeated()
294            .collect::<Vec<_>>();
295
296        declaration_keyword
297            .then(name)
298            .then(args)
299            .then(children_block)
300            .then(applicators)
301            .map(|((((decl_type, name), args), children), applicators)| {
302                // Fold applicators into the component hierarchy
303                let base_component = ComponentSpecification::new(
304                    id_gen::NodeId::next().to_string(),
305                    name.clone(),
306                    args, // args is already an ArgumentList, not Option
307                    vec![],
308                    fold_applicators(children.unwrap_or_default()),
309                    MetaData {
310                        internal_id: String::new(),
311                        name_range: 0..0,
312                        block_range: None,
313                    },
314                )
315                .with_declaration_type(decl_type.unwrap_or(DeclarationType::Component));
316
317                // If there are applicators, add them to the component
318                if applicators.is_empty() {
319                    base_component
320                } else {
321                    ComponentSpecification {
322                        applicators,
323                        ..base_component
324                    }
325                }
326            })
327            .labelled("component")
328    })
329}
330
331/// Fold applicators into component hierarchy
332/// Components starting with '.' are treated as applicators of the previous component
333fn fold_applicators(components: Vec<ComponentSpecification>) -> Vec<ComponentSpecification> {
334    let mut result: Vec<ComponentSpecification> = Vec::new();
335
336    for component in components {
337        if component.name.starts_with('.') && !result.is_empty() {
338            // This is an applicator - attach it to the previous component
339            let mut owner: ComponentSpecification = result.pop().unwrap();
340            owner.applicators.push(component.to_applicator());
341            result.push(owner);
342        } else {
343            result.push(component);
344        }
345    }
346
347    result
348}
349
350/// Parse an import statement
351/// Syntax: import { Component1, Component2 } from "path"
352///     or: import Component from "path"
353pub fn import_parser<'a>(
354) -> impl Parser<'a, &'a str, ImportStatement, extra::Err<Rich<'a, char>>> + Clone {
355    // Parse import keyword
356    let import_keyword = text::keyword("import")
357        .labelled("'import' keyword")
358        .padded_with_comments();
359
360    // Parse a string literal for the source path (double or single quotes)
361    let dq_path = just('"')
362        .ignore_then(none_of('"').repeated().to_slice())
363        .then_ignore(just('"').labelled("closing quote '\"'"))
364        .map(|s: &str| s.to_string());
365    let sq_path = just('\'')
366        .ignore_then(none_of('\'').repeated().to_slice())
367        .then_ignore(just('\'').labelled("closing quote \"'\""))
368        .map(|s: &str| s.to_string());
369    let string_literal = dq_path.or(sq_path).labelled("import path string");
370
371    // Parse named imports: { Component1, Component2, ... }
372    let named_imports = text::ascii::ident()
373        .map(|s: &str| s.to_string())
374        .labelled("component name")
375        .padded_with_comments()
376        .separated_by(just(','))
377        .allow_trailing()
378        .collect::<Vec<String>>()
379        .delimited_by(
380            just('{').padded_with_comments(),
381            just('}')
382                .labelled("closing brace '}' for named imports")
383                .padded_with_comments(),
384        )
385        .map(ImportClause::Named)
386        .labelled("named imports { ... }");
387
388    // Parse default import: ComponentName
389    let default_import = text::ascii::ident()
390        .map(|s: &str| s.to_string())
391        .map(ImportClause::Default)
392        .labelled("default import name");
393
394    // Import clause can be either named or default
395    let import_clause = named_imports.or(default_import).padded_with_comments();
396
397    // Parse "from" keyword
398    let from_keyword = text::keyword("from")
399        .labelled("'from' keyword")
400        .padded_with_comments();
401
402    // Parse the full import statement
403    import_keyword
404        .ignore_then(import_clause)
405        .then_ignore(from_keyword)
406        .then(string_literal.padded_with_comments())
407        .map(|(clause, source_str)| {
408            // Determine if source is a URL or local path
409            let source = if source_str.starts_with("http://") || source_str.starts_with("https://")
410            {
411                ImportSource::Url(source_str)
412            } else {
413                ImportSource::Local(source_str)
414            };
415            ImportStatement::new(clause, source)
416        })
417        .labelled("import statement")
418}
419
420/// Parse a complete Hypen document with imports and components
421pub fn document_parser<'a>(
422) -> impl Parser<'a, &'a str, Document, extra::Err<Rich<'a, char>>> + Clone {
423    // Parse imports (zero or more)
424    let imports = import_parser()
425        .padded_with_comments()
426        .repeated()
427        .collect::<Vec<ImportStatement>>();
428
429    // Parse components (zero or more)
430    let components = component_parser()
431        .padded_with_comments()
432        .repeated()
433        .collect::<Vec<ComponentSpecification>>();
434
435    // Combine imports and components into a document
436    imports
437        .then(components)
438        .map(|(imports, components)| Document::new(imports, components))
439}
440
441/// Parse a list of components from text
442pub fn parse_components(input: &str) -> Result<Vec<ComponentSpecification>, Vec<Rich<'_, char>>> {
443    component_parser()
444        .padded_with_comments()
445        .repeated()
446        .collect()
447        .then_ignore(end())
448        .parse(input)
449        .into_result()
450}
451
452/// Parse a single component from text
453pub fn parse_component(input: &str) -> Result<ComponentSpecification, Vec<Rich<'_, char>>> {
454    component_parser()
455        .padded_with_comments()
456        .then_ignore(end())
457        .parse(input)
458        .into_result()
459}
460
461/// Parse a complete Hypen document (imports + components)
462pub fn parse_document(input: &str) -> Result<Document, Vec<Rich<'_, char>>> {
463    document_parser()
464        .padded_with_comments()
465        .then_ignore(end())
466        .parse(input)
467        .into_result()
468}
469
470/// Parse a single import statement
471pub fn parse_import(input: &str) -> Result<ImportStatement, Vec<Rich<'_, char>>> {
472    import_parser()
473        .padded_with_comments()
474        .then_ignore(end())
475        .parse(input)
476        .into_result()
477}
478
// Process-wide sequential ID generator for AST nodes.
mod id_gen {
    use std::fmt;
    use std::sync::atomic::{AtomicUsize, Ordering};

    // Next value to hand out; starts at 0 and grows by one per call.
    static NEXT_ID: AtomicUsize = AtomicUsize::new(0);

    /// Opaque node identifier, displayed as `id-<n>`.
    pub struct NodeId(usize);

    impl NodeId {
        /// Take the current counter value as a new ID and advance the counter.
        pub fn next() -> Self {
            let value = NEXT_ID.fetch_add(1, Ordering::SeqCst);
            Self(value)
        }
    }

    impl fmt::Display for NodeId {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            let Self(value) = self;
            write!(f, "id-{}", value)
        }
    }
}