Skip to main content

hypen_parser/
parser.rs

1use chumsky::prelude::*;
2
3use crate::ast::*;
4
5/// Parse a line comment: // ... until end of line
6fn line_comment<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
7    just("//").then(none_of('\n').repeated()).ignored()
8}
9
10/// Parse a block comment: /* ... */ (supports nesting)
11fn block_comment<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
12    recursive(|nested_comment| {
13        just("/*")
14            .then(
15                nested_comment
16                    .ignored()
17                    .or(any()
18                        .and_is(just("*/").not())
19                        .and_is(just("/*").not())
20                        .ignored())
21                    .repeated(),
22            )
23            .then(just("*/"))
24            .ignored()
25    })
26}
27
28/// Parse any comment (line or block)
29fn comment<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
30    line_comment().or(block_comment())
31}
32
33/// Parse whitespace and comments (replaces .padded() for comment support)
34fn ws<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
35    // Match zero or more of: (whitespace | comment)
36    // This avoids generating confusing "expected '/'" errors
37    choice((text::whitespace().at_least(1).ignored(), comment()))
38        .repeated()
39        .ignored()
40}
41
42/// Extension trait to add comment-aware padding to parsers
43trait PaddedWithComments<'a, O>: Parser<'a, &'a str, O, extra::Err<Rich<'a, char>>> + Sized {
44    fn padded_with_comments(self) -> impl Parser<'a, &'a str, O, extra::Err<Rich<'a, char>>> + Clone
45    where
46        Self: Clone,
47    {
48        ws().ignore_then(self).then_ignore(ws())
49    }
50}
51
52impl<'a, O, P> PaddedWithComments<'a, O> for P where
53    P: Parser<'a, &'a str, O, extra::Err<Rich<'a, char>>>
54{
55}
56
57/// Parse a Hypen value (string, number, boolean, reference, list, map)
58fn value_parser<'a>() -> impl Parser<'a, &'a str, Value, extra::Err<Rich<'a, char>>> + Clone {
59    recursive(|value| {
60        // Double-quoted string with escape support: "hello \"world\""
61        let dq_char = just('\\').ignore_then(any()).or(none_of('"'));
62        let dq_string = just('"')
63            .then(dq_char.repeated().collect::<Vec<_>>())
64            .then(just('"').labelled("closing quote '\"'"))
65            .to_slice()
66            .map(|s: &str| Value::String(s.to_string()))
67            .labelled("double-quoted string");
68
69        // Single-quoted string: 'hello "world"'
70        let sq_char = just('\\').ignore_then(any()).or(none_of('\''));
71        let sq_string = just('\'')
72            .then(sq_char.repeated().collect::<Vec<_>>())
73            .then(just('\'').labelled("closing quote \"'\""))
74            .to_slice()
75            .map(|s: &str| Value::String(s.to_string()))
76            .labelled("single-quoted string");
77
78        let string = dq_string.or(sq_string).labelled("string literal");
79
80        // Boolean: true or false
81        let boolean = text::keyword("true")
82            .to(Value::Boolean(true))
83            .or(text::keyword("false").to(Value::Boolean(false)))
84            .labelled("boolean (true or false)");
85
86        // Number: 123, 123.45, -123, -123.45
87        let number = just('-')
88            .or_not()
89            .then(text::int(10))
90            .then(just('.').then(text::digits(10)).or_not())
91            .to_slice()
92            .try_map(|s: &str, span| {
93                let n: f64 = s.parse().unwrap();
94                if n.is_infinite() {
95                    Err(Rich::custom(
96                        span,
97                        format!("number literal '{}' is too large", s),
98                    ))
99                } else {
100                    Ok(Value::Number(n))
101                }
102            })
103            .labelled("number");
104
105        // Identifier that allows hyphens (for resource names like "plus-square")
106        let hyphenated_ident = text::ascii::ident()
107            .then(just('-').then(text::ascii::ident()).repeated())
108            .to_slice();
109
110        // Reference: @state.user, @actions.login, @resources.plus-square
111        let reference = just('@')
112            .ignore_then(
113                text::ascii::ident()
114                    .labelled("reference path (e.g., state.user)")
115                    .then(just('.').ignore_then(hyphenated_ident).repeated())
116                    .to_slice(),
117            )
118            .map(|s: &str| Value::Reference(s.to_string()))
119            .labelled("reference (@state.*, @actions.*, @resources.*, @provider.*)");
120
121        // Data source references now use @ prefix like everything else.
122        // e.g., @spacetime.messages, @firebase.user.name
123        // They are parsed as Reference and the engine disambiguates based on
124        // known prefixes (state, item, actions, resources) vs data source providers.
125
126        // List: [item1, item2, item3]
127        let list = value
128            .clone()
129            .padded_with_comments()
130            .separated_by(just(','))
131            .allow_trailing()
132            .collect()
133            .delimited_by(just('['), just(']').labelled("closing bracket ']'"))
134            .map(Value::List)
135            .labelled("list [...]");
136
137        // Map: {key1: value1, key2: value2}
138        let map_entry = text::ascii::ident()
139            .labelled("map key")
140            .padded_with_comments()
141            .then_ignore(just(':').labelled("':' after map key"))
142            .then(value.clone().padded_with_comments().labelled("map value"));
143
144        let map = map_entry
145            .separated_by(just(','))
146            .allow_trailing()
147            .collect::<Vec<_>>()
148            .delimited_by(just('{'), just('}').labelled("closing brace '}'"))
149            .map(|entries: Vec<(&str, Value)>| {
150                Value::Map(
151                    entries
152                        .into_iter()
153                        .map(|(k, v)| (k.to_string(), v))
154                        .collect(),
155                )
156            })
157            .labelled("map {...}");
158
159        // Bare identifier (unquoted string for simple cases)
160        let identifier = text::ascii::ident()
161            .map(|s: &str| Value::String(s.to_string()))
162            .labelled("identifier");
163
164        choice((string, reference, boolean, number, list, map, identifier)).labelled("value")
165    })
166}
167
168/// Parse a complete component with optional children
169pub fn component_parser<'a>(
170) -> impl Parser<'a, &'a str, ComponentSpecification, extra::Err<Rich<'a, char>>> + Clone {
171    recursive(|component| {
172        // Optional declaration keyword: "module" or "component"
173        let declaration_keyword = text::keyword("module")
174            .to(DeclarationType::Module)
175            .or(text::keyword("component").to(DeclarationType::ComponentKeyword))
176            .labelled("declaration keyword (module or component)")
177            .padded_with_comments()
178            .or_not();
179
180        // Component name (supports dot notation for applicators)
181        let name = just('.')
182            .or_not()
183            .then(text::ascii::ident())
184            .to_slice()
185            .map(|s: &str| s.to_string())
186            .labelled("component name")
187            .padded_with_comments();
188
189        // Parse single argument (named or positional)
190        let value = value_parser();
191
192        let arg = text::ascii::ident()
193            .then_ignore(just(':').padded_with_comments())
194            .then(value.clone())
195            .map(|(key, value)| (Some(key.to_string()), value))
196            .labelled("named argument (key: value)")
197            .or(value
198                .map(|value| (None, value))
199                .labelled("positional argument"));
200
201        // Arguments parser - only parses (arg1, arg2, ...) when present
202        let args_with_parens = arg
203            .clone()
204            .padded_with_comments()
205            .separated_by(just(',').labelled("',' between arguments"))
206            .allow_trailing()
207            .collect::<Vec<_>>()
208            .delimited_by(
209                just('(').labelled("'(' to start arguments"),
210                just(')').labelled("closing parenthesis ')'"),
211            )
212            .map(|args| {
213                let arguments = args
214                    .into_iter()
215                    .enumerate()
216                    .map(|(i, (key, value))| match key {
217                        Some(k) => Argument::Named { key: k, value },
218                        None => Argument::Positioned { position: i, value },
219                    })
220                    .collect();
221                ArgumentList::new(arguments)
222            })
223            .labelled("argument list (...)");
224
225        // Arguments are optional - either we have (...) or nothing.
226        // We use and_is(just('(').not()) on the empty case so that if a '(' is
227        // present, the parser must commit to parsing args_with_parens rather than
228        // silently falling through. This ensures errors from inside argument
229        // parsing (like unclosed strings) propagate correctly.
230        let args = args_with_parens.or(empty().and_is(just('(').not()).to(ArgumentList::empty()));
231
232        // Also keep arg_parser for applicators
233        let arg_parser = arg
234            .clone()
235            .padded_with_comments()
236            .separated_by(just(',').labelled("',' between arguments"))
237            .allow_trailing()
238            .collect::<Vec<_>>()
239            .delimited_by(just('('), just(')').labelled("closing parenthesis ')'"))
240            .map(|args| {
241                let arguments = args
242                    .into_iter()
243                    .enumerate()
244                    .map(|(i, (key, value))| match key {
245                        Some(k) => Argument::Named { key: k, value },
246                        None => Argument::Positioned { position: i, value },
247                    })
248                    .collect();
249                ArgumentList::new(arguments)
250            })
251            .or(empty().to(ArgumentList::empty()));
252
253        // Children block: { child1 child2 child3 }
254        // Parse explicitly to handle empty braces { }
255        let children_block = just('{')
256            .padded_with_comments()
257            .ignore_then(
258                component
259                    .clone()
260                    .padded_with_comments()
261                    .repeated()
262                    .collect::<Vec<_>>(),
263            )
264            .then_ignore(
265                just('}')
266                    .labelled("closing brace '}' for children block")
267                    .padded_with_comments(),
268            )
269            .labelled("children block {...}")
270            .or_not();
271
272        // Applicators: .applicator1() .applicator2(args)
273        let applicators = just('.')
274            .ignore_then(text::ascii::ident().labelled("applicator name"))
275            .then(arg_parser.clone())
276            .map(|(name, args)| ApplicatorSpecification {
277                name: name.to_string(),
278                arguments: args,
279                children: vec![],
280                internal_id: String::new(),
281            })
282            .labelled("applicator (.name(...))")
283            .padded_with_comments()
284            .repeated()
285            .collect::<Vec<_>>();
286
287        declaration_keyword
288            .then(name)
289            .then(args)
290            .then(children_block)
291            .then(applicators)
292            .map(|((((decl_type, name), args), children), applicators)| {
293                // Fold applicators into the component hierarchy
294                let base_component = ComponentSpecification::new(
295                    id_gen::NodeId::next().to_string(),
296                    name.clone(),
297                    args, // args is already an ArgumentList, not Option
298                    vec![],
299                    fold_applicators(children.unwrap_or_default()),
300                    MetaData {
301                        internal_id: String::new(),
302                        name_range: 0..0,
303                        block_range: None,
304                    },
305                )
306                .with_declaration_type(decl_type.unwrap_or(DeclarationType::Component));
307
308                // If there are applicators, add them to the component
309                if applicators.is_empty() {
310                    base_component
311                } else {
312                    ComponentSpecification {
313                        applicators,
314                        ..base_component
315                    }
316                }
317            })
318            .labelled("component")
319    })
320}
321
322/// Fold applicators into component hierarchy
323/// Components starting with '.' are treated as applicators of the previous component
324fn fold_applicators(components: Vec<ComponentSpecification>) -> Vec<ComponentSpecification> {
325    let mut result: Vec<ComponentSpecification> = Vec::new();
326
327    for component in components {
328        if component.name.starts_with('.') && !result.is_empty() {
329            // This is an applicator - attach it to the previous component
330            let mut owner: ComponentSpecification = result.pop().unwrap();
331            owner.applicators.push(component.to_applicator());
332            result.push(owner);
333        } else {
334            result.push(component);
335        }
336    }
337
338    result
339}
340
341/// Parse an import statement
342/// Syntax: import { Component1, Component2 } from "path"
343///     or: import Component from "path"
344pub fn import_parser<'a>(
345) -> impl Parser<'a, &'a str, ImportStatement, extra::Err<Rich<'a, char>>> + Clone {
346    // Parse import keyword
347    let import_keyword = text::keyword("import")
348        .labelled("'import' keyword")
349        .padded_with_comments();
350
351    // Parse a string literal for the source path (double or single quotes)
352    let dq_path = just('"')
353        .ignore_then(none_of('"').repeated().to_slice())
354        .then_ignore(just('"').labelled("closing quote '\"'"))
355        .map(|s: &str| s.to_string());
356    let sq_path = just('\'')
357        .ignore_then(none_of('\'').repeated().to_slice())
358        .then_ignore(just('\'').labelled("closing quote \"'\""))
359        .map(|s: &str| s.to_string());
360    let string_literal = dq_path.or(sq_path).labelled("import path string");
361
362    // Parse named imports: { Component1, Component2, ... }
363    let named_imports = text::ascii::ident()
364        .map(|s: &str| s.to_string())
365        .labelled("component name")
366        .padded_with_comments()
367        .separated_by(just(','))
368        .allow_trailing()
369        .collect::<Vec<String>>()
370        .delimited_by(
371            just('{').padded_with_comments(),
372            just('}')
373                .labelled("closing brace '}' for named imports")
374                .padded_with_comments(),
375        )
376        .map(ImportClause::Named)
377        .labelled("named imports { ... }");
378
379    // Parse default import: ComponentName
380    let default_import = text::ascii::ident()
381        .map(|s: &str| s.to_string())
382        .map(ImportClause::Default)
383        .labelled("default import name");
384
385    // Import clause can be either named or default
386    let import_clause = named_imports.or(default_import).padded_with_comments();
387
388    // Parse "from" keyword
389    let from_keyword = text::keyword("from")
390        .labelled("'from' keyword")
391        .padded_with_comments();
392
393    // Parse the full import statement
394    import_keyword
395        .ignore_then(import_clause)
396        .then_ignore(from_keyword)
397        .then(string_literal.padded_with_comments())
398        .map(|(clause, source_str)| {
399            // Determine if source is a URL or local path
400            let source = if source_str.starts_with("http://") || source_str.starts_with("https://")
401            {
402                ImportSource::Url(source_str)
403            } else {
404                ImportSource::Local(source_str)
405            };
406            ImportStatement::new(clause, source)
407        })
408        .labelled("import statement")
409}
410
411/// Parse a complete Hypen document with imports and components
412pub fn document_parser<'a>(
413) -> impl Parser<'a, &'a str, Document, extra::Err<Rich<'a, char>>> + Clone {
414    // Parse imports (zero or more)
415    let imports = import_parser()
416        .padded_with_comments()
417        .repeated()
418        .collect::<Vec<ImportStatement>>();
419
420    // Parse components (zero or more)
421    let components = component_parser()
422        .padded_with_comments()
423        .repeated()
424        .collect::<Vec<ComponentSpecification>>();
425
426    // Combine imports and components into a document
427    imports
428        .then(components)
429        .map(|(imports, components)| Document::new(imports, components))
430}
431
432/// Parse a list of components from text
433pub fn parse_components(input: &str) -> Result<Vec<ComponentSpecification>, Vec<Rich<'_, char>>> {
434    component_parser()
435        .padded_with_comments()
436        .repeated()
437        .collect()
438        .then_ignore(end())
439        .parse(input)
440        .into_result()
441}
442
443/// Parse a single component from text
444pub fn parse_component(input: &str) -> Result<ComponentSpecification, Vec<Rich<'_, char>>> {
445    component_parser()
446        .padded_with_comments()
447        .then_ignore(end())
448        .parse(input)
449        .into_result()
450}
451
452/// Parse a complete Hypen document (imports + components)
453pub fn parse_document(input: &str) -> Result<Document, Vec<Rich<'_, char>>> {
454    document_parser()
455        .padded_with_comments()
456        .then_ignore(end())
457        .parse(input)
458        .into_result()
459}
460
461/// Parse a single import statement
462pub fn parse_import(input: &str) -> Result<ImportStatement, Vec<Rich<'_, char>>> {
463    import_parser()
464        .padded_with_comments()
465        .then_ignore(end())
466        .parse(input)
467        .into_result()
468}
469
// Simple sequential ID generator for AST nodes
mod id_gen {
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Shared counter backing [`NodeId::next`]; starts at zero.
    static COUNTER: AtomicUsize = AtomicUsize::new(0);

    /// Sequential identifier for an AST node; displays as `id-<n>`.
    ///
    /// The common value-type traits are derived so ids can be debug-printed,
    /// compared, copied, and used as map keys.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub struct NodeId(usize);

    impl NodeId {
        /// Generate the next sequential ID
        ///
        /// Each call atomically takes the current counter value and
        /// increments the counter by one.
        pub fn next() -> Self {
            NodeId(COUNTER.fetch_add(1, Ordering::SeqCst))
        }
    }

    impl std::fmt::Display for NodeId {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            write!(f, "id-{}", self.0)
        }
    }
}