Skip to main content

hypen_parser/
parser.rs

1use chumsky::prelude::*;
2
3use crate::ast::*;
4
5/// Parse a line comment: // ... until end of line
6fn line_comment<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
7    just("//").then(none_of('\n').repeated()).ignored()
8}
9
10/// Parse a block comment: /* ... */
11fn block_comment<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
12    just("/*")
13        .then(any().and_is(just("*/").not()).repeated())
14        .then(just("*/"))
15        .ignored()
16}
17
18/// Parse any comment (line or block)
19fn comment<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
20    line_comment().or(block_comment())
21}
22
23/// Parse whitespace and comments (replaces .padded() for comment support)
24fn ws<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
25    // Match zero or more of: (whitespace | comment)
26    // This avoids generating confusing "expected '/'" errors
27    choice((text::whitespace().at_least(1).ignored(), comment()))
28        .repeated()
29        .ignored()
30}
31
32/// Extension trait to add comment-aware padding to parsers
33trait PaddedWithComments<'a, O>: Parser<'a, &'a str, O, extra::Err<Rich<'a, char>>> + Sized {
34    fn padded_with_comments(self) -> impl Parser<'a, &'a str, O, extra::Err<Rich<'a, char>>> + Clone
35    where
36        Self: Clone,
37    {
38        ws().ignore_then(self).then_ignore(ws())
39    }
40}
41
42impl<'a, O, P> PaddedWithComments<'a, O> for P where
43    P: Parser<'a, &'a str, O, extra::Err<Rich<'a, char>>>
44{
45}
46
47/// Parse a Hypen value (string, number, boolean, reference, list, map)
48fn value_parser<'a>() -> impl Parser<'a, &'a str, Value, extra::Err<Rich<'a, char>>> + Clone {
49    recursive(|value| {
50        // Double-quoted string with escape support: "hello \"world\""
51        let dq_char = just('\\')
52            .ignore_then(any())
53            .ignored()
54            .or(none_of('"').ignored());
55        let dq_string = just('"')
56            .ignore_then(dq_char.repeated())
57            .then_ignore(just('"').labelled("closing quote '\"'"))
58            .to_slice()
59            .map(|s: &str| Value::String(s.to_string()))
60            .labelled("double-quoted string");
61
62        // Single-quoted string: 'hello "world"'
63        let sq_char = just('\\')
64            .ignore_then(any())
65            .ignored()
66            .or(none_of('\'').ignored());
67        let sq_string = just('\'')
68            .ignore_then(sq_char.repeated())
69            .then_ignore(just('\'').labelled("closing quote \"'\""))
70            .to_slice()
71            .map(|s: &str| Value::String(s.to_string()))
72            .labelled("single-quoted string");
73
74        let string = dq_string.or(sq_string).labelled("string literal");
75
76        // Boolean: true or false
77        let boolean = text::keyword("true")
78            .to(Value::Boolean(true))
79            .or(text::keyword("false").to(Value::Boolean(false)))
80            .labelled("boolean (true or false)");
81
82        // Number: 123, 123.45, -123, -123.45
83        let number = just('-')
84            .or_not()
85            .then(text::int(10))
86            .then(just('.').then(text::digits(10)).or_not())
87            .to_slice()
88            .map(|s: &str| Value::Number(s.parse().unwrap()))
89            .labelled("number");
90
91        // Reference: @state.user, @actions.login
92        let reference = just('@')
93            .ignore_then(
94                text::ascii::ident()
95                    .labelled("reference path (e.g., state.user)")
96                    .then(just('.').ignore_then(text::ascii::ident()).repeated())
97                    .to_slice(),
98            )
99            .map(|s: &str| Value::Reference(s.to_string()))
100            .labelled("reference (@state.* or @actions.*)");
101
102        // Data source reference: $provider.path.segments
103        // e.g., $spacetime.messages, $firebase.user.name
104        let data_source_ref = just('$')
105            .ignore_then(
106                text::ascii::ident()
107                    .labelled("data source provider (e.g., spacetime)")
108                    .then(
109                        just('.')
110                            .ignore_then(text::ascii::ident())
111                            .repeated()
112                            .at_least(1),
113                    )
114                    .to_slice(),
115            )
116            .map(|s: &str| Value::DataSourceReference(s.to_string()))
117            .labelled("data source reference ($provider.path)");
118
119        // List: [item1, item2, item3]
120        let list = value
121            .clone()
122            .padded_with_comments()
123            .separated_by(just(','))
124            .allow_trailing()
125            .collect()
126            .delimited_by(just('['), just(']').labelled("closing bracket ']'"))
127            .map(Value::List)
128            .labelled("list [...]");
129
130        // Map: {key1: value1, key2: value2}
131        let map_entry = text::ascii::ident()
132            .labelled("map key")
133            .padded_with_comments()
134            .then_ignore(just(':').labelled("':' after map key"))
135            .then(value.clone().padded_with_comments().labelled("map value"));
136
137        let map = map_entry
138            .separated_by(just(','))
139            .allow_trailing()
140            .collect::<Vec<_>>()
141            .delimited_by(just('{'), just('}').labelled("closing brace '}'"))
142            .map(|entries: Vec<(&str, Value)>| {
143                Value::Map(
144                    entries
145                        .into_iter()
146                        .map(|(k, v)| (k.to_string(), v))
147                        .collect(),
148                )
149            })
150            .labelled("map {...}");
151
152        // Bare identifier (unquoted string for simple cases)
153        let identifier = text::ascii::ident()
154            .map(|s: &str| Value::String(s.to_string()))
155            .labelled("identifier");
156
157        choice((string, data_source_ref, reference, boolean, number, list, map, identifier)).labelled("value")
158    })
159}
160
161/// Parse a complete component with optional children
162pub fn component_parser<'a>(
163) -> impl Parser<'a, &'a str, ComponentSpecification, extra::Err<Rich<'a, char>>> + Clone {
164    recursive(|component| {
165        // Optional declaration keyword: "module" or "component"
166        let declaration_keyword = text::keyword("module")
167            .to(DeclarationType::Module)
168            .or(text::keyword("component").to(DeclarationType::ComponentKeyword))
169            .labelled("declaration keyword (module or component)")
170            .padded_with_comments()
171            .or_not();
172
173        // Component name (supports dot notation for applicators)
174        let name = just('.')
175            .or_not()
176            .then(text::ascii::ident())
177            .to_slice()
178            .map(|s: &str| s.to_string())
179            .labelled("component name")
180            .padded_with_comments();
181
182        // Parse single argument (named or positional)
183        let value = value_parser();
184
185        let arg = text::ascii::ident()
186            .then_ignore(just(':').padded_with_comments())
187            .then(value.clone())
188            .map(|(key, value)| (Some(key.to_string()), value))
189            .labelled("named argument (key: value)")
190            .or(value
191                .map(|value| (None, value))
192                .labelled("positional argument"));
193
194        // Arguments parser - only parses (arg1, arg2, ...) when present
195        let args_with_parens = arg
196            .clone()
197            .padded_with_comments()
198            .separated_by(just(',').labelled("',' between arguments"))
199            .allow_trailing()
200            .collect::<Vec<_>>()
201            .delimited_by(
202                just('(').labelled("'(' to start arguments"),
203                just(')').labelled("closing parenthesis ')'"),
204            )
205            .map(|args| {
206                let arguments = args
207                    .into_iter()
208                    .enumerate()
209                    .map(|(i, (key, value))| match key {
210                        Some(k) => Argument::Named { key: k, value },
211                        None => Argument::Positioned { position: i, value },
212                    })
213                    .collect();
214                ArgumentList::new(arguments)
215            })
216            .labelled("argument list (...)");
217
218        // Arguments are optional - either we have (...) or nothing
219        let args = args_with_parens
220            .or_not()
221            .map(|opt| opt.unwrap_or_else(ArgumentList::empty));
222
223        // Also keep arg_parser for applicators
224        let arg_parser = arg
225            .clone()
226            .padded_with_comments()
227            .separated_by(just(',').labelled("',' between arguments"))
228            .allow_trailing()
229            .collect::<Vec<_>>()
230            .delimited_by(just('('), just(')').labelled("closing parenthesis ')'"))
231            .map(|args| {
232                let arguments = args
233                    .into_iter()
234                    .enumerate()
235                    .map(|(i, (key, value))| match key {
236                        Some(k) => Argument::Named { key: k, value },
237                        None => Argument::Positioned { position: i, value },
238                    })
239                    .collect();
240                ArgumentList::new(arguments)
241            })
242            .or(empty().to(ArgumentList::empty()));
243
244        // Children block: { child1 child2 child3 }
245        // Parse explicitly to handle empty braces { }
246        let children_block = just('{')
247            .padded_with_comments()
248            .ignore_then(
249                component
250                    .clone()
251                    .padded_with_comments()
252                    .repeated()
253                    .collect::<Vec<_>>(),
254            )
255            .then_ignore(
256                just('}')
257                    .labelled("closing brace '}' for children block")
258                    .padded_with_comments(),
259            )
260            .labelled("children block {...}")
261            .or_not();
262
263        // Applicators: .applicator1() .applicator2(args)
264        let applicators = just('.')
265            .ignore_then(text::ascii::ident().labelled("applicator name"))
266            .then(arg_parser.clone())
267            .map(|(name, args)| ApplicatorSpecification {
268                name: name.to_string(),
269                arguments: args,
270                children: vec![],
271                internal_id: String::new(),
272            })
273            .labelled("applicator (.name(...))")
274            .padded_with_comments()
275            .repeated()
276            .collect::<Vec<_>>();
277
278        declaration_keyword
279            .then(name)
280            .then(args)
281            .then(children_block)
282            .then(applicators)
283            .map(|((((decl_type, name), args), children), applicators)| {
284                // Fold applicators into the component hierarchy
285                let base_component = ComponentSpecification::new(
286                    id_gen::NodeId::next().to_string(),
287                    name.clone(),
288                    args, // args is already an ArgumentList, not Option
289                    vec![],
290                    fold_applicators(children.unwrap_or_default()),
291                    MetaData {
292                        internal_id: String::new(),
293                        name_range: 0..0,
294                        block_range: None,
295                    },
296                )
297                .with_declaration_type(decl_type.unwrap_or(DeclarationType::Component));
298
299                // If there are applicators, add them to the component
300                if applicators.is_empty() {
301                    base_component
302                } else {
303                    ComponentSpecification {
304                        applicators,
305                        ..base_component
306                    }
307                }
308            })
309            .labelled("component")
310    })
311}
312
313/// Fold applicators into component hierarchy
314/// Components starting with '.' are treated as applicators of the previous component
315fn fold_applicators(components: Vec<ComponentSpecification>) -> Vec<ComponentSpecification> {
316    let mut result: Vec<ComponentSpecification> = Vec::new();
317
318    for component in components {
319        if component.name.starts_with('.') && !result.is_empty() {
320            // This is an applicator - attach it to the previous component
321            let mut owner: ComponentSpecification = result.pop().unwrap();
322            owner.applicators.push(component.to_applicator());
323            result.push(owner);
324        } else {
325            result.push(component);
326        }
327    }
328
329    result
330}
331
332/// Parse an import statement
333/// Syntax: import { Component1, Component2 } from "path"
334///     or: import Component from "path"
335pub fn import_parser<'a>(
336) -> impl Parser<'a, &'a str, ImportStatement, extra::Err<Rich<'a, char>>> + Clone {
337    // Parse import keyword
338    let import_keyword = text::keyword("import")
339        .labelled("'import' keyword")
340        .padded_with_comments();
341
342    // Parse a string literal for the source path (double or single quotes)
343    let dq_path = just('"')
344        .ignore_then(none_of('"').repeated().to_slice())
345        .then_ignore(just('"').labelled("closing quote '\"'"))
346        .map(|s: &str| s.to_string());
347    let sq_path = just('\'')
348        .ignore_then(none_of('\'').repeated().to_slice())
349        .then_ignore(just('\'').labelled("closing quote \"'\""))
350        .map(|s: &str| s.to_string());
351    let string_literal = dq_path.or(sq_path).labelled("import path string");
352
353    // Parse named imports: { Component1, Component2, ... }
354    let named_imports = text::ascii::ident()
355        .map(|s: &str| s.to_string())
356        .labelled("component name")
357        .padded_with_comments()
358        .separated_by(just(','))
359        .allow_trailing()
360        .collect::<Vec<String>>()
361        .delimited_by(
362            just('{').padded_with_comments(),
363            just('}')
364                .labelled("closing brace '}' for named imports")
365                .padded_with_comments(),
366        )
367        .map(ImportClause::Named)
368        .labelled("named imports { ... }");
369
370    // Parse default import: ComponentName
371    let default_import = text::ascii::ident()
372        .map(|s: &str| s.to_string())
373        .map(ImportClause::Default)
374        .labelled("default import name");
375
376    // Import clause can be either named or default
377    let import_clause = named_imports.or(default_import).padded_with_comments();
378
379    // Parse "from" keyword
380    let from_keyword = text::keyword("from")
381        .labelled("'from' keyword")
382        .padded_with_comments();
383
384    // Parse the full import statement
385    import_keyword
386        .ignore_then(import_clause)
387        .then_ignore(from_keyword)
388        .then(string_literal.padded_with_comments())
389        .map(|(clause, source_str)| {
390            // Determine if source is a URL or local path
391            let source = if source_str.starts_with("http://") || source_str.starts_with("https://")
392            {
393                ImportSource::Url(source_str)
394            } else {
395                ImportSource::Local(source_str)
396            };
397            ImportStatement::new(clause, source)
398        })
399        .labelled("import statement")
400}
401
402/// Parse a complete Hypen document with imports and components
403pub fn document_parser<'a>(
404) -> impl Parser<'a, &'a str, Document, extra::Err<Rich<'a, char>>> + Clone {
405    // Parse imports (zero or more)
406    let imports = import_parser()
407        .padded_with_comments()
408        .repeated()
409        .collect::<Vec<ImportStatement>>();
410
411    // Parse components (zero or more)
412    let components = component_parser()
413        .padded_with_comments()
414        .repeated()
415        .collect::<Vec<ComponentSpecification>>();
416
417    // Combine imports and components into a document
418    imports
419        .then(components)
420        .map(|(imports, components)| Document::new(imports, components))
421}
422
423/// Parse a list of components from text
424pub fn parse_components(input: &str) -> Result<Vec<ComponentSpecification>, Vec<Rich<'_, char>>> {
425    component_parser()
426        .padded_with_comments()
427        .repeated()
428        .collect()
429        .then_ignore(end())
430        .parse(input)
431        .into_result()
432}
433
434/// Parse a single component from text
435pub fn parse_component(input: &str) -> Result<ComponentSpecification, Vec<Rich<'_, char>>> {
436    component_parser()
437        .padded_with_comments()
438        .then_ignore(end())
439        .parse(input)
440        .into_result()
441}
442
443/// Parse a complete Hypen document (imports + components)
444pub fn parse_document(input: &str) -> Result<Document, Vec<Rich<'_, char>>> {
445    document_parser()
446        .padded_with_comments()
447        .then_ignore(end())
448        .parse(input)
449        .into_result()
450}
451
452/// Parse a single import statement
453pub fn parse_import(input: &str) -> Result<ImportStatement, Vec<Rich<'_, char>>> {
454    import_parser()
455        .padded_with_comments()
456        .then_ignore(end())
457        .parse(input)
458        .into_result()
459}
460
// Sequential ID generator for AST nodes, backed by one process-wide counter.
mod id_gen {
    use std::fmt;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Next value to hand out; starts at zero and only ever increases.
    static COUNTER: AtomicUsize = AtomicUsize::new(0);

    /// Identifier for an AST node; displayed as `id-<n>`.
    pub struct NodeId(usize);

    impl NodeId {
        /// Take the counter's current value as a new ID and advance the counter by one.
        pub fn next() -> Self {
            let value = COUNTER.fetch_add(1, Ordering::SeqCst);
            Self(value)
        }
    }

    impl fmt::Display for NodeId {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            let NodeId(index) = self;
            write!(f, "id-{}", index)
        }
    }
}