// hypen_parser/parser.rs

1use chumsky::prelude::*;
2
3use crate::ast::*;
4
5/// Parse a line comment: // ... until end of line
6fn line_comment<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
7    just("//").then(none_of('\n').repeated()).ignored()
8}
9
10/// Parse a block comment: /* ... */
11fn block_comment<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
12    just("/*")
13        .then(any().and_is(just("*/").not()).repeated())
14        .then(just("*/"))
15        .ignored()
16}
17
18/// Parse any comment (line or block)
19fn comment<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
20    line_comment().or(block_comment())
21}
22
23/// Parse whitespace and comments (replaces .padded() for comment support)
24fn ws<'a>() -> impl Parser<'a, &'a str, (), extra::Err<Rich<'a, char>>> + Clone {
25    // Match zero or more of: (whitespace | comment)
26    // This avoids generating confusing "expected '/'" errors
27    choice((text::whitespace().at_least(1).ignored(), comment()))
28        .repeated()
29        .ignored()
30}
31
32/// Extension trait to add comment-aware padding to parsers
33trait PaddedWithComments<'a, O>: Parser<'a, &'a str, O, extra::Err<Rich<'a, char>>> + Sized {
34    fn padded_with_comments(self) -> impl Parser<'a, &'a str, O, extra::Err<Rich<'a, char>>> + Clone
35    where
36        Self: Clone,
37    {
38        ws().ignore_then(self).then_ignore(ws())
39    }
40}
41
42impl<'a, O, P> PaddedWithComments<'a, O> for P where
43    P: Parser<'a, &'a str, O, extra::Err<Rich<'a, char>>>
44{
45}
46
47/// Parse a Hypen value (string, number, boolean, reference, list, map)
48fn value_parser<'a>() -> impl Parser<'a, &'a str, Value, extra::Err<Rich<'a, char>>> + Clone {
49    recursive(|value| {
50        // Double-quoted string with escape support: "hello \"world\""
51        let dq_char = just('\\')
52            .ignore_then(any())
53            .ignored()
54            .or(none_of('"').ignored());
55        let dq_string = just('"')
56            .ignore_then(dq_char.repeated())
57            .then_ignore(just('"').labelled("closing quote '\"'"))
58            .to_slice()
59            .map(|s: &str| Value::String(s.to_string()))
60            .labelled("double-quoted string");
61
62        // Single-quoted string: 'hello "world"'
63        let sq_char = just('\\')
64            .ignore_then(any())
65            .ignored()
66            .or(none_of('\'').ignored());
67        let sq_string = just('\'')
68            .ignore_then(sq_char.repeated())
69            .then_ignore(just('\'').labelled("closing quote \"'\""))
70            .to_slice()
71            .map(|s: &str| Value::String(s.to_string()))
72            .labelled("single-quoted string");
73
74        let string = dq_string.or(sq_string).labelled("string literal");
75
76        // Boolean: true or false
77        let boolean = text::keyword("true")
78            .to(Value::Boolean(true))
79            .or(text::keyword("false").to(Value::Boolean(false)))
80            .labelled("boolean (true or false)");
81
82        // Number: 123, 123.45, -123, -123.45
83        let number = just('-')
84            .or_not()
85            .then(text::int(10))
86            .then(just('.').then(text::digits(10)).or_not())
87            .to_slice()
88            .map(|s: &str| Value::Number(s.parse().unwrap()))
89            .labelled("number");
90
91        // Reference: @state.user, @actions.login
92        let reference = just('@')
93            .ignore_then(
94                text::ascii::ident()
95                    .labelled("reference path (e.g., state.user)")
96                    .then(just('.').ignore_then(text::ascii::ident()).repeated())
97                    .to_slice(),
98            )
99            .map(|s: &str| Value::Reference(s.to_string()))
100            .labelled("reference (@state.* or @actions.*)");
101
102        // List: [item1, item2, item3]
103        let list = value
104            .clone()
105            .padded_with_comments()
106            .separated_by(just(','))
107            .allow_trailing()
108            .collect()
109            .delimited_by(just('['), just(']').labelled("closing bracket ']'"))
110            .map(Value::List)
111            .labelled("list [...]");
112
113        // Map: {key1: value1, key2: value2}
114        let map_entry = text::ascii::ident()
115            .labelled("map key")
116            .padded_with_comments()
117            .then_ignore(just(':').labelled("':' after map key"))
118            .then(value.clone().padded_with_comments().labelled("map value"));
119
120        let map = map_entry
121            .separated_by(just(','))
122            .allow_trailing()
123            .collect::<Vec<_>>()
124            .delimited_by(just('{'), just('}').labelled("closing brace '}'"))
125            .map(|entries: Vec<(&str, Value)>| {
126                Value::Map(
127                    entries
128                        .into_iter()
129                        .map(|(k, v)| (k.to_string(), v))
130                        .collect(),
131                )
132            })
133            .labelled("map {...}");
134
135        // Bare identifier (unquoted string for simple cases)
136        let identifier = text::ascii::ident()
137            .map(|s: &str| Value::String(s.to_string()))
138            .labelled("identifier");
139
140        choice((string, reference, boolean, number, list, map, identifier)).labelled("value")
141    })
142}
143
144/// Parse a complete component with optional children
145pub fn component_parser<'a>(
146) -> impl Parser<'a, &'a str, ComponentSpecification, extra::Err<Rich<'a, char>>> + Clone {
147    recursive(|component| {
148        // Optional declaration keyword: "module" or "component"
149        let declaration_keyword = text::keyword("module")
150            .to(DeclarationType::Module)
151            .or(text::keyword("component").to(DeclarationType::ComponentKeyword))
152            .labelled("declaration keyword (module or component)")
153            .padded_with_comments()
154            .or_not();
155
156        // Component name (supports dot notation for applicators)
157        let name = just('.')
158            .or_not()
159            .then(text::ascii::ident())
160            .to_slice()
161            .map(|s: &str| s.to_string())
162            .labelled("component name")
163            .padded_with_comments();
164
165        // Parse single argument (named or positional)
166        let value = value_parser();
167
168        let arg = text::ascii::ident()
169            .then_ignore(just(':').padded_with_comments())
170            .then(value.clone())
171            .map(|(key, value)| (Some(key.to_string()), value))
172            .labelled("named argument (key: value)")
173            .or(value
174                .map(|value| (None, value))
175                .labelled("positional argument"));
176
177        // Arguments parser - only parses (arg1, arg2, ...) when present
178        let args_with_parens = arg
179            .clone()
180            .padded_with_comments()
181            .separated_by(just(',').labelled("',' between arguments"))
182            .allow_trailing()
183            .collect::<Vec<_>>()
184            .delimited_by(
185                just('(').labelled("'(' to start arguments"),
186                just(')').labelled("closing parenthesis ')'"),
187            )
188            .map(|args| {
189                let arguments = args
190                    .into_iter()
191                    .enumerate()
192                    .map(|(i, (key, value))| match key {
193                        Some(k) => Argument::Named { key: k, value },
194                        None => Argument::Positioned { position: i, value },
195                    })
196                    .collect();
197                ArgumentList::new(arguments)
198            })
199            .labelled("argument list (...)");
200
201        // Arguments are optional - either we have (...) or nothing
202        let args = args_with_parens
203            .or_not()
204            .map(|opt| opt.unwrap_or_else(ArgumentList::empty));
205
206        // Also keep arg_parser for applicators
207        let arg_parser = arg
208            .clone()
209            .padded_with_comments()
210            .separated_by(just(',').labelled("',' between arguments"))
211            .allow_trailing()
212            .collect::<Vec<_>>()
213            .delimited_by(just('('), just(')').labelled("closing parenthesis ')'"))
214            .map(|args| {
215                let arguments = args
216                    .into_iter()
217                    .enumerate()
218                    .map(|(i, (key, value))| match key {
219                        Some(k) => Argument::Named { key: k, value },
220                        None => Argument::Positioned { position: i, value },
221                    })
222                    .collect();
223                ArgumentList::new(arguments)
224            })
225            .or(empty().to(ArgumentList::empty()));
226
227        // Children block: { child1 child2 child3 }
228        // Parse explicitly to handle empty braces { }
229        let children_block = just('{')
230            .padded_with_comments()
231            .ignore_then(
232                component
233                    .clone()
234                    .padded_with_comments()
235                    .repeated()
236                    .collect::<Vec<_>>(),
237            )
238            .then_ignore(
239                just('}')
240                    .labelled("closing brace '}' for children block")
241                    .padded_with_comments(),
242            )
243            .labelled("children block {...}")
244            .or_not();
245
246        // Applicators: .applicator1() .applicator2(args)
247        let applicators = just('.')
248            .ignore_then(text::ascii::ident().labelled("applicator name"))
249            .then(arg_parser.clone())
250            .map(|(name, args)| ApplicatorSpecification {
251                name: name.to_string(),
252                arguments: args,
253                children: vec![],
254                internal_id: String::new(),
255            })
256            .labelled("applicator (.name(...))")
257            .padded_with_comments()
258            .repeated()
259            .collect::<Vec<_>>();
260
261        declaration_keyword
262            .then(name)
263            .then(args)
264            .then(children_block)
265            .then(applicators)
266            .map(|((((decl_type, name), args), children), applicators)| {
267                // Fold applicators into the component hierarchy
268                let base_component = ComponentSpecification::new(
269                    id_gen::NodeId::next().to_string(),
270                    name.clone(),
271                    args, // args is already an ArgumentList, not Option
272                    vec![],
273                    fold_applicators(children.unwrap_or_default()),
274                    MetaData {
275                        internal_id: String::new(),
276                        name_range: 0..0,
277                        block_range: None,
278                    },
279                )
280                .with_declaration_type(decl_type.unwrap_or(DeclarationType::Component));
281
282                // If there are applicators, add them to the component
283                if applicators.is_empty() {
284                    base_component
285                } else {
286                    ComponentSpecification {
287                        applicators,
288                        ..base_component
289                    }
290                }
291            })
292            .labelled("component")
293    })
294}
295
296/// Fold applicators into component hierarchy
297/// Components starting with '.' are treated as applicators of the previous component
298fn fold_applicators(components: Vec<ComponentSpecification>) -> Vec<ComponentSpecification> {
299    let mut result: Vec<ComponentSpecification> = Vec::new();
300
301    for component in components {
302        if component.name.starts_with('.') && !result.is_empty() {
303            // This is an applicator - attach it to the previous component
304            let mut owner: ComponentSpecification = result.pop().unwrap();
305            owner.applicators.push(component.to_applicator());
306            result.push(owner);
307        } else {
308            result.push(component);
309        }
310    }
311
312    result
313}
314
315/// Parse an import statement
316/// Syntax: import { Component1, Component2 } from "path"
317///     or: import Component from "path"
318pub fn import_parser<'a>(
319) -> impl Parser<'a, &'a str, ImportStatement, extra::Err<Rich<'a, char>>> + Clone {
320    // Parse import keyword
321    let import_keyword = text::keyword("import")
322        .labelled("'import' keyword")
323        .padded_with_comments();
324
325    // Parse a string literal for the source path (double or single quotes)
326    let dq_path = just('"')
327        .ignore_then(none_of('"').repeated().to_slice())
328        .then_ignore(just('"').labelled("closing quote '\"'"))
329        .map(|s: &str| s.to_string());
330    let sq_path = just('\'')
331        .ignore_then(none_of('\'').repeated().to_slice())
332        .then_ignore(just('\'').labelled("closing quote \"'\""))
333        .map(|s: &str| s.to_string());
334    let string_literal = dq_path.or(sq_path).labelled("import path string");
335
336    // Parse named imports: { Component1, Component2, ... }
337    let named_imports = text::ascii::ident()
338        .map(|s: &str| s.to_string())
339        .labelled("component name")
340        .padded_with_comments()
341        .separated_by(just(','))
342        .allow_trailing()
343        .collect::<Vec<String>>()
344        .delimited_by(
345            just('{').padded_with_comments(),
346            just('}')
347                .labelled("closing brace '}' for named imports")
348                .padded_with_comments(),
349        )
350        .map(ImportClause::Named)
351        .labelled("named imports { ... }");
352
353    // Parse default import: ComponentName
354    let default_import = text::ascii::ident()
355        .map(|s: &str| s.to_string())
356        .map(ImportClause::Default)
357        .labelled("default import name");
358
359    // Import clause can be either named or default
360    let import_clause = named_imports.or(default_import).padded_with_comments();
361
362    // Parse "from" keyword
363    let from_keyword = text::keyword("from")
364        .labelled("'from' keyword")
365        .padded_with_comments();
366
367    // Parse the full import statement
368    import_keyword
369        .ignore_then(import_clause)
370        .then_ignore(from_keyword)
371        .then(string_literal.padded_with_comments())
372        .map(|(clause, source_str)| {
373            // Determine if source is a URL or local path
374            let source = if source_str.starts_with("http://") || source_str.starts_with("https://")
375            {
376                ImportSource::Url(source_str)
377            } else {
378                ImportSource::Local(source_str)
379            };
380            ImportStatement::new(clause, source)
381        })
382        .labelled("import statement")
383}
384
385/// Parse a complete Hypen document with imports and components
386pub fn document_parser<'a>(
387) -> impl Parser<'a, &'a str, Document, extra::Err<Rich<'a, char>>> + Clone {
388    // Parse imports (zero or more)
389    let imports = import_parser()
390        .padded_with_comments()
391        .repeated()
392        .collect::<Vec<ImportStatement>>();
393
394    // Parse components (zero or more)
395    let components = component_parser()
396        .padded_with_comments()
397        .repeated()
398        .collect::<Vec<ComponentSpecification>>();
399
400    // Combine imports and components into a document
401    imports
402        .then(components)
403        .map(|(imports, components)| Document::new(imports, components))
404}
405
406/// Parse a list of components from text
407pub fn parse_components(input: &str) -> Result<Vec<ComponentSpecification>, Vec<Rich<char>>> {
408    component_parser()
409        .padded_with_comments()
410        .repeated()
411        .collect()
412        .then_ignore(end())
413        .parse(input)
414        .into_result()
415}
416
417/// Parse a single component from text
418pub fn parse_component(input: &str) -> Result<ComponentSpecification, Vec<Rich<char>>> {
419    component_parser()
420        .padded_with_comments()
421        .then_ignore(end())
422        .parse(input)
423        .into_result()
424}
425
426/// Parse a complete Hypen document (imports + components)
427pub fn parse_document(input: &str) -> Result<Document, Vec<Rich<char>>> {
428    document_parser()
429        .padded_with_comments()
430        .then_ignore(end())
431        .parse(input)
432        .into_result()
433}
434
435/// Parse a single import statement
436pub fn parse_import(input: &str) -> Result<ImportStatement, Vec<Rich<char>>> {
437    import_parser()
438        .padded_with_comments()
439        .then_ignore(end())
440        .parse(input)
441        .into_result()
442}
443
// Sequential ID source for AST nodes, backed by one shared atomic counter.
mod id_gen {
    use std::fmt;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Value handed out by the next call to `NodeId::next`; starts at zero.
    static COUNTER: AtomicUsize = AtomicUsize::new(0);

    /// An identifier wrapping the counter value it was created from.
    pub struct NodeId(usize);

    impl NodeId {
        /// Take the current counter value as a new ID and advance the counter by one.
        pub fn next() -> Self {
            let value = COUNTER.fetch_add(1, Ordering::SeqCst);
            Self(value)
        }
    }

    impl fmt::Display for NodeId {
        /// Renders the ID as `id-<n>`.
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_fmt(format_args!("id-{}", self.0))
        }
    }
}