Skip to main content

panache_parser/parser/yaml/
core.rs

1use crate::syntax::{SyntaxKind, SyntaxNode};
2use rowan::GreenNodeBuilder;
3
4use super::model::{
5    BasicYamlEntry, ShadowYamlOptions, ShadowYamlOutcome, ShadowYamlReport, YamlInputKind,
6    YamlShadowToken, YamlShadowTokenKind,
7};
8
9/// Parse YAML in shadow mode using prototype groundwork only.
10///
11/// This API is intentionally read-only and does not replace production YAML
12/// parsing. By default it is disabled and reports `SkippedDisabled`.
13pub fn parse_shadow(input: &str, options: ShadowYamlOptions) -> ShadowYamlReport {
14    let line_count = input.lines().count().max(1);
15
16    if !options.enabled {
17        return ShadowYamlReport {
18            outcome: ShadowYamlOutcome::SkippedDisabled,
19            shadow_reason: "shadow-disabled",
20            input_kind: options.input_kind,
21            input_len_bytes: input.len(),
22            line_count,
23            normalized_input: None,
24        };
25    }
26
27    let normalized = match options.input_kind {
28        YamlInputKind::Plain => input.to_owned(),
29        YamlInputKind::Hashpipe => normalize_hashpipe_input(input),
30    };
31
32    let parsed = parse_basic_mapping_tree(&normalized).is_some();
33
34    ShadowYamlReport {
35        outcome: if parsed {
36            ShadowYamlOutcome::PrototypeParsed
37        } else {
38            ShadowYamlOutcome::PrototypeRejected
39        },
40        shadow_reason: if parsed {
41            "prototype-basic-mapping-parsed"
42        } else {
43            "prototype-basic-mapping-rejected"
44        },
45        input_kind: options.input_kind,
46        input_len_bytes: input.len(),
47        line_count,
48        normalized_input: Some(normalized),
49    }
50}
51
52fn normalize_hashpipe_input(input: &str) -> String {
53    input
54        .lines()
55        .map(strip_hashpipe_prefix)
56        .collect::<Vec<_>>()
57        .join("\n")
58}
59
/// Strip a leading `#|` marker, plus at most one following space, from `line`.
///
/// Lines that do not start with `#|` are returned unchanged.
fn strip_hashpipe_prefix(line: &str) -> &str {
    match line.strip_prefix("#|") {
        // Only a single separating space is dropped; further indentation stays.
        Some(body) => body.strip_prefix(' ').unwrap_or(body),
        None => line,
    }
}
66
/// Split `line` into its content and its trailing line terminator.
///
/// The terminator is `"\r\n"`, `"\n"`, or `""` when the line has none. A lone
/// `\r` without a following `\n` is treated as ordinary content.
fn split_line_and_newline(line: &str) -> (&str, &str) {
    // Check the longer terminator first so `\r\n` is not split in half.
    if let Some(body) = line.strip_suffix("\r\n") {
        (body, "\r\n")
    } else if let Some(body) = line.strip_suffix('\n') {
        (body, "\n")
    } else {
        (line, "")
    }
}
78
/// Number of leading bytes of `text` that are spaces or tabs.
fn leading_indent(text: &str) -> usize {
    // The first non-blank byte marks the end of the indent; if there is none,
    // the whole string is indentation.
    text.bytes()
        .position(|byte| !matches!(byte, b' ' | b'\t'))
        .unwrap_or(text.len())
}
84
/// Split a `key: value` line at its first colon that is not inside quotes.
///
/// Returns the raw text before and after the colon, untrimmed. A `'` or `"`
/// opens a quoted span that lasts until the same character appears again;
/// colons inside such a span are ignored. Returns `None` when no unquoted
/// colon exists or when either side is blank.
fn parse_raw_mapping_line(line: &str) -> Option<(&str, &str)> {
    // `Some(q)` while inside a span opened by quote character `q`.
    let mut open_quote: Option<char> = None;

    let colon_at = line.char_indices().find_map(|(pos, ch)| {
        match (open_quote, ch) {
            (None, ':') => return Some(pos),
            (None, '\'' | '"') => open_quote = Some(ch),
            (Some(quote), _) if quote == ch => open_quote = None,
            _ => {}
        }
        None
    })?;

    let (raw_key, from_colon) = line.split_at(colon_at);
    let raw_value = &from_colon[':'.len_utf8()..];

    let both_present = !raw_key.trim().is_empty() && !raw_value.trim().is_empty();
    both_present.then_some((raw_key, raw_value))
}
110
/// Split the text after a mapping colon into its value and trailing comment.
///
/// Returns `(value, comment)`. `value` is always a prefix of `raw_value`; when
/// a comment is found, trailing spaces and tabs are trimmed from `value` and
/// `comment` is the suffix of `raw_value` starting at its `#`. The blanks
/// between the two belong to neither part, so callers can recover them from
/// the lengths.
///
/// A `#` starts a comment only when it
/// * is at the start of the text or directly preceded by a space or tab, and
/// * is outside a quoted scalar.
///
/// This keeps values such as `"#fff"` or `http://host/#frag` intact.
/// A quote character opens a quoted span only at the start of a token (start
/// of text or after a blank), so an apostrophe inside a plain scalar such as
/// `it's` does not hide a later comment. Inside double quotes a backslash
/// escapes the next character; inside single quotes `''` is an escaped quote.
///
/// When the first comment-start `#` has nothing but whitespace before it, the
/// whole text is returned as the value with no comment.
fn split_value_and_comment(raw_value: &str) -> (&str, Option<&str>) {
    let is_blank = |ch: char| ch == ' ' || ch == '\t';

    // `Some(q)` while inside a scalar opened by quote character `q`.
    let mut open_quote: Option<char> = None;
    // True at the start of the text and right after a space or tab.
    let mut at_token_start = true;
    let mut chars = raw_value.char_indices().peekable();

    while let Some((idx, ch)) = chars.next() {
        match open_quote {
            Some('"') => match ch {
                // Skip the escaped character so `\"` does not close the span.
                '\\' => {
                    chars.next();
                }
                '"' => open_quote = None,
                _ => {}
            },
            Some(_) => {
                if ch == '\'' {
                    // `''` is a literal quote; a single `'` closes the span.
                    if matches!(chars.peek(), Some(&(_, '\''))) {
                        chars.next();
                    } else {
                        open_quote = None;
                    }
                }
            }
            None => match ch {
                '\'' | '"' if at_token_start => open_quote = Some(ch),
                '#' if at_token_start => {
                    let (before, after) = raw_value.split_at(idx);
                    if before.trim().is_empty() {
                        // Nothing but whitespace precedes the `#`: keep the
                        // whole text as the value.
                        return (raw_value, None);
                    }
                    return (before.trim_end_matches([' ', '\t']), Some(after));
                }
                _ => {}
            },
        }
        at_token_start = is_blank(ch);
    }

    (raw_value, None)
}
120
/// Peel a leading `!!name` tag off `text`.
///
/// Leading spaces and tabs are skipped before looking for the `!!` marker.
/// On success returns `(Some(tag), rest)` where `tag` is the `!!name` slice
/// (without the skipped blanks) and `rest` is everything after the tag,
/// starting at the first space or tab that ends it. If `text` does not start
/// with `!!`, or the marker has no name after it, returns `(None, text)`.
fn split_tag_prefix(text: &str) -> (Option<&str>, &str) {
    const MARKER: &str = "!!";

    let unindented = text.trim_start_matches([' ', '\t']);
    let after_marker = match unindented.strip_prefix(MARKER) {
        Some(after_marker) => after_marker,
        None => return (None, text),
    };

    // The tag name runs up to the first blank, or to the end of the text.
    let name_len = after_marker
        .find([' ', '\t'])
        .unwrap_or(after_marker.len());
    if name_len == 0 {
        return (None, text);
    }

    let tag_start = text.len() - unindented.len();
    let (head, value) = text.split_at(tag_start + MARKER.len() + name_len);
    (Some(&head[tag_start..]), value)
}
142
143fn lex_mapping_line_tokens<'a>(
144    line: &'a str,
145    newline: &'a str,
146    current_indent: usize,
147    indent_stack: &mut Vec<usize>,
148    out: &mut Vec<YamlShadowToken<'a>>,
149) -> Option<()> {
150    let line_indent = leading_indent(line);
151    let content = &line[line_indent..];
152
153    if content.trim().is_empty() {
154        if !newline.is_empty() {
155            out.push(YamlShadowToken {
156                kind: YamlShadowTokenKind::Newline,
157                text: newline,
158            });
159        }
160        return Some(());
161    }
162
163    if line_indent > current_indent {
164        indent_stack.push(line_indent);
165        out.push(YamlShadowToken {
166            kind: YamlShadowTokenKind::Indent,
167            text: &line[..line_indent],
168        });
169    } else if line_indent < current_indent {
170        while let Some(last) = indent_stack.last().copied() {
171            if line_indent < last {
172                indent_stack.pop();
173                out.push(YamlShadowToken {
174                    kind: YamlShadowTokenKind::Dedent,
175                    text: "",
176                });
177            } else {
178                break;
179            }
180        }
181        if indent_stack.last().copied().unwrap_or(0) != line_indent {
182            return None;
183        }
184    }
185
186    if line_indent > 0 {
187        out.push(YamlShadowToken {
188            kind: YamlShadowTokenKind::Whitespace,
189            text: &line[..line_indent],
190        });
191    }
192
193    let (raw_key, raw_value) = parse_raw_mapping_line(content)?;
194
195    let (key_tag, key_text) = split_tag_prefix(raw_key);
196    if let Some(tag) = key_tag {
197        out.push(YamlShadowToken {
198            kind: YamlShadowTokenKind::Tag,
199            text: tag,
200        });
201        let ws_len = leading_indent(key_text);
202        if ws_len > 0 {
203            out.push(YamlShadowToken {
204                kind: YamlShadowTokenKind::Whitespace,
205                text: &key_text[..ws_len],
206            });
207        }
208        out.push(YamlShadowToken {
209            kind: YamlShadowTokenKind::Key,
210            text: &key_text[ws_len..],
211        });
212    } else {
213        out.push(YamlShadowToken {
214            kind: YamlShadowTokenKind::Key,
215            text: raw_key,
216        });
217    }
218
219    out.push(YamlShadowToken {
220        kind: YamlShadowTokenKind::Colon,
221        text: ":",
222    });
223
224    let (value_part, comment_part) = split_value_and_comment(raw_value);
225    let leading_ws_len = leading_indent(value_part);
226    if leading_ws_len > 0 {
227        out.push(YamlShadowToken {
228            kind: YamlShadowTokenKind::Whitespace,
229            text: &value_part[..leading_ws_len],
230        });
231    }
232
233    let scalar_part = &value_part[leading_ws_len..];
234    let (value_tag, value_text) = split_tag_prefix(scalar_part);
235    if let Some(tag) = value_tag {
236        out.push(YamlShadowToken {
237            kind: YamlShadowTokenKind::Tag,
238            text: tag,
239        });
240        let ws_len = leading_indent(value_text);
241        if ws_len > 0 {
242            out.push(YamlShadowToken {
243                kind: YamlShadowTokenKind::Whitespace,
244                text: &value_text[..ws_len],
245            });
246        }
247        out.push(YamlShadowToken {
248            kind: YamlShadowTokenKind::Scalar,
249            text: &value_text[ws_len..],
250        });
251    } else {
252        out.push(YamlShadowToken {
253            kind: YamlShadowTokenKind::Scalar,
254            text: scalar_part,
255        });
256    }
257
258    if let Some(comment) = comment_part {
259        let leading_comment_ws_len = raw_value.len() - comment.len() - value_part.len();
260        if leading_comment_ws_len > 0 {
261            let start = value_part.len();
262            let end = start + leading_comment_ws_len;
263            out.push(YamlShadowToken {
264                kind: YamlShadowTokenKind::Whitespace,
265                text: &raw_value[start..end],
266            });
267        }
268        out.push(YamlShadowToken {
269            kind: YamlShadowTokenKind::Comment,
270            text: comment,
271        });
272    }
273
274    if !newline.is_empty() {
275        out.push(YamlShadowToken {
276            kind: YamlShadowTokenKind::Newline,
277            text: newline,
278        });
279    }
280
281    Some(())
282}
283
284pub fn lex_basic_mapping_tokens(input: &str) -> Option<Vec<YamlShadowToken<'_>>> {
285    if input.is_empty() {
286        return None;
287    }
288
289    let mut tokens = Vec::new();
290    let mut indent_stack = vec![0usize];
291
292    for raw_line in input.split_inclusive('\n') {
293        let (line, newline) = split_line_and_newline(raw_line);
294        let current_indent = indent_stack.last().copied().unwrap_or(0);
295        lex_mapping_line_tokens(
296            line,
297            newline,
298            current_indent,
299            &mut indent_stack,
300            &mut tokens,
301        )?;
302    }
303
304    while indent_stack.len() > 1 {
305        indent_stack.pop();
306        tokens.push(YamlShadowToken {
307            kind: YamlShadowTokenKind::Dedent,
308            text: "",
309        });
310    }
311
312    Some(tokens)
313}
314
315fn emit_block_map<'a>(
316    builder: &mut GreenNodeBuilder<'_>,
317    tokens: &[YamlShadowToken<'a>],
318    i: &mut usize,
319    stop_on_dedent: bool,
320) -> Option<()> {
321    let mut closed_by_dedent = false;
322    while *i < tokens.len() {
323        match tokens[*i].kind {
324            YamlShadowTokenKind::Newline => {
325                builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text);
326                *i += 1;
327            }
328            YamlShadowTokenKind::Dedent => {
329                if stop_on_dedent {
330                    *i += 1;
331                    closed_by_dedent = true;
332                    break;
333                }
334                return None;
335            }
336            YamlShadowTokenKind::Indent => return None,
337            _ => {
338                builder.start_node(SyntaxKind::YAML_BLOCK_MAP_ENTRY.into());
339                builder.start_node(SyntaxKind::YAML_BLOCK_MAP_KEY.into());
340
341                let mut saw_colon = false;
342                while *i < tokens.len() {
343                    match tokens[*i].kind {
344                        YamlShadowTokenKind::Key => {
345                            builder.token(SyntaxKind::YAML_KEY.into(), tokens[*i].text);
346                            *i += 1;
347                        }
348                        YamlShadowTokenKind::Tag => {
349                            builder.token(SyntaxKind::YAML_TAG.into(), tokens[*i].text);
350                            *i += 1;
351                        }
352                        YamlShadowTokenKind::Whitespace => {
353                            builder.token(SyntaxKind::WHITESPACE.into(), tokens[*i].text);
354                            *i += 1;
355                        }
356                        YamlShadowTokenKind::Colon => {
357                            builder.token(SyntaxKind::YAML_COLON.into(), tokens[*i].text);
358                            *i += 1;
359                            saw_colon = true;
360                            break;
361                        }
362                        _ => return None,
363                    }
364                }
365                if !saw_colon {
366                    return None;
367                }
368                builder.finish_node(); // YAML_BLOCK_MAP_KEY
369
370                builder.start_node(SyntaxKind::YAML_BLOCK_MAP_VALUE.into());
371                while *i < tokens.len() {
372                    match tokens[*i].kind {
373                        YamlShadowTokenKind::Scalar => {
374                            builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
375                            *i += 1;
376                        }
377                        YamlShadowTokenKind::Tag => {
378                            builder.token(SyntaxKind::YAML_TAG.into(), tokens[*i].text);
379                            *i += 1;
380                        }
381                        YamlShadowTokenKind::Comment => {
382                            builder.token(SyntaxKind::YAML_COMMENT.into(), tokens[*i].text);
383                            *i += 1;
384                        }
385                        YamlShadowTokenKind::Whitespace => {
386                            builder.token(SyntaxKind::WHITESPACE.into(), tokens[*i].text);
387                            *i += 1;
388                        }
389                        _ => break,
390                    }
391                }
392
393                let mut trailing_newline: Option<&str> = None;
394                if *i < tokens.len() && tokens[*i].kind == YamlShadowTokenKind::Newline {
395                    trailing_newline = Some(tokens[*i].text);
396                    *i += 1;
397                }
398
399                if *i < tokens.len() && tokens[*i].kind == YamlShadowTokenKind::Indent {
400                    *i += 1;
401                    builder.start_node(SyntaxKind::YAML_BLOCK_MAP.into());
402                    emit_block_map(builder, tokens, i, true)?;
403                    builder.finish_node(); // YAML_BLOCK_MAP
404                }
405
406                builder.finish_node(); // YAML_BLOCK_MAP_VALUE
407                if let Some(newline) = trailing_newline {
408                    builder.token(SyntaxKind::NEWLINE.into(), newline);
409                }
410                builder.finish_node(); // YAML_BLOCK_MAP_ENTRY
411            }
412        }
413    }
414
415    if stop_on_dedent && !closed_by_dedent {
416        return None;
417    }
418
419    Some(())
420}
421
422/// Parse one or more `key: value` lines and emit a prototype YAML mapping CST.
423///
424/// This remains prototype-scoped but models YAML mapping structure with explicit
425/// block-map and entry/key/value nodes, plus key/colon/whitespace/value/newline
426/// tokens.
427pub fn parse_basic_mapping_tree(input: &str) -> Option<SyntaxNode> {
428    let tokens = lex_basic_mapping_tokens(input)?;
429
430    let mut builder = GreenNodeBuilder::new();
431    builder.start_node(SyntaxKind::DOCUMENT.into());
432    builder.start_node(SyntaxKind::YAML_METADATA_CONTENT.into());
433    builder.start_node(SyntaxKind::YAML_BLOCK_MAP.into());
434    let mut i = 0usize;
435    emit_block_map(&mut builder, &tokens, &mut i, false)?;
436
437    builder.finish_node(); // YAML_BLOCK_MAP
438    builder.finish_node(); // YAML_METADATA_CONTENT
439    builder.finish_node(); // DOCUMENT
440    Some(SyntaxNode::new_root(builder.finish()))
441}
442
443/// Parse a single-line YAML mapping entry like `title: My Title`.
444///
445/// This is intentionally minimal groundwork and currently supports exactly one
446/// `key: value` line.
447pub fn parse_basic_entry(input: &str) -> Option<BasicYamlEntry<'_>> {
448    if input.contains('\n') {
449        return None;
450    }
451
452    let (raw_key, raw_value) = input.split_once(':')?;
453    let key = raw_key.trim();
454    let value = raw_value.trim();
455
456    if key.is_empty() || value.is_empty() {
457        return None;
458    }
459
460    Some(BasicYamlEntry { key, value })
461}
462
463/// Parse a single-line YAML mapping entry and emit a tiny Rowan CST.
464///
465/// The current prototype emits:
466/// DOCUMENT
467///   YAML_METADATA_CONTENT
468///     YAML_BLOCK_MAP
469///       YAML_BLOCK_MAP_ENTRY
470///         YAML_BLOCK_MAP_KEY
471///           YAML_KEY(key)
472///           YAML_COLON(":")
473///         YAML_BLOCK_MAP_VALUE
474///           [WHITESPACE(" ")] // when present in the original input
475///           YAML_SCALAR(value)
476pub fn parse_basic_entry_tree(input: &str) -> Option<SyntaxNode> {
477    parse_basic_entry(input)?;
478    parse_basic_mapping_tree(input)
479}