panache_parser/parser/yaml/
core.rs1use crate::syntax::{SyntaxKind, SyntaxNode};
2use rowan::GreenNodeBuilder;
3
4use super::model::{
5 BasicYamlEntry, ShadowYamlOptions, ShadowYamlOutcome, ShadowYamlReport, YamlInputKind,
6 YamlShadowToken, YamlShadowTokenKind,
7};
8
9pub fn parse_shadow(input: &str, options: ShadowYamlOptions) -> ShadowYamlReport {
14 let line_count = input.lines().count().max(1);
15
16 if !options.enabled {
17 return ShadowYamlReport {
18 outcome: ShadowYamlOutcome::SkippedDisabled,
19 shadow_reason: "shadow-disabled",
20 input_kind: options.input_kind,
21 input_len_bytes: input.len(),
22 line_count,
23 normalized_input: None,
24 };
25 }
26
27 let normalized = match options.input_kind {
28 YamlInputKind::Plain => input.to_owned(),
29 YamlInputKind::Hashpipe => normalize_hashpipe_input(input),
30 };
31
32 let parsed = parse_basic_mapping_tree(&normalized).is_some();
33
34 ShadowYamlReport {
35 outcome: if parsed {
36 ShadowYamlOutcome::PrototypeParsed
37 } else {
38 ShadowYamlOutcome::PrototypeRejected
39 },
40 shadow_reason: if parsed {
41 "prototype-basic-mapping-parsed"
42 } else {
43 "prototype-basic-mapping-rejected"
44 },
45 input_kind: options.input_kind,
46 input_len_bytes: input.len(),
47 line_count,
48 normalized_input: Some(normalized),
49 }
50}
51
52fn normalize_hashpipe_input(input: &str) -> String {
53 input
54 .lines()
55 .map(strip_hashpipe_prefix)
56 .collect::<Vec<_>>()
57 .join("\n")
58}
59
/// Removes a leading `#|` marker, plus at most one space directly after it.
///
/// Lines that do not start with `#|` are returned unchanged.
fn strip_hashpipe_prefix(line: &str) -> &str {
    match line.strip_prefix("#|") {
        Some(body) => body.strip_prefix(' ').unwrap_or(body),
        None => line,
    }
}
66
/// Splits a raw line into `(content, terminator)`.
///
/// The terminator is `"\r\n"`, `"\n"`, or `""` when the line has no trailing
/// line feed. A lone trailing `\r` is kept as part of the content.
fn split_line_and_newline(line: &str) -> (&str, &str) {
    if let Some(body) = line.strip_suffix("\r\n") {
        return (body, "\r\n");
    }
    if let Some(body) = line.strip_suffix('\n') {
        return (body, "\n");
    }
    (line, "")
}
78
/// Returns the number of leading bytes of `text` that are spaces or tabs.
///
/// Each space and each tab counts as one; a string made only of such bytes
/// yields its full length.
fn leading_indent(text: &str) -> usize {
    text.bytes()
        .position(|byte| !matches!(byte, b' ' | b'\t'))
        .unwrap_or(text.len())
}
84
/// Splits `line` at the first `:` that is not inside single or double quotes.
///
/// Returns the untrimmed text before and after that colon. Yields `None` when
/// no such colon exists, or when either side is empty after trimming.
///
/// Quote tracking is a plain toggle: every `'` outside double quotes flips the
/// single-quote state and every `"` outside single quotes flips the
/// double-quote state.
fn parse_raw_mapping_line(line: &str) -> Option<(&str, &str)> {
    let mut single_open = false;
    let mut double_open = false;

    let colon_at = line.char_indices().find_map(|(pos, c)| {
        if c == '\'' && !double_open {
            single_open = !single_open;
        } else if c == '"' && !single_open {
            double_open = !double_open;
        } else if c == ':' && !single_open && !double_open {
            return Some(pos);
        }
        None
    })?;

    let (raw_key, from_colon) = line.split_at(colon_at);
    let raw_value = &from_colon[':'.len_utf8()..];

    let key_blank = raw_key.trim().is_empty();
    let value_blank = raw_value.trim().is_empty();
    if key_blank || value_blank {
        None
    } else {
        Some((raw_key, raw_value))
    }
}
110
/// Splits the text after a mapping colon into `(value, trailing_comment)`.
///
/// A `#` starts a comment only when it
/// * is outside a quoted scalar, and
/// * is preceded by a space or tab (or is the very first character),
///
/// so `"a # b"` and `a#b` are kept intact as values instead of being cut at the
/// `#`. A quote opens a quoted scalar only at the start of a token (start of the
/// text or after a space/tab), which keeps apostrophes in plain scalars such as
/// `it's` from hiding a later comment. Inside double quotes a backslash escapes
/// the next character; inside single quotes `''` is an escaped quote.
///
/// On a split, the returned value has its trailing spaces/tabs removed and the
/// comment starts at the `#`; the whitespace between them is the part of
/// `raw_value` not covered by either slice. If only whitespace precedes the
/// first comment-starting `#`, the whole input is returned as the value with no
/// comment.
fn split_value_and_comment(raw_value: &str) -> (&str, Option<&str>) {
    // Quote character of the quoted scalar currently open, if any.
    let mut open_quote: Option<char> = None;
    // Set after a backslash inside a double-quoted scalar.
    let mut escaped = false;
    // Set when the previous character closed a single-quoted scalar, so that an
    // immediately following `'` is read as the second half of a `''` escape.
    let mut just_closed_single = false;
    let mut prev: Option<char> = None;

    for (idx, ch) in raw_value.char_indices() {
        let reopen_single = just_closed_single;
        just_closed_single = false;

        match open_quote {
            Some('"') => {
                if escaped {
                    escaped = false;
                } else if ch == '\\' {
                    escaped = true;
                } else if ch == '"' {
                    open_quote = None;
                }
            }
            Some(_) => {
                if ch == '\'' {
                    open_quote = None;
                    just_closed_single = true;
                }
            }
            None => {
                let at_token_start = matches!(prev, None | Some(' ' | '\t'));
                if ch == '\'' && reopen_single {
                    open_quote = Some('\'');
                } else if (ch == '"' || ch == '\'') && at_token_start {
                    open_quote = Some(ch);
                } else if ch == '#' && at_token_start {
                    let (before, after) = raw_value.split_at(idx);
                    if before.trim().is_empty() {
                        // Nothing but whitespace before the `#`: keep the text
                        // as the value rather than producing an empty value.
                        return (raw_value, None);
                    }
                    return (before.trim_end_matches([' ', '\t']), Some(after));
                }
            }
        }

        prev = Some(ch);
    }

    (raw_value, None)
}
120
/// Splits a leading `!!name` tag off `text`.
///
/// Leading spaces/tabs are skipped when looking for the `!!`. The tag runs up to
/// the next space or tab (or the end of the text) and must have at least one
/// character after the `!!`. On success returns `(Some(tag), rest)` where `tag`
/// starts at the `!!` and `rest` is everything after the tag; otherwise returns
/// `(None, text)` with `text` untouched.
fn split_tag_prefix(text: &str) -> (Option<&str>, &str) {
    let tag_start = text.len() - text.trim_start_matches([' ', '\t']).len();

    let after_bangs = match text[tag_start..].strip_prefix("!!") {
        Some(after_bangs) => after_bangs,
        None => return (None, text),
    };

    let name_len = after_bangs.find([' ', '\t']).unwrap_or(after_bangs.len());
    if name_len == 0 {
        // A bare `!!` carries no tag name.
        return (None, text);
    }

    let tag_end = tag_start + "!!".len() + name_len;
    (Some(&text[tag_start..tag_end]), &text[tag_end..])
}
142
143fn lex_mapping_line_tokens<'a>(
144 line: &'a str,
145 newline: &'a str,
146 current_indent: usize,
147 indent_stack: &mut Vec<usize>,
148 out: &mut Vec<YamlShadowToken<'a>>,
149) -> Option<()> {
150 let line_indent = leading_indent(line);
151 let content = &line[line_indent..];
152
153 if content.trim().is_empty() {
154 if !newline.is_empty() {
155 out.push(YamlShadowToken {
156 kind: YamlShadowTokenKind::Newline,
157 text: newline,
158 });
159 }
160 return Some(());
161 }
162
163 if line_indent > current_indent {
164 indent_stack.push(line_indent);
165 out.push(YamlShadowToken {
166 kind: YamlShadowTokenKind::Indent,
167 text: &line[..line_indent],
168 });
169 } else if line_indent < current_indent {
170 while let Some(last) = indent_stack.last().copied() {
171 if line_indent < last {
172 indent_stack.pop();
173 out.push(YamlShadowToken {
174 kind: YamlShadowTokenKind::Dedent,
175 text: "",
176 });
177 } else {
178 break;
179 }
180 }
181 if indent_stack.last().copied().unwrap_or(0) != line_indent {
182 return None;
183 }
184 }
185
186 if line_indent > 0 {
187 out.push(YamlShadowToken {
188 kind: YamlShadowTokenKind::Whitespace,
189 text: &line[..line_indent],
190 });
191 }
192
193 let (raw_key, raw_value) = parse_raw_mapping_line(content)?;
194
195 let (key_tag, key_text) = split_tag_prefix(raw_key);
196 if let Some(tag) = key_tag {
197 out.push(YamlShadowToken {
198 kind: YamlShadowTokenKind::Tag,
199 text: tag,
200 });
201 let ws_len = leading_indent(key_text);
202 if ws_len > 0 {
203 out.push(YamlShadowToken {
204 kind: YamlShadowTokenKind::Whitespace,
205 text: &key_text[..ws_len],
206 });
207 }
208 out.push(YamlShadowToken {
209 kind: YamlShadowTokenKind::Key,
210 text: &key_text[ws_len..],
211 });
212 } else {
213 out.push(YamlShadowToken {
214 kind: YamlShadowTokenKind::Key,
215 text: raw_key,
216 });
217 }
218
219 out.push(YamlShadowToken {
220 kind: YamlShadowTokenKind::Colon,
221 text: ":",
222 });
223
224 let (value_part, comment_part) = split_value_and_comment(raw_value);
225 let leading_ws_len = leading_indent(value_part);
226 if leading_ws_len > 0 {
227 out.push(YamlShadowToken {
228 kind: YamlShadowTokenKind::Whitespace,
229 text: &value_part[..leading_ws_len],
230 });
231 }
232
233 let scalar_part = &value_part[leading_ws_len..];
234 let (value_tag, value_text) = split_tag_prefix(scalar_part);
235 if let Some(tag) = value_tag {
236 out.push(YamlShadowToken {
237 kind: YamlShadowTokenKind::Tag,
238 text: tag,
239 });
240 let ws_len = leading_indent(value_text);
241 if ws_len > 0 {
242 out.push(YamlShadowToken {
243 kind: YamlShadowTokenKind::Whitespace,
244 text: &value_text[..ws_len],
245 });
246 }
247 out.push(YamlShadowToken {
248 kind: YamlShadowTokenKind::Scalar,
249 text: &value_text[ws_len..],
250 });
251 } else {
252 out.push(YamlShadowToken {
253 kind: YamlShadowTokenKind::Scalar,
254 text: scalar_part,
255 });
256 }
257
258 if let Some(comment) = comment_part {
259 let leading_comment_ws_len = raw_value.len() - comment.len() - value_part.len();
260 if leading_comment_ws_len > 0 {
261 let start = value_part.len();
262 let end = start + leading_comment_ws_len;
263 out.push(YamlShadowToken {
264 kind: YamlShadowTokenKind::Whitespace,
265 text: &raw_value[start..end],
266 });
267 }
268 out.push(YamlShadowToken {
269 kind: YamlShadowTokenKind::Comment,
270 text: comment,
271 });
272 }
273
274 if !newline.is_empty() {
275 out.push(YamlShadowToken {
276 kind: YamlShadowTokenKind::Newline,
277 text: newline,
278 });
279 }
280
281 Some(())
282}
283
284pub fn lex_basic_mapping_tokens(input: &str) -> Option<Vec<YamlShadowToken<'_>>> {
285 if input.is_empty() {
286 return None;
287 }
288
289 let mut tokens = Vec::new();
290 let mut indent_stack = vec![0usize];
291
292 for raw_line in input.split_inclusive('\n') {
293 let (line, newline) = split_line_and_newline(raw_line);
294 let current_indent = indent_stack.last().copied().unwrap_or(0);
295 lex_mapping_line_tokens(
296 line,
297 newline,
298 current_indent,
299 &mut indent_stack,
300 &mut tokens,
301 )?;
302 }
303
304 while indent_stack.len() > 1 {
305 indent_stack.pop();
306 tokens.push(YamlShadowToken {
307 kind: YamlShadowTokenKind::Dedent,
308 text: "",
309 });
310 }
311
312 Some(tokens)
313}
314
315fn emit_block_map<'a>(
316 builder: &mut GreenNodeBuilder<'_>,
317 tokens: &[YamlShadowToken<'a>],
318 i: &mut usize,
319 stop_on_dedent: bool,
320) -> Option<()> {
321 let mut closed_by_dedent = false;
322 while *i < tokens.len() {
323 match tokens[*i].kind {
324 YamlShadowTokenKind::Newline => {
325 builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text);
326 *i += 1;
327 }
328 YamlShadowTokenKind::Dedent => {
329 if stop_on_dedent {
330 *i += 1;
331 closed_by_dedent = true;
332 break;
333 }
334 return None;
335 }
336 YamlShadowTokenKind::Indent => return None,
337 _ => {
338 builder.start_node(SyntaxKind::YAML_BLOCK_MAP_ENTRY.into());
339 builder.start_node(SyntaxKind::YAML_BLOCK_MAP_KEY.into());
340
341 let mut saw_colon = false;
342 while *i < tokens.len() {
343 match tokens[*i].kind {
344 YamlShadowTokenKind::Key => {
345 builder.token(SyntaxKind::YAML_KEY.into(), tokens[*i].text);
346 *i += 1;
347 }
348 YamlShadowTokenKind::Tag => {
349 builder.token(SyntaxKind::YAML_TAG.into(), tokens[*i].text);
350 *i += 1;
351 }
352 YamlShadowTokenKind::Whitespace => {
353 builder.token(SyntaxKind::WHITESPACE.into(), tokens[*i].text);
354 *i += 1;
355 }
356 YamlShadowTokenKind::Colon => {
357 builder.token(SyntaxKind::YAML_COLON.into(), tokens[*i].text);
358 *i += 1;
359 saw_colon = true;
360 break;
361 }
362 _ => return None,
363 }
364 }
365 if !saw_colon {
366 return None;
367 }
368 builder.finish_node(); builder.start_node(SyntaxKind::YAML_BLOCK_MAP_VALUE.into());
371 while *i < tokens.len() {
372 match tokens[*i].kind {
373 YamlShadowTokenKind::Scalar => {
374 builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
375 *i += 1;
376 }
377 YamlShadowTokenKind::Tag => {
378 builder.token(SyntaxKind::YAML_TAG.into(), tokens[*i].text);
379 *i += 1;
380 }
381 YamlShadowTokenKind::Comment => {
382 builder.token(SyntaxKind::YAML_COMMENT.into(), tokens[*i].text);
383 *i += 1;
384 }
385 YamlShadowTokenKind::Whitespace => {
386 builder.token(SyntaxKind::WHITESPACE.into(), tokens[*i].text);
387 *i += 1;
388 }
389 _ => break,
390 }
391 }
392
393 let mut trailing_newline: Option<&str> = None;
394 if *i < tokens.len() && tokens[*i].kind == YamlShadowTokenKind::Newline {
395 trailing_newline = Some(tokens[*i].text);
396 *i += 1;
397 }
398
399 if *i < tokens.len() && tokens[*i].kind == YamlShadowTokenKind::Indent {
400 *i += 1;
401 builder.start_node(SyntaxKind::YAML_BLOCK_MAP.into());
402 emit_block_map(builder, tokens, i, true)?;
403 builder.finish_node(); }
405
406 builder.finish_node(); if let Some(newline) = trailing_newline {
408 builder.token(SyntaxKind::NEWLINE.into(), newline);
409 }
410 builder.finish_node(); }
412 }
413 }
414
415 if stop_on_dedent && !closed_by_dedent {
416 return None;
417 }
418
419 Some(())
420}
421
422pub fn parse_basic_mapping_tree(input: &str) -> Option<SyntaxNode> {
428 let tokens = lex_basic_mapping_tokens(input)?;
429
430 let mut builder = GreenNodeBuilder::new();
431 builder.start_node(SyntaxKind::DOCUMENT.into());
432 builder.start_node(SyntaxKind::YAML_METADATA_CONTENT.into());
433 builder.start_node(SyntaxKind::YAML_BLOCK_MAP.into());
434 let mut i = 0usize;
435 emit_block_map(&mut builder, &tokens, &mut i, false)?;
436
437 builder.finish_node(); builder.finish_node(); builder.finish_node(); Some(SyntaxNode::new_root(builder.finish()))
441}
442
443pub fn parse_basic_entry(input: &str) -> Option<BasicYamlEntry<'_>> {
448 if input.contains('\n') {
449 return None;
450 }
451
452 let (raw_key, raw_value) = input.split_once(':')?;
453 let key = raw_key.trim();
454 let value = raw_value.trim();
455
456 if key.is_empty() || value.is_empty() {
457 return None;
458 }
459
460 Some(BasicYamlEntry { key, value })
461}
462
463pub fn parse_basic_entry_tree(input: &str) -> Option<SyntaxNode> {
477 parse_basic_entry(input)?;
478 parse_basic_mapping_tree(input)
479}