panache_parser/parser/yaml/
events.rs1use crate::syntax::{SyntaxKind, SyntaxNode};
13
14use super::parser::parse_yaml_tree;
15
16pub fn project_events(input: &str) -> Vec<String> {
19 let Some(tree) = parse_yaml_tree(input) else {
20 return Vec::new();
21 };
22
23 let has_explicit_doc_start = tree
24 .descendants_with_tokens()
25 .filter_map(|el| el.into_token())
26 .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_START);
27 let doc_open = if has_explicit_doc_start {
28 "+DOC ---".to_string()
29 } else {
30 "+DOC".to_string()
31 };
32 let has_explicit_doc_end = tree
33 .descendants_with_tokens()
34 .filter_map(|el| el.into_token())
35 .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_END);
36 let doc_close = if has_explicit_doc_end {
37 "-DOC ...".to_string()
38 } else {
39 "-DOC".to_string()
40 };
41
42 let has_any_content = tree.descendants().any(|n| {
47 matches!(
48 n.kind(),
49 SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM
50 | SyntaxKind::YAML_BLOCK_MAP_ENTRY
51 | SyntaxKind::YAML_FLOW_MAP
52 | SyntaxKind::YAML_FLOW_SEQUENCE
53 )
54 }) || tree
55 .descendants_with_tokens()
56 .filter_map(|el| el.into_token())
57 .any(|tok| matches!(tok.kind(), SyntaxKind::YAML_SCALAR | SyntaxKind::YAML_TAG));
58 if !has_any_content && !has_explicit_doc_start {
59 return vec!["+STR".to_string(), "-STR".to_string()];
60 }
61
62 if let Some(seq_node) = tree
63 .descendants()
64 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
65 {
66 let mut events = Vec::new();
67 events.push("+STR".to_string());
68 events.push(doc_open);
69 events.push("+SEQ".to_string());
70 project_block_sequence_items(&seq_node, &mut events);
71 events.push("-SEQ".to_string());
72 events.push(doc_close);
73 events.push("-STR".to_string());
74 return events;
75 }
76
77 let mut values = Vec::new();
78 let mut map_header = "+MAP".to_string();
79 if let Some(root_map) = tree
80 .descendants()
81 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
82 {
83 project_block_map_entries(&root_map, &mut values);
84 }
85
86 if values.is_empty()
87 && let Some(flow_map) = tree
88 .descendants()
89 .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
90 {
91 map_header = "+MAP {}".to_string();
92 project_flow_map_entries(&flow_map, &mut values);
93 }
94
95 if values.is_empty()
96 && let Some(flow_seq) = tree
97 .descendants()
98 .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
99 && let Some(items) = simple_flow_sequence_items(&flow_seq.text().to_string())
100 {
101 let mut seq_events: Vec<String> = items
102 .iter()
103 .map(|item| {
104 if item.starts_with('"') || item.starts_with('\'') {
105 quoted_val_event(item)
106 } else {
107 plain_val_event(item)
108 }
109 })
110 .collect();
111 let mut events = Vec::with_capacity(seq_events.len() + 7);
112 events.push("+STR".to_string());
113 events.push(doc_open);
114 events.push("+SEQ []".to_string());
115 events.append(&mut seq_events);
116 events.push("-SEQ".to_string());
117 events.push(doc_close);
118 events.push("-STR".to_string());
119 return events;
120 }
121
122 let scalar_document_value = if values.is_empty() {
123 let text = tree
124 .descendants_with_tokens()
125 .filter_map(|el| el.into_token())
126 .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
127 .map(|tok| tok.text().to_string())
128 .collect::<Vec<_>>()
129 .join("");
130 (!text.is_empty()).then_some(text)
131 } else {
132 None
133 };
134
135 if let Some(text) = scalar_document_value {
136 let tag_text = tree
137 .descendants_with_tokens()
138 .filter_map(|el| el.into_token())
139 .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
140 .map(|tok| tok.text().to_string());
141 let scalar_event = if let Some(tag) = tag_text
142 && let Some(long) = long_tag(&tag)
143 {
144 format!("=VAL {long} :{text}")
145 } else if text.starts_with('"') || text.starts_with('\'') {
146 quoted_val_event(&text)
147 } else {
148 plain_val_event(&text)
149 };
150 return vec![
151 "+STR".to_string(),
152 doc_open.clone(),
153 scalar_event,
154 doc_close,
155 "-STR".to_string(),
156 ];
157 }
158
159 let mut events = Vec::with_capacity(values.len() + 6);
160 events.push("+STR".to_string());
161 events.push(doc_open);
162 events.push(map_header);
163 events.append(&mut values);
164 events.push("-MAP".to_string());
165 events.push(doc_close);
166 events.push("-STR".to_string());
167 events
168}
169
/// Builds the `=VAL :<text>` event for a plain scalar, doubling every backslash.
fn plain_val_event(text: &str) -> String {
    const PREFIX: &str = "=VAL :";
    let mut event = String::with_capacity(PREFIX.len() + text.len());
    event.push_str(PREFIX);
    for ch in text.chars() {
        if ch == '\\' {
            event.push_str("\\\\");
        } else {
            event.push(ch);
        }
    }
    event
}
173
/// Builds the `=VAL` event for a quoted scalar given its raw source text
/// (including the surrounding quotes).
///
/// The event keeps the opening quote as a style indicator and drops the
/// closing one, e.g. `'abc'` becomes `=VAL 'abc` and `"abc"` becomes `=VAL "abc`.
///
/// * Single-quoted: `''` collapses to `'` and backslashes are doubled.
/// * Otherwise (double-quoted): `\/` becomes `/`, `\"` becomes `"`, and every
///   other escape sequence is passed through untouched.
///
/// Exactly one closing quote is removed. Stripping *all* trailing quotes would
/// lose content for inputs such as `""`, `''`, `'a'''` or `"a\""`.
fn quoted_val_event(text: &str) -> String {
    if let Some(rest) = text.strip_prefix('\'') {
        // Tolerate a missing closing quote rather than dropping content.
        let inner = rest.strip_suffix('\'').unwrap_or(rest);
        let value = inner.replace("''", "'").replace('\\', "\\\\");
        return format!("=VAL '{value}");
    }

    // Callers only pass text starting with a quote, but keep un-prefixed text
    // working: the indicator is emitted only when it was actually present.
    let (indicator, rest) = match text.strip_prefix('"') {
        Some(rest) => ("\"", rest),
        None => ("", text),
    };
    let inner = rest.strip_suffix('"').unwrap_or(rest);

    let mut value = String::with_capacity(inner.len());
    let mut chars = inner.chars();
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            value.push(ch);
            continue;
        }
        match chars.next() {
            Some('/') => value.push('/'),
            Some('"') => value.push('"'),
            Some(other) => {
                value.push('\\');
                value.push(other);
            }
            // A lone trailing backslash is kept verbatim.
            None => value.push('\\'),
        }
    }
    format!("=VAL {indicator}{value}")
}
206
/// Expands a tag shorthand into the `<...>` form used in events.
///
/// The listed `!!` core tags map to their `tag:yaml.org,2002:` names, any tag
/// with a single leading `!` (including the bare `!`) is wrapped verbatim, and
/// everything else (unknown `!!` tags, text without a leading `!`) yields `None`.
fn long_tag(tag: &str) -> Option<String> {
    const CORE_TAGS: [(&str, &str); 7] = [
        ("!!str", "<tag:yaml.org,2002:str>"),
        ("!!int", "<tag:yaml.org,2002:int>"),
        ("!!bool", "<tag:yaml.org,2002:bool>"),
        ("!!null", "<tag:yaml.org,2002:null>"),
        ("!!float", "<tag:yaml.org,2002:float>"),
        ("!!seq", "<tag:yaml.org,2002:seq>"),
        ("!!map", "<tag:yaml.org,2002:map>"),
    ];

    if let Some((_, long)) = CORE_TAGS.iter().find(|(short, _)| *short == tag) {
        return Some((*long).to_string());
    }

    // Local tags: exactly one leading `!`; the bare `!` falls out as `<!>`.
    let is_local = tag.starts_with('!') && !tag.starts_with("!!");
    is_local.then(|| format!("<{tag}>"))
}
229
/// Splits the raw text of a flat flow sequence (`[a, "b, c", 'd']`) into its
/// item texts, each trimmed but otherwise verbatim (quotes included).
///
/// Returns `None` when `text` is not wrapped in `[` ... `]` or when an empty
/// item appears before a comma (`[a,,b]`). A trailing comma is tolerated and
/// `[]` yields an empty vector. Nested collections are not understood: commas
/// inside them are treated as separators.
///
/// Commas inside quoted scalars do not split. A quote character only opens a
/// quoted scalar when it is the first non-blank character of an item, so plain
/// scalars such as `don't` are split correctly at the next comma. Inside
/// single quotes, `''` is an escaped quote and does not end the scalar.
fn simple_flow_sequence_items(text: &str) -> Option<Vec<String>> {
    enum Quote {
        None,
        Single,
        Double,
    }

    let inner = text.trim().strip_prefix('[')?.strip_suffix(']')?.trim();
    if inner.is_empty() {
        return Some(Vec::new());
    }

    let mut items = Vec::new();
    let mut start = 0usize;
    let mut quote = Quote::None;
    let mut escaped = false;
    // Whether the current item already has a non-blank character.
    let mut item_started = false;

    let mut chars = inner.char_indices().peekable();
    while let Some((idx, ch)) = chars.next() {
        match quote {
            Quote::Double => {
                if escaped {
                    escaped = false;
                } else if ch == '\\' {
                    escaped = true;
                } else if ch == '"' {
                    quote = Quote::None;
                }
            }
            Quote::Single => {
                // `''` is an escaped quote; a lone `'` closes the scalar.
                if ch == '\'' && chars.next_if(|&(_, next)| next == '\'').is_none() {
                    quote = Quote::None;
                }
            }
            Quote::None => match ch {
                ',' => {
                    let item = inner[start..idx].trim();
                    if item.is_empty() {
                        return None;
                    }
                    items.push(item.to_string());
                    // ',' is one byte, so the next item starts right after it.
                    start = idx + 1;
                    item_started = false;
                }
                '\'' if !item_started => {
                    quote = Quote::Single;
                    item_started = true;
                }
                '"' if !item_started => {
                    quote = Quote::Double;
                    item_started = true;
                }
                c if c.is_whitespace() => {}
                _ => item_started = true,
            },
        }
    }

    let last = inner[start..].trim();
    if !last.is_empty() {
        items.push(last.to_string());
    }
    Some(items)
}
286
/// Escapes backslash, newline, tab and carriage return as the two-character
/// sequences `\\`, `\n`, `\t` and `\r`; every other character is copied as-is.
fn escape_block_scalar_text(text: &str) -> String {
    text.chars()
        .fold(String::with_capacity(text.len()), |mut escaped, ch| {
            let replacement = match ch {
                '\\' => Some("\\\\"),
                '\n' => Some("\\n"),
                '\t' => Some("\\t"),
                '\r' => Some("\\r"),
                _ => None,
            };
            match replacement {
                Some(seq) => escaped.push_str(seq),
                None => escaped.push(ch),
            }
            escaped
        })
}
300
301fn extract_block_scalar_body(value_node: &SyntaxNode) -> Option<(char, String)> {
305 let tokens: Vec<_> = value_node
306 .descendants_with_tokens()
307 .filter_map(|el| el.into_token())
308 .filter(|tok| matches!(tok.kind(), SyntaxKind::YAML_SCALAR | SyntaxKind::NEWLINE))
309 .collect();
310 let first = tokens.first()?;
311 if first.kind() != SyntaxKind::YAML_SCALAR {
312 return None;
313 }
314 let indicator = match first.text() {
315 "|" => '|',
316 ">" => '>',
317 _ => return None,
318 };
319
320 let mut raw = String::new();
321 let mut seen_header = false;
322 let mut skipped_header_newline = false;
323 for tok in tokens.iter().skip(1) {
324 if !seen_header && !skipped_header_newline && tok.kind() == SyntaxKind::NEWLINE {
325 skipped_header_newline = true;
326 seen_header = true;
327 continue;
328 }
329 raw.push_str(tok.text());
330 }
331
332 let mut lines: Vec<&str> = raw.split('\n').collect();
333 if lines.last().is_some_and(|s| s.is_empty()) {
334 lines.pop();
335 }
336
337 let content_indent = lines
338 .iter()
339 .filter(|l| !l.trim().is_empty())
340 .map(|l| l.chars().take_while(|c| *c == ' ').count())
341 .min()
342 .unwrap_or(0);
343
344 let stripped: Vec<String> = lines
345 .iter()
346 .map(|l| {
347 if l.len() >= content_indent {
348 l[content_indent..].to_string()
349 } else {
350 String::new()
351 }
352 })
353 .collect();
354
355 let folded = match indicator {
356 '|' => stripped.join("\n"),
357 '>' => {
358 let mut result = String::new();
359 let mut last_blank = false;
360 for (idx, line) in stripped.iter().enumerate() {
361 if line.is_empty() {
362 result.push('\n');
363 last_blank = true;
364 } else {
365 if idx > 0 && !last_blank {
366 result.push(' ');
367 }
368 result.push_str(line);
369 last_blank = false;
370 }
371 }
372 result
373 }
374 _ => unreachable!(),
375 };
376
377 let trimmed = folded.trim_end_matches('\n');
378 let body = if trimmed.is_empty() {
379 String::new()
380 } else {
381 format!("{trimmed}\n")
382 };
383 Some((indicator, body))
384}
385
/// Folds a multi-line plain scalar into one line: each `\n`-separated line is
/// trimmed, blank lines are dropped, and the rest are joined with single spaces.
fn fold_plain_scalar(text: &str) -> String {
    text.split('\n')
        .map(str::trim)
        .filter(|segment| !segment.is_empty())
        .collect::<Vec<&str>>()
        .join(" ")
}
399
400fn project_flow_map_entries(flow_map: &SyntaxNode, out: &mut Vec<String>) {
401 for entry in flow_map
402 .children()
403 .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_ENTRY)
404 {
405 let key_node = entry
406 .children()
407 .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_KEY)
408 .expect("flow map key");
409 let value_node = entry
410 .children()
411 .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_VALUE)
412 .expect("flow map value");
413
414 let has_explicit_colon = key_node
415 .children_with_tokens()
416 .filter_map(|el| el.into_token())
417 .any(|tok| tok.kind() == SyntaxKind::YAML_COLON);
418
419 let raw_key = key_node
420 .descendants_with_tokens()
421 .filter_map(|el| el.into_token())
422 .filter(|tok| matches!(tok.kind(), SyntaxKind::YAML_SCALAR | SyntaxKind::YAML_KEY))
423 .map(|tok| tok.text().to_string())
424 .collect::<Vec<_>>()
425 .join("");
426 let raw_value = value_node
427 .descendants_with_tokens()
428 .filter_map(|el| el.into_token())
429 .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
430 .map(|tok| tok.text().to_string())
431 .collect::<Vec<_>>()
432 .join("");
433
434 if has_explicit_colon {
435 out.push(plain_val_event(&fold_plain_scalar(&raw_key)));
436 out.push(plain_val_event(&fold_plain_scalar(&raw_value)));
437 } else {
438 let combined = format!("{raw_key}{raw_value}");
439 out.push(plain_val_event(&fold_plain_scalar(&combined)));
440 out.push("=VAL :".to_string());
441 }
442 }
443}
444
445fn project_block_sequence_items(seq_node: &SyntaxNode, out: &mut Vec<String>) {
446 for item in seq_node
447 .children()
448 .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
449 {
450 if let Some(nested_seq) = item
451 .children()
452 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
453 {
454 out.push("+SEQ".to_string());
455 project_block_sequence_items(&nested_seq, out);
456 out.push("-SEQ".to_string());
457 continue;
458 }
459 if let Some(nested_map) = item
460 .children()
461 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
462 {
463 out.push("+MAP".to_string());
464 project_block_map_entries(&nested_map, out);
465 out.push("-MAP".to_string());
466 continue;
467 }
468 if let Some(flow_seq) = item
469 .children()
470 .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
471 {
472 let flow_text = flow_seq.text().to_string();
473 if let Some(flow_items) = simple_flow_sequence_items(&flow_text) {
474 out.push("+SEQ []".to_string());
475 for value in flow_items {
476 if value.starts_with('"') || value.starts_with('\'') {
477 out.push(quoted_val_event(&value));
478 } else {
479 out.push(plain_val_event(&value));
480 }
481 }
482 out.push("-SEQ".to_string());
483 continue;
484 }
485 }
486 if let Some(flow_map) = item
487 .children()
488 .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
489 {
490 out.push("+MAP {}".to_string());
491 project_flow_map_entries(&flow_map, out);
492 out.push("-MAP".to_string());
493 continue;
494 }
495 let item_tag = item
496 .descendants_with_tokens()
497 .filter_map(|el| el.into_token())
498 .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
499 .map(|tok| tok.text().to_string());
500 let scalar_text = item
501 .descendants_with_tokens()
502 .filter_map(|el| el.into_token())
503 .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
504 .map(|tok| tok.text().to_string())
505 .collect::<Vec<_>>()
506 .join("");
507 let scalar_trimmed = scalar_text.trim_end();
508 let event = if let Some(tag) = item_tag
509 && let Some(long) = long_tag(&tag)
510 {
511 format!("=VAL {long} :{scalar_text}")
512 } else if let Some(rest) = scalar_trimmed.strip_prefix('&') {
513 if let Some((anchor, value)) = rest.split_once(' ') {
514 format!("=VAL &{anchor} :{value}")
515 } else {
516 format!("=VAL &{rest} :")
517 }
518 } else if scalar_trimmed.starts_with('*') {
519 format!("=ALI {scalar_trimmed}")
520 } else if scalar_text.starts_with('"') || scalar_text.starts_with('\'') {
521 quoted_val_event(&scalar_text)
522 } else {
523 plain_val_event(&scalar_text)
524 };
525 out.push(event);
526 }
527}
528
529fn project_block_map_entries(map_node: &SyntaxNode, out: &mut Vec<String>) {
530 for entry in map_node
531 .children()
532 .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
533 {
534 let key_node = entry
535 .children()
536 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY)
537 .expect("key node");
538 let value_node = entry
539 .children()
540 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
541 .expect("value node");
542
543 let key_tag = key_node
544 .children_with_tokens()
545 .filter_map(|el| el.into_token())
546 .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
547 .map(|tok| tok.text().to_string());
548 let key_text = key_node
549 .children_with_tokens()
550 .filter_map(|el| el.into_token())
551 .find(|tok| tok.kind() == SyntaxKind::YAML_KEY)
552 .map(|tok| tok.text().to_string())
553 .expect("key token");
554
555 let key_event = if let Some(tag) = key_tag {
556 if let Some(long) = long_tag(&tag) {
557 format!("=VAL {long} :{key_text}")
558 } else {
559 plain_val_event(&key_text)
560 }
561 } else if let Some(rest) = key_text.strip_prefix('&') {
562 if let Some((anchor, value)) = rest.split_once(' ') {
563 format!("=VAL &{} :{}", anchor, value)
564 } else {
565 format!("=VAL &{} :", rest)
566 }
567 } else if key_text.starts_with('"') || key_text.starts_with('\'') {
568 quoted_val_event(&key_text)
569 } else if key_text.starts_with('*') {
570 format!("=ALI {}", key_text.trim_end())
571 } else {
572 plain_val_event(&key_text)
573 };
574 out.push(key_event);
575
576 if let Some(nested_map) = value_node
577 .children()
578 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
579 {
580 out.push("+MAP".to_string());
581 project_block_map_entries(&nested_map, out);
582 out.push("-MAP".to_string());
583 continue;
584 }
585
586 if let Some(flow_map) = value_node
587 .children()
588 .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
589 {
590 out.push("+MAP {}".to_string());
591 project_flow_map_entries(&flow_map, out);
592 out.push("-MAP".to_string());
593 continue;
594 }
595
596 if let Some((indicator, body)) = extract_block_scalar_body(&value_node) {
597 let escaped = escape_block_scalar_text(&body);
598 out.push(format!("=VAL {indicator}{escaped}"));
599 continue;
600 }
601
602 let value_tag = value_node
603 .children_with_tokens()
604 .filter_map(|el| el.into_token())
605 .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
606 .map(|tok| tok.text().to_string());
607 let value_text = value_node
608 .descendants_with_tokens()
609 .filter_map(|el| el.into_token())
610 .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
611 .map(|tok| tok.text().to_string())
612 .collect::<Vec<_>>()
613 .join("");
614
615 if value_tag.is_none()
616 && let Some(items) = simple_flow_sequence_items(&value_text)
617 {
618 out.push("+SEQ []".to_string());
619 for item in items {
620 if item.starts_with('"') || item.starts_with('\'') {
621 out.push(quoted_val_event(&item));
622 } else {
623 out.push(plain_val_event(&item));
624 }
625 }
626 out.push("-SEQ".to_string());
627 } else if value_text.is_empty() {
628 out.push("=VAL :".to_string());
629 } else {
630 let value_event = if let Some(tag) = value_tag {
631 if let Some(long) = long_tag(&tag) {
632 if let Some(rest) = value_text.strip_prefix('&') {
633 if let Some((anchor, tail)) = rest.split_once(' ') {
634 format!("=VAL &{anchor} {long} :{tail}")
635 } else {
636 format!("=VAL &{rest} {long} :")
637 }
638 } else {
639 format!("=VAL {long} :{value_text}")
640 }
641 } else {
642 plain_val_event(&value_text)
643 }
644 } else if value_text.starts_with('"') || value_text.starts_with('\'') {
645 quoted_val_event(&value_text)
646 } else if let Some(rest) = value_text.strip_prefix('&') {
647 if let Some((anchor, value)) = rest.split_once(' ') {
648 format!("=VAL &{} :{}", anchor, value)
649 } else {
650 format!("=VAL &{} :", rest)
651 }
652 } else if value_text.starts_with('*') {
653 format!("=ALI {value_text}")
654 } else {
655 plain_val_event(&value_text)
656 };
657 out.push(value_event);
658 }
659 }
660}