quillmark_core/document/prescan.rs
1//! Pre-scan of a metadata fence's YAML content to recover features that
2//! serde_saphyr discards.
3//!
4//! Three features are recovered here:
5//!
6//! 1. **Top-level comments.** YAML comments are dropped by the YAML parser.
7//! To round-trip them as [`super::FrontmatterItem::Comment`], we extract them
8//! before parsing.
9//!
10//! 2. **Nested comments.** Comments inside block mappings/sequences are
11//! captured with their structural path (sequence of keys/indices) and an
12//! ordinal indicating where in the container they sit. The emitter
13//! re-injects them at the matching position. See [`NestedComment`].
14//!
//! 3. **`!fill` tags.** Custom YAML tags are accepted and dropped by
//!    serde_saphyr; the value survives but the tag annotation is lost. We
//!    detect `!fill` on top-level fields (scalars and sequences; mappings
//!    are rejected), strip the tag from the cleaned YAML (so serde_saphyr
//!    sees an untagged value), and record a `fill: true` marker on the
//!    resulting `Field` item.
20//!
//! Other custom tags (`!include`, `!env`, …) are left in the cleaned YAML
//! for serde_saphyr to drop, and are reported with a
//! `parse::unsupported_yaml_tag` warning.
23
24use crate::Diagnostic;
25use crate::Severity;
26
/// A single top-level entry recovered from the fence body, in source order.
///
/// Only the `fill` flag is stored for a field: the value itself comes from
/// serde_saphyr parsing the cleaned text, and `key` is what the caller uses
/// to look that value up in the parsed map.
///
/// For comments, `inline` separates a comment that sits on its own line
/// (`# text`) from one that trails a field (`field: value # text`). A
/// trailing comment is pushed right after its host `Field`, so a consumer
/// can find it by peeking one slot ahead.
#[derive(Debug, Clone, PartialEq)]
pub enum PreItem {
    /// A top-level `key:` line.
    Field {
        /// The YAML key as written in the source.
        key: String,
        /// `true` when the value carried a `!fill` tag.
        fill: bool,
    },
    /// A top-level comment.
    Comment {
        /// Comment text with the `#` marker (and one following space) removed.
        text: String,
        /// `true` for a trailing comment, `false` for an own-line comment.
        inline: bool,
    },
}
42
/// One step along a path into the parsed YAML structure.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CommentPathSegment {
    /// Descend into a mapping through the named key.
    Key(String),
    /// Descend into a sequence through the zero-based item index.
    Index(usize),
}
49
50/// A comment that appears inside a nested mapping or sequence.
51///
52/// `container_path` locates the immediate parent container.
53///
54/// Position semantics depend on `inline`:
55/// - **Own-line (`inline = false`)**: `position` is the slot ordinal within
56/// the container's child list, ranging `0..=child_count`. The comment is
57/// rendered before the child at this position. `position == child_count`
58/// means "after all children".
59/// - **Inline (`inline = true`)**: `position` is the host child's index,
60/// ranging `0..child_count`. The comment is attached to that child's
61/// trailing line. An inline comment whose host is missing at emit time
62/// (orphan) degrades to an own-line comment at the same indent.
63#[derive(Debug, Clone, PartialEq, Eq)]
64pub struct NestedComment {
65 pub container_path: Vec<CommentPathSegment>,
66 pub position: usize,
67 pub text: String,
68 pub inline: bool,
69}
70
71/// Output of [`prescan_fence_content`].
72#[derive(Debug, Clone, Default)]
73pub struct PreScan {
74 /// YAML text with `!fill` tags stripped and all comment lines removed.
75 /// Suitable for feeding into serde_saphyr.
76 pub cleaned_yaml: String,
77 /// Ordered items discovered at the top level — fields (with fill flags)
78 /// and own-line top-level comments, in source order.
79 pub items: Vec<PreItem>,
80 /// Comments inside nested containers, with structural paths.
81 pub nested_comments: Vec<NestedComment>,
82 /// Warnings produced during the scan.
83 pub warnings: Vec<Diagnostic>,
84 /// Unsupported-fill-target errors. The parser turns these into
85 /// `ParseError::InvalidStructure` rejections (`!fill` on mappings).
86 pub fill_target_errors: Vec<String>,
87}
88
89/// Tracks one open YAML container while scanning lines.
90#[derive(Debug)]
91struct Frame {
92 /// Indent (in columns) of children of this container.
93 indent: usize,
94 /// Path to this container from the fence root.
95 path: Vec<CommentPathSegment>,
96 /// Container kind. `None` until the first child line determines it.
97 kind: Option<FrameKind>,
98 /// Number of children seen so far.
99 child_count: usize,
100}
101
/// The two block container shapes the scanner tracks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FrameKind {
    /// Children are `key:` lines.
    Mapping,
    /// Children are `- item` lines.
    Sequence,
}
107
108/// Scan the body of a YAML metadata fence.
109///
110/// `content` is the text between the opening and closing `---` markers
111/// (exclusive), with leading/trailing whitespace preserved.
112pub fn prescan_fence_content(content: &str) -> PreScan {
113 let mut out = PreScan::default();
114
115 // We operate on the raw text to preserve positions. `lines()` strips
116 // line endings; we rebuild with `\n` which is what serde_saphyr expects.
117 let lines: Vec<&str> = content.split('\n').collect();
118 let mut cleaned_lines: Vec<String> = Vec::with_capacity(lines.len());
119
120 // Stack of open containers. The root frame is the frontmatter mapping
121 // itself; children appear at indent 0.
122 let mut stack: Vec<Frame> = vec![Frame {
123 indent: 0,
124 path: Vec::new(),
125 kind: Some(FrameKind::Mapping),
126 child_count: 0,
127 }];
128
129 for raw_line in &lines {
130 let line = *raw_line;
131 let indent = leading_space_count(line);
132 let trimmed = &line[indent..];
133
134 // Skip blank lines (no structural meaning, no comment).
135 if trimmed.is_empty() {
136 cleaned_lines.push(line.to_string());
137 continue;
138 }
139
140 // Pop frames that this line has dedented out of. A line at indent
141 // `indent` belongs to the deepest frame whose `indent <= indent`.
142 // (Equality means the line is a child at this frame's level.)
143 while let Some(frame) = stack.last() {
144 if frame.indent > indent {
145 stack.pop();
146 } else {
147 break;
148 }
149 }
150
151 // Case 1: own-line comment.
152 if trimmed.starts_with('#') {
153 let text = strip_comment_marker(trimmed);
154
155 // Determine the deepest frame that contains this line.
156 // For a comment at indent N, the containing frame is the one
157 // with the largest indent <= N. The stack is ordered shallow
158 // to deep; the last frame is the deepest. After the dedent
159 // pop above, the top frame's indent is <= indent, which is
160 // what we want.
161 let frame = stack.last().expect("root frame always present");
162
163 if frame.path.is_empty() {
164 // Top-level comment — preserve via PreItem::Comment.
165 out.items.push(PreItem::Comment {
166 text: text.to_string(),
167 inline: false,
168 });
169 } else {
170 out.nested_comments.push(NestedComment {
171 container_path: frame.path.clone(),
172 position: frame.child_count,
173 text: text.to_string(),
174 inline: false,
175 });
176 }
177 // Don't emit the line into the cleaned YAML — serde_saphyr
178 // ignores comments either way, but omitting the line avoids
179 // ambiguity with `!fill` rewriting.
180 continue;
181 }
182
183 // Case 2: sequence item line (`- ...`).
184 if trimmed == "-" || trimmed.starts_with("- ") {
185 // The frame at this indent must be a sequence. If the deepest
186 // frame's indent matches this line's indent, claim it; if it
187 // doesn't, push a fresh sequence frame at this indent under
188 // the deepest container.
189 let frame_idx = ensure_frame_at_indent(&mut stack, indent, FrameKind::Sequence);
190 let frame = &mut stack[frame_idx];
191 let item_index = frame.child_count;
192 frame.child_count += 1;
193 let parent_path: Vec<CommentPathSegment> = frame.path.clone();
194 // Snapshot the item path before borrowing mutably again below.
195 let item_path: Vec<CommentPathSegment> = {
196 let mut p = parent_path.clone();
197 p.push(CommentPathSegment::Index(item_index));
198 p
199 };
200 // Drop frames deeper than this sequence; the new item starts
201 // a fresh nested context.
202 while stack.len() > frame_idx + 1 {
203 stack.pop();
204 }
205
206 // Detach a possible trailing comment on the item line.
207 let after_dash_full = if trimmed == "-" { "" } else { &trimmed[2..] };
208 let (after_dash, trailing_comment) = split_trailing_comment(after_dash_full);
209 let after_dash_trimmed = after_dash.trim_start();
210 let inline_indent_offset = indent + 2 + (after_dash.len() - after_dash_trimmed.len());
211
212 if after_dash_trimmed.is_empty() {
213 // No inline value. Children, if any, will appear on the
214 // following lines with indent > this line's indent. Push a
215 // placeholder frame so when those children arrive, the
216 // sequence-item frame is already on the stack.
217 //
218 // We push a frame with indent = indent + 2; the actual
219 // child kind/indent gets resolved when the next non-empty
220 // line arrives.
221 stack.push(Frame {
222 indent: indent + 2,
223 path: item_path,
224 kind: None,
225 child_count: 0,
226 });
227 } else if split_key(after_dash_trimmed).is_some() {
228 // Inline mapping start (`- key: ...`). The key is the first
229 // child of an implicit mapping whose siblings sit at the
230 // same column as the key.
231 stack.push(Frame {
232 indent: inline_indent_offset,
233 path: item_path,
234 kind: Some(FrameKind::Mapping),
235 child_count: 1,
236 });
237 }
238 // Otherwise: inline scalar value, no further nesting.
239
240 // Rebuild the line with the trailing comment stripped, and
241 // capture it as an inline NestedComment attached to this item.
242 if let Some(c) = trailing_comment {
243 out.nested_comments.push(NestedComment {
244 container_path: parent_path,
245 position: item_index,
246 text: strip_comment_marker(&c).to_string(),
247 inline: true,
248 });
249 let head = format!("{:width$}", "", width = indent);
250 let body = if after_dash.trim_end().is_empty() {
251 "-".to_string()
252 } else {
253 format!("- {}", after_dash.trim_end())
254 };
255 cleaned_lines.push(format!("{}{}", head, body));
256 } else {
257 cleaned_lines.push(line.to_string());
258 }
259 continue;
260 }
261
262 // Case 3: top-level field line with possible `!fill` tag and/or
263 // trailing comment. Top-level only — `is_top_level` mirrors the
264 // pre-existing semantics.
265 let is_top_level = indent == 0;
266 if is_top_level {
267 if let Some((key, after_colon)) = split_key(line) {
268 let (value_part, trailing_comment) = split_trailing_comment(&after_colon);
269
270 let (fill, value_without_tag, had_non_fill_tag, fill_target_err) =
271 inspect_fill_and_tags(&value_part, &key);
272
273 if had_non_fill_tag {
274 out.warnings.push(
275 Diagnostic::new(
276 Severity::Warning,
277 format!(
278 "YAML tag on key `{}` is not supported; the tag has been dropped and the value kept",
279 key
280 ),
281 )
282 .with_code("parse::unsupported_yaml_tag".to_string()),
283 );
284 }
285 if let Some(err) = fill_target_err {
286 out.fill_target_errors.push(err);
287 }
288 if fill && (key == "QUILL" || key == "KIND") {
289 out.fill_target_errors.push(format!(
290 "`!fill` cannot be applied to the sentinel key `{}` — sentinels are routing keys, not data, and must resolve at parse time",
291 key
292 ));
293 }
294
295 out.items.push(PreItem::Field {
296 key: key.clone(),
297 fill,
298 });
299
300 // Update the structural stack for this top-level key.
301 // The root frame is at index 0; children appear at indent 0.
302 let root = &mut stack[0];
303 root.child_count += 1;
304 let key_path = vec![CommentPathSegment::Key(key.clone())];
305
306 // Pop everything but the root.
307 while stack.len() > 1 {
308 stack.pop();
309 }
310
311 // If the value is empty (block style: `key:` followed by
312 // indented children), push a frame so nested comments can
313 // be attached. Otherwise (inline scalar/flow), no nested
314 // children come from this key.
315 if has_empty_inline_value(&value_without_tag) {
316 stack.push(Frame {
317 indent: 2,
318 path: key_path,
319 kind: None,
320 child_count: 0,
321 });
322 }
323
324 // Rebuild the line without the `!fill` tag (and without
325 // the trailing comment, since that goes on its own
326 // line now).
327 let cleaned = format!("{}:{}", key, value_without_tag);
328 cleaned_lines.push(cleaned);
329
330 if let Some(c) = trailing_comment {
331 out.items.push(PreItem::Comment {
332 text: strip_comment_marker(&c).to_string(),
333 inline: true,
334 });
335 }
336
337 continue;
338 }
339 }
340
341 // Case 4: nested key line (`key:` or `key: value`) inside a block
342 // mapping. We recognise simple `key:` patterns; unusual forms fall
343 // through to verbatim pass-through.
344 if let Some((key, after_colon)) = split_key(trimmed) {
345 // The frame at this indent must be a mapping.
346 let frame_idx = ensure_frame_at_indent(&mut stack, indent, FrameKind::Mapping);
347 let frame = &mut stack[frame_idx];
348 let key_index = frame.child_count;
349 frame.child_count += 1;
350 let parent_path: Vec<CommentPathSegment> = frame.path.clone();
351 let key_path: Vec<CommentPathSegment> = {
352 let mut p = parent_path.clone();
353 p.push(CommentPathSegment::Key(key.clone()));
354 p
355 };
356 // Drop frames deeper than this mapping; siblings reset nesting.
357 while stack.len() > frame_idx + 1 {
358 stack.pop();
359 }
360
361 // Detach a possible trailing comment on the line. We keep the
362 // value (sans comment) in the cleaned YAML and capture the
363 // comment as an inline NestedComment attached to this key.
364 let (value_part, trailing_comment) = split_trailing_comment(&after_colon);
365 if let Some(c) = trailing_comment {
366 out.nested_comments.push(NestedComment {
367 container_path: parent_path,
368 position: key_index,
369 text: strip_comment_marker(&c).to_string(),
370 inline: true,
371 });
372 let head = format!("{:width$}", "", width = indent);
373 cleaned_lines.push(format!("{}{}:{}", head, key, value_part));
374 } else {
375 cleaned_lines.push(line.to_string());
376 }
377
378 // If the value is empty (block style) push a frame for nested
379 // children at indent + 2.
380 if has_empty_inline_value(&after_colon) {
381 stack.push(Frame {
382 indent: indent + 2,
383 path: key_path,
384 kind: None,
385 child_count: 0,
386 });
387 }
388 continue;
389 }
390
391 // Everything else: pass through verbatim.
392 cleaned_lines.push(line.to_string());
393 }
394
395 out.cleaned_yaml = cleaned_lines.join("\n");
396 out
397}
398
399/// Ensure the deepest frame on the stack matches the given `indent` and
400/// kind, pushing a new frame if necessary. Returns the index of the matched
401/// or freshly-pushed frame.
402fn ensure_frame_at_indent(stack: &mut Vec<Frame>, indent: usize, kind: FrameKind) -> usize {
403 // After dedent popping, the top frame has `indent <= indent`. If it
404 // matches exactly, claim it. Otherwise, push a new child frame under
405 // it that has the requested indent.
406 let top_idx = stack.len() - 1;
407 let top = &mut stack[top_idx];
408
409 if top.indent == indent {
410 if top.kind.is_none() {
411 top.kind = Some(kind);
412 }
413 return top_idx;
414 }
415
416 // The top frame is shallower (its indent < indent). Push a new frame
417 // at this indent, parented under the top frame. The new frame's path
418 // is a continuation: for a sequence at deeper indent under a mapping,
419 // the path is the same as the parent's `path` (because the sequence
420 // is the value of the parent's most recent key).
421 //
422 // Concretely, when we encounter `- foo` at indent 2 and the stack top
423 // is the root mapping with indent 0, the parent frame's most-recent
424 // child path was already pushed when we saw `key:` in case 3 (we
425 // pushed a placeholder frame at indent 2 with `path = [Key(key)]` and
426 // unknown kind). So usually we won't reach this branch — the
427 // placeholder is already there. This branch is a safety net for
428 // unusual layouts.
429 let parent_path = top.path.clone();
430 stack.push(Frame {
431 indent,
432 path: parent_path,
433 kind: Some(kind),
434 child_count: 0,
435 });
436 stack.len() - 1
437}
438
/// Remove the comment marker from the front of `raw`.
///
/// Every leading `#` is dropped, followed by at most one space. Text that
/// does not start with `#` only loses a single leading space, if present.
fn strip_comment_marker(raw: &str) -> &str {
    let body = raw.trim_start_matches('#');
    match body.strip_prefix(' ') {
        Some(text) => text,
        None => body,
    }
}
446
/// Count the ASCII spaces at the start of `line`.
///
/// Only the space character counts; a tab ends the run and is not expanded.
fn leading_space_count(line: &str) -> usize {
    line.len() - line.trim_start_matches(' ').len()
}
452
453/// `true` when the value portion of a `key:` line is empty (after trimming
454/// whitespace). Trailing comments are ignored. An empty value means the
455/// real value is on subsequent indented lines (block mapping or sequence).
456fn has_empty_inline_value(after_colon: &str) -> bool {
457 let (v, _) = split_trailing_comment(after_colon);
458 v.trim().is_empty()
459}
460
/// Split a line into `(key, rest_after_colon)`.
///
/// Returns `None` unless the line starts with an identifier-like key
/// (`[a-zA-Z_][a-zA-Z0-9_]*`) immediately followed by a `:` that acts as a
/// mapping indicator, i.e. the colon is followed by a space, a tab, a
/// carriage return, or the end of the line. In block YAML a colon followed
/// by any other character is part of a plain scalar, so `http://example.com`
/// or `12:30` must not be reported as a key.
///
/// `rest_after_colon` is everything after the colon, leading whitespace
/// included. More exotic keys (quoted, with dashes, …) are deliberately not
/// recognised and fall through to the caller's verbatim path.
fn split_key(line: &str) -> Option<(String, String)> {
    let first = *line.as_bytes().first()?;
    if !(first.is_ascii_alphabetic() || first == b'_') {
        return None;
    }

    // The key is the leading run of ASCII identifier bytes, so `key_len`
    // always falls on a char boundary.
    let key_len = line
        .bytes()
        .take_while(|b| b.is_ascii_alphanumeric() || *b == b'_')
        .count();

    let rest = line[key_len..].strip_prefix(':')?;
    let colon_is_indicator = matches!(
        rest.bytes().next(),
        None | Some(b' ') | Some(b'\t') | Some(b'\r')
    );
    if !colon_is_indicator {
        return None;
    }

    Some((line[..key_len].to_string(), rest.to_string()))
}
486
/// Split a value string into `(value, trailing_comment)`.
///
/// A comment starts at a `#` that is outside any quoted scalar and is either
/// the first character of `value` or preceded by a space or tab. (The start
/// of `value` counts as "after whitespace" because callers pass the text
/// that follows `key:` or `- `.) When a comment is found, the returned value
/// has its trailing whitespace removed and the comment keeps its `#`;
/// otherwise `value` is returned unchanged with `None`.
///
/// Quote handling:
/// - a `"` or `'` opens a quoted scalar only where a token can start: at the
///   start of `value`, or after whitespace or one of `[`, `{`, `,`, `:`. A
///   quote character in the middle of a plain scalar (`don't`, `5"`) is
///   ordinary text and does not hide a following comment;
/// - inside double quotes a backslash escapes the next byte;
/// - inside single quotes `''` is an escaped quote.
fn split_trailing_comment(value: &str) -> (String, Option<String>) {
    #[derive(Clone, Copy)]
    enum Quote {
        None,
        Double,
        Single,
    }

    let bytes = value.as_bytes();
    let mut quote = Quote::None;
    // Previously consumed byte; `None` at the start of the value.
    let mut prev: Option<u8> = None;
    let mut i = 0;

    while i < bytes.len() {
        let b = bytes[i];
        match quote {
            Quote::Double => {
                if b == b'\\' && i + 1 < bytes.len() {
                    // Skip the escaped byte so `\"` cannot close the scalar.
                    prev = Some(bytes[i + 1]);
                    i += 2;
                    continue;
                }
                if b == b'"' {
                    quote = Quote::None;
                }
            }
            Quote::Single => {
                if b == b'\'' {
                    if bytes.get(i + 1) == Some(&b'\'') {
                        // `''` is a literal quote, not the end of the scalar.
                        prev = Some(b'\'');
                        i += 2;
                        continue;
                    }
                    quote = Quote::None;
                }
            }
            Quote::None => {
                let after_ws = matches!(prev, None | Some(b' ') | Some(b'\t'));
                if b == b'#' && after_ws {
                    // `#` is ASCII, so `i` is a char boundary.
                    let v = value[..i].trim_end().to_string();
                    let c = value[i..].to_string();
                    return (v, Some(c));
                }
                let token_start = after_ws
                    || matches!(prev, Some(b'[') | Some(b'{') | Some(b',') | Some(b':'));
                if token_start {
                    if b == b'"' {
                        quote = Quote::Double;
                    } else if b == b'\'' {
                        quote = Quote::Single;
                    }
                }
            }
        }
        prev = Some(b);
        i += 1;
    }

    (value.to_string(), None)
}
527
/// Classify the tag (if any) at the start of a field's value text.
///
/// Returns `(fill, value_without_tag, had_other_tag, fill_target_err)`:
///
/// - `fill`: `true` when the value starts with the `!fill` tag, i.e.
///   `!fill` followed by a space, a tab, or nothing. `!fillwhatever` is a
///   different tag.
/// - `value_without_tag`: for a `!fill` value, one space plus the text after
///   the tag, or just the original leading whitespace when nothing follows
///   the tag. For every other input, `value` unchanged.
/// - `had_other_tag`: `true` when the value starts with `!` but is not a
///   `!fill` tag. The tag is left in place; the caller only warns.
/// - `fill_target_err`: an error message when the text after `!fill` starts
///   with `{` (a flow mapping). Only this flow form is detected here; `key`
///   is used in the message.
fn inspect_fill_and_tags(value: &str, key: &str) -> (bool, String, bool, Option<String>) {
    let body = value.trim_start();
    let leading_ws = &value[..value.len() - body.len()];

    // Result for every non-`!fill` outcome: the value is passed through as is.
    let untouched = |other_tag: bool| -> (bool, String, bool, Option<String>) {
        (false, value.to_string(), other_tag, None)
    };

    // `key:` with nothing after it is not a fill target.
    if body.is_empty() {
        return untouched(false);
    }

    // `!fill` counts only when it is a complete tag token.
    let fill_rest = body
        .strip_prefix("!fill")
        .filter(|rest| matches!(rest.bytes().next(), None | Some(b' ') | Some(b'\t')));

    if let Some(rest) = fill_rest {
        let payload = rest.trim_start();

        let err = if payload.starts_with('{') {
            Some(format!(
                "`!fill` on key `{}` targets a mapping; `!fill` is supported on scalars and sequences only",
                key
            ))
        } else {
            None
        };

        // Bare `!fill` keeps the line shape (`key: `); otherwise the
        // cleaned text reads `key: payload`.
        let cleaned = if payload.is_empty() {
            leading_ws.to_string()
        } else {
            format!(" {}", payload)
        };
        return (true, cleaned, false, err);
    }

    // Any other leading `!` is a non-fill tag; plain values have none.
    untouched(body.starts_with('!'))
}
600
#[cfg(test)]
mod tests {
    //! Unit tests for the fence pre-scan.
    //!
    //! Nested YAML fixtures use two-space indentation per level: the scanner
    //! opens child frames at `indent + 2`, so shallower fixtures would be
    //! read as top-level content.

    use super::*;

    #[test]
    fn extracts_own_line_comments() {
        let input = "# top\ntitle: foo\n# mid\nauthor: bar\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.items,
            vec![
                PreItem::Comment {
                    text: "top".to_string(),
                    inline: false,
                },
                PreItem::Field {
                    key: "title".to_string(),
                    fill: false,
                },
                PreItem::Comment {
                    text: "mid".to_string(),
                    inline: false,
                },
                PreItem::Field {
                    key: "author".to_string(),
                    fill: false,
                },
            ]
        );
        assert!(out.nested_comments.is_empty());
    }

    #[test]
    fn splits_trailing_comments() {
        let input = "title: foo # inline\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.items,
            vec![
                PreItem::Field {
                    key: "title".to_string(),
                    fill: false,
                },
                PreItem::Comment {
                    text: "inline".to_string(),
                    inline: true,
                },
            ]
        );
        assert!(out.cleaned_yaml.contains("title: foo"));
        assert!(!out.cleaned_yaml.contains("inline"));
    }

    #[test]
    fn hash_inside_quotes_is_not_a_comment() {
        let input = "title: \"a # b\"\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.items,
            vec![PreItem::Field {
                key: "title".to_string(),
                fill: false,
            }]
        );
        assert!(out.cleaned_yaml.contains("\"a # b\""));
    }

    #[test]
    fn detects_fill_on_scalar() {
        let input = "dept: !fill Department\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.items,
            vec![PreItem::Field {
                key: "dept".to_string(),
                fill: true,
            }]
        );
        assert!(out.cleaned_yaml.contains("dept: Department"));
        assert!(!out.cleaned_yaml.contains("!fill"));
    }

    #[test]
    fn detects_bare_fill() {
        let input = "dept: !fill\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.items,
            vec![PreItem::Field {
                key: "dept".to_string(),
                fill: true,
            }]
        );
        assert!(!out.cleaned_yaml.contains("!fill"));
    }

    #[test]
    fn unknown_tag_warns() {
        let input = "x: !custom value\n";
        let out = prescan_fence_content(input);
        assert!(
            out.warnings
                .iter()
                .any(|w| w.code.as_deref() == Some("parse::unsupported_yaml_tag")),
            "expected unsupported_yaml_tag warning"
        );
    }

    #[test]
    fn nested_comment_in_sequence_captured() {
        let input = "arr:\n  # before-first\n  - a\n  # between\n  - b\n  # after-last\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.nested_comments,
            vec![
                NestedComment {
                    container_path: vec![CommentPathSegment::Key("arr".to_string())],
                    position: 0,
                    text: "before-first".to_string(),
                    inline: false,
                },
                NestedComment {
                    container_path: vec![CommentPathSegment::Key("arr".to_string())],
                    position: 1,
                    text: "between".to_string(),
                    inline: false,
                },
                NestedComment {
                    container_path: vec![CommentPathSegment::Key("arr".to_string())],
                    position: 2,
                    text: "after-last".to_string(),
                    inline: false,
                },
            ]
        );
        assert!(
            !out.warnings
                .iter()
                .any(|w| w.code.as_deref() == Some("parse::comments_in_nested_yaml_dropped")),
            "no dropped-comment warning expected; nested comments are now preserved"
        );
    }

    #[test]
    fn nested_comment_in_mapping_captured() {
        let input = "outer:\n  # comment\n  inner: 1\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.nested_comments,
            vec![NestedComment {
                container_path: vec![CommentPathSegment::Key("outer".to_string())],
                position: 0,
                text: "comment".to_string(),
                inline: false,
            }]
        );
    }

    #[test]
    fn deep_nested_comment_path() {
        let input = "outer:\n  inner:\n    # deep\n    leaf: 1\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.nested_comments,
            vec![NestedComment {
                container_path: vec![
                    CommentPathSegment::Key("outer".to_string()),
                    CommentPathSegment::Key("inner".to_string()),
                ],
                position: 0,
                text: "deep".to_string(),
                inline: false,
            }]
        );
    }

    #[test]
    fn comment_inside_seq_of_maps() {
        // Each sequence item is a mapping. A comment between keys of the
        // first item belongs to that item's mapping.
        let input = "items:\n  - name: a\n    # inside-first\n    val: 1\n  - name: b\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.nested_comments,
            vec![NestedComment {
                container_path: vec![
                    CommentPathSegment::Key("items".to_string()),
                    CommentPathSegment::Index(0),
                ],
                position: 1,
                text: "inside-first".to_string(),
                inline: false,
            }]
        );
    }

    #[test]
    fn nested_inline_on_sequence_item() {
        // `- a # tail` attaches an inline comment to item 0 (host index, not
        // the slot after).
        let input = "arr:\n  - a # tail\n  - b\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.nested_comments,
            vec![NestedComment {
                container_path: vec![CommentPathSegment::Key("arr".to_string())],
                position: 0,
                text: "tail".to_string(),
                inline: true,
            }]
        );
        assert!(out.cleaned_yaml.contains("- a\n"));
        assert!(!out.cleaned_yaml.contains("tail"));
    }

    #[test]
    fn nested_inline_on_mapping_field() {
        // `inner: 1 # tail` inside `outer:` attaches inline at host index 0.
        let input = "outer:\n  inner: 1 # tail\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.nested_comments,
            vec![NestedComment {
                container_path: vec![CommentPathSegment::Key("outer".to_string())],
                position: 0,
                text: "tail".to_string(),
                inline: true,
            }]
        );
    }

    #[test]
    fn fill_on_flow_sequence_allowed() {
        let input = "x: !fill [1, 2]\n";
        let out = prescan_fence_content(input);
        assert!(
            out.fill_target_errors.is_empty(),
            "expected no error; !fill on sequences is supported"
        );
        assert_eq!(
            out.items,
            vec![PreItem::Field {
                key: "x".to_string(),
                fill: true,
            }]
        );
    }

    #[test]
    fn fill_on_flow_mapping_errors() {
        let input = "x: !fill {a: 1}\n";
        let out = prescan_fence_content(input);
        assert!(
            !out.fill_target_errors.is_empty(),
            "expected error; !fill on mappings is rejected"
        );
    }

    #[test]
    fn fill_on_sentinel_key_errors() {
        let input = "QUILL: !fill x\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.fill_target_errors.len(),
            1,
            "expected exactly one error; !fill on a sentinel key is rejected"
        );
    }
}