quillmark_core/document/prescan.rs
1//! Pre-scan of a metadata fence's YAML content to recover features that
2//! serde_saphyr discards.
3//!
4//! Three features are recovered here:
5//!
6//! 1. **Top-level comments.** YAML comments are dropped by the YAML parser.
7//! To round-trip them as [`super::FrontmatterItem::Comment`], we extract them
8//! before parsing.
9//!
10//! 2. **Nested comments.** Comments inside block mappings/sequences are
11//! captured with their structural path (sequence of keys/indices) and an
12//! ordinal indicating where in the container they sit. The emitter
13//! re-injects them at the matching position. See [`NestedComment`].
14//!
15//! 3. **`!fill` tags.** Custom YAML tags are accepted and dropped by
16//! serde_saphyr; the value survives but the tag annotation is lost. We
17//! detect `!fill` on top-level scalar fields, strip the tag from the
18//! cleaned YAML (so serde_saphyr sees a plain scalar), and record a
19//! `fill: true` marker on the resulting `Field` item.
20//!
//! Other custom tags (`!include`, `!env`, …) are left in the cleaned YAML
//! for serde_saphyr to drop; a `parse::unsupported_yaml_tag` warning is
//! recorded for each top-level field that carries one.
23
24use crate::Diagnostic;
25use crate::Severity;
26
/// A single top-level hint pulled out of the fence body, kept in source
/// order.
///
/// The pre-scan never stores field values: the value comes from
/// serde_saphyr parsing the cleaned text, and the recorded key is what
/// ties a `Field` hint back to the parsed map entry.
///
/// An inline comment (`field: value # text`) is pushed directly after the
/// `Field` it trails, so a consumer can look one slot ahead to attach it.
#[derive(Debug, Clone, PartialEq)]
pub enum PreItem {
    /// A top-level `key: ...` line. `fill` is `true` when the value carried
    /// a `!fill` tag.
    Field { key: String, fill: bool },
    /// A top-level comment. `inline` is `false` for a `# text` line of its
    /// own and `true` for a comment trailing a field on the same line.
    Comment { text: String, inline: bool },
}
42
/// One step on the way from the fence root to a nested YAML container.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CommentPathSegment {
    /// Descend into the value stored under this mapping key.
    Key(String),
    /// Descend into the sequence element at this zero-based index.
    Index(usize),
}
49
50/// A comment that appears inside a nested mapping or sequence.
51///
52/// `container_path` locates the immediate parent container.
53///
54/// Position semantics depend on `inline`:
55/// - **Own-line (`inline = false`)**: `position` is the slot ordinal within
56/// the container's child list, ranging `0..=child_count`. The comment is
57/// rendered before the child at this position. `position == child_count`
58/// means "after all children".
59/// - **Inline (`inline = true`)**: `position` is the host child's index,
60/// ranging `0..child_count`. The comment is attached to that child's
61/// trailing line. An inline comment whose host is missing at emit time
62/// (orphan) degrades to an own-line comment at the same indent.
63#[derive(Debug, Clone, PartialEq, Eq)]
64pub struct NestedComment {
65 pub container_path: Vec<CommentPathSegment>,
66 pub position: usize,
67 pub text: String,
68 pub inline: bool,
69}
70
71/// Output of [`prescan_fence_content`].
72#[derive(Debug, Clone, Default)]
73pub struct PreScan {
74 /// YAML text with `!fill` tags stripped and all comment lines removed.
75 /// Suitable for feeding into serde_saphyr.
76 pub cleaned_yaml: String,
77 /// Ordered items discovered at the top level — fields (with fill flags)
78 /// and own-line top-level comments, in source order.
79 pub items: Vec<PreItem>,
80 /// Comments inside nested containers, with structural paths.
81 pub nested_comments: Vec<NestedComment>,
82 /// Warnings produced during the scan.
83 pub warnings: Vec<Diagnostic>,
84 /// Unsupported-fill-target errors. The parser turns these into
85 /// `ParseError::InvalidStructure` rejections (`!fill` on mappings).
86 pub fill_target_errors: Vec<String>,
87}
88
89/// Tracks one open YAML container while scanning lines.
90#[derive(Debug)]
91struct Frame {
92 /// Indent (in columns) of children of this container.
93 indent: usize,
94 /// Path to this container from the fence root.
95 path: Vec<CommentPathSegment>,
96 /// Container kind. `None` until the first child line determines it.
97 kind: Option<FrameKind>,
98 /// Number of children seen so far.
99 child_count: usize,
100}
101
/// The two block container shapes the scanner distinguishes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FrameKind {
    /// Children are `key: value` entries.
    Mapping,
    /// Children are `- item` entries.
    Sequence,
}
107
108/// Scan the body of a YAML metadata fence.
109///
110/// `content` is the text between the opening and closing `---` markers
111/// (exclusive), with leading/trailing whitespace preserved.
112pub fn prescan_fence_content(content: &str) -> PreScan {
113 let mut out = PreScan::default();
114
115 // We operate on the raw text to preserve positions. `lines()` strips
116 // line endings; we rebuild with `\n` which is what serde_saphyr expects.
117 let lines: Vec<&str> = content.split('\n').collect();
118 let mut cleaned_lines: Vec<String> = Vec::with_capacity(lines.len());
119
120 // Stack of open containers. The root frame is the frontmatter mapping
121 // itself; children appear at indent 0.
122 let mut stack: Vec<Frame> = vec![Frame {
123 indent: 0,
124 path: Vec::new(),
125 kind: Some(FrameKind::Mapping),
126 child_count: 0,
127 }];
128
129 for raw_line in &lines {
130 let line = *raw_line;
131 let indent = leading_space_count(line);
132 let trimmed = &line[indent..];
133
134 // Skip blank lines (no structural meaning, no comment).
135 if trimmed.is_empty() {
136 cleaned_lines.push(line.to_string());
137 continue;
138 }
139
140 // Pop frames that this line has dedented out of. A line at indent
141 // `indent` belongs to the deepest frame whose `indent <= indent`.
142 // (Equality means the line is a child at this frame's level.)
143 while let Some(frame) = stack.last() {
144 if frame.indent > indent {
145 stack.pop();
146 } else {
147 break;
148 }
149 }
150
151 // Case 1: own-line comment.
152 if trimmed.starts_with('#') {
153 let text = strip_comment_marker(trimmed);
154
155 // Determine the deepest frame that contains this line.
156 // For a comment at indent N, the containing frame is the one
157 // with the largest indent <= N. The stack is ordered shallow
158 // to deep; the last frame is the deepest. After the dedent
159 // pop above, the top frame's indent is <= indent, which is
160 // what we want.
161 let frame = stack.last().expect("root frame always present");
162
163 if frame.path.is_empty() {
164 // Top-level comment — preserve via PreItem::Comment.
165 out.items.push(PreItem::Comment {
166 text: text.to_string(),
167 inline: false,
168 });
169 } else {
170 out.nested_comments.push(NestedComment {
171 container_path: frame.path.clone(),
172 position: frame.child_count,
173 text: text.to_string(),
174 inline: false,
175 });
176 }
177 // Don't emit the line into the cleaned YAML — serde_saphyr
178 // ignores comments either way, but omitting the line avoids
179 // ambiguity with `!fill` rewriting.
180 continue;
181 }
182
183 // Case 2: sequence item line (`- ...`).
184 if trimmed == "-" || trimmed.starts_with("- ") {
185 // The frame at this indent must be a sequence. If the deepest
186 // frame's indent matches this line's indent, claim it; if it
187 // doesn't, push a fresh sequence frame at this indent under
188 // the deepest container.
189 let frame_idx = ensure_frame_at_indent(&mut stack, indent, FrameKind::Sequence);
190 let frame = &mut stack[frame_idx];
191 let item_index = frame.child_count;
192 frame.child_count += 1;
193 let parent_path: Vec<CommentPathSegment> = frame.path.clone();
194 // Snapshot the item path before borrowing mutably again below.
195 let item_path: Vec<CommentPathSegment> = {
196 let mut p = parent_path.clone();
197 p.push(CommentPathSegment::Index(item_index));
198 p
199 };
200 // Drop frames deeper than this sequence; the new item starts
201 // a fresh nested context.
202 while stack.len() > frame_idx + 1 {
203 stack.pop();
204 }
205
206 // Detach a possible trailing comment on the item line.
207 let after_dash_full = if trimmed == "-" { "" } else { &trimmed[2..] };
208 let (after_dash, trailing_comment) = split_trailing_comment(after_dash_full);
209 let after_dash_trimmed = after_dash.trim_start();
210 let inline_indent_offset = indent + 2 + (after_dash.len() - after_dash_trimmed.len());
211
212 if after_dash_trimmed.is_empty() {
213 // No inline value. Children, if any, will appear on the
214 // following lines with indent > this line's indent. Push a
215 // placeholder frame so when those children arrive, the
216 // sequence-item frame is already on the stack.
217 //
218 // We push a frame with indent = indent + 2; the actual
219 // child kind/indent gets resolved when the next non-empty
220 // line arrives.
221 stack.push(Frame {
222 indent: indent + 2,
223 path: item_path,
224 kind: None,
225 child_count: 0,
226 });
227 } else if split_key(after_dash_trimmed).is_some() {
228 // Inline mapping start (`- key: ...`). The key is the first
229 // child of an implicit mapping whose siblings sit at the
230 // same column as the key.
231 stack.push(Frame {
232 indent: inline_indent_offset,
233 path: item_path,
234 kind: Some(FrameKind::Mapping),
235 child_count: 1,
236 });
237 }
238 // Otherwise: inline scalar value, no further nesting.
239
240 // Rebuild the line with the trailing comment stripped, and
241 // capture it as an inline NestedComment attached to this item.
242 if let Some(c) = trailing_comment {
243 out.nested_comments.push(NestedComment {
244 container_path: parent_path,
245 position: item_index,
246 text: strip_comment_marker(&c).to_string(),
247 inline: true,
248 });
249 let head = format!("{:width$}", "", width = indent);
250 let body = if after_dash.trim_end().is_empty() {
251 "-".to_string()
252 } else {
253 format!("- {}", after_dash.trim_end())
254 };
255 cleaned_lines.push(format!("{}{}", head, body));
256 } else {
257 cleaned_lines.push(line.to_string());
258 }
259 continue;
260 }
261
262 // Case 3: top-level field line with possible `!fill` tag and/or
263 // trailing comment. Top-level only — `is_top_level` mirrors the
264 // pre-existing semantics.
265 let is_top_level = indent == 0;
266 if is_top_level {
267 if let Some((key, after_colon)) = split_key(line) {
268 let (value_part, trailing_comment) = split_trailing_comment(&after_colon);
269
270 let (fill, value_without_tag, had_non_fill_tag, fill_target_err) =
271 inspect_fill_and_tags(&value_part, &key);
272
273 if had_non_fill_tag {
274 out.warnings.push(
275 Diagnostic::new(
276 Severity::Warning,
277 format!(
278 "YAML tag on key `{}` is not supported; the tag has been dropped and the value kept",
279 key
280 ),
281 )
282 .with_code("parse::unsupported_yaml_tag".to_string()),
283 );
284 }
285 if let Some(err) = fill_target_err {
286 out.fill_target_errors.push(err);
287 }
288
289 out.items.push(PreItem::Field {
290 key: key.clone(),
291 fill,
292 });
293
294 // Update the structural stack for this top-level key.
295 // The root frame is at index 0; children appear at indent 0.
296 let root = &mut stack[0];
297 root.child_count += 1;
298 let key_path = vec![CommentPathSegment::Key(key.clone())];
299
300 // Pop everything but the root.
301 while stack.len() > 1 {
302 stack.pop();
303 }
304
305 // If the value is empty (block style: `key:` followed by
306 // indented children), push a frame so nested comments can
307 // be attached. Otherwise (inline scalar/flow), no nested
308 // children come from this key.
309 if has_empty_inline_value(&value_without_tag) {
310 stack.push(Frame {
311 indent: 2,
312 path: key_path,
313 kind: None,
314 child_count: 0,
315 });
316 }
317
318 // Rebuild the line without the `!fill` tag (and without
319 // the trailing comment, since that goes on its own
320 // line now).
321 let cleaned = format!("{}:{}", key, value_without_tag);
322 cleaned_lines.push(cleaned);
323
324 if let Some(c) = trailing_comment {
325 out.items.push(PreItem::Comment {
326 text: strip_comment_marker(&c).to_string(),
327 inline: true,
328 });
329 }
330
331 continue;
332 }
333 }
334
335 // Case 4: nested key line (`key:` or `key: value`) inside a block
336 // mapping. We recognise simple `key:` patterns; unusual forms fall
337 // through to verbatim pass-through.
338 if let Some((key, after_colon)) = split_key(trimmed) {
339 // The frame at this indent must be a mapping.
340 let frame_idx = ensure_frame_at_indent(&mut stack, indent, FrameKind::Mapping);
341 let frame = &mut stack[frame_idx];
342 let key_index = frame.child_count;
343 frame.child_count += 1;
344 let parent_path: Vec<CommentPathSegment> = frame.path.clone();
345 let key_path: Vec<CommentPathSegment> = {
346 let mut p = parent_path.clone();
347 p.push(CommentPathSegment::Key(key.clone()));
348 p
349 };
350 // Drop frames deeper than this mapping; siblings reset nesting.
351 while stack.len() > frame_idx + 1 {
352 stack.pop();
353 }
354
355 // Detach a possible trailing comment on the line. We keep the
356 // value (sans comment) in the cleaned YAML and capture the
357 // comment as an inline NestedComment attached to this key.
358 let (value_part, trailing_comment) = split_trailing_comment(&after_colon);
359 if let Some(c) = trailing_comment {
360 out.nested_comments.push(NestedComment {
361 container_path: parent_path,
362 position: key_index,
363 text: strip_comment_marker(&c).to_string(),
364 inline: true,
365 });
366 let head = format!("{:width$}", "", width = indent);
367 cleaned_lines.push(format!("{}{}:{}", head, key, value_part));
368 } else {
369 cleaned_lines.push(line.to_string());
370 }
371
372 // If the value is empty (block style) push a frame for nested
373 // children at indent + 2.
374 if has_empty_inline_value(&after_colon) {
375 stack.push(Frame {
376 indent: indent + 2,
377 path: key_path,
378 kind: None,
379 child_count: 0,
380 });
381 }
382 continue;
383 }
384
385 // Everything else: pass through verbatim.
386 cleaned_lines.push(line.to_string());
387 }
388
389 out.cleaned_yaml = cleaned_lines.join("\n");
390 out
391}
392
393/// Ensure the deepest frame on the stack matches the given `indent` and
394/// kind, pushing a new frame if necessary. Returns the index of the matched
395/// or freshly-pushed frame.
396fn ensure_frame_at_indent(stack: &mut Vec<Frame>, indent: usize, kind: FrameKind) -> usize {
397 // After dedent popping, the top frame has `indent <= indent`. If it
398 // matches exactly, claim it. Otherwise, push a new child frame under
399 // it that has the requested indent.
400 let top_idx = stack.len() - 1;
401 let top = &mut stack[top_idx];
402
403 if top.indent == indent {
404 if top.kind.is_none() {
405 top.kind = Some(kind);
406 }
407 return top_idx;
408 }
409
410 // The top frame is shallower (its indent < indent). Push a new frame
411 // at this indent, parented under the top frame. The new frame's path
412 // is a continuation: for a sequence at deeper indent under a mapping,
413 // the path is the same as the parent's `path` (because the sequence
414 // is the value of the parent's most recent key).
415 //
416 // Concretely, when we encounter `- foo` at indent 2 and the stack top
417 // is the root mapping with indent 0, the parent frame's most-recent
418 // child path was already pushed when we saw `key:` in case 3 (we
419 // pushed a placeholder frame at indent 2 with `path = [Key(key)]` and
420 // unknown kind). So usually we won't reach this branch — the
421 // placeholder is already there. This branch is a safety net for
422 // unusual layouts.
423 let parent_path = top.path.clone();
424 stack.push(Frame {
425 indent,
426 path: parent_path,
427 kind: Some(kind),
428 child_count: 0,
429 });
430 stack.len() - 1
431}
432
/// Remove the comment marker from the front of `raw`.
///
/// Every leading `#` is dropped, followed by at most one space. Any
/// further whitespace is part of the comment text and is kept.
fn strip_comment_marker(raw: &str) -> &str {
    let body = raw.trim_start_matches('#');
    match body.strip_prefix(' ') {
        Some(text) => text,
        None => body,
    }
}
440
/// Count the ASCII spaces at the start of `line`.
///
/// Only the space character counts; a tab ends the run and is not
/// expanded.
fn leading_space_count(line: &str) -> usize {
    let without_indent = line.trim_start_matches(' ');
    line.len() - without_indent.len()
}
446
447/// `true` when the value portion of a `key:` line is empty (after trimming
448/// whitespace). Trailing comments are ignored. An empty value means the
449/// real value is on subsequent indented lines (block mapping or sequence).
450fn has_empty_inline_value(after_colon: &str) -> bool {
451 let (v, _) = split_trailing_comment(after_colon);
452 v.trim().is_empty()
453}
454
/// Split a line into `(key, rest_after_colon)`.
///
/// Only identifier-like keys are recognised: `[a-zA-Z_][a-zA-Z0-9_]*`
/// immediately followed by `:`. YAML allows far more, but anything more
/// exotic is left for serde_saphyr to parse (or reject) untouched.
///
/// The colon must be followed by whitespace or the end of the line. In
/// block YAML `a:b` and `http://host` are plain scalars, not mapping
/// entries, so they yield `None` rather than a bogus key.
///
/// Returns `None` when the line does not start with such a key. On
/// success `rest_after_colon` is everything after the `:`, unmodified
/// (leading whitespace included).
fn split_key(line: &str) -> Option<(String, String)> {
    let bytes = line.as_bytes();

    let first = *bytes.first()?;
    if !(first.is_ascii_alphabetic() || first == b'_') {
        return None;
    }

    // Length of the identifier run; the first byte already qualified.
    let key_len = bytes
        .iter()
        .take_while(|b| b.is_ascii_alphanumeric() || **b == b'_')
        .count();

    if bytes.get(key_len) != Some(&b':') {
        return None;
    }

    // A mapping indicator needs a separator after it. `\r` is accepted so
    // CRLF input (`key:\r`) keeps working.
    match bytes.get(key_len + 1) {
        None | Some(b' ') | Some(b'\t') | Some(b'\r') | Some(b'\n') => {}
        Some(_) => return None,
    }

    // Both cut points follow ASCII bytes, so they are char boundaries.
    Some((line[..key_len].to_string(), line[key_len + 1..].to_string()))
}
480
/// Split a value string into `(value, trailing_comment)`.
///
/// A trailing comment starts at a `#` that is preceded by a space or tab
/// (or sits at the very start of `value`) and lies outside any quoted
/// string. When one is found, `value` is returned with trailing whitespace
/// trimmed and the comment is returned from the `#` onwards. Otherwise the
/// input is returned unchanged with `None`.
///
/// Quoting rules:
///
/// - `"..."` honours backslash escapes; `'...'` honours the `''` escape.
/// - A quote character only opens a quoted string where a scalar can
///   start: at the beginning, after whitespace, or after one of `[`, `{`,
///   `,`, `:`. An apostrophe inside a plain scalar (`don't`) is ordinary
///   text and does not hide a later comment.
fn split_trailing_comment(value: &str) -> (String, Option<String>) {
    let bytes = value.as_bytes();
    let mut in_dq = false;
    let mut in_sq = false;
    // Last byte consumed outside an escape. `None` at the start: callers
    // pass the text that follows `:` or `- `, so the scan begins as if
    // preceded by whitespace and a leading `#` is a comment.
    let mut prev: Option<u8> = None;
    let mut i = 0;

    while i < bytes.len() {
        let b = bytes[i];
        if in_dq {
            if b == b'\\' && i + 1 < bytes.len() {
                // Skip the escaped byte so `\"` does not close the string.
                i += 2;
                continue;
            }
            if b == b'"' {
                in_dq = false;
            }
        } else if in_sq {
            if b == b'\'' {
                if bytes.get(i + 1) == Some(&b'\'') {
                    // `''` is an escaped apostrophe; stay inside the string.
                    i += 2;
                    continue;
                }
                in_sq = false;
            }
        } else {
            let after_ws = match prev {
                None => true,
                Some(p) => p == b' ' || p == b'\t',
            };
            let at_scalar_start = after_ws
                || matches!(prev, Some(b'[') | Some(b'{') | Some(b',') | Some(b':'));

            if b == b'#' && after_ws {
                // `#` is ASCII, so `i` is always a char boundary.
                let v = value[..i].trim_end().to_string();
                let c = value[i..].to_string();
                return (v, Some(c));
            } else if b == b'"' && at_scalar_start {
                in_dq = true;
            } else if b == b'\'' && at_scalar_start {
                in_sq = true;
            }
        }
        prev = Some(b);
        i += 1;
    }

    (value.to_string(), None)
}
521
/// Look at the value portion of a field line for a `!fill` tag or any
/// other custom tag.
///
/// `value` is the text after the colon with any trailing comment already
/// removed; `key` is used only for the error message.
///
/// Returns `(fill, value_without_tag, had_other_tag, fill_target_err)`:
///
/// - `fill` — `true` when the value starts with the `!fill` tag, i.e.
///   `!fill` followed by a space, a tab, or nothing. `!fillwhatever` is a
///   different tag.
/// - `value_without_tag` — for a bare `!fill`, just the original leading
///   whitespace; for `!fill <rest>`, one space followed by `<rest>`; in
///   every other case `value` unchanged.
/// - `had_other_tag` — `true` when the value starts with `!` but is not a
///   `!fill` tag. The tag is left in the text.
/// - `fill_target_err` — set when the text after `!fill` starts with `{`
///   (a flow mapping). Only this flow form is detected here; flow
///   sequences and scalars are accepted.
fn inspect_fill_and_tags(value: &str, key: &str) -> (bool, String, bool, Option<String>) {
    let body = value.trim_start();
    let leading_ws = &value[..value.len() - body.len()];

    // Nothing after the colon: not a fill target, no tag.
    if body.is_empty() {
        return (false, value.to_string(), false, None);
    }

    // `!fill` counts only when it is the whole tag token.
    let after_fill = body
        .strip_prefix("!fill")
        .filter(|rest| rest.is_empty() || rest.starts_with(' ') || rest.starts_with('\t'));

    if let Some(rest) = after_fill {
        let payload = rest.trim_start();

        // Bare tag: keep only the original leading whitespace so the line
        // reads `key: ` and any block value on following lines still
        // attaches to the key.
        if payload.is_empty() {
            return (true, leading_ws.to_string(), false, None);
        }

        let target_err = payload.starts_with('{').then(|| {
            format!(
                "`!fill` on key `{}` targets a mapping; `!fill` is supported on scalars and sequences only",
                key
            )
        });
        return (true, format!(" {}", payload), false, target_err);
    }

    // Any other leading `!` is a non-fill custom tag; the text is untouched.
    let had_other_tag = body.starts_with('!');
    (false, value.to_string(), had_other_tag, None)
}
594
/// Unit tests for the fence pre-scan.
///
/// Nested YAML fixtures use two-space indentation: the scanner opens the
/// child frame of a top-level `key:` at indent 2, so children indented by
/// a single space would dedent out of that frame and be attributed to the
/// root.
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a `Key` path segment.
    fn key(name: &str) -> CommentPathSegment {
        CommentPathSegment::Key(name.to_string())
    }

    /// Shorthand for a top-level `Field` item.
    fn field(name: &str, fill: bool) -> PreItem {
        PreItem::Field {
            key: name.to_string(),
            fill,
        }
    }

    /// Shorthand for a top-level `Comment` item.
    fn comment(text: &str, inline: bool) -> PreItem {
        PreItem::Comment {
            text: text.to_string(),
            inline,
        }
    }

    /// Shorthand for a `NestedComment`.
    fn nested(
        container_path: Vec<CommentPathSegment>,
        position: usize,
        text: &str,
        inline: bool,
    ) -> NestedComment {
        NestedComment {
            container_path,
            position,
            text: text.to_string(),
            inline,
        }
    }

    #[test]
    fn extracts_own_line_comments() {
        let input = "# top\ntitle: foo\n# mid\nauthor: bar\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.items,
            vec![
                comment("top", false),
                field("title", false),
                comment("mid", false),
                field("author", false),
            ]
        );
        assert!(out.nested_comments.is_empty());
    }

    #[test]
    fn splits_trailing_comments() {
        let input = "title: foo # inline\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.items,
            vec![field("title", false), comment("inline", true)]
        );
        assert!(out.cleaned_yaml.contains("title: foo"));
        assert!(!out.cleaned_yaml.contains("inline"));
    }

    #[test]
    fn detects_fill_on_scalar() {
        let input = "dept: !fill Department\n";
        let out = prescan_fence_content(input);
        assert_eq!(out.items, vec![field("dept", true)]);
        assert!(out.cleaned_yaml.contains("dept: Department"));
        assert!(!out.cleaned_yaml.contains("!fill"));
    }

    #[test]
    fn detects_bare_fill() {
        let input = "dept: !fill\n";
        let out = prescan_fence_content(input);
        assert_eq!(out.items, vec![field("dept", true)]);
        assert!(!out.cleaned_yaml.contains("!fill"));
    }

    #[test]
    fn unknown_tag_warns() {
        let input = "x: !custom value\n";
        let out = prescan_fence_content(input);
        assert!(
            out.warnings
                .iter()
                .any(|w| w.code.as_deref() == Some("parse::unsupported_yaml_tag")),
            "expected unsupported_yaml_tag warning"
        );
    }

    #[test]
    fn nested_comment_in_sequence_captured() {
        let input = "arr:\n  # before-first\n  - a\n  # between\n  - b\n  # after-last\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.nested_comments,
            vec![
                nested(vec![key("arr")], 0, "before-first", false),
                nested(vec![key("arr")], 1, "between", false),
                nested(vec![key("arr")], 2, "after-last", false),
            ]
        );
        assert!(
            !out.warnings
                .iter()
                .any(|w| w.code.as_deref() == Some("parse::comments_in_nested_yaml_dropped")),
            "no dropped-comment warning expected; nested comments are now preserved"
        );
    }

    #[test]
    fn nested_comment_in_mapping_captured() {
        let input = "outer:\n  # comment\n  inner: 1\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.nested_comments,
            vec![nested(vec![key("outer")], 0, "comment", false)]
        );
    }

    #[test]
    fn deep_nested_comment_path() {
        let input = "outer:\n  inner:\n    # deep\n    leaf: 1\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.nested_comments,
            vec![nested(vec![key("outer"), key("inner")], 0, "deep", false)]
        );
    }

    #[test]
    fn comment_inside_seq_of_maps() {
        // Each sequence item is a mapping. A comment between keys of the
        // first item belongs to that item's mapping.
        let input = "items:\n  - name: a\n    # inside-first\n    val: 1\n  - name: b\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.nested_comments,
            vec![nested(
                vec![key("items"), CommentPathSegment::Index(0)],
                1,
                "inside-first",
                false,
            )]
        );
    }

    #[test]
    fn nested_inline_on_sequence_item() {
        // `- a # tail` attaches an inline comment to item 0 (host index, not
        // the slot after).
        let input = "arr:\n  - a # tail\n  - b\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.nested_comments,
            vec![nested(vec![key("arr")], 0, "tail", true)]
        );
        assert!(out.cleaned_yaml.contains("- a\n"));
        assert!(!out.cleaned_yaml.contains("tail"));
    }

    #[test]
    fn nested_inline_on_mapping_field() {
        // `inner: 1 # tail` inside `outer:` attaches inline at host index 0.
        let input = "outer:\n  inner: 1 # tail\n";
        let out = prescan_fence_content(input);
        assert_eq!(
            out.nested_comments,
            vec![nested(vec![key("outer")], 0, "tail", true)]
        );
    }

    #[test]
    fn fill_on_flow_sequence_allowed() {
        let input = "x: !fill [1, 2]\n";
        let out = prescan_fence_content(input);
        assert!(
            out.fill_target_errors.is_empty(),
            "expected no error; !fill on sequences is supported"
        );
        assert_eq!(out.items, vec![field("x", true)]);
    }

    #[test]
    fn fill_on_flow_mapping_errors() {
        let input = "x: !fill {a: 1}\n";
        let out = prescan_fence_content(input);
        assert!(
            !out.fill_target_errors.is_empty(),
            "expected error; !fill on mappings is rejected"
        );
    }
}