quillmark_core/document/prescan.rs
1//! Pre-scan of a metadata fence's YAML content to recover features that
2//! serde_saphyr discards.
3//!
4//! Three features are recovered here:
5//!
6//! 1. **Top-level comments.** YAML comments are dropped by the YAML parser.
7//! To round-trip them as [`super::FrontmatterItem::Comment`], we extract them
8//! before parsing.
9//!
10//! 2. **Nested comments.** Comments inside block mappings/sequences are
11//! captured with their structural path (sequence of keys/indices) and an
12//! ordinal indicating where in the container they sit. The emitter
13//! re-injects them at the matching position. See [`NestedComment`].
14//!
15//! 3. **`!fill` tags.** Custom YAML tags are accepted and dropped by
16//! serde_saphyr; the value survives but the tag annotation is lost. We
17//! detect `!fill` on top-level scalar fields, strip the tag from the
18//! cleaned YAML (so serde_saphyr sees a plain scalar), and record a
19//! `fill: true` marker on the resulting `Field` item.
20//!
//! Other custom tags (`!include`, `!env`, …) on top-level fields are left in
//! the cleaned YAML for serde_saphyr to drop, and are reported with a
//! `parse::unsupported_yaml_tag` warning.
23
24use crate::Diagnostic;
25use crate::Severity;
26
/// One ordered hint extracted from the top level of the fence body.
///
/// Items are recorded in source order. A `Field` carries only the key and
/// the `fill` flag: the value itself comes from serde_saphyr parsing the
/// cleaned text, and the key is used to look it up in the parsed map.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PreItem {
    /// A top-level `key:` line; `fill` is `true` when the value carried a
    /// `!fill` tag.
    Field { key: String, fill: bool },
    /// A top-level comment (own-line or trailing a top-level field), with
    /// the leading `#` and one optional following space removed.
    Comment(String),
}
37
/// A single step on the way from the fence root to a nested container.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CommentPathSegment {
    /// Descend into the value stored under this mapping key.
    Key(String),
    /// Descend into the sequence item at this zero-based index.
    Index(usize),
}
44
45/// A comment that appears inside a nested mapping or sequence.
46///
47/// `container_path` locates the immediate parent container; `position` is
48/// the ordinal within that container's child list before which the comment
49/// sits. A position equal to the container's length means "after all
50/// children".
51#[derive(Debug, Clone, PartialEq, Eq)]
52pub struct NestedComment {
53 pub container_path: Vec<CommentPathSegment>,
54 pub position: usize,
55 pub text: String,
56}
57
58/// Output of [`prescan_fence_content`].
59#[derive(Debug, Clone, Default)]
60pub struct PreScan {
61 /// YAML text with `!fill` tags stripped and all comment lines removed.
62 /// Suitable for feeding into serde_saphyr.
63 pub cleaned_yaml: String,
64 /// Ordered items discovered at the top level — fields (with fill flags)
65 /// and own-line top-level comments, in source order.
66 pub items: Vec<PreItem>,
67 /// Comments inside nested containers, with structural paths.
68 pub nested_comments: Vec<NestedComment>,
69 /// Warnings produced during the scan.
70 pub warnings: Vec<Diagnostic>,
71 /// Unsupported-fill-target errors. The parser turns these into
72 /// `ParseError::InvalidStructure` rejections (`!fill` on mappings).
73 pub fill_target_errors: Vec<String>,
74}
75
76/// Tracks one open YAML container while scanning lines.
77#[derive(Debug)]
78struct Frame {
79 /// Indent (in columns) of children of this container.
80 indent: usize,
81 /// Path to this container from the fence root.
82 path: Vec<CommentPathSegment>,
83 /// Container kind. `None` until the first child line determines it.
84 kind: Option<FrameKind>,
85 /// Number of children seen so far.
86 child_count: usize,
87}
88
/// The two block container shapes a [`Frame`] can track.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FrameKind {
    /// Children are `key:` entries.
    Mapping,
    /// Children are `- ` items.
    Sequence,
}
94
95/// Scan the body of a YAML metadata fence.
96///
97/// `content` is the text between the opening and closing `---` markers
98/// (exclusive), with leading/trailing whitespace preserved.
99pub fn prescan_fence_content(content: &str) -> PreScan {
100 let mut out = PreScan::default();
101
102 // We operate on the raw text to preserve positions. `lines()` strips
103 // line endings; we rebuild with `\n` which is what serde_saphyr expects.
104 let lines: Vec<&str> = content.split('\n').collect();
105 let mut cleaned_lines: Vec<String> = Vec::with_capacity(lines.len());
106
107 // Stack of open containers. The root frame is the frontmatter mapping
108 // itself; children appear at indent 0.
109 let mut stack: Vec<Frame> = vec![Frame {
110 indent: 0,
111 path: Vec::new(),
112 kind: Some(FrameKind::Mapping),
113 child_count: 0,
114 }];
115
116 for raw_line in &lines {
117 let line = *raw_line;
118 let indent = leading_space_count(line);
119 let trimmed = &line[indent..];
120
121 // Skip blank lines (no structural meaning, no comment).
122 if trimmed.is_empty() {
123 cleaned_lines.push(line.to_string());
124 continue;
125 }
126
127 // Pop frames that this line has dedented out of. A line at indent
128 // `indent` belongs to the deepest frame whose `indent <= indent`.
129 // (Equality means the line is a child at this frame's level.)
130 while let Some(frame) = stack.last() {
131 if frame.indent > indent {
132 stack.pop();
133 } else {
134 break;
135 }
136 }
137
138 // Case 1: own-line comment.
139 if trimmed.starts_with('#') {
140 let without_hash = &trimmed[1..];
141 let text = without_hash.strip_prefix(' ').unwrap_or(without_hash);
142
143 // Determine the deepest frame that contains this line.
144 // For a comment at indent N, the containing frame is the one
145 // with the largest indent <= N. The stack is ordered shallow
146 // to deep; the last frame is the deepest. After the dedent
147 // pop above, the top frame's indent is <= indent, which is
148 // what we want.
149 let frame = stack.last().expect("root frame always present");
150
151 if frame.path.is_empty() {
152 // Top-level comment — preserve via PreItem::Comment.
153 out.items.push(PreItem::Comment(text.to_string()));
154 } else {
155 out.nested_comments.push(NestedComment {
156 container_path: frame.path.clone(),
157 position: frame.child_count,
158 text: text.to_string(),
159 });
160 }
161 // Don't emit the line into the cleaned YAML — serde_saphyr
162 // ignores comments either way, but omitting the line avoids
163 // ambiguity with `!fill` rewriting.
164 continue;
165 }
166
167 // Case 2: sequence item line (`- ...`).
168 if trimmed == "-" || trimmed.starts_with("- ") {
169 // The frame at this indent must be a sequence. If the deepest
170 // frame's indent matches this line's indent, claim it; if it
171 // doesn't, push a fresh sequence frame at this indent under
172 // the deepest container.
173 let frame_idx = ensure_frame_at_indent(&mut stack, indent, FrameKind::Sequence);
174 let frame = &mut stack[frame_idx];
175 let item_index = frame.child_count;
176 frame.child_count += 1;
177 let parent_path: Vec<CommentPathSegment> = frame.path.clone();
178 // Snapshot the item path before borrowing mutably again below.
179 let item_path: Vec<CommentPathSegment> = {
180 let mut p = parent_path.clone();
181 p.push(CommentPathSegment::Index(item_index));
182 p
183 };
184 // Drop frames deeper than this sequence; the new item starts
185 // a fresh nested context.
186 while stack.len() > frame_idx + 1 {
187 stack.pop();
188 }
189
190 // Detach a possible trailing comment on the item line.
191 let after_dash_full = if trimmed == "-" { "" } else { &trimmed[2..] };
192 let (after_dash, trailing_comment) = split_trailing_comment(after_dash_full);
193 let after_dash_trimmed = after_dash.trim_start();
194 let inline_indent_offset = indent + 2 + (after_dash.len() - after_dash_trimmed.len());
195
196 if after_dash_trimmed.is_empty() {
197 // No inline value. Children, if any, will appear on the
198 // following lines with indent > this line's indent. Push a
199 // placeholder frame so when those children arrive, the
200 // sequence-item frame is already on the stack.
201 //
202 // We push a frame with indent = indent + 2; the actual
203 // child kind/indent gets resolved when the next non-empty
204 // line arrives.
205 stack.push(Frame {
206 indent: indent + 2,
207 path: item_path,
208 kind: None,
209 child_count: 0,
210 });
211 } else if split_key(after_dash_trimmed).is_some() {
212 // Inline mapping start (`- key: ...`). The key is the first
213 // child of an implicit mapping whose siblings sit at the
214 // same column as the key.
215 stack.push(Frame {
216 indent: inline_indent_offset,
217 path: item_path,
218 kind: Some(FrameKind::Mapping),
219 child_count: 1,
220 });
221 }
222 // Otherwise: inline scalar value, no further nesting.
223
224 // Rebuild the line with the trailing comment stripped, and
225 // capture it as a NestedComment that lands after this item.
226 if let Some(c) = trailing_comment {
227 let stripped = c.trim_start_matches('#');
228 let text = stripped.strip_prefix(' ').unwrap_or(stripped);
229 out.nested_comments.push(NestedComment {
230 container_path: parent_path,
231 position: item_index + 1,
232 text: text.to_string(),
233 });
234 let head = format!("{:width$}", "", width = indent);
235 let body = if after_dash.trim_end().is_empty() {
236 "-".to_string()
237 } else {
238 format!("- {}", after_dash.trim_end())
239 };
240 cleaned_lines.push(format!("{}{}", head, body));
241 } else {
242 cleaned_lines.push(line.to_string());
243 }
244 continue;
245 }
246
247 // Case 3: top-level field line with possible `!fill` tag and/or
248 // trailing comment. Top-level only — `is_top_level` mirrors the
249 // pre-existing semantics.
250 let is_top_level = indent == 0;
251 if is_top_level {
252 if let Some((key, after_colon)) = split_key(line) {
253 let (value_part, trailing_comment) = split_trailing_comment(&after_colon);
254
255 let (fill, value_without_tag, had_non_fill_tag, fill_target_err) =
256 inspect_fill_and_tags(&value_part, &key);
257
258 if had_non_fill_tag {
259 out.warnings.push(
260 Diagnostic::new(
261 Severity::Warning,
262 format!(
263 "YAML tag on key `{}` is not supported; the tag has been dropped and the value kept",
264 key
265 ),
266 )
267 .with_code("parse::unsupported_yaml_tag".to_string()),
268 );
269 }
270 if let Some(err) = fill_target_err {
271 out.fill_target_errors.push(err);
272 }
273
274 out.items.push(PreItem::Field {
275 key: key.clone(),
276 fill,
277 });
278
279 // Update the structural stack for this top-level key.
280 // The root frame is at index 0; children appear at indent 0.
281 let root = &mut stack[0];
282 root.child_count += 1;
283 let key_path = vec![CommentPathSegment::Key(key.clone())];
284
285 // Pop everything but the root.
286 while stack.len() > 1 {
287 stack.pop();
288 }
289
290 // If the value is empty (block style: `key:` followed by
291 // indented children), push a frame so nested comments can
292 // be attached. Otherwise (inline scalar/flow), no nested
293 // children come from this key.
294 if has_empty_inline_value(&value_without_tag) {
295 stack.push(Frame {
296 indent: 2,
297 path: key_path,
298 kind: None,
299 child_count: 0,
300 });
301 }
302
303 // Rebuild the line without the `!fill` tag (and without
304 // the trailing comment, since that goes on its own
305 // line now).
306 let cleaned = format!("{}:{}", key, value_without_tag);
307 cleaned_lines.push(cleaned);
308
309 if let Some(c) = trailing_comment {
310 let stripped = c.trim_start_matches('#');
311 let text = stripped.strip_prefix(' ').unwrap_or(stripped);
312 out.items.push(PreItem::Comment(text.to_string()));
313 }
314
315 continue;
316 }
317 }
318
319 // Case 4: nested key line (`key:` or `key: value`) inside a block
320 // mapping. We recognise simple `key:` patterns; unusual forms fall
321 // through to verbatim pass-through.
322 if let Some((key, after_colon)) = split_key(trimmed) {
323 // The frame at this indent must be a mapping.
324 let frame_idx = ensure_frame_at_indent(&mut stack, indent, FrameKind::Mapping);
325 let frame = &mut stack[frame_idx];
326 let key_index = frame.child_count;
327 frame.child_count += 1;
328 let parent_path: Vec<CommentPathSegment> = frame.path.clone();
329 let key_path: Vec<CommentPathSegment> = {
330 let mut p = parent_path.clone();
331 p.push(CommentPathSegment::Key(key.clone()));
332 p
333 };
334 // Drop frames deeper than this mapping; siblings reset nesting.
335 while stack.len() > frame_idx + 1 {
336 stack.pop();
337 }
338
339 // Detach a possible trailing comment on the line. We keep the
340 // value (sans comment) in the cleaned YAML and capture the
341 // comment as a NestedComment that lands after this entry.
342 let (value_part, trailing_comment) = split_trailing_comment(&after_colon);
343 if let Some(c) = trailing_comment {
344 let stripped = c.trim_start_matches('#');
345 let text = stripped.strip_prefix(' ').unwrap_or(stripped);
346 out.nested_comments.push(NestedComment {
347 container_path: parent_path,
348 position: key_index + 1,
349 text: text.to_string(),
350 });
351 let head = format!("{:width$}", "", width = indent);
352 cleaned_lines.push(format!("{}{}:{}", head, key, value_part));
353 } else {
354 cleaned_lines.push(line.to_string());
355 }
356
357 // If the value is empty (block style) push a frame for nested
358 // children at indent + 2.
359 if has_empty_inline_value(&after_colon) {
360 stack.push(Frame {
361 indent: indent + 2,
362 path: key_path,
363 kind: None,
364 child_count: 0,
365 });
366 }
367 continue;
368 }
369
370 // Everything else: pass through verbatim.
371 cleaned_lines.push(line.to_string());
372 }
373
374 out.cleaned_yaml = cleaned_lines.join("\n");
375 out
376}
377
378/// Ensure the deepest frame on the stack matches the given `indent` and
379/// kind, pushing a new frame if necessary. Returns the index of the matched
380/// or freshly-pushed frame.
381fn ensure_frame_at_indent(stack: &mut Vec<Frame>, indent: usize, kind: FrameKind) -> usize {
382 // After dedent popping, the top frame has `indent <= indent`. If it
383 // matches exactly, claim it. Otherwise, push a new child frame under
384 // it that has the requested indent.
385 let top_idx = stack.len() - 1;
386 let top = &mut stack[top_idx];
387
388 if top.indent == indent {
389 if top.kind.is_none() {
390 top.kind = Some(kind);
391 }
392 return top_idx;
393 }
394
395 // The top frame is shallower (its indent < indent). Push a new frame
396 // at this indent, parented under the top frame. The new frame's path
397 // is a continuation: for a sequence at deeper indent under a mapping,
398 // the path is the same as the parent's `path` (because the sequence
399 // is the value of the parent's most recent key).
400 //
401 // Concretely, when we encounter `- foo` at indent 2 and the stack top
402 // is the root mapping with indent 0, the parent frame's most-recent
403 // child path was already pushed when we saw `key:` in case 3 (we
404 // pushed a placeholder frame at indent 2 with `path = [Key(key)]` and
405 // unknown kind). So usually we won't reach this branch — the
406 // placeholder is already there. This branch is a safety net for
407 // unusual layouts.
408 let parent_path = top.path.clone();
409 stack.push(Frame {
410 indent,
411 path: parent_path,
412 kind: Some(kind),
413 child_count: 0,
414 });
415 stack.len() - 1
416}
417
/// Count the ASCII spaces at the start of `line`.
///
/// Only the space character counts; a tab ends the run and is not
/// expanded.
fn leading_space_count(line: &str) -> usize {
    line.len() - line.trim_start_matches(' ').len()
}
423
424/// `true` when the value portion of a `key:` line is empty (after trimming
425/// whitespace). Trailing comments are ignored. An empty value means the
426/// real value is on subsequent indented lines (block mapping or sequence).
427fn has_empty_inline_value(after_colon: &str) -> bool {
428 let (v, _) = split_trailing_comment(after_colon);
429 v.trim().is_empty()
430}
431
/// Split a line into `(key, rest_after_colon)`. Returns `None` if the line
/// does not start with a bare YAML key.
///
/// Only identifier-like keys (`[a-zA-Z_][a-zA-Z0-9_]*`) are recognised. The
/// colon must be followed by whitespace or the end of the line: in YAML a
/// `:` glued to the next character is part of a plain scalar, so
/// `http://host` and `a:b` are values, not mapping entries. Anything that
/// does not match is left for serde_saphyr to parse or reject.
///
/// `rest_after_colon` is everything after the colon, leading whitespace
/// included.
fn split_key(line: &str) -> Option<(String, String)> {
    let bytes = line.as_bytes();
    let first = *bytes.first()?;
    if !(first.is_ascii_alphabetic() || first == b'_') {
        return None;
    }

    let key_len = 1 + bytes[1..]
        .iter()
        .take_while(|b| b.is_ascii_alphanumeric() || **b == b'_')
        .count();
    if bytes.get(key_len) != Some(&b':') {
        return None;
    }

    // `\r` is accepted so that a CRLF-terminated `key:` still counts.
    let separated = match bytes.get(key_len + 1).copied() {
        None => true,
        Some(next) => matches!(next, b' ' | b'\t' | b'\r' | b'\n'),
    };
    if !separated {
        return None;
    }

    // The key and the colon are ASCII, so both slice points are char
    // boundaries.
    Some((line[..key_len].to_string(), line[key_len + 1..].to_string()))
}
457
/// Split a value string into `(value, trailing_comment)`.
///
/// A comment starts at a `#` that is either the first byte of `value` or
/// is preceded by a space or tab, and that lies outside any quoted string.
/// The returned value has trailing whitespace trimmed; the returned comment
/// starts at the `#`. When no comment is found, `value` is returned
/// unchanged.
///
/// Quote handling:
/// - A `"` or `'` opens a quoted string only at the start of a token (start
///   of `value`, or after whitespace, `[`, `{` or `,`). A quote inside a
///   plain scalar, such as the apostrophe in `don't`, is ordinary text.
/// - Inside double quotes a backslash escapes the next byte.
/// - Inside single quotes `''` is an escaped quote.
fn split_trailing_comment(value: &str) -> (String, Option<String>) {
    let bytes = value.as_bytes();
    let mut i = 0;
    // `None` is the start of the value, which counts as "after whitespace"
    // (callers pass the text that follows `key:` or `- `).
    let mut prev: Option<u8> = None;
    let mut in_dq = false;
    let mut in_sq = false;

    while i < bytes.len() {
        let b = bytes[i];
        if in_dq {
            if b == b'\\' && i + 1 < bytes.len() {
                i += 2;
                continue;
            }
            if b == b'"' {
                in_dq = false;
            }
        } else if in_sq {
            if b == b'\'' {
                if bytes.get(i + 1) == Some(&b'\'') {
                    // `''` is an escaped quote; stay inside the string.
                    i += 2;
                    continue;
                }
                in_sq = false;
            }
        } else {
            let after_ws = prev.map_or(true, |p| matches!(p, b' ' | b'\t'));
            let at_token_start =
                after_ws || prev.map_or(false, |p| matches!(p, b'[' | b'{' | b','));
            if b == b'"' && at_token_start {
                in_dq = true;
            } else if b == b'\'' && at_token_start {
                in_sq = true;
            } else if b == b'#' && after_ws {
                // `#` is ASCII, so `i` is a char boundary.
                let v = value[..i].trim_end().to_string();
                let c = value[i..].to_string();
                return (v, Some(c));
            }
        }
        prev = Some(b);
        i += 1;
    }
    (value.to_string(), None)
}
498
/// Inspect the value portion of a field line for `!fill` and other tags.
///
/// Returns `(fill, value_without_tag, had_other_tag, fill_target_err)`.
///
/// - `fill`: `true` when the value starts with the `!fill` tag, i.e.
///   `!fill` followed by whitespace or the end of the value. `!fillx` is a
///   different tag.
/// - `value_without_tag`: the value with the `!fill` tag removed. A bare
///   `!fill` becomes just the original leading whitespace; `!fill <rest>`
///   becomes one space followed by `<rest>`. Without `!fill` the value is
///   returned unchanged.
/// - `had_other_tag`: `true` when the value starts with some other `!tag`.
///   That tag is *not* stripped here; the caller only emits a warning.
/// - `fill_target_err`: set when `!fill` is followed by a flow mapping
///   (`{...}`). Block-form mappings begin on later lines and cannot be
///   detected from this single value. `!fill` on scalars and sequences is
///   allowed.
///
/// A `\r` directly after `!fill` counts as whitespace so a CRLF-terminated
/// `key: !fill` is still recognised.
fn inspect_fill_and_tags(value: &str, key: &str) -> (bool, String, bool, Option<String>) {
    let trimmed = value.trim_start();
    let leading_ws = &value[..value.len() - trimmed.len()];

    // Empty / null (e.g. `key:` with nothing) — not a fill target.
    if trimmed.is_empty() {
        return (false, value.to_string(), false, None);
    }

    if let Some(rest) = trimmed.strip_prefix("!fill") {
        let tag_ends_here =
            rest.is_empty() || rest.starts_with(|c: char| matches!(c, ' ' | '\t' | '\r'));
        if tag_ends_here {
            let payload = rest.trim_start();
            if payload.is_empty() {
                // Bare `!fill`: keep only the leading whitespace so the
                // line keeps its `key: ` shape. The value is then null or
                // whatever block follows on the indented lines.
                return (true, leading_ws.to_string(), false, None);
            }
            // Flow mappings are rejected; flow sequences and scalars pass.
            let err = if payload.starts_with('{') {
                Some(format!(
                    "`!fill` on key `{}` targets a mapping; `!fill` is supported on scalars and sequences only",
                    key
                ))
            } else {
                None
            };
            // One space + payload so the cleaned text reads `key: payload`.
            return (true, format!(" {}", payload), false, err);
        }
    }

    // Any other `!tag` prefix is a non-fill custom tag; the value is left
    // alone either way.
    let had_other_tag = trimmed.starts_with('!');
    (false, value.to_string(), had_other_tag, None)
}
571
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `PreItem::Field`.
    fn field(key: &str, fill: bool) -> PreItem {
        PreItem::Field {
            key: key.to_string(),
            fill,
        }
    }

    /// Build a `PreItem::Comment`.
    fn comment(text: &str) -> PreItem {
        PreItem::Comment(text.to_string())
    }

    /// Build a `CommentPathSegment::Key`.
    fn key(name: &str) -> CommentPathSegment {
        CommentPathSegment::Key(name.to_string())
    }

    /// Build a `NestedComment`.
    fn nested(path: Vec<CommentPathSegment>, position: usize, text: &str) -> NestedComment {
        NestedComment {
            container_path: path,
            position,
            text: text.to_string(),
        }
    }

    /// `true` when the scan produced a warning carrying `code`.
    fn has_warning(scan: &PreScan, code: &str) -> bool {
        scan.warnings.iter().any(|w| w.code.as_deref() == Some(code))
    }

    #[test]
    fn extracts_own_line_comments() {
        let scan = prescan_fence_content("# top\ntitle: foo\n# mid\nauthor: bar\n");
        let expected = vec![
            comment("top"),
            field("title", false),
            comment("mid"),
            field("author", false),
        ];
        assert_eq!(scan.items, expected);
        assert!(scan.nested_comments.is_empty());
    }

    #[test]
    fn splits_trailing_comments() {
        let scan = prescan_fence_content("title: foo # inline\n");
        assert_eq!(scan.items, vec![field("title", false), comment("inline")]);
        assert!(scan.cleaned_yaml.contains("title: foo"));
        assert!(!scan.cleaned_yaml.contains("inline"));
    }

    #[test]
    fn detects_fill_on_scalar() {
        let scan = prescan_fence_content("dept: !fill Department\n");
        assert_eq!(scan.items, vec![field("dept", true)]);
        assert!(scan.cleaned_yaml.contains("dept: Department"));
        assert!(!scan.cleaned_yaml.contains("!fill"));
    }

    #[test]
    fn detects_bare_fill() {
        let scan = prescan_fence_content("dept: !fill\n");
        assert_eq!(scan.items, vec![field("dept", true)]);
        assert!(!scan.cleaned_yaml.contains("!fill"));
    }

    #[test]
    fn unknown_tag_warns() {
        let scan = prescan_fence_content("x: !custom value\n");
        assert!(
            has_warning(&scan, "parse::unsupported_yaml_tag"),
            "expected unsupported_yaml_tag warning"
        );
    }

    #[test]
    fn nested_comment_in_sequence_captured() {
        let scan = prescan_fence_content(
            "arr:\n  # before-first\n  - a\n  # between\n  - b\n  # after-last\n",
        );
        let expected = vec![
            nested(vec![key("arr")], 0, "before-first"),
            nested(vec![key("arr")], 1, "between"),
            nested(vec![key("arr")], 2, "after-last"),
        ];
        assert_eq!(scan.nested_comments, expected);
        assert!(
            !has_warning(&scan, "parse::comments_in_nested_yaml_dropped"),
            "no dropped-comment warning expected; nested comments are now preserved"
        );
    }

    #[test]
    fn nested_comment_in_mapping_captured() {
        let scan = prescan_fence_content("outer:\n  # comment\n  inner: 1\n");
        assert_eq!(
            scan.nested_comments,
            vec![nested(vec![key("outer")], 0, "comment")]
        );
    }

    #[test]
    fn deep_nested_comment_path() {
        let scan = prescan_fence_content("outer:\n  inner:\n    # deep\n    leaf: 1\n");
        assert_eq!(
            scan.nested_comments,
            vec![nested(vec![key("outer"), key("inner")], 0, "deep")]
        );
    }

    #[test]
    fn comment_inside_seq_of_maps() {
        // Every sequence item is a mapping; a comment between two keys of
        // the first item is owned by that item's mapping.
        let scan = prescan_fence_content(
            "items:\n  - name: a\n    # inside-first\n    val: 1\n  - name: b\n",
        );
        assert_eq!(
            scan.nested_comments,
            vec![nested(
                vec![key("items"), CommentPathSegment::Index(0)],
                1,
                "inside-first"
            )]
        );
    }

    #[test]
    fn fill_on_flow_sequence_allowed() {
        let scan = prescan_fence_content("x: !fill [1, 2]\n");
        assert!(
            scan.fill_target_errors.is_empty(),
            "expected no error; !fill on sequences is supported"
        );
        assert_eq!(scan.items, vec![field("x", true)]);
    }

    #[test]
    fn fill_on_flow_mapping_errors() {
        let scan = prescan_fence_content("x: !fill {a: 1}\n");
        assert!(
            !scan.fill_target_errors.is_empty(),
            "expected error; !fill on mappings is rejected"
        );
    }
}
765}