Skip to main content

quillmark_core/document/
prescan.rs

1//! Pre-scan of a metadata fence's YAML content to recover features that
2//! serde_saphyr discards.
3//!
4//! Three features are recovered here:
5//!
6//! 1. **Top-level comments.** YAML comments are dropped by the YAML parser.
7//!    To round-trip them as [`super::FrontmatterItem::Comment`], we extract them
8//!    before parsing.
9//!
10//! 2. **Nested comments.** Comments inside block mappings/sequences are
11//!    captured with their structural path (sequence of keys/indices) and an
12//!    ordinal indicating where in the container they sit. The emitter
13//!    re-injects them at the matching position. See [`NestedComment`].
14//!
//! 3. **`!fill` tags.** Custom YAML tags are accepted and dropped by
//!    serde_saphyr; the value survives but the tag annotation is lost. We
//!    detect `!fill` on top-level fields (scalars and sequences; `!fill` on
//!    a flow mapping is recorded as an error), strip the tag from the
//!    cleaned YAML (so serde_saphyr sees an untagged value), and record a
//!    `fill: true` marker on the resulting `Field` item.
//!
//! Other custom tags (`!include`, `!env`, …) on top-level fields are left
//! in the cleaned YAML for serde_saphyr to drop, and are reported with a
//! `parse::unsupported_yaml_tag` warning.
23
24use crate::Diagnostic;
25use crate::Severity;
26
/// A single top-level hint recovered from the fence body, in source order.
///
/// The scanner never stores field values: those come from serde_saphyr
/// parsing the cleaned text, and the `key` recorded here is what callers use
/// to look the value up in the parsed map.
#[derive(Clone, Debug, PartialEq)]
pub enum PreItem {
    /// A top-level `key:` line.
    Field {
        /// The YAML key exactly as written in the fence.
        key: String,
        /// `true` when the value carried a `!fill` tag.
        fill: bool,
    },
    /// A top-level comment, with the leading `#` and one optional following
    /// space removed.
    Comment(String),
}
37
/// One step along a structural path into the parsed YAML value.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum CommentPathSegment {
    /// Descend into a mapping through the named key.
    Key(String),
    /// Descend into a sequence through the zero-based item index.
    Index(usize),
}
44
45/// A comment that appears inside a nested mapping or sequence.
46///
47/// `container_path` locates the immediate parent container; `position` is
48/// the ordinal within that container's child list before which the comment
49/// sits. A position equal to the container's length means "after all
50/// children".
51#[derive(Debug, Clone, PartialEq, Eq)]
52pub struct NestedComment {
53    pub container_path: Vec<CommentPathSegment>,
54    pub position: usize,
55    pub text: String,
56}
57
58/// Output of [`prescan_fence_content`].
59#[derive(Debug, Clone, Default)]
60pub struct PreScan {
61    /// YAML text with `!fill` tags stripped and all comment lines removed.
62    /// Suitable for feeding into serde_saphyr.
63    pub cleaned_yaml: String,
64    /// Ordered items discovered at the top level — fields (with fill flags)
65    /// and own-line top-level comments, in source order.
66    pub items: Vec<PreItem>,
67    /// Comments inside nested containers, with structural paths.
68    pub nested_comments: Vec<NestedComment>,
69    /// Warnings produced during the scan.
70    pub warnings: Vec<Diagnostic>,
71    /// Unsupported-fill-target errors. The parser turns these into
72    /// `ParseError::InvalidStructure` rejections (`!fill` on mappings).
73    pub fill_target_errors: Vec<String>,
74}
75
76/// Tracks one open YAML container while scanning lines.
77#[derive(Debug)]
78struct Frame {
79    /// Indent (in columns) of children of this container.
80    indent: usize,
81    /// Path to this container from the fence root.
82    path: Vec<CommentPathSegment>,
83    /// Container kind. `None` until the first child line determines it.
84    kind: Option<FrameKind>,
85    /// Number of children seen so far.
86    child_count: usize,
87}
88
/// The two block container shapes the scanner distinguishes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum FrameKind {
    /// Children are `key:` entries.
    Mapping,
    /// Children are `- ` items.
    Sequence,
}
94
95/// Scan the body of a YAML metadata fence.
96///
97/// `content` is the text between the opening and closing `---` markers
98/// (exclusive), with leading/trailing whitespace preserved.
99pub fn prescan_fence_content(content: &str) -> PreScan {
100    let mut out = PreScan::default();
101
102    // We operate on the raw text to preserve positions. `lines()` strips
103    // line endings; we rebuild with `\n` which is what serde_saphyr expects.
104    let lines: Vec<&str> = content.split('\n').collect();
105    let mut cleaned_lines: Vec<String> = Vec::with_capacity(lines.len());
106
107    // Stack of open containers. The root frame is the frontmatter mapping
108    // itself; children appear at indent 0.
109    let mut stack: Vec<Frame> = vec![Frame {
110        indent: 0,
111        path: Vec::new(),
112        kind: Some(FrameKind::Mapping),
113        child_count: 0,
114    }];
115
116    for raw_line in &lines {
117        let line = *raw_line;
118        let indent = leading_space_count(line);
119        let trimmed = &line[indent..];
120
121        // Skip blank lines (no structural meaning, no comment).
122        if trimmed.is_empty() {
123            cleaned_lines.push(line.to_string());
124            continue;
125        }
126
127        // Pop frames that this line has dedented out of. A line at indent
128        // `indent` belongs to the deepest frame whose `indent <= indent`.
129        // (Equality means the line is a child at this frame's level.)
130        while let Some(frame) = stack.last() {
131            if frame.indent > indent {
132                stack.pop();
133            } else {
134                break;
135            }
136        }
137
138        // Case 1: own-line comment.
139        if trimmed.starts_with('#') {
140            let without_hash = &trimmed[1..];
141            let text = without_hash.strip_prefix(' ').unwrap_or(without_hash);
142
143            // Determine the deepest frame that contains this line.
144            // For a comment at indent N, the containing frame is the one
145            // with the largest indent <= N. The stack is ordered shallow
146            // to deep; the last frame is the deepest. After the dedent
147            // pop above, the top frame's indent is <= indent, which is
148            // what we want.
149            let frame = stack.last().expect("root frame always present");
150
151            if frame.path.is_empty() {
152                // Top-level comment — preserve via PreItem::Comment.
153                out.items.push(PreItem::Comment(text.to_string()));
154            } else {
155                out.nested_comments.push(NestedComment {
156                    container_path: frame.path.clone(),
157                    position: frame.child_count,
158                    text: text.to_string(),
159                });
160            }
161            // Don't emit the line into the cleaned YAML — serde_saphyr
162            // ignores comments either way, but omitting the line avoids
163            // ambiguity with `!fill` rewriting.
164            continue;
165        }
166
167        // Case 2: sequence item line (`- ...`).
168        if trimmed == "-" || trimmed.starts_with("- ") {
169            // The frame at this indent must be a sequence. If the deepest
170            // frame's indent matches this line's indent, claim it; if it
171            // doesn't, push a fresh sequence frame at this indent under
172            // the deepest container.
173            let frame_idx = ensure_frame_at_indent(&mut stack, indent, FrameKind::Sequence);
174            let frame = &mut stack[frame_idx];
175            let item_index = frame.child_count;
176            frame.child_count += 1;
177            let parent_path: Vec<CommentPathSegment> = frame.path.clone();
178            // Snapshot the item path before borrowing mutably again below.
179            let item_path: Vec<CommentPathSegment> = {
180                let mut p = parent_path.clone();
181                p.push(CommentPathSegment::Index(item_index));
182                p
183            };
184            // Drop frames deeper than this sequence; the new item starts
185            // a fresh nested context.
186            while stack.len() > frame_idx + 1 {
187                stack.pop();
188            }
189
190            // Detach a possible trailing comment on the item line.
191            let after_dash_full = if trimmed == "-" { "" } else { &trimmed[2..] };
192            let (after_dash, trailing_comment) = split_trailing_comment(after_dash_full);
193            let after_dash_trimmed = after_dash.trim_start();
194            let inline_indent_offset = indent + 2 + (after_dash.len() - after_dash_trimmed.len());
195
196            if after_dash_trimmed.is_empty() {
197                // No inline value. Children, if any, will appear on the
198                // following lines with indent > this line's indent. Push a
199                // placeholder frame so when those children arrive, the
200                // sequence-item frame is already on the stack.
201                //
202                // We push a frame with indent = indent + 2; the actual
203                // child kind/indent gets resolved when the next non-empty
204                // line arrives.
205                stack.push(Frame {
206                    indent: indent + 2,
207                    path: item_path,
208                    kind: None,
209                    child_count: 0,
210                });
211            } else if split_key(after_dash_trimmed).is_some() {
212                // Inline mapping start (`- key: ...`). The key is the first
213                // child of an implicit mapping whose siblings sit at the
214                // same column as the key.
215                stack.push(Frame {
216                    indent: inline_indent_offset,
217                    path: item_path,
218                    kind: Some(FrameKind::Mapping),
219                    child_count: 1,
220                });
221            }
222            // Otherwise: inline scalar value, no further nesting.
223
224            // Rebuild the line with the trailing comment stripped, and
225            // capture it as a NestedComment that lands after this item.
226            if let Some(c) = trailing_comment {
227                let stripped = c.trim_start_matches('#');
228                let text = stripped.strip_prefix(' ').unwrap_or(stripped);
229                out.nested_comments.push(NestedComment {
230                    container_path: parent_path,
231                    position: item_index + 1,
232                    text: text.to_string(),
233                });
234                let head = format!("{:width$}", "", width = indent);
235                let body = if after_dash.trim_end().is_empty() {
236                    "-".to_string()
237                } else {
238                    format!("- {}", after_dash.trim_end())
239                };
240                cleaned_lines.push(format!("{}{}", head, body));
241            } else {
242                cleaned_lines.push(line.to_string());
243            }
244            continue;
245        }
246
247        // Case 3: top-level field line with possible `!fill` tag and/or
248        // trailing comment. Top-level only — `is_top_level` mirrors the
249        // pre-existing semantics.
250        let is_top_level = indent == 0;
251        if is_top_level {
252            if let Some((key, after_colon)) = split_key(line) {
253                let (value_part, trailing_comment) = split_trailing_comment(&after_colon);
254
255                let (fill, value_without_tag, had_non_fill_tag, fill_target_err) =
256                    inspect_fill_and_tags(&value_part, &key);
257
258                if had_non_fill_tag {
259                    out.warnings.push(
260                        Diagnostic::new(
261                            Severity::Warning,
262                            format!(
263                                "YAML tag on key `{}` is not supported; the tag has been dropped and the value kept",
264                                key
265                            ),
266                        )
267                        .with_code("parse::unsupported_yaml_tag".to_string()),
268                    );
269                }
270                if let Some(err) = fill_target_err {
271                    out.fill_target_errors.push(err);
272                }
273
274                out.items.push(PreItem::Field {
275                    key: key.clone(),
276                    fill,
277                });
278
279                // Update the structural stack for this top-level key.
280                // The root frame is at index 0; children appear at indent 0.
281                let root = &mut stack[0];
282                root.child_count += 1;
283                let key_path = vec![CommentPathSegment::Key(key.clone())];
284
285                // Pop everything but the root.
286                while stack.len() > 1 {
287                    stack.pop();
288                }
289
290                // If the value is empty (block style: `key:` followed by
291                // indented children), push a frame so nested comments can
292                // be attached. Otherwise (inline scalar/flow), no nested
293                // children come from this key.
294                if has_empty_inline_value(&value_without_tag) {
295                    stack.push(Frame {
296                        indent: 2,
297                        path: key_path,
298                        kind: None,
299                        child_count: 0,
300                    });
301                }
302
303                // Rebuild the line without the `!fill` tag (and without
304                // the trailing comment, since that goes on its own
305                // line now).
306                let cleaned = format!("{}:{}", key, value_without_tag);
307                cleaned_lines.push(cleaned);
308
309                if let Some(c) = trailing_comment {
310                    let stripped = c.trim_start_matches('#');
311                    let text = stripped.strip_prefix(' ').unwrap_or(stripped);
312                    out.items.push(PreItem::Comment(text.to_string()));
313                }
314
315                continue;
316            }
317        }
318
319        // Case 4: nested key line (`key:` or `key: value`) inside a block
320        // mapping. We recognise simple `key:` patterns; unusual forms fall
321        // through to verbatim pass-through.
322        if let Some((key, after_colon)) = split_key(trimmed) {
323            // The frame at this indent must be a mapping.
324            let frame_idx = ensure_frame_at_indent(&mut stack, indent, FrameKind::Mapping);
325            let frame = &mut stack[frame_idx];
326            let key_index = frame.child_count;
327            frame.child_count += 1;
328            let parent_path: Vec<CommentPathSegment> = frame.path.clone();
329            let key_path: Vec<CommentPathSegment> = {
330                let mut p = parent_path.clone();
331                p.push(CommentPathSegment::Key(key.clone()));
332                p
333            };
334            // Drop frames deeper than this mapping; siblings reset nesting.
335            while stack.len() > frame_idx + 1 {
336                stack.pop();
337            }
338
339            // Detach a possible trailing comment on the line. We keep the
340            // value (sans comment) in the cleaned YAML and capture the
341            // comment as a NestedComment that lands after this entry.
342            let (value_part, trailing_comment) = split_trailing_comment(&after_colon);
343            if let Some(c) = trailing_comment {
344                let stripped = c.trim_start_matches('#');
345                let text = stripped.strip_prefix(' ').unwrap_or(stripped);
346                out.nested_comments.push(NestedComment {
347                    container_path: parent_path,
348                    position: key_index + 1,
349                    text: text.to_string(),
350                });
351                let head = format!("{:width$}", "", width = indent);
352                cleaned_lines.push(format!("{}{}:{}", head, key, value_part));
353            } else {
354                cleaned_lines.push(line.to_string());
355            }
356
357            // If the value is empty (block style) push a frame for nested
358            // children at indent + 2.
359            if has_empty_inline_value(&after_colon) {
360                stack.push(Frame {
361                    indent: indent + 2,
362                    path: key_path,
363                    kind: None,
364                    child_count: 0,
365                });
366            }
367            continue;
368        }
369
370        // Everything else: pass through verbatim.
371        cleaned_lines.push(line.to_string());
372    }
373
374    out.cleaned_yaml = cleaned_lines.join("\n");
375    out
376}
377
378/// Ensure the deepest frame on the stack matches the given `indent` and
379/// kind, pushing a new frame if necessary. Returns the index of the matched
380/// or freshly-pushed frame.
381fn ensure_frame_at_indent(stack: &mut Vec<Frame>, indent: usize, kind: FrameKind) -> usize {
382    // After dedent popping, the top frame has `indent <= indent`. If it
383    // matches exactly, claim it. Otherwise, push a new child frame under
384    // it that has the requested indent.
385    let top_idx = stack.len() - 1;
386    let top = &mut stack[top_idx];
387
388    if top.indent == indent {
389        if top.kind.is_none() {
390            top.kind = Some(kind);
391        }
392        return top_idx;
393    }
394
395    // The top frame is shallower (its indent < indent). Push a new frame
396    // at this indent, parented under the top frame. The new frame's path
397    // is a continuation: for a sequence at deeper indent under a mapping,
398    // the path is the same as the parent's `path` (because the sequence
399    // is the value of the parent's most recent key).
400    //
401    // Concretely, when we encounter `- foo` at indent 2 and the stack top
402    // is the root mapping with indent 0, the parent frame's most-recent
403    // child path was already pushed when we saw `key:` in case 3 (we
404    // pushed a placeholder frame at indent 2 with `path = [Key(key)]` and
405    // unknown kind). So usually we won't reach this branch — the
406    // placeholder is already there. This branch is a safety net for
407    // unusual layouts.
408    let parent_path = top.path.clone();
409    stack.push(Frame {
410        indent,
411        path: parent_path,
412        kind: Some(kind),
413        child_count: 0,
414    });
415    stack.len() - 1
416}
417
/// Count the ASCII spaces at the start of `line`.
///
/// Only the space character counts: a tab ends the run and is not expanded
/// into columns.
fn leading_space_count(line: &str) -> usize {
    let without_indent = line.trim_start_matches(' ');
    line.len() - without_indent.len()
}
423
424/// `true` when the value portion of a `key:` line is empty (after trimming
425/// whitespace). Trailing comments are ignored. An empty value means the
426/// real value is on subsequent indented lines (block mapping or sequence).
427fn has_empty_inline_value(after_colon: &str) -> bool {
428    let (v, _) = split_trailing_comment(after_colon);
429    v.trim().is_empty()
430}
431
/// Split `line` into `(key, text_after_colon)`.
///
/// Only identifier-like keys are recognised: an ASCII letter or `_`
/// followed by ASCII letters, digits or `_`, immediately followed by `:`.
/// YAML permits far more, but anything else yields `None` so the caller can
/// leave the line untouched for serde_saphyr to parse or reject. The
/// returned remainder starts right after the colon and keeps any leading
/// whitespace.
fn split_key(line: &str) -> Option<(String, String)> {
    let first = *line.as_bytes().first()?;
    if !first.is_ascii_alphabetic() && first != b'_' {
        return None;
    }

    // The key ends at the first byte that cannot belong to an identifier.
    // Every byte before it is ASCII, so the slices below fall on character
    // boundaries.
    let key_len = line
        .bytes()
        .position(|b| !(b.is_ascii_alphanumeric() || b == b'_'))
        .unwrap_or(line.len());

    let (key, tail) = line.split_at(key_len);
    let rest = tail.strip_prefix(':')?;
    Some((key.to_string(), rest.to_string()))
}
457
/// Split a value string into `(value, trailing_comment)`.
///
/// A comment starts at a `#` that is outside any quoted string and is either
/// preceded by a space/tab or sits at the very start of `value`. The start
/// counts as "after whitespace" because callers hand in the text that
/// follows `- ` with that separator already removed; as a consequence a `#`
/// directly after a colon (`key:#x`) is also treated as a comment.
///
/// Quoting rules:
/// - `"` and `'` open a quoted string only at the start of a token, i.e. at
///   the start of `value`, after whitespace, or after one of `[ { , :`. A
///   quote in the middle of a plain scalar (`don't`) is literal text and
///   must not hide a later comment.
/// - Inside `"..."`, a backslash escapes the following byte.
/// - Inside `'...'`, a doubled `''` is an escaped quote.
///
/// Returns the value with trailing whitespace trimmed and the comment
/// (including its leading `#`) when one is found; otherwise returns `value`
/// unchanged and `None`.
fn split_trailing_comment(value: &str) -> (String, Option<String>) {
    let bytes = value.as_bytes();
    let mut i = 0;
    // Previous byte outside of escape handling; `None` at the start.
    let mut prev: Option<u8> = None;
    let mut in_dq = false;
    let mut in_sq = false;

    while i < bytes.len() {
        let b = bytes[i];
        if in_dq {
            if b == b'\\' && i + 1 < bytes.len() {
                // Skip the escaped byte so an escaped `"` does not close.
                prev = Some(bytes[i + 1]);
                i += 2;
                continue;
            }
            if b == b'"' {
                in_dq = false;
            }
        } else if in_sq {
            if b == b'\'' {
                if bytes.get(i + 1) == Some(&b'\'') {
                    // `''` is an escaped quote; stay inside the string.
                    prev = Some(b'\'');
                    i += 2;
                    continue;
                }
                in_sq = false;
            }
        } else {
            let after_ws = matches!(prev, None | Some(b' ') | Some(b'\t'));
            let at_token_start = after_ws
                || matches!(prev, Some(b'[') | Some(b'{') | Some(b',') | Some(b':'));
            match b {
                b'"' if at_token_start => in_dq = true,
                b'\'' if at_token_start => in_sq = true,
                b'#' if after_ws => {
                    // `#` is ASCII, so `i` is a valid char boundary.
                    let v = value[..i].trim_end().to_string();
                    let c = value[i..].to_string();
                    return (v, Some(c));
                }
                _ => {}
            }
        }
        prev = Some(b);
        i += 1;
    }
    (value.to_string(), None)
}
498
/// Inspect the value portion of a field line for `!fill` and other tags.
///
/// Returns `(fill, value_without_tag, had_other_tag, fill_target_err)`.
///
/// - `fill`: `true` when the value starts with the `!fill` tag, i.e.
///   `!fill` followed by whitespace or the end of the value. `!fillwhatever`
///   is a different tag.
/// - `value_without_tag`: for a bare `!fill`, just the original leading
///   whitespace (the cleaned line reads `key: `, and any block value on the
///   following lines is left for the YAML parser); for `!fill <value>`, a
///   single space followed by the value. In every other case the input is
///   returned unchanged.
/// - `had_other_tag`: `true` when the value starts with some other `!tag`.
///   That tag is *not* stripped here (serde_saphyr tolerates and drops
///   unknown tags); the flag only lets the caller emit a warning.
/// - `fill_target_err`: populated when `!fill` is applied to a flow mapping
///   (`!fill {...}`), naming `key` in the message. Only the flow form is
///   detected here: a bare `!fill` followed by a block mapping on later
///   lines is not visible to this function. Scalars and sequences are
///   allowed.
///
/// Any Unicode whitespace after `!fill` terminates the tag, so a trailing
/// `\r` left over from CRLF input does not turn `!fill` into an unknown tag.
fn inspect_fill_and_tags(value: &str, key: &str) -> (bool, String, bool, Option<String>) {
    let trimmed = value.trim_start();
    let leading_ws = &value[..value.len() - trimmed.len()];

    // Exactly empty / null (e.g. `key:` with nothing) — not a fill target.
    if trimmed.is_empty() {
        return (false, value.to_string(), false, None);
    }

    if let Some(rest) = trimmed.strip_prefix("!fill") {
        if rest.is_empty() || rest.starts_with(char::is_whitespace) {
            let payload = rest.trim_start();

            // Bare `!fill`: drop the tag but keep the leading whitespace so
            // the line shape (`key: `) is preserved.
            if payload.is_empty() {
                return (true, leading_ws.to_string(), false, None);
            }

            // Flow mappings are rejected; flow sequences and scalars pass.
            let err = if payload.starts_with('{') {
                Some(format!(
                    "`!fill` on key `{}` targets a mapping; `!fill` is supported on scalars and sequences only",
                    key
                ))
            } else {
                None
            };
            // One space + the payload so the cleaned text reads `key: payload`.
            return (true, format!(" {}", payload), false, err);
        }
    }

    // Any other `!tag` prefix is a non-fill custom tag; the value is left
    // alone.
    if trimmed.starts_with('!') {
        return (false, value.to_string(), true, None);
    }

    (false, value.to_string(), false, None)
}
571
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a `PreItem::Field`.
    fn field(key: &str, fill: bool) -> PreItem {
        PreItem::Field {
            key: key.to_string(),
            fill,
        }
    }

    /// Shorthand for a `PreItem::Comment`.
    fn comment(text: &str) -> PreItem {
        PreItem::Comment(text.to_string())
    }

    /// Shorthand for a `CommentPathSegment::Key`.
    fn key(name: &str) -> CommentPathSegment {
        CommentPathSegment::Key(name.to_string())
    }

    /// Shorthand for a `NestedComment`.
    fn nested(container_path: Vec<CommentPathSegment>, position: usize, text: &str) -> NestedComment {
        NestedComment {
            container_path,
            position,
            text: text.to_string(),
        }
    }

    /// `true` when the scan produced a warning carrying `code`.
    fn has_warning(scan: &PreScan, code: &str) -> bool {
        scan.warnings.iter().any(|w| w.code.as_deref() == Some(code))
    }

    #[test]
    fn extracts_own_line_comments() {
        let out = prescan_fence_content("# top\ntitle: foo\n# mid\nauthor: bar\n");
        let expected = vec![
            comment("top"),
            field("title", false),
            comment("mid"),
            field("author", false),
        ];
        assert_eq!(out.items, expected);
        assert!(out.nested_comments.is_empty());
    }

    #[test]
    fn splits_trailing_comments() {
        let out = prescan_fence_content("title: foo # inline\n");
        assert_eq!(out.items, vec![field("title", false), comment("inline")]);
        assert!(out.cleaned_yaml.contains("title: foo"));
        assert!(!out.cleaned_yaml.contains("inline"));
    }

    #[test]
    fn detects_fill_on_scalar() {
        let out = prescan_fence_content("dept: !fill Department\n");
        assert_eq!(out.items, vec![field("dept", true)]);
        assert!(out.cleaned_yaml.contains("dept: Department"));
        assert!(!out.cleaned_yaml.contains("!fill"));
    }

    #[test]
    fn detects_bare_fill() {
        let out = prescan_fence_content("dept: !fill\n");
        assert_eq!(out.items, vec![field("dept", true)]);
        assert!(!out.cleaned_yaml.contains("!fill"));
    }

    #[test]
    fn unknown_tag_warns() {
        let out = prescan_fence_content("x: !custom value\n");
        assert!(
            has_warning(&out, "parse::unsupported_yaml_tag"),
            "expected unsupported_yaml_tag warning"
        );
    }

    #[test]
    fn nested_comment_in_sequence_captured() {
        let out = prescan_fence_content(
            "arr:\n  # before-first\n  - a\n  # between\n  - b\n  # after-last\n",
        );
        let expected = vec![
            nested(vec![key("arr")], 0, "before-first"),
            nested(vec![key("arr")], 1, "between"),
            nested(vec![key("arr")], 2, "after-last"),
        ];
        assert_eq!(out.nested_comments, expected);
        assert!(
            !has_warning(&out, "parse::comments_in_nested_yaml_dropped"),
            "no dropped-comment warning expected; nested comments are now preserved"
        );
    }

    #[test]
    fn nested_comment_in_mapping_captured() {
        let out = prescan_fence_content("outer:\n  # comment\n  inner: 1\n");
        assert_eq!(
            out.nested_comments,
            vec![nested(vec![key("outer")], 0, "comment")]
        );
    }

    #[test]
    fn deep_nested_comment_path() {
        let out = prescan_fence_content("outer:\n  inner:\n    # deep\n    leaf: 1\n");
        assert_eq!(
            out.nested_comments,
            vec![nested(vec![key("outer"), key("inner")], 0, "deep")]
        );
    }

    #[test]
    fn comment_inside_seq_of_maps() {
        // Every sequence item is itself a mapping, so a comment sitting
        // between two keys of the first item is owned by that item's mapping.
        let out = prescan_fence_content(
            "items:\n  - name: a\n    # inside-first\n    val: 1\n  - name: b\n",
        );
        assert_eq!(
            out.nested_comments,
            vec![nested(
                vec![key("items"), CommentPathSegment::Index(0)],
                1,
                "inside-first",
            )]
        );
    }

    #[test]
    fn fill_on_flow_sequence_allowed() {
        let out = prescan_fence_content("x: !fill [1, 2]\n");
        assert!(
            out.fill_target_errors.is_empty(),
            "expected no error; !fill on sequences is supported"
        );
        assert_eq!(out.items, vec![field("x", true)]);
    }

    #[test]
    fn fill_on_flow_mapping_errors() {
        let out = prescan_fence_content("x: !fill {a: 1}\n");
        assert!(
            !out.fill_target_errors.is_empty(),
            "expected error; !fill on mappings is rejected"
        );
    }
}