Skip to main content

quillmark_core/document/
prescan.rs

1//! Pre-scan of a metadata fence's YAML content to recover features that
2//! serde_saphyr discards.
3//!
4//! Three features are recovered here:
5//!
6//! 1. **Top-level comments.** YAML comments are dropped by the YAML parser.
7//!    To round-trip them as [`super::FrontmatterItem::Comment`], we extract them
8//!    before parsing.
9//!
10//! 2. **Nested comments.** Comments inside block mappings/sequences are
11//!    captured with their structural path (sequence of keys/indices) and an
12//!    ordinal indicating where in the container they sit. The emitter
13//!    re-injects them at the matching position. See [`NestedComment`].
14//!
15//! 3. **`!fill` tags.** Custom YAML tags are accepted and dropped by
16//!    serde_saphyr; the value survives but the tag annotation is lost. We
17//!    detect `!fill` on top-level scalar fields, strip the tag from the
18//!    cleaned YAML (so serde_saphyr sees a plain scalar), and record a
19//!    `fill: true` marker on the resulting `Field` item.
20//!
21//! Other custom tags (`!include`, `!env`, …) on top-level fields are left in
22//! the cleaned YAML for serde_saphyr to drop, and are reported with a
22//! `parse::unsupported_yaml_tag` warning.
23
24use crate::Diagnostic;
25use crate::Severity;
26
/// A single top-level hint recovered from the fence body, kept in source order.
///
/// A `Field` records only the key and whether it carried `!fill`: the value
/// itself is produced by serde_saphyr from the cleaned text and is looked up
/// in the parsed map by that key. A `Comment` stands on its own.
///
/// An inline top-level comment (`field: value # text`) is always pushed
/// immediately after its host `Field`, so the emitter can peek one slot ahead
/// to attach it.
#[derive(Clone, Debug, PartialEq)]
pub enum PreItem {
    /// A top-level `key:` line.
    Field {
        /// The YAML key as written in the fence.
        key: String,
        /// `true` when the value carried a `!fill` tag.
        fill: bool,
    },
    /// A top-level comment.
    Comment {
        /// Comment text with the `#` marker (and one following space) removed.
        text: String,
        /// `false` for a comment on a line by itself, `true` for a trailing
        /// comment that shared a line with a field.
        inline: bool,
    },
}
42
/// One step along a path into the parsed YAML structure.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CommentPathSegment {
    /// Descend into a mapping through the given key.
    Key(String),
    /// Descend into a sequence through the given zero-based index.
    Index(usize),
}
49
50/// A comment that appears inside a nested mapping or sequence.
51///
52/// `container_path` locates the immediate parent container.
53///
54/// Position semantics depend on `inline`:
55/// - **Own-line (`inline = false`)**: `position` is the slot ordinal within
56///   the container's child list, ranging `0..=child_count`. The comment is
57///   rendered before the child at this position. `position == child_count`
58///   means "after all children".
59/// - **Inline (`inline = true`)**: `position` is the host child's index,
60///   ranging `0..child_count`. The comment is attached to that child's
61///   trailing line. An inline comment whose host is missing at emit time
62///   (orphan) degrades to an own-line comment at the same indent.
63#[derive(Debug, Clone, PartialEq, Eq)]
64pub struct NestedComment {
65    pub container_path: Vec<CommentPathSegment>,
66    pub position: usize,
67    pub text: String,
68    pub inline: bool,
69}
70
71/// Output of [`prescan_fence_content`].
72#[derive(Debug, Clone, Default)]
73pub struct PreScan {
74    /// YAML text with `!fill` tags stripped and all comment lines removed.
75    /// Suitable for feeding into serde_saphyr.
76    pub cleaned_yaml: String,
77    /// Ordered items discovered at the top level — fields (with fill flags)
78    /// and own-line top-level comments, in source order.
79    pub items: Vec<PreItem>,
80    /// Comments inside nested containers, with structural paths.
81    pub nested_comments: Vec<NestedComment>,
82    /// Warnings produced during the scan.
83    pub warnings: Vec<Diagnostic>,
84    /// Unsupported-fill-target errors. The parser turns these into
85    /// `ParseError::InvalidStructure` rejections (`!fill` on mappings).
86    pub fill_target_errors: Vec<String>,
87}
88
89/// Tracks one open YAML container while scanning lines.
90#[derive(Debug)]
91struct Frame {
92    /// Indent (in columns) of children of this container.
93    indent: usize,
94    /// Path to this container from the fence root.
95    path: Vec<CommentPathSegment>,
96    /// Container kind. `None` until the first child line determines it.
97    kind: Option<FrameKind>,
98    /// Number of children seen so far.
99    child_count: usize,
100}
101
/// The two block container shapes the scanner tracks.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum FrameKind {
    /// A block mapping (`key: value` children).
    Mapping,
    /// A block sequence (`- item` children).
    Sequence,
}
107
108/// Scan the body of a YAML metadata fence.
109///
110/// `content` is the text between the opening and closing `---` markers
111/// (exclusive), with leading/trailing whitespace preserved.
112pub fn prescan_fence_content(content: &str) -> PreScan {
113    let mut out = PreScan::default();
114
115    // We operate on the raw text to preserve positions. `lines()` strips
116    // line endings; we rebuild with `\n` which is what serde_saphyr expects.
117    let lines: Vec<&str> = content.split('\n').collect();
118    let mut cleaned_lines: Vec<String> = Vec::with_capacity(lines.len());
119
120    // Stack of open containers. The root frame is the frontmatter mapping
121    // itself; children appear at indent 0.
122    let mut stack: Vec<Frame> = vec![Frame {
123        indent: 0,
124        path: Vec::new(),
125        kind: Some(FrameKind::Mapping),
126        child_count: 0,
127    }];
128
129    for raw_line in &lines {
130        let line = *raw_line;
131        let indent = leading_space_count(line);
132        let trimmed = &line[indent..];
133
134        // Skip blank lines (no structural meaning, no comment).
135        if trimmed.is_empty() {
136            cleaned_lines.push(line.to_string());
137            continue;
138        }
139
140        // Pop frames that this line has dedented out of. A line at indent
141        // `indent` belongs to the deepest frame whose `indent <= indent`.
142        // (Equality means the line is a child at this frame's level.)
143        while let Some(frame) = stack.last() {
144            if frame.indent > indent {
145                stack.pop();
146            } else {
147                break;
148            }
149        }
150
151        // Case 1: own-line comment.
152        if trimmed.starts_with('#') {
153            let text = strip_comment_marker(trimmed);
154
155            // Determine the deepest frame that contains this line.
156            // For a comment at indent N, the containing frame is the one
157            // with the largest indent <= N. The stack is ordered shallow
158            // to deep; the last frame is the deepest. After the dedent
159            // pop above, the top frame's indent is <= indent, which is
160            // what we want.
161            let frame = stack.last().expect("root frame always present");
162
163            if frame.path.is_empty() {
164                // Top-level comment — preserve via PreItem::Comment.
165                out.items.push(PreItem::Comment {
166                    text: text.to_string(),
167                    inline: false,
168                });
169            } else {
170                out.nested_comments.push(NestedComment {
171                    container_path: frame.path.clone(),
172                    position: frame.child_count,
173                    text: text.to_string(),
174                    inline: false,
175                });
176            }
177            // Don't emit the line into the cleaned YAML — serde_saphyr
178            // ignores comments either way, but omitting the line avoids
179            // ambiguity with `!fill` rewriting.
180            continue;
181        }
182
183        // Case 2: sequence item line (`- ...`).
184        if trimmed == "-" || trimmed.starts_with("- ") {
185            // The frame at this indent must be a sequence. If the deepest
186            // frame's indent matches this line's indent, claim it; if it
187            // doesn't, push a fresh sequence frame at this indent under
188            // the deepest container.
189            let frame_idx = ensure_frame_at_indent(&mut stack, indent, FrameKind::Sequence);
190            let frame = &mut stack[frame_idx];
191            let item_index = frame.child_count;
192            frame.child_count += 1;
193            let parent_path: Vec<CommentPathSegment> = frame.path.clone();
194            // Snapshot the item path before borrowing mutably again below.
195            let item_path: Vec<CommentPathSegment> = {
196                let mut p = parent_path.clone();
197                p.push(CommentPathSegment::Index(item_index));
198                p
199            };
200            // Drop frames deeper than this sequence; the new item starts
201            // a fresh nested context.
202            while stack.len() > frame_idx + 1 {
203                stack.pop();
204            }
205
206            // Detach a possible trailing comment on the item line.
207            let after_dash_full = if trimmed == "-" { "" } else { &trimmed[2..] };
208            let (after_dash, trailing_comment) = split_trailing_comment(after_dash_full);
209            let after_dash_trimmed = after_dash.trim_start();
210            let inline_indent_offset = indent + 2 + (after_dash.len() - after_dash_trimmed.len());
211
212            if after_dash_trimmed.is_empty() {
213                // No inline value. Children, if any, will appear on the
214                // following lines with indent > this line's indent. Push a
215                // placeholder frame so when those children arrive, the
216                // sequence-item frame is already on the stack.
217                //
218                // We push a frame with indent = indent + 2; the actual
219                // child kind/indent gets resolved when the next non-empty
220                // line arrives.
221                stack.push(Frame {
222                    indent: indent + 2,
223                    path: item_path,
224                    kind: None,
225                    child_count: 0,
226                });
227            } else if split_key(after_dash_trimmed).is_some() {
228                // Inline mapping start (`- key: ...`). The key is the first
229                // child of an implicit mapping whose siblings sit at the
230                // same column as the key.
231                stack.push(Frame {
232                    indent: inline_indent_offset,
233                    path: item_path,
234                    kind: Some(FrameKind::Mapping),
235                    child_count: 1,
236                });
237            }
238            // Otherwise: inline scalar value, no further nesting.
239
240            // Rebuild the line with the trailing comment stripped, and
241            // capture it as an inline NestedComment attached to this item.
242            if let Some(c) = trailing_comment {
243                out.nested_comments.push(NestedComment {
244                    container_path: parent_path,
245                    position: item_index,
246                    text: strip_comment_marker(&c).to_string(),
247                    inline: true,
248                });
249                let head = format!("{:width$}", "", width = indent);
250                let body = if after_dash.trim_end().is_empty() {
251                    "-".to_string()
252                } else {
253                    format!("- {}", after_dash.trim_end())
254                };
255                cleaned_lines.push(format!("{}{}", head, body));
256            } else {
257                cleaned_lines.push(line.to_string());
258            }
259            continue;
260        }
261
262        // Case 3: top-level field line with possible `!fill` tag and/or
263        // trailing comment. Top-level only — `is_top_level` mirrors the
264        // pre-existing semantics.
265        let is_top_level = indent == 0;
266        if is_top_level {
267            if let Some((key, after_colon)) = split_key(line) {
268                let (value_part, trailing_comment) = split_trailing_comment(&after_colon);
269
270                let (fill, value_without_tag, had_non_fill_tag, fill_target_err) =
271                    inspect_fill_and_tags(&value_part, &key);
272
273                if had_non_fill_tag {
274                    out.warnings.push(
275                        Diagnostic::new(
276                            Severity::Warning,
277                            format!(
278                                "YAML tag on key `{}` is not supported; the tag has been dropped and the value kept",
279                                key
280                            ),
281                        )
282                        .with_code("parse::unsupported_yaml_tag".to_string()),
283                    );
284                }
285                if let Some(err) = fill_target_err {
286                    out.fill_target_errors.push(err);
287                }
288
289                out.items.push(PreItem::Field {
290                    key: key.clone(),
291                    fill,
292                });
293
294                // Update the structural stack for this top-level key.
295                // The root frame is at index 0; children appear at indent 0.
296                let root = &mut stack[0];
297                root.child_count += 1;
298                let key_path = vec![CommentPathSegment::Key(key.clone())];
299
300                // Pop everything but the root.
301                while stack.len() > 1 {
302                    stack.pop();
303                }
304
305                // If the value is empty (block style: `key:` followed by
306                // indented children), push a frame so nested comments can
307                // be attached. Otherwise (inline scalar/flow), no nested
308                // children come from this key.
309                if has_empty_inline_value(&value_without_tag) {
310                    stack.push(Frame {
311                        indent: 2,
312                        path: key_path,
313                        kind: None,
314                        child_count: 0,
315                    });
316                }
317
318                // Rebuild the line without the `!fill` tag (and without
319                // the trailing comment, since that goes on its own
320                // line now).
321                let cleaned = format!("{}:{}", key, value_without_tag);
322                cleaned_lines.push(cleaned);
323
324                if let Some(c) = trailing_comment {
325                    out.items.push(PreItem::Comment {
326                        text: strip_comment_marker(&c).to_string(),
327                        inline: true,
328                    });
329                }
330
331                continue;
332            }
333        }
334
335        // Case 4: nested key line (`key:` or `key: value`) inside a block
336        // mapping. We recognise simple `key:` patterns; unusual forms fall
337        // through to verbatim pass-through.
338        if let Some((key, after_colon)) = split_key(trimmed) {
339            // The frame at this indent must be a mapping.
340            let frame_idx = ensure_frame_at_indent(&mut stack, indent, FrameKind::Mapping);
341            let frame = &mut stack[frame_idx];
342            let key_index = frame.child_count;
343            frame.child_count += 1;
344            let parent_path: Vec<CommentPathSegment> = frame.path.clone();
345            let key_path: Vec<CommentPathSegment> = {
346                let mut p = parent_path.clone();
347                p.push(CommentPathSegment::Key(key.clone()));
348                p
349            };
350            // Drop frames deeper than this mapping; siblings reset nesting.
351            while stack.len() > frame_idx + 1 {
352                stack.pop();
353            }
354
355            // Detach a possible trailing comment on the line. We keep the
356            // value (sans comment) in the cleaned YAML and capture the
357            // comment as an inline NestedComment attached to this key.
358            let (value_part, trailing_comment) = split_trailing_comment(&after_colon);
359            if let Some(c) = trailing_comment {
360                out.nested_comments.push(NestedComment {
361                    container_path: parent_path,
362                    position: key_index,
363                    text: strip_comment_marker(&c).to_string(),
364                    inline: true,
365                });
366                let head = format!("{:width$}", "", width = indent);
367                cleaned_lines.push(format!("{}{}:{}", head, key, value_part));
368            } else {
369                cleaned_lines.push(line.to_string());
370            }
371
372            // If the value is empty (block style) push a frame for nested
373            // children at indent + 2.
374            if has_empty_inline_value(&after_colon) {
375                stack.push(Frame {
376                    indent: indent + 2,
377                    path: key_path,
378                    kind: None,
379                    child_count: 0,
380                });
381            }
382            continue;
383        }
384
385        // Everything else: pass through verbatim.
386        cleaned_lines.push(line.to_string());
387    }
388
389    out.cleaned_yaml = cleaned_lines.join("\n");
390    out
391}
392
393/// Ensure the deepest frame on the stack matches the given `indent` and
394/// kind, pushing a new frame if necessary. Returns the index of the matched
395/// or freshly-pushed frame.
396fn ensure_frame_at_indent(stack: &mut Vec<Frame>, indent: usize, kind: FrameKind) -> usize {
397    // After dedent popping, the top frame has `indent <= indent`. If it
398    // matches exactly, claim it. Otherwise, push a new child frame under
399    // it that has the requested indent.
400    let top_idx = stack.len() - 1;
401    let top = &mut stack[top_idx];
402
403    if top.indent == indent {
404        if top.kind.is_none() {
405            top.kind = Some(kind);
406        }
407        return top_idx;
408    }
409
410    // The top frame is shallower (its indent < indent). Push a new frame
411    // at this indent, parented under the top frame. The new frame's path
412    // is a continuation: for a sequence at deeper indent under a mapping,
413    // the path is the same as the parent's `path` (because the sequence
414    // is the value of the parent's most recent key).
415    //
416    // Concretely, when we encounter `- foo` at indent 2 and the stack top
417    // is the root mapping with indent 0, the parent frame's most-recent
418    // child path was already pushed when we saw `key:` in case 3 (we
419    // pushed a placeholder frame at indent 2 with `path = [Key(key)]` and
420    // unknown kind). So usually we won't reach this branch — the
421    // placeholder is already there. This branch is a safety net for
422    // unusual layouts.
423    let parent_path = top.path.clone();
424    stack.push(Frame {
425        indent,
426        path: parent_path,
427        kind: Some(kind),
428        child_count: 0,
429    });
430    stack.len() - 1
431}
432
/// Remove the comment marker from the front of `raw`.
///
/// Every leading `#` is dropped, followed by at most one space; any further
/// whitespace is kept as part of the comment text.
fn strip_comment_marker(raw: &str) -> &str {
    let body = raw.trim_start_matches('#');
    match body.strip_prefix(' ') {
        Some(text) => text,
        None => body,
    }
}
440
/// Count the ASCII spaces at the start of `line`.
///
/// Only the space character counts; a tab ends the run and is not expanded.
fn leading_space_count(line: &str) -> usize {
    let without_indent = line.trim_start_matches(' ');
    line.len() - without_indent.len()
}
446
447/// `true` when the value portion of a `key:` line is empty (after trimming
448/// whitespace). Trailing comments are ignored. An empty value means the
449/// real value is on subsequent indented lines (block mapping or sequence).
450fn has_empty_inline_value(after_colon: &str) -> bool {
451    let (v, _) = split_trailing_comment(after_colon);
452    v.trim().is_empty()
453}
454
/// Split `line` into `(key, rest_after_colon)`.
///
/// Only identifier-like keys are recognised: the line must start with an
/// ASCII letter or `_`, continue with ASCII letters, digits or `_`, and be
/// followed immediately by `:`. Everything after that colon is returned
/// untouched as the second element. Any other shape yields `None`, leaving
/// the line to the verbatim path and ultimately to serde_saphyr.
fn split_key(line: &str) -> Option<(String, String)> {
    let first = *line.as_bytes().first()?;
    if !first.is_ascii_alphabetic() && first != b'_' {
        return None;
    }

    // The first byte already qualifies, so counting from the start gives
    // the full key length. All counted bytes are ASCII, hence the slices
    // below always fall on character boundaries.
    let key_len = line
        .bytes()
        .take_while(|b| b.is_ascii_alphanumeric() || *b == b'_')
        .count();

    let (key, tail) = line.split_at(key_len);
    let rest = tail.strip_prefix(':')?;
    Some((key.to_string(), rest.to_string()))
}
480
/// Split a value string into `(value, trailing_comment)`.
///
/// A trailing comment starts at a `#` that is outside any quoted string and
/// is either the first byte of `value` or preceded by a space or tab. When
/// one is found, the returned value has its trailing whitespace trimmed and
/// the comment is returned from the `#` onward. Otherwise `value` is
/// returned unchanged with `None`.
///
/// Quote handling:
/// - A `"` or `'` opens a quoted string only where a new token can begin:
///   at the start of `value`, after whitespace, or after one of `[ { , :`.
///   A quote character in the middle of a plain scalar (`don't`) is ordinary
///   text and does not hide a later comment.
/// - Inside `"..."`, a backslash escapes the following byte.
/// - Inside `'...'`, a doubled `''` is an escaped quote and does not close
///   the string.
fn split_trailing_comment(value: &str) -> (String, Option<String>) {
    enum Quote {
        Outside,
        Double,
        Single,
    }

    let bytes = value.as_bytes();
    let mut quote = Quote::Outside;
    // Starts `true` so a `#` in the very first position counts as a comment
    // (e.g. the text after the dash in `- # note`).
    let mut prev_was_ws = true;
    let mut can_open_quote = true;
    let mut i = 0;

    while i < bytes.len() {
        let b = bytes[i];
        match quote {
            Quote::Double => {
                if b == b'\\' && i + 1 < bytes.len() {
                    // Skip the escaped byte so `\"` does not close the string.
                    i += 2;
                    continue;
                }
                if b == b'"' {
                    quote = Quote::Outside;
                }
            }
            Quote::Single => {
                if b == b'\'' {
                    if bytes.get(i + 1) == Some(&b'\'') {
                        // `''` is an escaped quote; stay inside the string.
                        i += 2;
                        continue;
                    }
                    quote = Quote::Outside;
                }
            }
            Quote::Outside => {
                if b == b'#' && prev_was_ws {
                    // `#` is ASCII, so `i` is always a character boundary.
                    let v = value[..i].trim_end().to_string();
                    let c = value[i..].to_string();
                    return (v, Some(c));
                }
                if can_open_quote {
                    if b == b'"' {
                        quote = Quote::Double;
                    } else if b == b'\'' {
                        quote = Quote::Single;
                    }
                }
            }
        }
        prev_was_ws = matches!(b, b' ' | b'\t');
        can_open_quote = prev_was_ws || matches!(b, b'[' | b'{' | b',' | b':');
        i += 1;
    }
    (value.to_string(), None)
}
521
/// Examine the value portion of a field line for a `!fill` tag or any other
/// custom tag.
///
/// Returns `(fill, value_without_tag, had_other_tag, fill_target_err)`:
///
/// - `fill`: `true` when the value begins (after leading whitespace) with
///   `!fill` followed by a space, a tab, or the end of the value.
/// - `value_without_tag`: for a `!fill` value, the text with the tag removed:
///   a single space plus the remaining value, or just the original leading
///   whitespace when nothing follows the tag. For every other input, `value`
///   unchanged.
/// - `had_other_tag`: `true` when the value starts with `!` but is not a
///   `!fill` tag (this includes `!fillwhatever`). The tag is left in place.
/// - `fill_target_err`: set when `!fill` is followed by a flow mapping
///   (`{`), naming `key` in the message. Flow sequences and scalars produce
///   no error. Block-form mappings are not detected by this function.
fn inspect_fill_and_tags(value: &str, key: &str) -> (bool, String, bool, Option<String>) {
    let body = value.trim_start();
    let leading_ws = &value[..value.len() - body.len()];

    // `!fill` counts only as a whole tag: it must be followed by whitespace
    // or end the value. `!fillwhatever` is some other tag.
    let after_fill = body.strip_prefix("!fill").filter(|tail| {
        tail.chars()
            .next()
            .map_or(true, |c| c == ' ' || c == '\t')
    });

    let tail = match after_fill {
        Some(tail) => tail,
        // Not a fill tag: keep the text as-is and report whether it carries
        // some other `!tag` (an empty value never does).
        None => return (false, value.to_string(), body.starts_with('!'), None),
    };

    let payload = tail.trim_start();

    // Flow mappings are rejected as fill targets; flow sequences and
    // scalars are allowed.
    let target_err = if payload.starts_with('{') {
        Some(format!(
            "`!fill` on key `{}` targets a mapping; `!fill` is supported on scalars and sequences only",
            key
        ))
    } else {
        None
    };

    // With a payload the cleaned text reads `key: payload`; a bare tag keeps
    // only the original leading whitespace so the line shape is preserved.
    let cleaned = if payload.is_empty() {
        leading_ws.to_string()
    } else {
        format!(" {}", payload)
    };

    (true, cleaned, false, target_err)
}
594
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `PreItem::Field`.
    fn field(key: &str, fill: bool) -> PreItem {
        PreItem::Field {
            key: key.to_string(),
            fill,
        }
    }

    /// Build a `PreItem::Comment`.
    fn comment(text: &str, inline: bool) -> PreItem {
        PreItem::Comment {
            text: text.to_string(),
            inline,
        }
    }

    /// Build a `CommentPathSegment::Key`.
    fn key(name: &str) -> CommentPathSegment {
        CommentPathSegment::Key(name.to_string())
    }

    /// Build a `NestedComment`.
    fn nested(
        container_path: Vec<CommentPathSegment>,
        position: usize,
        text: &str,
        inline: bool,
    ) -> NestedComment {
        NestedComment {
            container_path,
            position,
            text: text.to_string(),
            inline,
        }
    }

    /// `true` when the scan produced a warning carrying `code`.
    fn has_warning(scan: &PreScan, code: &str) -> bool {
        scan.warnings.iter().any(|w| w.code.as_deref() == Some(code))
    }

    #[test]
    fn extracts_own_line_comments() {
        let out = prescan_fence_content("# top\ntitle: foo\n# mid\nauthor: bar\n");
        assert_eq!(
            out.items,
            vec![
                comment("top", false),
                field("title", false),
                comment("mid", false),
                field("author", false),
            ]
        );
        assert!(out.nested_comments.is_empty());
    }

    #[test]
    fn splits_trailing_comments() {
        let out = prescan_fence_content("title: foo # inline\n");
        assert_eq!(
            out.items,
            vec![field("title", false), comment("inline", true)]
        );
        assert!(out.cleaned_yaml.contains("title: foo"));
        assert!(!out.cleaned_yaml.contains("inline"));
    }

    #[test]
    fn detects_fill_on_scalar() {
        let out = prescan_fence_content("dept: !fill Department\n");
        assert_eq!(out.items, vec![field("dept", true)]);
        assert!(out.cleaned_yaml.contains("dept: Department"));
        assert!(!out.cleaned_yaml.contains("!fill"));
    }

    #[test]
    fn detects_bare_fill() {
        let out = prescan_fence_content("dept: !fill\n");
        assert_eq!(out.items, vec![field("dept", true)]);
        assert!(!out.cleaned_yaml.contains("!fill"));
    }

    #[test]
    fn unknown_tag_warns() {
        let out = prescan_fence_content("x: !custom value\n");
        assert!(
            has_warning(&out, "parse::unsupported_yaml_tag"),
            "expected unsupported_yaml_tag warning"
        );
    }

    #[test]
    fn nested_comment_in_sequence_captured() {
        let out = prescan_fence_content(
            "arr:\n  # before-first\n  - a\n  # between\n  - b\n  # after-last\n",
        );
        assert_eq!(
            out.nested_comments,
            vec![
                nested(vec![key("arr")], 0, "before-first", false),
                nested(vec![key("arr")], 1, "between", false),
                nested(vec![key("arr")], 2, "after-last", false),
            ]
        );
        assert!(
            !has_warning(&out, "parse::comments_in_nested_yaml_dropped"),
            "no dropped-comment warning expected; nested comments are now preserved"
        );
    }

    #[test]
    fn nested_comment_in_mapping_captured() {
        let out = prescan_fence_content("outer:\n  # comment\n  inner: 1\n");
        assert_eq!(
            out.nested_comments,
            vec![nested(vec![key("outer")], 0, "comment", false)]
        );
    }

    #[test]
    fn deep_nested_comment_path() {
        let out = prescan_fence_content("outer:\n  inner:\n    # deep\n    leaf: 1\n");
        assert_eq!(
            out.nested_comments,
            vec![nested(vec![key("outer"), key("inner")], 0, "deep", false)]
        );
    }

    #[test]
    fn comment_inside_seq_of_maps() {
        // Every sequence item is a mapping; a comment sitting between two
        // keys of the first item is owned by that item's mapping.
        let out = prescan_fence_content(
            "items:\n  - name: a\n    # inside-first\n    val: 1\n  - name: b\n",
        );
        assert_eq!(
            out.nested_comments,
            vec![nested(
                vec![key("items"), CommentPathSegment::Index(0)],
                1,
                "inside-first",
                false,
            )]
        );
    }

    #[test]
    fn nested_inline_on_sequence_item() {
        // The trailing comment on `- a # tail` is recorded against item 0
        // (the host's index), not the slot that follows it.
        let out = prescan_fence_content("arr:\n  - a # tail\n  - b\n");
        assert_eq!(
            out.nested_comments,
            vec![nested(vec![key("arr")], 0, "tail", true)]
        );
        assert!(out.cleaned_yaml.contains("- a\n"));
        assert!(!out.cleaned_yaml.contains("tail"));
    }

    #[test]
    fn nested_inline_on_mapping_field() {
        // The trailing comment on `inner: 1 # tail` under `outer:` is
        // recorded inline against host index 0.
        let out = prescan_fence_content("outer:\n  inner: 1 # tail\n");
        assert_eq!(
            out.nested_comments,
            vec![nested(vec![key("outer")], 0, "tail", true)]
        );
    }

    #[test]
    fn fill_on_flow_sequence_allowed() {
        let out = prescan_fence_content("x: !fill [1, 2]\n");
        assert!(
            out.fill_target_errors.is_empty(),
            "expected no error; !fill on sequences is supported"
        );
        assert_eq!(out.items, vec![field("x", true)]);
    }

    #[test]
    fn fill_on_flow_mapping_errors() {
        let out = prescan_fence_content("x: !fill {a: 1}\n");
        assert!(
            !out.fill_target_errors.is_empty(),
            "expected error; !fill on mappings is rejected"
        );
    }
}