Skip to main content

quillmark_core/document/
prescan.rs

1//! Pre-scan of a metadata fence's YAML content to recover features that
2//! serde_saphyr discards.
3//!
4//! Three features are recovered here:
5//!
6//! 1. **Top-level comments.** YAML comments are dropped by the YAML parser.
7//!    To round-trip them as [`super::FrontmatterItem::Comment`], we extract them
8//!    before parsing.
9//!
10//! 2. **Nested comments.** Comments inside block mappings/sequences are
11//!    captured with their structural path (sequence of keys/indices) and an
12//!    ordinal indicating where in the container they sit. The emitter
13//!    re-injects them at the matching position. See [`NestedComment`].
14//!
//! 3. **`!fill` tags.** Custom YAML tags are accepted and dropped by
//!    serde_saphyr; the value survives but the tag annotation is lost. We
//!    detect `!fill` on top-level fields (scalars and sequences; `!fill` on
//!    a flow mapping is reported as an error), strip the tag from the
//!    cleaned YAML (so serde_saphyr sees an untagged value), and record a
//!    `fill: true` marker on the resulting `Field` item.
20//!
21//! Other custom tags (`!include`, `!env`, …) are stripped with a
22//! `parse::unsupported_yaml_tag` warning.
23
24use crate::Diagnostic;
25use crate::Severity;
26
/// A single top-level hint recovered from the fence body, in source order.
///
/// The value of a field is *not* stored here: it comes from serde_saphyr
/// parsing the cleaned text, and the YAML key is what ties a `Field` hint
/// back to the parsed map entry.
///
/// An inline top-level comment (`field: value # text`) is always pushed
/// directly after the `Field` it trails, so a consumer can attach it by
/// looking one slot ahead of a `Field`.
#[derive(Debug, Clone, PartialEq)]
pub enum PreItem {
    /// A top-level `key: ...` line.
    Field {
        /// The YAML key exactly as written in the source.
        key: String,
        /// `true` when the value carried a `!fill` tag.
        fill: bool,
    },
    /// A top-level comment.
    Comment {
        /// Comment text with the leading `#` marker removed.
        text: String,
        /// `false` for a comment on a line by itself, `true` for a comment
        /// trailing a field on the same line.
        inline: bool,
    },
}
42
/// One step of a path that walks from the fence root into the parsed YAML
/// structure.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CommentPathSegment {
    /// Descend into a mapping through the entry with this key.
    Key(String),
    /// Descend into a sequence through the item at this zero-based index.
    Index(usize),
}
49
50/// A comment that appears inside a nested mapping or sequence.
51///
52/// `container_path` locates the immediate parent container.
53///
54/// Position semantics depend on `inline`:
55/// - **Own-line (`inline = false`)**: `position` is the slot ordinal within
56///   the container's child list, ranging `0..=child_count`. The comment is
57///   rendered before the child at this position. `position == child_count`
58///   means "after all children".
59/// - **Inline (`inline = true`)**: `position` is the host child's index,
60///   ranging `0..child_count`. The comment is attached to that child's
61///   trailing line. An inline comment whose host is missing at emit time
62///   (orphan) degrades to an own-line comment at the same indent.
63#[derive(Debug, Clone, PartialEq, Eq)]
64pub struct NestedComment {
65    pub container_path: Vec<CommentPathSegment>,
66    pub position: usize,
67    pub text: String,
68    pub inline: bool,
69}
70
71/// Output of [`prescan_fence_content`].
72#[derive(Debug, Clone, Default)]
73pub struct PreScan {
74    /// YAML text with `!fill` tags stripped and all comment lines removed.
75    /// Suitable for feeding into serde_saphyr.
76    pub cleaned_yaml: String,
77    /// Ordered items discovered at the top level — fields (with fill flags)
78    /// and own-line top-level comments, in source order.
79    pub items: Vec<PreItem>,
80    /// Comments inside nested containers, with structural paths.
81    pub nested_comments: Vec<NestedComment>,
82    /// Warnings produced during the scan.
83    pub warnings: Vec<Diagnostic>,
84    /// Unsupported-fill-target errors. The parser turns these into
85    /// `ParseError::InvalidStructure` rejections (`!fill` on mappings).
86    pub fill_target_errors: Vec<String>,
87}
88
89/// Tracks one open YAML container while scanning lines.
90#[derive(Debug)]
91struct Frame {
92    /// Indent (in columns) of children of this container.
93    indent: usize,
94    /// Path to this container from the fence root.
95    path: Vec<CommentPathSegment>,
96    /// Container kind. `None` until the first child line determines it.
97    kind: Option<FrameKind>,
98    /// Number of children seen so far.
99    child_count: usize,
100}
101
/// The two block container shapes the scanner distinguishes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FrameKind {
    /// Children are `key: ...` entries.
    Mapping,
    /// Children are `- ...` items.
    Sequence,
}
107
108/// Scan the body of a YAML metadata fence.
109///
110/// `content` is the text between the opening and closing `---` markers
111/// (exclusive), with leading/trailing whitespace preserved.
112pub fn prescan_fence_content(content: &str) -> PreScan {
113    let mut out = PreScan::default();
114
115    // We operate on the raw text to preserve positions. `lines()` strips
116    // line endings; we rebuild with `\n` which is what serde_saphyr expects.
117    let lines: Vec<&str> = content.split('\n').collect();
118    let mut cleaned_lines: Vec<String> = Vec::with_capacity(lines.len());
119
120    // Stack of open containers. The root frame is the frontmatter mapping
121    // itself; children appear at indent 0.
122    let mut stack: Vec<Frame> = vec![Frame {
123        indent: 0,
124        path: Vec::new(),
125        kind: Some(FrameKind::Mapping),
126        child_count: 0,
127    }];
128
129    for raw_line in &lines {
130        let line = *raw_line;
131        let indent = leading_space_count(line);
132        let trimmed = &line[indent..];
133
134        // Skip blank lines (no structural meaning, no comment).
135        if trimmed.is_empty() {
136            cleaned_lines.push(line.to_string());
137            continue;
138        }
139
140        // Pop frames that this line has dedented out of. A line at indent
141        // `indent` belongs to the deepest frame whose `indent <= indent`.
142        // (Equality means the line is a child at this frame's level.)
143        while let Some(frame) = stack.last() {
144            if frame.indent > indent {
145                stack.pop();
146            } else {
147                break;
148            }
149        }
150
151        // Case 1: own-line comment.
152        if trimmed.starts_with('#') {
153            let text = strip_comment_marker(trimmed);
154
155            // Determine the deepest frame that contains this line.
156            // For a comment at indent N, the containing frame is the one
157            // with the largest indent <= N. The stack is ordered shallow
158            // to deep; the last frame is the deepest. After the dedent
159            // pop above, the top frame's indent is <= indent, which is
160            // what we want.
161            let frame = stack.last().expect("root frame always present");
162
163            if frame.path.is_empty() {
164                // Top-level comment — preserve via PreItem::Comment.
165                out.items.push(PreItem::Comment {
166                    text: text.to_string(),
167                    inline: false,
168                });
169            } else {
170                out.nested_comments.push(NestedComment {
171                    container_path: frame.path.clone(),
172                    position: frame.child_count,
173                    text: text.to_string(),
174                    inline: false,
175                });
176            }
177            // Don't emit the line into the cleaned YAML — serde_saphyr
178            // ignores comments either way, but omitting the line avoids
179            // ambiguity with `!fill` rewriting.
180            continue;
181        }
182
183        // Case 2: sequence item line (`- ...`).
184        if trimmed == "-" || trimmed.starts_with("- ") {
185            // The frame at this indent must be a sequence. If the deepest
186            // frame's indent matches this line's indent, claim it; if it
187            // doesn't, push a fresh sequence frame at this indent under
188            // the deepest container.
189            let frame_idx = ensure_frame_at_indent(&mut stack, indent, FrameKind::Sequence);
190            let frame = &mut stack[frame_idx];
191            let item_index = frame.child_count;
192            frame.child_count += 1;
193            let parent_path: Vec<CommentPathSegment> = frame.path.clone();
194            // Snapshot the item path before borrowing mutably again below.
195            let item_path: Vec<CommentPathSegment> = {
196                let mut p = parent_path.clone();
197                p.push(CommentPathSegment::Index(item_index));
198                p
199            };
200            // Drop frames deeper than this sequence; the new item starts
201            // a fresh nested context.
202            while stack.len() > frame_idx + 1 {
203                stack.pop();
204            }
205
206            // Detach a possible trailing comment on the item line.
207            let after_dash_full = if trimmed == "-" { "" } else { &trimmed[2..] };
208            let (after_dash, trailing_comment) = split_trailing_comment(after_dash_full);
209            let after_dash_trimmed = after_dash.trim_start();
210            let inline_indent_offset = indent + 2 + (after_dash.len() - after_dash_trimmed.len());
211
212            if after_dash_trimmed.is_empty() {
213                // No inline value. Children, if any, will appear on the
214                // following lines with indent > this line's indent. Push a
215                // placeholder frame so when those children arrive, the
216                // sequence-item frame is already on the stack.
217                //
218                // We push a frame with indent = indent + 2; the actual
219                // child kind/indent gets resolved when the next non-empty
220                // line arrives.
221                stack.push(Frame {
222                    indent: indent + 2,
223                    path: item_path,
224                    kind: None,
225                    child_count: 0,
226                });
227            } else if split_key(after_dash_trimmed).is_some() {
228                // Inline mapping start (`- key: ...`). The key is the first
229                // child of an implicit mapping whose siblings sit at the
230                // same column as the key.
231                stack.push(Frame {
232                    indent: inline_indent_offset,
233                    path: item_path,
234                    kind: Some(FrameKind::Mapping),
235                    child_count: 1,
236                });
237            }
238            // Otherwise: inline scalar value, no further nesting.
239
240            // Rebuild the line with the trailing comment stripped, and
241            // capture it as an inline NestedComment attached to this item.
242            if let Some(c) = trailing_comment {
243                out.nested_comments.push(NestedComment {
244                    container_path: parent_path,
245                    position: item_index,
246                    text: strip_comment_marker(&c).to_string(),
247                    inline: true,
248                });
249                let head = format!("{:width$}", "", width = indent);
250                let body = if after_dash.trim_end().is_empty() {
251                    "-".to_string()
252                } else {
253                    format!("- {}", after_dash.trim_end())
254                };
255                cleaned_lines.push(format!("{}{}", head, body));
256            } else {
257                cleaned_lines.push(line.to_string());
258            }
259            continue;
260        }
261
262        // Case 3: top-level field line with possible `!fill` tag and/or
263        // trailing comment. Top-level only — `is_top_level` mirrors the
264        // pre-existing semantics.
265        let is_top_level = indent == 0;
266        if is_top_level {
267            if let Some((key, after_colon)) = split_key(line) {
268                let (value_part, trailing_comment) = split_trailing_comment(&after_colon);
269
270                let (fill, value_without_tag, had_non_fill_tag, fill_target_err) =
271                    inspect_fill_and_tags(&value_part, &key);
272
273                if had_non_fill_tag {
274                    out.warnings.push(
275                        Diagnostic::new(
276                            Severity::Warning,
277                            format!(
278                                "YAML tag on key `{}` is not supported; the tag has been dropped and the value kept",
279                                key
280                            ),
281                        )
282                        .with_code("parse::unsupported_yaml_tag".to_string()),
283                    );
284                }
285                if let Some(err) = fill_target_err {
286                    out.fill_target_errors.push(err);
287                }
288                if fill && (key == "QUILL" || key == "KIND") {
289                    out.fill_target_errors.push(format!(
290                        "`!fill` cannot be applied to the sentinel key `{}` — sentinels are routing keys, not data, and must resolve at parse time",
291                        key
292                    ));
293                }
294
295                out.items.push(PreItem::Field {
296                    key: key.clone(),
297                    fill,
298                });
299
300                // Update the structural stack for this top-level key.
301                // The root frame is at index 0; children appear at indent 0.
302                let root = &mut stack[0];
303                root.child_count += 1;
304                let key_path = vec![CommentPathSegment::Key(key.clone())];
305
306                // Pop everything but the root.
307                while stack.len() > 1 {
308                    stack.pop();
309                }
310
311                // If the value is empty (block style: `key:` followed by
312                // indented children), push a frame so nested comments can
313                // be attached. Otherwise (inline scalar/flow), no nested
314                // children come from this key.
315                if has_empty_inline_value(&value_without_tag) {
316                    stack.push(Frame {
317                        indent: 2,
318                        path: key_path,
319                        kind: None,
320                        child_count: 0,
321                    });
322                }
323
324                // Rebuild the line without the `!fill` tag (and without
325                // the trailing comment, since that goes on its own
326                // line now).
327                let cleaned = format!("{}:{}", key, value_without_tag);
328                cleaned_lines.push(cleaned);
329
330                if let Some(c) = trailing_comment {
331                    out.items.push(PreItem::Comment {
332                        text: strip_comment_marker(&c).to_string(),
333                        inline: true,
334                    });
335                }
336
337                continue;
338            }
339        }
340
341        // Case 4: nested key line (`key:` or `key: value`) inside a block
342        // mapping. We recognise simple `key:` patterns; unusual forms fall
343        // through to verbatim pass-through.
344        if let Some((key, after_colon)) = split_key(trimmed) {
345            // The frame at this indent must be a mapping.
346            let frame_idx = ensure_frame_at_indent(&mut stack, indent, FrameKind::Mapping);
347            let frame = &mut stack[frame_idx];
348            let key_index = frame.child_count;
349            frame.child_count += 1;
350            let parent_path: Vec<CommentPathSegment> = frame.path.clone();
351            let key_path: Vec<CommentPathSegment> = {
352                let mut p = parent_path.clone();
353                p.push(CommentPathSegment::Key(key.clone()));
354                p
355            };
356            // Drop frames deeper than this mapping; siblings reset nesting.
357            while stack.len() > frame_idx + 1 {
358                stack.pop();
359            }
360
361            // Detach a possible trailing comment on the line. We keep the
362            // value (sans comment) in the cleaned YAML and capture the
363            // comment as an inline NestedComment attached to this key.
364            let (value_part, trailing_comment) = split_trailing_comment(&after_colon);
365            if let Some(c) = trailing_comment {
366                out.nested_comments.push(NestedComment {
367                    container_path: parent_path,
368                    position: key_index,
369                    text: strip_comment_marker(&c).to_string(),
370                    inline: true,
371                });
372                let head = format!("{:width$}", "", width = indent);
373                cleaned_lines.push(format!("{}{}:{}", head, key, value_part));
374            } else {
375                cleaned_lines.push(line.to_string());
376            }
377
378            // If the value is empty (block style) push a frame for nested
379            // children at indent + 2.
380            if has_empty_inline_value(&after_colon) {
381                stack.push(Frame {
382                    indent: indent + 2,
383                    path: key_path,
384                    kind: None,
385                    child_count: 0,
386                });
387            }
388            continue;
389        }
390
391        // Everything else: pass through verbatim.
392        cleaned_lines.push(line.to_string());
393    }
394
395    out.cleaned_yaml = cleaned_lines.join("\n");
396    out
397}
398
399/// Ensure the deepest frame on the stack matches the given `indent` and
400/// kind, pushing a new frame if necessary. Returns the index of the matched
401/// or freshly-pushed frame.
402fn ensure_frame_at_indent(stack: &mut Vec<Frame>, indent: usize, kind: FrameKind) -> usize {
403    // After dedent popping, the top frame has `indent <= indent`. If it
404    // matches exactly, claim it. Otherwise, push a new child frame under
405    // it that has the requested indent.
406    let top_idx = stack.len() - 1;
407    let top = &mut stack[top_idx];
408
409    if top.indent == indent {
410        if top.kind.is_none() {
411            top.kind = Some(kind);
412        }
413        return top_idx;
414    }
415
416    // The top frame is shallower (its indent < indent). Push a new frame
417    // at this indent, parented under the top frame. The new frame's path
418    // is a continuation: for a sequence at deeper indent under a mapping,
419    // the path is the same as the parent's `path` (because the sequence
420    // is the value of the parent's most recent key).
421    //
422    // Concretely, when we encounter `- foo` at indent 2 and the stack top
423    // is the root mapping with indent 0, the parent frame's most-recent
424    // child path was already pushed when we saw `key:` in case 3 (we
425    // pushed a placeholder frame at indent 2 with `path = [Key(key)]` and
426    // unknown kind). So usually we won't reach this branch — the
427    // placeholder is already there. This branch is a safety net for
428    // unusual layouts.
429    let parent_path = top.path.clone();
430    stack.push(Frame {
431        indent,
432        path: parent_path,
433        kind: Some(kind),
434        child_count: 0,
435    });
436    stack.len() - 1
437}
438
/// Remove the YAML comment marker from the front of `raw`.
///
/// Every leading `#` is dropped, followed by at most one space. Text that
/// does not start with `#` only loses a single leading space, if present.
fn strip_comment_marker(raw: &str) -> &str {
    let marker_len = raw.bytes().take_while(|&b| b == b'#').count();
    let body = &raw[marker_len..];
    match body.strip_prefix(' ') {
        Some(text) => text,
        None => body,
    }
}
446
/// Count the ASCII spaces at the start of `line`.
///
/// Only the space character counts; a tab ends the run and is not
/// expanded. Tabs don't appear in canonical Quillmark YAML and would be a
/// separate problem.
fn leading_space_count(line: &str) -> usize {
    line.len() - line.trim_start_matches(' ').len()
}
452
453/// `true` when the value portion of a `key:` line is empty (after trimming
454/// whitespace). Trailing comments are ignored. An empty value means the
455/// real value is on subsequent indented lines (block mapping or sequence).
456fn has_empty_inline_value(after_colon: &str) -> bool {
457    let (v, _) = split_trailing_comment(after_colon);
458    v.trim().is_empty()
459}
460
/// Split `line` at its first `:` into `(key, rest_after_colon)`.
///
/// Returns `None` unless everything before that colon is a bare,
/// identifier-like key: `[a-zA-Z_][a-zA-Z0-9_]*`. YAML allows far more, but
/// Quillmark's schema restricts field names to this shape (plus reserved
/// uppercase sentinels); anything more exotic is left for serde_saphyr to
/// parse or reject on the unmodified path.
fn split_key(line: &str) -> Option<(String, String)> {
    let (candidate, rest) = line.split_once(':')?;

    let mut key_bytes = candidate.bytes();
    // An empty candidate (line starts with `:`) is not a key.
    let head = key_bytes.next()?;
    let head_ok = head == b'_' || head.is_ascii_alphabetic();
    let tail_ok = key_bytes.all(|b| b == b'_' || b.is_ascii_alphanumeric());

    if head_ok && tail_ok {
        Some((candidate.to_string(), rest.to_string()))
    } else {
        None
    }
}
486
/// Split a value string into `(value, trailing_comment)`.
///
/// A comment starts at a `#` that is at the very beginning of `value` or is
/// preceded by a space or tab, and that is not inside a quoted string. When
/// a comment is found, the returned value has its trailing whitespace
/// removed and the comment keeps its `#` marker. Without a comment, `value`
/// is returned unchanged.
///
/// Quoting rules of this (deliberately simple) scanner:
///
/// - A `"` or `'` opens a quoted string only at the start of a token: at
///   the beginning of `value`, after whitespace, or after one of the flow
///   punctuation characters `[`, `{`, `,`, `:`. A quote in the middle of a
///   plain scalar (`O'Brien`) is an ordinary character, so a comment after
///   such a value is still detected.
/// - Inside `"..."`, a backslash escapes the next byte.
/// - Inside `'...'`, a doubled `''` is an escaped quote and does not close
///   the string.
fn split_trailing_comment(value: &str) -> (String, Option<String>) {
    let bytes = value.as_bytes();
    // The quote byte of the string we are currently inside, if any.
    let mut open_quote: Option<u8> = None;
    // Start as if whitespace came before `value`, so a value that begins
    // with `#` (e.g. the text after the dash in `- # note`) is a comment.
    let mut prev_was_ws = true;
    let mut at_token_start = true;
    let mut i = 0;

    while i < bytes.len() {
        let b = bytes[i];
        match open_quote {
            Some(b'"') => {
                if b == b'\\' && i + 1 < bytes.len() {
                    // Skip the escaped byte so `\"` does not close the string.
                    i += 2;
                    continue;
                }
                if b == b'"' {
                    open_quote = None;
                }
            }
            Some(_) => {
                if b == b'\'' {
                    if bytes.get(i + 1) == Some(&b'\'') {
                        // `''` is an escaped quote inside a single-quoted string.
                        i += 2;
                        continue;
                    }
                    open_quote = None;
                }
            }
            None => {
                if (b == b'"' || b == b'\'') && at_token_start {
                    open_quote = Some(b);
                } else if b == b'#' && prev_was_ws {
                    // `#` is ASCII, so `i` is always a char boundary.
                    let v = value[..i].trim_end().to_string();
                    let c = value[i..].to_string();
                    return (v, Some(c));
                }
            }
        }
        prev_was_ws = matches!(b, b' ' | b'\t');
        at_token_start = prev_was_ws || matches!(b, b'[' | b'{' | b',' | b':');
        i += 1;
    }
    (value.to_string(), None)
}
527
/// Examine the value portion of a field line for a leading `!fill` or other
/// custom tag.
///
/// `value` is the text after the colon with any trailing comment already
/// removed; `key` is only used to build the error message.
///
/// Returns `(fill, value_without_tag, had_other_tag, fill_target_err)`:
///
/// - `fill`: `true` when the value starts with the `!fill` tag, i.e.
///   `!fill` followed by a space, a tab, or the end of the value.
///   `!fillwhatever` is a different tag.
/// - `value_without_tag`: for a fill value, the text with the tag removed —
///   a single space plus the remaining value, or just the original leading
///   whitespace when nothing follows the tag (serde_saphyr then sees a null,
///   or a block sequence if one follows on indented lines). In every other
///   case the input is returned unchanged.
/// - `had_other_tag`: `true` when the value starts with some other `!tag`.
///   That tag is *not* stripped here (serde_saphyr tolerates and drops
///   unknown tags), so callers only get a signal to warn.
/// - `fill_target_err`: set when `!fill` is followed by a flow mapping
///   (`{...}`). Mappings are rejected because top-level `type: object` is
///   not a supported schema type in Quillmark; scalars and sequences are
///   allowed. NOTE(review): only the flow form is detected in this
///   function — a block mapping on following lines is not visible here.
fn inspect_fill_and_tags(value: &str, key: &str) -> (bool, String, bool, Option<String>) {
    let body = value.trim_start();
    let leading_ws = &value[..value.len() - body.len()];

    // Exactly empty / null (e.g. `key:` with nothing) — not a fill target.
    if body.is_empty() {
        return (false, value.to_string(), false, None);
    }

    // `!fill` counts only as a whole token; a bare `!fill` (nothing after
    // it) takes the same path with an empty payload.
    let fill_tail = body
        .strip_prefix("!fill")
        .filter(|tail| tail.is_empty() || tail.starts_with(' ') || tail.starts_with('\t'));

    if let Some(tail) = fill_tail {
        let payload = tail.trim_start();
        let target_err = payload.starts_with('{').then(|| {
            format!(
                "`!fill` on key `{}` targets a mapping; `!fill` is supported on scalars and sequences only",
                key
            )
        });
        let cleaned = if payload.is_empty() {
            // Keep the line shape: `key: ` parses as null.
            leading_ws.to_string()
        } else {
            // One space + payload so the cleaned text reads `key: payload`.
            format!(" {}", payload)
        };
        return (true, cleaned, false, target_err);
    }

    // Any other `!tag` prefix is a non-fill custom tag; the value is left
    // alone either way.
    let had_other_tag = body.starts_with('!');
    (false, value.to_string(), had_other_tag, None)
}
600
#[cfg(test)]
mod tests {
    use super::*;

    // --- small constructors so the expectations below stay readable ---

    /// Expected top-level field hint.
    fn field(key: &str, fill: bool) -> PreItem {
        PreItem::Field {
            key: key.to_string(),
            fill,
        }
    }

    /// Expected top-level comment hint.
    fn comment(text: &str, inline: bool) -> PreItem {
        PreItem::Comment {
            text: text.to_string(),
            inline,
        }
    }

    /// Mapping-key path segment.
    fn key(name: &str) -> CommentPathSegment {
        CommentPathSegment::Key(name.to_string())
    }

    /// Expected nested comment.
    fn nested(
        container_path: Vec<CommentPathSegment>,
        position: usize,
        text: &str,
        inline: bool,
    ) -> NestedComment {
        NestedComment {
            container_path,
            position,
            text: text.to_string(),
            inline,
        }
    }

    /// Whether the scan produced a warning with the given code.
    fn has_warning(scan: &PreScan, code: &str) -> bool {
        scan.warnings.iter().any(|w| w.code.as_deref() == Some(code))
    }

    #[test]
    fn extracts_own_line_comments() {
        let out = prescan_fence_content("# top\ntitle: foo\n# mid\nauthor: bar\n");
        let expected = vec![
            comment("top", false),
            field("title", false),
            comment("mid", false),
            field("author", false),
        ];
        assert_eq!(out.items, expected);
        assert!(out.nested_comments.is_empty());
    }

    #[test]
    fn splits_trailing_comments() {
        let out = prescan_fence_content("title: foo # inline\n");
        assert_eq!(
            out.items,
            vec![field("title", false), comment("inline", true)]
        );
        assert!(out.cleaned_yaml.contains("title: foo"));
        assert!(!out.cleaned_yaml.contains("inline"));
    }

    #[test]
    fn detects_fill_on_scalar() {
        let out = prescan_fence_content("dept: !fill Department\n");
        assert_eq!(out.items, vec![field("dept", true)]);
        assert!(out.cleaned_yaml.contains("dept: Department"));
        assert!(!out.cleaned_yaml.contains("!fill"));
    }

    #[test]
    fn detects_bare_fill() {
        let out = prescan_fence_content("dept: !fill\n");
        assert_eq!(out.items, vec![field("dept", true)]);
        assert!(!out.cleaned_yaml.contains("!fill"));
    }

    #[test]
    fn unknown_tag_warns() {
        let out = prescan_fence_content("x: !custom value\n");
        assert!(
            has_warning(&out, "parse::unsupported_yaml_tag"),
            "expected unsupported_yaml_tag warning"
        );
    }

    #[test]
    fn nested_comment_in_sequence_captured() {
        let out = prescan_fence_content(
            "arr:\n  # before-first\n  - a\n  # between\n  - b\n  # after-last\n",
        );
        let expected = vec![
            nested(vec![key("arr")], 0, "before-first", false),
            nested(vec![key("arr")], 1, "between", false),
            nested(vec![key("arr")], 2, "after-last", false),
        ];
        assert_eq!(out.nested_comments, expected);
        assert!(
            !has_warning(&out, "parse::comments_in_nested_yaml_dropped"),
            "no dropped-comment warning expected; nested comments are now preserved"
        );
    }

    #[test]
    fn nested_comment_in_mapping_captured() {
        let out = prescan_fence_content("outer:\n  # comment\n  inner: 1\n");
        assert_eq!(
            out.nested_comments,
            vec![nested(vec![key("outer")], 0, "comment", false)]
        );
    }

    #[test]
    fn deep_nested_comment_path() {
        let out = prescan_fence_content("outer:\n  inner:\n    # deep\n    leaf: 1\n");
        assert_eq!(
            out.nested_comments,
            vec![nested(vec![key("outer"), key("inner")], 0, "deep", false)]
        );
    }

    #[test]
    fn comment_inside_seq_of_maps() {
        // Every sequence item is a mapping; a comment between two keys of
        // the first item is owned by that item's mapping.
        let out = prescan_fence_content(
            "items:\n  - name: a\n    # inside-first\n    val: 1\n  - name: b\n",
        );
        let item_zero = vec![key("items"), CommentPathSegment::Index(0)];
        assert_eq!(
            out.nested_comments,
            vec![nested(item_zero, 1, "inside-first", false)]
        );
    }

    #[test]
    fn nested_inline_on_sequence_item() {
        // The trailing comment of `- a # tail` is attached to item 0 — the
        // host index, not the slot after it.
        let out = prescan_fence_content("arr:\n  - a # tail\n  - b\n");
        assert_eq!(
            out.nested_comments,
            vec![nested(vec![key("arr")], 0, "tail", true)]
        );
        assert!(out.cleaned_yaml.contains("- a\n"));
        assert!(!out.cleaned_yaml.contains("tail"));
    }

    #[test]
    fn nested_inline_on_mapping_field() {
        // The trailing comment of `inner: 1 # tail` under `outer:` is
        // attached inline at host index 0.
        let out = prescan_fence_content("outer:\n  inner: 1 # tail\n");
        assert_eq!(
            out.nested_comments,
            vec![nested(vec![key("outer")], 0, "tail", true)]
        );
    }

    #[test]
    fn fill_on_flow_sequence_allowed() {
        let out = prescan_fence_content("x: !fill [1, 2]\n");
        assert!(
            out.fill_target_errors.is_empty(),
            "expected no error; !fill on sequences is supported"
        );
        assert_eq!(out.items, vec![field("x", true)]);
    }

    #[test]
    fn fill_on_flow_mapping_errors() {
        let out = prescan_fence_content("x: !fill {a: 1}\n");
        assert!(
            !out.fill_target_errors.is_empty(),
            "expected error; !fill on mappings is rejected"
        );
    }
}