panache-parser 0.19.1

Lossless CST parser and syntax wrappers for Pandoc markdown, Quarto, and RMarkdown
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
//! Typed AST wrappers over the in-tree YAML CST.
//!
//! These wrappers give value-extraction consumers (metadata, bibliography,
//! includes, hashpipe) a typed traversal API over the rowan CST produced by
//! [`crate::parser::yaml::parse_yaml_tree`], replacing the external
//! `yaml_parser` crate's `ast` module. They follow the house pattern
//! ([`super::headings`], [`super::references`]): newtype wrappers with
//! hand-written [`AstNode`] impls and `rowan::ast::support`-based accessors.
//!
//! Two CST facts shape the API:
//!
//! - YAML parses produce one or more `YAML_DOCUMENT` children under a
//!   *stream-equivalent* container. Standalone parses
//!   ([`crate::parser::yaml::parse_yaml_tree`]) root the tree at
//!   `YAML_STREAM`; embedded parses nest the documents directly under
//!   the host wrapper (`YAML_METADATA_CONTENT` for frontmatter,
//!   `HASHPIPE_YAML_CONTENT` for hashpipe), which plays the stream
//!   role for its singleton-stream embedding. [`parse_yaml_document`]
//!   centralizes the descent so no consumer re-implements it.
//! - `YAML_BLOCK_MAP_KEY` includes the trailing `:` (`YAML_COLON`) token, so
//!   [`YamlBlockMapKey::scalar`] reads the `YAML_SCALAR` node child rather
//!   than the whole-key text.
//!
//! Scalar style is not a CST kind — every style is a `YAML_SCALAR` node
//! (wrapping `YAML_SCALAR_TEXT` content) — so [`YamlScalar`] detects the
//! style from the leading byte and cooks via the shared
//! [`crate::parser::yaml::cook`] primitives.

use rowan::{TextRange, TextSize};

use super::ast::{AstChildren, AstNode, support};
use super::{PanacheLanguage, SyntaxKind, SyntaxNode, SyntaxToken};
use crate::parser::yaml::{ScalarStyle, cook, parse_yaml_tree};

/// Parse `input` and hand back its first YAML document.
///
/// The lookup goes through the stream-equivalent container (see
/// [`is_stream_equivalent`]), so standalone parses rooted at `YAML_STREAM`
/// and host embeddings are handled the same way.
///
/// Returns `None` when [`parse_yaml_tree`] produces no tree for `input`, or
/// when the tree holds no document.
pub fn parse_yaml_document(input: &str) -> Option<YamlDocument> {
    let tree = parse_yaml_tree(input)?;
    first_document(&tree)
}

/// Parse `input` and collect every YAML document found in its stream
/// container, in source order.
///
/// [`parse_yaml_document`] covers the common single-document case; this
/// variant exists for multi-document input such as `a: 1\n---\nb: 2`.
/// An empty vector is returned when no tree is produced or when the tree has
/// no stream-equivalent container.
pub fn parse_yaml_documents(input: &str) -> Vec<YamlDocument> {
    parse_yaml_tree(input)
        .and_then(|tree| stream_container(&tree))
        .map(|container| {
            container
                .children()
                .filter_map(YamlDocument::cast)
                .collect()
        })
        .unwrap_or_default()
}

/// Whether `kind` is one of the node kinds that act as a YAML stream
/// container: the spec-level `YAML_STREAM` used by standalone parses, or one
/// of the host embedding wrappers (`YAML_METADATA_CONTENT`,
/// `HASHPIPE_YAML_CONTENT`) that stand in for it in singleton-stream
/// embeddings. Each of these wraps zero or more `YAML_DOCUMENT` children.
pub(crate) fn is_stream_equivalent(kind: SyntaxKind) -> bool {
    const STREAM_KINDS: [SyntaxKind; 3] = [
        SyntaxKind::YAML_STREAM,
        SyntaxKind::YAML_METADATA_CONTENT,
        SyntaxKind::HASHPIPE_YAML_CONTENT,
    ];
    STREAM_KINDS.contains(&kind)
}

/// First node yielded by `tree.descendants()` whose kind satisfies
/// [`is_stream_equivalent`], or `None` if there is no such node.
fn stream_container(tree: &SyntaxNode) -> Option<SyntaxNode> {
    for candidate in tree.descendants() {
        if is_stream_equivalent(candidate.kind()) {
            return Some(candidate);
        }
    }
    None
}

/// First `YAML_DOCUMENT` child of the stream container located by
/// [`stream_container`]. `None` when there is no container or it holds no
/// document.
fn first_document(tree: &SyntaxNode) -> Option<YamlDocument> {
    let container = stream_container(tree)?;
    container.children().find_map(YamlDocument::cast)
}

/// The five concrete node shapes a value, sequence item, or document body can
/// take. `None` (i.e. an absent `YamlNode`) models an empty YAML value.
///
/// Values of this type are produced by `node_child`, which backs the
/// `as_node()` accessors on the value / item / document wrappers.
#[derive(Debug, Clone)]
pub enum YamlNode {
    /// A block mapping (`YAML_BLOCK_MAP`).
    BlockMap(YamlBlockMap),
    /// A block sequence (`YAML_BLOCK_SEQUENCE`).
    BlockSequence(YamlBlockSequence),
    /// A flow mapping (`YAML_FLOW_MAP`).
    FlowMap(YamlFlowMap),
    /// A flow sequence (`YAML_FLOW_SEQUENCE`).
    FlowSequence(YamlFlowSequence),
    /// A scalar (`YAML_SCALAR`), of any lexical style.
    Scalar(YamlScalar),
}

/// Pick out the one content node carried by a value / item / document
/// wrapper.
///
/// The first direct child that is a container (block map, block sequence,
/// flow map or flow sequence) wins. Only when no container child exists does
/// the lookup fall back to the first direct `YAML_SCALAR` child; token
/// children such as tags are not considered. `None` means the value is empty.
fn node_child(parent: &SyntaxNode) -> Option<YamlNode> {
    let container = parent.children().find_map(|child| match child.kind() {
        SyntaxKind::YAML_BLOCK_MAP => YamlBlockMap::cast(child).map(YamlNode::BlockMap),
        SyntaxKind::YAML_BLOCK_SEQUENCE => {
            YamlBlockSequence::cast(child).map(YamlNode::BlockSequence)
        }
        SyntaxKind::YAML_FLOW_MAP => YamlFlowMap::cast(child).map(YamlNode::FlowMap),
        SyntaxKind::YAML_FLOW_SEQUENCE => {
            YamlFlowSequence::cast(child).map(YamlNode::FlowSequence)
        }
        _ => None,
    });
    container.or_else(|| scalar_token(parent).map(YamlNode::Scalar))
}

/// The first direct child of `parent` that is a `YAML_SCALAR` node, wrapped
/// as a [`YamlScalar`].
///
/// Despite the name this looks at child *nodes*, not tokens: a scalar is a
/// node whose leaves are the per-line `YAML_SCALAR_TEXT` fragments (plus any
/// `NEWLINE` between them). Flow punctuation has its own kind
/// (`YAML_FLOW_INDICATOR`) and therefore never matches.
fn scalar_token(parent: &SyntaxNode) -> Option<YamlScalar> {
    parent.children().find_map(YamlScalar::cast)
}

/// First direct token child of `parent` whose kind equals `kind`. Child
/// nodes are skipped, so tokens nested deeper in the tree are not found.
fn token_of(parent: &SyntaxNode, kind: SyntaxKind) -> Option<SyntaxToken> {
    parent
        .children_with_tokens()
        .find_map(|element| element.into_token().filter(|token| token.kind() == kind))
}

/// The node's range with trailing whitespace trimmed off.
///
/// A composite node's `text_range()` includes the trailing newline rowan
/// attaches after its last child, so using it as a diagnostic span ends at
/// column 1 of the following sibling line. This returns a range that starts
/// at the same offset but ends at the last non-whitespace byte of the node's
/// text. `text_range()` itself stays lossless; this is the semantic
/// projection, mirroring the inline `content_range()` accessors.
///
/// The trimmed length is converted with the checked [`TextSize::of`] rather
/// than an `as u32` cast, so an out-of-range length cannot silently wrap.
fn content_text_range(node: &SyntaxNode) -> TextRange {
    let text = node.text().to_string();
    // `trim_end` only removes a suffix, so the trimmed length is an offset
    // relative to the node's start.
    let content_len = TextSize::of(text.trim_end());
    TextRange::at(node.text_range().start(), content_len)
}

/// Generates the common content projections (`as_node`, the `as_*` family,
/// `is_empty`, `tag`) inside an `impl` block of a value / item wrapper, so
/// every such wrapper exposes an identical surface without copy-pasting it.
/// The wrapper must be a newtype whose field `0` is the `SyntaxNode`.
macro_rules! node_projections {
    () => {
        /// The single content node, or `None` for an empty value.
        pub fn as_node(&self) -> Option<YamlNode> {
            node_child(&self.0)
        }

        /// The value as a scalar; `None` if it is a container or empty.
        pub fn as_scalar(&self) -> Option<YamlScalar> {
            match self.as_node() {
                Some(YamlNode::Scalar(scalar)) => Some(scalar),
                _ => None,
            }
        }

        /// The value as a block map; `None` for any other shape or empty.
        pub fn as_block_map(&self) -> Option<YamlBlockMap> {
            match self.as_node() {
                Some(YamlNode::BlockMap(map)) => Some(map),
                _ => None,
            }
        }

        /// The value as a block sequence; `None` for any other shape or empty.
        pub fn as_block_sequence(&self) -> Option<YamlBlockSequence> {
            match self.as_node() {
                Some(YamlNode::BlockSequence(sequence)) => Some(sequence),
                _ => None,
            }
        }

        /// The value as a flow map; `None` for any other shape or empty.
        pub fn as_flow_map(&self) -> Option<YamlFlowMap> {
            match self.as_node() {
                Some(YamlNode::FlowMap(map)) => Some(map),
                _ => None,
            }
        }

        /// The value as a flow sequence; `None` for any other shape or empty.
        pub fn as_flow_sequence(&self) -> Option<YamlFlowSequence> {
            match self.as_node() {
                Some(YamlNode::FlowSequence(sequence)) => Some(sequence),
                _ => None,
            }
        }

        /// Whether this value is empty (no scalar and no container child).
        pub fn is_empty(&self) -> bool {
            node_child(&self.0).is_none()
        }

        /// The explicit `YAML_TAG` token decorating this value (e.g. `!expr`),
        /// if any. Used by the hashpipe formatter to preserve chunk-option tags.
        pub fn tag(&self) -> Option<SyntaxToken> {
            token_of(&self.0, SyntaxKind::YAML_TAG)
        }
    };
}

/// Declares a newtype wrapper around `SyntaxNode` together with its
/// [`AstNode`] impl, bound to exactly one `SyntaxKind` variant.
///
/// Usage: `ast_node!(/// docs... TypeName, SYNTAX_KIND_VARIANT);` — any
/// attributes (including doc comments) placed before the type name are
/// forwarded onto the generated struct.
macro_rules! ast_node {
    ($(#[$meta:meta])* $name:ident, $kind:ident) => {
        $(#[$meta])*
        #[derive(Debug, Clone, PartialEq, Eq, Hash)]
        pub struct $name(SyntaxNode);

        impl AstNode for $name {
            type Language = PanacheLanguage;

            fn can_cast(kind: SyntaxKind) -> bool {
                matches!(kind, SyntaxKind::$kind)
            }

            fn cast(syntax: SyntaxNode) -> Option<Self> {
                if Self::can_cast(syntax.kind()) {
                    Some(Self(syntax))
                } else {
                    None
                }
            }

            fn syntax(&self) -> &SyntaxNode {
                &self.0
            }
        }
    };
}

ast_node!(
    /// One YAML document (a `YAML_DOCUMENT` node) inside the stream.
    YamlDocument, YAML_DOCUMENT
);

impl YamlDocument {
    /// The document's direct block-map child, if any.
    pub fn block_map(&self) -> Option<YamlBlockMap> {
        support::child(self.syntax())
    }

    /// The document's direct block-sequence child, if any.
    pub fn block_sequence(&self) -> Option<YamlBlockSequence> {
        support::child(self.syntax())
    }

    /// The document's direct flow-map child, if any.
    pub fn flow_map(&self) -> Option<YamlFlowMap> {
        support::child(self.syntax())
    }

    /// The document's direct flow-sequence child, if any.
    pub fn flow_sequence(&self) -> Option<YamlFlowSequence> {
        support::child(self.syntax())
    }

    /// The body of a top-level bare scalar document (`"just a string"`).
    pub fn scalar(&self) -> Option<YamlScalar> {
        scalar_token(self.syntax())
    }

    /// The document body as a [`YamlNode`]: the first container child if
    /// there is one, otherwise the first scalar child, otherwise `None`.
    pub fn as_node(&self) -> Option<YamlNode> {
        node_child(self.syntax())
    }
}

ast_node!(
    /// A block mapping: a run of `key: value` entries.
    YamlBlockMap, YAML_BLOCK_MAP
);

impl YamlBlockMap {
    /// Iterates over the map's entries in source order.
    pub fn entries(&self) -> AstChildren<YamlBlockMapEntry> {
        support::children(self.syntax())
    }

    /// The node's range trimmed of trailing trivia (see [`content_text_range`]).
    pub fn content_range(&self) -> TextRange {
        content_text_range(self.syntax())
    }

    /// The first entry whose cooked key text equals `key`. Entries without a
    /// key scalar never match.
    pub fn entry(&self, key: &str) -> Option<YamlBlockMapEntry> {
        self.entries()
            .find(|candidate| matches!(candidate.key_text(), Some(text) if text == key))
    }

    /// The value wrapper of the first entry keyed by `key`; `None` when no
    /// entry matches or the matching entry has no value node.
    pub fn value_of(&self, key: &str) -> Option<YamlBlockMapValue> {
        self.entry(key).and_then(|entry| entry.value())
    }
}

ast_node!(
    /// A single `key: value` pair of a block mapping.
    YamlBlockMapEntry, YAML_BLOCK_MAP_ENTRY
);

impl YamlBlockMapEntry {
    /// The entry's key node, if present.
    pub fn key(&self) -> Option<YamlBlockMapKey> {
        support::child(self.syntax())
    }

    /// The cooked key text. It is read from the scalar child of
    /// `YAML_BLOCK_MAP_KEY`, so the trailing `:` token is not part of it.
    /// `None` when the entry has no key node or the key has no scalar.
    pub fn key_text(&self) -> Option<String> {
        let scalar = self.key()?.scalar()?;
        Some(scalar.value())
    }

    /// The entry's value node, if present.
    pub fn value(&self) -> Option<YamlBlockMapValue> {
        support::child(self.syntax())
    }
}

ast_node!(
    /// The key half of a block-map entry. The node contains both the
    /// `YAML_SCALAR` node and the `YAML_COLON` token that follows it.
    YamlBlockMapKey, YAML_BLOCK_MAP_KEY
);

impl YamlBlockMapKey {
    /// The key's scalar node; the `:` colon is a sibling token and is not
    /// included.
    pub fn scalar(&self) -> Option<YamlScalar> {
        scalar_token(self.syntax())
    }
}

ast_node!(
    /// The value side of a block-map entry: a scalar, a nested container, or
    /// empty.
    YamlBlockMapValue, YAML_BLOCK_MAP_VALUE
);

impl YamlBlockMapValue {
    // Expands to the shared content projections: `as_node`, `as_scalar`,
    // `as_block_map`, `as_block_sequence`, `as_flow_map`, `as_flow_sequence`,
    // `is_empty` and `tag`.
    node_projections!();
}

ast_node!(
    /// A block sequence: a run of `- item` entries.
    YamlBlockSequence, YAML_BLOCK_SEQUENCE
);

impl YamlBlockSequence {
    /// Iterates over the sequence's items in source order.
    pub fn items(&self) -> AstChildren<YamlBlockSequenceItem> {
        support::children(self.syntax())
    }

    /// The node's range trimmed of trailing trivia (see [`content_text_range`]).
    pub fn content_range(&self) -> TextRange {
        content_text_range(self.syntax())
    }
}

ast_node!(
    /// One `- item` in a block sequence. The leading `-` is a
    /// `YAML_BLOCK_SEQ_ENTRY` token, skipped by the content projections.
    YamlBlockSequenceItem, YAML_BLOCK_SEQUENCE_ITEM
);

impl YamlBlockSequenceItem {
    // Expands to the shared content projections: `as_node`, `as_scalar`,
    // `as_block_map`, `as_block_sequence`, `as_flow_map`, `as_flow_sequence`,
    // `is_empty` and `tag`.
    node_projections!();
}

ast_node!(
    /// A flow sequence, written inline as `[a, b, c]`.
    YamlFlowSequence, YAML_FLOW_SEQUENCE
);

impl YamlFlowSequence {
    /// Iterates over the sequence's items in source order.
    pub fn items(&self) -> AstChildren<YamlFlowSequenceItem> {
        support::children(self.syntax())
    }

    /// The node's range trimmed of trailing trivia (see [`content_text_range`]).
    pub fn content_range(&self) -> TextRange {
        content_text_range(self.syntax())
    }
}

ast_node!(
    /// One item in a flow sequence.
    YamlFlowSequenceItem, YAML_FLOW_SEQUENCE_ITEM
);

impl YamlFlowSequenceItem {
    // Expands to the shared content projections: `as_node`, `as_scalar`,
    // `as_block_map`, `as_block_sequence`, `as_flow_map`, `as_flow_sequence`,
    // `is_empty` and `tag`.
    node_projections!();
}

ast_node!(
    /// A flow mapping, written inline as `{k: v, ...}`.
    YamlFlowMap, YAML_FLOW_MAP
);

impl YamlFlowMap {
    /// Iterates over the map's entries in source order.
    pub fn entries(&self) -> AstChildren<YamlFlowMapEntry> {
        support::children(self.syntax())
    }

    /// The node's range trimmed of trailing trivia (see [`content_text_range`]).
    pub fn content_range(&self) -> TextRange {
        content_text_range(self.syntax())
    }

    /// The first entry whose cooked key text equals `key`. Entries without a
    /// key scalar never match.
    pub fn entry(&self, key: &str) -> Option<YamlFlowMapEntry> {
        self.entries()
            .find(|candidate| matches!(candidate.key_text(), Some(text) if text == key))
    }

    /// The value wrapper of the first entry keyed by `key`; `None` when no
    /// entry matches or the matching entry has no value node.
    pub fn value_of(&self, key: &str) -> Option<YamlFlowMapValue> {
        self.entry(key).and_then(|entry| entry.value())
    }
}

ast_node!(
    /// A single `k: v` pair of a flow mapping.
    YamlFlowMapEntry, YAML_FLOW_MAP_ENTRY
);

impl YamlFlowMapEntry {
    /// The entry's key node, if present.
    pub fn key(&self) -> Option<YamlFlowMapKey> {
        support::child(self.syntax())
    }

    /// The cooked key text, read from the key's scalar child. `None` when
    /// the entry has no key node or the key has no scalar.
    pub fn key_text(&self) -> Option<String> {
        let scalar = self.key()?.scalar()?;
        Some(scalar.value())
    }

    /// The entry's value node, if present.
    pub fn value(&self) -> Option<YamlFlowMapValue> {
        support::child(self.syntax())
    }
}

ast_node!(
    /// The key half of a flow-map entry.
    YamlFlowMapKey, YAML_FLOW_MAP_KEY
);

impl YamlFlowMapKey {
    /// The key's scalar node: the first direct `YAML_SCALAR` child, if any.
    pub fn scalar(&self) -> Option<YamlScalar> {
        scalar_token(self.syntax())
    }
}

ast_node!(
    /// The value side of a flow-map entry.
    YamlFlowMapValue, YAML_FLOW_MAP_VALUE
);

impl YamlFlowMapValue {
    // Expands to the shared content projections: `as_node`, `as_scalar`,
    // `as_block_map`, `as_block_sequence`, `as_flow_map`, `as_flow_sequence`,
    // `is_empty` and `tag`.
    node_projections!();
}

/// The lexical style of a scalar, detected from its raw source. (The CST does
/// not record style as a distinct kind — every style is a `YAML_SCALAR` node.)
///
/// Detection looks at the first byte after leading whitespace; see the
/// per-variant notes for the byte that selects each style.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum YamlScalarStyle {
    /// Anything that does not start with one of the indicators below
    /// (including an empty source).
    Plain,
    /// Starts with `'`.
    SingleQuoted,
    /// Starts with `"`.
    DoubleQuoted,
    /// Block scalar starting with `|`.
    Literal,
    /// Block scalar starting with `>`.
    Folded,
}

impl YamlScalarStyle {
    /// Translate this style into the same-named [`ScalarStyle`] variant that
    /// the shared `cook` routine takes.
    fn to_cook_style(self) -> ScalarStyle {
        match self {
            Self::Plain => ScalarStyle::Plain,
            Self::SingleQuoted => ScalarStyle::SingleQuoted,
            Self::DoubleQuoted => ScalarStyle::DoubleQuoted,
            Self::Literal => ScalarStyle::Literal,
            Self::Folded => ScalarStyle::Folded,
        }
    }
}

/// Classify a scalar's lexical style from the first character that follows
/// any leading whitespace in `raw`: `'` is single-quoted, `"` is
/// double-quoted, `|` is literal, `>` is folded. Everything else, including
/// empty or all-whitespace input, is plain.
fn detect_style(raw: &str) -> YamlScalarStyle {
    let Some(indicator) = raw.trim_start().chars().next() else {
        return YamlScalarStyle::Plain;
    };
    match indicator {
        '\'' => YamlScalarStyle::SingleQuoted,
        '"' => YamlScalarStyle::DoubleQuoted,
        '|' => YamlScalarStyle::Literal,
        '>' => YamlScalarStyle::Folded,
        _ => YamlScalarStyle::Plain,
    }
}

/// A scalar value node. Its leaves are the per-physical-line content
/// fragments (`YAML_SCALAR_TEXT`), interleaved with `NEWLINE` tokens and, in
/// embedded hashpipe YAML, line-prefix tokens. [`raw`](Self::raw) glues all
/// of them back together.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct YamlScalar(SyntaxNode);

impl YamlScalar {
    /// Wrap `node` if it is a `YAML_SCALAR` node; `None` for any other kind.
    pub fn cast(node: SyntaxNode) -> Option<Self> {
        match node.kind() {
            SyntaxKind::YAML_SCALAR => Some(Self(node)),
            _ => None,
        }
    }

    /// The scalar's exact source text (every leaf concatenated), including
    /// any quotes / block header and embedded line breaks.
    pub fn raw(&self) -> String {
        self.syntax().text().to_string()
    }

    /// The lexical style, detected from [`raw`](Self::raw).
    pub fn style(&self) -> YamlScalarStyle {
        let raw = self.raw();
        detect_style(&raw)
    }

    /// The cooked logical string: quotes stripped, escapes decoded,
    /// multi-line scalars folded per YAML 1.2. Block scalars (`|`/`>`) are
    /// returned raw, because cooking them needs parent indent context.
    ///
    /// Both the style detection and the cooking operate on the
    /// prefix-stripped source, not on [`raw`](Self::raw).
    pub fn value(&self) -> String {
        let source = self.prefix_stripped_source();
        let style = detect_style(&source).to_cook_style();
        cook(style, &source)
    }

    /// Concatenation of the scalar's direct token children, leaving out
    /// `YAML_LINE_PREFIX` tokens (e.g. the hashpipe `#|`). This is what gets
    /// cooked: a line prefix is host framing rather than scalar content and
    /// must not end up in the value. When the scalar has no prefix leaves
    /// (plain frontmatter), the result equals [`raw`](Self::raw).
    fn prefix_stripped_source(&self) -> String {
        let mut source = String::new();
        for element in self.0.children_with_tokens() {
            let Some(token) = element.into_token() else {
                continue;
            };
            if token.kind() != SyntaxKind::YAML_LINE_PREFIX {
                source.push_str(token.text());
            }
        }
        source
    }

    /// The full source range of the scalar node.
    pub fn text_range(&self) -> TextRange {
        self.syntax().text_range()
    }

    /// The underlying CST node.
    pub fn syntax(&self) -> &SyntaxNode {
        &self.0
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Root block map of the first document parsed from `input`.
    fn root_map(input: &str) -> YamlBlockMap {
        let doc = parse_yaml_document(input).expect("document");
        doc.block_map().unwrap()
    }

    /// Value stored under `key` in the root block map of `input`.
    fn root_value(input: &str, key: &str) -> YamlBlockMapValue {
        root_map(input).value_of(key).unwrap()
    }

    /// Scalar stored under `key` in the root block map of `input`.
    fn root_scalar(input: &str, key: &str) -> YamlScalar {
        root_value(input, key).as_scalar().unwrap()
    }

    /// Cooked values of every scalar yielded by `scalars`.
    fn cooked(scalars: impl Iterator<Item = YamlScalar>) -> Vec<String> {
        scalars.map(|scalar| scalar.value()).collect()
    }

    #[test]
    fn parse_yaml_document_descends_envelope() {
        let doc = parse_yaml_document("title: x\n").expect("document");
        let map = doc.block_map().expect("block map");
        assert_eq!(map.entries().count(), 1);
    }

    #[test]
    fn content_range_excludes_trailing_trivia() {
        // The nested map owns the newline before `sibling`, so its lossless
        // `text_range()` ends on the sibling line; `content_range()` must
        // stop at the last content byte so diagnostics stay on the map.
        let input = "outer:\n  a: 1\n  b: 2\nsibling: x\n";
        let nested = root_value(input, "outer").as_block_map().unwrap();
        let text_at =
            |range: TextRange| &input[usize::from(range.start())..usize::from(range.end())];
        assert!(
            text_at(nested.syntax().text_range()).ends_with('\n'),
            "text_range keeps the trailing newline (lossless)"
        );
        assert_eq!(text_at(nested.content_range()), "a: 1\n  b: 2");
    }

    #[test]
    fn key_text_strips_colon() {
        let entry = root_map("key: value\n").entries().next().unwrap();
        assert_eq!(entry.key_text().as_deref(), Some("key"));
    }

    #[test]
    fn value_is_cooked() {
        // Single-quoted: the doubled quote collapses to one.
        assert_eq!(root_scalar("k: 'it''s'\n", "k").value(), "it's");
        // Double-quoted: the `\n` escape is decoded.
        assert_eq!(root_scalar("k: \"a\\nb\"\n", "k").value(), "a\nb");
    }

    #[test]
    fn raw_preserves_quotes() {
        let scalar = root_scalar("k: 'it''s'\n", "k");
        assert_eq!(scalar.raw(), "'it''s'");
        assert_eq!(scalar.style(), YamlScalarStyle::SingleQuoted);
    }

    #[test]
    fn value_skips_embedded_line_prefix() {
        use crate::parser::yaml::parse_stream_with_prefix;

        // A double-quoted scalar spanning two hashpipe-prefixed lines: the
        // `#|` on the continuation line lives in the tree as a
        // `YAML_LINE_PREFIX` leaf (losslessness) but must stay out of the
        // cooked value.
        let tree = parse_stream_with_prefix("#| key: \"foo\n#|   bar\"\n", "#|");
        let scalar = first_document(&tree)
            .and_then(|document| document.block_map())
            .and_then(|map| map.value_of("key"))
            .and_then(|entry_value| entry_value.as_scalar())
            .expect("scalar value");
        let value = scalar.value();
        assert!(!value.contains("#|"), "prefix leaked into value: {value:?}");
        assert_eq!(value, "foo bar");
        // The lossless side of the contract: raw() still carries the prefix.
        let raw = scalar.raw();
        assert!(
            raw.contains("#|"),
            "raw() must retain the prefix leaf: {:?}",
            raw
        );
    }

    #[test]
    fn scalar_text_range_is_content_relative() {
        let input = "k: value\n";
        let range = root_scalar(input, "k").text_range();
        let (start, end) = (usize::from(range.start()), usize::from(range.end()));
        assert_eq!(&input[start..end], "value");
    }

    #[test]
    fn empty_value_has_no_scalar() {
        let value = root_value("k:\n", "k");
        assert!(value.is_empty());
        assert!(value.as_scalar().is_none());
    }

    #[test]
    fn block_sequence_items_yield_scalars() {
        let seq = root_value("k:\n  - a\n  - b\n", "k")
            .as_block_sequence()
            .expect("block sequence");
        let items = cooked(seq.items().filter_map(|item| item.as_scalar()));
        assert_eq!(items, vec!["a".to_string(), "b".to_string()]);
    }

    #[test]
    fn flow_sequence_items_yield_scalars() {
        let seq = root_value("k: [a, b]\n", "k")
            .as_flow_sequence()
            .expect("flow sequence");
        let items = cooked(seq.items().filter_map(|item| item.as_scalar()));
        assert_eq!(items, vec!["a".to_string(), "b".to_string()]);
    }

    #[test]
    fn tag_token_is_exposed_and_scalar_ignores_it() {
        let value = root_value("k: !expr foo\n", "k");
        let tag = value.tag().map(|token| token.text().to_string());
        assert_eq!(tag, Some("!expr".to_string()));
        assert_eq!(value.as_scalar().unwrap().raw(), "foo");
    }

    #[test]
    fn quoted_key_with_colon_round_trips() {
        let entry = root_map("\"foo:bar\": 1\n").entries().next().unwrap();
        assert_eq!(entry.key_text().as_deref(), Some("foo:bar"));
        let key_scalar = entry.key().unwrap().scalar().unwrap();
        assert_eq!(key_scalar.raw(), "\"foo:bar\"");
    }

    #[test]
    fn parse_yaml_documents_returns_all_documents() {
        let docs = parse_yaml_documents("a: 1\n---\nb: 2\n");
        assert_eq!(docs.len(), 2);
    }

    #[test]
    fn invalid_yaml_yields_no_document() {
        assert!(parse_yaml_document("k: [\n").is_none());
    }
}