Skip to main content

aozora_syntax/
alloc.rs

1//! Arena-backed AST construction.
2//!
3//! [`BorrowedAllocator<'a>`] is the sole AST builder for the
4//! [`crate::borrowed`] AST. It owns an [`Interner`] so byte-equal
5//! strings (ruby readings, container labels, kaeriten marks) share a
6//! single arena allocation.
7//!
8//! ## Naming convention
9//!
10//! - `make_*` methods build *payload* references (`&'a Gaiji<'a>`,
11//!   `&'a Annotation<'a>`) without wrapping them in a node.
12//! - Variant-named methods (`ruby`, `bouten`, `gaiji`, …) build the
13//!   final [`borrowed::AozoraNode<'a>`]. The `gaiji` and `annotation`
14//!   node constructors take the payload reference (built via
15//!   `make_gaiji` / `make_annotation`) so a payload can be shared
16//!   between a `Segment` and a `Node` without recomputing the string
17//!   interns.
18//! - `seg_*` methods build segment elements for `content_segments`.
19//!
20//! ## Canonicalisation
21//!
22//! Both `content_plain("")` and `content_segments(&[])` return
23//! [`borrowed::Content::EMPTY`] (i.e. `Segments(&[])`). `content_segments`
24//! collapses an all-`Text` input into a single concatenated `Plain`
25//! (the concatenation is interned). The legacy owned `Content::from`
26//! / `Content::from_segments` helpers used the same canonicalisation;
27//! preserving it keeps the determinism + sentinel-alignment
28//! proptests in `aozora-lex/tests/property_borrowed_arena.rs` honest
29//! across edits.
30
31use aozora_encoding::gaiji::Resolved;
32
33use crate::borrowed::{self, Arena, Interner};
34use crate::{
35    AlignEnd, AnnotationKind, AozoraHeadingKind, BoutenKind, BoutenPosition, Container, Indent,
36    Keigakomi, SectionKind,
37};
38
39/// Arena-backed builder for [`borrowed::AozoraNode<'a>`] and its
40/// payload types.
41///
42/// Owns an [`Interner`] keyed off the supplied [`Arena`]; both string
43/// content and per-variant payloads land in the arena, so dropping the
44/// arena tears the entire AST down in one step (no per-node `Drop`
45/// runs, no `Box::drop` traffic).
46#[derive(Debug)]
47pub struct BorrowedAllocator<'a> {
48    arena: &'a Arena,
49    interner: Interner<'a>,
50}
51
52#[allow(
53    clippy::unused_self,
54    reason = "API consistency: every BorrowedAllocator builder method takes &mut self even when the variant is a pure wrapper, so call sites have a uniform shape (alloc.method(...) for every variant). Switching trivial wrappers to free fns would split the API in half."
55)]
56impl<'a> BorrowedAllocator<'a> {
57    /// New allocator with a fresh interner sized to `interner_capacity`.
58    /// Capacity is rounded up to the next power of two by the interner.
59    #[must_use]
60    pub fn with_capacity(arena: &'a Arena, interner_capacity: usize) -> Self {
61        Self {
62            arena,
63            interner: Interner::with_capacity_in(interner_capacity, arena),
64        }
65    }
66
67    /// Construct with the interner's default initial capacity (64 → 64
68    /// after power-of-two rounding).
69    #[must_use]
70    pub fn new(arena: &'a Arena) -> Self {
71        Self::with_capacity(arena, 64)
72    }
73
74    /// Borrow the underlying arena. Useful for callers that need to
75    /// emit an arena-allocated normalised text alongside the AST.
76    #[must_use]
77    pub fn arena(&self) -> &'a Arena {
78        self.arena
79    }
80
81    /// Finish allocation and return the interner so the caller can
82    /// inspect its dedup counters (cache hits, table hits, allocs,
83    /// average probe length). The interner's `&'a` arena reference
84    /// continues to keep the interned strings alive.
85    #[must_use]
86    pub fn into_interner(self) -> Interner<'a> {
87        self.interner
88    }
89
90    // ---------------------------------------------------------------------
91    // Content / segment builders
92    // ---------------------------------------------------------------------
93
94    /// Build a plain-text body content. Empty input canonicalises to
95    /// `Segments(&[])` (the legacy owned shape did the same).
96    pub fn content_plain(&mut self, s: &str) -> borrowed::Content<'a> {
97        if s.is_empty() {
98            borrowed::Content::EMPTY
99        } else {
100            borrowed::Content::Plain(self.interner.intern(s))
101        }
102    }
103
104    /// Build a body content from a sequence of segments. Empty input →
105    /// `Segments(&[])`; all-`Text` input collapses into a single
106    /// concatenated `Plain` (interned).
107    pub fn content_segments(&mut self, segs: &[borrowed::Segment<'a>]) -> borrowed::Content<'a> {
108        if segs.is_empty() {
109            return borrowed::Content::EMPTY;
110        }
111        if segs.iter().all(|s| matches!(s, borrowed::Segment::Text(_))) {
112            // Total length is known (sum of text lengths) so we can
113            // pre-size the buffer and avoid reallocation.
114            let total: usize = segs
115                .iter()
116                .map(|s| match s {
117                    borrowed::Segment::Text(t) => t.len(),
118                    _ => 0,
119                })
120                .sum();
121            let mut buf = String::with_capacity(total);
122            for s in segs {
123                if let borrowed::Segment::Text(t) = s {
124                    buf.push_str(t);
125                }
126            }
127            return borrowed::Content::Plain(self.interner.intern(&buf));
128        }
129        borrowed::Content::Segments(self.arena.alloc_slice_copy(segs))
130    }
131
132    /// `Segment::Text(s)` — interns the string.
133    pub fn seg_text(&mut self, s: &str) -> borrowed::Segment<'a> {
134        borrowed::Segment::Text(self.interner.intern(s))
135    }
136
137    /// `Segment::Gaiji(g)` — wraps a payload built via [`Self::make_gaiji`].
138    #[must_use]
139    pub fn seg_gaiji(&self, g: &'a borrowed::Gaiji<'a>) -> borrowed::Segment<'a> {
140        borrowed::Segment::Gaiji(g)
141    }
142
143    /// `Segment::Annotation(a)` — wraps a payload built via [`Self::make_annotation`].
144    #[must_use]
145    pub fn seg_annotation(&self, a: &'a borrowed::Annotation<'a>) -> borrowed::Segment<'a> {
146        borrowed::Segment::Annotation(a)
147    }
148
149    // ---------------------------------------------------------------------
150    // Payload builders (used by both Segment and Node constructors)
151    // ---------------------------------------------------------------------
152
153    /// Build a `Gaiji` payload. Use [`Self::seg_gaiji`] to wrap as a
154    /// segment, or [`Self::gaiji`] to wrap as a node.
155    pub fn make_gaiji(
156        &mut self,
157        description: &str,
158        ucs: Option<Resolved>,
159        mencode: Option<&str>,
160    ) -> &'a borrowed::Gaiji<'a> {
161        let g = borrowed::Gaiji {
162            description: self.interner.intern(description),
163            ucs,
164            mencode: mencode.map(|s| self.interner.intern(s)),
165        };
166        self.arena.alloc(g)
167    }
168
169    /// Build an `Annotation` payload. Use [`Self::seg_annotation`] to
170    /// wrap as a segment, or [`Self::annotation`] to wrap as a node.
171    ///
172    /// `raw` carries the [`borrowed::NonEmptyStr`] invariant.
173    ///
174    /// # Panics
175    ///
176    /// Panics if `raw` is empty. Phase 3 emits annotation only after
177    /// at least one byte landed in the bracket body.
178    pub fn make_annotation(
179        &mut self,
180        raw: &str,
181        kind: AnnotationKind,
182    ) -> &'a borrowed::Annotation<'a> {
183        let raw = borrowed::NonEmptyStr::new(self.interner.intern(raw))
184            .expect("Phase 3 must emit Annotation with non-empty raw bytes");
185        let a = borrowed::Annotation { raw, kind };
186        self.arena.alloc(a)
187    }
188
189    // ---------------------------------------------------------------------
190    // Node variant constructors (17 — matches the AozoraNode enum)
191    // ---------------------------------------------------------------------
192
193    /// `AozoraNode::Ruby(Ruby { base, reading, delim_explicit })`.
194    ///
195    /// `base` and `reading` carry the [`borrowed::NonEmpty`]
196    /// invariant. Phase 3 only emits Ruby once both are non-empty,
197    /// so this `expect` is a contract-check; an empty payload here
198    /// signals a classifier bug.
199    ///
200    /// # Panics
201    ///
202    /// Panics if `base` or `reading` is empty. Phase 3 emit-sites
203    /// classify only after the body is populated, so the panic
204    /// represents a pipeline-internal bug — the
205    /// [`borrowed::NonEmpty`] payload encodes this invariant at the
206    /// type level.
207    #[must_use]
208    pub fn ruby(
209        &self,
210        base: borrowed::Content<'a>,
211        reading: borrowed::Content<'a>,
212        delim_explicit: bool,
213    ) -> borrowed::AozoraNode<'a> {
214        let base =
215            borrowed::NonEmpty::new(base).expect("Phase 3 must emit Ruby with non-empty base");
216        let reading = borrowed::NonEmpty::new(reading)
217            .expect("Phase 3 must emit Ruby with non-empty reading");
218        borrowed::AozoraNode::Ruby(self.arena.alloc(borrowed::Ruby {
219            base,
220            reading,
221            delim_explicit,
222        }))
223    }
224
225    /// `AozoraNode::Bouten(Bouten { kind, target, position,
226    /// consumed_predecessor })`.
227    ///
228    /// `target` carries the [`borrowed::NonEmpty`] invariant —
229    /// Phase 3 resolves the forward reference before emitting.
230    ///
231    /// `consumed_predecessor` is `true` when the classifier pulled
232    /// the node's source span back over the literal occurrence of
233    /// `target` that sits immediately before the `[`. See the field
234    /// docstring on [`borrowed::Bouten`] for the serializer
235    /// round-trip contract that depends on this flag.
236    ///
237    /// # Panics
238    ///
239    /// Panics if `target` is empty. The forward-reference resolver
240    /// in Phase 3 always lands a non-empty target before emit; an
241    /// empty payload here signals a classifier bug.
242    #[must_use]
243    #[allow(
244        clippy::too_many_arguments,
245        reason = "every parameter is part of the public bouten contract — kind / target / position / consumed_predecessor each carry independent semantics and grouping them into a builder would add a layer without saving the caller anything"
246    )]
247    pub fn bouten(
248        &self,
249        kind: BoutenKind,
250        target: borrowed::Content<'a>,
251        position: BoutenPosition,
252        consumed_predecessor: bool,
253    ) -> borrowed::AozoraNode<'a> {
254        let target = borrowed::NonEmpty::new(target)
255            .expect("Phase 3 must emit Bouten with a resolved non-empty target");
256        borrowed::AozoraNode::Bouten(self.arena.alloc(borrowed::Bouten {
257            kind,
258            target,
259            position,
260            consumed_predecessor,
261        }))
262    }
263
264    /// `AozoraNode::TateChuYoko(TateChuYoko { text,
265    /// consumed_predecessor })`.
266    ///
267    /// `text` carries the [`borrowed::NonEmpty`] invariant.
268    /// `consumed_predecessor` mirrors [`Self::bouten`]'s flag.
269    ///
270    /// # Panics
271    ///
272    /// Panics if `text` is empty.
273    #[must_use]
274    pub fn tate_chu_yoko(
275        &self,
276        text: borrowed::Content<'a>,
277        consumed_predecessor: bool,
278    ) -> borrowed::AozoraNode<'a> {
279        let text = borrowed::NonEmpty::new(text)
280            .expect("Phase 3 must emit TateChuYoko with non-empty text");
281        borrowed::AozoraNode::TateChuYoko(self.arena.alloc(borrowed::TateChuYoko {
282            text,
283            consumed_predecessor,
284        }))
285    }
286
287    /// `AozoraNode::Gaiji(g)`.
288    #[must_use]
289    pub fn gaiji(&self, g: &'a borrowed::Gaiji<'a>) -> borrowed::AozoraNode<'a> {
290        borrowed::AozoraNode::Gaiji(g)
291    }
292
293    /// `AozoraNode::Indent(i)`.
294    #[must_use]
295    pub fn indent(&self, i: Indent) -> borrowed::AozoraNode<'a> {
296        borrowed::AozoraNode::Indent(i)
297    }
298
299    /// `AozoraNode::AlignEnd(a)`.
300    #[must_use]
301    pub fn align_end(&self, a: AlignEnd) -> borrowed::AozoraNode<'a> {
302        borrowed::AozoraNode::AlignEnd(a)
303    }
304
305    /// `AozoraNode::Warichu(Warichu { upper, lower })`.
306    #[must_use]
307    pub fn warichu(
308        &self,
309        upper: borrowed::Content<'a>,
310        lower: borrowed::Content<'a>,
311    ) -> borrowed::AozoraNode<'a> {
312        borrowed::AozoraNode::Warichu(self.arena.alloc(borrowed::Warichu { upper, lower }))
313    }
314
315    /// `AozoraNode::Keigakomi(k)`.
316    #[must_use]
317    pub fn keigakomi(&self, k: Keigakomi) -> borrowed::AozoraNode<'a> {
318        borrowed::AozoraNode::Keigakomi(k)
319    }
320
321    /// `AozoraNode::PageBreak`.
322    #[must_use]
323    pub fn page_break(&self) -> borrowed::AozoraNode<'a> {
324        borrowed::AozoraNode::PageBreak
325    }
326
327    /// `AozoraNode::SectionBreak(k)`.
328    #[must_use]
329    pub fn section_break(&self, k: SectionKind) -> borrowed::AozoraNode<'a> {
330        borrowed::AozoraNode::SectionBreak(k)
331    }
332
333    /// `AozoraNode::AozoraHeading(AozoraHeading { kind, text })`.
334    ///
335    /// `text` carries the [`borrowed::NonEmpty`] invariant.
336    ///
337    /// # Panics
338    ///
339    /// Panics if `text` is empty.
340    #[must_use]
341    pub fn aozora_heading(
342        &self,
343        kind: AozoraHeadingKind,
344        text: borrowed::Content<'a>,
345    ) -> borrowed::AozoraNode<'a> {
346        let text = borrowed::NonEmpty::new(text)
347            .expect("Phase 3 must emit AozoraHeading with non-empty text");
348        borrowed::AozoraNode::AozoraHeading(
349            self.arena.alloc(borrowed::AozoraHeading { kind, text }),
350        )
351    }
352
353    /// `AozoraNode::HeadingHint(HeadingHint { level, target })`.
354    ///
355    /// `target` carries the [`borrowed::NonEmptyStr`] invariant.
356    ///
357    /// # Panics
358    ///
359    /// Panics if `target` is empty. Phase 3 emits the hint only
360    /// after the forward-reference target lands non-empty; an empty
361    /// payload here signals a classifier bug.
362    pub fn heading_hint(&mut self, level: u8, target: &str) -> borrowed::AozoraNode<'a> {
363        let target = borrowed::NonEmptyStr::new(self.interner.intern(target))
364            .expect("Phase 3 must emit HeadingHint with non-empty target");
365        borrowed::AozoraNode::HeadingHint(self.arena.alloc(borrowed::HeadingHint { level, target }))
366    }
367
368    /// `AozoraNode::Sashie(Sashie { file, caption })`.
369    ///
370    /// `file` carries the [`borrowed::NonEmptyStr`] invariant.
371    ///
372    /// # Panics
373    ///
374    /// Panics if `file` is empty.
375    pub fn sashie(
376        &mut self,
377        file: &str,
378        caption: Option<borrowed::Content<'a>>,
379    ) -> borrowed::AozoraNode<'a> {
380        let file = borrowed::NonEmptyStr::new(self.interner.intern(file))
381            .expect("Phase 3 must emit Sashie with non-empty file path");
382        borrowed::AozoraNode::Sashie(self.arena.alloc(borrowed::Sashie { file, caption }))
383    }
384
385    /// `AozoraNode::Kaeriten(Kaeriten { mark })`.
386    ///
387    /// `mark` carries the [`borrowed::NonEmptyStr`] invariant.
388    ///
389    /// # Panics
390    ///
391    /// Panics if `mark` is empty.
392    pub fn kaeriten(&mut self, mark: &str) -> borrowed::AozoraNode<'a> {
393        let mark = borrowed::NonEmptyStr::new(self.interner.intern(mark))
394            .expect("Phase 3 must emit Kaeriten with non-empty mark");
395        borrowed::AozoraNode::Kaeriten(self.arena.alloc(borrowed::Kaeriten { mark }))
396    }
397
398    /// `AozoraNode::Annotation(a)`.
399    #[must_use]
400    pub fn annotation(&self, a: &'a borrowed::Annotation<'a>) -> borrowed::AozoraNode<'a> {
401        borrowed::AozoraNode::Annotation(a)
402    }
403
404    /// `AozoraNode::DoubleRuby(DoubleRuby { content })`.
405    ///
406    /// `content` carries the [`borrowed::NonEmpty`] invariant — Phase 3
407    /// pre-filters `《《》》` with empty body into plain text so this
408    /// path is never reached with an empty payload.
409    ///
410    /// # Panics
411    ///
412    /// Panics if `content` is empty. Phase 3's pre-filter is the
413    /// gate; an empty payload here signals a classifier bug.
414    #[must_use]
415    pub fn double_ruby(&self, content: borrowed::Content<'a>) -> borrowed::AozoraNode<'a> {
416        let content = borrowed::NonEmpty::new(content)
417            .expect("Phase 3 pre-filters empty DoubleRuby into plain");
418        borrowed::AozoraNode::DoubleRuby(self.arena.alloc(borrowed::DoubleRuby { content }))
419    }
420
421    /// `AozoraNode::Container(c)`.
422    #[must_use]
423    pub fn container(&self, c: Container) -> borrowed::AozoraNode<'a> {
424        borrowed::AozoraNode::Container(c)
425    }
426}
427
428#[cfg(test)]
429mod tests {
430    //! Per-variant round-trip tests for `BorrowedAllocator`.
431    //!
432    //! Each test constructs one `borrowed::AozoraNode<'a>` via the
433    //! allocator and asserts the resulting payload fields match what
434    //! we asked for. Together they cover all 17 node variants plus
435    //! content / segment composition + interner dedup.
436
437    use core::ptr;
438
439    use super::*;
440    use crate::borrowed;
441    use crate::{
442        AlignEnd, AnnotationKind, AozoraHeadingKind, BoutenKind, BoutenPosition, Container,
443        ContainerKind, Indent, Keigakomi, SectionKind,
444    };
445
446    fn fresh_alloc(arena: &Arena) -> BorrowedAllocator<'_> {
447        BorrowedAllocator::new(arena)
448    }
449
450    #[test]
451    fn ruby_round_trip() {
452        let arena = Arena::new();
453        let mut a = fresh_alloc(&arena);
454        let base = a.content_plain("青梅");
455        let reading = a.content_plain("おうめ");
456        let n = a.ruby(base, reading, true);
457        match n {
458            borrowed::AozoraNode::Ruby(r) => {
459                assert_eq!(r.base.as_plain(), Some("青梅"));
460                assert_eq!(r.reading.as_plain(), Some("おうめ"));
461                assert!(r.delim_explicit);
462            }
463            other => panic!("expected Ruby, got {other:?}"),
464        }
465    }
466
467    #[test]
468    fn bouten_round_trip() {
469        let arena = Arena::new();
470        let mut a = fresh_alloc(&arena);
471        let target = a.content_plain("青空");
472        let n = a.bouten(BoutenKind::Goma, target, BoutenPosition::Right, false);
473        match n {
474            borrowed::AozoraNode::Bouten(b) => {
475                assert_eq!(b.kind, BoutenKind::Goma);
476                assert_eq!(b.target.as_plain(), Some("青空"));
477                assert_eq!(b.position, BoutenPosition::Right);
478                assert!(!b.consumed_predecessor);
479            }
480            other => panic!("expected Bouten, got {other:?}"),
481        }
482    }
483
484    #[test]
485    fn tate_chu_yoko_round_trip() {
486        let arena = Arena::new();
487        let mut a = fresh_alloc(&arena);
488        let text = a.content_plain("12");
489        let n = a.tate_chu_yoko(text, false);
490        match n {
491            borrowed::AozoraNode::TateChuYoko(t) => {
492                assert_eq!(t.text.as_plain(), Some("12"));
493            }
494            other => panic!("expected TateChuYoko, got {other:?}"),
495        }
496    }
497
498    #[test]
499    fn gaiji_full_metadata() {
500        let arena = Arena::new();
501        let mut a = fresh_alloc(&arena);
502        let g = a.make_gaiji(
503            "木+吶のつくり",
504            Some(Resolved::Char('𠀋')),
505            Some("第3水準1-85-54"),
506        );
507        let n = a.gaiji(g);
508        match n {
509            borrowed::AozoraNode::Gaiji(gn) => {
510                assert_eq!(gn.description, "木+吶のつくり");
511                assert_eq!(gn.ucs, Some(Resolved::Char('𠀋')));
512                assert_eq!(gn.mencode, Some("第3水準1-85-54"));
513            }
514            other => panic!("expected Gaiji, got {other:?}"),
515        }
516    }
517
518    #[test]
519    fn gaiji_no_mencode() {
520        let arena = Arena::new();
521        let mut a = fresh_alloc(&arena);
522        let g = a.make_gaiji("desc", None, None);
523        let n = a.gaiji(g);
524        match n {
525            borrowed::AozoraNode::Gaiji(gn) => {
526                assert_eq!(gn.description, "desc");
527                assert!(gn.ucs.is_none());
528                assert!(gn.mencode.is_none());
529            }
530            other => panic!("expected Gaiji, got {other:?}"),
531        }
532    }
533
534    #[test]
535    fn indent_round_trip() {
536        let arena = Arena::new();
537        let a = fresh_alloc(&arena);
538        let n = a.indent(Indent { amount: 3 });
539        assert!(matches!(
540            n,
541            borrowed::AozoraNode::Indent(Indent { amount: 3 })
542        ));
543    }
544
545    #[test]
546    fn align_end_round_trip() {
547        let arena = Arena::new();
548        let a = fresh_alloc(&arena);
549        let n = a.align_end(AlignEnd { offset: 2 });
550        assert!(matches!(
551            n,
552            borrowed::AozoraNode::AlignEnd(AlignEnd { offset: 2 })
553        ));
554    }
555
556    #[test]
557    fn warichu_round_trip() {
558        let arena = Arena::new();
559        let mut a = fresh_alloc(&arena);
560        let upper = a.content_plain("上");
561        let lower = a.content_plain("下");
562        let n = a.warichu(upper, lower);
563        match n {
564            borrowed::AozoraNode::Warichu(w) => {
565                assert_eq!(w.upper.as_plain(), Some("上"));
566                assert_eq!(w.lower.as_plain(), Some("下"));
567            }
568            other => panic!("expected Warichu, got {other:?}"),
569        }
570    }
571
572    #[test]
573    fn keigakomi_round_trip() {
574        let arena = Arena::new();
575        let a = fresh_alloc(&arena);
576        let n = a.keigakomi(Keigakomi);
577        assert!(matches!(n, borrowed::AozoraNode::Keigakomi(Keigakomi)));
578    }
579
580    #[test]
581    fn page_break_round_trip() {
582        let arena = Arena::new();
583        let a = fresh_alloc(&arena);
584        let n = a.page_break();
585        assert!(matches!(n, borrowed::AozoraNode::PageBreak));
586    }
587
588    #[test]
589    fn section_break_round_trip() {
590        let arena = Arena::new();
591        let a = fresh_alloc(&arena);
592        let n = a.section_break(SectionKind::Choho);
593        assert!(matches!(
594            n,
595            borrowed::AozoraNode::SectionBreak(SectionKind::Choho)
596        ));
597    }
598
599    #[test]
600    fn aozora_heading_round_trip() {
601        let arena = Arena::new();
602        let mut a = fresh_alloc(&arena);
603        let text = a.content_plain("見出し");
604        let n = a.aozora_heading(AozoraHeadingKind::Window, text);
605        match n {
606            borrowed::AozoraNode::AozoraHeading(h) => {
607                assert_eq!(h.kind, AozoraHeadingKind::Window);
608                assert_eq!(h.text.as_plain(), Some("見出し"));
609            }
610            other => panic!("expected AozoraHeading, got {other:?}"),
611        }
612    }
613
614    #[test]
615    fn heading_hint_round_trip() {
616        let arena = Arena::new();
617        let mut a = fresh_alloc(&arena);
618        let n = a.heading_hint(2, "対象");
619        match n {
620            borrowed::AozoraNode::HeadingHint(h) => {
621                assert_eq!(h.level, 2);
622                assert_eq!(h.target.as_str(), "対象");
623            }
624            other => panic!("expected HeadingHint, got {other:?}"),
625        }
626    }
627
628    #[test]
629    fn sashie_with_caption() {
630        let arena = Arena::new();
631        let mut a = fresh_alloc(&arena);
632        let caption = a.content_plain("挿絵キャプション");
633        let n = a.sashie("fig01.png", Some(caption));
634        match n {
635            borrowed::AozoraNode::Sashie(s) => {
636                assert_eq!(s.file.as_str(), "fig01.png");
637                assert_eq!(
638                    s.caption.and_then(borrowed::Content::as_plain),
639                    Some("挿絵キャプション")
640                );
641            }
642            other => panic!("expected Sashie, got {other:?}"),
643        }
644    }
645
646    #[test]
647    fn sashie_without_caption() {
648        let arena = Arena::new();
649        let mut a = fresh_alloc(&arena);
650        let n = a.sashie("fig02.png", None);
651        match n {
652            borrowed::AozoraNode::Sashie(s) => {
653                assert_eq!(s.file.as_str(), "fig02.png");
654                assert!(s.caption.is_none());
655            }
656            other => panic!("expected Sashie, got {other:?}"),
657        }
658    }
659
660    #[test]
661    fn kaeriten_round_trip() {
662        let arena = Arena::new();
663        let mut a = fresh_alloc(&arena);
664        let n = a.kaeriten("一");
665        match n {
666            borrowed::AozoraNode::Kaeriten(k) => assert_eq!(k.mark.as_str(), "一"),
667            other => panic!("expected Kaeriten, got {other:?}"),
668        }
669    }
670
671    #[test]
672    fn annotation_round_trip() {
673        let arena = Arena::new();
674        let mut a = fresh_alloc(&arena);
675        let payload = a.make_annotation("[#X]", AnnotationKind::Unknown);
676        let n = a.annotation(payload);
677        match n {
678            borrowed::AozoraNode::Annotation(an) => {
679                assert_eq!(an.raw.as_str(), "[#X]");
680                assert_eq!(an.kind, AnnotationKind::Unknown);
681            }
682            other => panic!("expected Annotation, got {other:?}"),
683        }
684    }
685
686    #[test]
687    fn double_ruby_round_trip() {
688        let arena = Arena::new();
689        let mut a = fresh_alloc(&arena);
690        let content = a.content_plain("重要");
691        let n = a.double_ruby(content);
692        match n {
693            borrowed::AozoraNode::DoubleRuby(d) => {
694                assert_eq!(d.content.as_plain(), Some("重要"));
695            }
696            other => panic!("expected DoubleRuby, got {other:?}"),
697        }
698    }
699
700    #[test]
701    fn container_round_trip() {
702        let arena = Arena::new();
703        let a = fresh_alloc(&arena);
704        let c = Container {
705            kind: ContainerKind::Indent { amount: 1 },
706        };
707        let n = a.container(c);
708        assert!(matches!(n, borrowed::AozoraNode::Container(cc) if cc == c));
709    }
710
711    // ---------------------------------------------------------------------
712    // Content / segment composition (canonicalisation rules)
713    // ---------------------------------------------------------------------
714
715    #[test]
716    fn content_plain_empty_collapses_to_empty_segments() {
717        let arena = Arena::new();
718        let mut a = fresh_alloc(&arena);
719        let c = a.content_plain("");
720        assert!(matches!(c, borrowed::Content::Segments(s) if s.is_empty()));
721    }
722
723    #[test]
724    fn content_plain_nonempty_returns_plain_variant() {
725        let arena = Arena::new();
726        let mut a = fresh_alloc(&arena);
727        let c = a.content_plain("hello");
728        assert_eq!(c.as_plain(), Some("hello"));
729    }
730
731    #[test]
732    fn content_segments_preserves_order_and_kind() {
733        let arena = Arena::new();
734        let mut a = fresh_alloc(&arena);
735        let g = a.make_gaiji("X", None, None);
736        let seg_g = a.seg_gaiji(g);
737        let seg_t1 = a.seg_text("before ");
738        let seg_t2 = a.seg_text(" after");
739        let ann = a.make_annotation("[#X]", AnnotationKind::Unknown);
740        let seg_a = a.seg_annotation(ann);
741        let c = a.content_segments(&[seg_t1, seg_g, seg_t2, seg_a]);
742        let borrowed::Content::Segments(segs) = c else {
743            panic!("expected Segments variant for mixed-kind input");
744        };
745        assert_eq!(segs.len(), 4);
746        assert!(matches!(&segs[0], borrowed::Segment::Text(t) if *t == "before "));
747        assert!(matches!(&segs[1], borrowed::Segment::Gaiji(_)));
748        assert!(matches!(&segs[2], borrowed::Segment::Text(t) if *t == " after"));
749        assert!(matches!(&segs[3], borrowed::Segment::Annotation(_)));
750    }
751
752    #[test]
753    fn content_segments_all_text_collapses_to_plain() {
754        let arena = Arena::new();
755        let mut a = fresh_alloc(&arena);
756        let s1 = a.seg_text("hi ");
757        let s2 = a.seg_text("there");
758        let c = a.content_segments(&[s1, s2]);
759        assert_eq!(c.as_plain(), Some("hi there"));
760    }
761
762    #[test]
763    fn content_segments_empty_collapses_to_empty_segments() {
764        let arena = Arena::new();
765        let mut a = fresh_alloc(&arena);
766        let c = a.content_segments(&[]);
767        assert!(matches!(c, borrowed::Content::Segments(s) if s.is_empty()));
768    }
769
770    // ---------------------------------------------------------------------
771    // Interner is wired up — repeated short strings share a single
772    // arena slot.
773    // ---------------------------------------------------------------------
774
775    #[test]
776    fn interner_dedups_repeated_readings() {
777        let arena = Arena::new();
778        let mut a = fresh_alloc(&arena);
779        let base1 = a.content_plain("青梅");
780        let reading1 = a.content_plain("おうめ");
781        let n1 = a.ruby(base1, reading1, false);
782        let base2 = a.content_plain("青梅");
783        let reading2 = a.content_plain("おうめ");
784        let n2 = a.ruby(base2, reading2, false);
785        let borrowed::AozoraNode::Ruby(r1) = n1 else {
786            unreachable!();
787        };
788        let borrowed::AozoraNode::Ruby(r2) = n2 else {
789            unreachable!();
790        };
791        let s1 = r1.reading.as_plain().expect("plain");
792        let s2 = r2.reading.as_plain().expect("plain");
793        assert_eq!(
794            s1.as_ptr(),
795            s2.as_ptr(),
796            "interner must dedup repeated readings"
797        );
798    }
799
800    #[test]
801    fn arena_accessor_returns_construction_arena() {
802        let arena = Arena::new();
803        let a = fresh_alloc(&arena);
804        assert!(ptr::eq(a.arena(), &raw const arena));
805    }
806}