Skip to main content

hwpforge_core/
inline.rs

1//! Rich inline content carried inside a single [`Run`](crate::run::Run).
2//!
3//! Most runs hold plain text (modeled as `RunContent::Text(String)`),
4//! but a handful of HWPX inline elements carry per-occurrence attributes
5//! that cannot survive a `String` round-trip:
6//!
7//! - `<hp:tab width="..." leader="..." type="..."/>` — explicit tab stop
8//!   position, leader glyph, and alignment, emitted inside `<hp:t>` mixed
9//!   content
10//!
11//! For runs that need to carry any such attribute payload, projection
12//! emits `RunContent::InlineText(InlineText)` (a non-exhaustive enum
13//! variant added alongside `Text(String)` for backward compatibility).
14//! Plain text runs continue to use `Text(String)` so the 18-file
15//! `RunContent::Text` surface is undisturbed.
16//!
17//! See `.docs/research/2026-05-26_tab_fidelity_bugs.md` (Bug A / Phase 2)
18//! for the underlying investigation.
19
20use hwpforge_foundation::HwpUnit;
21use schemars::JsonSchema;
22use serde::{Deserialize, Serialize};
23
24/// A sequence of inline segments that compose a single `<hp:t>` element
25/// with mixed content.
26///
27/// `segments` is normalized so adjacent [`InlineSegment::Plain`] entries
28/// are merged. Callers that only need the plain-text equivalent (no tab
29/// attributes) can use [`InlineText::plain_text`] which renders each
30/// [`InlineSegment::Tab`] as a `\t` character.
31#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema, Default)]
32pub struct InlineText {
33    /// Ordered inline segments. Always non-empty for a meaningful run;
34    /// projection that produces an empty `InlineText` should emit
35    /// `RunContent::Text(String::new())` instead.
36    pub segments: Vec<InlineSegment>,
37}
38
39/// One typed segment inside an [`InlineText`].
40///
41/// Marked `#[non_exhaustive]` so future inline elements (ruby, hyphen
42/// hints, etc.) can be added without breaking existing matches.
43#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
44#[non_exhaustive]
45pub enum InlineSegment {
46    /// Plain text. May contain `\n`, `U+00A0` (NBSP), `U+001F` (fwSpace)
47    /// — those are still handled as character-level sentinels by the
48    /// HWPX encoder, matching the `Text(String)` behavior.
49    Plain(String),
50    /// An inline tab with explicit per-occurrence attributes.
51    Tab(InlineTabAttr),
52}
53
54/// Per-occurrence attributes for an inline `<hp:tab>` element.
55///
56/// Mirrors the HWP5 0x09 control char's 14-byte inline payload (only
57/// the first 6 bytes are meaningful per HWP 5.0 spec §1.5):
58///
59/// | offset | width    | meaning                                |
60/// |-------:|----------|----------------------------------------|
61/// | 0..4   | `u32 LE` | `width` — HwpUnit, distance to the stop |
62/// | 4      | `u8`     | `leader` — fill glyph (raw HWP5 enum)   |
63/// | 5      | `u8`     | `tab_type` — 0=Left, 1=Right, 2=Center, 3=Decimal |
64///
65/// The raw `leader` / `tab_type` integers are preserved verbatim and
66/// emitted into HWPX as `<hp:tab leader="3" type="1"/>` — Hancom uses
67/// the HWP5 numeric encoding directly for inline tabs (unlike the
68/// header-level `<hh:tabItem>` which uses enum strings).
69#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
70pub struct InlineTabAttr {
71    /// Position (HwpUnit) at which the tab stops. Truth-pair fixtures
72    /// show this is the original HWP5 raw value, NOT the halved
73    /// HwpUnitChar form used by `<hh:tabPr>` stops.
74    pub width: HwpUnit,
75    /// Raw HWP5 fill_type byte (0..=4 known; openhwp: 0=None, 1=Dot,
76    /// 2=LongDash, 3=Dash, 4=Underscore).
77    pub leader: u8,
78    /// Raw HWP5 tab_type byte (0=Left, 1=Right, 2=Center, 3=Decimal).
79    pub tab_type: u8,
80}
81
82impl InlineTabAttr {
83    /// Returns `true` when all attributes are zero — i.e. a default tab
84    /// that is semantically identical to a bare `<hp:tab/>` and does
85    /// not need the rich [`InlineSegment::Tab`] representation.
86    pub fn is_default(&self) -> bool {
87        self.width.as_i32() == 0 && self.leader == 0 && self.tab_type == 0
88    }
89}
90
91impl InlineText {
92    /// Constructs an [`InlineText`] from raw segments, dropping empty
93    /// `Plain` runs and merging adjacent ones to keep the canonical
94    /// form predictable for equality comparisons and HWPX emit.
95    pub fn from_segments(segments: impl IntoIterator<Item = InlineSegment>) -> Self {
96        let mut out: Vec<InlineSegment> = Vec::new();
97        for seg in segments {
98            match seg {
99                InlineSegment::Plain(s) if s.is_empty() => continue,
100                InlineSegment::Plain(s) => match out.last_mut() {
101                    Some(InlineSegment::Plain(prev)) => prev.push_str(&s),
102                    _ => out.push(InlineSegment::Plain(s)),
103                },
104                other => out.push(other),
105            }
106        }
107        Self { segments: out }
108    }
109
110    /// Returns the plain-text equivalent: each [`InlineSegment::Tab`]
111    /// becomes a `\t` character. Useful for callers (Markdown bridge,
112    /// CLI search, etc.) that cannot represent tab attributes.
113    pub fn plain_text(&self) -> String {
114        let mut out = String::new();
115        for seg in &self.segments {
116            match seg {
117                InlineSegment::Plain(s) => out.push_str(s),
118                InlineSegment::Tab(_) => out.push('\t'),
119            }
120        }
121        out
122    }
123
124    /// Returns `true` when the inline text carries no information that
125    /// `RunContent::Text(String)` could not also represent — i.e. every
126    /// segment is `Plain` or a default tab. Projection can downgrade
127    /// such [`InlineText`] back to `Text(String)` to keep the audit
128    /// baseline simple.
129    pub fn is_downgradable(&self) -> bool {
130        self.segments.iter().all(|seg| match seg {
131            InlineSegment::Plain(_) => true,
132            InlineSegment::Tab(attr) => attr.is_default(),
133        })
134    }
135}
136
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a tab attribute from raw values. Panics if `HwpUnit::new`
    /// rejects `width`.
    fn attr(width: i32, leader: u8, tab_type: u8) -> InlineTabAttr {
        let width = HwpUnit::new(width).unwrap();
        InlineTabAttr { width, leader, tab_type }
    }

    /// Shorthand for a plain-text segment.
    fn plain(text: &str) -> InlineSegment {
        InlineSegment::Plain(text.to_owned())
    }

    /// Shorthand for a tab segment with the given raw attributes.
    fn tab(width: i32, leader: u8, tab_type: u8) -> InlineSegment {
        InlineSegment::Tab(attr(width, leader, tab_type))
    }

    #[test]
    fn from_segments_merges_adjacent_plain_runs() {
        let input = vec![plain("LE"), plain("FT"), tab(12488, 3, 1), plain("RI"), plain("GHT")];
        let expected = vec![plain("LEFT"), tab(12488, 3, 1), plain("RIGHT")];

        let merged = InlineText::from_segments(input);

        assert_eq!(merged.segments, expected);
    }

    #[test]
    fn from_segments_drops_empty_plain_entries() {
        let input = vec![plain(""), tab(0, 0, 0), plain("")];

        let cleaned = InlineText::from_segments(input);

        assert_eq!(cleaned.segments, vec![tab(0, 0, 0)]);
    }

    #[test]
    fn plain_text_renders_tabs_as_horizontal_tab_chars() {
        let input = vec![plain("a"), tab(12488, 3, 1), plain("b"), tab(0, 0, 0), plain("c")];

        let flattened = InlineText::from_segments(input).plain_text();

        assert_eq!(flattened, "a\tb\tc");
    }

    #[test]
    fn is_default_attr() {
        assert!(attr(0, 0, 0).is_default());

        // A single non-zero field is enough to make the tab non-default.
        for non_default in [attr(1, 0, 0), attr(0, 1, 0), attr(0, 0, 1)] {
            assert!(!non_default.is_default(), "{:?} must not be default", non_default);
        }
    }

    #[test]
    fn is_downgradable_true_for_plain_and_default_tab_only() {
        let plain_only = InlineText::from_segments(vec![plain("hi")]);
        assert!(plain_only.is_downgradable());

        let with_default_tab =
            InlineText::from_segments(vec![plain("a"), tab(0, 0, 0), plain("b")]);
        assert!(with_default_tab.is_downgradable());

        let with_rich_tab = InlineText::from_segments(vec![tab(12488, 3, 1)]);
        assert!(!with_rich_tab.is_downgradable());
    }
}