hwpforge_core/inline.rs
1//! Rich inline content carried inside a single [`Run`](crate::run::Run).
2//!
3//! Most runs hold plain text (modeled as `RunContent::Text(String)`),
4//! but a handful of HWPX inline elements carry per-occurrence attributes
5//! that cannot survive a `String` round-trip:
6//!
7//! - `<hp:tab width="..." leader="..." type="..."/>` — explicit tab stop
8//! position, leader glyph, and alignment, emitted inside `<hp:t>` mixed
9//! content
10//!
11//! For runs that need to carry any such attribute payload, projection
12//! emits `RunContent::InlineText(InlineText)` (a non-exhaustive enum
13//! variant added alongside `Text(String)` for backward compatibility).
14//! Plain text runs continue to use `Text(String)` so the 18-file
15//! `RunContent::Text` surface is undisturbed.
16//!
17//! See `.docs/research/2026-05-26_tab_fidelity_bugs.md` (Bug A / Phase 2)
18//! for the underlying investigation.
19
20use hwpforge_foundation::HwpUnit;
21use schemars::JsonSchema;
22use serde::{Deserialize, Serialize};
23
24/// A sequence of inline segments that compose a single `<hp:t>` element
25/// with mixed content.
26///
27/// `segments` is normalized so adjacent [`InlineSegment::Plain`] entries
28/// are merged. Callers that only need the plain-text equivalent (no tab
29/// attributes) can use [`InlineText::plain_text`] which renders each
30/// [`InlineSegment::Tab`] as a `\t` character.
31#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema, Default)]
32pub struct InlineText {
33 /// Ordered inline segments. Always non-empty for a meaningful run;
34 /// projection that produces an empty `InlineText` should emit
35 /// `RunContent::Text(String::new())` instead.
36 pub segments: Vec<InlineSegment>,
37}
38
39/// One typed segment inside an [`InlineText`].
40///
41/// Marked `#[non_exhaustive]` so future inline elements (ruby, hyphen
42/// hints, etc.) can be added without breaking existing matches.
43#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
44#[non_exhaustive]
45pub enum InlineSegment {
46 /// Plain text. May contain `\n`, `U+00A0` (NBSP), `U+001F` (fwSpace)
47 /// — those are still handled as character-level sentinels by the
48 /// HWPX encoder, matching the `Text(String)` behavior.
49 Plain(String),
50 /// An inline tab with explicit per-occurrence attributes.
51 Tab(InlineTabAttr),
52}
53
54/// Per-occurrence attributes for an inline `<hp:tab>` element.
55///
56/// Mirrors the HWP5 0x09 control char's 14-byte inline payload (only
57/// the first 6 bytes are meaningful per HWP 5.0 spec §1.5):
58///
59/// | offset | width | meaning |
60/// |-------:|----------|----------------------------------------|
61/// | 0..4 | `u32 LE` | `width` — HwpUnit, distance to the stop |
62/// | 4 | `u8` | `leader` — fill glyph (raw HWP5 enum) |
63/// | 5 | `u8` | `tab_type` — 0=Left, 1=Right, 2=Center, 3=Decimal |
64///
65/// The raw `leader` / `tab_type` integers are preserved verbatim and
66/// emitted into HWPX as `<hp:tab leader="3" type="1"/>` — Hancom uses
67/// the HWP5 numeric encoding directly for inline tabs (unlike the
68/// header-level `<hh:tabItem>` which uses enum strings).
69#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
70pub struct InlineTabAttr {
71 /// Position (HwpUnit) at which the tab stops. Truth-pair fixtures
72 /// show this is the original HWP5 raw value, NOT the halved
73 /// HwpUnitChar form used by `<hh:tabPr>` stops.
74 pub width: HwpUnit,
75 /// Raw HWP5 fill_type byte (0..=4 known; openhwp: 0=None, 1=Dot,
76 /// 2=LongDash, 3=Dash, 4=Underscore).
77 pub leader: u8,
78 /// Raw HWP5 tab_type byte (0=Left, 1=Right, 2=Center, 3=Decimal).
79 pub tab_type: u8,
80}
81
82impl InlineTabAttr {
83 /// Returns `true` when all attributes are zero — i.e. a default tab
84 /// that is semantically identical to a bare `<hp:tab/>` and does
85 /// not need the rich [`InlineSegment::Tab`] representation.
86 pub fn is_default(&self) -> bool {
87 self.width.as_i32() == 0 && self.leader == 0 && self.tab_type == 0
88 }
89}
90
91impl InlineText {
92 /// Constructs an [`InlineText`] from raw segments, dropping empty
93 /// `Plain` runs and merging adjacent ones to keep the canonical
94 /// form predictable for equality comparisons and HWPX emit.
95 pub fn from_segments(segments: impl IntoIterator<Item = InlineSegment>) -> Self {
96 let mut out: Vec<InlineSegment> = Vec::new();
97 for seg in segments {
98 match seg {
99 InlineSegment::Plain(s) if s.is_empty() => continue,
100 InlineSegment::Plain(s) => match out.last_mut() {
101 Some(InlineSegment::Plain(prev)) => prev.push_str(&s),
102 _ => out.push(InlineSegment::Plain(s)),
103 },
104 other => out.push(other),
105 }
106 }
107 Self { segments: out }
108 }
109
110 /// Returns the plain-text equivalent: each [`InlineSegment::Tab`]
111 /// becomes a `\t` character. Useful for callers (Markdown bridge,
112 /// CLI search, etc.) that cannot represent tab attributes.
113 pub fn plain_text(&self) -> String {
114 let mut out = String::new();
115 for seg in &self.segments {
116 match seg {
117 InlineSegment::Plain(s) => out.push_str(s),
118 InlineSegment::Tab(_) => out.push('\t'),
119 }
120 }
121 out
122 }
123
124 /// Returns `true` when the inline text carries no information that
125 /// `RunContent::Text(String)` could not also represent — i.e. every
126 /// segment is `Plain` or a default tab. Projection can downgrade
127 /// such [`InlineText`] back to `Text(String)` to keep the audit
128 /// baseline simple.
129 pub fn is_downgradable(&self) -> bool {
130 self.segments.iter().all(|seg| match seg {
131 InlineSegment::Plain(_) => true,
132 InlineSegment::Tab(attr) => attr.is_default(),
133 })
134 }
135}
136
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an [`InlineTabAttr`] from raw values; panics if
    /// `HwpUnit::new` rejects `width`.
    fn attr(width: i32, leader: u8, tab_type: u8) -> InlineTabAttr {
        InlineTabAttr {
            width: HwpUnit::new(width).expect("test width must be accepted by HwpUnit::new"),
            leader,
            tab_type,
        }
    }

    /// Shorthand for a `Plain` segment.
    fn plain(text: &str) -> InlineSegment {
        InlineSegment::Plain(text.into())
    }

    /// Shorthand for a `Tab` segment.
    fn tab(width: i32, leader: u8, tab_type: u8) -> InlineSegment {
        InlineSegment::Tab(attr(width, leader, tab_type))
    }

    #[test]
    fn from_segments_merges_adjacent_plain_runs() {
        let it = InlineText::from_segments([
            plain("LE"),
            plain("FT"),
            tab(12488, 3, 1),
            plain("RI"),
            plain("GHT"),
        ]);
        assert_eq!(it.segments, vec![plain("LEFT"), tab(12488, 3, 1), plain("RIGHT")]);
    }

    #[test]
    fn from_segments_drops_empty_plain_entries() {
        let it = InlineText::from_segments([plain(""), tab(0, 0, 0), plain("")]);
        assert_eq!(it.segments, vec![tab(0, 0, 0)]);
    }

    #[test]
    fn from_segments_merges_across_dropped_empty_plain() {
        // The empty entry must vanish without acting as a merge barrier.
        let it = InlineText::from_segments([plain("a"), plain(""), plain("b")]);
        assert_eq!(it.segments, vec![plain("ab")]);
    }

    #[test]
    fn from_segments_keeps_consecutive_tabs() {
        let it = InlineText::from_segments([tab(0, 0, 0), tab(12488, 3, 1)]);
        assert_eq!(it.segments, vec![tab(0, 0, 0), tab(12488, 3, 1)]);
        assert_eq!(it.plain_text(), "\t\t");
    }

    #[test]
    fn from_segments_of_nothing_equals_default() {
        let it = InlineText::from_segments(Vec::<InlineSegment>::new());
        assert_eq!(it, InlineText::default());
        assert_eq!(it.plain_text(), "");
        // `all` over an empty list is vacuously true.
        assert!(it.is_downgradable());
    }

    #[test]
    fn plain_text_renders_tabs_as_horizontal_tab_chars() {
        let it = InlineText::from_segments([
            plain("a"),
            tab(12488, 3, 1),
            plain("b"),
            tab(0, 0, 0),
            plain("c"),
        ]);
        assert_eq!(it.plain_text(), "a\tb\tc");
    }

    #[test]
    fn is_default_attr() {
        assert!(attr(0, 0, 0).is_default());
        assert!(!attr(1, 0, 0).is_default());
        assert!(!attr(0, 1, 0).is_default());
        assert!(!attr(0, 0, 1).is_default());
    }

    #[test]
    fn is_downgradable_true_for_plain_and_default_tab_only() {
        assert!(InlineText::from_segments([plain("hi")]).is_downgradable());
        assert!(
            InlineText::from_segments([plain("a"), tab(0, 0, 0), plain("b")]).is_downgradable()
        );
        assert!(!InlineText::from_segments([tab(12488, 3, 1)]).is_downgradable());
    }

    #[test]
    fn is_downgradable_false_when_any_tab_is_rich() {
        let it = InlineText::from_segments([plain("a"), tab(0, 0, 0), plain("b"), tab(0, 1, 0)]);
        assert!(!it.is_downgradable());
    }
}
207}