pdf_engine/text_info.rs
1//! Canonical, serializable wire form of an extracted text span.
2//!
3//! [`TextSpanInfo`] is the **single source of truth** for the shape of a
4//! positioned text span as delivered to every SDK binding (Tauri/serde,
5//! Node/napi, WASM). Bindings construct it from a [`TextSpan`] via [`From`]
6//! and then serialize or re-shape it for their target runtime, instead of each
7//! maintaining a separate field list and conversion logic.
8//!
9//! The `serde` derive is feature-gated so the core engine stays
10//! serialization-agnostic; the struct and the `From` conversion are always
11//! available, so non-serde bindings (e.g. napi) consume the same source.
12
13use crate::text::{FontMetrics, TextSpan, WidthSource};
14
15/// A positioned text span in PDF user space (origin bottom-left, y up).
16///
17/// JSON key renames mirror the established wire contract consumed by the
18/// editor; all optional metadata is omitted from JSON when absent, so existing
19/// clients are unaffected.
20#[derive(Debug, Clone)]
21#[cfg_attr(feature = "serde", derive(serde::Serialize))]
22pub struct TextSpanInfo {
23 /// The extracted text of the span.
24 pub text: String,
25 /// X position in PDF user space.
26 pub x: f64,
27 /// Y position in PDF user space (baseline origin).
28 pub y: f64,
29 /// Span width in user space. Falls back to an estimate (½ em per character)
30 /// when no measured width is available.
31 pub width: f64,
32 /// Span height in user space.
33 ///
34 /// Mirrors the existing wire contract, which reports the font size (ascent
35 /// extent) here. NOTE: this preserves prior behaviour exactly; richer
36 /// ascent/descent metrics arrive in a later milestone.
37 pub height: f64,
38 /// Font size in points.
39 pub font_size: f64,
40
41 /// PostScript font name with any 6-character subset prefix stripped.
42 /// Omitted from JSON when unknown.
43 #[cfg_attr(
44 feature = "serde",
45 serde(rename = "fontName", skip_serializing_if = "Option::is_none")
46 )]
47 pub font_name: Option<String>,
48 /// Inferred bold style.
49 #[cfg_attr(feature = "serde", serde(rename = "isBold"))]
50 pub is_bold: bool,
51 /// Inferred italic style.
52 #[cfg_attr(feature = "serde", serde(rename = "isItalic"))]
53 pub is_italic: bool,
54 /// Fill colour as sRGB `[r, g, b]` normalised to 0.0–1.0. Omitted from JSON
55 /// for pattern/shading paints.
56 #[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
57 pub color: Option<[f32; 3]>,
58
59 /// Whether glyph widths came from real font metrics or an estimate.
60 /// Serialised as `"Metric"` / `"Estimate"`.
61 #[cfg_attr(feature = "serde", serde(rename = "widthSource"))]
62 pub width_source: WidthSource,
63 /// Per-glyph bounding boxes `[x0, y0, x1, y1]` (y up). Omitted when empty.
64 #[cfg_attr(
65 feature = "serde",
66 serde(rename = "charBounds", skip_serializing_if = "Vec::is_empty")
67 )]
68 pub char_bounds: Vec<[f64; 4]>,
69
70 // ---- Golf 1 typographic metadata ----
71 /// Full affine transform `[a, b, c, d, e, f]` of the span's first glyph.
72 /// Captures rotation/shear discarded by `(x, y, fontSize)`.
73 #[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
74 pub transform: Option<[f64; 6]>,
75 /// Numeric font weight (~100–900) from embedded font data, if available.
76 #[cfg_attr(
77 feature = "serde",
78 serde(rename = "fontWeight", skip_serializing_if = "Option::is_none")
79 )]
80 pub font_weight: Option<u16>,
81 /// Serif flag from embedded font data, if available.
82 #[cfg_attr(
83 feature = "serde",
84 serde(rename = "isSerif", skip_serializing_if = "Option::is_none")
85 )]
86 pub is_serif: Option<bool>,
87 /// Monospace flag from embedded font data, if available.
88 #[cfg_attr(
89 feature = "serde",
90 serde(rename = "isMonospace", skip_serializing_if = "Option::is_none")
91 )]
92 pub is_monospace: Option<bool>,
93 /// Coarse PDF text render mode: `0` fill, `1` stroke, `3` invisible.
94 #[cfg_attr(
95 feature = "serde",
96 serde(rename = "renderMode", skip_serializing_if = "Option::is_none")
97 )]
98 pub render_mode: Option<u8>,
99
100 // ---- Golf 2 font metrics ----
101 /// Vertical font metrics (/1000 em) from the embedded font, if available.
102 #[cfg_attr(
103 feature = "serde",
104 serde(rename = "fontMetrics", skip_serializing_if = "Option::is_none")
105 )]
106 pub font_metrics: Option<FontMetrics>,
107}
108
109impl From<TextSpan> for TextSpanInfo {
110 /// Build the canonical wire form from an owned engine [`TextSpan`].
111 ///
112 /// Centralises the conversions that previously lived in each binding: the
113 /// ½-em width fallback, the RGBA-u8 → RGB-f32 colour normalisation, and the
114 /// height-reports-font-size contract.
115 fn from(s: TextSpan) -> Self {
116 let char_count = s.text.chars().count() as f64;
117 let width = if s.width > 0.0 {
118 s.width
119 } else {
120 s.font_size * 0.5 * char_count
121 };
122 let color = s
123 .color
124 .map(|[r, g, b, _a]| [r as f32 / 255.0, g as f32 / 255.0, b as f32 / 255.0]);
125 Self {
126 x: s.x,
127 y: s.y,
128 width,
129 height: s.font_size,
130 font_size: s.font_size,
131 font_name: s.font_name,
132 is_bold: s.is_bold,
133 is_italic: s.is_italic,
134 color,
135 width_source: s.width_source,
136 char_bounds: s.char_bounds,
137 transform: s.transform,
138 font_weight: s.font_weight,
139 is_serif: s.is_serif,
140 is_monospace: s.is_monospace,
141 render_mode: s.render_mode,
142 font_metrics: s.font_metrics,
143 text: s.text,
144 }
145 }
146}
147
148impl From<&TextSpan> for TextSpanInfo {
149 /// Build the canonical wire form from a borrowed [`TextSpan`] (clones).
150 fn from(s: &TextSpan) -> Self {
151 TextSpanInfo::from(s.clone())
152 }
153}