Skip to main content

pdf_engine/
text_info.rs

1//! Canonical, serializable wire form of an extracted text span.
2//!
3//! [`TextSpanInfo`] is the **single source of truth** for the shape of a
4//! positioned text span as delivered to every SDK binding (Tauri/serde,
5//! Node/napi, WASM). Bindings construct it from a [`TextSpan`] via [`From`]
6//! and then serialize or re-shape it for their target runtime, instead of each
7//! maintaining a separate field list and conversion logic.
8//!
9//! The `serde` derive is feature-gated so the core engine stays
10//! serialization-agnostic; the struct and the `From` conversion are always
11//! available, so non-serde bindings (e.g. napi) consume the same source.
12
13use crate::text::{FontMetrics, TextSpan, WidthSource};
14
/// A positioned text span in PDF user space (origin bottom-left, y up).
///
/// JSON key renames mirror the established wire contract consumed by the
/// editor; all optional metadata is omitted from JSON when absent, so existing
/// clients are unaffected.
///
/// Only `Serialize` is derived (and only with the `serde` feature enabled);
/// there is no `Deserialize`, so this type is an output-only wire form.
/// Fields that carry no `rename` attribute are serialized under their Rust
/// field names, because no container-level `rename_all` is applied.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct TextSpanInfo {
    /// The extracted text of the span.
    pub text: String,
    /// X position in PDF user space.
    pub x: f64,
    /// Y position in PDF user space (baseline origin).
    pub y: f64,
    /// Span width in user space. Falls back to an estimate (½ em per character)
    /// when no measured width is available.
    ///
    /// The `From<TextSpan>` conversion treats any width that is not strictly
    /// positive as "not available" and counts characters as Rust `char`s.
    pub width: f64,
    /// Span height in user space.
    ///
    /// Mirrors the existing wire contract, which reports the font size (ascent
    /// extent) here. NOTE: this preserves prior behaviour exactly; richer
    /// ascent/descent metrics arrive in a later milestone.
    ///
    /// As populated by `From<TextSpan>`, this always equals [`Self::font_size`].
    pub height: f64,
    /// Font size in points.
    ///
    /// NOTE(review): there is no `rename` on this field, so its JSON key is
    /// `font_size` while later fields use camelCase keys. Presumably this is
    /// the established contract — confirm with the editor before changing.
    pub font_size: f64,

    /// PostScript font name with any 6-character subset prefix stripped.
    /// Omitted from JSON when unknown.
    ///
    /// The `From<TextSpan>` conversion copies the name verbatim, so the prefix
    /// stripping is expected to have happened upstream — TODO confirm.
    #[cfg_attr(
        feature = "serde",
        serde(rename = "fontName", skip_serializing_if = "Option::is_none")
    )]
    pub font_name: Option<String>,
    /// Inferred bold style. Always present in JSON as `isBold`.
    #[cfg_attr(feature = "serde", serde(rename = "isBold"))]
    pub is_bold: bool,
    /// Inferred italic style. Always present in JSON as `isItalic`.
    #[cfg_attr(feature = "serde", serde(rename = "isItalic"))]
    pub is_italic: bool,
    /// Fill colour as sRGB `[r, g, b]` normalised to 0.0–1.0. Omitted from JSON
    /// for pattern/shading paints.
    ///
    /// The `From<TextSpan>` conversion divides each of the first three source
    /// channels by 255 and discards the fourth (alpha) channel.
    #[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
    pub color: Option<[f32; 3]>,

    /// Whether glyph widths came from real font metrics or an estimate.
    /// Serialised as `"Metric"` / `"Estimate"`.
    ///
    /// NOTE(review): the `From<TextSpan>` conversion passes this through
    /// unchanged even when [`Self::width`] was replaced by the ½-em estimate.
    #[cfg_attr(feature = "serde", serde(rename = "widthSource"))]
    pub width_source: WidthSource,
    /// Per-glyph bounding boxes `[x0, y0, x1, y1]` (y up). Omitted when empty.
    #[cfg_attr(
        feature = "serde",
        serde(rename = "charBounds", skip_serializing_if = "Vec::is_empty")
    )]
    pub char_bounds: Vec<[f64; 4]>,

    // ---- Golf 1 typographic metadata ----
    /// Full affine transform `[a, b, c, d, e, f]` of the span's first glyph.
    /// Captures rotation/shear discarded by `(x, y, fontSize)`.
    /// Omitted from JSON when absent.
    #[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
    pub transform: Option<[f64; 6]>,
    /// Numeric font weight (~100–900) from embedded font data, if available.
    /// Omitted from JSON when absent.
    #[cfg_attr(
        feature = "serde",
        serde(rename = "fontWeight", skip_serializing_if = "Option::is_none")
    )]
    pub font_weight: Option<u16>,
    /// Serif flag from embedded font data, if available.
    /// Omitted from JSON when absent.
    #[cfg_attr(
        feature = "serde",
        serde(rename = "isSerif", skip_serializing_if = "Option::is_none")
    )]
    pub is_serif: Option<bool>,
    /// Monospace flag from embedded font data, if available.
    /// Omitted from JSON when absent.
    #[cfg_attr(
        feature = "serde",
        serde(rename = "isMonospace", skip_serializing_if = "Option::is_none")
    )]
    pub is_monospace: Option<bool>,
    /// Coarse PDF text render mode: `0` fill, `1` stroke, `3` invisible.
    /// Omitted from JSON when absent.
    #[cfg_attr(
        feature = "serde",
        serde(rename = "renderMode", skip_serializing_if = "Option::is_none")
    )]
    pub render_mode: Option<u8>,

    // ---- Golf 2 font metrics ----
    /// Vertical font metrics (/1000 em) from the embedded font, if available.
    /// Omitted from JSON when absent.
    #[cfg_attr(
        feature = "serde",
        serde(rename = "fontMetrics", skip_serializing_if = "Option::is_none")
    )]
    pub font_metrics: Option<FontMetrics>,
}
108
109impl From<TextSpan> for TextSpanInfo {
110    /// Build the canonical wire form from an owned engine [`TextSpan`].
111    ///
112    /// Centralises the conversions that previously lived in each binding: the
113    /// ½-em width fallback, the RGBA-u8 → RGB-f32 colour normalisation, and the
114    /// height-reports-font-size contract.
115    fn from(s: TextSpan) -> Self {
116        let char_count = s.text.chars().count() as f64;
117        let width = if s.width > 0.0 {
118            s.width
119        } else {
120            s.font_size * 0.5 * char_count
121        };
122        let color = s
123            .color
124            .map(|[r, g, b, _a]| [r as f32 / 255.0, g as f32 / 255.0, b as f32 / 255.0]);
125        Self {
126            x: s.x,
127            y: s.y,
128            width,
129            height: s.font_size,
130            font_size: s.font_size,
131            font_name: s.font_name,
132            is_bold: s.is_bold,
133            is_italic: s.is_italic,
134            color,
135            width_source: s.width_source,
136            char_bounds: s.char_bounds,
137            transform: s.transform,
138            font_weight: s.font_weight,
139            is_serif: s.is_serif,
140            is_monospace: s.is_monospace,
141            render_mode: s.render_mode,
142            font_metrics: s.font_metrics,
143            text: s.text,
144        }
145    }
146}
147
148impl From<&TextSpan> for TextSpanInfo {
149    /// Build the canonical wire form from a borrowed [`TextSpan`] (clones).
150    fn from(s: &TextSpan) -> Self {
151        TextSpanInfo::from(s.clone())
152    }
153}