dxpdf 0.2.17

A fast DOCX-to-PDF converter powered by Skia
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
//! Fragment conversion — transform Inline content into measured Fragments
//! for the line-fitting algorithm.

use std::rc::Rc;

use crate::model::{RunProperties, UnderlineStyle};

use crate::render::dimension::Pt;
use crate::render::emoji::cluster::{EmojiPresentation, EmojiStructure};
use crate::render::fonts::TypefaceEntry;
use crate::render::geometry::PtSize;
use crate::render::resolve::color::RgbColor;
use crate::render::resolve::fonts::effective_font;
use crate::render::resolve::images::MediaEntry;

mod collect;
mod segment;
mod text;

pub use collect::{collect_fragments, FieldContext, FragmentCtx};

// ── Superscript / subscript rendering constants ───────────────────────────────
// §17.3.2.42: these ratios are "application-defined" per the spec; the values
// below match Word's rendering as documented in the OpenXML SDK reference.

/// Font size of super/subscript text as a fraction of the base font size.
///
/// A value of 0.58 means super/subscript text is sized at 58% of the run's
/// base font size. Shared by both superscript and subscript.
pub(super) const SUPERSCRIPT_FONT_SIZE_RATIO: f32 = 0.58;

/// Superscript baseline shift: fraction of base ascent to raise the text by.
///
/// A value of 0.33 means the shift is one third of the base ascent.
pub(super) const SUPERSCRIPT_ASCENT_OFFSET_RATIO: f32 = 0.33;

/// Subscript baseline shift: fraction of base character height to lower the text by.
///
/// A value of 0.08 means the shift is 8% of the base character height.
pub(super) const SUBSCRIPT_HEIGHT_OFFSET_RATIO: f32 = 0.08;

/// Font properties needed for rendering a text fragment.
///
/// Built by `font_props_from_run` from a run's `RunProperties`; the two
/// underline metric fields start at `Pt::ZERO` there and are filled in later.
#[derive(Clone, Debug)]
pub struct FontProps {
    /// Font family name. `font_props_from_run` takes it from the run's
    /// effective font, falling back to the caller-supplied default family.
    pub family: Rc<str>,
    /// Font size. Falls back to the caller-supplied default size when the
    /// run does not specify one.
    pub size: Pt,
    /// Bold flag; `false` when the run does not specify it.
    pub bold: bool,
    /// Italic flag; `false` when the run does not specify it.
    pub italic: bool,
    /// §17.3.2.40: `true` only when the run carries an underline style other
    /// than `UnderlineStyle::None`. The specific style is not preserved.
    pub underline: bool,
    /// Inter-character spacing; `Pt::ZERO` when the run does not specify it.
    pub char_spacing: Pt,
    /// §17.3.2.45: horizontal character scale as a multiplier (1.0 = normal,
    /// 0.8 = 80%, 1.5 = 150%). Applied to glyph advances during measure and
    /// to the Skia font's `scale_x` during paint. Inter-character spacing
    /// (`char_spacing`) is **not** scaled by this — the spec keeps the two
    /// independent.
    pub text_scale: f32,
    /// Underline position from font metrics (positive = below baseline).
    pub underline_position: Pt,
    /// Underline thickness from font metrics.
    pub underline_thickness: Pt,
}

/// Font metrics for a specific font at a specific size.
/// Evaluated once by the measurer and carried through the pipeline.
///
/// The three fields are combined by `TextMetrics::height` (ascent + descent)
/// and `TextMetrics::line_height` (ascent + descent + leading).
#[derive(Clone, Copy, Debug)]
pub struct TextMetrics {
    /// Distance from baseline to top of glyphs (positive upward).
    pub ascent: Pt,
    /// Distance from baseline to bottom of glyphs (positive downward).
    pub descent: Pt,
    /// §17.3.1.33: inter-line leading from the font's metrics.
    /// Included in Auto line spacing base but not in glyph height.
    pub leading: Pt,
}

impl TextMetrics {
    /// Height of the glyph box: `ascent` plus `descent`, with `leading`
    /// deliberately left out. This is the value used for baseline positioning.
    pub fn height(&self) -> Pt {
        let Self {
            ascent, descent, ..
        } = *self;
        ascent + descent
    }

    /// §17.3.1.33: the glyph height plus the font's `leading` — the base
    /// unit that Auto line spacing multipliers scale.
    pub fn line_height(&self) -> Pt {
        // Same left-to-right sum as `ascent + descent + leading`.
        self.height() + self.leading
    }
}

/// §17.3.2.4: run-level border for rendering.
///
/// Carried on `Fragment::Text` as an optional box around the text.
#[derive(Clone, Copy, Debug)]
pub struct FragmentBorder {
    /// Border line width.
    pub width: Pt,
    /// Border line color.
    pub color: RgbColor,
    /// NOTE(review): presumably the gap between the text and the border line
    /// (the border's `w:space` attribute) — confirm against the painter.
    pub space: Pt,
}

/// A measured fragment — the atomic unit for line fitting.
///
/// Each variant reports its extent through `Fragment::width`,
/// `Fragment::trimmed_width` and `Fragment::height`.
#[derive(Clone, Debug)]
pub enum Fragment {
    /// A piece of text with the font properties and metrics needed to place
    /// and paint it.
    Text {
        /// The text content of this fragment.
        text: Rc<str>,
        /// Font properties used to render `text`.
        font: FontProps,
        /// Text color. NOTE(review): presumably the glyph fill color, as
        /// opposed to the background `shading` — confirm against the painter.
        color: RgbColor,
        /// §17.3.2.32: run-level shading (background color behind text).
        shading: Option<RgbColor>,
        /// §17.3.2.4: run-level border (box around text).
        border: Option<FragmentBorder>,
        /// Full width including trailing whitespace (used for positioning).
        width: Pt,
        /// Width excluding trailing whitespace (used for line-break overflow checking).
        /// Trailing whitespace is allowed to hang past the margin per Word behavior.
        trimmed_width: Pt,
        /// Font metrics (ascent + descent = text height).
        metrics: TextMetrics,
        /// Hyperlink target for this text, if any. NOTE(review): presumably
        /// `None` for text outside a hyperlink — confirm with the collector.
        hyperlink_url: Option<String>,
        /// Vertical baseline shift for this text. The `Emoji` variant
        /// documents its equivalent field as inherited from the run
        /// (super/subscript / `w:position`); the sign convention is not
        /// visible in this file.
        baseline_offset: Pt,
        /// Horizontal offset for drawing text within the fragment width.
        /// Used for right/center-justified list labels where the text is
        /// positioned within a wider fragment. Default: Pt::ZERO.
        text_offset: Pt,
    },
    /// An inline image. Its `size` supplies both the fragment width and height.
    Image {
        /// Size of the image as laid out on the line.
        size: PtSize,
        /// NOTE(review): presumably the DOCX relationship id that identifies
        /// the media part — confirm against the resolver.
        rel_id: String,
        /// Media entry for the image, when one is available.
        /// NOTE(review): the meaning of `None` (unresolved vs. missing media)
        /// is not visible here — confirm.
        image_data: Option<MediaEntry>,
    },
    /// One emoji grapheme cluster (UAX #29) classified as an emoji sequence
    /// (UTS #51), to be rasterized at paint time via Skia's raster backend
    /// and embedded as an inline PDF image. See `docs/emoji-rendering.md`.
    Emoji {
        /// Cluster text exactly as classified — one grapheme cluster, possibly
        /// multi-codepoint (ZWJ, modifier, RIS, tag, keycap sequences).
        text: String,
        /// Color emoji typeface resolved upstream by the emoji resolver.
        /// Frozen at fragment build so paint never re-resolves.
        typeface: TypefaceEntry,
        /// Font size at which to rasterize, in Pt.
        size: Pt,
        /// UTS #51 §2 presentation. `EmojiPresentation::Text` is preserved
        /// (the rasterizer can still render it via the same color path) but
        /// allows future paint-side decisions (e.g. monochrome over color).
        presentation: EmojiPresentation,
        /// UTS #51 §2 cluster structure. Carried for diagnostics + future
        /// painter behaviour (skin-tone modifier substitution, etc.).
        structure: EmojiStructure,
        /// Measured advance from Skia raster metrics at `size`.
        advance: Pt,
        /// Font metrics from the resolved emoji typeface. Drives the
        /// rasterized image's natural aspect ratio and the rect's vertical
        /// extent in `line_emit::emit_line_commands` — NOT the line-height
        /// contribution. Color emoji typefaces (Apple Color Emoji, Segoe UI
        /// Emoji) carry tall ascents (≈1.25× font size) so their glyph art
        /// fits, but bumping running-text line height by that amount makes
        /// emoji-mixed lines visibly taller than text-only lines.
        metrics: TextMetrics,
        /// Metrics for line-height contribution, derived from the run's
        /// font.size against the run-level typeface (not the emoji
        /// typeface). Keeps the inline emoji "1em-tall" semantics so a
        /// paragraph that mixes emoji and plain text lays out evenly.
        /// The rasterized image still draws at its natural extent and may
        /// overhang the line slightly.
        line_metrics: TextMetrics,
        /// Inherited from the run (super/subscript / `w:position`).
        baseline_offset: Pt,
    },
    /// A tab character. Its fitting width is `fitting_width` when set and
    /// `MIN_TAB_WIDTH` otherwise.
    Tab {
        /// Height reported by `Fragment::height` for this tab.
        line_height: Pt,
        /// Override minimum width for line fitting (default: MIN_TAB_WIDTH).
        fitting_width: Option<Pt>,
    },
    /// An explicit line break — zero width.
    LineBreak {
        /// Height reported by `Fragment::height` for this break.
        line_height: Pt,
    },
    /// §17.3.3.1: column break — forces content to the next column.
    ColumnBreak,
    /// §17.3.3.1: page break — forces content to the next page.
    PageBreak {
        /// Height reported by `Fragment::height` for this break.
        line_height: Pt,
    },
    /// Named destination (bookmark target) — zero-width marker.
    Bookmark {
        /// Name of the bookmark this marker represents.
        name: String,
    },
}

impl Fragment {
    /// Full horizontal extent of the fragment as used for positioning.
    ///
    /// Breaks and bookmarks are zero-width; a tab reports its
    /// `fitting_width` override when present and `MIN_TAB_WIDTH` otherwise.
    pub fn width(&self) -> Pt {
        match self {
            Fragment::LineBreak { .. }
            | Fragment::ColumnBreak
            | Fragment::PageBreak { .. }
            | Fragment::Bookmark { .. } => Pt::ZERO,
            Fragment::Tab { fitting_width, .. } => match *fitting_width {
                Some(override_width) => override_width,
                None => MIN_TAB_WIDTH,
            },
            Fragment::Emoji { advance, .. } => *advance,
            Fragment::Image { size, .. } => size.width,
            Fragment::Text { width, .. } => *width,
        }
    }

    /// Width for overflow checking — text fragments report their width
    /// without trailing whitespace; every other variant reports `width()`.
    pub fn trimmed_width(&self) -> Pt {
        if let Fragment::Text { trimmed_width, .. } = self {
            *trimmed_width
        } else {
            self.width()
        }
    }

    /// Vertical extent of the fragment.
    ///
    /// Emoji report the height of their `line_metrics` (not the emoji
    /// typeface's own `metrics`); column breaks and bookmarks have no height.
    pub fn height(&self) -> Pt {
        match self {
            Fragment::ColumnBreak | Fragment::Bookmark { .. } => Pt::ZERO,
            Fragment::LineBreak { line_height }
            | Fragment::PageBreak { line_height }
            | Fragment::Tab { line_height, .. } => *line_height,
            Fragment::Emoji { line_metrics, .. } => line_metrics.height(),
            Fragment::Image { size, .. } => size.height,
            Fragment::Text { metrics, .. } => metrics.height(),
        }
    }

    /// True for any fragment that ends the current line: a line break,
    /// a column break, or a page break.
    pub fn is_line_break(&self) -> bool {
        self.is_page_break() || matches!(self, Fragment::LineBreak { .. } | Fragment::ColumnBreak)
    }

    /// §17.3.3.1: true if this fragment is a page break that forces
    /// subsequent content to the next page.
    pub fn is_page_break(&self) -> bool {
        matches!(self, Fragment::PageBreak { .. })
    }

    /// Font properties of a text fragment; `None` for every other variant.
    pub fn font_props(&self) -> Option<&FontProps> {
        if let Fragment::Text { font, .. } = self {
            Some(font)
        } else {
            None
        }
    }
}

/// §17.3.1.37: minimum tab fragment width for line fitting.
/// Tabs resolve to tab stops defined on the paragraph; this constant is only
/// used as the fragment width during line breaking (actual tab position is
/// computed during paragraph layout).
///
/// `Fragment::width` returns this for a `Fragment::Tab` whose
/// `fitting_width` is `None`.
pub const MIN_TAB_WIDTH: Pt = Pt::new(1.0);

/// Build the [`FontProps`] for a run from its `RunProperties`.
///
/// Anything the run leaves unspecified falls back to a default: the family
/// to `default_family`, the size to `default_size`, bold/italic to `false`,
/// character spacing to `Pt::ZERO`, and the horizontal text scale to `1.0`.
///
/// The underline metric fields are returned as `Pt::ZERO`; they are
/// populated later by the measurer from Skia font metrics.
pub fn font_props_from_run(
    rp: &RunProperties,
    default_family: &str,
    default_size: Pt,
) -> FontProps {
    // §17.3.2.40: the model's underline is tri-state — `None` (inherit),
    // `Some(UnderlineStyle::None)` (explicit "no underline" override), or
    // `Some(<actual style>)`. Only the last one results in a drawn underline.
    let draws_underline = match rp.underline {
        Some(style) => style != UnderlineStyle::None,
        None => false,
    };

    // §17.3.2.45: an absent scale means the run renders at 100% width.
    let horizontal_scale = match rp.text_scale {
        Some(scale) => scale.as_factor(),
        None => 1.0,
    };

    FontProps {
        family: Rc::from(effective_font(&rp.fonts).unwrap_or(default_family)),
        size: rp.font_size.map_or(default_size, Pt::from),
        bold: rp.bold.unwrap_or(false),
        italic: rp.italic.unwrap_or(false),
        underline: draws_underline,
        char_spacing: rp.spacing.map_or(Pt::ZERO, Pt::from),
        text_scale: horizontal_scale,
        // Populated by the measurer from Skia font metrics.
        underline_position: Pt::ZERO,
        underline_thickness: Pt::ZERO,
    }
}

/// Render `n` as a lowercase Roman numeral using subtractive notation
/// (`iv`, `ix`, `xl`, `xc`, `cd`, `cm`).
///
/// Zero yields an empty string. Values of 4000 and above simply repeat
/// `m` once per thousand, since no larger symbol is defined here.
pub fn to_roman_lower(mut n: u32) -> String {
    // Symbols in strictly descending weight, so a single pass is enough.
    const NUMERALS: [(&str, u32); 13] = [
        ("m", 1000),
        ("cm", 900),
        ("d", 500),
        ("cd", 400),
        ("c", 100),
        ("xc", 90),
        ("l", 50),
        ("xl", 40),
        ("x", 10),
        ("ix", 9),
        ("v", 5),
        ("iv", 4),
        ("i", 1),
    ];

    let mut numeral = String::new();
    for &(glyph, weight) in NUMERALS.iter() {
        // How many times this symbol fits into what is left of `n`.
        let repeats = n / weight;
        for _ in 0..repeats {
            numeral.push_str(glyph);
        }
        n %= weight;
    }
    numeral
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::UnderlineStyle;

    /// Fallback family passed to `font_props_from_run` by every test here.
    const FALLBACK_FAMILY: &str = "Helvetica";

    /// Fallback size (in points) passed to `font_props_from_run`.
    const FALLBACK_SIZE: f32 = 12.0;

    /// Resolve `rp` against the shared fallback family and size.
    fn props_for(rp: &RunProperties) -> FontProps {
        font_props_from_run(rp, FALLBACK_FAMILY, Pt::new(FALLBACK_SIZE))
    }

    /// Run properties that differ from the default only in `underline`.
    fn underlined(style: Option<UnderlineStyle>) -> RunProperties {
        RunProperties {
            underline: style,
            ..RunProperties::default()
        }
    }

    #[test]
    fn font_props_default_fallback() {
        let fp = props_for(&RunProperties::default());
        assert_eq!(&*fp.family, "Helvetica");
        assert_eq!(fp.size.raw(), 12.0);
        assert!(!fp.bold);
        assert!(!fp.italic);
    }

    // ── §17.3.2.40 underline tri-state ─────────────────────────────────────
    //
    // `RunProperties::underline` is an `Option<UnderlineStyle>` with three
    // meaningful states:
    //   * `None`                            — element absent; inherit (§17.7.2)
    //   * `Some(UnderlineStyle::None)`      — `<w:u w:val="none"/>` explicit override
    //   * `Some(UnderlineStyle::Single)` …  — actual underline style
    // `font_props.underline` is the draw / don't-draw decision and must be
    // `true` only in the third state.

    #[test]
    fn font_props_underline_absent_is_false() {
        let fp = props_for(&underlined(None));
        assert!(!fp.underline, "no <w:u> element → no underline");
    }

    #[test]
    fn font_props_underline_explicit_none_is_false() {
        let fp = props_for(&underlined(Some(UnderlineStyle::None)));
        assert!(
            !fp.underline,
            "<w:u w:val=\"none\"/> is the spec's explicit \"no underline\" \
             override; font_props.underline must remain false"
        );
    }

    #[test]
    fn font_props_underline_single_is_true() {
        let fp = props_for(&underlined(Some(UnderlineStyle::Single)));
        assert!(fp.underline, "<w:u w:val=\"single\"/> → underline drawn");
    }

    #[test]
    fn font_props_underline_double_is_true() {
        // Sanity: any non-`None` style sets the bool. A future renderer
        // change to support distinct styles will replace this bool with
        // an enum; for now, "any style other than None" → draw.
        let fp = props_for(&underlined(Some(UnderlineStyle::Double)));
        assert!(fp.underline);
    }

    // ── §17.3.2.45 horizontal text scale ───────────────────────────────────

    #[test]
    fn font_props_text_scale_default_is_one() {
        // When <w:w> is absent the run renders at 100% width.
        let fp = props_for(&RunProperties::default());
        assert_eq!(fp.text_scale, 1.0);
    }

    #[test]
    fn font_props_text_scale_compressed() {
        // <w:w w:val="80"/> → 0.8× horizontal scale.
        let rp = RunProperties {
            text_scale: Some(crate::model::TextScale::new(80)),
            ..RunProperties::default()
        };
        let delta = props_for(&rp).text_scale - 0.8;
        assert!(delta.abs() < f32::EPSILON);
    }

    #[test]
    fn font_props_text_scale_expanded() {
        // <w:w w:val="150"/> → 1.5× horizontal scale.
        let rp = RunProperties {
            text_scale: Some(crate::model::TextScale::new(150)),
            ..RunProperties::default()
        };
        let delta = props_for(&rp).text_scale - 1.5;
        assert!(delta.abs() < f32::EPSILON);
    }
}