svg2pdf/render/
text.rs

1use crate::render::path;
2use crate::util::allocate::RefAllocator;
3use crate::util::context::Context;
4use crate::util::helper::{deflate, ContentExt, TransformExt};
5use crate::util::resources::ResourceContainer;
6use crate::ConversionError::{self, InvalidFont, SubsetError};
7use crate::Result;
8use pdf_writer::types::{
9    CidFontType, FontFlags, SystemInfo, TextRenderingMode, UnicodeCmap,
10};
11use pdf_writer::writers::WMode;
12use pdf_writer::{Chunk, Content, Filter, Finish, Name, Ref, Str};
13use siphasher::sip128::{Hasher128, SipHasher13};
14use std::collections::{BTreeMap, HashMap};
15use std::hash::Hash;
16use std::sync::Arc;
17use subsetter::GlyphRemapper;
18use ttf_parser::{name_id, Face, GlyphId, PlatformId, Tag};
19use usvg::{Fill, Group, ImageKind, Node, PaintOrder, Stroke, Transform};
20
21const CFF: Tag = Tag::from_bytes(b"CFF ");
22const CFF2: Tag = Tag::from_bytes(b"CFF2");
23
24const SUBSET_TAG_LEN: usize = 6;
25const IDENTITY_H: &str = "Identity-H";
26
27const CMAP_NAME: Name = Name(b"Custom");
28const SYSTEM_INFO: SystemInfo = SystemInfo {
29    registry: Str(b"Adobe"),
30    ordering: Str(b"Identity"),
31    supplement: 0,
32};
33
34/// Write all font objects into the chunk.
35pub fn write_font(
36    chunk: &mut Chunk,
37    alloc: &mut RefAllocator,
38    font: &mut Font,
39) -> Result<()> {
40    // We've already parsed all fonts when creating the font objects, so each font
41    // should be valid.
42    let ttf = Face::parse(&font.face_data, font.face_index)
43        .map_err(|_| InvalidFont(font.id))?;
44    let units_per_em = ttf.units_per_em();
45
46    let type0_ref = font.reference;
47    let cid_ref = alloc.alloc_ref();
48    let descriptor_ref = alloc.alloc_ref();
49    let cmap_ref = alloc.alloc_ref();
50    let data_ref = alloc.alloc_ref();
51
52    let glyph_set = &mut font.glyph_set;
53    let glyph_remapper = &font.glyph_remapper;
54
55    // Do we have a TrueType or CFF font?
56    //
57    // FIXME: CFF2 must be handled differently and requires PDF 2.0
58    // (or we have to convert it to CFF).
59    let is_cff = ttf
60        .raw_face()
61        .table(CFF)
62        .or_else(|| ttf.raw_face().table(CFF2))
63        .is_some();
64
65    let base_font = base_font_name(&ttf, glyph_set);
66    let base_font_type0 =
67        if is_cff { format!("{base_font}-{IDENTITY_H}") } else { base_font.clone() };
68
69    chunk
70        .type0_font(type0_ref)
71        .base_font(Name(base_font_type0.as_bytes()))
72        .encoding_predefined(Name(IDENTITY_H.as_bytes()))
73        .descendant_font(cid_ref)
74        .to_unicode(cmap_ref);
75
76    // Write the CID font referencing the font descriptor.
77    let mut cid = chunk.cid_font(cid_ref);
78    cid.subtype(if is_cff { CidFontType::Type0 } else { CidFontType::Type2 });
79    cid.base_font(Name(base_font.as_bytes()));
80    cid.system_info(SYSTEM_INFO);
81    cid.font_descriptor(descriptor_ref);
82    cid.default_width(0.0);
83    if !is_cff {
84        cid.cid_to_gid_map_predefined(Name(b"Identity"));
85    }
86
87    let mut widths = vec![];
88    for old_gid in glyph_remapper.remapped_gids() {
89        let width = ttf.glyph_hor_advance(GlyphId(old_gid)).unwrap_or(0);
90        let units = (width as f64 / units_per_em as f64) * 1000.0;
91        widths.push(units as f32);
92    }
93
94    // Write all non-zero glyph widths.
95    let mut first = 0;
96    let mut width_writer = cid.widths();
97    for (w, group) in widths.group_by_key(|&w| w) {
98        let end = first + group.len();
99        if w != 0.0 {
100            let last = end - 1;
101            width_writer.same(first as u16, last as u16, w);
102        }
103        first = end;
104    }
105
106    width_writer.finish();
107    cid.finish();
108
109    let mut flags = FontFlags::empty();
110    flags.set(
111        FontFlags::SERIF,
112        find_name(&ttf, name_id::POST_SCRIPT_NAME)
113            .is_some_and(|name| name.contains("Serif")),
114    );
115    flags.set(FontFlags::FIXED_PITCH, ttf.is_monospaced());
116    flags.set(FontFlags::ITALIC, ttf.is_italic());
117    flags.insert(FontFlags::SYMBOLIC);
118    flags.insert(FontFlags::SMALL_CAP);
119
120    let convert = |val| (val / units_per_em as f32) * 1000.0;
121
122    let global_bbox = ttf.global_bounding_box();
123    let bbox = pdf_writer::Rect::new(
124        convert(global_bbox.x_min as f32),
125        convert(global_bbox.y_min as f32),
126        convert(global_bbox.x_max as f32),
127        convert(global_bbox.y_max as f32),
128    );
129
130    let italic_angle = ttf.italic_angle();
131    let ascender = convert(ttf.typographic_ascender().unwrap_or(ttf.ascender()) as f32);
132    let descender =
133        convert(ttf.typographic_descender().unwrap_or(ttf.descender()) as f32);
134    let cap_height = ttf
135        .capital_height()
136        .filter(|&h| h > 0)
137        .map(|h| convert(h as f32))
138        .unwrap_or(ascender);
139    let stem_v = 10.0 + 0.244 * (f32::from(ttf.weight().to_number()) - 50.0);
140
141    // Write the font descriptor (contains metrics about the font).
142    let mut font_descriptor = chunk.font_descriptor(descriptor_ref);
143    font_descriptor
144        .name(Name(base_font.as_bytes()))
145        .flags(flags)
146        .bbox(bbox)
147        .italic_angle(italic_angle)
148        .ascent(ascender)
149        .descent(descender)
150        .cap_height(cap_height)
151        .stem_v(stem_v);
152
153    if is_cff {
154        font_descriptor.font_file3(data_ref);
155    } else {
156        font_descriptor.font_file2(data_ref);
157    }
158
159    font_descriptor.finish();
160
161    let cmap = create_cmap(glyph_set, glyph_remapper).ok_or(SubsetError(font.id))?;
162    chunk.cmap(cmap_ref, &cmap.finish()).writing_mode(WMode::Horizontal);
163
164    // Subset and write the font's bytes.
165    let data = subset_font(&font.face_data, font.face_index, glyph_remapper, font.id)?;
166
167    let mut stream = chunk.stream(data_ref, &data);
168    stream.filter(Filter::FlateDecode);
169    if is_cff {
170        stream.pair(Name(b"Subtype"), Name(b"CIDFontType0C"));
171    }
172
173    stream.finish();
174    Ok(())
175}
176
177/// Create a /ToUnicode CMap.
178fn create_cmap(
179    glyph_set: &mut BTreeMap<u16, String>,
180    glyph_remapper: &GlyphRemapper,
181) -> Option<UnicodeCmap> {
182    // Produce a reverse mapping from glyphs' CIDs to unicode strings.
183    let mut cmap = UnicodeCmap::new(CMAP_NAME, SYSTEM_INFO);
184    for (&g, text) in glyph_set.iter() {
185        let new_gid = glyph_remapper.get(g)?;
186        if !text.is_empty() {
187            cmap.pair_with_multiple(new_gid, text.chars());
188        }
189    }
190
191    Some(cmap)
192}
193
194fn subset_font(
195    font_data: &[u8],
196    index: u32,
197    glyph_remapper: &GlyphRemapper,
198    id: fontdb::ID,
199) -> Result<Vec<u8>> {
200    let data = font_data;
201    let subsetted =
202        subsetter::subset(data, index, glyph_remapper).map_err(|_| SubsetError(id))?;
203    let mut data = subsetted.as_ref();
204
205    // Extract the standalone CFF font program if applicable.
206    let face = ttf_parser::RawFace::parse(data, 0).map_err(|_| SubsetError(id))?;
207    if let Some(cff) = face.table(CFF) {
208        data = cff;
209    }
210
211    Ok(deflate(data))
212}
213
214/// Render some text into a content stream.
215pub fn render(
216    text: &usvg::Text,
217    chunk: &mut Chunk,
218    content: &mut Content,
219    ctx: &mut Context,
220    rc: &mut ResourceContainer,
221    accumulated_transform: Transform,
222) -> Result<()> {
223    let mut font_names = HashMap::new();
224
225    // TODO: Don't clone here...
226    let fonts = ctx.fonts.clone();
227
228    for span in text.layouted() {
229        for glyph in &span.positioned_glyphs {
230            let Some(font) = ctx.font_ref(glyph.font) else { continue };
231            font_names
232                .entry(font.reference)
233                .or_insert_with(|| rc.add_font(font.reference));
234        }
235    }
236
237    for span in text.layouted() {
238        if !span.visible {
239            continue;
240        }
241
242        let operation = |content: &mut Content| -> Result<()> {
243            for glyph in &span.positioned_glyphs {
244                let Some(font) = fonts.get(&glyph.font).and_then(|f| f.as_ref()) else {
245                    continue;
246                };
247
248                let name = font_names.get(&font.reference).unwrap();
249
250                // TODO: Remove unwraps and switch to error-based handling.
251                // NOTE(laurmaedje): If it can't happen, I think a panic is
252                // better. There is no way to handle it as a consumer of
253                // svg2pdf.
254                let cid = font.glyph_remapper.get(glyph.id.0).unwrap();
255                let ts = glyph
256                    .outline_transform()
257                    .pre_scale(font.units_per_em as f32, font.units_per_em as f32)
258                    // The glyphs in usvg are already scaled according the font size, but
259                    // we want to leverage the native PDF font size feature instead, so we downscale
260                    // it to a font size of 1.
261                    .pre_scale(1.0 / span.font_size.get(), 1.0 / span.font_size.get());
262                content.save_state_checked()?;
263                content.begin_text();
264                content.set_text_matrix(ts.to_pdf_transform());
265                content.set_font(Name(name.as_bytes()), span.font_size.get());
266                content.show(Str(&[(cid >> 8) as u8, (cid & 0xff) as u8]));
267                content.end_text();
268                content.restore_state();
269            }
270
271            Ok(())
272        };
273
274        let stroke_operation = |content: &mut Content, _: &Stroke| -> Result<()> {
275            content.set_text_rendering_mode(TextRenderingMode::Stroke);
276            operation(content)
277        };
278
279        let fill_operation = |content: &mut Content, _: &Fill| -> Result<()> {
280            content.set_text_rendering_mode(TextRenderingMode::Fill);
281            operation(content)
282        };
283
284        if let Some(overline) = &span.overline {
285            path::render(overline, chunk, content, ctx, rc, accumulated_transform)?;
286        }
287
288        if let Some(underline) = &span.underline {
289            path::render(underline, chunk, content, ctx, rc, accumulated_transform)?;
290        }
291
292        content.save_state_checked()?;
293        match (span.fill.as_ref(), span.stroke.as_ref()) {
294            (Some(fill), Some(stroke)) => match span.paint_order {
295                PaintOrder::FillAndStroke => {
296                    path::fill(
297                        fill,
298                        chunk,
299                        content,
300                        ctx,
301                        rc,
302                        fill_operation,
303                        accumulated_transform,
304                        text.bounding_box(),
305                    )?;
306                    path::stroke(
307                        stroke,
308                        chunk,
309                        content,
310                        ctx,
311                        rc,
312                        stroke_operation,
313                        accumulated_transform,
314                        text.bounding_box(),
315                    )?;
316                }
317                PaintOrder::StrokeAndFill => {
318                    path::stroke(
319                        stroke,
320                        chunk,
321                        content,
322                        ctx,
323                        rc,
324                        stroke_operation,
325                        accumulated_transform,
326                        text.bounding_box(),
327                    )?;
328                    path::fill(
329                        fill,
330                        chunk,
331                        content,
332                        ctx,
333                        rc,
334                        fill_operation,
335                        accumulated_transform,
336                        text.bounding_box(),
337                    )?;
338                }
339            },
340            (None, Some(stroke)) => {
341                path::stroke(
342                    stroke,
343                    chunk,
344                    content,
345                    ctx,
346                    rc,
347                    stroke_operation,
348                    accumulated_transform,
349                    text.bounding_box(),
350                )?;
351            }
352            (Some(fill), None) => {
353                path::fill(
354                    fill,
355                    chunk,
356                    content,
357                    ctx,
358                    rc,
359                    fill_operation,
360                    accumulated_transform,
361                    text.bounding_box(),
362                )?;
363            }
364            (None, None) => {
365                content.set_text_rendering_mode(TextRenderingMode::Invisible);
366                operation(content)?;
367            }
368        };
369
370        content.restore_state();
371
372        if let Some(line_through) = &span.line_through {
373            path::render(line_through, chunk, content, ctx, rc, accumulated_transform)?;
374        }
375    }
376
377    Ok(())
378}
379
380/// Creates the base font name for a font with a specific glyph subset.
381/// Consists of a subset tag and the PostScript name of the font.
382///
383/// Returns a string of length maximum 116, so that even with `-Identity-H`
384/// added it does not exceed the maximum PDF/A name length of 127.
385fn base_font_name<T: Hash>(ttf: &Face, glyphs: &T) -> String {
386    const MAX_LEN: usize = 127 - REST_LEN;
387    const REST_LEN: usize = SUBSET_TAG_LEN + 1 + 1 + IDENTITY_H.len();
388
389    let postscript_name = find_name(ttf, name_id::POST_SCRIPT_NAME);
390    let name = postscript_name.as_deref().unwrap_or("unknown");
391    let trimmed = &name[..name.len().min(MAX_LEN)];
392
393    // Hash the full name (we might have trimmed) and the glyphs to produce
394    // a fairly unique subset tag.
395    let subset_tag = subset_tag(&(name, glyphs));
396
397    format!("{subset_tag}+{trimmed}")
398}
399
400/// Produce a unique 6 letter tag for a glyph set.
401fn subset_tag<T: Hash>(glyphs: &T) -> String {
402    const BASE: u128 = 26;
403    let mut hash = hash128(&glyphs);
404    let mut letter = [b'A'; SUBSET_TAG_LEN];
405    for l in letter.iter_mut() {
406        *l = b'A' + (hash % BASE) as u8;
407        hash /= BASE;
408    }
409    std::str::from_utf8(&letter).unwrap().into()
410}
411
412/// Calculate a 128-bit siphash of a value.
413pub fn hash128<T: Hash + ?Sized>(value: &T) -> u128 {
414    let mut state = SipHasher13::new();
415    value.hash(&mut state);
416    state.finish128().as_u128()
417}
418
419/// Try to find and decode the name with the given id.
420pub(super) fn find_name(ttf: &Face, name_id: u16) -> Option<String> {
421    ttf.names().into_iter().find_map(|entry| {
422        if entry.name_id == name_id {
423            if let Some(string) = entry.to_string() {
424                return Some(string);
425            }
426
427            if entry.platform_id == PlatformId::Macintosh && entry.encoding_id == 0 {
428                return Some(decode_mac_roman(entry.name));
429            }
430        }
431
432        None
433    })
434}
435
/// Decode mac roman encoded bytes into a string.
///
/// Bytes below 128 are ASCII and map to themselves; bytes from 128 upwards
/// are looked up in the Mac Roman table below. Every byte yields exactly one
/// character, so decoding cannot fail.
fn decode_mac_roman(coded: &[u8]) -> String {
    // Upper half (0x80..=0xFF) of the Mac Roman encoding, indexed by
    // `byte - 128`. The two ligatures at 0xDE/0xDF are spelled as escapes so
    // that they cannot be mangled into two-letter sequences.
    #[rustfmt::skip]
    const TABLE: [char; 128] = [
        'Ä', 'Å', 'Ç', 'É', 'Ñ', 'Ö', 'Ü', 'á', 'à', 'â', 'ä', 'ã', 'å', 'ç', 'é', 'è',
        'ê', 'ë', 'í', 'ì', 'î', 'ï', 'ñ', 'ó', 'ò', 'ô', 'ö', 'õ', 'ú', 'ù', 'û', 'ü',
        '†', '°', '¢', '£', '§', '•', '¶', 'ß', '®', '©', '™', '´', '¨', '≠', 'Æ', 'Ø',
        '∞', '±', '≤', '≥', '¥', 'µ', '∂', '∑', '∏', 'π', '∫', 'ª', 'º', 'Ω', 'æ', 'ø',
        '¿', '¡', '¬', '√', 'ƒ', '≈', '∆', '«', '»', '…', '\u{a0}', 'À', 'Ã', 'Õ', 'Œ', 'œ',
        '–', '—', '“', '”', '‘', '’', '÷', '◊', 'ÿ', 'Ÿ', '⁄', '€', '‹', '›', '\u{fb01}', '\u{fb02}',
        '‡', '·', '‚', '„', '‰', 'Â', 'Ê', 'Á', 'Ë', 'È', 'Í', 'Î', 'Ï', 'Ì', 'Ó', 'Ô',
        '\u{f8ff}', 'Ò', 'Ú', 'Û', 'Ù', 'ı', 'ˆ', '˜', '¯', '˘', '˙', '˚', '¸', '˝', '˛', 'ˇ',
    ];

    coded
        .iter()
        .map(|&code| match code.checked_sub(128) {
            // Upper half: table lookup.
            Some(offset) => TABLE[usize::from(offset)],
            // Lower half: plain ASCII.
            None => char::from(code),
        })
        .collect()
}
460
/// Extra methods for [`[T]`](slice).
pub trait SliceExt<T> {
    /// Split a slice into consecutive runs with the same key and yield for
    /// each such run the key and the slice of elements with that key.
    ///
    /// Only neighbouring elements are compared, so equal keys that are
    /// separated by a different key end up in separate runs.
    fn group_by_key<K, F>(&self, f: F) -> GroupByKey<'_, T, F>
    where
        F: FnMut(&T) -> K,
        K: PartialEq;
}

impl<T> SliceExt<T> for [T] {
    fn group_by_key<K, F>(&self, f: F) -> GroupByKey<'_, T, F> {
        GroupByKey { slice: self, f }
    }
}

/// This struct is created by [`SliceExt::group_by_key`].
pub struct GroupByKey<'a, T, F> {
    /// The part of the slice that has not been yielded yet.
    slice: &'a [T],
    /// The function computing the key of an element.
    f: F,
}

impl<'a, T, K, F> Iterator for GroupByKey<'a, T, F>
where
    F: FnMut(&T) -> K,
    K: PartialEq,
{
    type Item = (K, &'a [T]);

    /// Yield the key of the first remaining element together with the
    /// longest prefix of the remaining slice sharing that key.
    fn next(&mut self) -> Option<Self::Item> {
        let remaining = self.slice;
        let (first, rest) = remaining.split_first()?;
        let key = (self.f)(first);

        // The first element always belongs to the run; extend it for as long
        // as the following elements produce the same key.
        let run_len = 1 + rest.iter().take_while(|item| (self.f)(item) == key).count();

        let (run, tail) = remaining.split_at(run_len);
        self.slice = tail;
        Some((key, run))
    }
}
499
500#[derive(Clone)]
501pub struct Font {
502    pub id: fontdb::ID,
503    pub glyph_set: BTreeMap<u16, String>,
504    pub glyph_remapper: GlyphRemapper,
505    pub reference: Ref,
506    pub face_data: Arc<Vec<u8>>,
507    pub units_per_em: u16,
508    pub face_index: u32,
509}
510
511pub fn fill_fonts(
512    group: &Group,
513    ctx: &mut Context,
514    fontdb: &fontdb::Database,
515) -> Result<()> {
516    for child in group.children() {
517        match child {
518            Node::Text(t) => {
519                let allocator = &mut ctx.ref_allocator;
520                for span in t.layouted() {
521                    for g in &span.positioned_glyphs {
522                        let font = ctx.fonts.entry(g.font).or_insert_with(|| {
523                            fontdb
524                                .with_face_data(g.font, |data, face_index| {
525                                    // TODO: Currently, we are parsing each font twice, once here
526                                    // and once again when writing the fonts. We should probably
527                                    // improve on that...
528                                    if let Ok(ttf) =
529                                        ttf_parser::Face::parse(data, face_index)
530                                    {
531                                        let reference = allocator.alloc_ref();
532                                        let glyph_set = BTreeMap::new();
533                                        let glyph_remapper = GlyphRemapper::new();
534                                        return Some(Font {
535                                            id: g.font,
536                                            reference,
537                                            face_data: Arc::new(Vec::from(data)),
538                                            units_per_em: ttf.units_per_em(),
539                                            glyph_set,
540                                            glyph_remapper,
541                                            face_index,
542                                        });
543                                    }
544
545                                    None
546                                })
547                                .flatten()
548                        });
549
550                        if let Some(ref mut font) = font {
551                            font.glyph_set.insert(g.id.0, g.text.clone());
552                            font.glyph_remapper.remap(g.id.0);
553                        }
554
555                        if ctx.options.pdfa && g.id.0 == 0 {
556                            return Err(ConversionError::MissingGlyphs);
557                        }
558                    }
559                }
560            }
561            Node::Group(group) => fill_fonts(group, ctx, fontdb)?,
562            Node::Image(image) => {
563                if let ImageKind::SVG(svg) = image.kind() {
564                    fill_fonts(svg.root(), ctx, fontdb)?;
565                }
566            }
567            _ => {}
568        }
569
570        let mut result = Ok(());
571        child.subroots(|subroot| {
572            result = result.and(fill_fonts(subroot, ctx, fontdb));
573        });
574        result?;
575    }
576
577    Ok(())
578}