typst_pdf/
font.rs

1use std::collections::{BTreeMap, HashMap};
2use std::hash::Hash;
3use std::sync::Arc;
4
5use ecow::{eco_format, EcoString};
6use pdf_writer::types::{CidFontType, FontFlags, SystemInfo, UnicodeCmap};
7use pdf_writer::writers::{FontDescriptor, WMode};
8use pdf_writer::{Chunk, Filter, Finish, Name, Rect, Ref, Str};
9use subsetter::GlyphRemapper;
10use ttf_parser::{name_id, GlyphId, Tag};
11use typst_library::diag::{At, SourceResult};
12use typst_library::text::Font;
13use typst_syntax::Span;
14use typst_utils::SliceExt;
15
16use crate::{deflate, EmExt, NameExt, PdfChunk, WithGlobalRefs};
17
18const CFF: Tag = Tag::from_bytes(b"CFF ");
19const CFF2: Tag = Tag::from_bytes(b"CFF2");
20
21const SUBSET_TAG_LEN: usize = 6;
22const IDENTITY_H: &str = "Identity-H";
23
24pub(crate) const CMAP_NAME: Name = Name(b"Custom");
25pub(crate) const SYSTEM_INFO: SystemInfo = SystemInfo {
26    registry: Str(b"Adobe"),
27    ordering: Str(b"Identity"),
28    supplement: 0,
29};
30
31/// Embed all used fonts into the PDF.
32#[typst_macros::time(name = "write fonts")]
33pub fn write_fonts(
34    context: &WithGlobalRefs,
35) -> SourceResult<(PdfChunk, HashMap<Font, Ref>)> {
36    let mut chunk = PdfChunk::new();
37    let mut out = HashMap::new();
38    context.resources.traverse(&mut |resources| {
39        for font in resources.fonts.items() {
40            if out.contains_key(font) {
41                continue;
42            }
43
44            let type0_ref = chunk.alloc();
45            let cid_ref = chunk.alloc();
46            let descriptor_ref = chunk.alloc();
47            let cmap_ref = chunk.alloc();
48            let data_ref = chunk.alloc();
49            out.insert(font.clone(), type0_ref);
50
51            let glyph_set = resources.glyph_sets.get(font).unwrap();
52            let glyph_remapper = resources.glyph_remappers.get(font).unwrap();
53            let ttf = font.ttf();
54
55            // Do we have a TrueType or CFF font?
56            //
57            // FIXME: CFF2 must be handled differently and requires PDF 2.0
58            // (or we have to convert it to CFF).
59            let is_cff = ttf
60                .raw_face()
61                .table(CFF)
62                .or_else(|| ttf.raw_face().table(CFF2))
63                .is_some();
64
65            let base_font = base_font_name(font, glyph_set);
66            let base_font_type0 = if is_cff {
67                eco_format!("{base_font}-{IDENTITY_H}")
68            } else {
69                base_font.clone()
70            };
71
72            // Write the base font object referencing the CID font.
73            chunk
74                .type0_font(type0_ref)
75                .base_font(Name(base_font_type0.as_bytes()))
76                .encoding_predefined(Name(IDENTITY_H.as_bytes()))
77                .descendant_font(cid_ref)
78                .to_unicode(cmap_ref);
79
80            // Write the CID font referencing the font descriptor.
81            let mut cid = chunk.cid_font(cid_ref);
82            cid.subtype(if is_cff { CidFontType::Type0 } else { CidFontType::Type2 });
83            cid.base_font(Name(base_font.as_bytes()));
84            cid.system_info(SYSTEM_INFO);
85            cid.font_descriptor(descriptor_ref);
86            cid.default_width(0.0);
87            if !is_cff {
88                cid.cid_to_gid_map_predefined(Name(b"Identity"));
89            }
90
91            // Extract the widths of all glyphs.
92            // `remapped_gids` returns an iterator over the old GIDs in their new sorted
93            // order, so we can append the widths as is.
94            let widths = glyph_remapper
95                .remapped_gids()
96                .map(|gid| {
97                    let width = ttf.glyph_hor_advance(GlyphId(gid)).unwrap_or(0);
98                    font.to_em(width).to_font_units()
99                })
100                .collect::<Vec<_>>();
101
102            // Write all non-zero glyph widths.
103            let mut first = 0;
104            let mut width_writer = cid.widths();
105            for (w, group) in widths.group_by_key(|&w| w) {
106                let end = first + group.len();
107                if w != 0.0 {
108                    let last = end - 1;
109                    width_writer.same(first as u16, last as u16, w);
110                }
111                first = end;
112            }
113
114            width_writer.finish();
115            cid.finish();
116
117            // Write the /ToUnicode character map, which maps glyph ids back to
118            // unicode codepoints to enable copying out of the PDF.
119            let cmap = create_cmap(glyph_set, glyph_remapper);
120            chunk
121                .cmap(cmap_ref, &cmap)
122                .writing_mode(WMode::Horizontal)
123                .filter(Filter::FlateDecode);
124
125            let subset = subset_font(font, glyph_remapper)
126                .map_err(|err| {
127                    let postscript_name = font.find_name(name_id::POST_SCRIPT_NAME);
128                    let name = postscript_name.as_deref().unwrap_or(&font.info().family);
129                    eco_format!("failed to process font {name}: {err}")
130                })
131                .at(Span::detached())?;
132
133            let mut stream = chunk.stream(data_ref, &subset);
134            stream.filter(Filter::FlateDecode);
135            if is_cff {
136                stream.pair(Name(b"Subtype"), Name(b"CIDFontType0C"));
137            }
138            stream.finish();
139
140            let mut font_descriptor =
141                write_font_descriptor(&mut chunk, descriptor_ref, font, &base_font);
142            if is_cff {
143                font_descriptor.font_file3(data_ref);
144            } else {
145                font_descriptor.font_file2(data_ref);
146            }
147        }
148
149        Ok(())
150    })?;
151
152    Ok((chunk, out))
153}
154
155/// Writes a FontDescriptor dictionary.
156pub fn write_font_descriptor<'a>(
157    pdf: &'a mut Chunk,
158    descriptor_ref: Ref,
159    font: &'a Font,
160    base_font: &str,
161) -> FontDescriptor<'a> {
162    let ttf = font.ttf();
163    let metrics = font.metrics();
164    let serif = font
165        .find_name(name_id::POST_SCRIPT_NAME)
166        .is_some_and(|name| name.contains("Serif"));
167
168    let mut flags = FontFlags::empty();
169    flags.set(FontFlags::SERIF, serif);
170    flags.set(FontFlags::FIXED_PITCH, ttf.is_monospaced());
171    flags.set(FontFlags::ITALIC, ttf.is_italic());
172    flags.insert(FontFlags::SYMBOLIC);
173    flags.insert(FontFlags::SMALL_CAP);
174
175    let global_bbox = ttf.global_bounding_box();
176    let bbox = Rect::new(
177        font.to_em(global_bbox.x_min).to_font_units(),
178        font.to_em(global_bbox.y_min).to_font_units(),
179        font.to_em(global_bbox.x_max).to_font_units(),
180        font.to_em(global_bbox.y_max).to_font_units(),
181    );
182
183    let italic_angle = ttf.italic_angle().unwrap_or(0.0);
184    let ascender = metrics.ascender.to_font_units();
185    let descender = metrics.descender.to_font_units();
186    let cap_height = metrics.cap_height.to_font_units();
187    let stem_v = 10.0 + 0.244 * (f32::from(ttf.weight().to_number()) - 50.0);
188
189    // Write the font descriptor (contains metrics about the font).
190    let mut font_descriptor = pdf.font_descriptor(descriptor_ref);
191    font_descriptor
192        .name(Name(base_font.as_bytes()))
193        .flags(flags)
194        .bbox(bbox)
195        .italic_angle(italic_angle)
196        .ascent(ascender)
197        .descent(descender)
198        .cap_height(cap_height)
199        .stem_v(stem_v);
200
201    font_descriptor
202}
203
204/// Subset a font to the given glyphs.
205///
206/// - For a font with TrueType outlines, this produces the whole OpenType font.
207/// - For a font with CFF outlines, this produces just the CFF font program.
208///
209/// In both cases, this returns the already compressed data.
210#[comemo::memoize]
211#[typst_macros::time(name = "subset font")]
212fn subset_font(
213    font: &Font,
214    glyph_remapper: &GlyphRemapper,
215) -> Result<Arc<Vec<u8>>, subsetter::Error> {
216    let data = font.data();
217    let subset = subsetter::subset(data, font.index(), glyph_remapper)?;
218    let mut data = subset.as_ref();
219
220    // Extract the standalone CFF font program if applicable.
221    let raw = ttf_parser::RawFace::parse(data, 0).unwrap();
222    if let Some(cff) = raw.table(CFF) {
223        data = cff;
224    }
225
226    Ok(Arc::new(deflate(data)))
227}
228
229/// Creates the base font name for a font with a specific glyph subset.
230/// Consists of a subset tag and the PostScript name of the font.
231///
232/// Returns a string of length maximum 116, so that even with `-Identity-H`
233/// added it does not exceed the maximum PDF/A name length of 127.
234pub(crate) fn base_font_name<T: Hash>(font: &Font, glyphs: &T) -> EcoString {
235    const MAX_LEN: usize = Name::PDFA_LIMIT - REST_LEN;
236    const REST_LEN: usize = SUBSET_TAG_LEN + 1 + 1 + IDENTITY_H.len();
237
238    let postscript_name = font.find_name(name_id::POST_SCRIPT_NAME);
239    let name = postscript_name.as_deref().unwrap_or("unknown");
240    let trimmed = &name[..name.len().min(MAX_LEN)];
241
242    // Hash the full name (we might have trimmed) and the glyphs to produce
243    // a fairly unique subset tag.
244    let subset_tag = subset_tag(&(name, glyphs));
245
246    eco_format!("{subset_tag}+{trimmed}")
247}
248
249/// Produce a unique 6 letter tag for a glyph set.
250pub(crate) fn subset_tag<T: Hash>(glyphs: &T) -> EcoString {
251    const BASE: u128 = 26;
252    let mut hash = typst_utils::hash128(&glyphs);
253    let mut letter = [b'A'; SUBSET_TAG_LEN];
254    for l in letter.iter_mut() {
255        *l = b'A' + (hash % BASE) as u8;
256        hash /= BASE;
257    }
258    std::str::from_utf8(&letter).unwrap().into()
259}
260
261/// Create a compressed `/ToUnicode` CMap.
262#[comemo::memoize]
263#[typst_macros::time(name = "create cmap")]
264fn create_cmap(
265    glyph_set: &BTreeMap<u16, EcoString>,
266    glyph_remapper: &GlyphRemapper,
267) -> Arc<Vec<u8>> {
268    // Produce a reverse mapping from glyphs' CIDs to unicode strings.
269    let mut cmap = UnicodeCmap::new(CMAP_NAME, SYSTEM_INFO);
270    for (&g, text) in glyph_set.iter() {
271        // See commend in `write_normal_text` for why we can choose the CID this way.
272        let cid = glyph_remapper.get(g).unwrap();
273        if !text.is_empty() {
274            cmap.pair_with_multiple(cid, text.chars());
275        }
276    }
277    Arc::new(deflate(&cmap.finish()))
278}