Skip to main content

pdf_font/font/cff/
mod.rs

1//! A [Compact Font Format Table](
2//! https://docs.microsoft.com/en-us/typography/opentype/spec/cff) implementation.
3
4// Useful links:
5// http://wwwimages.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
6// http://wwwimages.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf
7// https://github.com/opentypejs/opentype.js/blob/master/src/tables/cff.js
8
9mod charset;
10mod charstring;
11mod dict;
12mod encoding;
13mod index;
14mod parser;
15mod std_names;
16
17use crate::font::argstack::ArgumentsStack;
18use crate::font::cff::parser::FromData;
19use crate::font::util::TryNumFrom;
20use crate::font::{
21    Builder, DummyOutline, GlyphId, Matrix, OutlineBuilder, OutlineError, Rect, RectF,
22};
23use charset::{Charset, parse_charset};
24use charstring::CharStringParser;
25use core::convert::TryFrom;
26use core::num::NonZeroU16;
27use core::ops::Range;
28use dict::DictionaryParser;
29use encoding::{Encoding, STANDARD_ENCODING, parse_encoding};
30use index::{Index, parse_index, skip_index};
31use log::warn;
32use parser::{LazyArray16, NumFrom, Stream};
33use std_names::STANDARD_NAMES;
34
/// Capacity of the operand buffer handed to every DICT parser in this file.
///
/// Limit according to the Adobe Technical Note #5176, chapter 4 DICT Data.
const MAX_OPERANDS_LEN: usize = 48;

/// Deepest nesting level at which a charstring may still call a subroutine
/// (or expand a `seac`).
///
/// Limit according to the Adobe Technical Note #5177 Appendix B.
const STACK_LIMIT: u8 = 10;

/// Capacity of the charstring arguments stack.
///
/// Limit according to the Adobe Technical Note #5177 Appendix B.
const MAX_ARGUMENTS_STACK_LEN: usize = 48;

/// First byte of a two-byte charstring operator; the actual operator follows.
const TWO_BYTE_OPERATOR_MARK: u8 = 12;
43
44/// A [Compact Font Format Table](
45/// https://docs.microsoft.com/en-us/typography/opentype/spec/cff).
46#[derive(Clone, Copy)]
47pub struct Table<'a> {
48    // The whole CFF table.
49    // Used to resolve a local subroutine in a CID font.
50    table_data: &'a [u8],
51
52    #[allow(dead_code)]
53    strings: Index<'a>,
54    global_subrs: Index<'a>,
55    charset: Charset<'a>,
56    number_of_glyphs: NonZeroU16,
57    matrix: Matrix,
58    top_matrix_explicit: bool,
59    char_strings: Index<'a>,
60    kind: FontKind<'a>,
61}
62
63impl<'a> Table<'a> {
64    /// Parses a table from raw data.
65    pub fn parse(data: &'a [u8]) -> Option<Self> {
66        let mut s = Stream::new(data);
67
68        // Parse Header.
69        let major = s.read::<u8>()?;
70        s.skip::<u8>(); // minor
71        let header_size = s.read::<u8>()?;
72        s.skip::<u8>(); // Absolute offset
73
74        if major != 1 {
75            return None;
76        }
77
78        // Jump to Name INDEX. It's not necessarily right after the header.
79        if header_size > 4 {
80            s.advance(usize::from(header_size) - 4);
81        }
82
83        // Skip Name INDEX.
84        skip_index::<u16>(&mut s)?;
85
86        let top_dict = parse_top_dict(&mut s)?;
87
88        // Must be set, otherwise there are nothing to parse.
89        if top_dict.char_strings_offset == 0 {
90            return None;
91        }
92
93        // String INDEX.
94        let strings = parse_index::<u16>(&mut s)?;
95
96        // Parse Global Subroutines INDEX.
97        let global_subrs = parse_index::<u16>(&mut s)?;
98
99        let char_strings = {
100            let mut s = Stream::new_at(data, top_dict.char_strings_offset)?;
101            parse_index::<u16>(&mut s)?
102        };
103
104        // 'The number of glyphs is the value of the count field in the CharStrings INDEX.'
105        let number_of_glyphs = u16::try_from(char_strings.len())
106            .ok()
107            .and_then(NonZeroU16::new)?;
108
109        let charset = match top_dict.charset_offset {
110            Some(charset_id::ISO_ADOBE) => Charset::ISOAdobe,
111            Some(charset_id::EXPERT) => Charset::Expert,
112            Some(charset_id::EXPERT_SUBSET) => Charset::ExpertSubset,
113            Some(offset) => {
114                let mut s = Stream::new_at(data, offset)?;
115                parse_charset(number_of_glyphs, &mut s)?
116            }
117            None => Charset::ISOAdobe, // default
118        };
119
120        let top_matrix_explicit = top_dict.matrix.is_some();
121        let matrix = top_dict.matrix.unwrap_or_default();
122
123        let kind = if top_dict.has_ros {
124            parse_cid_metadata(data, top_dict, number_of_glyphs.get())?
125        } else {
126            // Only SID fonts are allowed to have an Encoding.
127            let encoding = match top_dict.encoding_offset {
128                Some(encoding_id::STANDARD) => Encoding::new_standard(),
129                Some(encoding_id::EXPERT) => Encoding::new_expert(),
130                Some(offset) => parse_encoding(&mut Stream::new_at(data, offset)?)?,
131                None => Encoding::new_standard(), // default
132            };
133
134            parse_sid_metadata(data, top_dict, encoding)?
135        };
136
137        Some(Self {
138            table_data: data,
139            strings,
140            global_subrs,
141            charset,
142            number_of_glyphs,
143            matrix,
144            top_matrix_explicit,
145            char_strings,
146            kind,
147        })
148    }
149
150    /// Returns a total number of glyphs in the font.
151    ///
152    /// Never zero.
153    #[inline]
154    pub fn number_of_glyphs(&self) -> u16 {
155        self.number_of_glyphs.get()
156    }
157
158    /// Return the matrix that needs to be applied to the glyph to scale it to
159    /// a single font unit.
160    pub fn glyph_matrix(&self, glyph_id: GlyphId) -> Matrix {
161        // This is a hot mess, as the interaction between top-level matrix and
162        // font-dict matrix is not properly specified. I dealt with this in the
163        // past, see: https://github.com/typst/subsetter/blob/5c7764b2835e9273801ed7f0078d0ca06550ce74/src/cff/dict/top_dict.rs#L109-L123
164        let FontKind::CID(ref cid) = self.kind else {
165            return self.matrix;
166        };
167
168        let fd_matrix = (|| {
169            let font_dict_index = cid.fd_select.font_dict_index(glyph_id)?;
170            let font_dict_data = cid.fd_array.get(u32::from(font_dict_index))?;
171            parse_font_dict_matrix(font_dict_data)
172        })();
173
174        let effective_fd = match (self.top_matrix_explicit, fd_matrix) {
175            // Case 1.
176            (true, Some(fd)) => fd,
177            // Case 2.
178            (false, Some(fd)) => Matrix {
179                sx: fd.sx * 1000.0,
180                ky: fd.ky * 1000.0,
181                kx: fd.kx * 1000.0,
182                sy: fd.sy * 1000.0,
183                tx: fd.tx * 1000.0,
184                ty: fd.ty * 1000.0,
185            },
186            // Case 3 & 4.
187            (_, None) => Matrix {
188                sx: 1.0,
189                ky: 0.0,
190                kx: 0.0,
191                sy: 1.0,
192                tx: 0.0,
193                ty: 0.0,
194            },
195        };
196
197        // Compose the two matrices.
198        Matrix {
199            sx: self.matrix.sx * effective_fd.sx + self.matrix.kx * effective_fd.ky,
200            ky: self.matrix.ky * effective_fd.sx + self.matrix.sy * effective_fd.ky,
201            kx: self.matrix.sx * effective_fd.kx + self.matrix.kx * effective_fd.sy,
202            sy: self.matrix.ky * effective_fd.kx + self.matrix.sy * effective_fd.sy,
203            tx: self.matrix.sx * effective_fd.tx
204                + self.matrix.kx * effective_fd.ty
205                + self.matrix.tx,
206            ty: self.matrix.ky * effective_fd.tx
207                + self.matrix.sy * effective_fd.ty
208                + self.matrix.ty,
209        }
210    }
211
212    /// Outlines a glyph.
213    pub fn outline(
214        &self,
215        glyph_id: GlyphId,
216        builder: &mut dyn OutlineBuilder,
217    ) -> Result<Rect, OutlineError> {
218        let data = self
219            .char_strings
220            .get(u32::from(glyph_id.0))
221            .ok_or(OutlineError::NoGlyph)?;
222        parse_char_string(data, self, glyph_id, false, builder).map(|v| v.0)
223    }
224
225    /// Resolves a Glyph ID for a code point.
226    pub fn glyph_index(&self, code_point: u8) -> Option<GlyphId> {
227        match self.kind {
228            FontKind::SID(ref sid_meta) => {
229                match sid_meta.encoding.code_to_gid(&self.charset, code_point) {
230                    Some(id) => Some(id),
231                    None => {
232                        // Try using the Standard encoding otherwise.
233                        // Custom Encodings does not guarantee to include all glyphs.
234                        Encoding::new_standard().code_to_gid(&self.charset, code_point)
235                    }
236                }
237            }
238            FontKind::CID(_) => None,
239        }
240    }
241
242    /// Returns a glyph width.
243    pub fn glyph_width(&self, glyph_id: GlyphId) -> Option<u16> {
244        match self.kind {
245            FontKind::SID(ref sid) => {
246                let data = self.char_strings.get(u32::from(glyph_id.0))?;
247                let (_, width) =
248                    parse_char_string(data, self, glyph_id, true, &mut DummyOutline).ok()?;
249                let width = width
250                    .map(|w| sid.nominal_width + w)
251                    .unwrap_or(sid.default_width);
252                u16::try_from(width as i32).ok()
253            }
254            FontKind::CID(_) => None,
255        }
256    }
257
258    /// Convert a CID to its correpsonding glyph id.
259    pub fn glyph_index_by_cid(&self, cid: u16) -> Option<GlyphId> {
260        match self.kind {
261            FontKind::SID(_) => None,
262            FontKind::CID(_) => self.charset.sid_to_gid(StringId(cid)),
263        }
264    }
265
266    /// Whether the font is a CID font.
267    pub fn is_cid(&self) -> bool {
268        matches!(self.kind, FontKind::CID(_))
269    }
270
271    /// Returns a glyph ID by a name.
272    pub fn glyph_index_by_name(&self, name: &str) -> Option<GlyphId> {
273        match self.kind {
274            FontKind::SID(_) => {
275                // See PDFBOX-5987: We first check if there happens to be a custom SID
276                // (even if it's a standard name), and only if not do we check
277                // the standard names.
278                let sid = if let Some(index) =
279                    self.strings.into_iter().position(|n| n == name.as_bytes())
280                {
281                    StringId((STANDARD_NAMES.len() + index) as u16)
282                } else {
283                    STANDARD_NAMES
284                        .iter()
285                        .position(|n| *n == name)
286                        .map(|n| StringId(n as u16))?
287                };
288
289                self.charset.sid_to_gid(sid)
290            }
291            FontKind::CID(_) => None,
292        }
293    }
294
295    /// Returns a glyph name.
296    pub fn glyph_name(&self, glyph_id: GlyphId) -> Option<&'a str> {
297        match self.kind {
298            FontKind::SID(_) => {
299                let sid = self.charset.gid_to_sid(glyph_id)?;
300                let sid = usize::from(sid.0);
301                match STANDARD_NAMES.get(sid) {
302                    Some(name) => Some(name),
303                    None => {
304                        let idx = u32::try_from(sid - STANDARD_NAMES.len()).ok()?;
305                        let name = self.strings.get(idx)?;
306                        core::str::from_utf8(name).ok()
307                    }
308                }
309            }
310            FontKind::CID(_) => None,
311        }
312    }
313
314    /// Returns the CID corresponding to a glyph ID.
315    ///
316    /// Returns `None` if this is not a `CIDFont`.
317    pub fn glyph_cid(&self, glyph_id: GlyphId) -> Option<u16> {
318        match self.kind {
319            FontKind::SID(_) => None,
320            FontKind::CID(_) => self.charset.gid_to_sid(glyph_id).map(|id| id.0),
321        }
322    }
323}
324
325impl core::fmt::Debug for Table<'_> {
326    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
327        write!(f, "Table {{ ... }}")
328    }
329}
330
/// Charstring operator codes; a subset of the operators defined in the Adobe
/// Technical Note #5177.
mod operator {
    // Stem hints. Their operands are only counted, never applied.
    pub(crate) const HORIZONTAL_STEM: u8 = 1;
    pub(crate) const VERTICAL_STEM: u8 = 3;
    pub(crate) const HORIZONTAL_STEM_HINT_MASK: u8 = 18;
    pub(crate) const VERTICAL_STEM_HINT_MASK: u8 = 23;

    // Hint masks. Followed by mask bytes inside the charstring.
    pub(crate) const HINT_MASK: u8 = 19;
    pub(crate) const COUNTER_MASK: u8 = 20;

    // Move operators.
    pub(crate) const VERTICAL_MOVE_TO: u8 = 4;
    pub(crate) const MOVE_TO: u8 = 21;
    pub(crate) const HORIZONTAL_MOVE_TO: u8 = 22;

    // Line operators.
    pub(crate) const LINE_TO: u8 = 5;
    pub(crate) const HORIZONTAL_LINE_TO: u8 = 6;
    pub(crate) const VERTICAL_LINE_TO: u8 = 7;

    // Curve operators, including the mixed curve/line ones.
    pub(crate) const CURVE_TO: u8 = 8;
    pub(crate) const CURVE_LINE: u8 = 24;
    pub(crate) const LINE_CURVE: u8 = 25;
    pub(crate) const VV_CURVE_TO: u8 = 26;
    pub(crate) const HH_CURVE_TO: u8 = 27;
    pub(crate) const VH_CURVE_TO: u8 = 30;
    pub(crate) const HV_CURVE_TO: u8 = 31;

    // Subroutine handling and charstring termination.
    pub(crate) const CALL_LOCAL_SUBROUTINE: u8 = 10;
    pub(crate) const RETURN: u8 = 11;
    pub(crate) const ENDCHAR: u8 = 14;
    pub(crate) const CALL_GLOBAL_SUBROUTINE: u8 = 29;

    // Number prefixes. `SHORT_INT` is followed by a 16-bit signed integer.
    // NOTE(review): `FIXED_16_16` is not handled in the visible part of this
    // file; judging by the name it introduces a 16.16 fixed-point number.
    pub(crate) const SHORT_INT: u8 = 28;
    pub(crate) const FIXED_16_16: u8 = 255;

    // Second byte of the two-byte operators, i.e. the byte that follows
    // `TWO_BYTE_OPERATOR_MARK`. `DOTSECTION` is accepted and ignored.
    pub(crate) const DOTSECTION: u8 = 0;
    pub(crate) const HFLEX: u8 = 34;
    pub(crate) const FLEX: u8 = 35;
    pub(crate) const HFLEX1: u8 = 36;
    pub(crate) const FLEX1: u8 = 37;
}
364
/// Top DICT operator codes; a subset of the Adobe Technical Note #5176,
/// Table 9 Top DICT Operator Entries.
///
/// NOTE(review): the four-digit values presumably encode two-byte operators
/// as `1200 + second byte` — confirm against the `dict` module.
mod top_dict_operator {
    // Offsets of the structures referenced by the Top DICT.
    pub(crate) const CHARSET_OFFSET: u16 = 15;
    pub(crate) const ENCODING_OFFSET: u16 = 16;
    pub(crate) const CHAR_STRINGS_OFFSET: u16 = 17;
    // Also looked up in Font DICTs.
    pub(crate) const PRIVATE_DICT_SIZE_AND_OFFSET: u16 = 18;

    // Six-operand font matrix. Also looked up in Font DICTs.
    pub(crate) const FONT_MATRIX: u16 = 1207;

    // CID-specific entries. The presence of `ROS` marks a CID font.
    pub(crate) const ROS: u16 = 1230;
    pub(crate) const FD_ARRAY: u16 = 1236;
    pub(crate) const FD_SELECT: u16 = 1237;
}
377
/// Private DICT operator codes; a subset of the Adobe Technical Note #5176,
/// Table 23 Private DICT Operators.
mod private_dict_operator {
    // Offset of the local subroutines, relative to the Private DICT start.
    pub(crate) const LOCAL_SUBROUTINES_OFFSET: u16 = 19;
    // Width of glyphs whose charstring does not specify one.
    pub(crate) const DEFAULT_WIDTH: u16 = 20;
    // Value added to a width specified by a charstring.
    pub(crate) const NOMINAL_WIDTH: u16 = 21;
}
385
/// Predefined Charset IDs from the Adobe Technical Note #5176, Table 22.
///
/// A Top DICT charset "offset" equal to one of these values selects the
/// predefined charset instead of pointing at charset data.
mod charset_id {
    pub(crate) const ISO_ADOBE: usize = 0;
    pub(crate) const EXPERT: usize = 1;
    pub(crate) const EXPERT_SUBSET: usize = 2;
}
392
/// Predefined Encoding IDs from the Adobe Technical Note #5176, Table 16.
///
/// A Top DICT encoding "offset" equal to one of these values selects the
/// predefined encoding instead of pointing at encoding data.
mod encoding_id {
    pub(crate) const STANDARD: usize = 0;
    pub(crate) const EXPERT: usize = 1;
}
398
399#[derive(Clone, Copy, Debug)]
400pub(crate) enum FontKind<'a> {
401    SID(SIDMetadata<'a>),
402    CID(CIDMetadata<'a>),
403}
404
405#[derive(Clone, Copy, Default, Debug)]
406pub(crate) struct SIDMetadata<'a> {
407    local_subrs: Index<'a>,
408    /// Can be zero.
409    default_width: f32,
410    /// Can be zero.
411    nominal_width: f32,
412    encoding: Encoding<'a>,
413}
414
415#[derive(Clone, Copy, Default, Debug)]
416pub(crate) struct CIDMetadata<'a> {
417    fd_array: Index<'a>,
418    fd_select: FDSelect<'a>,
419}
420
421#[derive(Default)]
422struct TopDict {
423    charset_offset: Option<usize>,
424    encoding_offset: Option<usize>,
425    char_strings_offset: usize,
426    private_dict_range: Option<Range<usize>>,
427    matrix: Option<Matrix>,
428    has_ros: bool,
429    fd_array_offset: Option<usize>,
430    fd_select_offset: Option<usize>,
431}
432
433fn parse_top_dict(s: &mut Stream<'_>) -> Option<TopDict> {
434    let mut top_dict = TopDict::default();
435
436    let index = parse_index::<u16>(s)?;
437
438    // The Top DICT INDEX should have only one dictionary.
439    let data = index.get(0)?;
440
441    let mut operands_buffer = [0.0; MAX_OPERANDS_LEN];
442    let mut dict_parser = DictionaryParser::new(data, &mut operands_buffer);
443    while let Some(operator) = dict_parser.parse_next() {
444        match operator.get() {
445            top_dict_operator::CHARSET_OFFSET => {
446                top_dict.charset_offset = dict_parser.parse_offset();
447            }
448            top_dict_operator::ENCODING_OFFSET => {
449                top_dict.encoding_offset = dict_parser.parse_offset();
450            }
451            top_dict_operator::CHAR_STRINGS_OFFSET => {
452                top_dict.char_strings_offset = dict_parser.parse_offset()?;
453            }
454            top_dict_operator::PRIVATE_DICT_SIZE_AND_OFFSET => {
455                top_dict.private_dict_range = dict_parser.parse_range();
456            }
457            top_dict_operator::FONT_MATRIX => {
458                dict_parser.parse_operands()?;
459                let operands = dict_parser.operands();
460                if operands.len() == 6 {
461                    top_dict.matrix = Some(Matrix {
462                        sx: operands[0] as f32,
463                        ky: operands[1] as f32,
464                        kx: operands[2] as f32,
465                        sy: operands[3] as f32,
466                        tx: operands[4] as f32,
467                        ty: operands[5] as f32,
468                    });
469                }
470            }
471            top_dict_operator::ROS => {
472                top_dict.has_ros = true;
473            }
474            top_dict_operator::FD_ARRAY => {
475                top_dict.fd_array_offset = dict_parser.parse_offset();
476            }
477            top_dict_operator::FD_SELECT => {
478                top_dict.fd_select_offset = dict_parser.parse_offset();
479            }
480            _ => {}
481        }
482    }
483
484    Some(top_dict)
485}
486
/// The Private DICT entries this implementation cares about.
#[derive(Default, Debug)]
struct PrivateDict {
    // Offset of the local subroutines, relative to the Private DICT start.
    local_subroutines_offset: Option<usize>,
    // Value of the default width entry, if present and readable.
    default_width: Option<f32>,
    // Value of the nominal width entry, if present and readable.
    nominal_width: Option<f32>,
}
493
494fn parse_private_dict(data: &[u8]) -> PrivateDict {
495    let mut dict = PrivateDict::default();
496    let mut operands_buffer = [0.0; MAX_OPERANDS_LEN];
497    let mut dict_parser = DictionaryParser::new(data, &mut operands_buffer);
498    while let Some(operator) = dict_parser.parse_next() {
499        if operator.get() == private_dict_operator::LOCAL_SUBROUTINES_OFFSET {
500            dict.local_subroutines_offset = dict_parser.parse_offset();
501        } else if operator.get() == private_dict_operator::DEFAULT_WIDTH {
502            dict.default_width = dict_parser.parse_number().map(|n| n as f32);
503        } else if operator.get() == private_dict_operator::NOMINAL_WIDTH {
504            dict.nominal_width = dict_parser.parse_number().map(|n| n as f32);
505        }
506    }
507
508    dict
509}
510
511fn parse_font_dict(data: &[u8]) -> Option<Range<usize>> {
512    let mut operands_buffer = [0.0; MAX_OPERANDS_LEN];
513    let mut dict_parser = DictionaryParser::new(data, &mut operands_buffer);
514    while let Some(operator) = dict_parser.parse_next() {
515        if operator.get() == top_dict_operator::PRIVATE_DICT_SIZE_AND_OFFSET {
516            return dict_parser.parse_range();
517        }
518    }
519
520    None
521}
522
523fn parse_font_dict_matrix(data: &[u8]) -> Option<Matrix> {
524    let mut operands_buffer = [0.0; MAX_OPERANDS_LEN];
525    let mut dict_parser = DictionaryParser::new(data, &mut operands_buffer);
526    while let Some(operator) = dict_parser.parse_next() {
527        if operator.get() == top_dict_operator::FONT_MATRIX {
528            dict_parser.parse_operands()?;
529            let operands = dict_parser.operands();
530            if operands.len() == 6 {
531                return Some(Matrix {
532                    sx: operands[0] as f32,
533                    ky: operands[1] as f32,
534                    kx: operands[2] as f32,
535                    sy: operands[3] as f32,
536                    tx: operands[4] as f32,
537                    ty: operands[5] as f32,
538                });
539            }
540        }
541    }
542
543    None
544}
545
546/// In CID fonts, to get local subroutines we have to:
547///   1. Find Font DICT index via `FDSelect` by GID.
548///   2. Get Font DICT data from `FDArray` using this index.
549///   3. Get a Private DICT offset from a Font DICT.
550///   4. Get a local subroutine offset from Private DICT.
551///   5. Parse a local subroutine at offset.
552fn parse_cid_local_subrs<'a>(
553    data: &'a [u8],
554    glyph_id: GlyphId,
555    cid: &CIDMetadata<'_>,
556) -> Option<Index<'a>> {
557    let font_dict_index = cid.fd_select.font_dict_index(glyph_id)?;
558    let font_dict_data = cid.fd_array.get(u32::from(font_dict_index))?;
559    let private_dict_range = parse_font_dict(font_dict_data)?;
560    let private_dict_data = data.get(private_dict_range.clone())?;
561    let private_dict = parse_private_dict(private_dict_data);
562    let subroutines_offset = private_dict.local_subroutines_offset?;
563
564    // 'The local subroutines offset is relative to the beginning
565    // of the Private DICT data.'
566    let start = private_dict_range.start.checked_add(subroutines_offset)?;
567    let subrs_data = data.get(start..)?;
568    let mut s = Stream::new(subrs_data);
569    parse_index::<u16>(&mut s)
570}
571
572struct CharStringParserContext<'a> {
573    metadata: &'a Table<'a>,
574    width: Option<f32>,
575    stems_len: u32,
576    has_endchar: bool,
577    has_seac: bool,
578    glyph_id: GlyphId, // Required to parse local subroutine in CID fonts.
579    local_subrs: Option<Index<'a>>,
580}
581
582fn parse_char_string(
583    data: &[u8],
584    metadata: &Table<'_>,
585    glyph_id: GlyphId,
586    width_only: bool,
587    builder: &mut dyn OutlineBuilder,
588) -> Result<(Rect, Option<f32>), OutlineError> {
589    let local_subrs = match metadata.kind {
590        FontKind::SID(ref sid) => Some(sid.local_subrs),
591        FontKind::CID(_) => None, // Will be resolved on request.
592    };
593
594    let mut ctx = CharStringParserContext {
595        metadata,
596        width: None,
597        stems_len: 0,
598        has_endchar: false,
599        has_seac: false,
600        glyph_id,
601        local_subrs,
602    };
603
604    let mut inner_builder = Builder {
605        builder,
606        bbox: RectF::new(),
607    };
608
609    let stack = ArgumentsStack {
610        data: &mut [0.0; MAX_ARGUMENTS_STACK_LEN], // 192B
611        len: 0,
612        max_len: MAX_ARGUMENTS_STACK_LEN,
613    };
614    let mut parser = CharStringParser {
615        stack,
616        builder: &mut inner_builder,
617        x: 0.0,
618        y: 0.0,
619        has_move_to: false,
620        is_first_move_to: true,
621        width_only,
622    };
623    _parse_char_string(&mut ctx, data, 0, &mut parser)?;
624
625    if width_only {
626        return Ok((Rect::zero(), ctx.width));
627    }
628
629    if !ctx.has_endchar {
630        return Err(OutlineError::MissingEndChar);
631    }
632
633    let bbox = parser.builder.bbox;
634
635    // Check that bbox was changed.
636    if bbox.is_default() {
637        return Err(OutlineError::ZeroBBox);
638    }
639
640    let rect = bbox.to_rect().ok_or(OutlineError::BboxOverflow)?;
641    Ok((rect, ctx.width))
642}
643
644fn _parse_char_string(
645    ctx: &mut CharStringParserContext<'_>,
646    char_string: &[u8],
647    depth: u8,
648    p: &mut CharStringParser<'_>,
649) -> Result<(), OutlineError> {
650    // See PDFJS-6132: In case we encounter an error, try to return whatever
651    // we extracted so far.
652    macro_rules! try_or_endchar {
653        ($e:expr) => {
654            match $e {
655                Ok(()) => {}
656                Err(_) if p.stack.is_empty() => {
657                    if !p.is_first_move_to {
658                        p.builder.close();
659                    }
660                    ctx.has_endchar = true;
661                    return Ok(());
662                }
663                Err(e) => return Err(e),
664            }
665        };
666    }
667
668    let mut s = Stream::new(char_string);
669    while !s.at_end() {
670        let op = s.read::<u8>().ok_or(OutlineError::ReadOutOfBounds)?;
671        match op {
672            0 | 2 | 9 | 13 | 15 | 16 | 17 => {
673                // Reserved.
674                warn!("encountered reserved operator {}", op);
675            }
676            operator::HORIZONTAL_STEM
677            | operator::VERTICAL_STEM
678            | operator::HORIZONTAL_STEM_HINT_MASK
679            | operator::VERTICAL_STEM_HINT_MASK => {
680                // y dy {dya dyb}* hstem
681                // x dx {dxa dxb}* vstem
682                // y dy {dya dyb}* hstemhm
683                // x dx {dxa dxb}* vstemhm
684
685                // If the stack length is uneven, than the first value is a `width`.
686                let len = if p.stack.len().is_odd() && ctx.width.is_none() {
687                    ctx.width = Some(p.stack.at(0));
688                    p.stack.len() - 1
689                } else {
690                    p.stack.len()
691                };
692
693                ctx.stems_len += len as u32 >> 1;
694
695                // We are ignoring the hint operators.
696                p.stack.clear();
697            }
698            operator::VERTICAL_MOVE_TO => {
699                let mut i = 0;
700                if p.stack.len() == 2 {
701                    i += 1;
702                    if ctx.width.is_none() {
703                        ctx.width = Some(p.stack.at(0));
704                    }
705                }
706
707                try_or_endchar!(p.parse_vertical_move_to(i));
708            }
709            operator::LINE_TO => {
710                try_or_endchar!(p.parse_line_to());
711            }
712            operator::HORIZONTAL_LINE_TO => {
713                try_or_endchar!(p.parse_horizontal_line_to());
714            }
715            operator::VERTICAL_LINE_TO => {
716                try_or_endchar!(p.parse_vertical_line_to());
717            }
718            operator::CURVE_TO => {
719                try_or_endchar!(p.parse_curve_to());
720            }
721            operator::CALL_LOCAL_SUBROUTINE => {
722                if p.stack.is_empty() {
723                    return Err(OutlineError::InvalidArgumentsStackLength);
724                }
725
726                if depth == STACK_LIMIT {
727                    return Err(OutlineError::NestingLimitReached);
728                }
729
730                // Parse and remember the local subroutine for the current glyph.
731                // Since it's a pretty complex task, we're doing it only when
732                // a local subroutine is actually requested by the glyphs charstring.
733                if ctx.local_subrs.is_none()
734                    && let FontKind::CID(ref cid) = ctx.metadata.kind
735                {
736                    ctx.local_subrs =
737                        parse_cid_local_subrs(ctx.metadata.table_data, ctx.glyph_id, cid);
738                }
739
740                if let Some(local_subrs) = ctx.local_subrs {
741                    let subroutine_bias = calc_subroutine_bias(local_subrs.len());
742                    let index = conv_subroutine_index(p.stack.pop(), subroutine_bias)?;
743                    let char_string = local_subrs
744                        .get(index)
745                        .ok_or(OutlineError::InvalidSubroutineIndex)?;
746                    _parse_char_string(ctx, char_string, depth + 1, p)?;
747                } else {
748                    return Err(OutlineError::NoLocalSubroutines);
749                }
750
751                if ctx.has_endchar && !ctx.has_seac {
752                    if !s.at_end() {
753                        return Err(OutlineError::DataAfterEndChar);
754                    }
755
756                    break;
757                }
758            }
759            operator::RETURN => {
760                break;
761            }
762            TWO_BYTE_OPERATOR_MARK => {
763                // flex
764                let op2 = s.read::<u8>().ok_or(OutlineError::ReadOutOfBounds)?;
765                match op2 {
766                    operator::HFLEX => try_or_endchar!(p.parse_hflex()),
767                    operator::FLEX => try_or_endchar!(p.parse_flex()),
768                    operator::HFLEX1 => try_or_endchar!(p.parse_hflex1()),
769                    operator::FLEX1 => try_or_endchar!(p.parse_flex1()),
770                    // Division operator.
771                    12 => {
772                        let num2 = p.stack.pop();
773                        let num1 = p.stack.pop();
774
775                        p.stack.push(num1 / num2)?;
776                    }
777                    operator::DOTSECTION => {}
778                    _ => {
779                        return Err(OutlineError::UnsupportedOperator);
780                    }
781                }
782            }
783            operator::ENDCHAR => {
784                if p.stack.len() == 4 || (ctx.width.is_none() && p.stack.len() == 5) {
785                    // Process 'seac'.
786                    let accent_char = seac_code_to_glyph_id(&ctx.metadata.charset, p.stack.pop())
787                        .ok_or(OutlineError::InvalidSeacCode)?;
788                    let base_char = seac_code_to_glyph_id(&ctx.metadata.charset, p.stack.pop())
789                        .ok_or(OutlineError::InvalidSeacCode)?;
790                    let dy = p.stack.pop();
791                    let dx = p.stack.pop();
792
793                    if ctx.width.is_none() && !p.stack.is_empty() {
794                        ctx.width = Some(p.stack.pop());
795                    }
796
797                    ctx.has_seac = true;
798
799                    if depth == STACK_LIMIT {
800                        return Err(OutlineError::NestingLimitReached);
801                    }
802
803                    let base_char_string = ctx
804                        .metadata
805                        .char_strings
806                        .get(u32::from(base_char.0))
807                        .ok_or(OutlineError::InvalidSeacCode)?;
808                    _parse_char_string(ctx, base_char_string, depth + 1, p)?;
809                    p.x = dx;
810                    p.y = dy;
811
812                    let accent_char_string = ctx
813                        .metadata
814                        .char_strings
815                        .get(u32::from(accent_char.0))
816                        .ok_or(OutlineError::InvalidSeacCode)?;
817                    _parse_char_string(ctx, accent_char_string, depth + 1, p)?;
818                } else if p.stack.len() == 1 && ctx.width.is_none() {
819                    ctx.width = Some(p.stack.pop());
820                }
821
822                if !p.is_first_move_to {
823                    p.is_first_move_to = true;
824                    p.builder.close();
825                }
826
827                if !s.at_end() {
828                    return Err(OutlineError::DataAfterEndChar);
829                }
830
831                ctx.has_endchar = true;
832
833                break;
834            }
835            operator::HINT_MASK | operator::COUNTER_MASK => {
836                let mut len = p.stack.len();
837
838                // We are ignoring the hint operators.
839                p.stack.clear();
840
841                // If the stack length is uneven, than the first value is a `width`.
842                if len.is_odd() {
843                    len -= 1;
844                    if ctx.width.is_none() {
845                        ctx.width = Some(p.stack.at(0));
846                    }
847                }
848
849                ctx.stems_len += len as u32 >> 1;
850
851                s.advance(usize::num_from((ctx.stems_len + 7) >> 3));
852            }
853            operator::MOVE_TO => {
854                let mut i = 0;
855                if p.stack.len() == 3 {
856                    i += 1;
857                    if ctx.width.is_none() {
858                        ctx.width = Some(p.stack.at(0));
859                    }
860                }
861
862                try_or_endchar!(p.parse_move_to(i));
863            }
864            operator::HORIZONTAL_MOVE_TO => {
865                let mut i = 0;
866                if p.stack.len() == 2 {
867                    i += 1;
868                    if ctx.width.is_none() {
869                        ctx.width = Some(p.stack.at(0));
870                    }
871                }
872
873                try_or_endchar!(p.parse_horizontal_move_to(i));
874            }
875            operator::CURVE_LINE => {
876                try_or_endchar!(p.parse_curve_line());
877            }
878            operator::LINE_CURVE => {
879                try_or_endchar!(p.parse_line_curve());
880            }
881            operator::VV_CURVE_TO => {
882                try_or_endchar!(p.parse_vv_curve_to());
883            }
884            operator::HH_CURVE_TO => {
885                try_or_endchar!(p.parse_hh_curve_to());
886            }
887            operator::SHORT_INT => {
888                let n = s.read::<i16>().ok_or(OutlineError::ReadOutOfBounds)?;
889                p.stack.push(f32::from(n))?;
890            }
891            operator::CALL_GLOBAL_SUBROUTINE => {
892                if p.stack.is_empty() {
893                    return Err(OutlineError::InvalidArgumentsStackLength);
894                }
895
896                if depth == STACK_LIMIT {
897                    return Err(OutlineError::NestingLimitReached);
898                }
899
900                let subroutine_bias = calc_subroutine_bias(ctx.metadata.global_subrs.len());
901                let index = conv_subroutine_index(p.stack.pop(), subroutine_bias)?;
902                let char_string = ctx
903                    .metadata
904                    .global_subrs
905                    .get(index)
906                    .ok_or(OutlineError::InvalidSubroutineIndex)?;
907                _parse_char_string(ctx, char_string, depth + 1, p)?;
908
909                if ctx.has_endchar && !ctx.has_seac {
910                    if !s.at_end() {
911                        return Err(OutlineError::DataAfterEndChar);
912                    }
913
914                    break;
915                }
916            }
917            operator::VH_CURVE_TO => {
918                try_or_endchar!(p.parse_vh_curve_to());
919            }
920            operator::HV_CURVE_TO => {
921                try_or_endchar!(p.parse_hv_curve_to());
922            }
923            32..=246 => {
924                p.parse_int1(op)?;
925            }
926            247..=250 => {
927                p.parse_int2(op, &mut s)?;
928            }
929            251..=254 => {
930                p.parse_int3(op, &mut s)?;
931            }
932            operator::FIXED_16_16 => {
933                p.parse_fixed(&mut s)?;
934            }
935        }
936
937        if p.width_only && ctx.width.is_some() {
938            break;
939        }
940    }
941
942    // TODO: 'A charstring subroutine must end with either an endchar or a return operator.'
943
944    Ok(())
945}
946
947fn seac_code_to_glyph_id(charset: &Charset<'_>, n: f32) -> Option<GlyphId> {
948    let code = u8::try_num_from(n)?;
949
950    let sid = STANDARD_ENCODING[usize::from(code)];
951    let sid = StringId(sid);
952
953    match charset {
954        Charset::ISOAdobe => {
955            // ISO Adobe charset only defines string ids up to 228 (zcaron)
956            if code <= 228 {
957                Some(GlyphId(sid.0))
958            } else {
959                None
960            }
961        }
962        Charset::Expert | Charset::ExpertSubset => None,
963        _ => charset.sid_to_gid(sid),
964    }
965}
966
967#[derive(Clone, Copy, Debug)]
968enum FDSelect<'a> {
969    Format0(LazyArray16<'a, u8>),
970    Format3(&'a [u8]), // It's easier to parse it in-place.
971}
972
973impl Default for FDSelect<'_> {
974    fn default() -> Self {
975        FDSelect::Format0(LazyArray16::default())
976    }
977}
978
979impl FDSelect<'_> {
980    fn font_dict_index(&self, glyph_id: GlyphId) -> Option<u8> {
981        match self {
982            FDSelect::Format0(array) => array.get(glyph_id.0),
983            FDSelect::Format3(data) => {
984                let mut s = Stream::new(data);
985                let number_of_ranges = s.read::<u16>()?;
986                if number_of_ranges == 0 {
987                    return None;
988                }
989
990                // 'A sentinel GID follows the last range element and serves
991                // to delimit the last range in the array.'
992                // So we can simply increase the number of ranges by one.
993                let number_of_ranges = number_of_ranges.checked_add(1)?;
994
995                // Range is: GlyphId + u8
996                let mut prev_first_glyph = s.read::<GlyphId>()?;
997                let mut prev_index = s.read::<u8>()?;
998                for _ in 1..number_of_ranges {
999                    let curr_first_glyph = s.read::<GlyphId>()?;
1000                    if (prev_first_glyph..curr_first_glyph).contains(&glyph_id) {
1001                        return Some(prev_index);
1002                    } else {
1003                        prev_index = s.read::<u8>()?;
1004                    }
1005
1006                    prev_first_glyph = curr_first_glyph;
1007                }
1008
1009                None
1010            }
1011        }
1012    }
1013}
1014
1015fn parse_fd_select<'a>(number_of_glyphs: u16, s: &mut Stream<'a>) -> Option<FDSelect<'a>> {
1016    let format = s.read::<u8>()?;
1017    match format {
1018        0 => Some(FDSelect::Format0(s.read_array16::<u8>(number_of_glyphs)?)),
1019        3 => Some(FDSelect::Format3(s.tail()?)),
1020        _ => None,
1021    }
1022}
1023
1024fn parse_sid_metadata<'a>(
1025    data: &'a [u8],
1026    top_dict: TopDict,
1027    encoding: Encoding<'a>,
1028) -> Option<FontKind<'a>> {
1029    let mut metadata = SIDMetadata {
1030        encoding,
1031        ..Default::default()
1032    };
1033
1034    let private_dict = if let Some(range) = top_dict.private_dict_range.clone() {
1035        parse_private_dict(data.get(range)?)
1036    } else {
1037        return Some(FontKind::SID(metadata));
1038    };
1039
1040    metadata.default_width = private_dict.default_width.unwrap_or(0.0);
1041    metadata.nominal_width = private_dict.nominal_width.unwrap_or(0.0);
1042
1043    if let (Some(private_dict_range), Some(subroutines_offset)) = (
1044        top_dict.private_dict_range,
1045        private_dict.local_subroutines_offset,
1046    ) {
1047        // 'The local subroutines offset is relative to the beginning
1048        // of the Private DICT data.'
1049        if let Some(start) = private_dict_range.start.checked_add(subroutines_offset) {
1050            let data = data.get(start..data.len())?;
1051            let mut s = Stream::new(data);
1052            metadata.local_subrs = parse_index::<u16>(&mut s)?;
1053        }
1054    }
1055
1056    Some(FontKind::SID(metadata))
1057}
1058
1059fn parse_cid_metadata(
1060    data: &[u8],
1061    top_dict: TopDict,
1062    number_of_glyphs: u16,
1063) -> Option<FontKind<'_>> {
1064    let (charset_offset, fd_array_offset, fd_select_offset) = match (
1065        top_dict.charset_offset,
1066        top_dict.fd_array_offset,
1067        top_dict.fd_select_offset,
1068    ) {
1069        (Some(a), Some(b), Some(c)) => (a, b, c),
1070        _ => return None, // charset, FDArray and FDSelect must be set.
1071    };
1072
1073    if charset_offset <= charset_id::EXPERT_SUBSET {
1074        // 'There are no predefined charsets for CID fonts.'
1075        // Adobe Technical Note #5176, chapter 18 CID-keyed Fonts
1076        return None;
1077    }
1078
1079    let metadata = CIDMetadata {
1080        fd_array: {
1081            let mut s = Stream::new_at(data, fd_array_offset)?;
1082            parse_index::<u16>(&mut s)?
1083        },
1084        fd_select: {
1085            let mut s = Stream::new_at(data, fd_select_offset)?;
1086            parse_fd_select(number_of_glyphs, &mut s)?
1087        },
1088    };
1089
1090    Some(FontKind::CID(metadata))
1091}
1092
1093/// A type-safe wrapper for string ID.
1094#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Debug)]
1095pub(crate) struct StringId(u16);
1096
1097impl FromData for StringId {
1098    const SIZE: usize = 2;
1099
1100    #[inline]
1101    fn parse(data: &[u8]) -> Option<Self> {
1102        u16::parse(data).map(StringId)
1103    }
1104}
1105
/// Parity checks for integers.
trait IsEven {
    /// Returns `true` when the value is divisible by two.
    fn is_even(&self) -> bool;
    /// Returns `true` when the value is not divisible by two.
    fn is_odd(&self) -> bool;
}

impl IsEven for usize {
    #[inline]
    fn is_even(&self) -> bool {
        !self.is_odd()
    }

    #[inline]
    fn is_odd(&self) -> bool {
        *self % 2 == 1
    }
}
1122
/// Returns the absolute value of `n`.
fn f32_abs(n: f32) -> f32 {
    f32::abs(n)
}
1126
1127#[inline]
1128fn conv_subroutine_index(index: f32, bias: u16) -> Result<u32, OutlineError> {
1129    conv_subroutine_index_impl(index, bias).ok_or(OutlineError::InvalidSubroutineIndex)
1130}
1131
1132#[inline]
1133fn conv_subroutine_index_impl(index: f32, bias: u16) -> Option<u32> {
1134    let index = i32::try_num_from(index)?;
1135    let bias = i32::from(bias);
1136
1137    let index = index.checked_add(bias)?;
1138    u32::try_from(index).ok()
1139}
1140
// Adobe Technical Note #5176, Chapter 16 "Local / Global Subrs INDEXes"
/// Returns the bias for a subroutine INDEX holding `len` entries.
#[inline]
fn calc_subroutine_bias(len: u32) -> u16 {
    match len {
        0..=1239 => 107,
        1240..=33899 => 1131,
        _ => 32768,
    }
}