nipdf_cff_parser/
inner.rs

1use paste::paste;
2use prescript::{Encoding, Name, name, sname};
3use snafu::prelude::*;
4use std::{
5    borrow::Cow,
6    collections::HashMap,
7    hash::Hash,
8    num::TryFromIntError,
9    ops::{Deref, Range, RangeInclusive},
10};
11use winnow::{
12    ModalResult, Parser,
13    binary::{be_u8, be_u16, be_u24, be_u32, length_repeat, length_take},
14    combinator::{alt, dispatch, empty, fail, preceded, repeat, repeat_till, terminated},
15    error::{ErrMode, ErrorConvert, FromExternalError, ParseError, ParserError},
16    stream::{Accumulate, Compare, Stream, StreamIsPartial},
17    token::{any, rest, take},
18};
19
20mod predefined_charsets;
21mod predefined_encodings;
22
/// Glyph ID
type Gid = u8;

/// String ID (SID): the integer key accepted by `StringIndex::get` to look up a string.
type Sid = u16;
27
/// A value stored in a Dict: either a single number or a list of numbers
/// that all share one numeric type.
#[derive(Clone, PartialEq, Debug)]
enum Operand {
    Integer(i32),
    Real(f32),
    IntArray(Vec<i32>),
    RealArray(Vec<f32>),
}

impl Operand {
    /// Integer payload, or `None` for every variant other than `Integer`.
    pub fn int(&self) -> Option<i32> {
        if let Self::Integer(value) = self {
            Some(*value)
        } else {
            None
        }
    }

    /// Numeric payload as `f32`. An `Integer` is widened to `f32`;
    /// array variants yield `None`.
    pub fn real(&self) -> Option<f32> {
        match *self {
            Self::Real(value) => Some(value),
            Self::Integer(value) => Some(value as f32),
            Self::IntArray(_) | Self::RealArray(_) => None,
        }
    }

    /// Boolean payload: `Integer(0)` is `false`, `Integer(1)` is `true`.
    /// Any other integer, and any non-integer variant, yields `None`.
    pub fn bool(&self) -> Option<bool> {
        match self.int()? {
            0 => Some(false),
            1 => Some(true),
            _ => None,
        }
    }

    /// Borrow the items of an `IntArray`, or `None` for any other variant.
    pub fn int_array(&self) -> Option<&[i32]> {
        if let Self::IntArray(items) = self {
            Some(items.as_slice())
        } else {
            None
        }
    }

    /// Borrow the items of a `RealArray`, or `None` for any other variant.
    pub fn real_array(&self) -> Option<&[f32]> {
        if let Self::RealArray(items) = self {
            Some(items.as_slice())
        } else {
            None
        }
    }
}
79
80/// Return parser to parse integer
81fn integer_parser<'a, E: ParserError<&'a [u8]>>() -> impl Parser<&'a [u8], i32, E> {
82    dispatch! {any;
83        v@32..=246  => |_: &mut &'a [u8]| Ok((v as i32) - 139),
84        v@247..=250 => |buf: &mut &'a [u8]| {
85            let b1 = any(buf)?;
86            Ok(((v as i32) - 247) * 256 + (b1 as i32) + 108)
87        },
88        v@251..=254 => |buf: &mut &'a [u8]| {
89            let b1 = any(buf)?;
90            Ok(-((v as i32) - 251) * 256 - (b1 as i32) - 108)
91        },
92        28 => |buf: &mut &'a [u8]| {
93            let b1 = any(buf)?;
94            let b2 = any(buf)?;
95            Ok(((b1 as i16) << 8 | b2 as i16) as i32)
96        },
97        29 => |buf: &mut &'a [u8]| {
98            let b1 = any(buf)?;
99            let b2 = any(buf)?;
100            let b3 = any(buf)?;
101            let b4 = any(buf)?;
102            Ok(((b1 as i32) << 24) + ((b2 as i32) << 16) + ((b3 as i32) << 8) + (b4 as i32))
103        },
104        _ => fail::<_, i32, _>,
105    }
106}
107
108/// A real number operand is provided in addition to integer operands. This
109/// operand begins with a byte value of 30 followed  by a variable-length
110/// sequence of bytes. Each byte is composed  of two 4-bit nibbles as defined in
111/// fowling table. The first nibble of a  pair is stored in the most significant 4
112/// bits of a byte and the  second nibble of a pair is stored in the least
113/// significant 4 bits of a byte.
114///
115/// | nibble | represents |
116/// |--------|-------|
117/// | 0-9 | 0-9 |
118/// | a | .(decimal point) |
119/// | b | E |
120/// | c | E– |
121/// | d | <reserved> |
122/// | e | –(minus) |
123/// | f | end of number |
124///
125/// A real number is terminated by one (or two) 0xf nibbles so that it is
126/// always padded to a full byte. Thus, the value –2.25 is  encoded by the byte
127/// sequence (1e e2 a2 5f) and the value  0.140541E–3 by the sequence (1e 0a 14
128/// 05 41 c3 ff).
129fn real_parser<I, E>() -> impl Parser<I, f32, E>
130where
131    I: Stream<Token = u8> + Clone + StreamIsPartial + Compare<u8>,
132    E: ParserError<I> + ParserError<(I, usize)> + ErrorConvert<E>,
133{
134    use winnow::binary::bits::{bits, pattern, take};
135
136    #[derive(PartialEq, Debug)]
137    enum NumberState {
138        Int,
139        Mantissa,
140        Exponent,
141    }
142
143    struct Real {
144        int: u32,
145        negative: bool,
146        state: NumberState,
147        mantissa: f32,
148        mantissa_len: i32,
149        exponent_negative: bool,
150        exponent: u32,
151    }
152
153    impl From<Real> for f32 {
154        fn from(value: Real) -> Self {
155            let mut r = value.mantissa.mul_add(
156                10f32.powi(-value.mantissa_len)
157                    * 10f32.powf(if value.exponent_negative {
158                        -(value.exponent as f32)
159                    } else {
160                        value.exponent as f32
161                    }),
162                value.int as f32,
163            );
164            if value.negative {
165                r = -r;
166            }
167            r
168        }
169    }
170
171    impl Accumulate<u8> for Real {
172        fn initial(_: Option<usize>) -> Self {
173            Self {
174                int: 0,
175                state: NumberState::Int,
176                exponent_negative: false,
177                mantissa: 0.0,
178                mantissa_len: 0,
179                exponent: 0,
180                negative: false,
181            }
182        }
183
184        fn accumulate(&mut self, acc: u8) {
185            match acc {
186                0..=9 => match self.state {
187                    NumberState::Int => {
188                        self.int = self.int * 10 + acc as u32;
189                    }
190                    NumberState::Mantissa => {
191                        self.mantissa = self.mantissa.mul_add(10.0, acc as f32);
192                        self.mantissa_len += 1;
193                    }
194                    NumberState::Exponent => {
195                        self.exponent = self.exponent * 10 + acc as u32;
196                    }
197                },
198                0xa => {
199                    debug_assert_eq!(NumberState::Int, self.state);
200                    self.state = NumberState::Mantissa;
201                }
202                0xb => {
203                    self.state = NumberState::Exponent;
204                }
205                0xc => {
206                    self.state = NumberState::Exponent;
207                    self.exponent_negative = true;
208                }
209                0xe => {
210                    // minus
211                    self.negative = true;
212                }
213                _ => unreachable!(),
214            }
215        }
216    }
217    preceded(
218        30u8,
219        bits(repeat_till::<_, _, Real, _, _, _, _>(
220            1..,
221            take::<_, u8, _, E>(4u8),
222            pattern(0xfu8, 4u8),
223        )),
224    )
225    .map(|(v, _)| v.into())
226}
227
228/// Return operand parser
229/// Operand maybe integer/real/bool/intArray/realArray, if multiple operands
230/// are provided, item types must be same, either int or real, returned as
231/// intArray/realArray.
232fn operand_parser<'a, E>() -> impl Parser<&'a [u8], Operand, E>
233where
234    E: ParserError<&'a [u8]> + ErrorConvert<E> + ParserError<(&'a [u8], usize)>,
235{
236    fn post_process(v: Vec<Operand>) -> Operand {
237        // if v has one element, return that element
238        // if all elements are all int, return int_array
239        // if all elements are all real, return real_array
240        // otherwise, convert all elements to real, and return real_array
241        if v.len() == 1 {
242            return v[0].clone();
243        }
244        let mut is_same_type = true;
245        let mut is_int = false;
246        let mut is_real = false;
247        for i in &v {
248            match i {
249                Operand::Integer(_) => {
250                    if is_real {
251                        is_same_type = false;
252                        break;
253                    }
254                    is_int = true;
255                }
256                Operand::Real(_) => {
257                    if is_int {
258                        is_same_type = false;
259                        break;
260                    }
261                    is_real = true;
262                }
263                _ => {
264                    is_same_type = false;
265                    break;
266                }
267            }
268        }
269        if is_same_type {
270            if is_int {
271                let mut int_array = Vec::with_capacity(v.len());
272                for i in v {
273                    match i {
274                        Operand::Integer(i) => int_array.push(i),
275                        _ => unreachable!(),
276                    }
277                }
278                Operand::IntArray(int_array)
279            } else if is_real {
280                let mut real_array = Vec::with_capacity(v.len());
281                for i in v {
282                    match i {
283                        Operand::Real(r) => real_array.push(r),
284                        _ => unreachable!(),
285                    }
286                }
287                Operand::RealArray(real_array)
288            } else {
289                unreachable!()
290            }
291        } else {
292            // mixed int/real to real array
293            let mut real_array = Vec::with_capacity(v.len());
294            for i in v {
295                match i {
296                    Operand::Integer(i) => real_array.push(i as f32),
297                    Operand::Real(r) => real_array.push(r),
298                    _ => unreachable!(),
299                }
300            }
301            Operand::RealArray(real_array)
302        }
303    }
304
305    repeat(
306        1..,
307        alt((
308            integer_parser().map(Operand::Integer),
309            real_parser().map(Operand::Real),
310        )),
311    )
312    .map(post_process)
313}
314
/// Operator (key) of a Dict entry. An operator is either a single byte, or
/// the escape byte 12 followed by a second byte; `escape` records which form
/// was used and `tag` holds the identifying byte.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub struct Operator {
    tag: u8,
    /// First byte of Operator is 12 if true.
    escape: bool,
}

impl Operator {
    // Single-byte operators, ordered by tag.
    pub const VERSION: Self = Self::new(0);
    pub const NOTICE: Self = Self::new(1);
    pub const FULL_NAME: Self = Self::new(2);
    pub const FAMILY_NAME: Self = Self::new(3);
    pub const WEIGHT: Self = Self::new(4);
    pub const FONT_BBOX: Self = Self::new(5);
    pub const UNIQUE_ID: Self = Self::new(13);
    pub const XUID: Self = Self::new(14);
    pub const CHARSETS: Self = Self::new(15);
    pub const ENCODINGS: Self = Self::new(16);
    pub const CHAR_STRINGS: Self = Self::new(17);
    pub const PRIVATE: Self = Self::new(18);

    // Escaped (12-prefixed) operators, ordered by tag.
    pub const COPYRIGHT: Self = Self::escaped(0);
    pub const IS_FIXED_PITCH: Self = Self::escaped(1);
    pub const ITALIC_ANGLE: Self = Self::escaped(2);
    pub const UNDERLINE_POSITION: Self = Self::escaped(3);
    pub const UNDERLINE_THICKNESS: Self = Self::escaped(4);
    pub const PAINT_TYPE: Self = Self::escaped(5);
    pub const CHARSTRING_TYPE: Self = Self::escaped(6);
    pub const FONT_MATRIX: Self = Self::escaped(7);
    pub const STROKE_WIDTH: Self = Self::escaped(8);
    pub const SYNTHETIC_BASE: Self = Self::escaped(20);
    pub const POST_SCRIPT: Self = Self::escaped(21);
    pub const BASE_FONT_NAME: Self = Self::escaped(22);
    pub const BASE_FONT_BLEND: Self = Self::escaped(23);
    pub const ROS: Self = Self::escaped(30);

    /// Single-byte operator. Debug builds assert that `tag` is at most 21.
    pub const fn new(tag: u8) -> Self {
        debug_assert!(tag <= 21);
        Self { escape: false, tag }
    }

    /// Two-byte operator: escape byte 12 followed by `tag`.
    pub const fn escaped(tag: u8) -> Self {
        Self { escape: true, tag }
    }
}

/// Hashes a single byte: the tag, with the high bit set for escaped operators.
impl Hash for Operator {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        let escape_bit = u8::from(self.escape) << 7;
        (self.tag | escape_bit).hash(state);
    }
}
372
373fn operator_parser<'a, E: ParserError<&'a [u8]>>() -> impl Parser<&'a [u8], Operator, E> {
374    let escaped = preceded(12u8, any).map(Operator::escaped);
375    let normal = any.map(Operator::new);
376    alt((escaped, normal))
377}
378
/// Errors that may be returned by this crate.
#[derive(Debug, Snafu)]
pub enum Error {
    /// The Dict value could not be read as an integer.
    #[snafu(display("Dict value not Integer"))]
    ExpectInt,
    /// The Dict value could not be read as a real number.
    #[snafu(display("Dict value not Real"))]
    ExpectReal,
    /// The Dict value is not an integer array.
    #[snafu(display("Dict value not Integer Array"))]
    ExpectIntArray,
    /// The Dict value is not a real array.
    #[snafu(display("Dict value not Real Array"))]
    ExpectRealArray,
    /// The Dict value is not the integer 0 or 1.
    #[snafu(display("Dict value not Bool"))]
    ExpectBool,

    /// The offset array of an index is malformed (e.g. first offset is not 1).
    #[snafu(display("Invalid offsets data"))]
    InvalidOffsetsData,

    /// A parser failed; `message` says what was being parsed.
    #[snafu(display("Parse error: {message}"))]
    ParseError {
        message: String,
        // The source is boxed when converted into this variant.
        #[snafu(source(from(prescript::ParserError, Box::new)))]
        source: Box<prescript::ParserError>,
    },

    /// Error during cast integer.
    #[snafu(display("Parse error: {message}"))]
    ParseErrorIntCast {
        message: String,
        source: TryFromIntError,
    },

    /// A value that must be present in the dict was not found.
    #[snafu(display("Required top dict value missing"))]
    RequiredDictValueMissing,
    /// Catch-all variant used by snafu's `whatever` helpers.
    #[snafu(whatever, display("{message}"))]
    GenericError {
        message: String,

        // Having a `source` is optional, but if it is present, it must
        // have this specific attribute and type:
        #[snafu(source(from(Box<dyn std::error::Error + Send + Sync>, Some)))]
        source: Option<Box<dyn std::error::Error + Send + Sync>>,
    },
}

/// Result alias whose error type defaults to this crate's [`Error`].
pub type Result<T, E = Error> = std::result::Result<T, E>;
424
425#[derive(PartialEq, Debug, Clone)]
426pub struct Dict(HashMap<Operator, Operand>);
427
428impl Dict {
429    /// If value not exist for `k`, return None,
430    /// use `f` to convert Operand value to type `T` and returns otherwise.
431    fn opt<'a, T: 'a, F: FnOnce(&'a Operand) -> Result<T>>(
432        &'a self,
433        f: F,
434        k: Operator,
435    ) -> Result<Option<T>> {
436        self.0.get(&k).map(f).transpose()
437    }
438
439    /// If value not exist for `k`, return default value `dv`,
440    /// use `f` to convert Operand value to type `T` and returns otherwise.
441    fn opt_or<'a, T: 'a, F: FnOnce(&'a Operand) -> Result<T>>(
442        &'a self,
443        f: F,
444        k: Operator,
445        dv: T,
446    ) -> Result<T> {
447        self.0.get(&k).map_or(Ok(dv), f)
448    }
449
450    /// If value not exist for `k`, return `Error::RequiredDictValueMissing` error,
451    /// use `f` to convert Operand value to type `T` and returns otherwise.
452    fn required<'a, T: 'a, F: FnOnce(&'a Operand) -> Result<T>>(
453        &'a self,
454        f: F,
455        k: Operator,
456    ) -> Result<T> {
457        self.0
458            .get(&k)
459            .map_or(Err(Error::RequiredDictValueMissing), f)
460    }
461
462    /// Assume the operand value is delta-encoded, return decoded real number array.
463    pub fn as_delta_encoded(&self, k: Operator) -> Result<Option<Vec<f32>>> {
464        let r = self.as_real_array(k)?;
465        Ok(r.map(|v| {
466            let mut r = Vec::with_capacity(v.len());
467            let mut prev = 0.0;
468            for &i in v {
469                r.push(i + prev);
470                prev += i;
471            }
472            r
473        }))
474    }
475
476    /// Assume the operand value is delta-encoded, return decoded real number array.
477    /// Return default value if value not exist.
478    pub fn as_delta_encoded_or(&self, k: Operator, default: &'static [f32]) -> Result<Vec<f32>> {
479        self.as_delta_encoded(k)
480            .map(|v| v.unwrap_or_else(|| default.to_vec()))
481    }
482
483    pub fn delta_encoded(&self, k: Operator) -> Result<Vec<f32>> {
484        self.as_delta_encoded(k)
485            .and_then(|v| v.ok_or(Error::RequiredDictValueMissing))
486    }
487}
488
489impl Accumulate<(Operand, Operator)> for Dict {
490    fn initial(capacity: Option<usize>) -> Self {
491        Dict(capacity.map_or_else(HashMap::new, HashMap::with_capacity))
492    }
493
494    fn accumulate(&mut self, acc: (Operand, Operator)) {
495        self.0.insert(acc.1, acc.0);
496    }
497}
498
/// Generate three typed accessors on `Dict` for one value kind:
/// - `$name(k)`: value is required (delegates to `Dict::required`);
/// - `as_$name(k)`: value is optional (delegates to `Dict::opt`);
/// - `as_$name_or(k, default)`: falls back to `default` (delegates to `Dict::opt_or`).
///
/// `$f` converts `&Operand` into `Result<$rt>`. `$def_t` is the type of the
/// default argument; it defaults to `$rt` in the three-argument form.
macro_rules! access_methods {
    // Three-argument form: the default value has the same type as the result.
    ($name: ident, $f: expr, $rt: ty) => {
        access_methods!($name, $f, $rt, $rt);
    };
    ($name: ident, $f: expr, $rt: ty, $def_t: ty) => {
        paste! {
            pub fn $name(&self, k: Operator) -> Result<$rt> {
                self.required($f, k)
            }

            pub fn [<as_ $name>](&self, k: Operator) -> Result<Option<$rt>> {
                self.opt($f, k)
            }

            pub fn [<as_ $name _or>](&self, k: Operator, default: $def_t) -> Result<$rt> {
                self.opt_or($f, k, default)
            }
        }
    };
}
519
/// Typed accessors generated by `access_methods!`. Each invocation yields
/// `NAME`, `as_NAME` and `as_NAME_or`; a value of the wrong kind is reported
/// with the matching `Expect*` error.
impl Dict {
    access_methods!(int, |v| v.int().context(ExpectIntSnafu), i32);

    access_methods!(real, |v| v.real().context(ExpectRealSnafu), f32);

    access_methods!(bool, |v| v.bool().context(ExpectBoolSnafu), bool);

    // Slice results borrow from the Dict, so the default must be `'static`.
    access_methods!(
        int_array,
        |v| v.int_array().context(ExpectIntArraySnafu),
        &[i32],
        &'static [i32]
    );

    access_methods!(
        real_array,
        |v| v.real_array().context(ExpectRealArraySnafu),
        &[f32],
        &'static [f32]
    );
}
541
542/// Return Dict parser.
543/// Dict stored as a sequence of operators and operands. The operands are
544/// stored before the operators.
545fn dict_parser<'a, E>() -> impl Parser<&'a [u8], Dict, E>
546where
547    E: ParserError<&'a [u8]> + ParserError<(&'a [u8], usize)> + ErrorConvert<E>,
548{
549    let parse_item = (operand_parser(), operator_parser());
550    repeat(1.., parse_item)
551}
552
/// Width, in bytes, of one stored offset (1 to 4).
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
#[repr(u8)]
pub enum OffSize {
    One = 1u8,
    Two = 2u8,
    Three = 3u8,
    Four = 4u8,
}

impl OffSize {
    /// Number of bytes one offset occupies.
    pub fn len(self) -> usize {
        match self {
            Self::One => 1,
            Self::Two => 2,
            Self::Three => 3,
            Self::Four => 4,
        }
    }
}
569
/// Build a parser that reads one byte and maps 1..=4 to the matching
/// `OffSize`; any other byte value makes the parser fail.
fn off_size_parser<'a, E: ParserError<&'a [u8]>>() -> impl Parser<&'a [u8], OffSize, E> {
    dispatch! {any;
        1 => empty.value(OffSize::One),
        2 => empty.value(OffSize::Two),
        3 => empty.value(OffSize::Three),
        4 => empty.value(OffSize::Four),
        _ => fail
    }
}
579
580/// Offsets is a sequence of n + 1 off_size bytes, where n is the number of
581/// items in the index. The first offset is always 1.
582#[derive(Debug, Clone, Copy)]
583struct Offsets<'a>(OffSize, &'a [u8]);
584
585impl<'a> Offsets<'a> {
586    /// Return `Error::InvalidOffsetsData` if first offset is not 1.
587    /// Assume data byte length is multiple of off_size.
588    pub fn new(off_size: OffSize, data: &'a [u8]) -> Result<Self> {
589        let first = Self::_get(data, off_size, 0)?;
590        ensure!(first == 1, InvalidOffsetsDataSnafu);
591        Ok(Self(off_size, data))
592    }
593
594    /// Return length of offsets, which is the number of elements.
595    pub fn len(&self) -> usize {
596        self.1.len() / self.0.len() - 1
597    }
598
599    /// Return data offset range of specific index. Panic if `ith` is out of range.
600    pub fn range(&self, ith: usize) -> Result<Range<usize>> {
601        Ok(self.get(ith)?..self.get(ith + 1)?)
602    }
603
604    /// Return data offset of specific index. The offset is 0-based.
605    /// `ith` can be length of offsets, which means the end offset of last element.
606    /// Panic if `ith` is out of range.
607    pub fn get(&self, ith: usize) -> Result<usize> {
608        let r = Self::_get(self.1, self.0, ith)?;
609        Ok(r as usize - 1)
610    }
611
612    /// Get offset of `ith` element
613    fn _get(data: &[u8], off_size: OffSize, ith: usize) -> Result<u32> {
614        // skip ith off_size bytes
615        let buf = &data[ith * off_size.len()..];
616        match off_size {
617            OffSize::One => {
618                ignore_rest(be_u8::<_, prescript::ParserError>.map(|v| v as u32)).parse(buf)
619            }
620            OffSize::Two => {
621                ignore_rest(be_u16::<_, prescript::ParserError>.map(|v| v as u32)).parse(buf)
622            }
623            OffSize::Three => {
624                ignore_rest(be_u24::<_, prescript::ParserError>.map(|v| v)).parse(buf)
625            }
626            OffSize::Four => ignore_rest(be_u32::<_, prescript::ParserError>).parse(buf),
627        }
628        .map_err(ParseError::into_inner)
629        .context(ParseSnafu {
630            message: "parse OffSize".to_owned(),
631        })
632    }
633}
634
/// Wrap parser `p` so that, after `p` succeeds, all remaining input is
/// consumed and discarded; the output of `p` is returned.
fn ignore_rest<I, O, E, P>(p: P) -> impl Parser<I, O, E>
where
    I: Stream,
    E: ParserError<I>,
    P: Parser<I, O, E>,
{
    terminated(p, rest)
}
643
644fn parse_ignore_rest<I, O, P, E: ParserError<I>>(p: P, buf: I) -> Result<O, E::Inner>
645where
646    I: Stream + StreamIsPartial,
647    P: Parser<I, O, E>,
648    E::Inner: ParserError<I>,
649{
650    ignore_rest(p).parse(buf).map_err(ParseError::into_inner)
651}
652
653/// Data with an index(offset) for quick access memory
654/// by index.
655#[derive(Debug, Clone, Copy)]
656pub struct IndexedData<'a> {
657    offsets: Offsets<'a>,
658    data: &'a [u8],
659}
660
661impl<'a> IndexedData<'a> {
662    pub fn len(&self) -> usize {
663        self.offsets.len()
664    }
665
666    /// Get value by index, use parser to decode data.
667    /// Panic if `idx` is out of range.
668    pub fn get<T: 'a, F: Parser<&'a [u8], T, prescript::ParserError>>(
669        &self,
670        idx: usize,
671        mut f: F,
672    ) -> Result<T> {
673        let buf = self.get_bin_str(idx)?;
674        f.parse(buf).map_err(Into::into).context(ParseSnafu {
675            message: format!("get indexed data: [{}]", idx),
676        })
677    }
678
679    /// Get str by index. Panic if `idx` is out of range.
680    /// Returns `&[u8]` instead of `&str`, because the str may not be valid utf8,
681    /// `from_utf8()` returns error if str contains '\0'.
682    pub fn get_bin_str(&self, idx: usize) -> Result<&'a [u8]> {
683        let range = self.offsets.range(idx)?;
684        Ok(&self.data[range])
685    }
686
687    /// Get Dict by index. Panic if `idx` is out of range.
688    pub fn get_dict(&self, idx: usize) -> Result<Dict> {
689        self.get(idx, dict_parser())
690    }
691}
692
693/// Index Format:
694///
695/// ---+-----------------------+------------------------------------------
696/// 0 | count  | The number of index entries
697/// ---+-----------------------+------------------------------------------
698/// 1 | off_size              | The size in bytes of each offset
699/// ---+-----------------------+------------------------------------------
700/// 2 | offset array          | Offset array, count + 1 elements
701/// ---+-----------------------+------------------------------------------
702/// 3 | data                  | Data
703/// ---+-----------------------+------------------------------------------
704fn parse_indexed_data<'a, E>(buf: &mut &'a [u8]) -> ModalResult<IndexedData<'a>, E>
705where
706    E: ParserError<&'a [u8]> + FromExternalError<&'a [u8], Error>,
707{
708    let (n, off_size) = (be_u16, off_size_parser()).parse_next(buf)?;
709    let offset_data_len = (n + 1) as usize * off_size.len();
710    let offsets = take(offset_data_len)
711        .try_map(|offset_data| Offsets::new(off_size, offset_data))
712        .parse_next(buf)?;
713
714    let data_len = offsets
715        .get(n as usize)
716        .map_err(|e| ErrMode::<E>::from_external_error(buf, e))?;
717    take(data_len)
718        .map(|data| IndexedData { offsets, data })
719        .parse_next(buf)
720}
721
/// Build a parser that reads one index via `parse_indexed_data` and wraps
/// it as a `NameIndex`.
fn name_index_parser<'a, E>() -> impl Parser<&'a [u8], NameIndex<'a>, ErrMode<E>>
where
    E: ParserError<&'a [u8]> + FromExternalError<&'a [u8], Error>,
{
    parse_indexed_data.map(NameIndex)
}
728
/// Build a parser that reads one index via `parse_indexed_data` and wraps
/// it as a `StringIndex`.
fn string_index_parser<'a, E>() -> impl Parser<&'a [u8], StringIndex<'a>, ErrMode<E>>
where
    E: ParserError<&'a [u8]> + FromExternalError<&'a [u8], Error>,
{
    parse_indexed_data.map(StringIndex)
}
735
/// Build a parser that reads one index via `parse_indexed_data` and wraps
/// it as a `TopDictIndex`.
fn top_dict_index_parser<'a, E>() -> impl Parser<&'a [u8], TopDictIndex<'a>, ErrMode<E>>
where
    E: ParserError<&'a [u8]> + FromExternalError<&'a [u8], Error>,
{
    parse_indexed_data.map(TopDictIndex)
}
742
/// Header of CFF: four values read in order by `header_parser`.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub struct Header {
    /// First header byte.
    pub major: u8,
    /// Second header byte.
    pub minor: u8,
    /// Third header byte. NOTE(review): presumably the header length in bytes — confirm.
    pub hdr_size: u8,
    /// Fourth header byte, restricted to 1..=4 by `off_size_parser`.
    pub off_size: OffSize,
}
751
/// Build a parser that reads three single bytes followed by an `OffSize`
/// and assembles them into a `Header`.
fn header_parser<'a, E: ParserError<&'a [u8]>>() -> impl Parser<&'a [u8], Header, E> {
    (be_u8, be_u8, be_u8, off_size_parser()).map(|(major, minor, hdr_size, off_size)| Header {
        major,
        minor,
        hdr_size,
        off_size,
    })
}
760
761pub fn parse_header(buf: &[u8]) -> Result<Header> {
762    parse_ignore_rest(header_parser::<prescript::ParserError>(), buf).context(ParseSnafu {
763        message: "parse header".to_owned(),
764    })
765}
766
767/// Font name index, stores font names in Index.
768/// The name first byte maybe zero, which means the corresponding font
769/// is removed. The index is the index of other top font data index.
770#[derive(Debug, Clone, Copy)]
771pub struct NameIndex<'a>(IndexedData<'a>);
772
773impl<'a> NameIndex<'a> {
774    pub fn len(&self) -> usize {
775        self.0.len()
776    }
777
778    /// Get font name by index. Return None if name is marked removed.
779    pub fn get(&self, idx: usize) -> Result<Option<Cow<'a, str>>> {
780        let name = self.0.get_bin_str(idx)?;
781        Ok(if name.is_empty() || name[0] == 0 {
782            None
783        } else {
784            Some(String::from_utf8_lossy(name))
785        })
786    }
787}
788
789/// Resolve &str using SID from IndexedData.
790/// SID is an integer that identifies a string in the string INDEX.
791/// The first 391 SIDs are predefined standard strings.
792/// SID greater than 390 are strings that are defined in the string INDEX.
793/// To resolve a SID, subtract 391 from the SID value and use the result as
794/// an index into the string INDEX.
795#[derive(Debug, Copy, Clone)]
796pub struct StringIndex<'a>(IndexedData<'a>);
797
798impl<'a> StringIndex<'a> {
799    /// Panic if `idx` is out of range. Return None if str is marked removed
800    pub fn get(&self, idx: Sid) -> Result<Cow<'a, str>> {
801        Ok(if idx < 391 {
802            Cow::Borrowed(STANDARD_STRINGS[idx as usize])
803        } else {
804            String::from_utf8_lossy(self.0.get_bin_str((idx - 391) as usize)?)
805        })
806    }
807}
808
/// Standard strings defined in the CFF spec (names used in Type 1 fonts and some other strings), indexed by SID in `0..391`.
810#[rustfmt::skip]
811const STANDARD_STRINGS: [&str; 391] = [
812    ".notdef", "space", "exclam", "quotedbl", "numbersign", "dollar", "percent",
813    "ampersand", "quoteright", "parenleft", "parenright", "asterisk", "plus", "comma",
814    "hyphen", "period", "slash", "zero", "one", "two", "three", "four", "five", "six",
815    "seven", "eight", "nine", "colon", "semicolon", "less", "equal", "greater",
816    "question", "at",
817    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P",
818    "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
819    "bracketleft", "backslash", "bracketright", "asciicircum", "underscore", "quoteleft",
820    "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p",
821    "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
822    "braceleft", "bar", "braceright", "asciitilde", "exclamdown", "cent", "sterling",
823    "fraction", "yen", "florin", "section", "currency", "quotesingle", "quotedblleft",
824    "guillemotleft", "guilsinglleft", "guilsinglright", "fi", "fl", "endash", "dagger",
825    "daggerdbl", "periodcentered", "paragraph", "bullet", "quotesinglbase",
826    "quotedblbase", "quotedblright", "guillemotright", "ellipsis", "perthousand",
827    "questiondown", "grave", "acute", "circumflex", "tilde", "macron", "breve",
828    "dotaccent", "dieresis", "ring", "cedilla", "hungarumlaut", "ogonek", "caron",
829    "emdash", "AE", "ordfeminine", "Lslash", "Oslash", "OE", "ordmasculine", "ae",
830    "dotlessi", "lslash", "oslash", "oe", "germandbls", "onesuperior", "logicalnot",
831    "mu", "trademark", "Eth", "onehalf", "plusminus", "Thorn", "onequarter", "divide",
832    "brokenbar", "degree", "thorn", "threequarters", "twosuperior", "registered",
833    "minus", "eth", "multiply", "threesuperior", "copyright", "Aacute", "Acircumflex",
834    "Adieresis", "Agrave", "Aring", "Atilde", "Ccedilla", "Eacute", "Ecircumflex",
835    "Edieresis", "Egrave", "Iacute", "Icircumflex", "Idieresis", "Igrave", "Ntilde",
836    "Oacute", "Ocircumflex", "Odieresis", "Ograve", "Otilde", "Scaron", "Uacute",
837    "Ucircumflex", "Udieresis", "Ugrave", "Yacute", "Ydieresis", "Zcaron", "aacute",
838    "acircumflex", "adieresis", "agrave", "aring", "atilde", "ccedilla", "eacute",
839    "ecircumflex", "edieresis", "egrave", "iacute", "icircumflex", "idieresis",
840    "igrave", "ntilde", "oacute", "ocircumflex", "odieresis", "ograve", "otilde",
841    "scaron", "uacute", "ucircumflex", "udieresis", "ugrave", "yacute", "ydieresis",
842    "zcaron", "exclamsmall", "Hungarumlautsmall", "dollaroldstyle", "dollarsuperior",
843    "ampersandsmall", "Acutesmall", "parenleftsuperior", "parenrightsuperior",
844    "twodotenleader", "onedotenleader", "zerooldstyle", "oneoldstyle", "twooldstyle",
845    "threeoldstyle", "fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle",
846    "eightoldstyle", "nineoldstyle", "commasuperior", "threequartersemdash",
847    "periodsuperior", "questionsmall", "asuperior", "bsuperior", "centsuperior",
848    "dsuperior", "esuperior", "isuperior", "lsuperior", "msuperior", "nsuperior",
849    "osuperior", "rsuperior", "ssuperior", "tsuperior", "ff", "ffi", "ffl",
850    "parenleftinferior", "parenrightinferior", "Circumflexsmall", "hyphensuperior",
851    "Gravesmall", "Asmall", "Bsmall", "Csmall", "Dsmall", "Esmall", "Fsmall",
852    "Gsmall", "Hsmall", "Ismall", "Jsmall", "Ksmall", "Lsmall", "Msmall", "Nsmall",
853    "Osmall", "Psmall", "Qsmall", "Rsmall", "Ssmall", "Tsmall", "Usmall", "Vsmall",
854    "Wsmall", "Xsmall", "Ysmall", "Zsmall", "colonmonetary", "onefitted", "rupiah",
855    "Tildesmall", "exclamdownsmall", "centoldstyle", "Lslashsmall", "Scaronsmall",
856    "Zcaronsmall", "Dieresissmall", "Brevesmall", "Caronsmall", "Dotaccentsmall",
857    "Macronsmall", "figuredash", "hypheninferior", "Ogoneksmall", "Ringsmall",
858    "Cedillasmall", "questiondownsmall", "oneeighth", "threeeighths", "fiveeighths",
859    "seveneighths", "onethird", "twothirds", "zerosuperior", "foursuperior",
860    "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior", "ninesuperior",
861    "zeroinferior", "oneinferior", "twoinferior", "threeinferior", "fourinferior",
862    "fiveinferior", "sixinferior", "seveninferior", "eightinferior", "nineinferior",
863    "centinferior", "dollarinferior", "periodinferior", "commainferior",
864    "Agravesmall", "Aacutesmall", "Acircumflexsmall", "Atildesmall", "Adieresissmall",
865    "Aringsmall", "AEsmall", "Ccedillasmall", "Egravesmall", "Eacutesmall",
866    "Ecircumflexsmall", "Edieresissmall", "Igravesmall", "Iacutesmall",
867    "Icircumflexsmall", "Idieresissmall", "Ethsmall", "Ntildesmall", "Ogravesmall",
868    "Oacutesmall", "Ocircumflexsmall", "Otildesmall", "Odieresissmall", "OEsmall",
869    "Oslashsmall", "Ugravesmall", "Uacutesmall", "Ucircumflexsmall", "Udieresissmall",
870    "Yacutesmall", "Thornsmall", "Ydieresissmall", "001.000", "001.001", "001.002",
871    "001.003", "Black", "Bold", "Book", "Light", "Medium", "Regular", "Roman",
872    "Semibold",
873];
874
/// A `Dict` paired with the string INDEX needed to resolve the SIDs
/// (string identifiers) stored in it into text.
#[derive(Debug)]
struct SIDDict<'a> {
    /// The parsed dictionary (operator -> operands).
    dict: Dict,
    /// String lookup used to resolve SID operands.
    strings: StringIndex<'a>,
}
881
882/// SIDDict deref to Dict, to add Dict access methods.
883impl Deref for SIDDict<'_> {
884    type Target = Dict;
885
886    fn deref(&self) -> &Self::Target {
887        &self.dict
888    }
889}
890
891impl SIDDict<'_> {
892    fn resolve_sid(&self, v: &Operand) -> Result<Cow<'_, str>> {
893        v.int().context(ExpectIntSnafu).and_then(|v| {
894            self.strings.get(
895                v.try_into()
896                    .context(ParseErrorIntCastSnafu { message: "" })?,
897            )
898        })
899    }
900
901    pub fn sid(&self, k: Operator) -> Result<Cow<'_, str>> {
902        self.required(|v| self.resolve_sid(v), k)
903    }
904
905    #[allow(dead_code)]
906    pub fn as_sid(&self, k: Operator) -> Result<Option<Cow<'_, str>>> {
907        self.opt(|v| self.resolve_sid(v), k)
908    }
909
910    #[allow(dead_code)]
911    pub fn as_sid_or(&self, k: Operator, default: &'static str) -> Result<Cow<'_, str>> {
912        self.opt_or(|v| self.resolve_sid(v), k, default.into())
913    }
914}
915
/// Top DICT of a single font face.
///
/// Wraps a `SIDDict`, so SID-valued entries can be resolved to strings.
#[derive(Debug)]
pub struct TopDictData<'a>(SIDDict<'a>);
919
920impl<'a> TopDictData<'a> {
921    pub fn new(dict: Dict, strings: StringIndex<'a>) -> Result<Self> {
922        let r = Self(SIDDict { dict, strings });
923        ensure_whatever!(!r.0.0.contains_key(&Operator::ROS), "TODO: CIDFont");
924        Ok(r)
925    }
926
927    pub fn string_index(&self) -> StringIndex<'_> {
928        self.0.strings
929    }
930
931    pub fn version(&self) -> Result<Cow<'_, str>> {
932        self.0.sid(Operator::VERSION)
933    }
934
935    pub fn notice(&self) -> Result<Cow<'_, str>> {
936        self.0.sid(Operator::NOTICE)
937    }
938
939    pub fn copyright(&self) -> Result<Cow<'_, str>> {
940        self.0.sid(Operator::COPYRIGHT)
941    }
942
943    pub fn full_name(&self) -> Result<Cow<'_, str>> {
944        self.0.sid(Operator::FULL_NAME)
945    }
946
947    pub fn family_name(&self) -> Result<Cow<'_, str>> {
948        self.0.sid(Operator::FAMILY_NAME)
949    }
950
951    pub fn weight(&self) -> Result<Cow<'_, str>> {
952        self.0.sid(Operator::WEIGHT)
953    }
954
955    pub fn is_fixed_pitch(&self) -> Result<bool> {
956        self.0.as_bool_or(Operator::IS_FIXED_PITCH, false)
957    }
958
959    pub fn italic_angle(&self) -> Result<f32> {
960        self.0.as_real_or(Operator::ITALIC_ANGLE, 0.0)
961    }
962
963    pub fn underline_position(&self) -> Result<f32> {
964        self.0.as_real_or(Operator::UNDERLINE_POSITION, -100.0)
965    }
966
967    pub fn underline_thickness(&self) -> Result<f32> {
968        self.0.as_real_or(Operator::UNDERLINE_THICKNESS, 50.0)
969    }
970
971    pub fn paint_type(&self) -> Result<i32> {
972        self.0.as_int_or(Operator::PAINT_TYPE, 0)
973    }
974
975    pub fn charstring_type(&self) -> Result<i32> {
976        self.0.as_int_or(Operator::CHARSTRING_TYPE, 2)
977    }
978
979    pub fn font_matrix(&self) -> Result<&[f32]> {
980        self.0.as_real_array_or(
981            Operator::FONT_MATRIX,
982            &[0.001, 0.0, 0.0, 0.001, 0.0, 0.0][..],
983        )
984    }
985
986    pub fn unique_id(&self) -> Result<i32> {
987        self.0.as_int_or(Operator::UNIQUE_ID, 0)
988    }
989
990    pub fn font_bbox(&self) -> Result<&[f32]> {
991        self.0
992            .as_real_array_or(Operator::FONT_BBOX, &[0.0, 0.0, 0.0, 0.0][..])
993    }
994
995    pub fn stroke_width(&self) -> Result<f32> {
996        self.0.as_real_or(Operator::STROKE_WIDTH, 0.0)
997    }
998
999    pub fn xuid(&self) -> Result<&[i32]> {
1000        self.0.int_array(Operator::XUID)
1001    }
1002
1003    /// `file` is the raw file data.
1004    pub fn charsets(&self, file: &[u8]) -> Result<Charsets> {
1005        let offset = self.0.as_int_or(Operator::CHARSETS, 0)?;
1006
1007        match offset {
1008            0 => Ok(Charsets::Predefined(PredefinedCharsets::ISOAdobe)),
1009            1 => Ok(Charsets::Predefined(PredefinedCharsets::Expert)),
1010            2 => Ok(Charsets::Predefined(PredefinedCharsets::ExpertSubset)),
1011            _ => parse_ignore_rest(
1012                charsets_parser(self.n_glyphs(file)?),
1013                &file[offset as usize..],
1014            )
1015            .context(ParseSnafu {
1016                message: "parse Charsets".to_owned(),
1017            }),
1018        }
1019    }
1020
1021    pub fn encodings(&self, file: &[u8]) -> Result<(Encodings, Option<Vec<EncodingSupplement>>)> {
1022        let offset = self.0.as_int_or(Operator::ENCODINGS, 0)?;
1023
1024        match offset {
1025            0 => Ok((Encodings::PredefinedStandard, None)),
1026            1 => Ok((Encodings::PredefinedExpert, None)),
1027            _ => parse_ignore_rest(encodings_parser(), &file[offset as usize..]).context(
1028                ParseSnafu {
1029                    message: "parse Encodings".to_owned(),
1030                },
1031            ),
1032        }
1033    }
1034
1035    pub fn private(&self) -> Result<&[i32]> {
1036        self.0.int_array(Operator::PRIVATE)
1037    }
1038
1039    fn char_strings(&self) -> Result<i32> {
1040        self.0.int(Operator::CHAR_STRINGS)
1041    }
1042
1043    /// Return glyphs count in font. `file` is the raw file data.
1044    pub fn n_glyphs(&self, file: &[u8]) -> Result<u16> {
1045        let buf = &file[self.char_strings()? as usize..];
1046        let index = parse_ignore_rest(parse_indexed_data, buf).context(ParseSnafu {
1047            message: "parse CharStrings INDEX".to_owned(),
1048        })?;
1049        index.len().try_into().context(ParseErrorIntCastSnafu {
1050            message: "convert index length to u16".to_owned(),
1051        })
1052    }
1053
1054    pub fn synthetic_base(&self) -> Result<i32> {
1055        self.0.int(Operator::SYNTHETIC_BASE)
1056    }
1057
1058    pub fn post_script(&self) -> Result<Cow<'_, str>> {
1059        self.0.sid(Operator::POST_SCRIPT)
1060    }
1061
1062    pub fn base_font_name(&self) -> Result<Cow<'_, str>> {
1063        self.0.sid(Operator::BASE_FONT_NAME)
1064    }
1065
1066    pub fn base_font_blend(&self) -> Result<Vec<f32>> {
1067        self.0.delta_encoded(Operator::BASE_FONT_BLEND)
1068    }
1069}
1070
/// INDEX holding the Top DICTs of the file; each item is one Top DICT.
pub struct TopDictIndex<'a>(IndexedData<'a>);
1073
1074impl<'a> TopDictIndex<'a> {
1075    #[allow(dead_code)]
1076    pub fn len(&self) -> usize {
1077        self.0.len()
1078    }
1079
1080    pub fn get(&self, idx: usize, strings: StringIndex<'a>) -> Result<TopDictData<'a>> {
1081        TopDictData::new(self.0.get_dict(idx)?, strings)
1082    }
1083}
1084
/// Charsets that are built in rather than stored in the font file.
///
/// The discriminants equal the charset offset values that
/// `TopDictData::charsets()` maps to each variant.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PredefinedCharsets {
    ISOAdobe = 0,
    Expert = 1,
    ExpertSubset = 2,
}
1092
/// Charsets map a glyph index (gid) to a SID.
#[derive(Debug, PartialEq)]
pub enum Charsets {
    /// Format0: one SID per glyph, `n_glyphs - 1` entries in total. Gid 0 is
    /// omitted because it always maps to SID 0, which is .notdef.
    Format0(Vec<Sid>),
    /// Format1: SID ranges, read from (first, n_left: u8) pairs and stored as
    /// `first..=first + n_left`.
    Format1(Vec<RangeInclusive<Sid>>),
    /// Format2: SID ranges, read from (first, n_left: u16) pairs and stored as
    /// `first..=first + n_left`.
    Format2(Vec<RangeInclusive<Sid>>),
    /// One of the charsets that is not stored in the font file.
    Predefined(PredefinedCharsets),
}
1103
1104impl Charsets {
1105    /// Return SID by index(gid). Return None if `idx` is out of range.
1106    pub fn resolve_sid(&self, idx: Gid) -> Option<Sid> {
1107        if idx == 0 {
1108            return Some(0);
1109        }
1110
1111        match self {
1112            Self::Predefined(predefined) => match predefined {
1113                PredefinedCharsets::ISOAdobe => (idx < 229).then_some(idx as Sid),
1114                PredefinedCharsets::Expert => {
1115                    predefined_charsets::EXPERT.get(idx as usize).copied()
1116                }
1117                PredefinedCharsets::ExpertSubset => predefined_charsets::EXPERT_SUBSET
1118                    .get(idx as usize)
1119                    .copied(),
1120            },
1121
1122            // 0 not stored in sids vec.
1123            Self::Format0(sids) => sids.get(idx as usize - 1).copied(),
1124
1125            Self::Format1(ranges) | Self::Format2(ranges) => {
1126                let idx = idx as Sid;
1127                let mut i: Sid = 1;
1128                for range in ranges {
1129                    let start = i;
1130                    match Sid::try_from(range.len()) {
1131                        Ok(len) => i += len,
1132                        Err(e) => {
1133                            log::error!("Error converting range length to Sid: {:?}", e);
1134                            return None;
1135                        }
1136                    }
1137                    if i > idx {
1138                        return Some(*range.start() + idx - start);
1139                    }
1140                }
1141                None
1142            }
1143        }
1144    }
1145}
1146
1147/// Charsets has four formats by first byte of buf:
1148///
1149/// 0: format0, n_glyphs SID
1150/// 1: format1, n_ranges (first, n_left: u8) SID
1151/// 2: format2, n_ranges (first, n_left: u16) SID
1152///
1153/// Predefined charsets has no format byte, handled by TopDict::charsets().
1154fn charsets_parser<'a>(n_glyphs: u16) -> impl Parser<&'a [u8], Charsets, prescript::ParserError> {
1155    fn covers(r: &[RangeInclusive<Sid>]) -> usize {
1156        let mut covers = 0;
1157        for range in r {
1158            covers += range.len();
1159        }
1160        covers
1161    }
1162
1163    fn range_parser<'a, LEFT>(
1164        n_glyphs: u16,
1165        mut n_left_parser: LEFT,
1166    ) -> impl Parser<&'a [u8], Vec<RangeInclusive<Sid>>, prescript::ParserError>
1167    where
1168        LEFT: Parser<&'a [u8], u16, prescript::ParserError>,
1169    {
1170        // let n_left_parser = n_left_parser();
1171        move |buf: &mut &'a [u8]| {
1172            let mut parse_item =
1173                (be_u16, n_left_parser.by_ref()).map(|(first, n_left)| first..=(first + n_left));
1174            let mut ranges: Vec<RangeInclusive<Sid>> = vec![];
1175            loop {
1176                match (n_glyphs as usize).cmp(&covers(&ranges[..])) {
1177                    std::cmp::Ordering::Equal => return Ok(ranges),
1178                    std::cmp::Ordering::Greater => ranges.push(parse_item.parse_next(buf)?),
1179                    std::cmp::Ordering::Less => fail::<_, _, prescript::ParserError>(buf)?,
1180                }
1181            }
1182        }
1183    }
1184
1185    let n_glyphs = n_glyphs - 1; // 0 is always .notdef, not exist in charsets
1186    dispatch! {any::<_, prescript::ParserError>;
1187        0 => repeat(n_glyphs as usize,  be_u16).map(Charsets::Format0),
1188        1 => range_parser(n_glyphs,  be_u8::<&'a [u8], prescript::ParserError>.output_into()).map(Charsets::Format1),
1189        2 => range_parser(n_glyphs,  be_u16).map(Charsets::Format2),
1190        _ => fail,
1191    }
1192}
1193
/// Supplemental data for an encoding: assigns a new glyph name to one
/// char code.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct EncodingSupplement {
    /// Char code whose entry is replaced.
    code: u8,
    /// SID of the glyph name to store for `code`.
    sid: Sid,
}
1202
1203impl EncodingSupplement {
1204    fn new(code: u8, sid: Sid) -> Self {
1205        Self { code, sid }
1206    }
1207
1208    pub fn apply(self, strings: StringIndex<'_>, encodings: &mut Encoding) -> Result<()> {
1209        encodings[self.code as usize] = name(&strings.get(self.sid)?);
1210        Ok(())
1211    }
1212}
1213
/// One range of a Format1 encoding, as read from the file.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct EncodingRange {
    /// First value of the range.
    first: u8,
    /// How many further values follow `first` in the range.
    n_left: u8,
}

impl EncodingRange {
    /// Builds the range that starts at `first` and has `n_left` more entries.
    fn new(first: u8, n_left: u8) -> Self {
        EncodingRange { first, n_left }
    }
}
1225
/// Encodings map char code to gid, use Charset to map gid to SID.
#[derive(Debug, PartialEq)]
pub enum Encodings {
    /// Format0: one char code per glyph; entry `i` is the code of gid `i + 1`.
    Format0(Vec<u8>),
    /// Format1: the encoding stored as a list of `EncodingRange`s.
    Format1(Vec<EncodingRange>),
    /// Predefined standard encoding, not stored in the font file.
    PredefinedStandard,
    /// Predefined expert encoding, not stored in the font file.
    PredefinedExpert,
}
1234
1235impl Encodings {
1236    /// build encodings.
1237    pub fn build(&self, charsets: &Charsets, string_index: StringIndex<'_>) -> Result<Encoding> {
1238        const NOTDEF: Name = sname(prescript::NOTDEF);
1239        Ok(match self {
1240            Self::Format0(codes) => {
1241                let mut encodings = [NOTDEF; 256];
1242                for (i, code) in codes.iter().enumerate() {
1243                    let gid = (i + 1).try_into();
1244                    let gid = match gid {
1245                        Ok(gid) => Some(gid),
1246                        Err(e) => {
1247                            log::error!("Error converting index to gid: {:?}", e);
1248                            None
1249                        }
1250                    };
1251                    let sid = gid.and_then(|gid| charsets.resolve_sid(gid));
1252                    if let Some(v) = sid.map(|sid| string_index.get(sid)).transpose()? {
1253                        encodings[*code as usize] = name(&v);
1254                    }
1255                }
1256                Encoding::new(encodings)
1257            }
1258            Self::Format1(ranges) => {
1259                let mut encodings = [NOTDEF; 256];
1260                for range in ranges {
1261                    for i in range.first..=range.first + range.n_left {
1262                        if let Some(v) = charsets
1263                            .resolve_sid(i)
1264                            .map(|sid| string_index.get(sid))
1265                            .transpose()?
1266                        {
1267                            encodings[i as usize] = name(&v);
1268                        }
1269                    }
1270                }
1271                Encoding::new(encodings)
1272            }
1273            Self::PredefinedStandard => predefined_encodings::STANDARD,
1274            Self::PredefinedExpert => predefined_encodings::EXPERT,
1275        })
1276    }
1277}
1278
1279/// Parses Encodings for Format0 and Format1, other predfined encodings are
1280/// handled by `TopDict::encodings()`.
1281///
1282/// First byte lower 7-bits to determinate Format0 or Format1.
1283///
1284/// If first byte is 0, then Format0, followed by nCodes (u8) and code (u8) array.
1285/// If first byte is 1, then Format1, followed by nRanges (u8) and EncodingRange array,
1286///
1287/// If first byte highest bit is 1, EncodingSuppliments exists after Format0 or Format 1.
1288/// EncodingSuppliments is a sequence of code (u8) and sid (u16) preceeded with `nSups` (u8),
1289/// which is the count of EncodingSuppliment.
1290fn encodings_parser<'a>()
1291-> impl Parser<&'a [u8], (Encodings, Option<Vec<EncodingSupplement>>), prescript::ParserError> {
1292    let mut format0 = length_take(be_u8).map(|v: &[u8]| Encodings::Format0(v.to_owned()));
1293    let mut format1 = length_repeat(
1294        be_u8,
1295        (be_u8, be_u8).map(|(first, n_left)| EncodingRange::new(first, n_left)),
1296    )
1297    .map(Encodings::Format1);
1298    let supplement_parser = (be_u8, be_u16).map(|(code, sid)| EncodingSupplement::new(code, sid));
1299    let mut supplements_parser = length_repeat(be_u8, supplement_parser).map(Some);
1300    dispatch! { be_u8::<_, prescript::ParserError>;
1301        0 => (format0.by_ref(), empty.value(None)),
1302        1 => (format1.by_ref(), empty.value(None)),
1303        0x80 => (format0.by_ref(),  supplements_parser.by_ref()),
1304        0x81 => (format1.by_ref(),  supplements_parser.by_ref()),
1305        _ => fail,
1306    }
1307}
1308
1309pub fn parse_fonts(buf: &[u8]) -> Result<(NameIndex<'_>, TopDictIndex<'_>, StringIndex<'_>)> {
1310    parse_ignore_rest(
1311        (
1312            name_index_parser(),
1313            top_dict_index_parser(),
1314            string_index_parser(),
1315        ),
1316        buf,
1317    )
1318    .context(ParseSnafu {
1319        message: "parse fonts file".to_owned(),
1320    })
1321}
1322
1323#[cfg(test)]
1324mod tests;