pdf/
font.rs

1use crate as pdf;
2use crate::object::*;
3use crate::primitive::*;
4use crate::error::*;
5use crate::encoding::Encoding;
6use std::collections::HashMap;
7use std::fmt::Write;
8use crate::parser::{Lexer, parse_with_lexer, ParseFlags};
9use std::convert::TryInto;
10use std::sync::Arc;
11use istring::SmallString;
12use datasize::DataSize;
13use itertools::Itertools;
14
/// Single-bit masks for a 32-bit font flag word.
///
/// NOTE(review): presumably these apply to `FontDescriptor::flags`; nothing in this
/// file reads them yet (hence `dead_code`).
#[allow(non_upper_case_globals, dead_code)]
mod flags {
    pub const FixedPitch: u32 = 0x0000_0001;
    pub const Serif: u32 = 0x0000_0002;
    pub const Symbolic: u32 = 0x0000_0004;
    pub const Script: u32 = 0x0000_0008;
    // bit 4 (0x10) is intentionally not assigned
    pub const Nonsymbolic: u32 = 0x0000_0020;
    pub const Italic: u32 = 0x0000_0040;
    pub const AllCap: u32 = 0x0001_0000;
    pub const SmallCap: u32 = 0x0002_0000;
    pub const ForceBold: u32 = 0x0004_0000;
}
27
28#[derive(Object, ObjectWrite, Debug, Copy, Clone, DataSize, DeepClone)]
29pub enum FontType {
30    Type0,
31    Type1,
32    MMType1,
33    Type3,
34    TrueType,
35    CIDFontType0, //Type1
36    CIDFontType2, // TrueType
37}
38
39#[derive(Debug, DataSize, DeepClone)]
40pub struct Font {
41    pub subtype: FontType,
42    pub name: Option<Name>,
43    pub data: FontData,
44
45    pub encoding: Option<Encoding>,
46
47    // FIXME: Should use RcRef<Stream>
48    pub to_unicode: Option<RcRef<Stream<()>>>,
49
50    /// other keys not mapped in other places. May change over time without notice, and adding things probably will break things. So don't expect this to be part of the stable API
51    pub _other: Dictionary
52}
53
54#[derive(Debug, DataSize, DeepClone)]
55pub enum FontData {
56    Type1(TFont),
57    Type0(Type0Font),
58    TrueType(TFont),
59    CIDFontType0(CIDFont),
60    CIDFontType2(CIDFont),
61    Other(Dictionary),
62}
63
64#[derive(Debug, DataSize, DeepClone)]
65pub enum CidToGidMap {
66    Identity,
67    Table(Vec<u16>)
68}
69impl Object for CidToGidMap {
70    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
71        match p {
72            Primitive::Name(name) if name == "Identity" => {
73                Ok(CidToGidMap::Identity)
74            }
75            p @ Primitive::Stream(_) | p @ Primitive::Reference(_) => {
76                let stream: Stream<()> = Stream::from_primitive(p, resolve)?;
77                let data = stream.data(resolve)?;
78                Ok(CidToGidMap::Table(data.chunks_exact(2).map(|c| (c[0] as u16) << 8 | c[1] as u16).collect()))
79            },
80            p => Err(PdfError::UnexpectedPrimitive {
81                expected: "/Identity or Stream",
82                found: p.get_debug_name()
83            })
84        }
85    }
86}
87impl ObjectWrite for CidToGidMap {
88    fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive> {
89        match self {
90            CidToGidMap::Identity => Ok(Name::from("Identity").into()),
91            CidToGidMap::Table(ref table) => {
92                let mut data = Vec::with_capacity(table.len() * 2);
93                data.extend(table.iter().flat_map(|&v| <[u8; 2]>::into_iter(v.to_be_bytes())));
94                Stream::new((), data).to_primitive(update)
95            }
96        }
97    }
98}
99
100impl Object for Font {
101    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
102        let mut dict = p.resolve(resolve)?.into_dictionary()?;
103
104        let subtype = t!(FontType::from_primitive(dict.require("Font", "Subtype")?, resolve));
105
106        // BaseFont is required for all FontTypes except Type3
107        dict.expect("Font", "Type", "Font", true)?;
108        let base_font_primitive = dict.get("BaseFont");
109        let base_font = match (base_font_primitive, subtype) {
110            (Some(name), _) => Some(t!(t!(name.clone().resolve(resolve)).into_name(), name)),
111            (None, FontType::Type3) => None,
112            (_, _) => return Err(PdfError::MissingEntry {
113                typ: "Font",
114                field: "BaseFont".to_string()
115            })
116        };
117
118        let encoding = dict.remove("Encoding").map(|p| Object::from_primitive(p, resolve)).transpose()?;
119
120        let to_unicode = match dict.remove("ToUnicode") {
121            Some(p) => Some(Object::from_primitive(p, resolve)?),
122            None => None
123        };
124        let _other = dict.clone();
125        let data = match subtype {
126            FontType::Type0 => FontData::Type0(Type0Font::from_dict(dict, resolve)?),
127            FontType::Type1 => FontData::Type1(TFont::from_dict(dict, resolve)?),
128            FontType::TrueType => FontData::TrueType(TFont::from_dict(dict, resolve)?),
129            FontType::CIDFontType0 => FontData::CIDFontType0(CIDFont::from_dict(dict, resolve)?),
130            FontType::CIDFontType2 => FontData::CIDFontType2(CIDFont::from_dict(dict, resolve)?),
131            _ => FontData::Other(dict)
132        };
133        
134        Ok(Font {
135            subtype,
136            name: base_font,
137            data,
138            encoding,
139            to_unicode,
140            _other
141        })
142    }
143}
144impl ObjectWrite for Font {
145    fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive> {
146        let mut dict = match self.data {
147            FontData::CIDFontType0(ref d) | FontData::CIDFontType2(ref d) => d.to_dict(update)?,
148            FontData::TrueType(ref d) | FontData::Type1(ref d) => d.to_dict(update)?,
149            FontData::Type0(ref d) => d.to_dict(update)?,
150            FontData::Other(ref dict) => dict.clone(),
151        };
152        
153        if let Some(ref to_unicode) = self.to_unicode {
154            dict.insert("ToUnicode", to_unicode.to_primitive(update)?);
155        }
156        if let Some(ref encoding) = self.encoding {
157            dict.insert("Encoding", encoding.to_primitive(update)?);
158        }
159        if let Some(ref name) = self.name {
160            dict.insert("BaseFont", name.to_primitive(update)?);
161        }
162
163        let subtype = match self.data {
164            FontData::Type0(_) => FontType::Type0,
165            FontData::Type1(_) => FontType::Type1,
166            FontData::TrueType(_) => FontType::TrueType,
167            FontData::CIDFontType0(_) => FontType::CIDFontType0,
168            FontData::CIDFontType2(_) => FontType::CIDFontType2,
169            FontData::Other(_) => bail!("unimplemented")
170        };
171        dict.insert("Subtype", subtype.to_primitive(update)?);
172        dict.insert("Type", Name::from("Font"));
173
174        Ok(Primitive::Dictionary(dict))
175    }
176}
177
178
/// Dense table of advance widths indexed by character code / CID.
///
/// Codes outside the stored range report `default`.
#[derive(Debug)]
pub struct Widths {
    /// Widths of the codes `first_char .. first_char + values.len()`.
    values: Vec<f32>,
    /// Width returned for every code that has no stored entry.
    default: f32,
    /// Code that `values[0]` belongs to. Only meaningful while `values` is non-empty.
    first_char: usize
}
impl Widths {
    /// Returns the width stored for `cid`, or the default width if none is stored.
    pub fn get(&self, cid: usize) -> f32 {
        cid.checked_sub(self.first_char)
            .and_then(|offset| self.values.get(offset).copied())
            .unwrap_or(self.default)
    }
    /// Creates an empty table that answers every lookup with `default`.
    fn new(default: f32) -> Widths {
        Widths {
            default,
            values: Vec::new(),
            first_char: 0
        }
    }
    /// Capacity hint: tries to make room so that `cid` can be stored without
    /// another reallocation.
    ///
    /// This is strictly best-effort and never fails or panics:
    /// * while the table is empty nothing is reserved, because `first_char` is
    ///   only fixed by the first `set` and any offset computed now would be bogus;
    /// * the request is made relative to the current length (which is what
    ///   `Vec` reservation expects) and goes through `try_reserve`, so an absurdly
    ///   large `cid` coming from a broken file cannot trigger a capacity-overflow
    ///   panic here.
    fn ensure_cid(&mut self, cid: usize) {
        if self.values.is_empty() {
            return;
        }
        if let Some(offset) = cid.checked_sub(self.first_char) { // cid may be < first_char
            // index `offset` needs `offset + 1` slots in total
            let additional = offset.saturating_add(1).saturating_sub(self.values.len());
            let _ = self.values.try_reserve(additional);
        }
    }
    /// Stores `width` for `cid`, growing the table in either direction as needed.
    #[allow(clippy::float_cmp)]  // TODO
    fn set(&mut self, cid: usize, width: f32) {
        self._set(cid, width);
        debug_assert_eq!(self.get(cid), width);
    }
    /// Implementation of [`Widths::set`] without the debug read-back check.
    fn _set(&mut self, cid: usize, width: f32) {
        use std::iter::repeat;

        // first entry: it defines where the table starts
        if self.values.is_empty() {
            self.first_char = cid;
            self.values.push(width);
            return;
        }

        // directly behind the last entry: plain append
        if cid == self.first_char + self.values.len() {
            self.values.push(width);
            return;
        }

        // before the table: pad the front with defaults and move the start down
        if cid < self.first_char {
            self.values.splice(0 .. 0, repeat(self.default).take(self.first_char - cid));
            self.first_char = cid;
            self.values[0] = width;
            return;
        }

        // beyond the end with a gap: fill the gap with defaults, then append
        if cid > self.values.len() + self.first_char {
            self.ensure_cid(cid);
            self.values.extend(repeat(self.default).take(cid - self.first_char - self.values.len()));
            self.values.push(width);
            return;
        }

        // inside the stored range: overwrite
        self.values[cid - self.first_char] = width;
    }
}
243impl Font {
244    pub fn embedded_data(&self, resolve: &impl Resolve) -> Option<Result<Arc<[u8]>>> {
245        match self.data {
246            FontData::Type0(ref t) => t.descendant_fonts.get(0).and_then(|f| f.embedded_data(resolve)),
247            FontData::CIDFontType0(ref c) | FontData::CIDFontType2(ref c) => c.font_descriptor.data(resolve),
248            FontData::Type1(ref t) | FontData::TrueType(ref t) => t.font_descriptor.as_ref().and_then(|d| d.data(resolve)),
249            _ => None
250        }
251    }
252    pub fn is_cid(&self) -> bool {
253        matches!(self.data, FontData::Type0(_) | FontData::CIDFontType0(_) | FontData::CIDFontType2(_))
254    }
255    pub fn cid_to_gid_map(&self) -> Option<&CidToGidMap> {
256        match self.data {
257            FontData::Type0(ref inner) => inner.descendant_fonts.get(0).and_then(|f| f.cid_to_gid_map()),
258            FontData::CIDFontType0(ref f) | FontData::CIDFontType2(ref f) => f.cid_to_gid_map.as_ref(),
259            _ => None
260        }
261    }
262    pub fn encoding(&self) -> Option<&Encoding> {
263        self.encoding.as_ref()
264    }
265    pub fn info(&self) -> Option<&TFont> {
266        match self.data {
267            FontData::Type1(ref info) => Some(info),
268            FontData::TrueType(ref info) => Some(info),
269            _ => None
270        }
271    }
272    pub fn widths(&self, resolve: &impl Resolve) -> Result<Option<Widths>> {
273        match self.data {
274            FontData::Type0(ref t0) => t0.descendant_fonts[0].widths(resolve),
275            FontData::Type1(ref info) | FontData::TrueType(ref info) => {
276                match *info {
277                    TFont { first_char: Some(first), ref widths, .. } => Ok(Some(Widths {
278                        default: 0.0,
279                        first_char: first as usize,
280                        values: widths.as_ref().cloned().unwrap_or_default()
281                    })),
282                    _ => Ok(None)
283                }
284            },
285            FontData::CIDFontType0(ref cid) | FontData::CIDFontType2(ref cid) => {
286                let mut widths = Widths::new(cid.default_width);
287                let mut iter = cid.widths.iter();
288                while let Some(p) = iter.next() {
289                    let c1 = p.as_usize()?;
290                    match iter.next() {
291                        Some(Primitive::Array(array)) => {
292                            widths.ensure_cid(c1 + array.len() - 1);
293                            for (i, w) in array.iter().enumerate() {
294                                widths.set(c1 + i, w.as_number()?);
295                            }
296                        },
297                        Some(&Primitive::Reference(r)) => {
298                            match resolve.resolve(r)? {
299                                Primitive::Array(array) => {
300                                    widths.ensure_cid(c1 + array.len() - 1);
301                                    for (i, w) in array.iter().enumerate() {
302                                        widths.set(c1 + i, w.as_number()?);
303                                    }
304                                }
305                                p => return Err(PdfError::Other { msg: format!("unexpected primitive in W array: {:?}", p) })
306                            }
307                        }
308                        Some(&Primitive::Integer(c2)) => {
309                            let w = try_opt!(iter.next()).as_number()?;
310                            for c in c1 ..= (c2 as usize) {
311                                widths.set(c, w);
312                            }
313                        },
314                        p => return Err(PdfError::Other { msg: format!("unexpected primitive in W array: {:?}", p) })
315                    }
316                }
317                Ok(Some(widths))
318            },
319            _ => Ok(None)
320        }
321    }
322    pub fn to_unicode(&self, resolve: &impl Resolve) -> Option<Result<ToUnicodeMap>> {
323        self.to_unicode.as_ref().map(|s| (**s).data(resolve).and_then(|d| parse_cmap(&d)))
324    }
325}
326#[derive(Object, ObjectWrite, Debug, DataSize, DeepClone)]
327pub struct TFont {
328    #[pdf(key="BaseFont")]
329    pub base_font: Option<Name>,
330
331    /// per spec required, but some files lack it.
332    #[pdf(key="FirstChar")]
333    pub first_char: Option<i32>,
334
335    /// same
336    #[pdf(key="LastChar")]
337    pub last_char: Option<i32>,
338
339    #[pdf(key="Widths")]
340    pub widths: Option<Vec<f32>>,
341
342    #[pdf(key="FontDescriptor")]
343    pub font_descriptor: Option<FontDescriptor>
344}
345
346#[derive(Object, ObjectWrite, Debug, DataSize, DeepClone)]
347pub struct Type0Font {
348    #[pdf(key="DescendantFonts")]
349    pub descendant_fonts: Vec<MaybeRef<Font>>,
350
351    #[pdf(key="ToUnicode")]
352    pub to_unicode: Option<RcRef<Stream<()>>>,
353}
354
355#[derive(Object, ObjectWrite, Debug, DataSize, DeepClone)]
356pub struct CIDFont {
357    #[pdf(key="CIDSystemInfo")]
358    pub system_info: Dictionary,
359
360    #[pdf(key="FontDescriptor")]
361    pub font_descriptor: FontDescriptor,
362
363    #[pdf(key="DW", default="1000.")]
364    pub default_width: f32,
365
366    #[pdf(key="W")]
367    pub widths: Vec<Primitive>,
368
369    #[pdf(key="CIDToGIDMap")]
370    pub cid_to_gid_map: Option<CidToGidMap>,
371
372    #[pdf(other)]
373    pub _other: Dictionary
374}
375
376
377#[derive(Object, ObjectWrite, Debug, DataSize, DeepClone)]
378pub struct FontDescriptor {
379    #[pdf(key="FontName")]
380    pub font_name: Name,
381
382    #[pdf(key="FontFamily")]
383    pub font_family: Option<PdfString>,
384
385    #[pdf(key="FontStretch")]
386    pub font_stretch: Option<FontStretch>,
387
388    #[pdf(key="FontWeight")]
389    pub font_weight: Option<f32>,
390
391    #[pdf(key="Flags")]
392    pub flags: u32,
393
394    #[pdf(key="FontBBox")]
395    pub font_bbox: Rectangle,
396
397    #[pdf(key="ItalicAngle")]
398    pub italic_angle: f32,
399
400    // required as per spec, but still missing in some cases
401    #[pdf(key="Ascent")]
402    pub ascent: Option<f32>,
403
404    #[pdf(key="Descent")]
405    pub descent: Option<f32>,
406
407    #[pdf(key="Leading", default="0.")]
408    pub leading: f32,
409
410    #[pdf(key="CapHeight")]
411    pub cap_height: Option<f32>,
412
413    #[pdf(key="XHeight", default="0.")]
414    pub xheight: f32,
415
416    #[pdf(key="StemV", default="0.")]
417    pub stem_v: f32,
418
419    #[pdf(key="StemH", default="0.")]
420    pub stem_h: f32,
421
422    #[pdf(key="AvgWidth", default="0.")]
423    pub avg_width: f32,
424
425    #[pdf(key="MaxWidth", default="0.")]
426    pub max_width: f32,
427
428    #[pdf(key="MissingWidth", default="0.")]
429    pub missing_width: f32,
430
431    #[pdf(key="FontFile")]
432    pub font_file: Option<RcRef<Stream<()>>>,
433
434    #[pdf(key="FontFile2")]
435    pub font_file2: Option<RcRef<Stream<()>>>,
436
437    #[pdf(key="FontFile3")]
438    pub font_file3: Option<RcRef<Stream<FontStream3>>>,
439
440    #[pdf(key="CharSet")]
441    pub char_set: Option<PdfString>
442}
443impl FontDescriptor {
444    pub fn data(&self, resolve: &impl Resolve) -> Option<Result<Arc<[u8]>>> {
445        if let Some(ref s) = self.font_file {
446            Some((**s).data(resolve))
447        } else if let Some(ref s) = self.font_file2 {
448            Some((**s).data(resolve))
449        } else if let Some(ref s) = self.font_file3 {
450            Some((**s).data(resolve))
451        } else {
452            None
453        }
454    }
455}
456
457#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
458#[pdf(key="Subtype")]
459pub enum FontTypeExt {
460    Type1C,
461    CIDFontType0C,
462    OpenType
463}
464#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
465pub struct FontStream3 {
466    #[pdf(key="Subtype")]
467    pub subtype: FontTypeExt
468}
469
470#[derive(Object, ObjectWrite, Debug, PartialEq, Eq, PartialOrd, Ord, Clone, DataSize, DeepClone)]
471pub enum FontStretch {
472    UltraCondensed,
473    ExtraCondensed,
474    Condensed,
475    SemiCondensed,
476    Normal,
477    SemiExpanded,
478    Expanded,
479    ExtraExpanded,
480    UltraExpanded
481}
482
483#[derive(Clone, Debug, Default)]
484pub struct ToUnicodeMap {
485    // todo: reduce allocations
486    inner: HashMap<u16, SmallString>
487}
488impl ToUnicodeMap {
489    pub fn new() -> Self {
490        Self::default()
491    }
492    /// Create a new ToUnicodeMap from key/value pairs.
493    ///
494    /// subject to change
495    pub fn create(iter: impl Iterator<Item=(u16, SmallString)>) -> Self {
496        ToUnicodeMap { inner: iter.collect() }
497    }
498    pub fn get(&self, gid: u16) -> Option<&str> {
499        self.inner.get(&gid).map(|s| s.as_str())
500    }
501    pub fn insert(&mut self, gid: u16, unicode: SmallString) {
502        self.inner.insert(gid, unicode);
503    }
504    pub fn iter(&self) -> impl Iterator<Item=(u16, &str)> {
505        self.inner.iter().map(|(&gid, unicode)| (gid, unicode.as_str()))
506    }
507    pub fn len(&self) -> usize {
508        self.inner.len()
509    }
510    pub fn is_empty(&self) -> bool {
511        self.inner.is_empty()
512    }
513}
514
/// Decodes UTF-16BE bytes lazily.
///
/// The input is read as consecutive big-endian 16-bit code units; a trailing
/// odd byte is ignored. Each item is either a decoded `char` or the error for
/// an unpaired surrogate.
pub fn utf16be_to_char(
    data: &[u8],
) -> impl Iterator<Item = std::result::Result<char, std::char::DecodeUtf16Error>> + '_ {
    let code_units = data
        .chunks_exact(2)
        .map(|pair| (u16::from(pair[0]) << 8) | u16::from(pair[1]));
    std::char::decode_utf16(code_units)
}
522/// converts UTF16-BE to a string replacing illegal/unknown characters
523pub fn utf16be_to_string_lossy(data: &[u8]) -> String {
524    utf16be_to_char(data)
525        .map(|r| r.unwrap_or(std::char::REPLACEMENT_CHARACTER))
526        .collect()
527}
528/// converts UTF16-BE to a string errors out in illegal/unknonw characters
529pub fn utf16be_to_string(data: &[u8]) -> pdf::error::Result<SmallString> {
530    utf16be_to_char(data)
531        .map(|r| r.map_err(|_| PdfError::Utf16Decode))
532        .collect()
533}
534fn parse_cid(s: &PdfString) -> Result<u16> {
535    let b = s.as_bytes();
536    match b.len() {
537        2 => Ok(u16::from_be_bytes(b.try_into().unwrap())),
538        1 => Ok(b[0] as u16),
539        _ => Err(PdfError::CidDecode),
540    }
541}
542fn parse_cmap(data: &[u8]) -> Result<ToUnicodeMap> {
543    let mut lexer = Lexer::new(data);
544    let mut map = ToUnicodeMap::new();
545    while let Ok(substr) = lexer.next() {
546        match substr.as_slice() {
547            b"beginbfchar" => loop {
548                let a = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING);
549                if a.is_err() {
550                    break;
551                }
552                let b = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING);
553                match (a, b) {
554                    (Ok(Primitive::String(cid_data)), Ok(Primitive::String(unicode_data))) => {
555                        let cid = parse_cid(&cid_data)?;
556                        let bytes = unicode_data.as_bytes();
557                        match utf16be_to_string(bytes) {
558                            Ok(unicode) => map.insert(cid, unicode),
559                            Err(_) => warn!("invalid unicode for cid {cid} {bytes:?}"),
560                        }
561                    }
562                    _ => break,
563                }
564            },
565            b"beginbfrange" => loop {
566                let a = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING);
567                if a.is_err() {
568                    break;
569                }
570                let b = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING);
571                let c = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING | ParseFlags::ARRAY);
572                match (a, b, c) {
573                    (
574                        Ok(Primitive::String(cid_start_data)),
575                        Ok(Primitive::String(cid_end_data)),
576                        Ok(Primitive::String(unicode_data)),
577                    ) if unicode_data.data.len() > 0 => {
578                        let cid_start = parse_cid(&cid_start_data)?;
579                        let cid_end = parse_cid(&cid_end_data)?;
580                        let mut unicode_data = unicode_data.into_bytes();
581
582                        for cid in cid_start..=cid_end {
583                            match utf16be_to_string(&unicode_data) {
584                                Ok(unicode) => map.insert(cid, unicode),
585                                Err(_) => warn!("invalid unicode for cid {cid} {unicode_data:?}"),
586                            }
587                            let last = unicode_data.last_mut().unwrap();
588                            if *last < 255 {
589                                *last += 1;
590                            } else {
591                                break;
592                            }
593                        }
594                    }
595                    (
596                        Ok(Primitive::String(cid_start_data)),
597                        Ok(Primitive::String(cid_end_data)),
598                        Ok(Primitive::Array(unicode_data_arr)),
599                    ) => {
600                        let cid_start = parse_cid(&cid_start_data)?;
601                        let cid_end = parse_cid(&cid_end_data)?;
602
603                        for (cid, unicode_data) in (cid_start..=cid_end).zip(unicode_data_arr) {
604                            let bytes = unicode_data.as_string()?.as_bytes();
605                            match utf16be_to_string(bytes) {
606                                Ok(unicode) => map.insert(cid, unicode),
607                                Err(_) => warn!("invalid unicode for cid {cid} {bytes:?}"),
608                            }
609                        }
610                    }
611                    _ => break,
612                }
613            },
614            b"endcmap" => break,
615            _ => {}
616        }
617    }
618
619    Ok(map)
620}
621
/// Appends `cid` to `w` as a hex string of four upper-case digits, e.g. `<001F>`.
fn write_cid(w: &mut String, cid: u16) {
    w.push('<');
    // formatting into a `String` cannot fail
    write!(w, "{:04X}", cid).unwrap();
    w.push('>');
}
/// Appends `unicode` to `out` as one hex string of UTF-16 code units, four
/// upper-case digits per unit, e.g. `<0041D834DD1E>` for `"A𝄞"`.
fn write_unicode(out: &mut String, unicode: &str) {
    out.push('<');
    for word in unicode.encode_utf16() {
        // formatting into a `String` cannot fail
        write!(out, "{:04X}", word).unwrap();
    }
    out.push('>');
}
636pub fn write_cmap(map: &ToUnicodeMap) -> String {
637    let mut buf = String::new();
638    let mut list: Vec<(u16, &str)> = map.inner.iter().map(|(&cid, s)| (cid, s.as_str())).collect();
639    list.sort();
640
641
642    let mut remaining = &list[..];
643    let blocks = std::iter::from_fn(move || {
644        if remaining.len() == 0 {
645            return None;
646        }
647        let first_cid = remaining[0].0;
648        let seq_len = remaining.iter().enumerate().take_while(|&(i, &(cid, _))| cid == first_cid + i as u16).count();
649        
650        let (block, tail) = remaining.split_at(seq_len);
651        remaining = tail;
652        Some(block)
653    });
654
655    for (single, group) in &blocks.group_by(|b| b.len() == 1) {
656        if single {
657            writeln!(buf, "beginbfchar").unwrap();
658            for block in group {
659                for &(cid, uni) in block {
660                    write_cid(&mut buf, cid);
661                    write!(buf, " ").unwrap();
662                    write_unicode(&mut buf, uni);
663                    writeln!(buf).unwrap();
664                }
665            }
666            writeln!(buf, "endbfchar").unwrap();
667        } else {
668            writeln!(buf, "beginbfrange").unwrap();
669            for block in group {
670                write_cid(&mut buf, block[0].0);
671                write!(buf, " ").unwrap();
672                write_cid(&mut buf, block.last().unwrap().0);
673                write!(buf, " [").unwrap();
674                for (i, &(_cid, u)) in block.iter().enumerate() {
675                    if i > 0 {
676                        write!(buf, ", ").unwrap();
677                    }
678                    write_unicode(&mut buf, u);
679                }
680                writeln!(buf, "]").unwrap();
681            }    
682            writeln!(buf, "endbfrange").unwrap();
683        }
684    }
685
686    buf
687}
688
#[cfg(test)]
mod tests {

    use crate::error::PdfError;
    use crate::font::{utf16be_to_string, utf16be_to_char, utf16be_to_string_lossy};

    /// A single valid code unit decodes to the matching character.
    #[test]
    fn utf16be_to_string_quick() {
        let v = vec![0x20, 0x09];
        let s = utf16be_to_string(&v);
        assert_eq!(s.unwrap(), "\u{2009}");
        assert!(!v.is_empty());
    }

    /// Exercises all three decoders on input that mixes valid text with
    /// unpaired surrogates.
    #[test]
    fn test_to_char() {
        // 𝄞mus<invalid>ic<invalid>
        let v = [
            0xD8, 0x34, 0xDD, 0x1E, 0x00, 0x6d, 0x00, 0x75, 0x00, 0x73, 0xDD, 0x1E, 0x00, 0x69, 0x00,
            0x63, 0xD8, 0x34,
        ];

        assert_eq!(
            utf16be_to_char(&v)
                .map(|r| r.map_err(|e| e.unpaired_surrogate()))
                .collect::<Vec<_>>(),
            vec![
                Ok('𝄞'),
                Ok('m'),
                Ok('u'),
                Ok('s'),
                Err(0xDD1E),
                Ok('i'),
                Ok('c'),
                Err(0xD834)
            ]
        );

        let mut lossy = String::from("𝄞mus");
        lossy.push(std::char::REPLACEMENT_CHARACTER);
        lossy.push('i');
        lossy.push('c');
        lossy.push(std::char::REPLACEMENT_CHARACTER);

        // The strict decoder must reject the input. Unwrapping the error makes the
        // test fail if decoding unexpectedly succeeds instead of passing silently.
        let err = utf16be_to_string(&v).expect_err("unpaired surrogates must not decode");
        assert!(matches!(err, PdfError::Utf16Decode));
        assert_eq!(err.to_string(), "UTF16 decode error");

        assert_eq!(utf16be_to_string(&v[..8]).unwrap(), String::from("𝄞mu"));
        assert_eq!(utf16be_to_string_lossy(&v), lossy);
    }
}