1use crate as pdf;
2use crate::object::*;
3use crate::primitive::*;
4use crate::error::*;
5use crate::encoding::Encoding;
6use std::collections::HashMap;
7use std::fmt::Write;
8use crate::parser::{Lexer, parse_with_lexer, ParseFlags};
9use std::convert::TryInto;
10use std::sync::Arc;
11use istring::SmallString;
12use datasize::DataSize;
13use itertools::Itertools;
14
// Bit masks, one bit per flag. Names are kept in their original mixed-case spelling, hence the
// `non_upper_case_globals` allowance; none of them are referenced in this part of the file,
// hence `dead_code`.
// NOTE(review): presumably these are the bits of `FontDescriptor::flags` (the `Flags` entry
// of a font descriptor) — confirm against the PDF specification before relying on them.
#[allow(non_upper_case_globals, dead_code)]
mod flags {
    pub const FixedPitch: u32 = 1 << 0;
    pub const Serif: u32 = 1 << 1;
    pub const Symbolic: u32 = 1 << 2;
    pub const Script: u32 = 1 << 3;
    // Bit 4 is intentionally skipped.
    pub const Nonsymbolic: u32 = 1 << 5;
    pub const Italic: u32 = 1 << 6;
    // Bits 7..=15 are intentionally skipped.
    pub const AllCap: u32 = 1 << 16;
    pub const SmallCap: u32 = 1 << 17;
    pub const ForceBold: u32 = 1 << 18;
}
27
/// The kind of font, as read from the `Subtype` entry of a font dictionary
/// (see `Font::from_primitive`).
#[derive(Object, ObjectWrite, Debug, Copy, Clone, DataSize, DeepClone)]
pub enum FontType {
    Type0,
    Type1,
    MMType1,
    Type3,
    TrueType,
    CIDFontType0,
    CIDFontType2,
}
38
/// A font dictionary in parsed form.
#[derive(Debug, DataSize, DeepClone)]
pub struct Font {
    /// Value of the `Subtype` entry.
    pub subtype: FontType,
    /// Value of the `BaseFont` entry. Parsing only tolerates its absence for `Type3` fonts.
    pub name: Option<Name>,
    /// Subtype-specific part of the dictionary.
    pub data: FontData,

    /// Parsed `Encoding` entry, if the dictionary has one.
    pub encoding: Option<Encoding>,

    /// The `ToUnicode` stream, if the dictionary has one. Use `Font::to_unicode` to parse it.
    pub to_unicode: Option<RcRef<Stream<()>>>,

    /// Copy of the font dictionary taken after the `Encoding` and `ToUnicode` entries were
    /// removed and before the subtype-specific parser consumed it.
    pub _other: Dictionary
}
53
/// Subtype-specific font data. `Font::from_primitive` picks the variant from the font's
/// `FontType`; subtypes without a dedicated parser (`MMType1`, `Type3`) end up in `Other`.
#[derive(Debug, DataSize, DeepClone)]
pub enum FontData {
    Type1(TFont),
    Type0(Type0Font),
    TrueType(TFont),
    CIDFontType0(CIDFont),
    CIDFontType2(CIDFont),
    /// The raw dictionary of a font whose subtype has no dedicated representation.
    Other(Dictionary),
}
63
/// The `CIDToGIDMap` entry of a CID font.
#[derive(Debug, DataSize, DeepClone)]
pub enum CidToGidMap {
    /// The entry was the name `Identity`.
    Identity,
    /// The entry was a stream; its bytes are read as consecutive big-endian `u16` values.
    Table(Vec<u16>)
}
69impl Object for CidToGidMap {
70 fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
71 match p {
72 Primitive::Name(name) if name == "Identity" => {
73 Ok(CidToGidMap::Identity)
74 }
75 p @ Primitive::Stream(_) | p @ Primitive::Reference(_) => {
76 let stream: Stream<()> = Stream::from_primitive(p, resolve)?;
77 let data = stream.data(resolve)?;
78 Ok(CidToGidMap::Table(data.chunks_exact(2).map(|c| (c[0] as u16) << 8 | c[1] as u16).collect()))
79 },
80 p => Err(PdfError::UnexpectedPrimitive {
81 expected: "/Identity or Stream",
82 found: p.get_debug_name()
83 })
84 }
85 }
86}
87impl ObjectWrite for CidToGidMap {
88 fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive> {
89 match self {
90 CidToGidMap::Identity => Ok(Name::from("Identity").into()),
91 CidToGidMap::Table(ref table) => {
92 let mut data = Vec::with_capacity(table.len() * 2);
93 data.extend(table.iter().flat_map(|&v| <[u8; 2]>::into_iter(v.to_be_bytes())));
94 Stream::new((), data).to_primitive(update)
95 }
96 }
97 }
98}
99
100impl Object for Font {
101 fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
102 let mut dict = p.resolve(resolve)?.into_dictionary()?;
103
104 let subtype = t!(FontType::from_primitive(dict.require("Font", "Subtype")?, resolve));
105
106 dict.expect("Font", "Type", "Font", true)?;
108 let base_font_primitive = dict.get("BaseFont");
109 let base_font = match (base_font_primitive, subtype) {
110 (Some(name), _) => Some(t!(t!(name.clone().resolve(resolve)).into_name(), name)),
111 (None, FontType::Type3) => None,
112 (_, _) => return Err(PdfError::MissingEntry {
113 typ: "Font",
114 field: "BaseFont".to_string()
115 })
116 };
117
118 let encoding = dict.remove("Encoding").map(|p| Object::from_primitive(p, resolve)).transpose()?;
119
120 let to_unicode = match dict.remove("ToUnicode") {
121 Some(p) => Some(Object::from_primitive(p, resolve)?),
122 None => None
123 };
124 let _other = dict.clone();
125 let data = match subtype {
126 FontType::Type0 => FontData::Type0(Type0Font::from_dict(dict, resolve)?),
127 FontType::Type1 => FontData::Type1(TFont::from_dict(dict, resolve)?),
128 FontType::TrueType => FontData::TrueType(TFont::from_dict(dict, resolve)?),
129 FontType::CIDFontType0 => FontData::CIDFontType0(CIDFont::from_dict(dict, resolve)?),
130 FontType::CIDFontType2 => FontData::CIDFontType2(CIDFont::from_dict(dict, resolve)?),
131 _ => FontData::Other(dict)
132 };
133
134 Ok(Font {
135 subtype,
136 name: base_font,
137 data,
138 encoding,
139 to_unicode,
140 _other
141 })
142 }
143}
144impl ObjectWrite for Font {
145 fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive> {
146 let mut dict = match self.data {
147 FontData::CIDFontType0(ref d) | FontData::CIDFontType2(ref d) => d.to_dict(update)?,
148 FontData::TrueType(ref d) | FontData::Type1(ref d) => d.to_dict(update)?,
149 FontData::Type0(ref d) => d.to_dict(update)?,
150 FontData::Other(ref dict) => dict.clone(),
151 };
152
153 if let Some(ref to_unicode) = self.to_unicode {
154 dict.insert("ToUnicode", to_unicode.to_primitive(update)?);
155 }
156 if let Some(ref encoding) = self.encoding {
157 dict.insert("Encoding", encoding.to_primitive(update)?);
158 }
159 if let Some(ref name) = self.name {
160 dict.insert("BaseFont", name.to_primitive(update)?);
161 }
162
163 let subtype = match self.data {
164 FontData::Type0(_) => FontType::Type0,
165 FontData::Type1(_) => FontType::Type1,
166 FontData::TrueType(_) => FontType::TrueType,
167 FontData::CIDFontType0(_) => FontType::CIDFontType0,
168 FontData::CIDFontType2(_) => FontType::CIDFontType2,
169 FontData::Other(_) => bail!("unimplemented")
170 };
171 dict.insert("Subtype", subtype.to_primitive(update)?);
172 dict.insert("Type", Name::from("Font"));
173
174 Ok(Primitive::Dictionary(dict))
175 }
176}
177
178
/// Glyph widths indexed by character code / CID.
///
/// Widths are stored densely for the window `first_char .. first_char + values.len()`;
/// every code outside that window reports `default`.
#[derive(Debug)]
pub struct Widths {
    /// Widths of the codes inside the window, in code order.
    values: Vec<f32>,
    /// Width reported for codes outside the window (and used to pad gaps inside it).
    default: f32,
    /// Code that `values[0]` belongs to.
    first_char: usize
}
impl Widths {
    /// Returns the width stored for `cid`, or the default width when none is stored.
    pub fn get(&self, cid: usize) -> f32 {
        cid.checked_sub(self.first_char)
            .and_then(|index| self.values.get(index))
            .copied()
            .unwrap_or(self.default)
    }

    /// Creates an empty table in which every code has width `default`.
    fn new(default: f32) -> Widths {
        Widths {
            default,
            values: Vec::new(),
            first_char: 0
        }
    }

    /// Makes sure `values` can grow up to and including the slot for `cid` without
    /// reallocating. Purely a capacity hint; the stored widths do not change.
    fn ensure_cid(&mut self, cid: usize) {
        // `cid` may lie below `first_char`, in which case there is nothing to reserve.
        if let Some(offset) = cid.checked_sub(self.first_char) {
            // `Vec::reserve` counts from `len()`, not from `capacity()`: to hold index
            // `offset` we need `offset + 1` slots in total.
            let needed = offset.saturating_add(1);
            self.values.reserve(needed.saturating_sub(self.values.len()));
        }
    }

    /// Stores `width` for `cid` and, in debug builds, checks that it reads back unchanged.
    // Exact float comparison is intended: the value was stored verbatim a line earlier.
    #[allow(clippy::float_cmp)]
    fn set(&mut self, cid: usize, width: f32) {
        self._set(cid, width);
        debug_assert_eq!(self.get(cid), width);
    }

    /// Stores `width` for `cid`, widening the window in either direction as required and
    /// filling any gap that opens up with the default width.
    fn _set(&mut self, cid: usize, width: f32) {
        use std::iter::repeat;

        // First entry: the window starts at this code.
        if self.values.is_empty() {
            self.first_char = cid;
            self.values.push(width);
            return;
        }

        // Below the window: prepend defaults so that `cid` becomes the new first slot.
        if cid < self.first_char {
            let gap = self.first_char - cid;
            self.values.splice(0..0, repeat(self.default).take(gap));
            self.first_char = cid;
            self.values[0] = width;
            return;
        }

        let index = cid - self.first_char;
        if index < self.values.len() {
            // Inside the window: overwrite.
            self.values[index] = width;
        } else {
            // At or past the end: pad the gap (possibly empty) with defaults, then append.
            self.ensure_cid(cid);
            let padding = index - self.values.len();
            self.values.extend(repeat(self.default).take(padding));
            self.values.push(width);
        }
    }
}
243impl Font {
244 pub fn embedded_data(&self, resolve: &impl Resolve) -> Option<Result<Arc<[u8]>>> {
245 match self.data {
246 FontData::Type0(ref t) => t.descendant_fonts.get(0).and_then(|f| f.embedded_data(resolve)),
247 FontData::CIDFontType0(ref c) | FontData::CIDFontType2(ref c) => c.font_descriptor.data(resolve),
248 FontData::Type1(ref t) | FontData::TrueType(ref t) => t.font_descriptor.as_ref().and_then(|d| d.data(resolve)),
249 _ => None
250 }
251 }
252 pub fn is_cid(&self) -> bool {
253 matches!(self.data, FontData::Type0(_) | FontData::CIDFontType0(_) | FontData::CIDFontType2(_))
254 }
255 pub fn cid_to_gid_map(&self) -> Option<&CidToGidMap> {
256 match self.data {
257 FontData::Type0(ref inner) => inner.descendant_fonts.get(0).and_then(|f| f.cid_to_gid_map()),
258 FontData::CIDFontType0(ref f) | FontData::CIDFontType2(ref f) => f.cid_to_gid_map.as_ref(),
259 _ => None
260 }
261 }
262 pub fn encoding(&self) -> Option<&Encoding> {
263 self.encoding.as_ref()
264 }
265 pub fn info(&self) -> Option<&TFont> {
266 match self.data {
267 FontData::Type1(ref info) => Some(info),
268 FontData::TrueType(ref info) => Some(info),
269 _ => None
270 }
271 }
272 pub fn widths(&self, resolve: &impl Resolve) -> Result<Option<Widths>> {
273 match self.data {
274 FontData::Type0(ref t0) => t0.descendant_fonts[0].widths(resolve),
275 FontData::Type1(ref info) | FontData::TrueType(ref info) => {
276 match *info {
277 TFont { first_char: Some(first), ref widths, .. } => Ok(Some(Widths {
278 default: 0.0,
279 first_char: first as usize,
280 values: widths.as_ref().cloned().unwrap_or_default()
281 })),
282 _ => Ok(None)
283 }
284 },
285 FontData::CIDFontType0(ref cid) | FontData::CIDFontType2(ref cid) => {
286 let mut widths = Widths::new(cid.default_width);
287 let mut iter = cid.widths.iter();
288 while let Some(p) = iter.next() {
289 let c1 = p.as_usize()?;
290 match iter.next() {
291 Some(Primitive::Array(array)) => {
292 widths.ensure_cid(c1 + array.len() - 1);
293 for (i, w) in array.iter().enumerate() {
294 widths.set(c1 + i, w.as_number()?);
295 }
296 },
297 Some(&Primitive::Reference(r)) => {
298 match resolve.resolve(r)? {
299 Primitive::Array(array) => {
300 widths.ensure_cid(c1 + array.len() - 1);
301 for (i, w) in array.iter().enumerate() {
302 widths.set(c1 + i, w.as_number()?);
303 }
304 }
305 p => return Err(PdfError::Other { msg: format!("unexpected primitive in W array: {:?}", p) })
306 }
307 }
308 Some(&Primitive::Integer(c2)) => {
309 let w = try_opt!(iter.next()).as_number()?;
310 for c in c1 ..= (c2 as usize) {
311 widths.set(c, w);
312 }
313 },
314 p => return Err(PdfError::Other { msg: format!("unexpected primitive in W array: {:?}", p) })
315 }
316 }
317 Ok(Some(widths))
318 },
319 _ => Ok(None)
320 }
321 }
322 pub fn to_unicode(&self, resolve: &impl Resolve) -> Option<Result<ToUnicodeMap>> {
323 self.to_unicode.as_ref().map(|s| (**s).data(resolve).and_then(|d| parse_cmap(&d)))
324 }
325}
/// Dictionary entries shared by the simple font kinds; `Font::from_primitive` uses it for
/// both `Type1` and `TrueType` fonts.
#[derive(Object, ObjectWrite, Debug, DataSize, DeepClone)]
pub struct TFont {
    #[pdf(key="BaseFont")]
    pub base_font: Option<Name>,

    /// Character code that the first entry of `widths` belongs to
    /// (see `Font::widths`).
    #[pdf(key="FirstChar")]
    pub first_char: Option<i32>,

    #[pdf(key="LastChar")]
    pub last_char: Option<i32>,

    /// Glyph widths for consecutive character codes starting at `first_char`.
    #[pdf(key="Widths")]
    pub widths: Option<Vec<f32>>,

    #[pdf(key="FontDescriptor")]
    pub font_descriptor: Option<FontDescriptor>
}
345
/// Dictionary entries specific to a `Type0` font.
#[derive(Object, ObjectWrite, Debug, DataSize, DeepClone)]
pub struct Type0Font {
    /// The descendant fonts. The accessors on `Font` only ever consult the first one.
    #[pdf(key="DescendantFonts")]
    pub descendant_fonts: Vec<MaybeRef<Font>>,

    /// NOTE(review): `Font::from_primitive` removes `ToUnicode` from the dictionary before
    /// this struct is parsed, so on that path this is presumably always `None`; the stream
    /// lives in `Font::to_unicode` instead.
    #[pdf(key="ToUnicode")]
    pub to_unicode: Option<RcRef<Stream<()>>>,
}
354
/// Dictionary entries of a CID font; `Font::from_primitive` uses it for both
/// `CIDFontType0` and `CIDFontType2`.
#[derive(Object, ObjectWrite, Debug, DataSize, DeepClone)]
pub struct CIDFont {
    #[pdf(key="CIDSystemInfo")]
    pub system_info: Dictionary,

    #[pdf(key="FontDescriptor")]
    pub font_descriptor: FontDescriptor,

    /// Width used for every CID that the `W` array does not mention; 1000 when absent.
    #[pdf(key="DW", default="1000.")]
    pub default_width: f32,

    /// The raw `W` array. `Font::widths` decodes it.
    #[pdf(key="W")]
    pub widths: Vec<Primitive>,

    #[pdf(key="CIDToGIDMap")]
    pub cid_to_gid_map: Option<CidToGidMap>,

    /// Entries not claimed by any of the keyed fields above.
    #[pdf(other)]
    pub _other: Dictionary
}
375
376
/// A font descriptor dictionary. Each field is read from the key named in its `pdf`
/// attribute; fields with a `default` fall back to that value when the key is absent.
#[derive(Object, ObjectWrite, Debug, DataSize, DeepClone)]
pub struct FontDescriptor {
    #[pdf(key="FontName")]
    pub font_name: Name,

    #[pdf(key="FontFamily")]
    pub font_family: Option<PdfString>,

    #[pdf(key="FontStretch")]
    pub font_stretch: Option<FontStretch>,

    #[pdf(key="FontWeight")]
    pub font_weight: Option<f32>,

    // NOTE(review): presumably a combination of the bit masks in `mod flags` — confirm.
    #[pdf(key="Flags")]
    pub flags: u32,

    #[pdf(key="FontBBox")]
    pub font_bbox: Rectangle,

    #[pdf(key="ItalicAngle")]
    pub italic_angle: f32,

    #[pdf(key="Ascent")]
    pub ascent: Option<f32>,

    #[pdf(key="Descent")]
    pub descent: Option<f32>,

    #[pdf(key="Leading", default="0.")]
    pub leading: f32,

    #[pdf(key="CapHeight")]
    pub cap_height: Option<f32>,

    #[pdf(key="XHeight", default="0.")]
    pub xheight: f32,

    #[pdf(key="StemV", default="0.")]
    pub stem_v: f32,

    #[pdf(key="StemH", default="0.")]
    pub stem_h: f32,

    #[pdf(key="AvgWidth", default="0.")]
    pub avg_width: f32,

    #[pdf(key="MaxWidth", default="0.")]
    pub max_width: f32,

    #[pdf(key="MissingWidth", default="0.")]
    pub missing_width: f32,

    // The three font file streams below are consulted in this order by
    // `FontDescriptor::data`.
    #[pdf(key="FontFile")]
    pub font_file: Option<RcRef<Stream<()>>>,

    #[pdf(key="FontFile2")]
    pub font_file2: Option<RcRef<Stream<()>>>,

    #[pdf(key="FontFile3")]
    pub font_file3: Option<RcRef<Stream<FontStream3>>>,

    #[pdf(key="CharSet")]
    pub char_set: Option<PdfString>
}
443impl FontDescriptor {
444 pub fn data(&self, resolve: &impl Resolve) -> Option<Result<Arc<[u8]>>> {
445 if let Some(ref s) = self.font_file {
446 Some((**s).data(resolve))
447 } else if let Some(ref s) = self.font_file2 {
448 Some((**s).data(resolve))
449 } else if let Some(ref s) = self.font_file3 {
450 Some((**s).data(resolve))
451 } else {
452 None
453 }
454 }
455}
456
/// Value of the `Subtype` entry of a `FontFile3` stream (see `FontStream3`).
#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
#[pdf(key="Subtype")]
pub enum FontTypeExt {
    Type1C,
    CIDFontType0C,
    OpenType
}
/// Stream dictionary entries of a `FontFile3` stream (the type parameter of
/// `FontDescriptor::font_file3`).
#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
pub struct FontStream3 {
    #[pdf(key="Subtype")]
    pub subtype: FontTypeExt
}
469
/// Value of the `FontStretch` entry of a font descriptor.
///
/// The derived ordering follows declaration order, i.e. from `UltraCondensed` (smallest)
/// to `UltraExpanded` (largest).
#[derive(Object, ObjectWrite, Debug, PartialEq, Eq, PartialOrd, Ord, Clone, DataSize, DeepClone)]
pub enum FontStretch {
    UltraCondensed,
    ExtraCondensed,
    Condensed,
    SemiCondensed,
    Normal,
    SemiExpanded,
    Expanded,
    ExtraExpanded,
    UltraExpanded
}
482
/// Mapping from 16-bit codes to the Unicode text they stand for, as produced by
/// `parse_cmap` and consumed by `write_cmap`.
#[derive(Clone, Debug, Default)]
pub struct ToUnicodeMap {
    // Keyed by the code parsed with `parse_cid`; the value may hold more than one character.
    inner: HashMap<u16, SmallString>
}
488impl ToUnicodeMap {
489 pub fn new() -> Self {
490 Self::default()
491 }
492 pub fn create(iter: impl Iterator<Item=(u16, SmallString)>) -> Self {
496 ToUnicodeMap { inner: iter.collect() }
497 }
498 pub fn get(&self, gid: u16) -> Option<&str> {
499 self.inner.get(&gid).map(|s| s.as_str())
500 }
501 pub fn insert(&mut self, gid: u16, unicode: SmallString) {
502 self.inner.insert(gid, unicode);
503 }
504 pub fn iter(&self) -> impl Iterator<Item=(u16, &str)> {
505 self.inner.iter().map(|(&gid, unicode)| (gid, unicode.as_str()))
506 }
507 pub fn len(&self) -> usize {
508 self.inner.len()
509 }
510 pub fn is_empty(&self) -> bool {
511 self.inner.is_empty()
512 }
513}
514
/// Decodes big-endian UTF-16 bytes into characters.
///
/// The input is consumed two bytes at a time; a trailing odd byte is ignored. Each item is
/// either a decoded `char` or the error for an unpaired surrogate.
pub fn utf16be_to_char(
    data: &[u8],
) -> impl Iterator<Item = std::result::Result<char, std::char::DecodeUtf16Error>> + '_ {
    let code_units = data
        .chunks_exact(2)
        .map(|pair| (u16::from(pair[0]) << 8) | u16::from(pair[1]));
    char::decode_utf16(code_units)
}
522pub fn utf16be_to_string_lossy(data: &[u8]) -> String {
524 utf16be_to_char(data)
525 .map(|r| r.unwrap_or(std::char::REPLACEMENT_CHARACTER))
526 .collect()
527}
528pub fn utf16be_to_string(data: &[u8]) -> pdf::error::Result<SmallString> {
530 utf16be_to_char(data)
531 .map(|r| r.map_err(|_| PdfError::Utf16Decode))
532 .collect()
533}
534fn parse_cid(s: &PdfString) -> Result<u16> {
535 let b = s.as_bytes();
536 match b.len() {
537 2 => Ok(u16::from_be_bytes(b.try_into().unwrap())),
538 1 => Ok(b[0] as u16),
539 _ => Err(PdfError::CidDecode),
540 }
541}
/// Extracts the code → Unicode mappings from the text of a CMap.
///
/// Only the contents of `beginbfchar` and `beginbfrange` sections are interpreted; every
/// other token is skipped. Scanning stops at `endcmap` or at the first token the lexer
/// cannot produce.
///
/// Destination strings that are not valid UTF-16BE are logged with `warn!` and skipped.
///
/// # Errors
/// Fails when a source code is not one or two bytes long (see `parse_cid`), or when an
/// element of a `bfrange` destination array is not a string.
fn parse_cmap(data: &[u8]) -> Result<ToUnicodeMap> {
    let mut lexer = Lexer::new(data);
    let mut map = ToUnicodeMap::new();
    while let Ok(substr) = lexer.next() {
        match substr.as_slice() {
            // Section of `<code> <unicode>` pairs.
            b"beginbfchar" => loop {
                let a = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING);
                // The first thing that does not parse as a string ends the section.
                if a.is_err() {
                    break;
                }
                let b = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING);
                match (a, b) {
                    (Ok(Primitive::String(cid_data)), Ok(Primitive::String(unicode_data))) => {
                        let cid = parse_cid(&cid_data)?;
                        let bytes = unicode_data.as_bytes();
                        match utf16be_to_string(bytes) {
                            Ok(unicode) => map.insert(cid, unicode),
                            Err(_) => warn!("invalid unicode for cid {cid} {bytes:?}"),
                        }
                    }
                    _ => break,
                }
            },
            // Section of `<first> <last> <unicode>` or `<first> <last> [<unicode> ...]` triples.
            b"beginbfrange" => loop {
                let a = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING);
                // The first thing that does not parse as a string ends the section.
                if a.is_err() {
                    break;
                }
                let b = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING);
                let c = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING | ParseFlags::ARRAY);
                match (a, b, c) {
                    // Single destination string: each following code maps to the same bytes
                    // with the last byte incremented by one. An empty destination string does
                    // not match this arm and therefore ends the section.
                    (
                        Ok(Primitive::String(cid_start_data)),
                        Ok(Primitive::String(cid_end_data)),
                        Ok(Primitive::String(unicode_data)),
                    ) if unicode_data.data.len() > 0 => {
                        let cid_start = parse_cid(&cid_start_data)?;
                        let cid_end = parse_cid(&cid_end_data)?;
                        let mut unicode_data = unicode_data.into_bytes();

                        for cid in cid_start..=cid_end {
                            match utf16be_to_string(&unicode_data) {
                                Ok(unicode) => map.insert(cid, unicode),
                                Err(_) => warn!("invalid unicode for cid {cid} {unicode_data:?}"),
                            }
                            // Cannot fail: the guard above ensures at least one byte.
                            let last = unicode_data.last_mut().unwrap();
                            if *last < 255 {
                                *last += 1;
                            } else {
                                // The last byte cannot be incremented further; the rest of
                                // the range is left unmapped.
                                break;
                            }
                        }
                    }
                    // Destination array: one string per code, paired up in order; surplus
                    // codes or surplus array elements are ignored by `zip`.
                    (
                        Ok(Primitive::String(cid_start_data)),
                        Ok(Primitive::String(cid_end_data)),
                        Ok(Primitive::Array(unicode_data_arr)),
                    ) => {
                        let cid_start = parse_cid(&cid_start_data)?;
                        let cid_end = parse_cid(&cid_end_data)?;

                        for (cid, unicode_data) in (cid_start..=cid_end).zip(unicode_data_arr) {
                            let bytes = unicode_data.as_string()?.as_bytes();
                            match utf16be_to_string(bytes) {
                                Ok(unicode) => map.insert(cid, unicode),
                                Err(_) => warn!("invalid unicode for cid {cid} {bytes:?}"),
                            }
                        }
                    }
                    _ => break,
                }
            },
            b"endcmap" => break,
            _ => {}
        }
    }

    Ok(map)
}
621
/// Appends `cid` to `w` as a hex string of four upper-case digits in angle brackets,
/// e.g. `<001F>`.
fn write_cid(w: &mut String, cid: u16) {
    w.push('<');
    write!(w, "{:04X}", cid).unwrap();
    w.push('>');
}
/// Appends `unicode` to `out` as a hex string in angle brackets: every UTF-16 code unit of
/// the text becomes four upper-case hex digits.
fn write_unicode(out: &mut String, unicode: &str) {
    out.push('<');
    for code_unit in unicode.encode_utf16() {
        write!(out, "{:04X}", code_unit).unwrap();
    }
    out.push('>');
}
636pub fn write_cmap(map: &ToUnicodeMap) -> String {
637 let mut buf = String::new();
638 let mut list: Vec<(u16, &str)> = map.inner.iter().map(|(&cid, s)| (cid, s.as_str())).collect();
639 list.sort();
640
641
642 let mut remaining = &list[..];
643 let blocks = std::iter::from_fn(move || {
644 if remaining.len() == 0 {
645 return None;
646 }
647 let first_cid = remaining[0].0;
648 let seq_len = remaining.iter().enumerate().take_while(|&(i, &(cid, _))| cid == first_cid + i as u16).count();
649
650 let (block, tail) = remaining.split_at(seq_len);
651 remaining = tail;
652 Some(block)
653 });
654
655 for (single, group) in &blocks.group_by(|b| b.len() == 1) {
656 if single {
657 writeln!(buf, "beginbfchar").unwrap();
658 for block in group {
659 for &(cid, uni) in block {
660 write_cid(&mut buf, cid);
661 write!(buf, " ").unwrap();
662 write_unicode(&mut buf, uni);
663 writeln!(buf).unwrap();
664 }
665 }
666 writeln!(buf, "endbfchar").unwrap();
667 } else {
668 writeln!(buf, "beginbfrange").unwrap();
669 for block in group {
670 write_cid(&mut buf, block[0].0);
671 write!(buf, " ").unwrap();
672 write_cid(&mut buf, block.last().unwrap().0);
673 write!(buf, " [").unwrap();
674 for (i, &(_cid, u)) in block.iter().enumerate() {
675 if i > 0 {
676 write!(buf, ", ").unwrap();
677 }
678 write_unicode(&mut buf, u);
679 }
680 writeln!(buf, "]").unwrap();
681 }
682 writeln!(buf, "endbfrange").unwrap();
683 }
684 }
685
686 buf
687}
688
#[cfg(test)]
mod tests {

    use crate::font::{utf16be_to_string, utf16be_to_char, utf16be_to_string_lossy};

    /// A single valid code unit decodes to the matching character.
    #[test]
    fn utf16be_to_string_quick() {
        let v = vec![0x20, 0x09];
        let s = utf16be_to_string(&v);
        assert_eq!(s.unwrap(), "\u{2009}");
        assert!(!v.is_empty());
    }

    /// Mixed valid and invalid input: checks the per-unit results, the strict decoder's
    /// error and the lossy decoder's substitution.
    #[test]
    fn test_to_char() {
        // "𝄞mus<lone low surrogate>ic<lone high surrogate>" in UTF-16BE.
        let v = [
            0xD8, 0x34, 0xDD, 0x1E, 0x00, 0x6d, 0x00, 0x75, 0x00, 0x73, 0xDD, 0x1E, 0x00, 0x69, 0x00,
            0x63, 0xD8, 0x34,
        ];

        assert_eq!(
            utf16be_to_char(&v)
                .map(|r| r.map_err(|e| e.unpaired_surrogate()))
                .collect::<Vec<_>>(),
            vec![
                Ok('𝄞'),
                Ok('m'),
                Ok('u'),
                Ok('s'),
                Err(0xDD1E),
                Ok('i'),
                Ok('c'),
                Err(0xD834)
            ]
        );

        let mut lossy = String::from("𝄞mus");
        lossy.push(std::char::REPLACEMENT_CHARACTER);
        lossy.push('i');
        lossy.push('c');
        lossy.push(std::char::REPLACEMENT_CHARACTER);

        // The strict decoder must reject the unpaired surrogates; an unexpected success is
        // a test failure rather than a silent pass.
        match utf16be_to_string(&v) {
            Err(e) => assert_eq!(e.to_string(), "UTF16 decode error"),
            Ok(_) => panic!("input with unpaired surrogates must not decode"),
        }
        assert_eq!(utf16be_to_string(&v[..8]).unwrap(), String::from("𝄞mu"));
        assert_eq!(utf16be_to_string_lossy(&v), lossy);
    }
}