1use std::collections::HashMap;
8
9use crate::error::BackendError;
10
/// Fallback glyph width, used when a CID font dictionary has no usable `DW` entry.
const DEFAULT_CID_WIDTH: f64 = 1000.0;

/// Fallback ascent, used when the font descriptor has no usable `Ascent` entry.
const DEFAULT_CID_ASCENT: f64 = 880.0;

/// Fallback descent, used when the font descriptor has no usable `Descent` entry.
const DEFAULT_CID_DESCENT: f64 = -120.0;
19
/// Kind of CID font, derived from the `Subtype` name of the CID font dictionary.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CidFontType {
    /// Selected when `Subtype` is `CIDFontType0`.
    Type0,
    /// Selected for every other (or missing) `Subtype`; also used by the default metrics.
    Type2,
}
28
/// Mapping from character identifiers (CIDs) to glyph identifiers (GIDs).
#[derive(Debug, Clone, PartialEq)]
pub enum CidToGidMap {
    /// Every CID is used unchanged as its GID.
    Identity,
    /// Lookup table indexed by CID; the entry at index `cid` is that CID's GID.
    Explicit(Vec<u16>),
}

impl CidToGidMap {
    /// Returns the GID for `cid`.
    ///
    /// A CID that lies beyond the end of an explicit table is returned
    /// unchanged, i.e. it falls back to identity mapping.
    pub fn map(&self, cid: u32) -> u32 {
        let table = match self {
            CidToGidMap::Identity => return cid,
            CidToGidMap::Explicit(table) => table,
        };
        table
            .get(cid as usize)
            .map_or(cid, |&gid| u32::from(gid))
    }

    /// Builds an explicit table from raw bytes.
    ///
    /// The bytes are read as consecutive big-endian `u16` values; a trailing
    /// odd byte is ignored.
    pub fn from_stream(data: &[u8]) -> Self {
        let table = data
            .chunks_exact(2)
            .map(|pair| u16::from_be_bytes([pair[0], pair[1]]))
            .collect();
        CidToGidMap::Explicit(table)
    }
}
65
/// Contents of a `CIDSystemInfo` dictionary.
#[derive(Debug, Clone, PartialEq)]
pub struct CidSystemInfo {
    /// Value of the `Registry` entry.
    pub registry: String,
    /// Value of the `Ordering` entry.
    pub ordering: String,
    /// Value of the `Supplement` entry.
    pub supplement: i64,
}

impl CidSystemInfo {
    /// Returns `true` when the registry is exactly `Adobe` and the ordering is
    /// one of `Japan1`, `GB1`, `CNS1` or `Korea1`.
    pub fn is_adobe_cjk(&self) -> bool {
        const CJK_ORDERINGS: [&str; 4] = ["Japan1", "GB1", "CNS1", "Korea1"];

        if self.registry != "Adobe" {
            return false;
        }
        CJK_ORDERINGS.contains(&self.ordering.as_str())
    }
}
84
85#[derive(Debug, Clone)]
92pub struct CidFontMetrics {
93 widths: HashMap<u32, f64>,
95 default_width: f64,
97 ascent: f64,
99 descent: f64,
101 font_bbox: Option<[f64; 4]>,
103 font_type: CidFontType,
105 cid_to_gid: CidToGidMap,
107 system_info: Option<CidSystemInfo>,
109}
110
111impl CidFontMetrics {
112 #[allow(clippy::too_many_arguments)]
114 pub fn new(
115 widths: HashMap<u32, f64>,
116 default_width: f64,
117 ascent: f64,
118 descent: f64,
119 font_bbox: Option<[f64; 4]>,
120 font_type: CidFontType,
121 cid_to_gid: CidToGidMap,
122 system_info: Option<CidSystemInfo>,
123 ) -> Self {
124 Self {
125 widths,
126 default_width,
127 ascent,
128 descent,
129 font_bbox,
130 font_type,
131 cid_to_gid,
132 system_info,
133 }
134 }
135
136 pub fn default_metrics() -> Self {
138 Self {
139 widths: HashMap::new(),
140 default_width: DEFAULT_CID_WIDTH,
141 ascent: DEFAULT_CID_ASCENT,
142 descent: DEFAULT_CID_DESCENT,
143 font_bbox: None,
144 font_type: CidFontType::Type2,
145 cid_to_gid: CidToGidMap::Identity,
146 system_info: None,
147 }
148 }
149
150 pub fn get_width(&self, cid: u32) -> f64 {
152 self.widths.get(&cid).copied().unwrap_or(self.default_width)
153 }
154
155 pub fn ascent(&self) -> f64 {
157 self.ascent
158 }
159
160 pub fn descent(&self) -> f64 {
162 self.descent
163 }
164
165 pub fn font_bbox(&self) -> Option<[f64; 4]> {
167 self.font_bbox
168 }
169
170 pub fn default_width(&self) -> f64 {
172 self.default_width
173 }
174
175 pub fn font_type(&self) -> CidFontType {
177 self.font_type
178 }
179
180 pub fn cid_to_gid(&self) -> &CidToGidMap {
182 &self.cid_to_gid
183 }
184
185 pub fn map_cid_to_gid(&self, cid: u32) -> u32 {
187 self.cid_to_gid.map(cid)
188 }
189
190 pub fn system_info(&self) -> Option<&CidSystemInfo> {
192 self.system_info.as_ref()
193 }
194}
195
196pub fn parse_w_array(objects: &[lopdf::Object], doc: &lopdf::Document) -> HashMap<u32, f64> {
206 let mut widths = HashMap::new();
207 let mut i = 0;
208
209 while i < objects.len() {
210 let cid_start = match object_to_u32(resolve_object(doc, &objects[i])) {
211 Some(v) => v,
212 None => {
213 i += 1;
214 continue;
215 }
216 };
217 i += 1;
218
219 if i >= objects.len() {
220 break;
221 }
222
223 let next = resolve_object(doc, &objects[i]);
224 if let Ok(arr) = next.as_array() {
225 for (j, obj) in arr.iter().enumerate() {
227 let obj = resolve_object(doc, obj);
228 if let Some(w) = object_to_f64(obj) {
229 widths.insert(cid_start + j as u32, w);
230 }
231 }
232 i += 1;
233 } else if let Some(cid_end) = object_to_u32(next) {
234 i += 1;
236 if i < objects.len() {
237 let w_obj = resolve_object(doc, &objects[i]);
238 if let Some(w) = object_to_f64(w_obj) {
239 for cid in cid_start..=cid_end {
240 widths.insert(cid, w);
241 }
242 }
243 i += 1;
244 }
245 } else {
246 i += 1;
247 }
248 }
249
250 widths
251}
252
253pub fn extract_cid_font_metrics(
255 doc: &lopdf::Document,
256 cid_font_dict: &lopdf::Dictionary,
257) -> Result<CidFontMetrics, BackendError> {
258 let font_type = cid_font_dict
260 .get(b"Subtype")
261 .ok()
262 .and_then(|o| o.as_name_str().ok())
263 .map(|s| match s {
264 "CIDFontType0" => CidFontType::Type0,
265 _ => CidFontType::Type2,
266 })
267 .unwrap_or(CidFontType::Type2);
268
269 let default_width = cid_font_dict
271 .get(b"DW")
272 .ok()
273 .and_then(|o| object_to_f64(resolve_object(doc, o)))
274 .unwrap_or(DEFAULT_CID_WIDTH);
275
276 let widths = cid_font_dict
278 .get(b"W")
279 .ok()
280 .map(|o| resolve_object(doc, o))
281 .and_then(|o| o.as_array().ok())
282 .map(|arr| parse_w_array(arr, doc))
283 .unwrap_or_default();
284
285 let cid_to_gid = parse_cid_to_gid_map(doc, cid_font_dict);
287
288 let system_info = parse_cid_system_info(doc, cid_font_dict);
290
291 let (ascent, descent, font_bbox) = parse_cid_font_descriptor(doc, cid_font_dict);
293
294 Ok(CidFontMetrics::new(
295 widths,
296 default_width,
297 ascent,
298 descent,
299 font_bbox,
300 font_type,
301 cid_to_gid,
302 system_info,
303 ))
304}
305
306fn parse_cid_to_gid_map(doc: &lopdf::Document, dict: &lopdf::Dictionary) -> CidToGidMap {
308 match dict.get(b"CIDToGIDMap") {
309 Ok(obj) => {
310 let obj = resolve_object(doc, obj);
311 if let Ok(name) = obj.as_name_str() {
312 if name == "Identity" {
313 return CidToGidMap::Identity;
314 }
315 }
316 if let Ok(stream) = obj.as_stream() {
317 let data = if stream.dict.get(b"Filter").is_ok() {
318 stream.decompressed_content().unwrap_or_default()
319 } else {
320 stream.content.clone()
321 };
322 return CidToGidMap::from_stream(&data);
323 }
324 CidToGidMap::Identity
325 }
326 Err(_) => CidToGidMap::Identity,
327 }
328}
329
330fn parse_cid_system_info(doc: &lopdf::Document, dict: &lopdf::Dictionary) -> Option<CidSystemInfo> {
332 let info_obj = dict.get(b"CIDSystemInfo").ok()?;
333 let info_obj = resolve_object(doc, info_obj);
334 let info_dict = info_obj.as_dict().ok()?;
335
336 let registry = info_dict
337 .get(b"Registry")
338 .ok()
339 .and_then(|o| match o {
340 lopdf::Object::String(s, _) => String::from_utf8(s.clone()).ok(),
341 _ => None,
342 })
343 .unwrap_or_default();
344
345 let ordering = info_dict
346 .get(b"Ordering")
347 .ok()
348 .and_then(|o| match o {
349 lopdf::Object::String(s, _) => String::from_utf8(s.clone()).ok(),
350 _ => None,
351 })
352 .unwrap_or_default();
353
354 let supplement = info_dict
355 .get(b"Supplement")
356 .ok()
357 .and_then(|o| o.as_i64().ok())
358 .unwrap_or(0);
359
360 Some(CidSystemInfo {
361 registry,
362 ordering,
363 supplement,
364 })
365}
366
367fn parse_cid_font_descriptor(
369 doc: &lopdf::Document,
370 dict: &lopdf::Dictionary,
371) -> (f64, f64, Option<[f64; 4]>) {
372 let desc = match dict
373 .get(b"FontDescriptor")
374 .ok()
375 .map(|o| resolve_object(doc, o))
376 .and_then(|o| o.as_dict().ok())
377 {
378 Some(d) => d,
379 None => return (DEFAULT_CID_ASCENT, DEFAULT_CID_DESCENT, None),
380 };
381
382 let ascent = desc
383 .get(b"Ascent")
384 .ok()
385 .and_then(object_to_f64)
386 .unwrap_or(DEFAULT_CID_ASCENT);
387
388 let descent = desc
389 .get(b"Descent")
390 .ok()
391 .and_then(object_to_f64)
392 .unwrap_or(DEFAULT_CID_DESCENT);
393
394 let font_bbox = desc
395 .get(b"FontBBox")
396 .ok()
397 .and_then(|o| {
398 let o = resolve_object(doc, o);
399 o.as_array().ok()
400 })
401 .and_then(|arr| {
402 if arr.len() == 4 {
403 let vals: Vec<f64> = arr.iter().filter_map(object_to_f64).collect();
404 if vals.len() == 4 {
405 Some([vals[0], vals[1], vals[2], vals[3]])
406 } else {
407 None
408 }
409 } else {
410 None
411 }
412 });
413
414 (ascent, descent, font_bbox)
415}
416
417fn resolve_object<'a>(doc: &'a lopdf::Document, obj: &'a lopdf::Object) -> &'a lopdf::Object {
419 match obj {
420 lopdf::Object::Reference(id) => doc.get_object(*id).unwrap_or(obj),
421 _ => obj,
422 }
423}
424
425fn object_to_f64(obj: &lopdf::Object) -> Option<f64> {
427 match obj {
428 lopdf::Object::Integer(i) => Some(*i as f64),
429 lopdf::Object::Real(f) => Some(*f as f64),
430 _ => None,
431 }
432}
433
434fn object_to_u32(obj: &lopdf::Object) -> Option<u32> {
436 match obj {
437 lopdf::Object::Integer(i) => Some(*i as u32),
438 lopdf::Object::Real(f) => Some(*f as u32),
439 _ => None,
440 }
441}
442
/// Facts derived from the name of a predefined CMap or character collection.
#[derive(Debug, Clone, PartialEq)]
pub struct PredefinedCMapInfo {
    /// The name exactly as it was passed to [`parse_predefined_cmap_name`].
    pub name: String,
    /// Registry of the character collection; always `"Adobe"` for names
    /// recognised by [`parse_predefined_cmap_name`].
    pub registry: String,
    /// Ordering of the character collection: `"Identity"`, `"Japan1"`,
    /// `"GB1"`, `"CNS1"` or `"Korea1"`.
    pub ordering: String,
    /// `0` for horizontal names, `1` for vertical (`-V` / `V`) names.
    pub writing_mode: u8,
    /// `true` only for `Identity-H` and `Identity-V`.
    pub is_identity: bool,
}

/// Classifies a predefined CMap name.
///
/// Recognised inputs, checked in this order:
///
/// 1. `Identity-H` / `Identity-V`;
/// 2. the bare Japanese CMaps `H` / `V`;
/// 3. character-collection names `Adobe-<Ordering>-<Supplement>` where the
///    ordering is `Japan1`, `GB1`, `CNS1` or `Korea1` and the supplement
///    parses as an integer (reported with writing mode `0`);
/// 4. names ending in `-H` or `-V` whose base identifies a CJK ordering
///    (for example `UniJIS-UTF16-H`, `GBK-EUC-V`, `B5pc-H`, `KSCms-UHC-H`).
///
/// Returns `None` for anything else, including the empty string and
/// `Adobe-` names with an unknown ordering.
///
/// Compared with the earlier keyword list, this also accepts the predefined
/// CMaps `H`, `V`, `EUC-H/V`, `90msp-RKSJ-H/V` and `B5pc-H/V`, which were
/// previously rejected; results for all other names are unchanged.
pub fn parse_predefined_cmap_name(name: &str) -> Option<PredefinedCMapInfo> {
    const COLLECTION_ORDERINGS: [&str; 4] = ["Japan1", "GB1", "CNS1", "Korea1"];
    // Japanese bases that contain none of the generic Japanese markers.
    const JAPAN1_BASES: [&str; 13] = [
        "78-RKSJ",
        "83pv-RKSJ",
        "90pv-RKSJ",
        "90ms-RKSJ",
        "90msp-RKSJ",
        "Add-RKSJ",
        "Ext-RKSJ",
        "EUC",
        "Hankaku",
        "Hiragana",
        "Katakana",
        "Roman",
        "WP-Symbol",
    ];

    let info = |ordering: &str, writing_mode: u8, is_identity: bool| PredefinedCMapInfo {
        name: name.to_string(),
        registry: "Adobe".to_string(),
        ordering: ordering.to_string(),
        writing_mode,
        is_identity,
    };

    // Names without a usable `<base>-H` / `<base>-V` shape are handled up front.
    match name {
        "Identity-H" => return Some(info("Identity", 0, true)),
        "Identity-V" => return Some(info("Identity", 1, true)),
        "H" => return Some(info("Japan1", 0, false)),
        "V" => return Some(info("Japan1", 1, false)),
        _ => {}
    }

    if let Some(rest) = name.strip_prefix("Adobe-") {
        // An `Adobe-` name with an unknown ordering is rejected outright.
        let (ordering, supplement) = COLLECTION_ORDERINGS.iter().find_map(|ordering| {
            rest.strip_prefix(*ordering)
                .and_then(|tail| tail.strip_prefix('-'))
                .map(|supplement| (*ordering, supplement))
        })?;

        if supplement.parse::<i32>().is_ok() {
            return Some(info(ordering, 0, false));
        }
        // A non-numeric supplement falls through to the suffix rules below.
    }

    let (base, writing_mode) = if let Some(base) = name.strip_suffix("-H") {
        (base, 0u8)
    } else if let Some(base) = name.strip_suffix("-V") {
        (base, 1u8)
    } else {
        return None;
    };

    let ordering = if base.contains("JIS")
        || base.contains("Japan")
        || base.contains("EUC-JP")
        || JAPAN1_BASES.contains(&base)
    {
        "Japan1"
    } else if base.contains("GB") && !base.starts_with("UniCNS") {
        "GB1"
    } else if base.contains("CNS")
        || base.contains("ETen")
        || base.contains("HKscs")
        || base == "B5pc"
    {
        // `UniCNS-*` bases land here because they contain "CNS".
        "CNS1"
    } else if base.contains("KSC") || base.contains("UniKS") {
        "Korea1"
    } else {
        return None;
    };

    Some(info(ordering, writing_mode, false))
}
568
569pub fn is_type0_font(font_dict: &lopdf::Dictionary) -> bool {
571 font_dict
572 .get(b"Subtype")
573 .ok()
574 .and_then(|o| o.as_name_str().ok())
575 .is_some_and(|s| s == "Type0")
576}
577
578pub fn get_descendant_font<'a>(
580 doc: &'a lopdf::Document,
581 type0_dict: &'a lopdf::Dictionary,
582) -> Option<&'a lopdf::Dictionary> {
583 let descendants = type0_dict.get(b"DescendantFonts").ok()?;
584 let descendants = resolve_object(doc, descendants);
585 let arr = descendants.as_array().ok()?;
586 let first = arr.first()?;
587 let first = resolve_object(doc, first);
588 first.as_dict().ok()
589}
590
591pub fn get_type0_encoding(font_dict: &lopdf::Dictionary) -> Option<String> {
593 let encoding = font_dict.get(b"Encoding").ok()?;
594 encoding.as_name_str().ok().map(|s| s.to_string())
595}
596
/// Returns `true` when `font_name` starts with a subset tag: six ASCII
/// uppercase letters followed by `+` and at least one more byte
/// (for example `ABCDEF+ArialMT`).
pub fn is_subset_font(font_name: &str) -> bool {
    let bytes = font_name.as_bytes();
    // Shortest accepted name: 6 tag letters, '+', and one byte of real name.
    bytes.len() >= 8
        && bytes[..6].iter().all(u8::is_ascii_uppercase)
        && bytes[6] == b'+'
}
616
617pub fn strip_subset_prefix(font_name: &str) -> &str {
622 if is_subset_font(font_name) {
623 &font_name[7..]
624 } else {
625 font_name
626 }
627}
628
// Unit tests for the CID font helpers in this file. PDF structures are built
// in memory with `lopdf`; no files are read.
#[cfg(test)]
mod tests {
    use super::*;
    use lopdf::{Document, Object, Stream, dictionary};

    // ---- CidToGidMap ----

    #[test]
    fn identity_map_returns_same_cid() {
        let map = CidToGidMap::Identity;
        assert_eq!(map.map(0), 0);
        assert_eq!(map.map(100), 100);
        assert_eq!(map.map(65535), 65535);
    }

    #[test]
    fn explicit_map_looks_up_table() {
        let table = vec![10, 20, 30, 40, 50];
        let map = CidToGidMap::Explicit(table);
        assert_eq!(map.map(0), 10);
        assert_eq!(map.map(1), 20);
        assert_eq!(map.map(4), 50);
    }

    #[test]
    fn explicit_map_out_of_range_returns_cid() {
        let table = vec![10, 20, 30];
        let map = CidToGidMap::Explicit(table);
        // CID 5 is past the end of the three-entry table, so it maps to itself.
        assert_eq!(map.map(5), 5);
    }

    #[test]
    fn from_stream_parses_big_endian_u16() {
        // Two big-endian u16 values: 5 and 10.
        let data = vec![0x00, 0x05, 0x00, 0x0A];
        let map = CidToGidMap::from_stream(&data);
        assert_eq!(map.map(0), 5);
        assert_eq!(map.map(1), 10);
    }

    #[test]
    fn from_stream_handles_odd_length() {
        // One complete pair followed by a dangling byte.
        let data = vec![0x00, 0x05, 0x00];
        let map = CidToGidMap::from_stream(&data);
        assert_eq!(map.map(0), 5);
        // The dangling byte produced no entry, so CID 1 maps to itself.
        assert_eq!(map.map(1), 1);
    }

    #[test]
    fn from_stream_empty() {
        let map = CidToGidMap::from_stream(&[]);
        // An empty table sends every CID to itself.
        assert_eq!(map.map(0), 0);
    }

    // ---- CidSystemInfo ----

    #[test]
    fn cid_system_info_adobe_japan1() {
        let info = CidSystemInfo {
            registry: "Adobe".to_string(),
            ordering: "Japan1".to_string(),
            supplement: 6,
        };
        assert!(info.is_adobe_cjk());
    }

    #[test]
    fn cid_system_info_adobe_gb1() {
        let info = CidSystemInfo {
            registry: "Adobe".to_string(),
            ordering: "GB1".to_string(),
            supplement: 5,
        };
        assert!(info.is_adobe_cjk());
    }

    #[test]
    fn cid_system_info_adobe_cns1() {
        let info = CidSystemInfo {
            registry: "Adobe".to_string(),
            ordering: "CNS1".to_string(),
            supplement: 7,
        };
        assert!(info.is_adobe_cjk());
    }

    #[test]
    fn cid_system_info_adobe_korea1() {
        let info = CidSystemInfo {
            registry: "Adobe".to_string(),
            ordering: "Korea1".to_string(),
            supplement: 2,
        };
        assert!(info.is_adobe_cjk());
    }

    #[test]
    fn cid_system_info_non_adobe_not_cjk() {
        // A CJK ordering under a registry other than "Adobe" does not count.
        let info = CidSystemInfo {
            registry: "Custom".to_string(),
            ordering: "Japan1".to_string(),
            supplement: 0,
        };
        assert!(!info.is_adobe_cjk());
    }

    #[test]
    fn cid_system_info_adobe_non_cjk_ordering() {
        // "Identity" is not one of the four CJK orderings.
        let info = CidSystemInfo {
            registry: "Adobe".to_string(),
            ordering: "Identity".to_string(),
            supplement: 0,
        };
        assert!(!info.is_adobe_cjk());
    }

    // ---- CidFontMetrics ----

    #[test]
    fn cid_font_metrics_get_width_from_map() {
        let mut widths = HashMap::new();
        widths.insert(1, 500.0);
        widths.insert(2, 600.0);
        widths.insert(100, 250.0);

        let metrics = CidFontMetrics::new(
            widths,
            1000.0,
            880.0,
            -120.0,
            None,
            CidFontType::Type2,
            CidToGidMap::Identity,
            None,
        );

        assert_eq!(metrics.get_width(1), 500.0);
        assert_eq!(metrics.get_width(2), 600.0);
        assert_eq!(metrics.get_width(100), 250.0);
    }

    #[test]
    fn cid_font_metrics_get_width_returns_default() {
        let metrics = CidFontMetrics::new(
            HashMap::new(),
            1000.0,
            880.0,
            -120.0,
            None,
            CidFontType::Type2,
            CidToGidMap::Identity,
            None,
        );

        assert_eq!(metrics.get_width(0), 1000.0);
        assert_eq!(metrics.get_width(999), 1000.0);
    }

    #[test]
    fn cid_font_metrics_custom_default_width() {
        let metrics = CidFontMetrics::new(
            HashMap::new(),
            500.0,
            880.0,
            -120.0,
            None,
            CidFontType::Type0,
            CidToGidMap::Identity,
            None,
        );

        assert_eq!(metrics.get_width(0), 500.0);
        assert_eq!(metrics.default_width(), 500.0);
    }

    #[test]
    fn cid_font_metrics_accessors() {
        let info = CidSystemInfo {
            registry: "Adobe".to_string(),
            ordering: "Japan1".to_string(),
            supplement: 6,
        };
        let metrics = CidFontMetrics::new(
            HashMap::new(),
            1000.0,
            880.0,
            -120.0,
            Some([-100.0, -200.0, 1100.0, 900.0]),
            CidFontType::Type0,
            CidToGidMap::Identity,
            Some(info),
        );

        assert_eq!(metrics.ascent(), 880.0);
        assert_eq!(metrics.descent(), -120.0);
        assert_eq!(metrics.font_bbox(), Some([-100.0, -200.0, 1100.0, 900.0]));
        assert_eq!(metrics.font_type(), CidFontType::Type0);
        assert_eq!(metrics.cid_to_gid(), &CidToGidMap::Identity);
        assert!(metrics.system_info().unwrap().is_adobe_cjk());
    }

    #[test]
    fn cid_font_metrics_map_cid_to_gid() {
        let table = vec![10, 20, 30];
        let metrics = CidFontMetrics::new(
            HashMap::new(),
            1000.0,
            880.0,
            -120.0,
            None,
            CidFontType::Type2,
            CidToGidMap::Explicit(table),
            None,
        );

        assert_eq!(metrics.map_cid_to_gid(0), 10);
        assert_eq!(metrics.map_cid_to_gid(1), 20);
        assert_eq!(metrics.map_cid_to_gid(2), 30);
        // Past the end of the table: the CID is returned unchanged.
        assert_eq!(metrics.map_cid_to_gid(5), 5);
    }

    #[test]
    fn cid_font_metrics_default() {
        let metrics = CidFontMetrics::default_metrics();
        assert_eq!(metrics.default_width(), DEFAULT_CID_WIDTH);
        assert_eq!(metrics.ascent(), DEFAULT_CID_ASCENT);
        assert_eq!(metrics.descent(), DEFAULT_CID_DESCENT);
        assert_eq!(metrics.font_bbox(), None);
        assert_eq!(metrics.font_type(), CidFontType::Type2);
        assert_eq!(metrics.cid_to_gid(), &CidToGidMap::Identity);
        assert!(metrics.system_info().is_none());
    }

    // ---- parse_w_array ----

    #[test]
    fn parse_w_array_individual_widths() {
        let doc = Document::with_version("1.5");
        // Form `c [w1 w2 w3]`: CIDs 1, 2 and 3 get 500, 600 and 700.
        let objects = vec![
            Object::Integer(1),
            Object::Array(vec![
                Object::Integer(500),
                Object::Integer(600),
                Object::Integer(700),
            ]),
        ];

        let widths = parse_w_array(&objects, &doc);
        assert_eq!(widths.get(&1), Some(&500.0));
        assert_eq!(widths.get(&2), Some(&600.0));
        assert_eq!(widths.get(&3), Some(&700.0));
        assert_eq!(widths.get(&0), None);
        assert_eq!(widths.get(&4), None);
    }

    #[test]
    fn parse_w_array_range_format() {
        let doc = Document::with_version("1.5");
        // Form `c_first c_last w`: CIDs 10 through 20 all get 500.
        let objects = vec![
            Object::Integer(10),
            Object::Integer(20),
            Object::Integer(500),
        ];

        let widths = parse_w_array(&objects, &doc);
        for cid in 10..=20 {
            assert_eq!(widths.get(&cid), Some(&500.0), "CID {} should be 500", cid);
        }
        assert_eq!(widths.get(&9), None);
        assert_eq!(widths.get(&21), None);
    }

    #[test]
    fn parse_w_array_mixed_formats() {
        let doc = Document::with_version("1.5");
        // An inline-list entry followed by a range entry in the same array.
        let objects = vec![
            Object::Integer(1),
            Object::Array(vec![Object::Integer(250), Object::Integer(300)]),
            Object::Integer(10),
            Object::Integer(20),
            Object::Integer(500),
        ];

        let widths = parse_w_array(&objects, &doc);
        assert_eq!(widths.get(&1), Some(&250.0));
        assert_eq!(widths.get(&2), Some(&300.0));
        for cid in 10..=20 {
            assert_eq!(widths.get(&cid), Some(&500.0));
        }
    }

    #[test]
    fn parse_w_array_empty() {
        let doc = Document::with_version("1.5");
        let widths = parse_w_array(&[], &doc);
        assert!(widths.is_empty());
    }

    #[test]
    fn parse_w_array_real_values() {
        let doc = Document::with_version("1.5");
        let objects = vec![
            Object::Integer(1),
            Object::Array(vec![Object::Real(500.5), Object::Real(600.5)]),
        ];

        let widths = parse_w_array(&objects, &doc);
        // Compared with a tolerance rather than for exact equality.
        assert!((widths[&1] - 500.5).abs() < 0.1);
        assert!((widths[&2] - 600.5).abs() < 0.1);
    }

    #[test]
    fn parse_w_array_single_cid_range() {
        let doc = Document::with_version("1.5");
        // A range whose first and last CID are the same covers exactly one CID.
        let objects = vec![Object::Integer(5), Object::Integer(5), Object::Integer(700)];

        let widths = parse_w_array(&objects, &doc);
        assert_eq!(widths.get(&5), Some(&700.0));
        assert_eq!(widths.len(), 1);
    }

    // ---- extract_cid_font_metrics ----

    #[test]
    fn extract_cid_font_metrics_basic() {
        let mut doc = Document::with_version("1.5");

        // The W array is stored as an indirect object and referenced by id.
        let w_array = Object::Array(vec![
            Object::Integer(1),
            Object::Array(vec![Object::Integer(500), Object::Integer(600)]),
        ]);
        let w_id = doc.add_object(w_array);

        let cid_font_dict = dictionary! {
            "Type" => "Font",
            "Subtype" => "CIDFontType2",
            "BaseFont" => "MSGothic",
            "DW" => Object::Integer(1000),
            "W" => w_id,
            "CIDToGIDMap" => "Identity",
        };

        let metrics = extract_cid_font_metrics(&doc, &cid_font_dict).unwrap();
        assert_eq!(metrics.font_type(), CidFontType::Type2);
        assert_eq!(metrics.default_width(), 1000.0);
        assert_eq!(metrics.get_width(1), 500.0);
        assert_eq!(metrics.get_width(2), 600.0);
        // CID 3 has no explicit width, so the default applies.
        assert_eq!(metrics.get_width(3), 1000.0);
        assert_eq!(metrics.cid_to_gid(), &CidToGidMap::Identity);
    }

    #[test]
    fn extract_cid_font_metrics_type0() {
        let doc = Document::with_version("1.5");

        // No DW entry: the default width constant is expected.
        let cid_font_dict = dictionary! {
            "Type" => "Font",
            "Subtype" => "CIDFontType0",
            "BaseFont" => "KozMinPro-Regular",
        };

        let metrics = extract_cid_font_metrics(&doc, &cid_font_dict).unwrap();
        assert_eq!(metrics.font_type(), CidFontType::Type0);
        assert_eq!(metrics.default_width(), DEFAULT_CID_WIDTH);
    }

    #[test]
    fn extract_cid_font_metrics_with_descriptor() {
        let mut doc = Document::with_version("1.5");

        // The descriptor is an indirect object referenced from the font dictionary.
        let desc_id = doc.add_object(Object::Dictionary(dictionary! {
            "Type" => "FontDescriptor",
            "FontName" => "MSGothic",
            "Ascent" => Object::Integer(859),
            "Descent" => Object::Integer(-140),
            "FontBBox" => Object::Array(vec![
                Object::Integer(0),
                Object::Integer(-137),
                Object::Integer(1000),
                Object::Integer(859),
            ]),
        }));

        let cid_font_dict = dictionary! {
            "Type" => "Font",
            "Subtype" => "CIDFontType2",
            "BaseFont" => "MSGothic",
            "FontDescriptor" => desc_id,
        };

        let metrics = extract_cid_font_metrics(&doc, &cid_font_dict).unwrap();
        assert_eq!(metrics.ascent(), 859.0);
        assert_eq!(metrics.descent(), -140.0);
        assert!(metrics.font_bbox().is_some());
    }

    #[test]
    fn extract_cid_font_metrics_with_system_info() {
        let doc = Document::with_version("1.5");

        let cid_font_dict = dictionary! {
            "Type" => "Font",
            "Subtype" => "CIDFontType2",
            "BaseFont" => "MSGothic",
            "CIDSystemInfo" => Object::Dictionary(dictionary! {
                "Registry" => Object::String("Adobe".as_bytes().to_vec(), lopdf::StringFormat::Literal),
                "Ordering" => Object::String("Japan1".as_bytes().to_vec(), lopdf::StringFormat::Literal),
                "Supplement" => Object::Integer(6),
            }),
        };

        let metrics = extract_cid_font_metrics(&doc, &cid_font_dict).unwrap();
        let info = metrics.system_info().unwrap();
        assert_eq!(info.registry, "Adobe");
        assert_eq!(info.ordering, "Japan1");
        assert_eq!(info.supplement, 6);
        assert!(info.is_adobe_cjk());
    }

    #[test]
    fn extract_cid_font_metrics_explicit_gid_map() {
        let mut doc = Document::with_version("1.5");

        // Stream holding two big-endian u16 GIDs (5 and 10), with no Filter entry.
        let gid_data = vec![0x00, 0x05, 0x00, 0x0A];
        let gid_stream = Stream::new(dictionary! {}, gid_data);
        let gid_stream_id = doc.add_object(Object::Stream(gid_stream));

        let cid_font_dict = dictionary! {
            "Type" => "Font",
            "Subtype" => "CIDFontType2",
            "BaseFont" => "CustomFont",
            "CIDToGIDMap" => gid_stream_id,
        };

        let metrics = extract_cid_font_metrics(&doc, &cid_font_dict).unwrap();
        assert_eq!(metrics.map_cid_to_gid(0), 5);
        assert_eq!(metrics.map_cid_to_gid(1), 10);
    }

    // ---- parse_predefined_cmap_name ----

    #[test]
    fn parse_identity_h() {
        let info = parse_predefined_cmap_name("Identity-H").unwrap();
        assert_eq!(info.name, "Identity-H");
        assert_eq!(info.writing_mode, 0);
        assert!(info.is_identity);
    }

    #[test]
    fn parse_identity_v() {
        let info = parse_predefined_cmap_name("Identity-V").unwrap();
        assert_eq!(info.name, "Identity-V");
        assert_eq!(info.writing_mode, 1);
        assert!(info.is_identity);
    }

    #[test]
    fn parse_adobe_japan1() {
        let info = parse_predefined_cmap_name("Adobe-Japan1-6").unwrap();
        assert_eq!(info.registry, "Adobe");
        assert_eq!(info.ordering, "Japan1");
        assert!(!info.is_identity);
    }

    #[test]
    fn parse_adobe_gb1() {
        let info = parse_predefined_cmap_name("Adobe-GB1-5").unwrap();
        assert_eq!(info.ordering, "GB1");
    }

    #[test]
    fn parse_adobe_cns1() {
        let info = parse_predefined_cmap_name("Adobe-CNS1-7").unwrap();
        assert_eq!(info.ordering, "CNS1");
    }

    #[test]
    fn parse_adobe_korea1() {
        let info = parse_predefined_cmap_name("Adobe-Korea1-2").unwrap();
        assert_eq!(info.ordering, "Korea1");
    }

    #[test]
    fn parse_unijis_utf16_h() {
        let info = parse_predefined_cmap_name("UniJIS-UTF16-H").unwrap();
        assert_eq!(info.ordering, "Japan1");
        assert_eq!(info.writing_mode, 0);
    }

    #[test]
    fn parse_unijis_utf16_v() {
        let info = parse_predefined_cmap_name("UniJIS-UTF16-V").unwrap();
        assert_eq!(info.ordering, "Japan1");
        assert_eq!(info.writing_mode, 1);
    }

    #[test]
    fn parse_unigb_utf16_h() {
        let info = parse_predefined_cmap_name("UniGB-UTF16-H").unwrap();
        assert_eq!(info.ordering, "GB1");
    }

    #[test]
    fn parse_uniksc_utf16_h() {
        let info = parse_predefined_cmap_name("UniKS-UTF16-H").unwrap();
        assert_eq!(info.ordering, "Korea1");
    }

    #[test]
    fn parse_90ms_rksj_h() {
        let info = parse_predefined_cmap_name("90ms-RKSJ-H").unwrap();
        assert_eq!(info.ordering, "Japan1");
        assert_eq!(info.writing_mode, 0);
    }

    #[test]
    fn parse_unknown_cmap_returns_none() {
        assert!(parse_predefined_cmap_name("UnknownCMap").is_none());
    }

    #[test]
    fn parse_empty_cmap_returns_none() {
        assert!(parse_predefined_cmap_name("").is_none());
    }

    // ---- is_type0_font ----

    #[test]
    fn detect_type0_font() {
        let dict = dictionary! {
            "Type" => "Font",
            "Subtype" => "Type0",
            "BaseFont" => "SomeFont",
        };
        assert!(is_type0_font(&dict));
    }

    #[test]
    fn detect_non_type0_font() {
        let dict = dictionary! {
            "Type" => "Font",
            "Subtype" => "Type1",
            "BaseFont" => "Helvetica",
        };
        assert!(!is_type0_font(&dict));
    }

    #[test]
    fn detect_truetype_font() {
        let dict = dictionary! {
            "Type" => "Font",
            "Subtype" => "TrueType",
            "BaseFont" => "Arial",
        };
        assert!(!is_type0_font(&dict));
    }

    // ---- get_descendant_font ----

    #[test]
    fn get_descendant_font_basic() {
        let mut doc = Document::with_version("1.5");

        let cid_font_dict = dictionary! {
            "Type" => "Font",
            "Subtype" => "CIDFontType2",
            "BaseFont" => "MSGothic",
        };
        let cid_font_id = doc.add_object(Object::Dictionary(cid_font_dict));

        // The descendant is referenced indirectly from the DescendantFonts array.
        let type0_dict = dictionary! {
            "Type" => "Font",
            "Subtype" => "Type0",
            "BaseFont" => "MSGothic",
            "DescendantFonts" => Object::Array(vec![Object::Reference(cid_font_id)]),
        };

        let desc = get_descendant_font(&doc, &type0_dict);
        assert!(desc.is_some());
        let desc = desc.unwrap();
        assert_eq!(
            desc.get(b"Subtype").unwrap().as_name_str().unwrap(),
            "CIDFontType2"
        );
    }

    #[test]
    fn get_descendant_font_missing() {
        let doc = Document::with_version("1.5");
        let type0_dict = dictionary! {
            "Type" => "Font",
            "Subtype" => "Type0",
            "BaseFont" => "MSGothic",
        };

        assert!(get_descendant_font(&doc, &type0_dict).is_none());
    }

    // ---- get_type0_encoding ----

    #[test]
    fn get_encoding_identity_h() {
        let dict = dictionary! {
            "Subtype" => "Type0",
            "Encoding" => "Identity-H",
        };
        assert_eq!(get_type0_encoding(&dict), Some("Identity-H".to_string()));
    }

    #[test]
    fn get_encoding_missing() {
        let dict = dictionary! {
            "Subtype" => "Type0",
        };
        assert_eq!(get_type0_encoding(&dict), None);
    }

    // ---- is_subset_font / strip_subset_prefix ----

    #[test]
    fn is_subset_font_valid() {
        assert!(is_subset_font("ABCDEF+ArialMT"));
        assert!(is_subset_font("XYZABC+TimesNewRoman"));
        // Shortest accepted form: six-letter tag, '+', one-character name.
        assert!(is_subset_font("AAAAAA+A"));
    }

    #[test]
    fn is_subset_font_invalid() {
        // No tag at all.
        assert!(!is_subset_font("ArialMT"));
        // Lowercase tag.
        assert!(!is_subset_font("abcdef+ArialMT"));
        // Only five letters before the '+'.
        assert!(!is_subset_font("ABCDE+ArialMT"));
        // '-' instead of '+' after the tag.
        assert!(!is_subset_font("ABCDEF-ArialMT"));
        // Digit inside the tag.
        assert!(!is_subset_font("ABC1EF+ArialMT"));
        // Empty name.
        assert!(!is_subset_font(""));
        // Nothing after the '+'.
        assert!(!is_subset_font("ABCDEF+"));
    }

    #[test]
    fn strip_subset_prefix_with_prefix() {
        assert_eq!(strip_subset_prefix("ABCDEF+ArialMT"), "ArialMT");
        assert_eq!(strip_subset_prefix("XYZABC+TimesNewRoman"), "TimesNewRoman");
    }

    #[test]
    fn strip_subset_prefix_without_prefix() {
        assert_eq!(strip_subset_prefix("ArialMT"), "ArialMT");
        assert_eq!(strip_subset_prefix("Helvetica"), "Helvetica");
        assert_eq!(strip_subset_prefix(""), "");
    }

    // ---- Encoding lookup combined with CMap name parsing ----

    #[test]
    fn identity_h_encoding_detected() {
        let dict = dictionary! {
            "Subtype" => "Type0",
            "Encoding" => "Identity-H",
        };
        let enc = get_type0_encoding(&dict).unwrap();
        let info = parse_predefined_cmap_name(&enc).unwrap();
        assert!(info.is_identity);
        // Writing mode 0 is what the parser reports for the `-H` name.
        assert_eq!(info.writing_mode, 0);
    }

    #[test]
    fn identity_v_encoding_detected() {
        let dict = dictionary! {
            "Subtype" => "Type0",
            "Encoding" => "Identity-V",
        };
        let enc = get_type0_encoding(&dict).unwrap();
        let info = parse_predefined_cmap_name(&enc).unwrap();
        assert!(info.is_identity);
        // Writing mode 1 is what the parser reports for the `-V` name.
        assert_eq!(info.writing_mode, 1);
    }
}