Skip to main content

oxidize_pdf/fonts/
type0_parsing.rs

1//! Type0 (Composite) Font Parsing Utilities
2//!
3//! This module provides detection and extraction utilities for Type0 fonts
4//! per ISO 32000-1 Section 9.7 (CIDFonts and CMaps).
5//!
6//! ## Type0 Font Hierarchy
7//!
8//! ```text
9//! Type0 Font Dict
10//!   ├── /Subtype /Type0
11//!   ├── /DescendantFonts [ ref_to_CIDFont ]
12//!   └── /ToUnicode (optional) ref_to_CMap
13//!
14//! CIDFont Dict
15//!   ├── /Subtype /CIDFontType0 or /CIDFontType2
16//!   ├── /CIDSystemInfo { Registry, Ordering, Supplement }
17//!   ├── /FontDescriptor ref_to_descriptor
18//!   └── /W (widths) or /DW (default width)
19//!
20//! FontDescriptor Dict
21//!   └── /FontFile2 (TrueType) or /FontFile3 (CFF)
22//! ```
23//!
24//! ## Usage
25//!
26//! ```rust,ignore
27//! use oxidize_pdf::fonts::type0_parsing::{detect_type0_font, extract_descendant_fonts_ref};
28//! use oxidize_pdf::pdf_objects::Dictionary;
29//!
30//! let font_dict: Dictionary = /* from PDF */;
31//!
32//! if detect_type0_font(&font_dict) {
33//!     if let Some(refs) = extract_descendant_fonts_ref(&font_dict) {
34//!         // refs contains ObjectIds to CIDFont dictionaries
35//!     }
36//! }
37//! ```
38
39use crate::pdf_objects::{Dictionary, Object, ObjectId, Stream};
40
41// =============================================================================
42// Security Constants
43// =============================================================================
44
/// Upper bound, in bytes, on an embedded font stream: 10 MiB (`10 * 1024 * 1024`).
///
/// `resolve_type0_hierarchy` compares the length of a resolved font stream's
/// `data` against this limit and leaves `font_stream` unset for anything larger,
/// as a guard against zip bombs and memory exhaustion.
/// Real-world embedded fonts rarely exceed 5MB.
pub const MAX_FONT_STREAM_SIZE: usize = 10 * 1024 * 1024;
48
49// =============================================================================
50// Phase 1: Detection Types and Functions
51// =============================================================================
52
/// Outline format of a CIDFont, as named by the `/Subtype` entry of its dictionary.
///
/// Produced by `detect_cidfont_subtype`, which maps the `/Subtype` name onto
/// one of these variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CIDFontSubtype {
    /// `/CIDFontType0` - CFF (Compact Font Format) outlines.
    /// Used for PostScript-based CID fonts.
    Type0,
    /// `/CIDFontType2` - TrueType outlines.
    /// Used for TrueType-based CID fonts (most common).
    Type2,
}
63
64/// Detect if a font dictionary is a Type0 (composite) font
65///
66/// A Type0 font has `/Subtype /Type0` in its dictionary.
67///
68/// # Arguments
69/// * `dict` - The font dictionary to check
70///
71/// # Returns
72/// `true` if the dictionary represents a Type0 font, `false` otherwise
73///
74/// # Example
75/// ```rust,ignore
76/// use oxidize_pdf::fonts::type0_parsing::detect_type0_font;
77///
78/// if detect_type0_font(&font_dict) {
79///     println!("This is a Type0 composite font");
80/// }
81/// ```
82pub fn detect_type0_font(dict: &Dictionary) -> bool {
83    // Check for /Subtype /Type0
84    if let Some(Object::Name(subtype)) = dict.get("Subtype") {
85        return subtype.as_str() == "Type0";
86    }
87    false
88}
89
90/// Extract the DescendantFonts references from a Type0 font dictionary
91///
92/// The `/DescendantFonts` entry is an array containing a single reference
93/// to a CIDFont dictionary (per ISO 32000-1, Type0 fonts have exactly one descendant).
94///
95/// # Arguments
96/// * `dict` - The Type0 font dictionary
97///
98/// # Returns
99/// `Some(Vec<ObjectId>)` containing the descendant font references,
100/// or `None` if the entry doesn't exist or is invalid.
101///
102/// # Note
103/// While the spec requires exactly one descendant, this function returns
104/// a Vec to handle malformed PDFs that might have multiple.
105pub fn extract_descendant_fonts_ref(dict: &Dictionary) -> Option<Vec<ObjectId>> {
106    let descendants = dict.get("DescendantFonts")?;
107
108    match descendants {
109        Object::Array(arr) => {
110            let refs: Vec<ObjectId> = arr
111                .iter()
112                .filter_map(|obj| {
113                    if let Object::Reference(id) = obj {
114                        Some(*id)
115                    } else {
116                        None
117                    }
118                })
119                .collect();
120
121            if refs.is_empty() {
122                None
123            } else {
124                Some(refs)
125            }
126        }
127        // Some PDFs might inline the dictionary instead of using a reference
128        Object::Reference(id) => Some(vec![*id]),
129        _ => None,
130    }
131}
132
133/// Detect the CIDFont subtype from a CIDFont dictionary
134///
135/// CIDFonts have either `/Subtype /CIDFontType0` (CFF outlines)
136/// or `/Subtype /CIDFontType2` (TrueType outlines).
137///
138/// # Arguments
139/// * `dict` - The CIDFont dictionary
140///
141/// # Returns
142/// `Some(CIDFontSubtype)` if the dictionary is a valid CIDFont,
143/// `None` if it's not a CIDFont or has an unknown subtype.
144pub fn detect_cidfont_subtype(dict: &Dictionary) -> Option<CIDFontSubtype> {
145    if let Some(Object::Name(subtype)) = dict.get("Subtype") {
146        match subtype.as_str() {
147            "CIDFontType0" => Some(CIDFontSubtype::Type0),
148            "CIDFontType2" => Some(CIDFontSubtype::Type2),
149            _ => None,
150        }
151    } else {
152        None
153    }
154}
155
156/// Extract the ToUnicode CMap reference from a font dictionary
157///
158/// The `/ToUnicode` entry provides a CMap that maps character codes
159/// to Unicode values, essential for text extraction from Type0 fonts.
160///
161/// # Arguments
162/// * `dict` - The font dictionary (Type0 or other)
163///
164/// # Returns
165/// `Some(ObjectId)` if a ToUnicode reference exists,
166/// `None` if the entry doesn't exist or is not a reference.
167pub fn extract_tounicode_ref(dict: &Dictionary) -> Option<ObjectId> {
168    if let Some(Object::Reference(id)) = dict.get("ToUnicode") {
169        Some(*id)
170    } else {
171        None
172    }
173}
174
175// =============================================================================
176// Phase 2: Hierarchy Resolution Types and Functions
177// =============================================================================
178
/// Kind of embedded font program, identified by which `FontFile*` key of the
/// FontDescriptor carried the stream reference (see `extract_font_file_ref`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FontFileType {
    /// `/FontFile` - Type 1 font data (PostScript)
    Type1,
    /// `/FontFile2` - TrueType font data
    TrueType,
    /// `/FontFile3` - CFF or OpenType font data
    CFF,
}
189
/// Everything gathered while walking a Type0 font hierarchy.
///
/// Only `type0_dict` is always present. Every other field is `None` until the
/// corresponding object has been resolved, so a value of this type may
/// describe a partially resolved hierarchy.
#[derive(Debug, Clone)]
pub struct Type0FontInfo {
    /// The Type0 font dictionary
    pub type0_dict: Dictionary,
    /// The CIDFont dictionary (from DescendantFonts)
    pub cidfont_dict: Option<Dictionary>,
    /// CIDFont subtype (Type0=CFF, Type2=TrueType)
    pub cidfont_subtype: Option<CIDFontSubtype>,
    /// The FontDescriptor dictionary
    pub font_descriptor: Option<Dictionary>,
    /// The embedded font stream (FontFile, FontFile2, or FontFile3)
    pub font_stream: Option<Stream>,
    /// Type of embedded font file; set together with `font_stream`
    pub font_file_type: Option<FontFileType>,
    /// ToUnicode CMap stream (if present)
    pub tounicode_stream: Option<Stream>,
}
208
impl Type0FontInfo {
    /// Create a new Type0FontInfo with just the Type0 dictionary.
    ///
    /// All optional fields start out as `None`; they are meant to be filled
    /// in as the hierarchy is resolved.
    pub fn new(type0_dict: Dictionary) -> Self {
        Self {
            type0_dict,
            cidfont_dict: None,
            cidfont_subtype: None,
            font_descriptor: None,
            font_stream: None,
            font_file_type: None,
            tounicode_stream: None,
        }
    }

    /// Check if the font has embedded data, i.e. `font_stream` is populated.
    pub fn has_embedded_font(&self) -> bool {
        self.font_stream.is_some()
    }

    /// Check if a ToUnicode CMap is available, i.e. `tounicode_stream` is populated.
    pub fn has_tounicode(&self) -> bool {
        self.tounicode_stream.is_some()
    }
}
233
234/// Extract the FontDescriptor reference from a font dictionary (CIDFont or simple font)
235///
236/// # Arguments
237/// * `dict` - The font dictionary (CIDFont, Type1, TrueType, etc.)
238///
239/// # Returns
240/// `Some(ObjectId)` if a FontDescriptor reference exists,
241/// `None` if the entry doesn't exist or is not a reference.
242pub fn extract_font_descriptor_ref(dict: &Dictionary) -> Option<ObjectId> {
243    if let Some(Object::Reference(id)) = dict.get("FontDescriptor") {
244        Some(*id)
245    } else {
246        None
247    }
248}
249
250/// Extract the embedded font file reference from a FontDescriptor dictionary
251///
252/// Checks for FontFile, FontFile2, and FontFile3 entries (in that order).
253///
254/// # Arguments
255/// * `descriptor` - The FontDescriptor dictionary
256///
257/// # Returns
258/// `Some((ObjectId, FontFileType))` if a font file reference exists,
259/// `None` if no font file is embedded.
260pub fn extract_font_file_ref(descriptor: &Dictionary) -> Option<(ObjectId, FontFileType)> {
261    // Check in order of precedence
262    if let Some(Object::Reference(id)) = descriptor.get("FontFile") {
263        return Some((*id, FontFileType::Type1));
264    }
265    if let Some(Object::Reference(id)) = descriptor.get("FontFile2") {
266        return Some((*id, FontFileType::TrueType));
267    }
268    if let Some(Object::Reference(id)) = descriptor.get("FontFile3") {
269        return Some((*id, FontFileType::CFF));
270    }
271    None
272}
273
274/// Extract the W (widths) array reference from a CIDFont dictionary
275///
276/// # Arguments
277/// * `cidfont` - The CIDFont dictionary
278///
279/// # Returns
280/// `Some(ObjectId)` if a W array reference exists,
281/// `None` if the entry doesn't exist or is not a reference.
282pub fn extract_widths_ref(cidfont: &Dictionary) -> Option<ObjectId> {
283    if let Some(Object::Reference(id)) = cidfont.get("W") {
284        Some(*id)
285    } else {
286        None
287    }
288}
289
290/// Extract the default width (DW) from a CIDFont dictionary
291///
292/// # Arguments
293/// * `cidfont` - The CIDFont dictionary
294///
295/// # Returns
296/// The default width value, or 1000 if not specified (per ISO 32000-1)
297pub fn extract_default_width(cidfont: &Dictionary) -> i64 {
298    if let Some(obj) = cidfont.get("DW") {
299        obj.as_integer().unwrap_or(1000)
300    } else {
301        1000 // Default per ISO 32000-1
302    }
303}
304
305// =============================================================================
306// Phase 3: Full Hierarchy Resolution
307// =============================================================================
308
309/// Resolve the complete Type0 font hierarchy using a resolver function
310///
311/// This function walks the entire Type0 font hierarchy:
312/// `Type0 → DescendantFonts → CIDFont → FontDescriptor → FontFile`
313///
314/// It also resolves the optional ToUnicode CMap stream.
315///
316/// # Arguments
317/// * `type0_dict` - The Type0 font dictionary
318/// * `resolver` - A function that resolves ObjectId references to Objects
319///
320/// # Returns
321/// `Some(Type0FontInfo)` with all resolved components, or `None` if not a Type0 font.
322///
323/// # Security
324///
325/// This function implements two security protections:
326///
327/// 1. **Circular reference detection**: Uses a HashSet to track visited ObjectIds.
328///    If a circular reference is detected, the function returns partial info and
329///    logs a warning via `tracing::warn`. This prevents infinite loops and stack
330///    overflow from malicious PDFs.
331///
332/// 2. **Font stream size validation**: Rejects font streams larger than
333///    [`MAX_FONT_STREAM_SIZE`] (10MB) to prevent zip bomb and memory exhaustion
334///    attacks. Oversized streams are logged and `font_stream` is set to `None`.
335///
336/// # Example
337/// ```rust,ignore
338/// use oxidize_pdf::fonts::type0_parsing::resolve_type0_hierarchy;
339///
340/// let info = resolve_type0_hierarchy(&font_dict, |id| document.get_object(id));
341/// if let Some(info) = info {
342///     if info.has_embedded_font() {
343///         // Process embedded font data
344///     }
345/// }
346/// ```
347pub fn resolve_type0_hierarchy<F>(type0_dict: &Dictionary, resolver: F) -> Option<Type0FontInfo>
348where
349    F: Fn(ObjectId) -> Option<Object>,
350{
351    use std::collections::HashSet;
352
353    // Must be a Type0 font
354    if !detect_type0_font(type0_dict) {
355        return None;
356    }
357
358    let mut info = Type0FontInfo::new(type0_dict.clone());
359    let mut visited = HashSet::new(); // Track visited ObjectIds for circular ref detection
360
361    // Step 1: Resolve DescendantFonts → CIDFont
362    if let Some(refs) = extract_descendant_fonts_ref(type0_dict) {
363        // Type0 fonts have exactly one descendant (per ISO 32000-1)
364        if let Some(cidfont_ref) = refs.first() {
365            // Check for circular reference
366            if visited.contains(cidfont_ref) {
367                tracing::warn!("Circular reference detected at CIDFont {:?}", cidfont_ref);
368                return Some(info);
369            }
370            visited.insert(*cidfont_ref);
371
372            if let Some(Object::Dictionary(cidfont)) = resolver(*cidfont_ref) {
373                // Detect CIDFont subtype
374                info.cidfont_subtype = detect_cidfont_subtype(&cidfont);
375
376                // Step 2: Resolve CIDFont → FontDescriptor
377                if let Some(desc_ref) = extract_font_descriptor_ref(&cidfont) {
378                    // Check for circular reference
379                    if visited.contains(&desc_ref) {
380                        tracing::warn!(
381                            "Circular reference detected at FontDescriptor {:?}",
382                            desc_ref
383                        );
384                        info.cidfont_dict = Some(cidfont);
385                        return Some(info);
386                    }
387                    visited.insert(desc_ref);
388
389                    if let Some(Object::Dictionary(descriptor)) = resolver(desc_ref) {
390                        // Step 3: Resolve FontDescriptor → FontFile stream
391                        if let Some((file_ref, file_type)) = extract_font_file_ref(&descriptor) {
392                            // Check for circular reference
393                            if visited.contains(&file_ref) {
394                                tracing::warn!(
395                                    "Circular reference detected at FontFile {:?}",
396                                    file_ref
397                                );
398                                info.font_descriptor = Some(descriptor);
399                                info.cidfont_dict = Some(cidfont);
400                                return Some(info);
401                            }
402                            visited.insert(file_ref);
403
404                            if let Some(Object::Stream(stream)) = resolver(file_ref) {
405                                // Security: Validate stream size to prevent zip bombs
406                                if stream.data.len() > MAX_FONT_STREAM_SIZE {
407                                    tracing::warn!(
408                                        "Font stream size {} exceeds limit {} for {:?}",
409                                        stream.data.len(),
410                                        MAX_FONT_STREAM_SIZE,
411                                        file_ref
412                                    );
413                                    // Don't set font_stream/font_file_type for oversized streams
414                                } else {
415                                    info.font_stream = Some(stream);
416                                    info.font_file_type = Some(file_type);
417                                }
418                            }
419                        }
420
421                        info.font_descriptor = Some(descriptor);
422                    }
423                }
424
425                info.cidfont_dict = Some(cidfont);
426            }
427        }
428    }
429
430    // Step 4: Resolve ToUnicode CMap stream (optional but important for text extraction)
431    if let Some(tounicode_ref) = extract_tounicode_ref(type0_dict) {
432        // Check for circular reference (ToUnicode pointing to already-visited object)
433        if visited.contains(&tounicode_ref) {
434            tracing::warn!(
435                "Circular reference detected at ToUnicode {:?}",
436                tounicode_ref
437            );
438            return Some(info);
439        }
440
441        if let Some(Object::Stream(stream)) = resolver(tounicode_ref) {
442            info.tounicode_stream = Some(stream);
443        }
444    }
445
446    Some(info)
447}
448
449#[cfg(test)]
450mod tests {
451    use super::*;
452    use crate::pdf_objects::{Array, Name};
453
454    #[test]
455    fn test_detect_type0_font() {
456        let mut type0_dict = Dictionary::new();
457        type0_dict.set("Subtype", Name::new("Type0"));
458
459        assert!(detect_type0_font(&type0_dict));
460
461        let mut type1_dict = Dictionary::new();
462        type1_dict.set("Subtype", Name::new("Type1"));
463
464        assert!(!detect_type0_font(&type1_dict));
465
466        let empty_dict = Dictionary::new();
467        assert!(!detect_type0_font(&empty_dict));
468    }
469
470    #[test]
471    fn test_extract_descendant_fonts_ref() {
472        let mut dict = Dictionary::new();
473        let mut arr = Array::new();
474        arr.push(Object::Reference(ObjectId::new(10, 0)));
475        dict.set("DescendantFonts", Object::Array(arr));
476
477        let result = extract_descendant_fonts_ref(&dict);
478        assert!(result.is_some());
479        let refs = result.unwrap();
480        assert_eq!(refs.len(), 1);
481        assert_eq!(refs[0], ObjectId::new(10, 0));
482
483        // Empty array should return None
484        let mut empty_arr_dict = Dictionary::new();
485        empty_arr_dict.set("DescendantFonts", Object::Array(Array::new()));
486        assert!(extract_descendant_fonts_ref(&empty_arr_dict).is_none());
487
488        // No DescendantFonts should return None
489        let no_descendants = Dictionary::new();
490        assert!(extract_descendant_fonts_ref(&no_descendants).is_none());
491    }
492
493    #[test]
494    fn test_detect_cidfont_subtype() {
495        let mut type0_dict = Dictionary::new();
496        type0_dict.set("Subtype", Name::new("CIDFontType0"));
497        assert_eq!(
498            detect_cidfont_subtype(&type0_dict),
499            Some(CIDFontSubtype::Type0)
500        );
501
502        let mut type2_dict = Dictionary::new();
503        type2_dict.set("Subtype", Name::new("CIDFontType2"));
504        assert_eq!(
505            detect_cidfont_subtype(&type2_dict),
506            Some(CIDFontSubtype::Type2)
507        );
508
509        let mut truetype_dict = Dictionary::new();
510        truetype_dict.set("Subtype", Name::new("TrueType"));
511        assert_eq!(detect_cidfont_subtype(&truetype_dict), None);
512
513        let empty_dict = Dictionary::new();
514        assert_eq!(detect_cidfont_subtype(&empty_dict), None);
515    }
516
517    #[test]
518    fn test_extract_tounicode_ref() {
519        let mut dict = Dictionary::new();
520        dict.set("ToUnicode", Object::Reference(ObjectId::new(20, 0)));
521
522        assert_eq!(extract_tounicode_ref(&dict), Some(ObjectId::new(20, 0)));
523
524        // No ToUnicode
525        let empty_dict = Dictionary::new();
526        assert!(extract_tounicode_ref(&empty_dict).is_none());
527
528        // Wrong type (Name instead of Reference)
529        let mut wrong_type = Dictionary::new();
530        wrong_type.set("ToUnicode", Name::new("Identity-H"));
531        assert!(extract_tounicode_ref(&wrong_type).is_none());
532    }
533
534    #[test]
535    fn test_descendant_fonts_with_direct_reference() {
536        // Some PDFs might have a direct reference instead of array
537        let mut dict = Dictionary::new();
538        dict.set("DescendantFonts", Object::Reference(ObjectId::new(15, 0)));
539
540        let result = extract_descendant_fonts_ref(&dict);
541        assert!(result.is_some());
542        let refs = result.unwrap();
543        assert_eq!(refs.len(), 1);
544        assert_eq!(refs[0], ObjectId::new(15, 0));
545    }
546
547    // =========================================================================
548    // Phase 2 Tests: Hierarchy Resolution
549    // =========================================================================
550
551    #[test]
552    fn test_type0_font_info_new() {
553        let mut dict = Dictionary::new();
554        dict.set("Subtype", Name::new("Type0"));
555
556        let info = Type0FontInfo::new(dict);
557
558        assert!(info.cidfont_dict.is_none());
559        assert!(info.cidfont_subtype.is_none());
560        assert!(info.font_descriptor.is_none());
561        assert!(info.font_stream.is_none());
562        assert!(info.font_file_type.is_none());
563        assert!(info.tounicode_stream.is_none());
564        assert!(!info.has_embedded_font());
565        assert!(!info.has_tounicode());
566    }
567
568    #[test]
569    fn test_extract_font_descriptor_ref() {
570        let mut dict = Dictionary::new();
571        dict.set("FontDescriptor", Object::Reference(ObjectId::new(25, 0)));
572
573        assert_eq!(
574            extract_font_descriptor_ref(&dict),
575            Some(ObjectId::new(25, 0))
576        );
577
578        // No FontDescriptor
579        let empty_dict = Dictionary::new();
580        assert!(extract_font_descriptor_ref(&empty_dict).is_none());
581
582        // Wrong type
583        let mut wrong_type = Dictionary::new();
584        wrong_type.set("FontDescriptor", Name::new("SomeFont"));
585        assert!(extract_font_descriptor_ref(&wrong_type).is_none());
586    }
587
588    #[test]
589    fn test_extract_font_file_ref_truetype() {
590        let mut descriptor = Dictionary::new();
591        descriptor.set("FontFile2", Object::Reference(ObjectId::new(30, 0)));
592
593        let result = extract_font_file_ref(&descriptor);
594        assert!(result.is_some());
595        let (id, file_type) = result.unwrap();
596        assert_eq!(id, ObjectId::new(30, 0));
597        assert_eq!(file_type, FontFileType::TrueType);
598    }
599
600    #[test]
601    fn test_extract_font_file_ref_cff() {
602        let mut descriptor = Dictionary::new();
603        descriptor.set("FontFile3", Object::Reference(ObjectId::new(35, 0)));
604
605        let result = extract_font_file_ref(&descriptor);
606        assert!(result.is_some());
607        let (id, file_type) = result.unwrap();
608        assert_eq!(id, ObjectId::new(35, 0));
609        assert_eq!(file_type, FontFileType::CFF);
610    }
611
612    #[test]
613    fn test_extract_font_file_ref_type1() {
614        let mut descriptor = Dictionary::new();
615        descriptor.set("FontFile", Object::Reference(ObjectId::new(40, 0)));
616
617        let result = extract_font_file_ref(&descriptor);
618        assert!(result.is_some());
619        let (id, file_type) = result.unwrap();
620        assert_eq!(id, ObjectId::new(40, 0));
621        assert_eq!(file_type, FontFileType::Type1);
622    }
623
624    #[test]
625    fn test_extract_font_file_ref_precedence() {
626        // If multiple FontFile entries exist, FontFile takes precedence
627        let mut descriptor = Dictionary::new();
628        descriptor.set("FontFile", Object::Reference(ObjectId::new(1, 0)));
629        descriptor.set("FontFile2", Object::Reference(ObjectId::new(2, 0)));
630        descriptor.set("FontFile3", Object::Reference(ObjectId::new(3, 0)));
631
632        let result = extract_font_file_ref(&descriptor);
633        assert!(result.is_some());
634        let (id, file_type) = result.unwrap();
635        assert_eq!(id, ObjectId::new(1, 0));
636        assert_eq!(file_type, FontFileType::Type1);
637    }
638
639    #[test]
640    fn test_extract_font_file_ref_none() {
641        let empty_descriptor = Dictionary::new();
642        assert!(extract_font_file_ref(&empty_descriptor).is_none());
643    }
644
645    #[test]
646    fn test_extract_widths_ref() {
647        let mut cidfont = Dictionary::new();
648        cidfont.set("W", Object::Reference(ObjectId::new(50, 0)));
649
650        assert_eq!(extract_widths_ref(&cidfont), Some(ObjectId::new(50, 0)));
651
652        // No W entry
653        let empty_dict = Dictionary::new();
654        assert!(extract_widths_ref(&empty_dict).is_none());
655
656        // W is inline array (not reference)
657        let mut inline_w = Dictionary::new();
658        inline_w.set("W", Object::Array(Array::new()));
659        assert!(extract_widths_ref(&inline_w).is_none());
660    }
661
662    #[test]
663    fn test_extract_default_width() {
664        let mut cidfont = Dictionary::new();
665        cidfont.set("DW", Object::Integer(500));
666
667        assert_eq!(extract_default_width(&cidfont), 500);
668
669        // No DW entry - should return 1000 (ISO default)
670        let empty_dict = Dictionary::new();
671        assert_eq!(extract_default_width(&empty_dict), 1000);
672
673        // Wrong type (should use default)
674        let mut wrong_type = Dictionary::new();
675        wrong_type.set("DW", Name::new("invalid"));
676        assert_eq!(extract_default_width(&wrong_type), 1000);
677    }
678
679    #[test]
680    fn test_font_file_type_equality() {
681        assert_eq!(FontFileType::Type1, FontFileType::Type1);
682        assert_eq!(FontFileType::TrueType, FontFileType::TrueType);
683        assert_eq!(FontFileType::CFF, FontFileType::CFF);
684        assert_ne!(FontFileType::Type1, FontFileType::TrueType);
685        assert_ne!(FontFileType::TrueType, FontFileType::CFF);
686    }
687
688    // =========================================================================
689    // Phase 3 Tests: Full Hierarchy Resolution
690    // =========================================================================
691
692    use std::collections::HashMap;
693
694    /// Helper to create a mock object store for testing hierarchy resolution
695    fn create_mock_object_store() -> HashMap<ObjectId, Object> {
696        let mut store = HashMap::new();
697
698        // CIDFont dictionary (object 15)
699        let mut cidfont = Dictionary::new();
700        cidfont.set("Type", Name::new("Font"));
701        cidfont.set("Subtype", Name::new("CIDFontType2"));
702        cidfont.set("BaseFont", Name::new("Arial-Bold"));
703        cidfont.set("FontDescriptor", Object::Reference(ObjectId::new(16, 0)));
704        cidfont.set("DW", Object::Integer(1000));
705        store.insert(ObjectId::new(15, 0), Object::Dictionary(cidfont));
706
707        // FontDescriptor dictionary (object 16)
708        let mut descriptor = Dictionary::new();
709        descriptor.set("Type", Name::new("FontDescriptor"));
710        descriptor.set("FontName", Name::new("Arial-Bold"));
711        descriptor.set("FontFile2", Object::Reference(ObjectId::new(17, 0)));
712        store.insert(ObjectId::new(16, 0), Object::Dictionary(descriptor));
713
714        // FontFile2 stream (object 17)
715        let font_stream = Stream::new(
716            Dictionary::new(),
717            vec![0x00, 0x01, 0x00, 0x00], // TTF magic bytes
718        );
719        store.insert(ObjectId::new(17, 0), Object::Stream(font_stream));
720
721        // ToUnicode CMap stream (object 20)
722        let tounicode_stream = Stream::new(
723            Dictionary::new(),
724            b"/CIDInit /ProcSet findresource begin".to_vec(),
725        );
726        store.insert(ObjectId::new(20, 0), Object::Stream(tounicode_stream));
727
728        store
729    }
730
731    /// Helper to create a Type0 font dictionary for testing
732    fn create_test_type0_dict() -> Dictionary {
733        let mut dict = Dictionary::new();
734        dict.set("Type", Name::new("Font"));
735        dict.set("Subtype", Name::new("Type0"));
736        dict.set("BaseFont", Name::new("Arial-Bold"));
737        dict.set("Encoding", Name::new("Identity-H"));
738
739        let mut descendant_array = Array::new();
740        descendant_array.push(Object::Reference(ObjectId::new(15, 0)));
741        dict.set("DescendantFonts", Object::Array(descendant_array));
742
743        dict.set("ToUnicode", Object::Reference(ObjectId::new(20, 0)));
744
745        dict
746    }
747
748    #[test]
749    fn test_resolve_type0_hierarchy_complete() {
750        use super::resolve_type0_hierarchy;
751
752        let type0_dict = create_test_type0_dict();
753        let store = create_mock_object_store();
754
755        // Create resolver closure
756        let resolver = |id: ObjectId| -> Option<Object> { store.get(&id).cloned() };
757
758        let result = resolve_type0_hierarchy(&type0_dict, resolver);
759        assert!(result.is_some(), "Should resolve complete hierarchy");
760
761        let info = result.unwrap();
762
763        // Verify CIDFont was resolved
764        assert!(
765            info.cidfont_dict.is_some(),
766            "Should have CIDFont dictionary"
767        );
768        assert_eq!(
769            info.cidfont_subtype,
770            Some(CIDFontSubtype::Type2),
771            "Should detect CIDFontType2"
772        );
773
774        // Verify FontDescriptor was resolved
775        assert!(info.font_descriptor.is_some(), "Should have FontDescriptor");
776
777        // Verify font stream was resolved
778        assert!(info.font_stream.is_some(), "Should have font stream");
779        assert_eq!(
780            info.font_file_type,
781            Some(FontFileType::TrueType),
782            "Should be TrueType"
783        );
784
785        // Verify ToUnicode was resolved
786        assert!(
787            info.tounicode_stream.is_some(),
788            "Should have ToUnicode stream"
789        );
790    }
791
792    #[test]
793    fn test_resolve_type0_hierarchy_missing_cidfont() {
794        use super::resolve_type0_hierarchy;
795
796        let type0_dict = create_test_type0_dict();
797        let store: HashMap<ObjectId, Object> = HashMap::new(); // Empty store
798
799        let resolver = |id: ObjectId| -> Option<Object> { store.get(&id).cloned() };
800
801        let result = resolve_type0_hierarchy(&type0_dict, resolver);
802        assert!(
803            result.is_some(),
804            "Should return partial info even with missing refs"
805        );
806
807        let info = result.unwrap();
808        assert!(info.cidfont_dict.is_none(), "CIDFont should be None");
809        assert!(
810            info.font_descriptor.is_none(),
811            "FontDescriptor should be None"
812        );
813        assert!(info.font_stream.is_none(), "Font stream should be None");
814    }
815
816    #[test]
817    fn test_resolve_type0_hierarchy_partial_chain() {
818        use super::resolve_type0_hierarchy;
819
820        let type0_dict = create_test_type0_dict();
821        let mut store = HashMap::new();
822
823        // Only add CIDFont, not FontDescriptor
824        let mut cidfont = Dictionary::new();
825        cidfont.set("Subtype", Name::new("CIDFontType2"));
826        cidfont.set("FontDescriptor", Object::Reference(ObjectId::new(16, 0)));
827        store.insert(ObjectId::new(15, 0), Object::Dictionary(cidfont));
828
829        let resolver = |id: ObjectId| -> Option<Object> { store.get(&id).cloned() };
830
831        let result = resolve_type0_hierarchy(&type0_dict, resolver);
832        assert!(result.is_some());
833
834        let info = result.unwrap();
835        assert!(info.cidfont_dict.is_some(), "CIDFont should be resolved");
836        assert_eq!(info.cidfont_subtype, Some(CIDFontSubtype::Type2));
837        assert!(
838            info.font_descriptor.is_none(),
839            "FontDescriptor should be None"
840        );
841        assert!(info.font_stream.is_none(), "Font stream should be None");
842    }
843
844    #[test]
845    fn test_resolve_type0_hierarchy_cff_font() {
846        use super::resolve_type0_hierarchy;
847
848        let type0_dict = create_test_type0_dict();
849        let mut store = HashMap::new();
850
851        // CIDFont with CFF outlines
852        let mut cidfont = Dictionary::new();
853        cidfont.set("Subtype", Name::new("CIDFontType0"));
854        cidfont.set("FontDescriptor", Object::Reference(ObjectId::new(16, 0)));
855        store.insert(ObjectId::new(15, 0), Object::Dictionary(cidfont));
856
857        // FontDescriptor with FontFile3 (CFF)
858        let mut descriptor = Dictionary::new();
859        descriptor.set("FontFile3", Object::Reference(ObjectId::new(17, 0)));
860        store.insert(ObjectId::new(16, 0), Object::Dictionary(descriptor));
861
862        // CFF font stream
863        let cff_stream = Stream::new(Dictionary::new(), vec![0x01, 0x00, 0x04, 0x00]);
864        store.insert(ObjectId::new(17, 0), Object::Stream(cff_stream));
865
866        let resolver = |id: ObjectId| -> Option<Object> { store.get(&id).cloned() };
867
868        let result = resolve_type0_hierarchy(&type0_dict, resolver);
869        assert!(result.is_some());
870
871        let info = result.unwrap();
872        assert_eq!(info.cidfont_subtype, Some(CIDFontSubtype::Type0));
873        assert_eq!(info.font_file_type, Some(FontFileType::CFF));
874        assert!(info.font_stream.is_some());
875    }
876
877    #[test]
878    fn test_resolve_type0_hierarchy_not_type0_font() {
879        use super::resolve_type0_hierarchy;
880
881        // Type1 font (not Type0)
882        let mut type1_dict = Dictionary::new();
883        type1_dict.set("Subtype", Name::new("Type1"));
884        type1_dict.set("BaseFont", Name::new("Helvetica"));
885
886        let store = create_mock_object_store();
887        let resolver = |id: ObjectId| -> Option<Object> { store.get(&id).cloned() };
888
889        let result = resolve_type0_hierarchy(&type1_dict, resolver);
890        assert!(result.is_none(), "Should return None for non-Type0 font");
891    }
892
893    // =========================================================================
894    // Phase 6 Tests: Edge Cases
895    // =========================================================================
896
897    #[test]
898    fn test_multiple_descendant_fonts() {
899        // Malformed PDF with multiple descendants (ISO 32000-1 says only one)
900        let mut dict = Dictionary::new();
901        dict.set("Subtype", Name::new("Type0"));
902
903        let mut arr = Array::new();
904        arr.push(Object::Reference(ObjectId::new(10, 0)));
905        arr.push(Object::Reference(ObjectId::new(11, 0)));
906        arr.push(Object::Reference(ObjectId::new(12, 0)));
907        dict.set("DescendantFonts", Object::Array(arr));
908
909        // Should extract all references (graceful handling of malformed PDFs)
910        let result = extract_descendant_fonts_ref(&dict);
911        assert!(result.is_some());
912        let refs = result.unwrap();
913        assert_eq!(refs.len(), 3, "Should extract all descendant refs");
914    }
915
916    #[test]
917    fn test_descendant_fonts_with_inline_dict() {
918        // Some malformed PDFs might have inline dicts instead of references
919        let mut dict = Dictionary::new();
920        dict.set("Subtype", Name::new("Type0"));
921
922        let mut arr = Array::new();
923        // Inline dictionary instead of reference
924        let mut inline_cidfont = Dictionary::new();
925        inline_cidfont.set("Subtype", Name::new("CIDFontType2"));
926        arr.push(Object::Dictionary(inline_cidfont));
927        dict.set("DescendantFonts", Object::Array(arr));
928
929        // Should return None because we only extract references
930        let result = extract_descendant_fonts_ref(&dict);
931        assert!(
932            result.is_none(),
933            "Inline dicts should not be extracted as refs"
934        );
935    }
936
937    #[test]
938    fn test_wrong_object_type_in_subtype() {
939        let mut dict = Dictionary::new();
940        // Subtype is Integer instead of Name
941        dict.set("Subtype", Object::Integer(0));
942
943        assert!(!detect_type0_font(&dict));
944        assert!(detect_cidfont_subtype(&dict).is_none());
945    }
946
947    #[test]
948    fn test_descendant_fonts_wrong_type() {
949        let mut dict = Dictionary::new();
950        dict.set("Subtype", Name::new("Type0"));
951        // DescendantFonts is Integer instead of Array or Reference
952        dict.set("DescendantFonts", Object::Integer(15));
953
954        assert!(extract_descendant_fonts_ref(&dict).is_none());
955    }
956
957    #[test]
958    fn test_resolver_returns_wrong_object_type() {
959        use super::resolve_type0_hierarchy;
960
961        let type0_dict = create_test_type0_dict();
962        let mut store = HashMap::new();
963
964        // CIDFont reference points to Integer instead of Dictionary
965        store.insert(ObjectId::new(15, 0), Object::Integer(42));
966
967        let resolver = |id: ObjectId| -> Option<Object> { store.get(&id).cloned() };
968
969        let result = resolve_type0_hierarchy(&type0_dict, resolver);
970        assert!(result.is_some()); // Should return partial info
971
972        let info = result.unwrap();
973        assert!(
974            info.cidfont_dict.is_none(),
975            "CIDFont should be None when resolved to wrong type"
976        );
977    }
978
979    #[test]
980    fn test_font_descriptor_returns_wrong_type() {
981        use super::resolve_type0_hierarchy;
982
983        let type0_dict = create_test_type0_dict();
984        let mut store = HashMap::new();
985
986        // Valid CIDFont
987        let mut cidfont = Dictionary::new();
988        cidfont.set("Subtype", Name::new("CIDFontType2"));
989        cidfont.set("FontDescriptor", Object::Reference(ObjectId::new(16, 0)));
990        store.insert(ObjectId::new(15, 0), Object::Dictionary(cidfont));
991
992        // FontDescriptor reference points to Stream instead of Dictionary
993        let stream = Stream::new(Dictionary::new(), vec![0x00]);
994        store.insert(ObjectId::new(16, 0), Object::Stream(stream));
995
996        let resolver = |id: ObjectId| -> Option<Object> { store.get(&id).cloned() };
997
998        let result = resolve_type0_hierarchy(&type0_dict, resolver);
999        assert!(result.is_some());
1000
1001        let info = result.unwrap();
1002        assert!(info.cidfont_dict.is_some(), "CIDFont should be resolved");
1003        assert!(
1004            info.font_descriptor.is_none(),
1005            "FontDescriptor should be None when wrong type"
1006        );
1007    }
1008
1009    #[test]
1010    fn test_font_file_returns_wrong_type() {
1011        use super::resolve_type0_hierarchy;
1012
1013        let type0_dict = create_test_type0_dict();
1014        let mut store = HashMap::new();
1015
1016        // Valid CIDFont
1017        let mut cidfont = Dictionary::new();
1018        cidfont.set("Subtype", Name::new("CIDFontType2"));
1019        cidfont.set("FontDescriptor", Object::Reference(ObjectId::new(16, 0)));
1020        store.insert(ObjectId::new(15, 0), Object::Dictionary(cidfont));
1021
1022        // Valid FontDescriptor
1023        let mut descriptor = Dictionary::new();
1024        descriptor.set("FontFile2", Object::Reference(ObjectId::new(17, 0)));
1025        store.insert(ObjectId::new(16, 0), Object::Dictionary(descriptor));
1026
1027        // FontFile2 reference points to Dictionary instead of Stream
1028        let wrong_dict = Dictionary::new();
1029        store.insert(ObjectId::new(17, 0), Object::Dictionary(wrong_dict));
1030
1031        let resolver = |id: ObjectId| -> Option<Object> { store.get(&id).cloned() };
1032
1033        let result = resolve_type0_hierarchy(&type0_dict, resolver);
1034        assert!(result.is_some());
1035
1036        let info = result.unwrap();
1037        assert!(info.font_descriptor.is_some());
1038        assert!(
1039            info.font_stream.is_none(),
1040            "Font stream should be None when wrong type"
1041        );
1042        assert!(info.font_file_type.is_none());
1043    }
1044
1045    #[test]
1046    fn test_tounicode_returns_wrong_type() {
1047        use super::resolve_type0_hierarchy;
1048
1049        let type0_dict = create_test_type0_dict();
1050        let mut store = HashMap::new();
1051
1052        // ToUnicode reference points to Dictionary instead of Stream
1053        let wrong_dict = Dictionary::new();
1054        store.insert(ObjectId::new(20, 0), Object::Dictionary(wrong_dict));
1055
1056        let resolver = |id: ObjectId| -> Option<Object> { store.get(&id).cloned() };
1057
1058        let result = resolve_type0_hierarchy(&type0_dict, resolver);
1059        assert!(result.is_some());
1060
1061        let info = result.unwrap();
1062        assert!(
1063            info.tounicode_stream.is_none(),
1064            "ToUnicode should be None when wrong type"
1065        );
1066    }
1067
1068    #[test]
1069    fn test_cidfont_subtype_clone_copy() {
1070        let subtype = CIDFontSubtype::Type2;
1071        let cloned = subtype;
1072        assert_eq!(subtype, cloned);
1073
1074        let subtype2 = CIDFontSubtype::Type0;
1075        assert_ne!(subtype, subtype2);
1076    }
1077
1078    #[test]
1079    fn test_font_file_type_clone_copy() {
1080        let file_type = FontFileType::TrueType;
1081        let cloned = file_type;
1082        assert_eq!(file_type, cloned);
1083    }
1084
1085    #[test]
1086    fn test_type0_font_info_clone() {
1087        let mut dict = Dictionary::new();
1088        dict.set("Subtype", Name::new("Type0"));
1089
1090        let info = Type0FontInfo::new(dict);
1091        let cloned = info.clone();
1092
1093        assert!(!cloned.has_embedded_font());
1094        assert!(!cloned.has_tounicode());
1095    }
1096
1097    #[test]
1098    fn test_empty_descendant_array_edge_case() {
1099        let mut dict = Dictionary::new();
1100        dict.set("Subtype", Name::new("Type0"));
1101        dict.set("DescendantFonts", Object::Array(Array::new()));
1102
1103        assert!(extract_descendant_fonts_ref(&dict).is_none());
1104    }
1105
1106    #[test]
1107    fn test_mixed_array_with_refs_and_other_types() {
1108        let mut dict = Dictionary::new();
1109        dict.set("Subtype", Name::new("Type0"));
1110
1111        let mut arr = Array::new();
1112        arr.push(Object::Reference(ObjectId::new(10, 0)));
1113        arr.push(Object::Integer(42)); // Not a reference
1114        arr.push(Object::Reference(ObjectId::new(11, 0)));
1115        arr.push(Object::Name(Name::new("SomeName"))); // Not a reference
1116        dict.set("DescendantFonts", Object::Array(arr));
1117
1118        // Should only extract the references
1119        let result = extract_descendant_fonts_ref(&dict);
1120        assert!(result.is_some());
1121        let refs = result.unwrap();
1122        assert_eq!(refs.len(), 2, "Should only extract valid references");
1123        assert_eq!(refs[0], ObjectId::new(10, 0));
1124        assert_eq!(refs[1], ObjectId::new(11, 0));
1125    }
1126}