Skip to main content

oxidize_pdf/text/fonts/
embedding.rs

1//! Font embedding for PDF generation according to ISO 32000-1 Section 9.8
2//!
3//! This module provides complete font embedding capabilities including:
4//! - TrueType font embedding with subsetting
5//! - Font descriptor generation  
6//! - Character encoding mappings
7//! - CID font support for complex scripts
8
9use crate::error::{PdfError, Result};
10use crate::objects::{Dictionary, Object, ObjectId};
11use crate::text::fonts::truetype::TrueTypeFont;
12use std::collections::{HashMap, HashSet};
13
/// Kind of PDF font object produced when a font is embedded.
///
/// The value stored in an embedded font's `font_type` field decides which
/// dictionary generator `FontEmbedder::generate_font_dictionary` dispatches to.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum FontType {
    /// Simple TrueType font (written with `/Subtype /TrueType`)
    TrueType,
    /// Type 0 composite/CID font (written with `/Subtype /Type0`)
    Type0,
}
22
/// Character encoding attached to an embedded font.
///
/// The three named variants are written to a TrueType font dictionary as the
/// PDF name of the same spelling.
#[derive(Debug, Clone, PartialEq)]
pub enum FontEncoding {
    /// PDF `StandardEncoding`
    StandardEncoding,
    /// PDF `MacRomanEncoding`
    MacRomanEncoding,
    /// PDF `WinAnsiEncoding`
    WinAnsiEncoding,
    /// Custom encoding described by a list of differences; the TrueType
    /// dictionary generator layers these on top of a `WinAnsiEncoding` base
    Custom(Vec<EncodingDifference>),
    /// Identity encoding, assigned to fonts embedded as CID fonts
    Identity,
}
37
/// CJK font types for proper CIDSystemInfo configuration
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CjkFontType {
    /// Chinese Simplified (Source Han Sans SC, Noto Sans CJK SC, etc.)
    ChineseSimplified,
    /// Chinese Traditional (Source Han Sans TC, Noto Sans CJK TC, etc.)
    ChineseTraditional,
    /// Japanese (Source Han Sans JP, Noto Sans CJK JP, etc.)
    Japanese,
    /// Korean (Source Han Sans KR, Noto Sans CJK KR, etc.)
    Korean,
    /// Generic CJK (fallback)
    Generic,
}

impl CjkFontType {
    /// Get the CIDSystemInfo values for this font type as
    /// `(registry, ordering, supplement)`.
    ///
    /// Every variant currently maps to `("Adobe", "Identity", 0)`: Identity
    /// ordering is used because these families are multi-script fonts.
    pub fn cid_system_info(&self) -> (&'static str, &'static str, i32) {
        // Kept as an exhaustive match so that adding a variant forces a
        // decision about its CIDSystemInfo.
        match self {
            CjkFontType::ChineseSimplified
            | CjkFontType::ChineseTraditional
            | CjkFontType::Japanese
            | CjkFontType::Korean
            | CjkFontType::Generic => ("Adobe", "Identity", 0),
        }
    }

    /// Detect the CJK font type from a font name.
    ///
    /// Matching is case-insensitive and purely heuristic. It checks, in
    /// order: Source Han family names, Noto Sans CJK family names, then
    /// generic language markers (`chinese`/`zh`/`gb`, `japanese`/`jp`/`japan`,
    /// `korean`/`kr`/`korea`).
    ///
    /// # Returns
    /// * `Some(variant)` when a marker is recognised
    /// * `None` when the name carries no recognised CJK marker
    pub fn detect_from_name(font_name: &str) -> Option<Self> {
        const SOURCE_HAN_MARKERS: [&str; 6] = [
            "sourcehansans",
            "source han sans",
            "hansans",
            "han sans",
            "sourcehan",
            "source han",
        ];
        const NOTO_CJK_MARKERS: [&str; 2] = ["notosanscjk", "noto sans cjk"];

        let name_lower = font_name.to_lowercase();

        // Source Han Sans detection (various naming patterns)
        if SOURCE_HAN_MARKERS
            .iter()
            .any(|marker| name_lower.contains(*marker))
        {
            if name_lower.contains("sc") || name_lower.contains("simplifiedchinese") {
                return Some(CjkFontType::ChineseSimplified);
            }
            if name_lower.contains("tc") || name_lower.contains("traditionalchinese") {
                return Some(CjkFontType::ChineseTraditional);
            }
            if name_lower.contains("jp") || name_lower.contains("japanese") {
                return Some(CjkFontType::Japanese);
            }
            if name_lower.contains("kr") || name_lower.contains("korean") {
                return Some(CjkFontType::Korean);
            }
        }

        // Noto Sans CJK detection.
        //
        // The region tag is searched only in the text that follows the family
        // marker: the compact marker "notosanscjk" itself contains "sc", so
        // searching the whole name would classify every compact Noto name
        // (e.g. "NotoSansCJKjp-Regular") as Simplified Chinese.
        // The markers are ASCII, so `start + marker.len()` is always a valid
        // char boundary in `name_lower`.
        let noto_suffix = NOTO_CJK_MARKERS.iter().find_map(|marker| {
            name_lower
                .find(*marker)
                .map(|start| &name_lower[start + marker.len()..])
        });
        if let Some(suffix) = noto_suffix {
            if suffix.contains("sc") {
                return Some(CjkFontType::ChineseSimplified);
            }
            if suffix.contains("tc") {
                return Some(CjkFontType::ChineseTraditional);
            }
            if suffix.contains("jp") {
                return Some(CjkFontType::Japanese);
            }
            if suffix.contains("kr") {
                return Some(CjkFontType::Korean);
            }
        }

        // Generic patterns
        if name_lower.contains("chinese") || name_lower.contains("zh") || name_lower.contains("gb")
        {
            if name_lower.contains("traditional")
                || name_lower.contains("tw")
                || name_lower.contains("hk")
            {
                return Some(CjkFontType::ChineseTraditional);
            }
            return Some(CjkFontType::ChineseSimplified);
        }

        if name_lower.contains("japanese")
            || name_lower.contains("jp")
            || name_lower.contains("japan")
        {
            return Some(CjkFontType::Japanese);
        }

        if name_lower.contains("korean")
            || name_lower.contains("kr")
            || name_lower.contains("korea")
        {
            return Some(CjkFontType::Korean);
        }

        None
    }

    /// Determine if a CID font should use CIDFontType2 subtype.
    ///
    /// Per ISO 32000-1 §9.7.4:
    /// - CIDFontType0: CFF/OpenType fonts (PostScript outlines)
    /// - CIDFontType2: TrueType fonts (TrueType outlines)
    ///
    /// Using the wrong subtype causes PDF viewers to fail to render glyphs.
    /// A CFF font declared as CIDFontType2 is structurally invalid.
    ///
    /// # Arguments
    /// * `is_cff` - Whether the font has CFF outlines (OpenType/CFF)
    ///
    /// # Returns
    /// * `true` for TrueType fonts → CIDFontType2
    /// * `false` for CFF/OpenType fonts → CIDFontType0
    pub fn should_use_cidfonttype2(is_cff: bool) -> bool {
        !is_cff
    }
}
155
/// One run of a custom encoding's `Differences` array.
///
/// When a custom encoding is written to a font dictionary, `code` is emitted
/// as an integer followed by every entry of `names` as a name object.
#[derive(Debug, Clone, PartialEq)]
pub struct EncodingDifference {
    /// Starting character code
    pub code: u8,
    /// Glyph names for consecutive character codes
    pub names: Vec<String>,
}
164
/// Boolean view of the `Flags` entry of a PDF font descriptor.
///
/// Each field corresponds to one bit of the integer produced by
/// [`FontFlags::to_flags`]; all fields default to `false`.
#[derive(Debug, Clone, Copy, Default)]
pub struct FontFlags {
    /// All glyphs have the same width (bit 1)
    pub fixed_pitch: bool,
    /// Glyphs have serifs (bit 2)
    pub serif: bool,
    /// Font contains glyphs outside the Adobe standard Latin set (bit 3)
    pub symbolic: bool,
    /// Glyphs resemble cursive handwriting (bit 4)
    pub script: bool,
    /// Font uses the Adobe standard Latin character set (bit 6)
    pub non_symbolic: bool,
    /// Glyphs have slanted dominant vertical strokes (bit 7)
    pub italic: bool,
    /// Font contains no lowercase letters (bit 17)
    pub all_cap: bool,
    /// Font is a small-cap font (bit 18)
    pub small_cap: bool,
    /// Bold glyphs should be forced to render bold at small sizes (bit 19)
    pub force_bold: bool,
}

impl FontFlags {
    /// Pack the boolean fields into the PDF font flags integer.
    ///
    /// Bit numbers in the field docs are 1-based; the shift amounts used here
    /// are the corresponding 0-based positions.
    pub fn to_flags(&self) -> u32 {
        // (field value, zero-based bit position)
        let table: [(bool, u32); 9] = [
            (self.fixed_pitch, 0),
            (self.serif, 1),
            (self.symbolic, 2),
            (self.script, 3),
            (self.non_symbolic, 5),
            (self.italic, 6),
            (self.all_cap, 16),
            (self.small_cap, 17),
            (self.force_bold, 18),
        ];

        table
            .iter()
            .filter(|&&(enabled, _)| enabled)
            .fold(0u32, |acc, &(_, bit)| acc | (1u32 << bit))
    }
}
222
/// Font descriptor for PDF embedding
///
/// Note that the descriptor dictionary generator in this file reads `flags`,
/// `bbox`, `italic_angle` and `stem_v` from this struct, while it takes
/// ascent, descent and cap height from the font's `FontMetrics` instead.
#[derive(Debug, Clone)]
pub struct FontDescriptor {
    /// Font name
    pub font_name: String,
    /// Font flags (packed into an integer via `FontFlags::to_flags`)
    pub flags: FontFlags,
    /// Font bounding box [llx, lly, urx, ury]
    pub bbox: [i32; 4],
    /// Italic angle in degrees
    pub italic_angle: f64,
    /// Maximum height above baseline
    pub ascent: i32,
    /// Maximum depth below baseline (negative)
    pub descent: i32,
    /// Height of capital letters
    pub cap_height: i32,
    /// Thickness of dominant vertical stems
    pub stem_v: i32,
    /// Thickness of dominant horizontal stems
    pub stem_h: i32,
    /// Average character width
    pub avg_width: i32,
    /// Maximum character width
    pub max_width: i32,
    /// Width for missing characters
    pub missing_width: i32,
    /// Font file reference (if embedded); `None` until one is assigned
    pub font_file: Option<ObjectId>,
}
253
/// Font metrics for embedded fonts
///
/// The ascent, descent and cap height stored here are the values written to
/// the generated font descriptor dictionary.
#[derive(Debug, Clone)]
pub struct FontMetrics {
    /// Maximum height above baseline
    pub ascent: i32,
    /// Maximum depth below baseline (negative)
    pub descent: i32,
    /// Height of capital letters
    pub cap_height: i32,
    /// Height of lowercase letters
    pub x_height: i32,
    /// Thickness of dominant vertical stems
    pub stem_v: i32,
    /// Thickness of dominant horizontal stems
    pub stem_h: i32,
    /// Average character width
    pub avg_width: i32,
    /// Maximum character width
    pub max_width: i32,
    /// Width for missing characters
    pub missing_width: i32,
}
276
/// PDF font embedding manager
///
/// Owns every font embedded so far and hands out the generated PDF font
/// names that identify them.
#[derive(Debug)]
pub struct FontEmbedder {
    /// Font data cache, keyed by the generated PDF font name
    embedded_fonts: HashMap<String, EmbeddedFontData>,
    /// Next font ID; used to build the next generated font name and then
    /// incremented
    next_font_id: u32,
}
285
/// Embedded font data for PDF generation
///
/// One record is stored per embedded font; the dictionary, descriptor and
/// ToUnicode generators all read from it.
#[derive(Debug, Clone)]
pub struct EmbeddedFontData {
    /// Font name in PDF (written as `BaseFont` / `FontName`)
    pub pdf_name: String,
    /// Font type; selects the dictionary layout that is generated
    pub font_type: FontType,
    /// Font descriptor object
    pub descriptor: FontDescriptor,
    /// Font program data (subset or full)
    pub font_program: Vec<u8>,
    /// Character mappings
    pub encoding: FontEncoding,
    /// Font metrics
    pub metrics: FontMetrics,
    /// Subset glyph set (if subsetted)
    pub subset_glyphs: Option<HashSet<u16>>,
    /// Unicode mappings for ToUnicode CMap, keyed by the 16-bit code that is
    /// written as the source code of each CMap entry
    pub unicode_mappings: HashMap<u16, String>,
}
306
/// Options controlling how a font is embedded.
#[derive(Debug, Clone)]
pub struct EmbeddingOptions {
    /// Request subsetting of the font program
    pub subset: bool,
    /// Upper bound on the glyph count for which a TrueType subset is built;
    /// the TrueType embedding path treats `None` as 256
    pub max_subset_size: Option<usize>,
    /// Whether to compress font streams
    // NOTE(review): not consulted by the code in this part of the file.
    pub compress_font_streams: bool,
    /// Whether to embed font license info
    // NOTE(review): not consulted by the code in this part of the file.
    pub embed_license_info: bool,
}

impl Default for EmbeddingOptions {
    /// Subsetting on (limit 256), stream compression on, license info off.
    fn default() -> Self {
        let max_subset_size = Some(256);
        EmbeddingOptions {
            subset: true,
            max_subset_size,
            compress_font_streams: true,
            embed_license_info: false,
        }
    }
}
330
331impl FontEmbedder {
332    /// Create a new font embedder
333    pub fn new() -> Self {
334        Self {
335            embedded_fonts: HashMap::new(),
336            next_font_id: 1,
337        }
338    }
339
340    /// Embed a TrueType font with optional subsetting
341    pub fn embed_truetype_font(
342        &mut self,
343        font_data: &[u8],
344        used_glyphs: &HashSet<u16>,
345        options: &EmbeddingOptions,
346    ) -> Result<String> {
347        // Parse the TrueType font
348        let font = TrueTypeFont::from_data(font_data)
349            .map_err(|e| PdfError::FontError(format!("Failed to parse font: {e}")))?;
350
351        // Generate unique font name
352        let font_name = format!("ABCDEF+Font{next_id}", next_id = self.next_font_id);
353        self.next_font_id += 1;
354
355        // Determine if we should subset
356        let should_subset =
357            options.subset && used_glyphs.len() < options.max_subset_size.unwrap_or(256);
358
359        // Create font program (subset or full)
360        let font_program = if should_subset {
361            font.create_subset(used_glyphs)
362                .map_err(|e| PdfError::FontError(format!("Failed to create subset: {e}")))?
363        } else {
364            font_data.to_vec()
365        };
366
367        // Extract font metrics
368        let metrics = self.extract_font_metrics(&font)?;
369
370        // Create font descriptor
371        let descriptor = self.create_font_descriptor(&font, &font_name)?;
372
373        // Create character encoding
374        let encoding = self.create_encoding_for_font(&font, used_glyphs)?;
375
376        // Create Unicode mappings for ToUnicode CMap
377        let unicode_mappings = self.create_unicode_mappings(&font, used_glyphs)?;
378
379        // Store embedded font data
380        let embedded_font = EmbeddedFontData {
381            pdf_name: font_name.clone(),
382            font_type: FontType::TrueType,
383            descriptor,
384            font_program,
385            encoding,
386            metrics,
387            subset_glyphs: if should_subset {
388                Some(used_glyphs.clone())
389            } else {
390                None
391            },
392            unicode_mappings,
393        };
394
395        self.embedded_fonts.insert(font_name.clone(), embedded_font);
396        Ok(font_name)
397    }
398
399    /// Create a Type0 (CID) font for complex scripts
400    pub fn embed_cid_font(
401        &mut self,
402        font_data: &[u8],
403        used_chars: &HashSet<u32>,
404        _cmap_name: &str,
405        options: &EmbeddingOptions,
406    ) -> Result<String> {
407        // Parse the font
408        let font = TrueTypeFont::from_data(font_data)
409            .map_err(|e| PdfError::FontError(format!("Failed to parse font: {e}")))?;
410
411        // Generate unique font name
412        let font_name = format!("ABCDEF+CIDFont{next_id}", next_id = self.next_font_id);
413        self.next_font_id += 1;
414
415        // Convert character codes to glyph indices
416        let used_glyphs = self.chars_to_glyphs(&font, used_chars)?;
417
418        // Create subset if requested
419        let font_program = if options.subset {
420            font.create_subset(&used_glyphs)
421                .map_err(|e| PdfError::FontError(format!("Failed to create subset: {e}")))?
422        } else {
423            font_data.to_vec()
424        };
425
426        // Extract metrics
427        let metrics = self.extract_font_metrics(&font)?;
428
429        // Create CID font descriptor
430        let descriptor = self.create_cid_font_descriptor(&font, &font_name)?;
431
432        // Create Identity encoding for CID fonts
433        let encoding = FontEncoding::Identity;
434
435        // Create Unicode mappings
436        let unicode_mappings = self.create_cid_unicode_mappings(&font, used_chars)?;
437
438        let embedded_font = EmbeddedFontData {
439            pdf_name: font_name.clone(),
440            font_type: FontType::Type0,
441            descriptor,
442            font_program,
443            encoding,
444            metrics,
445            subset_glyphs: Some(used_glyphs),
446            unicode_mappings,
447        };
448
449        self.embedded_fonts.insert(font_name.clone(), embedded_font);
450        Ok(font_name)
451    }
452
453    /// Generate PDF font dictionary for embedded font
454    pub fn generate_font_dictionary(&self, font_name: &str) -> Result<Dictionary> {
455        let font_data = self
456            .embedded_fonts
457            .get(font_name)
458            .ok_or_else(|| PdfError::FontError(format!("Font {font_name} not found")))?;
459
460        match font_data.font_type {
461            FontType::TrueType => self.generate_truetype_dictionary(font_data),
462            FontType::Type0 => self.generate_type0_dictionary(font_data),
463            // _ => Err(PdfError::FontError("Unsupported font type for embedding".to_string())),
464        }
465    }
466
467    /// Generate TrueType font dictionary
468    fn generate_truetype_dictionary(&self, font_data: &EmbeddedFontData) -> Result<Dictionary> {
469        let mut font_dict = Dictionary::new();
470
471        // Basic font properties
472        font_dict.set("Type", Object::Name("Font".to_string()));
473        font_dict.set("Subtype", Object::Name("TrueType".to_string()));
474        font_dict.set("BaseFont", Object::Name(font_data.pdf_name.clone()));
475
476        // Font descriptor reference (would be resolved during PDF generation)
477        font_dict.set("FontDescriptor", Object::Reference(ObjectId::new(0, 0))); // Placeholder
478
479        // Encoding
480        match &font_data.encoding {
481            FontEncoding::WinAnsiEncoding => {
482                font_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
483            }
484            FontEncoding::MacRomanEncoding => {
485                font_dict.set("Encoding", Object::Name("MacRomanEncoding".to_string()));
486            }
487            FontEncoding::StandardEncoding => {
488                font_dict.set("Encoding", Object::Name("StandardEncoding".to_string()));
489            }
490            FontEncoding::Custom(differences) => {
491                let mut encoding_dict = Dictionary::new();
492                encoding_dict.set("Type", Object::Name("Encoding".to_string()));
493                encoding_dict.set("BaseEncoding", Object::Name("WinAnsiEncoding".to_string()));
494
495                // Add differences array
496                let mut diff_array = Vec::new();
497                for diff in differences {
498                    diff_array.push(Object::Integer(diff.code as i64));
499                    for name in &diff.names {
500                        diff_array.push(Object::Name(name.clone()));
501                    }
502                }
503                encoding_dict.set("Differences", Object::Array(diff_array));
504                font_dict.set("Encoding", Object::Dictionary(encoding_dict));
505            }
506            _ => {}
507        }
508
509        // First and last character codes
510        font_dict.set("FirstChar", Object::Integer(32));
511        font_dict.set("LastChar", Object::Integer(255));
512
513        // Character widths (simplified - would need actual glyph widths)
514        let widths: Vec<Object> = (32..=255)
515            .map(|_| Object::Integer(500)) // Default width
516            .collect();
517        font_dict.set("Widths", Object::Array(widths));
518
519        Ok(font_dict)
520    }
521
522    /// Generate Type0 (CID) font dictionary
523    fn generate_type0_dictionary(&self, font_data: &EmbeddedFontData) -> Result<Dictionary> {
524        let mut font_dict = Dictionary::new();
525
526        // Type0 font properties
527        font_dict.set("Type", Object::Name("Font".to_string()));
528        font_dict.set("Subtype", Object::Name("Type0".to_string()));
529        font_dict.set("BaseFont", Object::Name(font_data.pdf_name.clone()));
530
531        // Encoding (CMap)
532        font_dict.set("Encoding", Object::Name("Identity-H".to_string()));
533
534        // DescendantFonts array (would contain CIDFont reference)
535        font_dict.set(
536            "DescendantFonts",
537            Object::Array(vec![
538                Object::Reference(ObjectId::new(0, 0)), // Placeholder for CIDFont reference
539            ]),
540        );
541
542        // ToUnicode CMap reference (if needed)
543        if !font_data.unicode_mappings.is_empty() {
544            font_dict.set("ToUnicode", Object::Reference(ObjectId::new(0, 0))); // Placeholder
545        }
546
547        Ok(font_dict)
548    }
549
550    /// Generate font descriptor dictionary
551    pub fn generate_font_descriptor(&self, font_name: &str) -> Result<Dictionary> {
552        let font_data = self
553            .embedded_fonts
554            .get(font_name)
555            .ok_or_else(|| PdfError::FontError(format!("Font {font_name} not found")))?;
556
557        let mut desc_dict = Dictionary::new();
558
559        desc_dict.set("Type", Object::Name("FontDescriptor".to_string()));
560        desc_dict.set("FontName", Object::Name(font_data.pdf_name.clone()));
561
562        // Font flags
563        desc_dict.set(
564            "Flags",
565            Object::Integer(font_data.descriptor.flags.to_flags() as i64),
566        );
567
568        // Font metrics
569        desc_dict.set("Ascent", Object::Integer(font_data.metrics.ascent as i64));
570        desc_dict.set("Descent", Object::Integer(font_data.metrics.descent as i64));
571        desc_dict.set(
572            "CapHeight",
573            Object::Integer(font_data.metrics.cap_height as i64),
574        );
575        desc_dict.set(
576            "ItalicAngle",
577            Object::Real(font_data.descriptor.italic_angle),
578        );
579        desc_dict.set("StemV", Object::Integer(font_data.descriptor.stem_v as i64));
580
581        // Font bounding box
582        let bbox = vec![
583            Object::Integer(font_data.descriptor.bbox[0] as i64),
584            Object::Integer(font_data.descriptor.bbox[1] as i64),
585            Object::Integer(font_data.descriptor.bbox[2] as i64),
586            Object::Integer(font_data.descriptor.bbox[3] as i64),
587        ];
588        desc_dict.set("FontBBox", Object::Array(bbox));
589
590        // Font file reference (would be set during PDF generation)
591        match font_data.font_type {
592            FontType::TrueType => {
593                desc_dict.set("FontFile2", Object::Reference(ObjectId::new(0, 0)));
594                // Placeholder
595            }
596            FontType::Type0 => {
597                desc_dict.set("FontFile2", Object::Reference(ObjectId::new(0, 0)));
598                // Placeholder
599            }
600        }
601
602        Ok(desc_dict)
603    }
604
605    /// Generate ToUnicode CMap stream
606    pub fn generate_tounicode_cmap(&self, font_name: &str) -> Result<String> {
607        let font_data = self
608            .embedded_fonts
609            .get(font_name)
610            .ok_or_else(|| PdfError::FontError(format!("Font {font_name} not found")))?;
611
612        if font_data.unicode_mappings.is_empty() {
613            return Err(PdfError::FontError(
614                "No Unicode mappings available".to_string(),
615            ));
616        }
617
618        let mut cmap_content = String::new();
619
620        // CMap header
621        cmap_content.push_str("/CIDInit /ProcSet findresource begin\n");
622        cmap_content.push_str("12 dict begin\n");
623        cmap_content.push_str("begincmap\n");
624        cmap_content.push_str("/CIDSystemInfo\n");
625        cmap_content.push_str("<<\n");
626        cmap_content.push_str("/Registry (Adobe)\n");
627        cmap_content.push_str("/Ordering (UCS)\n");
628        cmap_content.push_str("/Supplement 0\n");
629        cmap_content.push_str(">> def\n");
630        cmap_content.push_str("/CMapName /Adobe-Identity-UCS def\n");
631        cmap_content.push_str("/CMapType 2 def\n");
632        cmap_content.push_str("1 begincodespacerange\n");
633        cmap_content.push_str("<0000> <FFFF>\n");
634        cmap_content.push_str("endcodespacerange\n");
635
636        // Unicode mappings
637        cmap_content.push_str(&format!(
638            "{} beginbfchar\n",
639            font_data.unicode_mappings.len()
640        ));
641        for (glyph_id, unicode_string) in &font_data.unicode_mappings {
642            cmap_content.push_str(&format!(
643                "<{:04X}> <{}>\n",
644                glyph_id,
645                unicode_string
646                    .chars()
647                    .map(|c| format!("{c:04X}", c = c as u32))
648                    .collect::<String>()
649            ));
650        }
651        cmap_content.push_str("endbfchar\n");
652
653        // CMap footer
654        cmap_content.push_str("endcmap\n");
655        cmap_content.push_str("CMapName currentdict /CMap defineresource pop\n");
656        cmap_content.push_str("end\n");
657        cmap_content.push_str("end\n");
658
659        Ok(cmap_content)
660    }
661
    /// Get all embedded fonts
    ///
    /// Returns a shared borrow of the internal cache, keyed by the generated
    /// PDF font name of each embedded font.
    pub fn embedded_fonts(&self) -> &HashMap<String, EmbeddedFontData> {
        &self.embedded_fonts
    }
666
667    /// Extract font metrics from TrueType font
668    fn extract_font_metrics(&self, _font: &TrueTypeFont) -> Result<FontMetrics> {
669        // This would extract actual metrics from font tables
670        // For now, return default metrics
671        Ok(FontMetrics {
672            ascent: 750,
673            descent: -250,
674            cap_height: 700,
675            x_height: 500,
676            stem_v: 100,
677            stem_h: 50,
678            avg_width: 500,
679            max_width: 1000,
680            missing_width: 500,
681        })
682    }
683
684    /// Create font descriptor from TrueType font
685    fn create_font_descriptor(
686        &self,
687        _font: &TrueTypeFont,
688        font_name: &str,
689    ) -> Result<FontDescriptor> {
690        Ok(FontDescriptor {
691            font_name: font_name.to_string(),
692            flags: FontFlags {
693                non_symbolic: true,
694                ..Default::default()
695            },
696            bbox: [-100, -250, 1000, 750], // Default bounding box
697            italic_angle: 0.0,
698            ascent: 750,
699            descent: -250,
700            cap_height: 700,
701            stem_v: 100,
702            stem_h: 50,
703            avg_width: 500,
704            max_width: 1000,
705            missing_width: 500,
706            font_file: None,
707        })
708    }
709
    /// Create CID font descriptor
    ///
    /// Currently identical to the simple-font descriptor: the call is
    /// forwarded unchanged to `create_font_descriptor`.
    fn create_cid_font_descriptor(
        &self,
        font: &TrueTypeFont,
        font_name: &str,
    ) -> Result<FontDescriptor> {
        // Similar to create_font_descriptor but for CID fonts
        self.create_font_descriptor(font, font_name)
    }
719
    /// Create encoding for font
    ///
    /// Currently a stub: both arguments are ignored and
    /// `FontEncoding::WinAnsiEncoding` is always returned.
    fn create_encoding_for_font(
        &self,
        _font: &TrueTypeFont,
        _used_glyphs: &HashSet<u16>,
    ) -> Result<FontEncoding> {
        // For now, return WinAnsi encoding
        // In a full implementation, this would analyze the font and create appropriate encoding
        Ok(FontEncoding::WinAnsiEncoding)
    }
730
731    /// Create Unicode mappings for simple fonts
732    fn create_unicode_mappings(
733        &self,
734        _font: &TrueTypeFont,
735        used_glyphs: &HashSet<u16>,
736    ) -> Result<HashMap<u16, String>> {
737        let mut mappings = HashMap::new();
738
739        // Create basic ASCII mappings
740        for glyph_id in used_glyphs {
741            if *glyph_id < 256 {
742                let unicode_char = char::from(*glyph_id as u8);
743                if unicode_char.is_ascii_graphic() || unicode_char == ' ' {
744                    mappings.insert(*glyph_id, unicode_char.to_string());
745                }
746            }
747        }
748
749        Ok(mappings)
750    }
751
752    /// Create Unicode mappings for CID fonts
753    fn create_cid_unicode_mappings(
754        &self,
755        _font: &TrueTypeFont,
756        used_chars: &HashSet<u32>,
757    ) -> Result<HashMap<u16, String>> {
758        let mut mappings = HashMap::new();
759
760        // Convert character codes to Unicode strings
761        for &char_code in used_chars {
762            if let Some(unicode_char) = char::from_u32(char_code) {
763                // Find glyph ID for this character (simplified)
764                let glyph_id = char_code as u16; // Simplified mapping
765                mappings.insert(glyph_id, unicode_char.to_string());
766            }
767        }
768
769        Ok(mappings)
770    }
771
772    /// Convert character codes to glyph indices
773    fn chars_to_glyphs(&self, _font: &TrueTypeFont, chars: &HashSet<u32>) -> Result<HashSet<u16>> {
774        let mut glyphs = HashSet::new();
775
776        // Always include glyph 0 (missing glyph)
777        glyphs.insert(0);
778
779        // Convert characters to glyph indices using font's character map
780        for &char_code in chars {
781            // This is simplified - a real implementation would use the font's cmap table
782            let glyph_id = if char_code < 65536 {
783                char_code as u16
784            } else {
785                0 // Missing glyph for characters outside BMP
786            };
787            glyphs.insert(glyph_id);
788        }
789
790        Ok(glyphs)
791    }
792}
793
/// The default embedder is the same as the one built by `FontEmbedder::new`.
impl Default for FontEmbedder {
    fn default() -> Self {
        Self::new()
    }
}
799
800#[cfg(test)]
801mod tests {
802    use super::*;
803
804    #[test]
805    fn test_font_embedder_creation() {
806        let embedder = FontEmbedder::new();
807        assert_eq!(embedder.embedded_fonts.len(), 0);
808        assert_eq!(embedder.next_font_id, 1);
809    }
810
811    #[test]
812    fn test_embedding_options_default() {
813        let options = EmbeddingOptions::default();
814        assert!(options.subset);
815        assert_eq!(options.max_subset_size, Some(256));
816        assert!(options.compress_font_streams);
817        assert!(!options.embed_license_info);
818    }
819
820    #[test]
821    fn test_generate_tounicode_cmap_empty() {
822        let mut embedder = FontEmbedder::new();
823
824        // Create a font with no Unicode mappings
825        let font_data = EmbeddedFontData {
826            pdf_name: "TestFont".to_string(),
827            font_type: FontType::TrueType,
828            descriptor: FontDescriptor {
829                font_name: "TestFont".to_string(),
830                flags: FontFlags::default(),
831                bbox: [0, 0, 1000, 1000],
832                italic_angle: 0.0,
833                ascent: 750,
834                descent: -250,
835                cap_height: 700,
836                stem_v: 100,
837                stem_h: 50,
838                avg_width: 500,
839                max_width: 1000,
840                missing_width: 500,
841                font_file: None,
842            },
843            font_program: vec![],
844            encoding: FontEncoding::WinAnsiEncoding,
845            metrics: FontMetrics {
846                ascent: 750,
847                descent: -250,
848                cap_height: 700,
849                x_height: 500,
850                stem_v: 100,
851                stem_h: 50,
852                avg_width: 500,
853                max_width: 1000,
854                missing_width: 500,
855            },
856            subset_glyphs: None,
857            unicode_mappings: HashMap::new(),
858        };
859
860        embedder
861            .embedded_fonts
862            .insert("TestFont".to_string(), font_data);
863
864        let result = embedder.generate_tounicode_cmap("TestFont");
865        assert!(result.is_err());
866    }
867
868    #[test]
869    fn test_generate_truetype_dictionary() {
870        let embedder = FontEmbedder::new();
871
872        let font_data = EmbeddedFontData {
873            pdf_name: "TestFont".to_string(),
874            font_type: FontType::TrueType,
875            descriptor: FontDescriptor {
876                font_name: "TestFont".to_string(),
877                flags: FontFlags::default(),
878                bbox: [0, 0, 1000, 1000],
879                italic_angle: 0.0,
880                ascent: 750,
881                descent: -250,
882                cap_height: 700,
883                stem_v: 100,
884                stem_h: 50,
885                avg_width: 500,
886                max_width: 1000,
887                missing_width: 500,
888                font_file: None,
889            },
890            font_program: vec![],
891            encoding: FontEncoding::WinAnsiEncoding,
892            metrics: FontMetrics {
893                ascent: 750,
894                descent: -250,
895                cap_height: 700,
896                x_height: 500,
897                stem_v: 100,
898                stem_h: 50,
899                avg_width: 500,
900                max_width: 1000,
901                missing_width: 500,
902            },
903            subset_glyphs: None,
904            unicode_mappings: HashMap::new(),
905        };
906
907        let dict = embedder.generate_truetype_dictionary(&font_data).unwrap();
908
909        // Verify basic font properties
910        if let Some(Object::Name(font_type)) = dict.get("Type") {
911            assert_eq!(font_type, "Font");
912        }
913        if let Some(Object::Name(subtype)) = dict.get("Subtype") {
914            assert_eq!(subtype, "TrueType");
915        }
916        if let Some(Object::Name(base_font)) = dict.get("BaseFont") {
917            assert_eq!(base_font, "TestFont");
918        }
919    }
920
921    #[test]
922    fn test_generate_type0_dictionary() {
923        let embedder = FontEmbedder::new();
924
925        let font_data = EmbeddedFontData {
926            pdf_name: "TestCIDFont".to_string(),
927            font_type: FontType::Type0,
928            descriptor: FontDescriptor {
929                font_name: "TestCIDFont".to_string(),
930                flags: FontFlags::default(),
931                bbox: [0, 0, 1000, 1000],
932                italic_angle: 0.0,
933                ascent: 750,
934                descent: -250,
935                cap_height: 700,
936                stem_v: 100,
937                stem_h: 50,
938                avg_width: 500,
939                max_width: 1000,
940                missing_width: 500,
941                font_file: None,
942            },
943            font_program: vec![],
944            encoding: FontEncoding::Identity,
945            metrics: FontMetrics {
946                ascent: 750,
947                descent: -250,
948                cap_height: 700,
949                x_height: 500,
950                stem_v: 100,
951                stem_h: 50,
952                avg_width: 500,
953                max_width: 1000,
954                missing_width: 500,
955            },
956            subset_glyphs: None,
957            unicode_mappings: HashMap::new(),
958        };
959
960        let dict = embedder.generate_type0_dictionary(&font_data).unwrap();
961
962        // Verify Type0 font properties
963        if let Some(Object::Name(subtype)) = dict.get("Subtype") {
964            assert_eq!(subtype, "Type0");
965        }
966        if let Some(Object::Name(encoding)) = dict.get("Encoding") {
967            assert_eq!(encoding, "Identity-H");
968        }
969        if let Some(Object::Array(descendant_fonts)) = dict.get("DescendantFonts") {
970            assert_eq!(descendant_fonts.len(), 1);
971        }
972    }
973
974    #[test]
975    fn test_chars_to_glyphs_conversion() {
976        let _embedder = FontEmbedder::new();
977        let _font_data = vec![0; 100]; // Dummy font data
978
979        // This would fail in real implementation due to invalid font data
980        // but tests the function structure
981        let chars: HashSet<u32> = [65, 66, 67].iter().cloned().collect(); // A, B, C
982
983        // Test would require valid font data to complete
984        // For now, test that the function exists and compiles
985        assert!(chars.len() == 3);
986    }
987
988    #[test]
989    fn test_unicode_mappings_creation() {
990        let _embedder = FontEmbedder::new();
991        let glyphs: HashSet<u16> = [65, 66, 67].iter().cloned().collect();
992
993        // Create dummy font for testing
994        let _font_data = vec![0; 100];
995
996        // Test would require valid TrueType font parsing
997        // For now, verify function signature
998        assert!(glyphs.len() == 3);
999    }
1000
1001    #[test]
1002    fn test_font_descriptor_generation() {
1003        let _embedder = FontEmbedder::new();
1004
1005        let font_data = EmbeddedFontData {
1006            pdf_name: "TestFont".to_string(),
1007            font_type: FontType::TrueType,
1008            descriptor: FontDescriptor {
1009                font_name: "TestFont".to_string(),
1010                flags: FontFlags {
1011                    non_symbolic: true,
1012                    serif: true,
1013                    ..Default::default()
1014                },
1015                bbox: [-100, -250, 1000, 750],
1016                italic_angle: 0.0,
1017                ascent: 750,
1018                descent: -250,
1019                cap_height: 700,
1020                stem_v: 100,
1021                stem_h: 50,
1022                avg_width: 500,
1023                max_width: 1000,
1024                missing_width: 500,
1025                font_file: None,
1026            },
1027            font_program: vec![],
1028            encoding: FontEncoding::WinAnsiEncoding,
1029            metrics: FontMetrics {
1030                ascent: 750,
1031                descent: -250,
1032                cap_height: 700,
1033                x_height: 500,
1034                stem_v: 100,
1035                stem_h: 50,
1036                avg_width: 500,
1037                max_width: 1000,
1038                missing_width: 500,
1039            },
1040            subset_glyphs: None,
1041            unicode_mappings: HashMap::new(),
1042        };
1043
1044        let mut embedder_with_font = FontEmbedder::new();
1045        embedder_with_font
1046            .embedded_fonts
1047            .insert("TestFont".to_string(), font_data);
1048
1049        let desc_dict = embedder_with_font
1050            .generate_font_descriptor("TestFont")
1051            .unwrap();
1052
1053        // Verify font descriptor properties
1054        if let Some(Object::Name(font_name)) = desc_dict.get("FontName") {
1055            assert_eq!(font_name, "TestFont");
1056        }
1057        if let Some(Object::Integer(flags)) = desc_dict.get("Flags") {
1058            assert!(*flags > 0); // Should have some flags set
1059        }
1060        if let Some(Object::Array(bbox)) = desc_dict.get("FontBBox") {
1061            assert_eq!(bbox.len(), 4);
1062        }
1063    }
1064
1065    // =========================================================================
1066    // CIDFontType selection tests (Issue #165)
1067    // =========================================================================
1068
1069    #[test]
1070    fn test_cff_font_uses_cidfonttype0() {
1071        // CFF/OpenType fonts MUST use CIDFontType0 per ISO 32000-1 §9.7.4
1072        assert!(
1073            !CjkFontType::should_use_cidfonttype2(true),
1074            "CFF → CIDFontType0"
1075        );
1076    }
1077
1078    #[test]
1079    fn test_truetype_font_uses_cidfonttype2() {
1080        // TrueType fonts MUST use CIDFontType2 per ISO 32000-1 §9.7.4
1081        assert!(
1082            CjkFontType::should_use_cidfonttype2(false),
1083            "TrueType → CIDFontType2"
1084        );
1085    }
1086}