Skip to main content

oxidize_pdf/text/fonts/
embedding.rs

1//! Font embedding for PDF generation according to ISO 32000-1 Section 9.8
2//!
3//! This module provides complete font embedding capabilities including:
4//! - TrueType font embedding with subsetting
5//! - Font descriptor generation  
6//! - Character encoding mappings
7//! - CID font support for complex scripts
8
9use crate::error::{PdfError, Result};
10use crate::objects::{Dictionary, Object, ObjectId};
11use crate::text::fonts::truetype::TrueTypeFont;
12use std::collections::{HashMap, HashSet};
13
/// Kind of PDF font object an embedded font is written as.
///
/// The embedder dispatches on this value to choose between the simple
/// TrueType font dictionary and the composite Type0 font dictionary.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum FontType {
    /// Simple TrueType font (written with `/Subtype /TrueType`)
    TrueType,
    /// Type 0 font (composite/CID, written with `/Subtype /Type0`)
    Type0,
}
22
/// Character encoding recorded for an embedded font.
///
/// The named variants are written to the font dictionary as the PDF
/// encoding name of the same spelling.
#[derive(Debug, Clone, PartialEq)]
pub enum FontEncoding {
    /// Standard encoding (`/StandardEncoding`)
    StandardEncoding,
    /// MacRoman encoding (`/MacRomanEncoding`)
    MacRomanEncoding,
    /// WinAnsi encoding (`/WinAnsiEncoding`)
    WinAnsiEncoding,
    /// Custom encoding expressed as a list of differences; the TrueType
    /// dictionary generator writes these on top of a `WinAnsiEncoding` base
    Custom(Vec<EncodingDifference>),
    /// Identity encoding for CID fonts (no `/Encoding` entry is written for
    /// it by the simple TrueType dictionary generator)
    Identity,
}
37
/// CJK font flavours, used to choose the `CIDSystemInfo` values for a font.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CjkFontType {
    /// Chinese Simplified (Source Han Sans SC, Noto Sans CJK SC, etc.)
    ChineseSimplified,
    /// Chinese Traditional (Source Han Sans TC, Noto Sans CJK TC, etc.)
    ChineseTraditional,
    /// Japanese (Source Han Sans JP, Noto Sans CJK JP, etc.)
    Japanese,
    /// Korean (Source Han Sans KR, Noto Sans CJK KR, etc.)
    Korean,
    /// Generic CJK (fallback)
    Generic,
}

impl CjkFontType {
    /// Returns `(registry, ordering, supplement)` for the `CIDSystemInfo`
    /// dictionary of this font type.
    ///
    /// Every variant currently yields `("Adobe", "Identity", 0)`. The match is
    /// kept exhaustive so that adding a variant forces a decision here.
    pub fn cid_system_info(&self) -> (&'static str, &'static str, i32) {
        match self {
            CjkFontType::ChineseSimplified
            | CjkFontType::ChineseTraditional
            | CjkFontType::Japanese
            | CjkFontType::Korean
            | CjkFontType::Generic => ("Adobe", "Identity", 0),
        }
    }

    /// Guesses the CJK font type from a font name.
    ///
    /// Matching is case-insensitive and purely substring based, tried in this
    /// order: Source Han Sans family names, Noto Sans CJK family names, then
    /// generic language markers. Returns `None` when nothing matches.
    ///
    /// The short markers (`"sc"`, `"jp"`, `"gb"`, ...) are heuristics and can
    /// match unrelated names that happen to contain those letters.
    pub fn detect_from_name(font_name: &str) -> Option<Self> {
        let name_lower = font_name.to_lowercase();
        let has = |needle: &str| name_lower.contains(needle);

        // Source Han Sans detection (various naming patterns)
        let source_han_markers = [
            "sourcehansans",
            "source han sans",
            "hansans",
            "han sans",
            "sourcehan",
            "source han",
        ];
        if source_han_markers.iter().any(|&marker| has(marker)) {
            if has("sc") || has("simplifiedchinese") {
                return Some(CjkFontType::ChineseSimplified);
            }
            if has("tc") || has("traditionalchinese") {
                return Some(CjkFontType::ChineseTraditional);
            }
            if has("jp") || has("japanese") {
                return Some(CjkFontType::Japanese);
            }
            if has("kr") || has("korean") {
                return Some(CjkFontType::Korean);
            }
        }

        // Noto Sans CJK detection
        let noto_markers = ["notosanscjk", "noto sans cjk"];
        if let Some(&marker) = noto_markers.iter().find(|&&marker| has(marker)) {
            // The space-less marker "notosanscjk" itself contains "sc"
            // (san-S C-jk), so the region tag must be searched for outside the
            // family marker; otherwise every "NotoSansCJKxx" name would be
            // reported as Chinese Simplified. The marker is replaced by a
            // space so that its neighbours cannot fuse into a spurious tag.
            let rest = name_lower.replacen(marker, " ", 1);
            let region_tags = [
                ("sc", CjkFontType::ChineseSimplified),
                ("tc", CjkFontType::ChineseTraditional),
                ("jp", CjkFontType::Japanese),
                ("kr", CjkFontType::Korean),
            ];
            for &(tag, kind) in region_tags.iter() {
                if rest.contains(tag) {
                    return Some(kind);
                }
            }
        }

        // Generic patterns
        if has("chinese") || has("zh") || has("gb") {
            if has("traditional") || has("tw") || has("hk") {
                return Some(CjkFontType::ChineseTraditional);
            }
            return Some(CjkFontType::ChineseSimplified);
        }

        if has("japanese") || has("jp") || has("japan") {
            return Some(CjkFontType::Japanese);
        }

        if has("korean") || has("kr") || has("korea") {
            return Some(CjkFontType::Korean);
        }

        None
    }
}
136
/// One run of an encoding `Differences` array.
///
/// When a custom encoding is written out, each entry contributes its `code`
/// as an integer followed by every entry of `names` as a PDF name.
#[derive(Debug, Clone, PartialEq)]
pub struct EncodingDifference {
    /// Starting character code
    pub code: u8,
    /// Glyph names for consecutive character codes, beginning at `code`
    pub names: Vec<String>,
}
145
/// Boolean view of the font descriptor `/Flags` bit field.
///
/// Each field corresponds to one flag; [`FontFlags::to_flags`] packs them
/// into the integer stored in the descriptor.
#[derive(Debug, Clone, Copy, Default)]
pub struct FontFlags {
    /// FixedPitch: all glyphs have the same width (value `1 << 0`)
    pub fixed_pitch: bool,
    /// Serif: glyphs have serifs (value `1 << 1`)
    pub serif: bool,
    /// Symbolic: font contains glyphs outside the Adobe standard Latin
    /// character set (value `1 << 2`)
    pub symbolic: bool,
    /// Script: glyphs resemble cursive handwriting (value `1 << 3`)
    pub script: bool,
    /// Nonsymbolic: font uses the Adobe standard Latin character set or a
    /// subset of it (value `1 << 5`)
    pub non_symbolic: bool,
    /// Italic: glyphs have dominant vertical strokes that are slanted
    /// (value `1 << 6`)
    pub italic: bool,
    /// AllCap: font contains no lowercase letters (value `1 << 16`)
    pub all_cap: bool,
    /// SmallCap: lowercase letters are drawn as smaller capitals
    /// (value `1 << 17`)
    pub small_cap: bool,
    /// ForceBold: bold glyphs should be forced to look bold at small sizes
    /// (value `1 << 18`)
    pub force_bold: bool,
}

impl FontFlags {
    /// Packs the enabled flags into the PDF `/Flags` integer.
    ///
    /// Returns `0` when no flag is set.
    pub fn to_flags(&self) -> u32 {
        // (flag state, bit shift) pairs; shifts 4 and 7..=15 are unused.
        let table: [(bool, u32); 9] = [
            (self.fixed_pitch, 0),
            (self.serif, 1),
            (self.symbolic, 2),
            (self.script, 3),
            (self.non_symbolic, 5),
            (self.italic, 6),
            (self.all_cap, 16),
            (self.small_cap, 17),
            (self.force_bold, 18),
        ];

        table
            .iter()
            .filter(|&&(enabled, _)| enabled)
            .fold(0u32, |packed, &(_, shift)| packed | (1u32 << shift))
    }
}
203
/// Font descriptor values kept for an embedded font.
///
/// The descriptor dictionary generator reads `flags`, `italic_angle`,
/// `stem_v` and `bbox` from here; ascent, descent and cap height are taken
/// from the font's [`FontMetrics`] instead.
#[derive(Debug, Clone)]
pub struct FontDescriptor {
    /// Font name
    pub font_name: String,
    /// Font flags
    pub flags: FontFlags,
    /// Font bounding box [llx, lly, urx, ury]
    pub bbox: [i32; 4],
    /// Italic angle in degrees
    pub italic_angle: f64,
    /// Maximum height above baseline
    pub ascent: i32,
    /// Maximum depth below baseline (negative)
    pub descent: i32,
    /// Height of capital letters
    pub cap_height: i32,
    /// Thickness of dominant vertical stems
    pub stem_v: i32,
    /// Thickness of dominant horizontal stems
    pub stem_h: i32,
    /// Average character width
    pub avg_width: i32,
    /// Maximum character width
    pub max_width: i32,
    /// Width for missing characters
    pub missing_width: i32,
    /// Font file reference (if embedded); `None` until one is assigned
    pub font_file: Option<ObjectId>,
}
234
/// Font metrics for embedded fonts.
///
/// The metrics extractor in this file currently fills these with fixed
/// default values rather than reading them from the font tables.
#[derive(Debug, Clone)]
pub struct FontMetrics {
    /// Maximum height above baseline
    pub ascent: i32,
    /// Maximum depth below baseline (negative)
    pub descent: i32,
    /// Height of capital letters
    pub cap_height: i32,
    /// Height of lowercase letters
    pub x_height: i32,
    /// Thickness of dominant vertical stems
    pub stem_v: i32,
    /// Thickness of dominant horizontal stems
    pub stem_h: i32,
    /// Average character width
    pub avg_width: i32,
    /// Maximum character width
    pub max_width: i32,
    /// Width for missing characters
    pub missing_width: i32,
}
257
/// PDF font embedding manager.
///
/// Keeps every font embedded so far, keyed by its generated PDF font name,
/// and hands out the numeric suffix used to build those names.
#[derive(Debug)]
pub struct FontEmbedder {
    /// Embedded fonts, keyed by the PDF font name returned from the embed call
    embedded_fonts: HashMap<String, EmbeddedFontData>,
    /// Numeric suffix for the next generated font name; starts at 1 and is
    /// incremented for every embed call that gets past font parsing
    next_font_id: u32,
}
266
/// Everything recorded about one embedded font, from which the PDF font
/// dictionary, font descriptor and ToUnicode CMap are generated.
#[derive(Debug, Clone)]
pub struct EmbeddedFontData {
    /// Font name in PDF (used as `/BaseFont` and `/FontName`)
    pub pdf_name: String,
    /// Font type; selects which font dictionary layout is generated
    pub font_type: FontType,
    /// Font descriptor object
    pub descriptor: FontDescriptor,
    /// Font program data (subset or full)
    pub font_program: Vec<u8>,
    /// Character mappings
    pub encoding: FontEncoding,
    /// Font metrics
    pub metrics: FontMetrics,
    /// Subset glyph set (if subsetted)
    pub subset_glyphs: Option<HashSet<u16>>,
    /// Unicode mappings for ToUnicode CMap: source code -> Unicode text
    pub unicode_mappings: HashMap<u16, String>,
}
287
/// Knobs controlling how a font is embedded.
#[derive(Debug, Clone)]
pub struct EmbeddingOptions {
    /// Request a subset of the font instead of the full program
    pub subset: bool,
    /// Glyph-count threshold for subsetting TrueType fonts: a subset is only
    /// built while fewer glyphs than this are in use (`None` is treated as 256)
    pub max_subset_size: Option<usize>,
    /// Whether to compress font streams
    // NOTE(review): not read by the embedding code visible in this file.
    pub compress_font_streams: bool,
    /// Whether to embed font license info
    // NOTE(review): not read by the embedding code visible in this file.
    pub embed_license_info: bool,
}

impl Default for EmbeddingOptions {
    /// Subsetting on (threshold 256 glyphs), stream compression on,
    /// license info off.
    fn default() -> Self {
        let default_glyph_threshold: usize = 256;

        EmbeddingOptions {
            subset: true,
            max_subset_size: Some(default_glyph_threshold),
            compress_font_streams: true,
            embed_license_info: false,
        }
    }
}
311
312impl FontEmbedder {
313    /// Create a new font embedder
314    pub fn new() -> Self {
315        Self {
316            embedded_fonts: HashMap::new(),
317            next_font_id: 1,
318        }
319    }
320
321    /// Embed a TrueType font with optional subsetting
322    pub fn embed_truetype_font(
323        &mut self,
324        font_data: &[u8],
325        used_glyphs: &HashSet<u16>,
326        options: &EmbeddingOptions,
327    ) -> Result<String> {
328        // Parse the TrueType font
329        let font = TrueTypeFont::from_data(font_data)
330            .map_err(|e| PdfError::FontError(format!("Failed to parse font: {e}")))?;
331
332        // Generate unique font name
333        let font_name = format!("ABCDEF+Font{next_id}", next_id = self.next_font_id);
334        self.next_font_id += 1;
335
336        // Determine if we should subset
337        let should_subset =
338            options.subset && used_glyphs.len() < options.max_subset_size.unwrap_or(256);
339
340        // Create font program (subset or full)
341        let font_program = if should_subset {
342            font.create_subset(used_glyphs)
343                .map_err(|e| PdfError::FontError(format!("Failed to create subset: {e}")))?
344        } else {
345            font_data.to_vec()
346        };
347
348        // Extract font metrics
349        let metrics = self.extract_font_metrics(&font)?;
350
351        // Create font descriptor
352        let descriptor = self.create_font_descriptor(&font, &font_name)?;
353
354        // Create character encoding
355        let encoding = self.create_encoding_for_font(&font, used_glyphs)?;
356
357        // Create Unicode mappings for ToUnicode CMap
358        let unicode_mappings = self.create_unicode_mappings(&font, used_glyphs)?;
359
360        // Store embedded font data
361        let embedded_font = EmbeddedFontData {
362            pdf_name: font_name.clone(),
363            font_type: FontType::TrueType,
364            descriptor,
365            font_program,
366            encoding,
367            metrics,
368            subset_glyphs: if should_subset {
369                Some(used_glyphs.clone())
370            } else {
371                None
372            },
373            unicode_mappings,
374        };
375
376        self.embedded_fonts.insert(font_name.clone(), embedded_font);
377        Ok(font_name)
378    }
379
380    /// Create a Type0 (CID) font for complex scripts
381    pub fn embed_cid_font(
382        &mut self,
383        font_data: &[u8],
384        used_chars: &HashSet<u32>,
385        _cmap_name: &str,
386        options: &EmbeddingOptions,
387    ) -> Result<String> {
388        // Parse the font
389        let font = TrueTypeFont::from_data(font_data)
390            .map_err(|e| PdfError::FontError(format!("Failed to parse font: {e}")))?;
391
392        // Generate unique font name
393        let font_name = format!("ABCDEF+CIDFont{next_id}", next_id = self.next_font_id);
394        self.next_font_id += 1;
395
396        // Convert character codes to glyph indices
397        let used_glyphs = self.chars_to_glyphs(&font, used_chars)?;
398
399        // Create subset if requested
400        let font_program = if options.subset {
401            font.create_subset(&used_glyphs)
402                .map_err(|e| PdfError::FontError(format!("Failed to create subset: {e}")))?
403        } else {
404            font_data.to_vec()
405        };
406
407        // Extract metrics
408        let metrics = self.extract_font_metrics(&font)?;
409
410        // Create CID font descriptor
411        let descriptor = self.create_cid_font_descriptor(&font, &font_name)?;
412
413        // Create Identity encoding for CID fonts
414        let encoding = FontEncoding::Identity;
415
416        // Create Unicode mappings
417        let unicode_mappings = self.create_cid_unicode_mappings(&font, used_chars)?;
418
419        let embedded_font = EmbeddedFontData {
420            pdf_name: font_name.clone(),
421            font_type: FontType::Type0,
422            descriptor,
423            font_program,
424            encoding,
425            metrics,
426            subset_glyphs: Some(used_glyphs),
427            unicode_mappings,
428        };
429
430        self.embedded_fonts.insert(font_name.clone(), embedded_font);
431        Ok(font_name)
432    }
433
434    /// Generate PDF font dictionary for embedded font
435    pub fn generate_font_dictionary(&self, font_name: &str) -> Result<Dictionary> {
436        let font_data = self
437            .embedded_fonts
438            .get(font_name)
439            .ok_or_else(|| PdfError::FontError(format!("Font {font_name} not found")))?;
440
441        match font_data.font_type {
442            FontType::TrueType => self.generate_truetype_dictionary(font_data),
443            FontType::Type0 => self.generate_type0_dictionary(font_data),
444            // _ => Err(PdfError::FontError("Unsupported font type for embedding".to_string())),
445        }
446    }
447
448    /// Generate TrueType font dictionary
449    fn generate_truetype_dictionary(&self, font_data: &EmbeddedFontData) -> Result<Dictionary> {
450        let mut font_dict = Dictionary::new();
451
452        // Basic font properties
453        font_dict.set("Type", Object::Name("Font".to_string()));
454        font_dict.set("Subtype", Object::Name("TrueType".to_string()));
455        font_dict.set("BaseFont", Object::Name(font_data.pdf_name.clone()));
456
457        // Font descriptor reference (would be resolved during PDF generation)
458        font_dict.set("FontDescriptor", Object::Reference(ObjectId::new(0, 0))); // Placeholder
459
460        // Encoding
461        match &font_data.encoding {
462            FontEncoding::WinAnsiEncoding => {
463                font_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
464            }
465            FontEncoding::MacRomanEncoding => {
466                font_dict.set("Encoding", Object::Name("MacRomanEncoding".to_string()));
467            }
468            FontEncoding::StandardEncoding => {
469                font_dict.set("Encoding", Object::Name("StandardEncoding".to_string()));
470            }
471            FontEncoding::Custom(differences) => {
472                let mut encoding_dict = Dictionary::new();
473                encoding_dict.set("Type", Object::Name("Encoding".to_string()));
474                encoding_dict.set("BaseEncoding", Object::Name("WinAnsiEncoding".to_string()));
475
476                // Add differences array
477                let mut diff_array = Vec::new();
478                for diff in differences {
479                    diff_array.push(Object::Integer(diff.code as i64));
480                    for name in &diff.names {
481                        diff_array.push(Object::Name(name.clone()));
482                    }
483                }
484                encoding_dict.set("Differences", Object::Array(diff_array));
485                font_dict.set("Encoding", Object::Dictionary(encoding_dict));
486            }
487            _ => {}
488        }
489
490        // First and last character codes
491        font_dict.set("FirstChar", Object::Integer(32));
492        font_dict.set("LastChar", Object::Integer(255));
493
494        // Character widths (simplified - would need actual glyph widths)
495        let widths: Vec<Object> = (32..=255)
496            .map(|_| Object::Integer(500)) // Default width
497            .collect();
498        font_dict.set("Widths", Object::Array(widths));
499
500        Ok(font_dict)
501    }
502
503    /// Generate Type0 (CID) font dictionary
504    fn generate_type0_dictionary(&self, font_data: &EmbeddedFontData) -> Result<Dictionary> {
505        let mut font_dict = Dictionary::new();
506
507        // Type0 font properties
508        font_dict.set("Type", Object::Name("Font".to_string()));
509        font_dict.set("Subtype", Object::Name("Type0".to_string()));
510        font_dict.set("BaseFont", Object::Name(font_data.pdf_name.clone()));
511
512        // Encoding (CMap)
513        font_dict.set("Encoding", Object::Name("Identity-H".to_string()));
514
515        // DescendantFonts array (would contain CIDFont reference)
516        font_dict.set(
517            "DescendantFonts",
518            Object::Array(vec![
519                Object::Reference(ObjectId::new(0, 0)), // Placeholder for CIDFont reference
520            ]),
521        );
522
523        // ToUnicode CMap reference (if needed)
524        if !font_data.unicode_mappings.is_empty() {
525            font_dict.set("ToUnicode", Object::Reference(ObjectId::new(0, 0))); // Placeholder
526        }
527
528        Ok(font_dict)
529    }
530
531    /// Generate font descriptor dictionary
532    pub fn generate_font_descriptor(&self, font_name: &str) -> Result<Dictionary> {
533        let font_data = self
534            .embedded_fonts
535            .get(font_name)
536            .ok_or_else(|| PdfError::FontError(format!("Font {font_name} not found")))?;
537
538        let mut desc_dict = Dictionary::new();
539
540        desc_dict.set("Type", Object::Name("FontDescriptor".to_string()));
541        desc_dict.set("FontName", Object::Name(font_data.pdf_name.clone()));
542
543        // Font flags
544        desc_dict.set(
545            "Flags",
546            Object::Integer(font_data.descriptor.flags.to_flags() as i64),
547        );
548
549        // Font metrics
550        desc_dict.set("Ascent", Object::Integer(font_data.metrics.ascent as i64));
551        desc_dict.set("Descent", Object::Integer(font_data.metrics.descent as i64));
552        desc_dict.set(
553            "CapHeight",
554            Object::Integer(font_data.metrics.cap_height as i64),
555        );
556        desc_dict.set(
557            "ItalicAngle",
558            Object::Real(font_data.descriptor.italic_angle),
559        );
560        desc_dict.set("StemV", Object::Integer(font_data.descriptor.stem_v as i64));
561
562        // Font bounding box
563        let bbox = vec![
564            Object::Integer(font_data.descriptor.bbox[0] as i64),
565            Object::Integer(font_data.descriptor.bbox[1] as i64),
566            Object::Integer(font_data.descriptor.bbox[2] as i64),
567            Object::Integer(font_data.descriptor.bbox[3] as i64),
568        ];
569        desc_dict.set("FontBBox", Object::Array(bbox));
570
571        // Font file reference (would be set during PDF generation)
572        match font_data.font_type {
573            FontType::TrueType => {
574                desc_dict.set("FontFile2", Object::Reference(ObjectId::new(0, 0)));
575                // Placeholder
576            }
577            FontType::Type0 => {
578                desc_dict.set("FontFile2", Object::Reference(ObjectId::new(0, 0)));
579                // Placeholder
580            }
581        }
582
583        Ok(desc_dict)
584    }
585
586    /// Generate ToUnicode CMap stream
587    pub fn generate_tounicode_cmap(&self, font_name: &str) -> Result<String> {
588        let font_data = self
589            .embedded_fonts
590            .get(font_name)
591            .ok_or_else(|| PdfError::FontError(format!("Font {font_name} not found")))?;
592
593        if font_data.unicode_mappings.is_empty() {
594            return Err(PdfError::FontError(
595                "No Unicode mappings available".to_string(),
596            ));
597        }
598
599        let mut cmap_content = String::new();
600
601        // CMap header
602        cmap_content.push_str("/CIDInit /ProcSet findresource begin\n");
603        cmap_content.push_str("12 dict begin\n");
604        cmap_content.push_str("begincmap\n");
605        cmap_content.push_str("/CIDSystemInfo\n");
606        cmap_content.push_str("<<\n");
607        cmap_content.push_str("/Registry (Adobe)\n");
608        cmap_content.push_str("/Ordering (UCS)\n");
609        cmap_content.push_str("/Supplement 0\n");
610        cmap_content.push_str(">> def\n");
611        cmap_content.push_str("/CMapName /Adobe-Identity-UCS def\n");
612        cmap_content.push_str("/CMapType 2 def\n");
613        cmap_content.push_str("1 begincodespacerange\n");
614        cmap_content.push_str("<0000> <FFFF>\n");
615        cmap_content.push_str("endcodespacerange\n");
616
617        // Unicode mappings
618        cmap_content.push_str(&format!(
619            "{} beginbfchar\n",
620            font_data.unicode_mappings.len()
621        ));
622        for (glyph_id, unicode_string) in &font_data.unicode_mappings {
623            cmap_content.push_str(&format!(
624                "<{:04X}> <{}>\n",
625                glyph_id,
626                unicode_string
627                    .chars()
628                    .map(|c| format!("{c:04X}", c = c as u32))
629                    .collect::<String>()
630            ));
631        }
632        cmap_content.push_str("endbfchar\n");
633
634        // CMap footer
635        cmap_content.push_str("endcmap\n");
636        cmap_content.push_str("CMapName currentdict /CMap defineresource pop\n");
637        cmap_content.push_str("end\n");
638        cmap_content.push_str("end\n");
639
640        Ok(cmap_content)
641    }
642
    /// Returns every font embedded so far, keyed by the PDF font name that
    /// the embed call returned.
    pub fn embedded_fonts(&self) -> &HashMap<String, EmbeddedFontData> {
        &self.embedded_fonts
    }
647
648    /// Extract font metrics from TrueType font
649    fn extract_font_metrics(&self, _font: &TrueTypeFont) -> Result<FontMetrics> {
650        // This would extract actual metrics from font tables
651        // For now, return default metrics
652        Ok(FontMetrics {
653            ascent: 750,
654            descent: -250,
655            cap_height: 700,
656            x_height: 500,
657            stem_v: 100,
658            stem_h: 50,
659            avg_width: 500,
660            max_width: 1000,
661            missing_width: 500,
662        })
663    }
664
665    /// Create font descriptor from TrueType font
666    fn create_font_descriptor(
667        &self,
668        _font: &TrueTypeFont,
669        font_name: &str,
670    ) -> Result<FontDescriptor> {
671        Ok(FontDescriptor {
672            font_name: font_name.to_string(),
673            flags: FontFlags {
674                non_symbolic: true,
675                ..Default::default()
676            },
677            bbox: [-100, -250, 1000, 750], // Default bounding box
678            italic_angle: 0.0,
679            ascent: 750,
680            descent: -250,
681            cap_height: 700,
682            stem_v: 100,
683            stem_h: 50,
684            avg_width: 500,
685            max_width: 1000,
686            missing_width: 500,
687            font_file: None,
688        })
689    }
690
    /// Builds the descriptor for a CID font.
    ///
    /// Currently delegates unchanged to `create_font_descriptor`, so CID and
    /// simple fonts receive identical descriptors.
    fn create_cid_font_descriptor(
        &self,
        font: &TrueTypeFont,
        font_name: &str,
    ) -> Result<FontDescriptor> {
        // Similar to create_font_descriptor but for CID fonts
        self.create_font_descriptor(font, font_name)
    }
700
    /// Chooses the encoding for a simple font.
    ///
    /// Placeholder implementation: both arguments are ignored and
    /// `FontEncoding::WinAnsiEncoding` is always returned.
    fn create_encoding_for_font(
        &self,
        _font: &TrueTypeFont,
        _used_glyphs: &HashSet<u16>,
    ) -> Result<FontEncoding> {
        // For now, return WinAnsi encoding
        // In a full implementation, this would analyze the font and create appropriate encoding
        Ok(FontEncoding::WinAnsiEncoding)
    }
711
712    /// Create Unicode mappings for simple fonts
713    fn create_unicode_mappings(
714        &self,
715        _font: &TrueTypeFont,
716        used_glyphs: &HashSet<u16>,
717    ) -> Result<HashMap<u16, String>> {
718        let mut mappings = HashMap::new();
719
720        // Create basic ASCII mappings
721        for glyph_id in used_glyphs {
722            if *glyph_id < 256 {
723                let unicode_char = char::from(*glyph_id as u8);
724                if unicode_char.is_ascii_graphic() || unicode_char == ' ' {
725                    mappings.insert(*glyph_id, unicode_char.to_string());
726                }
727            }
728        }
729
730        Ok(mappings)
731    }
732
733    /// Create Unicode mappings for CID fonts
734    fn create_cid_unicode_mappings(
735        &self,
736        _font: &TrueTypeFont,
737        used_chars: &HashSet<u32>,
738    ) -> Result<HashMap<u16, String>> {
739        let mut mappings = HashMap::new();
740
741        // Convert character codes to Unicode strings
742        for &char_code in used_chars {
743            if let Some(unicode_char) = char::from_u32(char_code) {
744                // Find glyph ID for this character (simplified)
745                let glyph_id = char_code as u16; // Simplified mapping
746                mappings.insert(glyph_id, unicode_char.to_string());
747            }
748        }
749
750        Ok(mappings)
751    }
752
753    /// Convert character codes to glyph indices
754    fn chars_to_glyphs(&self, _font: &TrueTypeFont, chars: &HashSet<u32>) -> Result<HashSet<u16>> {
755        let mut glyphs = HashSet::new();
756
757        // Always include glyph 0 (missing glyph)
758        glyphs.insert(0);
759
760        // Convert characters to glyph indices using font's character map
761        for &char_code in chars {
762            // This is simplified - a real implementation would use the font's cmap table
763            let glyph_id = if char_code < 65536 {
764                char_code as u16
765            } else {
766                0 // Missing glyph for characters outside BMP
767            };
768            glyphs.insert(glyph_id);
769        }
770
771        Ok(glyphs)
772    }
773}
774
impl Default for FontEmbedder {
    /// Equivalent to [`FontEmbedder::new`]: an empty embedder whose font
    /// numbering starts at 1.
    fn default() -> Self {
        Self::new()
    }
}
780
781#[cfg(test)]
782mod tests {
783    use super::*;
784
785    #[test]
786    fn test_font_embedder_creation() {
787        let embedder = FontEmbedder::new();
788        assert_eq!(embedder.embedded_fonts.len(), 0);
789        assert_eq!(embedder.next_font_id, 1);
790    }
791
792    #[test]
793    fn test_embedding_options_default() {
794        let options = EmbeddingOptions::default();
795        assert!(options.subset);
796        assert_eq!(options.max_subset_size, Some(256));
797        assert!(options.compress_font_streams);
798        assert!(!options.embed_license_info);
799    }
800
801    #[test]
802    fn test_generate_tounicode_cmap_empty() {
803        let mut embedder = FontEmbedder::new();
804
805        // Create a font with no Unicode mappings
806        let font_data = EmbeddedFontData {
807            pdf_name: "TestFont".to_string(),
808            font_type: FontType::TrueType,
809            descriptor: FontDescriptor {
810                font_name: "TestFont".to_string(),
811                flags: FontFlags::default(),
812                bbox: [0, 0, 1000, 1000],
813                italic_angle: 0.0,
814                ascent: 750,
815                descent: -250,
816                cap_height: 700,
817                stem_v: 100,
818                stem_h: 50,
819                avg_width: 500,
820                max_width: 1000,
821                missing_width: 500,
822                font_file: None,
823            },
824            font_program: vec![],
825            encoding: FontEncoding::WinAnsiEncoding,
826            metrics: FontMetrics {
827                ascent: 750,
828                descent: -250,
829                cap_height: 700,
830                x_height: 500,
831                stem_v: 100,
832                stem_h: 50,
833                avg_width: 500,
834                max_width: 1000,
835                missing_width: 500,
836            },
837            subset_glyphs: None,
838            unicode_mappings: HashMap::new(),
839        };
840
841        embedder
842            .embedded_fonts
843            .insert("TestFont".to_string(), font_data);
844
845        let result = embedder.generate_tounicode_cmap("TestFont");
846        assert!(result.is_err());
847    }
848
849    #[test]
850    fn test_generate_truetype_dictionary() {
851        let embedder = FontEmbedder::new();
852
853        let font_data = EmbeddedFontData {
854            pdf_name: "TestFont".to_string(),
855            font_type: FontType::TrueType,
856            descriptor: FontDescriptor {
857                font_name: "TestFont".to_string(),
858                flags: FontFlags::default(),
859                bbox: [0, 0, 1000, 1000],
860                italic_angle: 0.0,
861                ascent: 750,
862                descent: -250,
863                cap_height: 700,
864                stem_v: 100,
865                stem_h: 50,
866                avg_width: 500,
867                max_width: 1000,
868                missing_width: 500,
869                font_file: None,
870            },
871            font_program: vec![],
872            encoding: FontEncoding::WinAnsiEncoding,
873            metrics: FontMetrics {
874                ascent: 750,
875                descent: -250,
876                cap_height: 700,
877                x_height: 500,
878                stem_v: 100,
879                stem_h: 50,
880                avg_width: 500,
881                max_width: 1000,
882                missing_width: 500,
883            },
884            subset_glyphs: None,
885            unicode_mappings: HashMap::new(),
886        };
887
888        let dict = embedder.generate_truetype_dictionary(&font_data).unwrap();
889
890        // Verify basic font properties
891        if let Some(Object::Name(font_type)) = dict.get("Type") {
892            assert_eq!(font_type, "Font");
893        }
894        if let Some(Object::Name(subtype)) = dict.get("Subtype") {
895            assert_eq!(subtype, "TrueType");
896        }
897        if let Some(Object::Name(base_font)) = dict.get("BaseFont") {
898            assert_eq!(base_font, "TestFont");
899        }
900    }
901
902    #[test]
903    fn test_generate_type0_dictionary() {
904        let embedder = FontEmbedder::new();
905
906        let font_data = EmbeddedFontData {
907            pdf_name: "TestCIDFont".to_string(),
908            font_type: FontType::Type0,
909            descriptor: FontDescriptor {
910                font_name: "TestCIDFont".to_string(),
911                flags: FontFlags::default(),
912                bbox: [0, 0, 1000, 1000],
913                italic_angle: 0.0,
914                ascent: 750,
915                descent: -250,
916                cap_height: 700,
917                stem_v: 100,
918                stem_h: 50,
919                avg_width: 500,
920                max_width: 1000,
921                missing_width: 500,
922                font_file: None,
923            },
924            font_program: vec![],
925            encoding: FontEncoding::Identity,
926            metrics: FontMetrics {
927                ascent: 750,
928                descent: -250,
929                cap_height: 700,
930                x_height: 500,
931                stem_v: 100,
932                stem_h: 50,
933                avg_width: 500,
934                max_width: 1000,
935                missing_width: 500,
936            },
937            subset_glyphs: None,
938            unicode_mappings: HashMap::new(),
939        };
940
941        let dict = embedder.generate_type0_dictionary(&font_data).unwrap();
942
943        // Verify Type0 font properties
944        if let Some(Object::Name(subtype)) = dict.get("Subtype") {
945            assert_eq!(subtype, "Type0");
946        }
947        if let Some(Object::Name(encoding)) = dict.get("Encoding") {
948            assert_eq!(encoding, "Identity-H");
949        }
950        if let Some(Object::Array(descendant_fonts)) = dict.get("DescendantFonts") {
951            assert_eq!(descendant_fonts.len(), 1);
952        }
953    }
954
955    #[test]
956    fn test_chars_to_glyphs_conversion() {
957        let _embedder = FontEmbedder::new();
958        let _font_data = vec![0; 100]; // Dummy font data
959
960        // This would fail in real implementation due to invalid font data
961        // but tests the function structure
962        let chars: HashSet<u32> = [65, 66, 67].iter().cloned().collect(); // A, B, C
963
964        // Test would require valid font data to complete
965        // For now, test that the function exists and compiles
966        assert!(chars.len() == 3);
967    }
968
969    #[test]
970    fn test_unicode_mappings_creation() {
971        let _embedder = FontEmbedder::new();
972        let glyphs: HashSet<u16> = [65, 66, 67].iter().cloned().collect();
973
974        // Create dummy font for testing
975        let _font_data = vec![0; 100];
976
977        // Test would require valid TrueType font parsing
978        // For now, verify function signature
979        assert!(glyphs.len() == 3);
980    }
981
982    #[test]
983    fn test_font_descriptor_generation() {
984        let _embedder = FontEmbedder::new();
985
986        let font_data = EmbeddedFontData {
987            pdf_name: "TestFont".to_string(),
988            font_type: FontType::TrueType,
989            descriptor: FontDescriptor {
990                font_name: "TestFont".to_string(),
991                flags: FontFlags {
992                    non_symbolic: true,
993                    serif: true,
994                    ..Default::default()
995                },
996                bbox: [-100, -250, 1000, 750],
997                italic_angle: 0.0,
998                ascent: 750,
999                descent: -250,
1000                cap_height: 700,
1001                stem_v: 100,
1002                stem_h: 50,
1003                avg_width: 500,
1004                max_width: 1000,
1005                missing_width: 500,
1006                font_file: None,
1007            },
1008            font_program: vec![],
1009            encoding: FontEncoding::WinAnsiEncoding,
1010            metrics: FontMetrics {
1011                ascent: 750,
1012                descent: -250,
1013                cap_height: 700,
1014                x_height: 500,
1015                stem_v: 100,
1016                stem_h: 50,
1017                avg_width: 500,
1018                max_width: 1000,
1019                missing_width: 500,
1020            },
1021            subset_glyphs: None,
1022            unicode_mappings: HashMap::new(),
1023        };
1024
1025        let mut embedder_with_font = FontEmbedder::new();
1026        embedder_with_font
1027            .embedded_fonts
1028            .insert("TestFont".to_string(), font_data);
1029
1030        let desc_dict = embedder_with_font
1031            .generate_font_descriptor("TestFont")
1032            .unwrap();
1033
1034        // Verify font descriptor properties
1035        if let Some(Object::Name(font_name)) = desc_dict.get("FontName") {
1036            assert_eq!(font_name, "TestFont");
1037        }
1038        if let Some(Object::Integer(flags)) = desc_dict.get("Flags") {
1039            assert!(*flags > 0); // Should have some flags set
1040        }
1041        if let Some(Object::Array(bbox)) = desc_dict.get("FontBBox") {
1042            assert_eq!(bbox.len(), 4);
1043        }
1044    }
1045}