oxidize_pdf/text/fonts/
embedding.rs

1//! Font embedding for PDF generation according to ISO 32000-1 Section 9.8
2//!
3//! This module provides complete font embedding capabilities including:
4//! - TrueType font embedding with subsetting
5//! - Font descriptor generation  
6//! - Character encoding mappings
7//! - CID font support for complex scripts
8
9use crate::error::{PdfError, Result};
10use crate::objects::{Dictionary, Object, ObjectId};
11use crate::text::fonts::truetype::TrueTypeFont;
12use std::collections::{HashMap, HashSet};
13
/// Font type enumeration for embedding.
///
/// Recorded on every `EmbeddedFontData` and used by `FontEmbedder` to pick
/// which font dictionary layout to generate.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum FontType {
    /// Simple TrueType font (dictionary `/Subtype /TrueType`)
    TrueType,
    /// Type 0 font (composite/CID; dictionary `/Subtype /Type0`)
    Type0,
}
22
/// Font encoding types for embedding.
///
/// For simple TrueType fonts the variant decides what is written to the
/// `/Encoding` entry of the font dictionary; CID fonts are stored with
/// `Identity`.
#[derive(Debug, Clone, PartialEq)]
pub enum FontEncoding {
    /// Standard encoding (written as the name `StandardEncoding`)
    StandardEncoding,
    /// MacRoman encoding (written as the name `MacRomanEncoding`)
    MacRomanEncoding,
    /// WinAnsi encoding (written as the name `WinAnsiEncoding`)
    WinAnsiEncoding,
    /// Custom encoding with differences; written as an encoding dictionary
    /// whose base is `WinAnsiEncoding` plus a `/Differences` array
    Custom(Vec<EncodingDifference>),
    /// Identity encoding for CID fonts
    Identity,
}
37
/// CJK font types for proper CIDSystemInfo configuration
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CjkFontType {
    /// Chinese Simplified (Source Han Sans SC, Noto Sans CJK SC, etc.)
    ChineseSimplified,
    /// Chinese Traditional (Source Han Sans TC, Noto Sans CJK TC, etc.)
    ChineseTraditional,
    /// Japanese (Source Han Sans JP, Noto Sans CJK JP, etc.)
    Japanese,
    /// Korean (Source Han Sans KR, Noto Sans CJK KR, etc.)
    Korean,
    /// Generic CJK (fallback when the family is known to be CJK but the
    /// region cannot be determined from the name)
    Generic,
}

impl CjkFontType {
    /// Get the appropriate CIDSystemInfo values for this font type.
    ///
    /// # Returns
    /// A `(registry, ordering, supplement)` triple. Every variant currently
    /// maps to `("Adobe", "Identity", 0)` because Identity is used for
    /// multi-script fonts; the match stays exhaustive so adding a variant
    /// forces a decision here.
    pub fn cid_system_info(&self) -> (&'static str, &'static str, i32) {
        match self {
            CjkFontType::ChineseSimplified
            | CjkFontType::ChineseTraditional
            | CjkFontType::Japanese
            | CjkFontType::Korean
            | CjkFontType::Generic => ("Adobe", "Identity", 0),
        }
    }

    /// Detect CJK font type from font name.
    ///
    /// Matching is case-insensitive and substring based. Families are tried
    /// in this order: Source Han, Noto Sans CJK, then generic language
    /// keywords.
    ///
    /// # Arguments
    /// * `font_name` - Font name to inspect (typically the PostScript name)
    ///
    /// # Returns
    /// * `Some(variant)` when the name looks like a CJK font
    /// * `Some(CjkFontType::Generic)` for a Noto Sans CJK name that carries no
    ///   recognisable region code
    /// * `None` when nothing matches
    pub fn detect_from_name(font_name: &str) -> Option<Self> {
        let name_lower = font_name.to_lowercase();

        // Source Han Sans detection (various naming patterns)
        let is_source_han = [
            "sourcehansans",
            "source han sans",
            "hansans",
            "han sans",
            "sourcehan",
            "source han",
        ]
        .iter()
        .any(|marker| name_lower.contains(*marker));

        if is_source_han {
            if name_lower.contains("sc") || name_lower.contains("simplifiedchinese") {
                return Some(CjkFontType::ChineseSimplified);
            }
            if name_lower.contains("tc") || name_lower.contains("traditionalchinese") {
                return Some(CjkFontType::ChineseTraditional);
            }
            if name_lower.contains("jp") || name_lower.contains("japanese") {
                return Some(CjkFontType::Japanese);
            }
            if name_lower.contains("kr") || name_lower.contains("korean") {
                return Some(CjkFontType::Korean);
            }
        }

        // Noto Sans CJK detection.
        //
        // The region code must be looked up only in the text that FOLLOWS the
        // family marker: "notosanscjk" itself contains "sc" ("sanS Cjk"), so
        // searching the whole name would classify every concatenated Noto
        // Sans CJK name (e.g. "NotoSansCJKjp-Regular") as Simplified Chinese.
        let noto_suffix = ["notosanscjk", "noto sans cjk"]
            .iter()
            .find_map(|marker| name_lower.split_once(*marker).map(|(_, rest)| rest));

        if let Some(suffix) = noto_suffix {
            if suffix.contains("sc") {
                return Some(CjkFontType::ChineseSimplified);
            }
            if suffix.contains("tc") {
                return Some(CjkFontType::ChineseTraditional);
            }
            if suffix.contains("jp") {
                return Some(CjkFontType::Japanese);
            }
            if suffix.contains("kr") {
                return Some(CjkFontType::Korean);
            }
        }

        // Generic patterns
        if name_lower.contains("chinese") || name_lower.contains("zh") || name_lower.contains("gb")
        {
            if name_lower.contains("traditional")
                || name_lower.contains("tw")
                || name_lower.contains("hk")
            {
                return Some(CjkFontType::ChineseTraditional);
            }
            return Some(CjkFontType::ChineseSimplified);
        }

        if name_lower.contains("japanese")
            || name_lower.contains("jp")
            || name_lower.contains("japan")
        {
            return Some(CjkFontType::Japanese);
        }

        if name_lower.contains("korean")
            || name_lower.contains("kr")
            || name_lower.contains("korea")
        {
            return Some(CjkFontType::Korean);
        }

        // A Noto Sans CJK name is CJK by definition even when no region could
        // be identified; report it as generic so callers still treat it as CJK.
        if noto_suffix.is_some() {
            return Some(CjkFontType::Generic);
        }

        None
    }

    /// Determine if we should use CIDFontType2 for this font to ensure Preview.app compatibility
    ///
    /// # macOS Preview.app Workaround
    ///
    /// macOS Preview.app has a documented bug where it fails to render CIDFontType0 (CFF/PostScript)
    /// fonts correctly, even when they are properly embedded. However, it renders CIDFontType2
    /// (TrueType) fonts correctly regardless of the actual font format.
    ///
    /// According to Stack Overflow discussions, "embedding the same font as a CIDFontType2 font
    /// instead of CIDFontType0 makes Preview show the desired result" while maintaining
    /// compatibility with other PDF viewers.
    ///
    /// For CJK fonts, this workaround is particularly important because:
    /// 1. CJK fonts are commonly in OpenType CFF format (like Source Han Sans)
    /// 2. Preview.app is widely used on macOS for PDF viewing
    /// 3. The alternative is to tell users to install system fonts or use different viewers
    ///
    /// # Technical Details
    ///
    /// This function returns `true` whenever [`CjkFontType::detect_from_name`] recognises the
    /// name as a CJK font, regardless of the font's actual format. It performs no inspection
    /// of the font program itself.
    ///
    /// # Arguments
    /// * `font_name` - The PostScript name of the font
    ///
    /// # Returns
    /// * `true` if the name is detected as CJK (the font should use CIDFontType2)
    /// * `false` otherwise (the font should use the format-appropriate CIDFont type)
    pub fn should_use_cidfonttype2_for_preview_compatibility(font_name: &str) -> bool {
        // Always use CIDFontType2 for CJK fonts (Preview.app compatibility workaround)
        Self::detect_from_name(font_name).is_some()
    }
}
170
/// Encoding difference entry.
///
/// One run inside a `/Differences` array: the starting code is emitted as an
/// integer, followed by one name object per entry of `names`.
#[derive(Debug, Clone, PartialEq)]
pub struct EncodingDifference {
    /// Starting character code
    pub code: u8,
    /// Glyph names for consecutive character codes
    pub names: Vec<String>,
}
179
/// Font flags for font descriptor.
///
/// Each field corresponds to one bit of the `/Flags` integer of a PDF font
/// descriptor; see [`FontFlags::to_flags`] for the packed form.
#[derive(Debug, Clone, Copy, Default)]
pub struct FontFlags {
    /// All glyphs have the same width (bit 1)
    pub fixed_pitch: bool,
    /// Glyphs have serifs (bit 2)
    pub serif: bool,
    /// Font contains glyphs outside the Adobe standard Latin character set (bit 3)
    pub symbolic: bool,
    /// Glyphs resemble cursive handwriting (bit 4)
    pub script: bool,
    /// Font uses the Adobe standard Latin character set or a subset of it (bit 6)
    pub non_symbolic: bool,
    /// Glyphs have dominant vertical strokes that are slanted (bit 7)
    pub italic: bool,
    /// Font contains no lowercase letters (bit 17)
    pub all_cap: bool,
    /// Font is a small-cap font (bit 18)
    pub small_cap: bool,
    /// Bold glyphs should be painted with extra thickness at small sizes (bit 19)
    pub force_bold: bool,
}

impl FontFlags {
    /// Convert to PDF font flags integer.
    ///
    /// # Returns
    /// The bitwise OR of `1 << position` for every field that is `true`,
    /// using the zero-based bit positions listed in the table below.
    pub fn to_flags(&self) -> u32 {
        // (field value, zero-based bit position in the /Flags integer)
        let table: [(bool, u32); 9] = [
            (self.fixed_pitch, 0),
            (self.serif, 1),
            (self.symbolic, 2),
            (self.script, 3),
            (self.non_symbolic, 5),
            (self.italic, 6),
            (self.all_cap, 16),
            (self.small_cap, 17),
            (self.force_bold, 18),
        ];

        table
            .iter()
            .filter(|entry| entry.0)
            .fold(0u32, |acc, entry| acc | (1u32 << entry.1))
    }
}
237
/// Font descriptor for PDF embedding.
///
/// Note: when `FontEmbedder::generate_font_descriptor` builds the descriptor
/// dictionary it reads `flags`, `italic_angle`, `stem_v` and `bbox` from this
/// struct, while ascent, descent and cap height are taken from the font's
/// `FontMetrics` instead.
#[derive(Debug, Clone)]
pub struct FontDescriptor {
    /// Font name
    pub font_name: String,
    /// Font flags
    pub flags: FontFlags,
    /// Font bounding box [llx, lly, urx, ury]
    pub bbox: [i32; 4],
    /// Italic angle in degrees
    pub italic_angle: f64,
    /// Maximum height above baseline
    pub ascent: i32,
    /// Maximum depth below baseline (negative)
    pub descent: i32,
    /// Height of capital letters
    pub cap_height: i32,
    /// Thickness of dominant vertical stems
    pub stem_v: i32,
    /// Thickness of dominant horizontal stems
    pub stem_h: i32,
    /// Average character width
    pub avg_width: i32,
    /// Maximum character width
    pub max_width: i32,
    /// Width for missing characters
    pub missing_width: i32,
    /// Font file reference (if embedded)
    pub font_file: Option<ObjectId>,
}
268
/// Font metrics for embedded fonts.
///
/// NOTE(review): values are presumably expressed in 1/1000 em glyph-space
/// units, as the placeholder defaults (750 / -250 / 1000) suggest; confirm.
#[derive(Debug, Clone)]
pub struct FontMetrics {
    /// Maximum height above baseline
    pub ascent: i32,
    /// Maximum depth below baseline (negative)
    pub descent: i32,
    /// Height of capital letters
    pub cap_height: i32,
    /// Height of lowercase letters
    pub x_height: i32,
    /// Thickness of dominant vertical stems
    pub stem_v: i32,
    /// Thickness of dominant horizontal stems
    pub stem_h: i32,
    /// Average character width
    pub avg_width: i32,
    /// Maximum character width
    pub max_width: i32,
    /// Width for missing characters
    pub missing_width: i32,
}
291
/// PDF font embedding manager.
///
/// Keeps every font embedded so far, keyed by its generated PDF name, and
/// hands out increasing numeric IDs that are used to build those names.
#[derive(Debug)]
pub struct FontEmbedder {
    /// Font data cache (generated PDF font name -> embedded data)
    embedded_fonts: HashMap<String, EmbeddedFontData>,
    /// Next font ID (starts at 1, incremented for every embedded font)
    next_font_id: u32,
}
300
/// Embedded font data for PDF generation.
///
/// Produced by the `FontEmbedder::embed_*` methods and later read back when
/// the font dictionary, font descriptor and ToUnicode CMap are generated.
#[derive(Debug, Clone)]
pub struct EmbeddedFontData {
    /// Font name in PDF
    pub pdf_name: String,
    /// Font type
    pub font_type: FontType,
    /// Font descriptor object
    pub descriptor: FontDescriptor,
    /// Font program data (subset or full)
    pub font_program: Vec<u8>,
    /// Character mappings
    pub encoding: FontEncoding,
    /// Font metrics
    pub metrics: FontMetrics,
    /// Subset glyph set (if subsetted)
    pub subset_glyphs: Option<HashSet<u16>>,
    /// Unicode mappings for ToUnicode CMap (16-bit code -> Unicode text)
    pub unicode_mappings: HashMap<u16, String>,
}
321
/// Options controlling how a font is embedded.
#[derive(Debug, Clone)]
pub struct EmbeddingOptions {
    /// Whether to subset the font instead of embedding the full program
    pub subset: bool,
    /// Maximum number of glyphs in subset
    pub max_subset_size: Option<usize>,
    /// Whether to compress font streams
    // NOTE(review): not read by any code visible in this part of the file.
    pub compress_font_streams: bool,
    /// Whether to embed font license info
    // NOTE(review): not read by any code visible in this part of the file.
    pub embed_license_info: bool,
}

impl Default for EmbeddingOptions {
    /// Subsetting enabled with a 256-glyph cap, stream compression on,
    /// license info off.
    fn default() -> Self {
        EmbeddingOptions {
            embed_license_info: false,
            compress_font_streams: true,
            max_subset_size: Some(256),
            subset: true,
        }
    }
}
345
346impl FontEmbedder {
347    /// Create a new font embedder
348    pub fn new() -> Self {
349        Self {
350            embedded_fonts: HashMap::new(),
351            next_font_id: 1,
352        }
353    }
354
355    /// Embed a TrueType font with optional subsetting
356    pub fn embed_truetype_font(
357        &mut self,
358        font_data: &[u8],
359        used_glyphs: &HashSet<u16>,
360        options: &EmbeddingOptions,
361    ) -> Result<String> {
362        // Parse the TrueType font
363        let font = TrueTypeFont::from_data(font_data)
364            .map_err(|e| PdfError::FontError(format!("Failed to parse font: {e}")))?;
365
366        // Generate unique font name
367        let font_name = format!("ABCDEF+Font{next_id}", next_id = self.next_font_id);
368        self.next_font_id += 1;
369
370        // Determine if we should subset
371        let should_subset =
372            options.subset && used_glyphs.len() < options.max_subset_size.unwrap_or(256);
373
374        // Create font program (subset or full)
375        let font_program = if should_subset {
376            font.create_subset(used_glyphs)
377                .map_err(|e| PdfError::FontError(format!("Failed to create subset: {e}")))?
378        } else {
379            font_data.to_vec()
380        };
381
382        // Extract font metrics
383        let metrics = self.extract_font_metrics(&font)?;
384
385        // Create font descriptor
386        let descriptor = self.create_font_descriptor(&font, &font_name)?;
387
388        // Create character encoding
389        let encoding = self.create_encoding_for_font(&font, used_glyphs)?;
390
391        // Create Unicode mappings for ToUnicode CMap
392        let unicode_mappings = self.create_unicode_mappings(&font, used_glyphs)?;
393
394        // Store embedded font data
395        let embedded_font = EmbeddedFontData {
396            pdf_name: font_name.clone(),
397            font_type: FontType::TrueType,
398            descriptor,
399            font_program,
400            encoding,
401            metrics,
402            subset_glyphs: if should_subset {
403                Some(used_glyphs.clone())
404            } else {
405                None
406            },
407            unicode_mappings,
408        };
409
410        self.embedded_fonts.insert(font_name.clone(), embedded_font);
411        Ok(font_name)
412    }
413
414    /// Create a Type0 (CID) font for complex scripts
415    pub fn embed_cid_font(
416        &mut self,
417        font_data: &[u8],
418        used_chars: &HashSet<u32>,
419        _cmap_name: &str,
420        options: &EmbeddingOptions,
421    ) -> Result<String> {
422        // Parse the font
423        let font = TrueTypeFont::from_data(font_data)
424            .map_err(|e| PdfError::FontError(format!("Failed to parse font: {e}")))?;
425
426        // Generate unique font name
427        let font_name = format!("ABCDEF+CIDFont{next_id}", next_id = self.next_font_id);
428        self.next_font_id += 1;
429
430        // Convert character codes to glyph indices
431        let used_glyphs = self.chars_to_glyphs(&font, used_chars)?;
432
433        // Create subset if requested
434        let font_program = if options.subset {
435            font.create_subset(&used_glyphs)
436                .map_err(|e| PdfError::FontError(format!("Failed to create subset: {e}")))?
437        } else {
438            font_data.to_vec()
439        };
440
441        // Extract metrics
442        let metrics = self.extract_font_metrics(&font)?;
443
444        // Create CID font descriptor
445        let descriptor = self.create_cid_font_descriptor(&font, &font_name)?;
446
447        // Create Identity encoding for CID fonts
448        let encoding = FontEncoding::Identity;
449
450        // Create Unicode mappings
451        let unicode_mappings = self.create_cid_unicode_mappings(&font, used_chars)?;
452
453        let embedded_font = EmbeddedFontData {
454            pdf_name: font_name.clone(),
455            font_type: FontType::Type0,
456            descriptor,
457            font_program,
458            encoding,
459            metrics,
460            subset_glyphs: Some(used_glyphs),
461            unicode_mappings,
462        };
463
464        self.embedded_fonts.insert(font_name.clone(), embedded_font);
465        Ok(font_name)
466    }
467
468    /// Generate PDF font dictionary for embedded font
469    pub fn generate_font_dictionary(&self, font_name: &str) -> Result<Dictionary> {
470        let font_data = self
471            .embedded_fonts
472            .get(font_name)
473            .ok_or_else(|| PdfError::FontError(format!("Font {font_name} not found")))?;
474
475        match font_data.font_type {
476            FontType::TrueType => self.generate_truetype_dictionary(font_data),
477            FontType::Type0 => self.generate_type0_dictionary(font_data),
478            // _ => Err(PdfError::FontError("Unsupported font type for embedding".to_string())),
479        }
480    }
481
482    /// Generate TrueType font dictionary
483    fn generate_truetype_dictionary(&self, font_data: &EmbeddedFontData) -> Result<Dictionary> {
484        let mut font_dict = Dictionary::new();
485
486        // Basic font properties
487        font_dict.set("Type", Object::Name("Font".to_string()));
488        font_dict.set("Subtype", Object::Name("TrueType".to_string()));
489        font_dict.set("BaseFont", Object::Name(font_data.pdf_name.clone()));
490
491        // Font descriptor reference (would be resolved during PDF generation)
492        font_dict.set("FontDescriptor", Object::Reference(ObjectId::new(0, 0))); // Placeholder
493
494        // Encoding
495        match &font_data.encoding {
496            FontEncoding::WinAnsiEncoding => {
497                font_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
498            }
499            FontEncoding::MacRomanEncoding => {
500                font_dict.set("Encoding", Object::Name("MacRomanEncoding".to_string()));
501            }
502            FontEncoding::StandardEncoding => {
503                font_dict.set("Encoding", Object::Name("StandardEncoding".to_string()));
504            }
505            FontEncoding::Custom(differences) => {
506                let mut encoding_dict = Dictionary::new();
507                encoding_dict.set("Type", Object::Name("Encoding".to_string()));
508                encoding_dict.set("BaseEncoding", Object::Name("WinAnsiEncoding".to_string()));
509
510                // Add differences array
511                let mut diff_array = Vec::new();
512                for diff in differences {
513                    diff_array.push(Object::Integer(diff.code as i64));
514                    for name in &diff.names {
515                        diff_array.push(Object::Name(name.clone()));
516                    }
517                }
518                encoding_dict.set("Differences", Object::Array(diff_array));
519                font_dict.set("Encoding", Object::Dictionary(encoding_dict));
520            }
521            _ => {}
522        }
523
524        // First and last character codes
525        font_dict.set("FirstChar", Object::Integer(32));
526        font_dict.set("LastChar", Object::Integer(255));
527
528        // Character widths (simplified - would need actual glyph widths)
529        let widths: Vec<Object> = (32..=255)
530            .map(|_| Object::Integer(500)) // Default width
531            .collect();
532        font_dict.set("Widths", Object::Array(widths));
533
534        Ok(font_dict)
535    }
536
537    /// Generate Type0 (CID) font dictionary
538    fn generate_type0_dictionary(&self, font_data: &EmbeddedFontData) -> Result<Dictionary> {
539        let mut font_dict = Dictionary::new();
540
541        // Type0 font properties
542        font_dict.set("Type", Object::Name("Font".to_string()));
543        font_dict.set("Subtype", Object::Name("Type0".to_string()));
544        font_dict.set("BaseFont", Object::Name(font_data.pdf_name.clone()));
545
546        // Encoding (CMap)
547        font_dict.set("Encoding", Object::Name("Identity-H".to_string()));
548
549        // DescendantFonts array (would contain CIDFont reference)
550        font_dict.set(
551            "DescendantFonts",
552            Object::Array(vec![
553                Object::Reference(ObjectId::new(0, 0)), // Placeholder for CIDFont reference
554            ]),
555        );
556
557        // ToUnicode CMap reference (if needed)
558        if !font_data.unicode_mappings.is_empty() {
559            font_dict.set("ToUnicode", Object::Reference(ObjectId::new(0, 0))); // Placeholder
560        }
561
562        Ok(font_dict)
563    }
564
565    /// Generate font descriptor dictionary
566    pub fn generate_font_descriptor(&self, font_name: &str) -> Result<Dictionary> {
567        let font_data = self
568            .embedded_fonts
569            .get(font_name)
570            .ok_or_else(|| PdfError::FontError(format!("Font {font_name} not found")))?;
571
572        let mut desc_dict = Dictionary::new();
573
574        desc_dict.set("Type", Object::Name("FontDescriptor".to_string()));
575        desc_dict.set("FontName", Object::Name(font_data.pdf_name.clone()));
576
577        // Font flags
578        desc_dict.set(
579            "Flags",
580            Object::Integer(font_data.descriptor.flags.to_flags() as i64),
581        );
582
583        // Font metrics
584        desc_dict.set("Ascent", Object::Integer(font_data.metrics.ascent as i64));
585        desc_dict.set("Descent", Object::Integer(font_data.metrics.descent as i64));
586        desc_dict.set(
587            "CapHeight",
588            Object::Integer(font_data.metrics.cap_height as i64),
589        );
590        desc_dict.set(
591            "ItalicAngle",
592            Object::Real(font_data.descriptor.italic_angle),
593        );
594        desc_dict.set("StemV", Object::Integer(font_data.descriptor.stem_v as i64));
595
596        // Font bounding box
597        let bbox = vec![
598            Object::Integer(font_data.descriptor.bbox[0] as i64),
599            Object::Integer(font_data.descriptor.bbox[1] as i64),
600            Object::Integer(font_data.descriptor.bbox[2] as i64),
601            Object::Integer(font_data.descriptor.bbox[3] as i64),
602        ];
603        desc_dict.set("FontBBox", Object::Array(bbox));
604
605        // Font file reference (would be set during PDF generation)
606        match font_data.font_type {
607            FontType::TrueType => {
608                desc_dict.set("FontFile2", Object::Reference(ObjectId::new(0, 0)));
609                // Placeholder
610            }
611            FontType::Type0 => {
612                desc_dict.set("FontFile2", Object::Reference(ObjectId::new(0, 0)));
613                // Placeholder
614            }
615        }
616
617        Ok(desc_dict)
618    }
619
620    /// Generate ToUnicode CMap stream
621    pub fn generate_tounicode_cmap(&self, font_name: &str) -> Result<String> {
622        let font_data = self
623            .embedded_fonts
624            .get(font_name)
625            .ok_or_else(|| PdfError::FontError(format!("Font {font_name} not found")))?;
626
627        if font_data.unicode_mappings.is_empty() {
628            return Err(PdfError::FontError(
629                "No Unicode mappings available".to_string(),
630            ));
631        }
632
633        let mut cmap_content = String::new();
634
635        // CMap header
636        cmap_content.push_str("/CIDInit /ProcSet findresource begin\n");
637        cmap_content.push_str("12 dict begin\n");
638        cmap_content.push_str("begincmap\n");
639        cmap_content.push_str("/CIDSystemInfo\n");
640        cmap_content.push_str("<<\n");
641        cmap_content.push_str("/Registry (Adobe)\n");
642        cmap_content.push_str("/Ordering (UCS)\n");
643        cmap_content.push_str("/Supplement 0\n");
644        cmap_content.push_str(">> def\n");
645        cmap_content.push_str("/CMapName /Adobe-Identity-UCS def\n");
646        cmap_content.push_str("/CMapType 2 def\n");
647        cmap_content.push_str("1 begincodespacerange\n");
648        cmap_content.push_str("<0000> <FFFF>\n");
649        cmap_content.push_str("endcodespacerange\n");
650
651        // Unicode mappings
652        cmap_content.push_str(&format!(
653            "{} beginbfchar\n",
654            font_data.unicode_mappings.len()
655        ));
656        for (glyph_id, unicode_string) in &font_data.unicode_mappings {
657            cmap_content.push_str(&format!(
658                "<{:04X}> <{}>\n",
659                glyph_id,
660                unicode_string
661                    .chars()
662                    .map(|c| format!("{c:04X}", c = c as u32))
663                    .collect::<String>()
664            ));
665        }
666        cmap_content.push_str("endbfchar\n");
667
668        // CMap footer
669        cmap_content.push_str("endcmap\n");
670        cmap_content.push_str("CMapName currentdict /CMap defineresource pop\n");
671        cmap_content.push_str("end\n");
672        cmap_content.push_str("end\n");
673
674        Ok(cmap_content)
675    }
676
    /// Get all embedded fonts.
    ///
    /// Returns a read-only view of the internal map, keyed by the generated
    /// PDF font name.
    pub fn embedded_fonts(&self) -> &HashMap<String, EmbeddedFontData> {
        &self.embedded_fonts
    }
681
    /// Extract font metrics from TrueType font.
    ///
    /// Currently a stub: `_font` is not inspected and the same fixed
    /// placeholder metrics are returned for every font. Always returns `Ok`.
    fn extract_font_metrics(&self, _font: &TrueTypeFont) -> Result<FontMetrics> {
        // This would extract actual metrics from font tables
        // For now, return default metrics
        Ok(FontMetrics {
            ascent: 750,
            descent: -250,
            cap_height: 700,
            x_height: 500,
            stem_v: 100,
            stem_h: 50,
            avg_width: 500,
            max_width: 1000,
            missing_width: 500,
        })
    }
698
    /// Create font descriptor from TrueType font.
    ///
    /// Currently a stub: `_font` is not inspected. The descriptor carries the
    /// given `font_name`, only the `non_symbolic` flag, fixed placeholder
    /// metrics and no font file reference. Always returns `Ok`.
    fn create_font_descriptor(
        &self,
        _font: &TrueTypeFont,
        font_name: &str,
    ) -> Result<FontDescriptor> {
        Ok(FontDescriptor {
            font_name: font_name.to_string(),
            flags: FontFlags {
                non_symbolic: true,
                ..Default::default()
            },
            bbox: [-100, -250, 1000, 750], // Default bounding box
            italic_angle: 0.0,
            ascent: 750,
            descent: -250,
            cap_height: 700,
            stem_v: 100,
            stem_h: 50,
            avg_width: 500,
            max_width: 1000,
            missing_width: 500,
            font_file: None,
        })
    }
724
    /// Create CID font descriptor.
    ///
    /// Currently delegates unchanged to `create_font_descriptor`, so CID and
    /// simple fonts receive identical descriptors.
    fn create_cid_font_descriptor(
        &self,
        font: &TrueTypeFont,
        font_name: &str,
    ) -> Result<FontDescriptor> {
        // Similar to create_font_descriptor but for CID fonts
        self.create_font_descriptor(font, font_name)
    }
734
    /// Create encoding for font.
    ///
    /// Currently a stub: neither argument is inspected and
    /// `FontEncoding::WinAnsiEncoding` is always returned.
    fn create_encoding_for_font(
        &self,
        _font: &TrueTypeFont,
        _used_glyphs: &HashSet<u16>,
    ) -> Result<FontEncoding> {
        // For now, return WinAnsi encoding
        // In a full implementation, this would analyze the font and create appropriate encoding
        Ok(FontEncoding::WinAnsiEncoding)
    }
745
746    /// Create Unicode mappings for simple fonts
747    fn create_unicode_mappings(
748        &self,
749        _font: &TrueTypeFont,
750        used_glyphs: &HashSet<u16>,
751    ) -> Result<HashMap<u16, String>> {
752        let mut mappings = HashMap::new();
753
754        // Create basic ASCII mappings
755        for glyph_id in used_glyphs {
756            if *glyph_id < 256 {
757                let unicode_char = char::from(*glyph_id as u8);
758                if unicode_char.is_ascii_graphic() || unicode_char == ' ' {
759                    mappings.insert(*glyph_id, unicode_char.to_string());
760                }
761            }
762        }
763
764        Ok(mappings)
765    }
766
767    /// Create Unicode mappings for CID fonts
768    fn create_cid_unicode_mappings(
769        &self,
770        _font: &TrueTypeFont,
771        used_chars: &HashSet<u32>,
772    ) -> Result<HashMap<u16, String>> {
773        let mut mappings = HashMap::new();
774
775        // Convert character codes to Unicode strings
776        for &char_code in used_chars {
777            if let Some(unicode_char) = char::from_u32(char_code) {
778                // Find glyph ID for this character (simplified)
779                let glyph_id = char_code as u16; // Simplified mapping
780                mappings.insert(glyph_id, unicode_char.to_string());
781            }
782        }
783
784        Ok(mappings)
785    }
786
787    /// Convert character codes to glyph indices
788    fn chars_to_glyphs(&self, _font: &TrueTypeFont, chars: &HashSet<u32>) -> Result<HashSet<u16>> {
789        let mut glyphs = HashSet::new();
790
791        // Always include glyph 0 (missing glyph)
792        glyphs.insert(0);
793
794        // Convert characters to glyph indices using font's character map
795        for &char_code in chars {
796            // This is simplified - a real implementation would use the font's cmap table
797            let glyph_id = if char_code < 65536 {
798                char_code as u16
799            } else {
800                0 // Missing glyph for characters outside BMP
801            };
802            glyphs.insert(glyph_id);
803        }
804
805        Ok(glyphs)
806    }
807}
808
impl Default for FontEmbedder {
    /// Equivalent to [`FontEmbedder::new`].
    fn default() -> Self {
        Self::new()
    }
}
814
815#[cfg(test)]
816mod tests {
817    use super::*;
818
819    #[test]
820    fn test_font_embedder_creation() {
821        let embedder = FontEmbedder::new();
822        assert_eq!(embedder.embedded_fonts.len(), 0);
823        assert_eq!(embedder.next_font_id, 1);
824    }
825
826    #[test]
827    fn test_embedding_options_default() {
828        let options = EmbeddingOptions::default();
829        assert!(options.subset);
830        assert_eq!(options.max_subset_size, Some(256));
831        assert!(options.compress_font_streams);
832        assert!(!options.embed_license_info);
833    }
834
    /// Generating a ToUnicode CMap for a font that has no Unicode mappings
    /// must fail rather than produce an empty CMap.
    #[test]
    fn test_generate_tounicode_cmap_empty() {
        let mut embedder = FontEmbedder::new();

        // Create a font with no Unicode mappings
        let font_data = EmbeddedFontData {
            pdf_name: "TestFont".to_string(),
            font_type: FontType::TrueType,
            descriptor: FontDescriptor {
                font_name: "TestFont".to_string(),
                flags: FontFlags::default(),
                bbox: [0, 0, 1000, 1000],
                italic_angle: 0.0,
                ascent: 750,
                descent: -250,
                cap_height: 700,
                stem_v: 100,
                stem_h: 50,
                avg_width: 500,
                max_width: 1000,
                missing_width: 500,
                font_file: None,
            },
            font_program: vec![],
            encoding: FontEncoding::WinAnsiEncoding,
            metrics: FontMetrics {
                ascent: 750,
                descent: -250,
                cap_height: 700,
                x_height: 500,
                stem_v: 100,
                stem_h: 50,
                avg_width: 500,
                max_width: 1000,
                missing_width: 500,
            },
            subset_glyphs: None,
            unicode_mappings: HashMap::new(),
        };

        // Register the font directly, bypassing the embed_* methods, so no
        // real font file is needed.
        embedder
            .embedded_fonts
            .insert("TestFont".to_string(), font_data);

        let result = embedder.generate_tounicode_cmap("TestFont");
        assert!(result.is_err());
    }
882
883    #[test]
884    fn test_generate_truetype_dictionary() {
885        let embedder = FontEmbedder::new();
886
887        let font_data = EmbeddedFontData {
888            pdf_name: "TestFont".to_string(),
889            font_type: FontType::TrueType,
890            descriptor: FontDescriptor {
891                font_name: "TestFont".to_string(),
892                flags: FontFlags::default(),
893                bbox: [0, 0, 1000, 1000],
894                italic_angle: 0.0,
895                ascent: 750,
896                descent: -250,
897                cap_height: 700,
898                stem_v: 100,
899                stem_h: 50,
900                avg_width: 500,
901                max_width: 1000,
902                missing_width: 500,
903                font_file: None,
904            },
905            font_program: vec![],
906            encoding: FontEncoding::WinAnsiEncoding,
907            metrics: FontMetrics {
908                ascent: 750,
909                descent: -250,
910                cap_height: 700,
911                x_height: 500,
912                stem_v: 100,
913                stem_h: 50,
914                avg_width: 500,
915                max_width: 1000,
916                missing_width: 500,
917            },
918            subset_glyphs: None,
919            unicode_mappings: HashMap::new(),
920        };
921
922        let dict = embedder.generate_truetype_dictionary(&font_data).unwrap();
923
924        // Verify basic font properties
925        if let Some(Object::Name(font_type)) = dict.get("Type") {
926            assert_eq!(font_type, "Font");
927        }
928        if let Some(Object::Name(subtype)) = dict.get("Subtype") {
929            assert_eq!(subtype, "TrueType");
930        }
931        if let Some(Object::Name(base_font)) = dict.get("BaseFont") {
932            assert_eq!(base_font, "TestFont");
933        }
934    }
935
936    #[test]
937    fn test_generate_type0_dictionary() {
938        let embedder = FontEmbedder::new();
939
940        let font_data = EmbeddedFontData {
941            pdf_name: "TestCIDFont".to_string(),
942            font_type: FontType::Type0,
943            descriptor: FontDescriptor {
944                font_name: "TestCIDFont".to_string(),
945                flags: FontFlags::default(),
946                bbox: [0, 0, 1000, 1000],
947                italic_angle: 0.0,
948                ascent: 750,
949                descent: -250,
950                cap_height: 700,
951                stem_v: 100,
952                stem_h: 50,
953                avg_width: 500,
954                max_width: 1000,
955                missing_width: 500,
956                font_file: None,
957            },
958            font_program: vec![],
959            encoding: FontEncoding::Identity,
960            metrics: FontMetrics {
961                ascent: 750,
962                descent: -250,
963                cap_height: 700,
964                x_height: 500,
965                stem_v: 100,
966                stem_h: 50,
967                avg_width: 500,
968                max_width: 1000,
969                missing_width: 500,
970            },
971            subset_glyphs: None,
972            unicode_mappings: HashMap::new(),
973        };
974
975        let dict = embedder.generate_type0_dictionary(&font_data).unwrap();
976
977        // Verify Type0 font properties
978        if let Some(Object::Name(subtype)) = dict.get("Subtype") {
979            assert_eq!(subtype, "Type0");
980        }
981        if let Some(Object::Name(encoding)) = dict.get("Encoding") {
982            assert_eq!(encoding, "Identity-H");
983        }
984        if let Some(Object::Array(descendant_fonts)) = dict.get("DescendantFonts") {
985            assert_eq!(descendant_fonts.len(), 1);
986        }
987    }
988
989    #[test]
990    fn test_chars_to_glyphs_conversion() {
991        let _embedder = FontEmbedder::new();
992        let _font_data = vec![0; 100]; // Dummy font data
993
994        // This would fail in real implementation due to invalid font data
995        // but tests the function structure
996        let chars: HashSet<u32> = [65, 66, 67].iter().cloned().collect(); // A, B, C
997
998        // Test would require valid font data to complete
999        // For now, test that the function exists and compiles
1000        assert!(chars.len() == 3);
1001    }
1002
1003    #[test]
1004    fn test_unicode_mappings_creation() {
1005        let _embedder = FontEmbedder::new();
1006        let glyphs: HashSet<u16> = [65, 66, 67].iter().cloned().collect();
1007
1008        // Create dummy font for testing
1009        let _font_data = vec![0; 100];
1010
1011        // Test would require valid TrueType font parsing
1012        // For now, verify function signature
1013        assert!(glyphs.len() == 3);
1014    }
1015
1016    #[test]
1017    fn test_font_descriptor_generation() {
1018        let _embedder = FontEmbedder::new();
1019
1020        let font_data = EmbeddedFontData {
1021            pdf_name: "TestFont".to_string(),
1022            font_type: FontType::TrueType,
1023            descriptor: FontDescriptor {
1024                font_name: "TestFont".to_string(),
1025                flags: FontFlags {
1026                    non_symbolic: true,
1027                    serif: true,
1028                    ..Default::default()
1029                },
1030                bbox: [-100, -250, 1000, 750],
1031                italic_angle: 0.0,
1032                ascent: 750,
1033                descent: -250,
1034                cap_height: 700,
1035                stem_v: 100,
1036                stem_h: 50,
1037                avg_width: 500,
1038                max_width: 1000,
1039                missing_width: 500,
1040                font_file: None,
1041            },
1042            font_program: vec![],
1043            encoding: FontEncoding::WinAnsiEncoding,
1044            metrics: FontMetrics {
1045                ascent: 750,
1046                descent: -250,
1047                cap_height: 700,
1048                x_height: 500,
1049                stem_v: 100,
1050                stem_h: 50,
1051                avg_width: 500,
1052                max_width: 1000,
1053                missing_width: 500,
1054            },
1055            subset_glyphs: None,
1056            unicode_mappings: HashMap::new(),
1057        };
1058
1059        let mut embedder_with_font = FontEmbedder::new();
1060        embedder_with_font
1061            .embedded_fonts
1062            .insert("TestFont".to_string(), font_data);
1063
1064        let desc_dict = embedder_with_font
1065            .generate_font_descriptor("TestFont")
1066            .unwrap();
1067
1068        // Verify font descriptor properties
1069        if let Some(Object::Name(font_name)) = desc_dict.get("FontName") {
1070            assert_eq!(font_name, "TestFont");
1071        }
1072        if let Some(Object::Integer(flags)) = desc_dict.get("Flags") {
1073            assert!(*flags > 0); // Should have some flags set
1074        }
1075        if let Some(Object::Array(bbox)) = desc_dict.get("FontBBox") {
1076            assert_eq!(bbox.len(), 4);
1077        }
1078    }
1079}