use crate::error::{PdfError, Result};
use crate::objects::{Dictionary, Object, ObjectId};
use crate::text::fonts::truetype::TrueTypeFont;
use std::collections::{HashMap, HashSet};
/// Kind of PDF font object an embedded font is emitted as.
///
/// `FontEmbedder::generate_font_dictionary` dispatches on this value to pick
/// the dictionary layout.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum FontType {
/// Simple font, written with `/Subtype /TrueType`.
TrueType,
/// Composite font, written with `/Subtype /Type0`.
Type0,
}
/// Character encoding recorded for an embedded font.
///
/// For simple TrueType fonts the first three variants are written as the
/// `/Encoding` name of the same spelling.
#[derive(Debug, Clone, PartialEq)]
pub enum FontEncoding {
/// Written as `/Encoding /StandardEncoding`.
StandardEncoding,
/// Written as `/Encoding /MacRomanEncoding`.
MacRomanEncoding,
/// Written as `/Encoding /WinAnsiEncoding`.
WinAnsiEncoding,
/// Written as an encoding dictionary with `/BaseEncoding /WinAnsiEncoding`
/// and a `/Differences` array built from these entries.
Custom(Vec<EncodingDifference>),
/// Used for CID fonts; the simple TrueType dictionary writes no
/// `/Encoding` entry for this variant.
Identity,
}
/// CJK script/region family a font targets, as guessed from its name.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CjkFontType {
    /// Simplified Chinese.
    ChineseSimplified,
    /// Traditional Chinese.
    ChineseTraditional,
    /// Japanese.
    Japanese,
    /// Korean.
    Korean,
    /// CJK font without a specific region. Never produced by
    /// [`CjkFontType::detect_from_name`]; callers construct it explicitly.
    Generic,
}

impl CjkFontType {
    /// Returns the `(registry, ordering, supplement)` triple for this font type.
    ///
    /// Every variant currently yields `("Adobe", "Identity", 0)`. The match
    /// lists the variants explicitly so that adding a new one forces a
    /// decision here.
    pub fn cid_system_info(&self) -> (&'static str, &'static str, i32) {
        match self {
            CjkFontType::ChineseSimplified
            | CjkFontType::ChineseTraditional
            | CjkFontType::Japanese
            | CjkFontType::Korean
            | CjkFontType::Generic => ("Adobe", "Identity", 0),
        }
    }

    /// Guesses the CJK family from a font name (case-insensitive).
    ///
    /// Checks run in this order: Source Han names, Noto Sans CJK names, then
    /// generic language keywords. Returns `None` when nothing matches.
    ///
    /// For Noto Sans CJK the region tag (`sc`/`tc`/`jp`/`kr`) is searched only
    /// in the text *after* the family marker. The unspaced marker
    /// `notosanscjk` itself contains `sc`, so searching the whole name would
    /// classify every such font (e.g. `NotoSansCJKjp`) as Simplified Chinese.
    pub fn detect_from_name(font_name: &str) -> Option<Self> {
        const SOURCE_HAN_MARKERS: [&str; 6] = [
            "sourcehansans",
            "source han sans",
            "hansans",
            "han sans",
            "sourcehan",
            "source han",
        ];
        const NOTO_CJK_MARKERS: [&str; 2] = ["notosanscjk", "noto sans cjk"];

        let name_lower = font_name.to_lowercase();
        let has = |needle: &str| name_lower.contains(needle);

        if SOURCE_HAN_MARKERS.iter().any(|&marker| has(marker)) {
            if has("sc") || has("simplifiedchinese") {
                return Some(CjkFontType::ChineseSimplified);
            }
            if has("tc") || has("traditionalchinese") {
                return Some(CjkFontType::ChineseTraditional);
            }
            if has("jp") || has("japanese") {
                return Some(CjkFontType::Japanese);
            }
            if has("kr") || has("korean") {
                return Some(CjkFontType::Korean);
            }
        }

        // Text following the first Noto Sans CJK marker found, if any. The
        // markers are ASCII, so the computed offset is a valid char boundary.
        let noto_region = NOTO_CJK_MARKERS.iter().find_map(|&marker| {
            name_lower
                .find(marker)
                .map(|start| &name_lower[start + marker.len()..])
        });
        if let Some(region) = noto_region {
            if region.contains("sc") {
                return Some(CjkFontType::ChineseSimplified);
            }
            if region.contains("tc") {
                return Some(CjkFontType::ChineseTraditional);
            }
            if region.contains("jp") {
                return Some(CjkFontType::Japanese);
            }
            if region.contains("kr") {
                return Some(CjkFontType::Korean);
            }
        }

        if has("chinese") || has("zh") || has("gb") {
            if has("traditional") || has("tw") || has("hk") {
                return Some(CjkFontType::ChineseTraditional);
            }
            return Some(CjkFontType::ChineseSimplified);
        }
        if has("japanese") || has("jp") || has("japan") {
            return Some(CjkFontType::Japanese);
        }
        if has("korean") || has("kr") || has("korea") {
            return Some(CjkFontType::Korean);
        }
        None
    }

    /// Returns `true` when the descendant font should be a `CIDFontType2`,
    /// i.e. when the font program is not CFF.
    pub fn should_use_cidfonttype2(is_cff: bool) -> bool {
        !is_cff
    }
}
/// One run in a custom encoding's `/Differences` array.
///
/// When serialized, `code` is written as an integer followed by each entry of
/// `names` as a PDF name, in order.
#[derive(Debug, Clone, PartialEq)]
pub struct EncodingDifference {
/// Character code written before the names of this run.
pub code: u8,
/// Glyph names written after `code`.
pub names: Vec<String>,
}
/// Boolean view of the font descriptor `/Flags` bit field.
///
/// All flags default to `false`.
#[derive(Debug, Clone, Copy, Default)]
pub struct FontFlags {
    /// Bit 0 of the packed value.
    pub fixed_pitch: bool,
    /// Bit 1 of the packed value.
    pub serif: bool,
    /// Bit 2 of the packed value.
    pub symbolic: bool,
    /// Bit 3 of the packed value.
    pub script: bool,
    /// Bit 5 of the packed value.
    pub non_symbolic: bool,
    /// Bit 6 of the packed value.
    pub italic: bool,
    /// Bit 16 of the packed value.
    pub all_cap: bool,
    /// Bit 17 of the packed value.
    pub small_cap: bool,
    /// Bit 18 of the packed value.
    pub force_bold: bool,
}

impl FontFlags {
    /// Packs the enabled flags into the integer written as `/Flags`.
    ///
    /// Bit positions are zero-based; bits 4 and 7–15 are never set.
    pub fn to_flags(&self) -> u32 {
        let bit_table: [(bool, u32); 9] = [
            (self.fixed_pitch, 0),
            (self.serif, 1),
            (self.symbolic, 2),
            (self.script, 3),
            (self.non_symbolic, 5),
            (self.italic, 6),
            (self.all_cap, 16),
            (self.small_cap, 17),
            (self.force_bold, 18),
        ];
        bit_table
            .iter()
            .filter(|(enabled, _)| *enabled)
            .fold(0u32, |packed, (_, bit)| packed | (1u32 << *bit))
    }
}
/// Descriptor values stored alongside an embedded font.
///
/// `FontEmbedder::generate_font_descriptor` reads `flags`, `bbox`,
/// `italic_angle` and `stem_v` from here; it takes ascent, descent and cap
/// height from `FontMetrics` instead.
// NOTE(review): numeric fields are presumably in 1/1000 em glyph-space units — confirm.
#[derive(Debug, Clone)]
pub struct FontDescriptor {
/// Font name recorded for this descriptor.
pub font_name: String,
/// Flags packed into `/Flags` via `FontFlags::to_flags`.
pub flags: FontFlags,
/// Four values written, in order, as the `/FontBBox` array.
pub bbox: [i32; 4],
/// Written as `/ItalicAngle`.
pub italic_angle: f64,
/// Ascent value (not read when the descriptor dictionary is generated).
pub ascent: i32,
/// Descent value (not read when the descriptor dictionary is generated).
pub descent: i32,
/// Cap height value (not read when the descriptor dictionary is generated).
pub cap_height: i32,
/// Written as `/StemV`.
pub stem_v: i32,
/// Horizontal stem value.
pub stem_h: i32,
/// Average glyph width.
pub avg_width: i32,
/// Maximum glyph width.
pub max_width: i32,
/// Width used for missing glyphs.
pub missing_width: i32,
/// Optional object id; presumably the embedded font program stream —
/// TODO confirm. `create_font_descriptor` leaves it `None`.
pub font_file: Option<ObjectId>,
}
/// Font-wide metric values stored with an embedded font.
///
/// `FontEmbedder::generate_font_descriptor` writes `ascent`, `descent` and
/// `cap_height` from this struct as `/Ascent`, `/Descent` and `/CapHeight`.
// NOTE(review): values are presumably in 1/1000 em glyph-space units — confirm.
#[derive(Debug, Clone)]
pub struct FontMetrics {
/// Written as `/Ascent`.
pub ascent: i32,
/// Written as `/Descent`.
pub descent: i32,
/// Written as `/CapHeight`.
pub cap_height: i32,
/// x-height value.
pub x_height: i32,
/// Vertical stem value.
pub stem_v: i32,
/// Horizontal stem value.
pub stem_h: i32,
/// Average glyph width.
pub avg_width: i32,
/// Maximum glyph width.
pub max_width: i32,
/// Width used for missing glyphs.
pub missing_width: i32,
}
/// Registry of fonts prepared for embedding, plus generators for their PDF
/// dictionaries.
#[derive(Debug)]
pub struct FontEmbedder {
// Embedded fonts keyed by the PDF font name returned from the embed_* methods.
embedded_fonts: HashMap<String, EmbeddedFontData>,
// Counter used to build unique font names; starts at 1 and is incremented
// by each embed_* call after the font parses successfully.
next_font_id: u32,
}
/// Everything recorded about one embedded font.
#[derive(Debug, Clone)]
pub struct EmbeddedFontData {
/// Name written as `/BaseFont` and `/FontName`; also the registry key.
pub pdf_name: String,
/// Selects which font dictionary layout is generated.
pub font_type: FontType,
/// Descriptor values for the font.
pub descriptor: FontDescriptor,
/// Font program bytes: the subset when subsetting was applied, otherwise a
/// copy of the input data.
pub font_program: Vec<u8>,
/// Encoding recorded for the font.
pub encoding: FontEncoding,
/// Font-wide metrics.
pub metrics: FontMetrics,
/// Glyph ids kept in the subset. `embed_truetype_font` stores `None` when
/// it did not subset; `embed_cid_font` always stores `Some`.
pub subset_glyphs: Option<HashSet<u16>>,
/// Source code (formatted as a 4-digit hex value in the ToUnicode CMap)
/// mapped to the Unicode text it represents.
pub unicode_mappings: HashMap<u16, String>,
}
/// Options controlling how a font is embedded.
#[derive(Debug, Clone)]
pub struct EmbeddingOptions {
    /// Request subsetting of the font program.
    pub subset: bool,
    /// Glyph-count threshold for subsetting simple TrueType fonts: the font is
    /// subsetted only when fewer glyphs than this are used. `None` is treated
    /// as 256 by `embed_truetype_font`.
    pub max_subset_size: Option<usize>,
    /// Whether font streams should be compressed (not read in this file).
    pub compress_font_streams: bool,
    /// Whether license information should be embedded (not read in this file).
    pub embed_license_info: bool,
}

impl Default for EmbeddingOptions {
    /// Subsetting on with a 256-glyph threshold, compression on, license
    /// information off.
    fn default() -> Self {
        const DEFAULT_MAX_SUBSET_GLYPHS: usize = 256;

        EmbeddingOptions {
            embed_license_info: false,
            compress_font_streams: true,
            max_subset_size: Some(DEFAULT_MAX_SUBSET_GLYPHS),
            subset: true,
        }
    }
}
impl FontEmbedder {
/// Creates an embedder with no fonts registered; the first font id is 1.
pub fn new() -> Self {
    let embedded_fonts = HashMap::new();
    Self {
        embedded_fonts,
        next_font_id: 1,
    }
}
/// Parses `font_data` as a TrueType font and registers it as a simple font.
///
/// The font program is subsetted to `used_glyphs` only when `options.subset`
/// is set and fewer glyphs are used than `options.max_subset_size`
/// (256 when unset); otherwise the input bytes are stored unchanged.
///
/// Returns the generated PDF font name, which is also the key for the
/// `generate_*` methods.
///
/// # Errors
///
/// Returns `PdfError::FontError` when the font cannot be parsed or the subset
/// cannot be created, and propagates errors from the metric, descriptor,
/// encoding and mapping helpers.
pub fn embed_truetype_font(
    &mut self,
    font_data: &[u8],
    used_glyphs: &HashSet<u16>,
    options: &EmbeddingOptions,
) -> Result<String> {
    let parsed = TrueTypeFont::from_data(font_data)
        .map_err(|e| PdfError::FontError(format!("Failed to parse font: {e}")))?;

    // The id is consumed as soon as parsing succeeds, even if a later step fails.
    let pdf_name = format!("ABCDEF+Font{}", self.next_font_id);
    self.next_font_id += 1;

    let glyph_limit = options.max_subset_size.unwrap_or(256);
    let subsetting = options.subset && used_glyphs.len() < glyph_limit;

    let font_program = if subsetting {
        parsed
            .create_subset(used_glyphs)
            .map_err(|e| PdfError::FontError(format!("Failed to create subset: {e}")))?
    } else {
        font_data.to_vec()
    };

    let metrics = self.extract_font_metrics(&parsed)?;
    let descriptor = self.create_font_descriptor(&parsed, &pdf_name)?;
    let encoding = self.create_encoding_for_font(&parsed, used_glyphs)?;
    let unicode_mappings = self.create_unicode_mappings(&parsed, used_glyphs)?;
    let subset_glyphs = subsetting.then(|| used_glyphs.clone());

    self.embedded_fonts.insert(
        pdf_name.clone(),
        EmbeddedFontData {
            pdf_name: pdf_name.clone(),
            font_type: FontType::TrueType,
            descriptor,
            font_program,
            encoding,
            metrics,
            subset_glyphs,
            unicode_mappings,
        },
    );
    Ok(pdf_name)
}
/// Parses `font_data` and registers it as a composite (Type0) font with
/// identity encoding.
///
/// `used_chars` is converted to glyph ids via `chars_to_glyphs`; the font
/// program is subsetted to those glyphs whenever `options.subset` is set
/// (`max_subset_size` is not consulted here). `_cmap_name` is unused.
///
/// Returns the generated PDF font name.
///
/// # Errors
///
/// Returns `PdfError::FontError` when the font cannot be parsed or the subset
/// cannot be created, and propagates errors from the helper methods.
pub fn embed_cid_font(
    &mut self,
    font_data: &[u8],
    used_chars: &HashSet<u32>,
    _cmap_name: &str,
    options: &EmbeddingOptions,
) -> Result<String> {
    let parsed = TrueTypeFont::from_data(font_data)
        .map_err(|e| PdfError::FontError(format!("Failed to parse font: {e}")))?;

    // The id is consumed as soon as parsing succeeds, even if a later step fails.
    let pdf_name = format!("ABCDEF+CIDFont{}", self.next_font_id);
    self.next_font_id += 1;

    let glyph_ids = self.chars_to_glyphs(&parsed, used_chars)?;

    let font_program = if options.subset {
        parsed
            .create_subset(&glyph_ids)
            .map_err(|e| PdfError::FontError(format!("Failed to create subset: {e}")))?
    } else {
        font_data.to_vec()
    };

    let metrics = self.extract_font_metrics(&parsed)?;
    let descriptor = self.create_cid_font_descriptor(&parsed, &pdf_name)?;
    let unicode_mappings = self.create_cid_unicode_mappings(&parsed, used_chars)?;

    self.embedded_fonts.insert(
        pdf_name.clone(),
        EmbeddedFontData {
            pdf_name: pdf_name.clone(),
            font_type: FontType::Type0,
            descriptor,
            font_program,
            encoding: FontEncoding::Identity,
            metrics,
            // Recorded even when the program itself was not subsetted.
            subset_glyphs: Some(glyph_ids),
            unicode_mappings,
        },
    );
    Ok(pdf_name)
}
/// Builds the `/Font` dictionary for a previously embedded font.
///
/// The layout (simple TrueType vs. Type0) follows the font's recorded
/// `FontType`.
///
/// # Errors
///
/// Returns `PdfError::FontError` when `font_name` is not registered.
pub fn generate_font_dictionary(&self, font_name: &str) -> Result<Dictionary> {
    match self.embedded_fonts.get(font_name) {
        Some(entry) => match entry.font_type {
            FontType::TrueType => self.generate_truetype_dictionary(entry),
            FontType::Type0 => self.generate_type0_dictionary(entry),
        },
        None => Err(PdfError::FontError(format!("Font {font_name} not found"))),
    }
}
/// Builds the `/Font` dictionary for a simple TrueType font.
///
/// `/FontDescriptor` is written as a placeholder reference (`0 0`), and
/// `/Widths` holds a fixed width of 500 for every code from 32 to 255; real
/// glyph widths are not consulted. No `/Encoding` entry is written for
/// `FontEncoding::Identity`.
fn generate_truetype_dictionary(&self, font_data: &EmbeddedFontData) -> Result<Dictionary> {
    const FIRST_CHAR: i64 = 32;
    const LAST_CHAR: i64 = 255;
    const PLACEHOLDER_WIDTH: i64 = 500;

    let named = |value: &str| Object::Name(value.to_string());

    // Resolve the /Encoding value first; `None` means the key is omitted.
    let encoding_entry = match &font_data.encoding {
        FontEncoding::WinAnsiEncoding => Some(named("WinAnsiEncoding")),
        FontEncoding::MacRomanEncoding => Some(named("MacRomanEncoding")),
        FontEncoding::StandardEncoding => Some(named("StandardEncoding")),
        FontEncoding::Custom(differences) => {
            // Each run is its starting code followed by its glyph names.
            let diff_array: Vec<Object> = differences
                .iter()
                .flat_map(|diff| {
                    std::iter::once(Object::Integer(i64::from(diff.code)))
                        .chain(diff.names.iter().cloned().map(Object::Name))
                })
                .collect();

            let mut encoding_dict = Dictionary::new();
            encoding_dict.set("Type", named("Encoding"));
            encoding_dict.set("BaseEncoding", named("WinAnsiEncoding"));
            encoding_dict.set("Differences", Object::Array(diff_array));
            Some(Object::Dictionary(encoding_dict))
        }
        FontEncoding::Identity => None,
    };

    let mut font_dict = Dictionary::new();
    font_dict.set("Type", named("Font"));
    font_dict.set("Subtype", named("TrueType"));
    font_dict.set("BaseFont", Object::Name(font_data.pdf_name.clone()));
    font_dict.set("FontDescriptor", Object::Reference(ObjectId::new(0, 0)));
    if let Some(entry) = encoding_entry {
        font_dict.set("Encoding", entry);
    }
    font_dict.set("FirstChar", Object::Integer(FIRST_CHAR));
    font_dict.set("LastChar", Object::Integer(LAST_CHAR));

    let widths: Vec<Object> = (FIRST_CHAR..=LAST_CHAR)
        .map(|_| Object::Integer(PLACEHOLDER_WIDTH))
        .collect();
    font_dict.set("Widths", Object::Array(widths));

    Ok(font_dict)
}
/// Builds the `/Font` dictionary for a composite (Type0) font.
///
/// Uses `/Encoding /Identity-H` and a single placeholder reference (`0 0`)
/// in `/DescendantFonts`. A placeholder `/ToUnicode` reference is added only
/// when the font has Unicode mappings.
fn generate_type0_dictionary(&self, font_data: &EmbeddedFontData) -> Result<Dictionary> {
    let placeholder_ref = || Object::Reference(ObjectId::new(0, 0));
    let named = |value: &str| Object::Name(value.to_string());

    let mut type0 = Dictionary::new();
    type0.set("Type", named("Font"));
    type0.set("Subtype", named("Type0"));
    type0.set("BaseFont", Object::Name(font_data.pdf_name.clone()));
    type0.set("Encoding", named("Identity-H"));
    type0.set("DescendantFonts", Object::Array(vec![placeholder_ref()]));

    let has_mappings = !font_data.unicode_mappings.is_empty();
    if has_mappings {
        type0.set("ToUnicode", placeholder_ref());
    }
    Ok(type0)
}
pub fn generate_font_descriptor(&self, font_name: &str) -> Result<Dictionary> {
let font_data = self
.embedded_fonts
.get(font_name)
.ok_or_else(|| PdfError::FontError(format!("Font {font_name} not found")))?;
let mut desc_dict = Dictionary::new();
desc_dict.set("Type", Object::Name("FontDescriptor".to_string()));
desc_dict.set("FontName", Object::Name(font_data.pdf_name.clone()));
desc_dict.set(
"Flags",
Object::Integer(font_data.descriptor.flags.to_flags() as i64),
);
desc_dict.set("Ascent", Object::Integer(font_data.metrics.ascent as i64));
desc_dict.set("Descent", Object::Integer(font_data.metrics.descent as i64));
desc_dict.set(
"CapHeight",
Object::Integer(font_data.metrics.cap_height as i64),
);
desc_dict.set(
"ItalicAngle",
Object::Real(font_data.descriptor.italic_angle),
);
desc_dict.set("StemV", Object::Integer(font_data.descriptor.stem_v as i64));
let bbox = vec![
Object::Integer(font_data.descriptor.bbox[0] as i64),
Object::Integer(font_data.descriptor.bbox[1] as i64),
Object::Integer(font_data.descriptor.bbox[2] as i64),
Object::Integer(font_data.descriptor.bbox[3] as i64),
];
desc_dict.set("FontBBox", Object::Array(bbox));
match font_data.font_type {
FontType::TrueType => {
desc_dict.set("FontFile2", Object::Reference(ObjectId::new(0, 0)));
}
FontType::Type0 => {
desc_dict.set("FontFile2", Object::Reference(ObjectId::new(0, 0)));
}
}
Ok(desc_dict)
}
/// Generates the text of a ToUnicode CMap for a previously embedded font.
///
/// Each stored mapping becomes one `bfchar` entry: the source code as a
/// 4-digit hex value and the Unicode text as UTF-16BE hex (characters outside
/// the Basic Multilingual Plane are written as surrogate pairs). Entries are
/// emitted in ascending source-code order so the output is reproducible, and
/// are split into `beginbfchar` blocks of at most 100 entries, which is the
/// limit the CMap format places on a single block.
///
/// # Errors
///
/// Returns `PdfError::FontError` when `font_name` is not registered or the
/// font has no Unicode mappings.
pub fn generate_tounicode_cmap(&self, font_name: &str) -> Result<String> {
    // Maximum number of entries allowed in one beginbfchar/endbfchar block.
    const MAX_BFCHAR_ENTRIES: usize = 100;

    let font_data = self
        .embedded_fonts
        .get(font_name)
        .ok_or_else(|| PdfError::FontError(format!("Font {font_name} not found")))?;
    if font_data.unicode_mappings.is_empty() {
        return Err(PdfError::FontError(
            "No Unicode mappings available".to_string(),
        ));
    }

    // HashMap iteration order is unspecified; sort for deterministic output.
    let mut entries: Vec<(u16, &str)> = font_data
        .unicode_mappings
        .iter()
        .map(|(glyph_id, text)| (*glyph_id, text.as_str()))
        .collect();
    entries.sort_unstable_by_key(|&(glyph_id, _)| glyph_id);

    let mut cmap_content = String::new();
    cmap_content.push_str("/CIDInit /ProcSet findresource begin\n");
    cmap_content.push_str("12 dict begin\n");
    cmap_content.push_str("begincmap\n");
    cmap_content.push_str("/CIDSystemInfo\n");
    cmap_content.push_str("<<\n");
    cmap_content.push_str("/Registry (Adobe)\n");
    cmap_content.push_str("/Ordering (UCS)\n");
    cmap_content.push_str("/Supplement 0\n");
    cmap_content.push_str(">> def\n");
    cmap_content.push_str("/CMapName /Adobe-Identity-UCS def\n");
    cmap_content.push_str("/CMapType 2 def\n");
    cmap_content.push_str("1 begincodespacerange\n");
    cmap_content.push_str("<0000> <FFFF>\n");
    cmap_content.push_str("endcodespacerange\n");

    for block in entries.chunks(MAX_BFCHAR_ENTRIES) {
        cmap_content.push_str(&format!("{} beginbfchar\n", block.len()));
        for (glyph_id, text) in block {
            // Destination strings must be UTF-16BE, so supplementary-plane
            // characters expand to two 16-bit code units.
            let utf16_hex: String = text
                .encode_utf16()
                .map(|unit| format!("{unit:04X}"))
                .collect();
            cmap_content.push_str(&format!("<{glyph_id:04X}> <{utf16_hex}>\n"));
        }
        cmap_content.push_str("endbfchar\n");
    }

    cmap_content.push_str("endcmap\n");
    cmap_content.push_str("CMapName currentdict /CMap defineresource pop\n");
    cmap_content.push_str("end\n");
    cmap_content.push_str("end\n");
    Ok(cmap_content)
}
/// Returns all embedded fonts, keyed by their generated PDF font name.
pub fn embedded_fonts(&self) -> &HashMap<String, EmbeddedFontData> {
&self.embedded_fonts
}
/// Returns the metrics recorded for an embedded font.
///
/// The values are fixed placeholders; `_font` is not consulted, so every font
/// receives the same metrics.
fn extract_font_metrics(&self, _font: &TrueTypeFont) -> Result<FontMetrics> {
Ok(FontMetrics {
ascent: 750,
descent: -250,
cap_height: 700,
x_height: 500,
stem_v: 100,
stem_h: 50,
avg_width: 500,
max_width: 1000,
missing_width: 500,
})
}
/// Builds the descriptor recorded for an embedded font.
///
/// Metric fields are taken from `extract_font_metrics` so the descriptor and
/// the stored `FontMetrics` cannot drift apart. The flags mark the font as
/// non-symbolic, the bounding box and italic angle are fixed placeholders,
/// and `font_file` is left unset.
///
/// # Errors
///
/// Propagates any error from `extract_font_metrics`.
fn create_font_descriptor(
    &self,
    font: &TrueTypeFont,
    font_name: &str,
) -> Result<FontDescriptor> {
    let metrics = self.extract_font_metrics(font)?;
    Ok(FontDescriptor {
        font_name: font_name.to_string(),
        flags: FontFlags {
            non_symbolic: true,
            ..Default::default()
        },
        // Placeholder bounding box; not derived from the font program.
        bbox: [-100, -250, 1000, 750],
        italic_angle: 0.0,
        ascent: metrics.ascent,
        descent: metrics.descent,
        cap_height: metrics.cap_height,
        stem_v: metrics.stem_v,
        stem_h: metrics.stem_h,
        avg_width: metrics.avg_width,
        max_width: metrics.max_width,
        missing_width: metrics.missing_width,
        font_file: None,
    })
}
/// Builds the descriptor for a CID font.
///
/// Currently delegates to `create_font_descriptor` without any CID-specific
/// adjustment.
fn create_cid_font_descriptor(
&self,
font: &TrueTypeFont,
font_name: &str,
) -> Result<FontDescriptor> {
self.create_font_descriptor(font, font_name)
}
/// Chooses the encoding for a simple TrueType font.
///
/// Always returns `FontEncoding::WinAnsiEncoding`; neither the font nor the
/// used glyph set is consulted.
fn create_encoding_for_font(
&self,
_font: &TrueTypeFont,
_used_glyphs: &HashSet<u16>,
) -> Result<FontEncoding> {
Ok(FontEncoding::WinAnsiEncoding)
}
/// Builds the glyph-id → Unicode text table for a simple TrueType font.
///
/// Only ids below 256 whose value, read as a byte, is a printable ASCII
/// character or a space receive a mapping; all other ids are skipped.
// NOTE(review): this treats glyph ids as character codes and never consults
// the font's own cmap — presumably a placeholder; confirm.
fn create_unicode_mappings(
    &self,
    _font: &TrueTypeFont,
    used_glyphs: &HashSet<u16>,
) -> Result<HashMap<u16, String>> {
    let mappings = used_glyphs
        .iter()
        .copied()
        .filter(|&gid| gid < 256)
        .map(|gid| (gid, char::from(gid as u8)))
        .filter(|&(_, ch)| ch.is_ascii_graphic() || ch == ' ')
        .map(|(gid, ch)| (gid, ch.to_string()))
        .collect();
    Ok(mappings)
}
/// Builds the code → Unicode text table for a CID font.
///
/// Each used character that is a valid Unicode scalar value and fits in
/// 16 bits is mapped from its own code to itself. Code points above `0xFFFF`
/// are skipped: `chars_to_glyphs` sends them to glyph 0, and truncating them
/// to 16 bits would alias an unrelated code and could overwrite its mapping.
fn create_cid_unicode_mappings(
    &self,
    _font: &TrueTypeFont,
    used_chars: &HashSet<u32>,
) -> Result<HashMap<u16, String>> {
    let mappings = used_chars
        .iter()
        .copied()
        .filter(|&code| code <= u32::from(u16::MAX))
        .filter_map(|code| {
            // The cast is lossless: `code` was just checked to fit in u16.
            char::from_u32(code).map(|ch| (code as u16, ch.to_string()))
        })
        .collect();
    Ok(mappings)
}
/// Converts used character codes into the glyph-id set for subsetting.
///
/// Glyph 0 is always included. Codes below 65536 are used directly as glyph
/// ids; larger codes collapse to glyph 0. `_font` is not consulted.
fn chars_to_glyphs(&self, _font: &TrueTypeFont, chars: &HashSet<u32>) -> Result<HashSet<u16>> {
    let code_to_glyph = |code: u32| -> u16 {
        if code < 65536 {
            code as u16
        } else {
            0
        }
    };
    let glyphs: HashSet<u16> = std::iter::once(0u16)
        .chain(chars.iter().map(|&code| code_to_glyph(code)))
        .collect();
    Ok(glyphs)
}
}
impl Default for FontEmbedder {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an `EmbeddedFontData` fixture with placeholder metrics, an empty
    /// font program and no Unicode mappings.
    fn sample_font_data(
        pdf_name: &str,
        font_type: FontType,
        encoding: FontEncoding,
    ) -> EmbeddedFontData {
        EmbeddedFontData {
            pdf_name: pdf_name.to_string(),
            font_type,
            descriptor: FontDescriptor {
                font_name: pdf_name.to_string(),
                flags: FontFlags::default(),
                bbox: [0, 0, 1000, 1000],
                italic_angle: 0.0,
                ascent: 750,
                descent: -250,
                cap_height: 700,
                stem_v: 100,
                stem_h: 50,
                avg_width: 500,
                max_width: 1000,
                missing_width: 500,
                font_file: None,
            },
            font_program: vec![],
            encoding,
            metrics: FontMetrics {
                ascent: 750,
                descent: -250,
                cap_height: 700,
                x_height: 500,
                stem_v: 100,
                stem_h: 50,
                avg_width: 500,
                max_width: 1000,
                missing_width: 500,
            },
            subset_glyphs: None,
            unicode_mappings: HashMap::new(),
        }
    }

    /// Returns an embedder that has `font_data` registered under its PDF name.
    fn embedder_with(font_data: EmbeddedFontData) -> FontEmbedder {
        let mut embedder = FontEmbedder::new();
        embedder
            .embedded_fonts
            .insert(font_data.pdf_name.clone(), font_data);
        embedder
    }

    #[test]
    fn test_font_embedder_creation() {
        let embedder = FontEmbedder::new();
        assert_eq!(embedder.embedded_fonts.len(), 0);
        assert_eq!(embedder.next_font_id, 1);
    }

    #[test]
    fn test_embedding_options_default() {
        let options = EmbeddingOptions::default();
        assert!(options.subset);
        assert_eq!(options.max_subset_size, Some(256));
        assert!(options.compress_font_streams);
        assert!(!options.embed_license_info);
    }

    #[test]
    fn test_generate_tounicode_cmap_empty() {
        let embedder = embedder_with(sample_font_data(
            "TestFont",
            FontType::TrueType,
            FontEncoding::WinAnsiEncoding,
        ));
        assert!(embedder.generate_tounicode_cmap("TestFont").is_err());
    }

    #[test]
    fn test_generate_tounicode_cmap_single_mapping() {
        let mut font_data =
            sample_font_data("TestCIDFont", FontType::Type0, FontEncoding::Identity);
        font_data.unicode_mappings.insert(0x41, "A".to_string());
        let embedder = embedder_with(font_data);

        let cmap = embedder.generate_tounicode_cmap("TestCIDFont").unwrap();
        assert!(cmap.contains("1 beginbfchar\n<0041> <0041>\nendbfchar\n"));
        assert!(cmap.contains("<0000> <FFFF>\n"));
    }

    #[test]
    fn test_generate_font_dictionary_unknown_font() {
        let embedder = FontEmbedder::new();
        assert!(embedder.generate_font_dictionary("Missing").is_err());
        assert!(embedder.generate_font_descriptor("Missing").is_err());
        assert!(embedder.generate_tounicode_cmap("Missing").is_err());
    }

    #[test]
    fn test_generate_truetype_dictionary() {
        let embedder = FontEmbedder::new();
        let font_data = sample_font_data(
            "TestFont",
            FontType::TrueType,
            FontEncoding::WinAnsiEncoding,
        );
        let dict = embedder.generate_truetype_dictionary(&font_data).unwrap();

        // `matches!` fails the test when a key is missing or has the wrong
        // type, unlike an `if let` that silently skips the assertion.
        assert!(matches!(dict.get("Type"), Some(Object::Name(n)) if n == "Font"));
        assert!(matches!(dict.get("Subtype"), Some(Object::Name(n)) if n == "TrueType"));
        assert!(matches!(dict.get("BaseFont"), Some(Object::Name(n)) if n == "TestFont"));
        assert!(matches!(dict.get("Encoding"), Some(Object::Name(n)) if n == "WinAnsiEncoding"));
        assert!(matches!(dict.get("Widths"), Some(Object::Array(w)) if w.len() == 224));
    }

    #[test]
    fn test_generate_type0_dictionary() {
        let embedder = FontEmbedder::new();
        let font_data = sample_font_data("TestCIDFont", FontType::Type0, FontEncoding::Identity);
        let dict = embedder.generate_type0_dictionary(&font_data).unwrap();

        assert!(matches!(dict.get("Subtype"), Some(Object::Name(n)) if n == "Type0"));
        assert!(matches!(dict.get("Encoding"), Some(Object::Name(n)) if n == "Identity-H"));
        assert!(matches!(dict.get("DescendantFonts"), Some(Object::Array(d)) if d.len() == 1));
        // No Unicode mappings were provided, so no ToUnicode entry is expected.
        assert!(dict.get("ToUnicode").is_none());
    }

    // NOTE: the next two tests only sanity-check their own inputs. Exercising
    // `chars_to_glyphs` / `create_unicode_mappings` needs a parseable font
    // fixture, which is not available here.
    #[test]
    fn test_chars_to_glyphs_conversion() {
        let _embedder = FontEmbedder::new();
        let chars: HashSet<u32> = [65, 66, 67].iter().cloned().collect();
        assert_eq!(chars.len(), 3);
    }

    #[test]
    fn test_unicode_mappings_creation() {
        let _embedder = FontEmbedder::new();
        let glyphs: HashSet<u16> = [65, 66, 67].iter().cloned().collect();
        assert_eq!(glyphs.len(), 3);
    }

    #[test]
    fn test_font_descriptor_generation() {
        let mut font_data = sample_font_data(
            "TestFont",
            FontType::TrueType,
            FontEncoding::WinAnsiEncoding,
        );
        font_data.descriptor.flags = FontFlags {
            non_symbolic: true,
            serif: true,
            ..Default::default()
        };
        font_data.descriptor.bbox = [-100, -250, 1000, 750];
        let embedder = embedder_with(font_data);

        let desc_dict = embedder.generate_font_descriptor("TestFont").unwrap();

        assert!(matches!(desc_dict.get("FontName"), Some(Object::Name(n)) if n == "TestFont"));
        // serif (bit 1) + non_symbolic (bit 5) = 2 + 32.
        assert!(matches!(desc_dict.get("Flags"), Some(Object::Integer(f)) if *f == 34));
        assert!(matches!(desc_dict.get("FontBBox"), Some(Object::Array(b)) if b.len() == 4));
    }

    #[test]
    fn test_cff_font_uses_cidfonttype0() {
        assert!(
            !CjkFontType::should_use_cidfonttype2(true),
            "CFF → CIDFontType0"
        );
    }

    #[test]
    fn test_truetype_font_uses_cidfonttype2() {
        assert!(
            CjkFontType::should_use_cidfonttype2(false),
            "TrueType → CIDFontType2"
        );
    }
}