#[cfg(not(target_arch = "wasm32"))]
use std::path::{Path, PathBuf};
use quick_xml::Reader;
use quick_xml::events::Event;
use crate::parser::xml_util::get_attr_str;
#[cfg(test)]
#[path = "embedded_fonts_tests.rs"]
mod tests;
/// Container format of a font file, as recognised from its leading signature bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum FontFileFormat {
    /// TrueType font, written with a `.ttf` extension.
    Ttf,
    /// OpenType font, written with an `.otf` extension.
    Otf,
    /// TrueType collection, written with a `.ttc` extension.
    Ttc,
}

impl FontFileFormat {
    /// Returns the file extension (without the leading dot) used for this format.
    fn extension(self) -> &'static str {
        match self {
            FontFileFormat::Ttc => "ttc",
            FontFileFormat::Otf => "otf",
            FontFileFormat::Ttf => "ttf",
        }
    }
}
/// One `embeddedFont` element collected by `parse_pptx_embedded_font_list`.
///
/// Entries are only created when a typeface was found and at least one
/// variant reference carried a relationship id.
#[derive(Debug)]
struct PptxEmbeddedFontEntry {
// Value of the `typeface` attribute on the nested `font` element; also used
// as the leading part of the extracted file name.
typeface: String,
// References to the embedded font parts, one per declared style.
variants: Vec<FontVariantRef>,
}
/// Reference from a PPTX embedded font entry to one font part in the archive.
#[derive(Debug)]
struct FontVariantRef {
// Local name of the referencing element: `regular`, `bold`, `italic` or
// `boldItalic`.
style: String,
// Value of the `r:id` attribute; looked up in the presentation
// relationships to find the font part's path.
r_id: String,
}
/// One `font` element collected by `parse_docx_embedded_font_entries`.
///
/// Entries are only created when the element had a name and at least one
/// embed reference carried both a relationship id and a font key.
#[derive(Debug)]
struct DocxEmbeddedFontEntry {
// Value of the `w:name` (or unprefixed `name`) attribute; also used as the
// leading part of the extracted file name.
font_name: String,
// References to the embedded font parts, one per declared style.
variants: Vec<DocxFontVariantRef>,
}
/// Reference from a DOCX font table entry to one obfuscated font part.
#[derive(Debug)]
struct DocxFontVariantRef {
// Style derived from the element name: `regular`, `bold`, `italic` or
// `boldItalic` (for `embedRegular`, `embedBold`, `embedItalic`,
// `embedBoldItalic` respectively).
style: String,
// Value of the `r:id` attribute; looked up in the font table relationships
// to find the font part's path.
r_id: String,
// Value of the `w:fontKey` (or unprefixed `fontKey`) attribute; parsed as a
// GUID to derive the XOR key used for de-obfuscation.
font_key: String,
}
/// Handle to a temporary directory holding fonts extracted from a document.
///
/// The directory and its contents are removed (best effort) when the handle
/// is dropped.
#[cfg(not(target_arch = "wasm32"))]
pub(crate) struct EmbeddedFontDir {
// Location of the temporary directory.
path: PathBuf,
// Number of font files that were successfully written into `path`.
font_count: usize,
}
#[cfg(not(target_arch = "wasm32"))]
impl EmbeddedFontDir {
    /// Borrows the location of the temporary directory that holds the extracted fonts.
    pub(crate) fn path(&self) -> &Path {
        self.path.as_path()
    }

    /// Reports whether no font file was written into the directory.
    pub(crate) fn is_empty(&self) -> bool {
        matches!(self.font_count, 0)
    }
}
#[cfg(not(target_arch = "wasm32"))]
impl Drop for EmbeddedFontDir {
    /// Removes the temporary directory together with everything inside it.
    ///
    /// Cleanup is best effort: a failure (for example because the directory
    /// is already gone) is deliberately ignored.
    fn drop(&mut self) {
        std::fs::remove_dir_all(self.path.as_path()).ok();
    }
}
/// Extracts the fonts embedded in an Office document into a temporary directory.
///
/// `data` is the raw archive and `format` selects the extractor: PPTX and DOCX
/// are supported, XLSX always yields `None`. A successful extraction is logged
/// at info level with the number of fonts and the directory path.
#[cfg(not(target_arch = "wasm32"))]
pub(crate) fn extract_embedded_fonts(
    data: &[u8],
    format: crate::config::Format,
) -> Option<EmbeddedFontDir> {
    use crate::config::Format;

    let extracted = match format {
        Format::Pptx => extract_pptx_fonts(data),
        Format::Docx => extract_docx_fonts(data),
        Format::Xlsx => None,
    };

    // Log only when something was actually extracted, then hand the directory on unchanged.
    extracted.inspect(|dir| {
        tracing::info!(
            font_count = dir.font_count,
            path = ?dir.path,
            "extracted embedded fonts from archive"
        );
    })
}
/// Converts a textual GUID into the 16-byte key used to de-obfuscate font data.
///
/// Accepts the canonical `XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX` form, with or
/// without surrounding braces, in either hex case. The first three groups are
/// stored little-endian and the last two big-endian.
///
/// Returns `None` unless the input consists of exactly five dash-separated
/// groups of 8, 4, 4, 4 and 12 hexadecimal digits. Group lengths and digits are
/// validated up front because `from_str_radix` on its own would accept short
/// groups and a leading `+` sign.
fn parse_guid_to_bytes(guid: &str) -> Option<[u8; 16]> {
    // Hex-digit count of each dash-separated group.
    const GROUP_LENS: [usize; 5] = [8, 4, 4, 4, 12];

    let s = guid.trim_start_matches('{').trim_end_matches('}');
    let parts: Vec<&str> = s.split('-').collect();
    if parts.len() != GROUP_LENS.len() {
        return None;
    }
    let well_formed = parts
        .iter()
        .zip(GROUP_LENS.iter())
        .all(|(part, len)| part.len() == *len && part.bytes().all(|b| b.is_ascii_hexdigit()));
    if !well_formed {
        return None;
    }

    let group1 = u32::from_str_radix(parts[0], 16).ok()?;
    let group2 = u16::from_str_radix(parts[1], 16).ok()?;
    let group3 = u16::from_str_radix(parts[2], 16).ok()?;
    let group4 = u16::from_str_radix(parts[3], 16).ok()?;
    let group5 = u64::from_str_radix(parts[4], 16).ok()?;

    let mut key = [0u8; 16];
    key[0..4].copy_from_slice(&group1.to_le_bytes());
    key[4..6].copy_from_slice(&group2.to_le_bytes());
    key[6..8].copy_from_slice(&group3.to_le_bytes());
    key[8..10].copy_from_slice(&group4.to_be_bytes());
    // The last group has 48 bits, so only the low six bytes of the u64 carry data.
    key[10..16].copy_from_slice(&group5.to_be_bytes()[2..8]);
    Some(key)
}
/// XORs the leading bytes of `data` with `key`, in place.
///
/// At most the first 32 bytes are touched; the 16-byte key is applied twice
/// over that range. Shorter buffers are processed up to their length and the
/// remainder of longer buffers is left untouched.
fn deobfuscate_font_data(data: &mut [u8], key: &[u8; 16]) {
    data.iter_mut()
        .take(32)
        .zip(key.iter().cycle())
        .for_each(|(byte, mask)| *byte ^= *mask);
}
/// Identifies a font container format from the first four bytes of `data`.
///
/// Recognises the signatures `00 01 00 00` (TTF), `OTTO` (OTF) and `ttcf`
/// (TTC). Returns `None` for inputs shorter than four bytes and for any other
/// signature.
fn detect_font_format(data: &[u8]) -> Option<FontFileFormat> {
    match data.get(..4)? {
        [0x00, 0x01, 0x00, 0x00] => Some(FontFileFormat::Ttf),
        b"OTTO" => Some(FontFileFormat::Otf),
        b"ttcf" => Some(FontFileFormat::Ttc),
        _ => None,
    }
}
/// Collects the embedded font declarations from presentation XML.
///
/// Every `embeddedFont` element contributes one entry, provided it contains a
/// self-closing `font` element with a `typeface` attribute and at least one
/// self-closing `regular` / `bold` / `italic` / `boldItalic` element carrying
/// an `r:id` attribute. Element names are compared by local name, so the
/// namespace prefix is irrelevant.
///
/// Parsing stops at the end of input or at the first reader error; entries
/// completed before that point are returned.
fn parse_pptx_embedded_font_list(xml: &str) -> Vec<PptxEmbeddedFontEntry> {
    let mut reader = Reader::from_str(xml);
    let mut entries = Vec::new();
    // `Some` while inside an `embeddedFont` element: (typeface so far, variants so far).
    let mut open: Option<(Option<String>, Vec<FontVariantRef>)> = None;

    while let Ok(event) = reader.read_event() {
        match event {
            Event::Eof => break,
            Event::Start(ref e) if e.local_name().as_ref() == b"embeddedFont" => {
                // A new element always starts from a clean slate.
                open = Some((None, Vec::new()));
            }
            Event::Empty(ref e) => {
                // Self-closing elements only matter inside an `embeddedFont`.
                let Some((typeface, variants)) = open.as_mut() else {
                    continue;
                };
                let tag = e.local_name();
                match tag.as_ref() {
                    b"font" => *typeface = get_attr_str(e, b"typeface"),
                    b"regular" | b"bold" | b"italic" | b"boldItalic" => {
                        if let Some(r_id) = get_attr_str(e, b"r:id") {
                            variants.push(FontVariantRef {
                                style: String::from_utf8_lossy(tag.as_ref()).into_owned(),
                                r_id,
                            });
                        }
                    }
                    _ => {}
                }
            }
            Event::End(ref e) if e.local_name().as_ref() == b"embeddedFont" => {
                // Keep the entry only when both a typeface and a variant were seen.
                if let Some((Some(typeface), variants)) = open.take() {
                    if !variants.is_empty() {
                        entries.push(PptxEmbeddedFontEntry { typeface, variants });
                    }
                }
            }
            _ => {}
        }
    }
    entries
}
/// Pulls a braced GUID out of the file name of a `.fntdata` archive path.
///
/// The last `/`-separated segment must end in `.fntdata`, and what precedes the
/// suffix must be exactly 38 bytes long, starting with `{` and ending with `}`.
/// The braces are kept in the returned string. The characters between the
/// braces are not validated here.
fn extract_guid_from_font_path(path: &str) -> Option<String> {
    let file_name = path.rsplit_once('/').map_or(path, |(_, tail)| tail);
    let stem = file_name.strip_suffix(".fntdata")?;
    let looks_like_guid = stem.len() == 38 && stem.starts_with('{') && stem.ends_with('}');
    looks_like_guid.then(|| stem.to_owned())
}
/// Extracts the fonts embedded in a PPTX archive into a fresh temporary directory.
///
/// The embedded font list is read from `ppt/presentation.xml`, and each variant's
/// relationship id is resolved through `ppt/_rels/presentation.xml.rels`. The
/// referenced part is de-obfuscated with the GUID taken from its file name and
/// written as `<typeface>-<style>.<ext>`. When the font format cannot be detected
/// the extension falls back to `ttf`.
///
/// Returns `None` when the archive cannot be opened, when either XML part is
/// missing or unreadable, when no embedded fonts are declared, when the
/// temporary directory cannot be created, or when no font file could be written.
/// Individual variants that fail to resolve, read or write are skipped.
#[cfg(not(target_arch = "wasm32"))]
fn extract_pptx_fonts(data: &[u8]) -> Option<EmbeddedFontDir> {
    use std::io::Read;

    /// Makes a document-supplied name safe to use inside a single file name.
    ///
    /// The typeface comes from untrusted XML. Without this, a value containing
    /// path separators (or an absolute path, which makes `Path::join` discard
    /// its base) would let a crafted document write outside the temp directory.
    fn sanitize_file_stem(name: &str) -> String {
        name.chars()
            .map(|c| match c {
                '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|' => '_',
                c if c.is_control() => '_',
                c => c,
            })
            .collect()
    }

    let mut archive = crate::parser::open_zip(data).ok()?;

    let pres_xml = {
        let mut file = archive.by_name("ppt/presentation.xml").ok()?;
        let mut content = String::new();
        file.read_to_string(&mut content).ok()?;
        content
    };
    let font_entries = parse_pptx_embedded_font_list(&pres_xml);
    if font_entries.is_empty() {
        return None;
    }

    let rels_xml = {
        let mut file = archive.by_name("ppt/_rels/presentation.xml.rels").ok()?;
        let mut content = String::new();
        file.read_to_string(&mut content).ok()?;
        content
    };
    let rels = crate::parser::xml_util::parse_rels_id_target(&rels_xml);

    // Owning the directory through `EmbeddedFontDir` from the start means its
    // `Drop` impl removes the directory on every exit path that yields no fonts.
    let mut extracted = EmbeddedFontDir {
        path: create_temp_font_dir("office2pdf-pptx-fonts")?,
        font_count: 0,
    };

    for entry in &font_entries {
        let safe_typeface = sanitize_file_stem(&entry.typeface);
        for variant in &entry.variants {
            let Some(target) = rels.get(&variant.r_id) else {
                continue;
            };
            // Absolute targets are relative to the package root, others to `ppt/`.
            let font_zip_path = if target.starts_with('/') {
                target.trim_start_matches('/').to_string()
            } else {
                format!("ppt/{target}")
            };
            let Some(guid_str) = extract_guid_from_font_path(&font_zip_path) else {
                continue;
            };
            let Some(key) = parse_guid_to_bytes(&guid_str) else {
                continue;
            };

            let mut font_data = Vec::new();
            {
                let Ok(mut file) = archive.by_name(&font_zip_path) else {
                    continue;
                };
                if file.read_to_end(&mut font_data).is_err() {
                    continue;
                }
            }
            deobfuscate_font_data(&mut font_data, &key);

            let ext = detect_font_format(&font_data)
                .map(|f| f.extension())
                .unwrap_or("ttf");
            let filename = format!("{}-{}.{}", safe_typeface, variant.style, ext);
            let out_path = extracted.path.join(&filename);
            if std::fs::write(&out_path, &font_data).is_ok() {
                extracted.font_count += 1;
            }
        }
    }

    if extracted.is_empty() {
        // Dropping `extracted` here deletes the empty temporary directory.
        return None;
    }
    Some(extracted)
}
/// Collects the embedded font declarations from DOCX font table XML.
///
/// Every non-empty `font` element contributes one entry, provided it has a
/// `w:name` (or unprefixed `name`) attribute and contains at least one
/// self-closing `embedRegular` / `embedBold` / `embedItalic` /
/// `embedBoldItalic` element carrying both an `r:id` and a `w:fontKey` (or
/// unprefixed `fontKey`) attribute. Element names are compared by local name.
///
/// Parsing stops at the end of input or at the first reader error; entries
/// completed before that point are returned.
fn parse_docx_embedded_font_entries(xml: &str) -> Vec<DocxEmbeddedFontEntry> {
    let mut reader = Reader::from_str(xml);
    let mut entries = Vec::new();
    // `Some` while inside a `font` element: (font name if present, variants so far).
    let mut open: Option<(Option<String>, Vec<DocxFontVariantRef>)> = None;

    while let Ok(event) = reader.read_event() {
        match event {
            Event::Eof => break,
            Event::Start(ref e) if e.local_name().as_ref() == b"font" => {
                let name = get_attr_str(e, b"w:name").or_else(|| get_attr_str(e, b"name"));
                open = Some((name, Vec::new()));
            }
            Event::Empty(ref e) => {
                // Self-closing elements only matter inside a `font` element.
                let Some((_, variants)) = open.as_mut() else {
                    continue;
                };
                let style = match e.local_name().as_ref() {
                    b"embedRegular" => "regular",
                    b"embedBold" => "bold",
                    b"embedItalic" => "italic",
                    b"embedBoldItalic" => "boldItalic",
                    _ => continue,
                };
                let r_id = get_attr_str(e, b"r:id");
                let font_key =
                    get_attr_str(e, b"w:fontKey").or_else(|| get_attr_str(e, b"fontKey"));
                // A variant is usable only with both the relationship id and the key.
                if let Some((r_id, font_key)) = r_id.zip(font_key) {
                    variants.push(DocxFontVariantRef {
                        style: style.to_string(),
                        r_id,
                        font_key,
                    });
                }
            }
            Event::End(ref e) if e.local_name().as_ref() == b"font" => {
                // Keep the entry only when both a name and a variant were seen.
                if let Some((Some(font_name), variants)) = open.take() {
                    if !variants.is_empty() {
                        entries.push(DocxEmbeddedFontEntry { font_name, variants });
                    }
                }
            }
            _ => {}
        }
    }
    entries
}
/// Extracts the fonts embedded in a DOCX archive into a fresh temporary directory.
///
/// The font declarations are read from `word/fontTable.xml`, and each variant's
/// relationship id is resolved through `word/_rels/fontTable.xml.rels`. The
/// referenced part is de-obfuscated with the key derived from the variant's
/// font key GUID and written as `<font name>-<style>.<ext>`. When the font
/// format cannot be detected the extension falls back to `ttf`.
///
/// Returns `None` when the archive cannot be opened, when either XML part is
/// missing or unreadable, when no embedded fonts are declared, when the
/// temporary directory cannot be created, or when no font file could be written.
/// Individual variants that fail to resolve, read or write are skipped.
#[cfg(not(target_arch = "wasm32"))]
fn extract_docx_fonts(data: &[u8]) -> Option<EmbeddedFontDir> {
    use std::io::Read;

    /// Makes a document-supplied name safe to use inside a single file name.
    ///
    /// The font name comes from untrusted XML. Without this, a value containing
    /// path separators (or an absolute path, which makes `Path::join` discard
    /// its base) would let a crafted document write outside the temp directory.
    fn sanitize_file_stem(name: &str) -> String {
        name.chars()
            .map(|c| match c {
                '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|' => '_',
                c if c.is_control() => '_',
                c => c,
            })
            .collect()
    }

    let mut archive = crate::parser::open_zip(data).ok()?;

    let font_table_xml = {
        let mut file = archive.by_name("word/fontTable.xml").ok()?;
        let mut content = String::new();
        file.read_to_string(&mut content).ok()?;
        content
    };
    let font_entries = parse_docx_embedded_font_entries(&font_table_xml);
    if font_entries.is_empty() {
        return None;
    }

    let rels_xml = {
        let mut file = archive.by_name("word/_rels/fontTable.xml.rels").ok()?;
        let mut content = String::new();
        file.read_to_string(&mut content).ok()?;
        content
    };
    let rels = crate::parser::xml_util::parse_rels_id_target(&rels_xml);

    // Owning the directory through `EmbeddedFontDir` from the start means its
    // `Drop` impl removes the directory on every exit path that yields no fonts.
    let mut extracted = EmbeddedFontDir {
        path: create_temp_font_dir("office2pdf-docx-fonts")?,
        font_count: 0,
    };

    for entry in &font_entries {
        let safe_font_name = sanitize_file_stem(&entry.font_name);
        for variant in &entry.variants {
            let Some(target) = rels.get(&variant.r_id) else {
                continue;
            };
            // Absolute targets are relative to the package root, others to `word/`.
            let font_zip_path = if target.starts_with('/') {
                target.trim_start_matches('/').to_string()
            } else {
                format!("word/{target}")
            };
            let Some(key) = parse_guid_to_bytes(&variant.font_key) else {
                continue;
            };

            let mut font_data = Vec::new();
            {
                let Ok(mut file) = archive.by_name(&font_zip_path) else {
                    continue;
                };
                if file.read_to_end(&mut font_data).is_err() {
                    continue;
                }
            }
            deobfuscate_font_data(&mut font_data, &key);

            let ext = detect_font_format(&font_data)
                .map(|f| f.extension())
                .unwrap_or("ttf");
            let filename = format!("{}-{}.{}", safe_font_name, variant.style, ext);
            let out_path = extracted.path.join(&filename);
            if std::fs::write(&out_path, &font_data).is_ok() {
                extracted.font_count += 1;
            }
        }
    }

    if extracted.is_empty() {
        // Dropping `extracted` here deletes the empty temporary directory.
        return None;
    }
    Some(extracted)
}
/// Creates a new, uniquely named directory under the system temp directory.
///
/// The directory name has the form `<prefix>-<pid>-<nanos>-<seq>`, where `seq`
/// is a process-wide counter. The directory is created with `create_dir`, which
/// fails if the path already exists, so a directory that someone else created
/// beforehand is never adopted; on such a collision a new name is tried.
///
/// Returns `None` when the temp directory itself cannot be created, when the
/// new directory cannot be created for a reason other than a name collision,
/// or when every attempt collided. A system clock set before the Unix epoch
/// does not cause a failure.
#[cfg(not(target_arch = "wasm32"))]
fn create_temp_font_dir(prefix: &str) -> Option<PathBuf> {
    use std::io::ErrorKind;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    // Distinguishes directories created by concurrent calls within one process.
    static NEXT_SEQ: AtomicUsize = AtomicUsize::new(0);
    const MAX_ATTEMPTS: usize = 16;

    let base = std::env::temp_dir();
    // The original used `create_dir_all`, which also created a missing temp root.
    std::fs::create_dir_all(&base).ok()?;
    let pid = std::process::id();

    for _ in 0..MAX_ATTEMPTS {
        // The timestamp only adds entropy; uniqueness does not depend on it.
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .map_or(0, |elapsed| elapsed.as_nanos());
        let seq = NEXT_SEQ.fetch_add(1, Ordering::Relaxed);
        let path = base.join(format!("{prefix}-{pid}-{nanos}-{seq}"));
        match std::fs::create_dir(&path) {
            Ok(()) => return Some(path),
            Err(err) if err.kind() == ErrorKind::AlreadyExists => continue,
            Err(_) => return None,
        }
    }
    None
}