use std::collections::{BTreeMap, HashMap, HashSet};
use pdf_writer::{types::*, Filter, Finish, Name, Pdf, Ref, Str};
use ratex_font::FontId;
use skrifa::raw::FontRef as SfFontRef;
use skrifa::raw::TableProvider;
use subsetter::GlyphRemapper;
pub(crate) type RawFontData = HashMap<FontId, Vec<u8>>;
pub(crate) const FONT_MAP: &[(FontId, &str)] = &[
(FontId::MainRegular, "KaTeX_Main-Regular.ttf"),
(FontId::MainBold, "KaTeX_Main-Bold.ttf"),
(FontId::MainItalic, "KaTeX_Main-Italic.ttf"),
(FontId::MainBoldItalic, "KaTeX_Main-BoldItalic.ttf"),
(FontId::MathItalic, "KaTeX_Math-Italic.ttf"),
(FontId::MathBoldItalic, "KaTeX_Math-BoldItalic.ttf"),
(FontId::AmsRegular, "KaTeX_AMS-Regular.ttf"),
(FontId::CaligraphicRegular, "KaTeX_Caligraphic-Regular.ttf"),
(FontId::FrakturRegular, "KaTeX_Fraktur-Regular.ttf"),
(FontId::FrakturBold, "KaTeX_Fraktur-Bold.ttf"),
(FontId::SansSerifRegular, "KaTeX_SansSerif-Regular.ttf"),
(FontId::SansSerifBold, "KaTeX_SansSerif-Bold.ttf"),
(FontId::SansSerifItalic, "KaTeX_SansSerif-Italic.ttf"),
(FontId::ScriptRegular, "KaTeX_Script-Regular.ttf"),
(FontId::TypewriterRegular, "KaTeX_Typewriter-Regular.ttf"),
(FontId::Size1Regular, "KaTeX_Size1-Regular.ttf"),
(FontId::Size2Regular, "KaTeX_Size2-Regular.ttf"),
(FontId::Size3Regular, "KaTeX_Size3-Regular.ttf"),
(FontId::Size4Regular, "KaTeX_Size4-Regular.ttf"),
];
#[allow(unused_variables)]
pub(crate) fn load_all_fonts(font_dir: &str) -> Result<RawFontData, String> {
let mut data = HashMap::new();
#[cfg(not(feature = "embed-fonts"))]
{
let dir = std::path::Path::new(font_dir);
for (id, filename) in FONT_MAP {
let path = dir.join(filename);
if path.exists() {
let bytes = std::fs::read(&path)
.map_err(|e| format!("Failed to read {}: {e}", path.display()))?;
data.insert(*id, bytes);
}
}
if data.is_empty() {
return Err(format!("No fonts found in {font_dir}"));
}
}
#[cfg(feature = "embed-fonts")]
{
for (id, filename) in FONT_MAP {
let cow = ratex_katex_fonts::ttf_bytes(filename)
.ok_or_else(|| format!("Missing embedded font {filename}"))?;
data.insert(*id, cow.to_vec());
}
}
Ok(data)
}
pub(crate) fn resolve_glyph_id(raw_bytes: &[u8], font_id: FontId, char_code: u32) -> Option<u16> {
let mapped_char = ratex_font::katex_ttf_glyph_char(font_id, char_code);
let sf = SfFontRef::new(raw_bytes).ok()?;
let charmap = skrifa::charmap::Charmap::new(&sf);
let gid = charmap.map(mapped_char)?;
let v = gid.to_u32();
if v == 0 { None } else { Some(v as u16) }
}
#[derive(Clone, Debug)]
pub(crate) struct GlyphInfo {
pub unicode: u32,
}
pub(crate) struct FontUsage {
pub font_id: FontId,
pub glyphs: BTreeMap<u16, GlyphInfo>,
}
pub(crate) fn collect_glyph_usage(
items: &[ratex_types::display_item::DisplayItem],
font_data: &RawFontData,
) -> Vec<FontUsage> {
let mut usage_map: HashMap<FontId, HashSet<(u16, u32)>> = HashMap::new();
for item in items {
if let ratex_types::display_item::DisplayItem::GlyphPath {
font, char_code, ..
} = item
{
let font_id = FontId::parse(font).unwrap_or(FontId::MainRegular);
let bytes = match font_data.get(&font_id) {
Some(b) => b,
None => match font_data.get(&FontId::MainRegular) {
Some(b) => b,
None => continue,
},
};
let actual_fid = if font_data.contains_key(&font_id) {
font_id
} else {
FontId::MainRegular
};
if let Some(gid) = resolve_glyph_id(bytes, font_id, *char_code) {
usage_map
.entry(actual_fid)
.or_default()
.insert((gid, *char_code));
}
}
}
usage_map
.into_iter()
.map(|(font_id, set)| {
let mut glyphs = BTreeMap::new();
for (gid, unicode) in set {
glyphs.insert(gid, GlyphInfo { unicode });
}
FontUsage { font_id, glyphs }
})
.collect()
}
pub(crate) struct EmbeddedFont {
pub font_id: FontId,
pub res_name: String,
pub type0_ref: Ref,
pub remapper: GlyphRemapper,
}
pub(crate) fn embed_fonts(
pdf: &mut Pdf,
alloc: &mut Ref,
usages: &[FontUsage],
font_data: &RawFontData,
) -> Result<Vec<EmbeddedFont>, String> {
let mut embedded = Vec::new();
for (idx, usage) in usages.iter().enumerate() {
let raw = font_data
.get(&usage.font_id)
.ok_or_else(|| format!("Missing font data for {:?}", usage.font_id))?;
let mut remapper = GlyphRemapper::new();
for &gid in usage.glyphs.keys() {
remapper.remap(gid);
}
let subsetted = subsetter::subset(raw, 0, &remapper)
.map_err(|e| format!("Subset error for {:?}: {e}", usage.font_id))?;
let compressed =
miniz_oxide::deflate::compress_to_vec_zlib(&subsetted, 6);
let sf = SfFontRef::new(raw).map_err(|e| format!("skrifa error: {e}"))?;
let upem = sf.head().map_err(|_| "no head table")?.units_per_em() as f32;
let scale = 1000.0 / upem;
let (ascent, descent, cap_height) = if let Ok(os2) = sf.os2() {
let asc = os2.s_typo_ascender() as f32 * scale;
let desc = os2.s_typo_descender() as f32 * scale;
let cap = os2.s_cap_height().map_or(asc, |v| v as f32 * scale);
(asc, desc, cap)
} else {
(800.0, -200.0, 800.0)
};
let bbox = {
let head = sf.head().map_err(|_| "no head table")?;
[
head.x_min() as f32 * scale,
head.y_min() as f32 * scale,
head.x_max() as f32 * scale,
head.y_max() as f32 * scale,
]
};
let hmtx = sf.hmtx().map_err(|_| "no hmtx table")?;
let mut widths: Vec<(u16, f32)> = Vec::new();
for &old_gid in usage.glyphs.keys() {
let new_cid = remapper.get(old_gid).unwrap_or(0);
let gid = skrifa::raw::types::GlyphId::new(old_gid as u32);
let advance = hmtx.advance(gid).unwrap_or(0) as f32 * scale;
widths.push((new_cid, advance));
}
widths.sort_by_key(|(cid, _)| *cid);
let type0_ref = alloc.bump();
let cid_ref = alloc.bump();
let descriptor_ref = alloc.bump();
let tounicode_ref = alloc.bump();
let stream_ref = alloc.bump();
let base_name = format!("KaTeX_{}", usage.font_id.as_str().replace('-', "_"));
let res_name = format!("F{idx}");
pdf.font_descriptor(descriptor_ref)
.name(Name(base_name.as_bytes()))
.flags(FontFlags::SYMBOLIC)
.bbox(pdf_writer::Rect::new(bbox[0], bbox[1], bbox[2], bbox[3]))
.italic_angle(0.0)
.ascent(ascent)
.descent(descent)
.cap_height(cap_height)
.stem_v(80.0)
.font_file2(stream_ref);
let mut cid_font = pdf.cid_font(cid_ref);
cid_font
.subtype(CidFontType::Type2)
.base_font(Name(base_name.as_bytes()))
.default_width(0.0)
.font_descriptor(descriptor_ref);
cid_font
.system_info(pdf_writer::types::SystemInfo {
registry: Str(b"Adobe"),
ordering: Str(b"Identity"),
supplement: 0,
});
if !widths.is_empty() {
let mut w = cid_font.widths();
for &(cid, adv) in &widths {
w.consecutive(cid, [adv]);
}
w.finish();
}
cid_font.finish();
pdf.type0_font(type0_ref)
.base_font(Name(base_name.as_bytes()))
.encoding_predefined(Name(b"Identity-H"))
.descendant_font(cid_ref)
.to_unicode(tounicode_ref);
let cmap = build_tounicode_cmap(&usage.glyphs, &remapper);
pdf.stream(tounicode_ref, cmap.as_bytes())
.pair(Name(b"Type"), Name(b"CMap"));
let mut font_stream = pdf.stream(stream_ref, &compressed);
font_stream.filter(Filter::FlateDecode);
font_stream.pair(Name(b"Length1"), subsetted.len() as i32);
font_stream.finish();
embedded.push(EmbeddedFont {
font_id: usage.font_id,
res_name,
type0_ref,
remapper,
});
}
Ok(embedded)
}
fn build_tounicode_cmap(glyphs: &BTreeMap<u16, GlyphInfo>, remapper: &GlyphRemapper) -> String {
let mut entries = Vec::new();
for (old_gid, info) in glyphs {
if let Some(new_cid) = remapper.get(*old_gid) {
entries.push((new_cid, info.unicode));
}
}
entries.sort_by_key(|(cid, _)| *cid);
let mut cmap = String::new();
cmap.push_str("/CIDInit /ProcSet findresource begin\n");
cmap.push_str("12 dict begin\n");
cmap.push_str("begincmap\n");
cmap.push_str("/CIDSystemInfo\n");
cmap.push_str("<< /Registry (Adobe) /Ordering (UCS) /Supplement 0 >> def\n");
cmap.push_str("/CMapName /Adobe-Identity-UCS def\n");
cmap.push_str("/CMapType 2 def\n");
cmap.push_str("1 begincodespacerange\n");
cmap.push_str("<0000> <FFFF>\n");
cmap.push_str("endcodespacerange\n");
for chunk in entries.chunks(100) {
cmap.push_str(&format!("{} beginbfchar\n", chunk.len()));
for &(cid, unicode) in chunk {
if unicode <= 0xFFFF {
cmap.push_str(&format!("<{:04X}> <{:04X}>\n", cid, unicode));
} else {
let hi = ((unicode - 0x10000) >> 10) + 0xD800;
let lo = ((unicode - 0x10000) & 0x3FF) + 0xDC00;
cmap.push_str(&format!("<{:04X}> <{:04X}{:04X}>\n", cid, hi, lo));
}
}
cmap.push_str("endbfchar\n");
}
cmap.push_str("endcmap\n");
cmap.push_str("CMapName currentdict /CMap defineresource pop\n");
cmap.push_str("end\n");
cmap.push_str("end\n");
cmap
}