use std::collections::{BTreeSet, HashMap};
/// Records which codepoints and glyph IDs have been used and derives a
/// six-letter subset tag from the recorded glyph set.
#[derive(Debug, Default)]
pub struct FontSubsetter {
    /// Codepoint -> glyph ID for every registered character.
    used_chars: HashMap<u32, u16>,
    /// Every registered glyph ID, kept in ascending order.
    used_glyphs: BTreeSet<u16>,
    /// Cached tag for the current glyph set; `None` when no tag has been
    /// generated yet or the glyph set has changed since it was generated.
    subset_tag: Option<String>,
}

impl FontSubsetter {
    /// Number of letters in a generated subset tag.
    const TAG_LEN: usize = 6;

    /// Creates a subsetter with no characters or glyphs registered.
    pub fn new() -> Self {
        Self::default()
    }

    /// Registers `codepoint` as used and mapped to `glyph_id`.
    ///
    /// Registering a codepoint again overwrites its glyph mapping; the glyph
    /// it previously mapped to stays in the glyph set.
    ///
    /// When `glyph_id` is new to the glyph set, any cached subset tag is
    /// discarded so the next [`generate_subset_tag`](Self::generate_subset_tag)
    /// call reflects the enlarged set instead of returning a stale tag.
    pub fn use_char(&mut self, codepoint: u32, glyph_id: u16) {
        self.used_chars.insert(codepoint, glyph_id);
        // `BTreeSet::insert` returns `true` only when the value was absent,
        // i.e. exactly when the input of the tag hash has changed.
        if self.used_glyphs.insert(glyph_id) {
            self.subset_tag = None;
        }
    }

    /// Registers every character of `text` for which `glyph_lookup` returns a
    /// glyph ID. Characters for which the lookup returns `None` are skipped.
    pub fn use_string(&mut self, text: &str, glyph_lookup: impl Fn(u32) -> Option<u16>) {
        for codepoint in text.chars().map(u32::from) {
            if let Some(glyph_id) = glyph_lookup(codepoint) {
                self.use_char(codepoint, glyph_id);
            }
        }
    }

    /// Returns the set of registered glyph IDs in ascending order.
    pub fn used_glyphs(&self) -> &BTreeSet<u16> {
        &self.used_glyphs
    }

    /// Returns the codepoint -> glyph ID map of registered characters.
    pub fn used_chars(&self) -> &HashMap<u32, u16> {
        &self.used_chars
    }

    /// Number of distinct glyph IDs registered.
    pub fn glyph_count(&self) -> usize {
        self.used_glyphs.len()
    }

    /// Number of distinct codepoints registered.
    pub fn char_count(&self) -> usize {
        self.used_chars.len()
    }

    /// Returns `true` when no character has been registered.
    pub fn is_empty(&self) -> bool {
        self.used_chars.is_empty()
    }

    /// Returns the subset tag for the current glyph set, computing and caching
    /// it if no valid cached tag exists.
    ///
    /// The tag consists of six ASCII uppercase letters derived from a hash of
    /// the glyph set.
    pub fn generate_subset_tag(&mut self) -> &str {
        let tag = match self.subset_tag.take() {
            Some(cached) => cached,
            None => Self::hash_to_tag(self.compute_subset_hash()),
        };
        // `Option::insert` stores the value and hands back a reference to it,
        // so no `expect` is needed to read the tag back.
        self.subset_tag.insert(tag).as_str()
    }

    /// Returns the cached subset tag, or `None` if none is currently cached.
    pub fn subset_tag(&self) -> Option<&str> {
        self.subset_tag.as_deref()
    }

    /// Hashes the glyph IDs in ascending order.
    ///
    /// NOTE: the algorithm behind `DefaultHasher` is unspecified and may
    /// change between Rust releases, so the resulting tags are only
    /// reproducible for a given toolchain.
    fn compute_subset_hash(&self) -> u64 {
        use std::hash::{Hash, Hasher};
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        for glyph in &self.used_glyphs {
            glyph.hash(&mut hasher);
        }
        hasher.finish()
    }

    /// Encodes `hash` as `TAG_LEN` letters `A`-`Z`, least-significant base-26
    /// digit first.
    fn hash_to_tag(hash: u64) -> String {
        let mut remaining = hash;
        let mut tag = String::with_capacity(Self::TAG_LEN);
        for _ in 0..Self::TAG_LEN {
            // `remaining % 26` is always below 26, so the cast cannot truncate.
            let letter = b'A' + (remaining % 26) as u8;
            tag.push(char::from(letter));
            remaining /= 26;
        }
        tag
    }

    /// Returns `"<TAG>+<base_name>"`, generating the subset tag if needed.
    ///
    /// NOTE(review): this looks like the PDF subset-font naming convention;
    /// confirm against the consumer of this name.
    pub fn subset_font_name(&mut self, base_name: &str) -> String {
        let tag = self.generate_subset_tag();
        format!("{}+{}", tag, base_name)
    }

    /// Forgets all registered characters and glyphs and drops the cached tag.
    pub fn clear(&mut self) {
        self.used_chars.clear();
        self.used_glyphs.clear();
        self.subset_tag = None;
    }

    /// Returns a snapshot of the current counts and the smallest and largest
    /// registered glyph IDs (`None` when no glyph is registered).
    pub fn stats(&self) -> SubsetStats {
        SubsetStats {
            unique_chars: self.used_chars.len(),
            unique_glyphs: self.used_glyphs.len(),
            min_glyph_id: self.used_glyphs.first().copied(),
            max_glyph_id: self.used_glyphs.last().copied(),
        }
    }
}
/// Summary figures describing a glyph subset.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SubsetStats {
    /// Number of distinct codepoints in the subset.
    pub unique_chars: usize,
    /// Number of distinct glyph IDs in the subset.
    pub unique_glyphs: usize,
    /// Smallest glyph ID in the subset, or `None` when it has no glyphs.
    pub min_glyph_id: Option<u16>,
    /// Largest glyph ID in the subset, or `None` when it has no glyphs.
    pub max_glyph_id: Option<u16>,
}

impl SubsetStats {
    /// Estimates, as a percentage, how many of the font's `total_glyphs` are
    /// left out of the subset.
    ///
    /// Returns `0.0` when `total_glyphs` is zero or the subset has no glyphs.
    /// The result is clamped at `0.0`, so a subset that reports more glyphs
    /// than `total_glyphs` never produces a negative percentage.
    pub fn estimated_reduction(&self, total_glyphs: u16) -> f32 {
        if total_glyphs == 0 || self.unique_glyphs == 0 {
            return 0.0;
        }
        // Precision loss in the `usize` -> `f32` cast is acceptable for an estimate.
        let used = self.unique_glyphs as f32;
        let total = f32::from(total_glyphs);
        ((1.0 - used / total) * 100.0).max(0.0)
    }
}
/// Chainable, by-value builder that produces a [`FontSubsetter`] whose glyph
/// set also contains a fixed list of always-included glyphs.
#[derive(Debug)]
pub struct SubsetBuilder {
    /// Subsetter being populated by the chained calls.
    subsetter: FontSubsetter,
    /// Glyph IDs merged into the glyph set by [`build`](Self::build).
    always_include: BTreeSet<u16>,
}

impl SubsetBuilder {
    /// Creates a builder that always includes glyph 0.
    pub fn new() -> Self {
        Self::default()
    }

    /// Adds `glyph_id` to the glyphs that are included regardless of usage.
    pub fn always_include_glyph(mut self, glyph_id: u16) -> Self {
        self.always_include.insert(glyph_id);
        self
    }

    /// Registers a single codepoint/glyph pair on the underlying subsetter.
    pub fn use_char(mut self, codepoint: u32, glyph_id: u16) -> Self {
        self.subsetter.use_char(codepoint, glyph_id);
        self
    }

    /// Registers every character of `text` that `glyph_lookup` resolves to a
    /// glyph ID on the underlying subsetter.
    pub fn use_string(mut self, text: &str, glyph_lookup: impl Fn(u32) -> Option<u16>) -> Self {
        self.subsetter.use_string(text, glyph_lookup);
        self
    }

    /// Consumes the builder and returns the subsetter with the
    /// always-included glyphs merged into its glyph set.
    ///
    /// Only the glyph set is extended; no codepoint mapping is added for the
    /// always-included glyphs.
    pub fn build(mut self) -> FontSubsetter {
        self.subsetter.used_glyphs.extend(self.always_include);
        self.subsetter
    }
}

impl Default for SubsetBuilder {
    /// Starts from an empty subsetter with glyph 0 marked as always included.
    fn default() -> Self {
        Self {
            subsetter: FontSubsetter::new(),
            always_include: BTreeSet::from([0]),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed subsetter tracks no characters or glyphs.
    #[test]
    fn test_subsetter_creation() {
        let fresh = FontSubsetter::new();
        assert!(fresh.is_empty());
        assert_eq!(fresh.glyph_count(), 0);
    }

    /// Registering two distinct pairs records two characters and two glyphs.
    #[test]
    fn test_use_char() {
        let mut subsetter = FontSubsetter::new();
        for (codepoint, glyph_id) in [(0x0041, 1), (0x0042, 2)] {
            subsetter.use_char(codepoint, glyph_id);
        }
        assert!(!subsetter.is_empty());
        assert_eq!(subsetter.char_count(), 2);
        assert_eq!(subsetter.glyph_count(), 2);
        for glyph_id in [1, 2] {
            assert!(subsetter.used_glyphs().contains(&glyph_id));
        }
    }

    /// Every character of the string that the lookup resolves is recorded.
    #[test]
    fn test_use_string() {
        let mut subsetter = FontSubsetter::new();
        subsetter.use_string("AB", |cp| Some(cp as u16));
        assert_eq!(subsetter.char_count(), 2);
        let recorded = subsetter.used_chars();
        assert!(recorded.contains_key(&0x41));
        assert!(recorded.contains_key(&0x42));
    }

    /// The tag is six uppercase ASCII letters and stable across repeated calls.
    #[test]
    fn test_subset_tag_generation() {
        let mut subsetter = FontSubsetter::new();
        subsetter.use_char(0x0041, 1);
        let first = subsetter.generate_subset_tag().to_owned();
        assert_eq!(first.len(), 6);
        assert!(first.bytes().all(|b| b.is_ascii_uppercase()));
        let second = subsetter.generate_subset_tag().to_owned();
        assert_eq!(first, second);
    }

    /// The subset font name is a six-character tag, a `+`, then the base name.
    #[test]
    fn test_subset_font_name() {
        let mut subsetter = FontSubsetter::new();
        subsetter.use_char(0x0041, 1);
        let name = subsetter.subset_font_name("Arial");
        assert!(name.contains('+'));
        assert!(name.ends_with("Arial"));
        let prefix = name.split('+').next().unwrap();
        assert_eq!(prefix.len(), 6);
    }

    /// Stats report the counts and the extreme glyph IDs.
    #[test]
    fn test_stats() {
        let mut subsetter = FontSubsetter::new();
        for (codepoint, glyph_id) in [(0x0041, 5), (0x0042, 10), (0x0043, 15)] {
            subsetter.use_char(codepoint, glyph_id);
        }
        let stats = subsetter.stats();
        assert_eq!(stats.unique_chars, 3);
        assert_eq!(stats.unique_glyphs, 3);
        assert_eq!(stats.min_glyph_id, Some(5));
        assert_eq!(stats.max_glyph_id, Some(15));
    }

    /// Ten glyphs out of a thousand is a reduction strictly between 98% and 100%.
    #[test]
    fn test_estimated_reduction() {
        let mut subsetter = FontSubsetter::new();
        for offset in 0..10u32 {
            subsetter.use_char(0x0041 + offset, offset as u16 + 1);
        }
        let reduction = subsetter.stats().estimated_reduction(1000);
        assert!(reduction > 98.0);
        assert!(reduction < 100.0);
    }

    /// The builder adds glyph 0 even when only other glyphs were used.
    #[test]
    fn test_builder_always_includes_notdef() {
        let built = SubsetBuilder::new().use_char(0x0041, 1).build();
        for glyph_id in [0, 1] {
            assert!(built.used_glyphs().contains(&glyph_id));
        }
    }

    /// `clear` empties the subsetter and drops the cached tag.
    #[test]
    fn test_clear() {
        let mut subsetter = FontSubsetter::new();
        subsetter.use_char(0x0041, 1);
        let _ = subsetter.generate_subset_tag();
        subsetter.clear();
        assert!(subsetter.is_empty());
        assert!(subsetter.subset_tag().is_none());
    }
}