use crate::font_info::{FontFaceWrapper, FontStyle, FontWeight};
use anyhow::*;
use mkwebfont_common::{
character_set::CharacterSet,
hashing::{hash_fragment, hash_full},
};
use std::{fs, ops::RangeInclusive, path::Path, sync::Arc};
use tokio::{task, task::JoinHandle};
use tracing::{debug, Instrument};
use unicode_blocks::find_unicode_block;
/// Reduces a font name to a short, file-name-friendly token.
///
/// Keeps only the alphanumeric characters of `str`, in order, and stops as soon as the
/// next character would push the result past 20 bytes of UTF-8. The result is therefore
/// never longer than 20 bytes, including for names made of multi-byte characters.
fn extract_name(str: &str) -> String {
    const MAX_BYTES: usize = 20;

    let mut out = String::with_capacity(MAX_BYTES);
    for ch in str.chars().filter(|ch| ch.is_alphanumeric()) {
        // Budget in bytes rather than testing `len() == 20`: a multi-byte character can
        // step over an exact-equality check and leave the name effectively unbounded.
        if out.len() + ch.len_utf8() > MAX_BYTES {
            break;
        }
        out.push(ch);
    }
    out
}
/// Extracts the leading numeric version (digits and dots) from a font version string.
///
/// A leading `"version "` prefix is skipped, compared ASCII case-insensitively. Characters
/// are then taken while they are numeric or `'.'`, stopping at the first other character
/// or when the next character would push the result past 20 bytes of UTF-8. Leading and
/// trailing dots are trimmed from the result, which may be empty.
fn extract_version(mut str: &str) -> String {
    const MAX_BYTES: usize = 20;
    const PREFIX: &str = "version ";

    // `get` returns `None` when the input is too short or the cut would split a
    // character, so the slice below always lands on a character boundary. This also
    // avoids allocating a lowercased copy just to test the prefix.
    if let Some(head) = str.get(..PREFIX.len()) {
        if head.eq_ignore_ascii_case(PREFIX) {
            str = &str[PREFIX.len()..];
        }
    }

    let mut out = String::with_capacity(MAX_BYTES);
    for ch in str.chars() {
        let is_version_char = ch.is_numeric() || ch == '.';
        // Byte budget rather than `len() == 20`: multi-byte numerics could otherwise
        // step over the exact-equality check and never hit the cap.
        if !is_version_char || out.len() + ch.len_utf8() > MAX_BYTES {
            break;
        }
        out.push(ch);
    }
    out.trim_matches('.').to_string()
}
/// Returns `true` when both characters resolve to a Unicode block and the two blocks
/// have the same name; returns `false` if either lookup yields no block.
fn is_same_block(ch_a: char, ch_b: char) -> bool {
    match (find_unicode_block(ch_a), find_unicode_block(ch_b)) {
        (Some(first), Some(second)) => first.name() == second.name(),
        _ => false,
    }
}
/// Converts the members of `bitmap` into a list of inclusive codepoint ranges.
///
/// Members are visited in the order produced by `iter_sorted`. Directly consecutive
/// codepoints always share a range. A gap between two members is bridged (so the range
/// also covers codepoints that are not in `bitmap`) only when both of these hold:
///
/// * the first missing codepoint is in the same Unicode block as the next member, and
/// * none of the missing codepoints is contained in `all_chars`.
///
/// # Panics
///
/// Panics if `bitmap` yields a value that is not a valid Unicode scalar value.
fn decode_range(bitmap: &CharacterSet, all_chars: &CharacterSet) -> Vec<RangeInclusive<u32>> {
    let mut ranges = Vec::new();
    // The range currently being grown, as `(first, last)`.
    let mut open: Option<(char, char)> = None;

    for ch in bitmap.iter_sorted() {
        let ch = char::from_u32(ch).expect("Invalid char in RoaringBitmap");
        open = Some(match open {
            None => (ch, ch),
            Some((start, last)) => {
                let extends = match char::from_u32(last as u32 + 1) {
                    // Directly adjacent: always part of the same range.
                    Some(next) if next == ch => true,
                    // There is a gap; bridge it only under the rules documented above.
                    Some(next) => {
                        is_same_block(next, ch)
                            && !(next..ch).any(|gap| all_chars.contains(gap as u32))
                    }
                    // `last + 1` is not a scalar value (the successor of U+D7FF is a
                    // surrogate). Close the range instead of unwrapping and panicking.
                    None => false,
                };
                if extends {
                    (start, ch)
                } else {
                    ranges.push(start as u32..=last as u32);
                    (ch, ch)
                }
            }
        });
    }

    if let Some((start, last)) = open {
        ranges.push(start as u32..=last as u32);
    }
    ranges
}
/// Metadata for one font together with all of its encoded subsets.
///
/// Values of this type are assembled by `FontEncoder::produce_webfont`.
#[derive(Debug, Clone)]
pub struct WebfontInfo {
/// Family name; copied from the source font, or replaced by `setup_as_fallback`.
font_family: Arc<str>,
/// Style name exactly as reported by the source font.
font_style_text: Arc<str>,
/// Parsed form of the font's style.
font_style: FontStyle,
/// Parsed form of the font's weight.
font_weight: FontWeight,
/// Weight range reported by the font; `setup_as_fallback` widens it to `100..=900`.
weight_range: RangeInclusive<u32>,
/// The encoded subsets, sorted by file name when built by `produce_webfont`.
entries: Vec<Arc<SubsetInfo>>,
}
impl WebfontInfo {
    /// Writes the data of every subset into the directory `target`, one file per subset,
    /// named by the subset's `woff2_file_name`.
    ///
    /// # Errors
    ///
    /// Returns the first I/O error reported by `fs::write`; later subsets are not written.
    pub fn write_to_store(&self, target: &Path) -> Result<()> {
        for entry in &self.entries {
            // Build each destination from `target` itself. Pushing onto and popping from
            // one shared buffer only restores the base directory when the file name is a
            // single path component.
            let destination = target.join(&entry.woff2_file_name);
            debug!("Writing {}...", destination.display());
            fs::write(&destination, &entry.woff2_data)?;
        }
        Ok(())
    }

    /// Consumes the webfont and returns it renamed to `name`, with its weight range
    /// set to `100..=900`.
    pub fn setup_as_fallback(mut self, name: impl ToString) -> WebfontInfo {
        self.font_family = name.to_string().into();
        self.weight_range = 100..=900;
        self
    }

    /// The font family name.
    pub fn font_family(&self) -> &str {
        &self.font_family
    }

    /// The font style as text.
    pub fn font_style(&self) -> &str {
        &self.font_style_text
    }

    /// The parsed font style.
    pub fn parsed_font_style(&self) -> FontStyle {
        self.font_style
    }

    /// The parsed font weight.
    pub fn parsed_font_weight(&self) -> FontWeight {
        self.font_weight
    }

    /// The inclusive weight range of this webfont.
    pub fn weight_range(&self) -> RangeInclusive<u32> {
        self.weight_range.clone()
    }

    /// Number of subsets in this webfont.
    pub fn subset_count(&self) -> usize {
        self.entries.len()
    }

    /// All subsets of this webfont.
    pub fn subsets(&self) -> &[Arc<SubsetInfo>] {
        &self.entries
    }

    /// Returns the union of the character sets of all subsets.
    pub fn all_chars(&self) -> CharacterSet {
        let mut bitmap = CharacterSet::new();
        for subset in &self.entries {
            bitmap.extend(&subset.subset);
        }
        bitmap
    }
}
/// One encoded subset of a font: its name, output file name, covered characters and data.
#[derive(Debug, Clone)]
pub struct SubsetInfo {
/// Subset name as passed to `SubsetInfo::new`.
name: String,
/// Output file name, built in `new` from the font's name, style, version and the subset
/// name; `finalize_name` later appends a fragment and the `.woff2` extension.
woff2_file_name: String,
/// The set of codepoints this subset was created from.
subset: CharacterSet,
/// `subset` expressed as inclusive codepoint ranges, as computed by `decode_range`.
subset_ranges: Vec<RangeInclusive<u32>>,
/// The encoded font data for this subset; written to disk by `WebfontInfo::write_to_store`.
woff2_data: Vec<u8>,
}
impl SubsetInfo {
    /// Builds the record for one encoded subset of `font`.
    ///
    /// The provisional file name has the shape `{family}{style}_{version}_{name}`, where
    /// `{style}` is `_Variable` for variable fonts, `_{style name}` for non-variable fonts
    /// whose style is not "regular" (compared in lowercase), and empty otherwise. The name
    /// is completed later by `finalize_name`.
    ///
    /// `range_exclusions` is handed to `decode_range` when computing the unicode ranges.
    fn new(
        font: &FontFaceWrapper,
        name: &str,
        subset: CharacterSet,
        woff2_data: Vec<u8>,
        range_exclusions: &CharacterSet,
    ) -> Self {
        let family_part = extract_name(font.font_family());
        let style_part = extract_name(font.font_style());
        let version_part = extract_version(font.font_version());

        // Separator and style label are decided together: either both appear or neither.
        let style_suffix = if font.is_variable() {
            String::from("_Variable")
        } else if style_part.to_lowercase() != "regular" {
            format!("_{style_part}")
        } else {
            String::new()
        };

        let subset_ranges = decode_range(&subset, range_exclusions);
        let woff2_file_name = format!("{family_part}{style_suffix}_{version_part}_{name}");

        Self {
            name: name.to_owned(),
            woff2_file_name,
            subset,
            subset_ranges,
            woff2_data,
        }
    }

    /// Completes the file name by appending `_{frag}.woff2` to it.
    fn finalize_name(&mut self, frag: &str) {
        self.woff2_file_name.push('_');
        self.woff2_file_name.push_str(frag);
        self.woff2_file_name.push_str(".woff2");
    }

    /// The name of this subset.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// The file name this subset is stored under.
    pub fn woff2_file_name(&self) -> &str {
        self.woff2_file_name.as_str()
    }

    /// The characters this subset was created from.
    pub fn subset(&self) -> &CharacterSet {
        &self.subset
    }

    /// The subset's characters as inclusive codepoint ranges.
    pub fn unicode_ranges(&self) -> &[RangeInclusive<u32>] {
        self.subset_ranges.as_slice()
    }

    /// The encoded font data of this subset.
    pub fn woff2_data(&self) -> &[u8] {
        self.woff2_data.as_slice()
    }
}
/// Collects subsets of a single font, encoding each in a spawned task, and finally
/// assembles them into a `WebfontInfo`.
pub struct FontEncoder {
/// The font that subsets are created from.
font: FontFaceWrapper,
/// Handles of the spawned encoding tasks, in the order the subsets were added.
woff2_subsets: Vec<JoinHandle<Result<SubsetInfo>>>,
/// Character set shared with every task and passed to `SubsetInfo::new` as its
/// range exclusions.
range_exclusion: Arc<CharacterSet>,
}
impl FontEncoder {
    /// Creates an encoder for `font` with no subsets queued yet.
    ///
    /// `range_exclusion` is shared with every subset task.
    pub fn new(font: FontFaceWrapper, range_exclusion: CharacterSet) -> Self {
        Self {
            font,
            woff2_subsets: Vec::new(),
            range_exclusion: Arc::new(range_exclusion),
        }
    }

    /// Queues the subset `name` covering `codepoints`.
    ///
    /// The encoding runs in a task spawned on the Tokio runtime, instrumented with the
    /// current tracing span; its result is collected by `produce_webfont`.
    pub fn add_subset(&mut self, name: &str, codepoints: CharacterSet) {
        let font = self.font.clone();
        let range_exclusion = Arc::clone(&self.range_exclusion);
        let name = name.to_owned();

        let encode = async move {
            debug!("Encoding subset '{name}' with {} codepoints.", codepoints.len());
            let subset_woff2 = font.subset(&name, &codepoints)?;
            Ok(SubsetInfo::new(&font, &name, codepoints, subset_woff2, &range_exclusion))
        }
        .in_current_span();

        self.woff2_subsets.push(task::spawn(encode));
    }

    /// Waits for every queued subset and assembles the final `WebfontInfo`.
    ///
    /// Subsets are sorted by their provisional file name. A fragment is then derived by
    /// hashing the concatenated per-subset data hashes, and appended to every file name.
    ///
    /// # Errors
    ///
    /// Returns the first failure encountered while awaiting the tasks in the order they
    /// were added: either the task could not be joined or the task itself returned an error.
    pub async fn produce_webfont(self) -> Result<WebfontInfo> {
        let mut subsets = Vec::with_capacity(self.woff2_subsets.len());
        for handle in self.woff2_subsets {
            subsets.push(handle.await??);
        }
        // Stable sort on the provisional file name so the fragment below is
        // independent of the order in which subsets were added.
        subsets.sort_by(|a, b| a.woff2_file_name.cmp(&b.woff2_file_name));

        let mut digest_input = Vec::new();
        for subset in &subsets {
            digest_input.extend(hash_full(&subset.woff2_data).as_bytes());
        }
        let fragment = hash_fragment(&digest_input);

        let mut entries = Vec::with_capacity(subsets.len());
        for mut subset in subsets {
            subset.finalize_name(&fragment);
            entries.push(Arc::new(subset));
        }

        let font = &self.font;
        Ok(WebfontInfo {
            font_family: font.font_family().to_string().into(),
            font_style_text: font.font_style().to_string().into(),
            font_style: font.parsed_font_style(),
            font_weight: font.parsed_font_weight(),
            weight_range: font.weight_range(),
            entries,
        })
    }
}