use crate::render::fonts::{FontRegistry, TypefaceEntry, TypefaceOrigin};
use crate::render::subset::format::{FontFormat, FormatError, SfntFlavor};
/// Font data that has been reduced to a bare SFNT container.
///
/// Produced by [`extract`]; the bytes are either the input as-is (when it was
/// already SFNT) or the result of WOFF2 decompression.
#[derive(Clone, Debug)]
pub struct ExtractedSfnt {
    /// The SFNT bytes.
    pub bytes: Vec<u8>,
    /// SFNT flavor reported by `FontFormat::detect` for `bytes`.
    pub flavor: SfntFlavor,
}
#[derive(thiserror::Error, Debug)]
pub enum ExtractionError {
#[error("system typeface returned no font data via Typeface::to_font_data")]
NoBytesAvailable,
#[error("font format detection failed: {0}")]
Format(#[from] FormatError),
#[error("unsupported font format for subsetting: {0:?}")]
UnsupportedFormat(FontFormat),
#[error("WOFF2 decompression failed: {0}")]
Woff2DecompressionFailed(String),
#[error("post-decompression bytes are not SFNT (got {0:?})")]
PostUnwrapNotSfnt(FontFormat),
}
/// Obtains standalone SFNT bytes for the typeface described by `entry`.
///
/// The raw bytes come from one of two places, depending on `entry.origin`:
///
/// * `Embedded { id }` — a copy of the bytes stored in `registry` under `id`.
/// * `System { .. }` — whatever `entry.typeface.to_font_data()` returns; the
///   accompanying TTC index is discarded.
///
/// The raw bytes are then passed to `classify_and_unwrap`.
///
/// # Errors
///
/// Returns [`ExtractionError::NoBytesAvailable`] when a system typeface yields
/// no font data, and otherwise forwards any error from `classify_and_unwrap`.
pub fn extract(
    entry: &TypefaceEntry,
    registry: &FontRegistry,
) -> Result<ExtractedSfnt, ExtractionError> {
    let raw = match entry.origin {
        TypefaceOrigin::Embedded { id } => registry.embedded_bytes(id).to_vec(),
        TypefaceOrigin::System { .. } => match entry.typeface.to_font_data() {
            Some((bytes, _ttc_index)) => bytes,
            None => return Err(ExtractionError::NoBytesAvailable),
        },
    };

    classify_and_unwrap(raw)
}
/// Detects the container format of `bytes` and reduces it to a bare SFNT.
///
/// * SFNT input is returned unchanged together with its detected flavor.
/// * WOFF2 input is delegated to `unwrap_woff2`.
/// * WOFF1 and TTC input is rejected.
///
/// # Errors
///
/// * [`ExtractionError::Format`] when `FontFormat::detect` fails.
/// * [`ExtractionError::UnsupportedFormat`] for WOFF1 and TTC, carrying the
///   detected format.
/// * Any error produced by `unwrap_woff2` for WOFF2 input.
fn classify_and_unwrap(bytes: Vec<u8>) -> Result<ExtractedSfnt, ExtractionError> {
    match FontFormat::detect(&bytes)? {
        FontFormat::Sfnt(flavor) => Ok(ExtractedSfnt { bytes, flavor }),
        FontFormat::Woff(crate::render::subset::WoffVersion::V2) => unwrap_woff2(&bytes),
        // Listed explicitly (no `_` arm) so a new `FontFormat` variant forces
        // a decision here at compile time.
        rejected @ (FontFormat::Woff(crate::render::subset::WoffVersion::V1)
        | FontFormat::Ttc { .. }) => Err(ExtractionError::UnsupportedFormat(rejected)),
    }
}
/// Decompresses a WOFF2 container and checks that the result is SFNT.
///
/// Only compiled when the `subset-fonts` feature is enabled.
///
/// # Errors
///
/// * [`ExtractionError::Woff2DecompressionFailed`] when
///   `fontcull::decompress_font` fails; the payload is the stringified error.
/// * [`ExtractionError::Format`] when the decompressed bytes cannot be
///   classified by `FontFormat::detect`.
/// * [`ExtractionError::PostUnwrapNotSfnt`] when the decompressed bytes are
///   classified as anything other than SFNT.
#[cfg(feature = "subset-fonts")]
fn unwrap_woff2(bytes: &[u8]) -> Result<ExtractedSfnt, ExtractionError> {
    let decompressed = match fontcull::decompress_font(bytes) {
        Ok(data) => data,
        Err(err) => {
            return Err(ExtractionError::Woff2DecompressionFailed(
                err.to_string(),
            ))
        }
    };

    // Re-run detection on the output rather than trusting the decompressor to
    // have produced an SFNT.
    let detected = FontFormat::detect(&decompressed)?;
    if let FontFormat::Sfnt(flavor) = detected {
        Ok(ExtractedSfnt {
            bytes: decompressed,
            flavor,
        })
    } else {
        Err(ExtractionError::PostUnwrapNotSfnt(detected))
    }
}
/// Fallback used when the `subset-fonts` feature is disabled.
///
/// The input is ignored and the call always fails with
/// [`ExtractionError::UnsupportedFormat`] carrying the WOFF2 format.
#[cfg(not(feature = "subset-fonts"))]
fn unwrap_woff2(_bytes: &[u8]) -> Result<ExtractedSfnt, ExtractionError> {
    let woff2 = FontFormat::Woff(crate::render::subset::WoffVersion::V2);
    Err(ExtractionError::UnsupportedFormat(woff2))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::EmbeddedFontVariant;
    use crate::render::fonts::FontRegistry;
    use crate::render::subset::WoffVersion;
    use skia_safe::{FontMgr, FontStyle};

    /// Builds a fresh registry on top of a new `FontMgr`.
    fn registry() -> FontRegistry {
        FontRegistry::new(FontMgr::new())
    }

    /// Returns the bytes of whatever typeface the font manager hands out for
    /// "no family, normal style". Panics if the host has no such typeface or
    /// the typeface exposes no data.
    fn arbitrary_system_font_bytes() -> Vec<u8> {
        let font_mgr = FontMgr::new();
        let default_face = font_mgr
            .legacy_make_typeface(None::<&str>, FontStyle::normal())
            .expect("system has no default typeface");
        let (data, _ttc_index) = default_face
            .to_font_data()
            .expect("legacy default lacks bytes");
        data
    }

    /// An embedded entry must round-trip exactly the bytes that were
    /// registered for it.
    #[test]
    fn extract_from_embedded_origin_returns_raw_docx_bytes() {
        let registered = arbitrary_system_font_bytes();
        let mut fonts = registry();
        fonts
            .register_embedded(
                "EmbeddedExtractProbe",
                EmbeddedFontVariant::Regular,
                registered.clone(),
            )
            .unwrap();

        let entry = fonts.resolve("EmbeddedExtractProbe", FontStyle::normal());
        assert!(matches!(entry.origin, TypefaceOrigin::Embedded { .. }));

        let extracted = extract(&entry, &fonts).expect("embedded extraction must succeed");
        assert_eq!(
            extracted.bytes, registered,
            "embedded extraction must hand back exactly the registered bytes"
        );
    }

    /// A family that was never registered resolves to a system-origin entry,
    /// and extraction for it must yield non-empty SFNT bytes.
    #[test]
    fn extract_from_system_origin_calls_to_font_data() {
        let fonts = registry();
        let entry = fonts.resolve("UnknownExtractProbe", FontStyle::normal());
        assert!(matches!(entry.origin, TypefaceOrigin::System { .. }));

        let extracted = extract(&entry, &fonts).expect("system extraction must succeed");
        assert!(matches!(
            extracted.flavor,
            SfntFlavor::TrueTypeStandard | SfntFlavor::TrueTypeApple | SfntFlavor::OpenTypeCff,
        ));
        assert!(!extracted.bytes.is_empty());
    }

    /// WOFF1 is recognised but rejected as unsupported.
    #[test]
    fn extract_returns_unsupported_for_woff1_input() {
        // "wOFF" signature followed by 40 zero bytes of padding.
        let mut woff1_stub = b"wOFF".to_vec();
        woff1_stub.extend_from_slice(&[0u8; 40]);

        let outcome = classify_and_unwrap(woff1_stub);
        assert!(matches!(
            outcome,
            Err(ExtractionError::UnsupportedFormat(FontFormat::Woff(
                WoffVersion::V1
            )))
        ));
    }

    /// TTC collections are recognised but rejected as unsupported.
    #[test]
    fn extract_returns_unsupported_for_ttc_input() {
        // "ttcf" tag, then two big-endian u32 words; the second one (2) is
        // what the assertion below expects to come back as `face_count`.
        let mut ttc_stub = b"ttcf".to_vec();
        ttc_stub.extend_from_slice(&[0x00, 0x01, 0x00, 0x00]);
        ttc_stub.extend_from_slice(&[0x00, 0x00, 0x00, 0x02]);

        let outcome = classify_and_unwrap(ttc_stub);
        assert!(matches!(
            outcome,
            Err(ExtractionError::UnsupportedFormat(FontFormat::Ttc {
                face_count: 2
            }))
        ));
    }

    /// Bytes with no recognisable signature surface the detector's error.
    #[test]
    fn extract_returns_format_error_for_garbage() {
        let junk = vec![0xab, 0xcd, 0xef, 0x00, 1, 2, 3];
        let outcome = classify_and_unwrap(junk);
        assert!(matches!(outcome, Err(ExtractionError::Format(_))));
    }

    /// Compresses a system font to WOFF2 and checks that it unwraps back to
    /// directly subsettable SFNT bytes.
    #[cfg(feature = "subset-fonts")]
    #[test]
    fn extract_unwraps_woff2_to_sfnt() {
        let sfnt = arbitrary_system_font_bytes();
        // If the host font cannot be compressed, bail out instead of failing:
        // the fixture is environment-dependent.
        let compressed = match fontcull::compress_to_woff2(&sfnt) {
            Ok(encoded) => encoded,
            Err(e) => {
                eprintln!("skipping: WOFF2 compression of system font failed: {e}");
                return;
            }
        };
        assert_eq!(&compressed[..4], b"wOF2", "fixture must actually be WOFF2");

        let unwrapped = classify_and_unwrap(compressed).expect("WOFF2 must unwrap to SFNT");
        assert!(matches!(
            unwrapped.flavor,
            SfntFlavor::TrueTypeStandard | SfntFlavor::TrueTypeApple | SfntFlavor::OpenTypeCff,
        ));

        let redetected = FontFormat::detect(&unwrapped.bytes).unwrap();
        assert!(
            redetected.is_directly_subsettable(),
            "post-unwrap bytes must be directly subsettable"
        );
    }
}