use scraper::{Html, Selector};
use std::collections::{HashMap, HashSet};
/// CSS custom properties keyed by their full name (including the leading
/// `--`), mapped to the raw, trimmed text of their declared value.
type CssVariables = HashMap<String, String>;
/// One `@font-face` rule as read from a stylesheet.
// NOTE(review): the `dead_code` allowance is presumably here because not every
// field is read by in-crate code — confirm whether it is still needed.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FontFace {
    /// First family name of the `font-family` descriptor, without quotes.
    pub family: String,
    /// Contents of the first `url(...)` in the `src` descriptor, without quotes.
    pub src: String,
    /// Trimmed text of the `font-weight` descriptor, if one was present.
    pub weight: Option<String>,
    /// Trimmed text of the `font-style` descriptor, if one was present.
    pub style: Option<String>,
}
/// Combined result of [`analyze_fonts`]: which characters each font family has
/// to render, plus every `@font-face` rule found in the stylesheet.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct FontAnalysis {
    /// Characters seen in the document, grouped by the font family name they
    /// were attributed to.
    pub chars_per_font: HashMap<String, HashSet<char>>,
    /// The `@font-face` rules, in the order they appear in the CSS.
    pub font_faces: Vec<FontFace>,
}
/// Runs the full font analysis for one document.
///
/// `html` is the document whose text is inspected and `css` is the stylesheet
/// text that styles it. The returned [`FontAnalysis`] holds the per-font
/// character sets and the parsed `@font-face` rules.
pub fn analyze_fonts(html: &str, css: &str) -> FontAnalysis {
    FontAnalysis {
        chars_per_font: collect_chars_per_font(html, css),
        font_faces: parse_font_face_rules(css),
    }
}
/// Collects, for every font family, the set of characters that family is
/// asked to render in `html` according to the `font-family` rules in `css`.
///
/// Every element of the parsed document is visited. The text nodes that are
/// *direct* children of an element are attributed to the font resolved for
/// that element (its own matching rule, otherwise the nearest styled
/// ancestor, otherwise `"sans-serif"`). Whitespace-only text nodes are
/// skipped; for any other text node every character, including whitespace,
/// is recorded.
///
/// Because all elements are visited, text inside elements such as `<style>`
/// is counted as well.
pub fn collect_chars_per_font(html: &str, css: &str) -> HashMap<String, HashSet<char>> {
    let document = Html::parse_document(html);
    let css_vars = parse_css_custom_properties(css);
    let font_rules = parse_font_family_rules_with_vars(css, &css_vars);
    let all_elements = Selector::parse("*").expect("`*` is a valid CSS selector");

    let mut result: HashMap<String, HashSet<char>> = HashMap::new();
    for element in document.select(&all_elements) {
        // Look only at the element's own text nodes. Taking the first
        // descendant text node instead would miss text that follows a child
        // element and would credit nested text to the wrong font.
        let mut own_text = element
            .children()
            .filter_map(|node| node.value().as_text())
            .filter(|text| !text.trim().is_empty())
            .peekable();
        if own_text.peek().is_none() {
            continue;
        }

        // Resolve the font only once we know there is text to record.
        let font_family = find_font_family_for_element(&element, &font_rules)
            .unwrap_or_else(|| "sans-serif".to_string());
        let chars = result.entry(font_family).or_default();
        for text in own_text {
            chars.extend(text.chars());
        }
    }
    result
}
/// A style rule that sets `font-family`, reduced to the two parts the
/// analysis needs.
#[derive(Debug)]
struct FontFamilyRule {
    /// The rule's selector text, trimmed but otherwise unparsed.
    selector: String,
    /// The first family name of the rule's `font-family` value.
    font_family: String,
}
/// Scans `css` and returns one [`FontFamilyRule`] per style rule whose
/// declaration block sets `font-family`, in source order.
///
/// `css_vars` is used to resolve `var(...)` references in the values.
///
/// Handling of non-trivial syntax:
/// * `/* ... */` comments are removed before anything else is interpreted, so
///   a comment in front of a selector or declaration does not hide the rule.
///   Comment markers inside strings or unquoted URLs are not recognised as
///   such and are treated as comments too.
/// * Statement at-rules (`@import ...;`) end at their `;` and do not leak into
///   the following selector.
/// * Rules nested in `@media`, `@supports`, `@layer`, `@container` and
///   `@document` are collected as if they were top-level rules.
/// * Every other at-rule block (`@font-face`, `@keyframes`, ...) is skipped
///   entirely.
/// * CSS nesting is not supported: a `{` inside a declaration block simply
///   discards the declarations gathered so far.
fn parse_font_family_rules_with_vars(css: &str, css_vars: &CssVariables) -> Vec<FontFamilyRule> {
    // At-rules whose block is itself a list of ordinary style rules.
    const GROUP_AT_RULES: [&str; 5] = ["@media", "@supports", "@layer", "@container", "@document"];

    let mut rules = Vec::new();
    let mut prelude = String::new();
    let mut declarations = String::new();
    let mut in_block = false;
    // Brace depth inside an at-rule block that is being skipped wholesale.
    let mut skipped_depth = 0usize;
    let mut chars = css.chars().peekable();

    while let Some(c) = chars.next() {
        // Drop comments first so they can never end up in selector or
        // declaration text.
        if c == '/' && chars.peek() == Some(&'*') {
            chars.next();
            let mut previous = '\0';
            for next in chars.by_ref() {
                if previous == '*' && next == '/' {
                    break;
                }
                previous = next;
            }
            continue;
        }

        if skipped_depth > 0 {
            match c {
                '{' => skipped_depth += 1,
                '}' => skipped_depth -= 1,
                _ => {}
            }
            continue;
        }

        if in_block {
            match c {
                '{' => declarations.clear(),
                '}' => {
                    in_block = false;
                    if let Some(font_family) =
                        extract_font_family_with_vars(&declarations, css_vars)
                    {
                        let selector = prelude.trim();
                        if !selector.is_empty() {
                            rules.push(FontFamilyRule {
                                selector: selector.to_string(),
                                font_family,
                            });
                        }
                    }
                    prelude.clear();
                    declarations.clear();
                }
                _ => declarations.push(c),
            }
            continue;
        }

        match c {
            '{' => {
                let head = prelude.trim();
                if head.starts_with('@') {
                    let keyword = head
                        .split(|ch: char| ch.is_whitespace() || ch == '(')
                        .next()
                        .unwrap_or(head);
                    let is_group = GROUP_AT_RULES
                        .iter()
                        .any(|name| keyword.eq_ignore_ascii_case(name));
                    if !is_group {
                        skipped_depth = 1;
                    }
                    // For group rules we simply keep scanning: the nested
                    // rules are parsed like top-level ones and the group's
                    // closing brace is handled by the `'}'` arm below.
                    prelude.clear();
                } else {
                    in_block = true;
                    declarations.clear();
                }
            }
            // `}` closes a group at-rule (or is stray); `;` ends a statement
            // at-rule. Either way the text gathered so far is not a selector.
            '}' | ';' => prelude.clear(),
            _ => prelude.push(c),
        }
    }
    rules
}
/// Returns the primary font family declared by `block`, the text between the
/// braces of a style rule, or `None` when the block has no `font-family`
/// declaration.
///
/// The property name is matched case-insensitively and may be followed by
/// whitespace before the `:`. When `font-family` is declared more than once
/// the last declaration is used, matching how CSS resolves duplicates within
/// one block. `var(...)` references are resolved through `css_vars`.
///
/// The `font` shorthand is not interpreted.
fn extract_font_family_with_vars(block: &str, css_vars: &CssVariables) -> Option<String> {
    block.split(';').rev().find_map(|declaration| {
        let (property, value) = declaration.split_once(':')?;
        property
            .trim()
            .eq_ignore_ascii_case("font-family")
            .then(|| parse_font_family_value_with_vars(value, css_vars))
    })
}
/// Finds every `@font-face` rule in `css` and returns the ones that could be
/// parsed into a [`FontFace`], in source order.
///
/// Each occurrence of the text `@font-face` is followed to the next `{`, and
/// the block is taken up to its matching `}`. Scanning stops when a rule has
/// no opening brace or its block is never closed. Blocks that lack a family
/// or a usable `src` are skipped; an empty block is skipped like any other
/// unusable block and does not stop the scan.
fn parse_font_face_rules(css: &str) -> Vec<FontFace> {
    const AT_RULE: &str = "@font-face";

    let mut faces = Vec::new();
    let mut remaining = css;
    while let Some(start) = remaining.find(AT_RULE) {
        let after_keyword = &remaining[start + AT_RULE.len()..];
        let Some(open) = after_keyword.find('{') else {
            break;
        };
        let body = &after_keyword[open + 1..];

        // Locate the brace that closes this block. The result is an `Option`
        // so that a block ending at offset 0 (`@font-face {}`) is not
        // mistaken for "no closing brace".
        let mut depth = 1usize;
        let close = body.char_indices().find_map(|(offset, c)| {
            match c {
                '{' => depth += 1,
                '}' => {
                    depth -= 1;
                    if depth == 0 {
                        return Some(offset);
                    }
                }
                _ => {}
            }
            None
        });
        let Some(close) = close else {
            break;
        };

        faces.extend(parse_font_face_block(&body[..close]));
        remaining = &body[close + 1..];
    }
    faces
}
/// Parses the descriptors inside one `@font-face { ... }` block.
///
/// Recognised descriptors are `font-family`, `src`, `font-weight` and
/// `font-style`; their names are matched case-insensitively and may be
/// followed by whitespace before the `:`. If a descriptor appears more than
/// once the last occurrence wins.
///
/// Returns `None` when the block has no `font-family` or when its (last)
/// `src` descriptor contains no `url(...)`.
fn parse_font_face_block(block: &str) -> Option<FontFace> {
    let mut family = None;
    let mut src = None;
    let mut weight = None;
    let mut style = None;

    for declaration in split_outside_parens(block) {
        let Some((property, value)) = declaration.split_once(':') else {
            continue;
        };
        match property.trim().to_ascii_lowercase().as_str() {
            "font-family" => family = Some(parse_font_family_value(value)),
            "src" => src = parse_font_src(value),
            "font-weight" => weight = Some(value.trim().to_string()),
            "font-style" => style = Some(value.trim().to_string()),
            _ => {}
        }
    }

    Some(FontFace {
        family: family?,
        src: src?,
        weight,
        style,
    })
}

/// Splits a declaration block on `;`, ignoring semicolons that sit inside
/// parentheses so that `url(data:font/woff2;base64,...)` stays in one piece.
fn split_outside_parens(block: &str) -> Vec<&str> {
    let mut parts = Vec::new();
    let mut piece_start = 0;
    let mut paren_depth = 0usize;
    for (offset, c) in block.char_indices() {
        match c {
            '(' => paren_depth += 1,
            // Tolerate a stray `)` instead of underflowing.
            ')' => paren_depth = paren_depth.saturating_sub(1),
            ';' if paren_depth == 0 => {
                parts.push(&block[piece_start..offset]);
                piece_start = offset + 1;
            }
            _ => {}
        }
    }
    parts.push(&block[piece_start..]);
    parts
}
/// Pulls the address out of the first `url(...)` in an `@font-face` `src`
/// value.
///
/// The text between `url(` and the next `)` is trimmed, then stripped of
/// surrounding double quotes and surrounding single quotes. Returns `None`
/// when the value has no `url(` or the parenthesis is never closed.
fn parse_font_src(value: &str) -> Option<String> {
    const URL_OPEN: &str = "url(";

    let (_, tail) = value.trim().split_once(URL_OPEN)?;
    let (inner, _) = tail.split_once(')')?;
    let unquoted = inner.trim().trim_matches('"').trim_matches('\'');
    Some(unquoted.to_owned())
}
fn parse_font_family_value(value: &str) -> String {
parse_font_family_value_with_vars(value, &HashMap::new())
}
/// Reduces a `font-family` value to its first (preferred) family name.
///
/// `var(...)` references are resolved through `css_vars` first. The value is
/// then cut at the first comma, a trailing `!important` flag is removed, and
/// surrounding whitespace and quotes are stripped.
fn parse_font_family_value_with_vars(value: &str, css_vars: &CssVariables) -> String {
    let resolved = resolve_css_var(value.trim(), css_vars);
    let first = resolved.split(',').next().unwrap_or(&resolved);

    // `!important` belongs to the declaration, not to the family name. With a
    // single family it would otherwise stay attached and keep the closing
    // quote from being stripped.
    let first = match first.rsplit_once('!') {
        Some((name, flag)) if flag.trim().eq_ignore_ascii_case("important") => name,
        _ => first,
    };

    first
        .trim()
        .trim_matches('"')
        .trim_matches('\'')
        .to_string()
}
/// Expands `var(--name)` / `var(--name, fallback)` references in `value`.
///
/// The left-most `var(` is replaced on each pass by the variable's value from
/// `css_vars`, else by the fallback (everything after the first comma), else
/// by the empty string. Replacement text is scanned again, so variables that
/// refer to other variables are expanded too. At most ten substitutions are
/// made, which also bounds self-referencing variables. A `var(` without a
/// matching `)` stops the expansion and is left in place.
fn resolve_css_var(value: &str, css_vars: &CssVariables) -> String {
    const VAR_OPEN: &str = "var(";
    const MAX_SUBSTITUTIONS: usize = 10;

    let mut resolved = value.to_owned();
    for _ in 0..MAX_SUBSTITUTIONS {
        let Some(open) = resolved.find(VAR_OPEN) else {
            break;
        };
        let args_start = open + VAR_OPEN.len();

        // Offset, relative to `args_start`, of the `)` that closes this `var(`.
        let mut depth = 1usize;
        let closing = resolved[args_start..]
            .char_indices()
            .find_map(|(offset, ch)| {
                match ch {
                    '(' => depth += 1,
                    ')' => {
                        depth -= 1;
                        if depth == 0 {
                            return Some(offset);
                        }
                    }
                    _ => {}
                }
                None
            });
        let Some(args_len) = closing else {
            break;
        };
        let args_end = args_start + args_len;

        let args = &resolved[args_start..args_end];
        let (name, fallback) = match args.split_once(',') {
            Some((name, fallback)) => (name.trim(), Some(fallback.trim())),
            None => (args.trim(), None),
        };
        let replacement = css_vars
            .get(name)
            .map(String::as_str)
            .or(fallback)
            .unwrap_or("");

        let mut expanded = String::new();
        expanded.push_str(&resolved[..open]);
        expanded.push_str(replacement);
        expanded.push_str(&resolved[args_end + 1..]);
        resolved = expanded;
    }
    resolved
}
fn parse_css_custom_properties(css: &str) -> CssVariables {
let mut vars = HashMap::new();
let mut remaining = css;
while let Some(brace_start) = remaining.find('{') {
let after_brace = &remaining[brace_start + 1..];
let mut depth = 1;
let mut block_end = None;
for (i, c) in after_brace.char_indices() {
match c {
'{' => depth += 1,
'}' => {
depth -= 1;
if depth == 0 {
block_end = Some(i);
break;
}
}
_ => {}
}
}
let Some(end) = block_end else {
break;
};
let block = &after_brace[..end];
for declaration in block.split(';') {
let declaration = declaration.trim();
if declaration.starts_with("--")
&& let Some(colon_pos) = declaration.find(':')
{
let name = declaration[..colon_pos].trim();
let value = declaration[colon_pos + 1..].trim();
vars.insert(name.to_string(), value.to_string());
}
}
remaining = &after_brace[end + 1..];
}
vars
}
fn find_font_family_for_element(
element: &scraper::ElementRef,
rules: &[FontFamilyRule],
) -> Option<String> {
let mut matched_font: Option<String> = None;
for rule in rules {
if let Ok(selector) = Selector::parse(&rule.selector) {
if selector.matches(element) {
matched_font = Some(rule.font_family.clone());
}
}
}
if matched_font.is_none() {
for ancestor in element.ancestors() {
if let Some(ancestor_el) = scraper::ElementRef::wrap(ancestor) {
for rule in rules {
if let Ok(selector) = Selector::parse(&rule.selector)
&& selector.matches(&ancestor_el)
{
matched_font = Some(rule.font_family.clone());
}
}
}
if matched_font.is_some() {
break;
}
}
}
matched_font
}
/// Concatenates the contents of every `<style>` element in `html`.
///
/// The inner HTML of each element is appended in document order, each
/// followed by a newline. A document without `<style>` elements yields an
/// empty string.
pub fn extract_css_from_html(html: &str) -> String {
    let style_selector = Selector::parse("style").unwrap();
    let document = Html::parse_document(html);
    document
        .select(&style_selector)
        .fold(String::new(), |mut css, style| {
            css.push_str(&style.inner_html());
            css.push('\n');
            css
        })
}
// Unit tests for rule parsing, `@font-face` parsing, CSS custom property
// resolution and the end-to-end character collection.
#[cfg(test)]
mod tests {
use super::*;
// Quoted and unquoted family names: only the first family of each rule is kept.
#[test]
fn test_parse_font_family_rules() {
let css = r#"
body { font-family: "Inter", sans-serif; }
h1 { font-family: 'Playfair Display'; }
.code { font-family: monospace; }
"#;
let rules = parse_font_family_rules_with_vars(css, &HashMap::new());
assert_eq!(rules.len(), 3);
assert_eq!(rules[0].selector, "body");
assert_eq!(rules[0].font_family, "Inter");
assert_eq!(rules[1].font_family, "Playfair Display");
assert_eq!(rules[2].font_family, "monospace");
}
// Text in a <p> without its own rule is attributed to the font set on <body>.
#[test]
fn test_collect_chars_basic() {
let html = r#"
<html>
<head>
<style>
body { font-family: "TestFont"; }
</style>
</head>
<body>
<p>Hello</p>
</body>
</html>
"#;
let css = extract_css_from_html(html);
let chars = collect_chars_per_font(html, &css);
assert!(chars.contains_key("TestFont"));
let test_font_chars = &chars["TestFont"];
assert!(test_font_chars.contains(&'H'));
assert!(test_font_chars.contains(&'e'));
assert!(test_font_chars.contains(&'l'));
assert!(test_font_chars.contains(&'o'));
}
// A rule matching the element itself takes precedence over the font of an ancestor.
#[test]
fn test_different_fonts_for_elements() {
let html = r#"
<html>
<head>
<style>
body { font-family: "BodyFont"; }
h1 { font-family: "HeadingFont"; }
</style>
</head>
<body>
<h1>Title</h1>
<p>Body text</p>
</body>
</html>
"#;
let css = extract_css_from_html(html);
let chars = collect_chars_per_font(html, &css);
assert!(chars.contains_key("HeadingFont"));
assert!(chars["HeadingFont"].contains(&'T'));
assert!(chars.contains_key("BodyFont"));
assert!(chars["BodyFont"].contains(&'B'));
}
// Double-quoted, single-quoted and unquoted url(...) forms, with optional
// weight/style descriptors; the trailing `body` rule must not be reported.
#[test]
fn test_parse_font_face_rules() {
let css = r#"
@font-face {
font-family: "Inter";
src: url("/fonts/Inter-Regular.woff2") format("woff2");
font-weight: 400;
font-style: normal;
}
@font-face {
font-family: "Inter";
src: url('/fonts/Inter-Bold.woff2');
font-weight: 700;
}
@font-face {
font-family: 'Playfair Display';
src: url(fonts/Playfair.ttf);
}
body { font-family: "Inter", sans-serif; }
"#;
let faces = parse_font_face_rules(css);
assert_eq!(faces.len(), 3);
assert_eq!(faces[0].family, "Inter");
assert_eq!(faces[0].src, "/fonts/Inter-Regular.woff2");
assert_eq!(faces[0].weight, Some("400".to_string()));
assert_eq!(faces[0].style, Some("normal".to_string()));
assert_eq!(faces[1].family, "Inter");
assert_eq!(faces[1].src, "/fonts/Inter-Bold.woff2");
assert_eq!(faces[1].weight, Some("700".to_string()));
assert_eq!(faces[1].style, None);
assert_eq!(faces[2].family, "Playfair Display");
assert_eq!(faces[2].src, "fonts/Playfair.ttf");
}
// End-to-end: one @font-face plus the characters used with that family.
#[test]
fn test_analyze_fonts_full() {
let html = r#"
<html>
<head>
<style>
@font-face {
font-family: "MyFont";
src: url("/fonts/MyFont.woff2");
}
body { font-family: "MyFont"; }
</style>
</head>
<body>
<p>Hello World</p>
</body>
</html>
"#;
let css = extract_css_from_html(html);
let analysis = analyze_fonts(html, &css);
assert_eq!(analysis.font_faces.len(), 1);
assert_eq!(analysis.font_faces[0].family, "MyFont");
assert_eq!(analysis.font_faces[0].src, "/fonts/MyFont.woff2");
assert!(analysis.chars_per_font.contains_key("MyFont"));
let chars = &analysis.chars_per_font["MyFont"];
assert!(chars.contains(&'H'));
assert!(chars.contains(&'W'));
}
// Custom property values are stored raw: quotes and fallback lists are kept.
#[test]
fn test_parse_css_custom_properties() {
let css = r#"
:root {
--font-mono: 'Iosevka', monospace;
--font-body: "Inter", sans-serif;
--spacing: 1rem;
}
body { color: black; }
"#;
let vars = parse_css_custom_properties(css);
assert_eq!(
vars.get("--font-mono"),
Some(&"'Iosevka', monospace".to_string())
);
assert_eq!(
vars.get("--font-body"),
Some(&"\"Inter\", sans-serif".to_string())
);
assert_eq!(vars.get("--spacing"), Some(&"1rem".to_string()));
}
// A defined variable is replaced by its stored value verbatim.
#[test]
fn test_resolve_css_var_simple() {
let mut vars = HashMap::new();
vars.insert(
"--font-mono".to_string(),
"'Iosevka', monospace".to_string(),
);
let result = resolve_css_var("var(--font-mono)", &vars);
assert_eq!(result, "'Iosevka', monospace");
}
// An undefined variable resolves to the fallback given after the comma.
#[test]
fn test_resolve_css_var_with_fallback() {
let vars: CssVariables = HashMap::new();
let result = resolve_css_var("var(--undefined, Arial)", &vars);
assert_eq!(result, "Arial");
}
// A variable whose value references another variable is expanded as well.
#[test]
fn test_resolve_css_var_nested() {
let mut vars = HashMap::new();
vars.insert("--base-font".to_string(), "'Inter'".to_string());
vars.insert(
"--font-stack".to_string(),
"var(--base-font), sans-serif".to_string(),
);
let result = resolve_css_var("var(--font-stack)", &vars);
assert_eq!(result, "'Inter', sans-serif");
}
// `font-family: var(...)` is resolved before characters are attributed; the
// braces and parentheses inside <code> are ordinary text and must be collected.
#[test]
fn test_font_family_with_css_var() {
let html = r#"
<html>
<head>
<style>
@font-face {
font-family: 'Iosevka';
src: url('/fonts/Iosevka-Regular.woff2') format('woff2');
}
:root {
--font-mono: 'Iosevka', monospace;
}
code {
font-family: var(--font-mono);
}
</style>
</head>
<body>
<code>fn main() { println!("hello"); }</code>
</body>
</html>
"#;
let css = extract_css_from_html(html);
let analysis = analyze_fonts(html, &css);
assert_eq!(analysis.font_faces.len(), 1);
assert_eq!(analysis.font_faces[0].family, "Iosevka");
assert!(
analysis.chars_per_font.contains_key("Iosevka"),
"chars_per_font should contain Iosevka, but got: {:?}",
analysis.chars_per_font.keys().collect::<Vec<_>>()
);
let iosevka_chars = &analysis.chars_per_font["Iosevka"];
assert!(iosevka_chars.contains(&'f'));
assert!(iosevka_chars.contains(&'n'));
assert!(iosevka_chars.contains(&'m'));
assert!(iosevka_chars.contains(&'('));
assert!(iosevka_chars.contains(&'{'));
assert!(iosevka_chars.contains(&'h'));
assert!(iosevka_chars.contains(&'e'));
assert!(iosevka_chars.contains(&'l'));
assert!(iosevka_chars.contains(&'o'));
}
// The same variable can be used by several rules; `:root` itself yields no
// rule because it declares no font-family.
#[test]
fn test_css_var_in_multiple_rules() {
let css = r#"
:root {
--heading-font: 'Playfair Display';
--body-font: 'Inter';
}
h1 { font-family: var(--heading-font); }
h2 { font-family: var(--heading-font); }
p { font-family: var(--body-font); }
"#;
let vars = parse_css_custom_properties(css);
let rules = parse_font_family_rules_with_vars(css, &vars);
assert_eq!(rules.len(), 3);
assert_eq!(rules[0].font_family, "Playfair Display");
assert_eq!(rules[1].font_family, "Playfair Display");
assert_eq!(rules[2].font_family, "Inter");
}
}
// Tests that the byte-offset based string handling stays on character
// boundaries when names, values, selectors or content are non-ASCII.
#[cfg(test)]
mod unicode_tests {
use super::*;
// Non-ASCII custom property name, both when declared and when referenced.
#[test]
fn test_css_var_with_unicode_variable_name() {
let css = r#"
:root {
--日本語: 'Noto Sans JP';
}
body {
font-family: var(--日本語);
}
"#;
let vars = parse_css_custom_properties(css);
assert_eq!(vars.get("--日本語"), Some(&"'Noto Sans JP'".to_string()));
let rules = parse_font_family_rules_with_vars(css, &vars);
assert_eq!(rules.len(), 1);
assert_eq!(rules[0].font_family, "Noto Sans JP");
}
// Non-ASCII family name stored in a variable, followed by a fallback family.
#[test]
fn test_css_var_with_unicode_in_value() {
let css = r#"
:root {
--font: '日本語フォント', sans-serif;
}
body {
font-family: var(--font);
}
"#;
let vars = parse_css_custom_properties(css);
let rules = parse_font_family_rules_with_vars(css, &vars);
assert_eq!(rules[0].font_family, "日本語フォント");
}
// NOTE(review): despite the name, this stylesheet contains no non-ASCII text;
// it only checks plain variable resolution.
#[test]
fn test_css_var_unicode_before_var() {
let css = r#"
:root {
--font: 'Test';
}
body {
font-family: var(--font);
}
"#;
let vars = parse_css_custom_properties(css);
let rules = parse_font_family_rules_with_vars(css, &vars);
assert_eq!(rules[0].font_family, "Test");
}
// Family name that starts with a four-byte character.
#[test]
fn test_css_var_with_emoji() {
let css = r#"
:root {
--emoji-font: '🎉 Party Font';
}
body {
font-family: var(--emoji-font);
}
"#;
let vars = parse_css_custom_properties(css);
let rules = parse_font_family_rules_with_vars(css, &vars);
assert_eq!(rules[0].font_family, "🎉 Party Font");
}
// Non-ASCII text in the fallback of an undefined variable.
#[test]
fn test_css_var_fallback_with_unicode() {
let css = r#"
body {
font-family: var(--undefined, '日本語フォント');
}
"#;
let vars = parse_css_custom_properties(css);
let rules = parse_font_family_rules_with_vars(css, &vars);
assert_eq!(rules[0].font_family, "日本語フォント");
}
// Variable referencing another variable whose value is non-ASCII.
#[test]
fn test_css_var_nested_with_unicode() {
let css = r#"
:root {
--base: '日本語';
--full: var(--base), sans-serif;
}
body {
font-family: var(--full);
}
"#;
let vars = parse_css_custom_properties(css);
let rules = parse_font_family_rules_with_vars(css, &vars);
assert_eq!(rules[0].font_family, "日本語");
}
// Non-ASCII class name in the selector is preserved verbatim.
#[test]
fn test_unicode_selector() {
let css = r#"
.日本語-class {
font-family: 'Test Font';
}
"#;
let rules = parse_font_family_rules_with_vars(css, &HashMap::new());
assert_eq!(rules.len(), 1);
assert_eq!(rules[0].selector, ".日本語-class");
assert_eq!(rules[0].font_family, "Test Font");
}
// Non-ASCII family name inside an @font-face block.
#[test]
fn test_unicode_in_font_face() {
let css = r#"
@font-face {
font-family: '日本語フォント';
src: url('/fonts/japanese.woff2');
}
"#;
let faces = parse_font_face_rules(css);
assert_eq!(faces.len(), 1);
assert_eq!(faces[0].family, "日本語フォント");
}
// Several var() references in one value, one of them with a non-ASCII name;
// only the first resolved family is reported.
#[test]
fn test_mixed_unicode_and_ascii_complex() {
let css = r#"
:root {
--primary: 'Helvetica';
--日本語: 'Noto Sans JP';
--combined: var(--primary), var(--日本語), sans-serif;
}
.my-class {
font-family: var(--combined);
}
"#;
let vars = parse_css_custom_properties(css);
assert_eq!(vars.get("--primary"), Some(&"'Helvetica'".to_string()));
assert_eq!(vars.get("--日本語"), Some(&"'Noto Sans JP'".to_string()));
let rules = parse_font_family_rules_with_vars(css, &vars);
assert_eq!(rules[0].font_family, "Helvetica");
}
// Non-ASCII document text is collected character by character.
#[test]
fn test_collect_chars_with_unicode_content() {
let html = r#"
<html>
<head>
<style>
:root { --font: 'TestFont'; }
body { font-family: var(--font); }
</style>
</head>
<body>
<p>日本語テキスト</p>
</body>
</html>
"#;
let css = extract_css_from_html(html);
let chars = collect_chars_per_font(html, &css);
assert!(chars.contains_key("TestFont"));
let font_chars = &chars["TestFont"];
assert!(font_chars.contains(&'日'));
assert!(font_chars.contains(&'本'));
assert!(font_chars.contains(&'語'));
}
// End-to-end with non-ASCII in the @font-face, the variable value and the text.
#[test]
fn test_analyze_fonts_unicode_everywhere() {
let html = r#"
<html>
<head>
<style>
@font-face {
font-family: '日本語フォント';
src: url('/fonts/jp.woff2');
}
:root {
--jp-font: '日本語フォント', sans-serif;
}
body {
font-family: var(--jp-font);
}
</style>
</head>
<body>
<p>こんにちは世界</p>
</body>
</html>
"#;
let css = extract_css_from_html(html);
let analysis = analyze_fonts(html, &css);
assert_eq!(analysis.font_faces.len(), 1);
assert_eq!(analysis.font_faces[0].family, "日本語フォント");
assert!(analysis.chars_per_font.contains_key("日本語フォント"));
let chars = &analysis.chars_per_font["日本語フォント"];
assert!(chars.contains(&'こ'));
assert!(chars.contains(&'世'));
assert!(chars.contains(&'界'));
}
// `var(` directly preceded by multi-byte characters: the prefix is kept intact.
#[test]
fn test_var_immediately_after_unicode() {
let value = "日本語var(--test)";
let mut vars = HashMap::new();
vars.insert("--test".to_string(), "'Result'".to_string());
let resolved = resolve_css_var(value, &vars);
assert_eq!(resolved, "日本語'Result'");
}
// Multi-byte characters on both sides of the reference and in the replacement.
#[test]
fn test_var_between_unicode() {
let value = "前var(--mid)後";
let mut vars = HashMap::new();
vars.insert("--mid".to_string(), "中".to_string());
let resolved = resolve_css_var(value, &vars);
assert_eq!(resolved, "前中後");
}
// Two references separated by multi-byte text are both replaced.
#[test]
fn test_multiple_vars_with_unicode() {
let value = "var(--a)日本語var(--b)";
let mut vars = HashMap::new();
vars.insert("--a".to_string(), "前".to_string());
vars.insert("--b".to_string(), "後".to_string());
let resolved = resolve_css_var(value, &vars);
assert_eq!(resolved, "前日本語後");
}
// Value made up entirely of four-byte characters.
#[test]
fn test_four_byte_unicode() {
let css = r#"
:root {
--emoji: '😀🎉🚀';
}
body {
font-family: var(--emoji);
}
"#;
let vars = parse_css_custom_properties(css);
assert_eq!(vars.get("--emoji"), Some(&"'😀🎉🚀'".to_string()));
let rules = parse_font_family_rules_with_vars(css, &vars);
assert_eq!(rules[0].font_family, "😀🎉🚀");
}
// Combining marks attached to ASCII letters must survive unchanged.
#[test]
fn test_zalgo_text() {
let css = r#"
:root {
--zalgo: 'H̷e̶l̵l̴o̷';
}
body {
font-family: var(--zalgo);
}
"#;
let vars = parse_css_custom_properties(css);
let rules = parse_font_family_rules_with_vars(css, &vars);
assert_eq!(rules[0].font_family, "H̷e̶l̵l̴o̷");
}
}