use std::collections::{BTreeMap, BTreeSet};
/// Everything [`scrub_font`] needs to know about one font: its width table,
/// its code-to-text map, and which character codes were removed or kept.
///
/// NOTE(review): the field names suggest a PDF font dictionary
/// (FirstChar / Widths / MissingWidth / ToUnicode) — confirm against the caller.
#[derive(Clone, Debug, PartialEq)]
pub struct FontScrubInput {
    /// Character code that the first entry of `widths` belongs to.
    pub first_char: u32,
    /// Width table; `widths[i]` is the width of code `first_char + i`.
    pub widths: Vec<f32>,
    /// Replacement value written into the width slot of every scrubbed code.
    pub missing_width: f32,
    /// Text mapping, keyed by character code.
    pub to_unicode: BTreeMap<u32, String>,
    /// Codes that were removed. A code listed here is scrubbed only if it is
    /// not also listed in `surviving_codes`.
    pub removed_codes: BTreeSet<u32>,
    /// Codes that are still in use; these are never scrubbed.
    pub surviving_codes: BTreeSet<u32>,
}
/// Output of [`scrub_font`]: the rewritten tables plus a count of what changed.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct FontScrubResult {
    /// Copy of the input width table in which the slot of every scrubbed code
    /// holds the input's `missing_width`.
    pub widths: Vec<f32>,
    /// Copy of the input text mapping without the entries of scrubbed codes.
    pub to_unicode: BTreeMap<u32, String>,
    /// Number of distinct codes scrubbed from the width table, the text
    /// mapping, or both.
    pub codes_scrubbed: usize,
}
/// Returns `true` when `code` is listed in `input.removed_codes` and is not
/// listed in `input.surviving_codes`.
///
/// A code that appears in both sets is still in use, so it is not considered
/// redaction-only.
fn is_redaction_only(code: u32, input: &FontScrubInput) -> bool {
    // Survival wins: a code that is still used somewhere must stay intact.
    if input.surviving_codes.contains(&code) {
        return false;
    }
    input.removed_codes.contains(&code)
}
/// Scrubs the width and text-mapping entries of character codes that are
/// listed as removed and not listed as surviving.
///
/// A code qualifies when it is in `input.removed_codes` and absent from
/// `input.surviving_codes` (see `is_redaction_only`). For every qualifying code:
///
/// * its slot in the width table, if it has one, is overwritten with
///   `input.missing_width`; the table keeps its length;
/// * its entry in the `to_unicode` map, if it has one, is left out of the
///   returned map.
///
/// `input` itself is not modified; the result holds fresh copies of both
/// tables. `codes_scrubbed` counts distinct codes that were scrubbed in at
/// least one of the two tables, so a code present in both is counted once.
/// Codes in `removed_codes` that appear in neither table are not counted.
///
/// Width slots that would belong to a code above `u32::MAX` (only possible
/// when `first_char` is close to `u32::MAX`) have no representable code and
/// are copied through unchanged.
pub fn scrub_font(input: &FontScrubInput) -> FontScrubResult {
    // A set rather than a counter: the same code may be hit in both tables.
    let mut scrubbed: BTreeSet<u32> = BTreeSet::new();

    let mut widths = input.widths.clone();
    // Pair each slot with its code through a bounded range instead of
    // computing `first_char + i`. That sum overflows `u32` for a large
    // `first_char`: it panics in debug builds and wraps to small codes in
    // release builds, which could scrub the wrong slot. `zip` simply stops
    // once the range runs out of representable codes.
    let slot_codes = input.first_char..=u32::MAX;
    for (code, width) in slot_codes.zip(widths.iter_mut()) {
        if is_redaction_only(code, input) {
            *width = input.missing_width;
            scrubbed.insert(code);
        }
    }

    // The text mapping is keyed by code directly, so codes outside the width
    // table's range are handled here as well.
    let mut to_unicode = BTreeMap::new();
    for (&code, text) in &input.to_unicode {
        if is_redaction_only(code, input) {
            scrubbed.insert(code);
        } else {
            to_unicode.insert(code, text.clone());
        }
    }

    FontScrubResult {
        widths,
        to_unicode,
        codes_scrubbed: scrubbed.len(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a code-to-text map from `(code, text)` pairs.
    fn unicode_map(entries: &[(u32, &str)]) -> BTreeMap<u32, String> {
        entries
            .iter()
            .map(|&(code, text)| (code, String::from(text)))
            .collect()
    }

    /// Builds a set of character codes from a slice.
    fn code_set(values: &[u32]) -> BTreeSet<u32> {
        values.iter().copied().collect()
    }

    /// A code that is removed and not surviving loses its width and mapping.
    #[test]
    fn redaction_only_code_is_scrubbed() {
        let input = FontScrubInput {
            first_char: 65,
            widths: vec![500.0, 600.0, 700.0],
            missing_width: 0.0,
            to_unicode: unicode_map(&[(65, "A"), (66, "B"), (67, "C")]),
            removed_codes: code_set(&[66]),
            surviving_codes: code_set(&[65, 67]),
        };

        let result = scrub_font(&input);

        assert_eq!(result.codes_scrubbed, 1);
        assert_eq!(result.widths, vec![500.0, 0.0, 700.0]);
        assert_eq!(result.to_unicode, unicode_map(&[(65, "A"), (67, "C")]));
    }

    /// A code listed as both removed and surviving is left alone.
    #[test]
    fn code_removed_but_also_surviving_is_kept() {
        let input = FontScrubInput {
            first_char: 0,
            widths: vec![400.0, 450.0],
            missing_width: 0.0,
            to_unicode: unicode_map(&[(0, "x"), (1, "y")]),
            removed_codes: code_set(&[0, 1]),
            surviving_codes: code_set(&[1]),
        };

        let result = scrub_font(&input);

        assert_eq!(result.codes_scrubbed, 1);
        assert_eq!(result.widths, vec![0.0, 450.0]);
        assert_eq!(result.to_unicode, unicode_map(&[(1, "y")]));
    }

    /// With no removed codes, both tables come back unchanged.
    #[test]
    fn untouched_when_nothing_removed() {
        let input = FontScrubInput {
            first_char: 32,
            widths: vec![250.0, 333.0],
            missing_width: 0.0,
            to_unicode: unicode_map(&[(32, " "), (33, "!")]),
            removed_codes: code_set(&[]),
            surviving_codes: code_set(&[32, 33]),
        };

        let result = scrub_font(&input);

        assert_eq!(result.codes_scrubbed, 0);
        assert_eq!(result.widths, input.widths);
        assert_eq!(result.to_unicode, input.to_unicode);
    }

    /// The configured `missing_width`, not a fixed zero, fills scrubbed slots.
    #[test]
    fn missing_width_value_is_used_for_reset() {
        let input = FontScrubInput {
            first_char: 10,
            widths: vec![900.0],
            missing_width: 42.0,
            to_unicode: unicode_map(&[(10, "Z")]),
            removed_codes: code_set(&[10]),
            surviving_codes: code_set(&[]),
        };

        let result = scrub_font(&input);

        assert_eq!(result.codes_scrubbed, 1);
        assert_eq!(result.widths, vec![42.0]);
        assert!(result.to_unicode.is_empty());
    }

    /// A mapping entry is scrubbed even when its code has no width slot.
    #[test]
    fn tounicode_only_code_outside_widths_range_still_scrubbed() {
        let input = FontScrubInput {
            first_char: 0,
            widths: vec![300.0],
            missing_width: 0.0,
            to_unicode: unicode_map(&[(0, "a"), (999, "secret")]),
            removed_codes: code_set(&[999]),
            surviving_codes: code_set(&[0]),
        };

        let result = scrub_font(&input);

        assert_eq!(result.codes_scrubbed, 1);
        assert_eq!(result.widths, vec![300.0]);
        assert_eq!(result.to_unicode, unicode_map(&[(0, "a")]));
    }

    /// Empty input produces the default (empty) result.
    #[test]
    fn empty_input_is_empty_result() {
        let input = FontScrubInput {
            first_char: 0,
            widths: vec![],
            missing_width: 0.0,
            to_unicode: BTreeMap::new(),
            removed_codes: BTreeSet::new(),
            surviving_codes: BTreeSet::new(),
        };

        assert_eq!(scrub_font(&input), FontScrubResult::default());
    }

    /// A code scrubbed from both tables is counted once.
    #[test]
    fn scrubbed_count_dedups_widths_and_tounicode() {
        let input = FontScrubInput {
            first_char: 5,
            widths: vec![700.0],
            missing_width: 0.0,
            to_unicode: unicode_map(&[(5, "s")]),
            removed_codes: code_set(&[5]),
            surviving_codes: code_set(&[]),
        };

        let result = scrub_font(&input);

        assert_eq!(result.codes_scrubbed, 1);
    }
}