autobase 0.2.1

Utilities for manipulating BASE tables for OpenType fonts
Documentation
//! This module determines the BASE table's BaseScriptRecords; that is, script-specific vertical metrics.
//!
//! It does this by examining the reports generated by fontheight, and finding the highest
//! and lowest glyphs in each script. If the user has specified particular languages to
//! separate out, we do so. We also respect any manual overrides specified in the config file.
use crate::{
    base::{BaseScript, MinMax},
    config::{Config, ScriptLanguage},
    utils::{iso15924_to_opentype, iso639_to_opentype},
};
use fontheight::{Report, WordList};
use std::collections::{BTreeMap, HashMap};

impl MinMax {
    /// Derives a `MinMax` from a single fontheight report.
    ///
    /// The report's word list is mapped to a script/language key, which is
    /// looked up in `config.r#override`. When the report has exemplars, the
    /// first highest and first lowest exemplar supply the bounds and their
    /// words; otherwise both bounds start out unset with the word `"<none>"`.
    /// Any `max` / `min` present in the override then replaces the
    /// corresponding bound, and its word becomes `"<override>"`.
    ///
    /// Returns `None` when there are no exemplars and no override, or when
    /// neither bound has a value after overrides are applied.
    fn from_report(r: Report, config: &Config) -> Option<Self> {
        let key = wordlist_script_and_language(r.word_list);
        let manual = config.r#override.get(&key);
        let has_exemplars = !r.exemplars.is_empty();
        // Nothing measured and nothing overridden: no MinMax can be produced.
        if !has_exemplars && manual.is_none() {
            return None;
        }

        let (mut highest, mut highest_word, mut lowest, mut lowest_word) = if has_exemplars {
            let top = r.exemplars.highest().first().unwrap();
            let bottom = r.exemplars.lowest().first().unwrap();
            (
                Some(top.extremes.highest() as i16),
                top.word.to_string(),
                Some(bottom.extremes.lowest() as i16),
                bottom.word.to_string(),
            )
        } else {
            (None, "<none>".to_string(), None, "<none>".to_string())
        };

        // Manual overrides take precedence over measured values, per bound.
        if let Some(max) = manual.and_then(|ov| ov.max) {
            highest = Some(max);
            highest_word = "<override>".to_string();
        }
        if let Some(min) = manual.and_then(|ov| ov.min) {
            lowest = Some(min);
            lowest_word = "<override>".to_string();
        }

        if highest.is_some() || lowest.is_some() {
            Some(MinMax {
                highest,
                highest_word,
                lowest,
                lowest_word,
            })
        } else {
            None
        }
    }

    /// Merges a slice of `MinMax` values into one, left to right.
    ///
    /// Starts from a clone of the first element and calls `merge` with each
    /// following element and the given `tolerance`. Returns `None` for an
    /// empty slice.
    fn aggregate(minmaxes: &[MinMax], tolerance: Option<u16>) -> Option<Self> {
        let (first, rest) = minmaxes.split_first()?;
        let combined = rest.iter().fold(first.clone(), |mut acc, mm| {
            acc.merge(mm, tolerance);
            acc
        });
        Some(combined)
    }
}

/// Builds the `ScriptLanguage` key for a word list.
///
/// The script falls back to `"Zzzz"` when the word list reports none; the
/// language is carried over as an owned string when present and left as
/// `None` otherwise.
fn wordlist_script_and_language(w: &WordList) -> ScriptLanguage {
    ScriptLanguage {
        script: w.script().unwrap_or("Zzzz").to_string(),
        language: w.language().map(|lang| lang.to_string()),
    }
}
pub fn base_script_record(
    script: &str,
    reports: &[Report],
    config: &Config,
    font_default: &MinMax,
) -> Option<BaseScript> {
    let Some(ot_script) = iso15924_to_opentype(script) else {
        log::warn!("Script {} does not have an OpenType tag, skipping", script);
        return None;
    };
    log::info!("Writing min-max BASE script records for script {}", script);
    log::debug!("Got {} reports", reports.len());
    log::debug!(
        "Reports: {:#?}",
        reports
            .iter()
            .map(|r| r.word_list.name().to_string())
            .collect::<Vec<_>>()
    );

    // We've received multiple reports for the script, which may be distinguished by language.
    // If the config specifies particular languages, we break them out of our computations.
    // (In the future, we might also automatically break out outliers.)
    let mut remaining_langs = vec![];
    let mut lang_specific_minmax: HashMap<String, MinMax> = HashMap::new();
    let mut split_languages: Vec<&String> = config
        .languages
        .iter()
        .filter(|sl| sl.script == script)
        .flat_map(|sl| sl.language.as_ref())
        .collect::<Vec<_>>();
    // Also split out anything manually overridden
    split_languages.extend(
        config
            .r#override
            .keys()
            .filter(|sl| sl.script == script)
            .flat_map(|sl| sl.language.as_ref()),
    );
    split_languages.sort();
    split_languages.dedup();
    log::debug!(" Splitting out languages: {:?}", split_languages);
    for report in reports.iter() {
        let Some(minmax) = MinMax::from_report(report.clone(), config) else {
            continue;
        };
        let minmax =
            minmax.with_inliers_removed(&font_default.extend(config.tolerance.unwrap_or(0)));
        if minmax.is_empty() {
            log::debug!(
                "  Skipping report for {} ({}) as within {} of font default {:?}",
                report.word_list.name(),
                report.word_list.language().unwrap_or("<none>"),
                config.tolerance.unwrap_or(0),
                font_default
            );
            continue;
        }
        if let Some(lang) = report.word_list.language() {
            if split_languages.contains(&&lang.to_string()) {
                lang_specific_minmax
                    .entry(lang.to_string())
                    .and_modify(|existing| existing.merge(&minmax, config.tolerance))
                    .or_insert(minmax);
            } else {
                remaining_langs.push(minmax);
            }
        } else {
            remaining_langs.push(minmax);
        }
    }

    let language_minmax = lang_specific_minmax
        .into_iter()
        .map(|(lang, mm)| {
            log::info!(" Language {}: {:?}", lang, mm);
            (iso639_to_opentype(&lang), mm)
        })
        .collect::<BTreeMap<_, _>>();

    let mut script_minmax = MinMax::aggregate(&remaining_langs, config.tolerance);
    if let Some(ref script_mm) = script_minmax {
        script_minmax = Some(script_mm.clone().with_inliers_removed(font_default));
    }
    log::info!(" Script {}: {:?}", script, script_minmax);
    if script_minmax.is_none() && language_minmax.is_empty() {
        log::info!(" No BASE table needed for script {}, skipping", script);
        return None;
    }
    Some(BaseScript {
        script: ot_script,
        default_baseline: None,
        baselines: BTreeMap::new(),
        default_minmax: script_minmax,
        languages: language_minmax,
    })
}