use std::{
collections::{BTreeMap, HashMap, HashSet},
hash::Hash,
};
use icu_properties::{
CodePointMapData, PropertyNamesShort, PropertyParser,
props::{BidiClass, Script},
};
use tinystr::tinystr;
use write_fonts::{
read::{ReadError, collections::IntSet, tables::gsub::Gsub},
types::{GlyphId, GlyphId16, Tag},
};
use crate::features::ot_tags::{NEW_SCRIPTS, SCRIPT_ALIASES, SCRIPT_EXCEPTIONS_REVERSED};
use super::ot_tags::{DFLT_SCRIPT, INDIC_SCRIPTS, NEW_SCRIPT_TAGS, SCRIPT_EXCEPTIONS, USE_SCRIPTS};
/// Short name of the Unicode 'Common' script.
pub const COMMON_SCRIPT: UnicodeShortName = tinystr!(4, "Zyyy");
/// Short name of the Unicode 'Inherited' script.
pub const INHERITED_SCRIPT: UnicodeShortName = tinystr!(4, "Zinh");
/// Short name of the Hiragana script.
pub const HIRA: UnicodeShortName = tinystr!(4, "Hira");
/// Short name of the Katakana script.
pub const KANA: UnicodeShortName = tinystr!(4, "Kana");
/// Short name used in place of both [`HIRA`] and [`KANA`] when listing script extensions.
pub const HRKT: UnicodeShortName = tinystr!(4, "Hrkt");
/// A four-byte ASCII string holding a Unicode script short name, such as `"Latn"`.
pub type UnicodeShortName = tinystr::TinyAsciiStr<4>;
/// The horizontal writing direction associated with a script.
///
/// The variant order is significant: the derived ordering sorts `Auto` first,
/// then `LeftToRight`, then `RightToLeft`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ScriptDirection {
    /// No fixed direction.
    Auto,
    /// Text runs from left to right.
    LeftToRight,
    /// Text runs from right to left.
    RightToLeft,
}
/// A source of glyph/codepoint associations.
pub trait CharMap {
    /// Iterate over `(glyph id, codepoint)` pairs.
    fn iter_glyphs(&self) -> impl Iterator<Item = (GlyphId16, u32)>;
}

/// A map keyed by codepoint, walked with the pair order flipped to glyph-first.
impl CharMap for HashMap<u32, GlyphId16> {
    fn iter_glyphs(&self) -> impl Iterator<Item = (GlyphId16, u32)> {
        self.iter().map(|(&codepoint, &gid)| (gid, codepoint))
    }
}
impl ScriptDirection {
pub(crate) fn for_script(script: &UnicodeShortName) -> Self {
match script.as_str() {
"Zyyy" => ScriptDirection::Auto,
"Arab" | "Hebr" | "Syrc" | "Thaa" | "Cprt" | "Khar" | "Phnx" | "Nkoo" | "Lydi"
| "Avst" | "Armi" | "Phli" | "Prti" | "Sarb" | "Orkh" | "Samr" | "Mand" | "Merc"
| "Mero" | "Mani" | "Mend" | "Nbat" | "Narb" | "Palm" | "Phlp" | "Hatr" | "Hung"
| "Adlm" | "Rohg" | "Sogo" | "Sogd" | "Elym" | "Chrs" | "Yezi" | "Ougr" => {
ScriptDirection::RightToLeft
}
_ => ScriptDirection::LeftToRight,
}
}
pub(crate) fn plays_nicely_with(&self, other: &ScriptDirection) -> bool {
matches!(
(self, other),
(ScriptDirection::Auto, _)
| (_, ScriptDirection::Auto)
| (ScriptDirection::LeftToRight, ScriptDirection::LeftToRight)
| (ScriptDirection::RightToLeft, ScriptDirection::RightToLeft)
)
}
}
/// Reduce the bidi class of a codepoint to a plain left/right answer.
///
/// Returns `Some(BidiClass::RightToLeft)` for the `RightToLeft` and `ArabicLetter`
/// classes, `Some(BidiClass::LeftToRight)` for the `LeftToRight`, `ArabicNumber` and
/// `EuropeanNumber` classes, and `None` for every other class.
fn unicode_bidi_type(c: u32) -> Option<BidiClass> {
    let class = CodePointMapData::<BidiClass>::new().get32(c);
    if matches!(class, BidiClass::RightToLeft | BidiClass::ArabicLetter) {
        return Some(BidiClass::RightToLeft);
    }
    if matches!(
        class,
        BidiClass::LeftToRight | BidiClass::ArabicNumber | BidiClass::EuropeanNumber
    ) {
        return Some(BidiClass::LeftToRight);
    }
    None
}
/// Group the glyphs of `char_map` by the properties of their codepoints.
///
/// `props_fn` is called once per mapped codepoint and pushes that codepoint's
/// properties into the provided buffer. A glyph whose codepoint yields no properties
/// is treated as 'neutral' and does not appear under any property.
///
/// When `gsub` is provided, each property's glyph set is additionally extended with
/// the glyphs produced by running the GSUB glyph closure over that set plus the
/// neutral glyphs, skipping glyphs that belong to the (closed) neutral set.
///
/// # Errors
///
/// Propagates any [`ReadError`] raised while reading the GSUB table.
fn classify<T, F, CM>(
    char_map: &CM,
    mut props_fn: F,
    gsub: Option<&Gsub>,
) -> Result<BTreeMap<T, IntSet<GlyphId16>>, ReadError>
where
    T: Ord + Eq,
    F: FnMut(u32, &mut Vec<T>),
    CM: CharMap,
{
    let mut by_prop: BTreeMap<T, IntSet<GlyphId16>> = BTreeMap::new();
    let mut neutral = IntSet::new();
    // reused across iterations; always empty when handed to `props_fn`
    let mut scratch = Vec::new();

    for (gid, codepoint) in char_map.iter_glyphs() {
        props_fn(codepoint, &mut scratch);
        if scratch.is_empty() {
            neutral.insert(gid.into());
            continue;
        }
        for prop in scratch.drain(..) {
            by_prop.entry(prop).or_default().insert(gid);
        }
    }

    // without GSUB there are no substitutions to follow
    let Some(gsub) = gsub else {
        return Ok(by_prop);
    };

    let lookups = gsub.collect_lookups(&IntSet::all())?;
    // close the neutral set first, so anything reachable from neutral glyphs
    // alone is excluded from the per-property sets below
    gsub.closure_glyphs(&lookups, &mut neutral)?;
    for members in by_prop.values_mut() {
        let mut reachable: IntSet<GlyphId> = members.iter().map(|g| g.into()).collect();
        reachable.union(&neutral);
        gsub.closure_glyphs(&lookups, &mut reachable)?;
        members.extend(
            reachable
                .iter()
                .filter(|gid| !neutral.contains(*gid))
                .map(|g| g.try_into().unwrap()),
        );
    }
    Ok(by_prop)
}
/// Return the set of glyphs for which `predicate` holds.
///
/// The predicate is evaluated on each mapped codepoint; returning `None` leaves the
/// glyph unclassified. Classification (including the optional GSUB handling) is
/// delegated to `classify`, and the glyphs grouped under `true` are returned. The
/// result is empty if nothing matched.
///
/// # Errors
///
/// Propagates any [`ReadError`] returned by `classify`.
pub(crate) fn glyphs_matching_predicate(
    glyphs: &impl CharMap,
    predicate: impl Fn(u32) -> Option<bool>,
    gsub: Option<&Gsub>,
) -> Result<IntSet<GlyphId16>, ReadError> {
    let mut by_verdict = classify::<bool, _, _>(
        glyphs,
        |codepoint, verdicts| verdicts.extend(predicate(codepoint)),
        gsub,
    )?;
    Ok(by_verdict.remove(&true).unwrap_or_default())
}
/// Iterate over the short names of the script extensions of a codepoint.
///
/// Scripts without a usable short name are skipped. `Hira` and `Kana` are both
/// reported as `Hrkt`, and `Hrkt` is yielded at most once.
pub(crate) fn unicode_script_extensions(cp: u32) -> impl Iterator<Item = UnicodeShortName> {
    let mut emitted_hrkt = false;
    icu_properties::script::ScriptWithExtensions::new()
        .get_script_extensions_val32(cp)
        .iter()
        .flat_map(get_script_short_name)
        .filter_map(move |script| {
            if script != HIRA && script != KANA {
                return Some(script);
            }
            // both kana scripts collapse into one entry; drop any repeats
            if emitted_hrkt {
                return None;
            }
            emitted_hrkt = true;
            Some(HRKT)
        })
}
/// Map each glyph to the set of scripts it is associated with.
///
/// If `known_scripts` is empty, every mapped codepoint is assigned the common
/// script. Otherwise a codepoint keeps those of its script extensions that are the
/// common script, the inherited script, or a member of `known_scripts`.
///
/// # Errors
///
/// Propagates any [`ReadError`] returned by `classify`.
pub(crate) fn scripts_by_glyph(
    glyphs: &impl CharMap,
    known_scripts: &HashSet<UnicodeShortName>,
    gsub: Option<&Gsub>,
) -> Result<HashMap<GlyphId16, HashSet<UnicodeShortName>>, ReadError> {
    let by_script = classify(
        glyphs,
        |cp, scripts| {
            if known_scripts.is_empty() {
                scripts.push(COMMON_SCRIPT);
                return;
            }
            let relevant = unicode_script_extensions(cp).filter(|sc| {
                [COMMON_SCRIPT, INHERITED_SCRIPT].contains(sc) || known_scripts.contains(sc)
            });
            scripts.extend(relevant);
        },
        gsub,
    )?;

    // invert script -> glyphs into glyph -> scripts
    let mut result: HashMap<GlyphId16, HashSet<UnicodeShortName>> = HashMap::new();
    for (script, members) in by_script {
        for gid in members.iter() {
            result.entry(gid).or_default().insert(script);
        }
    }
    Ok(result)
}
/// Group glyphs by the simplified bidi class of their codepoints.
///
/// Codepoints for which `unicode_bidi_type` returns `None` contribute no class.
///
/// # Errors
///
/// Propagates any [`ReadError`] returned by `classify`.
pub(crate) fn glyphs_by_bidi_class(
    glyphs: &impl CharMap,
    gsub: Option<&Gsub>,
) -> Result<BTreeMap<BidiClass, IntSet<GlyphId16>>, ReadError> {
    let bidi_props = |cp: u32, classes: &mut Vec<BidiClass>| {
        if let Some(class) = unicode_bidi_type(cp) {
            classes.push(class);
        }
    };
    classify(glyphs, bidi_props, gsub)
}
/// Group glyphs by the writing direction of their codepoint's script.
///
/// Codepoints for which `unicode_script_direction` returns `None` contribute no
/// direction.
///
/// # Errors
///
/// Propagates any [`ReadError`] returned by `classify`.
pub(crate) fn glyphs_by_script_direction(
    glyphs: &impl CharMap,
    gsub: Option<&Gsub>,
) -> Result<BTreeMap<ScriptDirection, IntSet<GlyphId16>>, ReadError> {
    let direction_props = |cp: u32, directions: &mut Vec<ScriptDirection>| {
        if let Some(direction) = unicode_script_direction(cp) {
            directions.push(direction);
        }
    };
    classify(glyphs, direction_props, gsub)
}
/// Return the writing direction of the script a codepoint belongs to.
///
/// Returns `None` if the script has no short name, or if it is the common or
/// inherited script. Otherwise the result is `RightToLeft` for right-to-left
/// scripts and `LeftToRight` for everything else.
fn unicode_script_direction(cp: u32) -> Option<ScriptDirection> {
    let script = script_for_codepoint(cp)?;
    if script == COMMON_SCRIPT || script == INHERITED_SCRIPT {
        return None;
    }
    let direction = match ScriptDirection::for_script(&script) {
        ScriptDirection::RightToLeft => ScriptDirection::RightToLeft,
        _ => ScriptDirection::LeftToRight,
    };
    Some(direction)
}
/// Return the union of `INDIC_SCRIPTS`, `USE_SCRIPTS`, `"Khmr"` and `"Mymr"`.
///
/// # Panics
///
/// Panics if any of the names is not a valid [`UnicodeShortName`].
pub(crate) fn dist_feature_enabled_scripts() -> HashSet<UnicodeShortName> {
    let names = INDIC_SCRIPTS
        .iter()
        .chain(USE_SCRIPTS)
        .chain(["Khmr", "Mymr"].iter());

    let mut scripts = HashSet::new();
    for name in names {
        scripts.insert(UnicodeShortName::try_from_str(name).unwrap());
    }
    scripts
}
/// Keyed lookup in a slice of `(key, value)` pairs that is sorted by key.
trait BinarySearchExact<T, U> {
    /// Return a clone of the value stored under `needle`, or `None` if no entry
    /// has exactly that key.
    fn binary_search_exact(&self, needle: &T) -> Option<U>;
}

impl<T: Ord + Eq, U: Clone> BinarySearchExact<T, U> for &[(T, U)] {
    fn binary_search_exact(&self, needle: &T) -> Option<U> {
        let idx = self.binary_search_by(|(key, _)| key.cmp(needle)).ok()?;
        Some(self[idx].1.clone())
    }
}
/// Return the short name of the script a codepoint belongs to, if it has one.
fn script_for_codepoint(cp: u32) -> Option<UnicodeShortName> {
    let script = icu_properties::script::ScriptWithExtensions::new().get_script_val32(cp);
    get_script_short_name(script)
}
/// Return the short name of `script` as a four-byte ASCII string.
///
/// Returns `None` if no short name is registered for the script, or if the name
/// cannot be stored in four ASCII bytes.
pub(crate) fn get_script_short_name(script: Script) -> Option<UnicodeShortName> {
    let short_name = PropertyNamesShort::<Script>::new().get(script)?;
    tinystr::TinyStr4::try_from_str(short_name).ok()
}
/// Convert an OpenType script tag to a Unicode script short name.
///
/// Returns `None` for the `DFLT` tag. Otherwise the tag is first replaced by its
/// entry in `SCRIPT_ALIASES` (if any), then looked up in `SCRIPT_EXCEPTIONS_REVERSED`
/// and `NEW_SCRIPTS`, in that order. A tag found in neither table is converted
/// algorithmically.
///
/// # Panics
///
/// Panics if a name stored in the lookup tables is not a valid [`UnicodeShortName`].
pub(crate) fn ot_tag_to_script(script_tag: Tag) -> Option<UnicodeShortName> {
    const DFLT: Tag = Tag::new(b"DFLT");
    if script_tag == DFLT {
        return None;
    }

    let canonical = SCRIPT_ALIASES
        .binary_search_exact(&script_tag)
        .unwrap_or(script_tag);
    let table_entry = SCRIPT_EXCEPTIONS_REVERSED
        .binary_search_exact(&canonical)
        .or_else(|| NEW_SCRIPTS.binary_search_exact(&canonical));

    let script = match table_entry {
        Some(name) => UnicodeShortName::try_from_str(name).unwrap(),
        None => ot_tag_to_unicode_short_name(canonical),
    };
    Some(script)
}
/// Algorithmically convert an OpenType script tag to a Unicode script short name.
///
/// The first byte is upper-cased, and every space after it is replaced with the
/// most recent non-space byte, so `nko ` becomes `Nkoo`.
fn ot_tag_to_unicode_short_name(tag: Tag) -> UnicodeShortName {
    let raw = tag.into_bytes();
    let mut name = [raw[0].to_ascii_uppercase(), b'\0', b'\0', b'\0'];
    // the byte used to fill in for any space we encounter
    let mut fill = raw[1];
    for (dst, &src) in name.iter_mut().zip(&raw).skip(1) {
        if src != b' ' {
            fill = src;
        }
        *dst = fill;
    }
    UnicodeShortName::try_from_raw(name).expect("cannot fail, as tag cannot have leading nul byte")
}
/// Return the OpenType script tag(s) for a Unicode script short name.
///
/// - A script listed in `SCRIPT_EXCEPTIONS` yields the tag recorded there.
/// - A name that is not recognized as a script yields `DFLT_SCRIPT`.
/// - Any other script yields its `NEW_SCRIPT_TAGS` entry (if it has one) followed
///   by the lower-cased short name.
pub(crate) fn script_to_ot_tags(script: &UnicodeShortName) -> impl Iterator<Item = Tag> + use<> {
    let name = script.as_str();
    let tags = if let Some(tag) = SCRIPT_EXCEPTIONS.binary_search_exact(&name) {
        [Some(tag), None]
    } else if PropertyParser::<Script>::new().get_strict(script).is_none() {
        [Some(DFLT_SCRIPT), None]
    } else {
        [
            NEW_SCRIPT_TAGS.binary_search_exact(&name),
            Some(Tag::new(script.to_owned().to_ascii_lowercase().all_bytes())),
        ]
    };
    tags.into_iter().flatten()
}
#[cfg(test)]
mod tests {
    use write_fonts::read::FontRead;

    use super::*;

    /// The lookup tables are queried with a binary search, so they must be sorted by key.
    #[test]
    fn const_arrays_are_sorted() {
        fn get_original_and_sorted_items<T: Clone + Ord + Eq, U>(
            items: &[(T, U)],
        ) -> (Vec<T>, Vec<T>) {
            let originals = items.iter().map(|(a, _)| a.clone()).collect::<Vec<_>>();
            let mut sorted = originals.clone();
            sorted.sort();
            (originals, sorted)
        }

        let (actual, expected) = get_original_and_sorted_items(SCRIPT_ALIASES);
        assert_eq!(actual, expected);
        let (actual, expected) = get_original_and_sorted_items(SCRIPT_EXCEPTIONS_REVERSED);
        assert_eq!(actual, expected);
        let (actual, expected) = get_original_and_sorted_items(NEW_SCRIPTS);
        assert_eq!(actual, expected);
        let (actual, expected) = get_original_and_sorted_items(NEW_SCRIPT_TAGS);
        assert_eq!(actual, expected);
        let (actual, expected) = get_original_and_sorted_items(SCRIPT_EXCEPTIONS);
        assert_eq!(actual, expected);
    }

    #[test]
    fn raw_tag_conversion() {
        assert_eq!(ot_tag_to_unicode_short_name(Tag::new(b"deva")), "Deva");
        // a tag is always four bytes; shorter tags are padded with trailing spaces
        assert_eq!(ot_tag_to_unicode_short_name(Tag::new(b"yi  ")), "Yiii");
        assert_eq!(ot_tag_to_unicode_short_name(Tag::new(b"nko ")), "Nkoo");
    }

    #[test]
    fn expected_unicode_script_overrides() {
        let mut apostrophemod: Vec<_> = unicode_script_extensions(0x2bc).collect();
        apostrophemod.sort();
        assert_eq!(
            apostrophemod,
            ["Beng", "Cyrl", "Deva", "Latn", "Lisu", "Thai", "Toto",]
        );

        let other = unicode_script_extensions(0x0ce6);
        assert_eq!(other.collect::<Vec<_>>(), ["Knda", "Nand", "Tutg"]);
    }

    /// A ligature needing a glyph that is not in the charmap must not be followed.
    #[test]
    fn glyphs_matching_predicate_behaves_like_ufo2ft() {
        use write_fonts::tables::{gsub as wgsub, layout as wlayout};

        let a_gid = GlyphId16::new(0);
        let b_gid = GlyphId16::new(1);
        let neutral_gid = GlyphId16::new(2);

        // a single ligature lookup: a + neutral -> b
        let coverage = [a_gid].into_iter().collect();
        let lig_set = wgsub::LigatureSet::new(vec![wgsub::Ligature::new(b_gid, vec![neutral_gid])]);
        let subtable = wgsub::LigatureSubstFormat1::new(coverage, vec![lig_set]);
        let lookup = wlayout::Lookup::new(Default::default(), vec![subtable]);
        let lookup_list = wgsub::SubstitutionLookupList::new(vec![lookup.into()]);
        let features = wlayout::FeatureList::new(vec![wlayout::FeatureRecord::new(
            Tag::new(b"derp"),
            wlayout::Feature::new(None, vec![0]),
        )]);
        let gsub = wgsub::Gsub::new(Default::default(), features, lookup_list);
        let bytes = write_fonts::dump_table(&gsub).unwrap();
        let read_gsub =
            write_fonts::read::tables::gsub::Gsub::read(bytes.as_slice().into()).unwrap();

        // only 'a' is mapped, so the ligature's second component is never available
        let charmap = HashMap::from([('a' as u32, a_gid)]);
        let reachable_from_a =
            glyphs_matching_predicate(&charmap, |uv| Some(uv == 'a' as u32), Some(&read_gsub))
                .unwrap();
        assert!(reachable_from_a.contains(a_gid) && reachable_from_a.len() == 1);
    }

    #[test]
    fn script_direction_smoke_test() {
        assert_eq!(
            unicode_script_direction('a' as u32),
            Some(ScriptDirection::LeftToRight)
        );
        assert_eq!(
            unicode_script_direction('ء' as u32),
            Some(ScriptDirection::RightToLeft)
        );
        assert_eq!(unicode_script_direction(' ' as u32), None)
    }

    /// Hiragana and Katakana extensions are reported as a single `Hrkt` entry.
    #[test]
    fn aliases_for_hira_kata() {
        let cp = '\u{30a0}';
        assert_eq!(
            unicode_script_extensions(cp as _).collect::<Vec<_>>(),
            [HRKT]
        );
    }
}