use crate::SourceDataProvider;
use icu::collections::codepointtrie::CodePointTrie;
use icu::properties::props::Script;
use icu::properties::provider::{PropertyScriptWithExtensionsV1, ScriptWithExtensionsProperty};
use icu::properties::script::ScriptWithExt;
use icu_provider::prelude::*;
use std::collections::HashSet;
use std::convert::TryFrom;
use zerovec::{VarZeroVec, ZeroSlice, ZeroVec};
impl DataProvider<PropertyScriptWithExtensionsV1> for SourceDataProvider {
fn load(
&self,
req: DataRequest,
) -> Result<DataResponse<PropertyScriptWithExtensionsV1>, DataError> {
self.check_req::<PropertyScriptWithExtensionsV1>(req)?;
let scx_data = self
.icuexport()?
.read_and_parse_toml::<super::uprops_serde::script_extensions::Main>(&format!(
"uprops/{}/scx.toml",
self.trie_type(),
))?
.script_extensions
.first()
.ok_or_else(|| DataError::custom("Could not parse Script_Extensions data from TOML"))?;
if scx_data.long_name != "Script_Extensions" || scx_data.short_name != "scx" {
return Err(DataError::custom("Property name mismatch")
.with_marker(PropertyScriptWithExtensionsV1::INFO));
}
let cpt_data = &scx_data.code_point_trie;
let scx_array_data = &scx_data.script_code_array;
let trie = CodePointTrie::<ScriptWithExt>::try_from(cpt_data).map_err(|e| {
DataError::custom("Could not parse CodePointTrie TOML").with_display_context(&e)
})?;
let ule_scx_array_data: Vec<ZeroVec<Script>> = scx_array_data
.iter()
.map(|v| {
v.iter()
.copied()
.map(Script::from_icu4c_value)
.collect::<ZeroVec<Script>>()
})
.collect::<Vec<ZeroVec<Script>>>();
let scx_vzv: VarZeroVec<ZeroSlice<Script>> =
VarZeroVec::from(ule_scx_array_data.as_slice());
let data_struct = ScriptWithExtensionsProperty {
trie,
extensions: scx_vzv,
};
Ok(DataResponse {
metadata: Default::default(),
payload: DataPayload::from_owned(data_struct),
})
}
}
impl crate::IterableDataProviderCached<PropertyScriptWithExtensionsV1> for SourceDataProvider {
fn iter_ids_cached(&self) -> Result<HashSet<DataIdentifierCow<'static>>, DataError> {
Ok(HashSet::from_iter([Default::default()]))
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use icu::properties::script::ScriptWithExtensions;

    /// Loads `ScriptWithExtensions` through the testing `SourceDataProvider`.
    ///
    /// Panics if the data cannot be loaded, which fails the calling test.
    fn load_script_with_extensions() -> ScriptWithExtensions {
        let provider = SourceDataProvider::new_testing();
        ScriptWithExtensions::try_new_unstable(&provider).unwrap()
    }

    /// Checks the single `Script` value returned for a handful of code points,
    /// through both the `char` and the `u32` lookup.
    #[test]
    fn test_script_val_from_script_extensions() {
        let data = load_script_with_extensions();
        let scripts = data.as_borrowed();

        let by_char: [(char, Script); 3] = [
            ('𐓐', Script::Osage),  // U+104D0
            ('🥳', Script::Common), // U+1F973
            ('௫', Script::Tamil),  // U+0BEB
        ];
        for (ch, expected) in by_char {
            assert_eq!(
                scripts.get_script_val(ch),
                expected,
                "get_script_val({:?})",
                ch
            );
        }

        let by_code_point: [(u32, Script); 3] = [
            (0x200D, Script::Inherited),
            (0x11303, Script::Grantha),
            (0x30A0, Script::Common),
        ];
        for (code_point, expected) in by_code_point {
            assert_eq!(
                scripts.get_script_val32(code_point),
                expected,
                "get_script_val32(U+{:04X})",
                code_point
            );
        }
    }

    /// Checks the script extension lists returned per code point, including
    /// iteration, membership queries and an out-of-range input.
    #[test]
    fn test_scx_array_from_script_extensions() {
        let data = load_script_with_extensions();
        let scripts = data.as_borrowed();

        // Small adapters that materialize the extension list as a `Vec` so it
        // can be compared against an array literal.
        let extensions_of_char =
            |ch: char| -> Vec<Script> { scripts.get_script_extensions_val(ch).iter().collect() };
        let extensions_of = |code_point: u32| -> Vec<Script> {
            scripts
                .get_script_extensions_val32(code_point)
                .iter()
                .collect()
        };

        assert_eq!(extensions_of_char('𐓐'), [Script::Osage]); // U+104D0
        assert_eq!(extensions_of_char('🥳'), [Script::Common]); // U+1F973
        assert_eq!(extensions_of(0x200D), [Script::Inherited]);
        assert_eq!(extensions_of_char('௫'), [Script::Tamil, Script::Grantha]); // U+0BEB
        assert_eq!(extensions_of(0x11303), [Script::Tamil, Script::Grantha]);
        assert_eq!(extensions_of(0x30A0), [Script::Hiragana, Script::Katakana]);

        // The iterator can also be consumed one element at a time.
        let extensions_200d = scripts.get_script_extensions_val32(0x200D);
        assert_eq!(extensions_200d.iter().next(), Some(Script::Inherited));

        // Membership can be queried without iterating.
        assert!(scripts
            .get_script_extensions_val32(0x11303)
            .contains(&Script::Grantha));
        assert!(!scripts
            .get_script_extensions_val32(0x11303)
            .contains(&Script::Common));

        // 0x11_0000 is one past `char::MAX` (U+10FFFF); the expected answer
        // for it is `Unknown`.
        assert_eq!(extensions_of(0x11_0000), [Script::Unknown]);
    }

    /// Checks `has_script` / `has_script32` for code points with one or with
    /// several script extensions.
    ///
    /// Note that U+30A0 is expected *not* to have `Common` here, even though
    /// `test_script_val_from_script_extensions` expects `get_script_val32` to
    /// return `Common` for it.
    #[test]
    fn test_has_script() {
        let data = load_script_with_extensions();
        let scripts = data.as_borrowed();

        let by_char: [(char, Script, bool); 9] = [
            // U+104D0
            ('𐓐', Script::Osage, true),
            ('𐓐', Script::Common, false),
            ('𐓐', Script::Inherited, false),
            // U+1F973
            ('🥳', Script::Common, true),
            ('🥳', Script::Inherited, false),
            // U+0BEB
            ('௫', Script::Tamil, true),
            ('௫', Script::Grantha, true),
            ('௫', Script::Common, false),
            ('௫', Script::Inherited, false),
        ];
        for (ch, script, expected) in by_char {
            assert_eq!(
                scripts.has_script(ch, script),
                expected,
                "has_script({:?}, {:?})",
                ch,
                script
            );
        }

        let by_code_point: [(u32, Script, bool); 33] = [
            (0x200D, Script::Common, false),
            (0x200D, Script::Inherited, true),
            (0x11303, Script::Tamil, true),
            (0x11303, Script::Grantha, true),
            (0x11303, Script::Common, false),
            (0x11303, Script::Inherited, false),
            (0x30A0, Script::Hiragana, true),
            (0x30A0, Script::Katakana, true),
            (0x30A0, Script::Common, false),
            (0x30A0, Script::Inherited, false),
            (0x0964, Script::Common, false),
            (0x0964, Script::Devanagari, true),
            (0x0964, Script::Bengali, true),
            (0x063F, Script::Common, false),
            (0x063F, Script::Arabic, true),
            (0x063F, Script::Syriac, false),
            (0x063F, Script::Thaana, false),
            (0x0640, Script::Common, false),
            (0x0640, Script::Arabic, true),
            (0x0640, Script::Syriac, true),
            (0x0640, Script::Thaana, false),
            (0x0650, Script::Inherited, false),
            (0x0650, Script::Arabic, true),
            (0x0650, Script::Syriac, true),
            (0x0650, Script::Thaana, false),
            (0x0660, Script::Common, false),
            (0x0660, Script::Arabic, true),
            (0x0660, Script::Syriac, false),
            (0x0660, Script::Thaana, true),
            (0xFDF2, Script::Common, false),
            (0xFDF2, Script::Arabic, true),
            (0xFDF2, Script::Syriac, false),
            (0xFDF2, Script::Thaana, true),
        ];
        for (code_point, script, expected) in by_code_point {
            assert_eq!(
                scripts.has_script32(code_point, script),
                expected,
                "has_script32(U+{:04X}, {:?})",
                code_point,
                script
            );
        }

        // A script built from the raw value 0xAFFE must not match either.
        // NOTE(review): 0xAFFE is presumably not an assigned script code — confirm.
        assert!(!scripts.has_script32(0x0640, Script::from_icu4c_value(0xAFFE)));
    }

    /// Checks the per-script code point sets returned by
    /// `get_script_extensions_set` for several scripts.
    #[test]
    fn test_get_script_extensions_set() {
        let data = load_script_with_extensions();
        let scripts = data.as_borrowed();

        let grantha = scripts.get_script_extensions_set(Script::Grantha);
        let grantha_expectations: [(u32, bool); 13] = [
            (0x0BE5, false),
            (0x0BE6, true),
            (0x0BEB, true),
            (0x0BEF, true),
            (0x0BF2, true),
            (0x0BF3, true),
            (0x0BF4, false),
            (0x11300, true),
            (0x11301, true),
            (0x11302, true),
            (0x11303, true),
            (0x11304, false),
            (0x11305, true),
        ];
        for (code_point, expected) in grantha_expectations {
            assert_eq!(
                grantha.contains32(code_point),
                expected,
                "Grantha set, U+{:04X}",
                code_point
            );
        }

        let tamil = scripts.get_script_extensions_set(Script::Tamil);
        let tamil_expectations: [(u32, bool); 13] = [
            (0x0BE5, false),
            (0x0BE6, true),
            (0x0BEB, true),
            (0x0BEF, true),
            (0x0BF2, true),
            (0x0BF3, true),
            (0x0BF4, true),
            (0x11300, false),
            (0x11301, true),
            (0x11302, false),
            (0x11303, true),
            (0x11304, false),
            (0x11305, false),
        ];
        for (code_point, expected) in tamil_expectations {
            assert_eq!(
                tamil.contains32(code_point),
                expected,
                "Tamil set, U+{:04X}",
                code_point
            );
        }

        let hiragana = scripts.get_script_extensions_set(Script::Hiragana);
        let hiragana_expectations: [(u32, bool); 7] = [
            (0x3046, true),
            (0x309F, true),
            (0x30A0, true),
            (0x30A1, false),
            (0x30FB, true),
            (0x30FC, true),
            (0x30FD, false),
        ];
        for (code_point, expected) in hiragana_expectations {
            assert_eq!(
                hiragana.contains32(code_point),
                expected,
                "Hiragana set, U+{:04X}",
                code_point
            );
        }

        let katakana = scripts.get_script_extensions_set(Script::Katakana);
        let katakana_expectations: [(u32, bool); 7] = [
            (0x3046, false),
            (0x309F, false),
            (0x30A0, true),
            (0x30A1, true),
            (0x30FB, true),
            (0x30FC, true),
            (0x30FD, true),
        ];
        for (code_point, expected) in katakana_expectations {
            assert_eq!(
                katakana.contains32(code_point),
                expected,
                "Katakana set, U+{:04X}",
                code_point
            );
        }

        let common = scripts.get_script_extensions_set(Script::Common);
        assert!(common.contains('🥳')); // U+1F973
        for code_point in [0x200D_u32, 0x30A0, 0x0964, 0x0965] {
            assert!(
                !common.contains32(code_point),
                "Common set must not contain U+{:04X}",
                code_point
            );
        }

        let inherited = scripts.get_script_extensions_set(Script::Inherited);
        assert!(!inherited.contains('🥳')); // U+1F973
        assert!(inherited.contains32(0x200D));
        assert!(!inherited.contains32(0x30A0));

        let bangla = scripts.get_script_extensions_set(Script::Bengali);
        let bangla_expectations: [(u32, bool); 5] = [
            (0x09E7, true),
            (0x0963, false),
            (0x0964, true),
            (0x0965, true),
            (0x0966, false),
        ];
        for (code_point, expected) in bangla_expectations {
            assert_eq!(
                bangla.contains32(code_point),
                expected,
                "Bengali set, U+{:04X}",
                code_point
            );
        }

        let devanagari = scripts.get_script_extensions_set(Script::Devanagari);
        let devanagari_expectations: [(u32, bool); 5] = [
            (0x09E7, false),
            (0x0963, true),
            (0x0964, true),
            (0x0965, true),
            (0x0966, true),
        ];
        for (code_point, expected) in devanagari_expectations {
            assert_eq!(
                devanagari.contains32(code_point),
                expected,
                "Devanagari set, U+{:04X}",
                code_point
            );
        }
    }
}