icu_datagen/transform/cldr/transforms/
mod.rs1use super::cldr_serde::transforms;
6use super::source::CldrCache;
7use crate::provider::DatagenProvider;
8use icu_experimental::transliterate::provider::*;
9use icu_experimental::transliterate::RuleCollection;
10use icu_locid::Locale;
11use icu_provider::datagen::IterableDataProvider;
12use icu_provider::prelude::*;
13use std::sync::Mutex;
14
15impl CldrCache {
16 fn transforms(&self) -> Result<&Mutex<RuleCollection>, DataError> {
17 self.transforms.get_or_try_init(|| {
18 fn find_bcp47(aliases: &[transforms::TransformAlias]) -> Option<&Locale> {
19 aliases
20 .iter()
21 .find_map(|alias| {
22 if let transforms::TransformAlias::Bcp47(locale) = alias {
23 Some(locale)
24 } else {
25 None
26 }
27 })
28 }
29
30 let mut provider = RuleCollection::default();
31
32 let transforms = &format!("cldr-transforms-{}/main", self.dir_suffix()?);
33 for transform in self.serde_cache.list(transforms)? {
34 let metadata = self
35 .serde_cache
36 .read_and_parse_json::<transforms::Resource>(&format!(
37 "{transforms}/{transform}/metadata.json"
38 ))?;
39 let source = self
40 .serde_cache
41 .root
42 .read_to_string(&format!("{transforms}/{transform}/source.txt",))?;
43
44 if matches!(
45 metadata.direction,
46 transforms::Direction::Forward | transforms::Direction::Both
47 ) {
48 if let Some(bcp47) = find_bcp47(&metadata.alias) {
49 provider.register_source(
50 bcp47,
51 source.clone(),
52 metadata
53 .alias
54 .iter()
55 .filter_map(|alias| match alias {
56 transforms::TransformAlias::LegacyId(s) => Some(s.as_str()),
57 _ => None,
58 })
59 .chain([
60 if let Some(variant) = &metadata.variant {
62 format!("{}-{}/{}", metadata.source, metadata.target, variant)
63 } else {
64 format!("{}-{}", metadata.source, metadata.target)
65 }
66 .to_ascii_lowercase()
67 .as_str(),
68 ]),
69 false,
70 metadata.visibility == transforms::Visibility::External,
71 );
72 } else {
73 log::warn!("Skipping transliterator {transform} (forward) as it does not have a BCP-47 identifier.")
74 }
75 }
76
77 if matches!(
78 metadata.direction,
79 transforms::Direction::Backward | transforms::Direction::Both
80 ) {
81 if let Some(bcp47) = find_bcp47(&metadata.backward_alias) {
82 provider.register_source(
83 bcp47,
84 source,
85 metadata
86 .backward_alias
87 .iter()
88 .filter_map(|alias| match alias {
89 transforms::TransformAlias::LegacyId(s) => Some(s.as_str()),
90 _ => None,
91 })
92 .chain([
93 if let Some(variant) = &metadata.variant {
95 format!("{}-{}/{}", metadata.target, metadata.source, variant)
96 } else {
97 format!("{}-{}", metadata.target, metadata.source)
98 }
99 .to_ascii_lowercase()
100 .as_str(),
101 ]),
102 true,
103 metadata.visibility == transforms::Visibility::External,
104 );
105 } else {
106 log::warn!("Skipping transliterator {transform} (backward) as it does not have a BCP-47 identifier.")
107 }
108 }
109 }
110 Ok(Mutex::new(provider))
111 })
112 }
113}
114
115impl DataProvider<TransliteratorRulesV1Marker> for DatagenProvider {
116 fn load(
117 &self,
118 req: DataRequest,
119 ) -> Result<DataResponse<TransliteratorRulesV1Marker>, DataError> {
120 self.check_req::<TransliteratorRulesV1Marker>(req)?;
121 self.cldr()?
122 .transforms()?
123 .lock()
124 .expect("poison")
125 .as_provider_unstable(self, self)?
126 .load(req)
127 }
128}
129
130impl IterableDataProvider<TransliteratorRulesV1Marker> for DatagenProvider {
131 fn supported_locales(&self) -> Result<Vec<DataLocale>, DataError> {
133 self.cldr()?
134 .transforms()?
135 .lock()
136 .expect("poison")
137 .as_provider_unstable(self, self)?
138 .supported_locales()
139 }
140}
141
#[cfg(test)]
mod tests {
    use super::*;

    /// Loads the transliterator rules for `locale` from the testing provider,
    /// panicking if the locale does not parse or the load yields no payload.
    fn load_rules(locale: &str) -> DataPayload<TransliteratorRulesV1Marker> {
        let provider = DatagenProvider::new_testing();
        let locale: DataLocale = locale.parse().unwrap();
        let request = DataRequest {
            locale: &locale,
            metadata: Default::default(),
        };

        provider.load(request).unwrap().take_payload().unwrap()
    }

    #[test]
    fn test_de_ascii_forward() {
        let _data = load_rules("und-x-de-t-de-d0-ascii");
    }

    #[test]
    fn test_latin_ascii_backward() {
        let _data = load_rules("und-x-und-Latn-t-s0-ascii");
    }
}