icu_datagen/transform/cldr/transforms/
mod.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use super::cldr_serde::transforms;
6use super::source::CldrCache;
7use crate::provider::DatagenProvider;
8use icu_experimental::transliterate::provider::*;
9use icu_experimental::transliterate::RuleCollection;
10use icu_locid::Locale;
11use icu_provider::datagen::IterableDataProvider;
12use icu_provider::prelude::*;
13use std::sync::Mutex;
14
15impl CldrCache {
16    fn transforms(&self) -> Result<&Mutex<RuleCollection>, DataError> {
17        self.transforms.get_or_try_init(|| {
18            fn find_bcp47(aliases: &[transforms::TransformAlias]) -> Option<&Locale> {
19                aliases
20                    .iter()
21                    .find_map(|alias| {
22                        if let transforms::TransformAlias::Bcp47(locale) = alias {
23                            Some(locale)
24                        } else {
25                            None
26                        }
27                    })
28            }
29
30            let mut provider = RuleCollection::default();
31
32            let transforms = &format!("cldr-transforms-{}/main", self.dir_suffix()?);
33            for transform in self.serde_cache.list(transforms)? {
34                let metadata = self
35                    .serde_cache
36                    .read_and_parse_json::<transforms::Resource>(&format!(
37                        "{transforms}/{transform}/metadata.json"
38                    ))?;
39                let source = self
40                    .serde_cache
41                    .root
42                    .read_to_string(&format!("{transforms}/{transform}/source.txt",))?;
43
44                if matches!(
45                    metadata.direction,
46                    transforms::Direction::Forward | transforms::Direction::Both
47                ) {
48                    if let Some(bcp47) = find_bcp47(&metadata.alias) {
49                        provider.register_source(
50                            bcp47,
51                            source.clone(),
52                            metadata
53                                .alias
54                                .iter()
55                                .filter_map(|alias| match alias {
56                                    transforms::TransformAlias::LegacyId(s) => Some(s.as_str()),
57                                    _ => None,
58                                })
59                                .chain([
60                                    // source, target, and variant may also be used
61                                    if let Some(variant) = &metadata.variant {
62                                        format!("{}-{}/{}", metadata.source, metadata.target, variant)
63                                    } else {
64                                        format!("{}-{}", metadata.source, metadata.target)
65                                    }
66                                    .to_ascii_lowercase()
67                                    .as_str(),
68                                ]),
69                            false,
70                            metadata.visibility == transforms::Visibility::External,
71                        );
72                    } else {
73                        log::warn!("Skipping transliterator {transform} (forward) as it does not have a BCP-47 identifier.")
74                    }
75                }
76
77                if matches!(
78                    metadata.direction,
79                    transforms::Direction::Backward | transforms::Direction::Both
80                ) {
81                    if let Some(bcp47) = find_bcp47(&metadata.backward_alias) {
82                        provider.register_source(
83                            bcp47,
84                            source,
85                            metadata
86                                .backward_alias
87                                .iter()
88                                .filter_map(|alias| match alias {
89                                    transforms::TransformAlias::LegacyId(s) => Some(s.as_str()),
90                                    _ => None,
91                                })
92                                .chain([
93                                    // source, target, and variant may also be used
94                                    if let Some(variant) = &metadata.variant {
95                                        format!("{}-{}/{}", metadata.target, metadata.source, variant)
96                                    } else {
97                                        format!("{}-{}", metadata.target, metadata.source)
98                                    }
99                                    .to_ascii_lowercase()
100                                    .as_str(),
101                                ]),
102                            true,
103                            metadata.visibility == transforms::Visibility::External,
104                        );
105                    } else {
106                        log::warn!("Skipping transliterator {transform} (backward) as it does not have a BCP-47 identifier.")
107                    }
108                }
109            }
110            Ok(Mutex::new(provider))
111        })
112    }
113}
114
115impl DataProvider<TransliteratorRulesV1Marker> for DatagenProvider {
116    fn load(
117        &self,
118        req: DataRequest,
119    ) -> Result<DataResponse<TransliteratorRulesV1Marker>, DataError> {
120        self.check_req::<TransliteratorRulesV1Marker>(req)?;
121        self.cldr()?
122            .transforms()?
123            .lock()
124            .expect("poison")
125            .as_provider_unstable(self, self)?
126            .load(req)
127    }
128}
129
130impl IterableDataProvider<TransliteratorRulesV1Marker> for DatagenProvider {
131    // Don't do caching for this one. It uses its own mutex
132    fn supported_locales(&self) -> Result<Vec<DataLocale>, DataError> {
133        self.cldr()?
134            .transforms()?
135            .lock()
136            .expect("poison")
137            .as_provider_unstable(self, self)?
138            .supported_locales()
139    }
140}
141
#[cfg(test)]
mod tests {
    use super::*;

    /// Loads the transliterator rules for `locale` from the testing provider, panicking if
    /// the locale fails to parse, the load fails, or the response has no payload.
    fn assert_rules_load(locale: &str) {
        let provider = DatagenProvider::new_testing();
        let locale: DataLocale = locale.parse().unwrap();
        let request = DataRequest {
            locale: &locale,
            metadata: Default::default(),
        };

        let _payload: DataPayload<TransliteratorRulesV1Marker> =
            provider.load(request).unwrap().take_payload().unwrap();
    }

    #[test]
    fn test_de_ascii_forward() {
        assert_rules_load("und-x-de-t-de-d0-ascii");
    }

    #[test]
    fn test_latin_ascii_backward() {
        assert_rules_load("und-x-und-Latn-t-s0-ascii");
    }
}