glossa_codegen/generator/
output_phf.rs

1use std::{
2  fs::File,
3  io::{self, BufWriter, Write},
4};
5
6use glossa_shared::{
7  PhfTupleKey, ToCompactString, fmt_compact,
8  phf_triple_key::RawTripleKey,
9  tap::{Pipe, Tap},
10};
11use phf_codegen::OrderedMap;
12use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
13
14use crate::{
15  MiniStr,
16  generator::{
17    Generator, MapType, flattening::L10nBTreeMap, output_bincode::create_buf_writer,
18  },
19};
20
21impl<'h> Generator<'h> {
22  /// Collect all localized resources into a **`const phf::OrderedMap`**
23  /// function, i.e., a single table can accommodate different `language`,
24  /// `map_name`, and `map_key`.
25  ///
26  ///
27  /// ## Example
28  ///
29  /// ```no_run
30  /// use glossa_codegen::{L10nResources, Generator, generator::MapType};
31  /// use glossa_shared::tap::Pipe;
32  ///
33  /// let data = L10nResources::new("../../locales/")
34  ///   .with_include_map_names(["error"])
35  ///   .with_include_languages([
36  ///     "de",
37  ///     "zh-pinyin",
38  ///     "zh",
39  ///     "pt",
40  ///     "es",
41  ///     "en",
42  ///     "en-GB",
43  /// ]);
44  ///
45  /// let function_data = Generator::default()
46  ///   .with_resources(data)
47  ///   .output_phf_all_in_one(MapType::Regular)?;
48  ///
49  /// # Ok::<(), std::io::Error>(())
50  /// ```
51  ///
52  /// ### function data:
53  ///
54  /// ```ignore
55  /// // glossa_shared::{phf, PhfL10nAllInOneMap, PhfTripleKey};
56  ///
57  /// pub(crate) const fn map() -> super::PhfL10nAllInOneMap {
58  ///     use super::PhfTripleKey as Key;
59  ///     super::phf::OrderedMap {
60  ///       key: 12913932095322966823,
61  ///       disps: &[(2, 3), (2, 0)],
62  ///       idxs: &[5, 4, 0, 6, 3, 2, 1],
63  ///       entries: &[
64  ///         (
65  ///           Key(r#"de"#, r##"error"##, r###"text-not-found"###),
66  ///           r#####"Kein lokalisierter Text gefunden"#####,
67  ///         ),
68  ///         (
69  ///           Key(r#"en"#, r##"error"##, r###"text-not-found"###),
70  ///           r#####"No localized text found"#####,
71  ///         ),
72  ///         (
73  ///           Key(r#"en-GB"#, r##"error"##, r###"text-not-found"###),
74  ///           r#####"No localised text found"#####,
75  ///         ),
76  ///         (
77  ///           Key(r#"es"#, r##"error"##, r###"text-not-found"###),
78  ///           r#####"No se encontró texto localizado"#####,
79  ///         ),
80  ///         (
81  ///           Key(r#"pt"#, r##"error"##, r###"text-not-found"###),
82  ///           r#####"Nenhum texto localizado encontrado"#####,
83  ///         ),
84  ///         (
85  ///           Key(r#"zh"#, r##"error"##, r###"text-not-found"###),
86  ///           r#####"未找到本地化文本"#####,
87  ///         ),
88  ///         (
89  ///           Key(r#"zh-Latn-CN"#, r##"error"##, r###"text-not-found"###),
90  ///           r#####"MeiYou ZhaoDao BenDiHua WenBen"#####,
91  ///         ),
92  ///       ],
93  ///     }
94  /// }
95  /// ```
96  ///
97  /// ### Get Text
98  ///
99  /// ```ignore
100  /// use glossa_shared::PhfTripleKey;
101  ///
102  /// fn test_get_text() {
103  ///     let map = map();
104  ///     let get_text =
105  ///       |language| map.get(&PhfTripleKey(language, "error", "text-not-found"));
106  ///
107  ///     let zh_text = get_text("zh");
108  ///     assert_eq!(zh_text, Some(&"未找到本地化文本"));
109  ///
110  ///     let language_chain = ["gsw", "de-CH", "de", "en"];
111  ///
112  ///     let text = language_chain
113  ///       .into_iter()
114  ///       .find_map(get_text);
115  ///     assert_eq!(text, Some(&"Kein lokalisierter Text gefunden"));
116  /// }
117  /// ```
118  pub fn output_phf_all_in_one(&'h self, non_dsl: MapType) -> io::Result<String> {
119    let vis_fn = self.get_visibility().as_str();
120
121    non_dsl
122      .get_non_dsl_maps(self)?
123      .iter()
124      .filter(|(_, data)| !data.is_empty())
125      .flat_map(|(lang, map_entry)| {
126        map_entry
127          .iter()
128          .map(|((name, k), v)| {
129            let new_key =
130              RawTripleKey(lang.to_compact_string(), name.as_str(), k.as_str());
131            let value = fmt_compact!(r##########"r#####"{v}"#####"##########);
132            (new_key, value)
133          })
134      })
135      .fold(OrderedMap::new(), |mut acc, (k, v)| {
136        acc.entry(k, &v);
137        acc
138      })
139      .pipe_ref_mut(|ordered_map| {
140        format!(
141          r#"{vis_fn} const fn map() -> super::PhfL10nAllInOneMap {{
142      use super::PhfTripleKey as Key;
143        {code}  }}"#,
144          code = ordered_map
145            .phf_path("super::phf")
146            .build()
147        )
148      })
149      .pipe(Ok)
150  }
151
152  /// Generates Perfect Hash Function (PHF) maps for localization data.
153  /// => `const fn map() -> super::PhfL10nOrderedMap`
154  ///
155  /// # Behavior
156  ///
157  /// - Processes non-DSL maps in parallel
158  /// - Filters out empty localization datasets
159  /// - Generates PHF maps preserving insertion order
160  /// - Creates individual Rust module files per language
161  ///
162  /// # Errors
163  ///
164  /// Returns [`io::Result`] for file I/O operations failures
165  pub fn output_phf(&'h self, non_dsl: MapType) -> io::Result<()> {
166    let vis_fn = self.get_visibility().as_str();
167
168    non_dsl
169      .get_non_dsl_maps(self)?
170      .par_iter()
171      .filter(|(_, data)| !data.is_empty())
172      .map(|(lang, map_entry)| {
173        let new_phf_map = assemble_phf_map(map_entry);
174        (lang, new_phf_map)
175      })
176      .try_for_each(|(lang, mut map)| {
177        writeln!(
178          &mut self.create_rs_mod_file(lang)?,
179          r##"{vis_fn} const fn map() -> super::PhfL10nOrderedMap {{
180          use super::PhfTupleKey as Key;
181          {code}  }}"##,
182          code = map
183            .phf_path("super::phf")
184            .build()
185        )
186      })
187  }
188
189  /// Generates Perfect Hash Function (PHF) maps for localization data.
190  /// => `const fn map() -> super::OrderedMap<&'static str, &'static str>`
191  ///
192  /// > Note: The generated PHF map is only for localization data where map_name
193  /// > can be ignored. If map_name cannot be ignored, please use
194  /// > [output_phf](Self::output_phf).
195  pub fn output_phf_by_key(&'h self, non_dsl: MapType) -> io::Result<()> {
196    let vis_fn = self.get_visibility().as_str();
197
198    non_dsl
199      .get_non_dsl_maps(self)?
200      .par_iter()
201      .filter(|(_, data)| !data.is_empty())
202      .map(|(lang, map_entry)| {
203        let new_phf_map = map_entry
204          .iter()
205          .map(|((_name, k), v)| {
206            let value = fmt_compact!(r##########"r#####"{v}"#####"##########);
207            (k.as_str(), value)
208          })
209          .fold(OrderedMap::new(), |mut acc, (k,v)| {
210            acc.entry(k, &v);
211            acc
212          });
213        (lang, new_phf_map)
214      })
215      .try_for_each(|(lang, mut map)| {
216        writeln!(
217          &mut self.create_rs_mod_file(lang)?,
218          r##"{vis_fn} const fn map() -> super::OrderedMap<&'static str, &'static str> {{
219          {code}  }}"##,
220          code = map
221            .phf_path("super::phf")
222            .build()
223        )
224      })
225  }
226
227  /// Creates Rust module file writer with standardized naming
228  ///
229  /// # File Naming
230  ///
231  /// Generates filenames following format:
232  /// `{mod_prefix}{language_snake_case}.rs`
233  /// - Converts language ID to snake_case (e.g., "en-US" → "en_us")
234  /// - Applies module prefix from generator configuration
235  pub(crate) fn create_rs_mod_file<D: core::fmt::Display>(
236    &self,
237    language: &D,
238  ) -> io::Result<BufWriter<File>> {
239    let mod_prefix = self.get_mod_prefix();
240    let rs_file_name =
241      fmt_compact!("{mod_prefix}{}.rs", to_lower_snake_case(language));
242
243    eprintln!(
244      "#[cfg(feature = \"{mod_prefix}{language}\")]\n\
245      mod {rs_file_name}\n"
246    );
247
248    let out_dir = self.get_outdir().as_deref();
249
250    create_buf_writer(out_dir, rs_file_name)
251  }
252}
253
254/// Constructs ordered PHF map from localization entries
255///
256/// # Parameter
257///
258/// - `map_entry`
259///   - Localization data in BTreeMap format
260///
261/// # Note
262/// Preserves insertion order using [`phf_codegen::OrderedMap`]
263fn assemble_phf_map(map_entry: &L10nBTreeMap) -> OrderedMap<PhfTupleKey<'_>> {
264  map_entry
265    .iter()
266    .map(|((name, k), v)| {
267      let tuple_key = PhfTupleKey(name.as_str(), k.as_str());
268      let value = fmt_compact!(r##########"r#####"{v}"#####"##########);
269      (tuple_key, value)
270    })
271    .fold(OrderedMap::new(), |mut acc, x| {
272      acc.entry(x.0, &x.1);
273      acc
274    })
275}
276
277/// Normalizes snake_case format
278///
279/// - en.US => en_us
280/// - en-US => en_us
281/// - en-Latn-US => en_latn_us
282/// - zh-Hans-CN => zh_hans_cn
283///
284/// ## Conversion Rules
285///
286/// 1. Convert to ASCII lowercase
287/// 2. Replace hyphens/dots with underscores
288pub fn to_lower_snake_case<D: core::fmt::Display>(id: D) -> MiniStr {
289  fmt_compact!("{id}")
290    .tap_mut(|s| s.make_ascii_lowercase())
291    .chars()
292    .map(|c| match c {
293      '-' | '.' => '_',
294      c => c,
295    })
296    .collect()
297}
298
#[cfg(test)]
mod tests {
  use anyhow::Result as AnyResult;
  use glossa_shared::{PhfL10nAllInOneMap, PhfL10nOrderedMap, PhfTripleKey};

  use super::*;
  use crate::generator::dbg_generator::{
    de_en_fr_pt_zh_generator, en_gb_generator, es_generator, new_generator,
  };

  /// Writes the per-language PHF module for the `en-GB` debug generator.
  #[ignore]
  #[test]
  fn test_build_en_gb_phf() -> AnyResult<()> {
    en_gb_generator().output_phf(MapType::Regular)?;
    Ok(())
  }

  /// Writes the per-language PHF module for the `es` debug generator.
  #[ignore]
  #[test]
  fn test_build_es_phf() -> AnyResult<()> {
    es_generator().output_phf(MapType::Regular)?;
    Ok(())
  }

  /// Writes PHF modules for every language of the default debug generator.
  #[ignore]
  #[test]
  fn test_build_all_phf() -> AnyResult<()> {
    new_generator().output_phf(MapType::Regular)?;
    Ok(())
  }

  /// Prints the all-in-one function generated by the default debug generator.
  #[ignore]
  #[test]
  fn test_build_all_in_one_phf() -> AnyResult<()> {
    let generated = new_generator().output_phf_all_in_one(MapType::Regular)?;
    println!("{generated}");
    Ok(())
  }

  /// Prints the all-in-one function generated by the multi-language debug
  /// generator.
  #[ignore]
  #[test]
  fn test_build_de_zh_fr_pt_phf_all_in_one() -> AnyResult<()> {
    let generated =
      de_en_fr_pt_zh_generator().output_phf_all_in_one(MapType::Regular)?;
    println!("{generated}");
    Ok(())
  }

  /// Hand-pasted sample in the shape of `output_phf` output, holding one
  /// `en-GB` entry.
  pub(crate) const fn en_gb_map() -> PhfL10nOrderedMap {
    use PhfTupleKey as Key;
    // `phf` is taken from `lang_id::maps` here; generated code refers to
    // `super::phf` instead.
    use lang_id::maps::phf;

    phf::OrderedMap {
      key: 12913932095322966823,
      disps: &[(0, 0)],
      idxs: &[0],
      entries: &[(
        Key(r#"error"#, r##"text-not-found"##),
        r#####"No localised text found"#####,
      )],
    }
  }

  /// Hand-pasted sample in the shape of `output_phf_all_in_one` output,
  /// holding the `error/text-not-found` message for seven languages.
  pub(crate) const fn all_in_one_map() -> PhfL10nAllInOneMap {
    use PhfTripleKey as Key;
    use lang_id::maps::phf;

    phf::OrderedMap {
      key: 12913932095322966823,
      disps: &[(2, 3), (2, 0)],
      idxs: &[5, 4, 0, 6, 3, 2, 1],
      entries: &[
        (
          Key(r#"de"#, r##"error"##, r###"text-not-found"###),
          r#####"Kein lokalisierter Text gefunden"#####,
        ),
        (
          Key(r#"en"#, r##"error"##, r###"text-not-found"###),
          r#####"No localized text found"#####,
        ),
        (
          Key(r#"en-GB"#, r##"error"##, r###"text-not-found"###),
          r#####"No localised text found"#####,
        ),
        (
          Key(r#"es"#, r##"error"##, r###"text-not-found"###),
          r#####"No se encontró texto localizado"#####,
        ),
        (
          Key(r#"pt"#, r##"error"##, r###"text-not-found"###),
          r#####"Nenhum texto localizado encontrado"#####,
        ),
        (
          Key(r#"zh"#, r##"error"##, r###"text-not-found"###),
          r#####"未找到本地化文本"#####,
        ),
        (
          Key(r#"zh-Latn-CN"#, r##"error"##, r###"text-not-found"###),
          r#####"MeiYou ZhaoDao BenDiHua WenBen"#####,
        ),
      ],
    }
  }

  /// Looks up the single entry of the `en-GB` sample map and dumps the result.
  #[ignore]
  #[test]
  fn test_get_phf_en_map() {
    let map = en_gb_map();
    let v = map.get(&PhfTupleKey("error", "text-not-found"));
    dbg!(v);
  }

  /// Mirrors the "Get Text" doc example: a direct lookup, then a lookup
  /// through a language fallback chain.
  #[ignore]
  #[test]
  fn doc_test_get_all_in_one_map() {
    let map = all_in_one_map();
    let lookup = |lang| map.get(&PhfTripleKey(lang, "error", "text-not-found"));

    assert_eq!(lookup("zh"), Some(&"未找到本地化文本"));

    // "gsw" and "de-CH" are absent from the table, so "de" is the first hit.
    let fallback = ["gsw", "de-CH", "de", "en"]
      .into_iter()
      .find_map(lookup);
    assert_eq!(fallback, Some(&"Kein lokalisierter Text gefunden"));
  }
}