glossa_codegen/generator/
output_phf.rs

1use std::{
2  fs::File,
3  io::{self, BufWriter, Write},
4};
5
6use glossa_shared::{
7  PhfTupleKey, ToCompactString, fmt_compact,
8  phf_triple_key::RawTripleKey,
9  tap::{Pipe, Tap},
10};
11use phf_codegen::OrderedMap;
12use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
13
14use crate::{
15  MiniStr,
16  generator::{
17    Generator, MapType, flattening::L10nBTreeMap, output_bincode::create_buf_writer,
18  },
19};
20
21impl<'h> Generator<'h> {
22  /// Collect all localized resources into a **`const phf::OrderedMap`**
23  /// function, i.e., a single table can accommodate different `language`,
24  /// `map_name`, and `map_key`.
25  ///
26  ///
27  /// ## Example
28  ///
29  /// ```no_run
30  /// use glossa_codegen::{L10nResources, Generator, generator::MapType};
31  /// use glossa_shared::tap::Pipe;
32  ///
33  /// let data = L10nResources::new("../../locales/")
34  ///   .with_include_map_names(["error"])
35  ///   .with_include_languages([
36  ///     "de",
37  ///     "zh-pinyin",
38  ///     "zh",
39  ///     "pt",
40  ///     "es",
41  ///     "en",
42  ///     "en-GB",
43  /// ]);
44  ///
45  /// let function_data = Generator::default()
46  ///   .with_resources(data)
47  ///   .output_phf_all_in_one(MapType::Regular)?;
48  ///
49  /// # Ok::<(), std::io::Error>(())
50  /// ```
51  ///
52  /// ### function data:
53  ///
54  /// ```ignore
55  /// // glossa_shared::{phf, PhfL10nAllInOneMap, PhfTripleKey};
56  ///
57  /// pub(crate) const fn map() -> super::PhfL10nAllInOneMap {
58  ///     use super::PhfTripleKey as Key;
59  ///     super::phf::OrderedMap {
60  ///       key: 12913932095322966823,
61  ///       disps: &[(2, 3), (2, 0)],
62  ///       idxs: &[5, 4, 0, 6, 3, 2, 1],
63  ///       entries: &[
64  ///         (
65  ///           Key(r#"de"#, r##"error"##, r###"text-not-found"###),
66  ///           r#####"Kein lokalisierter Text gefunden"#####,
67  ///         ),
68  ///         (
69  ///           Key(r#"en"#, r##"error"##, r###"text-not-found"###),
70  ///           r#####"No localized text found"#####,
71  ///         ),
72  ///         (
73  ///           Key(r#"en-GB"#, r##"error"##, r###"text-not-found"###),
74  ///           r#####"No localised text found"#####,
75  ///         ),
76  ///         (
77  ///           Key(r#"es"#, r##"error"##, r###"text-not-found"###),
78  ///           r#####"No se encontró texto localizado"#####,
79  ///         ),
80  ///         (
81  ///           Key(r#"pt"#, r##"error"##, r###"text-not-found"###),
82  ///           r#####"Nenhum texto localizado encontrado"#####,
83  ///         ),
84  ///         (
85  ///           Key(r#"zh"#, r##"error"##, r###"text-not-found"###),
86  ///           r#####"未找到本地化文本"#####,
87  ///         ),
88  ///         (
89  ///           Key(r#"zh-Latn-CN"#, r##"error"##, r###"text-not-found"###),
90  ///           r#####"MeiYou ZhaoDao BenDiHua WenBen"#####,
91  ///         ),
92  ///       ],
93  ///     }
94  /// }
95  /// ```
96  ///
97  /// ### Get Text
98  ///
99  /// ```ignore
100  /// use glossa_shared::PhfTripleKey;
101  ///
102  /// fn test_get_text() {
103  ///     let map = map();
104  ///     let get_text =
105  ///       |language| map.get(&PhfTripleKey(language, "error", "text-not-found"));
106  ///
107  ///     let zh_text = get_text("zh");
108  ///     assert_eq!(zh_text, Some(&"未找到本地化文本"));
109  ///
110  ///     let language_chain = ["gsw", "de-CH", "de", "en"];
111  ///
112  ///     let text = language_chain
113  ///       .into_iter()
114  ///       .find_map(get_text);
115  ///     assert_eq!(text, Some(&"Kein lokalisierter Text gefunden"));
116  /// }
117  /// ```
118  pub fn output_phf_all_in_one(&'h self, non_dsl: MapType) -> io::Result<String> {
119    let vis_fn = self.get_visibility().as_str();
120
121    non_dsl
122      .get_non_dsl_maps(self)?
123      .iter()
124      .filter(|(_, data)| !data.is_empty())
125      .flat_map(|(lang, map_entry)| {
126        map_entry
127          .iter()
128          .map(|((name, k), v)| {
129            let new_key =
130              RawTripleKey(lang.to_compact_string(), name.as_str(), k.as_str());
131            let value = fmt_compact!(r##########"r#####"{v}"#####"##########);
132            (new_key, value)
133          })
134      })
135      .fold(OrderedMap::new(), |mut acc, (k, v)| {
136        acc.entry(k, &v);
137        acc
138      })
139      .pipe_ref_mut(|ordered_map| {
140        format!(
141          r#"{vis_fn} const fn map() -> super::PhfL10nAllInOneMap {{
142      use super::PhfTripleKey as Key;
143        {code}  }}"#,
144          code = ordered_map
145            .phf_path("super::phf")
146            .build()
147        )
148      })
149      .pipe(Ok)
150  }
151
152  /// Generates Perfect Hash Function (PHF) maps for localization data.
153  /// => `const fn map() -> super::PhfL10nOrderedMap`
154  ///
155  /// # Behavior
156  ///
157  /// - Processes non-DSL maps in parallel
158  /// - Filters out empty localization datasets
159  /// - Generates PHF maps preserving insertion order
160  /// - Creates individual Rust module files per language
161  ///
162  /// # Errors
163  ///
164  /// Returns [`io::Result`] for file I/O operations failures
165  pub fn output_phf(&'h self, non_dsl: MapType) -> io::Result<()> {
166    let vis_fn = self.get_visibility().as_str();
167
168    non_dsl
169      .get_non_dsl_maps(self)?
170      .par_iter()
171      .filter(|(_, data)| !data.is_empty())
172      .map(|(lang, map_entry)| {
173        let new_phf_map = assemble_phf_map(map_entry);
174        (lang, new_phf_map)
175      })
176      .try_for_each(|(lang, mut map)| {
177        writeln!(
178          &mut self.create_rs_mod_file(lang)?,
179          r##"{vis_fn} const fn map() -> super::PhfL10nOrderedMap {{
180          use super::PhfTupleKey as Key;
181          {code}  }}"##,
182          code = map
183            .phf_path("super::phf")
184            .build()
185        )
186      })
187  }
188
189  /// Generates Perfect Hash Function (PHF) maps for localization data.
190  /// => `const fn map() -> super::PhfStrMap`
191  ///
192  /// > Note: The generated PHF map is only for localization data where map_name
193  /// > can be ignored. If map_name cannot be ignored, please use
194  /// > [output_phf](Self::output_phf).
195  pub fn output_phf_without_map_name(&'h self, non_dsl: MapType) -> io::Result<()> {
196    let vis_fn = self.get_visibility().as_str();
197
198    non_dsl
199      .get_non_dsl_maps(self)?
200      .par_iter()
201      .filter(|(_, data)| !data.is_empty())
202      .map(|(lang, map_entry)| {
203        let new_phf_map = map_entry
204          .iter()
205          .map(|((_name, k), v)| {
206            let value = fmt_compact!(r##########"r#####"{v}"#####"##########);
207            (k.as_str(), value)
208          })
209          .fold(OrderedMap::new(), |mut acc, (k, v)| {
210            acc.entry(k, &v);
211            acc
212          });
213        (lang, new_phf_map)
214      })
215      .try_for_each(|(lang, mut map)| {
216        writeln!(
217          &mut self.create_rs_mod_file(lang)?,
218          r##"{vis_fn} const fn map() -> super::PhfStrMap {{
219          {code}  }}"##,
220          code = map
221            .phf_path("super::phf")
222            .build()
223        )
224      })
225  }
226
227  /// Creates Rust module file writer with standardized naming
228  ///
229  /// # File Naming
230  ///
231  /// Generates filenames following format:
232  /// `{mod_prefix}{language_snake_case}.rs`
233  /// - Converts language ID to snake_case (e.g., "en-US" → "en_us")
234  /// - Applies module prefix from generator configuration
235  pub(crate) fn create_rs_mod_file<D: core::fmt::Display>(
236    &self,
237    language: &D,
238  ) -> io::Result<BufWriter<File>> {
239    let mod_prefix = self.get_mod_prefix();
240    let feat_prefix = self.get_feature_prefix();
241    let rs_file_name =
242      fmt_compact!("{mod_prefix}{}.rs", to_lower_snake_case(language));
243    let mod_name = rs_file_name.trim_end_matches(".rs");
244
245    eprintln!(
246      "#[cfg(feature = \"{feat_prefix}{language}\")]\n\
247      mod {mod_name};\n",
248    );
249
250    let out_dir = self.get_outdir().as_deref();
251
252    create_buf_writer(out_dir, rs_file_name)
253  }
254}
255
256/// Constructs ordered PHF map from localization entries
257///
258/// # Parameter
259///
260/// - `map_entry`
261///   - Localization data in BTreeMap format
262///
263/// # Note
264/// Preserves insertion order using [`phf_codegen::OrderedMap`]
265fn assemble_phf_map(map_entry: &L10nBTreeMap) -> OrderedMap<PhfTupleKey<'_>> {
266  map_entry
267    .iter()
268    .map(|((name, k), v)| {
269      let tuple_key = PhfTupleKey(name.as_str(), k.as_str());
270      let value = fmt_compact!(r##########"r#####"{v}"#####"##########);
271      (tuple_key, value)
272    })
273    .fold(OrderedMap::new(), |mut acc, x| {
274      acc.entry(x.0, &x.1);
275      acc
276    })
277}
278
279/// Normalizes snake_case format
280///
281/// - en.US => en_us
282/// - en-US => en_us
283/// - en-Latn-US => en_latn_us
284/// - zh-Hans-CN => zh_hans_cn
285///
286/// ## Conversion Rules
287///
288/// 1. Convert to ASCII lowercase
289/// 2. Replace hyphens/dots with underscores
290pub fn to_lower_snake_case<D: core::fmt::Display>(id: D) -> MiniStr {
291  fmt_compact!("{id}")
292    .tap_mut(|s| s.make_ascii_lowercase())
293    .chars()
294    .map(|c| match c {
295      '-' | '.' => '_',
296      c => c,
297    })
298    .collect()
299}
300
#[cfg(test)]
mod tests {
  use anyhow::Result as AnyResult;
  use glossa_shared::{PhfL10nAllInOneMap, PhfL10nOrderedMap, PhfTripleKey};

  use super::*;
  use crate::generator::dbg_generator::{
    de_en_fr_pt_zh_generator, en_gb_generator, es_generator, new_generator,
  };

  // The `test_build_*` tests below run the generators from `dbg_generator`;
  // they are `#[ignore]`d, so they only run when requested explicitly.

  #[ignore]
  #[test]
  fn test_build_en_gb_phf() -> AnyResult<()> {
    en_gb_generator().output_phf(MapType::Regular)?;
    Ok(())
  }

  #[ignore]
  #[test]
  fn test_build_es_phf() -> AnyResult<()> {
    es_generator().output_phf(MapType::Regular)?;
    Ok(())
  }

  #[ignore]
  #[test]
  fn test_build_all_phf() -> AnyResult<()> {
    new_generator().output_phf(MapType::Regular)?;
    Ok(())
  }

  #[ignore]
  #[test]
  fn test_build_all_in_one_phf() -> AnyResult<()> {
    let function_data = new_generator().output_phf_all_in_one(MapType::Regular)?;
    println!("{function_data}");
    Ok(())
  }

  #[ignore]
  #[test]
  fn test_build_de_zh_fr_pt_phf_all_in_one() -> AnyResult<()> {
    let function_data =
      de_en_fr_pt_zh_generator().output_phf_all_in_one(MapType::Regular)?;
    println!("{function_data}");
    Ok(())
  }

  /// Fixture: a single-entry `(map_name, map_key)` table holding the en-GB
  /// text for `error` / `text-not-found`.
  pub(crate) const fn en_gb_map() -> PhfL10nOrderedMap {
    use PhfTupleKey as Key;
    // use glossa_shared::phf;
    use lang_id::maps::phf;

    phf::OrderedMap {
      key: 12913932095322966823,
      disps: &[(0, 0)],
      idxs: &[0],
      entries: &[(
        Key(r#"error"#, r##"text-not-found"##),
        r#####"No localised text found"#####,
      )],
    }
  }

  /// Fixture: the same table as the "function data" example in the
  /// `output_phf_all_in_one` documentation.
  pub(crate) const fn all_in_one_map() -> PhfL10nAllInOneMap {
    use PhfTripleKey as Key;
    use lang_id::maps::phf;

    phf::OrderedMap {
      key: 12913932095322966823,
      disps: &[(2, 3), (2, 0)],
      idxs: &[5, 4, 0, 6, 3, 2, 1],
      entries: &[
        (
          Key(r#"de"#, r##"error"##, r###"text-not-found"###),
          r#####"Kein lokalisierter Text gefunden"#####,
        ),
        (
          Key(r#"en"#, r##"error"##, r###"text-not-found"###),
          r#####"No localized text found"#####,
        ),
        (
          Key(r#"en-GB"#, r##"error"##, r###"text-not-found"###),
          r#####"No localised text found"#####,
        ),
        (
          Key(r#"es"#, r##"error"##, r###"text-not-found"###),
          r#####"No se encontró texto localizado"#####,
        ),
        (
          Key(r#"pt"#, r##"error"##, r###"text-not-found"###),
          r#####"Nenhum texto localizado encontrado"#####,
        ),
        (
          Key(r#"zh"#, r##"error"##, r###"text-not-found"###),
          r#####"未找到本地化文本"#####,
        ),
        (
          Key(r#"zh-Latn-CN"#, r##"error"##, r###"text-not-found"###),
          r#####"MeiYou ZhaoDao BenDiHua WenBen"#####,
        ),
      ],
    }
  }

  #[ignore]
  #[test]
  fn test_get_phf_en_map() {
    let map = en_gb_map();
    let text = map.get(&PhfTupleKey("error", "text-not-found"));
    // Assert the lookup result instead of only printing it, so the test can fail.
    assert_eq!(text, Some(&"No localised text found"));
  }

  #[ignore]
  #[test]
  fn doc_test_get_all_in_one_map() {
    let map = all_in_one_map();
    let get_text =
      |language| map.get(&PhfTripleKey(language, "error", "text-not-found"));

    let zh_text = get_text("zh");
    assert_eq!(zh_text, Some(&"未找到本地化文本"));

    // None of "gsw" / "de-CH" are in the table, so the chain falls through to "de".
    let language_chain = ["gsw", "de-CH", "de", "en"];

    let text = language_chain
      .into_iter()
      .find_map(get_text);
    assert_eq!(text, Some(&"Kein lokalisierter Text gefunden"));
  }
}