glossa_codegen/generator/
output_phf.rs

1use std::{
2  fs::File,
3  io::{self, BufWriter, Write},
4};
5
6use glossa_shared::{
7  PhfTupleKey, ToCompactString, fmt_compact,
8  phf_triple_key::RawTripleKey,
9  tap::{Pipe, Tap},
10};
11use phf_codegen::OrderedMap;
12use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
13
14use crate::{
15  MiniStr,
16  generator::{
17    Generator, MapType, flattening::L10nBTreeMap, output_bincode::create_buf_writer,
18  },
19};
20
21impl<'h> Generator<'h> {
22  /// Collect all localized resources into a **`const phf::OrderedMap`**
23  /// function, i.e., a single table can accommodate different `language`,
24  /// `map_name`, and `map_key`.
25  ///
26  ///
27  /// ## Example
28  ///
29  /// ```no_run
30  /// use glossa_codegen::{L10nResources, Generator, generator::MapType};
31  /// use glossa_shared::tap::Pipe;
32  ///
33  /// let data = L10nResources::new("../../locales/")
34  ///   .with_include_map_names(["error"])
35  ///   .with_include_languages([
36  ///     "de",
37  ///     "zh-pinyin",
38  ///     "zh",
39  ///     "pt",
40  ///     "es",
41  ///     "en",
42  ///     "en-GB",
43  /// ]);
44  ///
45  /// let function_data = Generator::default()
46  ///   .with_resources(data)
47  ///   .output_phf_all_in_one(MapType::Regular)?;
48  ///
49  /// # Ok::<(), std::io::Error>(())
50  /// ```
51  ///
52  /// ### function data:
53  ///
54  /// ```ignore
55  /// // glossa_shared::{phf, PhfL10nAllInOneMap, PhfTripleKey};
56  ///
57  /// pub(crate) const fn map() -> super::PhfL10nAllInOneMap {
58  ///     use super::PhfTripleKey as Key;
59  ///     super::phf::OrderedMap {
60  ///       key: 12913932095322966823,
61  ///       disps: &[(2, 3), (2, 0)],
62  ///       idxs: &[5, 4, 0, 6, 3, 2, 1],
63  ///       entries: &[
64  ///         (
65  ///           Key(r#"de"#, r##"error"##, r###"text-not-found"###),
66  ///           r#####"Kein lokalisierter Text gefunden"#####,
67  ///         ),
68  ///         (
69  ///           Key(r#"en"#, r##"error"##, r###"text-not-found"###),
70  ///           r#####"No localized text found"#####,
71  ///         ),
72  ///         (
73  ///           Key(r#"en-GB"#, r##"error"##, r###"text-not-found"###),
74  ///           r#####"No localised text found"#####,
75  ///         ),
76  ///         (
77  ///           Key(r#"es"#, r##"error"##, r###"text-not-found"###),
78  ///           r#####"No se encontró texto localizado"#####,
79  ///         ),
80  ///         (
81  ///           Key(r#"pt"#, r##"error"##, r###"text-not-found"###),
82  ///           r#####"Nenhum texto localizado encontrado"#####,
83  ///         ),
84  ///         (
85  ///           Key(r#"zh"#, r##"error"##, r###"text-not-found"###),
86  ///           r#####"未找到本地化文本"#####,
87  ///         ),
88  ///         (
89  ///           Key(r#"zh-Latn-CN"#, r##"error"##, r###"text-not-found"###),
90  ///           r#####"MeiYou ZhaoDao BenDiHua WenBen"#####,
91  ///         ),
92  ///       ],
93  ///     }
94  /// }
95  /// ```
96  ///
97  /// ### Get Text
98  ///
99  /// ```ignore
100  /// use glossa_shared::PhfTripleKey;
101  ///
102  /// fn test_get_text() {
103  ///     let map = map();
104  ///     let get_text =
105  ///       |language| map.get(&PhfTripleKey(language, "error", "text-not-found"));
106  ///
107  ///     let zh_text = get_text("zh");
108  ///     assert_eq!(zh_text, Some(&"未找到本地化文本"));
109  ///
110  ///     let language_chain = ["gsw", "de-CH", "de", "en"];
111  ///
112  ///     let text = language_chain
113  ///       .into_iter()
114  ///       .find_map(get_text);
115  ///     assert_eq!(text, Some(&"Kein lokalisierter Text gefunden"));
116  /// }
117  /// ```
118  pub fn output_phf_all_in_one(&'h self, non_dsl: MapType) -> io::Result<String> {
119    let vis_fn = self.get_visibility().as_str();
120
121    non_dsl
122      .get_non_dsl_maps(self)?
123      .iter()
124      .filter(|(_, data)| !data.is_empty())
125      .flat_map(|(lang, map_entry)| {
126        map_entry
127          .iter()
128          .map(|((name, k), v)| {
129            let new_key =
130              RawTripleKey(lang.to_compact_string(), name.as_str(), k.as_str());
131            let value = fmt_compact!(r##########"r#####"{v}"#####"##########);
132            (new_key, value)
133          })
134      })
135      .fold(OrderedMap::new(), |mut acc, (k, v)| {
136        acc.entry(k, &v);
137        acc
138      })
139      .pipe_ref_mut(|ordered_map| {
140        format!(
141          r#"{vis_fn} const fn map() -> super::PhfL10nAllInOneMap {{
142      use super::PhfTripleKey as Key;
143        {code}  }}"#,
144          code = ordered_map
145            .phf_path("super::phf")
146            .build()
147        )
148      })
149      .pipe(Ok)
150  }
151
152  /// Generates Perfect Hash Function (PHF) maps for localization data
153  ///
154  /// # Behavior
155  ///
156  /// - Processes non-DSL maps in parallel
157  /// - Filters out empty localization datasets
158  /// - Generates PHF maps preserving insertion order
159  /// - Creates individual Rust module files per language
160  ///
161  /// # Errors
162  ///
163  /// Returns [`io::Result`] for file I/O operations failures
164  pub fn output_phf(&'h self, non_dsl: MapType) -> io::Result<()> {
165    let vis_fn = self.get_visibility().as_str();
166
167    non_dsl
168      .get_non_dsl_maps(self)?
169      .par_iter()
170      .filter(|(_, data)| !data.is_empty())
171      .map(|(lang, map_entry)| {
172        let new_phf_map = assemble_phf_map(map_entry);
173        (lang, new_phf_map)
174      })
175      .try_for_each(|(lang, mut map)| {
176        writeln!(
177          &mut self.create_rs_mod_file(lang)?,
178          r##"{vis_fn} const fn map() -> super::PhfL10nOrderedMap {{
179          use super::PhfTupleKey as Key;
180          {code}  }}"##,
181          code = map
182            .phf_path("super::phf")
183            .build()
184        )
185      })
186
187    // fn l10n_maps() -> Box<[(lang_id::LangID, PhfL10nOrderedMap)]> {
188    //  use lang_id::consts::*;
189    //  vec![
190    //    #[cfg(feature = "l10n_en")]
191    //    (lang_id_en(), l10n_en::maps),
192    //
193    //    #[cfg(feature = "l10n_zh")]
194    //    (lang_id_zh(), l10n_zh::maps),
195    // ].into_boxed_slice()
196    // };
197  }
198
199  /// Creates Rust module file writer with standardized naming
200  ///
201  /// # File Naming
202  ///
203  /// Generates filenames following format:
204  /// `{mod_prefix}{language_snake_case}.rs`
205  /// - Converts language ID to snake_case (e.g., "en-US" → "en_us")
206  /// - Applies module prefix from generator configuration
207  pub(crate) fn create_rs_mod_file<D: core::fmt::Display>(
208    &self,
209    language: &D,
210  ) -> io::Result<BufWriter<File>> {
211    let mod_prefix = self.get_mod_prefix();
212    let rs_file_name =
213      fmt_compact!("{mod_prefix}{}.rs", to_lower_snake_case(language));
214
215    eprintln!(
216      "#[cfg(feature = \"{mod_prefix}{language}\")]\n\
217      mod {rs_file_name}\n"
218    );
219
220    let out_dir = self.get_outdir().as_deref();
221
222    create_buf_writer(out_dir, rs_file_name)
223  }
224}
225
226/// Constructs ordered PHF map from localization entries
227///
228/// # Parameter
229///
230/// - `map_entry`
231///   - Localization data in BTreeMap format
232///
233/// # Note
234/// Preserves insertion order using [`phf_codegen::OrderedMap`]
235fn assemble_phf_map(map_entry: &L10nBTreeMap) -> OrderedMap<PhfTupleKey<'_>> {
236  map_entry
237    .iter()
238    .map(|((name, k), v)| {
239      let tuple_key = PhfTupleKey(name.as_str(), k.as_str());
240      let value = fmt_compact!(r##########"r#####"{v}"#####"##########);
241      (tuple_key, value)
242    })
243    .fold(OrderedMap::new(), |mut acc, x| {
244      acc.entry(x.0, &x.1);
245      acc
246    })
247}
248
249/// Normalizes snake_case format
250///
251/// - en.US => en_us
252/// - en-US => en_us
253/// - en-Latn-US => en_latn_us
254/// - zh-Hans-CN => zh_hans_cn
255///
256/// ## Conversion Rules
257///
258/// 1. Convert to ASCII lowercase
259/// 2. Replace hyphens/dots with underscores
260pub fn to_lower_snake_case<D: core::fmt::Display>(id: D) -> MiniStr {
261  fmt_compact!("{id}")
262    .tap_mut(|s| s.make_ascii_lowercase())
263    .chars()
264    .map(|c| match c {
265      '-' | '.' => '_',
266      c => c,
267    })
268    .collect()
269}
270
#[cfg(test)]
mod tests {
  use anyhow::Result as AnyResult;
  use glossa_shared::{PhfL10nAllInOneMap, PhfL10nOrderedMap, PhfTripleKey};

  use super::*;
  use crate::generator::dbg_generator::{
    de_en_fr_pt_zh_generator, en_gb_generator, es_generator, new_generator,
  };

  // The `test_build_*` tests below generate output (files or stdout) from
  // the debug generators; they are `#[ignore]`d and meant to be run on
  // demand.

  /// Writes the per-language PHF module for the en-GB debug generator.
  #[ignore]
  #[test]
  fn test_build_en_gb_phf() -> AnyResult<()> {
    en_gb_generator().output_phf(MapType::Regular)?;
    Ok(())
  }

  /// Writes the per-language PHF module for the es debug generator.
  #[ignore]
  #[test]
  fn test_build_es_phf() -> AnyResult<()> {
    es_generator().output_phf(MapType::Regular)?;
    Ok(())
  }

  /// Writes per-language PHF modules for the default debug generator.
  #[ignore]
  #[test]
  fn test_build_all_phf() -> AnyResult<()> {
    new_generator().output_phf(MapType::Regular)?;
    Ok(())
  }

  /// Prints the all-in-one map source for the default debug generator.
  #[ignore]
  #[test]
  fn test_build_all_in_one_phf() -> AnyResult<()> {
    let function_data = new_generator().output_phf_all_in_one(MapType::Regular)?;
    println!("{function_data}");
    Ok(())
  }

  /// Prints the all-in-one map source for the de/en/fr/pt/zh debug generator.
  #[ignore]
  #[test]
  fn test_build_de_zh_fr_pt_phf_all_in_one() -> AnyResult<()> {
    let function_data =
      de_en_fr_pt_zh_generator().output_phf_all_in_one(MapType::Regular)?;
    println!("{function_data}");
    Ok(())
  }

  /// Hand-written fixture mirroring the shape of a generated per-language
  /// map (single en-GB entry).
  pub(crate) const fn en_gb_map() -> PhfL10nOrderedMap {
    use PhfTupleKey as Key;
    // `phf` is taken from `lang_id::maps` here (alternative path:
    // `glossa_shared::phf`).
    use lang_id::maps::phf;

    phf::OrderedMap {
      key: 12913932095322966823,
      disps: &[(0, 0)],
      idxs: &[0],
      entries: &[(
        Key(r#"error"#, r##"text-not-found"##),
        r#####"No localised text found"#####,
      )],
    }
  }

  /// Hand-written fixture mirroring the shape of a generated all-in-one map
  /// (one `error/text-not-found` entry per language).
  pub(crate) const fn all_in_one_map() -> PhfL10nAllInOneMap {
    use PhfTripleKey as Key;
    use lang_id::maps::phf;

    phf::OrderedMap {
      key: 12913932095322966823,
      disps: &[(2, 3), (2, 0)],
      idxs: &[5, 4, 0, 6, 3, 2, 1],
      entries: &[
        (
          Key(r#"de"#, r##"error"##, r###"text-not-found"###),
          r#####"Kein lokalisierter Text gefunden"#####,
        ),
        (
          Key(r#"en"#, r##"error"##, r###"text-not-found"###),
          r#####"No localized text found"#####,
        ),
        (
          Key(r#"en-GB"#, r##"error"##, r###"text-not-found"###),
          r#####"No localised text found"#####,
        ),
        (
          Key(r#"es"#, r##"error"##, r###"text-not-found"###),
          r#####"No se encontró texto localizado"#####,
        ),
        (
          Key(r#"pt"#, r##"error"##, r###"text-not-found"###),
          r#####"Nenhum texto localizado encontrado"#####,
        ),
        (
          Key(r#"zh"#, r##"error"##, r###"text-not-found"###),
          r#####"未找到本地化文本"#####,
        ),
        (
          Key(r#"zh-Latn-CN"#, r##"error"##, r###"text-not-found"###),
          r#####"MeiYou ZhaoDao BenDiHua WenBen"#####,
        ),
      ],
    }
  }

  /// Looks up the only entry of the en-GB fixture and checks its text.
  #[ignore]
  #[test]
  fn test_get_phf_en_map() {
    let map = en_gb_map();
    let text = map.get(&PhfTupleKey("error", "text-not-found"));
    assert_eq!(text, Some(&"No localised text found"));
  }

  /// Mirrors the "Get Text" doc example of `output_phf_all_in_one`: direct
  /// lookup plus fallback through a language chain.
  #[ignore]
  #[test]
  fn doc_test_get_all_in_one_map() {
    let map = all_in_one_map();
    let get_text =
      |language| map.get(&PhfTripleKey(language, "error", "text-not-found"));

    let zh_text = get_text("zh");
    assert_eq!(zh_text, Some(&"未找到本地化文本"));

    // "gsw" and "de-CH" are absent from the fixture, so the first hit in
    // the chain is "de".
    let language_chain = ["gsw", "de-CH", "de", "en"];

    let text = language_chain
      .into_iter()
      .find_map(get_text);
    assert_eq!(text, Some(&"Kein lokalisierter Text gefunden"));
  }
}