glossa_codegen/
resources.rs

1use std::{
2  ffi::OsStr,
3  fs,
4  path::{Path, PathBuf},
5  sync::OnceLock,
6};
7
/// Loaded localization data: maps a language identifier (the name of a
/// language directory) to the list of map entries found in that directory.
pub type L10nResMap = HashMap<KString, Vec<L10nMapEntry>>;
9
10use anyhow::bail;
11use dashmap::DashSet;
12use getset::{Getters, WithSetters};
13use glossa_dsl::Resolver;
14use glossa_shared::{
15  ToCompactString,
16  small_list::SmallList,
17  tap::{Pipe, TapFallible, TryConv},
18  type_aliases::ahash::HashMap,
19};
20use kstring::KString;
21use rayon::iter::{ParallelBridge, ParallelIterator};
22use serde::{Deserialize, Serialize};
23use walkdir::{DirEntry, WalkDir};
24
25use crate::{AnyResult, MiniStr, to_kstr};
26
/// Locates localization files under a directory and lazily loads them into an
/// [`L10nResMap`].
///
/// All four filter lists are empty by default; an empty list disables the
/// corresponding filter.
#[derive(Getters, WithSetters, Debug, Clone)]
#[getset(get = "pub with_prefix", set_with = "pub")]
pub struct L10nResources {
  /// Root localization directory. Each immediate sub-directory is treated as
  /// one language, keyed by the sub-directory name.
  dir: PathBuf,
  /// File-stem suffix that marks a file as DSL template data
  /// (`".dsl"` by default).
  dsl_suffix: MiniStr,

  /// When non-empty, only language directories whose name matches one of
  /// these entries (ASCII case-insensitive) are loaded.
  #[getset(skip)]
  #[getset(get = "pub")]
  include_languages: SmallList<3>,

  /// When non-empty, only files whose stem matches one of these entries
  /// (ASCII case-insensitive) are loaded. The comparison uses the full file
  /// stem, i.e. including any DSL suffix.
  #[getset(skip)]
  #[getset(get = "pub")]
  include_map_names: SmallList<2>,

  /// Language directories whose name matches one of these entries
  /// (ASCII case-insensitive) are skipped.
  #[getset(skip)]
  #[getset(get = "pub")]
  exclude_languages: SmallList<1>,

  /// Files whose stem matches one of these entries (ASCII case-insensitive)
  /// are skipped. The comparison uses the full file stem.
  #[getset(skip)]
  #[getset(get = "pub")]
  exclude_map_names: SmallList<1>,

  #[getset(get)]
  /// Cache for the loaded data; filled on the first call to
  /// [Self::get_or_init_data], which is the intended way to read it.
  lazy_data: OnceLock<L10nResMap>,
}
53
54impl Default for L10nResources {
55  /// Default:
56  ///
57  /// ```ignore
58  /// {
59  ///   dsl_suffix: ".dsl"
60  ///   ..Default::default()
61  /// }
62  /// ```
63  fn default() -> Self {
64    Self {
65      dsl_suffix: ".dsl".into(),
66      dir: Default::default(),
67      include_languages: Default::default(),
68      include_map_names: Default::default(),
69      exclude_languages: Default::default(),
70      exclude_map_names: Default::default(),
71      lazy_data: Default::default(),
72    }
73  }
74}
75
76fn walk_file<P: AsRef<Path>>(dir: P) -> Option<impl Iterator<Item = DirEntry>> {
77  dir
78    .pipe(WalkDir::new)
79    .follow_links(true)
80    .into_iter()
81    .filter_map(Result::ok)
82    .filter(is_supported_config_file)
83    .pipe(Some)
84}
85
86fn is_supported_config_file(e: &DirEntry) -> bool {
87  let f = e.path();
88  f.is_file()
89    && f
90      .extension()
91      .is_some_and(is_supported_cfg_format)
92}
93
/// Returns `true` when `o` is exactly one of the supported configuration file
/// extensions. The comparison is case-sensitive.
fn is_supported_cfg_format<S: AsRef<OsStr>>(o: S) -> bool {
  // Every supported extension is valid UTF-8, so a non-UTF-8 input can never
  // match.
  matches!(
    o.as_ref().to_str(),
    Some("toml" | "ron" | "json" | "json5" | "yml" | "yaml")
  )
}
100
101fn dir_name_to_opt_lang(dir: &Path) -> Option<KString> {
102  dir
103    .file_name()?
104    .to_str()?
105    .pipe(KString::from_ref)
106    .pipe(Some)
107}
108
109impl L10nResources {
110  /// Constructs a new `L10nResources` instance with localization directory.
111  ///
112  /// The provided path should point to a directory containing
113  /// localization files.
114  ///
115  /// ## Example
116  ///
117  /// ```
118  /// use glossa_codegen::L10nResources;
119  ///
120  /// let _res = L10nResources::new("../../locales/");
121  /// ```
122  pub fn new<P: Into<PathBuf>>(dir: P) -> Self {
123    Self {
124      dir: dir.into(),
125      ..Default::default()
126    }
127  }
128
129  pub fn get_or_init_data(&self) -> &L10nResMap {
130    self
131      .get_lazy_data()
132      .get_or_init(|| {
133        self
134          .collect_localized_files()
135          .expect("Failed to init L10nResources Data")
136      })
137  }
138
139  fn walk_dir(&self) -> Option<impl ParallelIterator<Item = PathBuf>> {
140    self
141      .get_dir()
142      .pipe(fs::read_dir)
143      .ok()?
144      .par_bridge()
145      .filter_map(Result::ok)
146      .map(|e| e.path())
147      .filter(|d| d.is_dir())
148      .into()
149  }
150
151  /// Processes localization file and converts to L10nMapEntry
152  ///
153  /// ## Implementation Notes
154  ///
155  /// 1. Deserialization Precedence: File contents must be deserialized and
156  ///    validated BEFORE registering the file stem in the tracking set. This
157  ///    ensures only files with valid data claim priority.
158  /// 2. Empty Data Handling: HashMap data with empty content after
159  ///    deserialization are intentionally excluded to prevent empty entries
160  ///    from shadowing valid data files.
161  /// 3. File Stem Collisions: The first valid file for each stem establishes
162  ///    priority. Subsequent files with the same stem (different extensions)
163  ///    will be ignored, even if they contain data.
164  ///
165  /// ## Edge Case Example
166  ///
167  /// When both `test.json` (empty) and `test.ron` (valid data) exist:
168  /// - Bad order: Processing `test.json` first would permanently block
169  ///   `test.ron`
170  /// - Correct order: Deserialize first, then `test.json` gets filtered out by
171  ///   empty check, allowing `test.ron` to claim the stem when processed.
172  fn process_file<P: AsRef<Path> + core::fmt::Debug>(
173    &self,
174    file: P,
175    file_stem: &MiniStr,
176    stem_set: &DashSet<MiniStr>,
177  ) -> Option<L10nMapEntry> {
178    let data = deser_config_file(&file)
179      .tap_err(|e| eprintln!("[WARN] Deserialization error for {file:?}: {e}"))
180      .ok()?;
181
182    // Reject empty datasets to prevent empty entries from reserving file stems
183    (!data.is_empty()).then_some(())?;
184
185    // let file_stem = get_file_stem(file)?;
186
187    stem_set
188      .insert(file_stem.clone())
189      .then_some(())?;
190
191    let suffix = self.get_dsl_suffix().as_str();
192
193    let (tmpl_data, data) = match file_stem.ends_with(suffix) && !suffix.is_empty() {
194      true => (data.try_conv::<Resolver>().ok(), None),
195      _ => (None, Some(data)),
196    };
197
198    let map_name = file_stem
199      .trim_end_matches(suffix)
200      .into();
201
202    L10nMapEntry {
203      map_name,
204      data,
205      tmpl_data,
206    }
207    .pipe(Some)
208  }
209
210  fn collect_localized_files(&self) -> Option<L10nResMap> {
211    self
212      .walk_dir()?
213      .filter(|dir| self.filter_include_languages(dir))
214      .filter(|dir| self.filter_exclude_languages(dir))
215      .filter_map(|ref dir| {
216        let entries = self.parallel_collect_l10n_entries(dir)?;
217        let lang = dir_name_to_opt_lang(dir)?;
218        (!entries.is_empty()).then_some((lang, entries))
219      })
220      .collect::<HashMap<_, _>>()
221      .into()
222  }
223
224  fn parallel_collect_l10n_entries(&self, dir: &Path) -> Option<Vec<L10nMapEntry>> {
225    let stem_set = DashSet::with_capacity(64);
226
227    dir
228      .pipe(walk_file)?
229      .par_bridge()
230      .filter_map(annotate_entry_with_stem)
231      .filter(|(_, map_name)| self.filter_include_map_names(map_name))
232      .filter(|(_, map_name)| self.filter_exclude_map_names(map_name))
233      .filter_map(|(file, file_stem)| {
234        self.process_file(file.path(), &file_stem, &stem_set)
235      })
236      .collect::<Vec<_>>()
237      .into()
238  }
239
240  fn filter_include_map_names(&self, map_name: &MiniStr) -> bool {
241    match self.include_map_names.as_ref() {
242      [] => true,
243      list => contain_map_name(list, map_name),
244    }
245  }
246
247  fn filter_exclude_map_names(&self, map_name: &MiniStr) -> bool {
248    match self.exclude_map_names.as_ref() {
249      [] => true,
250      list => !contain_map_name(list, map_name),
251    }
252  }
253
254  fn filter_exclude_languages(&self, dir: &Path) -> bool {
255    match self.exclude_languages.as_ref() {
256      [] => true,
257      list => match dir.file_name() {
258        Some(dirname) => !contain_language(list, dirname),
259        _ => true,
260      },
261    }
262  }
263
264  fn filter_include_languages(&self, dir: &Path) -> bool {
265    match self.include_languages.as_ref() {
266      [] => true,
267      list => dir
268        .file_name()
269        .is_some_and(|dirname| contain_language(list, dirname)),
270    }
271  }
272
273  pub fn with_include_languages<S: Into<MiniStr>>(
274    mut self,
275    include_languages: impl IntoIterator<Item = S>,
276  ) -> Self {
277    self.include_languages = include_languages
278      .into_iter()
279      .collect();
280    self
281  }
282
283  pub fn with_include_map_names<S: Into<MiniStr>>(
284    mut self,
285    include_map_names: impl IntoIterator<Item = S>,
286  ) -> Self {
287    self.include_map_names = include_map_names
288      .into_iter()
289      .collect();
290    self
291  }
292
293  pub fn with_exclude_languages<S: Into<MiniStr>>(
294    mut self,
295    exclude_languages: impl IntoIterator<Item = S>,
296  ) -> Self {
297    self.exclude_languages = exclude_languages
298      .into_iter()
299      .collect();
300    self
301  }
302
303  pub fn with_exclude_map_names<S: Into<MiniStr>>(
304    mut self,
305    exclude_map_names: impl IntoIterator<Item = S>,
306  ) -> Self {
307    self.exclude_map_names = exclude_map_names
308      .into_iter()
309      .collect();
310    self
311  }
312}
313
314fn contain_language(list: &[MiniStr], language: &OsStr) -> bool {
315  list
316    .iter()
317    .any(|item| language.eq_ignore_ascii_case(item))
318}
319
320fn contain_map_name(list: &[MiniStr], map_name: &MiniStr) -> bool {
321  list
322    .iter()
323    .any(|item| map_name.eq_ignore_ascii_case(item))
324}
325
326fn annotate_entry_with_stem(p: DirEntry) -> Option<(DirEntry, MiniStr)> {
327  p.path()
328    .pipe(get_file_stem)
329    .map(|stem| (p, stem))
330}
331
332fn get_file_stem<P: AsRef<Path>>(file: P) -> Option<MiniStr> {
333  file
334    .as_ref()
335    .file_stem()?
336    .to_str()?
337    .to_compact_string()
338    .pipe(Some)
339}
340
341fn deser_config_file<P: AsRef<Path>>(
342  file: P,
343) -> AnyResult<HashMap<KString, MiniStr>> {
344  let cfg_text = file.pipe_ref(fs::read_to_string)?;
345
346  if cfg_text.trim().is_empty() {
347    bail!("Empty File Content")
348  }
349
350  let new_err = || "Failed to deserialize config file.".pipe(anyhow::Error::msg);
351
352  let data = match file
353    .as_ref()
354    .extension()
355    .map(|x| x.to_string_lossy())
356    .ok_or_else(new_err)?
357    .as_ref()
358  {
359    #[cfg(feature = "json")]
360    "json" => match serde_json::from_str(&cfg_text) {
361      Ok(m) => m,
362      #[cfg(not(feature = "json5"))]
363      e => e?,
364      #[cfg(feature = "json5")]
365      _ => serde_json5::from_str(&cfg_text)?,
366    },
367    #[cfg(feature = "json5")]
368    "json5" => serde_json5::from_str(&cfg_text)?,
369    #[cfg(feature = "ron")]
370    "ron" => ron::from_str(&cfg_text)?,
371    #[cfg(feature = "toml")]
372    "toml" => toml::from_str(&cfg_text)?,
373    #[cfg(feature = "yaml")]
374    "yaml" | "yml" => serde_yml::from_str(&cfg_text)?,
375    _ => bail!("Skip unsupported file"),
376  };
377
378  Ok(data)
379}
380
/// The content of one localization file.
#[derive(Getters, WithSetters, Debug, Clone, Default, Serialize, Deserialize)]
#[getset(get = "pub(crate) with_prefix", set_with = "pub(crate)")]
pub struct L10nMapEntry {
  /// File stem with the DSL suffix removed.
  map_name: MiniStr,
  /// Plain key → text data; `None` for DSL template files.
  data: Option<HashMap<KString, MiniStr>>,
  /// Template resolver built from a DSL template file; `None` for regular
  /// files.
  tmpl_data: Option<Resolver>,
}
388
389impl L10nMapEntry {
390  pub(crate) fn map_name_to_kstring(&self) -> KString {
391    self
392      .get_map_name()
393      .pipe(to_kstr)
394  }
395}
396
/// Test-only helpers shared by test modules in this crate.
#[cfg(test)]
pub(crate) mod dbg_shared {
  use crate::L10nResources;

  /// Localization directory used by the tests (a relative path).
  pub(crate) const DIR: &str = "../../locales/";

  /// Creates resources pointing at [`DIR`] with all other settings left at
  /// their defaults.
  pub(crate) fn new_resources() -> L10nResources {
    // L10nResources::default().with_dir(DIR.into())
    L10nResources::new(DIR)
    // .with_dsl_suffix(".dsl".into())
  }
}
409
/// Manual, `#[ignore]`d checks that read the real localization directory and
/// print what was found.
#[cfg(test)]
mod tests {
  use std::{collections::BTreeMap, fs, io};

  use testutils::simple_benchmark;

  use super::*;
  use crate::resources::dbg_shared::new_resources;

  /// Prints the name of every sub-directory of the test directory.
  #[ignore]
  #[test]
  fn test_read_dir() -> io::Result<()> {
    for (idx, path) in dbg_shared::DIR
      .pipe(fs::read_dir)?
      .filter_map(Result::ok)
      .map(|x| x.path())
      .filter(|e| e.is_dir())
      .enumerate()
    {
      dbg!(path.file_name(), idx);
    }
    Ok(())
  }

  /// Benchmarks loading the localization data.
  #[ignore]
  #[test]
  fn bench_init_res_data() {
    simple_benchmark(|| {
      // The data is loaded lazily, so constructing the resources alone would
      // measure nothing; a fresh instance per run forces a full load.
      dbg_shared::new_resources().get_or_init_data();
    });
  }

  /// Prints all loaded data, sorted by language.
  #[ignore]
  #[test]
  fn test_init_res_data() {
    let res = dbg_shared::new_resources();
    let map = res
      .get_or_init_data()
      .iter()
      .collect::<BTreeMap<_, _>>();
    dbg!(map);
  }

  /// "zh" is both included and excluded, so only "en" should remain.
  #[ignore]
  #[test]
  fn test_only_includes_en() {
    let res = new_resources()
      .with_include_languages(["zh", "en"])
      .with_exclude_map_names(["hi.tmpl", "test", "unread.tmpl"])
      .with_exclude_languages(["zh"]);
    let map = res.get_or_init_data();
    dbg!(map);
  }

  /// "es" is both included and excluded, so only "de" and "und" should
  /// remain.
  #[ignore]
  #[test]
  fn test_only_includes_de_and_und() {
    let res = new_resources()
      .with_include_languages(["de", "und", "es"])
      .with_exclude_languages(["es"]);
    let map = res.get_or_init_data();
    dbg!(map);
  }
}
477}