// lipilekhika/lib.rs

1use crate::script_data::ScriptData;
2pub use crate::script_data::{
3  ScriptListData, get_all_options, get_normalized_script_name, get_script_list_data,
4};
5use crate::transliterate::transliterate_text;
6pub use crate::typing::{
7  KramaDataItem, ListType, ScriptTypingDataMap, TypingDataMapItem, get_script_krama_data,
8  get_script_typing_data_map,
9};
10use std::collections::HashMap;
11
12mod script_data;
13mod transliterate;
14mod utils;
15
// will be publicly exported
17pub mod typing;
18
19/// Transliterates `text` from `from` to `to`.
20///
21/// - `from` / `to` can be script or language names/aliases
22/// - `trans_options` are the custom transliteration options
23///
24/// Returns the transliterated text, or an error string if script names are invalid.
25pub fn transliterate(
26  text: &str,
27  from: &str,
28  to: &str,
29  trans_options: Option<&HashMap<String, bool>>,
30) -> Result<String, String> {
31  let normalized_from =
32    get_normalized_script_name(from).ok_or_else(|| format!("Invalid script name: {}", from))?;
33  let normalized_to =
34    get_normalized_script_name(to).ok_or_else(|| format!("Invalid script name: {}", to))?;
35
36  if normalized_from == normalized_to {
37    return Ok(text.to_string());
38  }
39
40  let result = transliterate_text(
41    text,
42    &normalized_from,
43    &normalized_to,
44    trans_options,
45    None,
46  )?;
47
48  Ok(result.output)
49}
50
51/// Returns the schwa deletion characteristic of the script provided.
52pub fn get_schwa_status_for_script(script_name: &str) -> Result<Option<bool>, String> {
53  let normalized_script_name = get_normalized_script_name(script_name)
54    .ok_or_else(|| format!("Invalid script name: {}", script_name))?;
55  let script_data = ScriptData::get_script_data(&normalized_script_name);
56  match script_data {
57    ScriptData::Brahmic { schwa_property, .. } => Ok(Some(*schwa_property)),
58    ScriptData::Other { .. } => Ok(None),
59  }
60}
61
62/// Preload script data for a normalized script, alias, or language name.
63pub fn preload_script_data(script_name: &str) {
64  if let Some(normalized_script_name) = get_normalized_script_name(script_name) {
65    ScriptData::get_script_data(&normalized_script_name);
66  }
67}
68
69#[cfg(test)]
70mod tests {
71  use super::*;
72
73  use owo_colors::OwoColorize;
74  use serde::Deserialize;
75  use std::fs;
76  use std::io::Write;
77  use std::path::{Path, PathBuf};
78  use std::time::Instant;
79
80  fn de_index<'de, D>(deserializer: D) -> Result<String, D::Error>
81  where
82    D: serde::Deserializer<'de>,
83  {
84    struct IndexVisitor;
85
86    impl serde::de::Visitor<'_> for IndexVisitor {
87      type Value = String;
88
89      fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
90        formatter.write_str("a yaml index (number or string)")
91      }
92
93      fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
94      where
95        E: serde::de::Error,
96      {
97        Ok(v.to_string())
98      }
99
100      fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
101      where
102        E: serde::de::Error,
103      {
104        Ok(v.to_string())
105      }
106
107      fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
108      where
109        E: serde::de::Error,
110      {
111        Ok(v.to_string())
112      }
113
114      fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
115      where
116        E: serde::de::Error,
117      {
118        Ok(v.to_string())
119      }
120
121      fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
122      where
123        E: serde::de::Error,
124      {
125        Ok(v)
126      }
127    }
128
129    deserializer.deserialize_any(IndexVisitor)
130  }
131
  /// One transliteration test case as read from a YAML test-data file.
  #[derive(Debug, Deserialize)]
  struct TransliterationTestCase {
    /// Case identifier; may be written as a number or a string in YAML and is kept as text.
    #[serde(deserialize_with = "de_index")]
    index: String,
    /// Source script name handed to `transliterate`.
    from: String,
    /// Target script name handed to `transliterate`.
    to: String,
    /// Text to transliterate.
    input: String,
    /// Expected result of transliterating `input` from `from` to `to`.
    output: String,
    /// Optional transliteration options forwarded to `transliterate`.
    #[serde(default)]
    options: Option<HashMap<String, bool>>,
    /// When `Some(true)`, the runner also transliterates the result back and compares it
    /// with `input`.
    #[serde(default)]
    reversible: Option<bool>,
    /// When `Some(true)`, the runner skips the case and counts it as a todo.
    #[serde(default)]
    todo: Option<bool>,
  }
147
148  fn list_yaml_files(dir: &Path, out: &mut Vec<PathBuf>) -> std::io::Result<()> {
149    for entry in fs::read_dir(dir)? {
150      let entry = entry?;
151      let path = entry.path();
152      if path.is_dir() {
153        list_yaml_files(&path, out)?;
154      } else if path.extension().is_some_and(|e| e == "yaml") {
155        out.push(path);
156      }
157    }
158    Ok(())
159  }
160
161  fn test_data_root() -> PathBuf {
162    // `packages/rust` -> `../../test_data/transliteration`
163    let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
164    manifest_dir
165      .join("..")
166      .join("..")
167      .join("test_data")
168      .join("transliteration")
169  }
170
171  fn contains_vedic_svara(s: &str) -> bool {
172    const VEDIC_SVARAS: [&str; 4] = ["॒", "॑", "᳚", "᳛"];
173    VEDIC_SVARAS.iter().any(|sv| s.contains(sv))
174  }
175
  /// Counters collected while running the test cases of one YAML file; the same shape is
  /// reused to hold the totals over all files.
  #[derive(Default)]
  struct FileStats {
    /// Every case read from the file, including skipped ones.
    total_cases: usize,
    /// Cases skipped because they are flagged `todo`.
    todo_cases: usize,
    /// Cases skipped by the "auto" file / `Tamil-Extended` / Vedic svara rule.
    auto_vedic_skipped: usize,
    /// Forward transliterations whose output matched the expected output.
    forward_passed: usize,
    /// Reverse transliterations that reproduced the original input.
    reverse_passed: usize,
    /// Forward checks performed (forward errors are counted here too).
    forward_asserts: usize,
    /// Reverse checks performed (only for cases flagged `reversible`).
    reverse_asserts: usize,
    /// All failures encountered; not limited by the stored-failure cap.
    failures_total: usize,
  }
187
  /// The way in which a test case failed.
  #[derive(Debug)]
  enum FailureKind {
    /// The forward transliteration returned an error.
    ForwardError,
    /// The forward transliteration output differed from the expected output.
    ForwardMismatch,
    /// The reverse transliteration returned an error.
    ReverseError,
    /// The reverse transliteration did not reproduce the original input.
    ReverseMismatch,
  }
195
  /// Details of a single failed check, kept for the final failure report.
  #[derive(Debug)]
  struct Failure {
    /// Test file path, relative to the test-data root when possible.
    file: String,
    /// `index` of the failing test case.
    index: String,
    /// Source script of the failing call (the case's `to` for reverse failures).
    from: String,
    /// Target script of the failing call (the case's `from` for reverse failures).
    to: String,
    /// Which check failed and how.
    kind: FailureKind,
    /// Text given to the failing call (the forward result for reverse failures).
    input: String,
    /// Text the call was expected to produce.
    expected: Option<String>,
    /// Text the call actually produced; `None` when it returned an error.
    actual: Option<String>,
    /// Error message returned by the call; `None` for mismatches.
    error: Option<String>,
  }
208
  /// Upper bound on the number of `Failure` records kept in a list by `push_failure`.
  const MAX_FAILURES_TO_STORE: usize = 400;
  /// Column width used to left-align the label in `print_summary_line`.
  const SUMMARY_LABEL_WIDTH: usize = 12;
211
212  fn push_failure(failures: &mut Vec<Failure>, failure: Failure) {
213    if failures.len() < MAX_FAILURES_TO_STORE {
214      failures.push(failure);
215    }
216  }
217
218  // fn plural(n: usize, singular: &'static str, plural: &'static str) -> &'static str {
219  //     if n == 1 { singular } else { plural }
220  // }
221
222  fn print_summary_line(label: &str, value: String) {
223    println!(
224      "  {:<width$} {}",
225      label.bold(),
226      value,
227      width = SUMMARY_LABEL_WIDTH
228    );
229  }
230
231  fn run_yaml_file(file_path: &Path, root: &Path) -> (FileStats, Vec<Failure>) {
232    let file_name = file_path
233      .file_name()
234      .and_then(|n| n.to_str())
235      .unwrap_or("<unknown>");
236    let rel = file_path.strip_prefix(root).unwrap_or(file_path);
237    let rel_s = rel.display().to_string();
238
239    let yaml_text = fs::read_to_string(file_path)
240      .unwrap_or_else(|e| panic!("Failed reading YAML file `{}`: {e}", rel.display()));
241    let cases: Vec<TransliterationTestCase> = serde_yaml_ng::from_str(&yaml_text)
242      .unwrap_or_else(|e| panic!("Failed parsing YAML file `{}`: {e}", rel.display()));
243
244    let mut stats = FileStats::default();
245    let mut failures: Vec<Failure> = Vec::new();
246
247    for case in cases {
248      stats.total_cases += 1;
249      if case.todo.unwrap_or(false) {
250        stats.todo_cases += 1;
251        continue;
252      }
253
254      let result = transliterate(&case.input, &case.from, &case.to, case.options.as_ref());
255
256      let result = match result {
257        Ok(r) => r,
258        Err(e) => {
259          stats.forward_asserts += 1;
260          stats.failures_total += 1;
261          push_failure(
262            &mut failures,
263            Failure {
264              file: rel_s.clone(),
265              index: case.index.clone(),
266              from: case.from.clone(),
267              to: case.to.clone(),
268              kind: FailureKind::ForwardError,
269              input: case.input.clone(),
270              expected: Some(case.output.clone()),
271              actual: None,
272              error: Some(e),
273            },
274          );
275          continue;
276        }
277      };
278
279      if file_name.starts_with("auto")
280        && case.to == "Tamil-Extended"
281        && contains_vedic_svara(&result)
282      {
283        stats.auto_vedic_skipped += 1;
284        continue;
285      }
286
287      stats.forward_asserts += 1;
288      if result == case.output {
289        stats.forward_passed += 1;
290      } else {
291        stats.failures_total += 1;
292        push_failure(
293          &mut failures,
294          Failure {
295            file: rel_s.clone(),
296            index: case.index.clone(),
297            from: case.from.clone(),
298            to: case.to.clone(),
299            kind: FailureKind::ForwardMismatch,
300            input: case.input.clone(),
301            expected: Some(case.output.clone()),
302            actual: Some(result.clone()),
303            error: None,
304          },
305        );
306      }
307
308      if case.reversible.unwrap_or(false) {
309        stats.reverse_asserts += 1;
310        let reversed = transliterate(&result, &case.to, &case.from, case.options.as_ref());
311
312        match reversed {
313          Ok(rev) => {
314            if rev == case.input {
315              stats.reverse_passed += 1;
316            } else {
317              stats.failures_total += 1;
318              push_failure(
319                &mut failures,
320                Failure {
321                  file: rel_s.clone(),
322                  index: case.index.clone(),
323                  from: case.to.clone(),
324                  to: case.from.clone(),
325                  kind: FailureKind::ReverseMismatch,
326                  input: result.clone(),
327                  expected: Some(case.input.clone()),
328                  actual: Some(rev),
329                  error: None,
330                },
331              );
332            }
333          }
334          Err(e) => {
335            stats.failures_total += 1;
336            push_failure(
337              &mut failures,
338              Failure {
339                file: rel_s.clone(),
340                index: case.index.clone(),
341                from: case.to.clone(),
342                to: case.from.clone(),
343                kind: FailureKind::ReverseError,
344                input: result.clone(),
345                expected: Some(case.input.clone()),
346                actual: None,
347                error: Some(e),
348              },
349            );
350          }
351        }
352      }
353    }
354
355    (stats, failures)
356  }
357
358  #[test]
359  fn transliteration_yaml_test_data() {
360    let started = Instant::now();
361    let root = test_data_root();
362    let mut files: Vec<PathBuf> = Vec::new();
363    list_yaml_files(&root, &mut files)
364      .unwrap_or_else(|e| panic!("Failed listing YAML files in `{}`: {e}", root.display()));
365    files.sort();
366
367    let file_count = files.len();
368
369    assert!(
370      !files.is_empty(),
371      "No YAML test files found in `{}`",
372      root.display()
373    );
374
375    let mut overall = FileStats::default();
376    let mut overall_failures: Vec<Failure> = Vec::new();
377    let mut failed_files: Vec<(String, FileStats)> = Vec::new();
378
379    for file in files {
380      let rel_s = file
381        .strip_prefix(&root)
382        .unwrap_or(&file)
383        .display()
384        .to_string();
385      let (stats, failures) = run_yaml_file(&file, &root);
386      overall.total_cases += stats.total_cases;
387      overall.todo_cases += stats.todo_cases;
388      overall.auto_vedic_skipped += stats.auto_vedic_skipped;
389      overall.forward_passed += stats.forward_passed;
390      overall.reverse_passed += stats.reverse_passed;
391      overall.forward_asserts += stats.forward_asserts;
392      overall.reverse_asserts += stats.reverse_asserts;
393      overall.failures_total += stats.failures_total;
394
395      if stats.failures_total == 0 {
396        print!("{}", ".".green());
397      } else {
398        print!("{}", "F".red().bold());
399        failed_files.push((rel_s, stats));
400      }
401      let _ = std::io::stdout().flush();
402
403      for f in failures {
404        push_failure(&mut overall_failures, f);
405      }
406    }
407
408    println!();
409
410    let total_asserts = overall.forward_asserts + overall.reverse_asserts;
411    let total_passed = overall.forward_passed + overall.reverse_passed;
412    let total_skipped = overall.todo_cases + overall.auto_vedic_skipped;
413    let failed_file_count = failed_files.len();
414    let passed_file_count = file_count.saturating_sub(failed_file_count);
415
416    if !failed_files.is_empty() {
417      println!();
418      for (file_rel, stats) in &failed_files {
419        let total_asserts = stats.forward_asserts + stats.reverse_asserts;
420        let total_passed = stats.forward_passed + stats.reverse_passed;
421        let skipped = stats.todo_cases + stats.auto_vedic_skipped;
422        println!(
423          "{} {}  {} = {}/{}  {} = {}  {} = {}",
424          "FAIL".red().bold(),
425          file_rel.dimmed(),
426          "tests".bold(),
427          total_passed,
428          total_asserts,
429          "failures".bold(),
430          stats.failures_total.to_string().red(),
431          "skipped".bold(),
432          skipped.to_string().yellow()
433        );
434      }
435    }
436    {
437      println!();
438      println!("{}", "Transliteration".bold());
439
440      let test_files_value = if failed_file_count == 0 {
441        format!(
442          "{} ({})",
443          format!("{} passed", file_count).green(),
444          file_count
445        )
446      } else {
447        format!(
448          "{} ({}), {} ({})",
449          format!("{} passed", passed_file_count).green(),
450          file_count,
451          format!("{} failed", failed_file_count).red(),
452          file_count
453        )
454      };
455      print_summary_line("Test Files", test_files_value);
456
457      let tests_value = if overall.failures_total == 0 {
458        format!(
459          "{} ({})",
460          format!("{} passed", total_passed).green(),
461          total_asserts
462        )
463      } else {
464        format!(
465          "{} ({}), {} ({})",
466          format!("{} passed", total_passed).green(),
467          total_asserts,
468          format!("{} failed", overall.failures_total).red(),
469          total_asserts
470        )
471      };
472      print_summary_line("Tests", tests_value);
473
474      if total_skipped > 0 {
475        print_summary_line(
476          "Skipped",
477          format!(
478            "{} (todo: {}, auto-vedic: {})",
479            total_skipped.to_string().yellow(),
480            overall.todo_cases.to_string().yellow(),
481            overall.auto_vedic_skipped.to_string().yellow()
482          ),
483        );
484      }
485
486      print_summary_line(
487        "Duration",
488        format!("{:.2?}", started.elapsed()).dimmed().to_string(),
489      );
490    }
491
492    // Always write a one-line summary to a log file so it's visible even when tests succeed.
493    let summary = {
494      let total_asserts = overall.forward_asserts + overall.reverse_asserts;
495      let total_passed = overall.forward_passed + overall.reverse_passed;
496      let total_skipped = overall.todo_cases + overall.auto_vedic_skipped;
497      format!(
498        "Transliteration: files_total={}, files_passed={}, files_failed={}, tests_total={}, tests_passed={}, tests_failed={}, tests_skipped={}",
499        file_count,
500        passed_file_count,
501        failed_file_count,
502        total_asserts,
503        total_passed,
504        overall.failures_total,
505        total_skipped
506      )
507    };
508
509    let _ = std::fs::create_dir_all("test_log");
510    if let Ok(mut file) = std::fs::OpenOptions::new()
511      .create(true)
512      .write(true)
513      .truncate(true)
514      .open("test_log/transliteration_summary.txt")
515    {
516      let _ = writeln!(file, "{}", summary);
517    }
518
519    if overall.failures_total > 0 {
520      let mut msg = String::new();
521      msg.push_str(&format!(
522        "Transliteration had {} failing assertions (showing up to {}).\n",
523        overall.failures_total, MAX_FAILURES_TO_STORE
524      ));
525
526      for (i, f) in overall_failures.iter().enumerate() {
527        msg.push_str(&format!("\n{}. File: {}\n", i + 1, f.file));
528        msg.push_str(&format!("   Index: {}\n", f.index));
529
530        match f.kind {
531          FailureKind::ForwardMismatch => {
532            msg.push_str("   Transliteration failed:\n");
533            msg.push_str(&format!("     From: {}\n", f.from));
534            msg.push_str(&format!("     To: {}\n", f.to));
535            msg.push_str(&format!("     Input: \"{}\"\n", f.input));
536            if let (Some(expected), Some(actual)) = (&f.expected, &f.actual) {
537              msg.push_str(&format!("     Expected: \"{}\"\n", expected));
538              msg.push_str(&format!("     Actual: \"{}\"\n", actual));
539            }
540          }
541          FailureKind::ReverseMismatch => {
542            msg.push_str("   Reversed Transliteration failed:\n");
543            msg.push_str(&format!("     From: {}\n", f.from));
544            msg.push_str(&format!("     To: {}\n", f.to));
545            msg.push_str(&format!("     Input: \"{}\"\n", f.input));
546            if let (Some(expected), Some(actual)) = (&f.expected, &f.actual) {
547              msg.push_str(&format!("     Original Input: \"{}\"\n", expected));
548              msg.push_str(&format!("     Reversed Output: \"{}\"\n", actual));
549            }
550          }
551          FailureKind::ForwardError => {
552            msg.push_str("   Transliteration error:\n");
553            msg.push_str(&format!("     From: {}\n", f.from));
554            msg.push_str(&format!("     To: {}\n", f.to));
555            msg.push_str(&format!("     Input: \"{}\"\n", f.input));
556            if let Some(error) = &f.error {
557              msg.push_str(&format!("     Error: {}\n", error));
558            }
559          }
560          FailureKind::ReverseError => {
561            msg.push_str("   Reverse transliteration error:\n");
562            msg.push_str(&format!("     From: {}\n", f.from));
563            msg.push_str(&format!("     To: {}\n", f.to));
564            msg.push_str(&format!("     Input: \"{}\"\n", f.input));
565            if let Some(error) = &f.error {
566              msg.push_str(&format!("     Error: {}\n", error));
567            }
568          }
569        }
570      }
571
572      use std::fs::OpenOptions;
573      use std::io::Write;
574
575      let _ = std::fs::create_dir_all("test_log");
576      if let Ok(mut file) = OpenOptions::new()
577        .create(true)
578        .append(false)
579        .write(true)
580        .truncate(true)
581        .open("test_log/transliteration.txt")
582      {
583        let _ = file.write_all(msg.as_bytes());
584      }
585      panic!("failed");
586    }
587  }
588}