Skip to main content

rosetta_date/i18n/
mod.rs

1//! Internationalization module: language data structures and registry.
2//!
3//! Language data is compiled into the binary for maximum performance.
4//! Each language is gated behind a feature flag (e.g. `lang-en`, `lang-zh`).
5
6#[cfg(feature = "lang-de")]
7pub mod de;
8#[cfg(feature = "lang-en")]
9pub mod en;
10#[cfg(feature = "lang-es")]
11pub mod es;
12#[cfg(feature = "lang-fr")]
13pub mod fr;
14#[cfg(feature = "lang-it")]
15pub mod it;
16#[cfg(feature = "lang-ja")]
17pub mod ja;
18#[cfg(feature = "lang-ko")]
19pub mod ko;
20#[cfg(feature = "lang-pt")]
21pub mod pt;
22#[cfg(feature = "lang-ru")]
23pub mod ru;
24#[cfg(feature = "lang-th")]
25pub mod th;
26#[cfg(feature = "lang-tr")]
27pub mod tr;
28#[cfg(feature = "lang-zh")]
29pub mod zh;
30
/// A unit of time used in relative time expressions.
///
/// Variants are declared from the smallest unit to the largest. The type is a
/// plain `Copy` tag and also implements `Hash`, so it can be used directly as
/// a `HashMap`/`HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TimeUnit {
    /// Seconds.
    Second,
    /// Minutes.
    Minute,
    /// Hours.
    Hour,
    /// Days.
    Day,
    /// Weeks.
    Week,
    /// Months.
    Month,
    /// Years.
    Year,
}
42
/// Direction of a relative time expression.
///
/// Implements `Hash` in addition to the comparison traits so it can be used as
/// a key in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Direction {
    /// The expression points into the past ("ago", "前").
    Past,
    /// The expression points into the future ("from now", "后", "in").
    Future,
}
51
/// Special standalone time words that map to a fixed offset from "now".
///
/// Implements `Hash` in addition to the comparison traits so it can be used as
/// a key in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SpecialWord {
    /// The word for "now".
    Now,
    /// The word for "yesterday".
    Yesterday,
    /// The word for "today".
    Today,
    /// The word for "tomorrow".
    Tomorrow,
    /// The word for "the day before yesterday".
    DayBeforeYesterday,
    /// The word for "the day after tomorrow".
    DayAfterTomorrow,
}
62
/// Preferred date component order for a language/locale.
///
/// Implements `Hash` in addition to the comparison traits so it can be used as
/// a key in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DateOrder {
    /// Year-Month-Day (ISO, Chinese, Japanese, Korean)
    YMD,
    /// Month-Day-Year (US English)
    MDY,
    /// Day-Month-Year (European, most of world)
    DMY,
}
73
74/// Language-specific data for parsing and formatting dates.
75///
76/// All string data is `&'static` and compiled directly into the binary.
77#[derive(Debug)]
78pub struct LanguageData {
79    /// ISO 639-1 language code (e.g. "en", "zh")
80    pub code: &'static str,
81    /// Human-readable language name
82    pub name: &'static str,
83
84    // -- Month names (index 0 = January) --
85    pub months_long: &'static [&'static str; 12],
86    pub months_short: &'static [&'static str; 12],
87
88    // -- Weekday names (index 0 = Monday) --
89    pub weekdays_long: &'static [&'static str; 7],
90    pub weekdays_short: &'static [&'static str; 7],
91
92    // -- AM / PM indicators --
93    pub am_indicators: &'static [&'static str],
94    pub pm_indicators: &'static [&'static str],
95
96    // -- Relative time direction words --
97    /// Words that appear *after* `<n> <unit>` to indicate the past (e.g. "ago")
98    pub ago_words: &'static [&'static str],
99    /// Words that appear *after* `<n> <unit>` to indicate the future (e.g. "from now", "later")
100    pub future_words: &'static [&'static str],
101    /// Prefixes that appear *before* `<n> <unit>` to indicate the future (e.g. "in")
102    pub future_prefix: &'static [&'static str],
103    /// Prefixes that appear *before* `<n> <unit>` to indicate the past
104    /// (e.g. "hace" in Spanish, "il y a" in French, "vor" in German)
105    pub past_prefix: &'static [&'static str],
106
107    // -- Special standalone words --
108    pub special_words: &'static [(SpecialWord, &'static [&'static str])],
109
110    // -- Relative modifiers --
111    pub last_words: &'static [&'static str],
112    pub next_words: &'static [&'static str],
113    pub this_words: &'static [&'static str],
114
115    // -- Time unit keywords --
116    pub time_units: &'static [(TimeUnit, &'static [&'static str])],
117
118    // -- Number words (maps word → value) --
119    pub number_words: &'static [(&'static str, i64)],
120
121    // -- Default date component order --
122    pub date_order: DateOrder,
123}
124
125/// Returns all available languages based on enabled feature flags.
126#[allow(clippy::vec_init_then_push)]
127pub fn available_languages() -> Vec<&'static LanguageData> {
128    let mut langs = Vec::new();
129    #[cfg(feature = "lang-en")]
130    langs.push(&en::ENGLISH);
131    #[cfg(feature = "lang-zh")]
132    langs.push(&zh::CHINESE);
133    #[cfg(feature = "lang-es")]
134    langs.push(&es::SPANISH);
135    #[cfg(feature = "lang-fr")]
136    langs.push(&fr::FRENCH);
137    #[cfg(feature = "lang-de")]
138    langs.push(&de::GERMAN);
139    #[cfg(feature = "lang-it")]
140    langs.push(&it::ITALIAN);
141    #[cfg(feature = "lang-ja")]
142    langs.push(&ja::JAPANESE);
143    #[cfg(feature = "lang-ru")]
144    langs.push(&ru::RUSSIAN);
145    #[cfg(feature = "lang-tr")]
146    langs.push(&tr::TURKISH);
147    #[cfg(feature = "lang-th")]
148    langs.push(&th::THAI);
149    #[cfg(feature = "lang-pt")]
150    langs.push(&pt::PORTUGUESE);
151    #[cfg(feature = "lang-ko")]
152    langs.push(&ko::KOREAN);
153    langs
154}
155
156/// Attempt to detect the language of a date string by scoring keyword matches.
157pub fn detect_language(input_lower: &str) -> Vec<&'static LanguageData> {
158    let langs = available_languages();
159
160    let mut scored_langs = Vec::new();
161
162    for lang in &langs {
163        let mut score = 0usize;
164
165        // Score month name matches
166        for name in lang.months_long.iter().chain(lang.months_short.iter()) {
167            if !name.is_empty() && input_lower.contains(*name) {
168                score += 3;
169            }
170        }
171
172        // Score weekday name matches
173        for name in lang.weekdays_long.iter().chain(lang.weekdays_short.iter()) {
174            if !name.is_empty() && input_lower.contains(*name) {
175                score += 2;
176            }
177        }
178
179        // Score relative/special keyword matches
180        for word in lang
181            .ago_words
182            .iter()
183            .chain(lang.future_words.iter())
184            .chain(lang.future_prefix.iter())
185            .chain(lang.past_prefix.iter())
186            .chain(lang.last_words.iter())
187            .chain(lang.next_words.iter())
188        {
189            if !word.is_empty() && input_lower.contains(*word) {
190                score += 2;
191            }
192        }
193
194        for &(_, words) in lang.special_words {
195            for word in words {
196                if !word.is_empty() && input_lower.contains(*word) {
197                    score += 3;
198                }
199            }
200        }
201
202        // Score time unit keyword matches
203        for &(_, keywords) in lang.time_units {
204            for kw in keywords {
205                if !kw.is_empty() && input_lower.contains(*kw) {
206                    score += 1;
207                }
208            }
209        }
210
211        if score > 0 {
212            scored_langs.push((*lang, score));
213        }
214    }
215
216    scored_langs.sort_by_key(|&(_, score)| std::cmp::Reverse(score));
217    scored_langs.into_iter().map(|(l, _)| l).collect()
218}