langcodec/
types.rs

1//! Core, format-agnostic types for langcodec.
2//! Parsers decode into these; encoders serialize these.
3
4use std::{
5    collections::{BTreeMap, HashMap},
6    fmt::Display,
7    str::FromStr,
8};
9
10use lazy_static::lazy_static;
11use regex::Regex;
12use serde::{Deserialize, Serialize};
13use unic_langid::LanguageIdentifier;
14
15use crate::{error::Error, traits::Parser};
16
// Static regex patterns for HTML tag removal, compiled lazily by `lazy_static!`.
// Both are applied by `make_plain_translation_string`.
lazy_static! {
    // Matches any tag-like span: `<`, one or more characters other than `>`, then `>`.
    static ref HTML_TAG_REGEX: Regex = Regex::new(r"<[^>]+>").unwrap();
    // Matches closing tags such as `</font>`: `</`, one or more characters other than `>`, then `>`.
    // NOTE(review): every span this matches is also matched by HTML_TAG_REGEX.
    static ref HTML_CLOSE_TAG_REGEX: Regex = Regex::new(r"</[^>]+>").unwrap();
}
22
23impl Parser for Vec<Resource> {
24    /// Parse from any reader.
25    fn from_reader<R: std::io::BufRead>(reader: R) -> Result<Self, Error> {
26        serde_json::from_reader(reader).map_err(Error::Parse)
27    }
28
29    /// Write to any writer (file, memory, etc.).
30    fn to_writer<W: std::io::Write>(&self, mut writer: W) -> Result<(), Error> {
31        serde_json::to_writer(&mut writer, self).map_err(Error::Parse)
32    }
33}
34
35/// A complete localization resource (corresponds to a `.strings`, `.xml`, `.xcstrings`, etc. file).
36/// Contains metadata and all entries for a single language and domain.
37#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
38pub struct Resource {
39    /// Optional header-level metadata (language code, domain/project, etc.).
40    pub metadata: Metadata,
41
42    /// Ordered list of all entries in this resource.
43    #[serde(skip_serializing_if = "Vec::is_empty")]
44    #[serde(default)]
45    pub entries: Vec<Entry>,
46}
47
48impl Resource {
49    pub(crate) fn add_entry(&mut self, entry: Entry) {
50        self.entries.push(entry);
51    }
52
53    pub fn parse_language_identifier(&self) -> Option<LanguageIdentifier> {
54        self.metadata.language.parse().ok()
55    }
56
57    /// Check if this resource has a specific language.
58    pub fn has_language(&self, lang: &str) -> bool {
59        match (
60            self.parse_language_identifier(),
61            lang.parse::<LanguageIdentifier>(),
62        ) {
63            (Some(lang_id), Ok(target_lang)) => lang_id.language == target_lang.language,
64            _ => false,
65        }
66    }
67}
68
69/// Free-form metadata for the resource as a whole.
70///
71/// `language` and `domain` are standard; any extra fields can be placed in `custom`.
72#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
73pub struct Metadata {
74    /// The language code (e.g. "en", "fr", "es", etc.).
75    pub language: String,
76
77    /// The domain or project name (e.g. "MyApp").
78    #[serde(skip_serializing_if = "String::is_empty")]
79    #[serde(default)]
80    pub domain: String,
81
82    /// Any other metadata fields not covered by the above.
83    pub custom: HashMap<String, String>,
84}
85
86impl Display for Metadata {
87    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
88        let mut map_all = self.custom.clone();
89        map_all.insert("language".to_string(), self.language.clone());
90        map_all.insert("domain".to_string(), self.domain.clone());
91        write!(
92            f,
93            "Metadata {{ {} }}",
94            map_all
95                .iter()
96                .map(|(k, v)| format!("{}: {}", k, v))
97                .collect::<Vec<_>>()
98                .join(", ")
99        )
100    }
101}
102
103/// A single message/translation entry.
104#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
105pub struct Entry {
106    /// Unique message identifier (key).  
107    /// For PO/XLIFF this is `msgid` or `<trans-unit>@id`; for .strings it’s the key.
108    pub id: String,
109
110    /// Translation context corresponding to this message.
111    pub value: Translation,
112
113    /// Optional comment for translators.
114    #[serde(skip_serializing_if = "Option::is_none")]
115    #[serde(default)]
116    pub comment: Option<String>,
117
118    /// Entry translation status.
119    pub status: EntryStatus,
120
121    /// Any additional, format-specific data attached to this entry.
122    #[serde(skip_serializing_if = "HashMap::is_empty")]
123    #[serde(default)]
124    pub custom: HashMap<String, String>,
125}
126
127impl Display for Entry {
128    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
129        write!(
130            f,
131            "Entry {{ id: {}, value: {}, status: {:?} }}",
132            self.id, self.value, self.status
133        )
134    }
135}
136
137#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
138pub enum Translation {
139    /// A single translation without plural forms.
140    Singular(String),
141
142    /// A translation with plural forms.
143    Plural(Plural),
144}
145
146impl Translation {
147    pub fn plain_translation(translation: Translation) -> Translation {
148        match translation {
149            Translation::Singular(value) => {
150                Translation::Singular(make_plain_translation_string(value))
151            }
152            Translation::Plural(plural) => {
153                // Return the first plural form as a singular translation
154                let id = plural.id;
155                let forms = plural.forms.into_iter().next().map_or_else(
156                    BTreeMap::new,
157                    |(category, value)| {
158                        let mut map = BTreeMap::new();
159                        map.insert(category, make_plain_translation_string(value));
160                        map
161                    },
162                );
163                Translation::Plural(Plural { id, forms })
164            }
165        }
166    }
167
168    pub fn plain_translation_string(&self) -> String {
169        match self {
170            Translation::Singular(value) => make_plain_translation_string(value.clone()),
171            Translation::Plural(plural) => {
172                // Return the plural ID, not the first form
173                plural.id.clone()
174            }
175        }
176    }
177}
178
179impl Display for Translation {
180    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
181        match self {
182            Translation::Singular(value) => write!(f, "{}", value),
183            Translation::Plural(plural) => write!(f, "{}", plural.id), // Displaying only the ID for brevity
184        }
185    }
186}
187
188/// All plural forms for a single message.
189#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
190pub struct Plural {
191    /// The canonical plural ID (`msgid_plural` in PO).
192    pub id: String,
193
194    /// Map from category → translation.  
195    /// Categories depend on the target locale’s rules.
196    #[serde(skip_serializing_if = "BTreeMap::is_empty")]
197    #[serde(default)]
198    pub forms: BTreeMap<PluralCategory, String>,
199}
200
201impl Plural {
202    pub(crate) fn new(
203        id: &str,
204        forms: impl Iterator<Item = (PluralCategory, String)>,
205    ) -> Option<Self> {
206        let forms: BTreeMap<PluralCategory, String> = forms.collect();
207
208        if forms.is_empty() {
209            None // No plural forms provided
210        } else {
211            Some(Self {
212                id: id.to_string(),
213                forms,
214            })
215        }
216    }
217}
218
219/// Standard CLDR plural forms.
220#[derive(Ord, PartialOrd, Eq, PartialEq, Debug, Clone, Deserialize, Serialize)]
221#[serde(rename_all = "snake_case")]
222#[derive(Hash)]
223pub enum PluralCategory {
224    Zero,
225    One,
226    Two,
227    Few,
228    Many,
229    Other,
230}
231
232impl FromStr for PluralCategory {
233    type Err = String;
234
235    fn from_str(s: &str) -> Result<Self, Self::Err> {
236        match s.to_uppercase().as_str() {
237            "ZERO" => Ok(PluralCategory::Zero),
238            "ONE" => Ok(PluralCategory::One),
239            "TWO" => Ok(PluralCategory::Two),
240            "FEW" => Ok(PluralCategory::Few),
241            "MANY" => Ok(PluralCategory::Many),
242            "OTHER" => Ok(PluralCategory::Other),
243            _ => Err(format!("Unknown plural category: {}", s)),
244        }
245    }
246}
247
248/// Status of a translation entry.
249#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
250#[serde(rename_all = "snake_case")]
251pub enum EntryStatus {
252    /// The entry is not translated and should not be.
253    DoNotTranslate,
254
255    /// The entry is new and has not been translated yet.
256    New,
257
258    /// The entry is outdated.
259    Stale,
260
261    /// The entry has been modified and needs review.
262    NeedsReview,
263
264    /// The entry is translated and reviewed.
265    Translated,
266}
267
268impl FromStr for EntryStatus {
269    type Err = String;
270
271    fn from_str(s: &str) -> Result<Self, Self::Err> {
272        match s.to_uppercase().as_str() {
273            "DO_NOT_TRANSLATE" => Ok(EntryStatus::DoNotTranslate),
274            "NEW" => Ok(EntryStatus::New),
275            "STALE" => Ok(EntryStatus::Stale),
276            "NEEDS_REVIEW" => Ok(EntryStatus::NeedsReview),
277            "TRANSLATED" => Ok(EntryStatus::Translated),
278            _ => Err(format!("Unknown entry status: {}", s)),
279        }
280    }
281}
282
/// How to resolve entries that share a key when resources are merged.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ConflictStrategy {
    /// Keep the first occurrence of a key.
    First,
    /// Keep the last occurrence of a key (default).
    ///
    /// NOTE(review): no `Default` impl is visible here; the default is
    /// presumably applied by the merging code — confirm against callers.
    Last,
    /// Skip conflicting entries.
    Skip,
}
293
294// Remove HTML tags from translation string.
295fn make_plain_translation_string(translation: String) -> String {
296    let mut translation = translation;
297    translation = translation.trim().to_string();
298
299    // Remove all HTML tags (non-greedy)
300    translation = HTML_TAG_REGEX.replace_all(&translation, "").to_string();
301
302    // Remove all closing tags like </font>
303    translation = HTML_CLOSE_TAG_REGEX
304        .replace_all(&translation, "")
305        .to_string();
306
307    // Replace any newline characters with explicit "\n" for better formatting,
308    translation = translation
309        .lines()
310        .map(str::trim_start)
311        .collect::<Vec<_>>()
312        .join(r"\n"); // Use r"\n" for a literal \n
313
314    translation
315}
316
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Builds an entry-less resource in domain "test" for the given language tag.
    fn resource_for(language: &str) -> Resource {
        Resource {
            metadata: Metadata {
                language: language.to_string(),
                domain: "test".to_string(),
                custom: HashMap::new(),
            },
            entries: Vec::new(),
        }
    }

    /// Builds the translated "hello" entry shared by several tests.
    fn hello_entry(comment: Option<&str>) -> Entry {
        Entry {
            id: "hello".to_string(),
            value: Translation::Singular("Hello".to_string()),
            comment: comment.map(String::from),
            status: EntryStatus::Translated,
            custom: HashMap::new(),
        }
    }

    /// The one/other forms of the "apples" plural used by the plural tests.
    fn apple_forms() -> Vec<(PluralCategory, String)> {
        vec![
            (PluralCategory::One, "1 apple".to_string()),
            (PluralCategory::Other, "%d apples".to_string()),
        ]
    }

    /// The "apples" plural wrapped in a `Translation`.
    fn apples_translation() -> Translation {
        Translation::Plural(Plural::new("apples", apple_forms().into_iter()).unwrap())
    }

    #[test]
    fn test_resource_add_entry() {
        let mut resource = resource_for("en");

        resource.add_entry(hello_entry(None));

        assert_eq!(resource.entries.len(), 1);
        assert_eq!(resource.entries[0].id, "hello");
    }

    #[test]
    fn test_resource_parse_language_identifier() {
        let resource = resource_for("en-US");

        let lang_id = resource.parse_language_identifier().unwrap();
        assert_eq!(lang_id.language.as_str(), "en");
        assert_eq!(lang_id.region.unwrap().as_str(), "US");
    }

    #[test]
    fn test_resource_parse_invalid_language() {
        let resource = resource_for("not-a-language");

        // "not-a-language" is not a valid BCP 47 language identifier, so parsing fails.
        assert!(resource.parse_language_identifier().is_none());
    }

    #[test]
    fn test_resource_has_language() {
        let resource = resource_for("en-US");

        assert!(resource.has_language("en"));
        assert!(resource.has_language("en-US"));
        assert!(!resource.has_language("fr"));
    }

    #[test]
    fn test_metadata_display() {
        let mut custom = HashMap::new();
        custom.insert("version".to_string(), "1.0".to_string());
        let metadata = Metadata {
            language: "en".to_string(),
            domain: "test".to_string(),
            custom,
        };

        let display = format!("{}", metadata);
        for expected in vec!["language: en", "domain: test", "version: 1.0"] {
            assert!(display.contains(expected));
        }
    }

    #[test]
    fn test_entry_display() {
        let entry = hello_entry(Some("Greeting"));

        let display = format!("{}", entry);
        assert!(display.contains("hello"));
        assert!(display.contains("Hello"));
        // The display format may not include comments, so only the basic structure is checked.
        assert!(!display.is_empty());
    }

    #[test]
    fn test_translation_plain_translation() {
        let plain = Translation::plain_translation(Translation::Singular("Hello".to_string()));
        assert!(matches!(plain, Translation::Singular(_)));
    }

    #[test]
    fn test_translation_plain_translation_string() {
        let singular = Translation::Singular("Hello".to_string());
        assert_eq!(singular.plain_translation_string(), "Hello");

        // For plural translations the plural ID is returned, not the first form.
        assert_eq!(apples_translation().plain_translation_string(), "apples");
    }

    #[test]
    fn test_translation_display() {
        let singular = Translation::Singular("Hello".to_string());
        assert_eq!(format!("{}", singular), "Hello");

        assert!(format!("{}", apples_translation()).contains("apples"));
    }

    #[test]
    fn test_plural_new() {
        let plural = Plural::new("apples", apple_forms().into_iter()).unwrap();

        assert_eq!(plural.id, "apples");
        assert_eq!(plural.forms.len(), 2);
        assert_eq!(plural.forms.get(&PluralCategory::One).unwrap(), "1 apple");
        assert_eq!(
            plural.forms.get(&PluralCategory::Other).unwrap(),
            "%d apples"
        );
    }

    #[test]
    fn test_plural_new_empty() {
        let no_forms: Vec<(PluralCategory, String)> = Vec::new();
        assert!(Plural::new("apples", no_forms.into_iter()).is_none());
    }

    #[test]
    fn test_plural_category_from_str() {
        let cases = vec![
            ("zero", PluralCategory::Zero),
            ("one", PluralCategory::One),
            ("two", PluralCategory::Two),
            ("few", PluralCategory::Few),
            ("many", PluralCategory::Many),
            ("other", PluralCategory::Other),
        ];

        for (raw, expected) in cases {
            assert_eq!(PluralCategory::from_str(raw).unwrap(), expected);
        }
    }

    #[test]
    fn test_plural_category_from_str_invalid() {
        assert!(PluralCategory::from_str("invalid").is_err());
    }

    #[test]
    fn test_entry_status_from_str() {
        let cases = vec![
            ("do_not_translate", EntryStatus::DoNotTranslate),
            ("new", EntryStatus::New),
            ("stale", EntryStatus::Stale),
            ("needs_review", EntryStatus::NeedsReview),
            ("translated", EntryStatus::Translated),
        ];

        for (raw, expected) in cases {
            assert_eq!(EntryStatus::from_str(raw).unwrap(), expected);
        }
    }

    #[test]
    fn test_entry_status_from_str_invalid() {
        assert!(EntryStatus::from_str("invalid").is_err());
    }

    #[test]
    fn test_make_plain_translation_string() {
        assert_eq!(make_plain_translation_string("Hello".to_string()), "Hello");

        // A real line break becomes the two-character text `\n`.
        assert_eq!(
            make_plain_translation_string("Hello\nWorld".to_string()),
            "Hello\\nWorld"
        );
    }

    #[test]
    fn test_resource_parser_trait() {
        let resources = vec![resource_for("en")];

        // Round-trip through an in-memory buffer.
        let mut buffer = Vec::new();
        resources.to_writer(&mut buffer).unwrap();

        let parsed: Vec<Resource> =
            Vec::<Resource>::from_reader(std::io::Cursor::new(buffer)).unwrap();
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].metadata.language, "en");
    }
}