langcodec/
types.rs

1//! Core, format-agnostic types for langcodec.
2//! Parsers decode into these; encoders serialize these.
3
4use std::{
5    collections::{BTreeMap, HashMap},
6    fmt::Display,
7    str::FromStr,
8};
9
10use lazy_static::lazy_static;
11use regex::Regex;
12use serde::{Deserialize, Serialize};
13use unic_langid::LanguageIdentifier;
14
15use crate::{error::Error, traits::Parser};
16
// Static regex patterns for HTML tag removal.
// `lazy_static!` compiles each pattern once, on first use; the `unwrap()` calls can
// only panic if one of the literal patterns below is not a valid regex.
lazy_static! {
    // Any tag-like span: `<`, one or more characters other than `>`, then `>`.
    static ref HTML_TAG_REGEX: Regex = Regex::new(r"<[^>]+>").unwrap();
    // Closing tags such as `</font>`. Every span this matches is also matched by
    // `HTML_TAG_REGEX`, since `/` is just another non-`>` character.
    static ref HTML_CLOSE_TAG_REGEX: Regex = Regex::new(r"</[^>]+>").unwrap();
}
22
23impl Parser for Vec<Resource> {
24    /// Parse from any reader.
25    fn from_reader<R: std::io::BufRead>(reader: R) -> Result<Self, Error> {
26        serde_json::from_reader(reader).map_err(Error::Parse)
27    }
28
29    /// Write to any writer (file, memory, etc.).
30    fn to_writer<W: std::io::Write>(&self, mut writer: W) -> Result<(), Error> {
31        serde_json::to_writer(&mut writer, self).map_err(Error::Parse)
32    }
33}
34
35/// A complete localization resource (corresponds to a `.strings`, `.xml`, `.xcstrings`, etc. file).
36/// Contains metadata and all entries for a single language and domain.
37#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
38pub struct Resource {
39    /// Optional header-level metadata (language code, domain/project, etc.).
40    pub metadata: Metadata,
41
42    /// Ordered list of all entries in this resource.
43    #[serde(skip_serializing_if = "Vec::is_empty")]
44    #[serde(default)]
45    pub entries: Vec<Entry>,
46}
47
48impl Resource {
49    /// Add an entry to the resource.
50    ///
51    /// ```rust
52    /// use langcodec::types::{Resource, Entry, Translation, EntryStatus, Metadata};
53    /// use std::collections::HashMap;
54    ///
55    /// let mut resource = Resource {
56    ///     metadata: Metadata {
57    ///         language: "en".to_string(),
58    ///         domain: "test".to_string(),
59    ///         custom: HashMap::new(),
60    ///     },
61    ///     entries: Vec::new(),
62    /// };
63    /// resource.add_entry(Entry {
64    ///     id: "hello".to_string(),
65    ///     value: Translation::Singular("Hello".to_string()),
66    ///     status: EntryStatus::Translated,
67    ///     comment: None,
68    ///     custom: HashMap::new(),
69    /// });
70    /// ```
71    ///
72    /// # Returns
73    ///
74    /// The added entry.
75    pub fn add_entry(&mut self, entry: Entry) {
76        self.entries.push(entry);
77    }
78
79    /// Find an entry by its id.
80    ///
81    /// ```rust
82    /// use langcodec::types::{Resource, Entry, Translation, EntryStatus, Metadata};
83    /// use std::collections::HashMap;
84    ///
85    /// let mut resource = Resource {
86    ///     metadata: Metadata {
87    ///         language: "en".to_string(),
88    ///         domain: "test".to_string(),
89    ///         custom: HashMap::new(),
90    ///     },
91    ///     entries: Vec::new(),
92    /// };
93    /// resource.add_entry(Entry {
94    ///     id: "hello".to_string(),
95    ///     value: Translation::Singular("Hello".to_string()),
96    ///     status: EntryStatus::Translated,
97    ///     comment: None,
98    ///     custom: HashMap::new(),
99    /// });
100    /// let entry = resource.find_entry("hello").unwrap();
101    /// assert_eq!(entry.value, Translation::Singular("Hello".to_string()));
102    /// assert_eq!(entry.status, EntryStatus::Translated);
103    /// assert_eq!(entry.comment, None);
104    /// ```
105    pub fn find_entry(&self, id: &str) -> Option<&Entry> {
106        self.entries.iter().find(|e| e.id == id)
107    }
108
109    /// Find a mutable entry by its id.
110    ///
111    /// ```rust
112    /// use langcodec::types::{Resource, Entry, Translation, EntryStatus, Metadata};
113    /// use std::collections::HashMap;
114    ///
115    /// let mut resource = Resource {
116    ///     metadata: Metadata {
117    ///         language: "en".to_string(),
118    ///         domain: "test".to_string(),
119    ///         custom: HashMap::new(),
120    ///     },
121    ///     entries: Vec::new(),
122    /// };
123    /// resource.add_entry(Entry {
124    ///     id: "hello".to_string(),
125    ///     value: Translation::Singular("Hello".to_string()),
126    ///     status: EntryStatus::Translated,
127    ///     comment: None,
128    ///     custom: HashMap::new(),
129    /// });
130    /// let entry = resource.find_entry_mut("hello").unwrap();
131    /// assert_eq!(entry.value, Translation::Singular("Hello".to_string()));
132    /// assert_eq!(entry.status, EntryStatus::Translated);
133    /// assert_eq!(entry.comment, None);
134    /// entry.value = Translation::Singular("Hello, World!".to_string());
135    /// entry.status = EntryStatus::NeedsReview;
136    /// entry.comment = Some("Hello, World!".to_string());
137    /// assert_eq!(entry.value, Translation::Singular("Hello, World!".to_string()));
138    /// assert_eq!(entry.status, EntryStatus::NeedsReview);
139    /// assert_eq!(entry.comment, Some("Hello, World!".to_string()));
140    /// ```
141    pub fn find_entry_mut(&mut self, id: &str) -> Option<&mut Entry> {
142        self.entries.iter_mut().find(|e| e.id == id)
143    }
144
145    pub fn parse_language_identifier(&self) -> Option<LanguageIdentifier> {
146        self.metadata.language.parse().ok()
147    }
148
149    /// Check if this resource has a specific language.
150    pub fn has_language(&self, lang: &str) -> bool {
151        match (
152            self.parse_language_identifier(),
153            lang.parse::<LanguageIdentifier>(),
154        ) {
155            (Some(lang_id), Ok(target_lang)) => lang_id.language == target_lang.language,
156            _ => false,
157        }
158    }
159}
160
161/// Free-form metadata for the resource as a whole.
162///
163/// `language` and `domain` are standard; any extra fields can be placed in `custom`.
164#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
165pub struct Metadata {
166    /// The language code (e.g. "en", "fr", "es", etc.).
167    pub language: String,
168
169    /// The domain or project name (e.g. "MyApp").
170    #[serde(skip_serializing_if = "String::is_empty")]
171    #[serde(default)]
172    pub domain: String,
173
174    /// Any other metadata fields not covered by the above.
175    pub custom: HashMap<String, String>,
176}
177
178impl Display for Metadata {
179    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
180        let mut map_all = self.custom.clone();
181        map_all.insert("language".to_string(), self.language.clone());
182        map_all.insert("domain".to_string(), self.domain.clone());
183        write!(
184            f,
185            "Metadata {{ {} }}",
186            map_all
187                .iter()
188                .map(|(k, v)| format!("{}: {}", k, v))
189                .collect::<Vec<_>>()
190                .join(", ")
191        )
192    }
193}
194
195/// A single message/translation entry.
196#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
197pub struct Entry {
198    /// Unique message identifier (key).  
199    /// For PO/XLIFF this is `msgid` or `<trans-unit>@id`; for .strings it’s the key.
200    pub id: String,
201
202    /// Translation context corresponding to this message.
203    pub value: Translation,
204
205    /// Optional comment for translators.
206    #[serde(skip_serializing_if = "Option::is_none")]
207    #[serde(default)]
208    pub comment: Option<String>,
209
210    /// Entry translation status.
211    pub status: EntryStatus,
212
213    /// Any additional, format-specific data attached to this entry.
214    #[serde(skip_serializing_if = "HashMap::is_empty")]
215    #[serde(default)]
216    pub custom: HashMap<String, String>,
217}
218
219impl Display for Entry {
220    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
221        write!(
222            f,
223            "Entry {{ id: {}, value: {}, status: {:?} }}",
224            self.id, self.value, self.status
225        )
226    }
227}
228
/// The translated content of an entry: either one string or a set of plural forms.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
pub enum Translation {
    /// A single translation without plural forms.
    Singular(String),

    /// A translation with plural forms.
    Plural(Plural),
}
237
238impl Translation {
239    pub fn plain_translation(translation: Translation) -> Translation {
240        match translation {
241            Translation::Singular(value) => {
242                Translation::Singular(make_plain_translation_string(value))
243            }
244            Translation::Plural(plural) => {
245                // Return the first plural form as a singular translation
246                let id = plural.id;
247                let forms = plural.forms.into_iter().next().map_or_else(
248                    BTreeMap::new,
249                    |(category, value)| {
250                        let mut map = BTreeMap::new();
251                        map.insert(category, make_plain_translation_string(value));
252                        map
253                    },
254                );
255                Translation::Plural(Plural { id, forms })
256            }
257        }
258    }
259
260    pub fn plain_translation_string(&self) -> String {
261        match self {
262            Translation::Singular(value) => make_plain_translation_string(value.clone()),
263            Translation::Plural(plural) => {
264                // Return the plural ID, not the first form
265                plural.id.clone()
266            }
267        }
268    }
269}
270
271impl Display for Translation {
272    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
273        match self {
274            Translation::Singular(value) => write!(f, "{}", value),
275            Translation::Plural(plural) => write!(f, "{}", plural.id), // Displaying only the ID for brevity
276        }
277    }
278}
279
280/// All plural forms for a single message.
281#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
282pub struct Plural {
283    /// The canonical plural ID (`msgid_plural` in PO).
284    pub id: String,
285
286    /// Map from category → translation.  
287    /// Categories depend on the target locale’s rules.
288    #[serde(skip_serializing_if = "BTreeMap::is_empty")]
289    #[serde(default)]
290    pub forms: BTreeMap<PluralCategory, String>,
291}
292
293impl Plural {
294    pub(crate) fn new(
295        id: &str,
296        forms: impl Iterator<Item = (PluralCategory, String)>,
297    ) -> Option<Self> {
298        let forms: BTreeMap<PluralCategory, String> = forms.collect();
299
300        if forms.is_empty() {
301            None // No plural forms provided
302        } else {
303            Some(Self {
304                id: id.to_string(),
305                forms,
306            })
307        }
308    }
309}
310
311/// Standard CLDR plural forms.
312#[derive(Ord, PartialOrd, Eq, PartialEq, Debug, Clone, Deserialize, Serialize)]
313#[serde(rename_all = "snake_case")]
314#[derive(Hash)]
315pub enum PluralCategory {
316    Zero,
317    One,
318    Two,
319    Few,
320    Many,
321    Other,
322}
323
324impl FromStr for PluralCategory {
325    type Err = String;
326
327    fn from_str(s: &str) -> Result<Self, Self::Err> {
328        match s.to_uppercase().as_str() {
329            "ZERO" => Ok(PluralCategory::Zero),
330            "ONE" => Ok(PluralCategory::One),
331            "TWO" => Ok(PluralCategory::Two),
332            "FEW" => Ok(PluralCategory::Few),
333            "MANY" => Ok(PluralCategory::Many),
334            "OTHER" => Ok(PluralCategory::Other),
335            _ => Err(format!("Unknown plural category: {}", s)),
336        }
337    }
338}
339
/// Status of a translation entry.
///
/// Variants are (de)serialized under their snake_case names
/// (e.g. `do_not_translate`, `needs_review`).
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum EntryStatus {
    /// The entry is not translated and should not be.
    DoNotTranslate,

    /// The entry is new and has not been translated yet.
    New,

    /// The entry is outdated.
    Stale,

    /// The entry has been modified and needs review.
    NeedsReview,

    /// The entry is translated and reviewed.
    Translated,
}
359
360impl FromStr for EntryStatus {
361    type Err = String;
362
363    fn from_str(s: &str) -> Result<Self, Self::Err> {
364        match s.to_uppercase().as_str() {
365            "DO_NOT_TRANSLATE" => Ok(EntryStatus::DoNotTranslate),
366            "NEW" => Ok(EntryStatus::New),
367            "STALE" => Ok(EntryStatus::Stale),
368            "NEEDS_REVIEW" => Ok(EntryStatus::NeedsReview),
369            "TRANSLATED" => Ok(EntryStatus::Translated),
370            _ => Err(format!("Unknown entry status: {}", s)),
371        }
372    }
373}
374
/// Strategy for handling conflicts when merging resources.
///
/// NOTE(review): `Last` is described as the default, but no `Default` impl is
/// visible next to this definition, so the default is presumably chosen by the
/// merging code; confirm against the caller.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ConflictStrategy {
    /// Keep the first occurrence of a key
    First,
    /// Keep the last occurrence of a key (default)
    Last,
    /// Skip conflicting entries
    Skip,
}
385
386// Remove HTML tags from translation string.
387fn make_plain_translation_string(translation: String) -> String {
388    let mut translation = translation;
389    translation = translation.trim().to_string();
390
391    // Remove all HTML tags (non-greedy)
392    translation = HTML_TAG_REGEX.replace_all(&translation, "").to_string();
393
394    // Remove all closing tags like </font>
395    translation = HTML_CLOSE_TAG_REGEX
396        .replace_all(&translation, "")
397        .to_string();
398
399    // Replace any newline characters with explicit "\n" for better formatting,
400    translation = translation
401        .lines()
402        .map(str::trim_start)
403        .collect::<Vec<_>>()
404        .join(r"\n"); // Use r"\n" for a literal \n
405
406    translation
407}
408
#[cfg(test)]
mod tests {
    //! Unit tests for the core types. Shared fixtures live in the helper
    //! functions below so each test only spells out what it is checking.

    use super::*;
    use std::collections::HashMap;

    /// Build an entry-less resource in the `test` domain for the given language tag.
    fn resource_for(language: &str) -> Resource {
        Resource {
            metadata: Metadata {
                language: language.to_string(),
                domain: "test".to_string(),
                custom: HashMap::new(),
            },
            entries: Vec::new(),
        }
    }

    /// Build a translated singular entry `hello` -> `Hello` with an optional comment.
    fn hello_entry(comment: Option<&str>) -> Entry {
        Entry {
            id: "hello".to_string(),
            value: Translation::Singular("Hello".to_string()),
            comment: comment.map(String::from),
            status: EntryStatus::Translated,
            custom: HashMap::new(),
        }
    }

    /// Build the two-form `apples` plural used by several tests.
    fn apples_plural() -> Plural {
        Plural::new(
            "apples",
            vec![
                (PluralCategory::One, "1 apple".to_string()),
                (PluralCategory::Other, "%d apples".to_string()),
            ]
            .into_iter(),
        )
        .unwrap()
    }

    #[test]
    fn test_resource_add_entry() {
        let mut resource = resource_for("en");

        resource.add_entry(hello_entry(None));
        assert_eq!(resource.entries.len(), 1);
        assert_eq!(resource.entries[0].id, "hello");
    }

    #[test]
    fn test_resource_find_entry() {
        let mut resource = resource_for("en");
        resource.add_entry(hello_entry(None));

        assert_eq!(
            resource.find_entry("hello").map(|e| e.id.as_str()),
            Some("hello")
        );
        assert!(resource.find_entry("missing").is_none());

        resource.find_entry_mut("hello").unwrap().status = EntryStatus::NeedsReview;
        assert_eq!(resource.entries[0].status, EntryStatus::NeedsReview);
        assert!(resource.find_entry_mut("missing").is_none());
    }

    #[test]
    fn test_resource_parse_language_identifier() {
        let resource = resource_for("en-US");

        let lang_id = resource.parse_language_identifier().unwrap();
        assert_eq!(lang_id.language.as_str(), "en");
        assert_eq!(lang_id.region.unwrap().as_str(), "US");
    }

    #[test]
    fn test_resource_parse_invalid_language() {
        let resource = resource_for("not-a-language");

        // This should fail because "not-a-language" is not a valid BCP 47 language identifier
        assert!(resource.parse_language_identifier().is_none());
    }

    #[test]
    fn test_resource_has_language() {
        let resource = resource_for("en-US");

        // Only the primary language subtag is compared.
        assert!(resource.has_language("en"));
        assert!(resource.has_language("en-US"));
        assert!(!resource.has_language("fr"));
    }

    #[test]
    fn test_metadata_display() {
        let mut metadata = resource_for("en").metadata;
        metadata
            .custom
            .insert("version".to_string(), "1.0".to_string());

        let display = format!("{}", metadata);
        assert!(display.contains("language: en"));
        assert!(display.contains("domain: test"));
        assert!(display.contains("version: 1.0"));
    }

    #[test]
    fn test_entry_display() {
        let entry = hello_entry(Some("Greeting"));

        let display = format!("{}", entry);
        assert_eq!(
            display,
            "Entry { id: hello, value: Hello, status: Translated }"
        );
        // The comment is not part of the display output.
        assert!(!display.contains("Greeting"));
    }

    #[test]
    fn test_translation_plain_translation() {
        let singular = Translation::Singular("Hello".to_string());
        let plain = Translation::plain_translation(singular);
        assert_eq!(plain, Translation::Singular("Hello".to_string()));

        let tagged = Translation::Singular("<b>Hello</b>".to_string());
        assert_eq!(
            Translation::plain_translation(tagged),
            Translation::Singular("Hello".to_string())
        );
    }

    #[test]
    fn test_translation_plain_translation_string() {
        let singular = Translation::Singular("Hello".to_string());
        assert_eq!(singular.plain_translation_string(), "Hello");

        let plural = Translation::Plural(apples_plural());
        // For plural translations, we return the plural ID, not the first form
        assert_eq!(plural.plain_translation_string(), "apples");
    }

    #[test]
    fn test_translation_display() {
        let singular = Translation::Singular("Hello".to_string());
        assert_eq!(format!("{}", singular), "Hello");

        let plural = Translation::Plural(apples_plural());
        assert_eq!(format!("{}", plural), "apples");
    }

    #[test]
    fn test_plural_new() {
        let plural = apples_plural();
        assert_eq!(plural.id, "apples");
        assert_eq!(plural.forms.len(), 2);
        assert_eq!(plural.forms.get(&PluralCategory::One).unwrap(), "1 apple");
        assert_eq!(
            plural.forms.get(&PluralCategory::Other).unwrap(),
            "%d apples"
        );
    }

    #[test]
    fn test_plural_new_empty() {
        let forms: Vec<(PluralCategory, String)> = vec![];
        let plural = Plural::new("apples", forms.into_iter());
        assert!(plural.is_none());
    }

    #[test]
    fn test_plural_category_from_str() {
        let expected = [
            ("zero", PluralCategory::Zero),
            ("one", PluralCategory::One),
            ("two", PluralCategory::Two),
            ("few", PluralCategory::Few),
            ("many", PluralCategory::Many),
            ("other", PluralCategory::Other),
        ];
        for (name, category) in expected.iter() {
            assert_eq!(&PluralCategory::from_str(name).unwrap(), category);
            // Parsing ignores letter case.
            assert_eq!(
                &PluralCategory::from_str(&name.to_ascii_uppercase()).unwrap(),
                category
            );
        }
    }

    #[test]
    fn test_plural_category_from_str_invalid() {
        assert!(PluralCategory::from_str("invalid").is_err());
    }

    #[test]
    fn test_entry_status_from_str() {
        let expected = [
            ("do_not_translate", EntryStatus::DoNotTranslate),
            ("new", EntryStatus::New),
            ("stale", EntryStatus::Stale),
            ("needs_review", EntryStatus::NeedsReview),
            ("translated", EntryStatus::Translated),
        ];
        for (name, status) in expected.iter() {
            assert_eq!(&EntryStatus::from_str(name).unwrap(), status);
            // Parsing ignores letter case.
            assert_eq!(
                &EntryStatus::from_str(&name.to_ascii_uppercase()).unwrap(),
                status
            );
        }
    }

    #[test]
    fn test_entry_status_from_str_invalid() {
        assert!(EntryStatus::from_str("invalid").is_err());
    }

    #[test]
    fn test_make_plain_translation_string() {
        let result = make_plain_translation_string("Hello".to_string());
        assert_eq!(result, "Hello");

        // Real newlines become a literal backslash + `n`.
        let result = make_plain_translation_string("Hello\nWorld".to_string());
        assert_eq!(result, "Hello\\nWorld");

        // Leading whitespace of continuation lines is dropped.
        let result = make_plain_translation_string("Hello\n  World".to_string());
        assert_eq!(result, "Hello\\nWorld");

        // Surrounding whitespace is trimmed.
        let result = make_plain_translation_string("  Hello  ".to_string());
        assert_eq!(result, "Hello");
    }

    #[test]
    fn test_make_plain_translation_string_strips_html() {
        let result = make_plain_translation_string("<b>Hello</b>".to_string());
        assert_eq!(result, "Hello");

        let result =
            make_plain_translation_string("<font color=\"red\">Hi</font> there".to_string());
        assert_eq!(result, "Hi there");

        let result = make_plain_translation_string("line one<br/>\nline two".to_string());
        assert_eq!(result, "line one\\nline two");
    }

    #[test]
    fn test_resource_parser_trait() {
        let resources = vec![resource_for("en")];

        let mut writer = Vec::new();
        resources.to_writer(&mut writer).unwrap();

        let reader = std::io::Cursor::new(writer);
        let parsed: Vec<Resource> = Vec::<Resource>::from_reader(reader).unwrap();
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].metadata.language, "en");
    }
}