langcodec/formats/
android_strings.rs

1//! Support for Android `strings.xml` localization format.
2//!
3//! Supports singular `<string>` and plural `<plurals>` elements.
4//! Provides parsing, serialization, and conversion to/from the internal `Resource` model.
5
6use quick_xml::{
7    Reader, Writer,
8    events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event},
9};
10use serde::Serialize;
11use std::{
12    collections::HashMap,
13    fmt::Debug,
14    io::{BufRead, Write},
15    str::FromStr,
16};
17
18use crate::{
19    error::Error,
20    traits::Parser,
21    types::{Entry, EntryStatus, Metadata, Plural, PluralCategory, Resource, Translation},
22};
23
24#[derive(Debug, Serialize)]
25pub struct Format {
26    pub language: String,
27    pub strings: Vec<StringResource>,
28    pub plurals: Vec<PluralsResource>,
29}
30
31impl Parser for Format {
32    /// Parse from any reader.
33    fn from_reader<R: BufRead>(reader: R) -> Result<Self, Error> {
34        let mut xml_reader = Reader::from_reader(reader);
35        // Preserve whitespace inside text nodes so multi-line strings and
36        // indentation are kept exactly as authored in XML.
37        xml_reader.config_mut().trim_text(false);
38
39        let mut buf = Vec::new();
40        let mut string_resources = Vec::new();
41        let mut plural_resources: Vec<PluralsResource> = Vec::new();
42
43        loop {
44            match xml_reader.read_event_into(&mut buf) {
45                Ok(Event::Start(ref e)) if e.name().as_ref() == b"string" => {
46                    let sr = parse_string_resource(e, &mut xml_reader)?;
47                    string_resources.push(sr);
48                }
49                Ok(Event::Start(ref e)) if e.name().as_ref() == b"plurals" => {
50                    let pr = parse_plurals_resource(e, &mut xml_reader)?;
51                    plural_resources.push(pr);
52                }
53                Ok(Event::Eof) => break,
54                Ok(_) => {}
55                Err(e) => return Err(Error::XmlParse(e)),
56            }
57            buf.clear();
58        }
59        Ok(Format {
60            language: String::new(), // strings.xml does not contain language metadata
61            strings: string_resources,
62            plurals: plural_resources,
63        })
64    }
65
66    /// Write to any writer (file, memory, etc.).
67    fn to_writer<W: Write>(&self, mut writer: W) -> Result<(), Error> {
68        let mut xml_writer = Writer::new(&mut writer);
69
70        xml_writer.write_event(Event::Decl(BytesDecl::new("1.0", Some("utf-8"), None)))?;
71        xml_writer.write_event(Event::Text(BytesText::new("\n")))?;
72
73        let resources_start = BytesStart::new("resources");
74        xml_writer.write_event(Event::Start(resources_start))?;
75        xml_writer.write_event(Event::Text(BytesText::new("\n")))?;
76
77        for sr in &self.strings {
78            let mut elem = BytesStart::new("string");
79            elem.push_attribute(("name", sr.name.as_str()));
80            if let Some(trans) = sr.translatable {
81                elem.push_attribute(("translatable", if trans { "true" } else { "false" }));
82            }
83
84            xml_writer.write_event(Event::Start(elem))?;
85            xml_writer.write_event(Event::Text(BytesText::new(&sr.value)))?;
86            xml_writer.write_event(Event::End(BytesEnd::new("string")))?;
87            xml_writer.write_event(Event::Text(BytesText::new("\n")))?;
88        }
89
90        // Write plurals
91        for pr in &self.plurals {
92            let mut elem = BytesStart::new("plurals");
93            elem.push_attribute(("name", pr.name.as_str()));
94            if let Some(trans) = pr.translatable {
95                elem.push_attribute(("translatable", if trans { "true" } else { "false" }));
96            }
97            xml_writer.write_event(Event::Start(elem))?;
98            xml_writer.write_event(Event::Text(BytesText::new("\n")))?;
99
100            // Sort items by quantity for stable output
101            let mut items = pr.items.clone();
102            items.sort_by(|a, b| a.quantity.cmp(&b.quantity));
103            for item in &items {
104                let mut it = BytesStart::new("item");
105                it.push_attribute((
106                    "quantity",
107                    match item.quantity {
108                        PluralCategory::Zero => "zero",
109                        PluralCategory::One => "one",
110                        PluralCategory::Two => "two",
111                        PluralCategory::Few => "few",
112                        PluralCategory::Many => "many",
113                        PluralCategory::Other => "other",
114                    },
115                ));
116                xml_writer.write_event(Event::Start(it))?;
117                xml_writer.write_event(Event::Text(BytesText::new(&item.value)))?;
118                xml_writer.write_event(Event::End(BytesEnd::new("item")))?;
119                xml_writer.write_event(Event::Text(BytesText::new("\n")))?;
120            }
121
122            xml_writer.write_event(Event::End(BytesEnd::new("plurals")))?;
123            xml_writer.write_event(Event::Text(BytesText::new("\n")))?;
124        }
125
126        xml_writer.write_event(Event::End(BytesEnd::new("resources")))?;
127        xml_writer.write_event(Event::Text(BytesText::new("\n")))?;
128        Ok(())
129    }
130}
131
132impl From<Resource> for Format {
133    fn from(value: Resource) -> Self {
134        let mut strings = Vec::new();
135        let mut plurals = Vec::new();
136        for entry in value.entries {
137            match entry.value {
138                Translation::Empty => {} // Do nothing
139                Translation::Singular(_) => strings.push(StringResource::from_entry(&entry)),
140                Translation::Plural(p) => {
141                    let mut items: Vec<PluralItem> = p
142                        .forms
143                        .into_iter()
144                        .map(|(cat, v)| PluralItem {
145                            quantity: cat,
146                            value: v,
147                        })
148                        .collect();
149                    // Ensure stable order later
150                    items.sort_by(|a, b| a.quantity.cmp(&b.quantity));
151                    plurals.push(PluralsResource {
152                        name: entry.id,
153                        items,
154                        translatable: match entry.status {
155                            EntryStatus::Translated => Some(true),
156                            EntryStatus::DoNotTranslate => Some(false),
157                            _ => None,
158                        },
159                    });
160                }
161            }
162        }
163
164        Self {
165            language: value.metadata.language,
166            strings,
167            plurals,
168        }
169    }
170}
171
172impl From<Format> for Resource {
173    fn from(value: Format) -> Self {
174        let mut entries: Vec<Entry> = value
175            .strings
176            .into_iter()
177            .map(StringResource::into_entry)
178            .collect();
179
180        // Convert plurals to entries
181        for pr in value.plurals {
182            let mut forms = std::collections::BTreeMap::new();
183            for item in pr.items {
184                let PluralItem { quantity, value } = item;
185                forms.insert(quantity, value);
186            }
187            let all_empty = forms.values().all(|v| v.is_empty());
188            let status = match pr.translatable {
189                Some(true) => EntryStatus::Translated,
190                Some(false) => EntryStatus::DoNotTranslate,
191                None => {
192                    if all_empty {
193                        EntryStatus::New
194                    } else {
195                        EntryStatus::Translated
196                    }
197                }
198            };
199            entries.push(Entry {
200                id: pr.name.clone(),
201                value: Translation::Plural(Plural { id: pr.name, forms }),
202                comment: None,
203                status,
204                custom: HashMap::new(),
205            });
206        }
207
208        Resource {
209            metadata: Metadata {
210                language: value.language,
211                domain: String::new(), // strings.xml does not have a domain
212                custom: HashMap::new(),
213            },
214            entries,
215        }
216    }
217}
218
219#[derive(Debug, Serialize)]
220pub struct StringResource {
221    pub name: String,
222    pub value: String,
223    pub translatable: Option<bool>,
224}
225
226impl StringResource {
227    fn into_entry(self) -> Entry {
228        let StringResource {
229            name,
230            value,
231            translatable,
232        } = self;
233
234        let is_value_empty = value.is_empty();
235
236        Entry {
237            id: name,
238            value: Translation::Singular(value),
239            comment: None,
240            status: match translatable {
241                Some(true) => EntryStatus::Translated,
242                Some(false) => EntryStatus::DoNotTranslate,
243                None if is_value_empty => EntryStatus::New,
244                None => EntryStatus::Translated,
245            },
246            custom: HashMap::new(),
247        }
248    }
249
250    fn from_entry(entry: &Entry) -> Self {
251        StringResource {
252            name: entry.id.clone(),
253            value: match &entry.value {
254                Translation::Empty => String::new(),
255                Translation::Singular(v) => v.clone(),
256                Translation::Plural(_) => String::new(), // Plurals not supported in strings.xml
257            },
258            translatable: match entry.status {
259                EntryStatus::Translated => Some(true),
260                EntryStatus::DoNotTranslate => Some(false),
261                EntryStatus::New => None,
262                _ => None, // Other statuses not applicable
263            },
264        }
265    }
266}
267
268#[derive(Debug, Serialize, Clone)]
269pub struct PluralItem {
270    pub quantity: PluralCategory,
271    pub value: String,
272}
273
274#[derive(Debug, Serialize)]
275pub struct PluralsResource {
276    pub name: String,
277    pub items: Vec<PluralItem>,
278    pub translatable: Option<bool>,
279}
280
281fn parse_string_resource<R: BufRead>(
282    e: &BytesStart,
283    xml_reader: &mut Reader<R>,
284) -> Result<StringResource, Error> {
285    let mut name = None;
286    let mut translatable = None;
287
288    for attr in e.attributes().with_checks(false) {
289        let attr = attr.map_err(|e| Error::DataMismatch(e.to_string()))?;
290        match attr.key.as_ref() {
291            b"name" => name = Some(attr.unescape_value()?.to_string()),
292            b"translatable" => {
293                let v = attr.unescape_value()?.to_string();
294                translatable = Some(v == "true");
295            }
296            _ => {}
297        }
298    }
299    let name =
300        name.ok_or_else(|| Error::InvalidResource("string tag missing 'name'".to_string()))?;
301
302    let mut buf = Vec::new();
303    // Read and accumulate all text nodes until we reach the end of this <string> element
304    let mut value = String::new();
305    loop {
306        match xml_reader.read_event_into(&mut buf) {
307            Ok(Event::Text(e)) => {
308                value.push_str(e.unescape().map_err(Error::XmlParse)?.as_ref());
309            }
310            Ok(Event::End(ref end)) if end.name().as_ref() == b"string" => break,
311            Ok(Event::Eof) => return Err(Error::InvalidResource("Unexpected EOF".to_string())),
312            Ok(_) => (),
313            Err(e) => return Err(Error::XmlParse(e)),
314        }
315        buf.clear();
316    }
317
318    // Normalize: if the content ends with a newline followed only by indentation
319    // spaces, collapse that trailing indentation to 4 spaces to avoid
320    // propagating XML pretty-print indentation.
321    if let Some(pos) = value.rfind('\n') {
322        let tail = &value[pos + 1..];
323        if !tail.is_empty() && tail.chars().all(|c| c == ' ' || c == '\t') {
324            value.truncate(pos + 1);
325            value.push_str("    ");
326        }
327    }
328
329    // Convert actual newlines into literal "\\n" sequences for internal consistency
330    if value.contains('\n') {
331        value = value.split('\n').collect::<Vec<_>>().join("\\n");
332    }
333    Ok(StringResource {
334        name,
335        value,
336        translatable,
337    })
338}
339
340fn parse_plurals_resource<R: BufRead>(
341    e: &BytesStart,
342    xml_reader: &mut Reader<R>,
343) -> Result<PluralsResource, Error> {
344    let mut name: Option<String> = None;
345    let mut translatable: Option<bool> = None;
346
347    for attr in e.attributes().with_checks(false) {
348        let attr = attr.map_err(|e| Error::DataMismatch(e.to_string()))?;
349        match attr.key.as_ref() {
350            b"name" => name = Some(attr.unescape_value()?.to_string()),
351            b"translatable" => {
352                let v = attr.unescape_value()?.to_string();
353                translatable = Some(v == "true");
354            }
355            _ => {}
356        }
357    }
358    let name =
359        name.ok_or_else(|| Error::InvalidResource("plurals tag missing 'name'".to_string()))?;
360
361    let mut buf = Vec::new();
362    let mut items: Vec<PluralItem> = Vec::new();
363    loop {
364        match xml_reader.read_event_into(&mut buf) {
365            Ok(Event::Start(ref e)) if e.name().as_ref() == b"item" => {
366                // parse quantity
367                let mut quantity: Option<PluralCategory> = None;
368                for attr in e.attributes().with_checks(false) {
369                    let attr = attr.map_err(|e| Error::DataMismatch(e.to_string()))?;
370                    if attr.key.as_ref() == b"quantity" {
371                        let v = attr.unescape_value()?.to_string();
372                        quantity = PluralCategory::from_str(&v).ok();
373                    }
374                }
375                let quantity = quantity
376                    .ok_or_else(|| Error::InvalidResource("item missing 'quantity'".to_string()))?;
377                // Read text content until End(item)
378                let mut value = String::new();
379                let mut local_buf = Vec::new();
380                loop {
381                    match xml_reader.read_event_into(&mut local_buf) {
382                        Ok(Event::Text(e)) => {
383                            value.push_str(e.unescape().map_err(Error::XmlParse)?.as_ref());
384                        }
385                        Ok(Event::End(ref end)) if end.name().as_ref() == b"item" => break,
386                        Ok(Event::Eof) => {
387                            return Err(Error::InvalidResource(
388                                "Unexpected EOF inside <item>".to_string(),
389                            ));
390                        }
391                        Ok(_) => {}
392                        Err(e) => return Err(Error::XmlParse(e)),
393                    }
394                    local_buf.clear();
395                }
396                items.push(PluralItem { quantity, value });
397            }
398            Ok(Event::End(ref end)) if end.name().as_ref() == b"plurals" => break,
399            Ok(Event::Eof) => {
400                return Err(Error::InvalidResource(
401                    "Unexpected EOF inside <plurals>".to_string(),
402                ));
403            }
404            Ok(_) => {}
405            Err(e) => return Err(Error::XmlParse(e)),
406        }
407        buf.clear();
408    }
409
410    Ok(PluralsResource {
411        name,
412        items,
413        translatable,
414    })
415}
416
#[cfg(test)]
mod tests {

    use super::*;
    use crate::traits::Parser;
    use crate::types::EntryStatus;

    /// Parses `xml`, panicking on any parse error.
    fn parse(xml: &str) -> Format {
        Format::from_str(xml).unwrap()
    }

    /// Shorthand for a singular translation holding `text`.
    fn singular(text: &str) -> Translation {
        Translation::Singular(text.to_string())
    }

    /// Basic documents: attributes, empty values, multi-line text and
    /// non-ASCII content, both at the format level and after conversion.
    #[test]
    fn test_parse_basic_strings_xml() {
        let xml = r#"
        <resources>
            <string name="hello">Hello</string>
            <string name="bye" translatable="false">Goodbye</string>
            <string name="empty"></string>
            <string name="multiple_lines">Hello\n\n
World
            </string>
            <string name="some_non_ascii">你好</string>
        </resources>
        "#;
        let format = parse(xml);

        // (name, value, translatable) in document order.
        let expected: [(&str, &str, Option<bool>); 5] = [
            ("hello", "Hello", None), // no attribute
            ("bye", "Goodbye", Some(false)),
            ("empty", "", None),
            ("multiple_lines", r#"Hello\n\n\nWorld\n    "#, None),
            ("some_non_ascii", "你好", None),
        ];
        assert_eq!(format.strings.len(), 5);
        for (actual, (name, value, translatable)) in format.strings.iter().zip(expected.iter()) {
            assert_eq!(actual.name, *name);
            assert_eq!(actual.value, *value);
            assert_eq!(actual.translatable, *translatable);
        }

        let resource = Resource::from(format);
        assert_eq!(resource.entries.len(), 5);

        let first = &resource.entries[0];
        assert_eq!(first.id, "hello");
        assert_eq!(first.value, singular("Hello"));
        assert_eq!(first.status, EntryStatus::Translated);
        assert_eq!(first.comment, None);

        let lookups = [
            ("hello", "Hello"),
            ("multiple_lines", "Hello\\n\\n\\nWorld\\n    "),
            ("some_non_ascii", "你好"),
        ];
        for (id, text) in lookups.iter() {
            let entry = resource.find_entry(id).unwrap();
            assert_eq!(entry.value, singular(text));
            assert_eq!(entry.status, EntryStatus::Translated);
            assert_eq!(entry.comment, None);
        }
    }

    /// `<plurals>` elements end up in `plurals`, not in `strings`.
    #[test]
    fn test_parse_plurals_included() {
        let xml = r#"
        <resources>
            <string name="hello">Hello</string>
            <plurals name="apples">
                <item quantity="one">One apple</item>
                <item quantity="other">%d apples</item>
            </plurals>
        </resources>
        "#;
        let format = parse(xml);

        assert_eq!(format.strings.len(), 1);
        assert_eq!(format.strings[0].name, "hello");

        assert_eq!(format.plurals.len(), 1);
        let apples = &format.plurals[0];
        assert_eq!(apples.name, "apples");
        assert_eq!(apples.items.len(), 2);
    }

    /// A `<string>` without `name` is rejected with a descriptive error.
    #[test]
    fn test_missing_name_attribute() {
        let xml = r#"
        <resources>
            <string>No name attr</string>
        </resources>
        "#;
        let result = Format::from_str(xml);
        assert!(result.is_err());
        let message = format!("{:?}", result.unwrap_err());
        assert!(message.contains("missing 'name'"));
    }

    /// Writing a parsed document and parsing it again yields the same data.
    #[test]
    fn test_round_trip_serialization() {
        let xml = r#"
        <resources>
            <string name="greet">Hi</string>
            <string name="bye" translatable="false">Bye</string>
            <plurals name="apples" translatable="true">
                <item quantity="one">One apple</item>
                <item quantity="other">%d apples</item>
            </plurals>
        </resources>
        "#;
        let original = parse(xml);

        let mut bytes = Vec::new();
        original.to_writer(&mut bytes).unwrap();
        let written = String::from_utf8(bytes).unwrap();
        let reparsed = parse(&written);

        assert_eq!(original.strings.len(), reparsed.strings.len());
        assert_eq!(original.plurals.len(), reparsed.plurals.len());

        for (before, after) in original.strings.iter().zip(reparsed.strings.iter()) {
            assert_eq!(before.name, after.name);
            assert_eq!(before.value, after.value);
            assert_eq!(before.translatable, after.translatable);
        }
        for (before, after) in original.plurals.iter().zip(reparsed.plurals.iter()) {
            assert_eq!(before.name, after.name);
            assert_eq!(before.translatable, after.translatable);
            assert_eq!(before.items.len(), after.items.len());
        }
    }

    /// An empty string without `translatable` converts to status `New`.
    #[test]
    fn test_entry_with_empty_value_status_new() {
        let xml = r#"
        <resources>
            <string name="empty"></string>
        </resources>
        "#;
        let format = parse(xml);
        assert_eq!(format.strings.len(), 1);

        let only = format.strings.into_iter().next().unwrap();
        assert_eq!(only.into_entry().status, EntryStatus::New);
    }

    /// Plural entries of a `Resource` become `<plurals>` resources.
    #[test]
    fn test_resource_to_android_format_with_plurals() {
        use std::collections::BTreeMap;

        let mut forms = BTreeMap::new();
        forms.insert(PluralCategory::One, "One file".to_string());
        forms.insert(PluralCategory::Other, "%d files".to_string());

        let entry = Entry {
            id: "files".into(),
            value: Translation::Plural(Plural {
                id: "files".into(),
                forms,
            }),
            comment: None,
            status: EntryStatus::Translated,
            custom: HashMap::new(),
        };
        let resource = Resource {
            metadata: Metadata {
                language: "en".into(),
                domain: String::new(),
                custom: HashMap::new(),
            },
            entries: vec![entry],
        };

        let fmt = Format::from(resource);
        assert_eq!(fmt.strings.len(), 0);
        assert_eq!(fmt.plurals.len(), 1);

        let files = &fmt.plurals[0];
        assert_eq!(files.name, "files");

        let has_one = files
            .items
            .iter()
            .any(|i| matches!(i.quantity, PluralCategory::One) && i.value == "One file");
        assert!(has_one);

        let has_other = files
            .items
            .iter()
            .any(|i| matches!(i.quantity, PluralCategory::Other) && i.value == "%d files");
        assert!(has_other);
    }
}