Skip to main content

xcstrings_mcp/service/
stringsdict_parser.rs

1use std::collections::BTreeMap;
2
3use indexmap::IndexMap;
4use quick_xml::Reader;
5use quick_xml::escape::resolve_xml_entity;
6use quick_xml::events::Event;
7
8use crate::error::XcStringsError;
9
10/// A single entry from a `.stringsdict` file, representing a pluralized string.
11#[derive(Debug)]
12pub struct StringsdictEntry {
13    pub key: String,
14    /// The `NSStringLocalizedFormatKey` value, e.g. `%#@varname@`.
15    pub format_key: String,
16    /// Plural variables referenced by the format key.
17    pub variables: IndexMap<String, PluralVariable>,
18}
19
/// One plural variable belonging to a stringsdict entry.
#[derive(Debug)]
pub struct PluralVariable {
    /// Value of `NSStringFormatValueTypeKey`: the printf-style specifier without the
    /// leading `%`, e.g. "d", "lld", "f", "@".
    pub format_specifier: String,
    /// Text for each CLDR plural category present in the file
    /// ("zero", "one", "two", "few", "many", "other"), keyed by category name.
    pub forms: BTreeMap<String, String>,
}
28
29/// Result of parsing a `.stringsdict` file, including any skipped entries.
30#[derive(Debug)]
31pub struct ParsedStringsdict {
32    pub entries: Vec<StringsdictEntry>,
33    /// Keys that were skipped due to unsupported rule types
34    /// (e.g., `NSStringDeviceSpecificRuleType`, `NSStringVariableWidthRuleType`).
35    pub skipped_keys: Vec<String>,
36}
37
/// Plural category names accepted as keys inside a plural variable dict.
const PLURAL_FORMS: &[&str] = &[
    "zero",
    "one",
    "two",
    "few",
    "many",
    "other",
];
39
40/// Parse a `.stringsdict` XML plist file into plural entries and skipped key names.
41///
42/// Entries using unsupported rule types (`NSStringDeviceSpecificRuleType`,
43/// `NSStringVariableWidthRuleType`) are collected in `ParsedStringsdict::skipped_keys`.
44pub fn parse_stringsdict(content: &str) -> Result<ParsedStringsdict, XcStringsError> {
45    let mut reader = Reader::from_str(content);
46
47    // Navigate to root <dict> inside <plist>
48    let mut found_plist = false;
49    loop {
50        match reader.read_event() {
51            Ok(Event::Start(ref e)) => {
52                if e.name().as_ref() == b"plist" {
53                    found_plist = true;
54                } else if found_plist && e.name().as_ref() == b"dict" {
55                    break;
56                }
57            }
58            Ok(Event::Eof) => {
59                return Err(XcStringsError::StringsdictParse(
60                    "unexpected EOF before root <dict>".into(),
61                ));
62            }
63            Err(e) => return Err(XcStringsError::StringsdictParse(e.to_string())),
64            _ => {}
65        }
66    }
67
68    // Now inside root <dict>. Parse top-level key/dict pairs.
69    let mut entries = Vec::new();
70    let mut skipped_keys = Vec::new();
71    loop {
72        match read_next_significant_event(&mut reader)? {
73            SignificantEvent::Key(entry_key) => {
74                // Expect a <dict> for this entry
75                skip_to_start_tag(&mut reader, b"dict")?;
76                if let Some(entry) = parse_entry(&mut reader, &entry_key)? {
77                    entries.push(entry);
78                } else {
79                    skipped_keys.push(entry_key);
80                }
81            }
82            SignificantEvent::EndTag => break, // </dict> — end of root dict
83            SignificantEvent::Eof => break,
84        }
85    }
86
87    Ok(ParsedStringsdict {
88        entries,
89        skipped_keys,
90    })
91}
92
/// The subset of XML events that drive iteration over a dict's contents.
enum SignificantEvent {
    /// A `<key>` element was read; carries its text content.
    Key(String),
    /// A closing tag was reached.
    EndTag,
    /// The input ended.
    Eof,
}
99
100fn read_next_significant_event(
101    reader: &mut Reader<&[u8]>,
102) -> Result<SignificantEvent, XcStringsError> {
103    loop {
104        match reader.read_event() {
105            Ok(Event::Start(ref e)) if e.name().as_ref() == b"key" => {
106                let text = read_text_content(reader)?;
107                return Ok(SignificantEvent::Key(text));
108            }
109            Ok(Event::End(_)) => return Ok(SignificantEvent::EndTag),
110            Ok(Event::Eof) => return Ok(SignificantEvent::Eof),
111            Err(e) => return Err(XcStringsError::StringsdictParse(e.to_string())),
112            _ => {}
113        }
114    }
115}
116
117/// Read text content until the closing tag.
118fn read_text_content(reader: &mut Reader<&[u8]>) -> Result<String, XcStringsError> {
119    let mut text = String::new();
120    loop {
121        match reader.read_event() {
122            Ok(Event::Text(ref e)) => {
123                let decoded = e
124                    .decode()
125                    .map_err(|err| XcStringsError::StringsdictParse(err.to_string()))?;
126                text.push_str(&decoded);
127            }
128            Ok(Event::GeneralRef(ref e)) => {
129                let name = e
130                    .decode()
131                    .map_err(|err| XcStringsError::StringsdictParse(err.to_string()))?;
132                if let Some(resolved) = resolve_xml_entity(&name) {
133                    text.push_str(resolved);
134                } else if let Ok(Some(ch)) = e.resolve_char_ref() {
135                    text.push(ch);
136                } else {
137                    return Err(XcStringsError::StringsdictParse(format!(
138                        "unknown XML entity: &{name};"
139                    )));
140                }
141            }
142            Ok(Event::CData(ref e)) => {
143                text.push_str(&String::from_utf8_lossy(e.as_ref()));
144            }
145            Ok(Event::End(_)) => return Ok(text),
146            Ok(Event::Eof) => {
147                return Err(XcStringsError::StringsdictParse(
148                    "unexpected EOF in text content".into(),
149                ));
150            }
151            Err(e) => return Err(XcStringsError::StringsdictParse(e.to_string())),
152            _ => {}
153        }
154    }
155}
156
157/// Skip events until we find a `<start>` tag with the given name.
158fn skip_to_start_tag(reader: &mut Reader<&[u8]>, tag_name: &[u8]) -> Result<(), XcStringsError> {
159    loop {
160        match reader.read_event() {
161            Ok(Event::Start(ref e)) if e.name().as_ref() == tag_name => return Ok(()),
162            Ok(Event::Eof) => {
163                return Err(XcStringsError::StringsdictParse(format!(
164                    "unexpected EOF waiting for <{}>",
165                    String::from_utf8_lossy(tag_name)
166                )));
167            }
168            Err(e) => return Err(XcStringsError::StringsdictParse(e.to_string())),
169            _ => {}
170        }
171    }
172}
173
174/// Parse a single entry dict. Returns `None` if the entry should be skipped
175/// (e.g., contains only unsupported rule types).
176fn parse_entry(
177    reader: &mut Reader<&[u8]>,
178    key: &str,
179) -> Result<Option<StringsdictEntry>, XcStringsError> {
180    let mut format_key = String::new();
181    let mut variables = IndexMap::new();
182    let mut has_plural_variable = false;
183
184    // Read key/value pairs inside the entry dict
185    loop {
186        match read_next_significant_event(reader)? {
187            SignificantEvent::Key(k) if k == "NSStringLocalizedFormatKey" => {
188                // Next element should be <string>
189                skip_to_start_tag(reader, b"string")?;
190                format_key = read_text_content(reader)?;
191            }
192            SignificantEvent::Key(var_name) => {
193                // Should be a variable dict
194                skip_to_start_tag(reader, b"dict")?;
195                if let Some(var) = parse_variable_dict(reader)? {
196                    has_plural_variable = true;
197                    variables.insert(var_name, var);
198                }
199            }
200            SignificantEvent::EndTag => break, // </dict>
201            SignificantEvent::Eof => {
202                return Err(XcStringsError::StringsdictParse(
203                    "unexpected EOF inside entry dict".into(),
204                ));
205            }
206        }
207    }
208
209    if !has_plural_variable {
210        // Entry has no plural variables (all were unsupported types) — skip
211        return Ok(None);
212    }
213
214    if format_key.is_empty() {
215        return Err(XcStringsError::StringsdictParse(format!(
216            "entry '{key}' missing NSStringLocalizedFormatKey"
217        )));
218    }
219
220    Ok(Some(StringsdictEntry {
221        key: key.to_owned(),
222        format_key,
223        variables,
224    }))
225}
226
227/// Parse a variable dict (containing NSStringFormatSpecTypeKey, NSStringFormatValueTypeKey,
228/// and plural forms). Returns `None` for unsupported rule types.
229fn parse_variable_dict(
230    reader: &mut Reader<&[u8]>,
231) -> Result<Option<PluralVariable>, XcStringsError> {
232    let mut spec_type = String::new();
233    let mut format_specifier = String::new();
234    let mut forms = BTreeMap::new();
235
236    loop {
237        match read_next_significant_event(reader)? {
238            SignificantEvent::Key(k) => {
239                // All values here are <string> elements
240                skip_to_start_tag(reader, b"string")?;
241                let value = read_text_content(reader)?;
242
243                match k.as_str() {
244                    "NSStringFormatSpecTypeKey" => spec_type = value,
245                    "NSStringFormatValueTypeKey" => format_specifier = value,
246                    _ if PLURAL_FORMS.contains(&k.as_str()) => {
247                        forms.insert(k, value);
248                    }
249                    _ => {} // ignore unknown keys
250                }
251            }
252            SignificantEvent::EndTag => break, // </dict>
253            SignificantEvent::Eof => {
254                return Err(XcStringsError::StringsdictParse(
255                    "unexpected EOF inside variable dict".into(),
256                ));
257            }
258        }
259    }
260
261    if spec_type != "NSStringPluralRuleType" {
262        return Ok(None);
263    }
264
265    if format_specifier.is_empty() {
266        return Err(XcStringsError::StringsdictParse(
267            "plural variable missing NSStringFormatValueTypeKey".into(),
268        ));
269    }
270
271    if !forms.contains_key("other") {
272        return Err(XcStringsError::StringsdictParse(
273            "plural variable missing required 'other' form".into(),
274        ));
275    }
276
277    Ok(Some(PluralVariable {
278        format_specifier,
279        forms,
280    }))
281}
282
#[cfg(test)]
mod tests {
    use super::*;

    /// Raw text of the shared English `.stringsdict` fixture.
    fn fixture_content() -> &'static str {
        include_str!("../../tests/fixtures/en.lproj/Localizable.stringsdict")
    }

    /// Parse the shared fixture, panicking if it does not parse.
    fn parse_fixture() -> ParsedStringsdict {
        parse_stringsdict(fixture_content()).expect("should parse")
    }

    /// Look up an entry by key, panicking if it is absent.
    fn entry_named<'a>(parsed: &'a ParsedStringsdict, key: &str) -> &'a StringsdictEntry {
        parsed.entries.iter().find(|e| e.key == key).unwrap()
    }

    #[test]
    fn simple_single_variable_plural() {
        let parsed = parse_fixture();
        let entry = entry_named(&parsed, "items_count");

        assert_eq!(entry.format_key, "%#@items@");
        assert_eq!(entry.variables.len(), 1);

        let items = &entry.variables["items"];
        assert_eq!(items.format_specifier, "lld");
        assert_eq!(items.forms["one"], "%lld item");
        assert_eq!(items.forms["other"], "%lld items");
    }

    #[test]
    fn multiple_plural_categories() {
        let parsed = parse_fixture();
        let entry = entry_named(&parsed, "messages_remaining");

        let count = &entry.variables["count"];
        assert_eq!(count.forms.len(), 3);
        assert!(count.forms.contains_key("zero"));
        assert!(count.forms.contains_key("one"));
        assert!(count.forms.contains_key("other"));
        assert_eq!(count.forms["zero"], "No messages remaining");
    }

    #[test]
    fn multiple_variables_in_one_entry() {
        let parsed = parse_fixture();
        let entry = entry_named(&parsed, "photos_in_albums");

        assert_eq!(entry.format_key, "%1$#@photos@ in %2$#@albums@");
        assert_eq!(entry.variables.len(), 2);
        assert!(entry.variables.contains_key("photos"));
        assert!(entry.variables.contains_key("albums"));

        assert_eq!(entry.variables["photos"].forms["one"], "%lld photo");
        assert_eq!(entry.variables["albums"].forms["other"], "%lld albums");
    }

    #[test]
    fn missing_other_category_is_error() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<plist version="1.0">
<dict>
    <key>bad_entry</key>
    <dict>
        <key>NSStringLocalizedFormatKey</key>
        <string>%#@count@</string>
        <key>count</key>
        <dict>
            <key>NSStringFormatSpecTypeKey</key>
            <string>NSStringPluralRuleType</string>
            <key>NSStringFormatValueTypeKey</key>
            <string>d</string>
            <key>one</key>
            <string>one thing</string>
        </dict>
    </dict>
</dict>
</plist>"#;

        let err = parse_stringsdict(xml).unwrap_err();
        assert!(err.to_string().contains("other"));
    }

    #[test]
    fn unsupported_rule_type_is_skipped() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<plist version="1.0">
<dict>
    <key>device_entry</key>
    <dict>
        <key>NSStringLocalizedFormatKey</key>
        <string>%#@device@</string>
        <key>device</key>
        <dict>
            <key>NSStringFormatSpecTypeKey</key>
            <string>NSStringDeviceSpecificRuleType</string>
            <key>iphone</key>
            <string>iPhone text</string>
            <key>ipad</key>
            <string>iPad text</string>
        </dict>
    </dict>
</dict>
</plist>"#;

        let parsed = parse_stringsdict(xml).expect("should parse without error");
        assert!(
            parsed.entries.is_empty(),
            "device-specific entries should be skipped"
        );
        assert_eq!(
            parsed.skipped_keys,
            vec!["device_entry"],
            "skipped key should be reported"
        );
    }

    #[test]
    fn empty_stringsdict() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<plist version="1.0">
<dict>
</dict>
</plist>"#;

        let parsed = parse_stringsdict(xml).expect("should parse");
        assert!(parsed.entries.is_empty());
    }

    #[test]
    fn invalid_xml_is_error() {
        let outcome = parse_stringsdict("this is not xml at all < >");
        assert!(outcome.is_err());
    }

    #[test]
    fn format_specifier_preservation() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<plist version="1.0">
<dict>
    <key>test_specifiers</key>
    <dict>
        <key>NSStringLocalizedFormatKey</key>
        <string>%#@count@</string>
        <key>count</key>
        <dict>
            <key>NSStringFormatSpecTypeKey</key>
            <string>NSStringPluralRuleType</string>
            <key>NSStringFormatValueTypeKey</key>
            <string>@</string>
            <key>other</key>
            <string>%@ things</string>
        </dict>
    </dict>
</dict>
</plist>"#;

        let parsed = parse_stringsdict(xml).expect("should parse");
        assert_eq!(parsed.entries[0].variables["count"].format_specifier, "@");
    }

    #[test]
    fn empty_format_value_type_key_is_error() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<plist version="1.0">
<dict>
    <key>test</key>
    <dict>
        <key>NSStringLocalizedFormatKey</key>
        <string>%#@count@</string>
        <key>count</key>
        <dict>
            <key>NSStringFormatSpecTypeKey</key>
            <string>NSStringPluralRuleType</string>
            <key>NSStringFormatValueTypeKey</key>
            <string></string>
            <key>other</key>
            <string>%d things</string>
        </dict>
    </dict>
</dict>
</plist>"#;

        let err = parse_stringsdict(xml).unwrap_err();
        assert!(
            err.to_string().contains("NSStringFormatValueTypeKey"),
            "error should mention missing format value type key: {err}"
        );
    }

    #[test]
    fn cdata_in_text_content() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<plist version="1.0">
<dict>
    <key>cdata_test</key>
    <dict>
        <key>NSStringLocalizedFormatKey</key>
        <string><![CDATA[%#@count@]]></string>
        <key>count</key>
        <dict>
            <key>NSStringFormatSpecTypeKey</key>
            <string>NSStringPluralRuleType</string>
            <key>NSStringFormatValueTypeKey</key>
            <string>d</string>
            <key>other</key>
            <string><![CDATA[%d items & more]]></string>
        </dict>
    </dict>
</dict>
</plist>"#;

        let parsed = parse_stringsdict(xml).expect("should parse CDATA");
        let first = &parsed.entries[0];
        assert_eq!(first.format_key, "%#@count@");
        assert_eq!(first.variables["count"].forms["other"], "%d items & more");
    }
}