xcstrings_mcp/service/
stringsdict_parser.rs1use std::collections::BTreeMap;
2
3use indexmap::IndexMap;
4use quick_xml::Reader;
5use quick_xml::escape::resolve_xml_entity;
6use quick_xml::events::Event;
7
8use crate::error::XcStringsError;
9
10#[derive(Debug)]
12pub struct StringsdictEntry {
13 pub key: String,
14 pub format_key: String,
16 pub variables: IndexMap<String, PluralVariable>,
18}
19
/// A single plural-rule variable taken from an entry's variable dict.
#[derive(Debug)]
pub struct PluralVariable {
    /// Value stored under `NSStringFormatValueTypeKey` (for example `lld` or `@`).
    pub format_specifier: String,
    /// Text for each plural category that was present, keyed by category name.
    pub forms: BTreeMap<String, String>,
}
28
29#[derive(Debug)]
31pub struct ParsedStringsdict {
32 pub entries: Vec<StringsdictEntry>,
33 pub skipped_keys: Vec<String>,
36}
37
/// Plural category names that are accepted as form keys inside a variable dict.
const PLURAL_FORMS: &[&str] = &["zero", "one", "two", "few", "many", "other"];
39
40pub fn parse_stringsdict(content: &str) -> Result<ParsedStringsdict, XcStringsError> {
45 let mut reader = Reader::from_str(content);
46
47 let mut found_plist = false;
49 loop {
50 match reader.read_event() {
51 Ok(Event::Start(ref e)) => {
52 if e.name().as_ref() == b"plist" {
53 found_plist = true;
54 } else if found_plist && e.name().as_ref() == b"dict" {
55 break;
56 }
57 }
58 Ok(Event::Eof) => {
59 return Err(XcStringsError::StringsdictParse(
60 "unexpected EOF before root <dict>".into(),
61 ));
62 }
63 Err(e) => return Err(XcStringsError::StringsdictParse(e.to_string())),
64 _ => {}
65 }
66 }
67
68 let mut entries = Vec::new();
70 let mut skipped_keys = Vec::new();
71 loop {
72 match read_next_significant_event(&mut reader)? {
73 SignificantEvent::Key(entry_key) => {
74 skip_to_start_tag(&mut reader, b"dict")?;
76 if let Some(entry) = parse_entry(&mut reader, &entry_key)? {
77 entries.push(entry);
78 } else {
79 skipped_keys.push(entry_key);
80 }
81 }
82 SignificantEvent::EndTag => break, SignificantEvent::Eof => break,
84 }
85 }
86
87 Ok(ParsedStringsdict {
88 entries,
89 skipped_keys,
90 })
91}
92
/// The subset of XML events the dict-walking parsers react to.
enum SignificantEvent {
    /// A `<key>` element was read; carries its text content.
    Key(String),
    /// Some closing tag was reached.
    EndTag,
    /// The input is exhausted.
    Eof,
}
99
100fn read_next_significant_event(
101 reader: &mut Reader<&[u8]>,
102) -> Result<SignificantEvent, XcStringsError> {
103 loop {
104 match reader.read_event() {
105 Ok(Event::Start(ref e)) if e.name().as_ref() == b"key" => {
106 let text = read_text_content(reader)?;
107 return Ok(SignificantEvent::Key(text));
108 }
109 Ok(Event::End(_)) => return Ok(SignificantEvent::EndTag),
110 Ok(Event::Eof) => return Ok(SignificantEvent::Eof),
111 Err(e) => return Err(XcStringsError::StringsdictParse(e.to_string())),
112 _ => {}
113 }
114 }
115}
116
117fn read_text_content(reader: &mut Reader<&[u8]>) -> Result<String, XcStringsError> {
119 let mut text = String::new();
120 loop {
121 match reader.read_event() {
122 Ok(Event::Text(ref e)) => {
123 let decoded = e
124 .decode()
125 .map_err(|err| XcStringsError::StringsdictParse(err.to_string()))?;
126 text.push_str(&decoded);
127 }
128 Ok(Event::GeneralRef(ref e)) => {
129 let name = e
130 .decode()
131 .map_err(|err| XcStringsError::StringsdictParse(err.to_string()))?;
132 if let Some(resolved) = resolve_xml_entity(&name) {
133 text.push_str(resolved);
134 } else if let Ok(Some(ch)) = e.resolve_char_ref() {
135 text.push(ch);
136 } else {
137 return Err(XcStringsError::StringsdictParse(format!(
138 "unknown XML entity: &{name};"
139 )));
140 }
141 }
142 Ok(Event::CData(ref e)) => {
143 text.push_str(&String::from_utf8_lossy(e.as_ref()));
144 }
145 Ok(Event::End(_)) => return Ok(text),
146 Ok(Event::Eof) => {
147 return Err(XcStringsError::StringsdictParse(
148 "unexpected EOF in text content".into(),
149 ));
150 }
151 Err(e) => return Err(XcStringsError::StringsdictParse(e.to_string())),
152 _ => {}
153 }
154 }
155}
156
157fn skip_to_start_tag(reader: &mut Reader<&[u8]>, tag_name: &[u8]) -> Result<(), XcStringsError> {
159 loop {
160 match reader.read_event() {
161 Ok(Event::Start(ref e)) if e.name().as_ref() == tag_name => return Ok(()),
162 Ok(Event::Eof) => {
163 return Err(XcStringsError::StringsdictParse(format!(
164 "unexpected EOF waiting for <{}>",
165 String::from_utf8_lossy(tag_name)
166 )));
167 }
168 Err(e) => return Err(XcStringsError::StringsdictParse(e.to_string())),
169 _ => {}
170 }
171 }
172}
173
174fn parse_entry(
177 reader: &mut Reader<&[u8]>,
178 key: &str,
179) -> Result<Option<StringsdictEntry>, XcStringsError> {
180 let mut format_key = String::new();
181 let mut variables = IndexMap::new();
182 let mut has_plural_variable = false;
183
184 loop {
186 match read_next_significant_event(reader)? {
187 SignificantEvent::Key(k) if k == "NSStringLocalizedFormatKey" => {
188 skip_to_start_tag(reader, b"string")?;
190 format_key = read_text_content(reader)?;
191 }
192 SignificantEvent::Key(var_name) => {
193 skip_to_start_tag(reader, b"dict")?;
195 if let Some(var) = parse_variable_dict(reader)? {
196 has_plural_variable = true;
197 variables.insert(var_name, var);
198 }
199 }
200 SignificantEvent::EndTag => break, SignificantEvent::Eof => {
202 return Err(XcStringsError::StringsdictParse(
203 "unexpected EOF inside entry dict".into(),
204 ));
205 }
206 }
207 }
208
209 if !has_plural_variable {
210 return Ok(None);
212 }
213
214 if format_key.is_empty() {
215 return Err(XcStringsError::StringsdictParse(format!(
216 "entry '{key}' missing NSStringLocalizedFormatKey"
217 )));
218 }
219
220 Ok(Some(StringsdictEntry {
221 key: key.to_owned(),
222 format_key,
223 variables,
224 }))
225}
226
227fn parse_variable_dict(
230 reader: &mut Reader<&[u8]>,
231) -> Result<Option<PluralVariable>, XcStringsError> {
232 let mut spec_type = String::new();
233 let mut format_specifier = String::new();
234 let mut forms = BTreeMap::new();
235
236 loop {
237 match read_next_significant_event(reader)? {
238 SignificantEvent::Key(k) => {
239 skip_to_start_tag(reader, b"string")?;
241 let value = read_text_content(reader)?;
242
243 match k.as_str() {
244 "NSStringFormatSpecTypeKey" => spec_type = value,
245 "NSStringFormatValueTypeKey" => format_specifier = value,
246 _ if PLURAL_FORMS.contains(&k.as_str()) => {
247 forms.insert(k, value);
248 }
249 _ => {} }
251 }
252 SignificantEvent::EndTag => break, SignificantEvent::Eof => {
254 return Err(XcStringsError::StringsdictParse(
255 "unexpected EOF inside variable dict".into(),
256 ));
257 }
258 }
259 }
260
261 if spec_type != "NSStringPluralRuleType" {
262 return Ok(None);
263 }
264
265 if format_specifier.is_empty() {
266 return Err(XcStringsError::StringsdictParse(
267 "plural variable missing NSStringFormatValueTypeKey".into(),
268 ));
269 }
270
271 if !forms.contains_key("other") {
272 return Err(XcStringsError::StringsdictParse(
273 "plural variable missing required 'other' form".into(),
274 ));
275 }
276
277 Ok(Some(PluralVariable {
278 format_specifier,
279 forms,
280 }))
281}
282
#[cfg(test)]
mod tests {
    use super::*;

    /// Contents of the shared `.stringsdict` fixture.
    fn fixture_content() -> &'static str {
        include_str!("../../tests/fixtures/en.lproj/Localizable.stringsdict")
    }

    /// Parses the shared fixture, panicking if the parser rejects it.
    fn parsed_fixture() -> ParsedStringsdict {
        parse_stringsdict(fixture_content()).expect("should parse")
    }

    /// Finds the entry with the given key, panicking if it is absent.
    fn entry_for<'a>(parsed: &'a ParsedStringsdict, key: &str) -> &'a StringsdictEntry {
        parsed.entries.iter().find(|e| e.key == key).unwrap()
    }

    #[test]
    fn simple_single_variable_plural() {
        let parsed = parsed_fixture();
        let entry = entry_for(&parsed, "items_count");

        assert_eq!(entry.format_key, "%#@items@");
        assert_eq!(entry.variables.len(), 1);

        let var = &entry.variables["items"];
        assert_eq!(var.format_specifier, "lld");
        assert_eq!(var.forms["one"], "%lld item");
        assert_eq!(var.forms["other"], "%lld items");
    }

    #[test]
    fn multiple_plural_categories() {
        let parsed = parsed_fixture();
        let entry = entry_for(&parsed, "messages_remaining");

        let var = &entry.variables["count"];
        assert_eq!(var.forms.len(), 3);
        assert!(var.forms.contains_key("zero"));
        assert!(var.forms.contains_key("one"));
        assert!(var.forms.contains_key("other"));
        assert_eq!(var.forms["zero"], "No messages remaining");
    }

    #[test]
    fn multiple_variables_in_one_entry() {
        let parsed = parsed_fixture();
        let entry = entry_for(&parsed, "photos_in_albums");

        assert_eq!(entry.format_key, "%1$#@photos@ in %2$#@albums@");
        assert_eq!(entry.variables.len(), 2);
        assert!(entry.variables.contains_key("photos"));
        assert!(entry.variables.contains_key("albums"));

        assert_eq!(entry.variables["photos"].forms["one"], "%lld photo");
        assert_eq!(entry.variables["albums"].forms["other"], "%lld albums");
    }

    #[test]
    fn missing_other_category_is_error() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<plist version="1.0">
<dict>
    <key>bad_entry</key>
    <dict>
        <key>NSStringLocalizedFormatKey</key>
        <string>%#@count@</string>
        <key>count</key>
        <dict>
            <key>NSStringFormatSpecTypeKey</key>
            <string>NSStringPluralRuleType</string>
            <key>NSStringFormatValueTypeKey</key>
            <string>d</string>
            <key>one</key>
            <string>one thing</string>
        </dict>
    </dict>
</dict>
</plist>"#;

        let err = parse_stringsdict(xml).unwrap_err();
        assert!(err.to_string().contains("other"));
    }

    #[test]
    fn unsupported_rule_type_is_skipped() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<plist version="1.0">
<dict>
    <key>device_entry</key>
    <dict>
        <key>NSStringLocalizedFormatKey</key>
        <string>%#@device@</string>
        <key>device</key>
        <dict>
            <key>NSStringFormatSpecTypeKey</key>
            <string>NSStringDeviceSpecificRuleType</string>
            <key>iphone</key>
            <string>iPhone text</string>
            <key>ipad</key>
            <string>iPad text</string>
        </dict>
    </dict>
</dict>
</plist>"#;

        let parsed = parse_stringsdict(xml).expect("should parse without error");
        assert!(
            parsed.entries.is_empty(),
            "device-specific entries should be skipped"
        );
        assert_eq!(
            parsed.skipped_keys,
            vec!["device_entry"],
            "skipped key should be reported"
        );
    }

    #[test]
    fn empty_stringsdict() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<plist version="1.0">
<dict>
</dict>
</plist>"#;

        let parsed = parse_stringsdict(xml).expect("should parse");
        assert!(parsed.entries.is_empty());
    }

    #[test]
    fn invalid_xml_is_error() {
        let result = parse_stringsdict("this is not xml at all < >");
        assert!(result.is_err());
    }

    #[test]
    fn format_specifier_preservation() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<plist version="1.0">
<dict>
    <key>test_specifiers</key>
    <dict>
        <key>NSStringLocalizedFormatKey</key>
        <string>%#@count@</string>
        <key>count</key>
        <dict>
            <key>NSStringFormatSpecTypeKey</key>
            <string>NSStringPluralRuleType</string>
            <key>NSStringFormatValueTypeKey</key>
            <string>@</string>
            <key>other</key>
            <string>%@ things</string>
        </dict>
    </dict>
</dict>
</plist>"#;

        let parsed = parse_stringsdict(xml).expect("should parse");
        assert_eq!(parsed.entries[0].variables["count"].format_specifier, "@");
    }

    #[test]
    fn empty_format_value_type_key_is_error() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<plist version="1.0">
<dict>
    <key>test</key>
    <dict>
        <key>NSStringLocalizedFormatKey</key>
        <string>%#@count@</string>
        <key>count</key>
        <dict>
            <key>NSStringFormatSpecTypeKey</key>
            <string>NSStringPluralRuleType</string>
            <key>NSStringFormatValueTypeKey</key>
            <string></string>
            <key>other</key>
            <string>%d things</string>
        </dict>
    </dict>
</dict>
</plist>"#;

        let err = parse_stringsdict(xml).unwrap_err();
        assert!(
            err.to_string().contains("NSStringFormatValueTypeKey"),
            "error should mention missing format value type key: {err}"
        );
    }

    #[test]
    fn cdata_in_text_content() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<plist version="1.0">
<dict>
    <key>cdata_test</key>
    <dict>
        <key>NSStringLocalizedFormatKey</key>
        <string><![CDATA[%#@count@]]></string>
        <key>count</key>
        <dict>
            <key>NSStringFormatSpecTypeKey</key>
            <string>NSStringPluralRuleType</string>
            <key>NSStringFormatValueTypeKey</key>
            <string>d</string>
            <key>other</key>
            <string><![CDATA[%d items & more]]></string>
        </dict>
    </dict>
</dict>
</plist>"#;

        let parsed = parse_stringsdict(xml).expect("should parse CDATA");
        assert_eq!(parsed.entries[0].format_key, "%#@count@");
        assert_eq!(
            parsed.entries[0].variables["count"].forms["other"],
            "%d items & more"
        );
    }
}