1use std::collections::HashMap;
6use std::fs::File;
7use std::path::Path;
9
10use indoc::indoc;
11
12use crate::{
13 error::Error,
14 traits::Parser,
15 types::{Entry, EntryStatus, Metadata, Resource, Translation},
16};
17
/// In-memory representation of a single `.strings` file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Format {
    /// Language read from the `//: Language:` header line when parsing;
    /// empty when no such header is present.
    pub language: String,
    /// Key/value pairs in the order they were parsed (and will be written).
    pub pairs: Vec<Pair>,
}
28
29impl Parser for Format {
30 fn from_reader<R: std::io::BufRead>(reader: R) -> Result<Self, Error> {
35 let mut reader = reader;
37 let mut bytes = Vec::new();
38 std::io::Read::read_to_end(&mut reader, &mut bytes).map_err(Error::Io)?;
39 let content = String::from_utf8(bytes)
40 .map_err(|_| Error::InvalidResource("Invalid UTF-8 in .strings file".to_string()))?;
41
42 let header_language = extract_header_language(&content).unwrap_or_default();
44 let (pairs, _warnings) = parse_strings_content(&content);
45 Ok(Format {
46 language: header_language,
47 pairs,
48 })
49 }
50
51 fn to_writer<W: std::io::Write>(&self, mut writer: W) -> Result<(), Error> {
52 let mut content = String::new();
53
54 let header = format!(
55 indoc! {"
56 // This file is automatically generated by langcodec.
57 // Do not edit it manually, as your changes will be overwritten.
58 // Here's the basic information about the file which could be useful
59 // for translators, and langcodec would use it to generate the
60 // appropriate metadata for the resource.
61 //
62 //: Language: {}
63 //
64
65 "},
66 self.language
67 );
68
69 content.push_str(&header);
70
71 for pair in &self.pairs {
72 if let Some(comment) = &pair.comment {
73 let trimmed = comment.trim_end_matches(['\n', '\r']);
74 content.push_str(trimmed);
75 content.push('\n');
76 }
77
78 let key = escape_strings_token(&pair.key);
79 let value = escape_strings_token(&pair.value);
80 content.push_str(&format!("\"{}\" = \"{}\";\n", key, value));
81 }
82
83 writer.write_all(content.as_bytes()).map_err(Error::Io)
84 }
85
86 fn read_from<P: AsRef<Path>>(path: P) -> Result<Self, Error>
88 where
89 Self: Sized,
90 {
91 let file = File::open(path).map_err(Error::Io)?;
92 let mut decoder = encoding_rs_io::DecodeReaderBytesBuilder::new()
94 .bom_override(true)
95 .build(file);
96
97 let mut decoded_bytes = Vec::new();
98 std::io::Read::read_to_end(&mut decoder, &mut decoded_bytes).map_err(Error::Io)?;
99 let decoded = String::from_utf8(decoded_bytes)
100 .map_err(|_| Error::InvalidResource("Invalid UTF-8 in .strings file".to_string()))?;
101 Self::from_str(&decoded)
102 }
103}
104
105impl From<Format> for Resource {
106 fn from(value: Format) -> Self {
107 Resource {
108 metadata: Metadata {
109 language: value.language,
110 domain: String::from(""),
111 custom: HashMap::new(),
112 },
113 entries: value.pairs.into_iter().map(Pair::into_entry).collect(),
114 }
115 }
116}
117
118impl TryFrom<Resource> for Format {
119 type Error = Error;
120
121 fn try_from(value: Resource) -> Result<Self, Self::Error> {
122 let Resource { metadata, entries } = value;
123 let language = metadata.language;
124 let pairs = entries
125 .into_iter()
126 .map(Pair::try_from)
127 .collect::<Result<Vec<_>, _>>()?;
128 Ok(Format { language, pairs })
129 }
130}
131
/// One `"key" = "value";` statement of a `.strings` file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Pair {
    /// Text between the quotes of the key, kept exactly as written in the
    /// file (escape sequences are not decoded by the parser).
    pub key: String,
    /// Text between the quotes of the value, kept as written except that raw
    /// line breaks inside the literal are rewritten to the two-character
    /// sequence `\n` by the parser.
    pub value: String,
    /// The comment that preceded the pair, if any. The parser stores it
    /// together with its `//` or `/* ... */` markers; use
    /// `formatted_comment` to obtain the bare text.
    pub comment: Option<String>,
}
149
150impl Pair {
151 fn into_entry(self) -> Entry {
152 let Pair {
153 key,
154 value,
155 comment,
156 } = self;
157
158 let is_pair_value_empty = value.is_empty();
159
160 Entry {
161 id: key,
162 value: Translation::Singular(value),
163 comment,
164 status: if is_pair_value_empty {
165 EntryStatus::New
166 } else {
167 EntryStatus::Translated
168 },
169 custom: HashMap::new(),
170 }
171 }
172}
173
174fn parse_strings_content(content: &str) -> (Vec<Pair>, Vec<String>) {
179 let bytes = content.as_bytes();
180 let mut i = 0usize;
181 let len = bytes.len();
182 let mut pairs: Vec<Pair> = Vec::new();
183 let warnings: Vec<String> = Vec::new();
184 let mut pending_comment: Option<String> = None;
185 let mut have_seen_pair = false;
186
187 while i < len {
188 let (ni, _saw_newline) = skip_whitespace(bytes, i);
189 i = ni;
190 if i >= len {
191 break;
192 }
193
194 if !have_seen_pair && let Some(next_i) = try_skip_langcodec_header(bytes, i) {
196 i = next_i;
197 pending_comment = None;
198 continue;
199 }
200
201 if starts_with(bytes, i, b"//") {
203 let (nj, comment) = parse_line_comment(bytes, i);
204 pending_comment = Some(comment);
205 i = nj;
206 continue;
207 }
208 if starts_with(bytes, i, b"/*") {
209 let (nj, comment) = parse_block_comment(bytes, i);
210 pending_comment = Some(comment);
211 i = nj;
212 continue;
213 }
214
215 if let Some((j, key)) = parse_quoted_utf8(content, bytes, i) {
217 i = j;
218 let (ni2, _) = skip_inline_ws(bytes, i);
219 i = ni2;
220 if i < len && bytes[i] == b'=' {
221 i += 1; let (ni3, _) = skip_inline_ws(bytes, i);
223 i = ni3;
224 if let Some((jv, value_raw)) = parse_quoted_utf8(content, bytes, i) {
225 i = jv;
226 let (ni4, _) = skip_inline_ws(bytes, i);
228 i = ni4;
229 if i < len && bytes[i] == b';' {
231 i += 1; } else {
233 while i < len && bytes[i] != b';' && bytes[i] != b'\n' {
235 i += 1;
236 }
237 if i < len && bytes[i] == b';' {
238 i += 1;
239 }
240 }
241
242 let value = normalize_value_newlines(&value_raw);
243 let pair = Pair {
244 key,
245 value,
246 comment: pending_comment.take(),
247 };
248 pairs.push(pair);
249 have_seen_pair = true;
250 continue;
251 }
252 }
253 }
254
255 while i < len && bytes[i] != b'\n' {
257 i += 1;
258 }
259 }
261
262 (pairs, warnings)
263}
264
/// Reports whether `needle` occurs in `hay` starting exactly at byte offset `i`.
///
/// Returns `false` (instead of panicking on arithmetic overflow or an
/// out-of-range slice) when `i` lies beyond the end of `hay`. An empty
/// `needle` matches at every offset up to and including `hay.len()`.
fn starts_with(hay: &[u8], i: usize, needle: &[u8]) -> bool {
    hay.get(i..).is_some_and(|tail| tail.starts_with(needle))
}
268
/// Advances past spaces, tabs, form feeds, carriage returns and line feeds.
///
/// Returns the offset of the first byte that is none of those (or the input
/// offset unchanged when it is already past the end), and whether at least
/// one line feed was skipped on the way.
fn skip_whitespace(bytes: &[u8], mut i: usize) -> (usize, bool) {
    let mut saw_newline = false;
    while let Some(&byte) = bytes.get(i) {
        if byte == b'\n' {
            saw_newline = true;
        } else if !matches!(byte, b' ' | b'\t' | 0x0C | 0x0D) {
            break;
        }
        i += 1;
    }
    (i, saw_newline)
}
283
/// Advances past whitespace between the tokens of a statement.
///
/// Despite the name, line feeds are skipped too (alongside spaces, tabs,
/// form feeds and carriage returns), so a statement may span several lines.
/// Returns the offset of the first non-whitespace byte and whether a line
/// feed was among the skipped bytes.
fn skip_inline_ws(bytes: &[u8], i: usize) -> (usize, bool) {
    let tail = bytes.get(i..).unwrap_or(&[]);
    let run = tail
        .iter()
        .take_while(|byte| matches!(**byte, b' ' | b'\t' | b'\n' | 0x0C | 0x0D))
        .count();
    (i + run, tail[..run].contains(&b'\n'))
}
298
/// Reads a line comment that starts at offset `i`.
///
/// Returns the offset of the terminating line feed (or the end of input when
/// there is none) and the comment text from `i` up to that point. The text
/// keeps its leading marker and any carriage return before the line feed;
/// invalid UTF-8 is replaced lossily.
fn parse_line_comment(bytes: &[u8], i: usize) -> (usize, String) {
    let end = bytes[i..]
        .iter()
        .position(|&byte| byte == b'\n')
        .map_or(bytes.len(), |offset| i + offset);
    (end, String::from_utf8_lossy(&bytes[i..end]).into_owned())
}
307
/// Reads a block comment whose opening `/*` starts at offset `i`.
///
/// Returns the offset just past the closing `*/` and the comment text
/// including both delimiters. The search for `*/` begins after the opening
/// delimiter, so `/*/` does not close itself.
///
/// An unterminated comment extends to the end of input: the returned offset
/// is `bytes.len()` and the text contains every remaining byte, so the final
/// byte is neither dropped nor a trailing multi-byte character cut in half.
/// Invalid UTF-8 is replaced lossily.
fn parse_block_comment(bytes: &[u8], i: usize) -> (usize, String) {
    let body_start = (i + 2).min(bytes.len());
    let end = bytes[body_start..]
        .windows(2)
        .position(|window| window == b"*/")
        .map_or(bytes.len(), |offset| body_start + offset + 2);
    (end, String::from_utf8_lossy(&bytes[i..end]).into_owned())
}
320
321fn try_skip_langcodec_header(bytes: &[u8], mut i: usize) -> Option<usize> {
324 let start = i;
325 let mut saw_header_marker = false;
326 while i < bytes.len() {
329 let (ni, _nl) = skip_whitespace(bytes, i);
331 i = ni;
332 if i >= bytes.len() {
333 break;
334 }
335 if starts_with(bytes, i, b"//:") || starts_with(bytes, i, b"//") {
336 if starts_with(bytes, i, b"//:") {
337 saw_header_marker = true;
338 }
339 while i < bytes.len() && bytes[i] != b'\n' {
341 i += 1;
342 }
343 continue;
344 }
345 break;
346 }
347 if saw_header_marker && i > start {
348 Some(i)
349 } else {
350 None
351 }
352}
353
/// Looks for a `//: Language: <code>` header line and returns `<code>`.
///
/// Only the first 50 lines are inspected. Leading whitespace before the
/// marker is ignored, the marker may be written `//:` or `// :`, and the
/// language is trimmed. Marker lines with an empty language are passed over,
/// so the first non-empty one wins. Returns `None` when nothing matches.
fn extract_header_language(content: &str) -> Option<String> {
    content.lines().take(50).find_map(|line| {
        let line = line.trim_start();
        let after_marker = line
            .strip_prefix("//:")
            .or_else(|| line.strip_prefix("// :"))?;
        let language = after_marker.trim_start().strip_prefix("Language:")?.trim();
        (!language.is_empty()).then(|| language.to_string())
    })
}
374
/// Reads a double-quoted literal that starts at byte offset `i`.
///
/// `bytes` must be the byte view of `source`. On success returns the offset
/// just past the closing quote and the text between the quotes, copied
/// verbatim (escape sequences are not decoded). A quote closes the literal
/// only when it is preceded by an even number of consecutive backslashes.
///
/// Returns `None` when `i` is out of range, the byte at `i` is not `"`, or
/// the literal is never closed.
fn parse_quoted_utf8(source: &str, bytes: &[u8], i: usize) -> Option<(usize, String)> {
    if bytes.get(i) != Some(&b'"') {
        return None;
    }

    let start = i + 1;
    // True while the current run of backslashes has odd length, i.e. the
    // next byte is escaped.
    let mut escaped = false;
    for (offset, &byte) in bytes[start..].iter().enumerate() {
        match byte {
            b'\\' => escaped = !escaped,
            b'"' if !escaped => {
                let end = start + offset;
                return Some((end + 1, source[start..end].to_string()));
            }
            _ => escaped = false,
        }
    }
    None
}
407
/// Rewrites a value that spans several physical lines into single-line form.
///
/// A value without any line feed is returned unchanged (tabs included).
/// Otherwise every line feed becomes the two-character sequence `\n` and
/// every tab character becomes the two-character sequence `\t`. Carriage
/// returns are left untouched.
fn normalize_value_newlines(raw: &str) -> String {
    if !raw.contains('\n') {
        return raw.to_string();
    }
    raw.split('\n')
        .map(|line| line.replace('\t', "\\t"))
        .collect::<Vec<_>>()
        .join("\\n")
}
423
/// Escapes a key or value for writing between double quotes.
///
/// Rules:
/// - `"` becomes `\"` and a real line feed becomes `\n`.
/// - A run of backslashes directly followed by `'`, `n`, `t`, `r` or `"` is
///   taken to be an escape sequence that is already in place and is copied
///   through unchanged, together with that character.
/// - Any other run of backslashes (including one at the very end) is
///   literal, so each backslash is doubled. The character after the run is
///   then escaped by the normal rules, so a real line feed that follows a
///   backslash is written as `\n` rather than emitted raw.
/// - Every other character is copied as is.
fn escape_strings_token(s: &str) -> String {
    let chars: Vec<char> = s.chars().collect();
    let mut out = String::with_capacity(s.len());
    let mut i = 0usize;

    while i < chars.len() {
        match chars[i] {
            '"' => {
                out.push_str("\\\"");
                i += 1;
            }
            '\n' => {
                out.push_str("\\n");
                i += 1;
            }
            '\\' => {
                let run = chars[i..].iter().take_while(|&&c| c == '\\').count();
                match chars.get(i + run).copied() {
                    Some(c @ ('\'' | 'n' | 't' | 'r' | '"')) => {
                        // Existing escape sequence: keep it byte for byte.
                        out.extend(std::iter::repeat('\\').take(run));
                        out.push(c);
                        i += run + 1;
                    }
                    _ => {
                        // Literal backslashes: double each one and let the
                        // main loop deal with whatever follows.
                        out.extend(std::iter::repeat('\\').take(run * 2));
                        i += run;
                    }
                }
            }
            other => {
                out.push(other);
                i += 1;
            }
        }
    }
    out
}
498
499impl TryFrom<Entry> for Pair {
500 type Error = Error;
501
502 fn try_from(entry: Entry) -> Result<Self, Self::Error> {
503 match entry.value {
505 Translation::Singular(value) => Ok(Pair {
506 key: entry.id,
507 value: crate::placeholder::to_ios_placeholders(&value),
508 comment: entry.comment,
509 }),
510 Translation::Plural(_) => Err(Error::DataMismatch(
511 "Plural translations are not supported in .strings format".to_string(),
512 )),
513 }
514 }
515}
516
517impl From<Pair> for Entry {
518 fn from(pair: Pair) -> Self {
519 let is_pair_value_empty = pair.value.is_empty();
520 Entry {
521 id: pair.key,
522 value: Translation::Singular(pair.value),
523 comment: pair.comment,
524 status: if is_pair_value_empty {
525 EntryStatus::New
526 } else {
527 EntryStatus::Translated
528 },
529 custom: HashMap::new(),
530 }
531 }
532}
533
534impl Pair {
535 pub fn formatted_comment(&self) -> String {
537 if let Some(comment) = &self.comment {
538 if comment.starts_with("/*") && comment.ends_with("*/") {
539 comment[2..comment.len() - 2].trim().to_string()
540 } else if let Some(comment) = comment.strip_prefix("//") {
541 comment.trim().to_string()
542 } else {
543 comment.trim().to_string()
544 }
545 } else {
546 String::new()
547 }
548 }
549}
550
// Unit tests for the `.strings` parser and writer. The multi-line fixtures
// are whitespace-sensitive: some expected values include the leading
// whitespace of continuation lines.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::traits::Parser;

    // A block comment directly above a pair is attached to that pair.
    #[test]
    fn test_parse_basic_strings_with_comment() {
        let content = r#"
 /* Greeting for the user */
 "hello" = "Hello, world!";
 "#;
        let parsed = Format::from_str(content).unwrap();
        assert_eq!(parsed.pairs.len(), 1);
        let pair = &parsed.pairs[0];
        assert_eq!(pair.key, "hello");
        assert_eq!(pair.value, "Hello, world!");
        assert!(
            pair.comment
                .as_ref()
                .unwrap()
                .contains("Greeting for the user")
        );
    }

    // Parse -> write -> parse keeps keys and values intact.
    #[test]
    fn test_round_trip_serialization() {
        let content = r#"
 /* Farewell */
 "bye" = "Goodbye!";
 "#;
        let parsed = Format::from_str(content).unwrap();
        let mut output = Vec::new();
        parsed.to_writer(&mut output).unwrap();
        let output_str = String::from_utf8(output).unwrap();
        let reparsed = Format::from_str(&output_str).unwrap();
        assert_eq!(parsed.pairs.len(), reparsed.pairs.len());
        for (orig, new) in parsed.pairs.iter().zip(reparsed.pairs.iter()) {
            assert_eq!(orig.key, new.key);
            assert_eq!(orig.value, new.value);
        }
    }

    // The writer escapes quotes, literal backslashes and real newlines in
    // both keys and values.
    #[test]
    fn test_strings_writer_escapes_quotes_backslashes_and_newlines() {
        let format = Format {
            language: String::new(),
            pairs: vec![Pair {
                key: "greet\"key\\with\nline".to_string(),
                value: "He said: \"hi\"\\and newline\n".to_string(),
                comment: None,
            }],
        };
        let mut out = Vec::new();
        format.to_writer(&mut out).unwrap();
        let out_str = String::from_utf8(out).unwrap();
        assert!(out_str.contains("\"greet\\\"key\\\\with\\nline\""));
        assert!(out_str.contains("\"He said: \\\"hi\\\"\\\\and newline\\n\""));
    }

    // Escape sequences are kept verbatim by the parser, and the writer does
    // not double the backslashes of an escaped apostrophe.
    #[test]
    fn test_unescape_minimal_apostrophe_and_backslash() {
        let content = r#"
 "key1" = "Can\'t accept";
 "key2" = "Can\\'t accept";
 "#;
        let parsed = Format::from_str(content).unwrap();
        assert_eq!(parsed.pairs.len(), 2);
        assert_eq!(parsed.pairs[0].value, r#"Can\'t accept"#);
        assert_eq!(parsed.pairs[1].value, r#"Can\\'t accept"#);

        let mut out = Vec::new();
        parsed.to_writer(&mut out).unwrap();
        let out_str = String::from_utf8(out).unwrap();
        assert!(out_str.contains(r#""key1" = "Can\'t accept";"#));
        assert!(out_str.contains(r#""key2" = "Can\\'t accept";"#));
    }

    // Converting a resource into this format rewrites `%s`-style
    // placeholders to `%@`-style ones.
    #[test]
    fn test_strings_writer_ios_placeholder_conversion() {
        let resource = Resource {
            metadata: Metadata {
                language: "en".to_string(),
                domain: String::new(),
                custom: HashMap::new(),
            },
            entries: vec![Entry {
                id: "g".to_string(),
                value: Translation::Singular("Hi %1$s and %s".to_string()),
                comment: None,
                status: EntryStatus::Translated,
                custom: HashMap::new(),
            }],
        };
        let fmt = Format::try_from(resource).unwrap();
        assert_eq!(fmt.pairs.len(), 1);
        assert_eq!(fmt.pairs[0].value, "Hi %1$@ and %@");
    }

    // A value spanning several physical lines is folded into one line with
    // `\n` sequences; leading whitespace of continuation lines is preserved.
    #[test]
    fn test_multiline_value_with_embedded_newlines_and_whitespace() {
        let content = r#"
 /* Multiline value */
 "multiline" = "This is line 1.
 \t\tThis is line 2.
 This is line 3.";
 "#;
        let parsed = Format::from_str(content).unwrap();
        assert_eq!(parsed.pairs.len(), 1);
        let pair = &parsed.pairs[0];
        assert_eq!(pair.key, "multiline");
        assert_eq!(
            pair.value,
            "This is line 1.\\n \\t\\tThis is line 2.\\n This is line 3."
        );
    }

    // Real tab characters inside a multi-line value are rewritten to `\t`.
    #[test]
    fn test_multiline_value_with_tabs_and_embedded_newlines() {
        let content =
            "\"multiline\" = \"This is line 1.\n\t\tThis is line\n\t\t\t2.This is line\n3.\";";
        let parsed = Format::from_str(content).unwrap();
        assert_eq!(parsed.pairs.len(), 1);
        let pair = &parsed.pairs[0];
        assert_eq!(pair.key, "multiline");
        assert_eq!(
            pair.value,
            r#"This is line 1.\n\t\tThis is line\n\t\t\t2.This is line\n3."#
        );
        assert!(pair.comment.is_none());
    }

    // Blank lines are ignored and a malformed line is skipped without
    // affecting the pairs around it.
    #[test]
    fn test_blank_lines_and_ignored_malformed_lines() {
        let content = r#"

 // Comment

 "good" = "yes";
 bad line without equals
 "another" = "ok";

 "#;
        let parsed = Format::from_str(content).unwrap();
        assert_eq!(parsed.pairs.len(), 2);
        assert_eq!(parsed.pairs[0].key, "good");
        assert_eq!(parsed.pairs[0].value, "yes");
        assert_eq!(parsed.pairs[1].key, "another");
        assert_eq!(parsed.pairs[1].value, "ok");
    }

    // An empty value parses fine and maps to the `New` entry status.
    #[test]
    fn test_entry_with_empty_value() {
        let content = r#"
 /* Empty value */
 "empty" = "";
 "#;
        let parsed = Format::from_str(content).unwrap();
        assert_eq!(parsed.pairs.len(), 1);
        let pair = &parsed.pairs[0];
        assert_eq!(pair.key, "empty");
        assert_eq!(pair.value, "");
        let entry = pair.clone().into_entry();
        assert_eq!(entry.status, EntryStatus::New);
    }

    // Trailing spaces inside a value are kept, including after non-ASCII text.
    #[test]
    fn test_preserve_trailing_spaces() {
        let content = r#"
 "key1" = "Value with trailing space ";
 "key2" = "Another value with trailing spaces ";
 "key3" = "No trailing spaces";
 "key4" = "过去一天 ";
 "#;
        let parsed = Format::from_str(content).unwrap();
        assert_eq!(parsed.pairs.len(), 4);

        let pair1 = &parsed.pairs[0];
        let pair2 = &parsed.pairs[1];
        let pair3 = &parsed.pairs[2];
        let pair4 = &parsed.pairs[3];

        assert_eq!(pair1.value, "Value with trailing space ");
        assert_eq!(pair2.value, "Another value with trailing spaces ");
        assert_eq!(pair3.value, "No trailing spaces");
        assert_eq!(pair4.value, "过去一天 ");
    }

    // Each comment is attached to the pair that follows it, for both line
    // and block comments.
    #[test]
    fn test_comments_attached_to_correct_key_value_pairs() {
        let content = r#"
 // Comment for A
 "A" = "a";
 // Comment for B
 "B" = "b";
 /* Block comment for C */
 "C" = "c";
 "#;
        let parsed = Format::from_str(content).unwrap();
        assert_eq!(parsed.pairs.len(), 3);
        let a = &parsed.pairs[0];
        let b = &parsed.pairs[1];
        let c = &parsed.pairs[2];
        assert!(a.comment.as_ref().unwrap().contains("Comment for A"));
        assert!(b.comment.as_ref().unwrap().contains("Comment for B"));
        assert!(c.comment.as_ref().unwrap().contains("Block comment for C"));
    }

    // An empty value followed by a trailing line comment on the same line
    // still counts as a pair.
    #[test]
    fn test_parse_strings_with_empty_value() {
        let content = r#"
 // String

 "PlayConsumed" = "%.2fMB traffic will be consumed if you play it";
 "Score" = "%@ reviews";
 "Wan" = "";//英文逻辑不一样,为空就好
 "#;
        let parsed = Format::from_str(content).unwrap();
        assert_eq!(parsed.pairs.len(), 3);
    }
}