use std::collections::HashMap;
use std::fs::File;
use std::path::Path;
use indoc::indoc;
use crate::{
error::Error,
traits::Parser,
types::{Entry, EntryStatus, Metadata, Resource, Translation},
};
/// In-memory representation of one `.strings` document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Format {
/// Language taken from the `//: Language: <code>` header line; empty when
/// the parsed document carries no such line.
pub language: String,
/// Key/value statements in the order they are stored.
pub pairs: Vec<Pair>,
}
impl Parser for Format {
    /// Parses a `.strings` document from `reader`.
    ///
    /// The whole input is read into memory and must be valid UTF-8. A leading
    /// byte-order mark (U+FEFF) is ignored. The language comes from the
    /// `//: Language: <code>` header line and is left empty when that line is
    /// missing. Warnings returned by the content parser are discarded.
    ///
    /// # Errors
    ///
    /// Returns [`Error::Io`] when reading fails and [`Error::InvalidResource`]
    /// when the bytes are not valid UTF-8.
    fn from_reader<R: std::io::BufRead>(reader: R) -> Result<Self, Error> {
        let mut reader = reader;
        let mut bytes = Vec::new();
        std::io::Read::read_to_end(&mut reader, &mut bytes).map_err(Error::Io)?;
        let content = String::from_utf8(bytes)
            .map_err(|_| Error::InvalidResource("Invalid UTF-8 in .strings file".to_string()))?;

        // The scanner does not treat U+FEFF as whitespace, so a BOM left in
        // place would make the first line look malformed and get it skipped.
        let text = content.strip_prefix('\u{FEFF}').unwrap_or(&content);

        let language = extract_header_language(text).unwrap_or_default();
        let (pairs, _warnings) = parse_strings_content(text);
        Ok(Format { language, pairs })
    }

    /// Serializes the document to `writer`.
    ///
    /// Output starts with the generated langcodec header (including the
    /// `//: Language:` line), followed by one `"key" = "value";` line per pair.
    /// A pair's comment is written verbatim on the line before it, with
    /// trailing line breaks trimmed.
    ///
    /// NOTE(review): comments are emitted exactly as stored, so a comment that
    /// lacks `//` or `/* */` markers ends up as a bare line in the output —
    /// confirm every producer of `Pair::comment` supplies the markers.
    ///
    /// # Errors
    ///
    /// Returns [`Error::Io`] when the write fails.
    fn to_writer<W: std::io::Write>(&self, mut writer: W) -> Result<(), Error> {
        let mut content = format!(
            indoc! {"
                // This file is automatically generated by langcodec.
                // Do not edit it manually, as your changes will be overwritten.
                // Here's the basic information about the file which could be useful
                // for translators, and langcodec would use it to generate the
                // appropriate metadata for the resource.
                //
                //: Language: {}
                //
            "},
            self.language
        );

        for pair in &self.pairs {
            if let Some(comment) = &pair.comment {
                content.push_str(comment.trim_end_matches(['\n', '\r']));
                content.push('\n');
            }
            // Assemble `"key" = "value";` piecewise to avoid a temporary
            // `String` per pair.
            content.push('"');
            content.push_str(&escape_strings_token(&pair.key));
            content.push_str("\" = \"");
            content.push_str(&escape_strings_token(&pair.value));
            content.push_str("\";\n");
        }

        writer.write_all(content.as_bytes()).map_err(Error::Io)
    }

    /// Reads and parses the `.strings` file at `path`.
    ///
    /// The file is passed through an `encoding_rs_io` decoder built with
    /// `bom_override(true)` before parsing.
    /// NOTE(review): presumably this lets a byte-order mark select the source
    /// encoding (e.g. UTF-16 files) — confirm against the `encoding_rs_io` docs.
    ///
    /// # Errors
    ///
    /// Returns [`Error::Io`] when the file cannot be opened or read,
    /// [`Error::InvalidResource`] when the decoded bytes are not valid UTF-8,
    /// and otherwise whatever `Self::from_str` returns.
    fn read_from<P: AsRef<Path>>(path: P) -> Result<Self, Error>
    where
        Self: Sized,
    {
        let file = File::open(path).map_err(Error::Io)?;
        let mut decoder = encoding_rs_io::DecodeReaderBytesBuilder::new()
            .bom_override(true)
            .build(file);
        let mut decoded_bytes = Vec::new();
        std::io::Read::read_to_end(&mut decoder, &mut decoded_bytes).map_err(Error::Io)?;
        let decoded = String::from_utf8(decoded_bytes)
            .map_err(|_| Error::InvalidResource("Invalid UTF-8 in .strings file".to_string()))?;
        Self::from_str(&decoded)
    }
}
impl From<Format> for Resource {
fn from(value: Format) -> Self {
Resource {
metadata: Metadata {
language: value.language,
domain: String::from(""),
custom: HashMap::new(),
},
entries: value.pairs.into_iter().map(Pair::into_entry).collect(),
}
}
}
impl TryFrom<Resource> for Format {
    type Error = Error;

    /// Builds a `.strings` document from a generic [`Resource`].
    ///
    /// Only the metadata language is kept. Conversion stops at the first entry
    /// that cannot be turned into a [`Pair`], and that error is returned.
    fn try_from(value: Resource) -> Result<Self, Self::Error> {
        let mut pairs = Vec::with_capacity(value.entries.len());
        for entry in value.entries {
            pairs.push(Pair::try_from(entry)?);
        }
        Ok(Format {
            language: value.metadata.language,
            pairs,
        })
    }
}
/// One `"key" = "value";` statement of a `.strings` document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Pair {
/// Text between the quotes of the key, exactly as written in the file.
pub key: String,
/// Text between the quotes of the value. The parser does not decode escape
/// sequences; it only rewrites raw line breaks as a literal `\n`.
pub value: String,
/// Comment that preceded the statement, stored with its `//` or `/* */`
/// markers when it comes from the parser.
pub comment: Option<String>,
}
impl Pair {
fn into_entry(self) -> Entry {
let Pair {
key,
value,
comment,
} = self;
let is_pair_value_empty = value.is_empty();
Entry {
id: key,
value: Translation::Singular(value),
comment,
status: if is_pair_value_empty {
EntryStatus::New
} else {
EntryStatus::Translated
},
custom: HashMap::new(),
}
}
}
/// Scans `.strings` text and returns the pairs it contains plus a warning list.
///
/// The scanner is tolerant: anything that is not a comment or a complete
/// `"key" = "value";` statement is skipped. The most recent comment seen before
/// a statement is attached to that statement. Until the first pair has been
/// read, a langcodec header block is skipped and never attached as a comment.
///
/// The warning list is part of the return shape but is currently always empty.
fn parse_strings_content(content: &str) -> (Vec<Pair>, Vec<String>) {
    let bytes = content.as_bytes();
    let len = bytes.len();
    let mut pairs: Vec<Pair> = Vec::new();
    let warnings: Vec<String> = Vec::new();
    let mut pending_comment: Option<String> = None;
    let mut have_seen_pair = false;
    let mut i = 0usize;

    while i < len {
        i = skip_whitespace(bytes, i).0;
        if i >= len {
            break;
        }

        // The generated header can only appear before the first statement.
        let header_end = if have_seen_pair {
            None
        } else {
            try_skip_langcodec_header(bytes, i)
        };
        if let Some(next) = header_end {
            i = next;
            pending_comment = None;
            continue;
        }

        if starts_with(bytes, i, b"//") {
            let (next, comment) = parse_line_comment(bytes, i);
            pending_comment = Some(comment);
            i = next;
            continue;
        }
        if starts_with(bytes, i, b"/*") {
            let (next, comment) = parse_block_comment(bytes, i);
            pending_comment = Some(comment);
            i = next;
            continue;
        }

        let (next, statement) = parse_pair_statement(content, bytes, i);
        i = next;
        if let Some((key, value)) = statement {
            pairs.push(Pair {
                key,
                value,
                comment: pending_comment.take(),
            });
            have_seen_pair = true;
        }
    }

    (pairs, warnings)
}

/// Tries to read one `"key" = "value";` statement starting at `start`.
///
/// Returns the index at which scanning should resume and, when a key and a
/// value were both found, the key together with the newline-normalized value.
///
/// Whitespace between tokens may include line breaks. When an expected `=` or
/// `;` is missing and a line break was crossed while looking for it, scanning
/// resumes right after that whitespace, so the next line is parsed normally
/// instead of being discarded together with the broken statement.
fn parse_pair_statement(
    content: &str,
    bytes: &[u8],
    start: usize,
) -> (usize, Option<(String, String)>) {
    let len = bytes.len();
    // Advances to the next `\n` (not consumed) or to the end of input.
    let skip_line = |mut at: usize| {
        while at < len && bytes[at] != b'\n' {
            at += 1;
        }
        at
    };

    let Some((after_key, key)) = parse_quoted_utf8(content, bytes, start) else {
        return (skip_line(start), None);
    };

    let (at, crossed_newline) = skip_inline_ws(bytes, after_key);
    if at >= len || bytes[at] != b'=' {
        // A lone quoted token: drop the rest of its own line only.
        let resume = if crossed_newline { at } else { skip_line(at) };
        return (resume, None);
    }

    let (at, _) = skip_inline_ws(bytes, at + 1);
    let Some((after_value, raw_value)) = parse_quoted_utf8(content, bytes, at) else {
        return (skip_line(at), None);
    };

    let (at, crossed_newline) = skip_inline_ws(bytes, after_value);
    let end = if at < len && bytes[at] == b';' {
        at + 1
    } else if crossed_newline {
        // Missing `;` and the line already ended: whatever follows belongs to
        // the next statement and must not be consumed here.
        at
    } else {
        // Missing `;` with trailing junk on the same line: drop the junk up to
        // the terminator or the end of the line.
        let mut stop = at;
        while stop < len && bytes[stop] != b';' && bytes[stop] != b'\n' {
            stop += 1;
        }
        if stop < len && bytes[stop] == b';' {
            stop + 1
        } else {
            stop
        }
    };

    (end, Some((key, normalize_value_newlines(&raw_value))))
}
/// Reports whether `needle` occurs in `hay` exactly at byte offset `i`.
///
/// An offset past the end of `hay` yields `false`.
fn starts_with(hay: &[u8], i: usize, needle: &[u8]) -> bool {
    hay.get(i..).is_some_and(|tail| tail.starts_with(needle))
}
/// Advances `i` past spaces, tabs, form feeds, carriage returns and line feeds.
///
/// Returns the index of the first byte that is none of those (or the input
/// length) and whether at least one line feed was skipped.
fn skip_whitespace(bytes: &[u8], mut i: usize) -> (usize, bool) {
    let mut saw_newline = false;
    while let Some(&byte) = bytes.get(i) {
        if byte == b'\n' {
            saw_newline = true;
        } else if !matches!(byte, b' ' | b'\t' | 0x0C | 0x0D) {
            break;
        }
        i += 1;
    }
    (i, saw_newline)
}
/// Advances past the whitespace that separates the tokens of one statement.
///
/// Despite the name, line feeds are skipped as well as spaces, tabs, form
/// feeds and carriage returns. Returns the index of the first other byte (or
/// the input length) and whether a line feed was among the skipped bytes.
fn skip_inline_ws(bytes: &[u8], i: usize) -> (usize, bool) {
    let tail = bytes.get(i..).unwrap_or(&[]);
    let skipped = tail
        .iter()
        .take_while(|byte| matches!(**byte, b' ' | b'\t' | b'\n' | 0x0C | 0x0D))
        .count();
    let saw_newline = tail[..skipped].contains(&b'\n');
    (i + skipped, saw_newline)
}
/// Reads a `//` comment that starts at `i` and runs to the end of the line.
///
/// Returns the index of the terminating line feed (or the input length) and the
/// comment text including its `//` marker. The line feed itself is neither
/// consumed nor included; invalid UTF-8 is replaced lossily.
fn parse_line_comment(bytes: &[u8], i: usize) -> (usize, String) {
    let end = bytes[i..]
        .iter()
        .position(|&byte| byte == b'\n')
        .map_or(bytes.len(), |offset| i + offset);
    let text = String::from_utf8_lossy(&bytes[i..end]).into_owned();
    (end, text)
}
/// Reads a `/* ... */` comment that starts at `i`.
///
/// Returns the index just past the closing `*/` and the comment text including
/// both markers. When the comment is never closed it extends to the end of the
/// input: the returned index is the input length and the text contains every
/// remaining byte. Invalid UTF-8 is replaced lossily.
fn parse_block_comment(bytes: &[u8], i: usize) -> (usize, String) {
    // Skip the opening marker so that `/*/` is not mistaken for a closed comment.
    let body_start = (i + 2).min(bytes.len());
    let end = bytes[body_start..]
        .windows(2)
        .position(|pair| pair == b"*/")
        .map_or(bytes.len(), |offset| body_start + offset + 2);
    let comment = String::from_utf8_lossy(&bytes[i.min(end)..end]).into_owned();
    (end, comment)
}
/// Skips a run of `//` comment lines when it looks like the langcodec header.
///
/// Starting at `i`, consecutive `//` lines (separated by any whitespace) are
/// consumed. The run counts as a header only when at least one of its lines
/// starts with `//:`; then the index after the run is returned. Otherwise
/// `None` is returned and the caller should treat the lines as ordinary
/// comments.
fn try_skip_langcodec_header(bytes: &[u8], mut i: usize) -> Option<usize> {
    let start = i;
    let mut saw_header_marker = false;

    loop {
        i = skip_whitespace(bytes, i).0;
        // `//:` lines also start with `//`, so one prefix test covers both.
        if !starts_with(bytes, i, b"//") {
            break;
        }
        saw_header_marker |= starts_with(bytes, i, b"//:");
        while i < bytes.len() && bytes[i] != b'\n' {
            i += 1;
        }
    }

    (saw_header_marker && i > start).then_some(i)
}
/// Looks for a `//: Language: <code>` line within the first 50 lines.
///
/// Leading whitespace is ignored and `// :` is accepted in place of `//:`.
/// Returns the trimmed language code of the first line that carries a
/// non-empty one, or `None` when there is no such line.
fn extract_header_language(content: &str) -> Option<String> {
    content.lines().take(50).find_map(|line| {
        let line = line.trim_start();
        let directive = line
            .strip_prefix("//:")
            .or_else(|| line.strip_prefix("// :"))?;
        let language = directive.trim_start().strip_prefix("Language:")?.trim();
        if language.is_empty() {
            None
        } else {
            Some(language.to_string())
        }
    })
}
/// Reads a double-quoted token whose opening quote is at byte offset `i`.
///
/// `bytes` is expected to be the byte view of `source`. A quote preceded by an
/// odd number of backslashes is treated as escaped and does not end the token.
/// Returns the index just past the closing quote and the text between the
/// quotes, unchanged (escape sequences are not decoded). Returns `None` when
/// `i` is not at a quote or the token is never closed.
fn parse_quoted_utf8(source: &str, bytes: &[u8], i: usize) -> Option<(usize, String)> {
    if bytes.get(i) != Some(&b'"') {
        return None;
    }
    let start = i + 1;
    // True while an odd number of backslashes immediately precedes the cursor.
    let mut escaped = false;
    for (offset, &byte) in bytes[start..].iter().enumerate() {
        match byte {
            b'\\' => escaped = !escaped,
            b'"' if !escaped => {
                let end = start + offset;
                return Some((end + 1, source[start..end].to_string()));
            }
            _ => escaped = false,
        }
    }
    None
}
/// Rewrites a value that spans several lines into single-line form.
///
/// When `raw` contains a line feed, every line feed becomes the two characters
/// `\n` and every tab becomes `\t`. A value without any line feed is returned
/// unchanged, tabs included.
fn normalize_value_newlines(raw: &str) -> String {
    if !raw.contains('\n') {
        return raw.to_string();
    }
    let mut out = String::with_capacity(raw.len() + 8);
    for ch in raw.chars() {
        match ch {
            '\n' => out.push_str("\\n"),
            '\t' => out.push_str("\\t"),
            other => out.push(other),
        }
    }
    out
}
/// Escapes a key or value so it can be written between double quotes.
///
/// Raw `"` and line feeds are always written as `\"` and `\n`. A run of
/// backslashes is handled according to the character that follows it:
///
/// * `'`, `n`, `t`, `r`: the run and the character are kept as they are, on
///   the assumption that the text is already an escape sequence.
/// * `"`: the run is kept. An odd run already escapes the quote; after an even
///   run the quote is bare, so one more backslash is added to keep the quote
///   from terminating the literal.
/// * anything else, or the end of the text: every backslash is doubled. The
///   following character is then handled like any other, so a raw line feed
///   after a backslash is still written as `\n`.
fn escape_strings_token(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();

    while let Some(ch) = chars.next() {
        match ch {
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\\' => {
                // Consume the whole run, tracking its length and parity.
                let mut run = 1usize;
                let mut odd_run = true;
                while chars.next_if_eq(&'\\').is_some() {
                    run += 1;
                    odd_run = !odd_run;
                }

                match chars.peek().copied() {
                    Some(escaped @ ('\'' | 'n' | 't' | 'r')) => {
                        out.extend(std::iter::repeat('\\').take(run));
                        out.push(escaped);
                        chars.next();
                    }
                    Some('"') => {
                        out.extend(std::iter::repeat('\\').take(run));
                        if !odd_run {
                            // Every backslash is paired up, so the quote itself
                            // still needs escaping.
                            out.push('\\');
                        }
                        out.push('"');
                        chars.next();
                    }
                    // Literal backslashes: double them and leave the next
                    // character (if any) to the outer loop.
                    _ => out.extend(std::iter::repeat('\\').take(run * 2)),
                }
            }
            other => out.push(other),
        }
    }

    out
}
impl TryFrom<Entry> for Pair {
type Error = Error;
fn try_from(entry: Entry) -> Result<Self, Self::Error> {
match entry.value {
Translation::Empty => Ok(Pair {
key: entry.id,
value: String::new(),
comment: entry.comment,
}),
Translation::Singular(value) => Ok(Pair {
key: entry.id,
value: crate::placeholder::to_ios_placeholders(&value),
comment: entry.comment,
}),
Translation::Plural(_) => Err(Error::DataMismatch(
"Plural translations are not supported in .strings format".to_string(),
)),
}
}
}
impl From<Pair> for Entry {
fn from(pair: Pair) -> Self {
let is_pair_value_empty = pair.value.is_empty();
Entry {
id: pair.key,
value: Translation::Singular(pair.value),
comment: pair.comment,
status: if is_pair_value_empty {
EntryStatus::New
} else {
EntryStatus::Translated
},
custom: HashMap::new(),
}
}
}
impl Pair {
    /// Returns the comment text without its comment markers.
    ///
    /// A comment wrapped in `/*` and `*/` loses both markers; otherwise a
    /// leading `//` is removed. The result is trimmed. Text that matches
    /// neither shape (including a block comment that is never closed, or the
    /// three-byte `/*/`) is returned trimmed but otherwise unchanged. A pair
    /// without a comment yields an empty string.
    pub fn formatted_comment(&self) -> String {
        let Some(comment) = self.comment.as_deref() else {
            return String::new();
        };
        // Strip the markers one after the other so the opening and closing
        // marker can never overlap, which slicing by fixed offsets would allow.
        let block_body = comment
            .strip_prefix("/*")
            .and_then(|rest| rest.strip_suffix("*/"));
        let text = block_body
            .or_else(|| comment.strip_prefix("//"))
            .unwrap_or(comment);
        text.trim().to_string()
    }
}
// Unit tests for the `.strings` parser and writer. The fixtures are raw string
// literals, so the whitespace inside them is part of the test input.
#[cfg(test)]
mod tests {
use super::*;
use crate::traits::Parser;
// A block comment directly above a statement is attached to that pair.
#[test]
fn test_parse_basic_strings_with_comment() {
let content = r#"
/* Greeting for the user */
"hello" = "Hello, world!";
"#;
let parsed = Format::from_str(content).unwrap();
assert_eq!(parsed.pairs.len(), 1);
let pair = &parsed.pairs[0];
assert_eq!(pair.key, "hello");
assert_eq!(pair.value, "Hello, world!");
assert!(
pair.comment
.as_ref()
.unwrap()
.contains("Greeting for the user")
);
}
// Writing a parsed document and parsing the output again yields the same
// keys and values (comments are not compared).
#[test]
fn test_round_trip_serialization() {
let content = r#"
/* Farewell */
"bye" = "Goodbye!";
"#;
let parsed = Format::from_str(content).unwrap();
let mut output = Vec::new();
parsed.to_writer(&mut output).unwrap();
let output_str = String::from_utf8(output).unwrap();
let reparsed = Format::from_str(&output_str).unwrap();
assert_eq!(parsed.pairs.len(), reparsed.pairs.len());
for (orig, new) in parsed.pairs.iter().zip(reparsed.pairs.iter()) {
assert_eq!(orig.key, new.key);
assert_eq!(orig.value, new.value);
}
}
// The writer escapes raw quotes, backslashes and line feeds in both the key
// and the value.
#[test]
fn test_strings_writer_escapes_quotes_backslashes_and_newlines() {
let format = Format {
language: String::new(),
pairs: vec![Pair {
key: "greet\"key\\with\nline".to_string(),
value: "He said: \"hi\"\\and newline\n".to_string(),
comment: None,
}],
};
let mut out = Vec::new();
format.to_writer(&mut out).unwrap();
let out_str = String::from_utf8(out).unwrap();
assert!(out_str.contains("\"greet\\\"key\\\\with\\nline\""));
assert!(out_str.contains("\"He said: \\\"hi\\\"\\\\and newline\\n\""));
}
// Values are stored with their escape sequences intact, and backslash runs in
// front of an apostrophe are written back unchanged.
#[test]
fn test_unescape_minimal_apostrophe_and_backslash() {
let content = r#"
"key1" = "Can\'t accept";
"key2" = "Can\\'t accept";
"#;
let parsed = Format::from_str(content).unwrap();
assert_eq!(parsed.pairs.len(), 2);
assert_eq!(parsed.pairs[0].value, r#"Can\'t accept"#);
assert_eq!(parsed.pairs[1].value, r#"Can\\'t accept"#);
let mut out = Vec::new();
parsed.to_writer(&mut out).unwrap();
let out_str = String::from_utf8(out).unwrap();
assert!(out_str.contains(r#""key1" = "Can\'t accept";"#));
assert!(out_str.contains(r#""key2" = "Can\\'t accept";"#));
}
// Converting a `Resource` into `Format` rewrites `%s`-style placeholders into
// the `%@` form.
#[test]
fn test_strings_writer_ios_placeholder_conversion() {
let resource = Resource {
metadata: Metadata {
language: "en".to_string(),
domain: String::new(),
custom: HashMap::new(),
},
entries: vec![Entry {
id: "g".to_string(),
value: Translation::Singular("Hi %1$s and %s".to_string()),
comment: None,
status: EntryStatus::Translated,
custom: HashMap::new(),
}],
};
let fmt = Format::try_from(resource).unwrap();
assert_eq!(fmt.pairs.len(), 1);
assert_eq!(fmt.pairs[0].value, "Hi %1$@ and %@");
}
// A value spanning several physical lines is folded into one line with a
// literal `\n` at each line break.
// NOTE(review): the expected value has a space after each `\n`, which requires
// the continuation lines inside the literal to begin with a space — confirm
// the literal's leading whitespace matches.
#[test]
fn test_multiline_value_with_embedded_newlines_and_whitespace() {
let content = r#"
/* Multiline value */
"multiline" = "This is line 1.
\t\tThis is line 2.
This is line 3.";
"#;
let parsed = Format::from_str(content).unwrap();
assert_eq!(parsed.pairs.len(), 1);
let pair = &parsed.pairs[0];
assert_eq!(pair.key, "multiline");
assert_eq!(
pair.value,
"This is line 1.\\n \\t\\tThis is line 2.\\n This is line 3."
);
}
// Raw tabs inside a multi-line value are rewritten as a literal `\t`.
#[test]
fn test_multiline_value_with_tabs_and_embedded_newlines() {
let content =
"\"multiline\" = \"This is line 1.\n\t\tThis is line\n\t\t\t2.This is line\n3.\";";
let parsed = Format::from_str(content).unwrap();
assert_eq!(parsed.pairs.len(), 1);
let pair = &parsed.pairs[0];
assert_eq!(pair.key, "multiline");
assert_eq!(
pair.value,
r#"This is line 1.\n\t\tThis is line\n\t\t\t2.This is line\n3."#
);
assert!(pair.comment.is_none());
}
// A line that is not a statement is skipped without affecting its neighbours.
#[test]
fn test_blank_lines_and_ignored_malformed_lines() {
let content = r#"
// Comment
"good" = "yes";
bad line without equals
"another" = "ok";
"#;
let parsed = Format::from_str(content).unwrap();
assert_eq!(parsed.pairs.len(), 2);
assert_eq!(parsed.pairs[0].key, "good");
assert_eq!(parsed.pairs[0].value, "yes");
assert_eq!(parsed.pairs[1].key, "another");
assert_eq!(parsed.pairs[1].value, "ok");
}
// An empty value parses fine and maps to `EntryStatus::New`.
#[test]
fn test_entry_with_empty_value() {
let content = r#"
/* Empty value */
"empty" = "";
"#;
let parsed = Format::from_str(content).unwrap();
assert_eq!(parsed.pairs.len(), 1);
let pair = &parsed.pairs[0];
assert_eq!(pair.key, "empty");
assert_eq!(pair.value, "");
let entry = pair.clone().into_entry();
assert_eq!(entry.status, EntryStatus::New);
}
// Whitespace at the end of a value is kept, including after non-ASCII text.
#[test]
fn test_preserve_trailing_spaces() {
let content = r#"
"key1" = "Value with trailing space ";
"key2" = "Another value with trailing spaces ";
"key3" = "No trailing spaces";
"key4" = "过去一天 ";
"#;
let parsed = Format::from_str(content).unwrap();
assert_eq!(parsed.pairs.len(), 4);
let pair1 = &parsed.pairs[0];
let pair2 = &parsed.pairs[1];
let pair3 = &parsed.pairs[2];
let pair4 = &parsed.pairs[3];
assert_eq!(pair1.value, "Value with trailing space ");
assert_eq!(pair2.value, "Another value with trailing spaces ");
assert_eq!(pair3.value, "No trailing spaces");
assert_eq!(pair4.value, "过去一天 ");
}
// Each comment is attached to the statement that follows it, for both line
// and block comments.
#[test]
fn test_comments_attached_to_correct_key_value_pairs() {
let content = r#"
// Comment for A
"A" = "a";
// Comment for B
"B" = "b";
/* Block comment for C */
"C" = "c";
"#;
let parsed = Format::from_str(content).unwrap();
assert_eq!(parsed.pairs.len(), 3);
let a = &parsed.pairs[0];
let b = &parsed.pairs[1];
let c = &parsed.pairs[2];
assert!(a.comment.as_ref().unwrap().contains("Comment for A"));
assert!(b.comment.as_ref().unwrap().contains("Comment for B"));
assert!(c.comment.as_ref().unwrap().contains("Block comment for C"));
}
// A line comment that directly follows the `;` of a statement does not stop
// that statement from being parsed.
#[test]
fn test_parse_strings_with_empty_value() {
let content = r#"
// String
"PlayConsumed" = "%.2fMB traffic will be consumed if you play it";
"Score" = "%@ reviews";
"Wan" = "";//英文逻辑不一样,为空就好
"#;
let parsed = Format::from_str(content).unwrap();
assert_eq!(parsed.pairs.len(), 3);
}
}