Skip to main content

ferrocat_po/
serialize.rs

1use crate::scan::find_escapable_byte;
2use crate::text::{escape_string_into, escape_string_into_with_first_escape};
3use crate::{PoFile, PoItem, SerializeOptions};
4
5/// Serializes a [`PoFile`] back into gettext PO text.
6#[must_use]
7pub fn stringify_po(file: &PoFile, options: &SerializeOptions) -> String {
8    let mut out = String::with_capacity(estimate_capacity(file));
9    let mut scratch = String::new();
10
11    for comment in &file.comments {
12        push_prefixed_comment(&mut out, "#", comment);
13    }
14    for comment in &file.extracted_comments {
15        push_prefixed_comment(&mut out, "#.", comment);
16    }
17
18    out.push_str("msgid \"\"\n");
19    out.push_str("msgstr \"\"\n");
20    for header in &file.headers {
21        out.push('"');
22        append_escaped(&mut out, &header.key);
23        out.push_str(": ");
24        append_escaped(&mut out, &header.value);
25        out.push_str("\\n");
26        out.push_str("\"\n");
27    }
28    out.push('\n');
29
30    let mut iter = file.items.iter().peekable();
31    while let Some(item) = iter.next() {
32        write_item(&mut out, &mut scratch, item, options);
33        if iter.peek().is_some() {
34            out.push('\n');
35        }
36    }
37
38    out
39}
40
41fn estimate_capacity(file: &PoFile) -> usize {
42    let headers_len: usize = file
43        .headers
44        .iter()
45        .map(|header| header.key.len() + header.value.len() + 8)
46        .sum();
47    let items_len: usize = file
48        .items
49        .iter()
50        .map(|item| {
51            item.msgid.len()
52                + item.msgctxt.as_ref().map_or(0, String::len)
53                + item.msgid_plural.as_ref().map_or(0, String::len)
54                + item.msgstr.iter().map(String::len).sum::<usize>()
55                + item.comments.iter().map(String::len).sum::<usize>()
56                + item
57                    .extracted_comments
58                    .iter()
59                    .map(String::len)
60                    .sum::<usize>()
61                + item.references.iter().map(String::len).sum::<usize>()
62                + item.flags.iter().map(String::len).sum::<usize>()
63        })
64        .sum();
65
66    headers_len + items_len + 256
67}
68
/// Appends `comment` to `out` as PO comment lines introduced by `prefix`.
///
/// Each line of `comment` becomes its own `prefix`-led line, so a comment that
/// contains embedded newlines can never leak an unprefixed line into the
/// output. An empty line is written as the bare prefix, without a trailing
/// space.
fn push_prefixed_comment(out: &mut String, prefix: &str, comment: &str) {
    // `split` yields exactly one (possibly empty) piece for newline-free
    // input, so single-line comments serialize exactly as before.
    for line in comment.split('\n') {
        out.push_str(prefix);
        if !line.is_empty() {
            out.push(' ');
            out.push_str(line);
        }
        out.push('\n');
    }
}
77
78fn write_item(out: &mut String, scratch: &mut String, item: &PoItem, options: &SerializeOptions) {
79    let obsolete_prefix = if item.obsolete { "#~ " } else { "" };
80
81    for comment in &item.comments {
82        write_prefixed_line(out, obsolete_prefix, "#", comment);
83    }
84    for comment in &item.extracted_comments {
85        write_prefixed_line(out, obsolete_prefix, "#.", comment);
86    }
87    for (key, value) in &item.metadata {
88        out.push_str(obsolete_prefix);
89        out.push_str("#@ ");
90        out.push_str(key);
91        out.push_str(": ");
92        out.push_str(value);
93        out.push('\n');
94    }
95    for reference in &item.references {
96        out.push_str(obsolete_prefix);
97        out.push_str("#: ");
98        out.push_str(reference);
99        out.push('\n');
100    }
101    if !item.flags.is_empty() {
102        out.push_str(obsolete_prefix);
103        out.push_str("#, ");
104        for (index, flag) in item.flags.iter().enumerate() {
105            if index > 0 {
106                out.push(',');
107            }
108            out.push_str(flag);
109        }
110        out.push('\n');
111    }
112
113    if let Some(context) = &item.msgctxt {
114        write_keyword(
115            out,
116            scratch,
117            obsolete_prefix,
118            "msgctxt",
119            context,
120            None,
121            options,
122        );
123    }
124    write_keyword(
125        out,
126        scratch,
127        obsolete_prefix,
128        "msgid",
129        &item.msgid,
130        None,
131        options,
132    );
133    if let Some(plural) = &item.msgid_plural {
134        write_keyword(
135            out,
136            scratch,
137            obsolete_prefix,
138            "msgid_plural",
139            plural,
140            None,
141            options,
142        );
143    }
144
145    if item.msgid_plural.is_some() && item.msgstr.is_empty() {
146        let count = item.nplurals.max(1);
147        for index in 0..count {
148            write_keyword(
149                out,
150                scratch,
151                obsolete_prefix,
152                "msgstr",
153                "",
154                Some(index),
155                options,
156            );
157        }
158        return;
159    }
160
161    if item.msgstr.is_empty() {
162        write_keyword(out, scratch, obsolete_prefix, "msgstr", "", None, options);
163        return;
164    }
165
166    let indexed = item.msgid_plural.is_some() || item.msgstr.len() > 1;
167    for (index, value) in item.msgstr.iter().enumerate() {
168        write_keyword(
169            out,
170            scratch,
171            obsolete_prefix,
172            "msgstr",
173            value,
174            if indexed { Some(index) } else { None },
175            options,
176        );
177    }
178}
179
/// Appends `value` to `out` as comment lines of the form
/// `<obsolete_prefix><prefix> <text>`.
///
/// Each line of `value` is written on its own prefixed line, so a value that
/// contains embedded newlines cannot produce an unprefixed (and therefore
/// invalid) line. An empty line is written as the bare prefixes, without a
/// trailing space.
pub fn write_prefixed_line(out: &mut String, obsolete_prefix: &str, prefix: &str, value: &str) {
    // `split` yields exactly one (possibly empty) piece for newline-free
    // input, so single-line values serialize exactly as before.
    for line in value.split('\n') {
        out.push_str(obsolete_prefix);
        out.push_str(prefix);
        if !line.is_empty() {
            out.push(' ');
            out.push_str(line);
        }
        out.push('\n');
    }
}
189
190pub fn write_keyword(
191    out: &mut String,
192    scratch: &mut String,
193    obsolete_prefix: &str,
194    keyword: &str,
195    value: &str,
196    index: Option<usize>,
197    options: &SerializeOptions,
198) {
199    if try_write_simple_keyword(out, obsolete_prefix, keyword, value, index, options) {
200        return;
201    }
202
203    write_complex_keyword(
204        out,
205        scratch,
206        obsolete_prefix,
207        keyword,
208        value,
209        index,
210        options,
211    );
212}
213
214fn try_write_simple_keyword(
215    out: &mut String,
216    obsolete_prefix: &str,
217    keyword: &str,
218    value: &str,
219    index: Option<usize>,
220    options: &SerializeOptions,
221) -> bool {
222    let first_escape = find_escapable_byte(value.as_bytes());
223    if matches!(first_escape, Some(index) if value.as_bytes()[index] == b'\n') {
224        return false;
225    }
226
227    let prefix_len = keyword_prefix_len(keyword, index);
228    if options.fold_length > 0
229        && value.len()
230            > options
231                .fold_length
232                .saturating_sub(obsolete_prefix.len() + prefix_len + 2)
233    {
234        return false;
235    }
236
237    let start_len = out.len();
238    out.reserve(obsolete_prefix.len() + prefix_len + value.len() + 3);
239    out.push_str(obsolete_prefix);
240    push_keyword_prefix(out, keyword, index);
241    out.push('"');
242    escape_string_into_with_first_escape(out, value, first_escape);
243    out.push_str("\"\n");
244
245    if options.fold_length > 0 && out.len() - start_len - 1 > options.fold_length {
246        out.truncate(start_len);
247        return false;
248    }
249
250    true
251}
252
253fn keyword_prefix_len(keyword: &str, index: Option<usize>) -> usize {
254    index.map_or_else(
255        || keyword.len() + 1,
256        |value| keyword.len() + digits(value) + 3,
257    )
258}
259
260fn push_keyword_prefix(out: &mut String, keyword: &str, index: Option<usize>) {
261    out.push_str(keyword);
262    if let Some(value) = index {
263        out.push('[');
264        push_usize(out, value);
265        out.push(']');
266    }
267    out.push(' ');
268}
269
/// Appends the decimal representation of `value` to `out` without allocating.
fn push_usize(out: &mut String, mut value: usize) {
    // Digits are produced least-significant first, so fill the buffer from the
    // back and emit the used tail in order.
    let mut buffer = [b'0'; 20];
    let mut cursor = buffer.len();
    loop {
        cursor -= 1;
        let digit = u8::try_from(value % 10).expect("single decimal digit fits in u8");
        buffer[cursor] = b'0' + digit;
        value /= 10;
        if value == 0 {
            break;
        }
    }
    out.extend(buffer[cursor..].iter().map(|&byte| char::from(byte)));
}
288
/// Number of decimal digits needed to print `value` (`0` counts as one digit).
const fn digits(value: usize) -> usize {
    let mut remaining = value / 10;
    let mut count = 1usize;
    while remaining != 0 {
        remaining /= 10;
        count += 1;
    }
    count
}
297
298fn append_escaped(out: &mut String, input: &str) {
299    escape_string_into(out, input);
300}
301
302fn write_complex_keyword(
303    out: &mut String,
304    scratch: &mut String,
305    obsolete_prefix: &str,
306    keyword: &str,
307    text: &str,
308    index: Option<usize>,
309    options: &SerializeOptions,
310) {
311    let prefix_len = keyword_prefix_len(keyword, index);
312    let has_multiple_lines = text.contains('\n');
313    let first_line_max = if options.fold_length == 0 {
314        usize::MAX
315    } else {
316        options.fold_length.saturating_sub(prefix_len + 2).max(1)
317    };
318    let other_line_max = if options.fold_length == 0 {
319        usize::MAX
320    } else {
321        options.fold_length.saturating_sub(2).max(1)
322    };
323    let parts = parts_with_has_next(text).collect::<Vec<_>>();
324    let requires_folding = options.fold_length > 0
325        && parts.iter().any(|(part, has_next)| {
326            let escaped_len = escaped_part_len(part, *has_next);
327            let limit = if has_multiple_lines {
328                other_line_max
329            } else {
330                first_line_max
331            };
332            escaped_len > limit
333        });
334    let use_compact = options.compact_multiline
335        && text.split('\n').next().unwrap_or_default() != ""
336        && !requires_folding;
337    let mut wrote_first_value_line = if use_compact {
338        false
339    } else {
340        out.push_str(obsolete_prefix);
341        push_keyword_prefix(out, keyword, index);
342        out.push_str("\"\"\n");
343        true
344    };
345
346    for (part, has_next) in parts {
347        scratch.clear();
348        escape_string_into(scratch, part);
349        if has_next {
350            scratch.push_str("\\n");
351        }
352
353        let limit = if wrote_first_value_line || has_multiple_lines {
354            other_line_max
355        } else {
356            first_line_max
357        };
358
359        write_folded_segments(
360            out,
361            obsolete_prefix,
362            keyword,
363            index,
364            scratch,
365            limit,
366            &mut wrote_first_value_line,
367        );
368    }
369}
370
/// Splits `input` after every `\n`, yielding each piece without its newline
/// together with a flag telling whether a newline followed it.
///
/// An empty input yields nothing, and a trailing newline does not produce an
/// extra empty piece.
fn parts_with_has_next(input: &str) -> impl Iterator<Item = (&str, bool)> {
    input
        .split_inclusive('\n')
        .map(|line| match line.strip_suffix('\n') {
            Some(body) => (body, true),
            None => (line, false),
        })
}
377
378fn write_folded_segments(
379    out: &mut String,
380    obsolete_prefix: &str,
381    keyword: &str,
382    index: Option<usize>,
383    input: &str,
384    max_len: usize,
385    wrote_first_value_line: &mut bool,
386) {
387    let mut start = 0;
388    loop {
389        let end = folded_split_point(input, start, max_len);
390        write_quoted_segment(
391            out,
392            obsolete_prefix,
393            keyword,
394            index,
395            &input[start..end],
396            wrote_first_value_line,
397        );
398        if end == input.len() {
399            break;
400        }
401        start = end;
402    }
403}
404
405fn write_quoted_segment(
406    out: &mut String,
407    obsolete_prefix: &str,
408    keyword: &str,
409    index: Option<usize>,
410    segment: &str,
411    wrote_first_value_line: &mut bool,
412) {
413    out.push_str(obsolete_prefix);
414    if !*wrote_first_value_line {
415        push_keyword_prefix(out, keyword, index);
416        *wrote_first_value_line = true;
417    }
418    out.push('"');
419    out.push_str(segment);
420    out.push_str("\"\n");
421}
422
423fn escaped_part_len(part: &str, has_next: bool) -> usize {
424    let escaped_len = match find_escapable_byte(part.as_bytes()) {
425        Some(_) => {
426            let mut escaped = String::new();
427            escape_string_into(&mut escaped, part);
428            escaped.len()
429        }
430        None => part.len(),
431    };
432
433    escaped_len + if has_next { 2 } else { 0 }
434}
435
/// Chooses the byte offset at which the folded segment starting at `start`
/// should end.
///
/// `input` is already-escaped text and `start` must lie on an escape-sequence
/// boundary (the beginning of the text or a previously returned split point).
/// The result is always greater than `start` unless the remainder is empty,
/// so callers that loop on it are guaranteed to terminate.
///
/// Strategy:
/// 1. If the remainder fits into `max_len`, take all of it.
/// 2. Otherwise take as many whole runs of spaces / non-spaces as fit.
/// 3. If not even the first run fits, cut hard at `max_len` bytes, adjusted
///    so that neither a UTF-8 character nor a backslash escape is split.
fn folded_split_point(input: &str, start: usize, max_len: usize) -> usize {
    let remaining = input.len() - start;
    if remaining <= max_len {
        return input.len();
    }

    let mut end = start;
    while end < input.len() {
        let chunk_end = next_fold_chunk_end(input, end);
        if chunk_end - start > max_len {
            break;
        }
        end = chunk_end;
    }
    if end > start {
        return end;
    }

    let end = clamp_char_boundary(input, start, start + max_len);

    // An even run of trailing backslashes consists of complete `\\` pairs and
    // is safe to end on. An odd run ends with a backslash that introduces an
    // escape whose second character lies past `end`.
    let trailing_backslashes = input.as_bytes()[start..end]
        .iter()
        .rev()
        .take_while(|&&byte| byte == b'\\')
        .count();
    if trailing_backslashes % 2 == 0 {
        return end;
    }
    if end - 1 > start {
        // Leave the escape introducer for the next segment.
        return end - 1;
    }

    // The candidate segment is a lone escape introducer. Backing off would
    // make no progress, so keep the whole escape sequence together even though
    // that exceeds `max_len`.
    let escaped_char_len = input[end..].chars().next().map_or(0, char::len_utf8);
    end + escaped_char_len
}

/// Returns the end of the run of spaces, or of non-spaces, that begins at
/// byte offset `start`. `start` must be a valid index into `input`.
fn next_fold_chunk_end(input: &str, start: usize) -> usize {
    let bytes = input.as_bytes();
    let is_space = bytes[start] == b' ';
    let run_len = bytes[start + 1..]
        .iter()
        .take_while(|&&byte| (byte == b' ') == is_space)
        .count();
    start + 1 + run_len
}

/// Moves `requested_end` (capped at the input length) onto a UTF-8 character
/// boundary.
///
/// The offset is moved backwards first; if that would collapse onto `start`,
/// it is moved forwards instead so the result covers at least one whole
/// character.
fn clamp_char_boundary(input: &str, start: usize, requested_end: usize) -> usize {
    let capped = requested_end.min(input.len());

    let mut end = capped;
    while end > start && !input.is_char_boundary(end) {
        end -= 1;
    }
    if end > start {
        return end;
    }

    let mut end = capped;
    while end < input.len() && !input.is_char_boundary(end) {
        end += 1;
    }
    end
}
489
#[cfg(test)]
mod tests {
    use crate::{Header, MsgStr, PoFile, PoItem, SerializeOptions, parse_po};

    use super::stringify_po;

    /// Wraps `items` in a file without headers or file-level comments.
    fn bare_file(items: Vec<PoItem>) -> PoFile {
        PoFile {
            headers: Vec::new(),
            comments: Vec::new(),
            extracted_comments: Vec::new(),
            items,
        }
    }

    /// Builds an item with a single translation on top of `PoItem::new(2)`.
    fn singular(msgid: &str, msgstr: &str) -> PoItem {
        PoItem {
            msgid: msgid.to_owned(),
            msgstr: MsgStr::from(vec![msgstr.to_owned()]),
            ..PoItem::new(2)
        }
    }

    /// Serializes `file` with explicitly chosen options.
    fn render(file: &PoFile, fold_length: usize, compact_multiline: bool) -> String {
        stringify_po(
            file,
            &SerializeOptions {
                fold_length,
                compact_multiline,
            },
        )
    }

    /// Asserts that `output` contains `expected`, reporting the caller's line.
    #[track_caller]
    fn assert_contains(output: &str, expected: &str) {
        assert!(output.contains(expected));
    }

    #[test]
    fn serializes_comments_headers_and_items() {
        let headers = vec![
            Header {
                key: "Language".to_owned(),
                value: "de".to_owned(),
            },
            Header {
                key: "Plural-Forms".to_owned(),
                value: "nplurals=2; plural=(n != 1);".to_owned(),
            },
        ];
        let file = PoFile {
            comments: vec!["Translator comment".to_owned()],
            extracted_comments: vec!["Extracted".to_owned()],
            headers,
            items: vec![singular("Line1\nLine2", "Zeile1\nZeile2")],
        };

        let output = stringify_po(&file, &SerializeOptions::default());

        assert_contains(&output, "# Translator comment\n");
        assert_contains(&output, "#. Extracted\n");
        assert_contains(&output, "\"Language: de\\n\"\n");
        assert_contains(&output, "msgid \"Line1\\n\"\n\"Line2\"\n");
        assert_contains(&output, "msgstr \"Zeile1\\n\"\n\"Zeile2\"\n");
    }

    #[test]
    fn serializes_empty_plural_translations() {
        let file = bare_file(vec![PoItem {
            msgid: "item".to_owned(),
            msgid_plural: Some("items".to_owned()),
            nplurals: 3,
            ..PoItem::new(3)
        }]);

        let output = stringify_po(&file, &SerializeOptions::default());

        for expected in [
            "msgstr[0] \"\"\n",
            "msgstr[1] \"\"\n",
            "msgstr[2] \"\"\n",
        ] {
            assert_contains(&output, expected);
        }
    }

    #[test]
    fn serializes_non_compact_multiline_values() {
        let file = bare_file(vec![singular("\nIndented", "\nUebersetzt")]);
        let options = SerializeOptions {
            compact_multiline: false,
            ..SerializeOptions::default()
        };

        let output = stringify_po(&file, &options);

        assert_contains(&output, "msgid \"\"\n\"\\n\"\n\"Indented\"\n");
        assert_contains(&output, "msgstr \"\"\n\"\\n\"\n\"Uebersetzt\"\n");
    }

    #[test]
    fn does_not_fold_when_fold_length_is_zero() {
        let file = bare_file(vec![singular(
            "Alpha beta gamma delta",
            "Uno dos tres cuatro",
        )]);

        let output = render(&file, 0, true);

        assert_contains(&output, "msgid \"Alpha beta gamma delta\"\n");
        assert_contains(&output, "msgstr \"Uno dos tres cuatro\"\n");
    }

    #[test]
    fn folds_utf8_without_splitting_codepoints() {
        let file = bare_file(vec![singular("Grüße aus Köln", "Übermäßig höflich")]);

        let output = render(&file, 12, true);

        let reparsed = parse_po(&output).expect("reparse folded utf8 output");
        let first = &reparsed.items[0];
        assert_eq!(first.msgid, "Grüße aus Köln");
        assert_eq!(first.msgstr[0], "Übermäßig höflich");
    }

    #[test]
    fn drops_previous_msgid_history_on_roundtrip() {
        let input = r#"#| msgctxt "Old menu context"
#| msgid "Old file label"
msgctxt "menu"
msgid "File"
msgstr "Datei"
"#;

        let parsed = parse_po(input).expect("parse previous-msgid input");
        assert_eq!(parsed.items.len(), 1);
        let only_item = &parsed.items[0];
        assert_eq!(only_item.msgctxt.as_deref(), Some("menu"));
        assert_eq!(only_item.msgid, "File");

        let output = stringify_po(&parsed, &SerializeOptions::default());
        assert!(!output.contains("#| "));
        assert_contains(&output, "msgctxt \"menu\"\n");
        assert_contains(&output, "msgid \"File\"\n");
    }

    #[test]
    fn normalizes_headerless_files_with_explicit_empty_header() {
        let file = bare_file(vec![PoItem {
            msgid: "Save".to_owned(),
            msgstr: MsgStr::from("Speichern".to_owned()),
            flags: vec!["fuzzy".to_owned()],
            ..PoItem::new(2)
        }]);

        let output = stringify_po(&file, &SerializeOptions::default());

        assert!(output.starts_with("msgid \"\"\nmsgstr \"\"\n\n"));
        assert_contains(&output, "#, fuzzy\nmsgid \"Save\"\nmsgstr \"Speichern\"\n");
    }

    #[test]
    fn folds_single_line_values_like_gettext_style_multiline_entries() {
        let long_with_escapes = "Some line that contain special characters \" and that \t is very, very, very long...: %s \n";
        let long_with_trailing_spaces = "Some line that contain special characters \"foobar\" and that contains whitespace at the end          ";
        let file = PoFile {
            comments: vec!["test wrapping".to_owned()],
            ..bare_file(vec![
                singular(long_with_escapes, ""),
                singular(long_with_trailing_spaces, ""),
            ])
        };

        let output = render(&file, 50, true);

        assert_contains(&output, "msgid \"\"\n\"Some line that contain special characters \\\" and\"\n\" that \\t is very, very, very long...: %s \\n\"\n");
        assert_contains(&output, "msgid \"\"\n\"Some line that contain special characters \"\n\"\\\"foobar\\\" and that contains whitespace at the \"\n\"end          \"\n");
    }
}