Skip to main content

ferrocat_po/
merge.rs

1use std::borrow::Cow;
2
3use crate::scan::{
4    CommentKind, Keyword, LineKind, LineScanner, classify_line, find_byte, find_quoted_bounds,
5    has_byte, parse_plural_index, split_once_byte, trim_ascii,
6};
7use crate::serialize::{write_keyword, write_prefixed_line};
8use crate::text::{escape_string_into, unescape_string, validate_quoted_content};
9use crate::utf8::input_slice_as_str;
10use crate::{BorrowedMsgStr, ParseError, SerializeOptions};
11
/// Borrowed extracted message input for the lightweight merge helper.
///
/// One value describes one message found by an extractor. During a merge it is
/// looked up in the existing catalog by its (`msgctxt`, `msgid`) pair.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct ExtractedMessage<'a> {
    /// Optional gettext message context.
    pub msgctxt: Option<Cow<'a, str>>,
    /// Source message identifier.
    pub msgid: Cow<'a, str>,
    /// Optional plural source identifier; when present the entry is written
    /// with one `msgstr` per plural form.
    pub msgid_plural: Option<Cow<'a, str>>,
    /// Source references such as `src/app.rs:10`, written as `#:` lines.
    pub references: Vec<Cow<'a, str>>,
    /// Extracted translator guidance comments, written as `#.` lines.
    pub extracted_comments: Vec<Cow<'a, str>>,
    /// Flags such as `fuzzy`, written on the `#,` line.
    pub flags: Vec<Cow<'a, str>>,
}
28
/// Borrowed view of an existing PO file as needed by the merge helper.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct MergeBorrowedFile<'a> {
    /// Translator comments taken from the header entry (re-emitted with a `#` prefix).
    comments: Vec<&'a str>,
    /// Extracted comments taken from the header entry (re-emitted with a `#.` prefix).
    extracted_comments: Vec<&'a str>,
    /// Key/value pairs collected from the header entry's `msgstr`.
    headers: Vec<MergeHeader<'a>>,
    /// Every non-header entry, in the order it was finished by the parser.
    items: Vec<MergeBorrowedItem<'a>>,
}
36
/// A single `Key: value` pair from the PO header.
///
/// Both parts are `Cow`s: fragments that need no unescaping borrow from the
/// input, while fragments that had to be decoded are stored as owned strings.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct MergeHeader<'a> {
    /// Header name, with surrounding whitespace trimmed.
    key: Cow<'a, str>,
    /// Header value, with surrounding whitespace trimmed.
    value: Cow<'a, str>,
}
42
/// One entry of the existing catalog, borrowing from the input wherever possible.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct MergeBorrowedItem<'a> {
    /// Source message identifier (continuation lines already appended).
    msgid: Cow<'a, str>,
    /// Optional message context.
    msgctxt: Option<Cow<'a, str>>,
    /// Source references (written back as `#:` lines).
    references: Vec<&'a str>,
    /// Optional plural source identifier.
    msgid_plural: Option<Cow<'a, str>>,
    /// Translation payload, singular or per plural form.
    msgstr: BorrowedMsgStr<'a>,
    /// Translator comments (written back as `#` lines).
    comments: Vec<&'a str>,
    /// Extracted comments (written back as `#.` lines).
    extracted_comments: Vec<&'a str>,
    /// Individual flags split on `,` and trimmed (written back on the `#,` line).
    flags: Vec<&'a str>,
    /// `key: value` metadata pairs (written back as `#@ key: value` lines).
    metadata: Vec<(&'a str, &'a str)>,
    /// Set when every content line of the entry was flagged obsolete by the scanner.
    obsolete: bool,
    /// Plural-form count in effect when the entry was parsed; used when the
    /// entry is written back unchanged.
    nplurals: usize,
}
57
58impl MergeBorrowedItem<'_> {
59    fn new(nplurals: usize) -> Self {
60        Self {
61            nplurals,
62            ..Self::default()
63        }
64    }
65}
66
/// The keyword most recently seen for the entry in progress.
///
/// It decides which field a following continuation line is appended to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Context {
    /// Last keyword was `msgid`.
    Id,
    /// Last keyword was `msgid_plural`.
    IdPlural,
    /// Last keyword was `msgstr` (singular or indexed).
    Str,
    /// Last keyword was `msgctxt`.
    Ctxt,
}
74
/// Mutable scratch state for the entry currently being parsed.
#[derive(Debug)]
struct ParserState<'a> {
    /// The entry being assembled.
    item: MergeBorrowedItem<'a>,
    /// Header pairs collected from `msgstr` lines while the entry still looks like the header.
    header_entries: Vec<MergeHeader<'a>>,
    /// Translation payload accumulated so far; moved into `item` when the entry is finished.
    msgstr: BorrowedMsgStr<'a>,
    /// Field that a continuation line should extend, if any keyword was seen.
    context: Option<Context>,
    /// Plural index of the most recent `msgstr` keyword (0 for the singular form).
    plural_index: usize,
    /// Number of keyword/continuation lines flagged obsolete.
    obsolete_line_count: usize,
    /// Number of keyword/continuation lines seen for this entry.
    content_line_count: usize,
    /// Whether at least one keyword line has been seen for this entry.
    has_keyword: bool,
}
86
87impl<'a> ParserState<'a> {
88    fn new(nplurals: usize) -> Self {
89        Self {
90            item: MergeBorrowedItem::new(nplurals),
91            header_entries: Vec::new(),
92            msgstr: BorrowedMsgStr::None,
93            context: None,
94            plural_index: 0,
95            obsolete_line_count: 0,
96            content_line_count: 0,
97            has_keyword: false,
98        }
99    }
100
101    fn reset(&mut self, nplurals: usize) {
102        *self = Self::new(nplurals);
103    }
104
105    #[inline]
106    fn reset_after_take(&mut self, nplurals: usize) {
107        self.item.nplurals = nplurals;
108        self.header_entries.clear();
109        self.msgstr = BorrowedMsgStr::None;
110        self.context = None;
111        self.plural_index = 0;
112        self.obsolete_line_count = 0;
113        self.content_line_count = 0;
114        self.has_keyword = false;
115    }
116
117    fn set_msgstr(&mut self, plural_index: usize, value: Cow<'a, str>) {
118        match (&mut self.msgstr, plural_index) {
119            (BorrowedMsgStr::None, 0) => self.msgstr = BorrowedMsgStr::Singular(value),
120            (BorrowedMsgStr::Singular(existing), 0) => *existing = value,
121            (BorrowedMsgStr::Plural(values), 0) => {
122                if values.is_empty() {
123                    values.push(Cow::Borrowed(""));
124                }
125                values[0] = value;
126            }
127            _ => {
128                let msgstr = self.promote_plural_msgstr(plural_index);
129                msgstr[plural_index] = value;
130            }
131        }
132    }
133
134    fn append_msgstr(&mut self, plural_index: usize, value: Cow<'a, str>) {
135        match (&mut self.msgstr, plural_index) {
136            (BorrowedMsgStr::None, 0) => self.msgstr = BorrowedMsgStr::Singular(value),
137            (BorrowedMsgStr::Singular(existing), 0) => existing.to_mut().push_str(value.as_ref()),
138            (BorrowedMsgStr::Plural(values), 0) => {
139                if values.is_empty() {
140                    values.push(Cow::Borrowed(""));
141                }
142                values[0].to_mut().push_str(value.as_ref());
143            }
144            _ => {
145                let msgstr = self.promote_plural_msgstr(plural_index);
146                msgstr[plural_index].to_mut().push_str(value.as_ref());
147            }
148        }
149    }
150
151    fn materialize_msgstr(&mut self) {
152        self.item.msgstr = std::mem::take(&mut self.msgstr);
153    }
154
155    fn promote_plural_msgstr(&mut self, plural_index: usize) -> &mut Vec<Cow<'a, str>> {
156        if !matches!(self.msgstr, BorrowedMsgStr::Plural(_)) {
157            self.msgstr = match std::mem::take(&mut self.msgstr) {
158                BorrowedMsgStr::None => BorrowedMsgStr::Plural(Vec::with_capacity(2)),
159                BorrowedMsgStr::Singular(value) => BorrowedMsgStr::Plural(vec![value]),
160                BorrowedMsgStr::Plural(values) => BorrowedMsgStr::Plural(values),
161            };
162        }
163        let BorrowedMsgStr::Plural(values) = &mut self.msgstr else {
164            unreachable!("plural msgstr promotion must yield plural storage");
165        };
166        if values.len() <= plural_index {
167            values.resize(plural_index + 1, Cow::Borrowed(""));
168        }
169        values
170    }
171}
172
/// The parts of a scanned line that the merge parser needs.
#[derive(Debug, Clone, Copy)]
struct MergeLine<'a> {
    /// Line bytes, copied from the scanner's `trimmed` field.
    trimmed: &'a [u8],
    /// Obsolete marker, copied from the scanner's `obsolete` field.
    obsolete: bool,
}
178
179/// Merges extracted messages into an existing PO catalog while preserving the
180/// existing translation payload.
181///
182/// # Errors
183///
184/// Returns [`ParseError`] when the existing PO file cannot be parsed.
185pub fn merge_catalog<'a>(
186    existing_po: &'a str,
187    extracted_messages: &[ExtractedMessage<'a>],
188) -> Result<String, ParseError> {
189    let normalized;
190    let input = if existing_po.as_bytes().contains(&b'\r') {
191        normalized = existing_po.replace("\r\n", "\n").replace('\r', "\n");
192        normalized.as_str()
193    } else {
194        existing_po
195    };
196
197    let existing = parse_merge_po(input)?;
198    let nplurals = parse_nplurals(&existing.headers).unwrap_or(2);
199    let options = SerializeOptions::default();
200    let mut out = String::with_capacity(estimate_merge_capacity(input, extracted_messages));
201    let mut scratch = String::new();
202
203    write_file_preamble(&mut out, &existing);
204
205    let mut existing_index =
206        std::collections::HashMap::<&str, Vec<(Option<&str>, usize)>>::with_capacity(
207            existing.items.len(),
208        );
209    for (index, item) in existing.items.iter().enumerate() {
210        existing_index
211            .entry(item.msgid.as_ref())
212            .or_default()
213            .push((item.msgctxt.as_deref(), index));
214    }
215
216    let mut matched = vec![false; existing.items.len()];
217    let mut wrote_item = false;
218
219    for extracted in extracted_messages {
220        if wrote_item {
221            out.push('\n');
222        }
223        let existing_index = find_existing_index(
224            &existing_index,
225            extracted.msgctxt.as_deref(),
226            extracted.msgid.as_ref(),
227        );
228
229        match existing_index {
230            Some(index) => {
231                matched[index] = true;
232                write_merged_existing_item(
233                    &mut out,
234                    &mut scratch,
235                    &existing.items[index],
236                    extracted,
237                    nplurals,
238                    &options,
239                );
240            }
241            None => write_new_item(&mut out, &mut scratch, extracted, nplurals, &options),
242        }
243        out.push('\n');
244        wrote_item = true;
245    }
246
247    for (index, item) in existing.items.iter().enumerate() {
248        if matched[index] {
249            continue;
250        }
251        if wrote_item {
252            out.push('\n');
253        }
254        write_existing_item(&mut out, &mut scratch, item, true, &options);
255        out.push('\n');
256        wrote_item = true;
257    }
258
259    Ok(out)
260}
261
262fn parse_merge_po(input: &str) -> Result<MergeBorrowedFile<'_>, ParseError> {
263    let mut file = MergeBorrowedFile::default();
264    file.items.reserve((input.len() / 96).max(1));
265    let mut current_nplurals = 2usize;
266    let mut state = ParserState::new(current_nplurals);
267
268    for line in LineScanner::new(input.as_bytes()) {
269        parse_line(
270            MergeLine {
271                trimmed: line.trimmed,
272                obsolete: line.obsolete,
273            },
274            &mut state,
275            &mut file,
276            &mut current_nplurals,
277        )?;
278    }
279
280    finish_item(&mut state, &mut file, &mut current_nplurals);
281    Ok(file)
282}
283
284fn parse_line<'a>(
285    line: MergeLine<'a>,
286    state: &mut ParserState<'a>,
287    file: &mut MergeBorrowedFile<'a>,
288    current_nplurals: &mut usize,
289) -> Result<(), ParseError> {
290    match classify_line(line.trimmed) {
291        LineKind::Continuation => {
292            append_continuation(line.trimmed, line.obsolete, state)?;
293            Ok(())
294        }
295        LineKind::Comment(kind) => {
296            parse_comment_line(line.trimmed, kind, state, file, current_nplurals);
297            Ok(())
298        }
299        LineKind::Keyword(keyword) => parse_keyword_line(
300            line.trimmed,
301            line.obsolete,
302            keyword,
303            state,
304            file,
305            current_nplurals,
306        ),
307        LineKind::Other => Ok(()),
308    }
309}
310
311fn parse_comment_line<'a>(
312    line_bytes: &'a [u8],
313    kind: CommentKind,
314    state: &mut ParserState<'a>,
315    file: &mut MergeBorrowedFile<'a>,
316    current_nplurals: &mut usize,
317) {
318    finish_item(state, file, current_nplurals);
319
320    match kind {
321        CommentKind::Reference => state.item.references.push(trimmed_str(&line_bytes[2..])),
322        CommentKind::Flags => {
323            for flag in trimmed_str(&line_bytes[2..]).split(',') {
324                state.item.flags.push(flag.trim());
325            }
326        }
327        CommentKind::Extracted => state
328            .item
329            .extracted_comments
330            .push(trimmed_str(&line_bytes[2..])),
331        CommentKind::Metadata => {
332            let trimmed = trim_ascii(&line_bytes[2..]);
333            if let Some((key_bytes, value_bytes)) = split_once_byte(trimmed, b':') {
334                let key = trimmed_str(key_bytes);
335                if !key.is_empty() {
336                    state.item.metadata.push((key, trimmed_str(value_bytes)));
337                }
338            }
339        }
340        CommentKind::Translator => state.item.comments.push(trimmed_str(&line_bytes[1..])),
341        CommentKind::Other => {}
342    }
343}
344
345fn parse_keyword_line<'a>(
346    line_bytes: &'a [u8],
347    obsolete: bool,
348    keyword: Keyword,
349    state: &mut ParserState<'a>,
350    file: &mut MergeBorrowedFile<'a>,
351    current_nplurals: &mut usize,
352) -> Result<(), ParseError> {
353    match keyword {
354        Keyword::IdPlural => {
355            state.obsolete_line_count += usize::from(obsolete);
356            state.item.msgid_plural = Some(extract_merge_quoted_cow(line_bytes)?);
357            state.context = Some(Context::IdPlural);
358            state.content_line_count += 1;
359            state.has_keyword = true;
360        }
361        Keyword::Id => {
362            finish_item(state, file, current_nplurals);
363            state.obsolete_line_count += usize::from(obsolete);
364            state.item.msgid = extract_merge_quoted_cow(line_bytes)?;
365            state.context = Some(Context::Id);
366            state.content_line_count += 1;
367            state.has_keyword = true;
368        }
369        Keyword::Str => {
370            let plural_index = parse_plural_index(line_bytes).unwrap_or(0);
371            state.plural_index = plural_index;
372            state.obsolete_line_count += usize::from(obsolete);
373            state.set_msgstr(plural_index, extract_merge_quoted_cow(line_bytes)?);
374            if is_header_candidate(state) {
375                state
376                    .header_entries
377                    .extend(parse_header_fragment(line_bytes)?);
378            }
379            state.context = Some(Context::Str);
380            state.content_line_count += 1;
381            state.has_keyword = true;
382        }
383        Keyword::Ctxt => {
384            finish_item(state, file, current_nplurals);
385            state.obsolete_line_count += usize::from(obsolete);
386            state.item.msgctxt = Some(extract_merge_quoted_cow(line_bytes)?);
387            state.context = Some(Context::Ctxt);
388            state.content_line_count += 1;
389            state.has_keyword = true;
390        }
391    }
392
393    Ok(())
394}
395
396fn append_continuation<'a>(
397    line_bytes: &'a [u8],
398    obsolete: bool,
399    state: &mut ParserState<'a>,
400) -> Result<(), ParseError> {
401    state.obsolete_line_count += usize::from(obsolete);
402    state.content_line_count += 1;
403    let value = extract_merge_quoted_cow(line_bytes)?;
404
405    match state.context {
406        Some(Context::Str) => {
407            state.append_msgstr(state.plural_index, value);
408            if is_header_candidate(state) {
409                state
410                    .header_entries
411                    .extend(parse_header_fragment(line_bytes)?);
412            }
413        }
414        Some(Context::Id) => state.item.msgid.to_mut().push_str(value.as_ref()),
415        Some(Context::IdPlural) => {
416            let target = state.item.msgid_plural.get_or_insert(Cow::Borrowed(""));
417            target.to_mut().push_str(value.as_ref());
418        }
419        Some(Context::Ctxt) => {
420            let target = state.item.msgctxt.get_or_insert(Cow::Borrowed(""));
421            target.to_mut().push_str(value.as_ref());
422        }
423        None => {}
424    }
425
426    Ok(())
427}
428
429fn finish_item<'a>(
430    state: &mut ParserState<'a>,
431    file: &mut MergeBorrowedFile<'a>,
432    current_nplurals: &mut usize,
433) {
434    if !state.has_keyword {
435        return;
436    }
437
438    if state.item.msgid.is_empty() && !is_header_state(state) {
439        return;
440    }
441
442    if state.obsolete_line_count >= state.content_line_count && state.content_line_count > 0 {
443        state.item.obsolete = true;
444    }
445
446    if is_header_state(state) && file.headers.is_empty() && file.items.is_empty() {
447        file.comments = std::mem::take(&mut state.item.comments);
448        file.extracted_comments = std::mem::take(&mut state.item.extracted_comments);
449        file.headers = std::mem::take(&mut state.header_entries);
450        *current_nplurals = parse_nplurals(&file.headers).unwrap_or(2);
451        state.reset(*current_nplurals);
452        return;
453    }
454
455    state.materialize_msgstr();
456
457    if matches!(state.item.msgstr, BorrowedMsgStr::None) {
458        state.item.msgstr = BorrowedMsgStr::Singular(Cow::Borrowed(""));
459    }
460    if state.item.msgid_plural.is_some() && msgstr_len(&state.item.msgstr) == 1 {
461        let mut values = match std::mem::take(&mut state.item.msgstr) {
462            BorrowedMsgStr::None => Vec::new(),
463            BorrowedMsgStr::Singular(value) => vec![value],
464            BorrowedMsgStr::Plural(values) => values,
465        };
466        values.resize(state.item.nplurals.max(1), Cow::Borrowed(""));
467        state.item.msgstr = BorrowedMsgStr::Plural(values);
468    }
469
470    state.item.nplurals = *current_nplurals;
471    file.items.push(std::mem::take(&mut state.item));
472    state.reset_after_take(*current_nplurals);
473}
474
475fn msgstr_len(msgstr: &BorrowedMsgStr<'_>) -> usize {
476    match msgstr {
477        BorrowedMsgStr::None => 0,
478        BorrowedMsgStr::Singular(_) => 1,
479        BorrowedMsgStr::Plural(values) => values.len(),
480    }
481}
482
483fn is_header_state(state: &ParserState<'_>) -> bool {
484    state.item.msgid.is_empty()
485        && state.item.msgctxt.is_none()
486        && state.item.msgid_plural.is_none()
487        && !matches!(state.msgstr, BorrowedMsgStr::None)
488}
489
490fn is_header_candidate(state: &ParserState<'_>) -> bool {
491    state.item.msgid.is_empty()
492        && state.item.msgctxt.is_none()
493        && state.item.msgid_plural.is_none()
494        && state.plural_index == 0
495}
496
497fn parse_header_fragment(line_bytes: &[u8]) -> Result<Vec<MergeHeader<'_>>, ParseError> {
498    let Some(raw) = merge_quoted_raw(line_bytes) else {
499        return Ok(Vec::new());
500    };
501
502    if header_fragment_is_borrowable(raw) {
503        return Ok(parse_header_fragment_borrowed(raw));
504    }
505
506    parse_header_fragment_owned(line_bytes)
507}
508
509fn parse_header_fragment_borrowed(raw: &[u8]) -> Vec<MergeHeader<'_>> {
510    let mut headers = Vec::new();
511    let mut start = 0usize;
512    let mut index = 0usize;
513
514    while index < raw.len() {
515        if raw[index] == b'\\' && raw.get(index + 1) == Some(&b'n') {
516            push_borrowed_header_segment(&raw[start..index], &mut headers);
517            index += 2;
518            start = index;
519            continue;
520        }
521        index += 1;
522    }
523
524    push_borrowed_header_segment(&raw[start..], &mut headers);
525    headers
526}
527
528fn push_borrowed_header_segment<'a>(segment: &'a [u8], out: &mut Vec<MergeHeader<'a>>) {
529    if segment.is_empty() {
530        return;
531    }
532    if let Some((key_bytes, value_bytes)) = split_once_byte(segment, b':') {
533        out.push(MergeHeader {
534            key: Cow::Borrowed(trimmed_str(key_bytes)),
535            value: Cow::Borrowed(trimmed_str(value_bytes)),
536        });
537    }
538}
539
540fn parse_header_fragment_owned(line_bytes: &[u8]) -> Result<Vec<MergeHeader<'_>>, ParseError> {
541    let decoded = extract_merge_quoted_cow(line_bytes)?;
542    let mut headers = Vec::new();
543    for segment in decoded.split('\n') {
544        if segment.is_empty() {
545            continue;
546        }
547        if let Some((key, value)) = segment.split_once(':') {
548            headers.push(MergeHeader {
549                key: Cow::Owned(key.trim().to_owned()),
550                value: Cow::Owned(value.trim().to_owned()),
551            });
552        }
553    }
554    Ok(headers)
555}
556
557fn header_fragment_is_borrowable(raw: &[u8]) -> bool {
558    let mut index = 0usize;
559    while index < raw.len() {
560        if raw[index] == b'\\' {
561            if raw.get(index + 1) != Some(&b'n') {
562                return false;
563            }
564            index += 2;
565            continue;
566        }
567        index += 1;
568    }
569    !has_byte(b'"', raw)
570}
571
572#[inline]
573fn extract_merge_quoted_cow(line_bytes: &[u8]) -> Result<Cow<'_, str>, ParseError> {
574    let Some(raw) = merge_quoted_raw(line_bytes) else {
575        return Ok(Cow::Borrowed(""));
576    };
577
578    validate_quoted_content(raw)?;
579    if !has_byte(b'\\', raw) {
580        return Ok(Cow::Borrowed(bytes_to_str(raw)));
581    }
582
583    Ok(Cow::Owned(unescape_string(bytes_to_str(raw))?))
584}
585
586#[inline]
587fn merge_quoted_raw(line_bytes: &[u8]) -> Option<&[u8]> {
588    let start = match line_bytes.first() {
589        Some(b'"') => 1,
590        _ => find_byte(b'"', line_bytes)? + 1,
591    };
592
593    if start > line_bytes.len() {
594        return None;
595    }
596
597    if line_bytes.len() > start && line_bytes.last() == Some(&b'"') {
598        return Some(&line_bytes[start..line_bytes.len() - 1]);
599    }
600
601    let (quoted_start, quoted_end) = find_quoted_bounds(line_bytes)?;
602    Some(&line_bytes[quoted_start..quoted_end])
603}
604
/// Looks up the position of the existing entry with the given `msgid` and
/// exactly the given context.
///
/// When several candidates share both, the first one recorded wins.
fn find_existing_index(
    existing_index: &std::collections::HashMap<&str, Vec<(Option<&str>, usize)>>,
    msgctxt: Option<&str>,
    msgid: &str,
) -> Option<usize> {
    existing_index
        .get(msgid)?
        .iter()
        .find(|(candidate_ctxt, _)| *candidate_ctxt == msgctxt)
        .map(|&(_, index)| index)
}
615
616fn estimate_merge_capacity(input: &str, extracted_messages: &[ExtractedMessage<'_>]) -> usize {
617    let extracted_bytes: usize = extracted_messages
618        .iter()
619        .map(|message| {
620            message.msgid.len()
621                + message.msgctxt.as_ref().map_or(0, |value| value.len())
622                + message.msgid_plural.as_ref().map_or(0, |value| value.len())
623                + message
624                    .references
625                    .iter()
626                    .map(|value| value.len())
627                    .sum::<usize>()
628                + message
629                    .extracted_comments
630                    .iter()
631                    .map(|value| value.len())
632                    .sum::<usize>()
633                + message.flags.iter().map(|value| value.len()).sum::<usize>()
634        })
635        .sum();
636
637    input.len() + extracted_bytes + 256
638}
639
640fn write_file_preamble(out: &mut String, file: &MergeBorrowedFile<'_>) {
641    write_prefixed_lines(out, "", "#", &file.comments);
642    write_prefixed_lines(out, "", "#.", &file.extracted_comments);
643
644    out.push_str("msgid \"\"\n");
645    out.push_str("msgstr \"\"\n");
646    for header in &file.headers {
647        out.push('"');
648        escape_string_into(out, header.key.as_ref());
649        out.push_str(": ");
650        escape_string_into(out, header.value.as_ref());
651        out.push_str("\\n\"\n");
652    }
653    out.push('\n');
654}
655
656fn write_merged_existing_item(
657    out: &mut String,
658    scratch: &mut String,
659    existing: &MergeBorrowedItem<'_>,
660    extracted: &ExtractedMessage<'_>,
661    nplurals: usize,
662    options: &SerializeOptions,
663) {
664    let obsolete_prefix = "";
665
666    write_prefixed_lines(out, obsolete_prefix, "#", &existing.comments);
667    write_prefixed_lines(out, obsolete_prefix, "#.", &extracted.extracted_comments);
668    write_metadata_lines(out, obsolete_prefix, &existing.metadata);
669    write_prefixed_lines(out, obsolete_prefix, "#:", &extracted.references);
670    write_merged_flags_line(out, obsolete_prefix, &existing.flags, &extracted.flags);
671
672    if let Some(context) = extracted.msgctxt.as_deref() {
673        write_keyword(
674            out,
675            scratch,
676            obsolete_prefix,
677            "msgctxt",
678            context,
679            None,
680            options,
681        );
682    }
683    write_keyword(
684        out,
685        scratch,
686        obsolete_prefix,
687        "msgid",
688        extracted.msgid.as_ref(),
689        None,
690        options,
691    );
692    if let Some(plural) = extracted.msgid_plural.as_deref() {
693        write_keyword(
694            out,
695            scratch,
696            obsolete_prefix,
697            "msgid_plural",
698            plural,
699            None,
700            options,
701        );
702    }
703
704    write_normalized_msgstr(
705        out,
706        scratch,
707        obsolete_prefix,
708        &existing.msgstr,
709        MsgstrShape {
710            preserve_existing: existing.msgid_plural.is_some() == extracted.msgid_plural.is_some(),
711            plural: extracted.msgid_plural.is_some(),
712        },
713        nplurals,
714        options,
715    );
716}
717
718fn write_new_item(
719    out: &mut String,
720    scratch: &mut String,
721    extracted: &ExtractedMessage<'_>,
722    nplurals: usize,
723    options: &SerializeOptions,
724) {
725    let obsolete_prefix = "";
726
727    write_prefixed_lines(out, obsolete_prefix, "#.", &extracted.extracted_comments);
728    write_prefixed_lines(out, obsolete_prefix, "#:", &extracted.references);
729    write_flags_line(out, obsolete_prefix, &extracted.flags);
730
731    if let Some(context) = extracted.msgctxt.as_deref() {
732        write_keyword(
733            out,
734            scratch,
735            obsolete_prefix,
736            "msgctxt",
737            context,
738            None,
739            options,
740        );
741    }
742    write_keyword(
743        out,
744        scratch,
745        obsolete_prefix,
746        "msgid",
747        extracted.msgid.as_ref(),
748        None,
749        options,
750    );
751    if let Some(plural) = extracted.msgid_plural.as_deref() {
752        write_keyword(
753            out,
754            scratch,
755            obsolete_prefix,
756            "msgid_plural",
757            plural,
758            None,
759            options,
760        );
761    }
762
763    write_default_msgstr(
764        out,
765        scratch,
766        obsolete_prefix,
767        extracted.msgid_plural.is_some(),
768        nplurals,
769        options,
770    );
771}
772
773fn write_existing_item(
774    out: &mut String,
775    scratch: &mut String,
776    item: &MergeBorrowedItem<'_>,
777    obsolete: bool,
778    options: &SerializeOptions,
779) {
780    let obsolete_prefix = if obsolete { "#~ " } else { "" };
781
782    write_prefixed_lines(out, obsolete_prefix, "#", &item.comments);
783    write_prefixed_lines(out, obsolete_prefix, "#.", &item.extracted_comments);
784    write_metadata_lines(out, obsolete_prefix, &item.metadata);
785    write_prefixed_lines(out, obsolete_prefix, "#:", &item.references);
786    write_flags_line(out, obsolete_prefix, &item.flags);
787
788    if let Some(context) = item.msgctxt.as_deref() {
789        write_keyword(
790            out,
791            scratch,
792            obsolete_prefix,
793            "msgctxt",
794            context,
795            None,
796            options,
797        );
798    }
799    write_keyword(
800        out,
801        scratch,
802        obsolete_prefix,
803        "msgid",
804        item.msgid.as_ref(),
805        None,
806        options,
807    );
808    if let Some(plural) = item.msgid_plural.as_deref() {
809        write_keyword(
810            out,
811            scratch,
812            obsolete_prefix,
813            "msgid_plural",
814            plural,
815            None,
816            options,
817        );
818    }
819
820    write_existing_msgstr(
821        out,
822        scratch,
823        obsolete_prefix,
824        &item.msgstr,
825        item.msgid_plural.is_some(),
826        item.nplurals,
827        options,
828    );
829}
830
831fn write_prefixed_lines<T: AsRef<str>>(
832    out: &mut String,
833    obsolete_prefix: &str,
834    prefix: &str,
835    values: &[T],
836) {
837    for value in values {
838        write_prefixed_line(out, obsolete_prefix, prefix, value.as_ref());
839    }
840}
841
/// Writes each metadata pair as a `#@ key: value` line, preceded by
/// `obsolete_prefix`.
fn write_metadata_lines(out: &mut String, obsolete_prefix: &str, values: &[(&str, &str)]) {
    for &(key, value) in values {
        out.extend([obsolete_prefix, "#@ ", key, ": ", value, "\n"]);
    }
}
852
/// Writes all flags on a single `#, ` line, separated by commas.
///
/// Nothing is written when `values` is empty.
fn write_flags_line<T: AsRef<str>>(out: &mut String, obsolete_prefix: &str, values: &[T]) {
    if let Some((first, rest)) = values.split_first() {
        out.push_str(obsolete_prefix);
        out.push_str("#, ");
        out.push_str(first.as_ref());
        for value in rest {
            out.push(',');
            out.push_str(value.as_ref());
        }
        out.push('\n');
    }
}
868
/// Writes a single `#, ` line combining existing and extracted flags.
///
/// Flags appear in first-seen order — existing flags first, then extracted
/// ones — with duplicates removed and separated by commas.
///
/// Empty flag strings are skipped. They can come from a bare `#,` line or a
/// doubled or trailing comma in the existing file, and would otherwise produce
/// a malformed line such as `#, ,fuzzy`. Nothing is written when no non-empty
/// flag remains.
fn write_merged_flags_line(
    out: &mut String,
    obsolete_prefix: &str,
    existing: &[&str],
    extracted: &[Cow<'_, str>],
) {
    // Flag lists are tiny, so a linear `contains` is cheaper than hashing.
    let mut unique: Vec<&str> = Vec::with_capacity(existing.len() + extracted.len());
    let all_flags = existing
        .iter()
        .copied()
        .chain(extracted.iter().map(|flag| &**flag));
    for flag in all_flags {
        if !flag.is_empty() && !unique.contains(&flag) {
            unique.push(flag);
        }
    }

    if unique.is_empty() {
        return;
    }

    out.push_str(obsolete_prefix);
    out.push_str("#, ");
    for (index, flag) in unique.iter().enumerate() {
        if index > 0 {
            out.push(',');
        }
        out.push_str(flag);
    }
    out.push('\n');
}
901
902fn write_existing_msgstr(
903    out: &mut String,
904    scratch: &mut String,
905    obsolete_prefix: &str,
906    msgstr: &BorrowedMsgStr<'_>,
907    is_plural: bool,
908    nplurals: usize,
909    options: &SerializeOptions,
910) {
911    if is_plural {
912        for index in 0..nplurals.max(1) {
913            let value = match msgstr {
914                BorrowedMsgStr::Singular(value) if index == 0 => value.as_ref(),
915                BorrowedMsgStr::None | BorrowedMsgStr::Singular(_) => "",
916                BorrowedMsgStr::Plural(values) => {
917                    values.get(index).map_or("", |value| value.as_ref())
918                }
919            };
920            write_keyword(
921                out,
922                scratch,
923                obsolete_prefix,
924                "msgstr",
925                value,
926                Some(index),
927                options,
928            );
929        }
930        return;
931    }
932
933    let value = match msgstr {
934        BorrowedMsgStr::None => "",
935        BorrowedMsgStr::Singular(value) => value.as_ref(),
936        BorrowedMsgStr::Plural(values) => values.first().map_or("", |value| value.as_ref()),
937    };
938    write_keyword(
939        out,
940        scratch,
941        obsolete_prefix,
942        "msgstr",
943        value,
944        None,
945        options,
946    );
947}
948
/// Describes how the translation of a merged entry should be written.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct MsgstrShape {
    /// Keep the existing translation (true) or write empty translations (false).
    preserve_existing: bool,
    /// Whether the entry is written with indexed plural `msgstr` lines.
    plural: bool,
}
954
955fn write_normalized_msgstr(
956    out: &mut String,
957    scratch: &mut String,
958    obsolete_prefix: &str,
959    msgstr: &BorrowedMsgStr<'_>,
960    shape: MsgstrShape,
961    nplurals: usize,
962    options: &SerializeOptions,
963) {
964    if !shape.preserve_existing {
965        write_default_msgstr(
966            out,
967            scratch,
968            obsolete_prefix,
969            shape.plural,
970            nplurals,
971            options,
972        );
973        return;
974    }
975
976    write_existing_msgstr(
977        out,
978        scratch,
979        obsolete_prefix,
980        msgstr,
981        shape.plural,
982        nplurals,
983        options,
984    );
985}
986
987fn write_default_msgstr(
988    out: &mut String,
989    scratch: &mut String,
990    obsolete_prefix: &str,
991    is_plural: bool,
992    nplurals: usize,
993    options: &SerializeOptions,
994) {
995    if is_plural {
996        for index in 0..nplurals.max(1) {
997            write_keyword(
998                out,
999                scratch,
1000                obsolete_prefix,
1001                "msgstr",
1002                "",
1003                Some(index),
1004                options,
1005            );
1006        }
1007        return;
1008    }
1009
1010    write_keyword(out, scratch, obsolete_prefix, "msgstr", "", None, options);
1011}
1012
1013fn parse_nplurals(headers: &[MergeHeader<'_>]) -> Option<usize> {
1014    let plural_forms = headers
1015        .iter()
1016        .find(|header| header.key.as_ref() == "Plural-Forms")?
1017        .value
1018        .as_bytes();
1019    let mut rest = plural_forms;
1020
1021    while !rest.is_empty() {
1022        let (part, next) = match split_once_byte(rest, b';') {
1023            Some((part, tail)) => (part, tail),
1024            None => (rest, &b""[..]),
1025        };
1026        let trimmed = trim_ascii(part);
1027        if let Some((key, value)) = split_once_byte(trimmed, b'=')
1028            && trim_ascii(key) == b"nplurals"
1029            && let value = bytes_to_str(trim_ascii(value))
1030            && let Ok(parsed) = value.parse::<usize>()
1031        {
1032            return Some(parsed);
1033        }
1034        rest = next;
1035    }
1036
1037    None
1038}
1039
/// Views `bytes` as a `&str` by delegating to `input_slice_as_str`.
///
/// NOTE(review): presumably `bytes` must be a sub-slice of already validated
/// UTF-8 input — confirm against `crate::utf8::input_slice_as_str`.
fn bytes_to_str(bytes: &[u8]) -> &str {
    input_slice_as_str(bytes)
}
1043
1044fn trimmed_str(bytes: &[u8]) -> &str {
1045    bytes_to_str(trim_ascii(bytes))
1046}
1047
// Unit tests for the merge helper: end-to-end merges through `merge_catalog`
// (re-parsed with `parse_po`) plus direct checks of the private helpers.
#[cfg(test)]
mod tests {
    use std::borrow::Cow;

    use super::{
        ExtractedMessage, MergeHeader, estimate_merge_capacity, extract_merge_quoted_cow,
        find_existing_index, header_fragment_is_borrowable, merge_catalog, parse_header_fragment,
        parse_nplurals,
    };
    use crate::{BorrowedMsgStr, parse_po};

    // An entry that is still extracted keeps its translation and takes the
    // extractor's references; an entry that is no longer extracted is expected
    // to come back marked obsolete with its translation intact.
    #[test]
    fn preserves_existing_translations_and_updates_references() {
        let existing = concat!(
            "msgid \"hello\"\n",
            "msgstr \"world\"\n\n",
            "msgid \"old\"\n",
            "msgstr \"alt\"\n",
        );
        let extracted = vec![ExtractedMessage {
            msgid: Cow::Borrowed("hello"),
            references: vec![Cow::Borrowed("src/new.rs:10")],
            ..ExtractedMessage::default()
        }];

        let merged = merge_catalog(existing, &extracted).expect("merge");
        let reparsed = parse_po(&merged).expect("reparse");
        // "old" was not extracted: exactly one such item, obsolete, translation kept.
        let old_items: Vec<_> = reparsed
            .items
            .iter()
            .filter(|item| item.msgid == "old")
            .map(|item| (item.obsolete, item.msgstr[0].clone()))
            .collect();
        assert_eq!(old_items, vec![(true, "alt".to_owned())]);

        let hello = reparsed
            .items
            .iter()
            .find(|item| item.msgid == "hello")
            .expect("merged hello item");
        assert_eq!(hello.msgstr[0], "world");
        assert_eq!(hello.references, vec!["src/new.rs:10".to_owned()]);
    }

    // Merging into an empty catalog yields a new item with an empty
    // translation that carries the extractor's comment.
    #[test]
    fn creates_new_items_for_new_extracted_messages() {
        let merged = merge_catalog(
            "",
            &[ExtractedMessage {
                msgid: Cow::Borrowed("fresh"),
                extracted_comments: vec![Cow::Borrowed("from extractor")],
                ..ExtractedMessage::default()
            }],
        )
        .expect("merge");
        let reparsed = parse_po(&merged).expect("reparse");

        assert_eq!(reparsed.items[0].msgid, "fresh");
        assert_eq!(reparsed.items[0].msgstr[0], "");
        assert_eq!(
            reparsed.items[0].extracted_comments,
            vec!["from extractor".to_owned()]
        );
    }

    // A singular translation must not survive when the extracted message turns
    // plural: the merged entry is expected to have empty plural slots instead.
    #[test]
    fn resets_msgstr_when_switching_between_singular_and_plural() {
        let existing = concat!("msgid \"count\"\n", "msgstr \"Anzahl\"\n",);
        let extracted = vec![ExtractedMessage {
            msgid: Cow::Borrowed("count"),
            msgid_plural: Some(Cow::Borrowed("counts")),
            ..ExtractedMessage::default()
        }];

        let merged = merge_catalog(existing, &extracted).expect("merge");
        let reparsed = parse_po(&merged).expect("reparse");

        assert!(reparsed.items[0].msgid_plural.is_some());
        // The input has no Plural-Forms header; two empty forms are expected here.
        assert_eq!(reparsed.items[0].msgstr.len(), 2);
        assert_eq!(reparsed.items[0].msgstr[0], "");
        assert_eq!(reparsed.items[0].msgstr[1], "");
    }

    // Direct checks of the private helpers used by the merge.
    #[test]
    fn merge_helpers_cover_header_and_lookup_paths() {
        // NOTE(review): the expected values are spelled as Borrowed/Owned, but
        // `Cow` equality compares contents only, so these two assertions do
        // not verify which variant is actually returned.
        assert_eq!(
            extract_merge_quoted_cow(br#"msgid "plain""#),
            Ok(Cow::Borrowed("plain"))
        );
        assert_eq!(
            extract_merge_quoted_cow(br#"msgid "line\nbreak""#),
            Ok(Cow::Owned("line\nbreak".to_owned()))
        );
        // A fragment whose only escapes are `\n` is reported as borrowable; one
        // containing an escaped quote is not.
        assert!(header_fragment_is_borrowable(
            br#"Language: de\nPlural-Forms: nplurals=2;\n"#
        ));
        assert!(!header_fragment_is_borrowable(br#"Language: \"de\"\n"#));
        // One quoted fragment holding two `\n`-terminated headers parses into
        // two key/value pairs.
        assert_eq!(
            parse_header_fragment(br#""Language: de\nPlural-Forms: nplurals=3; plural=(n>1);\n""#)
                .expect("header fragment"),
            vec![
                MergeHeader {
                    key: Cow::Borrowed("Language"),
                    value: Cow::Borrowed("de"),
                },
                MergeHeader {
                    key: Cow::Borrowed("Plural-Forms"),
                    value: Cow::Borrowed("nplurals=3; plural=(n>1);"),
                },
            ]
        );
        // Whitespace around the `nplurals` key and its value is tolerated.
        assert_eq!(
            parse_nplurals(&[
                MergeHeader {
                    key: Cow::Borrowed("Language"),
                    value: Cow::Borrowed("de"),
                },
                MergeHeader {
                    key: Cow::Borrowed("Plural-Forms"),
                    value: Cow::Borrowed(" nplurals = 4 ; plural = (n > 1); "),
                },
            ]),
            Some(4)
        );
        // Two candidates share the msgid "hello"; looking up with context
        // "menu" must pick the index paired with `Some("menu")`.
        assert_eq!(
            find_existing_index(
                &std::collections::HashMap::from([(
                    "hello",
                    vec![(Some("menu"), 3usize), (None, 1usize)],
                )]),
                Some("menu"),
                "hello",
            ),
            Some(3)
        );
        // Only a lower bound on the capacity estimate is checked.
        assert!(
            estimate_merge_capacity(
                "msgid \"a\"\nmsgstr \"b\"\n",
                &[ExtractedMessage {
                    msgid: Cow::Borrowed("hello"),
                    references: vec![Cow::Borrowed("src/app.rs:1")],
                    ..ExtractedMessage::default()
                }],
            ) > 24
        );
    }

    // With `nplurals=3` in the header, all three existing plural translations
    // are kept, and a flag present both in the catalog and in the extracted
    // message ("fuzzy") appears once in the merged flag line.
    #[test]
    fn merge_preserves_existing_plural_values_and_dedupes_flags() {
        let existing = concat!(
            "msgid \"\"\n",
            "msgstr \"\"\n",
            "\"Plural-Forms: nplurals=3; plural=(n > 1);\\n\"\n",
            "\n",
            "#, fuzzy\n",
            "msgid \"count\"\n",
            "msgid_plural \"counts\"\n",
            "msgstr[0] \"eins\"\n",
            "msgstr[1] \"zwei\"\n",
            "msgstr[2] \"viele\"\n",
        );
        let extracted = vec![ExtractedMessage {
            msgid: Cow::Borrowed("count"),
            msgid_plural: Some(Cow::Borrowed("counts")),
            flags: vec![Cow::Borrowed("fuzzy"), Cow::Borrowed("rust-format")],
            ..ExtractedMessage::default()
        }];

        let merged = merge_catalog(existing, &extracted).expect("merge plural");
        // Flags are expected comma-joined without a space after the comma.
        assert!(merged.contains("#, fuzzy,rust-format"));
        let reparsed = parse_po(&merged).expect("reparse merged plural");
        assert_eq!(reparsed.items[0].msgstr.len(), 3);
        assert_eq!(reparsed.items[0].msgstr[0], "eins");
        assert_eq!(reparsed.items[0].msgstr[2], "viele");
    }

    // CRLF-terminated input must merge successfully and keep the existing
    // singular translation.
    #[test]
    fn merge_normalizes_crlf_input_and_keeps_existing_single_value() {
        let existing = "msgid \"hello\"\r\nmsgstr \"world\"\r\n";
        let merged = merge_catalog(
            existing,
            &[ExtractedMessage {
                msgid: Cow::Borrowed("hello"),
                ..ExtractedMessage::default()
            }],
        )
        .expect("merge normalized crlf");

        let reparsed = parse_po(&merged).expect("reparse normalized");
        assert_eq!(reparsed.items[0].msgstr[0], "world");
        // NOTE(review): this assertion compares a freshly built `Singular`
        // value against the `None` pattern, so it always passes and does not
        // inspect `merged`; it only exercises the `BorrowedMsgStr` import.
        assert!(!matches!(
            BorrowedMsgStr::Singular(Cow::Borrowed("world")),
            BorrowedMsgStr::None
        ));
    }
}