1use std::borrow::Cow;
2
3use crate::scan::{
4 CommentKind, Keyword, LineKind, LineScanner, classify_line, find_byte, find_quoted_bounds,
5 has_byte, parse_plural_index, split_once_byte, trim_ascii,
6};
7use crate::serialize::{write_keyword, write_prefixed_line};
8use crate::text::{escape_string_into, unescape_string, validate_quoted_content};
9use crate::utf8::input_slice_as_str;
10use crate::{BorrowedMsgStr, ParseError, SerializeOptions};
11
/// A message produced by source extraction; the input side of [`merge_catalog`].
///
/// Every text field is a `Cow`, so callers may supply borrowed or owned strings.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct ExtractedMessage<'a> {
    /// Optional message context; written as `msgctxt` when present.
    pub msgctxt: Option<Cow<'a, str>>,
    /// Message identifier; written as `msgid` and, together with `msgctxt`,
    /// used to look up a matching entry in the existing catalog.
    pub msgid: Cow<'a, str>,
    /// Optional plural identifier; written as `msgid_plural` and selects
    /// indexed `msgstr[n]` output.
    pub msgid_plural: Option<Cow<'a, str>>,
    /// Source references, each written on its own `#:` line.
    pub references: Vec<Cow<'a, str>>,
    /// Extracted comments, each written on its own `#.` line.
    pub extracted_comments: Vec<Cow<'a, str>>,
    /// Flags written on a single `#,` line (combined with the existing
    /// entry's flags when a match is found).
    pub flags: Vec<Cow<'a, str>>,
}
28
/// Parsed view of the existing PO input that the merge works from.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct MergeBorrowedFile<'a> {
    /// Translator comments (`#`) collected on the header entry.
    comments: Vec<&'a str>,
    /// Extracted comments (`#.`) collected on the header entry.
    extracted_comments: Vec<&'a str>,
    /// `key: value` pairs parsed from the header entry's `msgstr`.
    headers: Vec<MergeHeader<'a>>,
    /// Every non-header entry, in the order it was parsed.
    items: Vec<MergeBorrowedItem<'a>>,
}
36
/// One `key: value` pair from the PO header entry.
///
/// Both parts are stored trimmed; they borrow from the input when the header
/// text needs no unescaping and are owned otherwise.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct MergeHeader<'a> {
    /// Header name, e.g. `Plural-Forms`.
    key: Cow<'a, str>,
    /// Header value with surrounding whitespace removed.
    value: Cow<'a, str>,
}
42
/// One entry parsed from the existing PO input.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct MergeBorrowedItem<'a> {
    /// Unescaped `msgid` text.
    msgid: Cow<'a, str>,
    /// Unescaped `msgctxt` text, if the entry has one.
    msgctxt: Option<Cow<'a, str>>,
    /// Contents of `#:` lines, one element per line.
    references: Vec<&'a str>,
    /// Unescaped `msgid_plural` text, if the entry has one.
    msgid_plural: Option<Cow<'a, str>>,
    /// Translation(s) of the entry.
    msgstr: BorrowedMsgStr<'a>,
    /// Contents of translator comment (`#`) lines.
    comments: Vec<&'a str>,
    /// Contents of extracted comment (`#.`) lines.
    extracted_comments: Vec<&'a str>,
    /// Individual flags split from `#,` lines.
    flags: Vec<&'a str>,
    /// `(key, value)` pairs parsed from metadata comment lines (serialized as `#@ key: value`).
    metadata: Vec<(&'a str, &'a str)>,
    /// Set when every content line of the entry was marked obsolete.
    obsolete: bool,
    /// Plural-form count in effect when the entry was parsed.
    nplurals: usize,
}
57
58impl MergeBorrowedItem<'_> {
59 fn new(nplurals: usize) -> Self {
60 Self {
61 nplurals,
62 ..Self::default()
63 }
64 }
65}
66
/// Which keyword the parser saw last; decides where continuation lines are appended.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Context {
    /// Inside a `msgid` value.
    Id,
    /// Inside a `msgid_plural` value.
    IdPlural,
    /// Inside a `msgstr` / `msgstr[n]` value.
    Str,
    /// Inside a `msgctxt` value.
    Ctxt,
}
74
/// Mutable state for the entry currently being assembled by the merge parser.
#[derive(Debug)]
struct ParserState<'a> {
    /// The entry under construction.
    item: MergeBorrowedItem<'a>,
    /// Header pairs gathered while the entry still looks like the header entry.
    header_entries: Vec<MergeHeader<'a>>,
    /// Translation text accumulated so far; moved into `item` when the entry is finished.
    msgstr: BorrowedMsgStr<'a>,
    /// Keyword most recently seen, used to route continuation lines.
    context: Option<Context>,
    /// Plural index of the most recent `msgstr` keyword line.
    plural_index: usize,
    /// Number of keyword/continuation lines seen that were marked obsolete.
    obsolete_line_count: usize,
    /// Number of keyword/continuation lines seen in total.
    content_line_count: usize,
    /// Whether any keyword line has been seen for the current entry.
    has_keyword: bool,
}
86
87impl<'a> ParserState<'a> {
88 fn new(nplurals: usize) -> Self {
89 Self {
90 item: MergeBorrowedItem::new(nplurals),
91 header_entries: Vec::new(),
92 msgstr: BorrowedMsgStr::None,
93 context: None,
94 plural_index: 0,
95 obsolete_line_count: 0,
96 content_line_count: 0,
97 has_keyword: false,
98 }
99 }
100
101 fn reset(&mut self, nplurals: usize) {
102 *self = Self::new(nplurals);
103 }
104
105 #[inline]
106 fn reset_after_take(&mut self, nplurals: usize) {
107 self.item.nplurals = nplurals;
108 self.header_entries.clear();
109 self.msgstr = BorrowedMsgStr::None;
110 self.context = None;
111 self.plural_index = 0;
112 self.obsolete_line_count = 0;
113 self.content_line_count = 0;
114 self.has_keyword = false;
115 }
116
117 fn set_msgstr(&mut self, plural_index: usize, value: Cow<'a, str>) {
118 match (&mut self.msgstr, plural_index) {
119 (BorrowedMsgStr::None, 0) => self.msgstr = BorrowedMsgStr::Singular(value),
120 (BorrowedMsgStr::Singular(existing), 0) => *existing = value,
121 (BorrowedMsgStr::Plural(values), 0) => {
122 if values.is_empty() {
123 values.push(Cow::Borrowed(""));
124 }
125 values[0] = value;
126 }
127 _ => {
128 let msgstr = self.promote_plural_msgstr(plural_index);
129 msgstr[plural_index] = value;
130 }
131 }
132 }
133
134 fn append_msgstr(&mut self, plural_index: usize, value: Cow<'a, str>) {
135 match (&mut self.msgstr, plural_index) {
136 (BorrowedMsgStr::None, 0) => self.msgstr = BorrowedMsgStr::Singular(value),
137 (BorrowedMsgStr::Singular(existing), 0) => existing.to_mut().push_str(value.as_ref()),
138 (BorrowedMsgStr::Plural(values), 0) => {
139 if values.is_empty() {
140 values.push(Cow::Borrowed(""));
141 }
142 values[0].to_mut().push_str(value.as_ref());
143 }
144 _ => {
145 let msgstr = self.promote_plural_msgstr(plural_index);
146 msgstr[plural_index].to_mut().push_str(value.as_ref());
147 }
148 }
149 }
150
151 fn materialize_msgstr(&mut self) {
152 self.item.msgstr = std::mem::take(&mut self.msgstr);
153 }
154
155 fn promote_plural_msgstr(&mut self, plural_index: usize) -> &mut Vec<Cow<'a, str>> {
156 if !matches!(self.msgstr, BorrowedMsgStr::Plural(_)) {
157 self.msgstr = match std::mem::take(&mut self.msgstr) {
158 BorrowedMsgStr::None => BorrowedMsgStr::Plural(Vec::with_capacity(2)),
159 BorrowedMsgStr::Singular(value) => BorrowedMsgStr::Plural(vec![value]),
160 BorrowedMsgStr::Plural(values) => BorrowedMsgStr::Plural(values),
161 };
162 }
163 let BorrowedMsgStr::Plural(values) = &mut self.msgstr else {
164 unreachable!("plural msgstr promotion must yield plural storage");
165 };
166 if values.len() <= plural_index {
167 values.resize(plural_index + 1, Cow::Borrowed(""));
168 }
169 values
170 }
171}
172
/// The two pieces of a scanned line that the merge parser consumes.
#[derive(Debug, Clone, Copy)]
struct MergeLine<'a> {
    /// Line bytes as reported by the scanner's `trimmed` field.
    trimmed: &'a [u8],
    /// Whether the scanner flagged the line as obsolete.
    obsolete: bool,
}
178
179pub fn merge_catalog<'a>(
186 existing_po: &'a str,
187 extracted_messages: &[ExtractedMessage<'a>],
188) -> Result<String, ParseError> {
189 let normalized;
190 let input = if existing_po.as_bytes().contains(&b'\r') {
191 normalized = existing_po.replace("\r\n", "\n").replace('\r', "\n");
192 normalized.as_str()
193 } else {
194 existing_po
195 };
196
197 let existing = parse_merge_po(input)?;
198 let nplurals = parse_nplurals(&existing.headers).unwrap_or(2);
199 let options = SerializeOptions::default();
200 let mut out = String::with_capacity(estimate_merge_capacity(input, extracted_messages));
201 let mut scratch = String::new();
202
203 write_file_preamble(&mut out, &existing);
204
205 let mut existing_index =
206 std::collections::HashMap::<&str, Vec<(Option<&str>, usize)>>::with_capacity(
207 existing.items.len(),
208 );
209 for (index, item) in existing.items.iter().enumerate() {
210 existing_index
211 .entry(item.msgid.as_ref())
212 .or_default()
213 .push((item.msgctxt.as_deref(), index));
214 }
215
216 let mut matched = vec![false; existing.items.len()];
217 let mut wrote_item = false;
218
219 for extracted in extracted_messages {
220 if wrote_item {
221 out.push('\n');
222 }
223 let existing_index = find_existing_index(
224 &existing_index,
225 extracted.msgctxt.as_deref(),
226 extracted.msgid.as_ref(),
227 );
228
229 match existing_index {
230 Some(index) => {
231 matched[index] = true;
232 write_merged_existing_item(
233 &mut out,
234 &mut scratch,
235 &existing.items[index],
236 extracted,
237 nplurals,
238 &options,
239 );
240 }
241 None => write_new_item(&mut out, &mut scratch, extracted, nplurals, &options),
242 }
243 out.push('\n');
244 wrote_item = true;
245 }
246
247 for (index, item) in existing.items.iter().enumerate() {
248 if matched[index] {
249 continue;
250 }
251 if wrote_item {
252 out.push('\n');
253 }
254 write_existing_item(&mut out, &mut scratch, item, true, &options);
255 out.push('\n');
256 wrote_item = true;
257 }
258
259 Ok(out)
260}
261
262fn parse_merge_po(input: &str) -> Result<MergeBorrowedFile<'_>, ParseError> {
263 let mut file = MergeBorrowedFile::default();
264 file.items.reserve((input.len() / 96).max(1));
265 let mut current_nplurals = 2usize;
266 let mut state = ParserState::new(current_nplurals);
267
268 for line in LineScanner::new(input.as_bytes()) {
269 parse_line(
270 MergeLine {
271 trimmed: line.trimmed,
272 obsolete: line.obsolete,
273 },
274 &mut state,
275 &mut file,
276 &mut current_nplurals,
277 )?;
278 }
279
280 finish_item(&mut state, &mut file, &mut current_nplurals);
281 Ok(file)
282}
283
284fn parse_line<'a>(
285 line: MergeLine<'a>,
286 state: &mut ParserState<'a>,
287 file: &mut MergeBorrowedFile<'a>,
288 current_nplurals: &mut usize,
289) -> Result<(), ParseError> {
290 match classify_line(line.trimmed) {
291 LineKind::Continuation => {
292 append_continuation(line.trimmed, line.obsolete, state)?;
293 Ok(())
294 }
295 LineKind::Comment(kind) => {
296 parse_comment_line(line.trimmed, kind, state, file, current_nplurals);
297 Ok(())
298 }
299 LineKind::Keyword(keyword) => parse_keyword_line(
300 line.trimmed,
301 line.obsolete,
302 keyword,
303 state,
304 file,
305 current_nplurals,
306 ),
307 LineKind::Other => Ok(()),
308 }
309}
310
311fn parse_comment_line<'a>(
312 line_bytes: &'a [u8],
313 kind: CommentKind,
314 state: &mut ParserState<'a>,
315 file: &mut MergeBorrowedFile<'a>,
316 current_nplurals: &mut usize,
317) {
318 finish_item(state, file, current_nplurals);
319
320 match kind {
321 CommentKind::Reference => state.item.references.push(trimmed_str(&line_bytes[2..])),
322 CommentKind::Flags => {
323 for flag in trimmed_str(&line_bytes[2..]).split(',') {
324 state.item.flags.push(flag.trim());
325 }
326 }
327 CommentKind::Extracted => state
328 .item
329 .extracted_comments
330 .push(trimmed_str(&line_bytes[2..])),
331 CommentKind::Metadata => {
332 let trimmed = trim_ascii(&line_bytes[2..]);
333 if let Some((key_bytes, value_bytes)) = split_once_byte(trimmed, b':') {
334 let key = trimmed_str(key_bytes);
335 if !key.is_empty() {
336 state.item.metadata.push((key, trimmed_str(value_bytes)));
337 }
338 }
339 }
340 CommentKind::Translator => state.item.comments.push(trimmed_str(&line_bytes[1..])),
341 CommentKind::Other => {}
342 }
343}
344
345fn parse_keyword_line<'a>(
346 line_bytes: &'a [u8],
347 obsolete: bool,
348 keyword: Keyword,
349 state: &mut ParserState<'a>,
350 file: &mut MergeBorrowedFile<'a>,
351 current_nplurals: &mut usize,
352) -> Result<(), ParseError> {
353 match keyword {
354 Keyword::IdPlural => {
355 state.obsolete_line_count += usize::from(obsolete);
356 state.item.msgid_plural = Some(extract_merge_quoted_cow(line_bytes)?);
357 state.context = Some(Context::IdPlural);
358 state.content_line_count += 1;
359 state.has_keyword = true;
360 }
361 Keyword::Id => {
362 finish_item(state, file, current_nplurals);
363 state.obsolete_line_count += usize::from(obsolete);
364 state.item.msgid = extract_merge_quoted_cow(line_bytes)?;
365 state.context = Some(Context::Id);
366 state.content_line_count += 1;
367 state.has_keyword = true;
368 }
369 Keyword::Str => {
370 let plural_index = parse_plural_index(line_bytes).unwrap_or(0);
371 state.plural_index = plural_index;
372 state.obsolete_line_count += usize::from(obsolete);
373 state.set_msgstr(plural_index, extract_merge_quoted_cow(line_bytes)?);
374 if is_header_candidate(state) {
375 state
376 .header_entries
377 .extend(parse_header_fragment(line_bytes)?);
378 }
379 state.context = Some(Context::Str);
380 state.content_line_count += 1;
381 state.has_keyword = true;
382 }
383 Keyword::Ctxt => {
384 finish_item(state, file, current_nplurals);
385 state.obsolete_line_count += usize::from(obsolete);
386 state.item.msgctxt = Some(extract_merge_quoted_cow(line_bytes)?);
387 state.context = Some(Context::Ctxt);
388 state.content_line_count += 1;
389 state.has_keyword = true;
390 }
391 }
392
393 Ok(())
394}
395
396fn append_continuation<'a>(
397 line_bytes: &'a [u8],
398 obsolete: bool,
399 state: &mut ParserState<'a>,
400) -> Result<(), ParseError> {
401 state.obsolete_line_count += usize::from(obsolete);
402 state.content_line_count += 1;
403 let value = extract_merge_quoted_cow(line_bytes)?;
404
405 match state.context {
406 Some(Context::Str) => {
407 state.append_msgstr(state.plural_index, value);
408 if is_header_candidate(state) {
409 state
410 .header_entries
411 .extend(parse_header_fragment(line_bytes)?);
412 }
413 }
414 Some(Context::Id) => state.item.msgid.to_mut().push_str(value.as_ref()),
415 Some(Context::IdPlural) => {
416 let target = state.item.msgid_plural.get_or_insert(Cow::Borrowed(""));
417 target.to_mut().push_str(value.as_ref());
418 }
419 Some(Context::Ctxt) => {
420 let target = state.item.msgctxt.get_or_insert(Cow::Borrowed(""));
421 target.to_mut().push_str(value.as_ref());
422 }
423 None => {}
424 }
425
426 Ok(())
427}
428
429fn finish_item<'a>(
430 state: &mut ParserState<'a>,
431 file: &mut MergeBorrowedFile<'a>,
432 current_nplurals: &mut usize,
433) {
434 if !state.has_keyword {
435 return;
436 }
437
438 if state.item.msgid.is_empty() && !is_header_state(state) {
439 return;
440 }
441
442 if state.obsolete_line_count >= state.content_line_count && state.content_line_count > 0 {
443 state.item.obsolete = true;
444 }
445
446 if is_header_state(state) && file.headers.is_empty() && file.items.is_empty() {
447 file.comments = std::mem::take(&mut state.item.comments);
448 file.extracted_comments = std::mem::take(&mut state.item.extracted_comments);
449 file.headers = std::mem::take(&mut state.header_entries);
450 *current_nplurals = parse_nplurals(&file.headers).unwrap_or(2);
451 state.reset(*current_nplurals);
452 return;
453 }
454
455 state.materialize_msgstr();
456
457 if matches!(state.item.msgstr, BorrowedMsgStr::None) {
458 state.item.msgstr = BorrowedMsgStr::Singular(Cow::Borrowed(""));
459 }
460 if state.item.msgid_plural.is_some() && msgstr_len(&state.item.msgstr) == 1 {
461 let mut values = match std::mem::take(&mut state.item.msgstr) {
462 BorrowedMsgStr::None => Vec::new(),
463 BorrowedMsgStr::Singular(value) => vec![value],
464 BorrowedMsgStr::Plural(values) => values,
465 };
466 values.resize(state.item.nplurals.max(1), Cow::Borrowed(""));
467 state.item.msgstr = BorrowedMsgStr::Plural(values);
468 }
469
470 state.item.nplurals = *current_nplurals;
471 file.items.push(std::mem::take(&mut state.item));
472 state.reset_after_take(*current_nplurals);
473}
474
475fn msgstr_len(msgstr: &BorrowedMsgStr<'_>) -> usize {
476 match msgstr {
477 BorrowedMsgStr::None => 0,
478 BorrowedMsgStr::Singular(_) => 1,
479 BorrowedMsgStr::Plural(values) => values.len(),
480 }
481}
482
483fn is_header_state(state: &ParserState<'_>) -> bool {
484 state.item.msgid.is_empty()
485 && state.item.msgctxt.is_none()
486 && state.item.msgid_plural.is_none()
487 && !matches!(state.msgstr, BorrowedMsgStr::None)
488}
489
490fn is_header_candidate(state: &ParserState<'_>) -> bool {
491 state.item.msgid.is_empty()
492 && state.item.msgctxt.is_none()
493 && state.item.msgid_plural.is_none()
494 && state.plural_index == 0
495}
496
497fn parse_header_fragment(line_bytes: &[u8]) -> Result<Vec<MergeHeader<'_>>, ParseError> {
498 let Some(raw) = merge_quoted_raw(line_bytes) else {
499 return Ok(Vec::new());
500 };
501
502 if header_fragment_is_borrowable(raw) {
503 return Ok(parse_header_fragment_borrowed(raw));
504 }
505
506 parse_header_fragment_owned(line_bytes)
507}
508
509fn parse_header_fragment_borrowed(raw: &[u8]) -> Vec<MergeHeader<'_>> {
510 let mut headers = Vec::new();
511 let mut start = 0usize;
512 let mut index = 0usize;
513
514 while index < raw.len() {
515 if raw[index] == b'\\' && raw.get(index + 1) == Some(&b'n') {
516 push_borrowed_header_segment(&raw[start..index], &mut headers);
517 index += 2;
518 start = index;
519 continue;
520 }
521 index += 1;
522 }
523
524 push_borrowed_header_segment(&raw[start..], &mut headers);
525 headers
526}
527
528fn push_borrowed_header_segment<'a>(segment: &'a [u8], out: &mut Vec<MergeHeader<'a>>) {
529 if segment.is_empty() {
530 return;
531 }
532 if let Some((key_bytes, value_bytes)) = split_once_byte(segment, b':') {
533 out.push(MergeHeader {
534 key: Cow::Borrowed(trimmed_str(key_bytes)),
535 value: Cow::Borrowed(trimmed_str(value_bytes)),
536 });
537 }
538}
539
540fn parse_header_fragment_owned(line_bytes: &[u8]) -> Result<Vec<MergeHeader<'_>>, ParseError> {
541 let decoded = extract_merge_quoted_cow(line_bytes)?;
542 let mut headers = Vec::new();
543 for segment in decoded.split('\n') {
544 if segment.is_empty() {
545 continue;
546 }
547 if let Some((key, value)) = segment.split_once(':') {
548 headers.push(MergeHeader {
549 key: Cow::Owned(key.trim().to_owned()),
550 value: Cow::Owned(value.trim().to_owned()),
551 });
552 }
553 }
554 Ok(headers)
555}
556
557fn header_fragment_is_borrowable(raw: &[u8]) -> bool {
558 let mut index = 0usize;
559 while index < raw.len() {
560 if raw[index] == b'\\' {
561 if raw.get(index + 1) != Some(&b'n') {
562 return false;
563 }
564 index += 2;
565 continue;
566 }
567 index += 1;
568 }
569 !has_byte(b'"', raw)
570}
571
572#[inline]
573fn extract_merge_quoted_cow(line_bytes: &[u8]) -> Result<Cow<'_, str>, ParseError> {
574 let Some(raw) = merge_quoted_raw(line_bytes) else {
575 return Ok(Cow::Borrowed(""));
576 };
577
578 validate_quoted_content(raw)?;
579 if !has_byte(b'\\', raw) {
580 return Ok(Cow::Borrowed(bytes_to_str(raw)));
581 }
582
583 Ok(Cow::Owned(unescape_string(bytes_to_str(raw))?))
584}
585
586#[inline]
587fn merge_quoted_raw(line_bytes: &[u8]) -> Option<&[u8]> {
588 let start = match line_bytes.first() {
589 Some(b'"') => 1,
590 _ => find_byte(b'"', line_bytes)? + 1,
591 };
592
593 if start > line_bytes.len() {
594 return None;
595 }
596
597 if line_bytes.len() > start && line_bytes.last() == Some(&b'"') {
598 return Some(&line_bytes[start..line_bytes.len() - 1]);
599 }
600
601 let (quoted_start, quoted_end) = find_quoted_bounds(line_bytes)?;
602 Some(&line_bytes[quoted_start..quoted_end])
603}
604
/// Looks up the position of the existing entry with the given `msgid` and `msgctxt`.
///
/// Returns the index stored for the first candidate whose context equals
/// `msgctxt`, or `None` when the `msgid` is unknown or no context matches.
fn find_existing_index(
    existing_index: &std::collections::HashMap<&str, Vec<(Option<&str>, usize)>>,
    msgctxt: Option<&str>,
    msgid: &str,
) -> Option<usize> {
    existing_index
        .get(msgid)?
        .iter()
        .find(|(candidate_ctxt, _)| *candidate_ctxt == msgctxt)
        .map(|&(_, position)| position)
}
615
616fn estimate_merge_capacity(input: &str, extracted_messages: &[ExtractedMessage<'_>]) -> usize {
617 let extracted_bytes: usize = extracted_messages
618 .iter()
619 .map(|message| {
620 message.msgid.len()
621 + message.msgctxt.as_ref().map_or(0, |value| value.len())
622 + message.msgid_plural.as_ref().map_or(0, |value| value.len())
623 + message
624 .references
625 .iter()
626 .map(|value| value.len())
627 .sum::<usize>()
628 + message
629 .extracted_comments
630 .iter()
631 .map(|value| value.len())
632 .sum::<usize>()
633 + message.flags.iter().map(|value| value.len()).sum::<usize>()
634 })
635 .sum();
636
637 input.len() + extracted_bytes + 256
638}
639
640fn write_file_preamble(out: &mut String, file: &MergeBorrowedFile<'_>) {
641 write_prefixed_lines(out, "", "#", &file.comments);
642 write_prefixed_lines(out, "", "#.", &file.extracted_comments);
643
644 out.push_str("msgid \"\"\n");
645 out.push_str("msgstr \"\"\n");
646 for header in &file.headers {
647 out.push('"');
648 escape_string_into(out, header.key.as_ref());
649 out.push_str(": ");
650 escape_string_into(out, header.value.as_ref());
651 out.push_str("\\n\"\n");
652 }
653 out.push('\n');
654}
655
656fn write_merged_existing_item(
657 out: &mut String,
658 scratch: &mut String,
659 existing: &MergeBorrowedItem<'_>,
660 extracted: &ExtractedMessage<'_>,
661 nplurals: usize,
662 options: &SerializeOptions,
663) {
664 let obsolete_prefix = "";
665
666 write_prefixed_lines(out, obsolete_prefix, "#", &existing.comments);
667 write_prefixed_lines(out, obsolete_prefix, "#.", &extracted.extracted_comments);
668 write_metadata_lines(out, obsolete_prefix, &existing.metadata);
669 write_prefixed_lines(out, obsolete_prefix, "#:", &extracted.references);
670 write_merged_flags_line(out, obsolete_prefix, &existing.flags, &extracted.flags);
671
672 if let Some(context) = extracted.msgctxt.as_deref() {
673 write_keyword(
674 out,
675 scratch,
676 obsolete_prefix,
677 "msgctxt",
678 context,
679 None,
680 options,
681 );
682 }
683 write_keyword(
684 out,
685 scratch,
686 obsolete_prefix,
687 "msgid",
688 extracted.msgid.as_ref(),
689 None,
690 options,
691 );
692 if let Some(plural) = extracted.msgid_plural.as_deref() {
693 write_keyword(
694 out,
695 scratch,
696 obsolete_prefix,
697 "msgid_plural",
698 plural,
699 None,
700 options,
701 );
702 }
703
704 write_normalized_msgstr(
705 out,
706 scratch,
707 obsolete_prefix,
708 &existing.msgstr,
709 MsgstrShape {
710 preserve_existing: existing.msgid_plural.is_some() == extracted.msgid_plural.is_some(),
711 plural: extracted.msgid_plural.is_some(),
712 },
713 nplurals,
714 options,
715 );
716}
717
718fn write_new_item(
719 out: &mut String,
720 scratch: &mut String,
721 extracted: &ExtractedMessage<'_>,
722 nplurals: usize,
723 options: &SerializeOptions,
724) {
725 let obsolete_prefix = "";
726
727 write_prefixed_lines(out, obsolete_prefix, "#.", &extracted.extracted_comments);
728 write_prefixed_lines(out, obsolete_prefix, "#:", &extracted.references);
729 write_flags_line(out, obsolete_prefix, &extracted.flags);
730
731 if let Some(context) = extracted.msgctxt.as_deref() {
732 write_keyword(
733 out,
734 scratch,
735 obsolete_prefix,
736 "msgctxt",
737 context,
738 None,
739 options,
740 );
741 }
742 write_keyword(
743 out,
744 scratch,
745 obsolete_prefix,
746 "msgid",
747 extracted.msgid.as_ref(),
748 None,
749 options,
750 );
751 if let Some(plural) = extracted.msgid_plural.as_deref() {
752 write_keyword(
753 out,
754 scratch,
755 obsolete_prefix,
756 "msgid_plural",
757 plural,
758 None,
759 options,
760 );
761 }
762
763 write_default_msgstr(
764 out,
765 scratch,
766 obsolete_prefix,
767 extracted.msgid_plural.is_some(),
768 nplurals,
769 options,
770 );
771}
772
773fn write_existing_item(
774 out: &mut String,
775 scratch: &mut String,
776 item: &MergeBorrowedItem<'_>,
777 obsolete: bool,
778 options: &SerializeOptions,
779) {
780 let obsolete_prefix = if obsolete { "#~ " } else { "" };
781
782 write_prefixed_lines(out, obsolete_prefix, "#", &item.comments);
783 write_prefixed_lines(out, obsolete_prefix, "#.", &item.extracted_comments);
784 write_metadata_lines(out, obsolete_prefix, &item.metadata);
785 write_prefixed_lines(out, obsolete_prefix, "#:", &item.references);
786 write_flags_line(out, obsolete_prefix, &item.flags);
787
788 if let Some(context) = item.msgctxt.as_deref() {
789 write_keyword(
790 out,
791 scratch,
792 obsolete_prefix,
793 "msgctxt",
794 context,
795 None,
796 options,
797 );
798 }
799 write_keyword(
800 out,
801 scratch,
802 obsolete_prefix,
803 "msgid",
804 item.msgid.as_ref(),
805 None,
806 options,
807 );
808 if let Some(plural) = item.msgid_plural.as_deref() {
809 write_keyword(
810 out,
811 scratch,
812 obsolete_prefix,
813 "msgid_plural",
814 plural,
815 None,
816 options,
817 );
818 }
819
820 write_existing_msgstr(
821 out,
822 scratch,
823 obsolete_prefix,
824 &item.msgstr,
825 item.msgid_plural.is_some(),
826 item.nplurals,
827 options,
828 );
829}
830
831fn write_prefixed_lines<T: AsRef<str>>(
832 out: &mut String,
833 obsolete_prefix: &str,
834 prefix: &str,
835 values: &[T],
836) {
837 for value in values {
838 write_prefixed_line(out, obsolete_prefix, prefix, value.as_ref());
839 }
840}
841
/// Writes each `(key, value)` pair as `<obsolete_prefix>#@ key: value` on its own line.
fn write_metadata_lines(out: &mut String, obsolete_prefix: &str, values: &[(&str, &str)]) {
    for &(key, value) in values {
        out.extend([obsolete_prefix, "#@ ", key, ": ", value, "\n"]);
    }
}
852
/// Writes all flags on one `#, ` line, separated by commas without spaces.
///
/// Nothing is written when `values` is empty.
fn write_flags_line<T: AsRef<str>>(out: &mut String, obsolete_prefix: &str, values: &[T]) {
    let Some((first, rest)) = values.split_first() else {
        return;
    };

    out.push_str(obsolete_prefix);
    out.push_str("#, ");
    out.push_str(first.as_ref());
    for flag in rest {
        out.push(',');
        out.push_str(flag.as_ref());
    }
    out.push('\n');
}
868
/// Writes the union of `existing` and `extracted` flags on one `#, ` line.
///
/// Existing flags come first; a flag already written is skipped. Nothing is
/// written when both inputs are empty.
fn write_merged_flags_line(
    out: &mut String,
    obsolete_prefix: &str,
    existing: &[&str],
    extracted: &[Cow<'_, str>],
) {
    if existing.is_empty() && extracted.is_empty() {
        return;
    }

    out.push_str(obsolete_prefix);
    out.push_str("#, ");

    // Flags written so far, in output order; doubles as the "need a comma" signal.
    let mut emitted: Vec<&str> = Vec::with_capacity(existing.len() + extracted.len());
    let candidates = existing
        .iter()
        .copied()
        .chain(extracted.iter().map(|flag| flag.as_ref()));

    for flag in candidates {
        if emitted.iter().any(|seen| *seen == flag) {
            continue;
        }
        if !emitted.is_empty() {
            out.push(',');
        }
        out.push_str(flag);
        emitted.push(flag);
    }

    out.push('\n');
}
901
902fn write_existing_msgstr(
903 out: &mut String,
904 scratch: &mut String,
905 obsolete_prefix: &str,
906 msgstr: &BorrowedMsgStr<'_>,
907 is_plural: bool,
908 nplurals: usize,
909 options: &SerializeOptions,
910) {
911 if is_plural {
912 for index in 0..nplurals.max(1) {
913 let value = match msgstr {
914 BorrowedMsgStr::Singular(value) if index == 0 => value.as_ref(),
915 BorrowedMsgStr::None | BorrowedMsgStr::Singular(_) => "",
916 BorrowedMsgStr::Plural(values) => {
917 values.get(index).map_or("", |value| value.as_ref())
918 }
919 };
920 write_keyword(
921 out,
922 scratch,
923 obsolete_prefix,
924 "msgstr",
925 value,
926 Some(index),
927 options,
928 );
929 }
930 return;
931 }
932
933 let value = match msgstr {
934 BorrowedMsgStr::None => "",
935 BorrowedMsgStr::Singular(value) => value.as_ref(),
936 BorrowedMsgStr::Plural(values) => values.first().map_or("", |value| value.as_ref()),
937 };
938 write_keyword(
939 out,
940 scratch,
941 obsolete_prefix,
942 "msgstr",
943 value,
944 None,
945 options,
946 );
947}
948
/// How the translation block of a merged entry should be written.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct MsgstrShape {
    /// Keep the existing translations (true) or emit empty ones (false).
    preserve_existing: bool,
    /// Emit indexed `msgstr[n]` lines instead of a single `msgstr`.
    plural: bool,
}
954
955fn write_normalized_msgstr(
956 out: &mut String,
957 scratch: &mut String,
958 obsolete_prefix: &str,
959 msgstr: &BorrowedMsgStr<'_>,
960 shape: MsgstrShape,
961 nplurals: usize,
962 options: &SerializeOptions,
963) {
964 if !shape.preserve_existing {
965 write_default_msgstr(
966 out,
967 scratch,
968 obsolete_prefix,
969 shape.plural,
970 nplurals,
971 options,
972 );
973 return;
974 }
975
976 write_existing_msgstr(
977 out,
978 scratch,
979 obsolete_prefix,
980 msgstr,
981 shape.plural,
982 nplurals,
983 options,
984 );
985}
986
987fn write_default_msgstr(
988 out: &mut String,
989 scratch: &mut String,
990 obsolete_prefix: &str,
991 is_plural: bool,
992 nplurals: usize,
993 options: &SerializeOptions,
994) {
995 if is_plural {
996 for index in 0..nplurals.max(1) {
997 write_keyword(
998 out,
999 scratch,
1000 obsolete_prefix,
1001 "msgstr",
1002 "",
1003 Some(index),
1004 options,
1005 );
1006 }
1007 return;
1008 }
1009
1010 write_keyword(out, scratch, obsolete_prefix, "msgstr", "", None, options);
1011}
1012
1013fn parse_nplurals(headers: &[MergeHeader<'_>]) -> Option<usize> {
1014 let plural_forms = headers
1015 .iter()
1016 .find(|header| header.key.as_ref() == "Plural-Forms")?
1017 .value
1018 .as_bytes();
1019 let mut rest = plural_forms;
1020
1021 while !rest.is_empty() {
1022 let (part, next) = match split_once_byte(rest, b';') {
1023 Some((part, tail)) => (part, tail),
1024 None => (rest, &b""[..]),
1025 };
1026 let trimmed = trim_ascii(part);
1027 if let Some((key, value)) = split_once_byte(trimmed, b'=')
1028 && trim_ascii(key) == b"nplurals"
1029 && let value = bytes_to_str(trim_ascii(value))
1030 && let Ok(parsed) = value.parse::<usize>()
1031 {
1032 return Some(parsed);
1033 }
1034 rest = next;
1035 }
1036
1037 None
1038}
1039
/// Views `bytes` as a `&str` by delegating to `input_slice_as_str`.
// NOTE(review): presumably relies on `bytes` being a slice of input that is
// already valid UTF-8 — confirm against `crate::utf8`.
fn bytes_to_str(bytes: &[u8]) -> &str {
    input_slice_as_str(bytes)
}
1043
/// Applies `trim_ascii` to `bytes` and views the result as a `&str`.
fn trimmed_str(bytes: &[u8]) -> &str {
    bytes_to_str(trim_ascii(bytes))
}
1047
1048#[cfg(test)]
1049mod tests {
1050 use std::borrow::Cow;
1051
1052 use super::{
1053 ExtractedMessage, MergeHeader, estimate_merge_capacity, extract_merge_quoted_cow,
1054 find_existing_index, header_fragment_is_borrowable, merge_catalog, parse_header_fragment,
1055 parse_nplurals,
1056 };
1057 use crate::{BorrowedMsgStr, parse_po};
1058
1059 #[test]
1060 fn preserves_existing_translations_and_updates_references() {
1061 let existing = concat!(
1062 "msgid \"hello\"\n",
1063 "msgstr \"world\"\n\n",
1064 "msgid \"old\"\n",
1065 "msgstr \"alt\"\n",
1066 );
1067 let extracted = vec![ExtractedMessage {
1068 msgid: Cow::Borrowed("hello"),
1069 references: vec![Cow::Borrowed("src/new.rs:10")],
1070 ..ExtractedMessage::default()
1071 }];
1072
1073 let merged = merge_catalog(existing, &extracted).expect("merge");
1074 let reparsed = parse_po(&merged).expect("reparse");
1075 let old_items: Vec<_> = reparsed
1076 .items
1077 .iter()
1078 .filter(|item| item.msgid == "old")
1079 .map(|item| (item.obsolete, item.msgstr[0].clone()))
1080 .collect();
1081 assert_eq!(old_items, vec![(true, "alt".to_owned())]);
1082
1083 let hello = reparsed
1084 .items
1085 .iter()
1086 .find(|item| item.msgid == "hello")
1087 .expect("merged hello item");
1088 assert_eq!(hello.msgstr[0], "world");
1089 assert_eq!(hello.references, vec!["src/new.rs:10".to_owned()]);
1090 }
1091
1092 #[test]
1093 fn creates_new_items_for_new_extracted_messages() {
1094 let merged = merge_catalog(
1095 "",
1096 &[ExtractedMessage {
1097 msgid: Cow::Borrowed("fresh"),
1098 extracted_comments: vec![Cow::Borrowed("from extractor")],
1099 ..ExtractedMessage::default()
1100 }],
1101 )
1102 .expect("merge");
1103 let reparsed = parse_po(&merged).expect("reparse");
1104
1105 assert_eq!(reparsed.items[0].msgid, "fresh");
1106 assert_eq!(reparsed.items[0].msgstr[0], "");
1107 assert_eq!(
1108 reparsed.items[0].extracted_comments,
1109 vec!["from extractor".to_owned()]
1110 );
1111 }
1112
1113 #[test]
1114 fn resets_msgstr_when_switching_between_singular_and_plural() {
1115 let existing = concat!("msgid \"count\"\n", "msgstr \"Anzahl\"\n",);
1116 let extracted = vec![ExtractedMessage {
1117 msgid: Cow::Borrowed("count"),
1118 msgid_plural: Some(Cow::Borrowed("counts")),
1119 ..ExtractedMessage::default()
1120 }];
1121
1122 let merged = merge_catalog(existing, &extracted).expect("merge");
1123 let reparsed = parse_po(&merged).expect("reparse");
1124
1125 assert!(reparsed.items[0].msgid_plural.is_some());
1126 assert_eq!(reparsed.items[0].msgstr.len(), 2);
1127 assert_eq!(reparsed.items[0].msgstr[0], "");
1128 assert_eq!(reparsed.items[0].msgstr[1], "");
1129 }
1130
1131 #[test]
1132 fn merge_helpers_cover_header_and_lookup_paths() {
1133 assert_eq!(
1134 extract_merge_quoted_cow(br#"msgid "plain""#),
1135 Ok(Cow::Borrowed("plain"))
1136 );
1137 assert_eq!(
1138 extract_merge_quoted_cow(br#"msgid "line\nbreak""#),
1139 Ok(Cow::Owned("line\nbreak".to_owned()))
1140 );
1141 assert!(header_fragment_is_borrowable(
1142 br#"Language: de\nPlural-Forms: nplurals=2;\n"#
1143 ));
1144 assert!(!header_fragment_is_borrowable(br#"Language: \"de\"\n"#));
1145 assert_eq!(
1146 parse_header_fragment(br#""Language: de\nPlural-Forms: nplurals=3; plural=(n>1);\n""#)
1147 .expect("header fragment"),
1148 vec![
1149 MergeHeader {
1150 key: Cow::Borrowed("Language"),
1151 value: Cow::Borrowed("de"),
1152 },
1153 MergeHeader {
1154 key: Cow::Borrowed("Plural-Forms"),
1155 value: Cow::Borrowed("nplurals=3; plural=(n>1);"),
1156 },
1157 ]
1158 );
1159 assert_eq!(
1160 parse_nplurals(&[
1161 MergeHeader {
1162 key: Cow::Borrowed("Language"),
1163 value: Cow::Borrowed("de"),
1164 },
1165 MergeHeader {
1166 key: Cow::Borrowed("Plural-Forms"),
1167 value: Cow::Borrowed(" nplurals = 4 ; plural = (n > 1); "),
1168 },
1169 ]),
1170 Some(4)
1171 );
1172 assert_eq!(
1173 find_existing_index(
1174 &std::collections::HashMap::from([(
1175 "hello",
1176 vec![(Some("menu"), 3usize), (None, 1usize)],
1177 )]),
1178 Some("menu"),
1179 "hello",
1180 ),
1181 Some(3)
1182 );
1183 assert!(
1184 estimate_merge_capacity(
1185 "msgid \"a\"\nmsgstr \"b\"\n",
1186 &[ExtractedMessage {
1187 msgid: Cow::Borrowed("hello"),
1188 references: vec![Cow::Borrowed("src/app.rs:1")],
1189 ..ExtractedMessage::default()
1190 }],
1191 ) > 24
1192 );
1193 }
1194
1195 #[test]
1196 fn merge_preserves_existing_plural_values_and_dedupes_flags() {
1197 let existing = concat!(
1198 "msgid \"\"\n",
1199 "msgstr \"\"\n",
1200 "\"Plural-Forms: nplurals=3; plural=(n > 1);\\n\"\n",
1201 "\n",
1202 "#, fuzzy\n",
1203 "msgid \"count\"\n",
1204 "msgid_plural \"counts\"\n",
1205 "msgstr[0] \"eins\"\n",
1206 "msgstr[1] \"zwei\"\n",
1207 "msgstr[2] \"viele\"\n",
1208 );
1209 let extracted = vec![ExtractedMessage {
1210 msgid: Cow::Borrowed("count"),
1211 msgid_plural: Some(Cow::Borrowed("counts")),
1212 flags: vec![Cow::Borrowed("fuzzy"), Cow::Borrowed("rust-format")],
1213 ..ExtractedMessage::default()
1214 }];
1215
1216 let merged = merge_catalog(existing, &extracted).expect("merge plural");
1217 assert!(merged.contains("#, fuzzy,rust-format"));
1218 let reparsed = parse_po(&merged).expect("reparse merged plural");
1219 assert_eq!(reparsed.items[0].msgstr.len(), 3);
1220 assert_eq!(reparsed.items[0].msgstr[0], "eins");
1221 assert_eq!(reparsed.items[0].msgstr[2], "viele");
1222 }
1223
1224 #[test]
1225 fn merge_normalizes_crlf_input_and_keeps_existing_single_value() {
1226 let existing = "msgid \"hello\"\r\nmsgstr \"world\"\r\n";
1227 let merged = merge_catalog(
1228 existing,
1229 &[ExtractedMessage {
1230 msgid: Cow::Borrowed("hello"),
1231 ..ExtractedMessage::default()
1232 }],
1233 )
1234 .expect("merge normalized crlf");
1235
1236 let reparsed = parse_po(&merged).expect("reparse normalized");
1237 assert_eq!(reparsed.items[0].msgstr[0], "world");
1238 assert!(!matches!(
1239 BorrowedMsgStr::Singular(Cow::Borrowed("world")),
1240 BorrowedMsgStr::None
1241 ));
1242 }
1243}