Skip to main content

ferrocat_po/api/
catalog.rs

//! Internal catalog pipeline for the public `ferrocat-po` catalog API.
//!
//! This module owns the higher-level workflow around PO parsing, extracted-message
//! normalization, merge semantics, and export back to PO. The byte-oriented parser
//! and serializer hot paths stay elsewhere; this layer is where we preserve
//! catalog semantics and diagnostics.
7
8use std::collections::{BTreeMap, BTreeSet};
9use std::fs;
10
11use super::file_io::atomic_write;
12use super::helpers::{
13    dedupe_origins, dedupe_placeholders, dedupe_strings, merge_placeholders, merge_unique_origins,
14    merge_unique_strings,
15};
16use super::ndjson::{parse_catalog_to_internal_ndjson, stringify_catalog_ndjson};
17use super::plural::{PluralProfile, derive_plural_variable, synthesize_icu_plural};
18use super::{
19    ApiError, CatalogMessage, CatalogMessageExtra, CatalogOrigin, CatalogSemantics, CatalogStats,
20    CatalogStorageFormat, CatalogUpdateInput, CatalogUpdateResult, Diagnostic, DiagnosticSeverity,
21    ExtractedMessage, ObsoleteStrategy, OrderBy, ParseCatalogOptions, ParsedCatalog,
22    PlaceholderCommentMode, PluralEncoding, PluralSource, TranslationShape,
23    UpdateCatalogFileOptions, UpdateCatalogOptions,
24};
25use crate::{Header, MsgStr, PoFile, PoItem, SerializeOptions, parse_po, stringify_po};
26
/// Canonical in-memory catalog used by both the parse and the update paths.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub(super) struct Catalog {
    /// Catalog locale, when one is known.
    pub(super) locale: Option<String>,
    /// Header entries keyed by header name (for example `Language`).
    pub(super) headers: BTreeMap<String, String>,
    /// File-level comments; written to `PoFile::comments` on PO export.
    pub(super) file_comments: Vec<String>,
    /// File-level extracted comments; written to `PoFile::extracted_comments` on PO export.
    pub(super) file_extracted_comments: Vec<String>,
    /// Messages held by the catalog.
    pub(super) messages: Vec<CanonicalMessage>,
    /// Diagnostics attached to the catalog; `update_catalog` seeds its own
    /// diagnostics list from these.
    pub(super) diagnostics: Vec<Diagnostic>,
}
36
/// One catalog entry in canonical form, identified by `msgid` plus `msgctxt`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) struct CanonicalMessage {
    /// Message id; together with `msgctxt` it forms the key used when merging.
    pub(super) msgid: String,
    /// Optional message context, second half of the merge key.
    pub(super) msgctxt: Option<String>,
    /// Singular or plural translation payload.
    pub(super) translation: CanonicalTranslation,
    /// Comments taken from the extracted message; exported as PO extracted comments.
    pub(super) comments: Vec<String>,
    /// Source locations; exported as PO references when origins are included.
    pub(super) origins: Vec<CatalogOrigin>,
    /// Placeholder values keyed by placeholder name; feeds the optional
    /// placeholder comments on export.
    pub(super) placeholders: BTreeMap<String, Vec<String>>,
    /// Whether the entry is marked obsolete.
    pub(super) obsolete: bool,
    /// Translator comments; carried over from the previous entry on merge and
    /// exported as `PoItem::comments`.
    pub(super) translator_comments: Vec<String>,
    /// Flags; carried over from the previous entry on merge.
    pub(super) flags: Vec<String>,
}
49
/// Translation payload of a [`CanonicalMessage`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) enum CanonicalTranslation {
    /// A single translated string.
    Singular {
        /// Translated text; exported as the PO `msgstr`.
        value: String,
    },
    /// A plural translation split by plural category.
    Plural {
        /// Source-side plural forms (`one` is optional, `other` is always present).
        source: PluralSource,
        /// Translations keyed by plural category name; gettext export requires
        /// an `other` entry.
        translation_by_category: BTreeMap<String, String>,
        /// Name of the plural placeholder variable used when synthesizing ICU plurals.
        variable: String,
    },
}
61
/// Extracted message reduced to the fields that matter for catalog identity
/// and merging.
#[derive(Debug, Clone, PartialEq, Eq)]
struct NormalizedMessage {
    /// Message id; must not be empty (enforced by `push_normalized_message`).
    msgid: String,
    /// Optional message context; forms the identity key together with `msgid`.
    msgctxt: Option<String>,
    /// Singular/plural shape of the extracted message.
    kind: NormalizedKind,
    /// De-duplicated extractor comments.
    comments: Vec<String>,
    /// De-duplicated source locations.
    origins: Vec<CatalogOrigin>,
    /// De-duplicated placeholder values keyed by placeholder name.
    placeholders: BTreeMap<String, Vec<String>>,
}
71
/// Shape of a [`NormalizedMessage`]; duplicates of one identity must agree on it.
#[derive(Debug, Clone, PartialEq, Eq)]
enum NormalizedKind {
    /// Plain singular message.
    Singular,
    /// Plural message.
    Plural {
        /// Source-side plural forms.
        source: PluralSource,
        /// Explicit plural variable name, if known; when `None`, `merge_message`
        /// falls back to the previous entry, the placeholders, or `"count"`.
        variable: Option<String>,
    },
}
80
/// Pieces of a `Plural-Forms` header as consumed by `apply_header_defaults`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct ParsedPluralFormsHeader {
    /// Raw header value; `None` is treated as "no `Plural-Forms` header present".
    raw: Option<String>,
    /// Parsed `nplurals` count, when available.
    nplurals: Option<usize>,
    /// Plural expression, when the header carried one; `None` lets
    /// `apply_header_defaults` complete the header from a locale default.
    plural: Option<String>,
}
87
88/// Merges extracted messages into an existing catalog and returns updated catalog content.
89///
90/// # Errors
91///
92/// Returns [`ApiError`] when the source locale is missing, the existing catalog
93/// cannot be parsed, or the requested storage format cannot be rendered safely.
94#[expect(
95    clippy::needless_pass_by_value,
96    reason = "Public API takes owned option structs so callers can build and move them ergonomically."
97)]
98pub fn update_catalog(options: UpdateCatalogOptions<'_>) -> Result<CatalogUpdateResult, ApiError> {
99    super::validate_source_locale(options.source_locale)?;
100    super::validate_catalog_semantics(
101        options.semantics,
102        options.storage_format,
103        options.plural_encoding,
104    )?;
105
106    let created = options.existing.is_none();
107    let original = options.existing.unwrap_or("");
108    let existing = match options.existing {
109        Some(content) if !content.is_empty() => parse_catalog_to_internal(
110            content,
111            options.locale,
112            options.source_locale,
113            options.semantics,
114            options.plural_encoding,
115            false,
116            options.storage_format,
117        )?,
118        Some(_) | None => Catalog {
119            locale: options.locale.map(str::to_owned),
120            headers: BTreeMap::new(),
121            file_comments: Vec::new(),
122            file_extracted_comments: Vec::new(),
123            messages: Vec::new(),
124            diagnostics: Vec::new(),
125        },
126    };
127
128    let locale = options
129        .locale
130        .map(str::to_owned)
131        .or_else(|| existing.locale.clone())
132        .or_else(|| existing.headers.get("Language").cloned());
133    let mut diagnostics = existing.diagnostics.clone();
134    let normalized = normalize_update_input(&options.input)?;
135    let (mut merged, stats) = merge_catalogs(
136        existing,
137        &normalized,
138        locale.as_deref(),
139        options.source_locale,
140        options.overwrite_source_translations,
141        options.obsolete_strategy,
142        &mut diagnostics,
143    );
144    merged.locale.clone_from(&locale);
145    apply_storage_defaults(&mut merged, &options, locale.as_deref(), &mut diagnostics)?;
146    sort_messages(&mut merged.messages, options.order_by);
147    let content = export_catalog_content(&merged, &options, locale.as_deref(), &mut diagnostics)?;
148
149    Ok(CatalogUpdateResult {
150        updated: content != original,
151        content,
152        created,
153        stats,
154        diagnostics,
155    })
156}
157
158/// Updates a catalog on disk and only writes the file when the rendered
159/// output changes.
160///
161/// # Errors
162///
163/// Returns [`ApiError`] when the input is invalid, when the existing file
164/// cannot be read or parsed, or when the updated content cannot be written.
165pub fn update_catalog_file(
166    options: UpdateCatalogFileOptions<'_>,
167) -> Result<CatalogUpdateResult, ApiError> {
168    super::validate_source_locale(options.source_locale)?;
169    if options.target_path.as_os_str().is_empty() {
170        return Err(ApiError::InvalidArguments(
171            "target_path must not be empty".to_owned(),
172        ));
173    }
174
175    let existing = match fs::read_to_string(options.target_path) {
176        Ok(content) => Some(content),
177        Err(error) if error.kind() == std::io::ErrorKind::NotFound => None,
178        Err(error) => return Err(ApiError::Io(error)),
179    };
180
181    let result = update_catalog(UpdateCatalogOptions {
182        locale: options.locale,
183        source_locale: options.source_locale,
184        input: options.input,
185        existing: existing.as_deref(),
186        storage_format: options.storage_format,
187        semantics: options.semantics,
188        plural_encoding: options.plural_encoding,
189        obsolete_strategy: options.obsolete_strategy,
190        overwrite_source_translations: options.overwrite_source_translations,
191        order_by: options.order_by,
192        include_origins: options.include_origins,
193        include_line_numbers: options.include_line_numbers,
194        print_placeholders_in_comments: options.print_placeholders_in_comments,
195        custom_header_attributes: options.custom_header_attributes,
196    })?;
197
198    if result.created || result.updated {
199        atomic_write(options.target_path, &result.content)?;
200    }
201
202    Ok(result)
203}
204
205/// Parses catalog content into the higher-level representation used by
206/// `ferrocat`'s catalog APIs.
207///
208/// # Errors
209///
210/// Returns [`ApiError`] when the catalog content cannot be parsed, the source
211/// locale is missing, or strict ICU projection fails.
212#[expect(
213    clippy::needless_pass_by_value,
214    reason = "Public API takes owned option structs so callers can build and move them ergonomically."
215)]
216pub fn parse_catalog(options: ParseCatalogOptions<'_>) -> Result<ParsedCatalog, ApiError> {
217    super::validate_source_locale(options.source_locale)?;
218    super::validate_catalog_semantics(
219        options.semantics,
220        options.storage_format,
221        options.plural_encoding,
222    )?;
223    let catalog = parse_catalog_to_internal(
224        options.content,
225        options.locale,
226        options.source_locale,
227        options.semantics,
228        options.plural_encoding,
229        options.strict,
230        options.storage_format,
231    )?;
232    let messages = catalog
233        .messages
234        .into_iter()
235        .map(public_message_from_canonical)
236        .collect();
237
238    Ok(ParsedCatalog {
239        locale: catalog.locale,
240        semantics: options.semantics,
241        headers: catalog.headers,
242        messages,
243        diagnostics: catalog.diagnostics,
244    })
245}
246
247/// Collapses the accepted extractor input shapes into one merge-oriented form.
248///
249/// The result keeps only the fields that matter for catalog identity and merge
250/// semantics, while also projecting source-first ICU plurals into the same
251/// structured plural representation used by `CatalogUpdateInput::Structured`.
252fn normalize_update_input(input: &CatalogUpdateInput) -> Result<Vec<NormalizedMessage>, ApiError> {
253    let mut index = BTreeMap::<(String, Option<String>), usize>::new();
254    let mut normalized = Vec::<NormalizedMessage>::new();
255
256    match input {
257        CatalogUpdateInput::Structured(extracted) => {
258            for message in extracted {
259                let (msgid, msgctxt, kind, comments, origins, placeholders) = match message {
260                    ExtractedMessage::Singular(message) => (
261                        message.msgid.clone(),
262                        message.msgctxt.clone(),
263                        NormalizedKind::Singular,
264                        message.comments.clone(),
265                        message.origin.clone(),
266                        message.placeholders.clone(),
267                    ),
268                    ExtractedMessage::Plural(message) => (
269                        message.msgid.clone(),
270                        message.msgctxt.clone(),
271                        NormalizedKind::Plural {
272                            source: message.source.clone(),
273                            variable: None,
274                        },
275                        message.comments.clone(),
276                        message.origin.clone(),
277                        message.placeholders.clone(),
278                    ),
279                };
280
281                push_normalized_message(
282                    &mut index,
283                    &mut normalized,
284                    NormalizedMessage {
285                        msgid,
286                        msgctxt,
287                        kind,
288                        comments: dedupe_strings(comments),
289                        origins: dedupe_origins(origins),
290                        placeholders: dedupe_placeholders(placeholders),
291                    },
292                )?;
293            }
294        }
295        CatalogUpdateInput::SourceFirst(messages) => {
296            for message in messages {
297                push_normalized_message(
298                    &mut index,
299                    &mut normalized,
300                    NormalizedMessage {
301                        msgid: message.msgid.clone(),
302                        msgctxt: message.msgctxt.clone(),
303                        kind: NormalizedKind::Singular,
304                        comments: dedupe_strings(message.comments.clone()),
305                        origins: dedupe_origins(message.origin.clone()),
306                        placeholders: dedupe_placeholders(message.placeholders.clone()),
307                    },
308                )?;
309            }
310        }
311    }
312
313    Ok(normalized)
314}
315
316/// Inserts one normalized message, merging duplicate extractor entries that
317/// refer to the same gettext identity.
318///
319/// Duplicate singular/plural shape mismatches remain a hard error because they
320/// would otherwise make the final catalog shape ambiguous.
321fn push_normalized_message(
322    index: &mut BTreeMap<(String, Option<String>), usize>,
323    normalized: &mut Vec<NormalizedMessage>,
324    message: NormalizedMessage,
325) -> Result<(), ApiError> {
326    let msgid = message.msgid.clone();
327    let msgctxt = message.msgctxt.clone();
328    if msgid.is_empty() {
329        return Err(ApiError::InvalidArguments(
330            "extracted msgid must not be empty".to_owned(),
331        ));
332    }
333
334    let key = (msgid.clone(), msgctxt);
335    if let Some(existing_index) = index.get(&key).copied() {
336        let existing = &mut normalized[existing_index];
337        if existing.kind != message.kind {
338            return Err(ApiError::Conflict(format!(
339                "conflicting duplicate extracted message for msgid {msgid:?}"
340            )));
341        }
342        merge_unique_strings(&mut existing.comments, message.comments);
343        merge_unique_origins(&mut existing.origins, message.origins);
344        merge_placeholders(&mut existing.placeholders, message.placeholders);
345    } else {
346        index.insert(key, normalized.len());
347        normalized.push(message);
348    }
349
350    Ok(())
351}
352
353/// Applies extracted messages onto an existing canonical catalog and records the
354/// coarse-grained update counters used by the high-level API.
355fn merge_catalogs(
356    existing: Catalog,
357    normalized: &[NormalizedMessage],
358    locale: Option<&str>,
359    source_locale: &str,
360    overwrite_source_translations: bool,
361    obsolete_strategy: ObsoleteStrategy,
362    diagnostics: &mut Vec<Diagnostic>,
363) -> (Catalog, CatalogStats) {
364    let is_source_locale = locale.is_none_or(|value| value == source_locale);
365    let mut stats = CatalogStats::default();
366
367    let mut existing_index = BTreeMap::<(String, Option<String>), usize>::new();
368    for (index, message) in existing.messages.iter().enumerate() {
369        existing_index.insert((message.msgid.clone(), message.msgctxt.clone()), index);
370    }
371
372    let mut matched = vec![false; existing.messages.len()];
373    let mut messages = Vec::with_capacity(normalized.len() + existing.messages.len());
374
375    for next in normalized {
376        let key = (next.msgid.clone(), next.msgctxt.clone());
377        let previous = existing_index.get(&key).copied().map(|index| {
378            matched[index] = true;
379            existing.messages[index].clone()
380        });
381        let merged = merge_message(
382            previous.as_ref(),
383            next,
384            is_source_locale,
385            locale,
386            overwrite_source_translations,
387            diagnostics,
388        );
389        if previous.is_none() {
390            stats.added += 1;
391        } else if previous.as_ref() == Some(&merged) {
392            stats.unchanged += 1;
393        } else {
394            stats.changed += 1;
395        }
396        messages.push(merged);
397    }
398
399    for (index, message) in existing.messages.into_iter().enumerate() {
400        if matched[index] {
401            continue;
402        }
403        match obsolete_strategy {
404            ObsoleteStrategy::Delete => {
405                stats.obsolete_removed += 1;
406            }
407            ObsoleteStrategy::Mark => {
408                let mut message = message;
409                if !message.obsolete {
410                    message.obsolete = true;
411                    stats.obsolete_marked += 1;
412                }
413                messages.push(message);
414            }
415            ObsoleteStrategy::Keep => {
416                let mut message = message;
417                message.obsolete = false;
418                messages.push(message);
419            }
420        }
421    }
422
423    stats.total = messages.len();
424    (
425        Catalog {
426            locale: existing.locale,
427            headers: existing.headers,
428            file_comments: existing.file_comments,
429            file_extracted_comments: existing.file_extracted_comments,
430            messages,
431            diagnostics: existing.diagnostics,
432        },
433        stats,
434    )
435}
436
437/// Resolves the final canonical message for one gettext identity.
438///
439/// This is the central place where source-locale overwrite rules, plural
440/// variable inference, and locale-aware plural category materialization meet.
441fn merge_message(
442    previous: Option<&CanonicalMessage>,
443    next: &NormalizedMessage,
444    is_source_locale: bool,
445    locale: Option<&str>,
446    overwrite_source_translations: bool,
447    diagnostics: &mut Vec<Diagnostic>,
448) -> CanonicalMessage {
449    let translation = match (&next.kind, previous) {
450        (NormalizedKind::Singular, Some(previous))
451            if matches!(previous.translation, CanonicalTranslation::Singular { .. })
452                && !(is_source_locale && overwrite_source_translations) =>
453        {
454            previous.translation.clone()
455        }
456        (NormalizedKind::Singular, _) => CanonicalTranslation::Singular {
457            value: if is_source_locale {
458                next.msgid.clone()
459            } else {
460                String::new()
461            },
462        },
463        (NormalizedKind::Plural { source, variable }, previous) => {
464            let plural_profile = PluralProfile::for_locale(locale);
465
466            match previous {
467                Some(previous)
468                    if matches!(previous.translation, CanonicalTranslation::Plural { .. })
469                        && !(is_source_locale && overwrite_source_translations) =>
470                {
471                    match &previous.translation {
472                        CanonicalTranslation::Plural {
473                            translation_by_category,
474                            variable: previous_variable,
475                            ..
476                        } => CanonicalTranslation::Plural {
477                            source: source.clone(),
478                            translation_by_category: plural_profile
479                                .materialize_translation(translation_by_category),
480                            variable: variable
481                                .as_deref()
482                                .map_or_else(|| previous_variable.clone(), str::to_owned),
483                        },
484                        CanonicalTranslation::Singular { .. } => unreachable!(),
485                    }
486                }
487                _ => {
488                    let variable = variable
489                        .clone()
490                        .or_else(|| previous.and_then(extract_plural_variable))
491                        .or_else(|| derive_plural_variable(&next.placeholders))
492                        .unwrap_or_else(|| {
493                            diagnostics.push(
494                                Diagnostic::new(
495                                    DiagnosticSeverity::Warning,
496                                    "plural.assumed_variable",
497                                    "Unable to determine plural placeholder name, assuming \"count\".",
498                                )
499                                .with_identity(&next.msgid, next.msgctxt.as_deref()),
500                            );
501                            "count".to_owned()
502                        });
503
504                    CanonicalTranslation::Plural {
505                        source: source.clone(),
506                        translation_by_category: if is_source_locale {
507                            plural_profile.source_locale_translation(source)
508                        } else {
509                            plural_profile.empty_translation()
510                        },
511                        variable,
512                    }
513                }
514            }
515        }
516    };
517
518    let (translator_comments, flags, obsolete) = previous.map_or_else(
519        || (Vec::new(), Vec::new(), false),
520        |message| {
521            (
522                message.translator_comments.clone(),
523                message.flags.clone(),
524                false,
525            )
526        },
527    );
528
529    CanonicalMessage {
530        msgid: next.msgid.clone(),
531        msgctxt: next.msgctxt.clone(),
532        translation,
533        comments: next.comments.clone(),
534        origins: next.origins.clone(),
535        placeholders: next.placeholders.clone(),
536        obsolete,
537        translator_comments,
538        flags,
539    }
540}
541
542fn extract_plural_variable(message: &CanonicalMessage) -> Option<String> {
543    match &message.translation {
544        CanonicalTranslation::Plural { variable, .. } => Some(variable.clone()),
545        CanonicalTranslation::Singular { .. } => None,
546    }
547}
548
549/// Fills in the standard catalog headers and only synthesizes `Plural-Forms`
550/// when we have a conservative, locale-safe default.
551fn apply_header_defaults(
552    headers: &mut BTreeMap<String, String>,
553    locale: Option<&str>,
554    semantics: CatalogSemantics,
555    diagnostics: &mut Vec<Diagnostic>,
556    custom: &BTreeMap<String, String>,
557) {
558    headers
559        .entry("MIME-Version".to_owned())
560        .or_insert_with(|| "1.0".to_owned());
561    headers
562        .entry("Content-Type".to_owned())
563        .or_insert_with(|| "text/plain; charset=utf-8".to_owned());
564    headers
565        .entry("Content-Transfer-Encoding".to_owned())
566        .or_insert_with(|| "8bit".to_owned());
567    headers
568        .entry("X-Generator".to_owned())
569        .or_insert_with(|| "ferrocat".to_owned());
570    if let Some(locale) = locale {
571        headers.insert("Language".to_owned(), locale.to_owned());
572    }
573    if semantics == CatalogSemantics::GettextCompat && !custom.contains_key("Plural-Forms") {
574        let profile = PluralProfile::for_locale(locale);
575        let parsed_header = parse_plural_forms_from_headers(headers);
576        match (parsed_header.raw.as_deref(), profile.gettext_header()) {
577            (None, Some(header)) => {
578                headers.insert("Plural-Forms".to_owned(), header);
579            }
580            (None, None) => diagnostics.push(Diagnostic::new(
581                DiagnosticSeverity::Info,
582                "plural.missing_plural_forms_header",
583                "No safe default Plural-Forms header is known for this locale; keeping the header unset.",
584            )),
585            (Some(_), Some(header))
586                if parsed_header.nplurals == Some(profile.nplurals())
587                    && parsed_header.plural.is_none() =>
588            {
589                headers.insert("Plural-Forms".to_owned(), header);
590                diagnostics.push(Diagnostic::new(
591                    DiagnosticSeverity::Info,
592                    "plural.completed_plural_forms_header",
593                    "Plural-Forms header was missing the plural expression and has been completed using a safe locale default.",
594                ));
595            }
596            _ => {}
597        }
598    }
599    for (key, value) in custom {
600        headers.insert(key.clone(), value.clone());
601    }
602}
603
604fn sort_messages(messages: &mut [CanonicalMessage], order_by: OrderBy) {
605    match order_by {
606        OrderBy::Msgid => messages.sort_by(|left, right| {
607            left.msgid
608                .cmp(&right.msgid)
609                .then_with(|| left.msgctxt.cmp(&right.msgctxt))
610                .then_with(|| left.obsolete.cmp(&right.obsolete))
611        }),
612        OrderBy::Origin => messages.sort_by(|left, right| {
613            first_origin_sort_key(&left.origins)
614                .cmp(&first_origin_sort_key(&right.origins))
615                .then_with(|| left.msgid.cmp(&right.msgid))
616                .then_with(|| left.msgctxt.cmp(&right.msgctxt))
617        }),
618    }
619}
620
621fn first_origin_sort_key(origins: &[CatalogOrigin]) -> (String, Option<u32>) {
622    origins.first().map_or_else(
623        || (String::new(), None),
624        |origin| (origin.file.clone(), origin.line),
625    )
626}
627
628fn apply_storage_defaults(
629    catalog: &mut Catalog,
630    options: &UpdateCatalogOptions<'_>,
631    locale: Option<&str>,
632    diagnostics: &mut Vec<Diagnostic>,
633) -> Result<(), ApiError> {
634    match options.storage_format {
635        CatalogStorageFormat::Po => {
636            let empty_custom_headers = BTreeMap::new();
637            apply_header_defaults(
638                &mut catalog.headers,
639                locale,
640                options.semantics,
641                diagnostics,
642                options
643                    .custom_header_attributes
644                    .unwrap_or(&empty_custom_headers),
645            );
646            Ok(())
647        }
648        CatalogStorageFormat::Ndjson => {
649            if options
650                .custom_header_attributes
651                .is_some_and(|headers| !headers.is_empty())
652            {
653                return Err(ApiError::Unsupported(
654                    "custom_header_attributes are not supported for NDJSON catalogs".to_owned(),
655                ));
656            }
657            catalog.headers.clear();
658            Ok(())
659        }
660    }
661}
662
663fn export_catalog_content(
664    catalog: &Catalog,
665    options: &UpdateCatalogOptions<'_>,
666    locale: Option<&str>,
667    diagnostics: &mut Vec<Diagnostic>,
668) -> Result<String, ApiError> {
669    match options.storage_format {
670        CatalogStorageFormat::Po => {
671            let file = export_catalog_to_po(catalog, options, locale, diagnostics)?;
672            Ok(stringify_po(&file, &SerializeOptions::default()))
673        }
674        CatalogStorageFormat::Ndjson => Ok(stringify_catalog_ndjson(
675            catalog,
676            locale,
677            options.source_locale,
678            &options.print_placeholders_in_comments,
679        )),
680    }
681}
682
683/// Converts the canonical in-memory catalog back into a `PoFile` while keeping
684/// file-level comments and header order normalized.
685fn export_catalog_to_po(
686    catalog: &Catalog,
687    options: &UpdateCatalogOptions<'_>,
688    locale: Option<&str>,
689    diagnostics: &mut Vec<Diagnostic>,
690) -> Result<PoFile, ApiError> {
691    let mut file = PoFile {
692        comments: catalog.file_comments.clone(),
693        extracted_comments: catalog.file_extracted_comments.clone(),
694        headers: catalog
695            .headers
696            .iter()
697            .map(|(key, value)| Header {
698                key: key.clone(),
699                value: value.clone(),
700            })
701            .collect(),
702        items: Vec::with_capacity(catalog.messages.len()),
703    };
704
705    for message in &catalog.messages {
706        file.items
707            .push(export_message_to_po(message, options, locale, diagnostics)?);
708    }
709
710    Ok(file)
711}
712
713/// Renders one canonical message into the chosen PO representation.
714///
715/// Singular messages are straightforward, while plural messages either stay as
716/// a synthesized ICU string or are lowered into gettext slots depending on the
717/// caller-selected `PluralEncoding`.
718fn export_message_to_po(
719    message: &CanonicalMessage,
720    options: &UpdateCatalogOptions<'_>,
721    locale: Option<&str>,
722    diagnostics: &mut Vec<Diagnostic>,
723) -> Result<PoItem, ApiError> {
724    match &message.translation {
725        CanonicalTranslation::Singular { value } => {
726            let mut item = base_po_item(message, options, 1);
727            item.msgid.clone_from(&message.msgid);
728            item.msgstr = MsgStr::from(value.clone());
729            Ok(item)
730        }
731        CanonicalTranslation::Plural {
732            source,
733            translation_by_category,
734            variable,
735        } => {
736            if options.semantics == CatalogSemantics::IcuNative {
737                let mut item = base_po_item(message, options, 1);
738                item.msgid = synthesize_icu_plural(variable, &plural_source_branches(source));
739                item.msgstr =
740                    MsgStr::from(synthesize_icu_plural(variable, translation_by_category));
741                return Ok(item);
742            }
743
744            let plural_profile = PluralProfile::for_translation(locale, translation_by_category);
745            let nplurals = plural_profile
746                .nplurals()
747                .max(translation_by_category.len().max(1));
748            let mut item = base_po_item(message, options, nplurals);
749
750            if !translation_by_category.contains_key("other") {
751                diagnostics.push(
752                    Diagnostic::new(
753                        DiagnosticSeverity::Error,
754                        "plural.unsupported_gettext_export",
755                        "Plural translation is missing the required \"other\" category.",
756                    )
757                    .with_identity(&message.msgid, message.msgctxt.as_deref()),
758                );
759                return Err(ApiError::Unsupported(
760                    "plural translation is missing the required \"other\" category".to_owned(),
761                ));
762            }
763            item.msgid = source.one.clone().unwrap_or_else(|| source.other.clone());
764            item.msgid_plural = Some(source.other.clone());
765            item.msgstr = MsgStr::from(plural_profile.gettext_values(translation_by_category));
766            item.nplurals = plural_profile.nplurals();
767
768            Ok(item)
769        }
770    }
771}
772
773/// Builds the common `PoItem` shell shared by singular and plural export.
774fn base_po_item(
775    message: &CanonicalMessage,
776    options: &UpdateCatalogOptions<'_>,
777    nplurals: usize,
778) -> PoItem {
779    let mut item = PoItem::new(nplurals);
780    item.msgctxt.clone_from(&message.msgctxt);
781    item.comments.clone_from(&message.translator_comments);
782    item.flags.clone_from(&message.flags);
783    item.obsolete = message.obsolete;
784    item.extracted_comments.clone_from(&message.comments);
785    append_placeholder_comments(
786        &mut item.extracted_comments,
787        &message.placeholders,
788        &options.print_placeholders_in_comments,
789    );
790    item.references = if options.include_origins {
791        message
792            .origins
793            .iter()
794            .map(|origin| {
795                if options.include_line_numbers {
796                    origin.line.map_or_else(
797                        || origin.file.clone(),
798                        |line| format!("{}:{line}", origin.file),
799                    )
800                } else {
801                    origin.file.clone()
802                }
803            })
804            .collect()
805    } else {
806        Vec::new()
807    };
808    item
809}
810
811/// Builds the minimal category map needed to re-synthesize a source ICU plural.
812pub(super) fn plural_source_branches(source: &PluralSource) -> BTreeMap<String, String> {
813    let mut map = BTreeMap::new();
814    if let Some(one) = &source.one {
815        map.insert("one".to_owned(), one.clone());
816    }
817    map.insert("other".to_owned(), source.other.clone());
818    map
819}
820
821/// Emits extracted placeholder comments only for numeric placeholders, which
822/// mirrors how gettext tools commonly surface ordered placeholder hints.
823pub(super) fn append_placeholder_comments(
824    comments: &mut Vec<String>,
825    placeholders: &BTreeMap<String, Vec<String>>,
826    mode: &PlaceholderCommentMode,
827) {
828    let limit = match mode {
829        PlaceholderCommentMode::Disabled => return,
830        PlaceholderCommentMode::Enabled { limit } => *limit,
831    };
832
833    let mut seen = comments.iter().cloned().collect::<BTreeSet<String>>();
834
835    for (name, values) in placeholders {
836        if !name.chars().all(|ch| ch.is_ascii_digit()) {
837            continue;
838        }
839        for value in values.iter().take(limit) {
840            let comment = format!(
841                "placeholder {{{name}}}: {}",
842                normalize_placeholder_value(value)
843            );
844            if seen.insert(comment.clone()) {
845                comments.push(comment);
846            }
847        }
848    }
849}
850
/// Flattens a placeholder value onto one line by turning every `\n` into a space.
fn normalize_placeholder_value(value: &str) -> String {
    value
        .chars()
        .map(|ch| if ch == '\n' { ' ' } else { ch })
        .collect()
}
854
855/// Parses catalog text into the canonical internal catalog representation used by
856/// both `parse_catalog` and `update_catalog`.
857///
858/// Keeping this internal representation stable lets the public APIs share one
859/// import path before they diverge into normalized lookup or update/export work.
860fn parse_catalog_to_internal(
861    content: &str,
862    locale_override: Option<&str>,
863    source_locale: &str,
864    semantics: CatalogSemantics,
865    plural_encoding: PluralEncoding,
866    strict: bool,
867    storage_format: CatalogStorageFormat,
868) -> Result<Catalog, ApiError> {
869    match storage_format {
870        CatalogStorageFormat::Po => parse_catalog_to_internal_po(
871            content,
872            locale_override,
873            semantics,
874            plural_encoding,
875            strict,
876        ),
877        CatalogStorageFormat::Ndjson => parse_catalog_to_internal_ndjson(
878            content,
879            locale_override,
880            source_locale,
881            semantics,
882            strict,
883        ),
884    }
885}
886
887fn parse_catalog_to_internal_po(
888    content: &str,
889    locale_override: Option<&str>,
890    semantics: CatalogSemantics,
891    _plural_encoding: PluralEncoding,
892    strict: bool,
893) -> Result<Catalog, ApiError> {
894    let file = parse_po(content)?;
895    let headers = file
896        .headers
897        .iter()
898        .map(|header| (header.key.clone(), header.value.clone()))
899        .collect::<BTreeMap<_, _>>();
900    let locale = locale_override
901        .map(str::to_owned)
902        .or_else(|| headers.get("Language").cloned());
903    let plural_forms = parse_plural_forms_from_headers(&headers);
904    let nplurals = plural_forms.nplurals;
905    let mut diagnostics = Vec::new();
906    validate_plural_forms_header(
907        locale.as_deref(),
908        &plural_forms,
909        semantics,
910        &mut diagnostics,
911    );
912    let mut messages = Vec::with_capacity(file.items.len());
913
914    for item in file.items {
915        let mut conversion_diagnostics = Vec::new();
916        let message = import_message_from_po(
917            item,
918            locale.as_deref(),
919            nplurals,
920            semantics,
921            strict,
922            &mut conversion_diagnostics,
923        )?;
924        diagnostics.extend(conversion_diagnostics);
925        messages.push(message);
926    }
927
928    Ok(Catalog {
929        locale,
930        headers,
931        file_comments: file.comments,
932        file_extracted_comments: file.extracted_comments,
933        messages,
934        diagnostics,
935    })
936}
937
938/// Converts one parsed `PoItem` into the canonical internal message form.
939///
940/// The branching is intentionally centralized here so that gettext plural slot
941/// import, ICU projection, and all associated diagnostics stay in one semantic
942/// decision point.
943fn import_message_from_po(
944    item: PoItem,
945    locale: Option<&str>,
946    nplurals: Option<usize>,
947    semantics: CatalogSemantics,
948    _strict: bool,
949    _diagnostics: &mut Vec<Diagnostic>,
950) -> Result<CanonicalMessage, ApiError> {
951    let (comments, placeholders) = split_placeholder_comments(item.extracted_comments);
952    let origins = item
953        .references
954        .iter()
955        .map(|reference| parse_origin(reference))
956        .collect();
957
958    let translation = if let Some(msgid_plural) = &item.msgid_plural {
959        if semantics == CatalogSemantics::IcuNative {
960            return Err(ApiError::Unsupported(
961                "classic gettext plural requires compat mode".to_owned(),
962            ));
963        }
964        let plural_profile =
965            PluralProfile::for_gettext_slots(locale, nplurals.or(Some(item.msgstr.len())));
966        CanonicalTranslation::Plural {
967            source: PluralSource {
968                one: Some(item.msgid.clone()),
969                other: msgid_plural.clone(),
970            },
971            translation_by_category: plural_profile
972                .categories()
973                .iter()
974                .enumerate()
975                .map(|(index, category)| {
976                    (
977                        category.clone(),
978                        item.msgstr.iter().nth(index).cloned().unwrap_or_default(),
979                    )
980                })
981                .collect(),
982            variable: "count".to_owned(),
983        }
984    } else {
985        if semantics == CatalogSemantics::IcuNative && matches!(item.msgstr, MsgStr::Plural(_)) {
986            return Err(ApiError::Unsupported(
987                "classic gettext plural requires compat mode".to_owned(),
988            ));
989        }
990        CanonicalTranslation::Singular {
991            value: item.msgstr.first_str().unwrap_or_default().to_owned(),
992        }
993    };
994
995    Ok(CanonicalMessage {
996        msgid: item.msgid,
997        msgctxt: item.msgctxt,
998        translation,
999        comments,
1000        origins,
1001        placeholders,
1002        obsolete: item.obsolete,
1003        translator_comments: item.comments,
1004        flags: item.flags,
1005    })
1006}
1007
1008/// Splits extractor-style placeholder comments back out of the generic
1009/// extracted-comment list during PO import.
1010pub(super) fn split_placeholder_comments(
1011    extracted_comments: Vec<String>,
1012) -> (Vec<String>, BTreeMap<String, Vec<String>>) {
1013    let mut comments = Vec::new();
1014    let mut placeholders = BTreeMap::<String, Vec<String>>::new();
1015
1016    for comment in extracted_comments {
1017        if let Some((name, value)) = parse_placeholder_comment(&comment) {
1018            placeholders.entry(name).or_default().push(value);
1019        } else {
1020            comments.push(comment);
1021        }
1022    }
1023
1024    (comments, dedupe_placeholders(placeholders))
1025}
1026
/// Recognizes the `placeholder {NAME}: VALUE` comment shape written by
/// `append_placeholder_comments` and returns `(NAME, VALUE)`.
///
/// The name ends at the first `}: ` separator, so the value may itself contain
/// that sequence. Returns `None` when the prefix or the separator is missing.
fn parse_placeholder_comment(comment: &str) -> Option<(String, String)> {
    let (name, value) = comment
        .strip_prefix("placeholder {")?
        .split_once("}: ")?;
    Some((name.to_owned(), value.to_owned()))
}
1033
1034/// Parses a gettext reference while tolerating plain paths and `path:line`.
1035fn parse_origin(reference: &str) -> CatalogOrigin {
1036    match reference.rsplit_once(':') {
1037        Some((file, line)) if line.chars().all(|ch| ch.is_ascii_digit()) => CatalogOrigin {
1038            file: file.to_owned(),
1039            line: line.parse::<u32>().ok(),
1040        },
1041        _ => CatalogOrigin {
1042            file: reference.to_owned(),
1043            line: None,
1044        },
1045    }
1046}
1047
1048/// Extracts the small `Plural-Forms` subset that Ferrocat needs for diagnostics
1049/// and gettext-slot interpretation.
1050fn parse_plural_forms_from_headers(headers: &BTreeMap<String, String>) -> ParsedPluralFormsHeader {
1051    let Some(plural_forms) = headers.get("Plural-Forms") else {
1052        return ParsedPluralFormsHeader::default();
1053    };
1054
1055    let mut parsed = ParsedPluralFormsHeader {
1056        raw: Some(plural_forms.clone()),
1057        ..ParsedPluralFormsHeader::default()
1058    };
1059    for part in plural_forms.split(';') {
1060        let trimmed = part.trim();
1061        if let Some(value) = trimmed.strip_prefix("nplurals=") {
1062            parsed.nplurals = value.trim().parse().ok();
1063        } else if let Some(value) = trimmed.strip_prefix("plural=") {
1064            let value = value.trim();
1065            if !value.is_empty() {
1066                parsed.plural = Some(value.to_owned());
1067            }
1068        }
1069    }
1070
1071    parsed
1072}
1073
1074/// Validates only the invariants that materially affect Ferrocat's plural
1075/// interpretation, keeping the diagnostics focused on actionable mismatches.
1076fn validate_plural_forms_header(
1077    locale: Option<&str>,
1078    plural_forms: &ParsedPluralFormsHeader,
1079    semantics: CatalogSemantics,
1080    diagnostics: &mut Vec<Diagnostic>,
1081) {
1082    if semantics != CatalogSemantics::GettextCompat {
1083        return;
1084    }
1085
1086    if let Some(nplurals) = plural_forms.nplurals {
1087        let profile = PluralProfile::for_locale(locale);
1088        let expected = profile.nplurals();
1089        if locale.is_some() && nplurals != expected {
1090            diagnostics.push(Diagnostic::new(
1091                DiagnosticSeverity::Warning,
1092                "plural.nplurals_locale_mismatch",
1093                format!(
1094                    "Plural-Forms declares nplurals={nplurals}, but locale-derived categories expect {expected}."
1095                ),
1096            ));
1097        }
1098    } else if plural_forms.plural.is_some() {
1099        diagnostics.push(Diagnostic::new(
1100            DiagnosticSeverity::Warning,
1101            "parse.invalid_plural_forms_header",
1102            "Plural-Forms header contains a plural expression but no parseable nplurals value.",
1103        ));
1104    }
1105
1106    if plural_forms.nplurals.is_some() && plural_forms.plural.is_none() {
1107        diagnostics.push(Diagnostic::new(
1108            DiagnosticSeverity::Info,
1109            "plural.missing_plural_expression",
1110            "Plural-Forms header declares nplurals but omits the plural expression.",
1111        ));
1112    }
1113}
1114
1115/// Rebuilds the public `CatalogMessage` shape from the canonical internal form.
1116fn public_message_from_canonical(message: CanonicalMessage) -> CatalogMessage {
1117    let translation = match message.translation {
1118        CanonicalTranslation::Singular { value } => TranslationShape::Singular { value },
1119        CanonicalTranslation::Plural {
1120            source,
1121            translation_by_category,
1122            variable,
1123            ..
1124        } => TranslationShape::Plural {
1125            source,
1126            translation: translation_by_category,
1127            variable,
1128        },
1129    };
1130
1131    CatalogMessage {
1132        msgid: message.msgid,
1133        msgctxt: message.msgctxt,
1134        translation,
1135        comments: message.comments,
1136        origin: message.origins,
1137        obsolete: message.obsolete,
1138        extra: Some(CatalogMessageExtra {
1139            translator_comments: message.translator_comments,
1140            flags: message.flags,
1141        }),
1142    }
1143}