Skip to main content

ferrocat_po/
lib.rs

1#![warn(missing_docs, rustdoc::broken_intra_doc_links)]
2//! Performance-first PO parsing and serialization.
3//!
4//! The crate exposes both owned and borrowed parsers for gettext PO files,
5//! plus helpers for serialization and higher-level catalog update workflows.
6//!
7//! # Examples
8//!
9//! ```rust
10//! use ferrocat_po::{PoFile, SerializeOptions, parse_po, stringify_po};
11//!
12//! let input = "msgid \"Hello\"\nmsgstr \"Hallo\"\n";
13//! let file = parse_po(input)?;
14//! assert_eq!(file.items[0].msgid, "Hello");
15//!
16//! let output = stringify_po(&file, &SerializeOptions::default());
17//! assert!(output.contains("msgid \"Hello\""));
18//! # Ok::<(), ferrocat_po::ParseError>(())
19//! ```
20//!
21//! ```rust
22//! use ferrocat_po::{
23//!     CompileCatalogArtifactOptions, CompileSelectedCatalogArtifactOptions,
24//!     CompiledCatalogIdIndex, ParseCatalogOptions, compile_catalog_artifact_selected,
25//!     parse_catalog,
26//! };
27//!
28//! let source = parse_catalog(ParseCatalogOptions {
29//!     content: "msgid \"Hello\"\nmsgstr \"Hello\"\n",
30//!     source_locale: "en",
31//!     locale: Some("en"),
32//!     ..ParseCatalogOptions::default()
33//! })?
34//! .into_normalized_view()?;
35//! let requested = parse_catalog(ParseCatalogOptions {
36//!     content: "msgid \"Hello\"\nmsgstr \"Hallo\"\n",
37//!     source_locale: "en",
38//!     locale: Some("de"),
39//!     ..ParseCatalogOptions::default()
40//! })?
41//! .into_normalized_view()?;
42//! let index = CompiledCatalogIdIndex::new(&[&requested, &source], ferrocat_po::CompiledKeyStrategy::FerrocatV1)?;
43//! let compiled_ids = index.iter().map(|(id, _)| id.to_owned()).collect::<Vec<_>>();
44//! let compiled = compile_catalog_artifact_selected(
45//!     &[&requested, &source],
46//!     &index,
47//!     &CompileSelectedCatalogArtifactOptions {
48//!         requested_locale: "de",
49//!         source_locale: "en",
50//!         compiled_ids: &compiled_ids,
51//!         ..CompileSelectedCatalogArtifactOptions::default()
52//!     },
53//! )?;
54//!
55//! assert_eq!(compiled.messages.len(), 1);
56//! # Ok::<(), Box<dyn std::error::Error>>(())
57//! ```
58
59mod api;
60mod borrowed;
61mod merge;
62mod parse;
63mod scan;
64mod serialize;
65mod text;
66mod utf8;
67
68pub use api::{
69    ApiError, CatalogMessage, CatalogMessageExtra, CatalogMessageKey, CatalogOrigin,
70    CatalogSemantics, CatalogStats, CatalogStorageFormat, CatalogUpdateInput, CatalogUpdateResult,
71    CompileCatalogArtifactOptions, CompileCatalogOptions, CompileSelectedCatalogArtifactOptions,
72    CompiledCatalog, CompiledCatalogArtifact, CompiledCatalogDiagnostic,
73    CompiledCatalogIdDescription, CompiledCatalogIdIndex, CompiledCatalogMissingMessage,
74    CompiledCatalogTranslationKind, CompiledCatalogUnavailableId, CompiledKeyStrategy,
75    CompiledMessage, CompiledTranslation, DescribeCompiledIdsReport, Diagnostic,
76    DiagnosticSeverity, EffectiveTranslation, EffectiveTranslationRef, ExtractedMessage,
77    ExtractedPluralMessage, ExtractedSingularMessage, NormalizedParsedCatalog, ObsoleteStrategy,
78    OrderBy, ParseCatalogOptions, ParsedCatalog, PlaceholderCommentMode, PluralEncoding,
79    PluralSource, SourceExtractedMessage, TranslationShape, UpdateCatalogFileOptions,
80    UpdateCatalogOptions, compile_catalog_artifact, compile_catalog_artifact_selected,
81    compiled_key, parse_catalog, update_catalog, update_catalog_file,
82};
83pub use borrowed::{
84    BorrowedHeader, BorrowedMsgStr, BorrowedPoFile, BorrowedPoItem, parse_po_borrowed,
85};
86pub use merge::{ExtractedMessage as MergeExtractedMessage, merge_catalog};
87pub use parse::parse_po;
88pub use serialize::stringify_po;
89pub use text::{escape_string, extract_quoted, extract_quoted_cow, unescape_string};
90
91use core::{fmt, ops::Index};
92
93/// An owned PO document.
94#[derive(Debug, Clone, PartialEq, Eq, Default)]
95pub struct PoFile {
96    /// File-level translator comments that appear before the header block.
97    pub comments: Vec<String>,
98    /// File-level extracted comments that appear before the header block.
99    pub extracted_comments: Vec<String>,
100    /// Parsed header entries from the leading empty `msgid` block.
101    pub headers: Vec<Header>,
102    /// Regular catalog items in source order.
103    pub items: Vec<PoItem>,
104}
105
/// One entry of the PO header block, stored as a name/value pair.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Header {
    /// Name of the header, for example `Language` or `Plural-Forms`.
    pub key: String,
    /// Value of the header, with the trailing newline already removed.
    pub value: String,
}
114
115/// A single gettext message entry.
116#[derive(Debug, Clone, PartialEq, Eq, Default)]
117pub struct PoItem {
118    /// Source message identifier.
119    pub msgid: String,
120    /// Optional gettext message context.
121    pub msgctxt: Option<String>,
122    /// Source references such as `src/app.rs:10`.
123    pub references: Vec<String>,
124    /// Optional plural source identifier.
125    pub msgid_plural: Option<String>,
126    /// Translation payload for the message.
127    pub msgstr: MsgStr,
128    /// Translator comments attached to the item.
129    pub comments: Vec<String>,
130    /// Extracted comments attached to the item.
131    pub extracted_comments: Vec<String>,
132    /// Flags such as `fuzzy`.
133    pub flags: Vec<String>,
134    /// Raw metadata lines that do not fit the dedicated fields.
135    pub metadata: Vec<(String, String)>,
136    /// Whether the item is marked obsolete.
137    pub obsolete: bool,
138    /// Number of plural slots expected when the item is serialized.
139    pub nplurals: usize,
140}
141
142impl PoItem {
143    /// Creates an empty message entry with space for `nplurals` plural slots.
144    #[must_use]
145    pub fn new(nplurals: usize) -> Self {
146        Self {
147            nplurals,
148            ..Self::default()
149        }
150    }
151
152    pub(crate) fn clear_for_reuse(&mut self, nplurals: usize) {
153        self.msgid.clear();
154        self.msgctxt = None;
155        self.references.clear();
156        self.msgid_plural = None;
157        self.msgstr = MsgStr::None;
158        self.comments.clear();
159        self.extracted_comments.clear();
160        self.flags.clear();
161        self.metadata.clear();
162        self.obsolete = false;
163        self.nplurals = nplurals;
164    }
165}
166
/// Translation payload carried by a PO item.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum MsgStr {
    /// The item carries no translation values at all (the default).
    #[default]
    None,
    /// Exactly one translation string.
    Singular(String),
    /// One translation string per plural slot, indexed by slot number.
    Plural(Vec<String>),
}
178
179impl MsgStr {
180    /// Returns `true` when no translation values are present.
181    #[must_use]
182    pub const fn is_empty(&self) -> bool {
183        matches!(self, Self::None)
184    }
185
186    /// Returns the number of translation values present.
187    #[must_use]
188    pub fn len(&self) -> usize {
189        match self {
190            Self::None => 0,
191            Self::Singular(_) => 1,
192            Self::Plural(values) => values.len(),
193        }
194    }
195
196    /// Returns the first translation value, if present.
197    #[must_use]
198    pub fn first(&self) -> Option<&String> {
199        match self {
200            Self::None => None,
201            Self::Singular(value) => Some(value),
202            Self::Plural(values) => values.first(),
203        }
204    }
205
206    /// Returns the first translation value as `&str`, if present.
207    #[must_use]
208    pub fn first_str(&self) -> Option<&str> {
209        self.first().map(String::as_str)
210    }
211
212    /// Returns the translation at `index` without panicking.
213    #[must_use]
214    pub fn get(&self, index: usize) -> Option<&str> {
215        match self {
216            Self::Singular(value) if index == 0 => Some(value.as_str()),
217            Self::None | Self::Singular(_) => None,
218            Self::Plural(values) => values.get(index).map(String::as_str),
219        }
220    }
221
222    /// Iterates over all translation values in order.
223    #[must_use]
224    pub fn iter(&self) -> MsgStrIter<'_> {
225        match self {
226            Self::None => MsgStrIter::empty(),
227            Self::Singular(value) => MsgStrIter::single(value),
228            Self::Plural(values) => MsgStrIter::many(values.iter()),
229        }
230    }
231
232    /// Converts the translation payload into an owned vector.
233    #[must_use]
234    pub fn into_vec(self) -> Vec<String> {
235        match self {
236            Self::None => Vec::new(),
237            Self::Singular(value) => vec![value],
238            Self::Plural(values) => values,
239        }
240    }
241}
242
243impl From<String> for MsgStr {
244    fn from(value: String) -> Self {
245        Self::Singular(value)
246    }
247}
248
249impl From<Vec<String>> for MsgStr {
250    fn from(values: Vec<String>) -> Self {
251        match values.len() {
252            0 => Self::None,
253            1 => Self::Singular(values.into_iter().next().expect("single msgstr value")),
254            _ => Self::Plural(values),
255        }
256    }
257}
258
259impl<'a> IntoIterator for &'a MsgStr {
260    type Item = &'a String;
261    type IntoIter = MsgStrIter<'a>;
262
263    fn into_iter(self) -> Self::IntoIter {
264        self.iter()
265    }
266}
267
268impl Index<usize> for MsgStr {
269    type Output = String;
270
271    fn index(&self, index: usize) -> &Self::Output {
272        match self {
273            Self::None => panic!("msgstr index out of bounds: no translations present"),
274            Self::Singular(value) if index == 0 => value,
275            Self::Singular(_) => panic!("msgstr index out of bounds: singular translation"),
276            Self::Plural(values) => &values[index],
277        }
278    }
279}
280
/// Iterator over [`MsgStr`] values.
///
/// Yields `&String` items in order. The number of remaining items is always
/// known, so the iterator reports an exact `size_hint` and implements
/// [`ExactSizeIterator`] and [`std::iter::FusedIterator`].
#[derive(Debug, Clone)]
pub struct MsgStrIter<'a> {
    inner: MsgStrIterInner<'a>,
}

/// Backing state of [`MsgStrIter`], one variant per source shape.
#[derive(Debug, Clone)]
enum MsgStrIterInner<'a> {
    /// Nothing to yield.
    Empty,
    /// At most one pending value; becomes `None` once it has been yielded.
    Single(Option<&'a String>),
    /// Remaining values of a borrowed slice of strings.
    Many(std::slice::Iter<'a, String>),
}

impl<'a> MsgStrIter<'a> {
    /// Creates an iterator that yields nothing.
    const fn empty() -> Self {
        Self {
            inner: MsgStrIterInner::Empty,
        }
    }

    /// Creates an iterator that yields `value` exactly once.
    const fn single(value: &'a String) -> Self {
        Self {
            inner: MsgStrIterInner::Single(Some(value)),
        }
    }

    /// Creates an iterator that forwards to a slice iterator.
    const fn many(iter: std::slice::Iter<'a, String>) -> Self {
        Self {
            inner: MsgStrIterInner::Many(iter),
        }
    }
}

impl<'a> Iterator for MsgStrIter<'a> {
    type Item = &'a String;

    fn next(&mut self) -> Option<Self::Item> {
        match &mut self.inner {
            MsgStrIterInner::Empty => None,
            // `take` leaves `None` behind, so the value is yielded only once.
            MsgStrIterInner::Single(value) => value.take(),
            MsgStrIterInner::Many(iter) => iter.next(),
        }
    }

    /// Reports the exact number of remaining items so that consumers such as
    /// `collect` can reserve capacity up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = match &self.inner {
            MsgStrIterInner::Empty => 0,
            MsgStrIterInner::Single(value) => usize::from(value.is_some()),
            MsgStrIterInner::Many(iter) => iter.len(),
        };
        (remaining, Some(remaining))
    }
}

// `size_hint` is exact for every variant, so `len()` can be offered.
impl ExactSizeIterator for MsgStrIter<'_> {}

// Every variant keeps returning `None` once it is exhausted.
impl std::iter::FusedIterator for MsgStrIter<'_> {}
323
/// Knobs that influence how PO content is written out.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SerializeOptions {
    /// Soft limit at which long string literals are preferably wrapped.
    pub fold_length: usize,
    /// If `true`, values that fit on one line are kept compact rather than
    /// always being expanded.
    pub compact_multiline: bool,
}

impl Default for SerializeOptions {
    /// Defaults to a fold length of 80 with compact one-line values enabled.
    fn default() -> Self {
        let fold_length = 80;
        let compact_multiline = true;
        Self {
            fold_length,
            compact_multiline,
        }
    }
}
341
/// Failure reported when PO content cannot be parsed or unescaped.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    message: String,
}

impl ParseError {
    /// Builds a parse error that carries `message` as its description.
    #[must_use]
    pub fn new(message: impl Into<String>) -> Self {
        let message = message.into();
        Self { message }
    }
}

impl fmt::Display for ParseError {
    /// Writes the stored message verbatim; width and padding flags are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.message.as_str())
    }
}

impl std::error::Error for ParseError {}
365
#[cfg(test)]
mod tests {
    use super::MsgStr;

    /// The `None` payload has no value to return, not even at index 0.
    #[test]
    fn msgstr_get_returns_none_for_empty_values() {
        let empty = MsgStr::None;

        assert_eq!(empty.get(0), None);
    }

    /// A payload converted from one string answers only for index 0.
    #[test]
    fn msgstr_get_returns_singular_value_at_zero() {
        let singular: MsgStr = String::from("Hallo").into();

        assert_eq!(singular.get(0), Some("Hallo"));
        assert_eq!(singular.get(1), None);
    }

    /// A payload converted from two strings answers per slot and returns
    /// `None` past the end.
    #[test]
    fn msgstr_get_returns_plural_values_by_index() {
        let plural: MsgStr = vec![String::from("eins"), String::from("viele")].into();
        let expected = [Some("eins"), Some("viele"), None];

        for (index, want) in expected.iter().enumerate() {
            assert_eq!(plural.get(index), *want);
        }
    }
}
393}