// Source path: citum_schema_style/options/multilingual.rs

/*
SPDX-License-Identifier: MIT OR Apache-2.0
SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
*/
5
6#[cfg(feature = "schema")]
7use schemars::JsonSchema;
8use serde::{Deserialize, Serialize, de};
9use std::collections::HashMap;
10
11/// Multilingual rendering options.
12#[derive(Debug, Default, PartialEq, Clone, Serialize, Deserialize)]
13#[cfg_attr(feature = "schema", derive(JsonSchema))]
14#[serde(rename_all = "kebab-case")]
15pub struct MultilingualConfig {
16    /// Preferred rendering mode for titles.
17    #[serde(skip_serializing_if = "Option::is_none")]
18    pub title_mode: Option<MultilingualMode>,
19    /// Preferred rendering mode for names.
20    #[serde(skip_serializing_if = "Option::is_none")]
21    pub name_mode: Option<MultilingualMode>,
22    /// Preferred script for transliterations (e.g., "Latn").
23    #[serde(skip_serializing_if = "Option::is_none")]
24    pub preferred_script: Option<String>,
25    /// Ordered priority list of BCP 47 transliteration tags (e.g. `["ja-Latn-hepburn", "ja-Latn"]`).
26    /// Takes precedence over `preferred_script` when resolving transliterations.
27    #[serde(skip_serializing_if = "Option::is_none")]
28    pub preferred_transliteration: Option<Vec<String>>,
29    /// Script-specific behavior configuration.
30    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
31    pub scripts: HashMap<String, ScriptConfig>,
32}
33
34/// Rendering modes for multilingual content.
35#[derive(Debug, PartialEq, Clone, Serialize)]
36#[cfg_attr(feature = "schema", derive(JsonSchema))]
37#[serde(rename_all = "kebab-case")]
38pub enum MultilingualMode {
39    /// Use original script.
40    Primary,
41    /// Use transliteration.
42    Transliterated,
43    /// Use translation matching style locale.
44    Translated,
45    /// Combine transliteration and translation: `romanized [translated]`.
46    /// Equivalent to `Pattern([transliterated, {translated, brackets}])`.
47    Combined,
48    /// Ordered sequence of views joined by spaces.
49    ///
50    /// Use this when a style requires more than two views — e.g. Chicago's
51    /// `romanized original-script [translated]` or MLA's `original-script [translated]`.
52    /// Each segment specifies a view and an optional bracket wrap.
53    /// Segments whose resolved text is empty or identical to the previous
54    /// segment are silently skipped (dedup).
55    ///
56    /// YAML form:
57    /// ```yaml
58    /// title-mode:
59    ///   pattern:
60    ///     - view: transliterated
61    ///     - view: original-script
62    ///     - view: translated
63    ///       wrap: brackets
64    /// ```
65    Pattern(Vec<MultilingualSegment>),
66}
67
68/// A single view segment in a `MultilingualMode::Pattern`.
69#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
70#[cfg_attr(feature = "schema", derive(JsonSchema))]
71#[serde(rename_all = "kebab-case")]
72pub struct MultilingualSegment {
73    /// Which textual view to render for this segment.
74    pub view: MultilingualView,
75    /// Optional wrapping applied around the resolved text.
76    #[serde(default, skip_serializing_if = "SegmentWrap::is_none")]
77    pub wrap: SegmentWrap,
78}
79
80/// Which textual representation of a multilingual field to use in a pattern segment.
81#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
82#[cfg_attr(feature = "schema", derive(JsonSchema))]
83#[serde(rename_all = "kebab-case")]
84pub enum MultilingualView {
85    /// The original-script text as stored in the data.
86    OriginalScript,
87    /// The transliterated (romanized) form.
88    Transliterated,
89    /// The translation matching the style locale.
90    Translated,
91}
92
93/// Bracket wrapping applied to a pattern segment.
94#[derive(Debug, Default, PartialEq, Clone, Serialize, Deserialize)]
95#[cfg_attr(feature = "schema", derive(JsonSchema))]
96#[serde(rename_all = "kebab-case")]
97pub enum SegmentWrap {
98    /// No wrapping.
99    #[default]
100    None,
101    /// Wrap in square brackets: `[text]`.
102    Brackets,
103    /// Wrap in parentheses: `(text)`.
104    Parentheses,
105}
106
107impl SegmentWrap {
108    /// Returns `true` when the variant is `None` (used for skip-serializing).
109    pub fn is_none(&self) -> bool {
110        matches!(self, SegmentWrap::None)
111    }
112
113    /// Apply the wrap to a string slice, returning the wrapped form.
114    pub fn apply(&self, text: &str) -> String {
115        match self {
116            SegmentWrap::None => text.to_string(),
117            SegmentWrap::Brackets => format!("[{text}]"),
118            SegmentWrap::Parentheses => format!("({text})"),
119        }
120    }
121}
122
123/// Configuration for specific scripts.
124#[derive(Debug, Default, PartialEq, Clone, Serialize, Deserialize)]
125#[cfg_attr(feature = "schema", derive(JsonSchema))]
126#[serde(rename_all = "kebab-case")]
127pub struct ScriptConfig {
128    /// Whether to use native ordering for this script (e.g., FamilyGiven for CJK).
129    #[serde(default)]
130    pub use_native_ordering: bool,
131    /// Custom delimiter between name parts for this script.
132    #[serde(skip_serializing_if = "Option::is_none")]
133    pub delimiter: Option<String>,
134    /// Custom delimiter between family and given name when this script is inverted.
135    #[serde(skip_serializing_if = "Option::is_none")]
136    pub sort_separator: Option<String>,
137}
138
139/// Custom deserializer for [`MultilingualMode`].
140///
141/// Unit variants are accepted as plain strings (`"primary"`, `"transliterated"`, etc.).
142/// The `Pattern` variant is accepted as a single-key map `{pattern: [...]}`.
143///
144/// A hand-written `deserialize_any` visitor is used instead of serde's derived
145/// `deserialize_enum` because `serde_yaml` cannot pass enum-variant input through
146/// an outer `#[serde(untagged)]` wrapper — the standard derive would fail with
147/// *"untagged and internally tagged enums do not support enum input"* when a
148/// serialized `Pattern` value is round-tripped through [`crate::presets::MultilingualConfigEntry`].
149impl<'de> de::Deserialize<'de> for MultilingualMode {
150    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
151    where
152        D: de::Deserializer<'de>,
153    {
154        struct ModeVisitor;
155
156        impl<'de> de::Visitor<'de> for ModeVisitor {
157            type Value = MultilingualMode;
158
159            fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
160                write!(
161                    f,
162                    "a multilingual mode string (\"primary\", \"transliterated\", \
163                     \"translated\", \"combined\") or a pattern object {{pattern: [...]}}"
164                )
165            }
166
167            fn visit_str<E: de::Error>(self, v: &str) -> Result<Self::Value, E> {
168                match v {
169                    "primary" => Ok(MultilingualMode::Primary),
170                    "transliterated" => Ok(MultilingualMode::Transliterated),
171                    "translated" => Ok(MultilingualMode::Translated),
172                    "combined" => Ok(MultilingualMode::Combined),
173                    _ => Err(E::unknown_variant(
174                        v,
175                        &[
176                            "primary",
177                            "transliterated",
178                            "translated",
179                            "combined",
180                            "pattern",
181                        ],
182                    )),
183                }
184            }
185
186            fn visit_map<A: de::MapAccess<'de>>(self, mut map: A) -> Result<Self::Value, A::Error> {
187                let key: String = map
188                    .next_key()?
189                    .ok_or_else(|| de::Error::custom("expected \"pattern\" key, got empty map"))?;
190                if key != "pattern" {
191                    return Err(de::Error::unknown_field(&key, &["pattern"]));
192                }
193                let segments: Vec<MultilingualSegment> = map.next_value()?;
194                if map.next_key::<String>()?.is_some() {
195                    return Err(de::Error::custom("unexpected extra key in pattern object"));
196                }
197                Ok(MultilingualMode::Pattern(segments))
198            }
199
200            /// serde_yaml serialises `{pattern: [...]}` as an externally-tagged enum
201            /// (not a plain map), so we also need to handle enum access.
202            fn visit_enum<A: de::EnumAccess<'de>>(self, data: A) -> Result<Self::Value, A::Error> {
203                use de::VariantAccess as _;
204                let (variant, access): (String, _) = data.variant()?;
205                if variant == "pattern" {
206                    let segments: Vec<MultilingualSegment> = access.newtype_variant()?;
207                    Ok(MultilingualMode::Pattern(segments))
208                } else {
209                    Err(de::Error::unknown_variant(
210                        &variant,
211                        &[
212                            "primary",
213                            "transliterated",
214                            "translated",
215                            "combined",
216                            "pattern",
217                        ],
218                    ))
219                }
220            }
221        }
222
223        deserializer.deserialize_any(ModeVisitor)
224    }
225}