Skip to main content

citum_engine/values/
mod.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! Value extraction for template components.
7//!
8//! This module provides the logic to extract formatted values from references
9//! based on template component specifications.
10
11/// Contributor extraction and name-formatting helpers.
12pub mod contributor;
13/// Date extraction and date-formatting helpers.
14pub mod date;
15/// List-component value extraction helpers.
16pub mod list;
17/// Locator rendering logic.
18pub mod locator;
19/// Numeric variable extraction and page-range helpers.
20pub mod number;
21/// Shared helpers for collapsing consecutive numeric or ordinal numbering.
22pub mod range;
23/// Locale term resolution helpers.
24pub mod term;
25/// Title text-case transform functions.
26pub mod text_case;
27/// Title extraction and title-formatting helpers.
28pub mod title;
29/// Generic variable extraction helpers.
30pub mod variable;
31
32#[cfg(test)]
33#[allow(
34    clippy::unwrap_used,
35    clippy::expect_used,
36    clippy::panic,
37    clippy::indexing_slicing,
38    clippy::todo,
39    clippy::unimplemented,
40    clippy::unreachable,
41    clippy::get_unwrap,
42    reason = "Panicking is acceptable and often desired in tests."
43)]
44mod tests;
45
46use crate::reference::Reference;
47use citum_schema::locale::Locale;
48use citum_schema::options::{Config, bibliography::BibliographyConfig};
49use citum_schema::reference::types::Title;
50use citum_schema::template::{TemplateComponent, TitleType};
51
52pub use contributor::format_contributors_short;
53pub use date::int_to_letter;
54
/// Resolve the preferred transliteration from a map of transliterations.
///
/// Candidates are tried in priority order and the first hit is returned:
/// 1. Preferred transliteration list: exact key match
/// 2. Preferred transliteration list: key contains the tag
/// 3. Preferred script: exact key match
/// 4. Preferred script: key contains the script
///
/// When several keys satisfy a substring match, the lexicographically
/// smallest key wins, so the result never depends on `HashMap` iteration
/// order (which is arbitrary and may differ between runs).
///
/// Returns `None` when nothing matches or when no preference is supplied.
fn resolve_transliteration<'a>(
    transliterations: &'a std::collections::HashMap<String, String>,
    preferred_transliteration: Option<&[String]>,
    preferred_script: Option<&String>,
) -> Option<&'a str> {
    /// Value stored under the smallest key that contains `needle`, if any.
    fn smallest_key_containing<'m>(
        map: &'m std::collections::HashMap<String, String>,
        needle: &str,
    ) -> Option<&'m str> {
        map.iter()
            .filter(|(key, _)| key.contains(needle))
            .min_by(|a, b| a.0.cmp(b.0))
            .map(|(_, value)| value.as_str())
    }

    if let Some(tags) = preferred_transliteration {
        // 1. Priority list: exact match. Every tag is tried exactly before any
        //    substring matching starts, so list order only breaks ties within
        //    the same matching tier.
        if let Some(hit) = tags.iter().find_map(|tag| transliterations.get(tag)) {
            return Some(hit.as_str());
        }
        // 2. Priority list: substring match
        if let Some(hit) = tags
            .iter()
            .find_map(|tag| smallest_key_containing(transliterations, tag.as_str()))
        {
            return Some(hit);
        }
    }

    if let Some(script) = preferred_script {
        // 3. Preferred script: exact match
        if let Some(hit) = transliterations.get(script) {
            return Some(hit.as_str());
        }
        // 4. Preferred script: substring match
        if let Some(hit) = smallest_key_containing(transliterations, script.as_str()) {
            return Some(hit);
        }
    }

    None
}
97
98fn resolve_translation<'a>(
99    translations: &'a std::collections::HashMap<citum_schema::reference::LangID, String>,
100    style_locale: &str,
101) -> Option<&'a str> {
102    translations
103        .get(style_locale)
104        .or_else(|| {
105            style_locale
106                .split(['-', '_'])
107                .next()
108                .and_then(|base| translations.get(base))
109        })
110        .map(String::as_str)
111}
112
113/// Resolve a multilingual string based on style configuration.
114///
115/// Applies BCP 47 fallback logic:
116/// 1. Exact tag match (e.g., "ja-Latn-hepburn")
117/// 2. Script prefix match (e.g., "ja-Latn")
118/// 3. Fallback to original field
119///
120/// # Arguments
121/// * `string` - The multilingual string to resolve
122/// * `mode` - The rendering mode from style config
123/// * `preferred_transliteration` - Optional ordered list of BCP 47 transliteration tags
124/// * `preferred_script` - Optional preferred script (e.g., "Latn")
125/// * `style_locale` - The style's locale for translation matching
126#[must_use]
127pub fn resolve_multilingual_string(
128    string: &citum_schema::reference::types::MultilingualString,
129    mode: Option<&citum_schema::options::MultilingualMode>,
130    preferred_transliteration: Option<&[String]>,
131    preferred_script: Option<&String>,
132    style_locale: &str,
133) -> String {
134    use citum_schema::options::MultilingualMode;
135    use citum_schema::reference::types::MultilingualString;
136
137    match string {
138        MultilingualString::Simple(s) => s.clone(),
139        MultilingualString::Complex(complex) => {
140            let mode = mode.unwrap_or(&MultilingualMode::Primary);
141
142            match mode {
143                MultilingualMode::Primary => complex.original.clone(),
144
145                MultilingualMode::Transliterated => {
146                    if let Some(trans) = resolve_transliteration(
147                        &complex.transliterations,
148                        preferred_transliteration,
149                        preferred_script,
150                    ) {
151                        return trans.to_string();
152                    }
153
154                    // Fallback: use any available transliteration, or original
155                    complex
156                        .transliterations
157                        .values()
158                        .next()
159                        .cloned()
160                        .unwrap_or_else(|| complex.original.clone())
161                }
162
163                MultilingualMode::Translated => {
164                    // Try to match style locale
165                    resolve_translation(&complex.translations, style_locale)
166                        .map(ToString::to_string)
167                        .unwrap_or_else(|| complex.original.clone())
168                }
169
170                MultilingualMode::Combined => {
171                    // Format: "transliterated [translated]" or fallback variants
172                    let trans = resolve_transliteration(
173                        &complex.transliterations,
174                        preferred_transliteration,
175                        preferred_script,
176                    );
177
178                    let translation = resolve_translation(&complex.translations, style_locale);
179
180                    match (trans, translation) {
181                        (Some(t), Some(tr)) => format!("{t} [{tr}]"),
182                        (Some(t), None) => t.to_string(),
183                        (None, Some(tr)) => format!("{} [{}]", complex.original, tr),
184                        (None, None) => complex.original.clone(),
185                    }
186                }
187            }
188        }
189    }
190}
191
192/// Resolve the effective language for one logical field scope on a reference.
193///
194/// This prefers an explicit `field_languages` entry, then a multilingual title
195/// language tag for the provided title value, and finally the reference-level
196/// language.
197#[must_use]
198pub fn effective_field_language(
199    reference: &Reference,
200    scope: &str,
201    title: Option<&Title>,
202) -> Option<String> {
203    reference
204        .field_languages()
205        .get(scope)
206        .map(ToString::to_string)
207        .or_else(|| match title {
208            Some(Title::Multilingual(multilingual)) => {
209                multilingual.lang.as_ref().map(ToString::to_string)
210            }
211            _ => None,
212        })
213        .or_else(|| reference.language().map(|lang| lang.to_string()))
214}
215
216/// Resolve the effective language for the primary title of a reference.
217#[must_use]
218pub fn effective_item_language(reference: &Reference) -> Option<String> {
219    effective_field_language(reference, "title", reference.title().as_ref())
220}
221
222/// Resolve the effective language for the specific template component being rendered.
223#[must_use]
224pub fn effective_component_language(
225    reference: &Reference,
226    component: &TemplateComponent,
227) -> Option<String> {
228    match component {
229        TemplateComponent::Title(title_component) => {
230            let title = match title_component.title {
231                TitleType::Primary => reference.title(),
232                TitleType::ParentMonograph => reference.container_title(),
233                TitleType::ParentSerial => reference.container_title(),
234                _ => reference.title(),
235            };
236
237            let scope = match title_component.title {
238                TitleType::Primary => "title",
239                TitleType::ParentMonograph => "parent-monograph.title",
240                TitleType::ParentSerial => "parent-serial.title",
241                _ => "title",
242            };
243
244            effective_field_language(reference, scope, title.as_ref())
245        }
246        _ => effective_item_language(reference),
247    }
248}
249
250/// Select a structured name from transliteration maps using priority-list then script-match rules.
251fn select_by_transliteration<'a>(
252    m: &'a citum_schema::reference::contributor::MultilingualName,
253    preferred_transliteration: Option<&[String]>,
254    preferred_script: Option<&String>,
255) -> &'a citum_schema::reference::contributor::StructuredName {
256    // 1. Priority list: exact match
257    if let Some(tags) = preferred_transliteration {
258        for tag in tags {
259            if let Some(name) = m.transliterations.get(tag) {
260                return name;
261            }
262        }
263        // 2. Priority list: substring match
264        for tag in tags {
265            if let Some((_, name)) = m
266                .transliterations
267                .iter()
268                .find(|(k, _)| k.contains(tag.as_str()))
269            {
270                return name;
271            }
272        }
273    }
274    // 3. Preferred script: exact match
275    if let Some(script) = preferred_script {
276        if let Some(name) = m.transliterations.get(script) {
277            return name;
278        }
279        // 4. Preferred script: substring match
280        if let Some((_, name)) = m
281            .transliterations
282            .iter()
283            .find(|(tag, _)| tag.contains(script))
284        {
285            return name;
286        }
287    }
288    // Fallback: any available transliteration before falling back to original
289    m.transliterations.values().next().unwrap_or(&m.original)
290}
291
292/// Resolve a multilingual contributor name based on style configuration.
293///
294/// Uses holistic name matching - selects the entire name variant (original/transliterated/translated)
295/// as a unit rather than mixing fields from different variants.
296///
297/// # Arguments
298/// * `contributor` - The contributor to resolve
299/// * `mode` - The rendering mode from style config
300/// * `preferred_transliteration` - Optional ordered list of BCP 47 transliteration tags
301/// * `preferred_script` - Optional preferred script (e.g., "Latn")
302/// * `style_locale` - The style's locale for translation matching
303#[must_use]
304pub fn resolve_multilingual_name(
305    contributor: &citum_schema::reference::contributor::Contributor,
306    mode: Option<&citum_schema::options::MultilingualMode>,
307    preferred_transliteration: Option<&[String]>,
308    preferred_script: Option<&String>,
309    style_locale: &str,
310) -> Vec<crate::reference::FlatName> {
311    use citum_schema::options::MultilingualMode;
312    use citum_schema::reference::contributor::Contributor;
313
314    match contributor {
315        // Simple and structured names have no multilingual data
316        Contributor::SimpleName(_) | Contributor::StructuredName(_) => contributor.to_names_vec(),
317
318        // Multilingual names: select variant holistically
319        Contributor::Multilingual(m) => {
320            let mode = mode.unwrap_or(&MultilingualMode::Primary);
321
322            let selected_name = match mode {
323                MultilingualMode::Primary => &m.original,
324                MultilingualMode::Transliterated => {
325                    select_by_transliteration(m, preferred_transliteration, preferred_script)
326                }
327                MultilingualMode::Translated => {
328                    m.translations.get(style_locale).unwrap_or(&m.original)
329                }
330                // Combined mode for names defaults to transliterated (parenthetical combo not common for names)
331                MultilingualMode::Combined => {
332                    select_by_transliteration(m, preferred_transliteration, preferred_script)
333                }
334            };
335
336            // Convert selected name to FlatName
337            vec![crate::reference::FlatName {
338                given: Some(selected_name.given.to_string()),
339                family: Some(selected_name.family.to_string()),
340                suffix: selected_name.suffix.clone(),
341                dropping_particle: selected_name.dropping_particle.clone(),
342                non_dropping_particle: selected_name.non_dropping_particle.clone(),
343                literal: None,
344                short_name: None,
345            }]
346        }
347
348        Contributor::ContributorList(l) => {
349            l.0.iter()
350                .flat_map(|c| {
351                    resolve_multilingual_name(
352                        c,
353                        mode,
354                        preferred_transliteration,
355                        preferred_script,
356                        style_locale,
357                    )
358                })
359                .collect()
360        }
361    }
362}
363
364/// Resolve the URL for a component based on its links configuration and the reference data.
365#[must_use]
366pub fn resolve_url(
367    links: &citum_schema::options::LinksConfig,
368    reference: &Reference,
369) -> Option<String> {
370    use citum_schema::options::LinkTarget;
371
372    let target = links.target.as_ref().unwrap_or(&LinkTarget::UrlOrDoi);
373
374    match target {
375        LinkTarget::Url => reference.url().map(|u| u.to_string()),
376        LinkTarget::Doi => reference.doi().map(|d| format!("https://doi.org/{d}")),
377        LinkTarget::UrlOrDoi => reference
378            .url()
379            .map(|u| u.to_string())
380            .or_else(|| reference.doi().map(|d| format!("https://doi.org/{d}"))),
381        LinkTarget::Pubmed => reference
382            .id()
383            .filter(|id| id.starts_with("pmid:"))
384            .map(|id| {
385                #[allow(clippy::string_slice, reason = "known ASCII prefix")]
386                let result = format!("https://pubmed.ncbi.nlm.nih.gov/{}/", &id[5..]);
387                result
388            }),
389        LinkTarget::Pmcid => reference
390            .id()
391            .filter(|id| id.starts_with("pmc:"))
392            .map(|id| {
393                #[allow(clippy::string_slice, reason = "known ASCII prefix")]
394                let result = format!("https://www.ncbi.nlm.nih.gov/pmc/articles/{}/", &id[4..]);
395                result
396            }),
397    }
398}
399
400/// Resolve the effective URL for a component, checking local links then falling back to global config.
401#[must_use]
402pub fn resolve_effective_url(
403    local_links: Option<&citum_schema::options::LinksConfig>,
404    global_links: Option<&citum_schema::options::LinksConfig>,
405    reference: &Reference,
406    component_anchor: citum_schema::options::LinkAnchor,
407) -> Option<String> {
408    use citum_schema::options::LinkAnchor;
409
410    // 1. Check local links first
411    if let Some(links) = local_links {
412        let anchor = links.anchor.as_ref().unwrap_or(&LinkAnchor::Component);
413        if matches!(anchor, LinkAnchor::Component) || *anchor == component_anchor {
414            return resolve_url(links, reference);
415        }
416    }
417
418    // 2. Fall back to global links if anchor matches this component type
419    if let Some(links) = global_links
420        && let Some(anchor) = &links.anchor
421        && *anchor == component_anchor
422    {
423        return resolve_url(links, reference);
424    }
425
426    None
427}
428
/// Values produced for one template component, ready to be rendered.
///
/// Generic over the value type `T`, which defaults to `String`.
#[derive(Debug, Clone, Default)]
pub struct ProcValues<T = String> {
    /// Main formatted value of the component.
    pub value: T,
    /// Text to place before the value, if any.
    pub prefix: Option<String>,
    /// Text to place after the value, if any.
    pub suffix: Option<String>,
    /// Hyperlink target for the value, if any.
    pub url: Option<String>,
    /// Key of the variable that was substituted in (e.g., "title:Primary" when
    /// a title replaces the author). Used to prevent duplicate rendering per
    /// the CSL variable-once rule.
    pub substituted_key: Option<String>,
    /// `true` when the value has already been formatted.
    pub pre_formatted: bool,
}
446
447/// Processing hints computed before rendering a reference or citation item.
448#[derive(Debug, Clone, Default)]
449pub struct ProcHints {
450    /// Whether disambiguation is active (triggers year-suffix).
451    pub disamb_condition: bool,
452    /// Index in the disambiguation group (1-based).
453    pub group_index: usize,
454    /// Total size of the disambiguation group.
455    pub group_length: usize,
456    /// The grouping key used.
457    pub group_key: String,
458    /// Whether to expand given names for disambiguation.
459    pub expand_given_names: bool,
460    /// Whether to expand given names for primary author only.
461    pub expand_given_names_primary_only: bool,
462    /// Minimum number of names to show to resolve ambiguity (overrides et-al-use-first).
463    pub min_names_to_show: Option<usize>,
464    /// Citation number for numeric citation styles (1-based).
465    pub citation_number: Option<usize>,
466    /// Optional sub-label for compound numeric citation addressing (e.g., "a" in "1a").
467    pub citation_sub_label: Option<String>,
468    /// Citation position (first, subsequent, ibid, etc.).
469    pub position: Option<citum_schema::citation::Position>,
470    /// Explicit integral citation name-memory state for this rendered item.
471    pub integral_name_state: Option<citum_schema::citation::IntegralNameState>,
472    /// Explicit org-abbreviation state for this rendered item.
473    pub org_abbreviation_state: Option<citum_schema::citation::IntegralNameState>,
474    /// First note number in which this reference was cited (note styles only).
475    /// Set for subsequent-position citations; `None` otherwise.
476    pub first_reference_note_number: Option<u32>,
477    /// When true, suppress a `disambiguate_only` title component.
478    /// Set when `first_reference_note_number` is present — the note number
479    /// already identifies the work; the disambiguating short title is redundant.
480    pub suppress_disambiguation_title: bool,
481}
482
/// Which kind of output is being rendered: a citation or a bibliography entry.
///
/// Defaults to [`RenderContext::Citation`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum RenderContext {
    /// Values are rendered for citation output.
    #[default]
    Citation,
    /// Values are rendered for bibliography output.
    Bibliography,
}
492
493/// Options for rendering.
494#[derive(Clone)]
495pub struct RenderOptions<'a> {
496    /// Effective configuration after style and default resolution.
497    pub config: &'a Config,
498    /// Effective bibliography-only configuration when rendering bibliography behavior.
499    pub bibliography_config: Option<BibliographyConfig>,
500    /// Locale used for term lookup and locale-sensitive formatting.
501    pub locale: &'a Locale,
502    /// Whether the current render target is a citation or bibliography.
503    pub context: RenderContext,
504    /// Citation mode for the current render operation.
505    pub mode: citum_schema::citation::CitationMode,
506    /// Whether to suppress the author name for this citation.
507    /// Set from the citation-level `suppress_author` flag.
508    pub suppress_author: bool,
509    /// Optional raw citation locator for rendering via locator config.
510    pub locator_raw: Option<&'a citum_schema::citation::CitationLocator>,
511    /// Reference type for optional type-class gating in locator patterns.
512    pub ref_type: Option<String>,
513    /// Whether to output semantic markup (HTML spans, Djot attributes).
514    pub show_semantics: bool,
515    /// The current top-level template index, when propagating preview annotations.
516    pub current_template_index: Option<usize>,
517    /// Document-level abbreviation map for post-render substitution.
518    pub abbreviation_map: Option<&'a crate::api::AbbreviationMap>,
519}
520
521/// Trait for extracting values from template components.
522pub trait ComponentValues {
523    /// Resolve the component into processed render values for one reference.
524    fn values<F: crate::render::format::OutputFormat<Output = String>>(
525        &self,
526        reference: &Reference,
527        hints: &ProcHints,
528        options: &RenderOptions<'_>,
529    ) -> Option<ProcValues<F::Output>>;
530}
531
532impl ComponentValues for TemplateComponent {
533    fn values<F: crate::render::format::OutputFormat<Output = String>>(
534        &self,
535        reference: &Reference,
536        hints: &ProcHints,
537        options: &RenderOptions<'_>,
538    ) -> Option<ProcValues<F::Output>> {
539        match self {
540            TemplateComponent::Contributor(c) => c.values::<F>(reference, hints, options),
541            TemplateComponent::Date(d) => d.values::<F>(reference, hints, options),
542            TemplateComponent::Title(t) => t.values::<F>(reference, hints, options),
543            TemplateComponent::Number(n) => n.values::<F>(reference, hints, options),
544            TemplateComponent::Variable(v) => v.values::<F>(reference, hints, options),
545            TemplateComponent::Group(l) => l.values::<F>(reference, hints, options),
546            TemplateComponent::Term(t) => t.values::<F>(reference, hints, options),
547            _ => None,
548        }
549    }
550}
551
552/// Check if periods should be stripped based on three-tier precedence.
553///
554/// Resolution order:
555/// 1. Component-level `strip_periods`
556/// 2. Global config `strip_periods`
557/// 3. Defaults to false
558#[must_use]
559pub fn should_strip_periods(
560    rendering: &citum_schema::template::Rendering,
561    options: &RenderOptions<'_>,
562) -> bool {
563    rendering
564        .strip_periods
565        .or(options.config.strip_periods)
566        .unwrap_or(false)
567}
568
/// Strip every trailing period from a string.
///
/// Only the run of `.` characters at the very end is removed; periods inside
/// the text are kept (e.g., "Ph.D." becomes "Ph.D" and "et al..." becomes
/// "et al").
#[must_use]
pub fn strip_trailing_periods(s: &str) -> String {
    let mut kept = s;
    while let Some(shorter) = kept.strip_suffix('.') {
        kept = shorter;
    }
    kept.to_owned()
}
577
578/// Apply abbreviation substitution if the map contains an entry for `value`.
579///
580/// Returns the abbreviation if found, otherwise returns the original value unchanged.
581#[must_use]
582pub fn apply_abbreviation(value: String, map: Option<&crate::api::AbbreviationMap>) -> String {
583    if let Some(abbr) = map.and_then(|m| m.0.get(&value)) {
584        return abbr.clone();
585    }
586    value
587}