Skip to main content

citum_engine/values/
mod.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus
4*/
5
6//! Value extraction for template components.
7//!
8//! This module provides the logic to extract formatted values from references
9//! based on template component specifications.
10
11/// Contributor extraction and name-formatting helpers.
12pub mod contributor;
13/// Date extraction and date-formatting helpers.
14pub mod date;
15/// List-component value extraction helpers.
16pub mod list;
17/// Locator rendering logic.
18pub mod locator;
19/// Numeric variable extraction and page-range helpers.
20pub mod number;
21/// Shared helpers for collapsing consecutive numeric or ordinal numbering.
22pub mod range;
23/// Locale term resolution helpers.
24pub mod term;
25/// Title text-case transform functions.
26pub mod text_case;
27/// Title extraction and title-formatting helpers.
28pub mod title;
29/// Generic variable extraction helpers.
30pub mod variable;
31
32#[cfg(test)]
33#[allow(
34    clippy::unwrap_used,
35    clippy::expect_used,
36    clippy::panic,
37    clippy::indexing_slicing,
38    clippy::todo,
39    clippy::unimplemented,
40    clippy::unreachable,
41    clippy::get_unwrap,
42    reason = "Panicking is acceptable and often desired in tests."
43)]
44mod tests;
45
46use crate::reference::Reference;
47use citum_schema::locale::Locale;
48use citum_schema::options::{Config, bibliography::BibliographyConfig};
49use citum_schema::reference::types::Title;
50use citum_schema::template::{TemplateComponent, TitleType};
51
52pub use contributor::format_contributors_short;
53pub use date::int_to_letter;
54
/// Pick the best transliteration for a string from its tag-to-text map.
///
/// Candidates are tried in strict priority order and the first hit wins:
/// 1. Each tag of `preferred_transliteration`, in list order: exact key match
/// 2. Each tag of `preferred_transliteration`, in list order: substring match
/// 3. `preferred_script`: exact key match
/// 4. `preferred_script`: substring match
///
/// When several keys contain the same needle (steps 2 and 4), the
/// lexicographically smallest key is selected, so the result never depends on
/// `HashMap` iteration order.
///
/// Returns `None` when no step produces a match.
fn resolve_transliteration<'a>(
    transliterations: &'a std::collections::HashMap<String, String>,
    preferred_transliteration: Option<&[String]>,
    preferred_script: Option<&String>,
) -> Option<&'a str> {
    // Substring lookup with a stable tie-break: among all keys containing
    // `needle`, take the smallest one instead of whichever the hasher yields first.
    fn substring_match<'m>(
        map: &'m std::collections::HashMap<String, String>,
        needle: &str,
    ) -> Option<&'m str> {
        map.iter()
            .filter(|(tag, _)| tag.contains(needle))
            .min_by_key(|(tag, _)| *tag)
            .map(|(_, text)| text.as_str())
    }

    if let Some(tags) = preferred_transliteration {
        // 1. Priority list: exact match
        if let Some(text) = tags.iter().find_map(|tag| transliterations.get(tag)) {
            return Some(text.as_str());
        }
        // 2. Priority list: substring match
        if let Some(text) = tags
            .iter()
            .find_map(|tag| substring_match(transliterations, tag.as_str()))
        {
            return Some(text);
        }
    }

    if let Some(script) = preferred_script {
        // 3. Preferred script: exact match
        if let Some(text) = transliterations.get(script) {
            return Some(text.as_str());
        }
        // 4. Preferred script: substring match
        if let Some(text) = substring_match(transliterations, script.as_str()) {
            return Some(text);
        }
    }

    None
}
97
98/// Resolve a multilingual string based on style configuration.
99///
100/// Applies BCP 47 fallback logic:
101/// 1. Exact tag match (e.g., "ja-Latn-hepburn")
102/// 2. Script prefix match (e.g., "ja-Latn")
103/// 3. Fallback to original field
104///
105/// # Arguments
106/// * `string` - The multilingual string to resolve
107/// * `mode` - The rendering mode from style config
108/// * `preferred_transliteration` - Optional ordered list of BCP 47 transliteration tags
109/// * `preferred_script` - Optional preferred script (e.g., "Latn")
110/// * `style_locale` - The style's locale for translation matching
111#[must_use]
112pub fn resolve_multilingual_string(
113    string: &citum_schema::reference::types::MultilingualString,
114    mode: Option<&citum_schema::options::MultilingualMode>,
115    preferred_transliteration: Option<&[String]>,
116    preferred_script: Option<&String>,
117    style_locale: &str,
118) -> String {
119    use citum_schema::options::MultilingualMode;
120    use citum_schema::reference::types::MultilingualString;
121
122    match string {
123        MultilingualString::Simple(s) => s.clone(),
124        MultilingualString::Complex(complex) => {
125            let mode = mode.unwrap_or(&MultilingualMode::Primary);
126
127            match mode {
128                MultilingualMode::Primary => complex.original.clone(),
129
130                MultilingualMode::Transliterated => {
131                    if let Some(trans) = resolve_transliteration(
132                        &complex.transliterations,
133                        preferred_transliteration,
134                        preferred_script,
135                    ) {
136                        return trans.to_string();
137                    }
138
139                    // Fallback: use any available transliteration, or original
140                    complex
141                        .transliterations
142                        .values()
143                        .next()
144                        .cloned()
145                        .unwrap_or_else(|| complex.original.clone())
146                }
147
148                MultilingualMode::Translated => {
149                    // Try to match style locale
150                    complex
151                        .translations
152                        .get(style_locale)
153                        .cloned()
154                        .unwrap_or_else(|| complex.original.clone())
155                }
156
157                MultilingualMode::Combined => {
158                    // Format: "transliterated [translated]" or fallback variants
159                    let trans = resolve_transliteration(
160                        &complex.transliterations,
161                        preferred_transliteration,
162                        preferred_script,
163                    );
164
165                    let translation = complex.translations.get(style_locale);
166
167                    match (trans, translation) {
168                        (Some(t), Some(tr)) => format!("{t} [{tr}]"),
169                        (Some(t), None) => t.to_string(),
170                        (None, Some(tr)) => format!("{} [{}]", complex.original, tr),
171                        (None, None) => complex.original.clone(),
172                    }
173                }
174            }
175        }
176    }
177}
178
179/// Resolve the effective language for one logical field scope on a reference.
180///
181/// This prefers an explicit `field_languages` entry, then a multilingual title
182/// language tag for the provided title value, and finally the reference-level
183/// language.
184#[must_use]
185pub fn effective_field_language(
186    reference: &Reference,
187    scope: &str,
188    title: Option<&Title>,
189) -> Option<String> {
190    reference
191        .field_languages()
192        .get(scope)
193        .map(ToString::to_string)
194        .or_else(|| match title {
195            Some(Title::Multilingual(multilingual)) => {
196                multilingual.lang.as_ref().map(ToString::to_string)
197            }
198            _ => None,
199        })
200        .or_else(|| reference.language().map(|lang| lang.to_string()))
201}
202
203/// Resolve the effective language for the primary title of a reference.
204#[must_use]
205pub fn effective_item_language(reference: &Reference) -> Option<String> {
206    effective_field_language(reference, "title", reference.title().as_ref())
207}
208
209/// Resolve the effective language for the specific template component being rendered.
210#[must_use]
211pub fn effective_component_language(
212    reference: &Reference,
213    component: &TemplateComponent,
214) -> Option<String> {
215    match component {
216        TemplateComponent::Title(title_component) => {
217            let title = match title_component.title {
218                TitleType::Primary => reference.title(),
219                TitleType::ParentMonograph => reference.container_title(),
220                TitleType::ParentSerial => reference.container_title(),
221                _ => reference.title(),
222            };
223
224            let scope = match title_component.title {
225                TitleType::Primary => "title",
226                TitleType::ParentMonograph => "parent-monograph.title",
227                TitleType::ParentSerial => "parent-serial.title",
228                _ => "title",
229            };
230
231            effective_field_language(reference, scope, title.as_ref())
232        }
233        _ => effective_item_language(reference),
234    }
235}
236
237/// Select a structured name from transliteration maps using priority-list then script-match rules.
238fn select_by_transliteration<'a>(
239    m: &'a citum_schema::reference::contributor::MultilingualName,
240    preferred_transliteration: Option<&[String]>,
241    preferred_script: Option<&String>,
242) -> &'a citum_schema::reference::contributor::StructuredName {
243    // 1. Priority list: exact match
244    if let Some(tags) = preferred_transliteration {
245        for tag in tags {
246            if let Some(name) = m.transliterations.get(tag) {
247                return name;
248            }
249        }
250        // 2. Priority list: substring match
251        for tag in tags {
252            if let Some((_, name)) = m
253                .transliterations
254                .iter()
255                .find(|(k, _)| k.contains(tag.as_str()))
256            {
257                return name;
258            }
259        }
260    }
261    // 3. Preferred script: exact match
262    if let Some(script) = preferred_script {
263        if let Some(name) = m.transliterations.get(script) {
264            return name;
265        }
266        // 4. Preferred script: substring match
267        if let Some((_, name)) = m
268            .transliterations
269            .iter()
270            .find(|(tag, _)| tag.contains(script))
271        {
272            return name;
273        }
274    }
275    // Fallback: any available transliteration before falling back to original
276    m.transliterations.values().next().unwrap_or(&m.original)
277}
278
279/// Resolve a multilingual contributor name based on style configuration.
280///
281/// Uses holistic name matching - selects the entire name variant (original/transliterated/translated)
282/// as a unit rather than mixing fields from different variants.
283///
284/// # Arguments
285/// * `contributor` - The contributor to resolve
286/// * `mode` - The rendering mode from style config
287/// * `preferred_transliteration` - Optional ordered list of BCP 47 transliteration tags
288/// * `preferred_script` - Optional preferred script (e.g., "Latn")
289/// * `style_locale` - The style's locale for translation matching
290#[must_use]
291pub fn resolve_multilingual_name(
292    contributor: &citum_schema::reference::contributor::Contributor,
293    mode: Option<&citum_schema::options::MultilingualMode>,
294    preferred_transliteration: Option<&[String]>,
295    preferred_script: Option<&String>,
296    style_locale: &str,
297) -> Vec<crate::reference::FlatName> {
298    use citum_schema::options::MultilingualMode;
299    use citum_schema::reference::contributor::Contributor;
300
301    match contributor {
302        // Simple and structured names have no multilingual data
303        Contributor::SimpleName(_) | Contributor::StructuredName(_) => contributor.to_names_vec(),
304
305        // Multilingual names: select variant holistically
306        Contributor::Multilingual(m) => {
307            let mode = mode.unwrap_or(&MultilingualMode::Primary);
308
309            let selected_name = match mode {
310                MultilingualMode::Primary => &m.original,
311                MultilingualMode::Transliterated => {
312                    select_by_transliteration(m, preferred_transliteration, preferred_script)
313                }
314                MultilingualMode::Translated => {
315                    m.translations.get(style_locale).unwrap_or(&m.original)
316                }
317                // Combined mode for names defaults to transliterated (parenthetical combo not common for names)
318                MultilingualMode::Combined => {
319                    select_by_transliteration(m, preferred_transliteration, preferred_script)
320                }
321            };
322
323            // Convert selected name to FlatName
324            vec![crate::reference::FlatName {
325                given: Some(selected_name.given.to_string()),
326                family: Some(selected_name.family.to_string()),
327                suffix: selected_name.suffix.clone(),
328                dropping_particle: selected_name.dropping_particle.clone(),
329                non_dropping_particle: selected_name.non_dropping_particle.clone(),
330                literal: None,
331                short_name: None,
332            }]
333        }
334
335        Contributor::ContributorList(l) => {
336            l.0.iter()
337                .flat_map(|c| {
338                    resolve_multilingual_name(
339                        c,
340                        mode,
341                        preferred_transliteration,
342                        preferred_script,
343                        style_locale,
344                    )
345                })
346                .collect()
347        }
348    }
349}
350
351/// Resolve the URL for a component based on its links configuration and the reference data.
352#[must_use]
353pub fn resolve_url(
354    links: &citum_schema::options::LinksConfig,
355    reference: &Reference,
356) -> Option<String> {
357    use citum_schema::options::LinkTarget;
358
359    let target = links.target.as_ref().unwrap_or(&LinkTarget::UrlOrDoi);
360
361    match target {
362        LinkTarget::Url => reference.url().map(|u| u.to_string()),
363        LinkTarget::Doi => reference.doi().map(|d| format!("https://doi.org/{d}")),
364        LinkTarget::UrlOrDoi => reference
365            .url()
366            .map(|u| u.to_string())
367            .or_else(|| reference.doi().map(|d| format!("https://doi.org/{d}"))),
368        LinkTarget::Pubmed => reference
369            .id()
370            .filter(|id| id.starts_with("pmid:"))
371            .map(|id| {
372                #[allow(clippy::string_slice, reason = "known ASCII prefix")]
373                let result = format!("https://pubmed.ncbi.nlm.nih.gov/{}/", &id[5..]);
374                result
375            }),
376        LinkTarget::Pmcid => reference
377            .id()
378            .filter(|id| id.starts_with("pmc:"))
379            .map(|id| {
380                #[allow(clippy::string_slice, reason = "known ASCII prefix")]
381                let result = format!("https://www.ncbi.nlm.nih.gov/pmc/articles/{}/", &id[4..]);
382                result
383            }),
384    }
385}
386
387/// Resolve the effective URL for a component, checking local links then falling back to global config.
388#[must_use]
389pub fn resolve_effective_url(
390    local_links: Option<&citum_schema::options::LinksConfig>,
391    global_links: Option<&citum_schema::options::LinksConfig>,
392    reference: &Reference,
393    component_anchor: citum_schema::options::LinkAnchor,
394) -> Option<String> {
395    use citum_schema::options::LinkAnchor;
396
397    // 1. Check local links first
398    if let Some(links) = local_links {
399        let anchor = links.anchor.as_ref().unwrap_or(&LinkAnchor::Component);
400        if matches!(anchor, LinkAnchor::Component) || *anchor == component_anchor {
401            return resolve_url(links, reference);
402        }
403    }
404
405    // 2. Fall back to global links if anchor matches this component type
406    if let Some(links) = global_links
407        && let Some(anchor) = &links.anchor
408        && *anchor == component_anchor
409    {
410        return resolve_url(links, reference);
411    }
412
413    None
414}
415
/// The processed output of one template component, ready to be rendered.
///
/// `T` is the type of the formatted value and defaults to `String`.
#[derive(Debug, Clone, Default)]
pub struct ProcValues<T = String> {
    /// Main formatted value of the component.
    pub value: T,
    /// Text to place before the value, if any.
    pub prefix: Option<String>,
    /// Text to place after the value, if any.
    pub suffix: Option<String>,
    /// Hyperlink target for the value, if any.
    pub url: Option<String>,
    /// Key of the variable that was substituted in (e.g., "title:Primary" when
    /// the title stands in for a missing author). Recorded so the same
    /// variable is not rendered twice, per the CSL variable-once rule.
    pub substituted_key: Option<String>,
    /// `true` when the value already carries its formatting.
    pub pre_formatted: bool,
}
433
434/// Processing hints computed before rendering a reference or citation item.
435#[derive(Debug, Clone, Default)]
436pub struct ProcHints {
437    /// Whether disambiguation is active (triggers year-suffix).
438    pub disamb_condition: bool,
439    /// Index in the disambiguation group (1-based).
440    pub group_index: usize,
441    /// Total size of the disambiguation group.
442    pub group_length: usize,
443    /// The grouping key used.
444    pub group_key: String,
445    /// Whether to expand given names for disambiguation.
446    pub expand_given_names: bool,
447    /// Minimum number of names to show to resolve ambiguity (overrides et-al-use-first).
448    pub min_names_to_show: Option<usize>,
449    /// Citation number for numeric citation styles (1-based).
450    pub citation_number: Option<usize>,
451    /// Optional sub-label for compound numeric citation addressing (e.g., "a" in "1a").
452    pub citation_sub_label: Option<String>,
453    /// Citation position (first, subsequent, ibid, etc.).
454    pub position: Option<citum_schema::citation::Position>,
455    /// Explicit integral citation name-memory state for this rendered item.
456    pub integral_name_state: Option<citum_schema::citation::IntegralNameState>,
457}
458
/// The kind of output being rendered: a citation or a bibliography entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum RenderContext {
    /// Values are rendered for citation output. This is the default context.
    #[default]
    Citation,
    /// Values are rendered for bibliography output.
    Bibliography,
}
468
469/// Options for rendering.
470#[derive(Clone)]
471pub struct RenderOptions<'a> {
472    /// Effective configuration after style and default resolution.
473    pub config: &'a Config,
474    /// Effective bibliography-only configuration when rendering bibliography behavior.
475    pub bibliography_config: Option<BibliographyConfig>,
476    /// Locale used for term lookup and locale-sensitive formatting.
477    pub locale: &'a Locale,
478    /// Whether the current render target is a citation or bibliography.
479    pub context: RenderContext,
480    /// Citation mode for the current render operation.
481    pub mode: citum_schema::citation::CitationMode,
482    /// Whether to suppress the author name for this citation.
483    /// Set from the citation-level `suppress_author` flag.
484    pub suppress_author: bool,
485    /// Optional raw citation locator for rendering via locator config.
486    pub locator_raw: Option<&'a citum_schema::citation::CitationLocator>,
487    /// Reference type for optional type-class gating in locator patterns.
488    pub ref_type: Option<String>,
489    /// Whether to output semantic markup (HTML spans, Djot attributes).
490    pub show_semantics: bool,
491    /// The current top-level template index, when propagating preview annotations.
492    pub current_template_index: Option<usize>,
493    /// Document-level abbreviation map for post-render substitution.
494    pub abbreviation_map: Option<&'a crate::api::AbbreviationMap>,
495}
496
497/// Trait for extracting values from template components.
498pub trait ComponentValues {
499    /// Resolve the component into processed render values for one reference.
500    fn values<F: crate::render::format::OutputFormat<Output = String>>(
501        &self,
502        reference: &Reference,
503        hints: &ProcHints,
504        options: &RenderOptions<'_>,
505    ) -> Option<ProcValues<F::Output>>;
506}
507
508impl ComponentValues for TemplateComponent {
509    fn values<F: crate::render::format::OutputFormat<Output = String>>(
510        &self,
511        reference: &Reference,
512        hints: &ProcHints,
513        options: &RenderOptions<'_>,
514    ) -> Option<ProcValues<F::Output>> {
515        match self {
516            TemplateComponent::Contributor(c) => c.values::<F>(reference, hints, options),
517            TemplateComponent::Date(d) => d.values::<F>(reference, hints, options),
518            TemplateComponent::Title(t) => t.values::<F>(reference, hints, options),
519            TemplateComponent::Number(n) => n.values::<F>(reference, hints, options),
520            TemplateComponent::Variable(v) => v.values::<F>(reference, hints, options),
521            TemplateComponent::Group(l) => l.values::<F>(reference, hints, options),
522            TemplateComponent::Term(t) => t.values::<F>(reference, hints, options),
523            _ => None,
524        }
525    }
526}
527
528/// Check if periods should be stripped based on three-tier precedence.
529///
530/// Resolution order:
531/// 1. Component-level `strip_periods`
532/// 2. Global config `strip_periods`
533/// 3. Defaults to false
534#[must_use]
535pub fn should_strip_periods(
536    rendering: &citum_schema::template::Rendering,
537    options: &RenderOptions<'_>,
538) -> bool {
539    rendering
540        .strip_periods
541        .or(options.config.strip_periods)
542        .unwrap_or(false)
543}
544
/// Remove every period at the end of a string.
///
/// All consecutive trailing periods are dropped ("etc..." becomes "etc"),
/// while periods elsewhere are kept: "Ph.D." becomes "Ph.D" and "a.b" is
/// returned unchanged.
#[must_use]
pub fn strip_trailing_periods(s: &str) -> String {
    let mut remaining = s;
    while let Some(shorter) = remaining.strip_suffix('.') {
        remaining = shorter;
    }
    remaining.to_owned()
}
553
554/// Apply abbreviation substitution if the map contains an entry for `value`.
555///
556/// Returns the abbreviation if found, otherwise returns the original value unchanged.
557#[must_use]
558pub fn apply_abbreviation(value: String, map: Option<&crate::api::AbbreviationMap>) -> String {
559    if let Some(abbr) = map.and_then(|m| m.0.get(&value)) {
560        return abbr.clone();
561    }
562    value
563}