Skip to main content

citum_engine/values/
mod.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! Value extraction for template components.
7//!
8//! This module provides the logic to extract formatted values from references
9//! based on template component specifications.
10
11/// Contributor extraction and name-formatting helpers.
12pub mod contributor;
13/// Date extraction and date-formatting helpers.
14pub mod date;
15/// List-component value extraction helpers.
16pub mod list;
17/// Locator rendering logic.
18pub mod locator;
19/// Numeric variable extraction and page-range helpers.
20pub mod number;
21/// Shared helpers for collapsing consecutive numeric or ordinal numbering.
22pub mod range;
23/// Locale term resolution helpers.
24pub mod term;
25/// Title text-case transform functions.
26pub mod text_case;
27/// Title extraction and title-formatting helpers.
28pub mod title;
29/// Generic variable extraction helpers.
30pub mod variable;
31
32#[cfg(test)]
33#[allow(
34    clippy::unwrap_used,
35    clippy::expect_used,
36    clippy::panic,
37    clippy::indexing_slicing,
38    clippy::todo,
39    clippy::unimplemented,
40    clippy::unreachable,
41    clippy::get_unwrap,
42    reason = "Panicking is acceptable and often desired in tests."
43)]
44mod tests;
45
46use crate::reference::Reference;
47use citum_schema::locale::Locale;
48use citum_schema::options::{Config, bibliography::BibliographyConfig};
49use citum_schema::reference::types::Title;
50use citum_schema::template::{TemplateComponent, TitleType};
51
52pub use contributor::format_contributors_short;
53pub use date::int_to_letter;
54
/// Resolve the preferred transliteration from a map of transliterations.
///
/// `transliterations` maps a tag (e.g. "ja-Latn-hepburn") to the transliterated
/// text. Candidates are tried in priority order and the first hit wins:
/// 1. Preferred transliteration list: exact key match, in list order
/// 2. Preferred transliteration list: key contains the tag, in list order
/// 3. Preferred script: exact key match
/// 4. Preferred script: key contains the script
///
/// When several keys satisfy a substring match, the lexicographically smallest
/// key is chosen so that the result does not depend on `HashMap` iteration
/// order (which differs between runs).
///
/// Returns `None` when nothing matches; the caller decides on a fallback.
fn resolve_transliteration<'a>(
    transliterations: &'a std::collections::HashMap<String, String>,
    preferred_transliteration: Option<&[String]>,
    preferred_script: Option<&String>,
) -> Option<&'a str> {
    /// Value stored under the lexicographically smallest key containing `needle`.
    fn by_substring<'m>(
        map: &'m std::collections::HashMap<String, String>,
        needle: &str,
    ) -> Option<&'m str> {
        map.iter()
            .filter(|(key, _)| key.contains(needle))
            .min_by(|a, b| a.0.cmp(b.0))
            .map(|(_, value)| value.as_str())
    }

    if let Some(tags) = preferred_transliteration {
        // 1. Priority list: exact match. Every tag is tried exactly before any
        //    substring matching starts, so an exact hit on a later tag still
        //    beats a substring hit on an earlier one.
        if let Some(hit) = tags.iter().find_map(|tag| transliterations.get(tag)) {
            return Some(hit.as_str());
        }
        // 2. Priority list: substring match
        if let Some(hit) = tags
            .iter()
            .find_map(|tag| by_substring(transliterations, tag))
        {
            return Some(hit);
        }
    }

    if let Some(script) = preferred_script {
        // 3. Preferred script: exact match, then 4. substring match
        return transliterations
            .get(script)
            .map(String::as_str)
            .or_else(|| by_substring(transliterations, script));
    }

    None
}
97
98fn resolve_translation<'a>(
99    translations: &'a std::collections::HashMap<citum_schema::reference::LangID, String>,
100    style_locale: &str,
101) -> Option<&'a str> {
102    translations
103        .get(style_locale)
104        .or_else(|| {
105            style_locale
106                .split(['-', '_'])
107                .next()
108                .and_then(|base| translations.get(base))
109        })
110        .map(String::as_str)
111}
112
113/// Resolve a multilingual string based on style configuration.
114///
115/// Applies BCP 47 fallback logic:
116/// 1. Exact tag match (e.g., "ja-Latn-hepburn")
117/// 2. Script prefix match (e.g., "ja-Latn")
118/// 3. Fallback to original field
119///
120/// # Arguments
121/// * `string` - The multilingual string to resolve
122/// * `mode` - The rendering mode from style config
123/// * `preferred_transliteration` - Optional ordered list of BCP 47 transliteration tags
124/// * `preferred_script` - Optional preferred script (e.g., "Latn")
125/// * `style_locale` - The style's locale for translation matching
126#[must_use]
127pub fn resolve_multilingual_string(
128    string: &citum_schema::reference::types::MultilingualString,
129    mode: Option<&citum_schema::options::MultilingualMode>,
130    preferred_transliteration: Option<&[String]>,
131    preferred_script: Option<&String>,
132    style_locale: &str,
133) -> String {
134    use citum_schema::options::MultilingualMode;
135    use citum_schema::reference::types::MultilingualString;
136
137    match string {
138        MultilingualString::Simple(s) => s.clone(),
139        MultilingualString::Complex(complex) => {
140            let mode = mode.unwrap_or(&MultilingualMode::Primary);
141
142            match mode {
143                MultilingualMode::Primary => complex.original.clone(),
144
145                MultilingualMode::Transliterated => {
146                    if let Some(trans) = resolve_transliteration(
147                        &complex.transliterations,
148                        preferred_transliteration,
149                        preferred_script,
150                    ) {
151                        return trans.to_string();
152                    }
153
154                    // Fallback: use any available transliteration, or original
155                    complex
156                        .transliterations
157                        .values()
158                        .next()
159                        .cloned()
160                        .unwrap_or_else(|| complex.original.clone())
161                }
162
163                MultilingualMode::Translated => {
164                    // Try to match style locale
165                    resolve_translation(&complex.translations, style_locale)
166                        .map(ToString::to_string)
167                        .unwrap_or_else(|| complex.original.clone())
168                }
169
170                MultilingualMode::Combined => {
171                    // Format: "transliterated [translated]" or fallback variants
172                    let trans = resolve_transliteration(
173                        &complex.transliterations,
174                        preferred_transliteration,
175                        preferred_script,
176                    );
177
178                    let translation = resolve_translation(&complex.translations, style_locale);
179
180                    match (trans, translation) {
181                        (Some(t), Some(tr)) => format!("{t} [{tr}]"),
182                        (Some(t), None) => t.to_string(),
183                        (None, Some(tr)) => format!("{} [{}]", complex.original, tr),
184                        (None, None) => complex.original.clone(),
185                    }
186                }
187
188                MultilingualMode::Pattern(segments) => resolve_multilingual_pattern(
189                    segments,
190                    &complex.original,
191                    &complex.transliterations,
192                    &complex.translations,
193                    preferred_transliteration,
194                    preferred_script,
195                    style_locale,
196                ),
197            }
198        }
199    }
200}
201
202/// Render a [`MultilingualMode::Pattern`] against a complex multilingual string.
203///
204/// Each segment is resolved to its text; segments that are empty or identical to
205/// the previous non-empty segment are skipped (dedup).  The surviving segments are
206/// joined by a single space.
207fn resolve_multilingual_pattern(
208    segments: &[citum_schema::options::MultilingualSegment],
209    original: &str,
210    transliterations: &std::collections::HashMap<String, String>,
211    translations: &std::collections::HashMap<citum_schema::reference::types::LangID, String>,
212    preferred_transliteration: Option<&[String]>,
213    preferred_script: Option<&String>,
214    style_locale: &str,
215) -> String {
216    use citum_schema::options::{MultilingualView, SegmentWrap};
217    let mut parts: Vec<String> = Vec::with_capacity(segments.len());
218    let mut last_text: Option<String> = None;
219
220    for seg in segments {
221        let text: Option<String> = match &seg.view {
222            MultilingualView::Original => Some(original.to_string()),
223            MultilingualView::Transliterated => resolve_transliteration(
224                transliterations,
225                preferred_transliteration,
226                preferred_script,
227            )
228            .map(ToString::to_string),
229            MultilingualView::Translated => {
230                resolve_translation(translations, style_locale).map(ToString::to_string)
231            }
232        };
233
234        let Some(text) = text else { continue };
235        if text.is_empty() {
236            continue;
237        }
238        // Skip duplicate: if this text is identical to the previous segment (e.g. when
239        // transliteration falls back to the same value as original).
240        if last_text.as_deref() == Some(text.as_str()) {
241            continue;
242        }
243
244        let wrapped = match &seg.wrap {
245            SegmentWrap::None => text.clone(),
246            other => other.apply(&text),
247        };
248        last_text = Some(text);
249        parts.push(wrapped);
250    }
251
252    parts.join(" ")
253}
254
255/// Resolve the effective language for one logical field scope on a reference.
256///
257/// This prefers an explicit `field_languages` entry, then a multilingual title
258/// language tag for the provided title value, and finally the reference-level
259/// language.
260#[must_use]
261pub fn effective_field_language(
262    reference: &Reference,
263    scope: &str,
264    title: Option<&Title>,
265) -> Option<String> {
266    reference
267        .field_languages()
268        .get(scope)
269        .map(ToString::to_string)
270        .or_else(|| match title {
271            Some(Title::Multilingual(multilingual)) => {
272                multilingual.lang.as_ref().map(ToString::to_string)
273            }
274            _ => None,
275        })
276        .or_else(|| reference.language().map(|lang| lang.to_string()))
277}
278
279/// Resolve the effective language for the primary title of a reference.
280#[must_use]
281pub fn effective_item_language(reference: &Reference) -> Option<String> {
282    effective_field_language(reference, "title", reference.title().as_ref())
283}
284
285/// Resolve the effective language for the specific template component being rendered.
286#[must_use]
287pub fn effective_component_language(
288    reference: &Reference,
289    component: &TemplateComponent,
290) -> Option<String> {
291    match component {
292        TemplateComponent::Title(title_component) => {
293            let title = match title_component.title {
294                TitleType::Primary => reference.title(),
295                TitleType::ParentMonograph => reference.container_title(),
296                TitleType::ParentSerial => reference.container_title(),
297                _ => reference.title(),
298            };
299
300            let scope = match title_component.title {
301                TitleType::Primary => "title",
302                TitleType::ParentMonograph => "parent-monograph.title",
303                TitleType::ParentSerial => "parent-serial.title",
304                _ => "title",
305            };
306
307            effective_field_language(reference, scope, title.as_ref())
308        }
309        _ => effective_item_language(reference),
310    }
311}
312
313/// Select a structured name from transliteration maps using priority-list then script-match rules.
314fn select_by_transliteration<'a>(
315    m: &'a citum_schema::reference::contributor::MultilingualName,
316    preferred_transliteration: Option<&[String]>,
317    preferred_script: Option<&String>,
318) -> &'a citum_schema::reference::contributor::StructuredName {
319    // 1. Priority list: exact match
320    if let Some(tags) = preferred_transliteration {
321        for tag in tags {
322            if let Some(name) = m.transliterations.get(tag) {
323                return name;
324            }
325        }
326        // 2. Priority list: substring match
327        for tag in tags {
328            if let Some((_, name)) = m
329                .transliterations
330                .iter()
331                .find(|(k, _)| k.contains(tag.as_str()))
332            {
333                return name;
334            }
335        }
336    }
337    // 3. Preferred script: exact match
338    if let Some(script) = preferred_script {
339        if let Some(name) = m.transliterations.get(script) {
340            return name;
341        }
342        // 4. Preferred script: substring match
343        if let Some((_, name)) = m
344            .transliterations
345            .iter()
346            .find(|(tag, _)| tag.contains(script))
347        {
348            return name;
349        }
350    }
351    // Fallback: any available transliteration before falling back to original
352    m.transliterations.values().next().unwrap_or(&m.original)
353}
354
355/// Resolve a multilingual contributor name based on style configuration.
356///
357/// Uses holistic name matching - selects the entire name variant (original/transliterated/translated)
358/// as a unit rather than mixing fields from different variants.
359///
360/// # Arguments
361/// * `contributor` - The contributor to resolve
362/// * `mode` - The rendering mode from style config
363/// * `preferred_transliteration` - Optional ordered list of BCP 47 transliteration tags
364/// * `preferred_script` - Optional preferred script (e.g., "Latn")
365/// * `style_locale` - The style's locale for translation matching
366#[must_use]
367pub fn resolve_multilingual_name(
368    contributor: &citum_schema::reference::contributor::Contributor,
369    mode: Option<&citum_schema::options::MultilingualMode>,
370    preferred_transliteration: Option<&[String]>,
371    preferred_script: Option<&String>,
372    style_locale: &str,
373) -> Vec<crate::reference::FlatName> {
374    use citum_schema::options::MultilingualMode;
375    use citum_schema::reference::contributor::Contributor;
376
377    match contributor {
378        // Simple and structured names have no multilingual data
379        Contributor::SimpleName(_) | Contributor::StructuredName(_) => contributor.to_names_vec(),
380
381        // Multilingual names: select variant holistically
382        Contributor::Multilingual(m) => {
383            let mode = mode.unwrap_or(&MultilingualMode::Primary);
384
385            let selected_name = match mode {
386                MultilingualMode::Primary => &m.original,
387                MultilingualMode::Transliterated => {
388                    select_by_transliteration(m, preferred_transliteration, preferred_script)
389                }
390                MultilingualMode::Translated => {
391                    m.translations.get(style_locale).unwrap_or(&m.original)
392                }
393                // Combined mode for names defaults to transliterated (parenthetical combo not common for names)
394                MultilingualMode::Combined => {
395                    select_by_transliteration(m, preferred_transliteration, preferred_script)
396                }
397                // Pattern mode for names: use the first non-original view (romanized).
398                // Names are always shown in a single script; multi-view name strings are
399                // not idiomatic in any major style guide.
400                MultilingualMode::Pattern(_) => {
401                    select_by_transliteration(m, preferred_transliteration, preferred_script)
402                }
403            };
404
405            // Convert selected name to FlatName
406            vec![crate::reference::FlatName {
407                given: Some(selected_name.given.to_string()),
408                family: Some(selected_name.family.to_string()),
409                suffix: selected_name.suffix.clone(),
410                dropping_particle: selected_name.dropping_particle.clone(),
411                non_dropping_particle: selected_name.non_dropping_particle.clone(),
412                literal: None,
413                short_name: None,
414            }]
415        }
416
417        Contributor::ContributorList(l) => {
418            l.0.iter()
419                .flat_map(|c| {
420                    resolve_multilingual_name(
421                        c,
422                        mode,
423                        preferred_transliteration,
424                        preferred_script,
425                        style_locale,
426                    )
427                })
428                .collect()
429        }
430    }
431}
432
433/// Resolve the URL for a component based on its links configuration and the reference data.
434#[must_use]
435pub fn resolve_url(
436    links: &citum_schema::options::LinksConfig,
437    reference: &Reference,
438) -> Option<String> {
439    use citum_schema::options::LinkTarget;
440
441    let target = links.target.as_ref().unwrap_or(&LinkTarget::UrlOrDoi);
442
443    match target {
444        LinkTarget::Url => reference.url().map(|u| u.to_string()),
445        LinkTarget::Doi => reference.doi().map(|d| format!("https://doi.org/{d}")),
446        LinkTarget::UrlOrDoi => reference
447            .url()
448            .map(|u| u.to_string())
449            .or_else(|| reference.doi().map(|d| format!("https://doi.org/{d}"))),
450        LinkTarget::Pubmed => reference
451            .id()
452            .filter(|id| id.starts_with("pmid:"))
453            .map(|id| {
454                #[allow(clippy::string_slice, reason = "known ASCII prefix")]
455                let result = format!("https://pubmed.ncbi.nlm.nih.gov/{}/", &id[5..]);
456                result
457            }),
458        LinkTarget::Pmcid => reference
459            .id()
460            .filter(|id| id.starts_with("pmc:"))
461            .map(|id| {
462                #[allow(clippy::string_slice, reason = "known ASCII prefix")]
463                let result = format!("https://www.ncbi.nlm.nih.gov/pmc/articles/{}/", &id[4..]);
464                result
465            }),
466    }
467}
468
469/// Resolve the effective URL for a component, checking local links then falling back to global config.
470#[must_use]
471pub fn resolve_effective_url(
472    local_links: Option<&citum_schema::options::LinksConfig>,
473    global_links: Option<&citum_schema::options::LinksConfig>,
474    reference: &Reference,
475    component_anchor: citum_schema::options::LinkAnchor,
476) -> Option<String> {
477    use citum_schema::options::LinkAnchor;
478
479    // 1. Check local links first
480    if let Some(links) = local_links {
481        let anchor = links.anchor.as_ref().unwrap_or(&LinkAnchor::Component);
482        if matches!(anchor, LinkAnchor::Component) || *anchor == component_anchor {
483            return resolve_url(links, reference);
484        }
485    }
486
487    // 2. Fall back to global links if anchor matches this component type
488    if let Some(links) = global_links
489        && let Some(anchor) = &links.anchor
490        && *anchor == component_anchor
491    {
492        return resolve_url(links, reference);
493    }
494
495    None
496}
497
/// Result of processing one template component, ready to be rendered.
///
/// `T` is the type of the formatted value and defaults to `String`.
#[derive(Debug, Clone, Default)]
pub struct ProcValues<T = String> {
    /// Main formatted value of the component.
    pub value: T,
    /// Text to place before the value, if any.
    pub prefix: Option<String>,
    /// Text to place after the value, if any.
    pub suffix: Option<String>,
    /// Link target when the value should be rendered as a hyperlink.
    pub url: Option<String>,
    /// Key of the variable that was substituted in (e.g., "title:Primary" when
    /// a title stands in for a missing author). Recorded so the same variable
    /// is not rendered twice, per the CSL variable-once rule.
    pub substituted_key: Option<String>,
    /// `true` when the value has already been formatted.
    pub pre_formatted: bool,
}
515
516/// Processing hints computed before rendering a reference or citation item.
517#[derive(Debug, Clone, Default)]
518pub struct ProcHints {
519    /// Whether disambiguation is active (triggers year-suffix).
520    pub disamb_condition: bool,
521    /// Index in the disambiguation group (1-based).
522    pub group_index: usize,
523    /// Total size of the disambiguation group.
524    pub group_length: usize,
525    /// The grouping key used.
526    pub group_key: String,
527    /// Whether to expand given names for disambiguation.
528    pub expand_given_names: bool,
529    /// Whether to expand given names for primary author only.
530    pub expand_given_names_primary_only: bool,
531    /// Minimum number of names to show to resolve ambiguity (overrides et-al-use-first).
532    pub min_names_to_show: Option<usize>,
533    /// Citation number for numeric citation styles (1-based).
534    pub citation_number: Option<usize>,
535    /// Optional sub-label for compound numeric citation addressing (e.g., "a" in "1a").
536    pub citation_sub_label: Option<String>,
537    /// Citation position (first, subsequent, ibid, etc.).
538    pub position: Option<citum_schema::citation::Position>,
539    /// Explicit integral citation name-memory state for this rendered item.
540    pub integral_name_state: Option<citum_schema::citation::IntegralNameState>,
541    /// Explicit org-abbreviation state for this rendered item.
542    pub org_abbreviation_state: Option<citum_schema::citation::IntegralNameState>,
543    /// First note number in which this reference was cited (note styles only).
544    /// Set for subsequent-position citations; `None` otherwise.
545    pub first_reference_note_number: Option<u32>,
546    /// When true, suppress a `disambiguate_only` title component.
547    /// Set when `first_reference_note_number` is present — the note number
548    /// already identifies the work; the disambiguating short title is redundant.
549    pub suppress_disambiguation_title: bool,
550}
551
/// Which kind of output is being rendered: a citation or a bibliography entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum RenderContext {
    /// Values are rendered for citation output (the default).
    #[default]
    Citation,
    /// Values are rendered for bibliography output.
    Bibliography,
}
561
562/// Options for rendering.
563#[derive(Clone)]
564pub struct RenderOptions<'a> {
565    /// Effective configuration after style and default resolution.
566    pub config: &'a Config,
567    /// Effective bibliography-only configuration when rendering bibliography behavior.
568    pub bibliography_config: Option<BibliographyConfig>,
569    /// Locale used for term lookup and locale-sensitive formatting.
570    pub locale: &'a Locale,
571    /// Whether the current render target is a citation or bibliography.
572    pub context: RenderContext,
573    /// Citation mode for the current render operation.
574    pub mode: citum_schema::citation::CitationMode,
575    /// Whether to suppress the author name for this citation.
576    /// Set from the citation-level `suppress_author` flag.
577    pub suppress_author: bool,
578    /// Optional raw citation locator for rendering via locator config.
579    pub locator_raw: Option<&'a citum_schema::citation::CitationLocator>,
580    /// Reference type for optional type-class gating in locator patterns.
581    pub ref_type: Option<String>,
582    /// Whether to output semantic markup (HTML spans, Djot attributes).
583    pub show_semantics: bool,
584    /// The current top-level template index, when propagating preview annotations.
585    pub current_template_index: Option<usize>,
586    /// Document-level abbreviation map for post-render substitution.
587    pub abbreviation_map: Option<&'a crate::api::AbbreviationMap>,
588}
589
590/// Trait for extracting values from template components.
591pub trait ComponentValues {
592    /// Resolve the component into processed render values for one reference.
593    fn values<F: crate::render::format::OutputFormat<Output = String>>(
594        &self,
595        reference: &Reference,
596        hints: &ProcHints,
597        options: &RenderOptions<'_>,
598    ) -> Option<ProcValues<F::Output>>;
599}
600
601impl ComponentValues for TemplateComponent {
602    fn values<F: crate::render::format::OutputFormat<Output = String>>(
603        &self,
604        reference: &Reference,
605        hints: &ProcHints,
606        options: &RenderOptions<'_>,
607    ) -> Option<ProcValues<F::Output>> {
608        match self {
609            TemplateComponent::Contributor(c) => c.values::<F>(reference, hints, options),
610            TemplateComponent::Date(d) => d.values::<F>(reference, hints, options),
611            TemplateComponent::Title(t) => t.values::<F>(reference, hints, options),
612            TemplateComponent::Number(n) => n.values::<F>(reference, hints, options),
613            TemplateComponent::Variable(v) => v.values::<F>(reference, hints, options),
614            TemplateComponent::Group(l) => l.values::<F>(reference, hints, options),
615            TemplateComponent::Term(t) => t.values::<F>(reference, hints, options),
616            _ => None,
617        }
618    }
619}
620
621/// Check if periods should be stripped based on three-tier precedence.
622///
623/// Resolution order:
624/// 1. Component-level `strip_periods`
625/// 2. Global config `strip_periods`
626/// 3. Defaults to false
627#[must_use]
628pub fn should_strip_periods(
629    rendering: &citum_schema::template::Rendering,
630    options: &RenderOptions<'_>,
631) -> bool {
632    rendering
633        .strip_periods
634        .or(options.config.strip_periods)
635        .unwrap_or(false)
636}
637
/// Remove every period at the end of a string.
///
/// Periods elsewhere in the string are kept: "Ph.D." becomes "Ph.D" and
/// "et al..." becomes "et al". A string without a trailing period is returned
/// unchanged.
#[must_use]
pub fn strip_trailing_periods(s: &str) -> String {
    let without_trailing = s.trim_end_matches('.');
    String::from(without_trailing)
}
646
647/// Apply abbreviation substitution if the map contains an entry for `value`.
648///
649/// Returns the abbreviation if found, otherwise returns the original value unchanged.
650#[must_use]
651pub fn apply_abbreviation(value: String, map: Option<&crate::api::AbbreviationMap>) -> String {
652    if let Some(abbr) = map.and_then(|m| m.0.get(&value)) {
653        return abbr.clone();
654    }
655    value
656}