Skip to main content

citum_engine/values/
title.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus
4*/
5
6//! Rendering logic for title fields with smartening, form selection,
7//! and text-case transforms.
8
9use crate::reference::Reference;
10use crate::render::rich_text::render_djot_inline_with_transform;
11use crate::values::text_case::{self, apply_text_case, capitalize_first_word};
12use crate::values::{ComponentValues, ProcHints, ProcValues, RenderOptions};
13use citum_schema::options::titles::TextCase;
14use citum_schema::reference::ClassExtension;
15use citum_schema::reference::types::{StructuredTitle, Subtitle, Title};
16use citum_schema::template::{TemplateTitle, TitleForm, TitleType};
17
/// Replaces straight `'` and `"` with typographic quotes wherever the
/// neighbouring characters make the role of the mark clear.
///
/// A mark whose role cannot be decided from its neighbours is copied through
/// unchanged. Context is always judged against the *original* input
/// characters, not against the already-converted output.
fn smarten_title_quotes(input: &str) -> String {
    const OPEN_SINGLE: char = '\u{2018}';
    const CLOSE_SINGLE: char = '\u{2019}';
    const OPEN_DOUBLE: char = '\u{201C}';
    const CLOSE_DOUBLE: char = '\u{201D}';

    /// True when a quote following `before` could be an opening quote:
    /// start of input, whitespace, an opening bracket, or another quote mark.
    fn can_precede_opening(before: Option<char>) -> bool {
        before.is_none_or(|c| {
            c.is_whitespace()
                || matches!(c, '(' | '[' | '{' | '\u{2018}' | '\u{201C}' | '\'' | '"')
        })
    }

    /// True when a quote followed by `after` could be a closing quote:
    /// end of input, whitespace, closing punctuation, or another quote mark.
    fn can_follow_closing(after: Option<char>) -> bool {
        after.is_none_or(|c| {
            c.is_whitespace()
                || matches!(
                    c,
                    '.' | ','
                        | ';'
                        | ':'
                        | '!'
                        | '?'
                        | ')'
                        | ']'
                        | '}'
                        | '\u{2019}'
                        | '\u{201D}'
                        | '\''
                        | '"'
                )
        })
    }

    let mut smart = String::with_capacity(input.len());
    let mut rest = input.chars().peekable();
    let mut before: Option<char> = None;
    // Counters of opening quotes emitted and not yet matched by a closer.
    let mut single_depth = 0usize;
    let mut double_depth = 0usize;

    while let Some(current) = rest.next() {
        let after = rest.peek().copied();

        let replacement = match current {
            '\'' => {
                let before_alpha = before.is_some_and(char::is_alphabetic);
                let before_digit = before.is_some_and(|c| c.is_ascii_digit());
                let after_alpha = after.is_some_and(char::is_alphabetic);
                let after_digit = after.is_some_and(|c| c.is_ascii_digit());
                let after_alnum = after.is_some_and(char::is_alphanumeric);
                let opening_ctx = can_precede_opening(before);
                let closing_ctx = can_follow_closing(after);

                if (before_alpha && after_alpha) || (opening_ctx && after_digit) {
                    // Letter-apostrophe-letter, or an apostrophe leading a digit.
                    CLOSE_SINGLE
                } else if opening_ctx && after_alnum {
                    single_depth += 1;
                    OPEN_SINGLE
                } else if (single_depth > 0 || before_alpha || before_digit) && closing_ctx {
                    single_depth = single_depth.saturating_sub(1);
                    CLOSE_SINGLE
                } else {
                    '\''
                }
            }
            '"' => {
                let before_alpha = before.is_some_and(char::is_alphabetic);
                let before_can_close = before.is_some_and(|c| {
                    c.is_alphanumeric() || matches!(c, '\'' | '"' | '\u{2019}' | '\u{201D}')
                });
                let after_alnum = after.is_some_and(char::is_alphanumeric);
                let opening_ctx = can_precede_opening(before);
                let closing_ctx = can_follow_closing(after);

                if opening_ctx && after_alnum {
                    double_depth += 1;
                    OPEN_DOUBLE
                } else if double_depth > 0 && before_can_close && closing_ctx {
                    double_depth -= 1;
                    CLOSE_DOUBLE
                } else if before_alpha && closing_ctx {
                    // Unmatched closer directly after a letter.
                    CLOSE_DOUBLE
                } else {
                    '"'
                }
            }
            other => other,
        };

        smart.push(replacement);
        before = Some(current);
    }

    smart
}
82
83fn title_text(title: &Title, form: Option<&TitleForm>) -> String {
84    match title {
85        Title::Shorthand(short, long) => {
86            if matches!(form, Some(TitleForm::Short)) {
87                short.clone()
88            } else {
89                long.clone()
90            }
91        }
92        Title::Single(s) => s.clone(),
93        _ => title.to_string(),
94    }
95}
96
97fn parent_short_title(reference: &Reference, title_type: &TitleType) -> Option<String> {
98    match title_type {
99        TitleType::ParentMonograph => {
100            if reference.ref_type() == "chapter" || reference.ref_type() == "paper-conference" {
101                reference.container_title().and_then(|t| match t {
102                    Title::Shorthand(short, _) => Some(short),
103                    Title::Single(s) => Some(s),
104                    _ => None,
105                })
106            } else {
107                None
108            }
109        }
110        TitleType::ParentSerial => {
111            if reference.ref_type().contains("article") || reference.ref_type() == "broadcast" {
112                reference.container_title().and_then(|t| match t {
113                    Title::Shorthand(short, _) => Some(short),
114                    Title::Single(s) => Some(s),
115                    _ => None,
116                })
117            } else {
118                None
119            }
120        }
121        _ => None,
122    }
123}
124
/// Cheap heuristic: does `value` contain any character sequence that this
/// module treats as a sign of Djot inline markup?
///
/// Triggers on `_`, `*`, a backtick, the link separator `](`, or the
/// attribute opener `{.`.
fn looks_like_djot_markup(value: &str) -> bool {
    let has_marker_char = value.chars().any(|c| matches!(c, '_' | '*' | '`'));
    has_marker_char || ["](", "{."].iter().any(|marker| value.contains(marker))
}
132
133/// Build a text-transform closure that applies case transform then smart quotes.
134///
135/// The closure is used as the Djot text-leaf transform, so `.nocase` spans
136/// bypass it automatically via the rich-text renderer.
137fn make_case_transform(case: TextCase) -> impl FnMut(&str) -> String {
138    let mut seen_alpha = false;
139    move |text: &str| {
140        let cased = match case {
141            TextCase::Sentence | TextCase::SentenceApa | TextCase::SentenceNlm => {
142                let lowered = text.to_lowercase();
143                if seen_alpha {
144                    lowered
145                } else {
146                    // Capitalize the first alphabetic character we encounter
147                    let result = capitalize_first_word(&lowered);
148                    if result.chars().any(|c: char| c.is_alphabetic()) {
149                        seen_alpha = true;
150                    }
151                    result
152                }
153            }
154            _ => apply_text_case(text, case),
155        };
156        smarten_title_quotes(&cased)
157    }
158}
159
160/// Render a single title part through Djot with case transform + smart quotes.
161/// Returns (`rendered_value`, `has_explicit_link`).
162fn render_part_with_case<F: crate::render::format::OutputFormat<Output = String>>(
163    value: &str,
164    fmt: &F,
165    case: Option<TextCase>,
166) -> (String, bool) {
167    if looks_like_djot_markup(value) {
168        match case {
169            Some(tc) => render_djot_inline_with_transform(value, fmt, make_case_transform(tc)),
170            None => render_djot_inline_with_transform(value, fmt, smarten_title_quotes),
171        }
172    } else {
173        let result = match case {
174            Some(tc) => smarten_title_quotes(&apply_text_case(value, tc)),
175            None => smarten_title_quotes(value),
176        };
177        (result, false)
178    }
179}
180
181/// Render a structured title with per-part case transforms.
182///
183/// For `SentenceApa`, each subtitle gets sentence-case (first word capitalized).
184/// For `SentenceNlm`, subtitles are lowercased (no first-word capitalization).
185fn render_structured_title<F: crate::render::format::OutputFormat<Output = String>>(
186    st: &StructuredTitle,
187    fmt: &F,
188    case: Option<TextCase>,
189    short: bool,
190) -> (String, bool) {
191    let (main_rendered, has_link) = render_part_with_case(&st.main, fmt, case);
192    if short {
193        return (main_rendered, has_link);
194    }
195
196    let subtitle_case = case.map(|c| match c {
197        TextCase::SentenceNlm => TextCase::Lowercase,
198        other => other,
199    });
200
201    let mut parts = vec![main_rendered];
202    let mut has_link = has_link;
203
204    let subs: Vec<&str> = match &st.sub {
205        Subtitle::String(s) => vec![s.as_str()],
206        Subtitle::Vector(v) => v.iter().map(std::string::String::as_str).collect(),
207    };
208
209    for sub in subs {
210        let (sub_rendered, sub_link) = render_part_with_case(sub, fmt, subtitle_case);
211        has_link |= sub_link;
212        parts.push(sub_rendered);
213    }
214
215    (parts.join(": "), has_link)
216}
217
218/// Resolve the effective text-case for this title component.
219fn resolve_effective_text_case(
220    template: &TemplateTitle,
221    reference: &Reference,
222    options: &RenderOptions<'_>,
223) -> Option<TextCase> {
224    // 1. Template-level override takes precedence
225    if let Some(tc) = template.rendering.text_case {
226        return Some(apply_language_fallback(tc, reference));
227    }
228
229    // 2. Global title-category config
230    let ref_type = reference.ref_type();
231    let lang = reference.language();
232    let lang_str = lang.as_deref();
233
234    if let Some(rendering) = crate::render::component::get_title_category_rendering(
235        &template.title,
236        Some(&ref_type),
237        lang_str,
238        options.config,
239    ) && let Some(tc) = rendering.text_case
240    {
241        return Some(apply_language_fallback(tc, reference));
242    }
243
244    None
245}
246
247/// Apply language-aware fallback: non-English → as-is for English-specific transforms.
248fn apply_language_fallback(case: TextCase, reference: &Reference) -> TextCase {
249    let lang = reference.language();
250    text_case::resolve_text_case(case, lang.as_deref())
251}
252
253impl ComponentValues for TemplateTitle {
254    fn values<F: crate::render::format::OutputFormat<Output = String>>(
255        &self,
256        reference: &Reference,
257        hints: &ProcHints,
258        options: &RenderOptions<'_>,
259    ) -> Option<ProcValues<F::Output>> {
260        if self.disambiguate_only == Some(true) && hints.group_length <= 1 {
261            return None;
262        }
263
264        if matches!(self.form, Some(TitleForm::Short))
265            && let Some(short_title) = parent_short_title(reference, &self.title)
266            && !short_title.is_empty()
267        {
268            let (value, pre_formatted) = if looks_like_djot_markup(&short_title) {
269                let (v, _) = render_djot_inline_with_transform(
270                    &short_title,
271                    &F::default(),
272                    smarten_title_quotes,
273                );
274                (v, true)
275            } else {
276                (smarten_title_quotes(&short_title), false)
277            };
278            let value = crate::values::apply_abbreviation(value, options.abbreviation_map);
279            return Some(ProcValues {
280                value,
281                prefix: None,
282                suffix: None,
283                url: None,
284                substituted_key: None,
285                pre_formatted,
286            });
287        }
288
289        let title = resolve_primary_title(reference, &self.title)?;
290        let effective_case = resolve_effective_text_case(self, reference, options);
291        let (value, has_explicit_link, pre_formatted) =
292            render_title_variant::<F>(&title, self.form.as_ref(), effective_case, options);
293
294        if value.is_empty() {
295            return None;
296        }
297
298        use citum_schema::options::LinkAnchor;
299        let value = crate::values::apply_abbreviation(value, options.abbreviation_map);
300        let url = crate::values::resolve_effective_url(
301            self.links.as_ref(),
302            options.config.links.as_ref(),
303            reference,
304            LinkAnchor::Title,
305        );
306        Some(ProcValues {
307            value,
308            prefix: None,
309            suffix: None,
310            url: if has_explicit_link { None } else { url },
311            substituted_key: None,
312            pre_formatted,
313        })
314    }
315}
316
317/// Resolve which title field to render for the given `TitleType` and reference.
318fn resolve_primary_title(reference: &Reference, title_type: &TitleType) -> Option<Title> {
319    match title_type {
320        TitleType::Primary => reference.title(),
321        TitleType::ParentMonograph => match reference.extension() {
322            ClassExtension::Monograph(_)
323            | ClassExtension::CollectionComponent(_)
324            | ClassExtension::Event(_)
325            | ClassExtension::AudioVisual(_) => reference.container_title(),
326            _ => None,
327        },
328        TitleType::ParentSerial => match reference.extension() {
329            ClassExtension::SerialComponent(_)
330            | ClassExtension::LegalCase(_)
331            | ClassExtension::Treaty(_) => reference.container_title(),
332            _ => None,
333        },
334        _ => None,
335    }
336}
337
338/// Render a `Title` value into `(rendered_string, has_explicit_link, pre_formatted)`.
339///
340/// Handles structured, multilingual, and plain title variants with case transforms.
341fn render_title_variant<F: crate::render::format::OutputFormat<Output = String>>(
342    title: &Title,
343    form: Option<&TitleForm>,
344    effective_case: Option<TextCase>,
345    options: &RenderOptions<'_>,
346) -> (String, bool, bool) {
347    let fmt = F::default();
348    match title {
349        Title::Structured(st) => {
350            let short = matches!(form, Some(TitleForm::Short));
351            let (value, has_link) = render_structured_title(st, &fmt, effective_case, short);
352            let pre_formatted = if short {
353                looks_like_djot_markup(&st.main)
354            } else {
355                looks_like_djot_markup(&title_text(title, form))
356            };
357            (value, has_link, pre_formatted)
358        }
359        Title::Multilingual(m) => {
360            let (mode, preferred_transliteration, preferred_script) =
361                resolve_multilingual_title_config(options);
362            let complex = citum_schema::reference::types::MultilingualString::Complex(m.clone());
363            let value = crate::values::resolve_multilingual_string(
364                &complex,
365                mode,
366                preferred_transliteration,
367                preferred_script,
368                options.locale.locale.as_str(),
369            );
370            let (rendered, has_link) = render_part_with_case(&value, &fmt, effective_case);
371            let pre_formatted = looks_like_djot_markup(&value);
372            (rendered, has_link, pre_formatted)
373        }
374        _ => {
375            let value = title_text(title, form);
376            let (rendered, has_link) = render_part_with_case(&value, &fmt, effective_case);
377            let pre_formatted = looks_like_djot_markup(&value);
378            (rendered, has_link, pre_formatted)
379        }
380    }
381}
382
383/// Resolve multilingual title config (mode, transliteration, script) from render options.
384fn resolve_multilingual_title_config<'a>(
385    options: &'a RenderOptions<'a>,
386) -> (
387    Option<&'a citum_schema::options::MultilingualMode>,
388    Option<&'a [String]>,
389    Option<&'a String>,
390) {
391    let mode = options
392        .config
393        .multilingual
394        .as_ref()
395        .and_then(|ml| ml.title_mode.as_ref());
396    let preferred_transliteration = options
397        .config
398        .multilingual
399        .as_ref()
400        .and_then(|ml| ml.preferred_transliteration.as_deref());
401    let preferred_script = options
402        .config
403        .multilingual
404        .as_ref()
405        .and_then(|ml| ml.preferred_script.as_ref());
406    (mode, preferred_transliteration, preferred_script)
407}