Skip to main content

citum_engine/values/
title.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! Rendering logic for title fields with smartening, form selection,
7//! and text-case transforms.
8
9use crate::reference::Reference;
10use crate::render::format::unicode_quote_marks;
11use crate::render::rich_text::{
12    InlineRenderContext, render_djot_inline_with_transform_and_context,
13};
14use crate::values::text_case::{self, apply_text_case, capitalize_first_word};
15use crate::values::{
16    ComponentValues, ProcHints, ProcValues, RenderOptions, effective_component_language,
17};
18use citum_schema::options::titles::TextCase;
19use citum_schema::reference::ClassExtension;
20use citum_schema::reference::types::{StructuredTitle, Subtitle, Title};
21use citum_schema::template::{TemplateComponent, TemplateTitle, TitleForm, TitleType};
22
23/// Converts straight apostrophes and double quotes to curly quotes when the
24/// surrounding context is unambiguous.
25///
26/// Ambiguous characters are preserved as straight quotes so titles containing
27/// measurements or other non-quotation uses do not get rewritten arbitrarily.
28fn smarten_title_quotes_at_depth(input: &str, quote_depth: usize) -> String {
29    let mut out = String::with_capacity(input.len());
30    let mut it = input.char_indices().peekable();
31    let mut prev: Option<char> = None;
32    let mut open_single_quotes = 0usize;
33    let mut open_double_quotes = 0usize;
34
35    while let Some((_, ch)) = it.next() {
36        let next = it.peek().map(|(_, c)| *c);
37        let prev_is_alpha = prev.is_some_and(char::is_alphabetic);
38        let prev_is_digit = prev.is_some_and(|c| c.is_ascii_digit());
39        let prev_can_close_double_quote = prev.is_some_and(|c| {
40            c.is_alphanumeric() || matches!(c, '\'' | '"' | '\u{2019}' | '\u{201D}')
41        });
42        let next_is_alpha = next.is_some_and(char::is_alphabetic);
43        let next_is_digit = next.is_some_and(|c| c.is_ascii_digit());
44        let next_is_alnum = next.is_some_and(char::is_alphanumeric);
45        let prev_opens_quote =
46            prev.is_none_or(|c| c.is_whitespace() || "([{\u{2018}\u{201C}'\"".contains(c));
47        let next_closes_quote =
48            next.is_none_or(|c| c.is_whitespace() || ".,;:!?)]}\u{2019}\u{201D}'\"".contains(c));
49
50        match ch {
51            '\'' => {
52                let (open_quote, close_quote) = unicode_quote_marks(quote_depth + 1);
53                if (prev_is_alpha && next_is_alpha) || (prev_opens_quote && next_is_digit) {
54                    out.push('\u{2019}');
55                } else if prev_opens_quote && next_is_alnum {
56                    out.push_str(open_quote);
57                    open_single_quotes += 1;
58                } else if (open_single_quotes > 0 || prev_is_alpha || prev_is_digit)
59                    && next_closes_quote
60                {
61                    out.push_str(close_quote);
62                    open_single_quotes = open_single_quotes.saturating_sub(1);
63                } else {
64                    out.push('\'');
65                }
66            }
67            '"' => {
68                let (open_quote, close_quote) =
69                    unicode_quote_marks(quote_depth + open_double_quotes);
70                if prev_opens_quote && next_is_alnum {
71                    out.push_str(open_quote);
72                    open_double_quotes += 1;
73                } else if open_double_quotes > 0 && prev_can_close_double_quote && next_closes_quote
74                {
75                    let close_depth = quote_depth + open_double_quotes.saturating_sub(1);
76                    let (_, close_quote) = unicode_quote_marks(close_depth);
77                    out.push_str(close_quote);
78                    open_double_quotes -= 1;
79                } else if prev_is_alpha && next_closes_quote {
80                    out.push_str(close_quote);
81                } else {
82                    out.push('"');
83                }
84            }
85            _ => out.push(ch),
86        }
87
88        prev = Some(ch);
89    }
90    out
91}
92
93fn title_text(title: &Title, form: Option<&TitleForm>) -> String {
94    match title {
95        Title::Shorthand(short, long) => {
96            if matches!(form, Some(TitleForm::Short)) {
97                short.clone()
98            } else {
99                long.clone()
100            }
101        }
102        Title::Single(s) => s.clone(),
103        _ => title.to_string(),
104    }
105}
106
107fn parent_short_title(reference: &Reference, title_type: &TitleType) -> Option<String> {
108    match title_type {
109        TitleType::ParentMonograph => {
110            if reference.ref_type() == "chapter" || reference.ref_type() == "paper-conference" {
111                reference.container_title().and_then(|t| match t {
112                    Title::Shorthand(short, _) => Some(short),
113                    Title::Single(s) => Some(s),
114                    _ => None,
115                })
116            } else {
117                None
118            }
119        }
120        TitleType::ParentSerial => {
121            if reference.ref_type().contains("article") || reference.ref_type() == "broadcast" {
122                reference.container_title().and_then(|t| match t {
123                    Title::Shorthand(short, _) => Some(short),
124                    Title::Single(s) => Some(s),
125                    _ => None,
126                })
127            } else {
128                None
129            }
130        }
131        _ => None,
132    }
133}
134
/// Cheap heuristic: does `value` contain any character sequence that could be
/// Djot inline markup?
///
/// Returns `true` when the text contains `_`, `*`, a backtick, `](`, or `{.`.
fn looks_like_djot_markup(value: &str) -> bool {
    const MARKER_CHARS: [char; 3] = ['_', '*', '`'];
    const MARKER_SEQUENCES: [&str; 2] = ["](", "{."];

    value.contains(MARKER_CHARS)
        || MARKER_SEQUENCES
            .iter()
            .any(|marker| value.contains(*marker))
}
142
143/// Build a text-transform closure that applies case transform then smart quotes.
144///
145/// The closure is used as the Djot text-leaf transform, so `.nocase` spans
146/// bypass it automatically via the rich-text renderer.
147fn make_case_transform(case: TextCase, quote_depth: usize) -> impl FnMut(&str) -> String {
148    let mut seen_alpha = false;
149    move |text: &str| {
150        let cased = match case {
151            TextCase::Sentence | TextCase::SentenceApa | TextCase::SentenceNlm => {
152                let lowered = text.to_lowercase();
153                if seen_alpha {
154                    lowered
155                } else {
156                    // Capitalize the first alphabetic character we encounter
157                    let result = capitalize_first_word(&lowered);
158                    if result.chars().any(|c: char| c.is_alphabetic()) {
159                        seen_alpha = true;
160                    }
161                    result
162                }
163            }
164            _ => apply_text_case(text, case),
165        };
166        smarten_title_quotes_at_depth(&cased, quote_depth)
167    }
168}
169
170/// Render a single title part through Djot with case transform + smart quotes.
171/// Returns (`rendered_value`, `has_explicit_link`).
172fn render_part_with_case<F: crate::render::format::OutputFormat<Output = String>>(
173    value: &str,
174    fmt: &F,
175    case: Option<TextCase>,
176    quote_depth: usize,
177) -> (String, bool) {
178    let context = InlineRenderContext { quote_depth };
179    if looks_like_djot_markup(value) {
180        match case {
181            Some(tc) => render_djot_inline_with_transform_and_context(
182                value,
183                fmt,
184                context,
185                make_case_transform(tc, quote_depth),
186            ),
187            None => {
188                render_djot_inline_with_transform_and_context(value, fmt, context, move |text| {
189                    smarten_title_quotes_at_depth(text, quote_depth)
190                })
191            }
192        }
193    } else {
194        let result = match case {
195            Some(tc) => smarten_title_quotes_at_depth(&apply_text_case(value, tc), quote_depth),
196            None => smarten_title_quotes_at_depth(value, quote_depth),
197        };
198        (result, false)
199    }
200}
201
202/// Render a structured title with per-part case transforms.
203///
204/// For `SentenceApa`, each subtitle gets sentence-case (first word capitalized).
205/// For `SentenceNlm`, subtitles are lowercased (no first-word capitalization).
206fn render_structured_title<F: crate::render::format::OutputFormat<Output = String>>(
207    st: &StructuredTitle,
208    fmt: &F,
209    case: Option<TextCase>,
210    short: bool,
211    quote_depth: usize,
212) -> (String, bool) {
213    let (main_rendered, has_link) = render_part_with_case(&st.main, fmt, case, quote_depth);
214    if short {
215        return (main_rendered, has_link);
216    }
217
218    let subtitle_case = case.map(|c| match c {
219        TextCase::SentenceNlm => TextCase::Lowercase,
220        other => other,
221    });
222
223    let mut parts = vec![main_rendered];
224    let mut has_link = has_link;
225
226    let subs: Vec<&str> = match &st.sub {
227        Subtitle::String(s) => vec![s.as_str()],
228        Subtitle::Vector(v) => v.iter().map(std::string::String::as_str).collect(),
229    };
230
231    for sub in subs {
232        let (sub_rendered, sub_link) = render_part_with_case(sub, fmt, subtitle_case, quote_depth);
233        has_link |= sub_link;
234        parts.push(sub_rendered);
235    }
236
237    (parts.join(": "), has_link)
238}
239
240/// Resolve the effective text-case for this title component.
241fn resolve_effective_text_case(
242    template: &TemplateTitle,
243    reference: &Reference,
244    options: &RenderOptions<'_>,
245) -> Option<TextCase> {
246    // 1. Template-level override takes precedence
247    if let Some(tc) = template.rendering.text_case {
248        return Some(apply_language_fallback(tc, reference));
249    }
250
251    // 2. Global title-category config
252    let ref_type = reference.ref_type();
253    let lang = reference.language();
254    let lang_str = lang.as_deref();
255
256    if let Some(rendering) = crate::render::component::get_title_category_rendering(
257        &template.title,
258        Some(&ref_type),
259        lang_str,
260        options.config,
261    ) && let Some(tc) = rendering.text_case
262    {
263        return Some(apply_language_fallback(tc, reference));
264    }
265
266    None
267}
268
269fn effective_title_quote_depth(
270    template: &TemplateTitle,
271    reference: &Reference,
272    options: &RenderOptions<'_>,
273) -> usize {
274    let component = TemplateComponent::Title(template.clone());
275    let item_language = effective_component_language(reference, &component);
276    let mut rendering = crate::render::component::get_title_category_rendering(
277        &template.title,
278        options.ref_type.as_deref(),
279        item_language.as_deref(),
280        options.config,
281    )
282    .unwrap_or_default();
283    rendering.merge(&template.rendering);
284    usize::from(rendering.quote == Some(true))
285}
286
287/// Apply language-aware fallback: non-English → as-is for English-specific transforms.
288fn apply_language_fallback(case: TextCase, reference: &Reference) -> TextCase {
289    let lang = reference.language();
290    text_case::resolve_text_case(case, lang.as_deref())
291}
292
293impl ComponentValues for TemplateTitle {
294    fn values<F: crate::render::format::OutputFormat<Output = String>>(
295        &self,
296        reference: &Reference,
297        hints: &ProcHints,
298        options: &RenderOptions<'_>,
299    ) -> Option<ProcValues<F::Output>> {
300        if self.disambiguate_only == Some(true)
301            && (hints.group_length <= 1 || hints.suppress_disambiguation_title)
302        {
303            return None;
304        }
305
306        let quote_depth = effective_title_quote_depth(self, reference, options);
307
308        if matches!(self.form, Some(TitleForm::Short))
309            && let Some(short_title) = parent_short_title(reference, &self.title)
310            && !short_title.is_empty()
311        {
312            let (value, pre_formatted) = if looks_like_djot_markup(&short_title) {
313                let (v, _) = render_djot_inline_with_transform_and_context(
314                    &short_title,
315                    &F::default(),
316                    InlineRenderContext { quote_depth },
317                    move |text| smarten_title_quotes_at_depth(text, quote_depth),
318                );
319                (v, true)
320            } else {
321                (
322                    smarten_title_quotes_at_depth(&short_title, quote_depth),
323                    false,
324                )
325            };
326            let value = crate::values::apply_abbreviation(value, options.abbreviation_map);
327            return Some(ProcValues {
328                value,
329                prefix: None,
330                suffix: None,
331                url: None,
332                substituted_key: None,
333                pre_formatted,
334            });
335        }
336
337        let title = resolve_primary_title(reference, &self.title)?;
338        let effective_case = resolve_effective_text_case(self, reference, options);
339        let (value, has_explicit_link, pre_formatted) = render_title_variant::<F>(
340            &title,
341            self.form.as_ref(),
342            effective_case,
343            options,
344            quote_depth,
345        );
346
347        if value.is_empty() {
348            return None;
349        }
350
351        use citum_schema::options::LinkAnchor;
352        let value = crate::values::apply_abbreviation(value, options.abbreviation_map);
353        let url = crate::values::resolve_effective_url(
354            self.links.as_ref(),
355            options.config.links.as_ref(),
356            reference,
357            LinkAnchor::Title,
358        );
359        Some(ProcValues {
360            value,
361            prefix: None,
362            suffix: None,
363            url: if has_explicit_link { None } else { url },
364            substituted_key: None,
365            pre_formatted,
366        })
367    }
368}
369
370/// Resolve which title field to render for the given `TitleType` and reference.
371fn resolve_primary_title(reference: &Reference, title_type: &TitleType) -> Option<Title> {
372    match title_type {
373        TitleType::Primary => reference.title(),
374        TitleType::ParentMonograph => match reference.extension() {
375            ClassExtension::Monograph(_)
376            | ClassExtension::CollectionComponent(_)
377            | ClassExtension::Event(_)
378            | ClassExtension::AudioVisual(_) => reference.container_title(),
379            _ => None,
380        },
381        TitleType::ParentSerial => match reference.extension() {
382            ClassExtension::SerialComponent(_)
383            | ClassExtension::LegalCase(_)
384            | ClassExtension::Treaty(_) => reference.container_title(),
385            _ => None,
386        },
387        _ => None,
388    }
389}
390
391/// Render a `Title` value into `(rendered_string, has_explicit_link, pre_formatted)`.
392///
393/// Handles structured, multilingual, and plain title variants with case transforms.
394fn render_title_variant<F: crate::render::format::OutputFormat<Output = String>>(
395    title: &Title,
396    form: Option<&TitleForm>,
397    effective_case: Option<TextCase>,
398    options: &RenderOptions<'_>,
399    quote_depth: usize,
400) -> (String, bool, bool) {
401    let fmt = F::default();
402    match title {
403        Title::Structured(st) => {
404            let short = matches!(form, Some(TitleForm::Short));
405            let (value, has_link) =
406                render_structured_title(st, &fmt, effective_case, short, quote_depth);
407            let pre_formatted = if short {
408                looks_like_djot_markup(&st.main)
409            } else {
410                looks_like_djot_markup(&title_text(title, form))
411            };
412            (value, has_link, pre_formatted)
413        }
414        Title::Multilingual(m) => {
415            let (mode, preferred_transliteration, preferred_script) =
416                resolve_multilingual_title_config(options);
417            let complex = citum_schema::reference::types::MultilingualString::Complex(m.clone());
418            let value = crate::values::resolve_multilingual_string(
419                &complex,
420                mode,
421                preferred_transliteration,
422                preferred_script,
423                options.locale.locale.as_str(),
424            );
425            let (rendered, has_link) =
426                render_part_with_case(&value, &fmt, effective_case, quote_depth);
427            let pre_formatted = looks_like_djot_markup(&value);
428            (rendered, has_link, pre_formatted)
429        }
430        _ => {
431            let value = title_text(title, form);
432            let (rendered, has_link) =
433                render_part_with_case(&value, &fmt, effective_case, quote_depth);
434            let pre_formatted = looks_like_djot_markup(&value);
435            (rendered, has_link, pre_formatted)
436        }
437    }
438}
439
440/// Resolve multilingual title config (mode, transliteration, script) from render options.
441fn resolve_multilingual_title_config<'a>(
442    options: &'a RenderOptions<'a>,
443) -> (
444    Option<&'a citum_schema::options::MultilingualMode>,
445    Option<&'a [String]>,
446    Option<&'a String>,
447) {
448    let mode = options
449        .config
450        .multilingual
451        .as_ref()
452        .and_then(|ml| ml.title_mode.as_ref());
453    let preferred_transliteration = options
454        .config
455        .multilingual
456        .as_ref()
457        .and_then(|ml| ml.preferred_transliteration.as_deref());
458    let preferred_script = options
459        .config
460        .multilingual
461        .as_ref()
462        .and_then(|ml| ml.preferred_script.as_ref());
463    (mode, preferred_transliteration, preferred_script)
464}