Skip to main content

citum_engine/values/
title.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! Rendering logic for title fields with smartening, form selection,
7//! and text-case transforms.
8
9use crate::reference::Reference;
10use crate::render::format::unicode_quote_marks;
11use crate::render::rich_text::{
12    InlineRenderContext, render_djot_inline_with_transform_and_context,
13};
14use crate::values::text_case::{self, apply_text_case, capitalize_first_word};
15use crate::values::{
16    ComponentValues, ProcHints, ProcValues, RenderOptions, effective_component_language,
17};
18use citum_schema::options::titles::TextCase;
19use citum_schema::reference::ClassExtension;
20use citum_schema::reference::types::{StructuredTitle, Subtitle, Title};
21use citum_schema::template::{TemplateComponent, TemplateTitle, TitleForm, TitleType};
22
23/// Converts straight apostrophes and double quotes to curly quotes when the
24/// surrounding context is unambiguous.
25///
26/// Ambiguous characters are preserved as straight quotes so titles containing
27/// measurements or other non-quotation uses do not get rewritten arbitrarily.
28fn smarten_title_quotes_at_depth(input: &str, quote_depth: usize) -> String {
29    let mut out = String::with_capacity(input.len());
30    let mut it = input.char_indices().peekable();
31    let mut prev: Option<char> = None;
32    let mut open_single_quotes = 0usize;
33    let mut open_double_quotes = 0usize;
34
35    while let Some((_, ch)) = it.next() {
36        let next = it.peek().map(|(_, c)| *c);
37        let prev_is_alpha = prev.is_some_and(char::is_alphabetic);
38        let prev_is_digit = prev.is_some_and(|c| c.is_ascii_digit());
39        let prev_can_close_double_quote = prev.is_some_and(|c| {
40            c.is_alphanumeric() || matches!(c, '\'' | '"' | '\u{2019}' | '\u{201D}')
41        });
42        let next_is_alpha = next.is_some_and(char::is_alphabetic);
43        let next_is_digit = next.is_some_and(|c| c.is_ascii_digit());
44        let next_is_alnum = next.is_some_and(char::is_alphanumeric);
45        let prev_opens_quote =
46            prev.is_none_or(|c| c.is_whitespace() || "([{\u{2018}\u{201C}'\"".contains(c));
47        let next_closes_quote =
48            next.is_none_or(|c| c.is_whitespace() || ".,;:!?)]}\u{2019}\u{201D}'\"".contains(c));
49
50        match ch {
51            '\'' => {
52                let (open_quote, close_quote) = unicode_quote_marks(quote_depth + 1);
53                if (prev_is_alpha && next_is_alpha) || (prev_opens_quote && next_is_digit) {
54                    out.push('\u{2019}');
55                } else if prev_opens_quote && next_is_alnum {
56                    out.push_str(open_quote);
57                    open_single_quotes += 1;
58                } else if (open_single_quotes > 0 || prev_is_alpha || prev_is_digit)
59                    && next_closes_quote
60                {
61                    out.push_str(close_quote);
62                    open_single_quotes = open_single_quotes.saturating_sub(1);
63                } else {
64                    out.push('\'');
65                }
66            }
67            '"' => {
68                let (open_quote, close_quote) =
69                    unicode_quote_marks(quote_depth + open_double_quotes);
70                if prev_opens_quote && next_is_alnum {
71                    out.push_str(open_quote);
72                    open_double_quotes += 1;
73                } else if open_double_quotes > 0 && prev_can_close_double_quote && next_closes_quote
74                {
75                    let close_depth = quote_depth + open_double_quotes.saturating_sub(1);
76                    let (_, close_quote) = unicode_quote_marks(close_depth);
77                    out.push_str(close_quote);
78                    open_double_quotes -= 1;
79                } else if prev_is_alpha && next_closes_quote {
80                    out.push_str(close_quote);
81                } else {
82                    out.push('"');
83                }
84            }
85            _ => out.push(ch),
86        }
87
88        prev = Some(ch);
89    }
90    out
91}
92
93fn title_text(title: &Title, form: Option<&TitleForm>) -> String {
94    match title {
95        Title::Shorthand(short, long) => {
96            if matches!(form, Some(TitleForm::Short)) {
97                short.clone()
98            } else {
99                long.clone()
100            }
101        }
102        Title::Single(s) => s.clone(),
103        _ => title.to_string(),
104    }
105}
106
107fn parent_short_title(reference: &Reference, title_type: &TitleType) -> Option<String> {
108    match title_type {
109        TitleType::ContainerTitle => reference.container_title().and_then(|t| match t {
110            Title::Shorthand(short, _) => Some(short),
111            Title::Single(s) => Some(s),
112            _ => None,
113        }),
114        TitleType::ParentMonograph => {
115            if reference.ref_type() == "chapter" || reference.ref_type() == "paper-conference" {
116                reference.container_title().and_then(|t| match t {
117                    Title::Shorthand(short, _) => Some(short),
118                    Title::Single(s) => Some(s),
119                    _ => None,
120                })
121            } else {
122                None
123            }
124        }
125        TitleType::ParentSerial => {
126            if reference.ref_type().contains("article") || reference.ref_type() == "broadcast" {
127                reference.container_title().and_then(|t| match t {
128                    Title::Shorthand(short, _) => Some(short),
129                    Title::Single(s) => Some(s),
130                    _ => None,
131                })
132            } else {
133                None
134            }
135        }
136        TitleType::CollectionTitle => reference.collection_title().and_then(|t| match t {
137            Title::Shorthand(short, _) => Some(short),
138            Title::Single(s) => Some(s),
139            _ => None,
140        }),
141        _ => None,
142    }
143}
144
/// Cheap heuristic for whether `value` should be sent through the Djot
/// inline renderer.
///
/// Returns `true` when the text contains any of the substrings `_`, `*`,
/// `](`, `{.`, or a backtick.
fn looks_like_djot_markup(value: &str) -> bool {
    const MARKERS: [&str; 5] = ["_", "*", "](", "{.", "`"];
    MARKERS.iter().any(|marker| value.contains(*marker))
}
152
153/// Build a text-transform closure that applies case transform then smart quotes.
154///
155/// The closure is used as the Djot text-leaf transform, so `.nocase` spans
156/// bypass it automatically via the rich-text renderer.
157fn make_case_transform(case: TextCase, quote_depth: usize) -> impl FnMut(&str) -> String {
158    let mut seen_alpha = false;
159    move |text: &str| {
160        let cased = match case {
161            TextCase::Sentence | TextCase::SentenceApa | TextCase::SentenceNlm => {
162                let lowered = text.to_lowercase();
163                if seen_alpha {
164                    lowered
165                } else {
166                    // Capitalize the first alphabetic character we encounter
167                    let result = capitalize_first_word(&lowered);
168                    if result.chars().any(|c: char| c.is_alphabetic()) {
169                        seen_alpha = true;
170                    }
171                    result
172                }
173            }
174            _ => apply_text_case(text, case),
175        };
176        smarten_title_quotes_at_depth(&cased, quote_depth)
177    }
178}
179
180/// Render a single title part through Djot with case transform + smart quotes.
181/// Returns (`rendered_value`, `has_explicit_link`).
182fn render_part_with_case<F: crate::render::format::OutputFormat<Output = String>>(
183    value: &str,
184    fmt: &F,
185    case: Option<TextCase>,
186    quote_depth: usize,
187) -> (String, bool) {
188    let context = InlineRenderContext { quote_depth };
189    if looks_like_djot_markup(value) {
190        match case {
191            Some(tc) => render_djot_inline_with_transform_and_context(
192                value,
193                fmt,
194                context,
195                make_case_transform(tc, quote_depth),
196            ),
197            None => {
198                render_djot_inline_with_transform_and_context(value, fmt, context, move |text| {
199                    smarten_title_quotes_at_depth(text, quote_depth)
200                })
201            }
202        }
203    } else {
204        let result = match case {
205            Some(tc) => smarten_title_quotes_at_depth(&apply_text_case(value, tc), quote_depth),
206            None => smarten_title_quotes_at_depth(value, quote_depth),
207        };
208        (result, false)
209    }
210}
211
212/// Render a structured title with per-part case transforms.
213///
214/// For `SentenceApa`, each subtitle gets sentence-case (first word capitalized).
215/// For `SentenceNlm`, subtitles are lowercased (no first-word capitalization).
216fn render_structured_title<F: crate::render::format::OutputFormat<Output = String>>(
217    st: &StructuredTitle,
218    fmt: &F,
219    case: Option<TextCase>,
220    short: bool,
221    quote_depth: usize,
222) -> (String, bool) {
223    let (main_rendered, has_link) = render_part_with_case(&st.main, fmt, case, quote_depth);
224    if short {
225        return (main_rendered, has_link);
226    }
227
228    let subtitle_case = case.map(|c| match c {
229        TextCase::SentenceNlm => TextCase::Lowercase,
230        other => other,
231    });
232
233    let mut parts = vec![main_rendered];
234    let mut has_link = has_link;
235
236    let subs: Vec<&str> = match &st.sub {
237        Subtitle::String(s) => vec![s.as_str()],
238        Subtitle::Vector(v) => v.iter().map(std::string::String::as_str).collect(),
239    };
240
241    for sub in subs {
242        let (sub_rendered, sub_link) = render_part_with_case(sub, fmt, subtitle_case, quote_depth);
243        has_link |= sub_link;
244        parts.push(sub_rendered);
245    }
246
247    (parts.join(": "), has_link)
248}
249
250/// Resolve the effective text-case for this title component.
251fn resolve_effective_text_case(
252    template: &TemplateTitle,
253    reference: &Reference,
254    options: &RenderOptions<'_>,
255) -> Option<TextCase> {
256    // 1. Template-level override takes precedence
257    if let Some(tc) = template.rendering.text_case {
258        return Some(apply_language_fallback(tc, reference));
259    }
260
261    // 2. Global title-category config
262    let ref_type = reference.ref_type();
263    let lang = reference.language();
264    let lang_str = lang.as_deref();
265
266    if let Some(rendering) = crate::render::component::get_title_category_rendering(
267        &template.title,
268        Some(&ref_type),
269        lang_str,
270        options.config,
271    ) && let Some(tc) = rendering.text_case
272    {
273        return Some(apply_language_fallback(tc, reference));
274    }
275
276    None
277}
278
279fn effective_title_quote_depth(
280    template: &TemplateTitle,
281    reference: &Reference,
282    options: &RenderOptions<'_>,
283) -> usize {
284    let component = TemplateComponent::Title(template.clone());
285    let item_language = effective_component_language(reference, &component);
286    let mut rendering = crate::render::component::get_title_category_rendering(
287        &template.title,
288        options.ref_type.as_deref(),
289        item_language.as_deref(),
290        options.config,
291    )
292    .unwrap_or_default();
293    rendering.merge(&template.rendering);
294    usize::from(rendering.quote == Some(true))
295}
296
297/// Apply language-aware fallback: non-English → as-is for English-specific transforms.
298fn apply_language_fallback(case: TextCase, reference: &Reference) -> TextCase {
299    let lang = reference.language();
300    text_case::resolve_text_case(case, lang.as_deref())
301}
302
303impl ComponentValues for TemplateTitle {
304    fn values<F: crate::render::format::OutputFormat<Output = String>>(
305        &self,
306        reference: &Reference,
307        hints: &ProcHints,
308        options: &RenderOptions<'_>,
309    ) -> Option<ProcValues<F::Output>> {
310        if self.disambiguate_only == Some(true)
311            && (hints.group_length <= 1 || hints.suppress_disambiguation_title)
312        {
313            return None;
314        }
315
316        let quote_depth = effective_title_quote_depth(self, reference, options);
317
318        if matches!(self.form, Some(TitleForm::Short))
319            && let Some(short_title) = parent_short_title(reference, &self.title)
320            && !short_title.is_empty()
321        {
322            let (value, pre_formatted) = if looks_like_djot_markup(&short_title) {
323                let (v, _) = render_djot_inline_with_transform_and_context(
324                    &short_title,
325                    &F::default(),
326                    InlineRenderContext { quote_depth },
327                    move |text| smarten_title_quotes_at_depth(text, quote_depth),
328                );
329                (v, true)
330            } else {
331                (
332                    smarten_title_quotes_at_depth(&short_title, quote_depth),
333                    false,
334                )
335            };
336            let value = crate::values::apply_abbreviation(value, options.abbreviation_map);
337            return Some(ProcValues {
338                value,
339                prefix: None,
340                suffix: None,
341                url: None,
342                substituted_key: None,
343                pre_formatted,
344            });
345        }
346
347        let title = resolve_primary_title(reference, &self.title)?;
348        let effective_case = resolve_effective_text_case(self, reference, options);
349        let (value, has_explicit_link, pre_formatted) = render_title_variant::<F>(
350            &title,
351            self.form.as_ref(),
352            effective_case,
353            options,
354            quote_depth,
355        );
356
357        if value.is_empty() {
358            return None;
359        }
360
361        use citum_schema::options::LinkAnchor;
362        let value = crate::values::apply_abbreviation(value, options.abbreviation_map);
363        let url = crate::values::resolve_effective_url(
364            self.links.as_ref(),
365            options.config.links.as_ref(),
366            reference,
367            LinkAnchor::Title,
368        );
369        Some(ProcValues {
370            value,
371            prefix: None,
372            suffix: None,
373            url: if has_explicit_link { None } else { url },
374            substituted_key: None,
375            pre_formatted,
376        })
377    }
378}
379
380/// Resolve which title field to render for the given `TitleType` and reference.
381fn resolve_primary_title(reference: &Reference, title_type: &TitleType) -> Option<Title> {
382    match title_type {
383        TitleType::Primary => reference.title(),
384        TitleType::ContainerTitle => reference.container_title(),
385        TitleType::ParentMonograph => match reference.extension() {
386            ClassExtension::Monograph(_)
387            | ClassExtension::CollectionComponent(_)
388            | ClassExtension::Event(_)
389            | ClassExtension::AudioVisual(_) => reference.container_title(),
390            _ => None,
391        },
392        TitleType::ParentSerial => match reference.extension() {
393            ClassExtension::SerialComponent(_)
394            | ClassExtension::LegalCase(_)
395            | ClassExtension::Treaty(_) => reference.container_title(),
396            _ => None,
397        },
398        TitleType::CollectionTitle => reference.collection_title(),
399        _ => None,
400    }
401}
402
403/// Render a `Title` value into `(rendered_string, has_explicit_link, pre_formatted)`.
404///
405/// Handles structured, multilingual, and plain title variants with case transforms.
406fn render_title_variant<F: crate::render::format::OutputFormat<Output = String>>(
407    title: &Title,
408    form: Option<&TitleForm>,
409    effective_case: Option<TextCase>,
410    options: &RenderOptions<'_>,
411    quote_depth: usize,
412) -> (String, bool, bool) {
413    let fmt = F::default();
414    match title {
415        Title::Structured(st) => {
416            let short = matches!(form, Some(TitleForm::Short));
417            let (value, has_link) =
418                render_structured_title(st, &fmt, effective_case, short, quote_depth);
419            let pre_formatted = if short {
420                looks_like_djot_markup(&st.main)
421            } else {
422                looks_like_djot_markup(&title_text(title, form))
423            };
424            (value, has_link, pre_formatted)
425        }
426        Title::Multilingual(m) => {
427            let (mode, preferred_transliteration, preferred_script) =
428                resolve_multilingual_title_config(options);
429            let complex = citum_schema::reference::types::MultilingualString::Complex(m.clone());
430            let value = crate::values::resolve_multilingual_string(
431                &complex,
432                mode,
433                preferred_transliteration,
434                preferred_script,
435                options.locale.locale.as_str(),
436            );
437            let (rendered, has_link) =
438                render_part_with_case(&value, &fmt, effective_case, quote_depth);
439            let pre_formatted = looks_like_djot_markup(&value);
440            (rendered, has_link, pre_formatted)
441        }
442        _ => {
443            let value = title_text(title, form);
444            let (rendered, has_link) =
445                render_part_with_case(&value, &fmt, effective_case, quote_depth);
446            let pre_formatted = looks_like_djot_markup(&value);
447            (rendered, has_link, pre_formatted)
448        }
449    }
450}
451
452/// Resolve multilingual title config (mode, transliteration, script) from render options.
453fn resolve_multilingual_title_config<'a>(
454    options: &'a RenderOptions<'a>,
455) -> (
456    Option<&'a citum_schema::options::MultilingualMode>,
457    Option<&'a [String]>,
458    Option<&'a String>,
459) {
460    let mode = options
461        .config
462        .multilingual
463        .as_ref()
464        .and_then(|ml| ml.title_mode.as_ref());
465    let preferred_transliteration = options
466        .config
467        .multilingual
468        .as_ref()
469        .and_then(|ml| ml.preferred_transliteration.as_deref());
470    let preferred_script = options
471        .config
472        .multilingual
473        .as_ref()
474        .and_then(|ml| ml.preferred_script.as_ref());
475    (mode, preferred_transliteration, preferred_script)
476}