Skip to main content

citum_engine/values/
title.rs

/*
SPDX-License-Identifier: MIT OR Apache-2.0
SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus
*/

//! Rendering logic for title fields: smart-quote conversion, form selection,
//! and text-case transforms.
8
9use crate::reference::Reference;
10use crate::render::format::unicode_quote_marks;
11use crate::render::rich_text::{
12    InlineRenderContext, render_djot_inline_with_transform_and_context,
13};
14use crate::values::text_case::{self, apply_text_case, capitalize_first_word};
15use crate::values::{
16    ComponentValues, ProcHints, ProcValues, RenderOptions, effective_component_language,
17};
18use citum_schema::options::titles::TextCase;
19use citum_schema::reference::ClassExtension;
20use citum_schema::reference::types::{StructuredTitle, Subtitle, Title};
21use citum_schema::template::{TemplateComponent, TemplateTitle, TitleForm, TitleType};
22
23/// Converts straight apostrophes and double quotes to curly quotes when the
24/// surrounding context is unambiguous.
25///
26/// Ambiguous characters are preserved as straight quotes so titles containing
27/// measurements or other non-quotation uses do not get rewritten arbitrarily.
28fn smarten_title_quotes_at_depth(input: &str, quote_depth: usize) -> String {
29    let mut out = String::with_capacity(input.len());
30    let mut it = input.char_indices().peekable();
31    let mut prev: Option<char> = None;
32    let mut open_single_quotes = 0usize;
33    let mut open_double_quotes = 0usize;
34
35    while let Some((_, ch)) = it.next() {
36        let next = it.peek().map(|(_, c)| *c);
37        let prev_is_alpha = prev.is_some_and(char::is_alphabetic);
38        let prev_is_digit = prev.is_some_and(|c| c.is_ascii_digit());
39        let prev_can_close_double_quote = prev.is_some_and(|c| {
40            c.is_alphanumeric() || matches!(c, '\'' | '"' | '\u{2019}' | '\u{201D}')
41        });
42        let next_is_alpha = next.is_some_and(char::is_alphabetic);
43        let next_is_digit = next.is_some_and(|c| c.is_ascii_digit());
44        let next_is_alnum = next.is_some_and(char::is_alphanumeric);
45        let prev_opens_quote =
46            prev.is_none_or(|c| c.is_whitespace() || "([{\u{2018}\u{201C}'\"".contains(c));
47        let next_closes_quote =
48            next.is_none_or(|c| c.is_whitespace() || ".,;:!?)]}\u{2019}\u{201D}'\"".contains(c));
49
50        match ch {
51            '\'' => {
52                let (open_quote, close_quote) = unicode_quote_marks(quote_depth + 1);
53                if (prev_is_alpha && next_is_alpha) || (prev_opens_quote && next_is_digit) {
54                    out.push('\u{2019}');
55                } else if prev_opens_quote && next_is_alnum {
56                    out.push_str(open_quote);
57                    open_single_quotes += 1;
58                } else if (open_single_quotes > 0 || prev_is_alpha || prev_is_digit)
59                    && next_closes_quote
60                {
61                    out.push_str(close_quote);
62                    open_single_quotes = open_single_quotes.saturating_sub(1);
63                } else {
64                    out.push('\'');
65                }
66            }
67            '"' => {
68                let (open_quote, close_quote) =
69                    unicode_quote_marks(quote_depth + open_double_quotes);
70                if prev_opens_quote && next_is_alnum {
71                    out.push_str(open_quote);
72                    open_double_quotes += 1;
73                } else if open_double_quotes > 0 && prev_can_close_double_quote && next_closes_quote
74                {
75                    let close_depth = quote_depth + open_double_quotes.saturating_sub(1);
76                    let (_, close_quote) = unicode_quote_marks(close_depth);
77                    out.push_str(close_quote);
78                    open_double_quotes -= 1;
79                } else if prev_is_alpha && next_closes_quote {
80                    out.push_str(close_quote);
81                } else {
82                    out.push('"');
83                }
84            }
85            _ => out.push(ch),
86        }
87
88        prev = Some(ch);
89    }
90    out
91}
92
93fn title_text(title: &Title, form: Option<&TitleForm>) -> String {
94    match title {
95        Title::Shorthand(short, long) => {
96            if matches!(form, Some(TitleForm::Short)) {
97                short.clone()
98            } else {
99                long.clone()
100            }
101        }
102        Title::Single(s) => s.clone(),
103        _ => title.to_string(),
104    }
105}
106
107fn parent_short_title(reference: &Reference, title_type: &TitleType) -> Option<String> {
108    match title_type {
109        TitleType::ParentMonograph => {
110            if reference.ref_type() == "chapter" || reference.ref_type() == "paper-conference" {
111                reference.container_title().and_then(|t| match t {
112                    Title::Shorthand(short, _) => Some(short),
113                    Title::Single(s) => Some(s),
114                    _ => None,
115                })
116            } else {
117                None
118            }
119        }
120        TitleType::ParentSerial => {
121            if reference.ref_type().contains("article") || reference.ref_type() == "broadcast" {
122                reference.container_title().and_then(|t| match t {
123                    Title::Shorthand(short, _) => Some(short),
124                    Title::Single(s) => Some(s),
125                    _ => None,
126                })
127            } else {
128                None
129            }
130        }
131        _ => None,
132    }
133}
134
/// Heuristically reports whether `value` may contain Djot inline markup.
///
/// Returns `true` when the text contains any of the characters `_`, `*` or
/// `` ` ``, or either of the sequences `](` and `{.`.
fn looks_like_djot_markup(value: &str) -> bool {
    const MARKER_CHARS: [char; 3] = ['_', '*', '`'];
    const MARKER_SEQUENCES: [&str; 2] = ["](", "{."];

    value.contains(MARKER_CHARS) || MARKER_SEQUENCES.iter().any(|seq| value.contains(*seq))
}
142
143/// Build a text-transform closure that applies case transform then smart quotes.
144///
145/// The closure is used as the Djot text-leaf transform, so `.nocase` spans
146/// bypass it automatically via the rich-text renderer.
147fn make_case_transform(case: TextCase, quote_depth: usize) -> impl FnMut(&str) -> String {
148    let mut seen_alpha = false;
149    move |text: &str| {
150        let cased = match case {
151            TextCase::Sentence | TextCase::SentenceApa | TextCase::SentenceNlm => {
152                let lowered = text.to_lowercase();
153                if seen_alpha {
154                    lowered
155                } else {
156                    // Capitalize the first alphabetic character we encounter
157                    let result = capitalize_first_word(&lowered);
158                    if result.chars().any(|c: char| c.is_alphabetic()) {
159                        seen_alpha = true;
160                    }
161                    result
162                }
163            }
164            _ => apply_text_case(text, case),
165        };
166        smarten_title_quotes_at_depth(&cased, quote_depth)
167    }
168}
169
170/// Render a single title part through Djot with case transform + smart quotes.
171/// Returns (`rendered_value`, `has_explicit_link`).
172fn render_part_with_case<F: crate::render::format::OutputFormat<Output = String>>(
173    value: &str,
174    fmt: &F,
175    case: Option<TextCase>,
176    quote_depth: usize,
177) -> (String, bool) {
178    let context = InlineRenderContext { quote_depth };
179    if looks_like_djot_markup(value) {
180        match case {
181            Some(tc) => render_djot_inline_with_transform_and_context(
182                value,
183                fmt,
184                context,
185                make_case_transform(tc, quote_depth),
186            ),
187            None => {
188                render_djot_inline_with_transform_and_context(value, fmt, context, move |text| {
189                    smarten_title_quotes_at_depth(text, quote_depth)
190                })
191            }
192        }
193    } else {
194        let result = match case {
195            Some(tc) => smarten_title_quotes_at_depth(&apply_text_case(value, tc), quote_depth),
196            None => smarten_title_quotes_at_depth(value, quote_depth),
197        };
198        (result, false)
199    }
200}
201
202/// Render a structured title with per-part case transforms.
203///
204/// For `SentenceApa`, each subtitle gets sentence-case (first word capitalized).
205/// For `SentenceNlm`, subtitles are lowercased (no first-word capitalization).
206fn render_structured_title<F: crate::render::format::OutputFormat<Output = String>>(
207    st: &StructuredTitle,
208    fmt: &F,
209    case: Option<TextCase>,
210    short: bool,
211    quote_depth: usize,
212) -> (String, bool) {
213    let (main_rendered, has_link) = render_part_with_case(&st.main, fmt, case, quote_depth);
214    if short {
215        return (main_rendered, has_link);
216    }
217
218    let subtitle_case = case.map(|c| match c {
219        TextCase::SentenceNlm => TextCase::Lowercase,
220        other => other,
221    });
222
223    let mut parts = vec![main_rendered];
224    let mut has_link = has_link;
225
226    let subs: Vec<&str> = match &st.sub {
227        Subtitle::String(s) => vec![s.as_str()],
228        Subtitle::Vector(v) => v.iter().map(std::string::String::as_str).collect(),
229    };
230
231    for sub in subs {
232        let (sub_rendered, sub_link) = render_part_with_case(sub, fmt, subtitle_case, quote_depth);
233        has_link |= sub_link;
234        parts.push(sub_rendered);
235    }
236
237    (parts.join(": "), has_link)
238}
239
240/// Resolve the effective text-case for this title component.
241fn resolve_effective_text_case(
242    template: &TemplateTitle,
243    reference: &Reference,
244    options: &RenderOptions<'_>,
245) -> Option<TextCase> {
246    // 1. Template-level override takes precedence
247    if let Some(tc) = template.rendering.text_case {
248        return Some(apply_language_fallback(tc, reference));
249    }
250
251    // 2. Global title-category config
252    let ref_type = reference.ref_type();
253    let lang = reference.language();
254    let lang_str = lang.as_deref();
255
256    if let Some(rendering) = crate::render::component::get_title_category_rendering(
257        &template.title,
258        Some(&ref_type),
259        lang_str,
260        options.config,
261    ) && let Some(tc) = rendering.text_case
262    {
263        return Some(apply_language_fallback(tc, reference));
264    }
265
266    None
267}
268
269fn effective_title_quote_depth(
270    template: &TemplateTitle,
271    reference: &Reference,
272    options: &RenderOptions<'_>,
273) -> usize {
274    let component = TemplateComponent::Title(template.clone());
275    let item_language = effective_component_language(reference, &component);
276    let mut rendering = crate::render::component::get_title_category_rendering(
277        &template.title,
278        options.ref_type.as_deref(),
279        item_language.as_deref(),
280        options.config,
281    )
282    .unwrap_or_default();
283    rendering.merge(&template.rendering);
284    usize::from(rendering.quote == Some(true))
285}
286
287/// Apply language-aware fallback: non-English → as-is for English-specific transforms.
288fn apply_language_fallback(case: TextCase, reference: &Reference) -> TextCase {
289    let lang = reference.language();
290    text_case::resolve_text_case(case, lang.as_deref())
291}
292
293impl ComponentValues for TemplateTitle {
294    fn values<F: crate::render::format::OutputFormat<Output = String>>(
295        &self,
296        reference: &Reference,
297        hints: &ProcHints,
298        options: &RenderOptions<'_>,
299    ) -> Option<ProcValues<F::Output>> {
300        if self.disambiguate_only == Some(true) && hints.group_length <= 1 {
301            return None;
302        }
303
304        let quote_depth = effective_title_quote_depth(self, reference, options);
305
306        if matches!(self.form, Some(TitleForm::Short))
307            && let Some(short_title) = parent_short_title(reference, &self.title)
308            && !short_title.is_empty()
309        {
310            let (value, pre_formatted) = if looks_like_djot_markup(&short_title) {
311                let (v, _) = render_djot_inline_with_transform_and_context(
312                    &short_title,
313                    &F::default(),
314                    InlineRenderContext { quote_depth },
315                    move |text| smarten_title_quotes_at_depth(text, quote_depth),
316                );
317                (v, true)
318            } else {
319                (
320                    smarten_title_quotes_at_depth(&short_title, quote_depth),
321                    false,
322                )
323            };
324            let value = crate::values::apply_abbreviation(value, options.abbreviation_map);
325            return Some(ProcValues {
326                value,
327                prefix: None,
328                suffix: None,
329                url: None,
330                substituted_key: None,
331                pre_formatted,
332            });
333        }
334
335        let title = resolve_primary_title(reference, &self.title)?;
336        let effective_case = resolve_effective_text_case(self, reference, options);
337        let (value, has_explicit_link, pre_formatted) = render_title_variant::<F>(
338            &title,
339            self.form.as_ref(),
340            effective_case,
341            options,
342            quote_depth,
343        );
344
345        if value.is_empty() {
346            return None;
347        }
348
349        use citum_schema::options::LinkAnchor;
350        let value = crate::values::apply_abbreviation(value, options.abbreviation_map);
351        let url = crate::values::resolve_effective_url(
352            self.links.as_ref(),
353            options.config.links.as_ref(),
354            reference,
355            LinkAnchor::Title,
356        );
357        Some(ProcValues {
358            value,
359            prefix: None,
360            suffix: None,
361            url: if has_explicit_link { None } else { url },
362            substituted_key: None,
363            pre_formatted,
364        })
365    }
366}
367
368/// Resolve which title field to render for the given `TitleType` and reference.
369fn resolve_primary_title(reference: &Reference, title_type: &TitleType) -> Option<Title> {
370    match title_type {
371        TitleType::Primary => reference.title(),
372        TitleType::ParentMonograph => match reference.extension() {
373            ClassExtension::Monograph(_)
374            | ClassExtension::CollectionComponent(_)
375            | ClassExtension::Event(_)
376            | ClassExtension::AudioVisual(_) => reference.container_title(),
377            _ => None,
378        },
379        TitleType::ParentSerial => match reference.extension() {
380            ClassExtension::SerialComponent(_)
381            | ClassExtension::LegalCase(_)
382            | ClassExtension::Treaty(_) => reference.container_title(),
383            _ => None,
384        },
385        _ => None,
386    }
387}
388
389/// Render a `Title` value into `(rendered_string, has_explicit_link, pre_formatted)`.
390///
391/// Handles structured, multilingual, and plain title variants with case transforms.
392fn render_title_variant<F: crate::render::format::OutputFormat<Output = String>>(
393    title: &Title,
394    form: Option<&TitleForm>,
395    effective_case: Option<TextCase>,
396    options: &RenderOptions<'_>,
397    quote_depth: usize,
398) -> (String, bool, bool) {
399    let fmt = F::default();
400    match title {
401        Title::Structured(st) => {
402            let short = matches!(form, Some(TitleForm::Short));
403            let (value, has_link) =
404                render_structured_title(st, &fmt, effective_case, short, quote_depth);
405            let pre_formatted = if short {
406                looks_like_djot_markup(&st.main)
407            } else {
408                looks_like_djot_markup(&title_text(title, form))
409            };
410            (value, has_link, pre_formatted)
411        }
412        Title::Multilingual(m) => {
413            let (mode, preferred_transliteration, preferred_script) =
414                resolve_multilingual_title_config(options);
415            let complex = citum_schema::reference::types::MultilingualString::Complex(m.clone());
416            let value = crate::values::resolve_multilingual_string(
417                &complex,
418                mode,
419                preferred_transliteration,
420                preferred_script,
421                options.locale.locale.as_str(),
422            );
423            let (rendered, has_link) =
424                render_part_with_case(&value, &fmt, effective_case, quote_depth);
425            let pre_formatted = looks_like_djot_markup(&value);
426            (rendered, has_link, pre_formatted)
427        }
428        _ => {
429            let value = title_text(title, form);
430            let (rendered, has_link) =
431                render_part_with_case(&value, &fmt, effective_case, quote_depth);
432            let pre_formatted = looks_like_djot_markup(&value);
433            (rendered, has_link, pre_formatted)
434        }
435    }
436}
437
438/// Resolve multilingual title config (mode, transliteration, script) from render options.
439fn resolve_multilingual_title_config<'a>(
440    options: &'a RenderOptions<'a>,
441) -> (
442    Option<&'a citum_schema::options::MultilingualMode>,
443    Option<&'a [String]>,
444    Option<&'a String>,
445) {
446    let mode = options
447        .config
448        .multilingual
449        .as_ref()
450        .and_then(|ml| ml.title_mode.as_ref());
451    let preferred_transliteration = options
452        .config
453        .multilingual
454        .as_ref()
455        .and_then(|ml| ml.preferred_transliteration.as_deref());
456    let preferred_script = options
457        .config
458        .multilingual
459        .as_ref()
460        .and_then(|ml| ml.preferred_script.as_ref());
461    (mode, preferred_transliteration, preferred_script)
462}