Skip to main content

citum_engine/processor/
labels.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! Label generation for alphanumeric citation styles.
7//!
8//! Generates citation labels like `[AHU74]`, `[Knu84]`, `[Ban+92]`
9//! from author family names and publication year.
10//!
11//! ## Algorithm
12//!
13//! - 1 author:  up to `single_author_chars` chars from family name + year
//! - 2+ authors (below `et_al_min`): `multi_author_chars` chars each + year
15//! - >= et_al_min authors: first N authors' initials (N from `et_al_names`) + et_al_marker + year
16
17use crate::reference::Reference;
18use citum_schema::options::LabelParams;
19
20/// Generate a base citation label for a reference (without disambiguation suffix).
21///
22/// Uses the first letter(s) of author family names combined with the
23/// publication year according to the resolved `LabelParams`.
24#[must_use]
25pub fn generate_base_label(reference: &Reference, params: &LabelParams) -> String {
26    let name_part = generate_name_part(reference, params);
27    let year_part = generate_year_part(reference, params.year_digits);
28    format!("{name_part}{year_part}")
29}
30
31fn generate_name_part(reference: &Reference, params: &LabelParams) -> String {
32    let Some(contributor) = reference.author().or_else(|| reference.editor()) else {
33        // No author/editor: use first 3 chars of title
34        return reference
35            .title()
36            .map(|t| {
37                let s = t.to_string();
38                s.chars()
39                    .filter(|c| c.is_alphabetic())
40                    .take(params.single_author_chars as usize)
41                    .collect()
42            })
43            .unwrap_or_default();
44    };
45
46    let names = contributor.to_names_vec();
47    let count = names.len();
48
49    if count == 1 {
50        // Single author: up to single_author_chars from family name
51        #[allow(clippy::indexing_slicing, reason = "count checked")]
52        let family = names[0].family_or_literal();
53        family
54            .chars()
55            .filter(|c| c.is_alphabetic())
56            .take(params.single_author_chars as usize)
57            .collect()
58    } else if count < params.et_al_min as usize {
59        // 2 to et_al_min-1 authors: multi_author_chars chars each
60        names
61            .iter()
62            .map(|n| {
63                n.family_or_literal()
64                    .chars()
65                    .filter(|c| c.is_alphabetic())
66                    .take(params.multi_author_chars as usize)
67                    .collect::<String>()
68            })
69            .collect::<String>()
70    } else {
71        // et_al_min or more authors: first N initials + et_al_marker
72        let initials: String = names
73            .iter()
74            .take(params.et_al_names as usize)
75            .map(|n| {
76                n.family_or_literal()
77                    .chars()
78                    .filter(|c| c.is_alphabetic())
79                    .take(params.multi_author_chars as usize)
80                    .collect::<String>()
81            })
82            .collect::<String>();
83        format!("{}{}", initials, params.et_al_marker)
84    }
85}
86
87fn generate_year_part(reference: &Reference, year_digits: u8) -> String {
88    reference
89        .csl_issued_date()
90        .and_then(|d| d.year().parse::<i32>().ok())
91        .map(|y| {
92            let y_str = y.to_string();
93            if year_digits == 2 && y_str.len() >= 2 {
94                #[allow(clippy::string_slice, reason = "length checked")]
95                y_str[y_str.len() - 2..].to_string()
96            } else {
97                y_str
98            }
99        })
100        .unwrap_or_default()
101}
102
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing,
    clippy::todo,
    clippy::unimplemented,
    clippy::unreachable,
    clippy::get_unwrap,
    reason = "Panicking is acceptable and often desired in tests."
)]
mod tests {
    use super::*;
    use citum_schema::options::{LabelConfig, LabelPreset};
    use csl_legacy::csl_json::{DateVariable, Name, Reference as LegacyReference};

    /// Authors of the eight-author fixture, as `(family, given)` pairs.
    const TRANSFORMER_AUTHORS: [(&str, &str); 8] = [
        ("Vaswani", "Ashish"),
        ("Shazeer", "Noam"),
        ("Parmar", "Niki"),
        ("Uszkoreit", "Jakob"),
        ("Jones", "Llion"),
        ("Gomez", "Aidan N."),
        ("Kaiser", "Lukasz"),
        ("Polosukhin", "Illia"),
    ];

    /// Resolve the effective parameters of a specific preset.
    fn preset_params(preset: LabelPreset) -> LabelParams {
        LabelConfig {
            preset,
            ..Default::default()
        }
        .effective_params()
    }

    /// Parameters of the default configuration.
    fn alpha_params() -> LabelParams {
        LabelConfig::default().effective_params()
    }

    fn din_params() -> LabelParams {
        preset_params(LabelPreset::Din)
    }

    fn ams_params() -> LabelParams {
        preset_params(LabelPreset::Ams)
    }

    /// Default parameters, but with the year rendered in full.
    fn params_4digit_year() -> LabelParams {
        LabelParams {
            year_digits: 4,
            ..alpha_params()
        }
    }

    /// Turn `(family, given)` pairs into structured names.
    fn people(pairs: &[(&str, &str)]) -> Vec<Name> {
        pairs
            .iter()
            .map(|&(family, given)| Name::new(family, given))
            .collect()
    }

    /// A legacy reference of the given type with every optional field unset.
    fn skeleton(ref_type: &str) -> LegacyReference {
        LegacyReference {
            id: "t".to_string(),
            ref_type: ref_type.to_string(),
            ..Default::default()
        }
    }

    /// A book with the given authors, issued in `year`.
    fn make_ref(authors: Vec<Name>, year: i32) -> Reference {
        let mut raw = skeleton("book");
        raw.author = Some(authors);
        raw.issued = Some(DateVariable::year(year));
        Reference::from(raw)
    }

    /// A 2017 book by the first `author_count` entries of `TRANSFORMER_AUTHORS`.
    fn transformer_ref(author_count: usize) -> Reference {
        make_ref(people(&TRANSFORMER_AUTHORS[..author_count]), 2017)
    }

    /// The LeCun / Bengio / Hinton 2015 three-author fixture.
    fn deep_learning_trio() -> Reference {
        make_ref(
            people(&[("LeCun", "Yann"), ("Bengio", "Yoshua"), ("Hinton", "Geoffrey")]),
            2015,
        )
    }

    /// The single-author Kuhn 1962 fixture.
    fn kuhn() -> Reference {
        make_ref(people(&[("Kuhn", "Thomas S.")]), 1962)
    }

    #[test]
    fn test_single_author_alpha() {
        // Three letters of the family name plus a two-digit year.
        let label = generate_base_label(&kuhn(), &alpha_params());
        assert_eq!(label, "Kuh62");
    }

    #[test]
    fn test_single_author_short_family() {
        // A family name shorter than the limit is used as-is, without padding.
        let reference = make_ref(people(&[("Li", "Wei")]), 2010);
        let label = generate_base_label(&reference, &alpha_params());
        assert_eq!(label, "Li10");
    }

    #[test]
    fn test_single_author_din() {
        // DIN allows up to four letters for a single author.
        let label = generate_base_label(&kuhn(), &din_params());
        assert_eq!(label, "Kuhn62");
    }

    #[test]
    fn test_two_authors_alpha() {
        // Two authors is below et_al_min=4, so each contributes one letter.
        let reference = make_ref(
            people(&[("Weinberg", "Gerald M."), ("Freedman", "Daniel P.")]),
            1971,
        );
        let label = generate_base_label(&reference, &alpha_params());
        assert_eq!(label, "WF71");
    }

    #[test]
    fn test_three_authors_alpha() {
        // Three authors is still below et_al_min=4: one letter each.
        let label = generate_base_label(&deep_learning_trio(), &alpha_params());
        assert_eq!(label, "LBH15");
    }

    #[test]
    fn test_et_al_alpha() {
        // Eight authors reaches et_al_min=4: first three initials plus "+".
        let label = generate_base_label(&transformer_ref(8), &alpha_params());
        assert_eq!(label, "VSP+17");
    }

    #[test]
    fn test_alpha_et_al_threshold_boundary() {
        // Exactly et_al_min=4 authors must already use the et-al form.
        let label = generate_base_label(&transformer_ref(4), &alpha_params());
        assert_eq!(label, "VSP+17");
    }

    #[test]
    fn test_three_authors_din_triggers_et_al() {
        // DIN et_al_min=3: three authors take the et-al path, which has no marker.
        let label = generate_base_label(&deep_learning_trio(), &din_params());
        assert_eq!(label, "LBH15");
    }

    #[test]
    fn test_et_al_din_no_marker() {
        // DIN's et-al form carries no "+": "VSP17", not "VSP+17".
        let label = generate_base_label(&transformer_ref(4), &din_params());
        assert_eq!(label, "VSP17");
    }

    #[test]
    fn test_four_digit_year() {
        let label = generate_base_label(&kuhn(), &params_4digit_year());
        assert_eq!(label, "Kuh1962");
    }

    #[test]
    fn test_literal_org_author() {
        // A literal name is abbreviated like a family name: three letters of "World Bank".
        let mut raw = skeleton("report");
        raw.author = Some(vec![Name::literal("World Bank")]);
        raw.issued = Some(DateVariable::year(2023));
        let label = generate_base_label(&Reference::from(raw), &alpha_params());
        assert_eq!(label, "Wor23");
    }

    #[test]
    fn test_no_author_falls_back_to_title() {
        // Without author or editor, the leading letters of the title are used.
        let mut raw = skeleton("book");
        raw.title = Some("Deep Learning".to_string());
        raw.issued = Some(DateVariable::year(2016));
        let label = generate_base_label(&Reference::from(raw), &alpha_params());
        assert_eq!(label, "Dee16");
    }

    #[test]
    fn test_no_date_gives_empty_year() {
        // A missing issued date leaves the year portion empty.
        let mut raw = skeleton("book");
        raw.author = Some(vec![Name::new("Knuth", "Donald E.")]);
        let label = generate_base_label(&Reference::from(raw), &alpha_params());
        assert_eq!(label, "Knu");
    }

    #[test]
    fn test_ams_et_al_uses_4_initials() {
        // AMS keeps four initials in the et-al form, not three.
        let label = generate_base_label(&transformer_ref(5), &ams_params());
        assert_eq!(label, "VSPU+17");
    }

    #[test]
    fn test_alpha_et_al_uses_3_initials() {
        // The default preset keeps three initials for the same five authors.
        let label = generate_base_label(&transformer_ref(5), &alpha_params());
        assert_eq!(label, "VSP+17");
    }
}
349}