1use citum_schema::NoteStartTextCase;
13use citum_schema::options::titles::TextCase;
14
15#[must_use]
23pub fn apply_text_case(text: &str, case: TextCase) -> String {
24 match case {
25 TextCase::AsIs => text.to_string(),
26 TextCase::Lowercase => text.to_lowercase(),
27 TextCase::Uppercase => text.to_uppercase(),
28 TextCase::CapitalizeFirst => capitalize_first_word(text),
29 TextCase::Sentence | TextCase::SentenceApa | TextCase::SentenceNlm => {
30 to_sentence_case(text)
31 }
32 TextCase::Title => to_title_case(text),
33 }
34}
35
36#[must_use]
43pub fn apply_to_structured_parts(
44 main: &str,
45 subtitles: &[&str],
46 case: TextCase,
47) -> (String, Vec<String>) {
48 match case {
49 TextCase::SentenceApa => {
50 let main_cased = to_sentence_case(main);
51 let subs_cased = subtitles.iter().map(|s| to_sentence_case(s)).collect();
52 (main_cased, subs_cased)
53 }
54 TextCase::SentenceNlm => {
55 let main_cased = to_sentence_case(main);
56 let subs_cased = subtitles.iter().map(|s| s.to_lowercase()).collect();
58 (main_cased, subs_cased)
59 }
60 _ => {
61 let main_cased = apply_text_case(main, case);
62 let subs_cased = subtitles.iter().map(|s| apply_text_case(s, case)).collect();
63 (main_cased, subs_cased)
64 }
65 }
66}
67
/// Reports whether `lang` should be treated as English.
///
/// * `None` (no language information) is treated as English.
/// * Otherwise the primary language subtag, i.e. everything before the first
///   separator, is compared case-insensitively against `"en"`.
///
/// Both `-` (BCP 47 style, `en-US`) and `_` (Unicode/POSIX locale style,
/// `en_US`) are accepted as subtag separators, so either spelling of a
/// regional English tag is recognized.
#[must_use]
pub fn is_english_language(lang: Option<&str>) -> bool {
    let Some(tag) = lang else {
        // Absent language information defaults to English.
        return true;
    };

    // `split` always yields at least one item, so `next()` is the primary
    // subtag (or the whole tag when no separator is present).
    tag.split(['-', '_'])
        .next()
        .is_some_and(|primary| primary.eq_ignore_ascii_case("en"))
}
80
81#[must_use]
85pub fn resolve_text_case(case: TextCase, language: Option<&str>) -> TextCase {
86 if is_english_language(language) {
87 case
88 } else {
89 match case {
92 TextCase::AsIs | TextCase::Lowercase | TextCase::Uppercase => case,
93 _ => TextCase::AsIs,
94 }
95 }
96}
97
98#[must_use]
101pub(crate) fn apply_note_start_text_case(
102 value: &str,
103 text_case: NoteStartTextCase,
104 language: Option<&str>,
105) -> String {
106 let case = match text_case {
107 NoteStartTextCase::CapitalizeFirst => TextCase::CapitalizeFirst,
108 NoteStartTextCase::Lowercase => TextCase::Lowercase,
109 };
110 apply_text_case(value, resolve_text_case(case, language))
111}
112
113fn to_sentence_case(text: &str) -> String {
115 if text.is_empty() {
116 return String::new();
117 }
118 let lowered = text.to_lowercase();
119 capitalize_first_word(&lowered)
120}
121
/// Upper-cases the first alphabetic character of `text`, leaving all else as is.
///
/// Leading non-alphabetic characters (whitespace, digits, punctuation) are
/// copied unchanged. If `text` contains no alphabetic character the result
/// equals the input. A single character may expand to several when
/// upper-cased (for example `ß` becomes `SS`).
pub(crate) fn capitalize_first_word(text: &str) -> String {
    let mut result = String::with_capacity(text.len());
    let mut remaining = text.chars();

    // Copy the non-alphabetic prefix, stopping right after the first letter.
    for ch in remaining.by_ref() {
        if ch.is_alphabetic() {
            result.extend(ch.to_uppercase());
            break;
        }
        result.push(ch);
    }

    // Whatever the iterator has not consumed is appended untouched.
    result.push_str(remaining.as_str());
    result
}
139
/// Lowercase words that title casing leaves uncapitalized when they appear in
/// the middle of a title.
///
/// The list is only used for membership checks, so the order of the entries
/// carries no meaning.
const TITLE_CASE_STOP_WORDS: &[&str] = &[
    // Articles.
    "a", "an", "the",
    // Coordinating conjunctions.
    "and", "but", "for", "nor", "or", "so", "yet",
    // Short prepositions and particles.
    "as", "at", "by", "from", "in", "of", "on", "to", "up",
    // Abbreviations of "versus".
    "v", "vs",
];
145
146fn capitalize_hyphenated(word: &str, force_all: bool) -> String {
151 word.split('-')
152 .map(|part| {
153 if force_all {
154 capitalize_first_word(part)
155 } else {
156 let alpha_core = part.trim_matches(|c: char| !c.is_alphanumeric());
157 if TITLE_CASE_STOP_WORDS.contains(&alpha_core) {
158 part.to_string()
159 } else {
160 capitalize_first_word(part)
161 }
162 }
163 })
164 .collect::<Vec<_>>()
165 .join("-")
166}
167
/// Strips any run of closing quotes and brackets from the end of `word`.
///
/// The characters removed are `"`, `'`, `)`, `]`, `}`, `»`, `”`, and `’`.
/// Only the tail is affected; the same characters elsewhere in `word` are kept.
fn trim_trailing_closing_punctuation(word: &str) -> &str {
    let is_closer = |c: char| matches!(c, '"' | '\'' | ')' | ']' | '}' | '»' | '”' | '’');
    word.trim_end_matches(is_closer)
}
171
172fn to_title_case(text: &str) -> String {
179 if text.is_empty() {
180 return String::new();
181 }
182
183 let words: Vec<&str> = text.split_whitespace().collect();
184 if words.is_empty() {
185 return text.to_string();
186 }
187
188 let last_idx = words.len() - 1;
189 let mut parts: Vec<String> = Vec::with_capacity(words.len());
190 let mut capitalize_next = false;
191
192 for (i, word) in words.iter().enumerate() {
193 let lower = word.to_lowercase();
194 if i == 0 || i == last_idx || capitalize_next {
195 if lower.contains('-') {
196 parts.push(capitalize_hyphenated(&lower, true));
197 } else {
198 parts.push(capitalize_first_word(&lower));
199 }
200 } else {
201 let alpha_core = lower.trim_matches(|c: char| !c.is_alphanumeric());
204 if TITLE_CASE_STOP_WORDS.contains(&alpha_core) {
205 parts.push(lower);
206 } else if lower.contains('-') {
207 parts.push(capitalize_hyphenated(&lower, false));
208 } else {
209 parts.push(capitalize_first_word(&lower));
210 }
211 }
212 let punctuation_core = trim_trailing_closing_punctuation(word);
215 capitalize_next = punctuation_core.ends_with(':')
216 || punctuation_core.ends_with('?')
217 || punctuation_core.ends_with('!');
218 }
219
220 let mut result = String::with_capacity(text.len());
222 let mut word_iter = parts.iter();
223 let mut in_word = false;
224 let mut current_word = word_iter.next();
225
226 for ch in text.chars() {
227 if ch.is_whitespace() {
228 if in_word {
229 in_word = false;
230 current_word = word_iter.next();
231 }
232 result.push(ch);
233 } else if !in_word && let Some(word) = current_word {
234 result.push_str(word);
235 in_word = true;
236 }
237 }
238
239 result
240}
241
// Unit tests for the text-case helpers defined in this module.
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing,
    clippy::todo,
    clippy::unimplemented,
    clippy::unreachable,
    clippy::get_unwrap,
    reason = "Panicking is acceptable and often desired in tests."
)]
mod tests {
    use super::*;

    // --- capitalize_first_word ---

    // Only the first alphabetic character changes; the rest is untouched.
    #[test]
    fn test_capitalize_first_word_basic() {
        assert_eq!(capitalize_first_word("hello world"), "Hello world");
    }

    // Leading whitespace is preserved and skipped when looking for the first letter.
    #[test]
    fn test_capitalize_first_word_leading_space() {
        assert_eq!(capitalize_first_word("  hello"), "  Hello");
    }

    // Empty input yields empty output.
    #[test]
    fn test_capitalize_first_word_empty() {
        assert_eq!(capitalize_first_word(""), "");
    }

    // An already capitalized first letter is left as it is.
    #[test]
    fn test_capitalize_first_word_already_upper() {
        assert_eq!(capitalize_first_word("Hello"), "Hello");
    }

    // --- to_sentence_case ---

    // Everything but the first letter is lowercased.
    #[test]
    fn test_sentence_case_basic() {
        assert_eq!(
            to_sentence_case("The Quick Brown Fox"),
            "The quick brown fox"
        );
    }

    // The whole input is lowercased first, so acronyms such as "DNA" are not preserved.
    #[test]
    fn test_sentence_case_all_caps() {
        assert_eq!(to_sentence_case("DNA REPLICATION"), "Dna replication");
    }

    // Empty input yields empty output.
    #[test]
    fn test_sentence_case_empty() {
        assert_eq!(to_sentence_case(""), "");
    }

    // --- to_title_case ---

    // Words that are not stop words are all capitalized.
    #[test]
    fn test_title_case_basic() {
        assert_eq!(to_title_case("the quick brown fox"), "The Quick Brown Fox");
    }

    // A stop word in the middle ("of") stays lowercase; the leading "a" is capitalized
    // because it is the first word.
    #[test]
    fn test_title_case_stop_words() {
        assert_eq!(
            to_title_case("a tale of two cities"),
            "A Tale of Two Cities"
        );
    }

    // The last word is capitalized even when it is a stop word ("in").
    #[test]
    fn test_title_case_last_word_capitalized() {
        assert_eq!(
            to_title_case("the world we live in"),
            "The World We Live In"
        );
    }

    // A word following a colon is capitalized even when it is a stop word.
    #[test]
    fn test_title_case_after_colon() {
        assert_eq!(
            to_title_case("the title: a subtitle"),
            "The Title: A Subtitle"
        );
    }

    #[test]
    fn test_title_case_after_colon_stop_word() {
        assert_eq!(
            // "of" and "the" stay lowercase; "a" directly after the colon is capitalized.
            to_title_case("history of the world: a new perspective"),
            "History of the World: A New Perspective"
        );
    }

    // A question mark restarts capitalization just like a colon.
    #[test]
    fn test_title_case_after_question_mark() {
        assert_eq!(
            to_title_case("who's black and why? a hidden chapter"),
            "Who's Black and Why? A Hidden Chapter"
        );
    }

    // A closing quote after the question mark is ignored when detecting the break.
    #[test]
    fn test_title_case_after_question_mark_with_closing_quote() {
        assert_eq!(
            to_title_case("who's black and why?\" a hidden chapter"),
            "Who's Black and Why?\" A Hidden Chapter"
        );
    }

    // "from" is in the stop-word list and therefore stays lowercase mid-title.
    #[test]
    fn test_title_case_from_is_stop_word() {
        assert_eq!(
            to_title_case("a hidden chapter from the eighteenth-century invention of race"),
            "A Hidden Chapter from the Eighteenth-Century Invention of Race"
        );
    }

    // As the first word, a hyphenated compound has every segment capitalized.
    #[test]
    fn test_title_case_hyphenated_compound() {
        assert_eq!(
            to_title_case("eighteenth-century studies"),
            "Eighteenth-Century Studies"
        );
    }

    // In a mid-title compound, a stop-word segment ("to") stays lowercase.
    #[test]
    fn test_title_case_hyphenated_stop_word_part() {
        assert_eq!(to_title_case("a well-to-do family"), "A Well-to-Do Family");
    }

    // --- apply_to_structured_parts ---

    // SentenceApa: the main title and every subtitle are sentence-cased independently.
    #[test]
    fn test_sentence_apa_structured() {
        let (main, subs) = apply_to_structured_parts(
            "Understanding Citation Systems",
            &["History and Practice", "A Comparative View"],
            TextCase::SentenceApa,
        );
        assert_eq!(main, "Understanding citation systems");
        assert_eq!(subs, vec!["History and practice", "A comparative view"]);
    }

    // SentenceNlm: the main title is sentence-cased, subtitles are lowercased in full.
    #[test]
    fn test_sentence_nlm_structured() {
        let (main, subs) = apply_to_structured_parts(
            "Understanding Citation Systems",
            &["History and Practice"],
            TextCase::SentenceNlm,
        );
        assert_eq!(main, "Understanding citation systems");
        assert_eq!(subs, vec!["history and practice"]);
    }

    // Title: each part is title-cased on its own, so a subtitle's first word is
    // capitalized. Note that the acronym "dna" is not restored to upper case.
    #[test]
    fn test_title_case_structured() {
        let (main, subs) =
            apply_to_structured_parts("the dna of empire", &["a new perspective"], TextCase::Title);
        assert_eq!(main, "The Dna of Empire");
        assert_eq!(subs, vec!["A New Perspective"]);
    }

    // --- is_english_language / resolve_text_case ---

    // "en" with or without a region counts as English, as does a missing language.
    #[test]
    fn test_english_language_detection() {
        assert!(is_english_language(Some("en")));
        assert!(is_english_language(Some("en-US")));
        assert!(is_english_language(Some("en-GB")));
        assert!(is_english_language(None));
        assert!(!is_english_language(Some("de")));
        assert!(!is_english_language(Some("fr-FR")));
    }

    // For non-English languages, sentence/title casing degrades to AsIs.
    #[test]
    fn test_resolve_non_english_falls_back() {
        assert_eq!(
            resolve_text_case(TextCase::SentenceApa, Some("de")),
            TextCase::AsIs
        );
        assert_eq!(
            resolve_text_case(TextCase::Title, Some("fr")),
            TextCase::AsIs
        );
        assert_eq!(
            // Lowercase is kept for every language.
            resolve_text_case(TextCase::Lowercase, Some("de")),
            TextCase::Lowercase
        );
    }

    // For English the requested case is returned unchanged.
    #[test]
    fn test_resolve_english_passes_through() {
        assert_eq!(
            resolve_text_case(TextCase::SentenceApa, Some("en")),
            TextCase::SentenceApa
        );
        assert_eq!(
            resolve_text_case(TextCase::Title, Some("en-US")),
            TextCase::Title
        );
    }

    // --- apply_note_start_text_case ---

    // English: CapitalizeFirst is applied.
    #[test]
    fn test_note_start_capitalize_first_uses_english_language_rules() {
        assert_eq!(
            apply_note_start_text_case(
                "edited by",
                NoteStartTextCase::CapitalizeFirst,
                Some("en-US"),
            ),
            "Edited by"
        );
    }

    // Non-English: CapitalizeFirst resolves to AsIs, so the text is returned unchanged.
    #[test]
    fn test_note_start_capitalize_first_falls_back_to_as_is_for_non_english() {
        assert_eq!(
            apply_note_start_text_case(
                "hg. von",
                NoteStartTextCase::CapitalizeFirst,
                Some("de-DE"),
            ),
            "hg. von"
        );
    }

    // Arabic text comes back unchanged. With language "ar" the case already resolves to
    // AsIs, so this exercises the non-English fallback rather than script-specific logic.
    #[test]
    fn test_note_start_capitalize_first_is_no_op_for_uncased_scripts() {
        assert_eq!(
            apply_note_start_text_case("ابن سينا", NoteStartTextCase::CapitalizeFirst, Some("ar"),),
            "ابن سينا"
        );
    }
}