crowbook_text_processing/
caps.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with
3// this file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
//! This module provides functions to automatically transform uppercase words
6//! to small caps. Since it is dependent on the output format, functions differ for
7//! LaTeX or HTML output.
8
9
10use regex::{Regex, Captures};
11use std::borrow::Cow;
12
13/// Transform uppercase words to small caps for LaTeX output.
14///
15/// Note that it will put all the text in small capitals in lowercase: sometimes,
16/// it would be best to do otherwise (e.g. put the first letter in uppercase or whatever).
17///
18/// It only applies to words (or abbreviations: you can use dots to separate each letter) that
19/// have strictly more than one letter that are in uppercase in the input.
20/// 
21///
22/// # Example
23///
24/// ```
25/// use crowbook_text_processing::caps;
26///
27/// let s = caps::latex("Some ACRONYM or SCREAMING or whatever.");
28/// assert_eq!(&s, "Some \\textsc{acronym} or \\textsc{screaming} or whatever.");
29/// ```
30pub fn latex<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
31    let mut res = input.into();
32
33    lazy_static! {
34        static ref REGEX: Regex = Regex::new(r"\b\p{Lu}{2,}\b").unwrap();
35        static ref REGEX_DOTS: Regex = Regex::new(r"\b((\p{Lu}\.){1,}\p{Lu})\b").unwrap();
36    }
37
38    for cap in REGEX.captures_iter(&res) {
39        println!("capture: {:?}", cap);
40    }
41
42    if REGEX.is_match(&res) {
43        let tmp = REGEX.replace_all(&res, |caps: &Captures| {
44            format!("\\textsc{{{}}}",
45                    caps[0].to_lowercase())
46        });
47        res = Cow::Owned(tmp.into_owned())
48    }
49    if REGEX_DOTS.is_match(&res) {
50        let tmp = REGEX_DOTS.replace_all(&res, |caps: &Captures| {
51            format!("\\textsc{{{}}}",
52                    caps[0].to_lowercase())
53        });
54        res = Cow::Owned(tmp.into_owned())
55    }
56    res
57}
58
59
/// Checks `caps::latex` against a table of `(input, expected output)` pairs
/// covering plain uppercase words, text without anything to change, single
/// capital letters, uppercase words at the start or at the end of the text,
/// uppercase prefixes of longer words and dotted abbreviations.
#[test]
fn latex_1() {
    use crate::caps;

    let cases: &[(&str, &str)] = &[
        (
            "Some ACRONYM or SCREAMING or whatever.",
            "Some \\textsc{acronym} or \\textsc{screaming} or whatever.",
        ),
        ("Nothing to change.", "Nothing to change."),
        (
            "A single letter is not capitalized. TWO or more are.",
            "A single letter is not capitalized. \\textsc{two} or more are.",
        ),
        ("BEGIN with caps", "\\textsc{begin} with caps"),
        ("BEGINning with caps", "BEGINning with caps"),
        ("Ending with CAPS", "Ending with \\textsc{caps}"),
        (
            "Some A.W.D (Acronym With Dots)",
            "Some \\textsc{a.w.d} (Acronym With Dots)",
        ),
        (
            "Sentence ennding with A.W.D.",
            "Sentence ennding with \\textsc{a.w.d}.",
        ),
    ];

    // Cases are run in order; the first mismatch aborts the test.
    for &(input, expected) in cases.iter() {
        let output = caps::latex(input);
        assert_eq!(&output, expected);
    }
}