Skip to main content

provenant/utils/
text.rs

1use std::path::Path;
2
/// The Unicode byte order mark, U+FEFF, as a single `char`.
///
/// In UTF-8 this code point is encoded as the three bytes `EF BB BF`.
const UTF8_BOM_CHAR: char = '\u{FEFF}';
4
/// Replaces every *literal* `\r`, `\n` and `\t` escape sequence in `s` with a single space.
///
/// "Literal" means the two-character text: a backslash followed by `r`, `n` or `t`.
/// Real carriage-return, line-feed and tab characters are left untouched, as is a
/// backslash followed by any other character (or by nothing at all).
///
/// The text is scanned once and written into one preallocated buffer.
pub fn remove_verbatim_escape_sequences(s: &str) -> String {
    // Each replacement shrinks two bytes to one, so the result never exceeds `s.len()`.
    let mut cleaned = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();

    while let Some(current) = chars.next() {
        // `next_if` consumes the following character only when it completes an escape
        // sequence; otherwise it stays in the iterator and is handled on the next turn.
        if current == '\\' && chars.next_if(|&c| matches!(c, 'r' | 'n' | 't')).is_some() {
            cleaned.push(' ');
        } else {
            cleaned.push(current);
        }
    }

    cleaned
}
10
11pub fn strip_utf8_bom_str(s: &str) -> &str {
12    s.strip_prefix(UTF8_BOM_CHAR).unwrap_or(s)
13}
14
/// Decides whether literal escape sequences should be removed from a file's text.
///
/// Returns `true` when `is_source` is set, or when `path` has the extension `po` or
/// `pot` (presumably gettext catalogs and templates), compared ASCII case-insensitively.
/// Paths without an extension, or whose extension is not valid UTF-8, yield `false`
/// unless `is_source` is set.
pub fn should_remove_verbatim_escape_sequences(path: &Path, is_source: bool) -> bool {
    is_source
        || path
            .extension()
            .and_then(|ext| ext.to_str())
            // Compare in place rather than allocating a lowercased copy of the extension.
            .is_some_and(|ext| {
                ext.eq_ignore_ascii_case("po") || ext.eq_ignore_ascii_case("pot")
            })
}
24
/// Unit tests for the BOM-stripping and escape-sequence helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_strip_utf8_bom_str_with_bom() {
        let s = "\u{FEFF}Hello World";
        assert_eq!(strip_utf8_bom_str(s), "Hello World");
    }

    #[test]
    fn test_strip_utf8_bom_str_without_bom() {
        let s = "Hello World";
        assert_eq!(strip_utf8_bom_str(s), "Hello World");
    }

    #[test]
    fn test_strip_utf8_bom_str_empty() {
        let s = "";
        assert_eq!(strip_utf8_bom_str(s), "");
    }

    #[test]
    fn test_strip_utf8_bom_str_only_bom() {
        let s = "\u{FEFF}";
        assert_eq!(strip_utf8_bom_str(s), "");
    }

    #[test]
    fn test_strip_utf8_bom_str_only_strips_leading_bom() {
        // A BOM that is not at the start is ordinary content and must survive.
        assert_eq!(strip_utf8_bom_str("a\u{FEFF}b"), "a\u{FEFF}b");
        // Only one leading BOM is removed.
        assert_eq!(strip_utf8_bom_str("\u{FEFF}\u{FEFF}x"), "\u{FEFF}x");
    }

    #[test]
    fn test_bom_character_is_not_whitespace() {
        // `trim` does not remove the BOM, which is why a dedicated helper exists.
        let s = "\u{FEFF}Hello";
        assert_ne!(s.trim(), "Hello");
        assert_eq!(strip_utf8_bom_str(s), "Hello");
    }

    #[test]
    fn test_remove_verbatim_escape_sequences_basic() {
        let input = "line1\\nline2\\rline3\\tline4";
        let output = remove_verbatim_escape_sequences(input);
        assert_eq!(output, "line1 line2 line3 line4");
    }

    #[test]
    fn test_remove_verbatim_escape_sequences_only_backslash_n() {
        let input = "hello\\nworld";
        let output = remove_verbatim_escape_sequences(input);
        assert_eq!(output, "hello world");
    }

    #[test]
    fn test_remove_verbatim_escape_sequences_no_escapes() {
        let input = "normal text without escapes";
        let output = remove_verbatim_escape_sequences(input);
        assert_eq!(output, input);
    }

    #[test]
    fn test_remove_verbatim_escape_sequences_actual_newline() {
        // A real newline character is not a literal escape sequence and stays as is.
        let input = "line1\nline2";
        let output = remove_verbatim_escape_sequences(input);
        assert_eq!(output, "line1\nline2");
    }

    #[test]
    fn test_remove_verbatim_escape_sequences_multiple() {
        let input = "a\\nb\\nc\\n";
        let output = remove_verbatim_escape_sequences(input);
        assert_eq!(output, "a b c ");
    }

    #[test]
    fn test_remove_verbatim_escape_sequences_options_c_sample() {
        let input = "Try `progname --help' for more information.\\n";
        let output = remove_verbatim_escape_sequences(input);
        assert_eq!(output, "Try `progname --help' for more information. ");
    }

    #[test]
    fn test_should_remove_verbatim_escape_sequences_for_source_files() {
        assert!(should_remove_verbatim_escape_sequences(
            Path::new("main.rs"),
            true
        ));
    }

    #[test]
    fn test_should_remove_verbatim_escape_sequences_for_po_files() {
        assert!(should_remove_verbatim_escape_sequences(
            Path::new("locale.po"),
            false
        ));
        assert!(should_remove_verbatim_escape_sequences(
            Path::new("template.pot"),
            false
        ));
    }

    #[test]
    fn test_should_remove_verbatim_escape_sequences_ignores_extension_case() {
        assert!(should_remove_verbatim_escape_sequences(
            Path::new("LOCALE.PO"),
            false
        ));
        assert!(should_remove_verbatim_escape_sequences(
            Path::new("template.Pot"),
            false
        ));
    }

    #[test]
    fn test_should_not_remove_verbatim_escape_sequences_for_plain_text() {
        assert!(!should_remove_verbatim_escape_sequences(
            Path::new("README.txt"),
            false
        ));
    }

    #[test]
    fn test_should_not_remove_verbatim_escape_sequences_without_extension() {
        // A bare file name "po" has no extension, so it must not match.
        assert!(!should_remove_verbatim_escape_sequences(
            Path::new("po"),
            false
        ));
    }
}