Skip to main content

provenant/utils/
text.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::path::Path;
5
/// The byte-order mark code point, U+FEFF (three bytes, `EF BB BF`, in UTF-8).
///
/// Used as the prefix that `strip_utf8_bom_str` removes from the start of text.
const UTF8_BOM_CHAR: char = '\u{FEFF}';
7
/// Replaces every verbatim escape sequence `\r`, `\n`, or `\t` with one space.
///
/// "Verbatim" means the two-character spelling: a backslash followed by the
/// letter `r`, `n`, or `t`. Real control characters (an actual newline,
/// carriage return, or tab) are copied through unchanged, as is any backslash
/// that is not followed by one of those three letters.
pub fn remove_verbatim_escape_sequences(s: &str) -> String {
    let mut cleaned = String::with_capacity(s.len());
    let mut remaining = s.chars().peekable();

    while let Some(current) = remaining.next() {
        // `next_if` consumes the letter only when it completes an escape
        // sequence, so a lone backslash falls through and is kept as-is.
        let completes_escape = current == '\\'
            && remaining
                .next_if(|&letter| matches!(letter, 'r' | 'n' | 't'))
                .is_some();

        cleaned.push(if completes_escape { ' ' } else { current });
    }

    cleaned
}
13
14pub fn strip_utf8_bom_str(s: &str) -> &str {
15    s.strip_prefix(UTF8_BOM_CHAR).unwrap_or(s)
16}
17
/// Decides whether verbatim escape sequences should be removed for a file.
///
/// Returns `true` when `is_source` is set, regardless of `path`. Otherwise
/// returns `true` only when the extension of `path` is `po` or `pot`,
/// compared ASCII case-insensitively. A path with no extension, or with an
/// extension that is not valid UTF-8, yields `false`.
pub fn should_remove_verbatim_escape_sequences(path: &Path, is_source: bool) -> bool {
    const ESCAPE_BEARING_EXTENSIONS: [&str; 2] = ["po", "pot"];

    // Evaluated lazily so the path is not inspected when `is_source` already
    // settles the answer.
    let has_listed_extension = || match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => ESCAPE_BEARING_EXTENSIONS
            .iter()
            .any(|&listed| ext.eq_ignore_ascii_case(listed)),
        None => false,
    };

    is_source || has_listed_extension()
}
27
#[cfg(test)]
mod tests {
    use super::*;

    // --- strip_utf8_bom_str ---

    #[test]
    fn test_strip_utf8_bom_str_with_bom() {
        assert_eq!(strip_utf8_bom_str("\u{FEFF}Hello World"), "Hello World");
    }

    #[test]
    fn test_strip_utf8_bom_str_without_bom() {
        assert_eq!(strip_utf8_bom_str("Hello World"), "Hello World");
    }

    #[test]
    fn test_strip_utf8_bom_str_empty() {
        assert_eq!(strip_utf8_bom_str(""), "");
    }

    #[test]
    fn test_strip_utf8_bom_str_only_bom() {
        assert_eq!(strip_utf8_bom_str("\u{FEFF}"), "");
    }

    #[test]
    fn test_bom_character_is_not_whitespace() {
        let with_bom = "\u{FEFF}Hello";
        // `trim` leaves the mark in place, so explicit stripping is required.
        assert_ne!(with_bom.trim(), "Hello");
        assert_eq!(strip_utf8_bom_str(with_bom), "Hello");
    }

    // --- remove_verbatim_escape_sequences ---

    #[test]
    fn test_remove_verbatim_escape_sequences_basic() {
        assert_eq!(
            remove_verbatim_escape_sequences("line1\\nline2\\rline3\\tline4"),
            "line1 line2 line3 line4"
        );
    }

    #[test]
    fn test_remove_verbatim_escape_sequences_only_backslash_n() {
        assert_eq!(
            remove_verbatim_escape_sequences("hello\\nworld"),
            "hello world"
        );
    }

    #[test]
    fn test_remove_verbatim_escape_sequences_no_escapes() {
        let plain = "normal text without escapes";
        assert_eq!(remove_verbatim_escape_sequences(plain), plain);
    }

    #[test]
    fn test_remove_verbatim_escape_sequences_actual_newline() {
        // A real newline character is not a verbatim escape and must survive.
        let with_newline = "line1\nline2";
        assert_eq!(
            remove_verbatim_escape_sequences(with_newline),
            "line1\nline2"
        );
    }

    #[test]
    fn test_remove_verbatim_escape_sequences_multiple() {
        assert_eq!(remove_verbatim_escape_sequences("a\\nb\\nc\\n"), "a b c ");
    }

    #[test]
    fn test_remove_verbatim_escape_sequences_options_c_sample() {
        assert_eq!(
            remove_verbatim_escape_sequences("Try `progname --help' for more information.\\n"),
            "Try `progname --help' for more information. "
        );
    }

    // --- should_remove_verbatim_escape_sequences ---

    #[test]
    fn test_should_remove_verbatim_escape_sequences_for_source_files() {
        let source_path = Path::new("main.rs");
        assert!(should_remove_verbatim_escape_sequences(source_path, true));
    }

    #[test]
    fn test_should_remove_verbatim_escape_sequences_for_po_files() {
        for file_name in ["locale.po", "template.pot"] {
            assert!(should_remove_verbatim_escape_sequences(
                Path::new(file_name),
                false
            ));
        }
    }

    #[test]
    fn test_should_not_remove_verbatim_escape_sequences_for_plain_text() {
        let text_path = Path::new("README.txt");
        assert!(!should_remove_verbatim_escape_sequences(text_path, false));
    }
}