1use std::path::Path;
2
/// The Unicode byte-order mark (U+FEFF) that `strip_utf8_bom_str` removes
/// from the start of a string.
const UTF8_BOM_CHAR: char = '\u{FEFF}';
4
/// File extensions that `is_source` treats as source code.
///
/// Every entry is lower-case and includes its leading dot.
const SOURCE_EXTENSIONS: &[&str] = &[
    ".ada", ".adb", ".asm", ".asp", ".aj", ".bas", ".bat", ".c", ".c++", ".cc", ".clj", ".cob",
    ".cpp", ".cs", ".csh", ".csx", ".cxx", ".d", ".e", ".el", ".f", ".fs", ".f77", ".f90", ".for",
    ".fth", ".ftn", ".go", ".h", ".hh", ".hpp", ".hs", ".html", ".htm", ".hxx", ".java", ".js",
    ".jsx", ".jsp", ".ksh", ".kt", ".lisp", ".lua", ".m", ".m4", ".nim", ".pas", ".php", ".pl",
    ".pp", ".ps1", ".py", ".r", ".rb", ".ruby", ".rs", ".s", ".scala", ".sh", ".swift", ".ts",
    ".vhdl", ".verilog", ".vb", ".groovy", ".po",
];

/// Reports whether `path` has an extension listed in [`SOURCE_EXTENSIONS`].
///
/// The comparison ignores case: the extension is lower-cased before the
/// lookup. Paths without an extension (for example `Makefile`) yield `false`.
/// Only the path text is inspected; the file system is never touched.
pub fn is_source(path: &Path) -> bool {
    // No extension at all means it cannot be in the table.
    let lowered = match path.extension() {
        Some(raw) => raw.to_string_lossy().to_lowercase(),
        None => return false,
    };

    // Table entries carry a leading dot while `Path::extension` omits it, so
    // drop the dot from each entry before comparing.
    SOURCE_EXTENSIONS
        .iter()
        .any(|known| known.strip_prefix('.') == Some(lowered.as_str()))
}
23
/// Replaces literal, two-character escape sequences with a single space.
///
/// The sequences are a backslash followed by `r`, `n` or `t`, written out as
/// text. Real control characters (an actual newline, carriage return or tab)
/// are left untouched, as is a backslash followed by any other character.
pub fn remove_verbatim_escape_sequences(s: &str) -> String {
    // The result is never longer than the input: each match shrinks two
    // characters to one.
    let mut cleaned = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();

    while let Some(current) = chars.next() {
        let starts_escape = current == '\\'
            && matches!(
                chars.peek().copied(),
                Some('r') | Some('n') | Some('t')
            );

        if starts_escape {
            // Consume the letter that completes the sequence.
            chars.next();
            cleaned.push(' ');
        } else {
            cleaned.push(current);
        }
    }

    cleaned
}
29
30pub fn strip_utf8_bom_str(s: &str) -> &str {
31 s.strip_prefix(UTF8_BOM_CHAR).unwrap_or(s)
32}
33
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Runs `is_source` on a path built from `name`.
    fn source(name: &str) -> bool {
        is_source(&PathBuf::from(name))
    }

    // --- strip_utf8_bom_str ---

    #[test]
    fn test_strip_utf8_bom_str_with_bom() {
        assert_eq!(strip_utf8_bom_str("\u{FEFF}Hello World"), "Hello World");
    }

    #[test]
    fn test_strip_utf8_bom_str_without_bom() {
        assert_eq!(strip_utf8_bom_str("Hello World"), "Hello World");
    }

    #[test]
    fn test_strip_utf8_bom_str_empty() {
        assert_eq!(strip_utf8_bom_str(""), "");
    }

    #[test]
    fn test_strip_utf8_bom_str_only_bom() {
        assert_eq!(strip_utf8_bom_str("\u{FEFF}"), "");
    }

    #[test]
    fn test_bom_character_is_not_whitespace() {
        let with_bom = "\u{FEFF}Hello";
        // `trim` leaves the BOM in place, so explicit stripping is required.
        assert_ne!(with_bom.trim(), "Hello");
        assert_eq!(strip_utf8_bom_str(with_bom), "Hello");
    }

    // --- is_source ---

    #[test]
    fn test_is_source_rust() {
        assert!(source("test.rs"));
        assert!(source("TEST.RS"));
    }

    #[test]
    fn test_is_source_python() {
        assert!(source("script.py"));
    }

    #[test]
    fn test_is_source_javascript() {
        assert!(source("app.js"));
    }

    #[test]
    fn test_is_source_c() {
        assert!(source("options.c"));
        assert!(source("OPTIONS.C"));
    }

    #[test]
    fn test_is_source_not_source() {
        assert!(!source("README.md"));
        assert!(!source("data.json"));
        assert!(!source("config.yaml"));
    }

    #[test]
    fn test_is_source_no_extension() {
        assert!(!source("Makefile"));
    }

    #[test]
    fn test_is_source_options_c() {
        assert!(
            source("testdata/license-golden/datadriven/lic2/regression/options.c"),
            "options.c should be recognized as source file"
        );
    }

    // --- remove_verbatim_escape_sequences ---

    #[test]
    fn test_remove_verbatim_escape_sequences_basic() {
        assert_eq!(
            remove_verbatim_escape_sequences("line1\\nline2\\rline3\\tline4"),
            "line1 line2 line3 line4"
        );
    }

    #[test]
    fn test_remove_verbatim_escape_sequences_only_backslash_n() {
        assert_eq!(
            remove_verbatim_escape_sequences("hello\\nworld"),
            "hello world"
        );
    }

    #[test]
    fn test_remove_verbatim_escape_sequences_no_escapes() {
        let text = "normal text without escapes";
        assert_eq!(remove_verbatim_escape_sequences(text), text);
    }

    #[test]
    fn test_remove_verbatim_escape_sequences_actual_newline() {
        // A real newline character is not a verbatim escape sequence.
        assert_eq!(
            remove_verbatim_escape_sequences("line1\nline2"),
            "line1\nline2"
        );
    }

    #[test]
    fn test_remove_verbatim_escape_sequences_multiple() {
        assert_eq!(remove_verbatim_escape_sequences("a\\nb\\nc\\n"), "a b c ");
    }

    #[test]
    fn test_remove_verbatim_escape_sequences_options_c_sample() {
        assert_eq!(
            remove_verbatim_escape_sequences("Try `progname --help' for more information.\\n"),
            "Try `progname --help' for more information. "
        );
    }
}