qlty_analysis/code/
language_detector.rs1use lazy_static::lazy_static;
4use regex::Regex;
5use std::{collections::HashMap, fs::File as FS_File, io::BufReader, path::Path};
6
7pub fn detect(
8 path: &Path,
9 interpreters: &HashMap<String, Vec<String>>,
10) -> Result<String, Box<dyn std::error::Error>> {
11 let file = FS_File::open(path)?;
12 let mut reader = BufReader::new(file);
13 let language = get_language_from_shebang(&mut reader, interpreters)?;
14
15 if crate::lang::from_str(&language).is_some() {
16 return Ok(language);
17 }
18
19 Ok("".to_string())
20}
21
22pub fn get_language_from_shebang<R: std::io::BufRead>(
23 reader: R,
24 interpreters: &HashMap<String, Vec<String>>,
25) -> Result<String, std::io::Error> {
26 let mut lines = reader.lines();
27 let shebang_line = match lines.next() {
28 Some(line) => line,
29 None => return Ok("".to_string()),
30 }?;
31 let mut extra_content = String::new();
32
33 if !shebang_line.starts_with("#!") {
34 return Ok("".to_string());
35 }
36
37 let language = shebang_line
38 .split('/')
39 .last()
40 .and_then(|interpreter_line| {
41 let mut splits = interpreter_line.split_whitespace();
42 match splits.next() {
43 Some("env") => splits.next(),
45 Some("sh") => {
47 let lines: Vec<String> = lines.take(4).filter_map(|line| line.ok()).collect();
48 extra_content = lines.join("\n");
49 lazy_static! {
50 static ref SHEBANG_HACK_RE: Regex =
51 Regex::new(r"exec (\w+).+\$0.+\$@").unwrap();
52 }
53 let interpreter = SHEBANG_HACK_RE
54 .captures(&extra_content[..])
55 .and_then(|captures| captures.get(1))
56 .map(|interpreter| interpreter.as_str())
57 .unwrap_or("sh");
58 Some(interpreter)
59 }
60 Some(interpreter) => Some(interpreter),
62 None => None,
64 }
65 })
66 .and_then(|interpreter| {
67 lazy_static! {
69 static ref RE: Regex = Regex::new(r"[0-9]\.[0-9]").unwrap();
70 }
71 let parsed_interpreter = RE.split(interpreter).next().unwrap();
72 interpreters
73 .iter()
74 .find(|(_, interpreters)| interpreters.contains(&parsed_interpreter.to_string()))
75 .map(|(lang, _)| lang.to_string())
76 });
77
78 match language {
79 Some(language) => Ok(language),
80 None => Ok("".to_string()),
81 }
82}
83
#[cfg(test)]
mod test {
    use super::*;
    use qlty_config::config::Builder;
    use std::{
        collections::HashMap,
        env,
        fs::{self, File},
        io::{Cursor, Write},
        path::PathBuf,
    };

    /// Writes `shebang` plus a newline to a file in the system temp directory
    /// and returns its path. The process id is appended to `name` so that
    /// concurrently running test processes do not share a file.
    fn write_temp_script(name: &str, shebang: &str) -> PathBuf {
        let path = env::temp_dir().join(format!("{}_{}", name, std::process::id()));
        let mut file = File::create(&path).expect("Failed to create temporary file");
        writeln!(file, "{}", shebang).expect("Failed to write to temporary file");
        path
    }

    /// Runs `detect` on a temporary file whose first line is `shebang`,
    /// removing the file before the result is inspected.
    fn detect_from_shebang(name: &str, shebang: &str) -> String {
        let path = write_temp_script(name, shebang);
        let result = detect(&path, &mock_interpreters());
        fs::remove_file(&path).expect("Failed to delete temporary file");
        result.unwrap()
    }

    #[test]
    fn test_detect_get_language_by_shebang() {
        assert_eq!(
            detect_from_shebang("temp_shebang_file", "#!/usr/bin/env node"),
            "javascript"
        );
    }

    #[test]
    fn test_detect_get_unsupported_language() {
        assert_eq!(
            detect_from_shebang("temp_unsupported_language_file", "#!/usr/bin/env haskell"),
            ""
        );
    }

    #[test]
    fn test_shebang_get_language() {
        assert_eq!(
            get_language_from_shebang(Cursor::new("#!/usr/bin/python"), &mock_interpreters())
                .unwrap(),
            "python"
        );
    }

    #[test]
    fn test_shebang_get_language_env() {
        assert_eq!(
            get_language_from_shebang(Cursor::new("#!/usr/bin/env node"), &mock_interpreters())
                .unwrap(),
            "javascript"
        );
    }

    #[test]
    fn test_shebang_get_language_with_minor_version() {
        assert_eq!(
            get_language_from_shebang(Cursor::new("#!/usr/bin/python2.6"), &mock_interpreters())
                .unwrap(),
            "python"
        );
    }

    #[test]
    fn test_shebang_empty_cases() {
        let interpreters = mock_interpreters();
        // Every input here must yield no language at all.
        let inputs = [
            "#!/usr/bin/env",
            "#!/usr/bin/parrot",
            "#!",
            "",
            "aslkdfjas;ldk",
            " #!/usr/bin/python",
            " #!/usr/bin/ ",
            " #!/usr/bin",
        ];

        for input in inputs.iter() {
            assert_eq!(
                get_language_from_shebang(Cursor::new(*input), &interpreters).unwrap(),
                "",
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_shebang_hack() {
        let content = Cursor::new(
            r#"#!/bin/sh
            exec python "$0" "$@"
            !#
            "#,
        );

        assert_eq!(
            get_language_from_shebang(content, &mock_interpreters()).unwrap(),
            "python"
        );
    }

    /// Builds the language -> interpreters map from the default configuration.
    /// Languages without any interpreter get no entry.
    fn mock_interpreters() -> HashMap<String, Vec<String>> {
        let config = Builder::default_config().unwrap();
        let mut map: HashMap<String, Vec<String>> = HashMap::new();

        for (lang, file_type) in config.file_types.iter() {
            for interpreter in file_type.interpreters.iter() {
                map.entry(lang.clone())
                    .or_default()
                    .push(interpreter.clone());
            }
        }

        map
    }
}