qlty_analysis/code/
language_detector.rs

1// From: https://github.com/monkslc/hyperpolyglot
2
3use lazy_static::lazy_static;
4use regex::Regex;
5use std::{collections::HashMap, fs::File as FS_File, io::BufReader, path::Path};
6
7pub fn detect(
8    path: &Path,
9    interpreters: &HashMap<String, Vec<String>>,
10) -> Result<String, Box<dyn std::error::Error>> {
11    let file = FS_File::open(path)?;
12    let mut reader = BufReader::new(file);
13    let language = get_language_from_shebang(&mut reader, interpreters)?;
14
15    if crate::lang::from_str(&language).is_some() {
16        return Ok(language);
17    }
18
19    Ok("".to_string())
20}
21
22pub fn get_language_from_shebang<R: std::io::BufRead>(
23    reader: R,
24    interpreters: &HashMap<String, Vec<String>>,
25) -> Result<String, std::io::Error> {
26    let mut lines = reader.lines();
27    let shebang_line = match lines.next() {
28        Some(line) => line,
29        None => return Ok("".to_string()),
30    }?;
31    let mut extra_content = String::new();
32
33    if !shebang_line.starts_with("#!") {
34        return Ok("".to_string());
35    }
36
37    let language = shebang_line
38        .split('/')
39        .last()
40        .and_then(|interpreter_line| {
41            let mut splits = interpreter_line.split_whitespace();
42            match splits.next() {
43                // #!/usr/bin/env python
44                Some("env") => splits.next(),
45                // #!/usr/bin/sh [exec scala "$0" "$@"]
46                Some("sh") => {
47                    let lines: Vec<String> = lines.take(4).filter_map(|line| line.ok()).collect();
48                    extra_content = lines.join("\n");
49                    lazy_static! {
50                        static ref SHEBANG_HACK_RE: Regex =
51                            Regex::new(r"exec (\w+).+\$0.+\$@").unwrap();
52                    }
53                    let interpreter = SHEBANG_HACK_RE
54                        .captures(&extra_content[..])
55                        .and_then(|captures| captures.get(1))
56                        .map(|interpreter| interpreter.as_str())
57                        .unwrap_or("sh");
58                    Some(interpreter)
59                }
60                // #!/usr/bin/python
61                Some(interpreter) => Some(interpreter),
62                // #!
63                None => None,
64            }
65        })
66        .and_then(|interpreter| {
67            // #!/usr/bin/python2.6.3 -> #!/usr/bin/python2
68            lazy_static! {
69                static ref RE: Regex = Regex::new(r"[0-9]\.[0-9]").unwrap();
70            }
71            let parsed_interpreter = RE.split(interpreter).next().unwrap();
72            interpreters
73                .iter()
74                .find(|(_, interpreters)| interpreters.contains(&parsed_interpreter.to_string()))
75                .map(|(lang, _)| lang.to_string())
76        });
77
78    match language {
79        Some(language) => Ok(language),
80        None => Ok("".to_string()),
81    }
82}
83
#[cfg(test)]
mod test {
    use super::*;
    use qlty_config::config::Builder;
    use std::{collections::HashMap, env, fs, io::Cursor};

    /// Writes `contents` to a temporary file, runs `detect` on it, removes the
    /// file, and returns the detected language.
    ///
    /// The process id is appended to `file_name` so concurrent test runs that
    /// share a temp directory do not overwrite each other's files.
    fn detect_from_contents(file_name: &str, contents: &str) -> String {
        let path = env::temp_dir().join(format!("{}_{}", file_name, std::process::id()));
        fs::write(&path, contents).expect("Failed to write to temporary file");

        let result = detect(&path, &mock_interpreters());

        // Clean up before unwrapping so a failed detection does not leak the file.
        fs::remove_file(&path).expect("Failed to delete temporary file");

        result.unwrap()
    }

    /// Runs `get_language_from_shebang` over an in-memory string.
    fn language_of(content: &str) -> String {
        get_language_from_shebang(Cursor::new(content), &mock_interpreters()).unwrap()
    }

    #[test]
    fn test_detect_get_language_by_shebang() {
        assert_eq!(
            detect_from_contents("temp_shebang_file", "#!/usr/bin/env node\n"),
            "javascript"
        );
    }

    #[test]
    fn test_detect_get_unsupported_language() {
        assert_eq!(
            detect_from_contents("temp_unsupported_language_file", "#!/usr/bin/env haskell\n"),
            ""
        );
    }

    #[test]
    fn test_shebang_get_language() {
        assert_eq!(language_of("#!/usr/bin/python"), "python");
    }

    #[test]
    fn test_shebang_get_language_env() {
        assert_eq!(language_of("#!/usr/bin/env node"), "javascript");
    }

    #[test]
    fn test_shebang_get_language_with_minor_version() {
        assert_eq!(language_of("#!/usr/bin/python2.6"), "python");
    }

    #[test]
    fn test_shebang_empty_cases() {
        let inputs = [
            // `env` without an interpreter argument
            "#!/usr/bin/env",
            // interpreter that no language lists
            "#!/usr/bin/parrot",
            // shebang marker with nothing after it
            "#!",
            // empty input
            "",
            // first line is not a shebang
            "aslkdfjas;ldk",
            // the marker must be at the very start of the line
            " #!/usr/bin/python",
            " #!/usr/bin/ ",
            " #!/usr/bin",
        ];

        for input in inputs.iter() {
            assert_eq!(language_of(input), "", "input: {:?}", input);
        }
    }

    #[test]
    fn test_shebang_hack() {
        let content = Cursor::new(
            r#"#!/bin/sh
               exec python "$0" "$@"
               !#
            "#,
        );

        assert_eq!(
            get_language_from_shebang(content, &mock_interpreters()).unwrap(),
            "python"
        );
    }

    /// Builds the language -> interpreters map from the default configuration,
    /// skipping file types that declare no interpreters.
    fn mock_interpreters() -> HashMap<String, Vec<String>> {
        let config = Builder::default_config().unwrap();

        config
            .file_types
            .iter()
            .filter_map(|(lang, file_type)| {
                let interpreters: Vec<String> = file_type.interpreters.iter().cloned().collect();
                if interpreters.is_empty() {
                    None
                } else {
                    Some((lang.clone(), interpreters))
                }
            })
            .collect()
    }
}