rust_code_analysis/
tools.rs1use std::cmp::Ordering;
2use std::collections::HashMap;
3use std::fs::{self, File};
4use std::io::{Read, Write};
5use std::path::{Component, Path, PathBuf};
6
7use lazy_static::lazy_static;
8use regex::bytes::Regex;
9
10use crate::langs::fake;
11use crate::langs::*;
12
13pub fn read_file(path: &Path) -> std::io::Result<Vec<u8>> {
26 let mut file = File::open(path)?;
27 let mut data = Vec::new();
28 file.read_to_end(&mut data)?;
29
30 remove_blank_lines(&mut data);
31
32 Ok(data)
33}
34
35pub fn read_file_with_eol(path: &Path) -> std::io::Result<Option<Vec<u8>>> {
48 let file_size = fs::metadata(path).map_or(1024 * 1024, |m| m.len() as usize);
49 if file_size <= 3 {
50 return Ok(None);
52 }
53
54 let mut file = File::open(path)?;
55
56 let mut start = vec![0; 64.min(file_size)];
57 let start = if file.read_exact(&mut start).is_ok() {
58 if start[..2] == [b'\xFE', b'\xFF'] || start[..2] == [b'\xFF', b'\xFE'] {
60 &start[2..]
61 } else if start[..3] == [b'\xEF', b'\xBB', b'\xBF'] {
62 &start[3..]
63 } else {
64 &start
65 }
66 } else {
67 return Ok(None);
68 };
69
70 let mut head = String::from_utf8_lossy(start).into_owned();
72 head.pop();
74 if head.contains('\u{FFFD}') {
76 return Ok(None);
77 }
78
79 let mut data = Vec::with_capacity(file_size + 2);
80 data.extend_from_slice(start);
81
82 file.read_to_end(&mut data)?;
83
84 remove_blank_lines(&mut data);
85
86 Ok(Some(data))
87}
88
/// Writes `data` to the file at `path`, creating the file if it does not
/// exist and replacing its previous content otherwise.
///
/// # Errors
///
/// Propagates the I/O error raised while creating or writing the file.
pub fn write_file(path: &Path, data: &[u8]) -> std::io::Result<()> {
    fs::write(path, data)
}
108
109pub fn get_language_for_file(path: &Path) -> Option<LANG> {
123 if let Some(ext) = path.extension() {
124 let ext = ext.to_str().unwrap().to_lowercase();
125 get_from_ext(&ext)
126 } else {
127 None
128 }
129}
130
/// Converts the raw bytes of a captured mode name into a lowercase `String`.
///
/// Returns `None` when `mode` is not valid UTF-8.
fn mode_to_str(mode: &[u8]) -> Option<String> {
    match std::str::from_utf8(mode) {
        Ok(text) => Some(text.to_lowercase()),
        Err(_) => None,
    }
}
134
135fn get_emacs_mode(buf: &[u8]) -> Option<String> {
136 lazy_static! {
138 static ref RE1_EMACS: Regex = Regex::new(r"(?i)-\*-.*[^-\w]mode\s*:\s*([^:;\s]+)").unwrap();
140 static ref RE2_EMACS: Regex = Regex::new(r"-\*-\s*([^:;\s]+)\s*-\*-").unwrap();
141 static ref RE1_VIM: Regex = Regex::new(r"(?i)vim\s*:.*[^\w]ft\s*=\s*([^:\s]+)").unwrap();
142 }
143
144 for (i, line) in buf.splitn(5, |c| *c == b'\n').enumerate() {
145 if let Some(cap) = RE1_EMACS.captures_iter(line).next() {
146 return mode_to_str(&cap[1]);
147 } else if let Some(cap) = RE2_EMACS.captures_iter(line).next() {
148 return mode_to_str(&cap[1]);
149 } else if let Some(cap) = RE1_VIM.captures_iter(line).next() {
150 return mode_to_str(&cap[1]);
151 }
152 if i == 3 {
153 break;
154 }
155 }
156
157 for (i, line) in buf.rsplitn(5, |c| *c == b'\n').enumerate() {
158 if let Some(cap) = RE1_VIM.captures_iter(line).next() {
159 return mode_to_str(&cap[1]);
160 }
161 if i == 3 {
162 break;
163 }
164 }
165
166 None
167}
168
169pub fn guess_language<P: AsRef<Path>>(buf: &[u8], path: P) -> (Option<LANG>, String) {
193 let ext = path
194 .as_ref()
195 .extension()
196 .map(|e| e.to_str().unwrap())
197 .map(|e| e.to_lowercase())
198 .unwrap_or_else(|| "".to_string());
199 let from_ext = get_from_ext(&ext);
200
201 let mode = get_emacs_mode(buf).unwrap_or_default();
202
203 let from_mode = get_from_emacs_mode(&mode);
204
205 if let Some(lang_ext) = from_ext {
206 if let Some(lang_mode) = from_mode {
207 if lang_ext == lang_mode {
208 (
209 Some(lang_mode),
210 fake::get_true(&ext, &mode).unwrap_or_else(|| lang_mode.get_name().to_string()),
211 )
212 } else {
213 (Some(lang_ext), lang_ext.get_name().to_string())
215 }
216 } else {
217 (
218 Some(lang_ext),
219 fake::get_true(&ext, &mode).unwrap_or_else(|| lang_ext.get_name().to_string()),
220 )
221 }
222 } else if let Some(lang_mode) = from_mode {
223 (
224 Some(lang_mode),
225 fake::get_true(&ext, &mode).unwrap_or_else(|| lang_mode.get_name().to_string()),
226 )
227 } else {
228 (None, fake::get_true(&ext, &mode).unwrap_or_default())
229 }
230}
231
/// Replaces the trailing run of `\n` and `\r` bytes in `data` with a single
/// `\n`.
///
/// A buffer without any trailing line terminator simply gets one `\n`
/// appended; an empty buffer becomes `"\n"`.
pub(crate) fn remove_blank_lines(data: &mut Vec<u8>) {
    // Length to keep: one past the last byte that is not a line terminator.
    let kept = data
        .iter()
        .rposition(|&byte| byte != b'\n' && byte != b'\r')
        .map_or(0, |last| last + 1);
    data.truncate(kept);
    data.push(b'\n');
}
244
/// Normalizes `path` lexically, without touching the filesystem.
///
/// `.` components are dropped and each `..` removes the last component kept
/// so far (it is ignored when nothing can be removed). A leading prefix
/// component and the root directory are preserved.
pub(crate) fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
    let mut parts = path.as_ref().components().peekable();

    // A prefix can only be the first component; consume it up front.
    let mut normalized = match parts.peek().cloned() {
        Some(prefix @ Component::Prefix(..)) => {
            parts.next();
            PathBuf::from(prefix.as_os_str())
        }
        _ => PathBuf::new(),
    };

    for part in parts {
        match part {
            Component::Prefix(..) => unreachable!(),
            Component::CurDir => {}
            Component::ParentDir => {
                normalized.pop();
            }
            Component::RootDir => normalized.push(part.as_os_str()),
            Component::Normal(name) => normalized.push(name),
        }
    }

    normalized
}
272
/// Computes a distance between two paths based on their deepest shared
/// ancestor.
///
/// The ancestors of `path1` are visited from the longest to the shortest;
/// the first non-empty one that is also a prefix of `path2` is used as the
/// common base. The distance is the number of components left in `path1`
/// plus the number left in `path2` once that base is stripped.
///
/// Returns `None` when the paths share no non-empty ancestor.
pub(crate) fn get_paths_dist(path1: &Path, path2: &Path) -> Option<usize> {
    let common = path1
        .ancestors()
        .find(|base| !base.as_os_str().is_empty() && path2.starts_with(base))?;

    // Both strips succeed: `common` is an ancestor of `path1` and a prefix of `path2`.
    let rest1 = path1.strip_prefix(common).unwrap();
    let rest2 = path2.strip_prefix(common).unwrap();

    Some(rest1.components().count() + rest2.components().count())
}
283
284pub(crate) fn guess_file<S: ::std::hash::BuildHasher>(
285 current_path: &Path,
286 include_path: &str,
287 all_files: &HashMap<String, Vec<PathBuf>, S>,
288) -> Vec<PathBuf> {
289 let include_path = if let Some(end) = include_path.strip_prefix("mozilla/") {
291 end
292 } else {
293 include_path
294 };
295 let include_path = normalize_path(include_path);
296 if let Some(possibilities) = all_files.get(include_path.file_name().unwrap().to_str().unwrap())
297 {
298 if possibilities.len() == 1 {
299 return possibilities.clone();
301 }
302
303 let mut new_possibilities = Vec::new();
304 for p in possibilities.iter() {
305 if p.ends_with(&include_path) && current_path != p {
306 new_possibilities.push(p.clone());
307 }
308 }
309 if new_possibilities.len() == 1 {
310 return new_possibilities;
312 }
313 new_possibilities.clear();
314
315 if let Some(parent) = current_path.parent() {
316 for p in possibilities.iter() {
317 if p.starts_with(parent) && current_path != p {
318 new_possibilities.push(p.clone());
319 }
320 }
321 if new_possibilities.len() == 1 {
322 return new_possibilities;
324 }
325 new_possibilities.clear();
326 }
327
328 let mut dist_min = std::usize::MAX;
329 let mut path_min = Vec::new();
330 for p in possibilities.iter() {
331 if current_path == p {
332 continue;
333 }
334 if let Some(dist) = get_paths_dist(current_path, p) {
335 match dist.cmp(&dist_min) {
336 Ordering::Less => {
337 dist_min = dist;
338 path_min.clear();
339 path_min.push(p);
340 }
341 Ordering::Equal => {
342 path_min.push(p);
343 }
344 Ordering::Greater => {}
345 }
346 }
347 }
348
349 let path_min: Vec<_> = path_min.drain(..).map(|p| p.to_path_buf()).collect();
350 return path_min;
351 }
352
353 vec![]
354}
355
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use super::*;

    /// Writes each input to a temporary file and checks what
    /// `read_file_with_eol` returns for it.
    #[test]
    fn test_read() {
        let tmp_path = std::env::temp_dir().join("test_read");

        let check = |input: &[u8], expected: Option<&[u8]>| {
            write_file(&tmp_path, input).unwrap();
            let res = read_file_with_eol(&tmp_path).unwrap();
            assert_eq!(res, expected.map(|bytes| bytes.to_vec()));
        };

        // Byte-order marks are stripped and a final newline is enforced.
        check(b"\xFF\xFEabc", Some(b"abc\n"));
        check(b"\xFE\xFFabc", Some(b"abc\n"));
        check(b"\xEF\xBB\xBFabc", Some(b"abc\n"));
        check(b"\xEF\xBB\xBFabc\n", Some(b"abc\n"));
        // A truncated BOM is invalid UTF-8.
        check(b"\xEF\xBBabc\n", None);
        check(b"abcdef\n", Some(b"abcdef\n"));
        check(b"abcdef", Some(b"abcdef\n"));
    }

    /// Checks the language and name guessed for several modeline/extension
    /// combinations.
    #[test]
    fn test_guess_language() {
        let check = |buf: &[u8], path: &str, lang: Option<LANG>, name: &str| {
            assert_eq!(guess_language(buf, path), (lang, name.to_string()));
        };

        check(
            b"// -*- foo: bar; mode: c++; hello: world\n",
            "foo.cpp",
            Some(LANG::Cpp),
            "c/c++",
        );
        check(b"// -*- c++ -*-\n", "foo.cpp", Some(LANG::Cpp), "c/c++");
        check(
            b"// -*- foo: bar; bar-mode: c++; hello: world\n",
            "foo.py",
            Some(LANG::Python),
            "python",
        );
        check(b"/* hello world */\n", "foo.cpp", Some(LANG::Cpp), "c/c++");
        check(
            b"\n\n\n\n\n\n\n\n\n// vim: set ts=4 ft=c++\n\n\n",
            "foo.c",
            Some(LANG::Cpp),
            "c/c++",
        );
        check(b"\n\n\n\n\n\n\n\n\n\n\n\n", "foo.txt", None, "");
        check(
            b"// -*- foo: bar; mode: Objective-C++; hello: world\n",
            "foo.mm",
            Some(LANG::Cpp),
            "obj-c/c++",
        );
    }
}