1#![allow(clippy::wildcard_imports, clippy::enum_glob_use)]
8#![allow(
15 clippy::cast_precision_loss,
16 clippy::cast_possible_truncation,
17 clippy::cast_sign_loss
18)]
19
20use std::cmp::Ordering;
21use std::collections::HashMap;
22use std::fs::{self, File};
23use std::io::{Read, Write};
24use std::path::{Component, Path, PathBuf};
25use std::sync::OnceLock;
26
27use regex::bytes::Regex;
28use termcolor::{Color, ColorSpec, StandardStreamLock, WriteColor};
29
30use crate::langs::fake;
31use crate::langs::*;
32
33pub fn read_file(path: &Path) -> std::io::Result<Vec<u8>> {
57 let mut file = File::open(path)?;
58 let mut data = Vec::new();
59 file.read_to_end(&mut data)?;
60
61 normalize_line_endings(&mut data);
62
63 Ok(data)
64}
65
66pub fn read_file_with_eol(path: &Path) -> std::io::Result<Option<Vec<u8>>> {
88 let file_size = fs::metadata(path).map_or(1024 * 1024, |m| m.len() as usize);
89 if file_size <= 3 {
90 return Ok(None);
92 }
93
94 let mut file = File::open(path)?;
95
96 let mut start = vec![0; 64.min(file_size)];
97 let start = if file.read_exact(&mut start).is_ok() {
98 if start[..2] == [b'\xFE', b'\xFF'] || start[..2] == [b'\xFF', b'\xFE'] {
100 &start[2..]
101 } else if start[..3] == [b'\xEF', b'\xBB', b'\xBF'] {
102 &start[3..]
103 } else {
104 &start
105 }
106 } else {
107 return Ok(None);
108 };
109
110 let mut head = String::from_utf8_lossy(start).into_owned();
112 head.pop();
114 if head.contains('\u{FFFD}') {
116 return Ok(None);
117 }
118
119 let mut data = Vec::with_capacity(file_size + 2);
120 data.extend_from_slice(start);
121
122 file.read_to_end(&mut data)?;
123
124 normalize_line_endings(&mut data);
125
126 Ok(Some(data))
127}
128
/// Creates (or truncates) the file at `path` and writes all of `data` to it.
///
/// # Errors
///
/// Returns the I/O error raised while creating the file or writing to it.
pub fn write_file(path: &Path, data: &[u8]) -> std::io::Result<()> {
    File::create(path).and_then(|mut file| file.write_all(data))
}
154
155#[must_use]
169pub fn get_language_for_file(path: &Path) -> Option<LANG> {
170 if let Some(ext) = path.extension() {
171 let ext = ext.to_str()?.to_lowercase();
172 get_from_ext(&ext)
173 } else {
174 None
175 }
176}
177
/// Converts a captured mode name to a lower-cased `String`.
///
/// Returns `None` when `mode` is not valid UTF-8.
fn mode_to_str(mode: &[u8]) -> Option<String> {
    match std::str::from_utf8(mode) {
        Ok(text) => Some(text.to_lowercase()),
        Err(_) => None,
    }
}
181
182static RE1_EMACS: OnceLock<Regex> = OnceLock::new();
184static RE2_EMACS: OnceLock<Regex> = OnceLock::new();
185static RE1_VIM: OnceLock<Regex> = OnceLock::new();
186static RE_GENERATED: OnceLock<Regex> = OnceLock::new();
187
188const FIRST_EMACS_EXPRESSION: &str = r"(?i)-\*-.*[^-\w]mode\s*:\s*([^:;\s]+)";
190const SECOND_EMACS_EXPRESSION: &str = r"-\*-\s*([^:;\s]+)\s*-\*-";
191const VIM_EXPRESSION: &str = r"(?i)vim\s*:.*[^\w]ft\s*=\s*([^:\s]+)";
192
193const GENERATED_EXPRESSION: &str = r"(?i)@generated\b|DO NOT EDIT|GENERATED CODE";
205
206const GENERATED_SCAN_BYTES: usize = 5 * 1024;
210const GENERATED_SCAN_LINES: usize = 50;
213
214pub fn is_generated(buf: &[u8]) -> bool {
249 let buf = buf.strip_prefix(b"\xEF\xBB\xBF").unwrap_or(buf);
256
257 let cap = buf.len().min(GENERATED_SCAN_BYTES);
262 let end = buf[..cap]
263 .iter()
264 .enumerate()
265 .filter_map(|(i, &b)| (b == b'\n').then_some(i + 1))
266 .nth(GENERATED_SCAN_LINES - 1)
267 .unwrap_or(cap);
268 let window = &buf[..end];
269
270 RE_GENERATED
271 .get_or_init(|| {
272 Regex::new(GENERATED_EXPRESSION).expect("GENERATED_EXPRESSION is a constant regex")
273 })
274 .is_match(window)
275}
276
277#[inline]
278fn get_regex<'a>(
279 once_lock: &OnceLock<Regex>,
280 line: &'a [u8],
281 regex: &'a str,
282) -> Option<regex::bytes::Captures<'a>> {
283 once_lock
284 .get_or_init(|| Regex::new(regex).unwrap())
285 .captures_iter(line)
286 .next()
287}
288
289fn get_shebang_lang(buf: &[u8]) -> Option<LANG> {
298 let rest = buf.strip_prefix(b"#!")?;
301 let line_end = rest.iter().position(|&b| b == b'\n').unwrap_or(rest.len());
302 let line = &rest[..line_end];
303 let line = line.strip_suffix(b"\r").unwrap_or(line);
306 let line = std::str::from_utf8(line).ok()?;
307
308 let mut tokens = line.split_ascii_whitespace();
309 let first_base = basename(tokens.next()?);
310
311 let interpreter = if first_base == "env" {
312 skip_env_args(&mut tokens)?
313 } else {
314 first_base
315 };
316
317 get_from_interpreter(strip_version_suffix(interpreter))
318}
319
320fn skip_env_args<'a>(tokens: &mut std::str::SplitAsciiWhitespace<'a>) -> Option<&'a str> {
325 loop {
326 let tok = tokens.next()?;
327 if let Some(flag) = tok.strip_prefix('-') {
328 if flag == "u" {
329 tokens.next()?;
330 }
331 continue;
332 }
333 if tok.contains('=') {
334 continue;
335 }
336 return Some(basename(tok));
337 }
338}
339
/// Returns the part of `path` after its last `/`, or all of `path` when it
/// contains no `/`.
fn basename(path: &str) -> &str {
    match path.rfind('/') {
        // `/` is a single byte, so the slice starts on a character boundary.
        Some(slash) => &path[slash + 1..],
        None => path,
    }
}
343
/// Removes a trailing run of ASCII digits and dots from `name`
/// (e.g. `perl5.36` becomes `perl`).
///
/// When nothing would remain, `name` is returned unchanged.
fn strip_version_suffix(name: &str) -> &str {
    match name.trim_end_matches(|c: char| matches!(c, '0'..='9' | '.')) {
        "" => name,
        stem => stem,
    }
}
350
351fn get_from_interpreter(name: &str) -> Option<LANG> {
352 match name {
353 "sh" | "bash" | "dash" | "ksh" | "zsh" => Some(LANG::Bash),
354 "python" => Some(LANG::Python),
355 "perl" => Some(LANG::Perl),
356 "lua" | "luajit" => Some(LANG::Lua),
357 "php" | "php-cgi" => Some(LANG::Php),
358 "node" | "nodejs" => Some(LANG::Javascript),
359 "tclsh" | "wish" => Some(LANG::Tcl),
360 "ruby" => Some(LANG::Ruby),
361 "elixir" | "iex" => Some(LANG::Elixir),
362 _ => None,
363 }
364}
365
366fn get_emacs_mode(buf: &[u8]) -> Option<String> {
367 for (i, line) in buf.splitn(5, |c| *c == b'\n').enumerate() {
369 if let Some(cap) = get_regex(&RE1_EMACS, line, FIRST_EMACS_EXPRESSION) {
370 return mode_to_str(&cap[1]);
371 } else if let Some(cap) = get_regex(&RE2_EMACS, line, SECOND_EMACS_EXPRESSION) {
372 return mode_to_str(&cap[1]);
373 } else if let Some(cap) = get_regex(&RE1_VIM, line, VIM_EXPRESSION) {
374 return mode_to_str(&cap[1]);
375 }
376 if i == 3 {
377 break;
378 }
379 }
380
381 for (i, line) in buf.rsplitn(5, |c| *c == b'\n').enumerate() {
382 if let Some(cap) = get_regex(&RE1_VIM, line, VIM_EXPRESSION) {
383 return mode_to_str(&cap[1]);
384 }
385 if i == 3 {
386 break;
387 }
388 }
389
390 None
391}
392
393pub fn guess_language<'a, P: AsRef<Path>>(buf: &[u8], path: P) -> (Option<LANG>, &'a str) {
417 let ext = path
418 .as_ref()
419 .extension()
420 .and_then(|e| e.to_str())
421 .map(str::to_lowercase)
422 .unwrap_or_default();
423 let from_ext = get_from_ext(&ext);
424
425 let mode = get_emacs_mode(buf).unwrap_or_default();
426
427 let from_mode = get_from_emacs_mode(&mode);
428
429 if let Some(lang_ext) = from_ext {
430 if let Some(lang_mode) = from_mode {
431 if lang_ext == lang_mode {
432 (
433 Some(lang_mode),
434 fake::get_true(&ext, &mode).unwrap_or_else(|| lang_mode.get_name()),
435 )
436 } else {
437 (Some(lang_ext), lang_ext.get_name())
439 }
440 } else {
441 (
442 Some(lang_ext),
443 fake::get_true(&ext, &mode).unwrap_or_else(|| lang_ext.get_name()),
444 )
445 }
446 } else if let Some(lang_mode) = from_mode {
447 (
448 Some(lang_mode),
449 fake::get_true(&ext, &mode).unwrap_or_else(|| lang_mode.get_name()),
450 )
451 } else if let Some(lang_shebang) = get_shebang_lang(buf) {
452 (
453 Some(lang_shebang),
454 fake::get_true(&ext, &mode).unwrap_or_else(|| lang_shebang.get_name()),
455 )
456 } else {
457 (None, fake::get_true(&ext, &mode).unwrap_or_default())
458 }
459}
460
/// Rewrites `data` in place so that every line ends in `\n` and the buffer
/// ends with exactly one `\n`.
///
/// Each `\r\n` pair and each lone `\r` becomes a single `\n`. All trailing
/// newlines are then removed and one `\n` is appended, so an empty buffer
/// becomes `"\n"`.
pub(crate) fn normalize_line_endings(data: &mut Vec<u8>) {
    // True when the previous original byte was a CR.
    let mut after_cr = false;
    data.retain_mut(|byte| {
        let follows_cr = std::mem::replace(&mut after_cr, *byte == b'\r');
        match *byte {
            b'\r' => {
                *byte = b'\n';
                true
            }
            // The LF of a CRLF pair was already emitted for the CR.
            b'\n' => !follows_cr,
            _ => true,
        }
    });

    while data.last() == Some(&b'\n') {
        data.pop();
    }
    data.push(b'\n');
}
487
/// Normalizes `path` lexically, without touching the file system.
///
/// `.` components are dropped and each `..` removes the component before it.
/// A `..` with nothing left to remove is discarded, so `../foo.h` becomes
/// `foo.h` and `/..` stays `/`.
pub(crate) fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
    let mut parts = path.as_ref().components().peekable();

    // A prefix component can only come first; seed the result with it.
    let mut normalized = match parts.peek().copied() {
        Some(prefix @ Component::Prefix(..)) => {
            parts.next();
            PathBuf::from(prefix.as_os_str())
        }
        _ => PathBuf::new(),
    };

    for part in parts {
        match part {
            Component::Prefix(..) => unreachable!(),
            Component::CurDir => {}
            Component::ParentDir => {
                normalized.pop();
            }
            Component::RootDir => normalized.push(part.as_os_str()),
            Component::Normal(name) => normalized.push(name),
        }
    }
    normalized
}
515
/// Computes the distance between two paths: the number of components of each
/// path that lie below their deepest common ancestor, added together.
///
/// Returns `None` when the only common ancestor is the empty path, i.e. the
/// paths share no leading component.
pub(crate) fn get_paths_dist(path1: &Path, path2: &Path) -> Option<usize> {
    // `ancestors` goes from the full path upwards, so the first hit is the
    // deepest common ancestor.
    let common = path1
        .ancestors()
        .find(|ancestor| !ancestor.as_os_str().is_empty() && path2.starts_with(ancestor))?;

    let below_common = |path: &Path| path.strip_prefix(common).unwrap().components().count();
    Some(below_common(path1) + below_common(path2))
}
526
527pub(crate) fn guess_file<S: ::std::hash::BuildHasher>(
528 current_path: &Path,
529 include_path: &str,
530 all_files: &HashMap<String, Vec<PathBuf>, S>,
531) -> Vec<PathBuf> {
532 let include_path = include_path
533 .strip_prefix("mozilla/")
534 .unwrap_or(include_path);
535
536 let resolved_path = current_path
544 .parent()
545 .map(|parent| normalize_path(parent.join(include_path)));
546
547 let include_path = normalize_path(include_path);
548 let Some(file_name) = include_path.file_name() else {
549 return vec![];
550 };
551 let Some(file_name) = file_name.to_str() else {
552 return vec![];
553 };
554 if let Some(possibilities) = all_files.get(file_name) {
555 if possibilities.len() == 1 {
556 return possibilities.clone();
558 }
559
560 if let Some(resolved) = resolved_path.as_ref() {
565 fn unique_match<F: Fn(&PathBuf) -> bool>(
566 possibilities: &[PathBuf],
567 current_path: &Path,
568 pred: F,
569 ) -> Option<Vec<PathBuf>> {
570 let matched: Vec<PathBuf> = possibilities
571 .iter()
572 .filter(|p| current_path != p.as_path() && pred(p))
573 .cloned()
574 .collect();
575 (matched.len() == 1).then_some(matched)
576 }
577 if let Some(hit) = unique_match(possibilities, current_path, |p| p == resolved) {
578 return hit;
579 }
580 if let Some(hit) = unique_match(possibilities, current_path, |p| p.ends_with(resolved))
581 {
582 return hit;
583 }
584 }
585
586 let mut new_possibilities = Vec::new();
587 for p in possibilities {
588 if p.ends_with(&include_path) && current_path != p {
589 new_possibilities.push(p.clone());
590 }
591 }
592 if new_possibilities.len() == 1 {
593 return new_possibilities;
595 }
596 new_possibilities.clear();
597
598 if let Some(parent) = current_path.parent() {
599 for p in possibilities {
600 if p.starts_with(parent) && current_path != p {
601 new_possibilities.push(p.clone());
602 }
603 }
604 if new_possibilities.len() == 1 {
605 return new_possibilities;
607 }
608 new_possibilities.clear();
609 }
610
611 let mut dist_min = usize::MAX;
612 let mut path_min = Vec::new();
613 for p in possibilities {
614 if current_path == p {
615 continue;
616 }
617 if let Some(dist) = get_paths_dist(current_path, p) {
618 match dist.cmp(&dist_min) {
619 Ordering::Less => {
620 dist_min = dist;
621 path_min.clear();
622 path_min.push(p);
623 }
624 Ordering::Equal => {
625 path_min.push(p);
626 }
627 Ordering::Greater => {}
628 }
629 }
630 }
631
632 let path_min: Vec<_> = path_min.drain(..).cloned().collect();
633 return path_min;
634 }
635
636 vec![]
637}
638
639#[inline]
640pub(crate) fn color(stdout: &mut StandardStreamLock, color: Color) -> std::io::Result<()> {
641 stdout.set_color(ColorSpec::new().set_fg(Some(color)))
642}
643
644#[inline]
645pub(crate) fn intense_color(stdout: &mut StandardStreamLock, color: Color) -> std::io::Result<()> {
646 stdout.set_color(ColorSpec::new().set_fg(Some(color)).set_intense(true))
647}
648
/// Test helper: parses `source` with parser `T` and passes the resulting
/// `FuncSpace` to `check`.
///
/// Before parsing, `\r\n` and lone `\r` are turned into `\n`, trailing
/// whitespace and surrounding newlines are removed, and a single `\n` is
/// appended.
///
/// # Panics
///
/// Panics if `crate::metrics` does not produce a value for the parsed source.
#[cfg(test)]
pub(crate) fn check_func_space<T: crate::ParserTrait, F: Fn(crate::FuncSpace)>(
    source: &str,
    filename: &str,
    check: F,
) {
    let path = std::path::PathBuf::from(filename);

    let unix_source = source.replace("\r\n", "\n").replace('\r', "\n");
    let body = unix_source.trim_end().trim_matches('\n');
    let mut code = Vec::with_capacity(body.len() + 1);
    code.extend_from_slice(body.as_bytes());
    code.push(b'\n');

    let parser = T::new(code, &path, None);
    #[allow(deprecated)]
    let func_space = crate::metrics(&parser, &path).unwrap();

    check(func_space);
}
666
/// Test helper: like [`check_func_space`], but passes only the `metrics`
/// field of the top-level `FuncSpace` to `check`.
#[cfg(test)]
pub(crate) fn check_metrics<T: crate::ParserTrait>(
    source: &str,
    filename: &str,
    check: fn(crate::CodeMetrics) -> (),
) {
    check_func_space::<T, _>(source, filename, |func_space| {
        let metrics = func_space.metrics;
        check(metrics);
    });
}
675
/// Test helper: asserts that `func_space` has a direct child named `name`
/// whose kind is `expected`.
///
/// # Panics
///
/// Panics when no such child exists or when its kind differs from `expected`.
#[cfg(test)]
pub(crate) fn assert_child_space_kind(
    func_space: &crate::FuncSpace,
    name: &str,
    expected: crate::SpaceKind,
) {
    let found = func_space
        .spaces
        .iter()
        .find(|space| space.name.as_deref() == Some(name));
    let Some(child) = found else {
        panic!("expected a child FuncSpace named {name:?}");
    };
    assert_eq!(
        child.kind, expected,
        "child FuncSpace {name:?} kind: got {:?}, expected {:?}",
        child.kind, expected,
    );
}
700
/// Unit tests for the file, language-detection and include-resolution helpers.
#[cfg(test)]
#[allow(
    clippy::float_cmp,
    clippy::cast_precision_loss,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::similar_names,
    clippy::doc_markdown,
    clippy::needless_raw_string_hashes,
    clippy::too_many_lines
)]
mod tests {
    use pretty_assertions::assert_eq;

    use super::*;

    // ---- read_file_with_eol ------------------------------------------------

    /// Round-trips several byte sequences through a temporary file and checks
    /// BOM stripping, rejection of invalid UTF-8 and line-ending handling.
    #[test]
    fn test_read() {
        // The name is unique per process so concurrent test runs sharing the
        // temporary directory cannot overwrite each other's file.
        let tmp_dir = std::env::temp_dir();
        let tmp_path = tmp_dir.join(format!("test_read_{}", std::process::id()));
        let data = vec![
            (b"\xFF\xFEabc".to_vec(), Some(b"abc\n".to_vec())),
            (b"\xFE\xFFabc".to_vec(), Some(b"abc\n".to_vec())),
            (b"\xEF\xBB\xBFabc".to_vec(), Some(b"abc\n".to_vec())),
            (b"\xEF\xBB\xBFabc\n".to_vec(), Some(b"abc\n".to_vec())),
            (b"\xEF\xBBabc\n".to_vec(), None),
            (b"abcdef\n".to_vec(), Some(b"abcdef\n".to_vec())),
            (b"abcdef".to_vec(), Some(b"abcdef\n".to_vec())),
            (b"abc\r\ndef\r\n".to_vec(), Some(b"abc\ndef\n".to_vec())),
            (
                b"\xEF\xBB\xBFabc\r\ndef\r\n".to_vec(),
                Some(b"abc\ndef\n".to_vec()),
            ),
        ];
        for (d, expected) in data {
            write_file(&tmp_path, &d).unwrap();
            let res = read_file_with_eol(&tmp_path);
            // Best-effort cleanup, done before asserting so that a failing
            // case does not leave the file behind.
            let _ = std::fs::remove_file(&tmp_path);
            assert_eq!(res.unwrap(), expected);
        }
    }

    // ---- non-UTF-8 extensions ----------------------------------------------

    #[cfg(unix)]
    #[test]
    fn test_get_language_for_file_non_utf8() {
        use std::ffi::OsStr;
        use std::os::unix::ffi::OsStrExt;

        let path = Path::new(OsStr::from_bytes(b"foo.\xff"));
        assert_eq!(get_language_for_file(path), None);
    }

    #[cfg(unix)]
    #[test]
    fn test_guess_language_non_utf8() {
        use std::ffi::OsStr;
        use std::os::unix::ffi::OsStrExt;
        use std::path::PathBuf;

        let path = PathBuf::from(OsStr::from_bytes(b"foo.\xff"));
        let (lang, _name) = guess_language(b"int a = 42;", &path);
        assert_eq!(lang, None);
    }

    // ---- guess_file --------------------------------------------------------

    /// An include path without a file name yields no candidates.
    #[test]
    fn test_guess_file_no_file_name() {
        let all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
        let current = Path::new("/some/file.c");
        let result = guess_file(current, "..", &all_files);
        assert!(result.is_empty());
    }

    #[test]
    fn guess_file_parent_dir_include_resolves_to_sibling() {
        let mut all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
        all_files.insert(
            "foo.h".to_string(),
            vec![
                PathBuf::from("/proj/src/foo.h"),
                PathBuf::from("/proj/src/lib/foo.h"),
            ],
        );
        let current = Path::new("/proj/src/lib/file.c");
        let result = guess_file(current, "../foo.h", &all_files);
        assert_eq!(result, vec![PathBuf::from("/proj/src/foo.h")]);
    }

    #[test]
    fn guess_file_parent_subdir_include_resolves_to_correct_inc() {
        let mut all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
        all_files.insert(
            "foo.h".to_string(),
            vec![
                PathBuf::from("/proj/src/inc/foo.h"),
                PathBuf::from("/proj/src/lib/inc/foo.h"),
                PathBuf::from("/proj/other/inc/foo.h"),
            ],
        );
        let current = Path::new("/proj/src/lib/file.c");
        let result = guess_file(current, "../inc/foo.h", &all_files);
        assert_eq!(result, vec![PathBuf::from("/proj/src/inc/foo.h")]);
    }

    #[test]
    fn guess_file_plain_include_keeps_same_directory_preference() {
        let mut all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
        all_files.insert(
            "foo.h".to_string(),
            vec![
                PathBuf::from("/proj/src/foo.h"),
                PathBuf::from("/proj/src/lib/foo.h"),
            ],
        );
        let current = Path::new("/proj/src/lib/file.c");
        let result = guess_file(current, "foo.h", &all_files);
        assert_eq!(result, vec![PathBuf::from("/proj/src/lib/foo.h")]);
    }

    #[test]
    fn guess_file_curdir_include_resolves_to_same_directory() {
        let mut all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
        all_files.insert(
            "foo.h".to_string(),
            vec![
                PathBuf::from("/proj/src/foo.h"),
                PathBuf::from("/proj/src/lib/foo.h"),
            ],
        );
        let current = Path::new("/proj/src/lib/file.c");
        let result = guess_file(current, "./foo.h", &all_files);
        assert_eq!(result, vec![PathBuf::from("/proj/src/lib/foo.h")]);
    }

    #[test]
    fn guess_file_double_parent_include_resolves_two_levels_up() {
        let mut all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
        all_files.insert(
            "foo.h".to_string(),
            vec![
                PathBuf::from("/proj/src/foo.h"),
                PathBuf::from("/proj/src/a/foo.h"),
                PathBuf::from("/proj/src/a/b/foo.h"),
            ],
        );
        let current = Path::new("/proj/src/a/b/file.c");
        let result = guess_file(current, "../../foo.h", &all_files);
        assert_eq!(result, vec![PathBuf::from("/proj/src/foo.h")]);
    }

    /// A single candidate is returned even when the include path would
    /// resolve to a different directory.
    #[test]
    fn guess_file_unique_basename_returns_only_candidate() {
        let mut all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
        all_files.insert(
            "foo.h".to_string(),
            vec![PathBuf::from("/proj/src/lib/foo.h")],
        );
        let current = Path::new("/proj/src/lib/file.c");
        let result = guess_file(current, "../../foo.h", &all_files);
        assert_eq!(result, vec![PathBuf::from("/proj/src/lib/foo.h")]);
    }

    #[test]
    fn guess_file_mozilla_prefix_is_stripped_before_resolution() {
        let mut all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
        all_files.insert(
            "foo.h".to_string(),
            vec![
                PathBuf::from("/proj/src/foo.h"),
                PathBuf::from("/proj/src/lib/foo.h"),
            ],
        );
        let current = Path::new("/proj/src/lib/file.c");
        let result = guess_file(current, "mozilla/foo.h", &all_files);
        assert_eq!(result, vec![PathBuf::from("/proj/src/lib/foo.h")]);
    }

    // ---- guess_language: extension and mode lines ---------------------------

    #[test]
    fn test_guess_language() {
        let buf = b"// -*- foo: bar; mode: c++; hello: world\n";
        assert_eq!(guess_language(buf, "foo.cpp"), (Some(LANG::Cpp), "c/c++"));

        let buf = b"// -*- c++ -*-\n";
        assert_eq!(guess_language(buf, "foo.cpp"), (Some(LANG::Cpp), "c/c++"));

        let buf = b"// -*- foo: bar; bar-mode: c++; hello: world\n";
        assert_eq!(
            guess_language(buf, "foo.py"),
            (Some(LANG::Python), "python")
        );

        let buf = b"/* hello world */\n";
        assert_eq!(guess_language(buf, "foo.cpp"), (Some(LANG::Cpp), "c/c++"));

        let buf = b"\n\n\n\n\n\n\n\n\n// vim: set ts=4 ft=c++\n\n\n";
        assert_eq!(guess_language(buf, "foo.c"), (Some(LANG::Cpp), "c/c++"));

        let buf = b"\n\n\n\n\n\n\n\n\n\n\n\n";
        assert_eq!(guess_language(buf, "foo.txt"), (None, ""));

        let buf = b"// -*- foo: bar; mode: Objective-C++; hello: world\n";
        assert_eq!(
            guess_language(buf, "foo.mm"),
            (Some(LANG::Cpp), "obj-c/c++")
        );
    }

    // ---- get_shebang_lang ----------------------------------------------------

    #[test]
    fn shebang_bare_bash() {
        assert_eq!(get_shebang_lang(b"#!/bin/bash\n"), Some(LANG::Bash));
    }

    #[test]
    fn shebang_env_python3() {
        assert_eq!(
            get_shebang_lang(b"#!/usr/bin/env python3\n"),
            Some(LANG::Python),
        );
    }

    #[test]
    fn shebang_versioned_perl_with_flag() {
        assert_eq!(
            get_shebang_lang(b"#!/usr/bin/perl5.36 -w\n"),
            Some(LANG::Perl),
        );
    }

    #[test]
    fn shebang_env_dash_s_node() {
        assert_eq!(
            get_shebang_lang(b"#!/usr/bin/env -S node --experimental\n"),
            Some(LANG::Javascript),
        );
    }

    #[test]
    fn shebang_env_with_var_assignment() {
        assert_eq!(
            get_shebang_lang(b"#!/usr/bin/env FOO=bar python3\n"),
            Some(LANG::Python),
        );
    }

    #[test]
    fn shebang_env_dash_u_consumes_next_token() {
        assert_eq!(
            get_shebang_lang(b"#!/usr/bin/env -u VAR python3\n"),
            Some(LANG::Python),
        );
    }

    #[test]
    fn shebang_versioned_lua() {
        assert_eq!(get_shebang_lang(b"#!/usr/bin/lua5.1\n"), Some(LANG::Lua));
    }

    #[test]
    fn shebang_node() {
        assert_eq!(
            get_shebang_lang(b"#!/usr/local/bin/node\n"),
            Some(LANG::Javascript),
        );
    }

    #[test]
    fn shebang_tclsh() {
        assert_eq!(get_shebang_lang(b"#!/usr/bin/tclsh\n"), Some(LANG::Tcl));
    }

    #[test]
    fn shebang_no_trailing_newline() {
        assert_eq!(get_shebang_lang(b"#!/bin/sh"), Some(LANG::Bash));
    }

    #[test]
    fn shebang_crlf_line_ending() {
        assert_eq!(get_shebang_lang(b"#!/bin/bash\r\n"), Some(LANG::Bash));
    }

    #[test]
    fn shebang_empty_buffer() {
        assert_eq!(get_shebang_lang(b""), None);
    }

    #[test]
    fn shebang_single_byte() {
        assert_eq!(get_shebang_lang(b"#"), None);
    }

    #[test]
    fn shebang_no_shebang_prefix() {
        assert_eq!(get_shebang_lang(b"// not a shebang\n"), None);
    }

    #[test]
    fn shebang_unknown_interpreter() {
        assert_eq!(get_shebang_lang(b"#!/usr/bin/ocaml\n"), None);
    }

    #[test]
    fn shebang_env_only_no_interpreter() {
        assert_eq!(get_shebang_lang(b"#!/usr/bin/env\n"), None);
    }

    #[test]
    fn shebang_non_utf8_returns_none() {
        assert_eq!(get_shebang_lang(b"#!/usr/bin/\xff\xfe\n"), None);
    }

    // ---- guess_language: shebang precedence ----------------------------------

    #[test]
    fn guess_language_extension_wins_over_shebang() {
        let buf = b"#!/bin/sh\nprint('hi')\n";
        assert_eq!(
            guess_language(buf, "foo.py"),
            (Some(LANG::Python), "python")
        );
    }

    #[test]
    fn guess_language_shebang_falls_through_when_no_extension() {
        let buf = b"#!/usr/bin/env python3\nprint('hi')\n";
        assert_eq!(guess_language(buf, "run"), (Some(LANG::Python), "python"));
    }

    #[test]
    fn guess_language_shebang_detects_ruby_without_extension() {
        let buf = b"#!/usr/bin/env ruby\nputs 'hi'\n";
        assert_eq!(guess_language(buf, "run"), (Some(LANG::Ruby), "ruby"));
    }

    #[test]
    fn guess_language_shebang_detects_elixir_without_extension() {
        let buf = b"#!/usr/bin/env elixir\nIO.puts(\"hi\")\n";
        assert_eq!(guess_language(buf, "run"), (Some(LANG::Elixir), "elixir"));
    }

    #[test]
    fn guess_language_shebang_detects_iex_without_extension() {
        let buf = b"#!/usr/bin/env iex\nIO.puts(\"hi\")\n";
        assert_eq!(guess_language(buf, "run"), (Some(LANG::Elixir), "elixir"));
    }

    #[test]
    fn guess_language_shebang_loses_to_mode_line() {
        let buf = b"#!/usr/bin/env node\n# -*- mode: python -*-\n";
        assert_eq!(guess_language(buf, "run"), (Some(LANG::Python), "python"));
    }

    // ---- normalize_line_endings ----------------------------------------------

    #[test]
    fn normalize_line_endings_normalizes_crlf() {
        let mut d = b"code\r\n# comment\r\n".to_vec();
        normalize_line_endings(&mut d);
        assert_eq!(d, b"code\n# comment\n");
    }

    #[test]
    fn normalize_line_endings_normalizes_lone_cr() {
        let mut d = b"code\r# comment\r".to_vec();
        normalize_line_endings(&mut d);
        assert_eq!(d, b"code\n# comment\n");
    }

    #[test]
    fn normalize_line_endings_normalizes_cr_before_crlf() {
        let mut d = b"a\r\r\nb".to_vec();
        normalize_line_endings(&mut d);
        assert_eq!(d, b"a\n\nb\n");
    }

    #[test]
    fn normalize_line_endings_normalizes_crlf_blank_line() {
        let mut d = b"a\r\n\r\nb\r\n".to_vec();
        normalize_line_endings(&mut d);
        assert_eq!(d, b"a\n\nb\n");
    }

    #[test]
    fn normalize_line_endings_empty_buffer() {
        let mut d = b"".to_vec();
        normalize_line_endings(&mut d);
        assert_eq!(d, b"\n");
    }

    // ---- is_generated --------------------------------------------------------

    #[test]
    fn is_generated_at_generated_top() {
        assert!(is_generated(b"// @generated\nfn x() {}\n"));
    }

    #[test]
    fn is_generated_go_do_not_edit() {
        assert!(is_generated(
            b"// Code generated by protoc. DO NOT EDIT.\npackage x\n",
        ));
    }

    #[test]
    fn is_generated_lizard_marker() {
        assert!(is_generated(b"# GENERATED CODE\nprint('x')\n"));
    }

    #[test]
    fn is_generated_python_do_not_edit() {
        assert!(is_generated(b"# DO NOT EDIT\nprint('x')\n"));
    }

    #[test]
    fn is_generated_case_insensitive_marker() {
        assert!(is_generated(b"// @GENERATED\nfn x() {}\n"));
    }

    /// A marker that first appears after 200 lines lies outside the scanned
    /// window and must not count.
    #[test]
    fn is_generated_marker_only_in_body_is_false() {
        let mut buf = Vec::with_capacity(8 * 1024);
        for i in 0..200 {
            buf.extend_from_slice(format!("// line {i}\n").as_bytes());
        }
        buf.extend_from_slice(b"// @generated -- but this is line 200+\n");
        assert!(!is_generated(&buf));
    }

    #[test]
    fn is_generated_empty_file_is_false() {
        assert!(!is_generated(b""));
    }

    #[test]
    fn is_generated_non_utf8_does_not_panic() {
        let buf: Vec<u8> = (0x80u8..=0xFFu8).cycle().take(2048).collect();
        assert!(!is_generated(&buf));
    }

    #[test]
    fn is_generated_short_file_with_marker() {
        assert!(is_generated(b"# @generated"));
    }

    #[test]
    fn is_generated_utf8_bom_then_marker() {
        let mut buf = Vec::new();
        buf.extend_from_slice(b"\xEF\xBB\xBF");
        buf.extend_from_slice(b"// @generated\nfn x() {}\n");
        assert!(is_generated(&buf));
    }

    #[test]
    fn is_generated_no_marker_returns_false() {
        assert!(!is_generated(
            b"// Hand-written file.\nfn main() { println!(\"hi\"); }\n"
        ));
    }

    #[test]
    fn normalize_line_endings_mixed_endings() {
        let mut d = b"a\nb\rc\r\nd".to_vec();
        normalize_line_endings(&mut d);
        assert_eq!(d, b"a\nb\nc\nd\n");
    }
}