#![allow(clippy::wildcard_imports, clippy::enum_glob_use)]
#![allow(
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
clippy::cast_sign_loss
)]
use std::cmp::Ordering;
use std::collections::HashMap;
use std::fs::{self, File};
use std::io::{Read, Write};
use std::path::{Component, Path, PathBuf};
use std::sync::OnceLock;
use regex::bytes::Regex;
use termcolor::{Color, ColorSpec, StandardStreamLock, WriteColor};
use crate::langs::fake;
use crate::langs::*;
pub fn read_file(path: &Path) -> std::io::Result<Vec<u8>> {
let mut file = File::open(path)?;
let mut data = Vec::new();
file.read_to_end(&mut data)?;
normalize_line_endings(&mut data);
Ok(data)
}
/// Reads the file at `path`, skipping a leading byte-order mark and
/// normalizing line endings, but only if its beginning looks like UTF-8 text.
///
/// Returns `Ok(None)` when the file is at most 3 bytes long, when its first
/// bytes cannot be read, or when the sniffed prefix (up to 64 bytes, BOM
/// excluded) contains invalid UTF-8.
///
/// # Errors
///
/// Returns the I/O error if the file cannot be opened or the remainder
/// cannot be read.
pub fn read_file_with_eol(path: &Path) -> std::io::Result<Option<Vec<u8>>> {
    const SNIFF_LEN: usize = 64;

    // If the size is unknown, fall back to a 1 MiB capacity guess.
    let file_size = match fs::metadata(path) {
        Ok(meta) => meta.len() as usize,
        Err(_) => 1024 * 1024,
    };
    if file_size <= 3 {
        return Ok(None);
    }

    let mut file = File::open(path)?;
    let mut sniff = vec![0; SNIFF_LEN.min(file_size)];
    if file.read_exact(&mut sniff).is_err() {
        return Ok(None);
    }

    // Two-byte BOMs (FE FF / FF FE) take precedence over the UTF-8 BOM.
    let bom_len = if sniff.starts_with(b"\xFE\xFF") || sniff.starts_with(b"\xFF\xFE") {
        2
    } else if sniff.starts_with(b"\xEF\xBB\xBF") {
        3
    } else {
        0
    };
    let payload = &sniff[bom_len..];

    // The sniff window may end in the middle of a multi-byte character, so
    // the last decoded character is dropped before looking for replacement
    // characters.
    let mut head = String::from_utf8_lossy(payload).into_owned();
    head.pop();
    if head.contains('\u{FFFD}') {
        return Ok(None);
    }

    let mut data = Vec::with_capacity(file_size + 2);
    data.extend_from_slice(payload);
    file.read_to_end(&mut data)?;
    normalize_line_endings(&mut data);
    Ok(Some(data))
}
/// Writes `data` to `path`, creating the file or replacing its contents.
///
/// # Errors
///
/// Returns any I/O error raised while creating or writing the file.
pub fn write_file(path: &Path, data: &[u8]) -> std::io::Result<()> {
    fs::write(path, data)
}
/// Looks up the language for `path` from its lower-cased file extension.
///
/// Returns `None` when the path has no extension, when the extension is not
/// valid UTF-8, or when `get_from_ext` does not know the extension.
#[must_use]
pub fn get_language_for_file(path: &Path) -> Option<LANG> {
    let lowered = path.extension()?.to_str()?.to_lowercase();
    get_from_ext(&lowered)
}
/// Decodes a captured mode name as UTF-8 and lower-cases it.
///
/// Returns `None` when `mode` is not valid UTF-8.
fn mode_to_str(mode: &[u8]) -> Option<String> {
    match std::str::from_utf8(mode) {
        Ok(text) => Some(text.to_lowercase()),
        Err(_) => None,
    }
}
// Lazily compiled regexes. Each cell is filled on first use from the pattern
// constant passed alongside it (see `get_regex` and `is_generated`).
static RE1_EMACS: OnceLock<Regex> = OnceLock::new();
static RE2_EMACS: OnceLock<Regex> = OnceLock::new();
static RE1_VIM: OnceLock<Regex> = OnceLock::new();
static RE_GENERATED: OnceLock<Regex> = OnceLock::new();
// Case-insensitive: a `-*-` marker followed later on the line by `mode:`
// (not directly preceded by `-` or a word character); captures the mode name.
const FIRST_EMACS_EXPRESSION: &str = r"(?i)-\*-.*[^-\w]mode\s*:\s*([^:;\s]+)";
// Short form `-*- NAME -*-`; captures NAME.
const SECOND_EMACS_EXPRESSION: &str = r"-\*-\s*([^:;\s]+)\s*-\*-";
// Case-insensitive: `vim:` followed later on the line by `ft=NAME`; captures NAME.
const VIM_EXPRESSION: &str = r"(?i)vim\s*:.*[^\w]ft\s*=\s*([^:\s]+)";
// Case-insensitive markers that `is_generated` treats as "generated file".
const GENERATED_EXPRESSION: &str = r"(?i)@generated\b|DO NOT EDIT|GENERATED CODE";
// `is_generated` inspects at most this many leading bytes...
const GENERATED_SCAN_BYTES: usize = 5 * 1024;
// ...and at most this many leading lines, whichever limit is hit first.
const GENERATED_SCAN_LINES: usize = 50;
/// Reports whether the beginning of `buf` carries a "generated file" marker.
///
/// A leading UTF-8 BOM is skipped. Only the first `GENERATED_SCAN_LINES`
/// lines are searched, and never more than the first `GENERATED_SCAN_BYTES`
/// bytes, using `GENERATED_EXPRESSION`.
pub fn is_generated(buf: &[u8]) -> bool {
    let body = buf.strip_prefix(b"\xEF\xBB\xBF").unwrap_or(buf);
    let capped = &body[..body.len().min(GENERATED_SCAN_BYTES)];
    // Total length of the leading lines (each including its `\n`), limited to
    // the line budget; a shorter input is simply used in full.
    let window_len: usize = capped
        .split_inclusive(|&byte| byte == b'\n')
        .take(GENERATED_SCAN_LINES)
        .map(<[u8]>::len)
        .sum();
    let marker = RE_GENERATED.get_or_init(|| {
        Regex::new(GENERATED_EXPRESSION).expect("GENERATED_EXPRESSION is a constant regex")
    });
    marker.is_match(&capped[..window_len])
}
/// Returns the captures of the first match of `regex` in `line`.
///
/// The pattern is compiled at most once per `once_lock`; later calls reuse
/// the cached regex, so each cell must always be paired with the same pattern.
///
/// # Panics
///
/// Panics if `regex` is not a valid pattern when the cell is first filled.
#[inline]
fn get_regex<'a>(
    once_lock: &OnceLock<Regex>,
    line: &'a [u8],
    regex: &'a str,
) -> Option<regex::bytes::Captures<'a>> {
    let compiled = once_lock.get_or_init(|| Regex::new(regex).unwrap());
    compiled.captures(line)
}
/// Detects the language from a `#!` interpreter line at the start of `buf`.
///
/// The first line is split on ASCII whitespace. If the program's basename is
/// `env`, its options and `NAME=value` assignments are skipped to find the
/// real interpreter. Trailing version digits are dropped before the lookup.
/// Returns `None` when there is no shebang, the line is not valid UTF-8, or
/// the interpreter is unknown.
fn get_shebang_lang(buf: &[u8]) -> Option<LANG> {
    let after_marker = buf.strip_prefix(b"#!")?;
    // `split` always yields at least one piece, even for an empty slice.
    let first_line = after_marker.split(|&byte| byte == b'\n').next()?;
    let first_line = first_line.strip_suffix(b"\r").unwrap_or(first_line);
    let text = std::str::from_utf8(first_line).ok()?;

    let mut words = text.split_ascii_whitespace();
    let interpreter = match basename(words.next()?) {
        "env" => skip_env_args(&mut words)?,
        program => program,
    };
    get_from_interpreter(strip_version_suffix(interpreter))
}
/// Advances `tokens` past the arguments of `env` and returns the basename of
/// the interpreter it would run.
///
/// Skipped tokens:
/// - options (anything starting with `-`); `-u`/`--unset` and `-C`/`--chdir`
///   also consume the following token as their argument, unless the argument
///   is attached (`--chdir=DIR`, `-uNAME`), in which case it is one token;
/// - `NAME=value` environment assignments.
///
/// Returns `None` if the tokens run out before an interpreter is found.
fn skip_env_args<'a>(tokens: &mut std::str::SplitAsciiWhitespace<'a>) -> Option<&'a str> {
    loop {
        let tok = tokens.next()?;
        if tok.starts_with('-') {
            // These options take a separate operand, which must not be
            // mistaken for the interpreter.
            if matches!(tok, "-u" | "--unset" | "-C" | "--chdir") {
                tokens.next()?;
            }
            continue;
        }
        if tok.contains('=') {
            continue;
        }
        return Some(basename(tok));
    }
}
/// Returns the part of `path` after its last `/`, or all of `path` when it
/// contains no `/`.
fn basename(path: &str) -> &str {
    match path.rfind('/') {
        Some(slash) => &path[slash + 1..],
        None => path,
    }
}
/// Removes trailing ASCII digits and dots from `name` (`python3.11` becomes
/// `python`). If nothing would be left, `name` is returned unchanged.
fn strip_version_suffix(name: &str) -> &str {
    match name.trim_end_matches(|c: char| matches!(c, '0'..='9' | '.')) {
        "" => name,
        stem => stem,
    }
}
/// Maps an interpreter name (basename, version suffix already removed) to
/// the language it runs, or `None` for an unrecognized interpreter.
fn get_from_interpreter(name: &str) -> Option<LANG> {
    let lang = match name {
        "bash" | "dash" | "ksh" | "sh" | "zsh" => LANG::Bash,
        "elixir" | "iex" => LANG::Elixir,
        "lua" | "luajit" => LANG::Lua,
        "node" | "nodejs" => LANG::Javascript,
        "perl" => LANG::Perl,
        "php" | "php-cgi" => LANG::Php,
        "python" => LANG::Python,
        "ruby" => LANG::Ruby,
        "tclsh" | "wish" => LANG::Tcl,
        _ => return None,
    };
    Some(lang)
}
/// Extracts an editor mode name from the modelines of `buf`.
///
/// The first four lines are checked for an Emacs `-*- ... mode: NAME`
/// declaration, then the short `-*- NAME -*-` form, then a Vim `ft=NAME`
/// modeline. If none match, the last four lines are checked for a Vim
/// modeline only. The first match wins and is returned lower-cased; a match
/// that is not valid UTF-8 yields `None`.
fn get_emacs_mode(buf: &[u8]) -> Option<String> {
    const SCAN_LINES: usize = 4;
    let is_newline = |byte: &u8| *byte == b'\n';

    for line in buf.split(is_newline).take(SCAN_LINES) {
        let hit = get_regex(&RE1_EMACS, line, FIRST_EMACS_EXPRESSION)
            .or_else(|| get_regex(&RE2_EMACS, line, SECOND_EMACS_EXPRESSION))
            .or_else(|| get_regex(&RE1_VIM, line, VIM_EXPRESSION));
        if let Some(cap) = hit {
            return mode_to_str(&cap[1]);
        }
    }

    // Walk backwards from the end of the buffer.
    for line in buf.rsplit(is_newline).take(SCAN_LINES) {
        if let Some(cap) = get_regex(&RE1_VIM, line, VIM_EXPRESSION) {
            return mode_to_str(&cap[1]);
        }
    }

    None
}
/// Guesses the language of a file from its extension, its editor modeline
/// and, as a last resort, its shebang line.
///
/// Precedence:
/// 1. If the extension and the modeline both name a language and they
///    disagree, the extension wins and its plain name is returned.
/// 2. Otherwise the modeline language, then the extension language, is used.
/// 3. With neither, the shebang interpreter is consulted.
///
/// Except in case 1, the returned name is `fake::get_true(ext, mode)` when
/// that yields one, falling back to the language's own name (or `""` when no
/// language was detected).
pub fn guess_language<'a, P: AsRef<Path>>(buf: &[u8], path: P) -> (Option<LANG>, &'a str) {
    let ext = match path.as_ref().extension().and_then(|e| e.to_str()) {
        Some(raw) => raw.to_lowercase(),
        None => String::new(),
    };
    let from_ext = get_from_ext(&ext);
    let mode = get_emacs_mode(buf).unwrap_or_default();
    let from_mode = get_from_emacs_mode(&mode);

    let detected = match (from_ext, from_mode) {
        (Some(by_ext), Some(by_mode)) if by_ext != by_mode => {
            return (Some(by_ext), by_ext.get_name());
        }
        (_, Some(by_mode)) => Some(by_mode),
        (Some(by_ext), None) => Some(by_ext),
        (None, None) => get_shebang_lang(buf),
    };

    let true_name = fake::get_true(&ext, &mode);
    match detected {
        Some(lang) => (Some(lang), true_name.unwrap_or_else(|| lang.get_name())),
        None => (None, true_name.unwrap_or_default()),
    }
}
/// Rewrites `data` in place so that every line ending is a single `\n`.
///
/// `\r\n` pairs and lone `\r` bytes both become `\n`. Afterwards all trailing
/// newlines are removed and exactly one is appended, so the result always
/// ends with a single `\n` (an empty input becomes `"\n"`).
pub(crate) fn normalize_line_endings(data: &mut Vec<u8>) {
    let len = data.len();
    let mut read = 0;
    let mut write = 0;
    // `write` never overtakes `read`, so compacting in place is safe.
    while read < len {
        let byte = data[read];
        read += 1;
        if byte == b'\r' {
            // Swallow the `\n` of a CRLF pair.
            if read < len && data[read] == b'\n' {
                read += 1;
            }
            data[write] = b'\n';
        } else {
            data[write] = byte;
        }
        write += 1;
    }

    // Keep everything up to and including the last non-newline byte.
    let kept = data[..write]
        .iter()
        .rposition(|&b| b != b'\n')
        .map_or(0, |last| last + 1);
    data.truncate(kept);
    data.push(b'\n');
}
/// Normalizes `path` lexically, without touching the file system.
///
/// `.` components are dropped and each `..` removes the component before it.
/// A `..` with nothing left to remove is discarded, so `../x` becomes `x`.
/// A leading prefix component and the root directory are preserved.
pub(crate) fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
    let mut normalized = PathBuf::new();
    for (index, component) in path.as_ref().components().enumerate() {
        match component {
            // A prefix is only expected as the very first component.
            Component::Prefix(..) if index == 0 => {
                normalized = PathBuf::from(component.as_os_str());
            }
            Component::Prefix(..) => unreachable!(),
            Component::RootDir => normalized.push(component.as_os_str()),
            Component::CurDir => {}
            Component::ParentDir => {
                normalized.pop();
            }
            Component::Normal(segment) => normalized.push(segment),
        }
    }
    normalized
}
/// Measures how far apart two paths are in the directory tree.
///
/// Finds the deepest non-empty ancestor of `path1` (including `path1` itself)
/// that is also a prefix of `path2`, and returns the number of components
/// left in both paths below it. Returns `None` when no such ancestor exists.
pub(crate) fn get_paths_dist(path1: &Path, path2: &Path) -> Option<usize> {
    let common = path1
        .ancestors()
        .find(|dir| !dir.as_os_str().is_empty() && path2.starts_with(dir))?;
    let tail1 = path1.strip_prefix(common).unwrap();
    let tail2 = path2.strip_prefix(common).unwrap();
    Some(tail1.components().count() + tail2.components().count())
}
pub(crate) fn guess_file<S: ::std::hash::BuildHasher>(
current_path: &Path,
include_path: &str,
all_files: &HashMap<String, Vec<PathBuf>, S>,
) -> Vec<PathBuf> {
let include_path = include_path
.strip_prefix("mozilla/")
.unwrap_or(include_path);
let resolved_path = current_path
.parent()
.map(|parent| normalize_path(parent.join(include_path)));
let include_path = normalize_path(include_path);
let Some(file_name) = include_path.file_name() else {
return vec![];
};
let Some(file_name) = file_name.to_str() else {
return vec![];
};
if let Some(possibilities) = all_files.get(file_name) {
if possibilities.len() == 1 {
return possibilities.clone();
}
if let Some(resolved) = resolved_path.as_ref() {
fn unique_match<F: Fn(&PathBuf) -> bool>(
possibilities: &[PathBuf],
current_path: &Path,
pred: F,
) -> Option<Vec<PathBuf>> {
let matched: Vec<PathBuf> = possibilities
.iter()
.filter(|p| current_path != p.as_path() && pred(p))
.cloned()
.collect();
(matched.len() == 1).then_some(matched)
}
if let Some(hit) = unique_match(possibilities, current_path, |p| p == resolved) {
return hit;
}
if let Some(hit) = unique_match(possibilities, current_path, |p| p.ends_with(resolved))
{
return hit;
}
}
let mut new_possibilities = Vec::new();
for p in possibilities {
if p.ends_with(&include_path) && current_path != p {
new_possibilities.push(p.clone());
}
}
if new_possibilities.len() == 1 {
return new_possibilities;
}
new_possibilities.clear();
if let Some(parent) = current_path.parent() {
for p in possibilities {
if p.starts_with(parent) && current_path != p {
new_possibilities.push(p.clone());
}
}
if new_possibilities.len() == 1 {
return new_possibilities;
}
new_possibilities.clear();
}
let mut dist_min = usize::MAX;
let mut path_min = Vec::new();
for p in possibilities {
if current_path == p {
continue;
}
if let Some(dist) = get_paths_dist(current_path, p) {
match dist.cmp(&dist_min) {
Ordering::Less => {
dist_min = dist;
path_min.clear();
path_min.push(p);
}
Ordering::Equal => {
path_min.push(p);
}
Ordering::Greater => {}
}
}
}
let path_min: Vec<_> = path_min.drain(..).cloned().collect();
return path_min;
}
vec![]
}
/// Applies a color spec with `color` as its foreground to `stdout`.
///
/// # Errors
///
/// Returns the I/O error reported by `set_color`.
#[inline]
pub(crate) fn color(stdout: &mut StandardStreamLock, color: Color) -> std::io::Result<()> {
    let mut spec = ColorSpec::new();
    spec.set_fg(Some(color));
    stdout.set_color(&spec)
}
/// Applies a color spec with `color` as its foreground and the intense flag
/// set to `stdout`.
///
/// # Errors
///
/// Returns the I/O error reported by `set_color`.
#[inline]
pub(crate) fn intense_color(stdout: &mut StandardStreamLock, color: Color) -> std::io::Result<()> {
    let mut spec = ColorSpec::new();
    spec.set_fg(Some(color));
    spec.set_intense(true);
    stdout.set_color(&spec)
}
/// Test helper: parses `source` with parser `T` as if it were the file
/// `filename`, computes its metrics and passes the resulting `FuncSpace` to
/// `check`.
///
/// Before parsing, `\r\n` and lone `\r` are converted to `\n`, trailing
/// whitespace and surrounding newlines are trimmed, and a single `\n` is
/// appended.
///
/// # Panics
///
/// Panics if `crate::metrics` returns `None` for the parsed source.
#[cfg(test)]
pub(crate) fn check_func_space<T: crate::ParserTrait, F: Fn(crate::FuncSpace)>(
    source: &str,
    filename: &str,
    check: F,
) {
    let path = PathBuf::from(filename);
    let unix_source = source.replace("\r\n", "\n").replace('\r', "\n");
    let code = unix_source.trim_end().trim_matches('\n');
    let mut bytes = Vec::with_capacity(code.len() + 1);
    bytes.extend_from_slice(code.as_bytes());
    bytes.push(b'\n');
    let parser = T::new(bytes, &path, None);
    #[allow(deprecated)]
    let func_space = crate::metrics(&parser, &path).unwrap();
    check(func_space);
}
/// Test helper: like [`check_func_space`], but passes only the top-level
/// `metrics` of the computed `FuncSpace` to `check`.
#[cfg(test)]
pub(crate) fn check_metrics<T: crate::ParserTrait>(
    source: &str,
    filename: &str,
    check: fn(crate::CodeMetrics) -> (),
) {
    let on_space = |func_space: crate::FuncSpace| check(func_space.metrics);
    check_func_space::<T, _>(source, filename, on_space);
}
/// Test helper: asserts that `func_space` has a direct child space called
/// `name` and that its kind equals `expected`.
///
/// # Panics
///
/// Panics if no direct child has that name, or if its kind differs.
#[cfg(test)]
pub(crate) fn assert_child_space_kind(
    func_space: &crate::FuncSpace,
    name: &str,
    expected: crate::SpaceKind,
) {
    let wanted = Some(name);
    let Some(child) = func_space
        .spaces
        .iter()
        .find(|space| space.name.as_deref() == wanted)
    else {
        panic!("expected a child FuncSpace named {name:?}");
    };
    assert_eq!(
        child.kind, expected,
        "child FuncSpace {name:?} kind: got {:?}, expected {:?}",
        child.kind, expected,
    );
}
#[cfg(test)]
#[allow(
clippy::float_cmp,
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
clippy::cast_sign_loss,
clippy::similar_names,
clippy::doc_markdown,
clippy::needless_raw_string_hashes,
clippy::too_many_lines
)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
// Round-trips byte fixtures through `write_file` / `read_file_with_eol`.
// All cases reuse one file named `test_read` in the system temp directory.
#[test]
fn test_read() {
let tmp_dir = std::env::temp_dir();
let tmp_path = tmp_dir.join("test_read");
// Each entry is (raw file bytes, expected result of `read_file_with_eol`).
let data = vec![
// Two-byte BOMs (either byte order) are dropped.
(b"\xFF\xFEabc".to_vec(), Some(b"abc\n".to_vec())),
(b"\xFE\xFFabc".to_vec(), Some(b"abc\n".to_vec())),
// UTF-8 BOM is dropped, with or without a trailing newline.
(b"\xEF\xBB\xBFabc".to_vec(), Some(b"abc\n".to_vec())),
(b"\xEF\xBB\xBFabc\n".to_vec(), Some(b"abc\n".to_vec())),
// A truncated UTF-8 BOM is invalid UTF-8, so the file is rejected.
(b"\xEF\xBBabc\n".to_vec(), None),
// Plain text: a final newline is guaranteed.
(b"abcdef\n".to_vec(), Some(b"abcdef\n".to_vec())),
(b"abcdef".to_vec(), Some(b"abcdef\n".to_vec())),
// CRLF is normalized to LF, also after a BOM.
(b"abc\r\ndef\r\n".to_vec(), Some(b"abc\ndef\n".to_vec())),
(
b"\xEF\xBB\xBFabc\r\ndef\r\n".to_vec(),
Some(b"abc\ndef\n".to_vec()),
),
];
for (d, expected) in data {
write_file(&tmp_path, &d).unwrap();
let res = read_file_with_eol(&tmp_path).unwrap();
assert_eq!(res, expected);
}
}
// Unix only: an extension that is not valid UTF-8 yields no language
// instead of panicking.
#[cfg(unix)]
#[test]
fn test_get_language_for_file_non_utf8() {
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
let path = Path::new(OsStr::from_bytes(b"foo.\xff"));
assert_eq!(get_language_for_file(path), None);
}
// Unix only: `guess_language` also tolerates a non-UTF-8 extension. With no
// modeline or shebang in the buffer, no language is detected.
#[cfg(unix)]
#[test]
fn test_guess_language_non_utf8() {
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
use std::path::PathBuf;
let path = PathBuf::from(OsStr::from_bytes(b"foo.\xff"));
let (lang, _name) = guess_language(b"int a = 42;", &path);
assert_eq!(lang, None);
}
// An include of ".." normalizes to an empty path with no file name, so
// nothing can be guessed.
#[test]
fn test_guess_file_no_file_name() {
let all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
let current = Path::new("/some/file.c");
let result = guess_file(current, "..", &all_files);
assert!(result.is_empty());
}
// "../foo.h" resolved against the including file's directory selects the
// header one level up, not the one next to the including file.
#[test]
fn guess_file_parent_dir_include_resolves_to_sibling() {
let mut all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
all_files.insert(
"foo.h".to_string(),
vec![
PathBuf::from("/proj/src/foo.h"),
PathBuf::from("/proj/src/lib/foo.h"),
],
);
let current = Path::new("/proj/src/lib/file.c");
let result = guess_file(current, "../foo.h", &all_files);
assert_eq!(result, vec![PathBuf::from("/proj/src/foo.h")]);
}
// "../inc/foo.h" must select the `inc` directory next to the parent, even
// though every candidate ends with "inc/foo.h".
#[test]
fn guess_file_parent_subdir_include_resolves_to_correct_inc() {
let mut all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
all_files.insert(
"foo.h".to_string(),
vec![
PathBuf::from("/proj/src/inc/foo.h"),
PathBuf::from("/proj/src/lib/inc/foo.h"),
PathBuf::from("/proj/other/inc/foo.h"),
],
);
let current = Path::new("/proj/src/lib/file.c");
let result = guess_file(current, "../inc/foo.h", &all_files);
assert_eq!(result, vec![PathBuf::from("/proj/src/inc/foo.h")]);
}
// A bare "foo.h" prefers the header in the including file's own directory.
#[test]
fn guess_file_plain_include_keeps_same_directory_preference() {
let mut all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
all_files.insert(
"foo.h".to_string(),
vec![
PathBuf::from("/proj/src/foo.h"),
PathBuf::from("/proj/src/lib/foo.h"),
],
);
let current = Path::new("/proj/src/lib/file.c");
let result = guess_file(current, "foo.h", &all_files);
assert_eq!(result, vec![PathBuf::from("/proj/src/lib/foo.h")]);
}
// "./foo.h" behaves like the bare include: same directory wins.
#[test]
fn guess_file_curdir_include_resolves_to_same_directory() {
let mut all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
all_files.insert(
"foo.h".to_string(),
vec![
PathBuf::from("/proj/src/foo.h"),
PathBuf::from("/proj/src/lib/foo.h"),
],
);
let current = Path::new("/proj/src/lib/file.c");
let result = guess_file(current, "./foo.h", &all_files);
assert_eq!(result, vec![PathBuf::from("/proj/src/lib/foo.h")]);
}
// "../../foo.h" climbs two directories from the including file.
#[test]
fn guess_file_double_parent_include_resolves_two_levels_up() {
let mut all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
all_files.insert(
"foo.h".to_string(),
vec![
PathBuf::from("/proj/src/foo.h"),
PathBuf::from("/proj/src/a/foo.h"),
PathBuf::from("/proj/src/a/b/foo.h"),
],
);
let current = Path::new("/proj/src/a/b/file.c");
let result = guess_file(current, "../../foo.h", &all_files);
assert_eq!(result, vec![PathBuf::from("/proj/src/foo.h")]);
}
// With a single known file of that name it is returned as-is, even though
// the relative include would resolve to a different location.
#[test]
fn guess_file_unique_basename_returns_only_candidate() {
let mut all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
all_files.insert(
"foo.h".to_string(),
vec![PathBuf::from("/proj/src/lib/foo.h")],
);
let current = Path::new("/proj/src/lib/file.c");
let result = guess_file(current, "../../foo.h", &all_files);
assert_eq!(result, vec![PathBuf::from("/proj/src/lib/foo.h")]);
}
// A leading "mozilla/" is removed, so the include is treated like "foo.h".
#[test]
fn guess_file_mozilla_prefix_is_stripped_before_resolution() {
let mut all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
all_files.insert(
"foo.h".to_string(),
vec![
PathBuf::from("/proj/src/foo.h"),
PathBuf::from("/proj/src/lib/foo.h"),
],
);
let current = Path::new("/proj/src/lib/file.c");
let result = guess_file(current, "mozilla/foo.h", &all_files);
assert_eq!(result, vec![PathBuf::from("/proj/src/lib/foo.h")]);
}
// Exercises extension / modeline combinations of `guess_language`.
#[test]
fn test_guess_language() {
// Emacs long form: `mode: c++` among other variables.
let buf = b"// -*- foo: bar; mode: c++; hello: world\n";
assert_eq!(guess_language(buf, "foo.cpp"), (Some(LANG::Cpp), "c/c++"));
// Emacs short form: `-*- c++ -*-`.
let buf = b"// -*- c++ -*-\n";
assert_eq!(guess_language(buf, "foo.cpp"), (Some(LANG::Cpp), "c/c++"));
// `bar-mode:` is not a `mode:` variable, so only the extension counts.
let buf = b"// -*- foo: bar; bar-mode: c++; hello: world\n";
assert_eq!(
guess_language(buf, "foo.py"),
(Some(LANG::Python), "python")
);
// No modeline at all: the extension decides.
let buf = b"/* hello world */\n";
assert_eq!(guess_language(buf, "foo.cpp"), (Some(LANG::Cpp), "c/c++"));
// Vim modeline near the end of the buffer.
let buf = b"\n\n\n\n\n\n\n\n\n// vim: set ts=4 ft=c++\n\n\n";
assert_eq!(guess_language(buf, "foo.c"), (Some(LANG::Cpp), "c/c++"));
// Unknown extension and nothing in the buffer: no language, empty name.
let buf = b"\n\n\n\n\n\n\n\n\n\n\n\n";
assert_eq!(guess_language(buf, "foo.txt"), (None, ""));
// The reported name may differ from the parser language's own name.
let buf = b"// -*- foo: bar; mode: Objective-C++; hello: world\n";
assert_eq!(
guess_language(buf, "foo.mm"),
(Some(LANG::Cpp), "obj-c/c++")
);
}
// Absolute interpreter path.
#[test]
fn shebang_bare_bash() {
assert_eq!(get_shebang_lang(b"#!/bin/bash\n"), Some(LANG::Bash));
}
// `env` wrapper plus a version digit on the interpreter name.
#[test]
fn shebang_env_python3() {
assert_eq!(
get_shebang_lang(b"#!/usr/bin/env python3\n"),
Some(LANG::Python),
);
}
// Dotted version suffix and a trailing interpreter flag.
#[test]
fn shebang_versioned_perl_with_flag() {
assert_eq!(
get_shebang_lang(b"#!/usr/bin/perl5.36 -w\n"),
Some(LANG::Perl),
);
}
// `env -S` is skipped like any other option.
#[test]
fn shebang_env_dash_s_node() {
assert_eq!(
get_shebang_lang(b"#!/usr/bin/env -S node --experimental\n"),
Some(LANG::Javascript),
);
}
// `NAME=value` assignments before the interpreter are skipped.
#[test]
fn shebang_env_with_var_assignment() {
assert_eq!(
get_shebang_lang(b"#!/usr/bin/env FOO=bar python3\n"),
Some(LANG::Python),
);
}
// `-u` takes the next token as its argument.
#[test]
fn shebang_env_dash_u_consumes_next_token() {
assert_eq!(
get_shebang_lang(b"#!/usr/bin/env -u VAR python3\n"),
Some(LANG::Python),
);
}
// Version suffix on lua.
#[test]
fn shebang_versioned_lua() {
assert_eq!(get_shebang_lang(b"#!/usr/bin/lua5.1\n"), Some(LANG::Lua));
}
// Only the basename of the interpreter path matters.
#[test]
fn shebang_node() {
assert_eq!(
get_shebang_lang(b"#!/usr/local/bin/node\n"),
Some(LANG::Javascript),
);
}
// tclsh maps to Tcl.
#[test]
fn shebang_tclsh() {
assert_eq!(get_shebang_lang(b"#!/usr/bin/tclsh\n"), Some(LANG::Tcl));
}
// The shebang line need not be newline-terminated.
#[test]
fn shebang_no_trailing_newline() {
assert_eq!(get_shebang_lang(b"#!/bin/sh"), Some(LANG::Bash));
}
// A trailing `\r` from a CRLF line ending is ignored.
#[test]
fn shebang_crlf_line_ending() {
assert_eq!(get_shebang_lang(b"#!/bin/bash\r\n"), Some(LANG::Bash));
}
// Degenerate inputs: empty buffer.
#[test]
fn shebang_empty_buffer() {
assert_eq!(get_shebang_lang(b""), None);
}
// Degenerate inputs: `#` alone is not a shebang.
#[test]
fn shebang_single_byte() {
assert_eq!(get_shebang_lang(b"#"), None);
}
// A first line that does not start with `#!` is ignored.
#[test]
fn shebang_no_shebang_prefix() {
assert_eq!(get_shebang_lang(b"// not a shebang\n"), None);
}
// Interpreters not in the lookup table yield no language.
#[test]
fn shebang_unknown_interpreter() {
assert_eq!(get_shebang_lang(b"#!/usr/bin/ocaml\n"), None);
}
// `env` with nothing after it yields no language.
#[test]
fn shebang_env_only_no_interpreter() {
assert_eq!(get_shebang_lang(b"#!/usr/bin/env\n"), None);
}
// A shebang line that is not valid UTF-8 yields no language.
#[test]
fn shebang_non_utf8_returns_none() {
assert_eq!(get_shebang_lang(b"#!/usr/bin/\xff\xfe\n"), None);
}
// A recognized extension takes precedence over the shebang.
#[test]
fn guess_language_extension_wins_over_shebang() {
let buf = b"#!/bin/sh\nprint('hi')\n";
assert_eq!(
guess_language(buf, "foo.py"),
(Some(LANG::Python), "python")
);
}
// Without an extension the shebang decides.
#[test]
fn guess_language_shebang_falls_through_when_no_extension() {
let buf = b"#!/usr/bin/env python3\nprint('hi')\n";
assert_eq!(guess_language(buf, "run"), (Some(LANG::Python), "python"));
}
// Shebang detection for ruby.
#[test]
fn guess_language_shebang_detects_ruby_without_extension() {
let buf = b"#!/usr/bin/env ruby\nputs 'hi'\n";
assert_eq!(guess_language(buf, "run"), (Some(LANG::Ruby), "ruby"));
}
// Shebang detection for elixir.
#[test]
fn guess_language_shebang_detects_elixir_without_extension() {
let buf = b"#!/usr/bin/env elixir\nIO.puts(\"hi\")\n";
assert_eq!(guess_language(buf, "run"), (Some(LANG::Elixir), "elixir"));
}
// `iex` is treated as an Elixir interpreter too.
#[test]
fn guess_language_shebang_detects_iex_without_extension() {
let buf = b"#!/usr/bin/env iex\nIO.puts(\"hi\")\n";
assert_eq!(guess_language(buf, "run"), (Some(LANG::Elixir), "elixir"));
}
// An editor modeline takes precedence over the shebang.
#[test]
fn guess_language_shebang_loses_to_mode_line() {
let buf = b"#!/usr/bin/env node\n# -*- mode: python -*-\n";
assert_eq!(guess_language(buf, "run"), (Some(LANG::Python), "python"));
}
// CRLF becomes LF.
#[test]
fn normalize_line_endings_normalizes_crlf() {
let mut d = b"code\r\n# comment\r\n".to_vec();
normalize_line_endings(&mut d);
assert_eq!(d, b"code\n# comment\n");
}
// A lone CR becomes LF.
#[test]
fn normalize_line_endings_normalizes_lone_cr() {
let mut d = b"code\r# comment\r".to_vec();
normalize_line_endings(&mut d);
assert_eq!(d, b"code\n# comment\n");
}
// CR followed by CRLF counts as two line endings.
#[test]
fn normalize_line_endings_normalizes_cr_before_crlf() {
let mut d = b"a\r\r\nb".to_vec();
normalize_line_endings(&mut d);
assert_eq!(d, b"a\n\nb\n");
}
// Interior blank lines survive; only trailing newlines collapse to one.
#[test]
fn normalize_line_endings_normalizes_crlf_blank_line() {
let mut d = b"a\r\n\r\nb\r\n".to_vec();
normalize_line_endings(&mut d);
assert_eq!(d, b"a\n\nb\n");
}
// An empty buffer becomes a single newline.
#[test]
fn normalize_line_endings_empty_buffer() {
let mut d = b"".to_vec();
normalize_line_endings(&mut d);
assert_eq!(d, b"\n");
}
// `@generated` on the first line.
#[test]
fn is_generated_at_generated_top() {
assert!(is_generated(b"// @generated\nfn x() {}\n"));
}
// `DO NOT EDIT` inside a longer header line.
#[test]
fn is_generated_go_do_not_edit() {
assert!(is_generated(
b"// Code generated by protoc. DO NOT EDIT.\npackage x\n",
));
}
// `GENERATED CODE` marker.
#[test]
fn is_generated_lizard_marker() {
assert!(is_generated(b"# GENERATED CODE\nprint('x')\n"));
}
// `DO NOT EDIT` behind a `#` comment.
#[test]
fn is_generated_python_do_not_edit() {
assert!(is_generated(b"# DO NOT EDIT\nprint('x')\n"));
}
// Markers match regardless of case.
#[test]
fn is_generated_case_insensitive_marker() {
assert!(is_generated(b"// @GENERATED\nfn x() {}\n"));
}
// A marker after 200 ordinary lines is outside the scanned window.
#[test]
fn is_generated_marker_only_in_body_is_false() {
let mut buf = Vec::with_capacity(8 * 1024);
for i in 0..200 {
buf.extend_from_slice(format!("// line {i}\n").as_bytes());
}
buf.extend_from_slice(b"// @generated -- but this is line 200+\n");
assert!(!is_generated(&buf));
}
// Empty input has no marker.
#[test]
fn is_generated_empty_file_is_false() {
assert!(!is_generated(b""));
}
// Arbitrary non-UTF-8 bytes are handled without panicking.
#[test]
fn is_generated_non_utf8_does_not_panic() {
let buf: Vec<u8> = (0x80u8..=0xFFu8).cycle().take(2048).collect();
assert!(!is_generated(&buf));
}
// The marker is found even without a trailing newline.
#[test]
fn is_generated_short_file_with_marker() {
assert!(is_generated(b"# @generated"));
}
// A UTF-8 BOM in front of the marker is skipped.
#[test]
fn is_generated_utf8_bom_then_marker() {
let mut buf = Vec::new();
buf.extend_from_slice(b"\xEF\xBB\xBF");
buf.extend_from_slice(b"// @generated\nfn x() {}\n");
assert!(is_generated(&buf));
}
// Ordinary source without any marker.
#[test]
fn is_generated_no_marker_returns_false() {
assert!(!is_generated(
b"// Hand-written file.\nfn main() { println!(\"hi\"); }\n"
));
}
// LF, lone CR and CRLF in one buffer all become LF, and a final newline is
// appended.
#[test]
fn normalize_line_endings_mixed_endings() {
let mut d = b"a\nb\rc\r\nd".to_vec();
normalize_line_endings(&mut d);
assert_eq!(d, b"a\nb\nc\nd\n");
}
}