#![allow(clippy::wildcard_imports, clippy::enum_glob_use)]
#![allow(
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
clippy::cast_sign_loss
)]
use std::cmp::Ordering;
use std::collections::HashMap;
use std::fs::{self, File};
use std::io::{Read, Write};
use std::path::{Component, Path, PathBuf};
use std::sync::OnceLock;
use regex::bytes::Regex;
use termcolor::{Color, ColorSpec, StandardStreamLock, WriteColor};
use crate::langs::fake;
use crate::langs::*;
pub fn read_file(path: &Path) -> std::io::Result<Vec<u8>> {
let mut file = File::open(path)?;
let mut data = Vec::new();
file.read_to_end(&mut data)?;
normalize_line_endings(&mut data);
Ok(data)
}
/// Reads the file at `path`, returning `Ok(None)` when it should be skipped.
///
/// A file is skipped when:
/// - its reported size is 3 bytes or fewer,
/// - the initial probe (up to 64 bytes) cannot be read in full, or
/// - the probe, after removing a leading byte-order mark, does not decode as
///   UTF-8 (ignoring its final character, which may be cut mid-sequence).
///
/// Otherwise the contents, minus the byte-order mark, are returned after
/// being passed through [`normalize_line_endings`].
///
/// # Errors
///
/// Returns any I/O error raised while opening the file or reading the part
/// that follows the probe.
pub fn read_file_with_eol(path: &Path) -> std::io::Result<Option<Vec<u8>>> {
    // When the metadata lookup fails, fall back to an assumed size of 1 MiB.
    let file_size = match fs::metadata(path) {
        Ok(meta) => meta.len() as usize,
        Err(_) => 1024 * 1024,
    };
    if file_size <= 3 {
        return Ok(None);
    }

    let mut file = File::open(path)?;
    let mut probe = vec![0u8; file_size.min(64)];
    if file.read_exact(&mut probe).is_err() {
        return Ok(None);
    }

    // Number of leading bytes taken by a two-byte (FE FF / FF FE) or
    // three-byte (EF BB BF) byte-order mark, if any.
    let bom_len = if probe.starts_with(b"\xFE\xFF") || probe.starts_with(b"\xFF\xFE") {
        2
    } else if probe.starts_with(b"\xEF\xBB\xBF") {
        3
    } else {
        0
    };
    let start = &probe[bom_len..];

    // The probe may end in the middle of a multi-byte character, so the last
    // decoded character is dropped before looking for replacement characters.
    let mut head = String::from_utf8_lossy(start).into_owned();
    head.pop();
    if head.contains('\u{FFFD}') {
        return Ok(None);
    }

    let mut data = Vec::with_capacity(file_size + 2);
    data.extend_from_slice(start);
    file.read_to_end(&mut data)?;
    normalize_line_endings(&mut data);
    Ok(Some(data))
}
/// Writes `data` to the file at `path`, creating it if needed and replacing
/// any previous contents.
///
/// # Errors
///
/// Returns any I/O error raised while creating or writing the file.
pub fn write_file(path: &Path, data: &[u8]) -> std::io::Result<()> {
    fs::write(path, data)
}
/// Looks up the language for `path` from its lowercased file extension.
///
/// Returns `None` when the path has no extension, the extension is not valid
/// UTF-8, or `get_from_ext` does not recognise it.
#[must_use]
pub fn get_language_for_file(path: &Path) -> Option<LANG> {
    let ext = path.extension()?.to_str()?.to_lowercase();
    get_from_ext(&ext)
}
/// Decodes `mode` as UTF-8 and lowercases it; returns `None` for invalid UTF-8.
fn mode_to_str(mode: &[u8]) -> Option<String> {
    match std::str::from_utf8(mode) {
        Ok(text) => Some(text.to_lowercase()),
        Err(_) => None,
    }
}
/// Compile-once cell for [`FIRST_EMACS_EXPRESSION`] (filled on first use in `get_regex`).
static RE1_EMACS: OnceLock<Regex> = OnceLock::new();
/// Compile-once cell for [`SECOND_EMACS_EXPRESSION`].
static RE2_EMACS: OnceLock<Regex> = OnceLock::new();
/// Compile-once cell for [`VIM_EXPRESSION`].
static RE1_VIM: OnceLock<Regex> = OnceLock::new();
/// Compile-once cell for [`GENERATED_EXPRESSION`].
static RE_GENERATED: OnceLock<Regex> = OnceLock::new();
/// Long-form Emacs marker: `-*-`, then later a `mode:` key preceded by a
/// character that is neither `-` nor a word character; group 1 captures the
/// value up to the next `:`, `;` or whitespace. Carries the `(?i)` flag.
const FIRST_EMACS_EXPRESSION: &str = r"(?i)-\*-.*[^-\w]mode\s*:\s*([^:;\s]+)";
/// Short-form Emacs marker `-*- NAME -*-`; group 1 captures `NAME`.
const SECOND_EMACS_EXPRESSION: &str = r"-\*-\s*([^:;\s]+)\s*-\*-";
/// Vim modeline: `vim:` followed later by `ft=NAME`, where `ft` is preceded by
/// a non-word character; group 1 captures `NAME`. Carries the `(?i)` flag.
const VIM_EXPRESSION: &str = r"(?i)vim\s*:.*[^\w]ft\s*=\s*([^:\s]+)";
/// Markers searched for by `is_generated`: `@generated` (ending at a word
/// boundary), `DO NOT EDIT`, or `GENERATED CODE`. Carries the `(?i)` flag.
const GENERATED_EXPRESSION: &str = r"(?i)@generated\b|DO NOT EDIT|GENERATED CODE";
/// Upper bound, in bytes (5 KiB), on the prefix that `is_generated` scans.
const GENERATED_SCAN_BYTES: usize = 5 * 1024;
/// Upper bound, in newline-terminated lines, on the prefix that `is_generated` scans.
const GENERATED_SCAN_LINES: usize = 50;
/// Reports whether the beginning of `buf` contains a generated-code marker.
///
/// A leading UTF-8 byte-order mark is skipped. The scanned window is limited
/// to the first `GENERATED_SCAN_BYTES` bytes and, within that, ends right
/// after the `GENERATED_SCAN_LINES`-th newline if one is reached. The window
/// is matched against `GENERATED_EXPRESSION`.
pub fn is_generated(buf: &[u8]) -> bool {
    let body = match buf.strip_prefix(b"\xEF\xBB\xBF") {
        Some(rest) => rest,
        None => buf,
    };
    let byte_limit = GENERATED_SCAN_BYTES.min(body.len());

    // Walk the byte-limited prefix and stop just past the last allowed line.
    let mut window_end = byte_limit;
    let mut newlines_seen = 0;
    for (idx, &byte) in body[..byte_limit].iter().enumerate() {
        if byte != b'\n' {
            continue;
        }
        newlines_seen += 1;
        if newlines_seen == GENERATED_SCAN_LINES {
            window_end = idx + 1;
            break;
        }
    }

    let marker = RE_GENERATED.get_or_init(|| {
        Regex::new(GENERATED_EXPRESSION).expect("GENERATED_EXPRESSION is a constant regex")
    });
    marker.is_match(&body[..window_end])
}
/// Returns the first match of `regex` in `line`, with its capture groups.
///
/// The pattern is compiled at most once per `once_lock`: the first call
/// stores the compiled regex in the cell and later calls reuse it.
///
/// # Panics
///
/// Panics if `regex` fails to compile when the cell is first initialised.
#[inline]
fn get_regex<'a>(
    once_lock: &OnceLock<Regex>,
    line: &'a [u8],
    regex: &'a str,
) -> Option<regex::bytes::Captures<'a>> {
    let compiled = once_lock
        .get_or_init(|| Regex::new(regex).expect("constant regex pattern must compile"));
    compiled.captures(line)
}
/// Derives a language from a `#!` line at the very start of `buf`.
///
/// Only the first line is inspected (a trailing `\r` is ignored). The first
/// token's basename names the interpreter, unless it is `env`, in which case
/// the interpreter is found by `skip_env_args`. Trailing version digits and
/// dots are removed before the lookup in `get_from_interpreter`.
///
/// Returns `None` when there is no shebang, the line is not valid UTF-8, no
/// interpreter token is present, or the interpreter is not recognised.
fn get_shebang_lang(buf: &[u8]) -> Option<LANG> {
    let after_marker = buf.strip_prefix(b"#!")?;
    let first_line = after_marker
        .split(|&b| b == b'\n')
        .next()
        .unwrap_or(after_marker);
    let first_line = first_line.strip_suffix(b"\r").unwrap_or(first_line);
    let text = std::str::from_utf8(first_line).ok()?;

    let mut tokens = text.split_ascii_whitespace();
    let program = basename(tokens.next()?);
    let interpreter = match program {
        "env" => skip_env_args(&mut tokens)?,
        other => other,
    };
    get_from_interpreter(strip_version_suffix(interpreter))
}
/// Advances `tokens` past the arguments of an `env` invocation and returns
/// the basename of the first token that looks like a command.
///
/// Tokens starting with `-` are skipped as flags; `-u` additionally consumes
/// the following token. Tokens containing `=` are skipped as variable
/// assignments. Returns `None` if the tokens run out first.
fn skip_env_args<'a>(tokens: &mut std::str::SplitAsciiWhitespace<'a>) -> Option<&'a str> {
    while let Some(tok) = tokens.next() {
        if tok.starts_with('-') {
            if tok == "-u" {
                // `-u` takes a separate argument, which must exist.
                tokens.next()?;
            }
        } else if !tok.contains('=') {
            return Some(basename(tok));
        }
    }
    None
}
/// Returns the part of `path` after its last `/`, or all of `path` when it
/// contains no `/`.
fn basename(path: &str) -> &str {
    match path.rfind('/') {
        Some(slash) => &path[slash + 1..],
        None => path,
    }
}
/// Removes trailing ASCII digits and dots from `name` (e.g. `python3.11`
/// becomes `python`). If nothing would remain, `name` is returned unchanged.
fn strip_version_suffix(name: &str) -> &str {
    let is_version_char = |c: char| c == '.' || c.is_ascii_digit();
    match name.trim_end_matches(is_version_char) {
        "" => name,
        stem => stem,
    }
}
/// Maps an interpreter name (already stripped of directory and version
/// suffix) to its language, or `None` if the name is not in the table.
fn get_from_interpreter(name: &str) -> Option<LANG> {
    let lang = match name {
        "sh" | "bash" | "dash" | "ksh" | "zsh" => LANG::Bash,
        "python" => LANG::Python,
        "perl" => LANG::Perl,
        "lua" | "luajit" => LANG::Lua,
        "php" | "php-cgi" => LANG::Php,
        "node" | "nodejs" => LANG::Javascript,
        "tclsh" | "wish" => LANG::Tcl,
        "ruby" => LANG::Ruby,
        "elixir" | "iex" => LANG::Elixir,
        _ => return None,
    };
    Some(lang)
}
/// Extracts an editor mode name from Emacs or Vim markers in `buf`.
///
/// The first four lines are tried, in order, against the long Emacs form,
/// the short Emacs form and the Vim modeline. If none of them matches, the
/// last four lines are tried against the Vim modeline only. The first match
/// wins; its capture is lowercased, and `None` is returned if that capture is
/// not valid UTF-8 or if nothing matched.
fn get_emacs_mode(buf: &[u8]) -> Option<String> {
    // `splitn(5, ..)` leaves the remainder of the buffer in a fifth piece,
    // which `take(4)` never inspects.
    for line in buf.splitn(5, |c| *c == b'\n').take(4) {
        let captures = get_regex(&RE1_EMACS, line, FIRST_EMACS_EXPRESSION)
            .or_else(|| get_regex(&RE2_EMACS, line, SECOND_EMACS_EXPRESSION))
            .or_else(|| get_regex(&RE1_VIM, line, VIM_EXPRESSION));
        if let Some(cap) = captures {
            return mode_to_str(&cap[1]);
        }
    }

    buf.rsplitn(5, |c| *c == b'\n')
        .take(4)
        .find_map(|line| get_regex(&RE1_VIM, line, VIM_EXPRESSION))
        .and_then(|cap| mode_to_str(&cap[1]))
}
/// Guesses the language of a file from its extension, editor mode markers
/// and shebang line, returning the language (if any) and a display name.
///
/// Precedence:
/// - If the extension and the editor mode both name a language and they
///   disagree, the extension wins and its own name is returned.
/// - Otherwise the extension's language is used if known, then the mode's.
/// - If neither is known, the shebang line is consulted.
///
/// In every case except the disagreement one, the name comes from
/// `fake::get_true(&ext, &mode)` when that yields a value, and falls back to
/// the language's own name (or an empty string when no language was found).
pub fn guess_language<'a, P: AsRef<Path>>(buf: &[u8], path: P) -> (Option<LANG>, &'a str) {
    let ext = match path.as_ref().extension().and_then(|e| e.to_str()) {
        Some(raw) => raw.to_lowercase(),
        None => String::new(),
    };
    let from_ext = get_from_ext(&ext);
    let mode = get_emacs_mode(buf).unwrap_or_default();
    let from_mode = get_from_emacs_mode(&mode);

    match (from_ext, from_mode) {
        // Both sources know a language but disagree: trust the extension.
        (Some(lang_ext), Some(lang_mode)) if lang_ext != lang_mode => {
            (Some(lang_ext), lang_ext.get_name())
        }
        (Some(lang), _) | (None, Some(lang)) => (
            Some(lang),
            fake::get_true(&ext, &mode).unwrap_or_else(|| lang.get_name()),
        ),
        (None, None) => match get_shebang_lang(buf) {
            Some(lang_shebang) => (
                Some(lang_shebang),
                fake::get_true(&ext, &mode).unwrap_or_else(|| lang_shebang.get_name()),
            ),
            None => (None, fake::get_true(&ext, &mode).unwrap_or_default()),
        },
    }
}
/// Rewrites `data` in place so that every `\r\n` pair and every lone `\r`
/// becomes `\n`, then makes the buffer end with exactly one `\n`.
///
/// All trailing newlines are removed before the single final one is added,
/// so an empty or newline-only buffer becomes `"\n"`.
pub(crate) fn normalize_line_endings(data: &mut Vec<u8>) {
    let len = data.len();
    let mut write = 0;
    let mut read = 0;
    // `write` never overtakes `read`, so compacting in place is safe.
    while read < len {
        let byte = data[read];
        read += 1;
        if byte == b'\r' {
            // Swallow the `\n` of a `\r\n` pair.
            if read < len && data[read] == b'\n' {
                read += 1;
            }
            data[write] = b'\n';
        } else {
            data[write] = byte;
        }
        write += 1;
    }
    data.truncate(write);

    let keep = data
        .iter()
        .rposition(|&byte| byte != b'\n')
        .map_or(0, |last| last + 1);
    data.truncate(keep);
    data.push(b'\n');
}
/// Lexically normalises `path` without touching the file system.
///
/// `.` components are dropped and each `..` removes the component before it.
/// A `..` with nothing left to remove is dropped (at the root the result
/// stays the root). A leading prefix component, if any, is kept as the start
/// of the result.
pub(crate) fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
    let mut parts = path.as_ref().components().peekable();
    // Only the first component may be a prefix; consume it up front.
    let mut normalized = parts
        .next_if(|part| matches!(part, Component::Prefix(..)))
        .map_or_else(PathBuf::new, |prefix| PathBuf::from(prefix.as_os_str()));

    for part in parts {
        match part {
            Component::Prefix(..) => unreachable!(),
            Component::CurDir => {}
            Component::ParentDir => {
                normalized.pop();
            }
            Component::RootDir | Component::Normal(_) => normalized.push(part.as_os_str()),
        }
    }
    normalized
}
/// Measures how far apart two paths are, counted in path components.
///
/// The deepest non-empty ancestor of `path1` that is also a prefix of
/// `path2` is taken as the common base; the result is the number of
/// components left in `path1` plus the number left in `path2` after removing
/// that base. Returns `None` when there is no such non-empty ancestor.
pub(crate) fn get_paths_dist(path1: &Path, path2: &Path) -> Option<usize> {
    // `ancestors()` walks from `path1` itself upwards, so the first hit is
    // the deepest shared base.
    let shared = path1
        .ancestors()
        .find(|ancestor| !ancestor.as_os_str().is_empty() && path2.starts_with(ancestor))?;
    let tail1 = path1
        .strip_prefix(shared)
        .expect("ancestor is by construction a prefix of path1");
    let tail2 = path2
        .strip_prefix(shared)
        .expect("ancestor verified by starts_with above");
    Some(tail1.components().count() + tail2.components().count())
}
/// Picks the file(s) in `all_files` that `include_path`, as written inside
/// `current_path`, most plausibly refers to.
///
/// `all_files` maps a bare file name to every known path with that name. A
/// leading `mozilla/` on `include_path` is ignored. If the file name is
/// unknown the result is empty; if it has a single candidate, that candidate
/// is returned. Otherwise these strategies are tried in order, each accepted
/// only if it singles out exactly one candidate:
///
/// 1. the include resolved relative to the directory of `current_path`,
/// 2. candidates whose path ends with the normalised include path,
/// 3. candidates located under the directory of `current_path`.
///
/// If none succeeds, the candidates with the smallest path distance to
/// `current_path` are returned.
pub(crate) fn guess_file<S: ::std::hash::BuildHasher>(
    current_path: &Path,
    include_path: &str,
    all_files: &HashMap<String, Vec<PathBuf>, S>,
) -> Vec<PathBuf> {
    let stripped = include_path
        .strip_prefix("mozilla/")
        .unwrap_or(include_path);
    let resolved_path = current_path
        .parent()
        .map(|dir| normalize_path(dir.join(stripped)));
    let wanted = normalize_path(stripped);

    let known = wanted
        .file_name()
        .and_then(|name| name.to_str())
        .and_then(|name| all_files.get(name));
    let candidates = match known {
        Some(found) => found,
        None => return Vec::new(),
    };
    if candidates.len() == 1 {
        return candidates.clone();
    }

    if let Some(hit) = resolve_against_resolved(candidates, current_path, resolved_path.as_deref())
    {
        return hit;
    }
    if let Some(hit) = unique_filter(candidates, current_path, |p| p.ends_with(&wanted)) {
        return hit;
    }
    if let Some(hit) = resolve_against_parent(candidates, current_path) {
        return hit;
    }
    min_distance_candidates(candidates, current_path)
}
/// Returns the single candidate that satisfies `pred`, ignoring any candidate
/// equal to `current_path`.
///
/// Yields `Some` with a one-element vector only when exactly one candidate
/// qualifies; zero or several matches give `None`.
fn unique_filter<F>(possibilities: &[PathBuf], current_path: &Path, pred: F) -> Option<Vec<PathBuf>>
where
    F: Fn(&PathBuf) -> bool,
{
    let mut matched = Vec::new();
    for candidate in possibilities {
        if current_path != candidate.as_path() && pred(candidate) {
            matched.push(candidate.clone());
        }
    }
    if matched.len() == 1 {
        Some(matched)
    } else {
        None
    }
}
/// Tries to single out one candidate using the resolved include path.
///
/// An exact match against `resolved` is tried first; failing that, a
/// candidate whose path ends with `resolved`. Returns `None` when `resolved`
/// is absent or neither attempt yields exactly one candidate.
fn resolve_against_resolved(
    possibilities: &[PathBuf],
    current_path: &Path,
    resolved: Option<&Path>,
) -> Option<Vec<PathBuf>> {
    let target = resolved?;
    let exact = unique_filter(possibilities, current_path, |candidate| candidate == target);
    if exact.is_some() {
        return exact;
    }
    unique_filter(possibilities, current_path, |candidate| {
        candidate.ends_with(target)
    })
}
/// Tries to single out the one candidate located under the directory that
/// contains `current_path`.
///
/// Returns `None` when `current_path` has no parent or when the number of
/// such candidates is not exactly one.
fn resolve_against_parent(possibilities: &[PathBuf], current_path: &Path) -> Option<Vec<PathBuf>> {
    current_path.parent().and_then(|dir| {
        unique_filter(possibilities, current_path, |candidate| {
            candidate.starts_with(dir)
        })
    })
}
fn min_distance_candidates(possibilities: &[PathBuf], current_path: &Path) -> Vec<PathBuf> {
let mut dist_min = usize::MAX;
let mut path_min: Vec<&PathBuf> = Vec::new();
for p in possibilities {
if current_path == p {
continue;
}
let Some(dist) = get_paths_dist(current_path, p) else {
continue;
};
match dist.cmp(&dist_min) {
Ordering::Less => {
dist_min = dist;
path_min.clear();
path_min.push(p);
}
Ordering::Equal => path_min.push(p),
Ordering::Greater => {}
}
}
path_min.into_iter().cloned().collect()
}
/// Sets the foreground colour of `stdout` to `color`.
///
/// # Errors
///
/// Returns the error reported by `set_color`, if any.
#[inline]
pub(crate) fn color(stdout: &mut StandardStreamLock, color: Color) -> std::io::Result<()> {
    let mut spec = ColorSpec::new();
    spec.set_fg(Some(color));
    stdout.set_color(&spec)
}
/// Sets the foreground colour of `stdout` to `color` with the intense flag on.
///
/// # Errors
///
/// Returns the error reported by `set_color`, if any.
#[inline]
pub(crate) fn intense_color(stdout: &mut StandardStreamLock, color: Color) -> std::io::Result<()> {
    let mut spec = ColorSpec::new();
    spec.set_fg(Some(color)).set_intense(true);
    stdout.set_color(&spec)
}
/// Test helper: parses `source` with parser `T` as if it were the file
/// `filename`, computes its metrics and hands the resulting `FuncSpace` to
/// `check`.
///
/// Before parsing, `\r\n` and lone `\r` are converted to `\n`, trailing
/// whitespace and surrounding newlines are trimmed, and a single final `\n`
/// is appended.
///
/// # Panics
///
/// Panics if `crate::metrics` does not produce a value for the parsed source.
#[cfg(test)]
pub(crate) fn check_func_space<T: crate::ParserTrait, F: Fn(crate::FuncSpace)>(
    source: &str,
    filename: &str,
    check: F,
) {
    let path = std::path::PathBuf::from(filename);
    let unix_source = source.replace("\r\n", "\n").replace('\r', "\n");
    let body = unix_source.trim_end().trim_matches('\n');

    let mut code = Vec::with_capacity(body.len() + 1);
    code.extend_from_slice(body.as_bytes());
    code.push(b'\n');

    let parser = T::new(code, &path, None);
    #[allow(deprecated)]
    let func_space = crate::metrics(&parser, &path).unwrap();
    check(func_space);
}
/// Test helper: same as `check_func_space`, but passes only the top-level
/// `metrics` field of the computed `FuncSpace` to `check`.
#[cfg(test)]
pub(crate) fn check_metrics<T: crate::ParserTrait>(
    source: &str,
    filename: &str,
    check: fn(crate::CodeMetrics) -> (),
) {
    let on_space = |space: crate::FuncSpace| check(space.metrics);
    check_func_space::<T, _>(source, filename, on_space);
}
/// Test helper: asserts that `func_space` has a direct child space called
/// `name` and that its kind equals `expected`.
///
/// # Panics
///
/// Panics if no direct child has that name, or if its kind differs.
#[cfg(test)]
pub(crate) fn assert_child_space_kind(
    func_space: &crate::FuncSpace,
    name: &str,
    expected: crate::SpaceKind,
) {
    let found = func_space
        .spaces
        .iter()
        .find(|space| space.name.as_deref() == Some(name));
    let Some(child) = found else {
        panic!("expected a child FuncSpace named {name:?}");
    };
    assert_eq!(
        child.kind, expected,
        "child FuncSpace {name:?} kind: got {:?}, expected {:?}",
        child.kind, expected,
    );
}
// Test module, compiled only under `cfg(test)` and loaded from `tools_tests.rs`.
#[cfg(test)]
#[path = "tools_tests.rs"]
mod tests;