use std::collections::{HashMap, HashSet};
use std::io::Read;
use std::path::{Path, PathBuf};
use ignore::WalkBuilder;
/// Size limit in bytes (10 MiB). `scan_file` reports anything larger as
/// binary without opening the file.
const BINARY_SIZE_THRESHOLD: u64 = 10 * 1024 * 1024;
/// Classification result for a single file, as returned by
/// `FilesystemManager::classify`.
#[derive(Debug, Clone)]
pub struct FileInfo {
/// Modification time in whole seconds since the Unix epoch (0 when it
/// cannot be determined).
pub mtime: u64,
/// File size in bytes, taken from the supplied metadata.
pub size: u64,
/// Longest configured root that contains the file, if any.
pub root: Option<PathBuf>,
/// Binary/text classification plus text-specific details.
pub kind: FileKind,
}
impl FileInfo {
    /// Returns the language identifier recorded for a text file.
    ///
    /// Binary files never carry a language, so they always yield `None`;
    /// a text file yields `None` when no language could be detected.
    #[must_use]
    pub const fn language_id(&self) -> Option<&'static str> {
        if let FileKind::Text { language_id, .. } = self.kind {
            language_id
        } else {
            None
        }
    }
}
/// Coarse content classification of a file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileKind {
/// The file was not scanned as text (it contains a NUL byte or exceeds
/// the size threshold).
Binary,
/// The file scanned as text.
Text {
/// Number of `\n` bytes found in the file.
lines: usize,
/// Language identifier from the file name/extension, falling back to
/// the shebang line; `None` when neither is recognised.
language_id: Option<&'static str>,
},
}
/// Classifies files and maps them to workspace roots, memoising scan results.
pub struct FilesystemManager {
/// Scan results keyed by path; an entry is reused while the file's mtime
/// (in whole seconds) is unchanged.
cache: std::sync::Mutex<HashMap<PathBuf, CachedEntry>>,
/// Roots consulted by `resolve_root`; replaced wholesale by `set_roots`.
roots: std::sync::Mutex<Vec<PathBuf>>,
}
/// Memoised scan result for one path.
struct CachedEntry {
/// Modification time (seconds since the Unix epoch) observed when the
/// file was scanned.
mtime: u64,
/// Classification computed by that scan.
kind: FileKind,
}
impl Default for FilesystemManager {
    /// Builds a manager with an empty classification cache and no roots.
    fn default() -> Self {
        let cache = std::sync::Mutex::new(HashMap::new());
        let roots = std::sync::Mutex::new(Vec::new());
        Self { cache, roots }
    }
}
impl FilesystemManager {
/// Creates an empty manager; equivalent to `FilesystemManager::default()`.
#[must_use]
pub fn new() -> Self {
Self::default()
}
/// Classifies `path` as binary or text and gathers its basic metadata.
///
/// The scan result is memoised per path: when a cached entry carries the
/// same modification time (whole seconds) as `metadata`, it is reused and
/// the file is not read again. Otherwise the file is scanned and the
/// cache entry is replaced. A poisoned cache lock only disables caching
/// for this call.
///
/// For text files the language comes from the file name/extension first,
/// then from the shebang line.
pub fn classify(&self, path: &Path, metadata: &std::fs::Metadata) -> FileInfo {
    let mtime = mtime_secs(metadata);
    let size = metadata.len();
    let root = self.resolve_root(path);

    // The guard lives only inside the closure, so the scan below runs
    // without holding the cache lock.
    let cached = self.cache.lock().ok().and_then(|cache| {
        let entry = cache.get(path)?;
        (entry.mtime == mtime).then_some(entry.kind)
    });

    let kind = match cached {
        Some(kind) => kind,
        None => {
            let ext_language = detect_language_id_opt(path);
            let fresh = match scan_file(path, metadata) {
                Some(scan) => FileKind::Text {
                    lines: scan.lines,
                    language_id: ext_language.or(scan.shebang_language),
                },
                None => FileKind::Binary,
            };
            if let Ok(mut cache) = self.cache.lock() {
                cache.insert(path.to_path_buf(), CachedEntry { mtime, kind: fresh });
            }
            fresh
        }
    };

    FileInfo {
        mtime,
        size,
        root,
        kind,
    }
}
/// Returns `true` when [`Self::classify`] reports `path` as binary.
pub fn is_binary(&self, path: &Path, metadata: &std::fs::Metadata) -> bool {
    let info = self.classify(path, metadata);
    info.kind == FileKind::Binary
}
/// Returns the number of lines in `path`, or `None` for binary files.
///
/// The count is whatever [`Self::classify`] recorded for the file.
pub fn line_count(&self, path: &Path, metadata: &std::fs::Metadata) -> Option<usize> {
    if let FileKind::Text { lines, .. } = self.classify(path, metadata).kind {
        Some(lines)
    } else {
        None
    }
}
/// Returns the language identifier for `path`.
///
/// The file name and extension are checked first, which needs no I/O.
/// Only when they are inconclusive is the file's metadata read and the
/// file classified, so that a shebang line can decide. Returns `None`
/// when the metadata is unavailable or no language is recognised.
pub fn language_id(&self, path: &Path) -> Option<&'static str> {
    detect_language_id_opt(path).or_else(|| {
        let metadata = std::fs::metadata(path).ok()?;
        self.classify(path, &metadata).language_id()
    })
}
/// Finds the configured root that contains `path`.
///
/// When several roots are prefixes of `path`, the one with the longest
/// OS-string length wins (on a tie, the one listed later). Returns `None`
/// when no root matches or the roots lock is poisoned.
#[must_use]
pub fn resolve_root(&self, path: &Path) -> Option<PathBuf> {
    let roots = self.roots.lock().ok()?;
    let mut best: Option<&PathBuf> = None;
    for candidate in roots.iter() {
        if !path.starts_with(candidate) {
            continue;
        }
        match best {
            // Keep the current winner only when it is strictly longer.
            Some(current) if current.as_os_str().len() > candidate.as_os_str().len() => {}
            _ => best = Some(candidate),
        }
    }
    best.cloned()
}
/// Replaces the full list of roots used by [`Self::resolve_root`].
///
/// The call is a no-op when the roots lock is poisoned.
pub fn set_roots(&self, roots: Vec<PathBuf>) {
    let Ok(mut guard) = self.roots.lock() else {
        return;
    };
    *guard = roots;
}
/// Scans `roots` and reports which of `configured_keys` occur in the workspace.
///
/// Every regular file yielded by the walker is mapped to a key: its
/// detected language id (file name/extension first, shebang as a
/// fallback), or, when no language is detected, its raw extension. Keys
/// that appear in `configured_keys` are collected.
///
/// Roots that do not exist are skipped. The walk stops as soon as every
/// configured key has been found, and it is not started at all when
/// `configured_keys` is empty.
#[allow(clippy::implicit_hasher, reason = "All callers use the default hasher")]
pub fn detect_workspace_languages(
    &self,
    roots: &[PathBuf],
    configured_keys: &HashSet<&str>,
) -> HashSet<String> {
    let mut detected = HashSet::new();
    // Nothing can match, so avoid touching the filesystem.
    if configured_keys.is_empty() {
        return detected;
    }
    for root in roots {
        if !root.exists() {
            continue;
        }
        let walker = WalkBuilder::new(root).git_ignore(true).hidden(true).build();
        for entry in walker.flatten() {
            if !entry.file_type().is_some_and(|ft| ft.is_file()) {
                continue;
            }
            let path = entry.path();
            let lang = detect_language_id_opt(path).or_else(|| {
                let metadata = entry.metadata().ok()?;
                self.classify(path, &metadata).language_id()
            });
            // A detected language takes precedence; the raw extension is
            // only considered when no language was detected.
            let key: Option<&str> = match lang {
                Some(lang) => Some(lang),
                None => path.extension().and_then(|e| e.to_str()),
            };
            let Some(key) = key else {
                continue;
            };
            if !configured_keys.contains(key) {
                continue;
            }
            detected.insert(key.to_string());
            // The set only grows here, so this is the only place where
            // it can become complete.
            if detected.len() == configured_keys.len() {
                return detected;
            }
        }
    }
    detected
}
}
/// Formats a byte count for display using binary units (1 KB = 1024 B).
///
/// Output shapes: `"512 B"`, `"12 KB"` (no decimals), `"3.4 MB"` and
/// `"1.2 GB"` (one decimal). A value that would round up to 1024 of its
/// unit is shown in the next unit instead, so 1_048_575 bytes renders as
/// `"1.0 MB"` rather than `"1024 KB"`.
#[must_use]
#[allow(
    clippy::cast_precision_loss,
    reason = "display-only rounding is acceptable"
)]
pub fn format_file_size(bytes: u64) -> String {
    const KIB: f64 = 1024.0;
    const MIB: f64 = 1_048_576.0;
    const GIB: f64 = 1_073_741_824.0;

    if bytes < 1024 {
        return format!("{bytes} B");
    }
    let size = bytes as f64;
    // Pick the unit from the value as it will be displayed, not from the
    // raw byte count, so a rounded "1024" never appears.
    if (size / KIB).round() < 1024.0 {
        format!("{:.0} KB", size / KIB)
    } else if (size / MIB * 10.0).round() < 10_240.0 {
        format!("{:.1} MB", size / MIB)
    } else {
        format!("{:.1} GB", size / GIB)
    }
}
/// Maps a path to a language identifier using only its name.
///
/// Well-known file names (e.g. `Dockerfile`, `Makefile`, `Cargo.toml`)
/// are checked first; otherwise the extension decides. Matching is
/// case-sensitive and no file content is read. Returns `None` when
/// neither the name nor the extension is recognised.
pub(crate) fn detect_language_id_opt(path: &Path) -> Option<&'static str> {
    let by_name = path
        .file_name()
        .and_then(|n| n.to_str())
        .and_then(|name| match name {
            "Dockerfile" => Some("dockerfile"),
            "Makefile" | "GNUmakefile" => Some("makefile"),
            "CMakeLists.txt" => Some("cmake"),
            "Cargo.toml" | "Cargo.lock" => Some("toml"),
            "Gemfile" | "Rakefile" => Some("ruby"),
            "Justfile" | "justfile" => Some("just"),
            "PKGBUILD" => Some("shellscript"),
            _ => None,
        });
    if by_name.is_some() {
        return by_name;
    }

    let ext = path.extension().and_then(|e| e.to_str())?;
    let lang = match ext {
        "rs" => "rust",
        "go" => "go",
        "c" => "c",
        "cpp" | "cc" | "cxx" | "h" | "hpp" => "cpp",
        "zig" => "zig",
        "d" => "d",
        "v" => "v",
        "nim" => "nim",
        "java" => "java",
        "kt" | "kts" => "kotlin",
        "scala" | "sc" => "scala",
        "groovy" | "gvy" => "groovy",
        "clj" | "cljs" | "cljc" => "clojure",
        "cs" => "csharp",
        "fs" | "fsx" | "fsi" => "fsharp",
        "swift" => "swift",
        "m" | "mm" => "objective-c",
        "py" => "python",
        "rb" => "ruby",
        "pl" | "pm" => "perl",
        "php" => "php",
        "lua" => "lua",
        "tcl" => "tcl",
        "cr" => "crystal",
        "js" | "mjs" | "cjs" => "javascript",
        "ts" | "mts" | "cts" => "typescript",
        "tsx" => "typescriptreact",
        "jsx" => "javascriptreact",
        "hs" | "lhs" => "haskell",
        "ml" | "mli" => "ocaml",
        "elm" => "elm",
        "gleam" => "gleam",
        "ex" | "exs" => "elixir",
        "erl" | "hrl" => "erlang",
        "purs" => "purescript",
        "sh" | "bash" | "zsh" | "ebuild" | "eclass" | "install" => "shellscript",
        "fish" => "fish",
        "ps1" | "psm1" | "psd1" => "powershell",
        "r" | "R" => "r",
        "jl" => "julia",
        "mojo" => "mojo",
        "html" | "htm" => "html",
        "css" => "css",
        "scss" => "scss",
        "sass" => "sass",
        "less" => "less",
        "svelte" => "svelte",
        "vue" => "vue",
        "json" | "jsonc" => "json",
        "yaml" | "yml" => "yaml",
        "toml" => "toml",
        "xml" | "xsl" | "xslt" | "xsd" => "xml",
        "sql" => "sql",
        "graphql" | "gql" => "graphql",
        "proto" => "proto",
        "md" | "mdx" => "markdown",
        "rst" => "restructuredtext",
        "tex" | "latex" => "latex",
        "typ" => "typst",
        "nix" => "nix",
        "tf" | "tfvars" => "terraform",
        "cmake" => "cmake",
        "dart" => "dart",
        "dockerfile" => "dockerfile",
        _ => return None,
    };
    Some(lang)
}
/// Returns the modification time in whole seconds since the Unix epoch.
///
/// Yields 0 when the platform cannot report a modification time or when
/// the timestamp lies before the epoch.
fn mtime_secs(metadata: &std::fs::Metadata) -> u64 {
    let Ok(modified) = metadata.modified() else {
        return 0;
    };
    match modified.duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
/// Outcome of scanning a file that was treated as text.
struct ScanResult {
/// Number of `\n` bytes seen in the file.
lines: usize,
/// Language inferred from a `#!` first line, if any.
shebang_language: Option<&'static str>,
}
/// Reads `path` in 8 KiB chunks to decide whether it is text and, if so,
/// counts its lines and inspects its shebang.
///
/// Returns `None` (binary) when the file is larger than
/// `BINARY_SIZE_THRESHOLD` or contains a NUL byte. Otherwise returns the
/// number of `\n` bytes and the language parsed from the first line.
///
/// I/O failures are handled on a best-effort basis: a file that cannot be
/// opened is reported as empty text, and a fatal read error ends the scan
/// with whatever was counted so far. Interrupted reads are retried so that
/// a signal cannot truncate the line count.
fn scan_file(path: &Path, metadata: &std::fs::Metadata) -> Option<ScanResult> {
    if metadata.len() > BINARY_SIZE_THRESHOLD {
        return None;
    }
    let mut lines = 0;
    let mut shebang_language = None;
    let Ok(mut file) = std::fs::File::open(path) else {
        return Some(ScanResult {
            lines,
            shebang_language,
        });
    };
    // Every read already asks for a full 8 KiB block, so no extra
    // buffering layer is needed.
    let mut buf = [0u8; 8192];
    let mut first_chunk = true;
    loop {
        let n = match file.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            // Interrupted reads are transient; retry instead of reporting
            // a partial result.
            Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(_) => break,
        };
        let chunk = &buf[..n];
        if memchr::memchr(0, chunk).is_some() {
            return None;
        }
        if first_chunk {
            first_chunk = false;
            // Only the first line (or the whole chunk if it has no
            // newline) is considered for the shebang.
            let first_line_end = memchr::memchr(b'\n', chunk).unwrap_or(n);
            shebang_language = parse_shebang(&chunk[..first_line_end]);
        }
        lines += memchr::memchr_iter(b'\n', chunk).count();
    }
    Some(ScanResult {
        lines,
        shebang_language,
    })
}
/// Infers a language identifier from a file's first line if it is a shebang.
///
/// `first_line` is the raw first line without its trailing newline.
/// Returns `None` when the line does not start with `#!`, is not valid
/// UTF-8, names no interpreter, or names one that is not recognised.
///
/// Handled forms:
/// - direct interpreters, with or without arguments (`#!/bin/bash -e`);
/// - whitespace after `#!` (`#! /bin/sh`);
/// - `env` launchers, skipping option flags and `NAME=VALUE` assignments
///   (`#!/usr/bin/env -S FOO=1 python3`);
/// - versioned interpreter names (`python3.11`, `lua5.4`, `ksh93`), whose
///   trailing digits and dots are ignored.
fn parse_shebang(first_line: &[u8]) -> Option<&'static str> {
    let rest = first_line.strip_prefix(b"#!")?;
    let rest = std::str::from_utf8(rest.trim_ascii_start()).ok()?;
    let mut words = rest.split_whitespace();
    let command = words.next()?;
    let interpreter = if command.ends_with("/env") {
        // `env [OPTION]... [NAME=VALUE]... COMMAND`: the interpreter is the
        // first word that is neither a flag nor a variable assignment.
        words.find(|word| !word.starts_with('-') && !word.contains('='))?
    } else {
        command
    };
    let basename = interpreter.rsplit('/').next()?;
    // Drop a trailing version suffix so `python3.11` matches `python`.
    let name = basename.trim_end_matches(|c: char| c == '.' || c.is_ascii_digit());
    match name {
        "bash" | "sh" | "zsh" | "dash" | "ksh" => Some("shellscript"),
        "fish" => Some("fish"),
        "python" => Some("python"),
        "node" | "nodejs" => Some("javascript"),
        "deno" => Some("typescript"),
        "ruby" | "irb" => Some("ruby"),
        "perl" => Some("perl"),
        "php" => Some("php"),
        "lua" | "luajit" => Some("lua"),
        "tclsh" | "wish" => Some("tcl"),
        "Rscript" => Some("r"),
        "julia" => Some("julia"),
        "elixir" | "iex" => Some("elixir"),
        "erl" => Some("erlang"),
        "swift" => Some("swift"),
        "kotlin" => Some("kotlin"),
        "scala" => Some("scala"),
        "groovy" => Some("groovy"),
        "crystal" => Some("crystal"),
        _ => None,
    }
}
// Unit tests for file classification, language detection, shebang parsing,
// size formatting, and root resolution. Filesystem-backed tests write into
// a fresh temporary directory.
#[cfg(test)]
#[allow(
clippy::expect_used,
reason = "tests use expect for readable assertions"
)]
mod tests {
use super::*;
use std::io::Write;
// A NUL byte anywhere in the content marks the file as binary.
#[test]
fn classify_binary_file() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("binary.bin");
let mut f = std::fs::File::create(&path).expect("create");
f.write_all(&[0x89, 0x50, 0x4E, 0x47, 0x00, 0x0A])
.expect("write");
drop(f);
let mgr = FilesystemManager::new();
let metadata = std::fs::metadata(&path).expect("metadata");
assert_eq!(mgr.classify(&path, &metadata).kind, FileKind::Binary);
}
// `.txt` has no language mapping, so only the line count is populated.
#[test]
fn classify_text_file() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("text.txt");
std::fs::write(&path, "Hello, world!\nLine two.\n").expect("write");
let mgr = FilesystemManager::new();
let metadata = std::fs::metadata(&path).expect("metadata");
assert_eq!(
mgr.classify(&path, &metadata).kind,
FileKind::Text {
lines: 2,
language_id: None,
}
);
}
// An empty file is text with zero lines, not binary.
#[test]
fn classify_empty_file() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("empty.txt");
std::fs::write(&path, "").expect("write");
let mgr = FilesystemManager::new();
let metadata = std::fs::metadata(&path).expect("metadata");
assert_eq!(
mgr.classify(&path, &metadata).kind,
FileKind::Text {
lines: 0,
language_id: None,
}
);
}
// The second call is answered from the cache and must agree with the first.
#[test]
fn line_count_text() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("code.rs");
std::fs::write(&path, "fn main() {\n println!(\"hi\");\n}\n").expect("write");
let mgr = FilesystemManager::new();
let metadata = std::fs::metadata(&path).expect("metadata");
assert_eq!(mgr.line_count(&path, &metadata), Some(3));
assert_eq!(mgr.line_count(&path, &metadata), Some(3));
}
// Binary files have no line count.
#[test]
fn line_count_binary() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("image.png");
let mut f = std::fs::File::create(&path).expect("create");
f.write_all(&[0x89, 0x50, 0x4E, 0x47, 0x00]).expect("write");
drop(f);
let mgr = FilesystemManager::new();
let metadata = std::fs::metadata(&path).expect("metadata");
assert_eq!(mgr.line_count(&path, &metadata), None);
}
// Repeated classification of one path keeps a single cache entry.
#[test]
fn cache_populated_by_classify() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("cached.bin");
let mut f = std::fs::File::create(&path).expect("create");
f.write_all(&[0x00, 0x01, 0x02]).expect("write");
drop(f);
let mgr = FilesystemManager::new();
let metadata = std::fs::metadata(&path).expect("metadata");
assert!(mgr.is_binary(&path, &metadata));
assert!(mgr.is_binary(&path, &metadata));
let len = mgr.cache.lock().expect("lock").len();
assert_eq!(len, 1);
}
// Well-known file names map to a language regardless of extension.
#[test]
fn language_detection_filenames() {
assert_eq!(
detect_language_id_opt(Path::new("Dockerfile")),
Some("dockerfile")
);
assert_eq!(
detect_language_id_opt(Path::new("Makefile")),
Some("makefile")
);
assert_eq!(
detect_language_id_opt(Path::new("GNUmakefile")),
Some("makefile")
);
assert_eq!(
detect_language_id_opt(Path::new("CMakeLists.txt")),
Some("cmake")
);
assert_eq!(
detect_language_id_opt(Path::new("Cargo.toml")),
Some("toml")
);
assert_eq!(detect_language_id_opt(Path::new("Gemfile")), Some("ruby"));
assert_eq!(detect_language_id_opt(Path::new("Rakefile")), Some("ruby"));
assert_eq!(detect_language_id_opt(Path::new("Justfile")), Some("just"));
assert_eq!(
detect_language_id_opt(Path::new("PKGBUILD")),
Some("shellscript")
);
}
// Extension-to-language mapping, including the unknown and missing cases.
#[test]
#[allow(clippy::too_many_lines, reason = "exhaustive extension coverage")]
fn language_detection_extensions() {
assert_eq!(detect_language_id_opt(Path::new("test.rs")), Some("rust"));
assert_eq!(detect_language_id_opt(Path::new("test.go")), Some("go"));
assert_eq!(detect_language_id_opt(Path::new("test.c")), Some("c"));
assert_eq!(detect_language_id_opt(Path::new("test.cpp")), Some("cpp"));
assert_eq!(detect_language_id_opt(Path::new("test.h")), Some("cpp"));
assert_eq!(detect_language_id_opt(Path::new("test.zig")), Some("zig"));
assert_eq!(detect_language_id_opt(Path::new("test.d")), Some("d"));
assert_eq!(detect_language_id_opt(Path::new("test.v")), Some("v"));
assert_eq!(detect_language_id_opt(Path::new("test.nim")), Some("nim"));
assert_eq!(detect_language_id_opt(Path::new("test.java")), Some("java"));
assert_eq!(detect_language_id_opt(Path::new("test.kt")), Some("kotlin"));
assert_eq!(
detect_language_id_opt(Path::new("test.scala")),
Some("scala")
);
assert_eq!(
detect_language_id_opt(Path::new("test.groovy")),
Some("groovy")
);
assert_eq!(
detect_language_id_opt(Path::new("test.clj")),
Some("clojure")
);
assert_eq!(detect_language_id_opt(Path::new("test.cs")), Some("csharp"));
assert_eq!(detect_language_id_opt(Path::new("test.fs")), Some("fsharp"));
assert_eq!(
detect_language_id_opt(Path::new("test.swift")),
Some("swift")
);
assert_eq!(
detect_language_id_opt(Path::new("test.m")),
Some("objective-c")
);
assert_eq!(detect_language_id_opt(Path::new("test.py")), Some("python"));
assert_eq!(detect_language_id_opt(Path::new("test.rb")), Some("ruby"));
assert_eq!(detect_language_id_opt(Path::new("test.pl")), Some("perl"));
assert_eq!(detect_language_id_opt(Path::new("test.php")), Some("php"));
assert_eq!(detect_language_id_opt(Path::new("test.lua")), Some("lua"));
assert_eq!(detect_language_id_opt(Path::new("test.tcl")), Some("tcl"));
assert_eq!(
detect_language_id_opt(Path::new("test.cr")),
Some("crystal")
);
assert_eq!(
detect_language_id_opt(Path::new("test.js")),
Some("javascript")
);
assert_eq!(
detect_language_id_opt(Path::new("test.mjs")),
Some("javascript")
);
assert_eq!(
detect_language_id_opt(Path::new("test.ts")),
Some("typescript")
);
assert_eq!(
detect_language_id_opt(Path::new("test.mts")),
Some("typescript")
);
assert_eq!(
detect_language_id_opt(Path::new("test.tsx")),
Some("typescriptreact")
);
assert_eq!(
detect_language_id_opt(Path::new("test.jsx")),
Some("javascriptreact")
);
assert_eq!(
detect_language_id_opt(Path::new("test.hs")),
Some("haskell")
);
assert_eq!(detect_language_id_opt(Path::new("test.ml")), Some("ocaml"));
assert_eq!(detect_language_id_opt(Path::new("test.elm")), Some("elm"));
assert_eq!(
detect_language_id_opt(Path::new("test.gleam")),
Some("gleam")
);
assert_eq!(detect_language_id_opt(Path::new("test.ex")), Some("elixir"));
assert_eq!(
detect_language_id_opt(Path::new("test.erl")),
Some("erlang")
);
assert_eq!(
detect_language_id_opt(Path::new("test.purs")),
Some("purescript")
);
assert_eq!(
detect_language_id_opt(Path::new("test.sh")),
Some("shellscript")
);
assert_eq!(
detect_language_id_opt(Path::new("test.bash")),
Some("shellscript")
);
assert_eq!(
detect_language_id_opt(Path::new("test.ebuild")),
Some("shellscript")
);
assert_eq!(
detect_language_id_opt(Path::new("test.eclass")),
Some("shellscript")
);
assert_eq!(detect_language_id_opt(Path::new("test.fish")), Some("fish"));
assert_eq!(
detect_language_id_opt(Path::new("test.ps1")),
Some("powershell")
);
assert_eq!(detect_language_id_opt(Path::new("test.r")), Some("r"));
assert_eq!(detect_language_id_opt(Path::new("test.jl")), Some("julia"));
assert_eq!(detect_language_id_opt(Path::new("test.html")), Some("html"));
assert_eq!(detect_language_id_opt(Path::new("test.css")), Some("css"));
assert_eq!(detect_language_id_opt(Path::new("test.scss")), Some("scss"));
assert_eq!(detect_language_id_opt(Path::new("test.less")), Some("less"));
assert_eq!(
detect_language_id_opt(Path::new("test.svelte")),
Some("svelte")
);
assert_eq!(detect_language_id_opt(Path::new("test.vue")), Some("vue"));
assert_eq!(detect_language_id_opt(Path::new("test.json")), Some("json"));
assert_eq!(
detect_language_id_opt(Path::new("test.jsonc")),
Some("json")
);
assert_eq!(detect_language_id_opt(Path::new("test.yaml")), Some("yaml"));
assert_eq!(detect_language_id_opt(Path::new("test.toml")), Some("toml"));
assert_eq!(detect_language_id_opt(Path::new("test.xml")), Some("xml"));
assert_eq!(detect_language_id_opt(Path::new("test.sql")), Some("sql"));
assert_eq!(
detect_language_id_opt(Path::new("test.graphql")),
Some("graphql")
);
assert_eq!(
detect_language_id_opt(Path::new("test.proto")),
Some("proto")
);
assert_eq!(
detect_language_id_opt(Path::new("test.md")),
Some("markdown")
);
assert_eq!(
detect_language_id_opt(Path::new("test.mdx")),
Some("markdown")
);
assert_eq!(
detect_language_id_opt(Path::new("test.rst")),
Some("restructuredtext")
);
assert_eq!(detect_language_id_opt(Path::new("test.tex")), Some("latex"));
assert_eq!(detect_language_id_opt(Path::new("test.typ")), Some("typst"));
assert_eq!(detect_language_id_opt(Path::new("test.nix")), Some("nix"));
assert_eq!(
detect_language_id_opt(Path::new("test.tf")),
Some("terraform")
);
assert_eq!(detect_language_id_opt(Path::new("test.dart")), Some("dart"));
assert_eq!(detect_language_id_opt(Path::new("test.unknown")), None);
assert_eq!(detect_language_id_opt(Path::new("noextension")), None);
}
// Shebang parsing: one test per interpreter, then the syntax variations.
#[test]
fn shebang_bash_direct() {
assert_eq!(parse_shebang(b"#!/bin/bash"), Some("shellscript"));
}
#[test]
fn shebang_bash_env() {
assert_eq!(parse_shebang(b"#!/usr/bin/env bash"), Some("shellscript"));
}
#[test]
fn shebang_sh() {
assert_eq!(parse_shebang(b"#!/bin/sh"), Some("shellscript"));
}
#[test]
fn shebang_python_env() {
assert_eq!(parse_shebang(b"#!/usr/bin/env python3"), Some("python"));
}
#[test]
fn shebang_python_direct() {
assert_eq!(parse_shebang(b"#!/usr/bin/python"), Some("python"));
}
#[test]
fn shebang_node() {
assert_eq!(parse_shebang(b"#!/usr/bin/env node"), Some("javascript"));
}
#[test]
fn shebang_ruby() {
assert_eq!(parse_shebang(b"#!/usr/bin/env ruby"), Some("ruby"));
}
#[test]
fn shebang_perl() {
assert_eq!(parse_shebang(b"#!/usr/bin/env perl"), Some("perl"));
}
#[test]
fn shebang_php() {
assert_eq!(parse_shebang(b"#!/usr/bin/env php"), Some("php"));
}
#[test]
fn shebang_lua() {
assert_eq!(parse_shebang(b"#!/usr/bin/env lua"), Some("lua"));
}
#[test]
fn shebang_luajit() {
assert_eq!(parse_shebang(b"#!/usr/bin/env luajit"), Some("lua"));
}
#[test]
fn shebang_tclsh() {
assert_eq!(parse_shebang(b"#!/usr/bin/env tclsh"), Some("tcl"));
}
#[test]
fn shebang_rscript() {
assert_eq!(parse_shebang(b"#!/usr/bin/env Rscript"), Some("r"));
}
#[test]
fn shebang_julia() {
assert_eq!(parse_shebang(b"#!/usr/bin/env julia"), Some("julia"));
}
#[test]
fn shebang_elixir() {
assert_eq!(parse_shebang(b"#!/usr/bin/env elixir"), Some("elixir"));
}
#[test]
fn shebang_swift() {
assert_eq!(parse_shebang(b"#!/usr/bin/env swift"), Some("swift"));
}
#[test]
fn shebang_kotlin() {
assert_eq!(parse_shebang(b"#!/usr/bin/env kotlin"), Some("kotlin"));
}
#[test]
fn shebang_groovy() {
assert_eq!(parse_shebang(b"#!/usr/bin/env groovy"), Some("groovy"));
}
#[test]
fn shebang_crystal() {
assert_eq!(parse_shebang(b"#!/usr/bin/env crystal"), Some("crystal"));
}
#[test]
fn shebang_deno() {
assert_eq!(parse_shebang(b"#!/usr/bin/env deno"), Some("typescript"));
}
#[test]
fn shebang_fish() {
assert_eq!(parse_shebang(b"#!/usr/bin/env fish"), Some("fish"));
}
#[test]
fn shebang_erl() {
assert_eq!(parse_shebang(b"#!/usr/bin/env erl"), Some("erlang"));
}
#[test]
fn shebang_scala() {
assert_eq!(parse_shebang(b"#!/usr/bin/env scala"), Some("scala"));
}
// Arguments after a direct interpreter are ignored.
#[test]
fn shebang_with_flags() {
assert_eq!(parse_shebang(b"#!/bin/bash -e"), Some("shellscript"));
}
#[test]
fn shebang_space_after_hash_bang() {
assert_eq!(parse_shebang(b"#! /bin/bash"), Some("shellscript"));
}
// Option flags between `env` and the interpreter are skipped.
#[test]
fn shebang_env_with_flags() {
assert_eq!(parse_shebang(b"#!/usr/bin/env -S python3"), Some("python"));
}
#[test]
fn shebang_unknown_interpreter() {
assert_eq!(parse_shebang(b"#!/usr/bin/env something_unknown"), None);
}
#[test]
fn no_shebang() {
assert_eq!(parse_shebang(b"hello world"), None);
}
// Without an extension, the shebang supplies the language.
#[test]
fn classify_extensionless_with_shebang() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("my_script");
std::fs::write(&path, "#!/bin/bash\necho hello\n").expect("write");
let mgr = FilesystemManager::new();
let metadata = std::fs::metadata(&path).expect("metadata");
assert_eq!(
mgr.classify(&path, &metadata).kind,
FileKind::Text {
lines: 2,
language_id: Some("shellscript"),
}
);
}
#[test]
fn classify_extensionless_without_shebang() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("data_file");
std::fs::write(&path, "just some text\n").expect("write");
let mgr = FilesystemManager::new();
let metadata = std::fs::metadata(&path).expect("metadata");
assert_eq!(
mgr.classify(&path, &metadata).kind,
FileKind::Text {
lines: 1,
language_id: None,
}
);
}
// A NUL byte after a valid shebang still makes the file binary.
#[test]
fn classify_binary_skips_shebang() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("fake_script");
let mut content = b"#!/bin/bash\n".to_vec();
content.push(0x00);
content.extend_from_slice(b"echo hello\n");
std::fs::write(&path, &content).expect("write");
let mgr = FilesystemManager::new();
let metadata = std::fs::metadata(&path).expect("metadata");
assert_eq!(mgr.classify(&path, &metadata).kind, FileKind::Binary);
}
// Exact unit boundaries for B / KB / MB / GB.
#[test]
fn format_file_size_units() {
assert_eq!(format_file_size(0), "0 B");
assert_eq!(format_file_size(512), "512 B");
assert_eq!(format_file_size(1024), "1 KB");
assert_eq!(format_file_size(1_048_576), "1.0 MB");
assert_eq!(format_file_size(1_073_741_824), "1.0 GB");
assert_eq!(format_file_size(5_368_709_120), "5.0 GB");
}
// When extension and shebang disagree, the extension wins.
#[test]
fn classify_extension_takes_priority_over_shebang() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("script.py");
std::fs::write(&path, "#!/usr/bin/env ruby\nprint('hello')\n").expect("write");
let mgr = FilesystemManager::new();
let metadata = std::fs::metadata(&path).expect("metadata");
assert_eq!(
mgr.classify(&path, &metadata).kind,
FileKind::Text {
lines: 2,
language_id: Some("python"),
}
);
}
// Root resolution works on paths only; nothing here touches the filesystem.
#[test]
fn resolve_root_single_match() {
let mgr = FilesystemManager::new();
mgr.set_roots(vec![PathBuf::from("/home/user/project")]);
assert_eq!(
mgr.resolve_root(Path::new("/home/user/project/src/main.rs")),
Some(PathBuf::from("/home/user/project"))
);
}
#[test]
fn resolve_root_outside_all_roots() {
let mgr = FilesystemManager::new();
mgr.set_roots(vec![PathBuf::from("/home/user/project")]);
assert_eq!(mgr.resolve_root(Path::new("/other/path/file.rs")), None);
}
// With nested roots, the most specific (longest) one is chosen.
#[test]
fn resolve_root_longest_prefix_wins() {
let mgr = FilesystemManager::new();
mgr.set_roots(vec![
PathBuf::from("/home/user/project"),
PathBuf::from("/home/user/project/subdir"),
]);
assert_eq!(
mgr.resolve_root(Path::new("/home/user/project/subdir/foo.rs")),
Some(PathBuf::from("/home/user/project/subdir"))
);
}
#[test]
fn resolve_root_no_roots() {
let mgr = FilesystemManager::new();
assert_eq!(mgr.resolve_root(Path::new("/any/path/file.rs")), None);
}
// `set_roots` replaces the previous roots rather than appending to them.
#[test]
fn set_roots_updates_resolution() {
let mgr = FilesystemManager::new();
mgr.set_roots(vec![PathBuf::from("/home/user/project")]);
assert_eq!(
mgr.resolve_root(Path::new("/home/user/project/src/main.rs")),
Some(PathBuf::from("/home/user/project"))
);
mgr.set_roots(vec![PathBuf::from("/other/root")]);
assert_eq!(
mgr.resolve_root(Path::new("/home/user/project/src/main.rs")),
None
);
assert_eq!(
mgr.resolve_root(Path::new("/other/root/file.rs")),
Some(PathBuf::from("/other/root"))
);
}
#[test]
fn classify_populates_root() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("code.rs");
std::fs::write(&path, "fn main() {}\n").expect("write");
let mgr = FilesystemManager::new();
mgr.set_roots(vec![dir.path().to_path_buf()]);
let metadata = std::fs::metadata(&path).expect("metadata");
let info = mgr.classify(&path, &metadata);
assert_eq!(info.root, Some(dir.path().to_path_buf()));
}
// No roots are configured, so the file belongs to none.
#[test]
fn classify_root_none_when_outside() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("code.rs");
std::fs::write(&path, "fn main() {}\n").expect("write");
let mgr = FilesystemManager::new();
let metadata = std::fs::metadata(&path).expect("metadata");
let info = mgr.classify(&path, &metadata);
assert_eq!(info.root, None);
}
}