use std::path::Path;
/// Coarse classification of a file, as produced by [`FileType::from_path`].
///
/// The enum carries no data, so it derives `Copy` in addition to `Clone`:
/// values can be passed and compared by value without moving them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FileType {
    /// Rust source.
    Rust,
    /// Python source.
    Python,
    /// JavaScript source.
    JavaScript,
    /// TypeScript source.
    TypeScript,
    /// Go source.
    Go,
    /// Java source.
    Java,
    /// C++ source or header.
    Cpp,
    /// C source or header.
    C,
    /// C# source.
    CSharp,
    /// Ruby source.
    Ruby,
    /// PHP source.
    Php,
    /// Swift source.
    Swift,
    /// Kotlin source or script.
    Kotlin,
    /// Scala source.
    Scala,
    /// Haskell source.
    Haskell,
    /// Markdown document.
    Markdown,
    /// JSON data.
    Json,
    /// YAML data.
    Yaml,
    /// TOML data.
    Toml,
    /// XML document.
    Xml,
    /// HTML document.
    Html,
    /// Stylesheet (CSS and preprocessor dialects).
    Css,
    /// Plain text; also the fallback for unrecognized files whose extension
    /// is not on the binary list.
    Text,
    /// Anything else, i.e. an unrecognized file with a known binary extension.
    Other,
}
impl FileType {
    /// Classifies `path` by its extension, compared case-insensitively.
    ///
    /// Resolution order:
    /// 1. A recognized extension maps directly to its variant.
    /// 2. Otherwise a handful of well-known extensionless file names
    ///    (`README`, `Makefile`, ...) are treated as [`FileType::Text`].
    /// 3. Otherwise the file is [`FileType::Text`] unless
    ///    `is_binary_extension` reports a binary extension, in which case it
    ///    is [`FileType::Other`].
    ///
    /// A missing or non-UTF-8 extension is treated as the empty string and
    /// therefore goes through steps 2 and 3. The file is never opened; only
    /// the path is inspected.
    pub fn from_path(path: &Path) -> Self {
        let extension = path
            .extension()
            .and_then(|ext| ext.to_str())
            .unwrap_or("")
            .to_lowercase();

        match extension.as_str() {
            "rs" => FileType::Rust,
            "py" => FileType::Python,
            // `.jsx` is JavaScript with JSX syntax; it is grouped here the
            // same way `.tsx` is grouped with TypeScript below.
            "js" | "mjs" | "cjs" | "jsx" => FileType::JavaScript,
            "ts" | "tsx" => FileType::TypeScript,
            "go" => FileType::Go,
            "java" => FileType::Java,
            "cpp" | "cc" | "cxx" | "c++" | "hpp" | "hxx" | "h++" => FileType::Cpp,
            "c" | "h" => FileType::C,
            "cs" => FileType::CSharp,
            "rb" => FileType::Ruby,
            "php" => FileType::Php,
            "swift" => FileType::Swift,
            "kt" | "kts" => FileType::Kotlin,
            "scala" => FileType::Scala,
            "hs" => FileType::Haskell,
            "md" | "markdown" => FileType::Markdown,
            "json" => FileType::Json,
            "yaml" | "yml" => FileType::Yaml,
            "toml" => FileType::Toml,
            "xml" => FileType::Xml,
            "html" | "htm" => FileType::Html,
            "css" | "scss" | "sass" | "less" => FileType::Css,
            "txt" | "text" => FileType::Text,
            _ => {
                // The file name is matched case-sensitively, unlike the extension.
                let filename = path
                    .file_name()
                    .and_then(|name| name.to_str())
                    .unwrap_or("");
                match filename {
                    "README" | "LICENSE" | "CHANGELOG" | "AUTHORS" | "CONTRIBUTORS" => {
                        FileType::Text
                    }
                    "Makefile" | "Dockerfile" | "Vagrantfile" | "Jenkinsfile" => FileType::Text,
                    // Unknown extensions default to text unless known to be binary.
                    _ if !is_binary_extension(path) => FileType::Text,
                    _ => FileType::Other,
                }
            }
        }
    }
}
/// Returns a short, lowercase language identifier for `path`, chosen from its
/// extension (compared case-insensitively).
///
/// Only the extension is inspected: an extensionless file such as `Makefile`
/// yields `"text"`, as does any extension that is not listed or is not valid
/// UTF-8. The file itself is never opened.
pub fn get_language_from_extension(path: &Path) -> &'static str {
    let extension = path.extension().and_then(|ext| ext.to_str()).unwrap_or("");
    match extension.to_lowercase().as_str() {
        // General-purpose and systems languages.
        "rs" => "rust",
        "py" => "python",
        "js" | "mjs" | "cjs" => "javascript",
        "ts" | "tsx" => "typescript",
        "jsx" => "jsx",
        "go" => "go",
        "c" => "c",
        "cpp" | "cc" | "cxx" | "c++" => "cpp",
        // Headers are reported as C++; `h++` is included so every C++ header
        // spelling that `FileType::from_path` accepts is recognized here too.
        "h" | "hpp" | "hxx" | "h++" => "cpp",
        "cs" => "csharp",
        "java" => "java",
        "kt" | "kts" => "kotlin",
        "swift" => "swift",
        "rb" => "ruby",
        "php" => "php",
        "lua" => "lua",
        "r" => "r",
        "scala" => "scala",
        "clj" | "cljs" => "clojure",
        "ex" | "exs" => "elixir",
        "elm" => "elm",
        "hs" => "haskell",
        "ml" | "mli" => "ocaml",
        "fs" | "fsx" => "fsharp",
        "pl" => "perl",
        "dart" => "dart",
        "julia" | "jl" => "julia",
        "nim" => "nim",
        "zig" => "zig",
        "v" => "v",
        "d" => "d",
        // Shells and scripting.
        "sh" | "bash" => "bash",
        "fish" => "fish",
        "zsh" => "zsh",
        "ps1" => "powershell",
        "bat" | "cmd" => "batch",
        // Web.
        "html" | "htm" => "html",
        "css" => "css",
        "scss" | "sass" => "scss",
        "less" => "less",
        "vue" => "vue",
        "svelte" => "svelte",
        // Data and query formats.
        "json" => "json",
        "yaml" | "yml" => "yaml",
        "toml" => "toml",
        "xml" => "xml",
        "csv" => "csv",
        "sql" => "sql",
        "proto" => "protobuf",
        "graphql" | "gql" => "graphql",
        // Documentation.
        "md" | "markdown" => "markdown",
        "tex" => "latex",
        "rst" => "rst",
        "adoc" | "asciidoc" => "asciidoc",
        // Configuration and tooling.
        "ini" | "cfg" => "ini",
        "conf" | "config" => "text",
        "env" => "dotenv",
        // These match an *extension* (e.g. `app.dockerfile`, `rules.mk`),
        // not the bare file names `Dockerfile` / `Makefile`.
        "dockerfile" => "dockerfile",
        "makefile" | "mk" => "makefile",
        "tf" => "hcl",
        "vim" => "vim",
        "diff" | "patch" => "diff",
        _ => "text",
    }
}
/// Reports whether the extension of `path` is on the list of known binary
/// formats. The comparison is case-insensitive.
///
/// A path with no extension, or with an extension that is not valid UTF-8,
/// is reported as not binary. Only the path is inspected, never the content.
pub fn is_binary_extension(path: &Path) -> bool {
    const BINARY_EXTENSIONS: &[&str] = &[
        // Executables, libraries, raw images.
        "exe", "dll", "so", "dylib", "a", "lib", "bin",
        // Archives and compressed files.
        "zip", "tar", "gz", "bz2", "xz", "7z", "rar",
        // Images.
        // NOTE(review): `svg` is XML text but is listed here; confirm that
        // skipping it is intended.
        "jpg", "jpeg", "png", "gif", "bmp", "ico", "svg", "webp",
        // Audio.
        "mp3", "wav", "flac", "aac", "ogg", "wma",
        // Video.
        "mp4", "avi", "mkv", "mov", "wmv", "flv", "webm",
        // Office documents.
        "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx",
        // Fonts.
        "ttf", "otf", "woff", "woff2", "eot",
        // Databases.
        "db", "sqlite", "sqlite3",
        // Compiled artifacts and debug symbols.
        "pyc", "pyo", "class", "o", "obj", "pdb",
    ];

    let lowered = path
        .extension()
        .and_then(|ext| ext.to_str())
        .map(str::to_lowercase)
        .unwrap_or_default();
    BINARY_EXTENSIONS.contains(&lowered.as_str())
}
/// Heuristically reports whether `content` is binary: returns `true` when a
/// NUL byte occurs within the first 8192 bytes.
///
/// Bytes beyond that window are not examined, and empty input is reported as
/// not binary.
pub fn is_binary_content(content: &[u8]) -> bool {
    // Upper bound on how many leading bytes are scanned.
    const SNIFF_WINDOW: usize = 8192;

    content.iter().take(SNIFF_WINDOW).any(|&byte| byte == 0)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Known extensions map to their language tag; unknown extensions and
    /// extensionless names fall back to "text".
    #[test]
    fn test_language_detection() {
        let cases = [
            ("test.rs", "rust"),
            ("test.py", "python"),
            ("test.js", "javascript"),
            ("test.unknown", "text"),
            ("Makefile", "text"),
        ];
        for &(file, expected) in cases.iter() {
            assert_eq!(
                get_language_from_extension(Path::new(file)),
                expected,
                "language for {:?}",
                file
            );
        }
    }

    /// Extensions on the binary list are flagged; source and markup are not.
    #[test]
    fn test_binary_extension_detection() {
        for &file in ["test.exe", "image.png", "archive.zip"].iter() {
            assert!(
                is_binary_extension(Path::new(file)),
                "{:?} should be binary",
                file
            );
        }
        for &file in ["code.rs", "text.md"].iter() {
            assert!(
                !is_binary_extension(Path::new(file)),
                "{:?} should not be binary",
                file
            );
        }
    }

    /// Content is binary exactly when it contains a NUL byte.
    #[test]
    fn test_binary_content_detection() {
        let plain_text = b"Hello, world!";
        let embedded_nul = b"Hello\0world";
        let raw_bytes: [u8; 4] = [0xFF, 0xFE, 0x00, 0x00];

        assert!(!is_binary_content(plain_text));
        assert!(is_binary_content(embedded_nul));
        assert!(is_binary_content(&raw_bytes));
    }
}