1use std::path::Path;
4
/// Coarse classification of a file, as produced by [`FileType::from_path`].
///
/// The enum carries no data, so it is `Copy`: values can be passed and compared
/// freely without moving or cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FileType {
    // Programming languages
    /// Rust source.
    Rust,
    /// Python source.
    Python,
    /// JavaScript source.
    JavaScript,
    /// TypeScript source.
    TypeScript,
    /// Go source.
    Go,
    /// Java source.
    Java,
    /// C++ source or header.
    Cpp,
    /// C source or header.
    C,
    /// C# source.
    CSharp,
    /// Ruby source.
    Ruby,
    /// PHP source.
    Php,
    /// Swift source.
    Swift,
    /// Kotlin source.
    Kotlin,
    /// Scala source.
    Scala,
    /// Haskell source.
    Haskell,

    // Markup, data and configuration formats
    /// Markdown document.
    Markdown,
    /// JSON document.
    Json,
    /// YAML document.
    Yaml,
    /// TOML document.
    Toml,
    /// XML document.
    Xml,
    /// HTML document.
    Html,
    /// CSS or a CSS-like stylesheet.
    Css,

    // Everything else
    /// Plain text; also the fallback for unrecognized files that do not carry a
    /// known binary extension.
    Text,
    /// Anything that fits no other variant (files with a known binary extension).
    Other,
}
38
39impl FileType {
40 pub fn from_path(path: &Path) -> Self {
42 let extension = path.extension().and_then(|ext| ext.to_str()).unwrap_or("").to_lowercase();
43
44 match extension.as_str() {
45 "rs" => FileType::Rust,
46 "py" => FileType::Python,
47 "js" | "mjs" | "cjs" => FileType::JavaScript,
48 "ts" | "tsx" => FileType::TypeScript,
49 "go" => FileType::Go,
50 "java" => FileType::Java,
51 "cpp" | "cc" | "cxx" | "c++" | "hpp" | "hxx" | "h++" => FileType::Cpp,
52 "c" | "h" => FileType::C,
53 "cs" => FileType::CSharp,
54 "rb" => FileType::Ruby,
55 "php" => FileType::Php,
56 "swift" => FileType::Swift,
57 "kt" | "kts" => FileType::Kotlin,
58 "scala" => FileType::Scala,
59 "hs" => FileType::Haskell,
60 "md" | "markdown" => FileType::Markdown,
61 "json" => FileType::Json,
62 "yaml" | "yml" => FileType::Yaml,
63 "toml" => FileType::Toml,
64 "xml" => FileType::Xml,
65 "html" | "htm" => FileType::Html,
66 "css" | "scss" | "sass" | "less" => FileType::Css,
67 "txt" | "text" => FileType::Text,
68 _ => {
69 let filename = path.file_name().and_then(|name| name.to_str()).unwrap_or("");
71
72 match filename {
73 "README" | "LICENSE" | "CHANGELOG" | "AUTHORS" | "CONTRIBUTORS" => {
74 FileType::Text
75 }
76 "Makefile" | "Dockerfile" | "Vagrantfile" | "Jenkinsfile" => FileType::Text,
77 _ if !is_binary_extension(path) => FileType::Text,
78 _ => FileType::Other,
79 }
80 }
81 }
82 }
83}
84
/// Returns a language identifier for `path`, chosen purely from its file extension.
///
/// The extension is lowercased before lookup, so matching is case-insensitive.
/// A path with no extension, a non-UTF-8 extension, or an unrecognized extension
/// yields `"text"`. Only the extension is inspected: an extensionless file such as
/// `Makefile` is reported as `"text"`, whereas `rules.mk` is `"makefile"`.
///
/// Note that C-style headers (`.h`) are reported as `"cpp"` here.
pub fn get_language_from_extension(path: &Path) -> &'static str {
    let extension = path.extension().and_then(|ext| ext.to_str()).unwrap_or("");

    match extension.to_lowercase().as_str() {
        // Programming languages
        "rs" => "rust",
        "py" => "python",
        "js" | "mjs" | "cjs" => "javascript",
        "ts" | "tsx" => "typescript",
        "jsx" => "jsx",
        "go" => "go",
        "c" => "c",
        "cpp" | "cc" | "cxx" | "c++" => "cpp",
        // `h++` is included so the header spellings match those accepted by
        // `FileType::from_path`.
        "h" | "hpp" | "hxx" | "h++" => "cpp",
        "cs" => "csharp",
        "java" => "java",
        "kt" | "kts" => "kotlin",
        "swift" => "swift",
        "rb" => "ruby",
        "php" => "php",
        "lua" => "lua",
        "r" => "r",
        "scala" => "scala",
        "clj" | "cljs" => "clojure",
        "ex" | "exs" => "elixir",
        "elm" => "elm",
        "hs" => "haskell",
        "ml" | "mli" => "ocaml",
        "fs" | "fsx" => "fsharp",
        "pl" => "perl",
        "dart" => "dart",
        "julia" | "jl" => "julia",
        "nim" => "nim",
        "zig" => "zig",
        "v" => "v",
        "d" => "d",

        // Shells and scripts
        "sh" | "bash" => "bash",
        "fish" => "fish",
        "zsh" => "zsh",
        "ps1" => "powershell",
        "bat" | "cmd" => "batch",

        // Web
        "html" | "htm" => "html",
        "css" => "css",
        "scss" | "sass" => "scss",
        "less" => "less",
        "vue" => "vue",
        "svelte" => "svelte",

        // Data
        "json" => "json",
        "yaml" | "yml" => "yaml",
        "toml" => "toml",
        "xml" => "xml",
        "csv" => "csv",
        "sql" => "sql",

        // Documentation
        "md" | "markdown" => "markdown",
        "tex" => "latex",
        "rst" => "rst",
        "adoc" | "asciidoc" => "asciidoc",

        // Configuration and build files
        "ini" | "cfg" => "ini",
        "conf" | "config" => "text",
        "env" => "dotenv",
        "dockerfile" => "dockerfile",
        "makefile" | "mk" => "makefile",

        // Miscellaneous
        "proto" => "protobuf",
        "graphql" | "gql" => "graphql",
        "tf" => "hcl",
        "vim" => "vim",
        "diff" | "patch" => "diff",

        _ => "text",
    }
}
171
/// Reports whether `path` ends in an extension from a fixed list of binary formats.
///
/// The extension is lowercased before comparison, so matching is case-insensitive.
/// Paths without an extension, or whose extension is not valid UTF-8, return `false`.
///
/// NOTE(review): `svg` is listed with the image formats even though SVG files are
/// XML text — confirm that treating it as binary is intended.
pub fn is_binary_extension(path: &Path) -> bool {
    const BINARY_EXTENSIONS: &[&str] = &[
        // Executables and libraries
        "exe", "dll", "so", "dylib", "a", "lib", "bin",
        // Archives
        "zip", "tar", "gz", "bz2", "xz", "7z", "rar",
        // Images
        "jpg", "jpeg", "png", "gif", "bmp", "ico", "svg", "webp",
        // Audio
        "mp3", "wav", "flac", "aac", "ogg", "wma",
        // Video
        "mp4", "avi", "mkv", "mov", "wmv", "flv", "webm",
        // Documents
        "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx",
        // Fonts
        "ttf", "otf", "woff", "woff2", "eot",
        // Databases
        "db", "sqlite", "sqlite3",
        // Compiled artifacts and debug symbols
        "pyc", "pyo", "class", "o", "obj", "pdb",
    ];

    path.extension()
        .and_then(|raw| raw.to_str())
        .map(str::to_lowercase)
        .map_or(false, |ext| {
            BINARY_EXTENSIONS.iter().any(|known| *known == ext.as_str())
        })
}
198
/// Heuristically decides whether `content` is binary by looking for a NUL byte.
///
/// Only the first 8192 bytes are examined; a NUL byte after that point is not
/// detected. Empty input is reported as not binary.
pub fn is_binary_content(content: &[u8]) -> bool {
    const SNIFF_LIMIT: usize = 8192;

    content.iter().take(SNIFF_LIMIT).any(|&byte| byte == 0)
}
205
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    #[test]
    fn test_language_detection() {
        assert_eq!(get_language_from_extension(Path::new("test.rs")), "rust");
        assert_eq!(get_language_from_extension(Path::new("test.py")), "python");
        assert_eq!(get_language_from_extension(Path::new("test.js")), "javascript");
        assert_eq!(get_language_from_extension(Path::new("test.unknown")), "text");
        // Only the extension is inspected, so an extensionless Makefile is plain text.
        assert_eq!(get_language_from_extension(Path::new("Makefile")), "text");
        // Matching is case-insensitive.
        assert_eq!(get_language_from_extension(Path::new("TEST.RS")), "rust");
    }

    #[test]
    fn test_file_type_detection() {
        // Extension-based classification, including case-insensitivity.
        assert_eq!(FileType::from_path(Path::new("src/main.rs")), FileType::Rust);
        assert_eq!(FileType::from_path(Path::new("App.TSX")), FileType::TypeScript);
        assert_eq!(FileType::from_path(Path::new("include/api.h")), FileType::C);
        assert_eq!(FileType::from_path(Path::new("style.scss")), FileType::Css);
        assert_eq!(FileType::from_path(Path::new("notes.txt")), FileType::Text);

        // Fallbacks: conventional file names and unknown non-binary extensions are text.
        assert_eq!(FileType::from_path(Path::new("README")), FileType::Text);
        assert_eq!(FileType::from_path(Path::new("Dockerfile")), FileType::Text);
        assert_eq!(FileType::from_path(Path::new("notes.unknown")), FileType::Text);

        // Known binary extensions that match no variant are `Other`.
        assert_eq!(FileType::from_path(Path::new("logo.png")), FileType::Other);
        assert_eq!(FileType::from_path(Path::new("app.EXE")), FileType::Other);
    }

    #[test]
    fn test_binary_extension_detection() {
        assert!(is_binary_extension(Path::new("test.exe")));
        assert!(is_binary_extension(Path::new("image.png")));
        assert!(is_binary_extension(Path::new("archive.zip")));
        assert!(is_binary_extension(Path::new("PHOTO.JPG")));
        assert!(!is_binary_extension(Path::new("code.rs")));
        assert!(!is_binary_extension(Path::new("text.md")));
        assert!(!is_binary_extension(Path::new("Makefile")));
    }

    #[test]
    fn test_binary_content_detection() {
        assert!(!is_binary_content(b""));
        assert!(!is_binary_content(b"Hello, world!"));
        assert!(is_binary_content(b"Hello\0world"));
        assert!(is_binary_content(&[0xFF, 0xFE, 0x00, 0x00]));

        // Only the first 8192 bytes are inspected.
        let mut data = vec![b'a'; 8193];
        data[8192] = 0;
        assert!(!is_binary_content(&data));
        data[8191] = 0;
        assert!(is_binary_content(&data));
    }
}