1use std::path::Path;
4
/// Coarse category assigned to a file.
///
/// The variants fall into three groups: programming languages, markup and
/// data formats, and two catch-all buckets ([`FileType::Text`] and
/// [`FileType::Other`]).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum FileType {
    // ---- Programming languages ----
    /// Rust source code.
    Rust,
    /// Python source code.
    Python,
    /// JavaScript source code.
    JavaScript,
    /// TypeScript source code.
    TypeScript,
    /// Go source code.
    Go,
    /// Java source code.
    Java,
    /// C++ sources and headers.
    Cpp,
    /// C sources and headers.
    C,
    /// C# source code.
    CSharp,
    /// Ruby source code.
    Ruby,
    /// PHP source code.
    Php,
    /// Swift source code.
    Swift,
    /// Kotlin source code and scripts.
    Kotlin,
    /// Scala source code.
    Scala,
    /// Haskell source code.
    Haskell,
    /// Dart source code.
    Dart,
    /// Lua source code.
    Lua,
    /// R source code.
    R,
    /// Julia source code.
    Julia,
    /// Elixir source code and scripts.
    Elixir,
    /// Elm source code.
    Elm,

    // ---- Markup and data formats ----
    /// Markdown documents.
    Markdown,
    /// JSON documents.
    Json,
    /// YAML documents.
    Yaml,
    /// TOML documents.
    Toml,
    /// XML documents.
    Xml,
    /// HTML documents.
    Html,
    /// CSS and related stylesheet languages.
    Css,

    // ---- Catch-all buckets ----
    /// Plain text, including unrecognised files that are not considered binary.
    Text,
    /// Anything that fits none of the categories above.
    Other,
}
44
45impl FileType {
46 pub fn from_path(path: &Path) -> Self {
48 let extension = path
49 .extension()
50 .and_then(|ext| ext.to_str())
51 .unwrap_or("")
52 .to_lowercase();
53
54 match extension.as_str() {
55 "rs" => FileType::Rust,
56 "py" => FileType::Python,
57 "js" | "mjs" | "cjs" => FileType::JavaScript,
58 "ts" | "tsx" => FileType::TypeScript,
59 "go" => FileType::Go,
60 "java" => FileType::Java,
61 "cpp" | "cc" | "cxx" | "c++" | "hpp" | "hxx" | "h++" => FileType::Cpp,
62 "c" | "h" => FileType::C,
63 "cs" => FileType::CSharp,
64 "rb" => FileType::Ruby,
65 "php" => FileType::Php,
66 "swift" => FileType::Swift,
67 "kt" | "kts" => FileType::Kotlin,
68 "scala" | "sc" => FileType::Scala,
69 "hs" => FileType::Haskell,
70 "dart" => FileType::Dart,
71 "lua" => FileType::Lua,
72 "r" => FileType::R,
73 "jl" => FileType::Julia,
74 "ex" | "exs" => FileType::Elixir,
75 "elm" => FileType::Elm,
76 "md" | "markdown" => FileType::Markdown,
77 "json" => FileType::Json,
78 "yaml" | "yml" => FileType::Yaml,
79 "toml" => FileType::Toml,
80 "xml" => FileType::Xml,
81 "html" | "htm" => FileType::Html,
82 "css" | "scss" | "sass" | "less" => FileType::Css,
83 "txt" | "text" => FileType::Text,
84 _ => {
85 let filename = path
87 .file_name()
88 .and_then(|name| name.to_str())
89 .unwrap_or("");
90
91 match filename {
92 "README" | "LICENSE" | "CHANGELOG" | "AUTHORS" | "CONTRIBUTORS" => {
93 FileType::Text
94 }
95 "Makefile" | "Dockerfile" | "Vagrantfile" | "Jenkinsfile" => FileType::Text,
96 _ if !is_binary_extension(path) => FileType::Text,
97 _ => FileType::Other,
98 }
99 }
100 }
101 }
102}
103
/// Returns a lowercase language identifier for `path`, chosen from its extension.
///
/// Only the extension is inspected, case-insensitively; the file name itself
/// is never consulted, so an extensionless `Makefile` yields `"text"`. A
/// missing or non-UTF-8 extension is treated as the empty string.
///
/// The `h++` and `sc` extensions are recognised so that this function agrees
/// with [`FileType::from_path`] on C++ headers and Scala files.
///
/// # Returns
///
/// A `'static` identifier such as `"rust"` or `"yaml"`, or `"text"` when the
/// extension is not recognised.
pub fn get_language_from_extension(path: &Path) -> &'static str {
    let extension = path
        .extension()
        .and_then(|ext| ext.to_str())
        .unwrap_or("")
        .to_lowercase();

    match extension.as_str() {
        // Programming languages
        "rs" => "rust",
        "py" => "python",
        "js" | "mjs" | "cjs" => "javascript",
        "ts" | "tsx" => "typescript",
        "jsx" => "jsx",
        "go" => "go",
        "c" => "c",
        "cpp" | "cc" | "cxx" | "c++" => "cpp",
        // Every header extension, including plain `.h`, is reported as C++.
        "h" | "hpp" | "hxx" | "h++" => "cpp",
        "cs" => "csharp",
        "java" => "java",
        "kt" | "kts" => "kotlin",
        "swift" => "swift",
        "rb" => "ruby",
        "php" => "php",
        "lua" => "lua",
        "r" => "r",
        "scala" | "sc" => "scala",
        "clj" | "cljs" => "clojure",
        "ex" | "exs" => "elixir",
        "elm" => "elm",
        "hs" => "haskell",
        "ml" | "mli" => "ocaml",
        "fs" | "fsx" => "fsharp",
        "pl" => "perl",
        "dart" => "dart",
        "julia" | "jl" => "julia",
        "nim" => "nim",
        "zig" => "zig",
        "v" => "v",
        "d" => "d",

        // Shells and scripting
        "sh" | "bash" => "bash",
        "fish" => "fish",
        "zsh" => "zsh",
        "ps1" => "powershell",
        "bat" | "cmd" => "batch",

        // Web
        "html" | "htm" => "html",
        "css" => "css",
        "scss" | "sass" => "scss",
        "less" => "less",
        "vue" => "vue",
        "svelte" => "svelte",

        // Data formats
        "json" => "json",
        "yaml" | "yml" => "yaml",
        "toml" => "toml",
        "xml" => "xml",
        "csv" => "csv",
        "sql" => "sql",

        // Documentation
        "md" | "markdown" => "markdown",
        "tex" => "latex",
        "rst" => "rst",
        "adoc" | "asciidoc" => "asciidoc",

        // Configuration and build files
        "ini" | "cfg" => "ini",
        "conf" | "config" => "text",
        "env" => "dotenv",
        "dockerfile" => "dockerfile",
        "makefile" | "mk" => "makefile",

        // Miscellaneous
        "proto" => "protobuf",
        "graphql" | "gql" => "graphql",
        "tf" => "hcl",
        "vim" => "vim",
        "diff" | "patch" => "diff",

        _ => "text",
    }
}
190
/// Reports whether the extension of `path` is on the list of binary formats.
///
/// The comparison is case-insensitive. Paths without an extension, or with an
/// extension that is not valid UTF-8, are never considered binary.
pub fn is_binary_extension(path: &Path) -> bool {
    const BINARY_EXTENSIONS: &[&str] = &[
        // Executables and libraries
        "exe", "dll", "so", "dylib", "a", "lib", "bin",
        // Archives
        "zip", "tar", "gz", "bz2", "xz", "7z", "rar",
        // Images
        "jpg", "jpeg", "png", "gif", "bmp", "ico", "svg", "webp",
        // Audio
        "mp3", "wav", "flac", "aac", "ogg", "wma",
        // Video
        "mp4", "avi", "mkv", "mov", "wmv", "flv", "webm",
        // Documents
        "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx",
        // Fonts
        "ttf", "otf", "woff", "woff2", "eot",
        // Databases
        "db", "sqlite", "sqlite3",
        // Compiled artefacts and debug data
        "pyc", "pyo", "class", "o", "obj", "pdb",
    ];

    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => BINARY_EXTENSIONS.contains(&ext.to_lowercase().as_str()),
        None => false,
    }
}
217
/// Reports whether `content` contains a NUL byte near its start.
///
/// Only the first 8192 bytes are examined; a NUL byte beyond that window is
/// not detected. Empty input yields `false`.
pub fn is_binary_content(content: &[u8]) -> bool {
    // Upper bound on how many leading bytes are scanned.
    const SNIFF_LEN: usize = 8192;

    content.iter().take(SNIFF_LEN).any(|&byte| byte == 0)
}
224
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Known, unknown and missing extensions map to the expected identifier.
    #[test]
    fn test_language_detection() {
        let cases = [
            ("test.rs", "rust"),
            ("test.py", "python"),
            ("test.js", "javascript"),
            ("test.unknown", "text"),
            ("Makefile", "text"),
        ];

        for &(file, expected) in cases.iter() {
            assert_eq!(get_language_from_extension(Path::new(file)), expected);
        }
    }

    /// Binary extensions are flagged; source and markdown extensions are not.
    #[test]
    fn test_binary_extension_detection() {
        for file in ["test.exe", "image.png", "archive.zip"].iter() {
            assert!(is_binary_extension(Path::new(file)));
        }

        for file in ["code.rs", "text.md"].iter() {
            assert!(!is_binary_extension(Path::new(file)));
        }
    }

    /// Content is reported as binary exactly when it contains a NUL byte.
    #[test]
    fn test_binary_content_detection() {
        let plain: &[u8] = b"Hello, world!";
        let embedded_nul: &[u8] = b"Hello\0world";
        let raw_bytes: [u8; 4] = [0xFF, 0xFE, 0x00, 0x00];

        assert!(!is_binary_content(plain));
        assert!(is_binary_content(embedded_nul));
        assert!(is_binary_content(&raw_bytes));
    }
}