file-identify 0.4.0

File identification library for Rust - detects file types based on extensions, shebangs, and content
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
use crate::tags::{TagSet, tags_from_array};
use phf::phf_map;

/// Static `phf` lookup table from a file extension to the tags describing that file type.
///
/// Keys are written without a leading dot (`"rs"`, not `".rs"`). Each value is a slice of
/// tag names; the entries in this table include either `"text"` or `"binary"` alongside
/// zero or more more-specific tags (for example `"json"`, `"image"`, `"c++"`).
///
/// Extensions whose text/binary nature is not fixed by the extension alone live in
/// [`EXTENSIONS_NEED_BINARY_CHECK_TAGS`] instead.
pub static EXTENSION_TAGS: phf::Map<&'static str, &'static [&'static str]> = phf_map! {
    "adoc" => &["text", "asciidoc"],
    "ai" => &["binary", "adobe-illustrator"],
    "aj" => &["text", "aspectj"],
    "asciidoc" => &["text", "asciidoc"],
    "apinotes" => &["text", "apinotes"],
    "asar" => &["binary", "asar"],
    "asm" => &["text", "asm"],
    "astro" => &["text", "astro"],
    "avif" => &["binary", "image", "avif"],
    "avsc" => &["text", "avro-schema"],
    "bash" => &["text", "shell", "bash"],
    "bat" => &["text", "batch"],
    "bats" => &["text", "shell", "bash", "bats"],
    "bazel" => &["text", "bazel"],
    "bb" => &["text", "bitbake"],
    "bbappend" => &["text", "bitbake"],
    "bbclass" => &["text", "bitbake"],
    "beancount" => &["text", "beancount"],
    "bib" => &["text", "bib"],
    "bmp" => &["binary", "image", "bitmap"],
    "bz2" => &["binary", "bzip2"],
    "bz3" => &["binary", "bzip3"],
    "bzl" => &["text", "bazel"],
    "c" => &["text", "c"],
    "c++" => &["text", "c++"],
    "c++m" => &["text", "c++"],
    "cc" => &["text", "c++"],
    "ccm" => &["text", "c++"],
    "cfg" => &["text"],
    "chs" => &["text", "c2hs"],
    "cjs" => &["text", "javascript"],
    "clj" => &["text", "clojure"],
    "cljc" => &["text", "clojure"],
    "cljs" => &["text", "clojure", "clojurescript"],
    "cmake" => &["text", "cmake"],
    "cnf" => &["text"],
    "coffee" => &["text", "coffee"],
    "conf" => &["text"],
    "cpp" => &["text", "c++"],
    "cppm" => &["text", "c++"],
    "cr" => &["text", "crystal"],
    "crt" => &["text", "pem"],
    "cs" => &["text", "c#"],
    "csproj" => &["text", "xml", "csproj", "msbuild"],
    "csh" => &["text", "shell", "csh"],
    "cson" => &["text", "cson"],
    "css" => &["text", "css"],
    "csv" => &["text", "csv"],
    "csx" => &["text", "c#", "c#script"],
    "cu" => &["text", "cuda"],
    "cue" => &["text", "cue"],
    "cuh" => &["text", "cuda"],
    "cxx" => &["text", "c++"],
    "cxxm" => &["text", "c++"],
    "cylc" => &["text", "cylc"],
    "dart" => &["text", "dart"],
    "dbc" => &["text", "dbc"],
    "def" => &["text", "def"],
    "dll" => &["binary"],
    "dtd" => &["text", "dtd"],
    "ear" => &["binary", "zip", "jar"],
    "edn" => &["text", "clojure", "edn"],
    "ejs" => &["text", "ejs"],
    "ejson" => &["text", "json", "ejson"],
    "elm" => &["text", "elm"],
    "env" => &["text", "dotenv"],
    "eot" => &["binary", "eot"],
    "eps" => &["binary", "eps"],
    "erb" => &["text", "erb"],
    "erl" => &["text", "erlang"],
    "ex" => &["text", "elixir"],
    "exe" => &["binary"],
    "exs" => &["text", "elixir"],
    "eyaml" => &["text", "yaml"],
    "f03" => &["text", "fortran"],
    "f08" => &["text", "fortran"],
    "f90" => &["text", "fortran"],
    "f95" => &["text", "fortran"],
    "feature" => &["text", "gherkin"],
    "fish" => &["text", "fish"],
    "fits" => &["binary", "fits"],
    "fs" => &["text", "f#"],
    "fsproj" => &["text", "xml", "fsproj", "msbuild"],
    "fsx" => &["text", "f#", "f#script"],
    "gd" => &["text", "gdscript"],
    "gemspec" => &["text", "ruby"],
    "geojson" => &["text", "geojson", "json"],
    "ggb" => &["binary", "zip", "ggb"],
    "gif" => &["binary", "image", "gif"],
    "gleam" => &["text", "gleam"],
    "go" => &["text", "go"],
    "gotmpl" => &["text", "gotmpl"],
    "gpx" => &["text", "gpx", "xml"],
    "graphql" => &["text", "graphql"],
    "gradle" => &["text", "groovy"],
    "groovy" => &["text", "groovy"],
    "gyb" => &["text", "gyb"],
    "gyp" => &["text", "gyp", "python"],
    "gypi" => &["text", "gyp", "python"],
    "gz" => &["binary", "gzip"],
    "h" => &["text", "header", "c", "c++"],
    "hbs" => &["text", "handlebars"],
    "hcl" => &["text", "hcl"],
    "hh" => &["text", "header", "c++"],
    "hpp" => &["text", "header", "c++"],
    "hrl" => &["text", "erlang"],
    "hs" => &["text", "haskell"],
    "htm" => &["text", "html"],
    "html" => &["text", "html"],
    "hxx" => &["text", "header", "c++"],
    "icns" => &["binary", "icns"],
    "ico" => &["binary", "icon"],
    "ics" => &["text", "icalendar"],
    "idl" => &["text", "idl"],
    "idr" => &["text", "idris"],
    "inc" => &["text", "inc"],
    "ini" => &["text", "ini"],
    "inl" => &["text", "inl", "c++"],
    "ino" => &["text", "ino", "c++"],
    "inx" => &["text", "xml", "inx"],
    "ipynb" => &["text", "jupyter", "json"],
    "ixx" => &["text", "c++"],
    "j2" => &["text", "jinja"],
    "jade" => &["text", "jade"],
    "jar" => &["binary", "zip", "jar"],
    "java" => &["text", "java"],
    "jenkins" => &["text", "groovy", "jenkins"],
    "jenkinsfile" => &["text", "groovy", "jenkins"],
    "jinja" => &["text", "jinja"],
    "jinja2" => &["text", "jinja"],
    "jl" => &["text", "julia"],
    "jpeg" => &["binary", "image", "jpeg"],
    "jpg" => &["binary", "image", "jpeg"],
    "js" => &["text", "javascript"],
    "json" => &["text", "json"],
    "jsonld" => &["text", "json", "jsonld"],
    "jsonnet" => &["text", "jsonnet"],
    "json5" => &["text", "json5"],
    "jsx" => &["text", "jsx"],
    "key" => &["text", "pem"],
    "kml" => &["text", "kml", "xml"],
    "kt" => &["text", "kotlin"],
    "kts" => &["text", "kotlin"],
    "lean" => &["text", "lean"],
    "lektorproject" => &["text", "ini", "lektorproject"],
    "less" => &["text", "less"],
    "lfm" => &["text", "lazarus", "lazarus-form"],
    "lhs" => &["text", "literate-haskell"],
    "libsonnet" => &["text", "jsonnet"],
    "lidr" => &["text", "idris"],
    "liquid" => &["text", "liquid"],
    "lpi" => &["text", "lazarus", "xml"],
    "lpr" => &["text", "lazarus", "pascal"],
    "lr" => &["text", "lektor"],
    "lua" => &["text", "lua"],
    "m" => &["text", "objective-c"],
    "m4" => &["text", "m4"],
    "magik" => &["text", "magik"],
    "make" => &["text", "makefile"],
    "manifest" => &["text", "manifest"],
    "map" => &["text", "map"],
    "markdown" => &["text", "markdown"],
    "md" => &["text", "markdown"],
    "mdx" => &["text", "mdx"],
    "meson" => &["text", "meson"],
    "metal" => &["text", "metal"],
    "mib" => &["text", "mib"],
    "mjs" => &["text", "javascript"],
    "mk" => &["text", "makefile"],
    "ml" => &["text", "ocaml"],
    "mli" => &["text", "ocaml"],
    "mm" => &["text", "c++", "objective-c++"],
    "modulemap" => &["text", "modulemap"],
    "mscx" => &["text", "xml", "musescore"],
    "mscz" => &["binary", "zip", "musescore"],
    "mustache" => &["text", "mustache"],
    "myst" => &["text", "myst"],
    "ngdoc" => &["text", "ngdoc"],
    "nim" => &["text", "nim"],
    "nims" => &["text", "nim"],
    "nimble" => &["text", "nimble"],
    "nix" => &["text", "nix"],
    "njk" => &["text", "nunjucks"],
    "otf" => &["binary", "otf"],
    "p12" => &["binary", "p12"],
    "pas" => &["text", "pascal"],
    "patch" => &["text", "diff"],
    "pdf" => &["binary", "pdf"],
    "pem" => &["text", "pem"],
    "php" => &["text", "php"],
    "php4" => &["text", "php"],
    "php5" => &["text", "php"],
    "phtml" => &["text", "php"],
    "pl" => &["text", "perl"],
    "plantuml" => &["text", "plantuml"],
    "pm" => &["text", "perl"],
    "png" => &["binary", "image", "png"],
    "po" => &["text", "pofile"],
    "pom" => &["pom", "text", "xml"],
    "pp" => &["text", "puppet"],
    "prisma" => &["text", "prisma"],
    "properties" => &["text", "java-properties"],
    "props" => &["text", "xml", "msbuild"],
    "proto" => &["text", "proto"],
    "ps1" => &["text", "powershell"],
    "psd1" => &["text", "powershell"],
    "psm1" => &["text", "powershell"],
    "pug" => &["text", "pug"],
    "puml" => &["text", "plantuml"],
    "purs" => &["text", "purescript"],
    "pxd" => &["text", "cython"],
    "pxi" => &["text", "cython"],
    "py" => &["text", "python"],
    "pyi" => &["text", "pyi"],
    "pyproj" => &["text", "xml", "pyproj", "msbuild"],
    "pyt" => &["text", "python"],
    "pyx" => &["text", "cython"],
    "pyz" => &["binary", "pyz"],
    "pyzw" => &["binary", "pyz"],
    "qml" => &["text", "qml"],
    "r" => &["text", "r"],
    "rake" => &["text", "ruby"],
    "rb" => &["text", "ruby"],
    "resx" => &["text", "resx", "xml"],
    "rng" => &["text", "xml", "relax-ng"],
    "rs" => &["text", "rust"],
    "rst" => &["text", "rst"],
    "s" => &["text", "asm"],
    "sas" => &["text", "sas"],
    "sass" => &["text", "sass"],
    "sbt" => &["text", "sbt", "scala"],
    "sc" => &["text", "scala"],
    "scala" => &["text", "scala"],
    "scm" => &["text", "scheme"],
    "scss" => &["text", "scss"],
    "sh" => &["text", "shell"],
    "sln" => &["text", "sln"],
    "sls" => &["text", "salt"],
    "so" => &["binary"],
    "sol" => &["text", "solidity"],
    "spec" => &["text", "spec"],
    "sql" => &["text", "sql"],
    "ss" => &["text", "scheme"],
    "sty" => &["text", "tex"],
    "styl" => &["text", "stylus"],
    "sv" => &["text", "system-verilog"],
    "svelte" => &["text", "svelte"],
    "svg" => &["text", "image", "svg", "xml"],
    "svh" => &["text", "system-verilog"],
    "swf" => &["binary", "swf"],
    "swift" => &["text", "swift"],
    "swiftdeps" => &["text", "swiftdeps"],
    "tac" => &["text", "twisted", "python"],
    "tar" => &["binary", "tar"],
    "targets" => &["text", "xml", "msbuild"],
    "templ" => &["text", "templ"],
    "tex" => &["text", "tex"],
    "textproto" => &["text", "textproto"],
    "tf" => &["text", "terraform"],
    "tfvars" => &["text", "terraform"],
    "tgz" => &["binary", "gzip"],
    "thrift" => &["text", "thrift"],
    "tiff" => &["binary", "image", "tiff"],
    "toml" => &["text", "toml"],
    "ts" => &["text", "ts"],
    "tsv" => &["text", "tsv"],
    "tsx" => &["text", "tsx"],
    "ttf" => &["binary", "ttf"],
    "twig" => &["text", "twig"],
    "txsprofile" => &["text", "ini", "txsprofile"],
    "txt" => &["text", "plain-text"],
    "txtpb" => &["text", "textproto"],
    "urdf" => &["text", "xml", "urdf"],
    "v" => &["text", "verilog"],
    "vb" => &["text", "vb"],
    "vbproj" => &["text", "xml", "vbproj", "msbuild"],
    "vcxproj" => &["text", "xml", "vcxproj", "msbuild"],
    "vdx" => &["text", "vdx"],
    "vh" => &["text", "verilog"],
    "vhd" => &["text", "vhdl"],
    "vim" => &["text", "vim"],
    "vtl" => &["text", "vtl"],
    "vue" => &["text", "vue"],
    "war" => &["binary", "zip", "jar"],
    "wav" => &["binary", "audio", "wav"],
    "webp" => &["binary", "image", "webp"],
    "whl" => &["binary", "wheel", "zip"],
    "wkt" => &["text", "wkt"],
    "woff" => &["binary", "woff"],
    "woff2" => &["binary", "woff2"],
    "wsdl" => &["text", "xml", "wsdl"],
    "wsgi" => &["text", "wsgi", "python"],
    "xhtml" => &["text", "xml", "html", "xhtml"],
    "xacro" => &["text", "xml", "urdf", "xacro"],
    "xctestplan" => &["text", "json"],
    "xml" => &["text", "xml"],
    "xq" => &["text", "xquery"],
    "xql" => &["text", "xquery"],
    "xqm" => &["text", "xquery"],
    "xqu" => &["text", "xquery"],
    "xquery" => &["text", "xquery"],
    "xqy" => &["text", "xquery"],
    "xsd" => &["text", "xml", "xsd"],
    "xsl" => &["text", "xml", "xsl"],
    "xslt" => &["text", "xml", "xsl"],
    "yaml" => &["text", "yaml"],
    "yamlld" => &["text", "yaml", "yamlld"],
    "yang" => &["text", "yang"],
    "yin" => &["text", "xml", "yin"],
    "yml" => &["text", "yaml"],
    "zcml" => &["text", "xml", "zcml"],
    "zig" => &["text", "zig"],
    "zip" => &["binary", "zip"],
    "zpt" => &["text", "zpt"],
    "zsh" => &["text", "shell", "zsh"],
};

/// Static `phf` lookup table for extensions whose tag lists carry no `"text"` or
/// `"binary"` tag.
///
/// Keys follow the same convention as [`EXTENSION_TAGS`] (no leading dot). The values
/// only name the format itself.
// NOTE(review): going by the name, the caller is presumably expected to inspect the file
// contents and add "text" or "binary" itself — confirm against the call site.
pub static EXTENSIONS_NEED_BINARY_CHECK_TAGS: phf::Map<&'static str, &'static [&'static str]> = phf_map! {
    "plist" => &["plist"],
    "ppm" => &["image", "ppm"],
};

/// Static `phf` lookup table from a complete file name to the tags describing that file.
///
/// Keys are whole file names, not extensions: dotfiles keep their leading dot
/// (`".gitignore"`) and names with extensions are spelled out in full (`"Cargo.toml"`).
/// Keys are case-sensitive as written — note that both `"Makefile"` and `"makefile"`
/// are listed as separate entries.
pub static NAME_TAGS: phf::Map<&'static str, &'static [&'static str]> = phf_map! {
    ".ansible-lint" => &["text", "yaml"],
    ".clang-format" => &["text", "yaml"],
    ".clang-tidy" => &["text", "yaml"],
    ".salt-lint" => &["text", "yaml", "salt-lint"],
    ".yamllint" => &["text", "yaml", "yamllint"],
    ".babelrc" => &["text", "json", "babelrc"],
    ".bowerrc" => &["text", "json", "bowerrc"],
    ".csslintrc" => &["text", "json", "csslintrc"],
    ".eslintrc" => &["text", "json"],
    ".eslintrc.js" => &["text", "javascript"],
    ".eslintrc.json" => &["text", "json"],
    ".eslintrc.yaml" => &["text", "yaml"],
    ".eslintrc.yml" => &["text", "yaml"],
    ".jshintrc" => &["text", "json", "jshintrc"],
    ".mention-bot" => &["text", "json", "mention-bot"],
    ".prettierrc" => &["text", "json"],
    ".prettierrc.json" => &["text", "json"],
    ".prettierrc.toml" => &["text", "toml"],
    ".prettierrc.yaml" => &["text", "yaml"],
    ".prettierrc.yml" => &["text", "yaml"],
    ".stylintrc" => &["text", "json"],
    ".bash_aliases" => &["text", "shell", "bash"],
    ".bash_profile" => &["text", "shell", "bash"],
    ".bashrc" => &["text", "shell", "bash"],
    ".cshrc" => &["text", "shell", "csh"],
    ".envrc" => &["text", "shell", "bash"],
    ".zlogin" => &["text", "shell", "zsh"],
    ".zlogout" => &["text", "shell", "zsh"],
    ".zprofile" => &["text", "shell", "zsh"],
    ".zshrc" => &["text", "shell", "zsh"],
    ".zshenv" => &["text", "shell", "zsh"],
    "direnvrc" => &["text", "shell", "bash"],
    ".codespellrc" => &["text", "ini", "codespellrc"],
    ".coveragerc" => &["text", "ini", "coveragerc"],
    ".flake8" => &["text", "ini", "flake8"],
    ".gitconfig" => &["text", "ini", "gitconfig"],
    ".gitlint" => &["text", "ini", "gitlint"],
    ".hgrc" => &["text", "ini", "hgrc"],
    ".isort.cfg" => &["text", "ini", "isort"],
    ".pypirc" => &["text", "ini", "pypirc"],
    ".rstcheck.cfg" => &["text", "ini"],
    ".sqlfluff" => &["text", "ini"],
    "pylintrc" => &["text", "ini", "pylintrc"],
    "setup.cfg" => &["text", "ini"],
    ".dockerignore" => &["text", "dockerignore"],
    ".gitattributes" => &["text", "gitattributes"],
    ".gitignore" => &["text", "gitignore"],
    ".gitmodules" => &["text", "gitmodules"],
    ".npmignore" => &["text", "npmignore"],
    ".prettierignore" => &["text", "gitignore", "prettierignore"],
    ".bazelrc" => &["text", "bazelrc"],
    ".browserslistrc" => &["text", "browserslistrc"],
    ".editorconfig" => &["text", "editorconfig"],
    ".mailmap" => &["text", "mailmap"],
    ".pdbrc" => &["text", "python", "pdbrc"],
    "BUILD" => &["text", "bazel"],
    "BUILD.bazel" => &["text", "bazel"],
    "CMakeLists.txt" => &["text", "cmake"],
    "Dockerfile" => &["text", "dockerfile"],
    "Containerfile" => &["text", "dockerfile"],
    "Makefile" => &["text", "makefile"],
    "GNUmakefile" => &["text", "makefile"],
    "makefile" => &["text", "makefile"],
    "meson.build" => &["text", "meson"],
    "meson_options.txt" => &["text", "meson"],
    "WORKSPACE" => &["text", "bazel"],
    "WORKSPACE.bazel" => &["text", "bazel"],
    "copy.bara.sky" => &["text", "bazel"],
    "Cargo.toml" => &["text", "toml", "cargo"],
    "Cargo.lock" => &["text", "toml", "cargo-lock"],
    "composer.json" => &["text", "json"],
    "composer.lock" => &["text", "json"],
    "go.mod" => &["text", "go-mod"],
    "go.sum" => &["text", "go-sum"],
    "package.json" => &["text", "json"],
    "package-lock.json" => &["text", "json"],
    "Pipfile" => &["text", "toml"],
    "Pipfile.lock" => &["text", "json"],
    "poetry.lock" => &["text", "toml"],
    "pom.xml" => &["pom", "text", "xml"],
    "yarn.lock" => &["text", "yaml"],
    "config.ru" => &["text", "ruby"],
    "Gemfile" => &["text", "ruby"],
    "Gemfile.lock" => &["text"],
    "Rakefile" => &["text", "ruby"],
    "Vagrantfile" => &["text", "ruby"],
    "bblayers.conf" => &["text", "bitbake"],
    "bitbake.conf" => &["text", "bitbake"],
    "rebar.config" => &["text", "erlang"],
    "sys.config" => &["text", "erlang"],
    "sys.config.src" => &["text", "erlang"],
    "AUTHORS" => &["text", "plain-text"],
    "CHANGELOG" => &["text", "plain-text"],
    "CONTRIBUTING" => &["text", "plain-text"],
    "COPYING" => &["text", "plain-text"],
    "LICENSE" => &["text", "plain-text"],
    "MAINTAINERS" => &["text", "plain-text"],
    "NEWS" => &["text", "plain-text"],
    "NOTICE" => &["text", "plain-text"],
    "PATENTS" => &["text", "plain-text"],
    "README" => &["text", "plain-text"],
    "Jenkinsfile" => &["text", "groovy", "jenkins"],
    "PKGBUILD" => &["text", "bash", "pkgbuild", "alpm"],
    "Tiltfile" => &["text", "tiltfile"],
    "wscript" => &["text", "python"],
};

pub fn get_extension_tags(ext: &str) -> TagSet {
    EXTENSION_TAGS
        .get(ext)
        .map(|&tags| tags_from_array(tags))
        .unwrap_or_default()
}

pub fn get_extensions_need_binary_check_tags(ext: &str) -> TagSet {
    EXTENSIONS_NEED_BINARY_CHECK_TAGS
        .get(ext)
        .map(|&tags| tags_from_array(tags))
        .unwrap_or_default()
}

pub fn get_name_tags(name: &str) -> TagSet {
    NAME_TAGS
        .get(name)
        .map(|&tags| tags_from_array(tags))
        .unwrap_or_default()
}