tft/
detect.rs

1use std::path::Path;
2
3use aho_corasick::AhoCorasick;
4use lazy_regex::{lazy_regex, regex, regex_is_match};
5use once_cell::sync::Lazy;
6use regex::Regex;
7
8use self::{
9    file_extension::FILE_EXTENSION, filename::FILENAME, path_suffix::PATH_SUFFIX, pattern::PATTERN,
10};
11use crate::FileType;
12
13mod file_extension;
14mod filename;
15mod path_suffix;
16mod pattern;
17mod util;
18
19/// Same as [`try_detect`] but automatically falling back to [`FileType::Text`] where
20/// [`try_detect`] would return [`None`].
21///
22/// # Example
23/// ```
24/// use tft::FileType;
25///
26/// assert_eq!(FileType::Rust, tft::detect("main.rs", ""));
27/// assert_eq!(FileType::Text, tft::detect("test.txt", ""));
28/// assert_eq!(FileType::Text, tft::detect("unsupported.filetype", ""));
29/// ```
30pub fn detect(path: impl AsRef<Path>, content: &str) -> FileType {
31    try_detect(path, content).unwrap_or(FileType::Text)
32}
33
34/// Try to detect a [`FileType`] given a file's path and content.
35///
36/// # Example
37/// ```
38/// use tft::FileType;
39///
40/// assert_eq!(Some(FileType::Rust), tft::try_detect("main.rs", ""));
41/// assert_eq!(Some(FileType::Text), tft::try_detect("test.txt", ""));
42/// assert_eq!(None, tft::try_detect("unsupported.filetype", ""));
43/// ```
44pub fn try_detect(path: impl AsRef<Path>, content: &str) -> Option<FileType> {
45    let path = path.as_ref();
46
47    // path suffix
48    for (suffix, resolver) in PATH_SUFFIX {
49        if path.ends_with(suffix) {
50            if let Some(ft) = resolver.resolve(path, content) {
51                return Some(ft);
52            }
53        }
54    }
55
56    // filename
57    if let Some(resolver) = path
58        .file_name()
59        .and_then(|os_name| os_name.to_str())
60        .and_then(|filename| FILENAME.get(filename))
61    {
62        if let Some(ft) = resolver.resolve(path, content) {
63            return Some(ft);
64        }
65    }
66
67    // patterns (non-negative priority)
68    let mut negative_prio_start_index = 0;
69    for (index, (match_full_path, regex, pat)) in PATTERN.iter().enumerate() {
70        if pat.priority.map_or(false, |prio| prio < 0) {
71            negative_prio_start_index = index;
72            break;
73        }
74        if match match_full_path {
75            true => path.to_str(),
76            false => path.file_name().and_then(|os_name| os_name.to_str()),
77        }
78        .map_or(true, |haystack| !regex.is_match(haystack))
79        {
80            continue;
81        }
82        if let Some(ft) = pat.resolver.resolve(path, content) {
83            return Some(ft);
84        }
85    }
86
87    // file extension
88    if let Some(resolver) = path
89        .extension()
90        .and_then(|os_ext| os_ext.to_str())
91        .and_then(|ext| FILE_EXTENSION.get(ext))
92    {
93        if let Some(ft) = resolver.resolve(path, content) {
94            return Some(ft);
95        }
96    }
97
98    // patterns (negative priority)
99    for (match_full_path, regex, pat) in PATTERN.iter().skip(negative_prio_start_index) {
100        if match match_full_path {
101            true => path.to_str(),
102            false => path.file_name().and_then(|os_name| os_name.to_str()),
103        }
104        .map_or(true, |haystack| !regex.is_match(haystack))
105        {
106            continue;
107        }
108        if let Some(ft) = pat.resolver.resolve(path, content) {
109            return Some(ft);
110        }
111    }
112
113    // file contents
114    // TODO: try to guess from content (make public as separate function)
115
116    None
117}
118
119fn asa(_path: &Path, _content: &str) -> Option<FileType> {
120    // TODO: user defined preferred asa filetype
121    Some(FileType::AspVbs)
122}
123
124fn asm(_path: &Path, content: &str) -> Option<FileType> {
125    // TODO: user defined preferred asm syntax
126    match util::findany(
127        content,
128        10,
129        true,
130        [".title", ".ident", ".macro", ".subtitle", ".library"],
131    ) {
132        true => Some(FileType::Vmasm),
133        false => Some(FileType::Asm),
134    }
135}
136
137fn asp(_path: &Path, content: &str) -> Option<FileType> {
138    // TODO: user defined preferred asp filetype
139    match util::find(content, 3, false, "perlscript") {
140        true => Some(FileType::AspPerl),
141        false => Some(FileType::AspVbs),
142    }
143}
144
145fn bak(path: &Path, content: &str) -> Option<FileType> {
146    // for files like `main.rs.bak` retry search without the `.bak` extension
147    try_detect(path.with_extension(""), content)
148}
149
/// Lowercase markers searched for by the `bas` and `frm` resolvers; a hit makes them report
/// [`FileType::Vb`].
const VISUAL_BASIC_CONTENT: &[&str] = &[
    "vb_name",
    "begin vb.form",
    "begin vb.mdiform",
    "begin vb.usercontrol",
];
156
157fn bas(_path: &Path, content: &str) -> Option<FileType> {
158    // TODO: user defined preferred bas filetype
159    // Most frequent FreeBASIC-specific keywords in distro files
160    let fb_keywords = regex!(
161        r"^\s*(extern|var|enum|private|scope|union|byref|operator|constructor|delete|namespace|public|property|with|destructor|using)\b(\s*[:=(])\@!"i
162    );
163    let fb_preproc = regex!(
164        r"^\s*(#\s*\a+|option\s+(byval|dynamic|escape|(no)?gosub|nokeyword|private|static)\b|(''|rem)\s*\$lang\b|def(byte|longint|short|ubyte|uint|ulongint|ushort)\b)"i
165    );
166
167    let fb_comment = regex!(r"^\s*/'");
168    // OPTION EXPLICIT, without the leading underscore, is common to many dialects
169    let qb64_preproc = regex!(r"^\s*($\a+|option\s+(_explicit|_?explicitarray)\b)"i);
170
171    for line in content.lines().take(100) {
172        if util::findany(line, 0, false, VISUAL_BASIC_CONTENT) {
173            return Some(FileType::Vb);
174        } else if fb_comment.is_match(line)
175            || fb_preproc.is_match(line)
176            || fb_keywords.is_match(line)
177        {
178            return Some(FileType::FreeBasic);
179        } else if qb64_preproc.is_match(line) {
180            return Some(FileType::Qb64);
181        }
182    }
183    Some(FileType::Basic)
184}
185
186fn bindzone(content: &str, default: Option<FileType>) -> Option<FileType> {
187    match regex_is_match!(
188        r"^; <<>> DiG [0-9\.]+.* <<>>|\$ORIGIN|\$TTL|IN\s+SOA",
189        util::get_lines(content, 4)
190    ) {
191        true => Some(FileType::Bindzone),
192        false => default,
193    }
194}
195
196fn btm(_path: &Path, _content: &str) -> Option<FileType> {
197    // TODO: user defined dosbatch for btm
198    Some(FileType::Btm)
199}
200
201fn cfg(_path: &Path, content: &str) -> Option<FileType> {
202    // TODO: user defined preferred cfg filetype
203    match regex_is_match!(
204        r"(eio|mmc|moc|proc|sio|sys):cfg"i,
205        util::get_lines(content, 1)
206    ) {
207        true => Some(FileType::Rapid),
208        false => Some(FileType::Cfg),
209    }
210}
211
212/// If the first line starts with # or ! it's probably a ch file.
213/// If one of the first ten lines starts with `@` it's probably a change file.
214/// If one of the first ten lines contains `MODULE` it's probably a CHILL file.
215/// If a line has `main`, `#include`, or `//` it's probably ch.
216/// Otherwise CHILL is assumed.
217fn change(_path: &Path, content: &str) -> Option<FileType> {
218    if regex_is_match!(r"^(#|!)", util::get_lines(content, 1)) {
219        return Some(FileType::Ch);
220    }
221    for line in content.lines().take(10) {
222        if line.starts_with('@') {
223            return Some(FileType::Change);
224        }
225        if util::find(line, 0, true, "MODULE") {
226            return Some(FileType::Chill);
227        }
228        if regex_is_match!(r"main\s*\(|#\s*include|//"i, line) {
229            return Some(FileType::Ch);
230        }
231    }
232    Some(FileType::Chill)
233}
234
235fn changelog(_path: &Path, content: &str) -> Option<FileType> {
236    match util::find(content, 1, false, "; urgency=") {
237        true => Some(FileType::DebChangelog),
238        false => Some(FileType::Changelog),
239    }
240}
241
242fn cls(_path: &Path, content: &str) -> Option<FileType> {
243    // TODO: user defined preferred cls filetype
244    let first_line = util::get_lines(content, 1);
245    if regex_is_match!(r"^[%\\]", first_line) {
246        Some(FileType::Tex)
247    } else if first_line.starts_with('#')
248        && AhoCorasick::builder()
249            .ascii_case_insensitive(true)
250            .build(["rexx"])
251            .unwrap()
252            .is_match(first_line)
253    {
254        Some(FileType::Rexx)
255    } else if first_line == "VERSION 1.0 CLASS" {
256        Some(FileType::Vb)
257    } else {
258        Some(FileType::St)
259    }
260}
261
262fn cmd(_path: &Path, content: &str) -> Option<FileType> {
263    match content.starts_with("/*") {
264        true => Some(FileType::Rexx),
265        false => Some(FileType::DosBatch),
266    }
267}
268
269fn control(_path: &Path, content: &str) -> Option<FileType> {
270    match content.starts_with("Source:") {
271        true => Some(FileType::DebControl),
272        false => None,
273    }
274}
275
276fn copyright(_path: &Path, content: &str) -> Option<FileType> {
277    match content.starts_with("Format:") {
278        true => Some(FileType::DebCopyright),
279        false => None,
280    }
281}
282
283fn cpp(_path: &Path, _content: &str) -> Option<FileType> {
284    // TODO: user defined cynlib for cpp
285    Some(FileType::Cpp)
286}
287
288fn cpy(_path: &Path, content: &str) -> Option<FileType> {
289    match content.starts_with("##") {
290        true => Some(FileType::Python),
291        false => Some(FileType::Cobol),
292    }
293}
294
295fn csh(_path: &Path, content: &str) -> Option<FileType> {
296    // TODO: user defined preferred csh filetype
297    // TODO: user defined preferred shell filetype
298    shell(content, FileType::Csh)
299}
300
301fn dat(path: &Path, content: &str) -> Option<FileType> {
302    if path
303        .file_name()
304        .and_then(|os_name| os_name.to_str())
305        .map_or(
306            false,
307            |name| regex_is_match!(r"^((.*\.)?upstream\.dat|upstream\..*\.dat)$"i, name),
308        )
309    {
310        return Some(FileType::UpstreamDat);
311    }
312    // TODO: user defined preferred dat filetype
313    match util::next_non_blank(content, 0)
314        .map_or(false, |line| regex_is_match!(r"^\s*(&\w+|defdat\b)"i, line))
315    {
316        true => Some(FileType::Krl),
317        false => None,
318    }
319}
320
321fn decl(_path: &Path, content: &str) -> Option<FileType> {
322    for line in content.lines().take(3) {
323        if regex_is_match!(r"^<!sgml"i, line) {
324            return Some(FileType::SgmlDecl);
325        }
326    }
327    None
328}
329
330fn dep3patch(path: &Path, content: &str) -> Option<FileType> {
331    let filename = path.file_name()?.to_str()?;
332    if filename == "series" {
333        return None;
334    }
335    for line in content.lines().take(100) {
336        if util::starts_with_any(
337            line,
338            true,
339            [
340                "Description:",
341                "Subject:",
342                "Origin:",
343                "Bug:",
344                "Forwarded:",
345                "Author:",
346                "From:",
347                "Reviewed-by:",
348                "Acked-by:",
349                "Last-Updated:",
350                "Applied-Upstream:",
351            ],
352        ) {
353            return Some(FileType::Dep3Patch);
354        } else if line.starts_with("---") {
355            // end of headers found. stop processing
356            return None;
357        }
358    }
359    None
360}
361
362fn dsl(_path: &Path, content: &str) -> Option<FileType> {
363    match regex_is_match!(r"^\s*<!", util::get_lines(content, 1)) {
364        true => Some(FileType::Dsl),
365        false => Some(FileType::Structurizr),
366    }
367}
368
369fn dtrace(_path: &Path, content: &str) -> Option<FileType> {
370    for line in content.lines().take(100) {
371        if regex_is_match!(r"^(module|import)\b"i, line) {
372            return Some(FileType::D);
373        } else if regex_is_match!(r"'^#!\S+dtrace|#pragma\s+D\s+option|:\S-:\S-:", line) {
374            return Some(FileType::DTrace);
375        }
376    }
377    Some(FileType::D)
378}
379
380fn e(_path: &Path, content: &str) -> Option<FileType> {
381    // TODO: user defined preferred euphoria filetype
382    for line in content.lines().take(100) {
383        if regex_is_match!(r"^\s*<'\s*$|^\s*'>\s*$", line) {
384            return Some(FileType::SpecMan);
385        }
386    }
387    Some(FileType::Eiffel)
388}
389
390fn edn(_path: &Path, content: &str) -> Option<FileType> {
391    match regex_is_match!(r"^\s*\(\s*edif\b"i, util::get_lines(content, 1)) {
392        true => Some(FileType::Edif),
393        false => Some(FileType::Clojure),
394    }
395}
396
397fn ent(_path: &Path, content: &str) -> Option<FileType> {
398    for line in content.lines().take(5) {
399        if regex_is_match!(r"^\s*[#{]", line) {
400            return Some(FileType::Cl);
401        } else if !line.trim_start().is_empty() {
402            // not a blank line, not a comment, and not a block start,
403            // so doesn't look like valid cl code
404            break;
405        }
406    }
407    Some(FileType::Dtd)
408}
409
410fn euphoria(_path: &Path, _content: &str) -> Option<FileType> {
411    // TODO: user defined preferred euphoria filetype
412    Some(FileType::Euphoria3)
413}
414
415fn ex(_path: &Path, content: &str) -> Option<FileType> {
416    // TODO: user defined preferred euphoria filetype
417    for line in content.lines().take(100) {
418        if regex_is_match!(r"^(--|ifdef\b|include\b)", line) {
419            return Some(FileType::Euphoria3);
420        }
421    }
422    Some(FileType::Elixir)
423}
424
425fn foam(_path: &Path, content: &str) -> Option<FileType> {
426    let mut foam_file = false;
427    for line in content.lines().take(15) {
428        if line.starts_with("FoamFile") {
429            foam_file = true;
430        } else if foam_file && line.trim_start().starts_with("object") {
431            return Some(FileType::Foam);
432        }
433    }
434    None
435}
436
437fn frm(_path: &Path, content: &str) -> Option<FileType> {
438    // TODO: user defined preferred frm filetype
439    match util::findany(content, 5, false, VISUAL_BASIC_CONTENT) {
440        true => Some(FileType::Vb),
441        false => Some(FileType::Form),
442    }
443}
444
445fn fs(_path: &Path, content: &str) -> Option<FileType> {
446    // TODO: user defined preferred fs filetype
447    for line in content.lines().take(100) {
448        if line.starts_with([':', '(', '\\']) {
449            return Some(FileType::Forth);
450        }
451    }
452    Some(FileType::FSharp)
453}
454
455fn fvwm(path: &Path, _content: &str) -> Option<FileType> {
456    match path.extension().map_or(false, |ext| ext == "m4") {
457        true => Some(FileType::Fvwm2M4),
458        false => Some(FileType::Fvwm2),
459    }
460}
461
462fn git(_path: &Path, content: &str) -> Option<FileType> {
463    match regex_is_match!(r"^[a-fA-F0-9]{40,}\b|^ref: ", util::get_lines(content, 1)) {
464        true => Some(FileType::Git),
465        false => None,
466    }
467}
468
469fn header(_path: &Path, content: &str) -> Option<FileType> {
470    for line in content.lines().take(200) {
471        if regex_is_match!(r"^@(interface|end|class)"i, line) {
472            // TODO: allow setting C or C++
473            return Some(FileType::ObjC);
474        }
475    }
476    // TODO: user defined preferred header filetype
477    Some(FileType::C)
478}
479
480fn hook(_path: &Path, content: &str) -> Option<FileType> {
481    match util::get_lines(content, 1) == "[Trigger]" {
482        true => Some(FileType::Conf),
483        false => None,
484    }
485}
486
487fn html(_path: &Path, content: &str) -> Option<FileType> {
488    for line in content.lines().take(10) {
489        if regex_is_match!(r"\bDTD\s+XHTML\s", line) {
490            return Some(FileType::Xhtml);
491        } else if regex_is_match!(r"\{%\s*(extends|block|load)\b|\{#\s+"i, line) {
492            return Some(FileType::HtmlDjango);
493        }
494    }
495    Some(FileType::Html)
496}
497
498fn hw(_path: &Path, content: &str) -> Option<FileType> {
499    match util::find(content, 1, false, "<?php") {
500        true => Some(FileType::Php),
501        false => Some(FileType::Virata),
502    }
503}
504
505fn idl(_path: &Path, content: &str) -> Option<FileType> {
506    for line in content.lines().take(50) {
507        if regex_is_match!(r#"^\s*import\s+"(unknwn|objidl)"\.idl"#i, line) {
508            return Some(FileType::Msidl);
509        }
510    }
511    Some(FileType::Idl)
512}
513
514fn in_(path: &Path, content: &str) -> Option<FileType> {
515    if path
516        .file_name()
517        .map_or(false, |name| name == "configure.in")
518    {
519        return bak(path, content);
520    }
521    None
522}
523
524static PASCAL_KEYWORDS: Lazy<Regex> =
525    lazy_regex!(r"^\s*(program|unit|library|uses|begin|procedure|function|const|type|var)\b"i);
526static PASCAL_COMMENTS: Lazy<Regex> = lazy_regex!(r"^\s*(\{|\(\*|//)");
527
528fn inc(path: &Path, content: &str) -> Option<FileType> {
529    // TODO: user defined preferred inc filetype
530    let lines = util::get_lines(content, 3);
531    if util::find(lines, 0, false, "perlscript") {
532        Some(FileType::AspPerl)
533    } else if util::find(lines, 0, false, "<%") {
534        Some(FileType::AspVbs)
535    } else if util::find(lines, 0, false, "<?") {
536        Some(FileType::Php)
537    } else if regex_is_match!(r"^\s(\{|\(\*)"i, lines) || PASCAL_KEYWORDS.is_match(lines) {
538        Some(FileType::Pascal)
539    } else if regex_is_match!(
540        // TODO: is this regex correct?
541        r"^\s*(inherit|require|[A-Z][\w_:${}]*\s+\??[?:+]?=) "i,
542        lines
543    ) {
544        Some(FileType::Bitbake)
545    } else if let Some(ft) = asm(path, content) {
546        match ft {
547            FileType::Asm => Some(FileType::Pov),
548            _ => Some(ft),
549        }
550    } else {
551        Some(FileType::Pov)
552    }
553}
554
555fn inp(_path: &Path, content: &str) -> Option<FileType> {
556    if content.starts_with('*') {
557        return Some(FileType::Abaqus);
558    }
559    for line in content.lines().take(500) {
560        if util::starts_with_any(line, false, ["header surface data"]) {
561            return Some(FileType::Trasys);
562        }
563    }
564    None
565}
566
567fn install(_path: &Path, content: &str) -> Option<FileType> {
568    match util::find(content, 1, false, "<?php") {
569        true => Some(FileType::Php),
570        false => sh(content, Some(FileType::Bash)),
571    }
572}
573
574fn log(path: &Path, _content: &str) -> Option<FileType> {
575    let path = path.to_str();
576    if path.map_or(
577        false,
578        |path| regex_is_match!(r"upstream([.-].*)?\.log|.*\.upstream\.log"i, path),
579    ) {
580        Some(FileType::UpstreamLog)
581    } else if path.map_or(false, |path| {
582        regex_is_match!(
583            r"upstreaminstall(\..*)?\.log|.*\.upstreaminstall\.log"i,
584            path
585        )
586    }) {
587        Some(FileType::UpstreamInstallLog)
588    } else if path.map_or(
589        false,
590        |path| regex_is_match!(r"usserver(\..*)?\.log|.*\.usserver\.log"i, path),
591    ) {
592        Some(FileType::UsServerLog)
593    } else if path.map_or(
594        false,
595        |path| regex_is_match!(r"usw2kagtlog(\..*)?\.log|.*\.usw2kagtlog\.log"i, path),
596    ) {
597        Some(FileType::Usw2KagtLog)
598    } else {
599        None
600    }
601}
602
603fn lpc(_path: &Path, content: &str) -> Option<FileType> {
604    // TODO: user defined allow lpc
605    for line in content.lines().take(12) {
606        if util::starts_with_any(
607            line,
608            true,
609            [
610                "//",
611                "inherit",
612                "private",
613                "protected",
614                "nosave",
615                "string",
616                "object",
617                "mapping",
618                "mixed",
619            ],
620        ) {
621            return Some(FileType::Lpc);
622        }
623    }
624    Some(FileType::C)
625}
626
627fn lsl(_path: &Path, content: &str) -> Option<FileType> {
628    // TODO: user defined preferred lsl filetype
629    match util::next_non_blank(content, 0)
630        .map_or(false, |line| regex_is_match!(r"^\s*%|:\s*trait\s*$", line))
631    {
632        true => Some(FileType::Larch),
633        false => Some(FileType::Lsl),
634    }
635}
636
637fn m(_path: &Path, content: &str) -> Option<FileType> {
638    // TODO: user defined preferred m filetype
639    let octave_block_terminators = regex!(
640        r"(^|;)\s*\bend(_try_catch|classdef|enumeration|events|methods|parfor|properties)\b"i
641    );
642    let objc_preprocessor =
643        regex!(r"^\s*#\s*(import|include|define|if|ifn?def|undef|line|error|pragma)\b"i);
644
645    let mut saw_comment = false;
646    for line in content.lines().take(100) {
647        let trimmed_line = line.trim_start();
648        if trimmed_line.starts_with("/*") {
649            // /* ... */ is a comment in Objective C and Murphi, so we can't conclude
650            // it's either of them yet, but track this as a hint in case we don't see
651            // anything more definitive.
652            saw_comment = true;
653        }
654        if trimmed_line.starts_with("//")
655            || util::starts_with_any(trimmed_line, false, ["@import"])
656            || objc_preprocessor.is_match(line)
657        {
658            return Some(FileType::ObjC);
659        } else if util::starts_with_any(trimmed_line, false, ["#", "%%!", "unwind_protect"])
660            || octave_block_terminators.is_match(line)
661        {
662            return Some(FileType::Octave);
663        } else if trimmed_line.starts_with("%%") {
664            return Some(FileType::Matlab);
665        } else if trimmed_line.starts_with("(*") {
666            return Some(FileType::Mma);
667        } else if regex_is_match!(r"^\s*((type|var)\b|--)"i, line) {
668            return Some(FileType::Murphi);
669        }
670    }
671
672    match saw_comment {
673        // We didn't see anything definitive, but this looks like either Objective C
674        // or Murphi based on the comment leader. Assume the former as it is more
675        // common.
676        true => Some(FileType::ObjC),
677        // default is Matlab
678        false => Some(FileType::Matlab),
679    }
680}
681
682fn m4_ext(path: &Path, _content: &str) -> Option<FileType> {
683    match !path.to_str().map_or(false, |p| p.ends_with("html.m4"))
684        && !path.to_str().map_or(false, |p| p.contains("fvwm2rc"))
685    {
686        true => Some(FileType::M4),
687        false => None,
688    }
689}
690
691fn mc(_path: &Path, content: &str) -> Option<FileType> {
692    for line in content.lines().take(20) {
693        let trimmed_line = line.trim_start();
694        if util::starts_with_any(trimmed_line, false, ["#", "dnl"]) {
695            return Some(FileType::M4);
696        } else if trimmed_line.starts_with(';') {
697            return Some(FileType::MsMessages);
698        }
699    }
700    Some(FileType::M4)
701}
702
703fn me(path: &Path, _content: &str) -> Option<FileType> {
704    match path.file_name().map_or(false, |name| {
705        name.eq_ignore_ascii_case("read.me") || name.eq_ignore_ascii_case("click.me")
706    }) {
707        true => None,
708        false => Some(FileType::Nroff),
709    }
710}
711
712fn mm(_path: &Path, content: &str) -> Option<FileType> {
713    for line in content.lines().take(20) {
714        if regex_is_match!(r"^\s*(#\s*(include|import)\b|@import\b|/\*)"i, line) {
715            return Some(FileType::ObjCpp);
716        }
717    }
718    Some(FileType::Nroff)
719}
720
721fn mms(_path: &Path, content: &str) -> Option<FileType> {
722    for line in content.lines().take(20) {
723        let trimmed_line = line.trim_start();
724        if util::starts_with_any(trimmed_line, true, ["%", "//", "*"]) {
725            return Some(FileType::Mmix);
726        } else if trimmed_line.starts_with('#') {
727            return Some(FileType::Make);
728        }
729    }
730    Some(FileType::Mmix)
731}
732
733fn is_lprolog(content: &str) -> bool {
734    for line in content.lines().take(500) {
735        let trimmed_line = line.trim_start();
736        if !trimmed_line.is_empty() && !trimmed_line.starts_with('%') {
737            return regex_is_match!(r"\bmodule\s+\w+\s*\.\s*(%|$)"i, line);
738        }
739    }
740    false
741}
742
743fn is_rapid(content: &str) -> bool {
744    util::next_non_blank(content, 0).map_or(
745        false,
746        |line| regex_is_match!(r"^\s*(%{3}|module\s+\w+\s*(\(|$))"i, line),
747    )
748}
749
750fn mod_(path: &Path, content: &str) -> Option<FileType> {
751    // TODO: user defined preferred mod filetype
752    if path
753        .file_name()
754        .map_or(false, |name| name.eq_ignore_ascii_case("go.mod"))
755    {
756        Some(FileType::GoMod)
757    } else if is_lprolog(content) {
758        Some(FileType::LambdaProlog)
759    } else if util::next_non_blank(content, 0).map_or(false, |line| {
760        regex_is_match!(r"(\bMODULE\s+\w+\s*;|^\s*\(\*)", line)
761    }) {
762        Some(FileType::Modula2)
763    } else if is_rapid(content) {
764        Some(FileType::Rapid)
765    } else {
766        Some(FileType::Modsim3)
767    }
768}
769
770fn news(_path: &Path, content: &str) -> Option<FileType> {
771    match util::find(content, 1, false, "; urgency=") {
772        true => Some(FileType::DebChangelog),
773        false => None,
774    }
775}
776
777fn nroff(_path: &Path, content: &str) -> Option<FileType> {
778    for line in content.lines().take(5) {
779        if line.starts_with('.') {
780            return Some(FileType::Nroff);
781        }
782    }
783    None
784}
785
786fn patch(_path: &Path, content: &str) -> Option<FileType> {
787    match regex_is_match!(
788        r"^From [a-fA-F0-9]{40}+ Mon Sep 17 00:00:00 2001$",
789        util::get_lines(content, 1)
790    ) {
791        true => Some(FileType::GitSendEmail),
792        false => Some(FileType::Diff),
793    }
794}
795
796fn perl(path: &Path, content: &str) -> Option<FileType> {
797    match (path.extension().map_or(false, |ext| ext == "t")
798        && path
799            .parent()
800            .and_then(|p| p.file_name())
801            .map_or(false, |dir| dir == "t" || dir == "xt"))
802        || (content.starts_with('#') && util::find(content, 1, false, "perl"))
803        || content
804            .lines()
805            .take(30)
806            .any(|line| util::starts_with_any(line.trim_start(), false, ["use"]))
807    {
808        true => Some(FileType::Perl),
809        false => None,
810    }
811}
812
813fn pl(_path: &Path, content: &str) -> Option<FileType> {
814    // TODO: user defined preferred pl filetype
815    match util::next_non_blank(content, 0).map_or(
816        false,
817        |line| regex_is_match!(r":-|\bprolog\b|^\s*(%+(\s|$)|/\*)"i, line),
818    ) {
819        true => Some(FileType::Prolog),
820        false => Some(FileType::Perl),
821    }
822}
823
824fn pm(_path: &Path, content: &str) -> Option<FileType> {
825    let line = util::get_lines(content, 1);
826    if line.contains("XPM2") {
827        Some(FileType::Xpm2)
828    } else if line.contains("XPM") {
829        Some(FileType::Xpm)
830    } else {
831        Some(FileType::Perl)
832    }
833}
834
835fn pp(_path: &Path, content: &str) -> Option<FileType> {
836    // TODO: user defined preferred pp filetype
837    match util::next_non_blank(content, 0).map_or(false, |line| {
838        PASCAL_COMMENTS.is_match(line) || PASCAL_KEYWORDS.is_match(line)
839    }) {
840        true => Some(FileType::Pascal),
841        false => Some(FileType::Puppet),
842    }
843}
844
845fn prg(_path: &Path, content: &str) -> Option<FileType> {
846    // TODO: user defined preferred prg filetype
847    match is_rapid(content) {
848        true => Some(FileType::Rapid),
849        false => Some(FileType::Clipper),
850    }
851}
852
853fn progress_asm(path: &Path, content: &str) -> Option<FileType> {
854    // TODO: user defined preferred i filetype
855    for line in content.lines().take(10) {
856        let trimmed_line = line.trim_start();
857        if trimmed_line.starts_with(';') {
858            return asm(path, content);
859        } else if trimmed_line.starts_with("/*") {
860            break;
861        }
862    }
863    Some(FileType::Progress)
864}
865
866fn progress_cweb(_path: &Path, content: &str) -> Option<FileType> {
867    // TODO: user defined preferred w filetype
868    match util::starts_with_any(content, false, ["&analyze"])
869        || content
870            .lines()
871            .take(3)
872            .any(|line| util::starts_with_any(line, false, ["&global-define"]))
873    {
874        true => Some(FileType::Progress),
875        false => Some(FileType::Cweb),
876    }
877}
878
879fn progress_pascal(_path: &Path, content: &str) -> Option<FileType> {
880    // TODO: user defined preferred p filetype
881    for line in content.lines().take(10) {
882        if PASCAL_COMMENTS.is_match(line) || PASCAL_KEYWORDS.is_match(line) {
883            return Some(FileType::Pascal);
884        } else if line.trim_start().starts_with("/*") {
885            break;
886        }
887    }
888    Some(FileType::Progress)
889}
890
891/// Distinguish between `default`, Prolog, and Cproto files.
892fn proto(content: &str, default: FileType) -> Option<FileType> {
893    // Cproto files have a comment in the first line and a function prototype in
894    // the second line, it always ends in `;`. Indent files may also have
895    // comments, thus we can't match comments to see the difference.
896    // IDL files can have a single `;` in the second line, require at least one
897    // character before the `;`.
898    if regex_is_match!(r".;$", util::get_lines(content, 2)) {
899        // second line ends with `;`
900        return Some(FileType::Cpp);
901    }
902    // recognize Prolog by specific text in the first non-empty line;
903    // require a blank after the `%` because Perl uses `%list` and `%translate`
904    match util::next_non_blank(content, 0).map_or(
905        false,
906        |line| regex_is_match!(r":-|\bprolog\b|^\s*(%+(\s|$)|/\*)"i, line),
907    ) {
908        true => Some(FileType::Prolog),
909        false => Some(default),
910    }
911}
912
913fn psf(_path: &Path, content: &str) -> Option<FileType> {
914    let trimmed_line = util::get_lines(content, 1).trim();
915    match [
916        "distribution",
917        "installed_software",
918        "root",
919        "bundle",
920        "product",
921    ]
922    .into_iter()
923    .any(|pat| trimmed_line.eq_ignore_ascii_case(pat))
924    {
925        true => Some(FileType::Psf),
926        false => None,
927    }
928}
929
930fn r(_path: &Path, content: &str) -> Option<FileType> {
931    // Rebol is easy to recognize, check for that first
932    if regex_is_match!(r"\brebol\b"i, util::get_lines(content, 50)) {
933        return Some(FileType::Rebol);
934    }
935
936    for line in content.lines().take(50) {
937        let trimmed_line = line.trim_start();
938        // R has # comments
939        if trimmed_line.starts_with('#') {
940            return Some(FileType::R);
941        }
942        // Rexx has /* comments */
943        if trimmed_line.starts_with("/*") {
944            return Some(FileType::Rexx);
945        }
946    }
947    // TODO: user defined preferred r filetype
948    Some(FileType::R)
949}
950
951fn rc(path: &Path, _content: &str) -> Option<FileType> {
952    match path
953        .to_str()
954        .map_or(false, |str| str.contains("/etc/Muttrc.d/"))
955    {
956        true => None,
957        false => Some(FileType::Rc),
958    }
959}
960
961fn redif(_path: &Path, content: &str) -> Option<FileType> {
962    for line in content.lines().take(5) {
963        if util::starts_with_any(line, false, ["template-type:"]) {
964            return Some(FileType::Redif);
965        }
966    }
967    None
968}
969
970fn reg(_path: &Path, content: &str) -> Option<FileType> {
971    match regex_is_match!(
972        r"^regedit[0-9]*\s*$|^windows registry editor version \d*\.\d*\s*$"i,
973        util::get_lines(content, 1)
974    ) {
975        true => Some(FileType::Registry),
976        false => None,
977    }
978}
979
980fn rul(_path: &Path, content: &str) -> Option<FileType> {
981    match util::find(content, 6, false, "installshield") {
982        true => Some(FileType::InstallShield),
983        false => Some(FileType::Diva),
984    }
985}
986
987fn rules(path: &Path, _content: &str) -> Option<FileType> {
988    let utf8_path = path.to_str();
989    if utf8_path.map_or(
990        false,
991        |p| regex_is_match!(r"/(etc|(usr/)?lib)/udev/(rules\.d/)?.*\.rules$"i, p),
992    ) {
993        Some(FileType::UdevRules)
994    } else if path.starts_with("/etc/ufw") {
995        Some(FileType::Conf)
996    } else if utf8_path.map_or(
997        false,
998        |p| regex_is_match!(r"/(etc|usr/share)/polkit-1/rules\.d/"i, p),
999    ) {
1000        Some(FileType::JavaScript)
1001    } else {
1002        // TODO: maybe try to read udev.conf for other paths
1003        Some(FileType::Hog)
1004    }
1005}
1006
1007fn sc(_path: &Path, content: &str) -> Option<FileType> {
1008    for line in content.lines().take(25) {
1009        if regex_is_match!(r"(class)?var\s<|\^this.*|\|\w+\||\+\s\w*\s\{|\*ar\s", line) {
1010            return Some(FileType::Supercollider);
1011        }
1012    }
1013    Some(FileType::Scala)
1014}
1015
1016fn scd(_path: &Path, content: &str) -> Option<FileType> {
1017    match regex_is_match!(
1018        r#"^\S+\(\d[0-9A-Za-z]*\)(\s+"[^"]*"]){0,2}"#,
1019        util::get_lines(content, 1)
1020    ) {
1021        true => Some(FileType::Scdoc),
1022        false => Some(FileType::Supercollider),
1023    }
1024}
1025
1026fn sgml(_path: &Path, content: &str) -> Option<FileType> {
1027    let lines = util::get_lines(content, 5);
1028    if lines.contains("linuxdoc") {
1029        Some(FileType::Smgllnx)
1030    } else if regex_is_match!(r"<!DOCTYPE[\s\S]*DocBook", lines) {
1031        Some(FileType::DocBookSgml4)
1032    } else {
1033        Some(FileType::Sgml)
1034    }
1035}
1036
1037fn sh(content: &str, dialect: Option<FileType>) -> Option<FileType> {
1038    let dialect = dialect.unwrap_or_else(|| {
1039        let first_line = util::get_lines(content, 1);
1040        // try to detect from shebang
1041        if !regex_is_match!(r"^\s*#!", first_line) {
1042            FileType::Sh
1043        } else if regex_is_match!(r"\bcsh\b"i, first_line) {
1044            FileType::Csh
1045        } else if regex_is_match!(r"\btcsh\b"i, first_line) {
1046            FileType::Tcsh
1047        } else if regex_is_match!(r"\bzsh\b"i, first_line) {
1048            FileType::Zsh
1049        } else if regex_is_match!(r"\bksh\b"i, first_line) {
1050            FileType::Ksh
1051        } else if regex_is_match!(r"\b(bash|bash2)\b"i, first_line) {
1052            FileType::Bash
1053        } else {
1054            FileType::Sh
1055        }
1056    });
1057    shell(content, dialect)
1058}
1059
1060fn shell(content: &str, dialect: FileType) -> Option<FileType> {
1061    let mut prev_line = "";
1062    for (line_num, line) in content.lines().enumerate().take(1000) {
1063        // skip the first line
1064        if line_num == 0 {
1065            prev_line = line;
1066            continue;
1067        }
1068
1069        if regex_is_match!(r"\s*exec\s+(\S*/)?(tclsh|wish)"i, line)
1070            && !regex_is_match!(r"^\s*#.*\\$"i, prev_line)
1071        {
1072            // found an "exec" line with `tclsh` or `wish` after a comment with continuation
1073            return Some(FileType::Tcl);
1074        }
1075
1076        prev_line = line;
1077    }
1078    Some(dialect)
1079}
1080
1081fn sig(_path: &Path, content: &str) -> Option<FileType> {
1082    // TODO: user defined preferred sig filetype
1083    let line = util::next_non_blank(content, 0)?;
1084    if regex_is_match!(r"^\s*(/\*|%|sig\s+[a-zA-Z])", line) {
1085        Some(FileType::LambdaProlog)
1086    } else if regex_is_match!(r"^\s*(\(\*|(signature|structure)\s+[a-zA-Z])", line) {
1087        Some(FileType::Sml)
1088    } else {
1089        None
1090    }
1091}
1092
1093fn sil(_path: &Path, content: &str) -> Option<FileType> {
1094    for line in content.lines().take(100) {
1095        let trimmed_line = line.trim_start();
1096        if trimmed_line.starts_with(['\\', '%']) {
1097            return Some(FileType::Sile);
1098        } else if !trimmed_line.is_empty() {
1099            return Some(FileType::Sil);
1100        }
1101    }
1102    Some(FileType::Sil)
1103}
1104
1105fn smi(_path: &Path, content: &str) -> Option<FileType> {
1106    match regex_is_match!(r"\bsmil\b"i, util::get_lines(content, 1)) {
1107        true => Some(FileType::Smil),
1108        false => Some(FileType::Mib),
1109    }
1110}
1111
1112fn smil(_path: &Path, content: &str) -> Option<FileType> {
1113    match regex_is_match!(r"<\?\s*xml.*\?>", util::get_lines(content, 1)) {
1114        true => Some(FileType::Xml),
1115        false => Some(FileType::Smil),
1116    }
1117}
1118
1119fn sql(_path: &Path, _content: &str) -> Option<FileType> {
1120    // TODO: user defined preferred sql filetype
1121    Some(FileType::Sql)
1122}
1123
1124fn src(_path: &Path, content: &str) -> Option<FileType> {
1125    // TODO: user defined preferred src filetype
1126    match util::next_non_blank(content, 0).map_or(
1127        false,
1128        |line| regex_is_match!(r"^\s*(&\w+|(global\s+)?def(fct)?\b)"i, line),
1129    ) {
1130        true => Some(FileType::Krl),
1131        false => None,
1132    }
1133}
1134
1135fn sys(_path: &Path, content: &str) -> Option<FileType> {
1136    // TODO: user defined preferred sys filetype
1137    match is_rapid(content) {
1138        true => Some(FileType::Rapid),
1139        false => Some(FileType::Bat),
1140    }
1141}
1142
1143/// Choose context, plaintex, or tex (LaTeX)
1144fn tex(path: &Path, content: &str) -> Option<FileType> {
1145    let first_line = util::get_lines(content, 1);
1146    if regex_is_match!(r"^%&\s*plain(tex)?", first_line) {
1147        Some(FileType::PlainTex)
1148    } else if regex_is_match!(r"^%&\s*context", first_line)
1149        || path
1150            .to_str()
1151            .map_or(false, |p| regex_is_match!(r"tex/context/.*/.*\.tex"i, p))
1152    {
1153        Some(FileType::Context)
1154    } else {
1155        let latex_regex =
1156            regex!(r"^\s*\\(documentclass\b|usepackage\b|begin\{|newcommand\b|renewcommand\b)"i);
1157        let context_regex = regex!(
1158            r"^\s*\\(start[a-zA-Z]+|setup[a-zA-Z]+|usemodule|enablemode|enableregime|setvariables|useencoding|usesymbols|stelle[a-zA-Z]+|verwende[a-zA-Z]+|stel[a-zA-Z]+|gebruik[a-zA-Z]+|usa[a-zA-Z]+|imposta[a-zA-Z]+|regle[a-zA-Z]+|utilisemodule\b)"i
1159        );
1160
1161        for line in content
1162            .lines()
1163            .skip_while(|line| regex_is_match!(r"^\s*%\S", line))
1164            .take(1000)
1165        {
1166            if latex_regex.is_match(line) {
1167                return Some(FileType::Tex);
1168            } else if context_regex.is_match(line) {
1169                return Some(FileType::Context);
1170            }
1171        }
1172
1173        Some(FileType::Tex)
1174    }
1175}
1176
1177fn tf(_path: &Path, content: &str) -> Option<FileType> {
1178    for line in content.lines() {
1179        let trimmed_line = line.trim_start();
1180        if !trimmed_line.is_empty() && !trimmed_line.starts_with([';', '/']) {
1181            return Some(FileType::Terraform);
1182        }
1183    }
1184    Some(FileType::Tf)
1185}
1186
1187fn tmp(path: &Path, content: &str) -> Option<FileType> {
1188    // for files like `main.rs~` retry search without the `~` suffix
1189    path.file_name()
1190        .and_then(|os_str| os_str.to_str())
1191        .and_then(|name| try_detect(path.with_file_name(&name[..name.len() - 1]), content))
1192}
1193
1194fn ts(_path: &Path, content: &str) -> Option<FileType> {
1195    match regex_is_match!(r"<\?\s*xml", util::get_lines(content, 1)) {
1196        true => Some(FileType::Xml),
1197        false => Some(FileType::Smil),
1198    }
1199}
1200
1201fn ttl(_path: &Path, content: &str) -> Option<FileType> {
1202    match regex_is_match!(r"^@?(prefix|base)", util::get_lines(content, 1)) {
1203        true => Some(FileType::Turtle),
1204        false => Some(FileType::Teraterm),
1205    }
1206}
1207
1208fn txt(_path: &Path, content: &str) -> Option<FileType> {
1209    // vim helpfiles match *.txt but should have a modeline as last line
1210    match regex_is_match!(
1211        r"vim:.*ft=help",
1212        content.lines().next_back().unwrap_or(content)
1213    ) {
1214        true => Some(FileType::VimHelp),
1215        false => Some(FileType::Text),
1216    }
1217}
1218
1219fn typ(_path: &Path, content: &str) -> Option<FileType> {
1220    // TODO: user defined preferred typ filetype
1221    for line in content.lines().take(200) {
1222        if regex_is_match!(r"^(CASE\s*=\s*(SAME|LOWER|UPPER|OPPOSITE)$|TYPE\s)", line) {
1223            return Some(FileType::Sql);
1224        }
1225    }
1226    Some(FileType::Typst)
1227}
1228
1229fn v(_path: &Path, content: &str) -> Option<FileType> {
1230    for line in content.lines().take(200) {
1231        if !line.trim_start().starts_with('/') {
1232            if regex_is_match!(r";\s*($|/)", line) {
1233                return Some(FileType::Verilog);
1234            } else if regex_is_match!(r"\.\s*($|\(\*)", line) {
1235                return Some(FileType::Coq);
1236            }
1237        }
1238    }
1239    Some(FileType::V)
1240}
1241
1242fn web(_path: &Path, content: &str) -> Option<FileType> {
1243    for line in content.lines().take(5) {
1244        if line.starts_with('%') {
1245            return Some(FileType::Web);
1246        }
1247    }
1248    Some(FileType::WinBatch)
1249}
1250
1251fn xfree86(_path: &Path, content: &str) -> Option<FileType> {
1252    match regex_is_match!(r"\bXConfigurator\b", util::get_lines(content, 1)) {
1253        true => Some(FileType::XF86Conf3),
1254        false => Some(FileType::XF86Conf),
1255    }
1256}
1257
1258fn xml(_path: &Path, content: &str) -> Option<FileType> {
1259    for line in content.lines().take(100) {
1260        if regex_is_match!(r"<!DOCTYPE.*DocBook", line) {
1261            return Some(FileType::DocBookXml4);
1262        } else if util::find(line, 0, false, " xmlns=\"http://docbook.org/ns/docbook\"") {
1263            return Some(FileType::DocBookXml5);
1264        } else if util::find(line, 0, false, "xmlns:xbl=\"http://www.mozilla.org/xbl\"") {
1265            return Some(FileType::Xbl);
1266        }
1267    }
1268    Some(FileType::Xml)
1269}
1270
1271fn xpm(_path: &Path, content: &str) -> Option<FileType> {
1272    match util::find(content, 1, true, "XPM2") {
1273        true => Some(FileType::Xpm2),
1274        false => Some(FileType::Xpm),
1275    }
1276}
1277
1278fn y(_path: &Path, content: &str) -> Option<FileType> {
1279    for line in content.lines().take(100) {
1280        if line.trim_start().starts_with('%') {
1281            return Some(FileType::Yacc);
1282        } else if regex_is_match!(r"^\s*(#|class\b)"i, line)
1283            && !regex_is_match!(r"^\s*#\s*include"i, line)
1284        {
1285            return Some(FileType::Racc);
1286        }
1287    }
1288    Some(FileType::Yacc)
1289}