use petgraph::prelude::*;
use std::collections::{HashMap, HashSet};
use std::fs::File;
use std::io::prelude::*;
use std::path::Path;
use std::sync::LazyLock;
mod basetype;
mod fdo_magic;
/// A MIME type name such as `"image/png"`, held as a string slice with
/// `'static` lifetime.
type Mime = &'static str;
/// MIME types that `typegraph_walker` moves to the front of a node's child
/// list, so that they are tested before any sibling not listed here.
const TYPEORDER: [&str; 6] = [
"image/png",
"image/jpeg",
"image/gif",
"application/zip",
"application/x-msdos-executable",
"application/pdf",
];
/// Interface implemented by every detection backend listed in `CHECKERS`.
///
/// The `Send + Sync` supertraits allow trait objects to be stored in statics.
trait Checker: Send + Sync {
/// Reports whether `bytes` is considered to be of type `mimetype` by this
/// checker.
fn match_bytes(&self, bytes: &[u8], mimetype: &str) -> bool;
/// Reports whether `file` is considered to be of type `mimetype` by this
/// checker.
fn match_file(&self, file: &File, mimetype: &str) -> bool;
/// Lists the MIME types this checker handles. Each one becomes a key of
/// `CHECKER_SUPPORT` and a node of the type graph.
fn get_supported(&self) -> Vec<Mime>;
/// Lists pairs of related MIME types. Each pair whose two members are both
/// supported becomes a directed edge (first element -> second element) in
/// the type graph.
fn get_subclasses(&self) -> Vec<(Mime, Mime)>;
/// Returns a map that is merged into `ALIASES`; `get_alias` replaces a
/// requested type found as a key with the associated value.
fn get_aliaslist(&self) -> HashMap<Mime, Mime>;
}
/// All detection backends, in the order they are consulted when the lookup
/// tables are built.
///
/// Order matters: when two checkers report the same MIME type (or the same
/// alias key), the entry from the checker listed later replaces the earlier
/// one in `CHECKER_SUPPORT` / `ALIASES`.
static CHECKERS: &[&'static dyn Checker] = &[
&fdo_magic::builtin::check::FdoMagic,
&basetype::check::BaseType,
];
/// Maps each supported MIME type to the checker responsible for testing it.
///
/// Built lazily on first access. Checkers are visited in `CHECKERS` order and
/// a later checker replaces an earlier one registered for the same MIME type.
static CHECKER_SUPPORT: LazyLock<HashMap<Mime, &'static dyn Checker>> = LazyLock::new(|| {
    CHECKERS
        .iter()
        .flat_map(|&checker| {
            checker
                .get_supported()
                .into_iter()
                .map(move |mime| (mime, checker))
        })
        .collect()
});
/// Merged alias table of every checker, keyed by the alias name.
///
/// Built lazily on first access. Checkers are visited in `CHECKERS` order, so
/// an alias defined by a later checker replaces an earlier definition.
static ALIASES: LazyLock<HashMap<Mime, Mime>> = LazyLock::new(|| {
    CHECKERS
        .iter()
        .flat_map(|checker| checker.get_aliaslist())
        .collect()
});
/// Holder for the MIME type graph stored in the `TYPE` static.
struct TypeStruct {
/// Directed graph whose node weights are MIME type names. The `u32` edge
/// weights are not read anywhere in this file.
graph: DiGraph<Mime, u32>,
}
/// The MIME type graph, built lazily on first access.
///
/// Construction steps:
/// 1. Every MIME type reported by any checker becomes a node (sorted and
///    de-duplicated, so node indices are stable).
/// 2. Every relation pair reported by a checker becomes an edge from its first
///    element to its second element, provided both are known nodes.
/// 3. The nodes `text/plain`, `application/octet-stream`, `all/all` and
///    `all/allfiles` are created if they do not exist yet.
/// 4. Every remaining node without an incoming edge is attached below
///    `text/plain` (`text/*`), `all/all` (`inode/*`) or
///    `application/octet-stream` (everything else).
///
/// Edges are inserted in a fixed order. `typegraph_walker` returns the first
/// child that matches, so the order in which edges are added must not depend
/// on the randomised iteration order of a `HashSet`; otherwise detection
/// results could differ between runs of the same program.
static TYPE: LazyLock<TypeStruct> = LazyLock::new(|| {
    let mut graph = DiGraph::<Mime, u32>::new();
    let mut added_mimes = HashMap::<Mime, NodeIndex>::default();

    // Gather node names and raw relations from every checker.
    let mut mimelist = Vec::<Mime>::new();
    let mut edgelist_raw = Vec::<(Mime, Mime)>::new();
    for &c in CHECKERS {
        mimelist.extend(c.get_supported());
        edgelist_raw.extend(c.get_subclasses());
    }
    mimelist.sort_unstable();
    mimelist.dedup();

    // One node per distinct MIME type.
    for &mimetype in &mimelist {
        let node = graph.add_node(mimetype);
        added_mimes.insert(mimetype, node);
    }

    // Resolve raw relations to node indices, silently dropping pairs that
    // mention an unknown type. The set removes duplicate pairs.
    // NOTE(review): the edge runs first -> second and the walker descends
    // along outgoing edges, so the first element acts as the broader type —
    // confirm against the `get_subclasses` implementations.
    let edge_list: HashSet<(NodeIndex, NodeIndex)> = edgelist_raw
        .iter()
        .filter_map(|(first_raw, second_raw)| {
            let second = added_mimes.get(second_raw)?;
            let first = added_mimes.get(first_raw)?;
            Some((*first, *second))
        })
        .collect();

    // Sort before inserting so the edge order (and with it the neighbor
    // order seen by the walker) is the same on every run.
    let mut ordered_edges: Vec<(NodeIndex, NodeIndex)> = edge_list.iter().copied().collect();
    ordered_edges.sort_unstable();
    graph.extend_with_edges(&ordered_edges);

    // Make sure the four anchor nodes exist.
    let node_text = *added_mimes
        .entry("text/plain")
        .or_insert_with(|| graph.add_node("text/plain"));
    let node_octet = *added_mimes
        .entry("application/octet-stream")
        .or_insert_with(|| graph.add_node("application/octet-stream"));
    let node_allall = *added_mimes
        .entry("all/all")
        .or_insert_with(|| graph.add_node("all/all"));
    let node_allfiles = *added_mimes
        .entry("all/allfiles")
        .or_insert_with(|| graph.add_node("all/allfiles"));

    // Attach every other root below an anchor chosen by its top-level type.
    // `externals` walks nodes in index order, so this list is deterministic.
    let anchors = [node_text, node_octet, node_allfiles, node_allall];
    let mut fallback_edges = Vec::<(NodeIndex, NodeIndex)>::new();
    for mimenode in graph.externals(Incoming) {
        if anchors.contains(&mimenode) {
            continue;
        }
        let toplevel = graph[mimenode].split('/').next().unwrap_or("");
        let anchor = match toplevel {
            "text" => node_text,
            "inode" => node_allall,
            _ => node_octet,
        };
        let edge = (anchor, mimenode);
        // Never add an edge that already exists from the relation pass.
        if !edge_list.contains(&edge) {
            fallback_edges.push(edge);
        }
    }
    graph.extend_with_edges(&fallback_edges);

    TypeStruct { graph }
});
/// Searches the type graph below `parentnode` for the most specific MIME type
/// accepted by `matchfn`.
///
/// The children of `parentnode` (targets of its outgoing edges) are tested in
/// turn; children listed in `TYPEORDER` are tested first. For the first child
/// that `matchfn` accepts, the search continues recursively below that child
/// and the deeper result is returned if there is one, otherwise the child's
/// own type. `parentnode` itself is never tested.
///
/// Returns `None` when no child of `parentnode` matches.
fn typegraph_walker<T, F>(parentnode: NodeIndex, input: &T, matchfn: F) -> Option<Mime>
where
    T: ?Sized,
    F: Fn(&str, &T) -> bool,
{
    // Split the neighbors into preferred types and everything else, keeping
    // the neighbor order inside each group.
    let (mut preferred, remaining): (Vec<NodeIndex>, Vec<NodeIndex>) = TYPE
        .graph
        .neighbors_directed(parentnode, Outgoing)
        .partition(|&node| TYPEORDER.contains(&TYPE.graph[node]));
    // Preferred entries found later in the neighbor list are tried first.
    preferred.reverse();

    for childnode in preferred.into_iter().chain(remaining) {
        let mimetype: Mime = TYPE.graph[childnode];
        if matchfn(mimetype, input) {
            // Commit to the first matching child: prefer a deeper match,
            // fall back to the child itself.
            return typegraph_walker(childnode, input, matchfn).or(Some(mimetype));
        }
    }
    None
}
/// Resolves `mimetype` through the `ALIASES` table.
///
/// Returns the mapped name when `mimetype` is a known alias key, otherwise
/// returns `mimetype` unchanged.
fn get_alias(mimetype: &str) -> &str {
    ALIASES.get(mimetype).copied().unwrap_or(mimetype)
}
/// Tests `bytes` against `mimetype` without resolving aliases first.
///
/// Returns `false` when no checker is registered for `mimetype`.
fn match_u8_noalias(mimetype: &str, bytes: &[u8]) -> bool {
    CHECKER_SUPPORT
        .get(mimetype)
        .is_some_and(|checker| checker.match_bytes(bytes, mimetype))
}
/// Checks whether the given bytes are of the given MIME type.
///
/// `mimetype` is resolved through the alias table before the lookup. Returns
/// `false` when no checker handles the resolved type.
pub fn match_u8(mimetype: &str, bytes: &[u8]) -> bool {
    let resolved = get_alias(mimetype);
    match_u8_noalias(resolved, bytes)
}
/// Detects the MIME type of `bytes`, searching the type graph below
/// `parentnode` with the byte-based matcher.
///
/// Returns `None` when no child of `parentnode` matches.
fn from_u8_node(parentnode: NodeIndex, bytes: &[u8]) -> Option<Mime> {
    let matcher: fn(&str, &[u8]) -> bool = match_u8_noalias;
    typegraph_walker(parentnode, bytes, matcher)
}
/// Detects the MIME type of a byte slice.
///
/// The search starts at the first node of the type graph that has no incoming
/// edge.
///
/// # Panics
///
/// Panics if the type graph has no such root node, or if no type below the
/// root matches `bytes`.
pub fn from_u8(bytes: &[u8]) -> Mime {
    let Some(root) = TYPE.graph.externals(Incoming).next() else {
        panic!("No filetype definitions are loaded.");
    };
    from_u8_node(root, bytes).unwrap()
}
/// Checks whether the given open file is of the given MIME type.
///
/// `mimetype` is resolved through the alias table before the lookup. Returns
/// `false` when no checker handles the resolved type.
pub fn match_file(mimetype: &str, file: &File) -> bool {
    let resolved = get_alias(mimetype);
    match_file_noalias(resolved, file)
}
/// Tests `file` against `mimetype` without resolving aliases first.
///
/// Returns `false` when no checker is registered for `mimetype`.
fn match_file_noalias(mimetype: &str, file: &File) -> bool {
    CHECKER_SUPPORT
        .get(mimetype)
        .is_some_and(|checker| checker.match_file(file, mimetype))
}
/// Checks whether the file at `path` is of the given MIME type.
///
/// Returns `false` when the file cannot be opened.
#[inline]
pub fn match_filepath(mimetype: &str, path: &Path) -> bool {
    match File::open(path) {
        Ok(file) => match_file(mimetype, &file),
        Err(_) => false,
    }
}
/// Detects the MIME type of `file`, searching the type graph below
/// `parentnode`.
///
/// When the file matches `application/octet-stream`, up to 2048 bytes are
/// read from it and detection continues on those bytes. Otherwise the graph
/// is searched with the file-based matcher.
///
/// Returns `None` when reading fails or no child of `parentnode` matches.
fn from_file_node(parentnode: NodeIndex, file: &File) -> Option<Mime> {
    if match_file("application/octet-stream", file) {
        let head = read_bytes(file, 2048).ok()?;
        from_u8_node(parentnode, &head)
    } else {
        typegraph_walker(parentnode, file, match_file_noalias)
    }
}
/// Detects the MIME type of an open file.
///
/// The search starts at the first node of the type graph that has no incoming
/// edge. Returns `None` when the graph has no such node or when detection
/// below it yields nothing.
pub fn from_file(file: &File) -> Option<Mime> {
    TYPE.graph
        .externals(Incoming)
        .next()
        .and_then(|root| from_file_node(root, file))
}
/// Detects the MIME type of the file at `path`.
///
/// Returns `None` when the file cannot be opened or when detection yields
/// nothing.
#[inline]
pub fn from_filepath(path: &Path) -> Option<Mime> {
    File::open(path).ok().and_then(|file| from_file(&file))
}
/// Reads at most `bytecount` bytes from `file`, starting at its current
/// position.
///
/// Fewer bytes are returned when the file ends first.
///
/// # Errors
///
/// Returns the underlying I/O error if reading fails.
fn read_bytes(file: &File, bytecount: usize) -> Result<Vec<u8>, std::io::Error> {
    let mut buffer: Vec<u8> = Vec::with_capacity(bytecount);
    let limit = bytecount as u64;
    let mut limited = file.take(limit);
    limited.read_to_end(&mut buffer)?;
    Ok(buffer)
}