use ndarray::ArrayViewD;
use crate::model::Label;
use crate::ContentType;
/// Identified type of an input: a directory, a symlink, or a content type.
///
/// `Directory` and `Symlink` carry no content type (`content_type()` returns
/// `None`); the other two variants wrap one.
#[derive(Debug, Clone)]
pub enum FileType {
/// A directory. Reported with a score of `1.0` and described by
/// `crate::content::DIRECTORY`.
Directory,
/// A symbolic link. Reported with a score of `1.0` and described by
/// `crate::content::SYMLINK`.
Symlink,
/// A content type derived from per-label scores, together with that score
/// and any overwrite that was applied (see `InferredType`).
Inferred(InferredType),
/// A content type carried directly, always reported with a score of `1.0`.
// NOTE(review): presumably decided by a fixed rule rather than by inference;
// confirm at the construction site, which is not visible here.
Ruled(ContentType),
}
/// Outcome of inferring a content type from per-label scores.
#[derive(Debug, Clone)]
pub struct InferredType {
/// Replacement content type and the reason it replaced `inferred_type`.
///
/// When this is `Some`, `InferredType::content_type()` returns the
/// replacement; when it is `None`, `inferred_type` is returned unchanged.
pub content_type: Option<(ContentType, OverwriteReason)>,
/// Content type of the best-scoring label, before any overwrite.
pub inferred_type: ContentType,
/// Score of the best-scoring label.
pub score: f32,
}
/// Why an inferred content type was replaced by another one.
#[derive(Debug, Clone)]
pub enum OverwriteReason {
/// The best score was below the configured threshold for the inferred type,
/// so the result fell back to `ContentType::Txt` (text types) or
/// `ContentType::Unknown` (everything else).
LowConfidence,
/// The configuration's overwrite map points the inferred type at a
/// different content type.
OverwriteMap,
}
impl FileType {
    /// Returns the effective content type, if this file type has one.
    ///
    /// Directories and symlinks yield `None`. An inferred type yields its
    /// effective content type (after any overwrite); a ruled type yields the
    /// content type it wraps.
    pub fn content_type(&self) -> Option<ContentType> {
        match self {
            Self::Directory | Self::Symlink => None,
            Self::Inferred(inferred) => Some(inferred.content_type()),
            Self::Ruled(ruled) => Some(*ruled),
        }
    }

    /// Returns the static [`TypeInfo`] describing this file type.
    ///
    /// Directories and symlinks have dedicated entries in `crate::content`;
    /// the other variants defer to the info of their effective content type.
    pub fn info(&self) -> &'static TypeInfo {
        match self {
            Self::Ruled(ruled) => ruled.info(),
            Self::Inferred(inferred) => inferred.content_type().info(),
            Self::Symlink => &crate::content::SYMLINK,
            Self::Directory => &crate::content::DIRECTORY,
        }
    }

    /// Returns the score attached to this file type.
    ///
    /// Only inferred types carry a stored score; every other variant reports
    /// a fixed `1.0`.
    pub fn score(&self) -> f32 {
        match self {
            Self::Inferred(inferred) => inferred.score,
            Self::Directory | Self::Symlink | Self::Ruled(_) => 1.0,
        }
    }
}
impl InferredType {
    /// Returns the effective content type.
    ///
    /// This is the replacement stored in the `content_type` field when one is
    /// present, and `inferred_type` otherwise.
    pub fn content_type(&self) -> ContentType {
        self.content_type
            .as_ref()
            .map_or(self.inferred_type, |(replacement, _)| *replacement)
    }
}
/// Static descriptive metadata for a file type, as returned by
/// `FileType::info`.
///
/// Derives `serde::Serialize` when the `serde` feature is enabled.
// NOTE(review): apart from `is_text`, no field is read in the visible code;
// the per-field descriptions below are taken from the field names and should
// be confirmed against the tables in `crate::content`.
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct TypeInfo {
/// Presumably the short identifier of the type.
pub label: &'static str,
/// Presumably the MIME type string.
pub mime_type: &'static str,
/// Presumably the broad category the type belongs to.
pub group: &'static str,
/// Presumably a human-readable description.
pub description: &'static str,
/// Presumably the file extensions associated with the type.
pub extensions: &'static [&'static str],
/// Whether the type is textual. `FileType::convert` reads this to choose
/// between the `Txt` and `Unknown` low-confidence fallbacks.
pub is_text: bool,
}
impl FileType {
    /// Converts a tensor of per-label scores into one [`FileType::Inferred`]
    /// per entry along axis 0.
    ///
    /// For every row the label with the highest score is selected (on equal
    /// scores the later index wins) and mapped to its content type. That
    /// content type is then possibly replaced:
    ///
    /// * if the score is below the configured threshold for the inferred
    ///   type, the replacement is `ContentType::Txt` for text types and
    ///   `ContentType::Unknown` otherwise (`OverwriteReason::LowConfidence`);
    /// * otherwise the replacement is the entry of the configured overwrite
    ///   map (`OverwriteReason::OverwriteMap`).
    ///
    /// A replacement equal to the inferred type is not recorded.
    ///
    /// # Panics
    ///
    /// Panics if a row cannot be viewed as a slice, if a row is empty, or if
    /// the winning index is not below `crate::model::NUM_LABELS`.
    pub(crate) fn convert(tensor: ArrayViewD<f32>) -> Vec<FileType> {
        let config = &crate::model::CONFIG;
        tensor
            .axis_iter(ndarray::Axis(0))
            .map(|row| {
                let scores = row.to_slice().unwrap();

                // Arg-max over the row. Using `>=` semantics through
                // `f32::max` means a later element with an equal score
                // replaces the current winner.
                let winner = scores.iter().enumerate().fold(
                    0usize,
                    |winner, (index, &candidate)| {
                        if scores[winner].max(candidate) == candidate {
                            index
                        } else {
                            winner
                        }
                    },
                );
                assert!(winner < crate::model::NUM_LABELS);
                let score = scores[winner];

                // SAFETY: `winner < NUM_LABELS` was asserted just above.
                // NOTE(review): soundness additionally assumes `Label` is a
                // fieldless, `u32`-sized enum whose discriminants are exactly
                // `0..NUM_LABELS` — confirm against `crate::model`.
                let label = unsafe { std::mem::transmute::<u32, Label>(winner as u32) };
                let inferred_type = label.content_type();

                let (replacement, reason) = if score < config.thresholds[inferred_type as usize] {
                    let fallback = if inferred_type.info().is_text {
                        ContentType::Txt
                    } else {
                        ContentType::Unknown
                    };
                    (fallback, OverwriteReason::LowConfidence)
                } else {
                    (
                        config.overwrite_map[inferred_type as usize],
                        OverwriteReason::OverwriteMap,
                    )
                };
                // A replacement that matches the inferred type is no
                // overwrite at all, whichever branch produced it.
                let content_type = (replacement != inferred_type).then_some((replacement, reason));

                FileType::Inferred(InferredType {
                    content_type,
                    inferred_type,
                    score,
                })
            })
            .collect()
    }
}