use std::path::Path;
use std::fs::File;
use std::io::Read;
use crate::core::Result;
use super::magic::{MagicPattern, get_magic_patterns, detect_openxml_type, detect_ole2_type};
use super::confidence::calculate_confidence;
/// Identifies which detection stage produced a [`DetectionResult`].
///
/// The enum carries no data, so it is `Copy` and `Hash` in addition to the
/// comparison traits: values can be passed around freely and used as keys in
/// hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DetectionMethod {
    /// The type was recognised from a byte signature in the data.
    MagicBytes,
    /// The type was inferred by inspecting the decoded text content.
    ContentAnalysis,
    /// The type was looked up from the file name extension.
    Extension,
    /// No stage recognised the input; the result is a generic fallback.
    Unknown,
}
/// Outcome of a single type-detection attempt.
#[derive(Debug, Clone)]
pub struct DetectionResult {
    /// MIME type chosen for the input, e.g. `application/octet-stream` when
    /// nothing more specific was recognised.
    pub mime_type: String,
    /// Score attached by the stage that produced the result. Higher values
    /// mean a more trustworthy match (presumably within `0.0..=1.0`; the
    /// literals used in this file are `0.1`, `0.6` and `0.95`).
    pub confidence: f32,
    /// Stage that produced this result.
    pub detected_by: DetectionMethod,
}
/// Common interface for anything that can classify a blob of bytes.
pub trait Detector {
    /// Determines the type of `data`.
    ///
    /// `filename`, when supplied, is an optional hint that implementations
    /// may consult in addition to the bytes themselves. The call always
    /// yields a [`DetectionResult`]; there is no error path.
    fn detect(&self, data: &[u8], filename: Option<&str>) -> DetectionResult;
}
/// File type detector that combines byte signatures, content heuristics and
/// file extensions.
pub struct TypeDetector {
    /// Signature table that is scanned in order; the first matching entry
    /// decides the MIME type.
    magic_patterns: Vec<MagicPattern>,
}
impl TypeDetector {
    /// Signature that starts a ZIP archive; such data is handed to
    /// `detect_openxml_type` for a closer look.
    const ZIP_MAGIC: &'static [u8] = b"PK\x03\x04";

    /// Signature that starts an OLE2 compound file; such data is handed to
    /// `detect_ole2_type` for a closer look.
    const OLE2_MAGIC: &'static [u8] = b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1";

    /// Maximum number of leading bytes read from a file for sniffing.
    const SNIFF_LEN: usize = 8192;

    /// Creates a detector loaded with the patterns from `get_magic_patterns`.
    pub fn new() -> Self {
        Self {
            magic_patterns: get_magic_patterns(),
        }
    }

    /// Classifies `data` using its content only.
    ///
    /// Stages run in this order and the first hit wins:
    /// 1. ZIP container inspection,
    /// 2. OLE2 container inspection,
    /// 3. the magic pattern table,
    /// 4. text content heuristics.
    ///
    /// When nothing matches, the result is `application/octet-stream` with
    /// confidence `0.1` and [`DetectionMethod::Unknown`].
    pub fn detect_from_bytes(&self, data: &[u8]) -> DetectionResult {
        if data.starts_with(Self::ZIP_MAGIC) {
            if let Some(mime_type) = detect_openxml_type(data) {
                return DetectionResult {
                    mime_type,
                    confidence: 0.95,
                    detected_by: DetectionMethod::MagicBytes,
                };
            }
        }

        if data.starts_with(Self::OLE2_MAGIC) {
            if let Some((mime_type, confidence)) = detect_ole2_type(data) {
                return DetectionResult {
                    mime_type,
                    confidence,
                    detected_by: DetectionMethod::MagicBytes,
                };
            }
        }

        if let Some((mime_type, confidence)) = self.check_magic_bytes(data) {
            return DetectionResult {
                mime_type,
                confidence,
                detected_by: DetectionMethod::MagicBytes,
            };
        }

        if let Some((mime_type, confidence)) = self.analyze_content(data) {
            return DetectionResult {
                mime_type,
                confidence,
                detected_by: DetectionMethod::ContentAnalysis,
            };
        }

        DetectionResult {
            mime_type: "application/octet-stream".to_string(),
            confidence: 0.1,
            detected_by: DetectionMethod::Unknown,
        }
    }

    /// Classifies the file at `path`.
    ///
    /// Up to the first 8192 bytes of the file are read and passed through the
    /// same stages as [`TypeDetector::detect_from_bytes`]. Only when the
    /// content is not recognised is the file extension consulted.
    ///
    /// # Errors
    ///
    /// Returns an error when the file cannot be opened or read.
    pub fn detect_from_path(&self, path: &Path) -> Result<DetectionResult> {
        let file = File::open(path)?;
        let mut buffer = Vec::with_capacity(Self::SNIFF_LEN);
        // A single `read` call is allowed to return fewer bytes than are
        // available, so keep reading until the limit or end of file.
        // Widening `usize` to `u64` cannot truncate.
        let mut limited = file.take(Self::SNIFF_LEN as u64);
        limited.read_to_end(&mut buffer)?;

        let sniffed = self.detect_from_bytes(&buffer);
        if sniffed.detected_by != DetectionMethod::Unknown {
            return Ok(sniffed);
        }

        Ok(match self.detect_from_extension(path) {
            Some((mime_type, confidence)) => DetectionResult {
                mime_type,
                confidence,
                detected_by: DetectionMethod::Extension,
            },
            // `sniffed` already is the generic "unknown" fallback.
            None => sniffed,
        })
    }

    /// Returns the MIME type and confidence of the first pattern in the table
    /// that matches `data`, or `None` when no pattern matches.
    fn check_magic_bytes(&self, data: &[u8]) -> Option<(String, f32)> {
        self.magic_patterns
            .iter()
            .find(|pattern| pattern.matches(data))
            .map(|pattern| {
                (
                    pattern.mime_type.clone(),
                    calculate_confidence(DetectionMethod::MagicBytes),
                )
            })
    }

    /// Guesses a textual MIME type from the decoded content of `data`.
    ///
    /// Returns `None` when `data` holds no decodable text. Otherwise the
    /// checks run in this order: JSON (matching outer brackets), markup
    /// (`<...>`), CSS, delimited text (comma or tab in the first of several
    /// lines) and finally plain text.
    fn analyze_content(&self, data: &[u8]) -> Option<(String, f32)> {
        let text = Self::decode_text_prefix(data)?;
        let trimmed = text.trim();

        let has_json_shape = (trimmed.starts_with('{') && trimmed.ends_with('}'))
            || (trimmed.starts_with('[') && trimmed.ends_with(']'));
        if has_json_shape {
            let confidence = calculate_confidence(DetectionMethod::ContentAnalysis);
            return Some(("application/json".to_string(), confidence));
        }

        if trimmed.starts_with('<') && trimmed.contains('>') {
            let confidence = calculate_confidence(DetectionMethod::ContentAnalysis);
            let mime_type = if trimmed.contains("<?xml") {
                "text/xml"
            } else if trimmed.contains("<html") || trimmed.contains("<HTML") {
                "text/html"
            } else if trimmed.contains("<svg") {
                "image/svg+xml"
            } else {
                // Unrecognised markup is reported as generic XML.
                "text/xml"
            };
            return Some((mime_type.to_string(), confidence));
        }

        if self.looks_like_css(text) {
            return Some(("text/css".to_string(), 0.6));
        }

        // Only the first two lines are needed to know that there is more than
        // one line, so avoid counting the whole input.
        let mut lines = text.lines();
        if let (Some(first_line), Some(_)) = (lines.next(), lines.next()) {
            if first_line.contains(',') || first_line.contains('\t') {
                let confidence = calculate_confidence(DetectionMethod::ContentAnalysis);
                return Some(("text/csv".to_string(), confidence));
            }
        }

        let confidence = calculate_confidence(DetectionMethod::ContentAnalysis);
        Some(("text/plain".to_string(), confidence))
    }

    /// Decodes `data` as UTF-8 text, tolerating a character cut off at the end.
    ///
    /// Sniffing works on a size-limited prefix of a file, so the final
    /// multi-byte character may be incomplete. In that case the text before
    /// it is returned. Invalid bytes anywhere else, or an empty result, yield
    /// `None`.
    fn decode_text_prefix(data: &[u8]) -> Option<&str> {
        let text = match std::str::from_utf8(data) {
            Ok(text) => text,
            // `error_len() == None` means the input ended in the middle of a
            // character; everything up to `valid_up_to()` is valid UTF-8.
            Err(err) if err.error_len().is_none() => {
                std::str::from_utf8(&data[..err.valid_up_to()]).ok()?
            }
            Err(_) => return None,
        };

        if text.is_empty() {
            None
        } else {
            Some(text)
        }
    }

    /// Reports whether `text` resembles a CSS style sheet.
    ///
    /// Four independent indicators are evaluated and at least two must be
    /// present. Input shorter than 10 bytes after trimming is never CSS.
    fn looks_like_css(&self, text: &str) -> bool {
        const AT_RULES: [&str; 6] = [
            "@import", "@media", "@charset", "@font-face", "@keyframes", "@supports",
        ];
        const COMMON_PROPERTIES: [&str; 10] = [
            "color:", "background:", "margin:", "padding:", "font-",
            "border:", "width:", "height:", "display:", "position:",
        ];

        let trimmed = text.trim();
        if trimmed.len() < 10 {
            return false;
        }

        let indicators = [
            // A rule block with at least one declaration separator.
            trimmed.contains('{') && trimmed.contains('}') && trimmed.contains(':'),
            AT_RULES.iter().any(|&rule| trimmed.contains(rule)),
            COMMON_PROPERTIES.iter().any(|&prop| trimmed.contains(prop)),
            trimmed.contains(';'),
        ];

        indicators.iter().filter(|&&present| present).count() >= 2
    }

    /// Looks up a MIME type from the extension of `path`.
    ///
    /// The extension is lowercased first, so `JPG` and `jpg` are equivalent.
    /// Returns `None` when there is no extension, it is not valid UTF-8, or
    /// it is not in the table.
    fn detect_from_extension(&self, path: &Path) -> Option<(String, f32)> {
        let extension = path.extension()?.to_str()?.to_lowercase();
        let mime_type = match extension.as_str() {
            // Documents
            "pdf" => "application/pdf",
            "doc" => "application/msword",
            "docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "odt" => "application/vnd.oasis.opendocument.text",
            "rtf" => "application/rtf",
            "xls" => "application/vnd.ms-excel",
            "xlsx" => "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "ods" => "application/vnd.oasis.opendocument.spreadsheet",
            "ppt" => "application/vnd.ms-powerpoint",
            "pptx" => "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "odp" => "application/vnd.oasis.opendocument.presentation",
            // Images
            "jpg" | "jpeg" => "image/jpeg",
            "png" => "image/png",
            "gif" => "image/gif",
            "bmp" => "image/bmp",
            "tiff" | "tif" => "image/tiff",
            "webp" => "image/webp",
            "svg" => "image/svg+xml",
            "ico" => "image/x-icon",
            // Archives
            "zip" => "application/zip",
            "tar" => "application/x-tar",
            "gz" => "application/gzip",
            "bz2" => "application/x-bzip2",
            "7z" => "application/x-7z-compressed",
            "rar" => "application/x-rar-compressed",
            "xz" => "application/x-xz",
            // Text
            "txt" => "text/plain",
            "csv" => "text/csv",
            "json" => "application/json",
            "xml" => "text/xml",
            "html" | "htm" => "text/html",
            "css" => "text/css",
            "md" => "text/markdown",
            // Audio
            "mp3" => "audio/mpeg",
            "wav" => "audio/wav",
            "flac" => "audio/x-flac",
            "ogg" => "audio/ogg",
            "m4a" => "audio/x-m4a",
            // Video
            "mp4" => "video/mp4",
            "avi" => "video/x-msvideo",
            "mkv" => "video/x-matroska",
            "webm" => "video/webm",
            "mov" => "video/quicktime",
            _ => return None,
        };
        let confidence = calculate_confidence(DetectionMethod::Extension);
        Some((mime_type.to_string(), confidence))
    }
}
impl Default for TypeDetector {
fn default() -> Self {
Self::new()
}
}
impl Detector for TypeDetector {
    /// Classifies `data`, using `filename` only as a last resort.
    ///
    /// The content-based stages of `detect_from_bytes` run first. If they do
    /// not recognise the data and a file name was supplied, its extension is
    /// looked up. When that fails too, the generic "unknown" result from the
    /// content stages is returned unchanged.
    fn detect(&self, data: &[u8], filename: Option<&str>) -> DetectionResult {
        let sniffed = self.detect_from_bytes(data);
        if sniffed.detected_by != DetectionMethod::Unknown {
            return sniffed;
        }

        let from_name = filename.and_then(|name| self.detect_from_extension(Path::new(name)));
        match from_name {
            Some((mime_type, confidence)) => DetectionResult {
                mime_type,
                confidence,
                detected_by: DetectionMethod::Extension,
            },
            None => sniffed,
        }
    }
}