/// Broad category of tabular input file recognised by the detector.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileType {
    /// Delimited text; selected for the `csv`, `tsv`, `txt`, `dat` and `tab`
    /// extensions.
    Csv,
    /// Spreadsheet workbook; selected for ZIP / OLE2 content signatures and
    /// for the `xlsx`, `xls`, `xlsm`, `xlsb` and `ods` extensions.
    Excel,
}
impl FileType {
#[must_use]
pub const fn name(&self) -> &'static str {
match self {
Self::Csv => "CSV",
Self::Excel => "Excel",
}
}
}
/// Leading byte signatures used to recognise binary spreadsheet containers.
mod magic {
    /// ZIP signature: ASCII `PK` followed by `0x03 0x04`.
    pub const ZIP: &[u8] = b"PK\x03\x04";

    /// Eight-byte OLE2 signature.
    pub const OLE2: &[u8] = b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1";
}
/// Determines the type of a file from its bytes and its name.
///
/// The content signature is consulted first; the filename extension is only
/// used as a fallback when no known signature is found, so recognised magic
/// bytes take precedence over a conflicting extension.
///
/// Returns `None` when neither the content nor the extension is recognised.
#[must_use]
pub fn detect_file_type(content: &[u8], filename: &str) -> Option<FileType> {
    detect_from_magic_bytes(content).or_else(|| detect_from_extension(filename))
}
/// Classifies `content` by its leading bytes.
///
/// Returns `Some(FileType::Excel)` when the data begins with either the ZIP
/// or the OLE2 signature, and `None` otherwise. Any data starting with the
/// ZIP signature is reported as Excel; nothing beyond the prefix is inspected.
fn detect_from_magic_bytes(content: &[u8]) -> Option<FileType> {
    // `starts_with` is already false when `content` is shorter than the
    // signature, so no separate length guards are required.
    let is_spreadsheet_container =
        content.starts_with(magic::ZIP) || content.starts_with(magic::OLE2);

    if is_spreadsheet_container {
        Some(FileType::Excel)
    } else {
        None
    }
}
/// Classifies a file by the extension of `filename`.
///
/// The extension is the text after the last `.` and is compared ASCII
/// case-insensitively. A name that contains no `.` has no extension and
/// yields `None`, so a file literally named `csv` or `xlsx` is not mistaken
/// for one carrying that extension.
///
/// Returns `None` for unrecognised or missing extensions.
fn detect_from_extension(filename: &str) -> Option<FileType> {
    // `rsplit_once` returns `None` when there is no dot at all, whereas
    // `rsplit('.').next()` would hand back the entire name in that case.
    let (_, extension) = filename.rsplit_once('.')?;

    match extension.to_ascii_lowercase().as_str() {
        "csv" | "tsv" | "txt" | "dat" | "tab" => Some(FileType::Csv),
        "xlsx" | "xls" | "xlsm" | "xlsb" | "ods" => Some(FileType::Excel),
        _ => None,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// ZIP signature followed by two padding bytes.
    const ZIP_PREFIX: [u8; 6] = [0x50, 0x4B, 0x03, 0x04, 0x00, 0x00];

    #[test]
    fn test_detect_xlsx_from_magic() {
        let detected = detect_file_type(&ZIP_PREFIX, "unknown");
        assert_eq!(detected, Some(FileType::Excel));
    }

    #[test]
    fn test_detect_xls_from_magic() {
        let ole2_header = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1];
        let detected = detect_file_type(&ole2_header, "unknown");
        assert_eq!(detected, Some(FileType::Excel));
    }

    #[test]
    fn test_detect_csv_from_extension() {
        let content = b"a,b,c\n1,2,3";
        for name in &["data.csv", "data.CSV", "data.tsv", "data.txt"] {
            assert_eq!(
                detect_file_type(content, name),
                Some(FileType::Csv),
                "filename: {}",
                name
            );
        }
    }

    #[test]
    fn test_detect_excel_from_extension() {
        // The content carries no known signature, so only the extension decides.
        let content = b"not actually excel content";
        for name in &["data.xlsx", "data.xls", "data.xlsm", "data.ods"] {
            assert_eq!(
                detect_file_type(content, name),
                Some(FileType::Excel),
                "filename: {}",
                name
            );
        }
    }

    #[test]
    fn test_unsupported_extension() {
        let content = b"some content";
        for name in &["data.json", "data.xml", "data.parquet"] {
            assert_eq!(detect_file_type(content, name), None, "filename: {}", name);
        }
    }

    #[test]
    fn test_magic_bytes_override_extension() {
        // A ZIP signature wins even when the extension claims CSV.
        let detected = detect_file_type(&ZIP_PREFIX, "data.csv");
        assert_eq!(detected, Some(FileType::Excel));
    }
}