use std::io::{Read, Seek};
use std::path::Path;
use std::fs::File;
// Format-specific detectors. The comments below list only the items this file
// uses from each submodule; the submodules may export more.

/// OLE2 container detection; this file calls `detect_ole2_format` and
/// `detect_ole2_format_from_reader`.
pub mod ole2;
/// ZIP-container detection entry points used here: `detect_zip_format` and
/// `detect_zip_format_from_reader`.
pub mod ooxml;
/// ODF detection; this file calls `detect_odf_format_from_reader`.
pub mod odf;
/// iWork detection; this file calls `detect_iwork_format`,
/// `detect_iwork_format_from_reader`, `detect_iwork_format_from_path` and
/// `detect_application_from_zip_archive`.
pub mod iwork;
/// Shared helpers; this file reads the `OLE2_SIGNATURE` and `ZIP_SIGNATURE`
/// constants from it.
pub mod utils;
/// Document format reported by the detection functions in this module.
///
/// NOTE(review): the variant names appear to mirror the conventional file
/// extension of each format (for example `Docx` for `.docx`). Which content
/// maps to which variant is decided inside the `ole2`, `ooxml`, `odf` and
/// `iwork` submodules — confirm there before relying on a specific mapping.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileFormat {
// Presumably the Microsoft Office family (legacy and XML-based) — TODO confirm.
Doc,
Docx,
Ppt,
Pptx,
Xls,
Xlsx,
Xlsb,
// Presumably the Apple iWork family — TODO confirm.
Pages,
Keynote,
Numbers,
// Presumably the OpenDocument family — TODO confirm.
Odt,
Ods,
Odp,
}
/// Detects the format of the file located at `path` by inspecting its content.
///
/// The file is opened read-only and handed to [`detect_format_from_reader`].
///
/// Returns `None` when the file cannot be opened or when the reader-based
/// detection does not recognise the content.
pub fn detect_file_format<P: AsRef<Path>>(path: P) -> Option<FileFormat> {
    match File::open(path) {
        Ok(mut handle) => detect_format_from_reader(&mut handle),
        // Any open failure (missing file, permissions, ...) is reported as "unknown".
        Err(_) => None,
    }
}
/// Detects the file format from an in-memory byte slice.
///
/// Dispatch is based on the leading bytes:
///
/// * fewer than 4 bytes: `None`;
/// * first 8 bytes equal `utils::OLE2_SIGNATURE`: `ole2::detect_ole2_format`;
/// * first 4 bytes equal `utils::ZIP_SIGNATURE`: `ooxml::detect_zip_format`;
/// * anything else: `iwork::detect_iwork_format`.
///
/// NOTE(review): the ZIP branch relies solely on `ooxml::detect_zip_format`,
/// whereas `detect_format_from_reader` additionally tries the iWork and ODF
/// detectors for ZIP input — confirm the byte-based OOXML detector covers
/// those cases too.
pub fn detect_file_format_from_bytes(bytes: &[u8]) -> Option<FileFormat> {
    // Every supported signature needs at least four bytes.
    let magic = bytes.get(..4)?;

    // The OLE2 signature is eight bytes long; shorter input cannot match it.
    let is_ole2 = bytes
        .get(..8)
        .map_or(false, |prefix| prefix == utils::OLE2_SIGNATURE);

    if is_ole2 {
        ole2::detect_ole2_format(bytes)
    } else if magic == utils::ZIP_SIGNATURE {
        ooxml::detect_zip_format(bytes)
    } else {
        iwork::detect_iwork_format(bytes)
    }
}
/// Detects the file format from the content available through `reader`.
///
/// The first 8 bytes are read as a header and compared against
/// `utils::OLE2_SIGNATURE` and `utils::ZIP_SIGNATURE`. The reader is
/// repositioned to offset 0 before a format-specific detector runs:
///
/// * OLE2 header: delegates to `ole2::detect_ole2_format_from_reader`.
/// * ZIP header: if the archive lists any entry whose name ends in `.iwa`,
///   the result of `iwork::detect_application_from_zip_archive` is returned
///   as-is (including `None`). Otherwise the OOXML detector is tried first
///   and the ODF detector second.
/// * Any other header: delegates to `iwork::detect_iwork_format_from_reader`.
///
/// Returns `None` when fewer than 8 bytes can be read, when the reader cannot
/// be repositioned to offset 0, or when no detector recognises the content.
pub fn detect_format_from_reader<R: Read + Seek>(reader: &mut R) -> Option<FileFormat> {
    let mut header = [0u8; 8];
    reader.read_exact(&mut header).ok()?;

    // A failed rewind would leave the detectors reading from an unknown
    // offset, so treat it as "cannot detect" instead of silently continuing.
    reader.seek(std::io::SeekFrom::Start(0)).ok()?;

    if &header[0..8] == utils::OLE2_SIGNATURE {
        return ole2::detect_ole2_format_from_reader(reader);
    }

    if &header[0..4] == utils::ZIP_SIGNATURE {
        // iWork documents are recognised by the presence of `.iwa` entries.
        // Only the entry names are needed, so they are taken from the archive
        // listing rather than by opening every entry.
        if let Ok(mut zip_archive) = zip::ZipArchive::new(&mut *reader) {
            let has_iwa_files = zip_archive
                .file_names()
                .any(|name| name.ends_with(".iwa"));
            if has_iwa_files {
                return iwork::detect_application_from_zip_archive(&mut zip_archive);
            }
        }

        // Parsing the archive moved the cursor; rewind before each detector.
        reader.seek(std::io::SeekFrom::Start(0)).ok()?;
        if let Some(format) = ooxml::detect_zip_format_from_reader(reader) {
            return Some(format);
        }

        reader.seek(std::io::SeekFrom::Start(0)).ok()?;
        return odf::detect_odf_format_from_reader(reader);
    }

    iwork::detect_iwork_format_from_reader(reader)
}
/// Detects an iWork format for the document at `path`.
///
/// Thin public wrapper that forwards the path to
/// `iwork::detect_iwork_format_from_path` and returns its result unchanged.
pub fn detect_iwork_format_from_path<P: AsRef<Path>>(path: P) -> Option<FileFormat> {
    let document_path: &Path = path.as_ref();
    iwork::detect_iwork_format_from_path(document_path)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Builds an in-memory ZIP archive containing a `[Content_Types].xml`
    /// entry and a `word/document.xml` entry, used as a minimal DOCX fixture.
    fn create_minimal_docx_zip() -> Vec<u8> {
        use std::io::Write;

        let content_types: &[u8] =
            b"<Types><Default Extension=\"xml\" ContentType=\"application/xml\"/></Types>";
        let document: &[u8] = b"<document><body><p>Hello</p></body></document>";

        let mut archive_bytes = Vec::new();
        {
            // Scope the writer so its borrow of `archive_bytes` ends before
            // the buffer is returned.
            let mut writer = zip::ZipWriter::new(Cursor::new(&mut archive_bytes));
            let options = zip::write::SimpleFileOptions::default();
            for (entry_name, payload) in [
                ("[Content_Types].xml", content_types),
                ("word/document.xml", document),
            ] {
                writer.start_file(entry_name, options).unwrap();
                writer.write_all(payload).unwrap();
            }
            writer.finish().unwrap();
        }
        archive_bytes
    }

    /// A ZIP archive with the minimal DOCX entries is reported as `Docx`.
    #[test]
    fn test_detect_docx_from_bytes() {
        let archive = create_minimal_docx_zip();
        assert_eq!(
            detect_file_format_from_bytes(&archive),
            Some(FileFormat::Docx)
        );
    }

    /// Input consisting of just the OLE2 signature yields some format.
    #[test]
    fn test_detect_ole2_from_bytes() {
        let signature_only = utils::OLE2_SIGNATURE.to_vec();
        let detected = detect_file_format_from_bytes(&signature_only);
        assert!(detected.is_some());
    }

    /// The path `test.pages` is expected not to resolve to a detectable
    /// document, so detection yields `None`.
    #[test]
    fn test_detect_iwork_pages() {
        let detected = detect_iwork_format_from_path(std::path::Path::new("test.pages"));
        assert_eq!(detected, None);
    }
}