use std::io::{BufRead, BufReader, Error, ErrorKind, Read, Result, Seek, SeekFrom};
/// Convenience helpers for seekable readers, used by the format-specific detectors.
///
/// Every `Read + Seek` type gets these methods through the blanket implementation
/// that follows the trait definition.
trait AdvancedRead: Read + Seek {
    /// Reads two bytes at the current position and decodes them as a little-endian `u16`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `read_exact`, which includes reaching the end of the
    /// stream before two bytes were read.
    #[inline]
    fn read_le_u16(&mut self) -> Result<u16> {
        let mut buffer = [0; 2];
        self.read_exact(&mut buffer)?;
        Ok(u16::from_le_bytes(buffer))
    }

    /// Reads four bytes at the current position and decodes them as a little-endian `u32`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `read_exact`, which includes reaching the end of the
    /// stream before four bytes were read.
    #[inline]
    fn read_le_u32(&mut self) -> Result<u32> {
        let mut buffer = [0; 4];
        self.read_exact(&mut buffer)?;
        Ok(u32::from_le_bytes(buffer))
    }

    /// Rewinds to the start of the stream and reports whether `bytes` occurs anywhere
    /// within the first `size` bytes.
    ///
    /// An empty `bytes` pattern is always reported as found. The stream position after
    /// the call is unspecified; callers are expected to seek before reading again.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while rewinding or reading.
    #[inline]
    fn contains(&mut self, bytes: &[u8], size: u64) -> Result<bool> {
        self.rewind()?;
        // `slice::windows` panics on a zero window size, so answer the trivial case here.
        if bytes.is_empty() {
            return Ok(true);
        }
        // Read the whole requested range. A single buffered fill would only expose the
        // bytes returned by one underlying read, silently ignoring larger `size` values.
        let mut haystack = Vec::new();
        self.take(size).read_to_end(&mut haystack)?;
        Ok(haystack
            .windows(bytes.len())
            .any(|window| window == bytes))
    }
}

impl<R: Read + Seek + ?Sized> AdvancedRead for R {}
impl crate::FileFormat {
/// Refines a Compound File Binary detection by inspecting the CLSID of the root entry.
///
/// The reader is opened as a compound file and the root entry's CLSID, rendered as a
/// string, is compared against a fixed list of known identifiers. Any CLSID that is not
/// in the list yields the generic `CompoundFileBinary` format.
///
/// # Errors
///
/// Returns an error if `cfb::CompoundFile::open` fails on the reader.
#[cfg(feature = "cfb")]
pub(crate) fn from_cfb<R: Read + Seek>(reader: &mut BufReader<R>) -> Result<Self> {
    let compound_file = cfb::CompoundFile::open(reader)?;
    let clsid = compound_file.root_entry().clsid().to_string();
    let format = match clsid.as_str() {
        // Two distinct CLSIDs both map to the Excel spreadsheet format.
        "00020810-0000-0000-c000-000000000046" | "00020820-0000-0000-c000-000000000046" => {
            Self::MicrosoftExcelSpreadsheet
        }
        "64818d10-4f9b-11cf-86ea-00aa00b929e8" => Self::MicrosoftPowerpointPresentation,
        "74b78f3a-c8c8-11d1-be11-00c04fb6faf1" => Self::MicrosoftProjectPlan,
        "00021201-0000-0000-00c0-000000000046" => Self::MicrosoftPublisherDocument,
        "000c1084-0000-0000-c000-000000000046" => Self::MicrosoftSoftwareInstaller,
        "00021a14-0000-0000-c000-000000000046" => Self::MicrosoftVisioDrawing,
        "00020906-0000-0000-c000-000000000046" => Self::MicrosoftWordDocument,
        _ => Self::CompoundFileBinary,
    };
    Ok(format)
}
/// Distinguishes WebM from generic Matroska video.
///
/// Searches from the start of the stream, up to `SEARCH_LIMIT` bytes, for the ASCII
/// marker `webm`. The limit is larger when the `accuracy` feature is enabled.
///
/// # Errors
///
/// Propagates any error raised while seeking or reading.
pub(crate) fn from_mkv<R: Read + Seek>(reader: &mut BufReader<R>) -> Result<Self> {
    const SEARCH_LIMIT: u64 = if cfg!(feature = "accuracy") {
        4096
    } else {
        1024
    };
    let format = match reader.contains(b"webm", SEARCH_LIMIT)? {
        true => Self::Webm,
        false => Self::MatroskaVideo,
    };
    Ok(format)
}
/// Refines an MS-DOS executable detection into a Portable Executable or DLL when a
/// `PE\0\0` signature is present.
///
/// The 32-bit little-endian value stored at offset `0x3C` is used as the file offset of
/// a candidate signature. If the four bytes found there are not `PE\0\0`, the file is
/// reported as a plain MS-DOS executable. Otherwise the 16-bit field located `0x12`
/// bytes after the signature is read, and bit `0x2000` of that field selects between
/// `DynamicLinkLibrary` and `PortableExecutable`.
///
/// # Errors
///
/// Propagates any seek or read error, including a stream that ends before one of the
/// fields could be read in full.
pub(crate) fn from_ms_dos_exe<R: Read + Seek>(reader: &mut BufReader<R>) -> Result<Self> {
    /// File offset of the 32-bit pointer to the candidate signature.
    const HEADER_POINTER_OFFSET: u64 = 0x3C;
    /// The bytes `PE\0\0` interpreted as a little-endian `u32`.
    const PE_SIGNATURE: u32 = 0x0000_4550;
    /// Distance from the end of the signature to the 16-bit flags field.
    const FLAGS_DISTANCE: i64 = 0x12;
    /// Flag bit that marks the image as a dynamic-link library.
    const DLL_FLAG: u16 = 0x2000;

    reader.seek(SeekFrom::Start(HEADER_POINTER_OFFSET))?;
    let header_offset = u64::from(reader.read_le_u32()?);
    reader.seek(SeekFrom::Start(header_offset))?;

    let signature = reader.read_le_u32()?;
    if signature != PE_SIGNATURE {
        return Ok(Self::MsDosExecutable);
    }

    reader.seek(SeekFrom::Current(FLAGS_DISTANCE))?;
    let flags = reader.read_le_u16()?;
    if flags & DLL_FLAG != 0 {
        Ok(Self::DynamicLinkLibrary)
    } else {
        Ok(Self::PortableExecutable)
    }
}
/// Distinguishes Adobe Illustrator artwork from a regular PDF document.
///
/// Searches from the start of the stream, up to `SEARCH_LIMIT` bytes, for the ASCII
/// marker `AIPrivateData`. The limit is larger when the `accuracy` feature is enabled.
///
/// # Errors
///
/// Propagates any error raised while seeking or reading.
pub(crate) fn from_pdf<R: Read + Seek>(reader: &mut BufReader<R>) -> Result<Self> {
    const SEARCH_LIMIT: u64 = if cfg!(feature = "accuracy") {
        4_194_304
    } else {
        1_048_576
    };
    let format = match reader.contains(b"AIPrivateData", SEARCH_LIMIT)? {
        true => Self::AdobeIllustratorArtwork,
        false => Self::PortableDocumentFormat,
    };
    Ok(format)
}
/// Checks whether the stream looks like plain text.
///
/// Starting at the reader's current position (this function does not seek), at most
/// `READ_LIMIT` bytes are split into lines and at most `LINE_LIMIT` lines are examined.
/// Both limits are larger when the `accuracy` feature is enabled. A line is rejected
/// when it contains a control character that is not whitespace.
///
/// # Errors
///
/// Returns an `InvalidData` error with the message `Invalid chars` when a rejected
/// character is found, and propagates any error produced while reading a line (which
/// includes lines that are not valid UTF-8).
pub(crate) fn from_plain_text<R: Read + Seek>(reader: &mut BufReader<R>) -> Result<Self> {
    const READ_LIMIT: u64 = if cfg!(feature = "accuracy") {
        8_388_608
    } else {
        1_048_576
    };
    const LINE_LIMIT: usize = if cfg!(feature = "accuracy") {
        256
    } else {
        32
    };

    for line in reader.take(READ_LIMIT).lines().take(LINE_LIMIT) {
        let text = line?;
        let has_invalid_char = text
            .chars()
            .any(|character| character.is_control() && !character.is_whitespace());
        if has_invalid_char {
            return Err(Error::new(ErrorKind::InvalidData, "Invalid chars"));
        }
    }
    Ok(Self::PlainText)
}
/// Refines an XML detection by looking for the opening tag of a known XML dialect.
///
/// Each marker is searched for from the start of the stream, up to `SEARCH_LIMIT` bytes
/// (larger when the `accuracy` feature is enabled). Markers are tried in a fixed order
/// and the first one found decides the result; when none is found the generic
/// `ExtensibleMarkupLanguage` format is returned.
///
/// # Errors
///
/// Propagates any error raised while seeking or reading.
pub(crate) fn from_xml<R: Read + Seek>(reader: &mut BufReader<R>) -> Result<Self> {
    const SEARCH_LIMIT: u64 = if cfg!(feature = "accuracy") {
        1024
    } else {
        256
    };

    // Order matters: the first marker present in the searched range wins.
    let markers = [
        (
            &b"<xsl"[..],
            Self::ExtensibleStylesheetLanguageTransformations,
        ),
        (&b"<gml"[..], Self::GeographyMarkupLanguage),
        (&b"<kml"[..], Self::KeyholeMarkupLanguage),
        (&b"<score-partwise"[..], Self::Musicxml),
        (&b"<rss"[..], Self::ReallySimpleSyndication),
        (&b"<svg"[..], Self::ScalableVectorGraphics),
        (&b"<soap"[..], Self::SimpleObjectAccessProtocol),
    ];
    for (marker, format) in markers {
        if reader.contains(marker, SEARCH_LIMIT)? {
            return Ok(format);
        }
    }
    Ok(Self::ExtensibleMarkupLanguage)
}
/// Refines a ZIP detection by inspecting the names (and, for `mimetype`, the content)
/// of the archive's entries.
///
/// Entries are visited in index order, up to `FILE_LIMIT` entries (larger when the
/// `accuracy` feature is enabled). The first entry that identifies a specific format
/// ends the scan. A `META-INF/MANIFEST.MF` entry only records `JavaArchive` as the
/// fallback result and the scan continues, so a later, more specific entry still takes
/// precedence. When nothing matches, the generic `Zip` format is returned.
///
/// Only the first `MIMETYPE_LIMIT` bytes of a `mimetype` entry are read. The archive
/// is untrusted input, and decompressing such an entry without a bound would let a
/// crafted file force an arbitrarily large allocation. As a consequence, an oversized
/// entry whose cut-off point falls inside a multi-byte character is reported as an error.
///
/// # Errors
///
/// Returns an error if the archive cannot be opened, if an entry cannot be accessed,
/// or if the inspected part of a `mimetype` entry cannot be read as UTF-8.
#[cfg(feature = "zip")]
pub(crate) fn from_zip<R: Read + Seek>(reader: &mut BufReader<R>) -> Result<Self> {
    const FILE_LIMIT: usize = match cfg!(feature = "accuracy") {
        true => 4096,
        false => 1024,
    };
    /// Maximum number of bytes read from a `mimetype` entry; every recognised value
    /// is far shorter than this.
    const MIMETYPE_LIMIT: u64 = 1024;

    let mut archive = zip::ZipArchive::new(reader)?;
    let mut format = Self::Zip;
    for index in 0..archive.len().min(FILE_LIMIT) {
        let file = archive.by_index(index)?;
        match file.name() {
            "AndroidManifest.xml" => return Ok(Self::AndroidPackage),
            "AppManifest.xaml" => return Ok(Self::Xap),
            "AppxManifest.xml" => return Ok(Self::WindowsAppPackage),
            // Not conclusive on its own: keep scanning for a more specific entry.
            "META-INF/MANIFEST.MF" => format = Self::JavaArchive,
            "META-INF/application.xml" => return Ok(Self::EnterpriseApplicationArchive),
            "META-INF/mozilla.rsa" => return Ok(Self::Xpinstall),
            "WEB-INF/web.xml" => return Ok(Self::WebApplicationArchive),
            "doc.kml" => return Ok(Self::KeyholeMarkupLanguageZipped),
            "extension.vsixmanifest" => return Ok(Self::MicrosoftVisualStudioExtension),
            // Bounded read: never decompress more than `MIMETYPE_LIMIT` bytes.
            "mimetype" => match std::io::read_to_string(file.take(MIMETYPE_LIMIT))?.trim() {
                "application/epub+zip" => return Ok(Self::ElectronicPublication),
                "application/vnd.oasis.opendocument.graphics" => {
                    return Ok(Self::OpenDocumentGraphics);
                }
                "application/vnd.oasis.opendocument.presentation" => {
                    return Ok(Self::OpenDocumentPresentation);
                }
                "application/vnd.oasis.opendocument.spreadsheet" => {
                    return Ok(Self::OpenDocumentSpreadsheet);
                }
                "application/vnd.oasis.opendocument.text" => {
                    return Ok(Self::OpenDocumentText);
                }
                "application/vnd.recordare.musicxml" => return Ok(Self::MusicxmlZipped),
                "image/openraster" => return Ok(Self::Openraster),
                _ => {}
            },
            _ => {
                // Formats recognised by a directory prefix rather than an exact name.
                let name = file.name();
                if name.starts_with("circuitdiagram/") {
                    return Ok(Self::CircuitDiagramDocument);
                } else if name.starts_with("dwf/") {
                    return Ok(Self::DesignWebFormatXps);
                } else if name.starts_with("word/") {
                    return Ok(Self::OfficeOpenXmlDocument);
                } else if name.starts_with("visio/") {
                    return Ok(Self::OfficeOpenXmlDrawing);
                } else if name.starts_with("ppt/") {
                    return Ok(Self::OfficeOpenXmlPresentation);
                } else if name.starts_with("xl/") {
                    return Ok(Self::OfficeOpenXmlSpreadsheet);
                } else if name.starts_with("3D/") && name.ends_with(".model") {
                    return Ok(Self::ThreeDimensionalManufacturingFormat);
                } else if name.starts_with("Payload/") && name.contains(".app/") {
                    return Ok(Self::IosAppStorePackage);
                }
            }
        }
    }
    Ok(format)
}
}