use std::collections::{BTreeSet, HashMap};
use std::fs::File;
use std::io::{self, Read, Seek, SeekFrom};
use std::path::Path;
use std::str::FromStr;
use cfg_if::cfg_if;
use itertools::{Either, Itertools};
use log::{debug, error};
use mime::Mime;
use mime_guess::from_ext;
use once_cell::sync::Lazy;
use parking_lot::RwLock;
use walkdir::{DirEntry, WalkDir};
use crate::findings::{Findings, ScanError};
use crate::mime_db::MimeDb;
use crate::parameters::ScanOpts;
use crate::utils::APPLICATION_ZIP;
use crate::{String, MIMEDB};
/// Process-wide cache used by [`mime_extension_lookup`]: maps a MIME essence string to the list of
/// extensions computed for it (`None` is cached too, when no extensions could be determined).
/// Guarded by an `RwLock` and created lazily on first access.
static MIMEXT: Lazy<RwLock<HashMap<String, Option<Vec<String>>>>> = Lazy::new(|| RwLock::new(HashMap::new()));
/// Size, in bytes, of the buffer used for the first read performed by [`mime_type`].
pub const INITIAL_BUF_SIZE: usize = 128;
/// Size, in bytes, of the larger buffer [`mime_type`] uses when the first read was not conclusive.
pub const BUF_SIZE: usize = 8192;
/// The `application/x-ole-storage` MIME type, parsed once on first use. [`mime_type`] treats it as a
/// type that warrants reading further into the file. The `unwrap` only guards the parse of this
/// constant literal.
static APPLICATION_X_OLE_STORAGE: Lazy<Mime> = Lazy::new(|| Mime::from_str("application/x-ole-storage").unwrap());
cfg_if! {
    if #[cfg(windows)] {
        /// Reports whether `entry` is hidden.
        ///
        /// On Windows this looks up the file's metadata and checks the `FILE_ATTRIBUTE_HIDDEN` bit.
        /// If the metadata cannot be read, the entry is treated as not hidden.
        pub fn is_hidden(entry: &DirEntry) -> bool {
            use std::os::windows::prelude::*;
            const FILE_ATTRIBUTE_HIDDEN: u32 = 0x2;

            match std::fs::metadata(entry.path()) {
                Ok(meta) => (meta.file_attributes() & FILE_ATTRIBUTE_HIDDEN) != 0,
                Err(_) => false,
            }
        }
    } else {
        /// Reports whether `entry` is hidden.
        ///
        /// On non-Windows targets an entry is hidden when its file name starts with a dot, with the
        /// exception of the name `.` itself. Names that are not valid UTF-8 are treated as not hidden.
        pub fn is_hidden(entry: &DirEntry) -> bool {
            match entry.file_name().to_str() {
                Some(name) => name != "." && name.starts_with('.'),
                None => false,
            }
        }
    }
}
/// Decides whether `entry` should be kept while walking a directory tree.
///
/// The checks are applied in this order:
/// 1. the walk root (depth 0) is always kept;
/// 2. hidden entries are dropped unless `scan_opts.hidden` is set;
/// 3. directories are always kept;
/// 4. files without an extension are kept only if `scan_opts.extensionless` is set;
/// 5. if `scan_opts.ignore_unknown_exts` is set, files whose (lowercased) extension is unknown to
///    `mime_guess` are dropped;
/// 6. if `exts` is given, the extension must be in it (`exclude` is not consulted in that case);
///    otherwise the extension must not be in `exclude`, if that is given.
pub fn wanted_file(
    entry: &DirEntry,
    exts: Option<&BTreeSet<&str>>,
    exclude: Option<&BTreeSet<&str>>,
    scan_opts: &ScanOpts,
) -> bool {
    if entry.depth() == 0 {
        return true;
    }

    if !scan_opts.hidden && is_hidden(entry) {
        return false;
    }

    if entry.file_type().is_dir() {
        return true;
    }

    // Extensions are compared case-insensitively, so normalise to lowercase up front.
    let lowered = match entry.path().extension() {
        Some(raw) => raw.to_string_lossy().to_lowercase(),
        None => return scan_opts.extensionless,
    };
    let ext = lowered.as_str();

    if scan_opts.ignore_unknown_exts && from_ext(ext).is_empty() {
        return false;
    }

    match (exts, exclude) {
        // An explicit allow-list takes precedence over the deny-list.
        (Some(wanted), _) => wanted.contains(&ext),
        (None, Some(unwanted)) => !unwanted.contains(&ext),
        (None, None) => true,
    }
}
/// Inspects a single file and reports whether its extension agrees with its detected MIME type.
///
/// # Errors
/// - [`ScanError::File`] if the file could not be read, or if `canonical_paths` is set and the
///   path could not be canonicalised.
/// - [`ScanError::Mime`] if the file was read but no MIME type could be determined.
///
/// # Returns
/// A [`Findings`] holding the (optionally canonicalised) path, the detected MIME type, and a
/// `valid` flag. `valid` is `true` when the lowercased extension is one of a few temporary/backup
/// extensions, or when it appears in the extension list returned by [`mime_extension_lookup`] for
/// the detected type. Files without an extension are never `valid`.
pub fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result<Findings, ScanError> {
    // Extensions of backup / partially-downloaded files; these are accepted whatever the content is.
    const TRANSIENT_EXTS: [&str; 5] = ["bak", "backup", "filepart", "part", "crdownload"];

    let path = entry.path();

    let detected = mime_type(&*MIMEDB, path)
        .map_err(|_| ScanError::File(path))?
        .ok_or(ScanError::Mime(path))?;

    let valid = match path.extension() {
        None => false,
        Some(raw_ext) => {
            let lowered = raw_ext.to_string_lossy().to_lowercase();
            if TRANSIENT_EXTS.iter().any(|transient| *transient == lowered) {
                true
            } else {
                // Only consult the lookup when the extension is not one of the transient ones.
                mime_extension_lookup(detected.essence_str().into())
                    .map_or(false, |known| known.contains(&lowered.into()))
            }
        }
    };

    let file = if canonical_paths {
        std::fs::canonicalize(path).map_err(|_| ScanError::File(entry.path()))?
    } else {
        path.to_path_buf()
    };

    Ok(Findings {
        file,
        valid,
        mime: detected,
    })
}
/// Runs [`scan_file`] over every entry and splits the outcomes into successes and failures.
///
/// When the `multi-threaded` feature is enabled, `use_threads` is set, and there are more than
/// 32 entries, the entries are processed in parallel in chunks of 32 using rayon. In every
/// other case they are processed sequentially on the calling thread.
///
/// # Panics
/// When built without the `multi-threaded` feature, panics if `use_threads` is `true`.
pub fn scan_from_walkdir(
    entries: &[DirEntry],
    canonical_paths: bool,
    use_threads: bool,
) -> (Vec<Findings>, Vec<ScanError>) {
    cfg_if! {
        if #[cfg(feature = "multi-threaded")] {
            use rayon::prelude::*;
            const CHUNKS: usize = 32;

            if use_threads && entries.len() > CHUNKS {
                return entries
                    .par_chunks(CHUNKS)
                    .flat_map_iter(|batch| {
                        // Scan one whole chunk on the current worker thread.
                        let mut outcomes = Vec::with_capacity(batch.len());
                        for entry in batch {
                            outcomes.push(scan_file(entry, canonical_paths));
                        }
                        outcomes
                    })
                    .partition_map(|outcome| match outcome {
                        Ok(findings) => Either::Left(findings),
                        Err(failure) => Either::Right(failure),
                    });
            }
        } else {
            assert!(!use_threads)
        }
    }

    // Sequential fallback: scan in order, sending successes left and failures right.
    entries
        .iter()
        .map(|entry| scan_file(entry, canonical_paths))
        .partition_map(|outcome| match outcome {
            Ok(findings) => Either::Left(findings),
            Err(failure) => Either::Right(failure),
        })
}
pub fn scan_directory(
dirs: &Path,
exts: Option<&BTreeSet<&str>>,
exclude: Option<&BTreeSet<&str>>,
scan_opts: &ScanOpts,
) -> Option<Vec<DirEntry>> {
let stepper = WalkDir::new(dirs).follow_links(scan_opts.follow_symlinks).into_iter();
let mut probably_fatal_error = false;
let entries: Vec<DirEntry> = stepper
.filter_entry(|e| wanted_file(e, exts, exclude, scan_opts)) .filter_map(|e| {
if let Err(err) = &e {
debug!("uh oh spaghettio!! {:#?}", e);
let path = err.path().map_or("General error".into(), Path::to_string_lossy);
if err.depth() == 0 {
probably_fatal_error = true;
}
error!("{}: {}", path, err.io_error().map_or(err.to_string(), |e| e.to_string()));
return None;
}
e.ok()
})
.filter(|e| !e.file_type().is_dir())
.filter(|e| scan_opts.follow_symlinks || !e.file_type().is_symlink())
.collect();
if probably_fatal_error {
None
} else {
Some(entries)
}
}
/// Fills `buf` from `reader`, stopping only when the buffer is full or end-of-file is reached.
///
/// Reads that fail with [`io::ErrorKind::Interrupted`] are retried; any other error is returned.
/// Returns the number of bytes written to the front of `buf`.
fn read_prefix<R: Read>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut filled = 0;
    while filled < buf.len() {
        match reader.read(&mut buf[filled..]) {
            Ok(0) => break, // end of file
            Ok(n) => filled += n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
            Err(e) => return Err(e),
        }
    }
    Ok(filled)
}

/// Determines the MIME type of the file at `path` by inspecting its leading bytes with `db`.
///
/// The first [`INITIAL_BUF_SIZE`] bytes are examined first. If the file is shorter than that, or
/// the detected type is anything other than `text/xml`, `application/zip` or
/// `application/x-ole-storage`, that result is returned. Otherwise (no type found, or one of
/// those three types was found) the first [`BUF_SIZE`] bytes are read and examined instead.
///
/// # Errors
/// Returns any I/O error raised while opening, seeking or reading the file. Interrupted reads
/// are retried rather than reported.
///
/// # Returns
/// `Ok(Some(mime))` with the detected type, or `Ok(None)` if `db` could not identify the data.
pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
    let mut file = File::open(path)?;

    // Start with a small read; for most files this is enough.
    let mut buffer = [0; INITIAL_BUF_SIZE];
    file.seek(SeekFrom::Start(0))?;
    let read = read_prefix(&mut file, &mut buffer)?;

    // The whole (zero-padded) buffer is handed to the database, not just the bytes read.
    let r = db.get_type(&buffer);

    // `read_prefix` only stops early at end-of-file, so a short count means the whole file has
    // already been examined and reading again would not reveal anything new.
    if read < INITIAL_BUF_SIZE {
        return Ok(r);
    }

    // These three types are re-examined with more data even though a type was already found.
    let r = r.filter(|mime| {
        mime != &mime::TEXT_XML
            && mime != &*APPLICATION_ZIP
            && mime != &*APPLICATION_X_OLE_STORAGE
    });

    if r.is_some() {
        return Ok(r);
    }

    // Re-read from the start into the larger buffer. Unlike a single `read` call, this retries
    // interrupted reads and keeps going after short reads, so the buffer holds as much of the
    // file as is available (up to BUF_SIZE bytes).
    let mut buffer = [0; BUF_SIZE];
    file.seek(SeekFrom::Start(0))?;
    read_prefix(&mut file, &mut buffer)?;
    Ok(db.get_type(&buffer))
}
/// Returns the file extensions associated with the MIME type whose essence string is `essence`.
///
/// Results (including `None`) are cached in `MIMEXT`, so `mime_guess` is consulted at most once
/// per essence string per lookup miss.
///
/// If `mime_guess` does not know `essence`, a second attempt is made with the `x-` subtype prefix
/// toggled (removed if `essence` contains `/x-`, otherwise inserted after the `/`). If both
/// attempts fail, `None` is returned.
///
/// For a handful of types, hand-picked extensions are placed in front of the ones reported by
/// `mime_guess`; for `application/msword` the reported extensions are replaced outright.
pub fn mime_extension_lookup(essence: String) -> Option<Vec<String>> {
    /// Converts a slice of string slices into owned strings, preserving order.
    fn owned(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| String::from(*name)).collect()
    }

    // Serve from the cache when this essence has been looked up before.
    if let Some(cached) = MIMEXT.read().get(&essence) {
        return cached.clone();
    }

    let guessed = mime_guess::get_mime_extensions_str(essence.as_str()).or_else(|| {
        // Retry with the "x-" prefix toggled on the subtype.
        let toggled = if essence.contains("/x-") {
            essence.replace("/x-", "/")
        } else {
            essence.replace('/', "/x-")
        };
        mime_guess::get_mime_extensions_str(&toggled)
    });

    let exts = guessed.map(|known| {
        // Extensions that go ahead of whatever mime_guess reported for this type.
        let preferred: &[&str] = match essence.as_str() {
            e if e == mime::IMAGE_JPEG.essence_str() => &["jpg"],
            e if e == mime::TEXT_XML.essence_str() || e == "application/xml" => &["xml", "svg"],
            // For this type the reported extensions are discarded entirely.
            "application/msword" => return owned(&["doc", "xls", "ppt"]),
            "application/zip" => &[
                "zip", "docx", "xlsx", "pptx", "apk", "ipa", "docbook", "kdenlive", "vcpkg",
                "nupkg", "whl", "xpi",
            ],
            "application/x-ms-dos-executable" => &[
                "exe", "dll", "scr", "com", "dll16", "drv", "drv16", "cpl", "msstyles", "sys",
            ],
            _ => &[],
        };

        let mut combined = owned(preferred);
        combined.extend(owned(known));
        combined
    });

    MIMEXT.write().insert(essence, exts.clone());
    exts
}