use std::borrow::Cow;
use std::collections::HashSet;
use std::ffi::OsString;
use std::path::PathBuf;
use globset::Glob;
use globset::GlobSet;
use globset::GlobSetBuilder;
use rayon::iter::IntoParallelIterator;
use rayon::iter::ParallelIterator;
use walkdir::WalkDir;
use crate::Database;
use crate::error::DatabaseError;
use crate::exclusion::Exclusion;
use crate::file::File;
use crate::file::FileType;
use crate::utils::read_file;
/// Collects the configuration needed to populate a [`Database`] from files on
/// disk plus optional in-memory sources, and performs the load in one shot via
/// `load`.
pub struct DatabaseLoader {
    /// Workspace root handed to `read_file` for every file that is loaded.
    workspace: PathBuf,
    /// Root paths walked recursively; files found here are loaded as `FileType::Host`.
    paths: Vec<PathBuf>,
    /// Root paths walked recursively; files found here are loaded as `FileType::Vendored`.
    includes: Vec<PathBuf>,
    /// Exclusion rules (concrete paths or glob patterns) applied to both `paths` and `includes`.
    excludes: Vec<Exclusion>,
    /// File extensions (compared against `Path::extension`, i.e. without a leading dot)
    /// that a file must have to be loaded.
    extensions: Vec<String>,
    /// Embedded sources as `(name, contents, file type)` triples, added after the on-disk files.
    memory_sources: Vec<(&'static str, &'static str, FileType)>,
    /// Optional pre-existing database to extend; when `None`, `load` starts from `Database::new()`.
    database: Option<Database>,
}
impl DatabaseLoader {
    /// Creates a loader with no in-memory sources and no pre-existing database.
    ///
    /// * `workspace` - workspace root, forwarded to `read_file` for each loaded file.
    /// * `paths` - roots whose files are loaded as [`FileType::Host`].
    /// * `includes` - roots whose files are loaded as [`FileType::Vendored`].
    /// * `excludes` - path and glob exclusions applied to every walked file.
    /// * `extensions` - accepted file extensions (as returned by `Path::extension`).
    // NOTE(review): five parameters is below clippy's default `too_many_arguments`
    // threshold; the allow is kept in case the project lowers it — confirm.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        workspace: PathBuf,
        paths: Vec<PathBuf>,
        includes: Vec<PathBuf>,
        excludes: Vec<Exclusion>,
        extensions: Vec<String>,
    ) -> Self {
        Self { workspace, paths, includes, excludes, memory_sources: Vec::new(), extensions, database: None }
    }

    /// Registers an embedded source that `load` will add to the database after
    /// all on-disk files.
    pub fn add_memory_source(&mut self, name: &'static str, contents: &'static str, file_type: FileType) {
        self.memory_sources.push((name, contents, file_type));
    }

    /// Consumes the loader and builds the database.
    ///
    /// Files under `paths` are added first (as host files), then files under
    /// `includes` (as vendored files), then the registered in-memory sources.
    ///
    /// # Errors
    ///
    /// Returns an error if an exclusion glob pattern is invalid, if the glob set
    /// cannot be built, or if reading any selected file fails.
    pub fn load(mut self) -> Result<Database, DatabaseError> {
        let mut db = self.database.take().unwrap_or_else(Database::new);

        // Build the lookup structures once; both walks below share them.
        let extensions_set: HashSet<OsString> = self.extensions.iter().map(OsString::from).collect();

        let mut glob_builder = GlobSetBuilder::new();
        for exclusion in &self.excludes {
            if let Exclusion::Pattern(pattern) = exclusion {
                glob_builder.add(Glob::new(pattern)?);
            }
        }
        let glob_excludes = glob_builder.build()?;

        let host_files = self.load_paths(&self.paths, FileType::Host, &extensions_set, &glob_excludes)?;
        let vendored_files = self.load_paths(&self.includes, FileType::Vendored, &extensions_set, &glob_excludes)?;
        for file in host_files.into_iter().chain(vendored_files) {
            db.add(file);
        }

        for (name, contents, file_type) in self.memory_sources {
            db.add(File::new(Cow::Borrowed(name), file_type, None, Cow::Borrowed(contents)));
        }

        Ok(db)
    }

    /// Walks every root in `roots`, keeps the regular files that pass the
    /// extension and exclusion filters, and reads them in parallel.
    ///
    /// A file is skipped when:
    /// * it has no extension or its extension is not in `extensions`;
    /// * its path (as produced by the walk) matches `glob_excludes`;
    /// * its canonical path equals, or lies underneath, a canonicalized
    ///   `Exclusion::Path` entry — so excluding a directory excludes its contents.
    ///
    /// Directory entries that cannot be read during the walk are skipped, and
    /// exclusion paths that cannot be canonicalized (e.g. they do not exist)
    /// are ignored.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `read_file` for a selected file.
    fn load_paths(
        &self,
        roots: &[PathBuf],
        file_type: FileType,
        extensions: &HashSet<OsString>,
        glob_excludes: &GlobSet,
    ) -> Result<Vec<File>, DatabaseError> {
        let path_excludes: Vec<PathBuf> = self
            .excludes
            .iter()
            .filter_map(|exclusion| match exclusion {
                Exclusion::Path(path) => path.canonicalize().ok(),
                _ => None,
            })
            .collect();

        // The directory walk is sequential; only filtering and reading run in parallel.
        let candidates: Vec<PathBuf> = roots
            .iter()
            .flat_map(|root| WalkDir::new(root).into_iter().filter_map(Result::ok))
            .filter(|entry| entry.file_type().is_file())
            .map(|entry| entry.into_path())
            .collect();

        let files = candidates
            .into_par_iter()
            .filter_map(|path| {
                // Cheapest checks first: the extension lookup and glob match are
                // in-memory, whereas canonicalization hits the filesystem.
                let has_accepted_extension = path.extension().is_some_and(|ext| extensions.contains(ext));
                if !has_accepted_extension || glob_excludes.is_match(&path) {
                    return None;
                }

                // `Path::starts_with` compares whole components, so an excluded
                // directory covers everything beneath it while an excluded file
                // still matches only itself. If the file cannot be canonicalized
                // it is kept rather than excluded.
                if !path_excludes.is_empty()
                    && let Ok(canonical) = path.canonicalize()
                    && path_excludes.iter().any(|excluded| canonical.starts_with(excluded))
                {
                    return None;
                }

                Some(read_file(&self.workspace, &path, file_type))
            })
            .collect::<Result<Vec<File>, _>>()?;

        Ok(files)
    }
}