use std::{
io,
path::{Path, PathBuf},
sync::mpsc,
};
use ignore::WalkBuilder;
use regex::Regex;
use serde::{Deserialize, Serialize};
/// Collects the files found under a root folder, filtered by a [`Config`].
#[derive(Debug)]
pub struct Collector {
/// Root folder that is walked; the constructors store it in canonicalized form.
pub folder: PathBuf,
/// Include/exclude patterns applied to each file path relative to `folder`.
config: Config,
}
/// Include/exclude regular expressions used by [`Collector`] to filter files.
///
/// Both lists are (de)serialized through `serde_regex`, and the `default`
/// attribute lets either field be omitted from the serialized form.
#[derive(Default, Clone, Debug, Serialize, Deserialize)]
pub struct Config {
/// Patterns a file's path (relative to the collector's folder) must match to
/// be collected. When the list is empty, every file is considered included.
#[serde(with = "serde_regex", default)]
pub includes: Vec<Regex>,
/// Patterns that reject a file when its path (relative to the collector's
/// folder) matches any of them.
#[serde(with = "serde_regex", default)]
pub excludes: Vec<Regex>,
}
impl Collector {
pub fn new(folder: &Path) -> io::Result<Self> {
Ok(Self {
folder: folder.canonicalize()?,
config: Config::default(),
})
}
pub fn from_config(folder: &Path, config: &Config) -> io::Result<Self> {
Ok(Self {
folder: folder.canonicalize()?,
config: config.clone(),
})
}
fn should_exclude(&self, path: &Path) -> bool {
let path = path
.strip_prefix(&self.folder)
.unwrap()
.display()
.to_string();
for exclude in &self.config.excludes {
if exclude.is_match(&path) {
tracing::trace!("file excluded from configurations");
return true;
}
}
false
}
fn should_include(&self, path: &Path) -> bool {
let path = path
.strip_prefix(&self.folder)
.unwrap()
.display()
.to_string();
if self.config.includes.is_empty() {
return true;
}
for include in &self.config.includes {
if include.is_match(&path) {
tracing::trace!("file excluded from configurations");
return true;
}
}
tracing::debug!("file should not be included");
false
}
#[must_use]
pub fn collect_files(&self) -> Vec<PathBuf> {
let (tx, rx) = mpsc::channel();
WalkBuilder::new(&self.folder)
.build_parallel()
.run(move || {
let tx = tx.clone();
Box::new(move |result| {
result.map_or_else(
|err| {
tracing::error!(err = %err,"dir entry error ");
},
|entry| {
if entry.path().is_file() {
let path = entry.path().to_owned();
if !self.should_exclude(path.as_path()) && self.should_include(path.as_path()){
if let Err(err) = tx.send(path.clone()) {
tracing::error!(err = %err,path = %path.display(),"error sending path to tx ");
}
}
}
},
);
ignore::WalkState::Continue
})
});
rx.into_iter().collect::<Vec<_>>()
}
}