use std::collections::{HashMap, HashSet};
use anyhow::{anyhow, Result};
use globset::{Glob, GlobSet, GlobSetBuilder};
use crate::category::Category;
use crate::entry::Entry;
// Kind of filesystem entry a filter can be restricted to.
//
// `Filter::matches` rejects entries whose `is_dir()` is true when the filter asks
// for `File`, and entries whose `is_dir()` is false when it asks for `Dir`.
//
// NOTE(review): these are plain `//` comments on purpose — clap's derive may
// surface `///` doc comments as CLI help text; confirm before converting them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
pub enum EntryType {
// Selects entries for which `is_dir()` is false.
File,
// Selects entries for which `is_dir()` is true.
Dir,
}
/// Compiled set of criteria used to decide whether an [`Entry`] passes.
///
/// Built from [`FilterInputs`] by [`Filter::from_inputs`]. Every criterion that is
/// set must hold for [`Filter::matches`] to return `true`; unset criteria are
/// ignored, so the `Default` value accepts every entry.
#[derive(Debug, Default)]
pub struct Filter {
/// Accepted categories; an empty set places no restriction on the category.
categories: HashSet<Category>,
/// When set, restricts matches to files only or to directories only.
type_: Option<EntryType>,
/// Lower bound compared against `entry.size`; entries below it are rejected.
min_size: Option<u64>,
/// Compiled name globs checked against `entry.name`; `None` when no pattern was given.
names: Option<GlobSet>,
/// Entries pass only if `modified_days_ago` is known and `<=` this value.
changed_within_days: Option<u64>,
/// Entries pass only if `modified_days_ago` is known and `>` this value.
changed_before_days: Option<u64>,
}
/// Raw, unparsed filter settings that [`Filter::from_inputs`] validates and converts.
pub struct FilterInputs {
/// Categories to accept; an empty list means no category restriction.
pub categories: Vec<Category>,
/// Optional restriction to files or to directories.
pub type_: Option<EntryType>,
/// Minimum size as text (e.g. `"1.5M"`), parsed with [`parse_size`].
pub min_size: Option<String>,
/// Glob patterns for entry names; each one must be accepted by `Glob::new`.
pub names: Vec<String>,
/// Age limit as text (e.g. `"7d"`), parsed with [`parse_duration_days`];
/// entries must have been modified at most this many days ago.
pub changed_within: Option<String>,
/// Age limit as text (e.g. `"30d"`), parsed with [`parse_duration_days`];
/// entries must have been modified more than this many days ago.
pub changed_before: Option<String>,
}
impl Filter {
pub fn from_inputs(inputs: FilterInputs) -> Result<Self> {
let categories: HashSet<Category> = inputs.categories.into_iter().collect();
let names = if inputs.names.is_empty() {
None
} else {
let mut builder = GlobSetBuilder::new();
for pat in &inputs.names {
let glob = Glob::new(pat)
.map_err(|e| anyhow!("--name '{pat}' is not a valid glob: {e}"))?;
builder.add(glob);
}
Some(builder.build()?)
};
let min_size = inputs.min_size.as_deref().map(parse_size).transpose()?;
let changed_within_days = inputs
.changed_within
.as_deref()
.map(parse_duration_days)
.transpose()?;
let changed_before_days = inputs
.changed_before
.as_deref()
.map(parse_duration_days)
.transpose()?;
Ok(Self {
categories,
type_: inputs.type_,
min_size,
names,
changed_within_days,
changed_before_days,
})
}
pub fn is_empty(&self) -> bool {
self.categories.is_empty()
&& self.type_.is_none()
&& self.min_size.is_none()
&& self.names.is_none()
&& self.changed_within_days.is_none()
&& self.changed_before_days.is_none()
}
pub fn matches(&self, entry: &Entry) -> bool {
if !self.categories.is_empty() && !self.categories.contains(&entry.category) {
return false;
}
if let Some(t) = self.type_ {
match t {
EntryType::File if entry.is_dir() => return false,
EntryType::Dir if !entry.is_dir() => return false,
_ => {}
}
}
if let Some(min) = self.min_size {
if entry.size < min {
return false;
}
}
if let Some(globs) = &self.names {
if !globs.is_match(&entry.name) {
return false;
}
}
if let Some(threshold) = self.changed_within_days {
match entry.modified_days_ago {
Some(d) if d <= threshold => {}
_ => return false,
}
}
if let Some(threshold) = self.changed_before_days {
match entry.modified_days_ago {
Some(d) if d > threshold => {}
_ => return false,
}
}
true
}
}
/// Maps each visited [`Entry`], keyed by its address, to whether that entry or any
/// of its descendants matches the filter.
///
/// The raw pointers serve as identity keys only; the visible code in this file
/// never dereferences them. A map is therefore only meaningful for the exact
/// tree it was computed from, while that tree stays in place.
pub type SubtreeMatch = HashMap<*const Entry, bool>;
/// Walks the tree rooted at `entry` and records, for every node, whether the
/// node itself or anything beneath it matches `filter`.
///
/// The returned map is keyed by node address; query it with [`subtree_visible`].
pub fn precompute_subtree_match(entry: &Entry, filter: &Filter) -> SubtreeMatch {
    let mut visibility: SubtreeMatch = HashMap::new();
    // The root's own verdict is stored in the map like any other node's, so the
    // returned flag is not needed here.
    let _ = walk_match(entry, filter, &mut visibility);
    visibility
}
/// Recursive worker for [`precompute_subtree_match`].
///
/// Records in `map` whether `entry` or any descendant matches `filter`, and
/// returns that same flag. Children are recorded before their parent.
fn walk_match(entry: &Entry, filter: &Filter, map: &mut SubtreeMatch) -> bool {
    let mut visible = filter.matches(entry);
    // `|=` does not short-circuit: every child must be walked so that it gets
    // its own map entry, even once `visible` is already true.
    for child in entry.children().into_iter().flatten() {
        visible |= walk_match(child, filter, map);
    }
    let key: *const Entry = entry;
    map.insert(key, visible);
    visible
}
/// Looks up whether `entry` (or something beneath it) matched when `map` was built.
///
/// Entries that are not present in `map` are reported as not visible.
pub fn subtree_visible(entry: &Entry, map: &SubtreeMatch) -> bool {
    let key: *const Entry = entry;
    matches!(map.get(&key), Some(&true))
}
/// Parses a human-readable size such as `"100"`, `"1.5M"` or `"2 GiB"` into bytes.
///
/// The input is a non-negative decimal number (ASCII digits and `.`) followed by
/// an optional, case-insensitive unit. Every unit is a power of 1024, whichever
/// spelling is used: `B`, `K`/`KB`/`KiB`, `M`/`MB`/`MiB`, `G`/`GB`/`GiB`,
/// `T`/`TB`/`TiB`. A missing unit means bytes. Fractional results are rounded to
/// the nearest byte.
///
/// # Errors
///
/// Returns an error when the input is empty, negative, not a number, not
/// finite, uses an unknown unit, or does not fit below `u64::MAX`.
pub fn parse_size(s: &str) -> Result<u64> {
    let s = s.trim();
    if s.is_empty() {
        return Err(anyhow!("empty size"));
    }

    // A leading '-' never belongs to the digit run scanned below, so a sign
    // check on the parsed value could never fire. Detect it up front so that
    // negative input is reported as such rather than as "not a number: ''".
    let is_numeric = |c: char| c.is_ascii_digit() || c == '.';
    if matches!(s.strip_prefix('-'), Some(rest) if rest.starts_with(is_numeric)) {
        return Err(anyhow!("negative size: '{s}'"));
    }

    // The number is the leading run of digits and dots; the rest is the unit.
    let split = s.find(|c: char| !is_numeric(c)).unwrap_or(s.len());
    let (num_str, unit) = s.split_at(split);
    let num: f64 = num_str
        .parse()
        .map_err(|_| anyhow!("not a number: '{num_str}' in '{s}'"))?;
    // A very long digit string parses successfully but overflows to infinity.
    if !num.is_finite() {
        return Err(anyhow!("not a finite number: '{s}'"));
    }

    let mult = match unit.trim().to_ascii_uppercase().as_str() {
        "" | "B" => 1u64,
        "K" | "KB" | "KIB" => 1024,
        "M" | "MB" | "MIB" => 1024 * 1024,
        "G" | "GB" | "GIB" => 1024 * 1024 * 1024,
        "T" | "TB" | "TIB" => 1024u64.pow(4),
        other => return Err(anyhow!("unknown size unit: '{other}' in '{s}'")),
    };

    let bytes = (num * mult as f64).round();
    // `u64::MAX as f64` rounds up to 2^64, so `>=` rejects everything that the
    // cast below would otherwise silently saturate.
    if bytes >= u64::MAX as f64 {
        return Err(anyhow!("size out of range (exceeds u64): '{s}'"));
    }
    Ok(bytes as u64)
}
/// Parses a duration such as `"7"`, `"7d"`, `"2w"`, `"3m"` or `"1y"` into whole days.
///
/// The input is an unsigned integer followed by an optional, case-insensitive
/// unit: `d` = 1 day, `w` = 7 days, `m` = 30 days, `y` = 365 days. A missing
/// unit means days.
///
/// # Errors
///
/// Returns an error when the input is empty, does not start with an integer
/// that fits in `u64`, uses an unknown unit, or overflows `u64` once converted
/// to days.
pub fn parse_duration_days(s: &str) -> Result<u64> {
    let s = s.trim();
    if s.is_empty() {
        return Err(anyhow!("empty duration"));
    }

    // Whatever follows the leading run of ASCII digits is the unit suffix;
    // `suffix` is a tail of `s`, so the slice below ends on a char boundary.
    let suffix = s.trim_start_matches(|c: char| c.is_ascii_digit());
    let num_str = &s[..s.len() - suffix.len()];

    let count = match num_str.parse::<u64>() {
        Ok(value) => value,
        Err(_) => return Err(anyhow!("not an integer: '{num_str}' in '{s}'")),
    };

    let normalized = suffix.trim().to_ascii_lowercase();
    let factor: u64 = match normalized.as_str() {
        "" | "d" => 1,
        "w" => 7,
        "m" => 30,
        "y" => 365,
        other => {
            return Err(anyhow!(
                "unknown duration unit: '{other}' in '{s}' (use d/w/m/y)"
            ));
        }
    };

    match count.checked_mul(factor) {
        Some(days) => Ok(days),
        None => Err(anyhow!("duration overflow: '{s}'")),
    }
}
// Unit tests for the size/duration parsers, `Filter::matches`, and the
// subtree-visibility precomputation.
#[cfg(test)]
mod tests {
use super::*;
// Builds a directory entry with no modification age and the given children.
fn dir(name: &str, cat: Category, children: Vec<Entry>) -> Entry {
Entry::dir(name.to_string(), cat, None, children)
}
// Builds a file entry; `days_ago` is the optional modification age in days.
fn file(name: &str, size: u64, cat: Category, days_ago: Option<u64>) -> Entry {
Entry::file(name.to_string(), size, cat, days_ago)
}
// Inputs with every criterion unset; tests override single fields via `..empty_inputs()`.
fn empty_inputs() -> FilterInputs {
FilterInputs {
categories: vec![],
type_: None,
min_size: None,
names: vec![],
changed_within: None,
changed_before: None,
}
}
// Every unit spelling (K, KB, KiB, ...) is a power of 1024, and case is ignored.
#[test]
fn parse_size_1024_based() {
assert_eq!(parse_size("0").unwrap(), 0);
assert_eq!(parse_size("100").unwrap(), 100);
assert_eq!(parse_size("1K").unwrap(), 1024);
assert_eq!(parse_size("1KB").unwrap(), 1024);
assert_eq!(parse_size("1KiB").unwrap(), 1024);
assert_eq!(parse_size("1M").unwrap(), 1024 * 1024);
assert_eq!(parse_size("1.5M").unwrap(), (1.5 * 1024.0 * 1024.0) as u64);
assert_eq!(parse_size("1g").unwrap(), 1024u64.pow(3));
assert_eq!(parse_size("1T").unwrap(), 1024u64.pow(4));
}
// Empty input, non-numeric input, unknown units and negative values are errors.
#[test]
fn parse_size_rejects_garbage() {
assert!(parse_size("").is_err());
assert!(parse_size("xyz").is_err());
assert!(parse_size("1XB").is_err());
assert!(parse_size("-1M").is_err());
}
// Non-finite spellings, exponent notation and values at or beyond u64::MAX are errors.
#[test]
fn parse_size_rejects_non_finite_and_overflow() {
assert!(parse_size("inf").is_err());
assert!(parse_size("nan").is_err());
assert!(parse_size("1e20").is_err());
assert!(parse_size("99999999999T").is_err());
// u64::MAX itself is rejected: the range check is done in f64 and uses `>=`.
assert!(parse_size("18446744073709551615").is_err());
}
// Units: none/d = 1 day, w = 7, m = 30, y = 365; the unit is case-insensitive.
#[test]
fn parse_duration_days_works() {
assert_eq!(parse_duration_days("7").unwrap(), 7);
assert_eq!(parse_duration_days("7d").unwrap(), 7);
assert_eq!(parse_duration_days("2w").unwrap(), 14);
assert_eq!(parse_duration_days("3m").unwrap(), 90);
assert_eq!(parse_duration_days("1y").unwrap(), 365);
assert_eq!(parse_duration_days("1Y").unwrap(), 365);
}
// Unknown units, empty input and input without a leading integer are errors.
#[test]
fn parse_duration_rejects_unknown_suffix() {
assert!(parse_duration_days("7h").is_err());
assert!(parse_duration_days("").is_err());
assert!(parse_duration_days("abc").is_err());
}
// A filter built from empty inputs reports `is_empty()` and accepts an entry.
#[test]
fn empty_filter_matches_everything() {
let f = Filter::from_inputs(empty_inputs()).unwrap();
assert!(f.is_empty());
let e = file("a.txt", 10, Category::Other, Some(0));
assert!(f.matches(&e));
}
// Category and type must both hold: wrong category fails, and so does a
// directory when only files are requested.
#[test]
fn category_and_type_filters_combine_with_and() {
let inputs = FilterInputs {
categories: vec![Category::Cache],
type_: Some(EntryType::File),
..empty_inputs()
};
let f = Filter::from_inputs(inputs).unwrap();
assert!(f.matches(&file("a", 0, Category::Cache, None)));
assert!(!f.matches(&file("a", 0, Category::Build, None))); assert!(!f.matches(&dir("a", Category::Cache, vec![]))); }
// The minimum size is inclusive: exactly 1024 bytes passes a "1K" filter.
#[test]
fn min_size_filter() {
let inputs = FilterInputs {
min_size: Some("1K".into()),
..empty_inputs()
};
let f = Filter::from_inputs(inputs).unwrap();
assert!(f.matches(&file("a", 1024, Category::Other, None)));
assert!(f.matches(&file("a", 2048, Category::Other, None)));
assert!(!f.matches(&file("a", 1023, Category::Other, None)));
}
// Several name globs are alternatives: matching any one of them is enough.
#[test]
fn name_filter_combines_globs_with_or() {
let inputs = FilterInputs {
names: vec!["*.log".into(), "*.tmp".into()],
..empty_inputs()
};
let f = Filter::from_inputs(inputs).unwrap();
assert!(f.matches(&file("server.log", 0, Category::Log, None)));
assert!(f.matches(&file("scratch.tmp", 0, Category::Other, None)));
assert!(!f.matches(&file("README.md", 0, Category::Other, None)));
}
// `changed_within` keeps entries at most N days old; `changed_before` keeps
// entries more than N days old.
#[test]
fn mtime_filters() {
let within = Filter::from_inputs(FilterInputs {
changed_within: Some("7d".into()),
..empty_inputs()
})
.unwrap();
let before = Filter::from_inputs(FilterInputs {
changed_before: Some("30d".into()),
..empty_inputs()
})
.unwrap();
let recent = file("a", 0, Category::Other, Some(3));
let mid = file("a", 0, Category::Other, Some(15));
let stale = file("a", 0, Category::Other, Some(60));
assert!(within.matches(&recent));
assert!(!within.matches(&mid));
assert!(!within.matches(&stale));
assert!(!before.matches(&recent));
assert!(!before.matches(&mid));
assert!(before.matches(&stale));
}
// With a `*.rs` filter only `main.rs` matches directly; its ancestors (`src`,
// `root`) must be marked visible, while `target` and `notes.txt` must not.
#[test]
fn precompute_subtree_match_keeps_ancestors_of_matches() {
let tree = dir(
"root",
Category::Other,
vec![
dir(
"src",
Category::Other,
vec![file("main.rs", 10, Category::Other, None)],
),
dir(
"target",
Category::Build,
vec![dir(
"debug",
Category::Build,
vec![file("app", 100, Category::Build, None)],
)],
),
file("notes.txt", 5, Category::Other, None),
],
);
let f = Filter::from_inputs(FilterInputs {
names: vec!["*.rs".into()],
..empty_inputs()
})
.unwrap();
let map = precompute_subtree_match(&tree, &f);
// The map is keyed by entry address, so lookups use pointers into `tree`.
let root_ptr = &tree as *const Entry;
assert!(*map.get(&root_ptr).unwrap());
let src = tree
.children()
.unwrap()
.iter()
.find(|c| c.name == "src")
.unwrap();
assert!(*map.get(&(src as *const Entry)).unwrap());
let main_rs = &src.children().unwrap()[0];
assert!(*map.get(&(main_rs as *const Entry)).unwrap());
let target = tree
.children()
.unwrap()
.iter()
.find(|c| c.name == "target")
.unwrap();
assert!(!*map.get(&(target as *const Entry)).unwrap());
let notes = tree
.children()
.unwrap()
.iter()
.find(|c| c.name == "notes.txt")
.unwrap();
assert!(!*map.get(&(notes as *const Entry)).unwrap());
}
}