pub mod cache;
pub mod types;
pub mod variant;
use std::path::{Path, PathBuf};
use anyhow::Context;
use include_dir::{Dir, DirEntry, include_dir};
use types::{CommandPattern, FilterConfig};
/// Filter definitions bundled into the binary at compile time from the
/// crate's `filters/` directory.
static STDLIB: Dir<'static> = include_dir!("$CARGO_MANIFEST_DIR/filters");
/// Priority assigned to bundled filters. `discover_all_filters` sorts by
/// ascending priority, so the maximum value ranks built-ins below any
/// local or user filter with the same command.
pub const STDLIB_PRIORITY: u8 = u8::MAX;
/// Return the UTF-8 contents of the embedded filter file at `relative_path`.
///
/// Yields `None` when no such file is embedded, or when its contents are
/// not valid UTF-8.
pub fn get_embedded_filter(relative_path: &Path) -> Option<&'static str> {
    STDLIB
        .get_file(relative_path)
        .and_then(|file| file.contents_utf8())
}
/// List the files of the embedded directory `dir_path` as
/// `(path, contents)` pairs; files that are not valid UTF-8 are omitted.
///
/// Returns an empty vec when `dir_path` is not part of the embedded tree.
pub fn get_embedded_dir_files(dir_path: &Path) -> Vec<(PathBuf, &'static str)> {
    STDLIB.get_dir(dir_path).map_or_else(Vec::new, |dir| {
        dir.files()
            .filter_map(|file| {
                let text = file.contents_utf8()?;
                Some((file.path().to_path_buf(), text))
            })
            .collect()
    })
}
/// Default directories scanned for filter files, in precedence order:
/// `.tokf/filters` under the current working directory, then `filters/`
/// under the per-user directory reported by `crate::paths::user_dir`.
/// Either entry is omitted when its base directory cannot be determined.
pub fn default_search_dirs() -> Vec<PathBuf> {
    let local = std::env::current_dir()
        .ok()
        .map(|cwd| cwd.join(".tokf/filters"));
    let user = crate::paths::user_dir().map(|dir| dir.join("filters"));
    local.into_iter().chain(user).collect()
}
pub fn try_load_filter(path: &Path) -> anyhow::Result<Option<FilterConfig>> {
let content = match std::fs::read_to_string(path) {
Ok(c) => c,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
Err(e) => {
return Err(anyhow::Error::new(e)
.context(format!("failed to read filter file: {}", path.display())));
}
};
let config: FilterConfig = toml::from_str(&content)
.with_context(|| format!("failed to parse filter file: {}", path.display()))?;
Ok(Some(config))
}
/// Specificity of a command pattern: the number of non-wildcard words.
/// A pattern with more literal words is considered more specific.
pub fn pattern_specificity(pattern: &str) -> usize {
    pattern
        .split_whitespace()
        .fold(0, |count, word| if word == "*" { count } else { count + 1 })
}
/// Final path component of `word`, splitting on either `/` or `\` so both
/// Unix- and Windows-style paths work. Returns `word` unchanged when it
/// contains no separator.
fn extract_basename(word: &str) -> &str {
    match word.rfind(['/', '\\']) {
        Some(sep) => &word[sep + 1..],
        None => word,
    }
}
/// Scan forward over flag-style words (`-x`, `--long`, `--opt=val`) looking
/// for `target`. A flag without `=` may consume the following word as its
/// value, unless that word is itself a flag or equals `target`.
///
/// Returns `Some(i)` where `i` is the index just past the matched `target`,
/// or `None` when a non-flag word other than `target` is reached (or the
/// slice is exhausted).
fn skip_flags_to_match(words: &[&str], target: &str) -> Option<usize> {
    let mut idx = 0;
    loop {
        let word = *words.get(idx)?;
        if word == target {
            return Some(idx + 1);
        }
        if !word.starts_with('-') {
            // Hit a positional word that is not the target: no match here.
            return None;
        }
        idx += 1;
        // `--opt=val` carries its value inline; a bare flag may take the
        // next word as its value.
        if !word.contains('=') {
            if let Some(next) = words.get(idx) {
                if !next.starts_with('-') && *next != target {
                    idx += 1;
                }
            }
        }
    }
}
/// Try to match `pattern` against a prefix of `words`.
///
/// * The first pattern word is compared by basename, so `/usr/bin/git`
///   matches a `git …` pattern and vice versa.
/// * `*` matches exactly one non-empty word.
/// * After the first word, flag-style words in `words` may be skipped to
///   reach the next literal pattern word (see `skip_flags_to_match`).
///
/// On success returns the number of words consumed, minus those matched by
/// wildcards; `None` when the pattern does not match (or either side is
/// empty).
pub fn pattern_matches_prefix(pattern: &str, words: &[&str]) -> Option<usize> {
    let tokens: Vec<&str> = pattern.split_whitespace().collect();
    if tokens.is_empty() || words.is_empty() {
        return None;
    }
    let mut consumed = 0; // words matched so far, including wildcard matches
    let mut wildcards = 0; // how many of those were wildcard matches
    for (pos, token) in tokens.iter().enumerate() {
        // Ran out of input before the pattern was satisfied.
        let current = *words.get(consumed)?;
        if *token == "*" {
            if current.is_empty() {
                return None;
            }
            consumed += 1;
            wildcards += 1;
            continue;
        }
        let is_first = pos == 0;
        // Only the command word (position 0) is compared by basename.
        let lhs = if is_first { extract_basename(current) } else { current };
        let rhs = if is_first { extract_basename(token) } else { token };
        if lhs == rhs {
            consumed += 1;
        } else if is_first {
            return None;
        } else {
            // Mismatch past the command word: try to skip over flags.
            consumed += skip_flags_to_match(&words[consumed..], token)?;
        }
    }
    Some(consumed - wildcards)
}
/// All `.toml` filter files under `dir` (recursively), sorted by full path.
pub fn discover_filter_files(dir: &Path) -> Vec<PathBuf> {
    let mut found = Vec::new();
    collect_filter_files(dir, &mut found);
    found.sort_unstable();
    found
}
/// Recursive worker for `discover_filter_files`: walks `dir`, appending
/// every `.toml` file to `files`. Dot-prefixed entries are skipped
/// entirely; entries are visited in file-name order so the output is
/// deterministic. Unreadable directories are silently ignored.
fn collect_filter_files(dir: &Path, files: &mut Vec<PathBuf>) {
    let Ok(read) = std::fs::read_dir(dir) else {
        return;
    };
    let mut children: Vec<std::fs::DirEntry> = read.filter_map(Result::ok).collect();
    children.sort_by_key(|entry| entry.file_name());
    for child in children {
        let name = child.file_name();
        if name.to_string_lossy().starts_with('.') {
            continue; // hidden files and directories are never filters
        }
        let path = child.path();
        if path.is_dir() {
            collect_filter_files(&path, files);
        } else if path.extension().is_some_and(|ext| ext == "toml") {
            files.push(path);
        }
    }
}
/// A filter definition located during discovery, together with the metadata
/// needed to rank it against competing definitions for the same command.
pub struct ResolvedFilter {
    /// Parsed contents of the filter's TOML file.
    pub config: FilterConfig,
    /// Canonical hash of `config`; empty string when hashing failed.
    pub hash: String,
    /// Where the filter was loaded from (`<built-in>/…` for embedded ones).
    pub source_path: PathBuf,
    /// Path relative to its search directory (or to the embedded root).
    pub relative_path: PathBuf,
    /// Search-directory rank: lower wins; `STDLIB_PRIORITY` for built-ins.
    pub priority: u8,
}
impl ResolvedFilter {
    /// Number of command words consumed by the first command pattern that
    /// matches `words`, or `None` when no pattern matches.
    pub fn matches(&self, words: &[&str]) -> Option<usize> {
        self.config
            .command
            .patterns()
            .iter()
            .find_map(|pattern| pattern_matches_prefix(pattern, words))
    }

    /// Highest specificity among this filter's command patterns; 0 when the
    /// filter declares no patterns.
    pub fn specificity(&self) -> usize {
        self.config
            .command
            .patterns()
            .iter()
            .fold(0, |best, pattern| best.max(pattern_specificity(pattern)))
    }

    /// True when `name` equals the relative path with its extension removed.
    pub fn matches_name(&self, name: &str) -> bool {
        let stem = self.relative_path.with_extension("");
        stem.to_string_lossy() == name
    }

    /// Human-readable origin label derived from the priority rank.
    pub const fn priority_label(&self) -> &'static str {
        if self.priority == 0 {
            "local"
        } else if self.priority == 1 {
            "user"
        } else {
            "built-in"
        }
    }
}
/// Discover every filter visible to this process.
///
/// Filters are gathered from each directory in `search_dirs` (the
/// directory's index becomes its priority; 0 is the highest precedence) and
/// then from the filters embedded in the binary at `STDLIB_PRIORITY` (the
/// lowest precedence). The combined list is sorted by ascending priority,
/// ties broken by descending pattern specificity, then deduplicated on each
/// filter's first command pattern so only the best-ranked filter per
/// command survives.
///
/// # Errors
/// Currently never returns `Err`: unreadable or unparsable filter files are
/// skipped silently. The `Result` return type is kept for interface
/// stability.
pub fn discover_all_filters(search_dirs: &[PathBuf]) -> anyhow::Result<Vec<ResolvedFilter>> {
    let mut all_filters: Vec<ResolvedFilter> = Vec::new();
    // Pass 1: on-disk filters; directory order defines precedence.
    for (priority, dir) in search_dirs.iter().enumerate() {
        let files = discover_filter_files(dir);
        for path in files {
            // Skip files that are missing, unreadable, or fail to parse.
            let Ok(Some(config)) = try_load_filter(&path) else {
                continue;
            };
            // Path relative to its search dir; falls back to the full path
            // if stripping fails.
            let relative_path = path.strip_prefix(dir).unwrap_or(&path).to_path_buf();
            // Hash failures degrade to "" rather than aborting discovery.
            let hash = tokf_common::hash::canonical_hash(&config).unwrap_or_default();
            all_filters.push(ResolvedFilter {
                config,
                hash,
                source_path: path,
                relative_path,
                // More than 255 search dirs saturates to built-in priority.
                priority: u8::try_from(priority).unwrap_or(u8::MAX),
            });
        }
    }
    // Pass 2: filters compiled into the binary.
    if let Ok(entries) = STDLIB.find("**/*.toml") {
        for entry in entries {
            if let DirEntry::File(file) = entry {
                let content = file.contents_utf8().unwrap_or("");
                let Ok(config) = toml::from_str::<FilterConfig>(content) else {
                    continue;
                };
                let rel = file.path().to_path_buf();
                let hash = tokf_common::hash::canonical_hash(&config).unwrap_or_default();
                all_filters.push(ResolvedFilter {
                    config,
                    hash,
                    // Synthetic path marking the filter as embedded.
                    source_path: PathBuf::from("<built-in>").join(&rel),
                    relative_path: rel,
                    priority: STDLIB_PRIORITY,
                });
            }
        }
    }
    // Rank: lower priority number first; within a priority, more specific
    // command patterns first.
    all_filters.sort_by(|a, b| {
        a.priority
            .cmp(&b.priority)
            .then_with(|| b.specificity().cmp(&a.specificity()))
    });
    // Dedup on the first command pattern: the sort above guarantees the
    // best-ranked filter for each command is encountered (and kept) first.
    let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
    all_filters.retain(|f| seen.insert(f.config.command.first().to_string()));
    Ok(all_filters)
}
/// Translate a space-separated command pattern into an anchored regex.
///
/// * The first word matches by basename, allowing any leading path
///   (`(?:[^\s]*[\\/])?`), or any executable when it is `*`.
/// * Subsequent literal words may be preceded by flag words, with or
///   without values, which the regex skips over.
/// * `*` in a non-first position matches exactly one word.
/// * A trailing `(\s.*)?$` allows arbitrary extra arguments.
///
/// An empty pattern yields a regex matching only an empty (or
/// whitespace-only) command line.
pub fn command_pattern_to_regex(pattern: &str) -> String {
    let words: Vec<&str> = pattern.split_whitespace().collect();
    if words.is_empty() {
        return "^(\\s.*)?$".to_string();
    }
    // NOTE: named `out` rather than `regex` to avoid shadowing confusion
    // with the `regex` crate path used below. This also fixes the original
    // mojibake `®ex::escape` (a corrupted `&regex::escape`), which did not
    // compile.
    let mut out = String::from("^");
    for (i, &word) in words.iter().enumerate() {
        if i == 0 {
            if word == "*" {
                out.push_str(r"\S+");
            } else {
                // Match the executable by basename with an optional path prefix.
                let basename = extract_basename(word);
                out.push_str(r"(?:[^\s]*[\\/])?");
                out.push_str(&regex::escape(basename));
            }
        } else if word == "*" {
            out.push_str(r"\s+\S+");
        } else {
            // Skip interleaved flags (optionally `=`-valued, optionally
            // followed by a value word) before the literal word.
            out.push_str(r"(?:\s+-[^=\s]+(?:=[^\s]+)?(?:\s+[^-\s]\S*)?)*\s+");
            out.push_str(&regex::escape(word));
        }
    }
    out.push_str(r"(\s.*)?$");
    out
}
/// Pair each of the command's patterns with its derived regex string.
pub fn command_pattern_regexes(command: &CommandPattern) -> Vec<(String, String)> {
    let mut pairs = Vec::new();
    for pattern in command.patterns() {
        pairs.push((pattern.clone(), command_pattern_to_regex(pattern)));
    }
    pairs
}
#[cfg(test)]
mod tests;
#[cfg(test)]
mod tests_basename;
#[cfg(test)]
mod tests_discovery;
#[cfg(test)]
mod tests_matching;