use std::fs;
use std::path::Path;
use regex::Regex;
/// Cheap pre-parse filter for source files.
///
/// Holds the limits and markers used to decide whether a file should be
/// handed to the parser: a size cap, literal "generated file" markers, and
/// regular expressions that are matched against the first few lines of a file.
#[derive(Debug, Clone)]
pub struct PreFilter {
    /// Largest file size, in bytes, that is still accepted; larger files are rejected.
    max_file_size: usize,
    /// Literal substrings that mark a file as generated when found in its leading lines.
    generated_markers: Vec<String>,
    /// Patterns that mark a file as generated when they match its leading lines.
    generated_patterns: Vec<Regex>,
    /// How many lines from the top of a file are inspected for markers and patterns.
    lines_to_check: usize,
}
impl Default for PreFilter {
    /// Builds the stock configuration: a 1,000,000-byte size cap, the built-in
    /// "generated file" markers and patterns, and a 10-line inspection window.
    fn default() -> Self {
        // Literal comment banners commonly emitted by code generators.
        let marker_table = [
            "// AUTO-GENERATED",
            "// This file is auto-generated",
            "// DO NOT EDIT",
            "// Generated by",
            "/* AUTO-GENERATED */",
            "@generated",
            "// Code generated",
        ];
        // Case-insensitive fallbacks for banners that do not match a literal marker.
        let pattern_table = [
            r"(?i)auto.?generated",
            r"(?i)do\s*not\s*(edit|modify)",
        ];

        let generated_markers = marker_table
            .iter()
            .map(|marker| marker.to_string())
            .collect();
        let generated_patterns = pattern_table
            .iter()
            .map(|source| Regex::new(source).unwrap())
            .collect();

        Self {
            max_file_size: 1_000_000,
            generated_markers,
            generated_patterns,
            lines_to_check: 10,
        }
    }
}
impl PreFilter {
    /// Creates a filter with the default configuration.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the size cap in bytes; files strictly larger than `size` are rejected.
    #[must_use]
    pub fn with_max_file_size(mut self, size: usize) -> Self {
        self.max_file_size = size;
        self
    }

    /// Registers an additional literal marker; a file whose leading lines
    /// contain `marker` is treated as generated.
    #[must_use]
    pub fn with_generated_marker(mut self, marker: impl Into<String>) -> Self {
        self.generated_markers.push(marker.into());
        self
    }

    /// Returns `true` when `path` should be handed to the parser.
    ///
    /// A file qualifies when it has an `rs` extension, is not larger than the
    /// configured size cap, and does not look generated. The checks run in
    /// that order, so the extension test (no I/O) short-circuits the metadata
    /// lookup, which in turn short-circuits reading file content.
    pub fn should_parse(&self, path: &Path) -> bool {
        self.is_rust_file(path) && !self.is_too_large(path) && !self.is_generated(path)
    }

    /// Returns `true` when the path's extension is exactly `rs`.
    fn is_rust_file(&self, path: &Path) -> bool {
        matches!(path.extension(), Some(ext) if ext == "rs")
    }

    /// Returns `true` when the file is larger than the size cap.
    ///
    /// A file whose metadata cannot be read is also reported as too large, so
    /// that it gets skipped.
    fn is_too_large(&self, path: &Path) -> bool {
        match fs::metadata(path) {
            // Compare in `u64`: widening the limit is lossless, whereas narrowing
            // the length to `usize` would truncate huge files on 32-bit targets.
            Ok(meta) => meta.len() > self.max_file_size as u64,
            Err(_) => true,
        }
    }

    /// Returns `true` when the first `lines_to_check` lines of the file contain
    /// a generated-file marker or match a generated-file pattern.
    ///
    /// Only as much of the file as is needed to obtain those lines is read, so
    /// content further down is neither loaded nor required to be valid UTF-8.
    /// A file that cannot be opened is reported as not generated.
    fn is_generated(&self, path: &Path) -> bool {
        let file = match fs::File::open(path) {
            Ok(file) => file,
            Err(_) => return false,
        };

        // Stop at the first line that fails to read or decode and judge the
        // file by the lines gathered so far.
        let reader = std::io::BufReader::new(file);
        let first_lines = std::io::BufRead::lines(reader)
            .take(self.lines_to_check)
            .map_while(Result::ok)
            .collect::<Vec<_>>()
            .join("\n");

        self.generated_markers
            .iter()
            .any(|marker| first_lines.contains(marker.as_str()))
            || self
                .generated_patterns
                .iter()
                .any(|pattern| pattern.is_match(&first_lines))
    }

    /// Returns `true` when `path` should be skipped based on its name alone.
    ///
    /// A path is skipped when any of its components is one of the build,
    /// VCS, or dependency directories (`target`, `.git`, `node_modules`,
    /// `vendor`, `.cargo`), or when its file name ends with `.generated.rs` or
    /// `_generated.rs`, or starts with `generated_`. No file system access is
    /// performed.
    pub fn should_skip_path(&self, path: &Path) -> bool {
        const SKIP_DIRS: [&str; 5] = ["target", ".git", "node_modules", "vendor", ".cargo"];

        // Match whole path components instead of "/dir/" substrings so that
        // relative paths ("target/debug/x.rs") and platform-specific
        // separators are handled as well.
        if path
            .iter()
            .any(|part| SKIP_DIRS.iter().any(|dir| part == *dir))
        {
            return true;
        }

        match path.file_name().and_then(|name| name.to_str()) {
            Some(name) => {
                name.ends_with(".generated.rs")
                    || name.ends_with("_generated.rs")
                    || name.starts_with("generated_")
            }
            None => false,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Only paths with an `rs` extension are recognised as Rust sources.
    #[test]
    fn test_rust_file_detection() {
        let filter = PreFilter::new();

        for rust_path in ["main.rs", "src/lib.rs"] {
            assert!(filter.is_rust_file(Path::new(rust_path)));
        }
        for other_path in ["README.md", "Cargo.toml"] {
            assert!(!filter.is_rust_file(Path::new(other_path)));
        }
    }

    /// A generator banner on the first line marks the file as generated.
    #[test]
    fn test_generated_file_detection() {
        let mut source = NamedTempFile::with_suffix(".rs").unwrap();
        writeln!(source, "// AUTO-GENERATED").unwrap();
        writeln!(source, "fn main() {{}}").unwrap();

        let detected = PreFilter::new().is_generated(source.path());
        assert!(detected);
    }

    /// An ordinary comment header does not trigger generated-file detection.
    #[test]
    fn test_normal_file_not_generated() {
        let mut source = NamedTempFile::with_suffix(".rs").unwrap();
        writeln!(source, "// This is a normal file").unwrap();
        writeln!(source, "fn main() {{}}").unwrap();

        let detected = PreFilter::new().is_generated(source.path());
        assert!(!detected);
    }

    /// Build/VCS directories and generated file names are skipped; regular sources are not.
    #[test]
    fn test_skip_path_detection() {
        let filter = PreFilter::new();
        let skipped = [
            "/project/target/debug/main.rs",
            "/project/.git/hooks/pre-commit",
            "/project/main.generated.rs",
        ];

        for candidate in skipped {
            assert!(filter.should_skip_path(Path::new(candidate)));
        }
        assert!(!filter.should_skip_path(Path::new("/project/src/main.rs")));
    }

    /// With a 100-byte cap, a one-line file passes and a 100-line file is rejected.
    #[test]
    fn test_file_size_limit() {
        let filter = PreFilter::new().with_max_file_size(100);

        let mut tiny = NamedTempFile::with_suffix(".rs").unwrap();
        writeln!(tiny, "fn main() {{}}").unwrap();

        let mut bulky = NamedTempFile::with_suffix(".rs").unwrap();
        let mut remaining = 100;
        while remaining > 0 {
            writeln!(bulky, "// This is a line that makes the file larger").unwrap();
            remaining -= 1;
        }

        let tiny_rejected = filter.is_too_large(tiny.path());
        let bulky_rejected = filter.is_too_large(bulky.path());
        assert!(!tiny_rejected);
        assert!(bulky_rejected);
    }
}