use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::path::Path;
/// Category assigned to a source file by `classify_file`.
///
/// `get_threshold` picks a base line-count threshold per category and
/// `recommendation_level` picks a severity scale per category.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum FileType {
/// Fallback category: `classify_file` returns it when no other check matches.
BusinessLogic,
/// File flagged by `is_generated_code`; `tool` is the generator name when
/// `detect_generator` recognizes one, otherwise `None`.
GeneratedCode { tool: Option<String> },
/// File flagged by `is_test_file`; `test_type` comes from `detect_test_type`.
TestCode { test_type: TestType },
/// File flagged by `is_declarative_config`; `config_type` comes from
/// `detect_config_type`.
DeclarativeConfig { config_type: ConfigType },
/// File flagged by `is_proc_macro` (a path-based check).
ProceduralMacro,
/// File whose name is `build.rs` (see `is_build_script`).
BuildScript,
/// Not produced by `classify_file` in the code visible here; `get_threshold`
/// gives it the same base threshold as `BusinessLogic`.
Unknown,
}
/// Kind of test code, as guessed from the source text by `detect_test_type`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum TestType {
/// Default when no other test kind is detected.
Unit,
/// Chosen when the source text contains `tests/integration`.
Integration,
/// Chosen when the source contains `proptest!` or `quickcheck!`.
Property,
/// Chosen when the source contains `#[bench]` or `criterion`.
Benchmark,
}
/// Kind of declarative configuration, as guessed by `detect_config_type`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ConfigType {
/// Chosen when the source mentions `clap::Parser` or `structopt`.
Flags,
/// Chosen when the source mentions both `serde::` and `Deserialize`.
Schema,
/// Chosen when the source mentions `Router` or `routes`.
Routes,
/// Default when none of the other kinds is detected.
Builder,
}
/// Suggested size goal for a file, produced by `calculate_reduction_target`.
///
/// All numeric values are line counts.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ReductionTarget {
/// Reduce the file to this many lines in one step.
Single(usize),
/// Reduce the file in stages: two intermediate line counts followed by the
/// final goal.
Phased {
phase1: usize,
phase2: usize,
final_target: usize,
},
/// No reduction is suggested; `reason` is a human-readable explanation.
NotRecommended { reason: String },
}
/// Line-count limits for a file, as built by `get_threshold`.
#[derive(Debug, Clone)]
pub struct FileSizeThresholds {
/// Threshold in lines; `get_threshold` stores the density-adjusted value here.
pub base_threshold: usize,
/// `get_threshold` sets this to twice `base_threshold`.
pub max_threshold: usize,
/// Per-function floor used by `calculate_reduction_target`:
/// `function_count * min_lines_per_function` is treated as the smallest
/// achievable file size. `get_threshold` sets it to `3.0`.
pub min_lines_per_function: f32,
}
/// Bundle of the results of a file-size analysis.
///
/// NOTE(review): nothing in the visible code constructs this struct; the field
/// notes below are inferred from the types and sibling functions — confirm
/// against the code that fills it in.
#[derive(Debug)]
pub struct FileSizeAnalysis {
/// Category of the analyzed file.
pub file_type: FileType,
/// Size of the file in lines.
pub current_lines: usize,
/// Thresholds that apply to the file.
pub threshold: FileSizeThresholds,
/// Suggested size goal.
pub reduction_target: ReductionTarget,
/// Presumably lines per function, as computed inside `get_threshold` —
/// TODO confirm.
pub function_density: f32,
/// Severity of the size recommendation.
pub recommendation_level: RecommendationLevel,
}
/// Severity of a file-size recommendation, as returned by
/// `recommendation_level`.
///
/// In that function `Critical` is only reachable for
/// `FileType::BusinessLogic`, `Suppressed` is returned for
/// `FileType::GeneratedCode`, and `Low` is the result whenever no higher tier
/// applies.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RecommendationLevel {
Critical, High, Medium, Low, Suppressed, }
// Matches a whole line holding a simple public field declaration such as
// `pub name: Type,`. The type must be a single `\w+` token, so generic or
// path-qualified types (`Option<T>`, `std::path::PathBuf`) do not match.
// Compiled once on first use; the `unwrap` can only fail if the literal
// pattern itself is invalid.
static FIELD_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?m)^\s*pub\s+\w+:\s+\w+,?\s*$").unwrap());
// Matches the start of any line that begins (after optional whitespace) with
// `#[derive(`. Compiled once on first use.
static DERIVE_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?m)^\s*#\[derive\(").unwrap());
// Matches the start of a public method that takes `mut self` by value as its
// first parameter (`pub fn name(mut self`), the shape used by consuming
// builder-style setters. Compiled once on first use.
static BUILDER_METHOD_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?m)^\s*pub\s+fn\s+\w+\(mut\s+self").unwrap());
/// Classifies a source file from its text and path.
///
/// The checks run in a fixed priority order and the first match wins:
/// generated code, test code, declarative configuration, procedural macro,
/// build script. Anything that matches none of them is
/// [`FileType::BusinessLogic`].
pub fn classify_file(source: &str, path: &Path) -> FileType {
    if is_generated_code(source) {
        return FileType::GeneratedCode {
            tool: detect_generator(source),
        };
    }

    if is_test_file(path, source) {
        return FileType::TestCode {
            test_type: detect_test_type(source),
        };
    }

    if is_declarative_config(source) {
        return FileType::DeclarativeConfig {
            config_type: detect_config_type(source),
        };
    }

    if is_proc_macro(path) {
        return FileType::ProceduralMacro;
    }

    if is_build_script(path) {
        return FileType::BuildScript;
    }

    FileType::BusinessLogic
}
/// Returns `true` when one of the first 20 lines of `source` contains a
/// well-known "generated file" marker.
///
/// Matching is a case-sensitive substring search; lines after the 20th are
/// never inspected.
fn is_generated_code(source: &str) -> bool {
    const HEADER_LINES: usize = 20;
    const MARKERS: [&str; 6] = [
        "DO NOT EDIT",
        "automatically generated",
        "AUTO-GENERATED",
        "@generated",
        "Code generated by",
        "autogenerated",
    ];

    for header_line in source.lines().take(HEADER_LINES) {
        for marker in MARKERS.iter() {
            if header_line.contains(*marker) {
                return true;
            }
        }
    }
    false
}
/// Guesses which tool generated `source` from crate paths mentioned anywhere
/// in the text.
///
/// Signatures are tried in table order and the first one found wins; returns
/// `None` when none of them occurs.
fn detect_generator(source: &str) -> Option<String> {
    // (substring searched for, tool name reported)
    const SIGNATURES: [(&str, &str); 4] = [
        ("prost::Message", "prost"),
        ("diesel::", "diesel"),
        ("tonic::", "tonic"),
        ("sea_orm::", "sea-orm"),
    ];

    SIGNATURES
        .iter()
        .find(|&&(needle, _)| source.contains(needle))
        .map(|&(_, tool)| tool.to_string())
}
/// Decides whether a file should be treated as test code.
///
/// A file counts as test code when any of the following holds:
/// - one of its parent directories is named `tests` or `benches`;
/// - its path ends in `_test.rs` or `_tests.rs`;
/// - its source contains `#[test]`, `#[cfg(test)]` or `#[bench]`.
///
/// Directory names are compared component by component rather than by
/// searching the rendered path for `"/tests/"`, so relative paths such as
/// `tests/api.rs` and platforms whose separator is not `/` are handled too.
fn is_test_file(path: &Path, source: &str) -> bool {
    // Only directories are inspected here: the file name itself is covered by
    // the suffix check below.
    let in_test_dir = path.parent().map_or(false, |dir| {
        dir.components().any(|component| {
            let name = component.as_os_str();
            name == "tests" || name == "benches"
        })
    });

    let path_str = path.to_string_lossy();
    let has_test_suffix = path_str.ends_with("_test.rs") || path_str.ends_with("_tests.rs");

    let has_test_attrs = source.contains("#[test]")
        || source.contains("#[cfg(test)]")
        || source.contains("#[bench]");

    in_test_dir || has_test_suffix || has_test_attrs
}
/// Guesses the kind of tests contained in `source` from substrings of its
/// text.
///
/// Checks run in priority order — property, benchmark, integration — and
/// fall back to [`TestType::Unit`].
fn detect_test_type(source: &str) -> TestType {
    let mentions = |needle: &str| source.contains(needle);

    if mentions("proptest!") || mentions("quickcheck!") {
        return TestType::Property;
    }
    if mentions("#[bench]") || mentions("criterion") {
        return TestType::Benchmark;
    }
    if mentions("tests/integration") {
        return TestType::Integration;
    }
    TestType::Unit
}
/// Heuristically decides whether `source` is mostly declarative
/// configuration.
///
/// Counts matches of the field, derive and builder-method patterns and
/// returns `true` when they amount to more than half of the file's lines
/// (blank lines included in the line count). Empty input is never
/// declarative.
fn is_declarative_config(source: &str) -> bool {
    let line_count = source.lines().count();
    if line_count == 0 {
        return false;
    }

    let declarative_hits: usize = [&FIELD_PATTERN, &DERIVE_PATTERN, &BUILDER_METHOD_PATTERN]
        .iter()
        .map(|pattern| pattern.find_iter(source).count())
        .sum();

    declarative_hits as f32 / line_count as f32 > 0.5
}
/// Guesses what kind of declarative configuration `source` holds from
/// substrings of its text.
///
/// Checks run in priority order — CLI flags, serde schema, routes — and
/// fall back to [`ConfigType::Builder`].
fn detect_config_type(source: &str) -> ConfigType {
    let mentions = |needle: &str| source.contains(needle);

    if mentions("clap::Parser") || mentions("structopt") {
        return ConfigType::Flags;
    }
    if mentions("serde::") && mentions("Deserialize") {
        return ConfigType::Schema;
    }
    if mentions("Router") || mentions("routes") {
        return ConfigType::Routes;
    }
    ConfigType::Builder
}
/// Returns `true` when the file lives under a directory named `proc-macro`
/// or `macros`.
///
/// Directory names are compared component by component rather than by
/// searching the rendered path for `"/macros/"`, so relative paths such as
/// `macros/derive.rs` and platforms whose separator is not `/` are handled
/// too. The file name itself is not considered (`src/macros.rs` is not a
/// match).
fn is_proc_macro(path: &Path) -> bool {
    path.parent().map_or(false, |dir| {
        dir.components().any(|component| {
            let name = component.as_os_str();
            name == "proc-macro" || name == "macros"
        })
    })
}
/// Returns `true` when the final component of `path` is exactly `build.rs`.
///
/// Paths without a file name, or whose file name is not valid UTF-8, are not
/// build scripts.
fn is_build_script(path: &Path) -> bool {
    let file_name = path.file_name().and_then(|name| name.to_str());
    matches!(file_name, Some("build.rs"))
}
/// Builds the size thresholds for a file of the given type.
///
/// Starts from a per-type base line count, scales it by function density
/// (`lines / function_count`, or `0.0` when there are no functions) via
/// `adjust_for_density`, and sets the maximum to twice the adjusted base.
/// `min_lines_per_function` is always `3.0`.
pub fn get_threshold(
    file_type: &FileType,
    function_count: usize,
    lines: usize,
) -> FileSizeThresholds {
    let lines_per_function = match function_count {
        // No functions: density is defined as zero, which leaves the base
        // threshold unadjusted.
        0 => 0.0,
        count => lines as f32 / count as f32,
    };

    let unadjusted = match file_type {
        FileType::BuildScript => 300,
        FileType::BusinessLogic | FileType::Unknown => 400,
        FileType::ProceduralMacro => 500,
        FileType::TestCode { .. } => 650,
        FileType::DeclarativeConfig { .. } => 1200,
        FileType::GeneratedCode { .. } => 5000,
    };

    let base_threshold = adjust_for_density(unadjusted, lines_per_function);
    FileSizeThresholds {
        base_threshold,
        max_threshold: base_threshold * 2,
        min_lines_per_function: 3.0,
    }
}
/// Scales `base_threshold` according to `density` (lines per function).
///
/// | density      | result        |
/// |--------------|---------------|
/// | `< 5.0`      | base, as is   |
/// | `< 10.0`     | base × 1.2    |
/// | `< 20.0`     | base × 1.5    |
/// | anything else (including NaN) | base × 2.0 |
///
/// Scaled results are computed in `f32` and truncated to `usize`.
fn adjust_for_density(base_threshold: usize, density: f32) -> usize {
    if density < 5.0 {
        // Returned untouched so no float round-trip is applied.
        return base_threshold;
    }

    let factor: f32 = if density < 10.0 {
        1.2
    } else if density < 20.0 {
        1.5
    } else {
        2.0
    };
    (base_threshold as f32 * factor) as usize
}
/// Suggests how far a file should be shrunk.
///
/// The final target is the larger of `threshold.base_threshold` and the
/// minimum achievable size, `function_count * threshold.min_lines_per_function`.
///
/// Returns:
/// - [`ReductionTarget::NotRecommended`] when the file is already within the
///   threshold, or when the final target is not smaller than the current size
///   (so there is nothing to reduce to);
/// - [`ReductionTarget::Phased`] when the file is more than three times the
///   base threshold and the intermediate steps are meaningful, i.e.
///   `phase1 > phase2 >= final_target`;
/// - [`ReductionTarget::Single`] otherwise.
pub fn calculate_reduction_target(
    current_lines: usize,
    threshold: &FileSizeThresholds,
    function_count: usize,
) -> ReductionTarget {
    let base = threshold.base_threshold;
    if current_lines <= base {
        return ReductionTarget::NotRecommended {
            reason: "File is already within size threshold".to_string(),
        };
    }

    let min_achievable = (function_count as f32 * threshold.min_lines_per_function) as usize;
    let target = base.max(min_achievable);

    // A "reduction" to a size at or above the current one is not a reduction;
    // this happens when the function count alone pushes the floor that high.
    if target >= current_lines {
        return ReductionTarget::NotRecommended {
            reason: "File is already at or below the minimum size achievable for its function count"
                .to_string(),
        };
    }

    let phase1 = current_lines / 2;
    // Never let an intermediate step undershoot the final target.
    let phase2 = ((base as f32 * 1.5) as usize).max(target);

    if current_lines > base.saturating_mul(3) && phase1 > phase2 {
        ReductionTarget::Phased {
            phase1,
            phase2,
            final_target: target,
        }
    } else {
        // Either the file is only moderately oversized, or the phases would
        // collapse onto (or below) the final target; one step is enough.
        ReductionTarget::Single(target)
    }
}
/// Rates how urgently a file should be reduced.
///
/// Generated code is always [`RecommendationLevel::Suppressed`]. For every
/// other type the rating depends on `current_lines / threshold.base_threshold`:
///
/// | ratio   | business logic | all other types |
/// |---------|----------------|-----------------|
/// | `> 2.0` | `Critical`     | `High`          |
/// | `> 1.5` | `High`         | `Medium`        |
/// | `> 1.0` | `Medium`       | `Low`           |
/// | else    | `Low`          | `Low`           |
pub fn recommendation_level(
    file_type: &FileType,
    current_lines: usize,
    threshold: &FileSizeThresholds,
) -> RecommendationLevel {
    if matches!(file_type, FileType::GeneratedCode { .. }) {
        return RecommendationLevel::Suppressed;
    }

    let ratio = current_lines as f32 / threshold.base_threshold as f32;
    let is_business_logic = matches!(file_type, FileType::BusinessLogic);

    if is_business_logic {
        // Business logic is held to the strictest scale.
        if ratio > 2.0 {
            RecommendationLevel::Critical
        } else if ratio > 1.5 {
            RecommendationLevel::High
        } else if ratio > 1.0 {
            RecommendationLevel::Medium
        } else {
            RecommendationLevel::Low
        }
    } else if ratio > 2.0 {
        RecommendationLevel::High
    } else if ratio > 1.5 {
        RecommendationLevel::Medium
    } else {
        RecommendationLevel::Low
    }
}
// Unit tests for the classification helpers and the threshold / target /
// recommendation calculations above.
#[cfg(test)]
mod tests {
use super::*;
// A marker within the first lines of the file flags it as generated.
#[test]
fn test_generated_code_detection() {
let generated = r#"
// DO NOT EDIT
// This file is automatically generated
pub struct Generated {}
"#;
assert!(is_generated_code(generated));
}
// Ordinary source without any marker is not flagged as generated.
#[test]
fn test_not_generated_code() {
let normal = r#"
pub struct Normal {
field: String,
}
"#;
assert!(!is_generated_code(normal));
}
// A struct made up almost entirely of simple `pub` fields plus a derive
// exceeds the 50% match ratio and is classified as declarative.
#[test]
fn test_declarative_config_detection() {
let flags = r#"
#[derive(Debug)]
pub struct Flags {
pub verbose: bool,
pub quiet: bool,
pub output: PathBuf,
pub debug: bool,
pub trace: bool,
pub log_level: String,
pub log_file: PathBuf,
}
"#;
assert!(is_declarative_config(flags));
}
// With 600 functions at 3 lines each the floor is 1800 lines, so the final
// target must not drop below that even though the base threshold is 500.
#[test]
fn test_reduction_target_respects_function_count() {
let threshold = FileSizeThresholds {
base_threshold: 500,
max_threshold: 1000,
min_lines_per_function: 3.0,
};
let target = calculate_reduction_target(2000, &threshold, 600);
match target {
ReductionTarget::Single(t) => assert!(t >= 1800),
ReductionTarget::Phased { final_target, .. } => assert!(final_target >= 1800),
_ => panic!("Expected reduction target"),
}
}
// Density below 5 leaves the base untouched; density of 20 or more doubles it.
#[test]
fn test_function_density_adjustment() {
let low_density = adjust_for_density(400, 4.0); let high_density = adjust_for_density(400, 25.0);
assert_eq!(low_density, 400); assert!(high_density > 600); }
// Test attributes in the source are enough to flag a file as test code, even
// when its path (`src/main.rs`) does not look like a test path.
#[test]
fn test_test_file_detection() {
let test_code = r#"
#[cfg(test)]
mod tests {
#[test]
fn test_something() {}
}
"#;
let path = Path::new("src/main.rs");
assert!(is_test_file(path, test_code));
}
// Only a file named exactly `build.rs` counts as a build script.
#[test]
fn test_build_script_detection() {
assert!(is_build_script(Path::new("build.rs")));
assert!(!is_build_script(Path::new("src/main.rs")));
}
// Business logic tiers against a 400-line threshold:
// 900 lines (ratio 2.25) -> Critical, 650 (1.625) -> High, 450 (1.125) -> Medium.
#[test]
fn test_recommendation_level_for_business_logic() {
let file_type = FileType::BusinessLogic;
let threshold = FileSizeThresholds {
base_threshold: 400,
max_threshold: 800,
min_lines_per_function: 3.0,
};
assert_eq!(
recommendation_level(&file_type, 900, &threshold),
RecommendationLevel::Critical
);
assert_eq!(
recommendation_level(&file_type, 650, &threshold),
RecommendationLevel::High
);
assert_eq!(
recommendation_level(&file_type, 450, &threshold),
RecommendationLevel::Medium
);
}
// Generated code is suppressed regardless of how far over the threshold it is.
#[test]
fn test_generated_code_suppressed() {
let file_type = FileType::GeneratedCode { tool: None };
let threshold = FileSizeThresholds {
base_threshold: 400,
max_threshold: 800,
min_lines_per_function: 3.0,
};
assert_eq!(
recommendation_level(&file_type, 10000, &threshold),
RecommendationLevel::Suppressed
);
}
}