use anyhow::Result;
use blake3;
use std::fs;
use std::path::{Path, PathBuf};
use std::time::{Duration, SystemTime};
use crate::entropy::EntropyAnalyzer;
use crate::tdg::{
config::TdgConfig, AdaptiveThresholdFactory, AdaptiveThresholdManager, AnalysisMetadata,
ComponentScores, FileIdentity, FullTdgRecord, Grade, Language, MetricCategory,
OperationPriority, PenaltyTracker, PlatformResourceController, ProjectScore,
ResourceControllerFactory, SchedulerFactory, SemanticSignature, SimpleFairScheduler, TdgScore,
TieredStorageFactory, TieredStore,
};
/// AST-backed TDG analyzer.
///
/// Holds the scoring configuration plus four optional subsystems. Each
/// subsystem is `None` unless the analyzer was built by a constructor that
/// creates it (`with_storage` or `with_full_resource_management`).
pub struct TdgAnalyzerAst {
// Weights and thresholds read by the scoring methods.
config: TdgConfig,
// Score store consulted before analysis and written after it; `None` disables both.
storage: Option<TieredStore>,
// Used by the commit/background entry points to schedule work; `None` skips scheduling.
scheduler: Option<SimpleFairScheduler>,
// Receives one timing sample per analysis (cache hit or miss) when present.
adaptive_manager: Option<AdaptiveThresholdManager>,
// Asked for a resource allocation before each file analysis when present.
resource_controller: Option<PlatformResourceController>,
}
impl TdgAnalyzerAst {
pub fn new() -> Result<Self> {
Ok(Self {
config: TdgConfig::default(),
storage: None,
scheduler: None,
adaptive_manager: None,
resource_controller: None,
})
}
/// Builds an analyzer around the supplied `config` with every optional
/// subsystem disabled.
///
/// # Errors
/// The current implementation always returns `Ok`.
pub fn with_config(config: TdgConfig) -> Result<Self> {
    let analyzer = Self {
        config,
        storage: None,
        scheduler: None,
        adaptive_manager: None,
        resource_controller: None,
    };
    Ok(analyzer)
}
/// Builds an analyzer with all four subsystems created from their
/// default/balanced factories.
///
/// # Errors
/// Returns the error from `TieredStorageFactory::create_default` when the
/// store cannot be created; nothing else is constructed in that case.
pub fn with_storage(config: TdgConfig) -> Result<Self> {
    // Struct-literal fields are evaluated in the order written, so the fallible
    // storage creation still happens before the other factories run.
    Ok(Self {
        config,
        storage: Some(TieredStorageFactory::create_default()?),
        scheduler: Some(SchedulerFactory::create_balanced()),
        adaptive_manager: Some(AdaptiveThresholdFactory::create_default()),
        resource_controller: Some(ResourceControllerFactory::create_default()),
    })
}
/// Builds an analyzer with the background-optimized scheduler and the
/// production-optimized adaptive manager and resource controller, and starts
/// resource monitoring before returning.
///
/// # Errors
/// Fails when the default tiered store cannot be created or when
/// `start_monitoring` reports an error.
pub async fn with_full_resource_management(config: TdgConfig) -> Result<Self> {
    let store = TieredStorageFactory::create_default()?;
    let fair_scheduler = SchedulerFactory::create_background_optimized();
    let thresholds = AdaptiveThresholdFactory::create_prod_optimized();
    let controller = ResourceControllerFactory::create_prod_optimized();

    // Monitoring is started before the analyzer is handed to the caller.
    controller.start_monitoring().await?;

    let analyzer = Self {
        config,
        storage: Some(store),
        scheduler: Some(fair_scheduler),
        adaptive_manager: Some(thresholds),
        resource_controller: Some(controller),
    };
    Ok(analyzer)
}
/// Analyzes the file at `path` with [`OperationPriority::Medium`].
///
/// # Errors
/// Propagates any error from `analyze_file_with_priority`.
pub async fn analyze_file(&self, path: &Path) -> Result<TdgScore> {
    let priority = OperationPriority::Medium;
    self.analyze_file_with_priority(path, priority).await
}
/// Analyzes the file at `path`, requesting resources at the given `priority`.
///
/// The file is read, hashed with BLAKE3, and looked up in the hot cache; on a
/// miss the source is analyzed and the result is stored.
///
/// # Errors
/// Fails when the resource request fails, the file cannot be read as UTF-8
/// text, or the cache lookup / analysis / storage step returns an error.
pub async fn analyze_file_with_priority(
    &self,
    path: &Path,
    priority: OperationPriority,
) -> Result<TdgScore> {
    let start_time = SystemTime::now();
    let language = Language::from_extension(path);

    // Bound to a named local so the allocation (if any) lives until this
    // function returns rather than being dropped immediately.
    let _resource_allocation = self.request_analysis_resources(path, priority).await?;

    let source = fs::read_to_string(path)?;
    let content_hash = blake3::hash(source.as_bytes());

    let cached = self
        .check_cache_and_return(&content_hash, language, path, start_time)
        .await?;
    match cached {
        Some(hit) => Ok(hit),
        None => {
            self.perform_analysis_and_store(path, &source, language, content_hash, start_time)
                .await
        }
    }
}
/// Requests an analysis allocation from the resource controller, if one is configured.
///
/// Returns `Ok(None)` when the analyzer has no resource controller.
///
/// # Errors
/// Fails when the memory estimate cannot be computed (file metadata
/// unavailable) or when the controller rejects the request.
async fn request_analysis_resources(
    &self,
    path: &Path,
    priority: OperationPriority,
) -> Result<Option<crate::tdg::resource_control::ResourceAllocation>> {
    let Some(controller) = &self.resource_controller else {
        return Ok(None);
    };

    let estimated_memory = self.estimate_analysis_memory(path)?;
    let operation_id = format!("analyze_{}", path.display());
    let allocation = controller
        .request_resources(
            operation_id,
            crate::tdg::resource_control::OperationType::Analysis,
            priority,
            estimated_memory,
        )
        .await?;
    Ok(Some(allocation))
}
/// Looks up `content_hash` in the hot cache and, on a hit, rebuilds a
/// [`TdgScore`] from the cached total.
///
/// Returns `Ok(None)` when there is no storage or no hot entry. On a hit an
/// adaptive sample is recorded first (when an adaptive manager is configured).
///
/// # Errors
/// Fails only when recording the adaptive sample fails.
async fn check_cache_and_return(
    &self,
    content_hash: &blake3::Hash,
    language: Language,
    path: &Path,
    start_time: SystemTime,
) -> Result<Option<TdgScore>> {
    let Some(storage) = &self.storage else {
        return Ok(None);
    };
    let Some(hot_entry) = storage.get_hot(content_hash) else {
        return Ok(None);
    };

    if let Some(adaptive) = &self.adaptive_manager {
        let elapsed = start_time.elapsed().unwrap_or_default();
        // presumably `true` marks the sample as a cache hit; verify against `create_sample`
        let sample = adaptive.create_sample(elapsed, true, 0).await;
        adaptive.record_sample(sample).await?;
    }

    let total = hot_entry.total_score;
    let mut cached_score = TdgScore {
        total,
        grade: Grade::from_score(total),
        language,
        confidence: language.confidence(),
        file_path: Some(path.to_path_buf()),
        ..Default::default()
    };
    // NOTE(review): the component fields are left at their defaults here; if
    // `calculate_total` derives `total` from them, the cached total set above
    // may be overwritten. `get_stored_score` does not make this call — confirm intent.
    cached_score.calculate_total();
    Ok(Some(cached_score))
}
/// Runs a fresh analysis of `source`, persists the result, and records an
/// adaptive timing sample.
///
/// # Errors
/// Fails when the analysis, the storage write, or the sample recording fails.
async fn perform_analysis_and_store(
    &self,
    path: &Path,
    source: &str,
    language: Language,
    content_hash: blake3::Hash,
    start_time: SystemTime,
) -> Result<TdgScore> {
    // Times the analysis alone; `start_time` from the caller covers the whole request.
    let analysis_clock = SystemTime::now();
    let score = self.analyze_source(source, language, Some(path.to_path_buf()))?;
    let analysis_duration = analysis_clock.elapsed().unwrap_or_default();

    self.store_analysis_record(path, &score, content_hash, analysis_duration, language)
        .await?;

    if let Some(adaptive) = &self.adaptive_manager {
        let request_duration = start_time.elapsed().unwrap_or_default();
        // presumably `false` marks the sample as a cache miss; verify against `create_sample`
        let sample = adaptive.create_sample(request_duration, false, 0).await;
        adaptive.record_sample(sample).await?;
    }

    Ok(score)
}
/// Persists a full analysis record for `path` when storage is configured.
///
/// Does nothing and returns `Ok(())` when the analyzer has no storage. The
/// component breakdowns and most of the semantic signature are stored empty;
/// only `ast_structure_hash` is filled, from the first eight bytes of
/// `content_hash` interpreted as a little-endian `u64`.
///
/// # Errors
/// Fails when file metadata cannot be read or the store rejects the record.
async fn store_analysis_record(
    &self,
    path: &Path,
    score: &TdgScore,
    content_hash: blake3::Hash,
    analysis_duration: Duration,
    language: Language,
) -> Result<()> {
    let Some(storage) = &self.storage else {
        return Ok(());
    };

    let file_metadata = fs::metadata(path)?;
    // Only consult the clock when the platform cannot report a modification time.
    let modified_time = file_metadata
        .modified()
        .unwrap_or_else(|_| SystemTime::now());

    // The hash is 32 bytes long, so copying its first eight bytes cannot fail.
    let mut signature_bytes = [0u8; 8];
    signature_bytes.copy_from_slice(&content_hash.as_bytes()[..8]);

    // Saturate instead of silently truncating the 128-bit millisecond count.
    let analysis_duration_ms =
        u64::try_from(analysis_duration.as_millis()).unwrap_or(u64::MAX);

    let record = FullTdgRecord {
        identity: FileIdentity {
            path: path.to_path_buf(),
            content_hash,
            size_bytes: file_metadata.len(),
            modified_time,
        },
        score: score.clone(),
        components: ComponentScores {
            complexity_breakdown: std::collections::HashMap::new(),
            duplication_sources: Vec::new(),
            coupling_dependencies: Vec::new(),
            doc_missing_items: Vec::new(),
            consistency_violations: Vec::new(),
        },
        semantic_sig: SemanticSignature {
            ast_structure_hash: u64::from_le_bytes(signature_bytes),
            identifier_pattern: String::new(),
            control_flow_pattern: String::new(),
            import_dependencies: Vec::new(),
        },
        metadata: AnalysisMetadata {
            analyzer_version: env!("CARGO_PKG_VERSION").to_string(),
            analysis_duration_ms,
            language_confidence: language.confidence(),
            analysis_timestamp: SystemTime::now(),
            cache_hit: false,
        },
    };
    storage.store(record).await?;
    Ok(())
}
/// Analyzes `path` at [`OperationPriority::Critical`], first scheduling it as
/// a commit operation when a scheduler is configured.
///
/// # Errors
/// Returns `"Scheduling failed: …"` when the scheduler rejects the request,
/// or any error from the analysis itself.
pub async fn analyze_file_commit(&self, path: &Path) -> Result<TdgScore> {
    // The value returned by the scheduler is kept alive until the analysis finishes.
    let _guard = match &self.scheduler {
        Some(scheduler) => {
            let slot = scheduler
                .schedule_commit(path.to_path_buf())
                .await
                .map_err(|e| anyhow::anyhow!("Scheduling failed: {e}"))?;
            Some(slot)
        }
        None => None,
    };

    let priority = OperationPriority::Critical;
    self.analyze_file_with_priority(path, priority).await
}
/// Analyzes `path` at [`OperationPriority::Low`], first scheduling it as a
/// background operation when a scheduler is configured.
///
/// # Errors
/// Returns `"Scheduling failed: …"` when the scheduler rejects the request,
/// or any error from the analysis itself.
pub async fn analyze_file_background(&self, path: &Path) -> Result<TdgScore> {
    // The value returned by the scheduler is kept alive until the analysis finishes.
    let _guard = match &self.scheduler {
        Some(scheduler) => {
            let slot = scheduler
                .schedule_background(path.to_path_buf())
                .await
                .map_err(|e| anyhow::anyhow!("Scheduling failed: {e}"))?;
            Some(slot)
        }
        None => None,
    };

    let priority = OperationPriority::Low;
    self.analyze_file_with_priority(path, priority).await
}
/// Returns the scheduler's statistics, or `None` when no scheduler is configured.
pub async fn get_scheduler_stats(&self) -> Option<crate::tdg::SchedulingStatistics> {
    match &self.scheduler {
        Some(scheduler) => Some(scheduler.get_statistics().await),
        None => None,
    }
}
/// Borrows the tiered store, or returns `None` when storage is disabled.
#[must_use]
pub fn get_storage(&self) -> Option<&TieredStore> {
    Option::as_ref(&self.storage)
}
/// Looks up a previously stored score for the current contents of `path`.
///
/// The file is re-read and hashed; the hot cache is tried first and the full
/// record store second. Returns `Ok(None)` when storage is disabled or
/// neither tier has an entry for the hash.
///
/// # Errors
/// Fails when the file cannot be read or the full-record lookup fails.
pub async fn get_stored_score(&self, path: &Path) -> Result<Option<TdgScore>> {
    let Some(storage) = &self.storage else {
        return Ok(None);
    };

    let contents = fs::read_to_string(path)?;
    let content_hash = blake3::hash(contents.as_bytes());

    if let Some(hot_entry) = storage.get_hot(&content_hash) {
        // Hot entries only supply the total; the rest is rebuilt from the path.
        let language = Language::from_extension(path);
        return Ok(Some(TdgScore {
            total: hot_entry.total_score,
            grade: Grade::from_score(hot_entry.total_score),
            language,
            confidence: language.confidence(),
            file_path: Some(path.to_path_buf()),
            ..Default::default()
        }));
    }

    let full_record = storage.retrieve_full(&content_hash).await?;
    Ok(full_record.map(|record| record.score))
}
/// Returns the tiered store's statistics, or `None` when storage is disabled.
#[must_use]
pub fn get_storage_stats(&self) -> Option<crate::tdg::StorageStatistics> {
    let store = self.storage.as_ref()?;
    Some(store.get_statistics())
}
/// Returns the adaptive manager's performance statistics, or `None` when no
/// adaptive manager is configured.
pub async fn get_adaptive_stats(&self) -> Option<crate::tdg::PerformanceStatistics> {
    match &self.adaptive_manager {
        Some(adaptive) => Some(adaptive.get_performance_stats().await),
        None => None,
    }
}
/// Returns the adaptive manager's current thresholds, or `None` when no
/// adaptive manager is configured.
pub async fn get_current_thresholds(&self) -> Option<crate::tdg::CurrentThresholds> {
    match &self.adaptive_manager {
        Some(adaptive) => Some(adaptive.get_current_thresholds().await),
        None => None,
    }
}
/// Resets the adaptive thresholds to their defaults; a no-op when no adaptive
/// manager is configured.
///
/// # Errors
/// Propagates the error from `reset_to_defaults`.
pub async fn reset_adaptive_thresholds(&self) -> Result<()> {
    let Some(adaptive) = &self.adaptive_manager else {
        return Ok(());
    };
    adaptive.reset_to_defaults().await?;
    Ok(())
}
/// Returns the resource controller's enforcement statistics, or `None` when
/// no controller is configured.
pub async fn get_resource_stats(&self) -> Option<crate::tdg::ResourceEnforcementStats> {
    match &self.resource_controller {
        Some(controller) => Some(controller.get_enforcement_stats().await),
        None => None,
    }
}
/// Returns the resource controller's current usage, or `None` when no
/// controller is configured.
pub async fn get_resource_usage(&self) -> Option<crate::tdg::ResourceUsage> {
    match &self.resource_controller {
        Some(controller) => Some(controller.get_current_usage().await),
        None => None,
    }
}
/// Estimates the memory needed to analyze `path`.
///
/// The estimate is four times the file size in MiB plus a fixed per-language
/// overhead, with a floor of `5.0`. The result is presumably in megabytes,
/// matching the file-size term — confirm against `request_resources`.
///
/// # Errors
/// Fails when the file's metadata cannot be read.
fn estimate_analysis_memory(&self, path: &Path) -> Result<f64> {
    const BYTES_PER_MIB: f64 = 1024.0 * 1024.0;

    let size_bytes = fs::metadata(path)?.len();
    let size_scaled = (size_bytes as f64 / BYTES_PER_MIB) * 4.0;

    let overhead = match Language::from_extension(path) {
        Language::Rust => 20.0,
        Language::Cpp | Language::C => 15.0,
        Language::Java => 12.0,
        Language::TypeScript => 10.0,
        Language::Python => 8.0,
        Language::JavaScript => 6.0,
        _ => 5.0,
    };

    Ok((size_scaled + overhead).max(5.0))
}
/// Scores `source` as `language`, dispatching to the language-specific analyzer.
///
/// Languages without a dedicated arm have their confidence halved and are
/// scored by the heuristic analyzer. After dispatch, the penalty attributions
/// collected by the tracker are attached and the total is computed.
///
/// # Errors
/// Propagates any error from the selected analyzer.
pub fn analyze_source(
    &self,
    source: &str,
    language: Language,
    file_path: Option<PathBuf>,
) -> Result<TdgScore> {
    let mut tracker = PenaltyTracker::new();
    let mut score = TdgScore {
        language,
        confidence: language.confidence(),
        file_path,
        ..Default::default()
    };

    let outcome = match language {
        Language::Rust => self.analyze_rust_ast(source, &mut score, &mut tracker),
        Language::Python => self.analyze_python_ast(source, &mut score, &mut tracker),
        Language::JavaScript | Language::TypeScript => {
            self.analyze_javascript_ast(source, &mut score, &mut tracker)
        }
        Language::Go => self.analyze_go_ast(source, &mut score, &mut tracker),
        Language::Java => self.analyze_java_ast(source, &mut score, &mut tracker),
        Language::C | Language::Cpp => self.analyze_c_ast(source, &mut score, &mut tracker),
        Language::Ruchy => self.analyze_ruchy_ast(source, &mut score, &mut tracker),
        _ => {
            score.confidence *= 0.5;
            self.analyze_heuristic(source, &mut score, &mut tracker)
        }
    };
    outcome?;

    score.penalties_applied = tracker.get_attributions();
    score.calculate_total();
    Ok(score)
}
/// Scores Rust source by parsing it with `syn` when the `rust-ast` feature is
/// enabled; otherwise defers to the heuristic analyzer.
///
/// # Errors
/// Fails when `syn` cannot parse the source, or when the heuristic fallback fails.
fn analyze_rust_ast(
    &self,
    source: &str,
    score: &mut TdgScore,
    tracker: &mut PenaltyTracker,
) -> Result<()> {
    #[cfg(not(feature = "rust-ast"))]
    {
        self.analyze_heuristic(source, score, tracker)?;
    }
    #[cfg(feature = "rust-ast")]
    {
        use syn::visit::Visit;

        let ast: syn::File = syn::parse_str(source)?;
        let mut visitor = RustComplexityVisitor::new();
        visitor.visit_file(&ast);

        // The scorers are invoked in a fixed order because each one may record
        // penalties on the shared tracker.
        let structural = self.score_structural_complexity(
            visitor.cyclomatic_complexity,
            visitor.cognitive_complexity,
            visitor.max_nesting_depth,
            visitor.max_method_length,
            tracker,
        );
        let semantic = self.score_semantic_complexity(
            visitor.max_params,
            visitor.generic_count,
            visitor.abstraction_levels,
            tracker,
        );
        let duplication = self.analyze_duplication_ast(source, Language::Rust, tracker);
        let coupling = self.score_coupling(
            visitor.import_count,
            visitor.external_calls,
            visitor.interface_implementations,
            tracker,
        );
        let documentation = self.score_documentation(
            visitor.documented_items,
            visitor.total_public_items,
            visitor.comment_lines,
            visitor.total_lines,
            tracker,
        );
        let consistency = self.score_consistency_rust(&ast, tracker);
        let entropy = self.score_entropy_analysis(source, Language::Rust, tracker);

        score.structural_complexity = structural;
        score.semantic_complexity = semantic;
        score.duplication_ratio = duplication;
        score.coupling_score = coupling;
        score.doc_coverage = documentation;
        score.consistency_score = consistency;
        score.entropy_score = entropy;
    }
    Ok(())
}
/// Scores Python source by parsing it with `rustpython_parser` when the
/// `python-ast` feature is enabled; otherwise defers to the heuristic analyzer.
///
/// Decorator and metaclass counts are passed to the semantic scorer in the
/// type-complexity and abstraction-level positions respectively.
///
/// # Errors
/// Returns `"Python parse error: …"` when parsing fails, or the error from
/// the heuristic fallback.
fn analyze_python_ast(
    &self,
    source: &str,
    score: &mut TdgScore,
    tracker: &mut PenaltyTracker,
) -> Result<()> {
    #[cfg(not(feature = "python-ast"))]
    {
        self.analyze_heuristic(source, score, tracker)?;
    }
    #[cfg(feature = "python-ast")]
    {
        use rustpython_parser::{parse, Mode};

        let module = parse(source, Mode::Module, "<string>")
            .map_err(|e| anyhow::anyhow!("Python parse error: {e:?}"))?;
        let mut visitor = PythonComplexityVisitor::new();
        visitor.analyze_module(module);

        // The scorers are invoked in a fixed order because each one may record
        // penalties on the shared tracker.
        let structural = self.score_structural_complexity(
            visitor.cyclomatic_complexity,
            visitor.cognitive_complexity,
            visitor.max_nesting_depth,
            visitor.max_method_length,
            tracker,
        );
        let semantic = self.score_semantic_complexity(
            visitor.max_params,
            visitor.decorator_count,
            visitor.metaclass_count,
            tracker,
        );
        let duplication = self.analyze_duplication_ast(source, Language::Python, tracker);
        // No interface-implementation count is collected for Python, so 0 is passed.
        let coupling =
            self.score_coupling(visitor.import_count, visitor.external_calls, 0, tracker);
        let documentation = self.score_documentation(
            visitor.documented_functions,
            visitor.total_functions,
            visitor.docstring_lines,
            visitor.total_lines,
            tracker,
        );
        let consistency = self.score_consistency_python(source, tracker);

        score.structural_complexity = structural;
        score.semantic_complexity = semantic;
        score.duplication_ratio = duplication;
        score.coupling_score = coupling;
        score.doc_coverage = documentation;
        score.consistency_score = consistency;
    }
    Ok(())
}
/// Scores JavaScript or TypeScript source with the swc parser when the
/// `typescript-ast` feature is enabled.
///
/// The source is always parsed with TypeScript syntax (TSX and decorators
/// enabled). When parsing fails, or when the feature is disabled, the
/// heuristic analyzer is used instead.
///
/// # Errors
/// Only the heuristic fallback can return an error; a parse failure is not
/// reported to the caller.
fn analyze_javascript_ast(
&self,
source: &str,
score: &mut TdgScore,
tracker: &mut PenaltyTracker,
) -> Result<()> {
#[cfg(feature = "typescript-ast")]
{
use swc_common::{sync::Lrc, FileName, SourceMap};
use swc_ecma_parser::{lexer::Lexer, Parser, StringInput, Syntax, TsSyntax};
use swc_ecma_visit::VisitWith;
// The source is registered under a fixed placeholder file name.
let cm: Lrc<SourceMap> = Default::default();
let fm = cm.new_source_file(FileName::Custom("test.js".into()).into(), source.to_string());
let lexer = Lexer::new(
Syntax::Typescript(TsSyntax {
tsx: true,
decorators: true,
..Default::default()
}),
Default::default(),
StringInput::from(&*fm),
None,
);
let mut parser = Parser::new_from(lexer);
match parser.parse_module() {
Ok(module) => {
let mut visitor = JavaScriptComplexityVisitor::new();
module.visit_with(&mut visitor);
score.structural_complexity = self.score_structural_complexity(
visitor.cyclomatic_complexity,
visitor.cognitive_complexity,
visitor.max_nesting_depth,
visitor.max_function_length,
tracker,
);
// Async count and callback depth are passed in the type-complexity and
// abstraction-level positions of the semantic scorer.
score.semantic_complexity = self.score_semantic_complexity(
visitor.max_params,
visitor.async_count,
visitor.callback_depth,
tracker,
);
// Duplication is always scored as `Language::JavaScript`, including for TypeScript input.
score.duplication_ratio =
self.analyze_duplication_ast(source, Language::JavaScript, tracker);
score.coupling_score = self.score_coupling(
visitor.import_count,
visitor.external_calls,
visitor.class_count,
tracker,
);
score.doc_coverage = self.score_documentation(
visitor.jsdoc_count,
visitor.function_count,
visitor.comment_lines,
visitor.total_lines,
tracker,
);
score.consistency_score = self.score_consistency_javascript(source, tracker);
}
// The parse error itself is discarded; the heuristic path takes over.
Err(_) => {
self.analyze_heuristic(source, score, tracker)?;
}
}
}
#[cfg(not(feature = "typescript-ast"))]
{
self.analyze_heuristic(source, score, tracker)?;
}
Ok(())
}
/// Scores Go source through the generic tree-sitter path.
///
/// # Errors
/// Propagates any error from `analyze_tree_sitter_generic`.
fn analyze_go_ast(
    &self,
    source: &str,
    score: &mut TdgScore,
    tracker: &mut PenaltyTracker,
) -> Result<()> {
    let language = Language::Go;
    self.analyze_tree_sitter_generic(source, language, score, tracker)
}
/// Scores Java source through the generic tree-sitter path.
///
/// # Errors
/// Propagates any error from `analyze_tree_sitter_generic`.
fn analyze_java_ast(
    &self,
    source: &str,
    score: &mut TdgScore,
    tracker: &mut PenaltyTracker,
) -> Result<()> {
    let language = Language::Java;
    self.analyze_tree_sitter_generic(source, language, score, tracker)
}
/// Scores C or C++ source with tree-sitter when the `c-ast` feature is enabled.
///
/// The grammar is chosen from `score.language`: the C++ grammar for
/// `Language::Cpp`, the C grammar otherwise. When the parser produces no
/// tree, or when the feature is disabled, the heuristic analyzer is used.
///
/// # Errors
/// Fails when the grammar cannot be set on the parser, when a query fails to
/// compile, or when the heuristic fallback fails.
fn analyze_c_ast(
&self,
source: &str,
score: &mut TdgScore,
tracker: &mut PenaltyTracker,
) -> Result<()> {
#[cfg(feature = "c-ast")]
{
use tree_sitter::{Parser, Query, QueryCursor};
let mut parser = Parser::new();
let language = if score.language == Language::Cpp {
tree_sitter_cpp::language()
} else {
tree_sitter_c::language()
};
parser
.set_language(&language)
.map_err(|e| anyhow::anyhow!("Failed to set language: {e:?}"))?;
if let Some(tree) = parser.parse(source, None) {
let root_node = tree.root_node();
// Every node matched by one of these patterns counts as one decision point.
let query_str = r"
(if_statement) @if
(while_statement) @while
(for_statement) @for
(do_statement) @do
(switch_statement) @switch
(case_statement) @case
(conditional_expression) @ternary
";
let query = Query::new(&language, query_str)
.map_err(|e| anyhow::anyhow!("Query error: {e:?}"))?;
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, root_node, source.as_bytes());
// Cyclomatic complexity: a base of 1 plus one per matched decision point.
let cyclomatic = 1 + matches.count() as u32;
score.structural_complexity = self.score_structural_complexity(
cyclomatic,
self.calculate_cognitive_complexity(&root_node),
self.calculate_max_nesting(&root_node),
self.calculate_max_function_length(&root_node, source),
tracker,
);
// Coupling is based on `#include` directives only; the other two inputs are passed as 0.
let include_query = Query::new(&language, "(preproc_include) @include")
.map_err(|e| anyhow::anyhow!("Query error: {e:?}"))?;
let mut cursor = QueryCursor::new();
let include_matches = cursor.matches(&include_query, root_node, source.as_bytes());
let import_count = include_matches.count() as u32;
score.coupling_score = self.score_coupling(import_count, 0, 0, tracker);
let comment_query = Query::new(&language, "(comment) @comment")
.map_err(|e| anyhow::anyhow!("Query error: {e:?}"))?;
let mut cursor = QueryCursor::new();
let comment_matches = cursor.matches(&comment_query, root_node, source.as_bytes());
let comment_count = comment_matches.count() as u32;
// Documentation: comment nodes per source line, scaled by 10 and capped at the
// documentation weight.
// NOTE(review): for an empty `source` the divisor is 0; confirm the resulting
// value is what is intended here.
score.doc_coverage = (comment_count as f32 / source.lines().count() as f32 * 10.0)
.min(self.config.weights.documentation);
// Semantic complexity and consistency are not measured on this path; both
// receive their full configured weight.
score.semantic_complexity = self.config.weights.semantic_complexity;
score.duplication_ratio =
self.analyze_duplication_ast(source, score.language, tracker);
score.consistency_score = self.config.weights.consistency;
} else {
self.analyze_heuristic(source, score, tracker)?;
}
}
#[cfg(not(feature = "c-ast"))]
{
self.analyze_heuristic(source, score, tracker)?;
}
Ok(())
}
/// Scores Ruchy source using the Ruchy parser when the `ruchy-ast` feature is
/// enabled; otherwise defers to the heuristic analyzer.
///
/// The source is written to a temporary `.ruchy` file because the parser
/// entry point takes a path. The async parser is then driven to completion
/// synchronously:
///
/// * inside a Tokio runtime, via `block_in_place` + `Handle::block_on`
///   (calling `Handle::block_on` directly from a runtime thread panics);
/// * outside any runtime, on a freshly built runtime that is kept alive for
///   the duration of the call.
///
/// When the parser reports an error the heuristic analyzer is used instead.
///
/// # Errors
/// Fails when the temporary file cannot be created or written, when no
/// runtime can be built, or when the heuristic fallback fails.
///
/// # Panics
/// `block_in_place` panics when called from a current-thread Tokio runtime.
fn analyze_ruchy_ast(
    &self,
    source: &str,
    score: &mut TdgScore,
    tracker: &mut PenaltyTracker,
) -> Result<()> {
    #[cfg(feature = "ruchy-ast")]
    {
        use crate::services::languages::ruchy::analyze_ruchy_file_with_parser;
        use std::io::Write;
        use tempfile::NamedTempFile;

        let mut temp_file = NamedTempFile::with_suffix(".ruchy")?;
        temp_file.write_all(source.as_bytes())?;
        temp_file.flush()?;
        let temp_path = temp_file.path();

        let analysis_result = match tokio::runtime::Handle::try_current() {
            // Already on a runtime: tell it this thread is about to block, then
            // re-enter the async context to drive the parser.
            Ok(handle) => tokio::task::block_in_place(|| {
                handle.block_on(async { analyze_ruchy_file_with_parser(temp_path).await })
            }),
            // No ambient runtime: build one and keep it alive until the parser
            // finishes, rather than holding a handle to a runtime already dropped.
            Err(_) => {
                let runtime = tokio::runtime::Runtime::new()
                    .map_err(|e| anyhow::anyhow!("Failed to get async runtime: {e}"))?;
                runtime.block_on(async { analyze_ruchy_file_with_parser(temp_path).await })
            }
        };

        match analysis_result {
            Ok(metrics) => {
                score.structural_complexity = self.score_structural_complexity(
                    metrics.total_complexity.cyclomatic.into(),
                    metrics.total_complexity.cognitive.into(),
                    metrics.total_complexity.nesting_max as usize,
                    metrics.total_complexity.lines.into(),
                    tracker,
                );
                let semantic_score = self.calculate_ruchy_semantic_complexity(source);
                score.semantic_complexity = semantic_score;
                let import_count = self.count_ruchy_imports(source);
                let dependency_count = self.count_ruchy_dependencies(source);
                score.coupling_score =
                    self.score_coupling(import_count, dependency_count, 0, tracker);
                let doc_coverage = self.calculate_ruchy_doc_coverage(source);
                score.doc_coverage = doc_coverage;
                score.duplication_ratio =
                    self.analyze_duplication_ast(source, score.language, tracker);
                score.consistency_score = self.calculate_ruchy_consistency(source);
            }
            // The parser error is discarded; the heuristic path takes over.
            Err(_) => {
                self.analyze_heuristic(source, score, tracker)?;
            }
        }
    }
    #[cfg(not(feature = "ruchy-ast"))]
    {
        self.analyze_heuristic(source, score, tracker)?;
    }
    Ok(())
}
/// Generic path for languages without a dedicated AST analyzer.
///
/// Scales the confidence by `0.7` and then runs the heuristic analyzer, which
/// applies its own `0.3` confidence factor on top. `_language` is not used.
///
/// # Errors
/// Propagates any error from `analyze_heuristic`.
fn analyze_tree_sitter_generic(
    &self,
    source: &str,
    _language: Language,
    score: &mut TdgScore,
    _tracker: &mut PenaltyTracker,
) -> Result<()> {
    const GENERIC_CONFIDENCE_FACTOR: f32 = 0.7;
    score.confidence *= GENERIC_CONFIDENCE_FACTOR;
    self.analyze_heuristic(source, score, _tracker)
}
/// Fallback scorer: runs the simple `TdgAnalyzer` and copies its six
/// component scores into `score`.
///
/// The confidence is scaled by `0.3` before the simple analyzer is created,
/// so the reduction is kept even if that analyzer fails. `_tracker` is not used.
///
/// # Errors
/// Fails when the simple analyzer cannot be created or its analysis fails.
fn analyze_heuristic(
    &self,
    source: &str,
    score: &mut TdgScore,
    _tracker: &mut PenaltyTracker,
) -> Result<()> {
    const HEURISTIC_CONFIDENCE_FACTOR: f32 = 0.3;
    score.confidence *= HEURISTIC_CONFIDENCE_FACTOR;

    let fallback = crate::tdg::analyzer_simple::TdgAnalyzer::new()?;
    let estimate = fallback.analyze_source(source, score.language, None)?;

    score.structural_complexity = estimate.structural_complexity;
    score.semantic_complexity = estimate.semantic_complexity;
    score.duplication_ratio = estimate.duplication_ratio;
    score.coupling_score = estimate.coupling_score;
    score.doc_coverage = estimate.doc_coverage;
    score.consistency_score = estimate.consistency_score;
    Ok(())
}
/// Scores structural complexity, starting from the configured weight and
/// deducting whatever the tracker actually applies for each exceeded limit.
///
/// Limits: configured cyclomatic maximum (0.5 per excess point, capped at 15),
/// cognitive complexity 15 (0.3 per excess point, capped at 10), configured
/// nesting maximum (1 per excess level, capped at 5) and method length 50
/// (0.1 per excess line, capped at 5). The result is never below `0.0`.
fn score_structural_complexity(
    &self,
    cyclomatic: u32,
    cognitive: u32,
    nesting_depth: usize,
    method_length: usize,
    tracker: &mut PenaltyTracker,
) -> f32 {
    const COGNITIVE_LIMIT: u32 = 15;
    const METHOD_LENGTH_LIMIT: usize = 50;

    let cyclomatic_limit = self.config.thresholds.max_cyclomatic_complexity;
    let nesting_limit = self.config.thresholds.max_nesting_depth as usize;
    let mut remaining = self.config.weights.structural_complexity;

    if cyclomatic > cyclomatic_limit {
        let penalty = ((cyclomatic - cyclomatic_limit) as f32 * 0.5).min(15.0);
        let applied = tracker.apply(
            format!("high_cyclomatic_{cyclomatic}"),
            MetricCategory::StructuralComplexity,
            penalty,
            format!("High cyclomatic complexity: {cyclomatic}"),
        );
        if let Some(deduction) = applied {
            remaining -= deduction;
        }
    }

    if cognitive > COGNITIVE_LIMIT {
        let penalty = ((cognitive - COGNITIVE_LIMIT) as f32 * 0.3).min(10.0);
        let applied = tracker.apply(
            format!("high_cognitive_{cognitive}"),
            MetricCategory::StructuralComplexity,
            penalty,
            format!("High cognitive complexity: {cognitive}"),
        );
        if let Some(deduction) = applied {
            remaining -= deduction;
        }
    }

    if nesting_depth > nesting_limit {
        let penalty = ((nesting_depth - nesting_limit) as f32).min(5.0);
        let applied = tracker.apply(
            format!("deep_nesting_{nesting_depth}"),
            MetricCategory::StructuralComplexity,
            penalty,
            format!("Deep nesting: {nesting_depth} levels"),
        );
        if let Some(deduction) = applied {
            remaining -= deduction;
        }
    }

    if method_length > METHOD_LENGTH_LIMIT {
        let penalty = ((method_length - METHOD_LENGTH_LIMIT) as f32 / 10.0).min(5.0);
        let applied = tracker.apply(
            format!("long_method_{method_length}"),
            MetricCategory::StructuralComplexity,
            penalty,
            format!("Long method: {method_length} lines"),
        );
        if let Some(deduction) = applied {
            remaining -= deduction;
        }
    }

    remaining.max(0.0)
}
/// Scores semantic complexity, starting from the configured weight and
/// deducting whatever the tracker actually applies for each exceeded limit.
///
/// Limits: 5 parameters (0.5 per extra, capped at 5), type complexity 10
/// (0.3 per extra, capped at 5) and 3 abstraction levels (1 per extra, capped
/// at 5). The result is never below `0.0`.
fn score_semantic_complexity(
    &self,
    max_params: usize,
    type_complexity: u32,
    abstraction_levels: u32,
    tracker: &mut PenaltyTracker,
) -> f32 {
    const PARAM_LIMIT: usize = 5;
    const TYPE_COMPLEXITY_LIMIT: u32 = 10;
    const ABSTRACTION_LIMIT: u32 = 3;

    let mut remaining = self.config.weights.semantic_complexity;

    if max_params > PARAM_LIMIT {
        let penalty = ((max_params - PARAM_LIMIT) as f32 * 0.5).min(5.0);
        let applied = tracker.apply(
            format!("many_params_{max_params}"),
            MetricCategory::SemanticComplexity,
            penalty,
            format!("Too many parameters: {max_params}"),
        );
        if let Some(deduction) = applied {
            remaining -= deduction;
        }
    }

    if type_complexity > TYPE_COMPLEXITY_LIMIT {
        let penalty = ((type_complexity - TYPE_COMPLEXITY_LIMIT) as f32 * 0.3).min(5.0);
        let applied = tracker.apply(
            format!("complex_types_{type_complexity}"),
            MetricCategory::SemanticComplexity,
            penalty,
            format!("Complex type usage: {type_complexity}"),
        );
        if let Some(deduction) = applied {
            remaining -= deduction;
        }
    }

    if abstraction_levels > ABSTRACTION_LIMIT {
        let penalty = ((abstraction_levels - ABSTRACTION_LIMIT) as f32).min(5.0);
        let applied = tracker.apply(
            format!("deep_abstraction_{abstraction_levels}"),
            MetricCategory::SemanticComplexity,
            penalty,
            format!("Deep abstraction: {abstraction_levels} levels"),
        );
        if let Some(deduction) = applied {
            remaining -= deduction;
        }
    }

    remaining.max(0.0)
}
/// Scores duplication by counting repeated lines.
///
/// Lines are trimmed; blank lines and lines starting with `//` or `/*` are
/// ignored. A line counts as a duplicate when it is longer than 10 bytes and
/// an identical line was seen earlier. When duplicates exceed 10% of the
/// considered lines, a penalty of `ratio * 20` (capped at 20) is offered to
/// the tracker. Sources with fewer than three considered lines get the full
/// weight. `_language` is not used.
fn analyze_duplication_ast(
    &self,
    source: &str,
    _language: Language,
    tracker: &mut PenaltyTracker,
) -> f32 {
    let full_marks = self.config.weights.duplication;

    let candidates: Vec<&str> = source
        .lines()
        .map(str::trim)
        .filter(|line| !(line.is_empty() || line.starts_with("//") || line.starts_with("/*")))
        .collect();
    if candidates.len() < 3 {
        return full_marks;
    }

    // Short lines are never inserted into the set, so they neither count as
    // duplicates nor seed later matches.
    let mut seen = std::collections::HashSet::new();
    let repeated = candidates
        .iter()
        .filter(|line| line.len() > 10 && !seen.insert(**line))
        .count();

    let duplication_ratio = repeated as f32 / candidates.len() as f32;
    if duplication_ratio <= 0.1 {
        return full_marks.max(0.0);
    }

    let penalty = (duplication_ratio * 20.0).min(20.0);
    let applied = tracker.apply(
        format!("duplication_{duplication_ratio:.2}"),
        MetricCategory::Duplication,
        penalty,
        format!("Code duplication: {:.1}%", duplication_ratio * 100.0),
    );
    let remaining = match applied {
        Some(deduction) => full_marks - deduction,
        None => full_marks,
    };
    remaining.max(0.0)
}
/// Scores coupling, starting from the configured weight and deducting
/// whatever the tracker actually applies for each exceeded limit.
///
/// Limits: 20 imports (0.2 per extra, capped at 10) and 50 external calls
/// (0.1 per extra, capped at 5). `_interface_implementations` is not used.
/// The result is never below `0.0`.
fn score_coupling(
    &self,
    import_count: u32,
    external_calls: u32,
    _interface_implementations: u32,
    tracker: &mut PenaltyTracker,
) -> f32 {
    const IMPORT_LIMIT: u32 = 20;
    const EXTERNAL_CALL_LIMIT: u32 = 50;

    let mut remaining = self.config.weights.coupling;

    if import_count > IMPORT_LIMIT {
        let penalty = ((import_count - IMPORT_LIMIT) as f32 * 0.2).min(10.0);
        let applied = tracker.apply(
            format!("many_imports_{import_count}"),
            MetricCategory::Coupling,
            penalty,
            format!("Too many imports: {import_count}"),
        );
        if let Some(deduction) = applied {
            remaining -= deduction;
        }
    }

    if external_calls > EXTERNAL_CALL_LIMIT {
        let penalty = ((external_calls - EXTERNAL_CALL_LIMIT) as f32 * 0.1).min(5.0);
        let applied = tracker.apply(
            format!("many_external_calls_{external_calls}"),
            MetricCategory::Coupling,
            penalty,
            format!("Too many external calls: {external_calls}"),
        );
        if let Some(deduction) = applied {
            remaining -= deduction;
        }
    }

    remaining.max(0.0)
}
/// Scores documentation as a blend of item coverage (70%) and comment density
/// (30%), scaled to and capped at the configured documentation weight.
///
/// * With no public items there is nothing to document, so the full weight is
///   returned.
/// * With `total_lines == 0` the comment density is treated as `0.0`. Without
///   this guard the division yields NaN or infinity, and `f32::min` would then
///   return the full weight regardless of the actual coverage.
///
/// `_tracker` is not used.
fn score_documentation(
    &self,
    documented_items: u32,
    total_public_items: u32,
    comment_lines: u32,
    total_lines: u32,
    _tracker: &mut PenaltyTracker,
) -> f32 {
    let max_points = self.config.weights.documentation;
    if total_public_items == 0 {
        return max_points;
    }

    let coverage = documented_items as f32 / total_public_items as f32;
    let comment_ratio = if total_lines == 0 {
        0.0
    } else {
        comment_lines as f32 / total_lines as f32
    };

    let blended = coverage * 0.7 + comment_ratio * 0.3;
    (blended * max_points).min(max_points)
}
/// Consistency score for Rust sources.
///
/// Currently returns the full configured consistency weight; neither the
/// parsed file nor the tracker is inspected.
fn score_consistency_rust(&self, _ast: &syn::File, _tracker: &mut PenaltyTracker) -> f32 {
self.config.weights.consistency
}
/// Scores Python indentation consistency.
///
/// Counts lines that start with a tab and lines that start with a space; the
/// score is the share of the more common style among all indented lines,
/// scaled by the configured consistency weight. Sources with no indented
/// lines get the full weight. `_tracker` is not used.
fn score_consistency_python(&self, source: &str, _tracker: &mut PenaltyTracker) -> f32 {
    let full_marks = self.config.weights.consistency;

    let tab_count = source
        .lines()
        .filter(|line| line.starts_with('\t'))
        .count();
    let space_count = source
        .lines()
        .filter(|line| {
            !line.starts_with('\t') && (line.starts_with(" ") || line.starts_with(" "))
        })
        .count();

    let total_indented = tab_count + space_count;
    if total_indented == 0 {
        return full_marks;
    }

    // Ties resolve to the space count, which equals the tab count in that case.
    let dominant = tab_count.max(space_count);
    let consistency = dominant as f32 / total_indented as f32;
    consistency * full_marks
}
/// Scores JavaScript/TypeScript style consistency with three text checks.
///
/// Starting from `100.0`, deducts 10 for mixed semicolon usage (between 20%
/// and 80% of code lines end in `;`), 15 when both tab-led and space-led
/// lines exist, and 5 for mixed quote characters (single quotes make up
/// between 20% and 80% of all quote characters). Each deduction is also
/// reported to the tracker; the tracker's return value is not consulted.
///
/// NOTE(review): this starts from `100.0` while `score_consistency_python`
/// works on the `config.weights.consistency` scale — confirm the scale the
/// caller expects.
fn score_consistency_javascript(&self, source: &str, tracker: &mut PenaltyTracker) -> f32 {
    let mut remaining = 100.0f32;

    // Semicolons: terminated lines are counted over all lines, the
    // denominator over non-blank, non-`//` lines.
    let terminated_count = source
        .lines()
        .filter(|line| line.trim().ends_with(';'))
        .count();
    let code_line_count = source
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty() && !line.starts_with("//"))
        .count();
    if code_line_count > 0 {
        let semicolon_ratio = terminated_count as f32 / code_line_count as f32;
        if semicolon_ratio > 0.2 && semicolon_ratio < 0.8 {
            remaining -= 10.0;
            tracker.apply(
                "inconsistent_semicolon_usage".to_string(),
                MetricCategory::Consistency,
                10.0,
                "Inconsistent semicolon usage detected".to_string(),
            );
        }
    }

    // Indentation: any mix of tab-led and space-led lines is penalized.
    let has_tab_lines = source.lines().any(|line| line.starts_with('\t'));
    let has_space_lines = source.lines().any(|line| line.starts_with(" "));
    if has_tab_lines && has_space_lines {
        remaining -= 15.0;
        tracker.apply(
            "mixed_indentation".to_string(),
            MetricCategory::Consistency,
            15.0,
            "Mixed indentation (tabs and spaces) detected".to_string(),
        );
    }

    // Quotes: raw character counts over the whole source.
    let single_quotes = source.matches('\'').count();
    let double_quotes = source.matches('"').count();
    if single_quotes > 0 && double_quotes > 0 {
        let single_share = (single_quotes as f32) / (single_quotes + double_quotes) as f32;
        if single_share > 0.2 && single_share < 0.8 {
            remaining -= 5.0;
            tracker.apply(
                "inconsistent_quotes".to_string(),
                MetricCategory::Consistency,
                5.0,
                "Inconsistent quote usage detected".to_string(),
            );
        }
    }

    remaining.max(0.0f32)
}
/// Scores pattern repetition in `source` on a 100-point scale.
///
/// Each distinct trimmed line (ignoring blank lines and `//` comments) that
/// occurs more than once costs 5 points, capped at 30; the deduction is also
/// reported to the tracker under `MetricCategory::Duplication`.
///
/// The score is computed purely from the in-memory source. An earlier version
/// first wrote the source to a temp file that nothing read, inside a directory
/// named only by process id; concurrent calls could delete each other's
/// directory and fall into a fixed `15.0` fallback. That round trip is gone,
/// so the result is deterministic for a given input.
///
/// `_language` is not used.
fn score_entropy_analysis(
    &self,
    source: &str,
    _language: Language,
    tracker: &mut PenaltyTracker,
) -> f32 {
    // Constructed as before; its output is not consumed by this scorer.
    let _analyzer = EntropyAnalyzer::new();

    let mut line_counts: std::collections::HashMap<&str, u32> =
        std::collections::HashMap::new();
    for trimmed in source.lines().map(str::trim) {
        if !trimmed.is_empty() && !trimmed.starts_with("//") {
            *line_counts.entry(trimmed).or_insert(0) += 1;
        }
    }

    let duplicate_lines = line_counts.values().filter(|&&count| count > 1).count();
    let mut pattern_score = 100.0;
    if duplicate_lines > 0 {
        let penalty = (duplicate_lines as f32 * 5.0).min(30.0);
        pattern_score -= penalty;
        tracker.apply(
            "duplicate_code_patterns".to_string(),
            MetricCategory::Duplication,
            penalty,
            format!("Found {duplicate_lines} duplicate code patterns"),
        );
    }
    pattern_score
}
/// Computes a cognitive-complexity figure for the tree-sitter subtree rooted at `node`.
///
/// `if`/`while`/`for`/`do` statements cost `1 + current nesting`; switch,
/// case-label, try/catch, logical and/or and conditional-expression nodes
/// cost 1 each. Nesting deepens beneath `if`, `while`, `for` and `switch`
/// statements (but not beneath `do`).
#[cfg(any(feature = "c-ast", feature = "cpp-ast"))]
fn calculate_cognitive_complexity(&self, node: &tree_sitter::Node) -> u32 {
    fn visit(node: tree_sitter::Node, depth: u32, total: &mut u32) {
        let kind = node.kind();

        let increment = match kind {
            "if_statement" | "while_statement" | "for_statement" | "do_statement" => 1 + depth,
            "switch_statement"
            | "case_label"
            | "try_statement"
            | "catch_clause"
            | "logical_and"
            | "logical_or"
            | "conditional_expression" => 1,
            _ => 0,
        };
        *total += increment;

        let child_depth = match kind {
            "if_statement" | "while_statement" | "for_statement" | "switch_statement" => {
                depth + 1
            }
            _ => depth,
        };

        let mut walker = node.walk();
        for child in node.children(&mut walker) {
            visit(child, child_depth, total);
        }
    }

    let mut total = 0u32;
    visit(*node, 0, &mut total);
    total
}
/// Stand-in used when neither `c-ast` nor `cpp-ast` is enabled: ignores its
/// argument and returns a fixed cognitive complexity of 5.
#[cfg(not(any(feature = "c-ast", feature = "cpp-ast")))]
fn calculate_cognitive_complexity(&self, _node: &str) -> u32 {
5 }
/// Returns the deepest nesting level found beneath `node`.
///
/// Depth increases by one each time traversal enters an `if`, `while`, `for`
/// or compound statement; the root itself is at depth 0.
#[cfg(any(feature = "c-ast", feature = "cpp-ast"))]
fn calculate_max_nesting(&self, node: &tree_sitter::Node) -> usize {
    fn descend(node: tree_sitter::Node, depth: usize, deepest: &mut usize) {
        if depth > *deepest {
            *deepest = depth;
        }
        let mut walker = node.walk();
        for child in node.children(&mut walker) {
            let child_depth = match child.kind() {
                "if_statement" | "while_statement" | "for_statement" | "compound_statement" => {
                    depth + 1
                }
                _ => depth,
            };
            descend(child, child_depth, deepest);
        }
    }

    let mut deepest = 0;
    descend(*node, 0, &mut deepest);
    deepest
}
/// Stand-in used when neither `c-ast` nor `cpp-ast` is enabled: ignores its
/// argument and returns a fixed nesting depth of 5.
#[cfg(not(any(feature = "c-ast", feature = "cpp-ast")))]
fn calculate_max_nesting(&self, _node: &str) -> usize {
5 }
/// Returns the length, in source rows, of the longest `function_definition`
/// node beneath `node` (0 when there is none).
///
/// A function's length is `end row - start row + 1`. `source` is threaded
/// through the traversal but not otherwise read.
#[cfg(any(feature = "c-ast", feature = "cpp-ast"))]
fn calculate_max_function_length(&self, node: &tree_sitter::Node, source: &str) -> usize {
    fn longest_in(node: tree_sitter::Node, source: &str, longest: &mut usize) {
        if node.kind() == "function_definition" {
            let first_row = node.start_position().row;
            let last_row = node.end_position().row;
            let span = last_row - first_row + 1;
            if span > *longest {
                *longest = span;
            }
        }
        let mut walker = node.walk();
        for child in node.children(&mut walker) {
            longest_in(child, source, longest);
        }
    }

    let mut longest = 0;
    longest_in(*node, source, &mut longest);
    longest
}
/// Stand-in used when neither `c-ast` nor `cpp-ast` is enabled: ignores its
/// argument and returns a fixed function length of 20.
///
/// NOTE(review): this variant takes a single argument, whereas the
/// feature-enabled variant takes a node and the source — confirm no caller
/// is compiled against this signature.
#[cfg(not(any(feature = "c-ast", feature = "cpp-ast")))]
fn calculate_max_function_length(&self, _source: &str) -> usize {
20 }
/// Analyzes every eligible file under `dir` and aggregates the scores.
///
/// Files are analyzed one after another. A file that fails to analyze is
/// reported on stderr and left out of the aggregate; it does not abort the run.
///
/// # Errors
/// Fails only when file discovery fails.
pub async fn analyze_project(&self, dir: &Path) -> Result<ProjectScore> {
    let candidates = self.discover_files(dir)?;
    let mut collected = Vec::new();

    for file in candidates {
        let outcome = self.analyze_file(&file).await;
        match outcome {
            Err(e) => eprintln!("Warning: Failed to analyze {}: {}", file.display(), e),
            Ok(score) => collected.push(score),
        }
    }

    Ok(ProjectScore::aggregate(collected))
}
/// Compares the scores of two paths.
///
/// Each path is scored as a project (using the project's average) when it is
/// a directory and as a single file otherwise. `path1` is scored first.
///
/// # Errors
/// Propagates any error from scoring either side.
pub async fn compare(&self, path1: &Path, path2: &Path) -> Result<crate::tdg::Comparison> {
    let left = if !path1.is_dir() {
        self.analyze_file(path1).await?
    } else {
        self.analyze_project(path1).await?.average()
    };

    let right = if !path2.is_dir() {
        self.analyze_file(path2).await?
    } else {
        self.analyze_project(path2).await?.average()
    };

    Ok(crate::tdg::Comparison::new(left, right))
}
/// Collects every analyzable file beneath `dir`.
///
/// # Errors
/// Propagates any directory-reading error from the recursive walk.
fn discover_files(&self, dir: &Path) -> Result<Vec<PathBuf>> {
    let mut found = Vec::new();
    self.discover_files_recursive(dir, &mut found)
        .map(|()| found)
}
/// Walks `dir` depth-first, appending analyzable files to `files`.
///
/// Returns immediately when `dir` is not a directory. Sub-directories
/// rejected by `should_skip_directory` are not entered.
///
/// # Errors
/// Fails when a directory cannot be listed or an entry cannot be read.
fn discover_files_recursive(&self, dir: &Path, files: &mut Vec<PathBuf>) -> Result<()> {
    if !dir.is_dir() {
        return Ok(());
    }

    for entry in fs::read_dir(dir)? {
        let path = entry?.path();

        if path.is_dir() {
            if self.should_skip_directory(&path) {
                continue;
            }
            self.discover_files_recursive(&path, files)?;
        } else if self.should_analyze_file(&path) {
            files.push(path);
        }
    }

    Ok(())
}
/// Reports whether the final component of `path` is one of the directory
/// names that are excluded from analysis.
///
/// The comparison is exact and case-sensitive. A path without a final
/// component, or whose name is not valid UTF-8, is never skipped.
fn should_skip_directory(&self, path: &Path) -> bool {
    const SKIPPED_DIRECTORIES: &[&str] = &[
        "node_modules",
        "target",
        "build",
        "dist",
        ".git",
        "__pycache__",
        ".pytest_cache",
        "venv",
        ".venv",
        "vendor",
        ".idea",
        ".vscode",
    ];
    path.file_name()
        .and_then(|name| name.to_str())
        .map_or(false, |name| SKIPPED_DIRECTORIES.contains(&name))
}
/// Reports whether `path` carries one of the source-file extensions that
/// are picked up for analysis.
///
/// The comparison is exact and case-sensitive. A path without an
/// extension, or whose extension is not valid UTF-8, is rejected.
fn should_analyze_file(&self, path: &Path) -> bool {
    const ANALYZED_EXTENSIONS: &[&str] = &[
        "rs", "py", "js", "ts", "jsx", "tsx", "go", "java", "c", "h", "cpp", "cc", "cxx", "hpp",
        "rb", "swift", "kt", "kts",
    ];
    path.extension()
        .and_then(|ext| ext.to_str())
        .map_or(false, |ext| ANALYZED_EXTENSIONS.contains(&ext))
}
}
/// Counters filled in while walking a `syn` syntax tree through the
/// `syn::visit::Visit` implementation below.
///
/// Only some fields are written by the visitor methods in this file; the
/// others keep the value assigned in `new()` unless code elsewhere sets them.
#[cfg(feature = "rust-ast")]
struct RustComplexityVisitor {
/// Starts at 1; +1 per `if`/`while`/`for`, plus the arm count of each `match`.
cyclomatic_complexity: u32,
/// Each `if`/`while`/`for`/`match` adds 1 plus the nesting depth at that point.
cognitive_complexity: u32,
/// Largest value `current_depth` has reached.
max_nesting_depth: usize,
/// Not written by the visitor methods in this file.
max_method_length: usize,
/// Largest parameter count among the visited `fn` items.
max_params: usize,
/// Sum of generic parameter counts over the visited `fn` items.
generic_count: u32,
/// Not written by the visitor methods in this file.
abstraction_levels: u32,
/// +1 each time `visit_use_tree` is invoked.
import_count: u32,
/// Not written by the visitor methods in this file.
external_calls: u32,
/// Number of visited `impl` blocks that implement a trait.
interface_implementations: u32,
/// Number of visited `fn` items carrying at least one `doc` attribute.
documented_items: u32,
/// Number of visited `fn` items with `pub` visibility.
total_public_items: u32,
/// Not written by the visitor methods in this file.
comment_lines: u32,
/// Not written by the visitor methods in this file.
total_lines: u32,
/// Nesting level of the `if`/`while`/`for`/`match` currently being visited.
current_depth: usize,
}
#[cfg(feature = "rust-ast")]
impl RustComplexityVisitor {
/// Creates a visitor with every counter at zero, except cyclomatic
/// complexity, which starts at 1.
fn new() -> Self {
Self {
// Baseline of 1 before any branch has been counted.
cyclomatic_complexity: 1,
cognitive_complexity: 0,
max_nesting_depth: 0,
max_method_length: 0,
max_params: 0,
generic_count: 0,
abstraction_levels: 0,
import_count: 0,
external_calls: 0,
interface_implementations: 0,
documented_items: 0,
total_public_items: 0,
comment_lines: 0,
total_lines: 0,
current_depth: 0,
}
}
}
#[cfg(feature = "rust-ast")]
impl RustComplexityVisitor {
    /// Book-keeping shared by every branching construct: adds `paths` to the
    /// cyclomatic count, charges `1 + current depth` to the cognitive count,
    /// then steps one nesting level deeper and records the new maximum.
    fn enter_nested_branch(&mut self, paths: u32) {
        self.cyclomatic_complexity += paths;
        self.cognitive_complexity += 1 + self.current_depth as u32;
        self.current_depth += 1;
        self.max_nesting_depth = self.max_nesting_depth.max(self.current_depth);
    }

    /// Undoes the depth increase made by `enter_nested_branch`.
    fn exit_nested_branch(&mut self) {
        self.current_depth -= 1;
    }
}

/// Gathers metrics from a `syn` syntax tree.
///
/// `if`, `while`, `for` and `match` expressions feed the complexity and
/// nesting counters; free `fn` items feed the parameter, generic,
/// documentation and visibility counters; trait `impl` blocks and `use`
/// trees feed the coupling counters.
#[cfg(feature = "rust-ast")]
impl<'ast> syn::visit::Visit<'ast> for RustComplexityVisitor {
    fn visit_expr_if(&mut self, node: &'ast syn::ExprIf) {
        self.enter_nested_branch(1);
        syn::visit::visit_expr_if(self, node);
        self.exit_nested_branch();
    }

    fn visit_expr_while(&mut self, node: &'ast syn::ExprWhile) {
        self.enter_nested_branch(1);
        syn::visit::visit_expr_while(self, node);
        self.exit_nested_branch();
    }

    fn visit_expr_for_loop(&mut self, node: &'ast syn::ExprForLoop) {
        self.enter_nested_branch(1);
        syn::visit::visit_expr_for_loop(self, node);
        self.exit_nested_branch();
    }

    /// A `match` contributes one cyclomatic path per arm.
    fn visit_expr_match(&mut self, node: &'ast syn::ExprMatch) {
        self.enter_nested_branch(node.arms.len() as u32);
        syn::visit::visit_expr_match(self, node);
        self.exit_nested_branch();
    }

    /// Records signature and documentation facts for a free function, then
    /// continues into its body.
    fn visit_item_fn(&mut self, node: &'ast syn::ItemFn) {
        let signature = &node.sig;
        self.max_params = self.max_params.max(signature.inputs.len());
        self.generic_count += signature.generics.params.len() as u32;

        // One `doc` attribute is enough; further ones do not count again.
        let has_docs = node.attrs.iter().any(|attr| attr.path().is_ident("doc"));
        if has_docs {
            self.documented_items += 1;
        }
        if let syn::Visibility::Public(_) = node.vis {
            self.total_public_items += 1;
        }
        syn::visit::visit_item_fn(self, node);
    }

    /// Counts the tree once and does not delegate to
    /// `syn::visit::visit_use_tree`, so nested trees are not traversed.
    fn visit_use_tree(&mut self, _node: &'ast syn::UseTree) {
        self.import_count += 1;
    }

    /// Counts trait implementations, then continues into the block's items.
    fn visit_item_impl(&mut self, node: &'ast syn::ItemImpl) {
        let implements_trait = node.trait_.is_some();
        if implements_trait {
            self.interface_implementations += 1;
        }
        syn::visit::visit_item_impl(self, node);
    }
}
/// Counters describing a Python module.
///
/// In this file only `cyclomatic_complexity` and `max_nesting_depth` are
/// written (by `analyze_module` / `analyze_python_statement`); the other
/// fields keep the value assigned in `new()` unless code elsewhere sets them.
#[cfg(feature = "python-ast")]
struct PythonComplexityVisitor {
/// Starts at 1; `analyze_module` adds 5 and `analyze_python_statement` adds 1.
cyclomatic_complexity: u32,
/// Not written by the methods in this file.
cognitive_complexity: u32,
/// Set to 2 by `analyze_module` for a `Mod::Module`.
max_nesting_depth: usize,
/// Not written by the methods in this file.
max_method_length: usize,
/// Not written by the methods in this file.
max_params: usize,
/// Not written by the methods in this file.
decorator_count: u32,
/// Not written by the methods in this file.
metaclass_count: u32,
/// Not written by the methods in this file.
import_count: u32,
/// Not written by the methods in this file.
external_calls: u32,
/// Not written by the methods in this file.
documented_functions: u32,
/// Not written by the methods in this file.
total_functions: u32,
/// Not written by the methods in this file.
docstring_lines: u32,
/// Not written by the methods in this file.
total_lines: u32,
}
#[cfg(feature = "python-ast")]
impl PythonComplexityVisitor {
    /// Creates a visitor with every counter at zero, except cyclomatic
    /// complexity, which starts at 1.
    fn new() -> Self {
        Self {
            cyclomatic_complexity: 1,
            cognitive_complexity: 0,
            max_nesting_depth: 0,
            max_method_length: 0,
            max_params: 0,
            decorator_count: 0,
            metaclass_count: 0,
            import_count: 0,
            external_calls: 0,
            documented_functions: 0,
            total_functions: 0,
            docstring_lines: 0,
            total_lines: 0,
        }
    }

    /// Applies fixed estimates for a whole module.
    ///
    /// The module body is not inspected: a `Mod::Module` adds 5 to the
    /// cyclomatic complexity and sets the maximum nesting depth to 2. Any
    /// other `Mod` variant leaves the visitor unchanged.
    fn analyze_module(&mut self, module: rustpython_parser::ast::Mod) {
        let is_plain_module = matches!(module, rustpython_parser::ast::Mod::Module(_));
        if is_plain_module {
            self.cyclomatic_complexity += 5;
            self.max_nesting_depth = 2;
        }
    }

    /// Adds one to the cyclomatic complexity without inspecting the statement.
    // Carries `allow(dead_code)`; presumably nothing calls it yet.
    #[allow(dead_code)]
    fn analyze_python_statement(&mut self, _stmt: &rustpython_parser::ast::Stmt) {
        self.cyclomatic_complexity += 1;
    }
}
/// Counters filled in while walking a JavaScript/TypeScript syntax tree
/// through the `swc_ecma_visit::Visit` implementation below.
///
/// Only some fields are written by the visitor methods in this file; the
/// others keep the value assigned in `new()` unless code elsewhere sets them.
#[cfg(feature = "typescript-ast")]
struct JavaScriptComplexityVisitor {
/// Starts at 1; +1 per visited `if`/`while`/`for`, plus the case count of each `switch`.
cyclomatic_complexity: u32,
/// +1 per visited `if`/`while`/`for`/`switch`.
cognitive_complexity: u32,
/// Not written by the visitor methods in this file.
max_nesting_depth: usize,
/// Not written by the visitor methods in this file.
max_function_length: usize,
/// Largest parameter count among the visited functions.
max_params: usize,
/// Number of visited functions flagged `is_async`.
async_count: u32,
/// Not written by the visitor methods in this file.
callback_depth: u32,
/// Number of visited import declarations.
import_count: u32,
/// Not written by the visitor methods in this file.
external_calls: u32,
/// Number of visited class declarations.
class_count: u32,
/// Not written by the visitor methods in this file.
jsdoc_count: u32,
/// Number of visited functions.
function_count: u32,
/// Not written by the visitor methods in this file.
comment_lines: u32,
/// Not written by the visitor methods in this file.
total_lines: u32,
}
#[cfg(feature = "typescript-ast")]
impl JavaScriptComplexityVisitor {
/// Creates a visitor with every counter at zero, except cyclomatic
/// complexity, which starts at 1.
fn new() -> Self {
Self {
// Baseline of 1 before any branch has been counted.
cyclomatic_complexity: 1,
cognitive_complexity: 0,
max_nesting_depth: 0,
max_function_length: 0,
max_params: 0,
async_count: 0,
callback_depth: 0,
import_count: 0,
external_calls: 0,
class_count: 0,
jsdoc_count: 0,
function_count: 0,
comment_lines: 0,
total_lines: 0,
}
}
}
/// Gathers metrics from a JavaScript/TypeScript syntax tree.
///
/// Overriding a `Visit` method replaces the default implementation, and the
/// default is what walks into the node's children. Each override that can
/// contain further statements therefore finishes by calling
/// `VisitWith::visit_children_with`. Without that call the walk stops at the
/// first overridden node, so nothing inside a function body, a class, or a
/// nested `if`/loop/`switch` would ever be counted.
///
/// The trait method is called by its full path so that no extra `use` is
/// needed at file level.
#[cfg(feature = "typescript-ast")]
impl swc_ecma_visit::Visit for JavaScriptComplexityVisitor {
    /// Counts one decision point, then continues into both branches.
    fn visit_if_stmt(&mut self, node: &swc_ecma_ast::IfStmt) {
        self.cyclomatic_complexity += 1;
        self.cognitive_complexity += 1;
        swc_ecma_visit::VisitWith::visit_children_with(node, self);
    }

    /// Counts one decision point, then continues into the loop body.
    fn visit_while_stmt(&mut self, node: &swc_ecma_ast::WhileStmt) {
        self.cyclomatic_complexity += 1;
        self.cognitive_complexity += 1;
        swc_ecma_visit::VisitWith::visit_children_with(node, self);
    }

    /// Counts one decision point, then continues into the loop body.
    fn visit_for_stmt(&mut self, node: &swc_ecma_ast::ForStmt) {
        self.cyclomatic_complexity += 1;
        self.cognitive_complexity += 1;
        swc_ecma_visit::VisitWith::visit_children_with(node, self);
    }

    /// Counts one cyclomatic path per case, then continues into the cases.
    fn visit_switch_stmt(&mut self, node: &swc_ecma_ast::SwitchStmt) {
        self.cyclomatic_complexity += node.cases.len() as u32;
        self.cognitive_complexity += 1;
        swc_ecma_visit::VisitWith::visit_children_with(node, self);
    }

    /// Records the function's parameter count and async flag, then continues
    /// into its parameters and body.
    fn visit_function(&mut self, node: &swc_ecma_ast::Function) {
        self.function_count += 1;
        self.max_params = self.max_params.max(node.params.len());
        if node.is_async {
            self.async_count += 1;
        }
        swc_ecma_visit::VisitWith::visit_children_with(node, self);
    }

    /// Counts the declaration; its children are not traversed.
    fn visit_import_decl(&mut self, _node: &swc_ecma_ast::ImportDecl) {
        self.import_count += 1;
    }

    /// Counts the class, then continues into its members so that methods are
    /// analyzed as well.
    fn visit_class_decl(&mut self, node: &swc_ecma_ast::ClassDecl) {
        self.class_count += 1;
        swc_ecma_visit::VisitWith::visit_children_with(node, self);
    }
}
impl TdgAnalyzerAst {
/// Scores the semantic complexity of Ruchy source, from 0 up to
/// `weights.semantic_complexity` (higher is better).
///
/// The score starts at the full weight and loses a fixed amount for every
/// occurrence of a construct, found by plain substring search:
///
/// | substring    | penalty |
/// |--------------|---------|
/// | `"actor "`   | 2.0     |
/// | `"receive "` | 1.5     |
/// | `"\|>"`      | 0.5     |
/// | `" match "`  | 1.2     |
/// | `" => "`     | 0.3     |
///
/// The result is floored at 0. Previously the penalties were *added* to the
/// full weight and the sum capped at that same weight, so the function
/// returned the weight unchanged for every input.
#[cfg(feature = "ruchy-ast")]
fn calculate_ruchy_semantic_complexity(&self, source: &str) -> f32 {
    let full_marks = self.config.weights.semantic_complexity;
    let occurrences = |needle: &str| source.matches(needle).count() as f32;

    let penalty = occurrences("actor ") * 2.0
        + occurrences("receive ") * 1.5
        + occurrences("|>") * 0.5
        + occurrences(" match ") * 1.2
        + occurrences(" => ") * 0.3;

    // The penalty is never negative, so the result cannot exceed `full_marks`.
    (full_marks - penalty).max(0.0)
}
/// Counts import-like keywords in Ruchy source.
///
/// This is a plain substring count of `"import "`, `"use "` and
/// `"extern "`; the text is not parsed, so occurrences inside other words,
/// comments or strings are counted too.
#[cfg(feature = "ruchy-ast")]
fn count_ruchy_imports(&self, source: &str) -> u32 {
    ["import ", "use ", "extern "]
        .iter()
        .map(|keyword| source.matches(*keyword).count() as u32)
        .sum()
}
/// Counts dependency-like markers in Ruchy source.
///
/// This is a plain substring count of `" <- "`, `" <? "` and `"spawn "`;
/// the text is not parsed.
#[cfg(feature = "ruchy-ast")]
fn count_ruchy_dependencies(&self, source: &str) -> u32 {
    [" <- ", " <? ", "spawn "]
        .iter()
        .map(|marker| source.matches(*marker).count() as u32)
        .sum()
}
/// Scores documentation density of Ruchy source, from 0 up to
/// `weights.documentation`.
///
/// Occurrences of `"///"`, `"/**"` and `"#[doc"` are counted and divided by
/// the number of lines. The ratio is scaled by 20 and capped at 1, so one
/// marker per 20 lines already earns the full weight. Source with no lines
/// also earns the full weight.
#[cfg(feature = "ruchy-ast")]
fn calculate_ruchy_doc_coverage(&self, source: &str) -> f32 {
    let full_marks = self.config.weights.documentation;
    let total_lines = source.lines().count();
    if total_lines == 0 {
        return full_marks;
    }

    let doc_markers: f32 = ["///", "/**", "#[doc"]
        .iter()
        .map(|marker| source.matches(*marker).count() as f32)
        .sum();
    let coverage = (doc_markers / total_lines as f32 * 20.0).min(1.0);
    coverage * full_marks
}
/// Scores naming consistency of Ruchy source, from 0 up to
/// `weights.consistency`.
///
/// Regular expressions classify declarations as conforming (`fun` followed
/// by a lower-case name, `struct`/`enum`/`actor` followed by an upper-case
/// name, `let` binding a lower-case name) or violating (`fun` + upper-case,
/// `struct` + lower-case, `let` + upper-case). The score is the weight
/// multiplied by `conforming / (conforming + violating)`. Source in which
/// nothing is classified earns the full weight.
///
/// Previously the denominator counted only the conforming declarations.
/// The ratio therefore went negative whenever violations outnumbered them,
/// and no penalty was applied at all when nothing conformed.
///
/// NOTE(review): `let [A-Z]` also matches a `let` followed by a capitalised
/// pattern (e.g. an enum variant), if Ruchy allows that; confirm whether
/// such lines should count as violations.
#[cfg(feature = "ruchy-ast")]
fn calculate_ruchy_consistency(&self, source: &str) -> f32 {
    let full_marks = self.config.weights.consistency;
    let occurrences = |pattern: &str| -> usize {
        regex::Regex::new(pattern)
            .expect("hard-coded naming pattern must compile")
            .find_iter(source)
            .count()
    };

    let conforming = occurrences(r"fun [a-z][a-z0-9_]*\(")
        + occurrences(r"(struct|enum|actor) [A-Z][A-Za-z0-9]*")
        + occurrences(r"let [a-z][a-z0-9_]* =");
    let violating = occurrences(r"fun [A-Z]")
        + occurrences(r"struct [a-z]")
        + occurrences(r"let [A-Z]");

    let classified = conforming + violating;
    if classified == 0 {
        return full_marks;
    }
    // Always within [0, 1], so the score stays within [0, full_marks].
    full_marks * (conforming as f32 / classified as f32)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A default analyzer must give structural complexity a positive weight.
    #[test]
    fn test_ast_analyzer_creation() {
        let analyzer = TdgAnalyzerAst::new().unwrap();
        let structural_weight = analyzer.config.weights.structural_complexity;
        assert!(structural_weight > 0.0);
    }

    /// File extensions map to the expected `Language` variants.
    #[test]
    fn test_language_detection() {
        use crate::tdg::Language;

        let expectations = [
            ("test.rs", Language::Rust),
            ("test.py", Language::Python),
            ("test.js", Language::JavaScript),
        ];
        for (file_name, expected) in expectations {
            assert_eq!(Language::from_extension(Path::new(file_name)), expected);
        }
    }
}
// Placeholder property tests. Neither property calls into the analyzer:
// both assertions hold for every value their strategies can generate, so
// they only check that the proptest harness builds and runs.
#[cfg(test)]
mod property_tests {
use proptest::prelude::*;
proptest! {
// The generated string is ignored; the assertion is the constant `true`.
#[test]
fn basic_property_stability(_input in ".*") {
prop_assert!(true);
}
// The strategy yields values in 0..1000, all of which are below 1001.
#[test]
fn module_consistency_check(_x in 0u32..1000) {
prop_assert!(_x < 1001);
}
}
}