use crate::cli::defect_helpers::discover_files_for_defect_analysis;
use crate::cli::defect_prediction_helpers::{collect_file_metrics, DefectPredictionConfig};
use crate::cli::DefectPredictionOutputFormat;
use crate::services::defect_probability::{DefectProbabilityCalculator, DefectScore};
use anyhow::Result;
use std::path::{Path, PathBuf};
use std::time::Instant;
/// Entry point for the defect-prediction analysis command.
///
/// Prints a banner to stderr, builds a [`DefectPredictionConfig`] from the
/// arguments, discovers the files to analyze, scores them, filters/sorts/
/// truncates the scores, renders them in `format`, and finally writes the
/// report to `output` (or stdout when `output` is `None`).
///
/// The elapsed time is captured after scoring and filtering but before
/// rendering, so the reported duration excludes formatting and output.
///
/// # Errors
/// Propagates any error from file discovery (including "no files found"),
/// scoring, rendering, or writing the report.
// NOTE(review): the long parameter list presumably mirrors the CLI flags
// one-to-one, which is why the lint is silenced — confirm against the CLI.
#[allow(clippy::too_many_arguments)]
pub async fn handle_analyze_defect_prediction(
    project_path: PathBuf,
    confidence_threshold: f32,
    min_lines: usize,
    include_low_confidence: bool,
    format: DefectPredictionOutputFormat,
    high_risk_only: bool,
    include_recommendations: bool,
    include: Option<String>,
    exclude: Option<String>,
    output: Option<PathBuf>,
    perf: bool,
    top_files: usize,
) -> Result<()> {
    let started = Instant::now();
    print_analysis_header(&project_path, confidence_threshold, high_risk_only);

    let config = create_defect_prediction_config(
        confidence_threshold,
        min_lines,
        include_low_confidence,
        high_risk_only,
        include_recommendations,
        include,
        exclude,
    );

    let discovered = discover_and_validate_files(&project_path, &config).await?;
    let scored = calculate_defect_predictions(&discovered)?;
    let ranked = filter_and_sort_predictions(
        scored,
        high_risk_only,
        include_low_confidence,
        confidence_threshold,
        top_files,
    );

    // Stop the clock before rendering.
    let elapsed = started.elapsed();
    let rendered = format_defect_output(format, &ranked, elapsed, include_recommendations)?;

    output_results(rendered, output, perf, elapsed).await
}
/// Writes the analysis banner (project path, confidence threshold and the
/// high-risk-only flag) to stderr.
fn print_analysis_header(project_path: &Path, confidence_threshold: f32, high_risk_only: bool) {
    let shown_path = project_path.display();

    eprintln!("🔮 Analyzing defect probability using ML-based analysis...");
    eprintln!("📁 Project path: {shown_path}");
    eprintln!("🎯 Confidence threshold: {}", confidence_threshold);
    eprintln!("📊 High risk only: {}", high_risk_only);
}
/// Bundles the individual analysis options into a [`DefectPredictionConfig`].
///
/// Every argument is moved into the field of the same name without any
/// validation or transformation.
fn create_defect_prediction_config(
    confidence_threshold: f32,
    min_lines: usize,
    include_low_confidence: bool,
    high_risk_only: bool,
    include_recommendations: bool,
    include: Option<String>,
    exclude: Option<String>,
) -> DefectPredictionConfig {
    DefectPredictionConfig {
        // File-selection options.
        include,
        exclude,
        min_lines,
        // Scoring / reporting options.
        confidence_threshold,
        include_low_confidence,
        high_risk_only,
        include_recommendations,
    }
}
/// Discovers the files to analyze and rejects an empty result.
///
/// Delegates to `discover_files_for_defect_analysis`, reports the number of
/// files found on stderr, and returns the discovered entries unchanged.
///
/// # Errors
/// Propagates discovery errors, and returns
/// `"No files found matching criteria"` when discovery yields nothing.
async fn discover_and_validate_files(
    project_path: &Path,
    config: &DefectPredictionConfig,
) -> Result<Vec<(std::path::PathBuf, String, usize)>> {
    let discovered = discover_files_for_defect_analysis(project_path, config).await?;
    let found = discovered.len();
    eprintln!("📂 Found {found} files matching criteria");

    if found == 0 {
        eprintln!("⚠️ No files found matching the criteria");
        anyhow::bail!("No files found matching criteria");
    }

    Ok(discovered)
}
/// Scores every discovered file.
///
/// Metrics are gathered with `collect_file_metrics`, then each metrics record
/// is passed to a single [`DefectProbabilityCalculator`]. The result pairs the
/// record's `file_path` with its [`DefectScore`], in the order the metrics
/// were produced.
///
/// As written this never returns `Err`; the `Result` is part of the signature.
fn calculate_defect_predictions(
    files: &[(std::path::PathBuf, String, usize)],
) -> Result<Vec<(String, DefectScore)>> {
    let file_metrics = collect_file_metrics(files);
    let calculator = DefectProbabilityCalculator::new();

    let mut scored = Vec::new();
    for metrics in file_metrics {
        // Score first: `metrics.file_path` is moved out on the next line.
        let score = calculator.calculate(&metrics);
        scored.push((metrics.file_path, score));
    }
    Ok(scored)
}
/// Filters, ranks and truncates the raw predictions.
///
/// * `high_risk_only` — keep only entries whose probability is above `0.7`.
/// * `include_low_confidence` — when `false`, drop entries whose confidence
///   is not strictly above `confidence_threshold`.
/// * `top_files` — when non-zero, keep at most this many entries after
///   sorting; `0` means "no limit".
///
/// The result is sorted by probability, highest first. The sort is stable, so
/// entries with equal probability keep their incoming order.
fn filter_and_sort_predictions(
    mut predictions: Vec<(String, DefectScore)>,
    high_risk_only: bool,
    include_low_confidence: bool,
    confidence_threshold: f32,
    top_files: usize,
) -> Vec<(String, DefectScore)> {
    if high_risk_only {
        predictions.retain(|(_, score)| score.probability > 0.7);
    }
    if !include_low_confidence {
        predictions.retain(|(_, score)| score.confidence > confidence_threshold);
    }

    // `total_cmp` is a total order over floats, so a NaN probability can no
    // longer panic the sort the way `partial_cmp(..).unwrap()` did. Under this
    // order a (positive) NaN ranks above every number and therefore sorts
    // first, which keeps a broken score visible instead of hiding it.
    predictions.sort_by(|a, b| b.1.probability.total_cmp(&a.1.probability));

    // `truncate` is a no-op when the vector is already short enough.
    if top_files > 0 {
        predictions.truncate(top_files);
    }
    predictions
}
/// Renders the predictions with the formatter that matches `format`.
///
/// `elapsed` is forwarded to the summary, JSON and detailed formatters;
/// `include_recommendations` only affects the detailed report. The SARIF and
/// CSV formatters receive the predictions alone.
fn format_defect_output(
    format: DefectPredictionOutputFormat,
    predictions: &[(String, DefectScore)],
    elapsed: std::time::Duration,
    include_recommendations: bool,
) -> Result<String> {
    // Local shorthand to keep the match arms readable.
    type Format = DefectPredictionOutputFormat;

    let rendered = match format {
        Format::Summary => format_defect_summary(predictions, elapsed),
        Format::Json => format_defect_json(predictions, elapsed),
        Format::Detailed => format_defect_detailed(predictions, elapsed, include_recommendations),
        Format::Sarif => format_defect_sarif(predictions),
        Format::Csv => format_defect_csv(predictions),
    };
    rendered
}
/// Emits the rendered report and the closing status lines.
///
/// Status messages go to stderr: the timing line only when `perf` is set, and
/// the completion line always. The report itself is written to `output` when
/// a path is given, otherwise printed to stdout.
///
/// # Errors
/// Returns the I/O error if writing the output file fails.
async fn output_results(
    content: String,
    output: Option<PathBuf>,
    perf: bool,
    elapsed: std::time::Duration,
) -> Result<()> {
    if perf {
        eprintln!("⏱️ Analysis completed in {elapsed:.2?}");
    }
    eprintln!("✅ Defect prediction complete");

    match output {
        Some(destination) => {
            tokio::fs::write(&destination, &content).await?;
            eprintln!("📝 Written to {}", destination.display());
        }
        None => println!("{content}"),
    }
    Ok(())
}
/// Builds the human-readable summary report.
///
/// The report consists of, in order: the header, the risk distribution, the
/// top-risk file list, and the timing footer.
fn format_defect_summary(
    predictions: &[(String, DefectScore)],
    elapsed: std::time::Duration,
) -> Result<String> {
    let mut report = String::new();

    write_summary_header(&mut report)?;
    write_risk_distribution(&mut report, predictions)?;
    write_top_risk_files(&mut report, predictions)?;
    write_summary_footer(&mut report, elapsed)?;

    Ok(report)
}
/// Appends the summary title, its underline and a blank line to `output`.
fn write_summary_header(output: &mut String) -> Result<()> {
    use std::fmt::Write;

    // Title, underline, then an empty line separating the header from the body.
    for line in ["🔮 Defect Prediction Summary", "==========================", ""] {
        writeln!(output, "{line}")?;
    }
    Ok(())
}
fn write_risk_distribution(
output: &mut String,
predictions: &[(String, DefectScore)],
) -> Result<()> {
use std::fmt::Write;
let risk_stats = calculate_risk_statistics(predictions);
writeln!(output, "📊 Risk Distribution:")?;
writeln!(output, " 🔴 High risk: {} files", risk_stats.high_risk)?;
writeln!(output, " 🟡 Medium risk: {} files", risk_stats.medium_risk)?;
writeln!(output, " 🟢 Low risk: {} files", risk_stats.low_risk)?;
writeln!(output)?;
Ok(())
}
/// Number of predictions that fall into each risk band.
///
/// Populated by `calculate_risk_statistics`, which bands by probability:
/// above `0.7` is high, above `0.3` up to `0.7` is medium, `0.3` or below is low.
///
/// The derives make the counts comparable, printable and default-constructible
/// (all zeros), which the bare struct was not.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
struct RiskStatistics {
    /// Count of high-risk files.
    high_risk: usize,
    /// Count of medium-risk files.
    medium_risk: usize,
    /// Count of low-risk files.
    low_risk: usize,
}
/// Counts how many predictions fall into each risk band.
///
/// Bands by probability: above `0.7` is high, above `0.3` up to and including
/// `0.7` is medium, `0.3` or below is low. A NaN probability fails every
/// comparison and is therefore counted in no band.
fn calculate_risk_statistics(predictions: &[(String, DefectScore)]) -> RiskStatistics {
    let mut stats = RiskStatistics {
        high_risk: 0,
        medium_risk: 0,
        low_risk: 0,
    };

    for (_, score) in predictions {
        let p = score.probability;
        if p > 0.7 {
            stats.high_risk += 1;
        } else if p > 0.3 {
            // Reaching here with `p > 0.3` true implies `p <= 0.7`.
            stats.medium_risk += 1;
        } else if p <= 0.3 {
            // Explicit test (rather than a bare `else`) so NaN stays uncounted.
            stats.low_risk += 1;
        }
    }

    stats
}
/// Appends the "Top Risk Files" section listing at most the first ten
/// predictions, in the order given.
///
/// Nothing is written when `predictions` is empty.
fn write_top_risk_files(output: &mut String, predictions: &[(String, DefectScore)]) -> Result<()> {
    use std::fmt::Write;

    if predictions.is_empty() {
        return Ok(());
    }

    writeln!(output, "🎯 Top Risk Files:")?;
    for (path, score) in predictions.iter().take(10) {
        let icon = get_risk_icon(&score.risk_level);
        let probability_pct = score.probability * 100.0;
        let confidence_pct = score.confidence * 100.0;
        writeln!(
            output,
            " {icon} {probability_pct:.1}% - {path} (confidence: {confidence_pct:.1}%)"
        )?;
    }
    Ok(())
}
/// Maps a risk level to its coloured-circle icon (red, yellow or green).
fn get_risk_icon(risk_level: &crate::services::defect_probability::RiskLevel) -> &'static str {
    // Local shorthand for the fully qualified enum path.
    type Level = crate::services::defect_probability::RiskLevel;

    match *risk_level {
        Level::High => "🔴",
        Level::Medium => "🟡",
        Level::Low => "🟢",
    }
}
/// Appends a blank line followed by the analysis-time line to `output`.
///
/// The duration is rendered with its `Debug` form at two decimal places.
fn write_summary_footer(output: &mut String, elapsed: std::time::Duration) -> Result<()> {
    use std::fmt::Write;

    // The leading "\n" produces the blank separator line.
    writeln!(output, "\n⏱️ Analysis time: {elapsed:.2?}")?;
    Ok(())
}
/// Renders the predictions as a pretty-printed JSON report.
///
/// The document has three top-level keys: `analysis_type`, a `summary` with
/// per-band file counts and the elapsed time in milliseconds, and
/// `predictions` with one object per file. `risk_level` is emitted as the
/// enum's `Debug` text.
///
/// # Errors
/// Returns the serialization error if pretty-printing fails.
fn format_defect_json(
    predictions: &[(String, DefectScore)],
    elapsed: std::time::Duration,
) -> Result<String> {
    // Band the scores in one pass; a NaN probability lands in no band.
    let mut high_risk_files = 0usize;
    let mut medium_risk_files = 0usize;
    let mut low_risk_files = 0usize;
    for (_, score) in predictions {
        if score.probability > 0.7 {
            high_risk_files += 1;
        } else if score.probability > 0.3 {
            medium_risk_files += 1;
        } else if score.probability <= 0.3 {
            low_risk_files += 1;
        }
    }

    let entries: Vec<_> = predictions
        .iter()
        .map(|(file, score)| {
            serde_json::json!({
                "file": file,
                "probability": score.probability,
                "confidence": score.confidence,
                "risk_level": format!("{:?}", score.risk_level),
                "contributing_factors": score.contributing_factors,
                "recommendations": score.recommendations,
            })
        })
        .collect();

    let report = serde_json::json!({
        "analysis_type": "defect_prediction",
        "summary": {
            "total_files_analyzed": predictions.len(),
            "high_risk_files": high_risk_files,
            "medium_risk_files": medium_risk_files,
            "low_risk_files": low_risk_files,
            "analysis_time_ms": elapsed.as_millis(),
        },
        "predictions": entries,
    });

    let rendered = serde_json::to_string_pretty(&report)?;
    Ok(rendered)
}
/// Builds the detailed report: header, one section per prediction (in the
/// order given), then the timing footer.
///
/// `include_recommendations` is forwarded to each per-file section.
fn format_defect_detailed(
    predictions: &[(String, DefectScore)],
    elapsed: std::time::Duration,
    include_recommendations: bool,
) -> Result<String> {
    let mut report = String::new();

    write_detailed_header(&mut report)?;
    predictions.iter().try_for_each(|(file, score)| {
        write_file_details(&mut report, file, score, include_recommendations)
    })?;
    write_analysis_footer(&mut report, elapsed)?;

    Ok(report)
}
/// Appends the detailed-report title, its underline and a blank line.
fn write_detailed_header(output: &mut String) -> Result<()> {
    use std::fmt::Write;

    let title = "🔮 Defect Prediction Detailed Report";
    let rule = "===================================";

    // The embedded "\n" after the rule plus `writeln!`'s own newline yields
    // the trailing blank line.
    writeln!(output, "{title}\n{rule}\n")?;
    Ok(())
}
fn write_file_details(
output: &mut String,
file: &str,
score: &DefectScore,
include_recommendations: bool,
) -> Result<()> {
use std::fmt::Write;
writeln!(output, "📄 File: {file}")?;
write_risk_level(output, score)?;
write_confidence_level(output, score)?;
write_contributing_factors(output, score)?;
if include_recommendations {
write_recommendations(output, score)?;
}
writeln!(output)?;
Ok(())
}
/// Appends the "Risk Level" line: the level label followed by the
/// probability as a percentage with one decimal place.
fn write_risk_level(output: &mut String, score: &DefectScore) -> Result<()> {
    use std::fmt::Write;

    let label = format_risk_level_display(&score.risk_level);
    let percent = score.probability * 100.0;

    writeln!(output, " Risk Level: {label} ({percent:.1}%)")?;
    Ok(())
}
/// Maps a risk level to its display label: the coloured-circle icon followed
/// by the level name in upper case.
fn format_risk_level_display(
    risk_level: &crate::services::defect_probability::RiskLevel,
) -> &'static str {
    // Local shorthand for the fully qualified enum path.
    type Level = crate::services::defect_probability::RiskLevel;

    let label = match *risk_level {
        Level::Low => "🟢 LOW",
        Level::Medium => "🟡 MEDIUM",
        Level::High => "🔴 HIGH",
    };
    label
}
/// Appends the "Confidence" line, showing the score's confidence as a
/// percentage with one decimal place.
fn write_confidence_level(output: &mut String, score: &DefectScore) -> Result<()> {
    use std::fmt::Write;

    let percent = score.confidence * 100.0;
    writeln!(output, " Confidence: {percent:.1}%")?;
    Ok(())
}
/// Appends the "Contributing Factors" section: a heading, then one line per
/// factor with its weight as a percentage (one decimal place).
///
/// Nothing is written when the score has no contributing factors.
fn write_contributing_factors(output: &mut String, score: &DefectScore) -> Result<()> {
    use std::fmt::Write;

    let factors = &score.contributing_factors;
    if !factors.is_empty() {
        writeln!(output, " Contributing Factors:")?;
        for (name, weight) in factors {
            let percent = weight * 100.0;
            writeln!(output, " - {name}: {percent:.1}%")?;
        }
    }
    Ok(())
}
/// Appends the "Recommendations" section: a heading, then one bulleted line
/// per recommendation.
///
/// Nothing is written when the score has no recommendations.
fn write_recommendations(output: &mut String, score: &DefectScore) -> Result<()> {
    use std::fmt::Write;

    let recommendations = &score.recommendations;
    if !recommendations.is_empty() {
        writeln!(output, " Recommendations:")?;
        for item in recommendations {
            writeln!(output, " • {}", item)?;
        }
    }
    Ok(())
}
/// Appends the analysis-time line to `output`.
///
/// The duration is rendered with its `Debug` form at two decimal places.
/// Unlike the summary footer, no blank line is written first.
fn write_analysis_footer(output: &mut String, elapsed: std::time::Duration) -> Result<()> {
    let line = format!("⏱️ Analysis time: {elapsed:.2?}");
    output.push_str(&line);
    output.push('\n');
    Ok(())
}
fn format_defect_sarif(predictions: &[(String, DefectScore)]) -> Result<String> {
let sarif = serde_json::json!({
"$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
"version": "2.1.0",
"runs": [{
"tool": {
"driver": {
"name": "pmat-defect-prediction",
"informationUri": "https://github.com/paiml/paiml-mcp-agent-toolkit",
"version": env!("CARGO_PKG_VERSION"),
"rules": [{
"id": "DEFECT-RISK",
"name": "DefectRisk",
"shortDescription": {
"text": "ML-based defect probability prediction"
},
"fullDescription": {
"text": "Predicts defect probability using ensemble ML model based on churn, complexity, duplication, and coupling metrics"
},
"help": {
"text": "Files with high defect probability should be reviewed carefully and refactored if necessary"
}
}]
}
},
"results": predictions.iter().map(|(file, score)| {
serde_json::json!({
"ruleId": "DEFECT-RISK",
"level": match score.risk_level {
crate::services::defect_probability::RiskLevel::High => "error",
crate::services::defect_probability::RiskLevel::Medium => "warning",
crate::services::defect_probability::RiskLevel::Low => "note",
},
"message": {
"text": format!("Defect probability: {:.1}% (confidence: {:.1}%)",
score.probability * 100.0, score.confidence * 100.0)
},
"locations": [{
"physicalLocation": {
"artifactLocation": {
"uri": file,
"uriBaseId": "%SRCROOT%"
}
}
}],
"properties": {
"probability": score.probability,
"confidence": score.confidence,
"contributing_factors": score.contributing_factors,
"recommendations": score.recommendations
}
})
}).collect::<Vec<_>>()
}]
});
Ok(serde_json::to_string_pretty(&sarif)?)
}
/// Renders the predictions as CSV, one row per file after a header row.
///
/// Columns: `file`, `probability`, `confidence`, `risk_level` (the enum's
/// `Debug` text), `top_factor` and `top_factor_weight`. The "top" factor is
/// the first entry of the score's contributing factors; when there are none
/// the factor is empty and the weight is `0.000`. Numbers use three decimals.
///
/// The two free-text columns (`file`, `top_factor`) are quoted per RFC 4180
/// when they contain a comma, double quote, or line break, so such values no
/// longer shift or split columns. Values without those characters are written
/// exactly as before.
fn format_defect_csv(predictions: &[(String, DefectScore)]) -> Result<String> {
    use std::fmt::Write;

    /// Quotes `field` (doubling embedded quotes) only when CSV requires it.
    fn escape_csv_field(field: &str) -> std::borrow::Cow<'_, str> {
        let needs_quoting = field.contains(|c: char| matches!(c, ',' | '"' | '\n' | '\r'));
        if needs_quoting {
            std::borrow::Cow::Owned(format!("\"{}\"", field.replace('"', "\"\"")))
        } else {
            std::borrow::Cow::Borrowed(field)
        }
    }

    let mut csv =
        String::from("file,probability,confidence,risk_level,top_factor,top_factor_weight\n");

    for (file, score) in predictions {
        let (top_factor, top_weight) = score
            .contributing_factors
            .first()
            .map_or(("", 0.0), |(f, w)| (f.as_str(), *w));

        // Write straight into the buffer instead of allocating a String per row.
        writeln!(
            csv,
            "{},{:.3},{:.3},{:?},{},{:.3}",
            escape_csv_field(file),
            score.probability,
            score.confidence,
            score.risk_level,
            escape_csv_field(top_factor),
            top_weight
        )?;
    }
    Ok(csv)
}
// NOTE(review): both properties below are placeholders — neither exercises any
// function from this module. They only confirm that the proptest harness is
// wired up and runs.
#[cfg(test)]
mod property_tests {
    use proptest::prelude::*;

    proptest! {
        // Accepts any string and asserts a tautology; the input is unused.
        #[test]
        fn basic_property_stability(_input in ".*") {
            prop_assert!(true);
        }

        // Checks only that a value drawn from `0..1000` lies below 1001, which
        // the generator already guarantees.
        #[test]
        fn module_consistency_check(value in 0u32..1000) {
            prop_assert!(value < 1001);
        }
    }
}