use crate::cli::defect_prediction_helpers::{
calculate_simple_churn_score, calculate_simple_complexity, DefectPredictionConfig,
};
use crate::services::defect_probability::{DefectProbabilityCalculator, DefectScore, FileMetrics};
use anyhow::Result;
use std::fmt::Write;
use std::path::{Path, PathBuf};
/// Collects the source files that defect analysis should look at.
///
/// This is a thin async forwarder: `project_path` and `config` are handed
/// unchanged to `discover_source_files_for_defect_analysis` in
/// `crate::cli::defect_prediction_helpers`, and its result is returned as-is.
///
/// # Returns
/// One `(PathBuf, String, usize)` tuple per discovered file.
///
/// # Errors
/// Propagates whatever error the helper reports.
pub async fn discover_files_for_defect_analysis(
    project_path: &Path,
    config: &DefectPredictionConfig,
) -> Result<Vec<(PathBuf, String, usize)>> {
    crate::cli::defect_prediction_helpers::discover_source_files_for_defect_analysis(
        project_path,
        config,
    )
    .await
}
/// Scores every file and returns the predictions ordered from the highest to
/// the lowest defect probability.
///
/// # Arguments
/// * `files` - `(path, content, line_count)` tuples, one per file to score.
/// * `config` - filtering options:
///   * `high_risk_only` keeps only scores whose probability is above 0.7;
///   * unless `include_low_confidence` is set, scores whose confidence is not
///     above `confidence_threshold` are dropped.
///
/// # Returns
/// `(file_path, score)` pairs sorted by descending probability. Paths are
/// converted lossily to `String`.
///
/// A progress line is printed to stderr before scoring starts.
pub async fn analyze_defect_probability(
    files: &[(PathBuf, String, usize)],
    config: &DefectPredictionConfig,
) -> Result<Vec<(String, DefectScore)>> {
    eprintln!("📊 Analyzing {} files...", files.len());

    let calculator = DefectProbabilityCalculator::new();
    let mut predictions = Vec::with_capacity(files.len());

    for (path, content, line_count) in files {
        // Convert the path once; the same string is used for the metrics and the result.
        let file_path = path.to_string_lossy().into_owned();
        let metrics = FileMetrics {
            file_path: file_path.clone(),
            complexity: calculate_simple_complexity(content) as f32,
            churn_score: calculate_simple_churn_score(content, *line_count),
            // NOTE(review): duplication, coupling and the cyclomatic/cognitive
            // values below are hard-coded constants, not measurements.
            duplicate_ratio: 0.0,
            afferent_coupling: 0.0,
            efferent_coupling: 0.0,
            lines_of_code: *line_count,
            cyclomatic_complexity: 10,
            cognitive_complexity: 10,
        };
        let score = calculator.calculate(&metrics);
        predictions.push((file_path, score));
    }

    if config.high_risk_only {
        predictions.retain(|(_, score)| score.probability > 0.7);
    }
    if !config.include_low_confidence {
        predictions.retain(|(_, score)| score.confidence > config.confidence_threshold);
    }

    // `total_cmp` gives a total order over floats, so a NaN probability is
    // ordered deterministically instead of panicking the way
    // `partial_cmp(..).unwrap()` would.
    predictions.sort_by(|a, b| b.1.probability.total_cmp(&a.1.probability));

    Ok(predictions)
}
/// Renders the predictions as a pretty-printed JSON document.
///
/// The document has two top-level keys:
/// * `defect_predictions` - one object per prediction with `file`,
///   `probability`, `confidence` and `risk_factors`;
/// * `summary` - `total_files` plus the number of files in each risk bucket
///   (high: probability > 0.7, medium: > 0.4 and <= 0.7, low: <= 0.4).
///
/// # Errors
/// Returns an error if JSON serialization fails.
pub fn format_defect_json(predictions: &[(String, DefectScore)]) -> Result<String> {
    let entries: Vec<_> = predictions
        .iter()
        .map(|(file, score)| {
            serde_json::json!({
                "file": file,
                "probability": score.probability,
                "confidence": score.confidence,
                "risk_factors": score.contributing_factors,
            })
        })
        .collect();

    // Tally the three buckets in one pass. A probability that satisfies none
    // of the comparisons (NaN) is not counted anywhere.
    let (mut high, mut medium, mut low) = (0usize, 0usize, 0usize);
    for (_, score) in predictions {
        if score.probability > 0.7 {
            high += 1;
        } else if score.probability > 0.4 {
            medium += 1;
        } else if score.probability <= 0.4 {
            low += 1;
        }
    }

    let report = serde_json::json!({
        "defect_predictions": entries,
        "summary": {
            "total_files": predictions.len(),
            "high_risk_files": high,
            "medium_risk_files": medium,
            "low_risk_files": low,
        }
    });

    Ok(serde_json::to_string_pretty(&report)?)
}
/// Builds a short Markdown summary of the predictions.
///
/// The summary contains the total file count, the number of files per risk
/// bucket (high: probability > 0.7, medium: > 0.4 and <= 0.7, low: <= 0.4)
/// and, when `predictions` is non-empty, a numbered list of its first ten
/// entries under the "Top 10 High-Risk Files" heading.
///
/// The list is taken in slice order without any filtering; presumably callers
/// pass predictions already sorted by descending probability.
pub fn format_defect_summary(predictions: &[(String, DefectScore)]) -> Result<String> {
    // One pass over the data; a probability matching none of the comparisons
    // (NaN) is left out of every bucket.
    let (mut high, mut medium, mut low) = (0usize, 0usize, 0usize);
    for (_, score) in predictions {
        if score.probability > 0.7 {
            high += 1;
        } else if score.probability > 0.4 {
            medium += 1;
        } else if score.probability <= 0.4 {
            low += 1;
        }
    }

    let mut report = String::new();
    writeln!(report, "# Defect Prediction Summary\n")?;
    writeln!(report, "**Total files analyzed**: {}", predictions.len())?;
    writeln!(report, "\n## Risk Distribution:")?;
    writeln!(report, "- 🔴 High Risk (>70%): {high} files")?;
    writeln!(report, "- 🟡 Medium Risk (40-70%): {medium} files")?;
    writeln!(report, "- 🟢 Low Risk (<40%): {low} files")?;

    if predictions.is_empty() {
        return Ok(report);
    }

    writeln!(report, "\n## Top 10 High-Risk Files:")?;
    for (rank, (file, score)) in (1..).zip(predictions.iter().take(10)) {
        writeln!(
            report,
            "{}. {} - {:.1}% probability",
            rank,
            file,
            score.probability * 100.0
        )?;
    }

    Ok(report)
}
pub fn format_defect_markdown(
predictions: &[(String, DefectScore)],
include_recommendations: bool,
) -> Result<String> {
let mut output = String::new();
writeln!(&mut output, "# Defect Prediction Report\n")?;
write_summary_section(&mut output, predictions)?;
write_risk_distribution_table(&mut output, predictions)?;
write_detailed_predictions(&mut output, predictions, include_recommendations)?;
Ok(output)
}
/// Appends the "Summary" heading and the total number of analyzed files to `output`.
fn write_summary_section(output: &mut String, predictions: &[(String, DefectScore)]) -> Result<()> {
    // Heading followed by a blank line.
    output.push_str("## Summary\n\n");
    let analyzed = predictions.len();
    writeln!(output, "**Total files analyzed**: {analyzed}")?;
    Ok(())
}
/// Appends a Markdown table with the count and percentage of files in each
/// risk bucket (high, medium, low) to `output`.
///
/// Counts come from `calculate_risk_counts`; each row is rendered by
/// `write_risk_row` against the total number of predictions.
fn write_risk_distribution_table(
    output: &mut String,
    predictions: &[(String, DefectScore)],
) -> Result<()> {
    let total = predictions.len() as f64;
    let (high, medium, low) = calculate_risk_counts(predictions);

    // Section heading plus the table header and separator rows.
    for header_line in [
        "\n### Risk Distribution",
        "| Risk Level | Count | Percentage |",
        "|------------|-------|------------|",
    ] {
        writeln!(output, "{header_line}")?;
    }

    for (label, count) in [
        ("High (>70%)", high),
        ("Medium (40-70%)", medium),
        ("Low (<40%)", low),
    ] {
        write_risk_row(output, label, count, total)?;
    }

    Ok(())
}
/// Counts how many predictions fall into each risk bucket.
///
/// Returns `(high, medium, low)` where high means probability > 0.7, medium
/// means > 0.4 and <= 0.7, and low means <= 0.4. A probability that satisfies
/// none of these comparisons (NaN) is not counted.
fn calculate_risk_counts(predictions: &[(String, DefectScore)]) -> (usize, usize, usize) {
    predictions
        .iter()
        .fold((0, 0, 0), |(high, medium, low), (_, score)| {
            if score.probability > 0.7 {
                (high + 1, medium, low)
            } else if score.probability > 0.4 {
                (high, medium + 1, low)
            } else if score.probability <= 0.4 {
                (high, medium, low + 1)
            } else {
                (high, medium, low)
            }
        })
}
/// Appends one row of the risk distribution table to `output`.
///
/// # Arguments
/// * `label` - text for the "Risk Level" column.
/// * `count` - number of files in this bucket.
/// * `total` - total number of files, used as the percentage denominator.
///
/// The percentage is `count / total * 100` with one decimal place. When
/// `total` is not positive (for example an empty prediction list) the row
/// shows `0.0%` instead of the `NaN%` a division by zero would produce.
fn write_risk_row(output: &mut String, label: &str, count: usize, total: f64) -> Result<()> {
    let percentage = if total > 0.0 {
        (count as f64 / total) * 100.0
    } else {
        // No files at all: avoid 0/0 and report an empty share.
        0.0
    };
    writeln!(output, "| {label} | {count} | {percentage:.1}% |")?;
    Ok(())
}
fn write_detailed_predictions(
output: &mut String,
predictions: &[(String, DefectScore)],
include_recommendations: bool,
) -> Result<()> {
writeln!(output, "\n## Detailed Predictions\n")?;
for (file, score) in predictions.iter().take(20) {
write_single_prediction(output, file, score, include_recommendations)?;
}
Ok(())
}
/// Appends the report entry for a single file to `output`.
///
/// The entry consists of a heading with the file name, the metrics written by
/// `write_prediction_metrics`, optionally the recommendations for the file's
/// probability, and a trailing blank line.
fn write_single_prediction(
    output: &mut String,
    file: &str,
    score: &DefectScore,
    include_recommendations: bool,
) -> Result<()> {
    writeln!(output, "### {}\n", file)?;
    write_prediction_metrics(output, score)?;

    if include_recommendations {
        let probability = f64::from(score.probability);
        write_recommendations(output, probability)?;
    }

    // Blank line separating this entry from the next one.
    output.push('\n');
    Ok(())
}
/// Appends the probability, confidence and risk factors of `score` to
/// `output` as a Markdown bullet list.
///
/// Probability and confidence are shown as percentages with one decimal
/// place; the risk factors are printed with their `Debug` representation.
fn write_prediction_metrics(output: &mut String, score: &DefectScore) -> Result<()> {
    let probability_pct = f64::from(score.probability) * 100.0;
    let confidence_pct = f64::from(score.confidence) * 100.0;

    writeln!(output, "- **Probability**: {probability_pct:.1}%")?;
    writeln!(output, "- **Confidence**: {confidence_pct:.1}%")?;
    writeln!(output, "- **Risk Factors**: {:?}", score.contributing_factors)?;
    Ok(())
}
/// Appends a "Recommendations" subsection matching the given defect
/// probability to `output`.
///
/// * probability > 0.7: three high-priority items;
/// * probability > 0.4 (and not above 0.7): two medium-priority items;
/// * anything else: a single low-priority item.
fn write_recommendations(output: &mut String, probability: f64) -> Result<()> {
    let advice: &[&str] = if probability > 0.7 {
        &[
            "- 🔴 High priority for code review",
            "- Add comprehensive test coverage",
            "- Consider refactoring to reduce complexity",
        ]
    } else if probability > 0.4 {
        &["- 🟡 Schedule for regular review", "- Improve test coverage"]
    } else {
        &["- 🟢 Monitor during regular maintenance"]
    };

    writeln!(output, "\n#### Recommendations:")?;
    for line in advice {
        writeln!(output, "{line}")?;
    }
    Ok(())
}
/// Renders the predictions as a pretty-printed SARIF 2.1.0 document.
///
/// Every prediction becomes one result whose rule id and level depend on the
/// probability:
/// * > 0.7: `high-defect-probability` / `error`
/// * > 0.4: `medium-defect-probability` / `warning`
/// * otherwise: `low-defect-probability` / `note`
///
/// The result message states probability, confidence and risk factors, and the
/// file name is used as the artifact URI. The tool driver section lists the
/// rules returned by `generate_defect_rules`. `_project_path` is unused.
///
/// # Errors
/// Returns an error if JSON serialization fails.
pub fn format_defect_sarif(
    predictions: &[(String, DefectScore)],
    _project_path: &Path,
) -> Result<String> {
    let results: Vec<serde_json::Value> = predictions
        .iter()
        .map(|(file, score)| {
            // Rule id and severity level share the same thresholds.
            let (rule_id, level) = if score.probability > 0.7 {
                ("high-defect-probability", "error")
            } else if score.probability > 0.4 {
                ("medium-defect-probability", "warning")
            } else {
                ("low-defect-probability", "note")
            };

            let message = format!(
                "File has {:.1}% defect probability with {:.1}% confidence. Risk factors: {:?}",
                score.probability * 100.0,
                score.confidence * 100.0,
                score.contributing_factors
            );

            serde_json::json!({
                "ruleId": rule_id,
                "level": level,
                "message": {
                    "text": message
                },
                "locations": [{
                    "physicalLocation": {
                        "artifactLocation": {
                            "uri": file
                        }
                    }
                }]
            })
        })
        .collect();

    let document = serde_json::json!({
        "version": "2.1.0",
        "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
        "runs": [{
            "tool": {
                "driver": {
                    "name": "paiml-defect-predictor",
                    "version": env!("CARGO_PKG_VERSION"),
                    "informationUri": "https://github.com/paiml/paiml-mcp-agent-toolkit",
                    "rules": generate_defect_rules(),
                }
            },
            "results": results
        }]
    });

    Ok(serde_json::to_string_pretty(&document)?)
}
/// Builds the SARIF rule descriptors for the three defect-probability rules
/// (high, medium, low), in that order.
///
/// Each descriptor carries `id`, `name`, `shortDescription`,
/// `fullDescription` and a `defaultConfiguration` with the rule's level.
fn generate_defect_rules() -> Vec<serde_json::Value> {
    // (id, name, short description, full description, default level)
    const RULES: [(&str, &str, &str, &str, &str); 3] = [
        (
            "high-defect-probability",
            "High Defect Probability",
            "File has high probability of containing defects",
            "Files with >70% defect probability require immediate review",
            "error",
        ),
        (
            "medium-defect-probability",
            "Medium Defect Probability",
            "File has medium probability of containing defects",
            "Files with 40-70% defect probability should be reviewed",
            "warning",
        ),
        (
            "low-defect-probability",
            "Low Defect Probability",
            "File has low probability of containing defects",
            "Files with <40% defect probability are lower risk",
            "note",
        ),
    ];

    RULES
        .iter()
        .map(|&(id, name, short_text, full_text, level)| {
            serde_json::json!({
                "id": id,
                "name": name,
                "shortDescription": {
                    "text": short_text
                },
                "fullDescription": {
                    "text": full_text
                },
                "defaultConfiguration": {
                    "level": level
                }
            })
        })
        .collect()
}
/// Placeholder property tests; neither case calls any function of this module.
#[cfg(test)]
mod property_tests {
    use proptest::prelude::*;

    proptest! {
        // Accepts any generated string and always passes.
        #[test]
        fn basic_property_stability(_text in ".*") {
            prop_assert!(true);
        }

        // Values drawn from `0u32..1000` must stay below 1001.
        #[test]
        fn module_consistency_check(value in 0u32..1000) {
            prop_assert!(value < 1001);
        }
    }
}