organizational_intelligence_plugin/
summarizer.rs

// src/summarizer.rs
//! Summarization module for converting organizational reports into AI-friendly summaries.
//!
//! This module provides automated PII stripping and pattern extraction to eliminate
//! manual waste (Toyota Way: Muda reduction).
6
7use crate::report::{AnalysisReport, DefectPattern};
8use anyhow::Result;
9use serde::{Deserialize, Serialize};
10use std::path::Path;
11
/// Options controlling how [`ReportSummarizer::summarize`] condenses a report.
#[derive(Debug, Clone)]
pub struct SummaryConfig {
    /// When `true`, the `commit_hash` and `author` of every retained example
    /// are overwritten with the placeholder `"REDACTED"`.
    pub strip_pii: bool,
    /// Maximum number of defect categories kept, counted after sorting by
    /// descending frequency.
    pub top_n_categories: usize,
    /// Categories whose frequency is strictly below this value are dropped.
    pub min_frequency: usize,
    /// When `false`, the example list of every retained category is emptied.
    pub include_examples: bool,
}

impl Default for SummaryConfig {
    /// Conservative defaults: PII stripped, examples omitted, at most ten
    /// categories, each observed at least five times.
    fn default() -> Self {
        SummaryConfig {
            include_examples: false,
            min_frequency: 5,
            top_n_categories: 10,
            strip_pii: true,
        }
    }
}
35
36/// Quality thresholds for code assessment
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct QualityThresholds {
39    pub tdg_minimum: f32,
40    pub test_coverage_minimum: f32,
41    pub max_function_length: usize,
42    pub max_cyclomatic_complexity: usize,
43}
44
45impl Default for QualityThresholds {
46    fn default() -> Self {
47        Self {
48            tdg_minimum: 85.0,
49            test_coverage_minimum: 0.85,
50            max_function_length: 50,
51            max_cyclomatic_complexity: 10,
52        }
53    }
54}
55
56/// Metadata about the summary
57#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct SummaryMetadata {
59    pub analysis_date: String,
60    pub repositories_analyzed: usize,
61    pub commits_analyzed: usize,
62}
63
64/// Summarized organizational intelligence for AI consumption
65#[derive(Debug, Clone, Serialize, Deserialize)]
66pub struct Summary {
67    pub organizational_insights: OrganizationalInsights,
68    pub code_quality_thresholds: QualityThresholds,
69    pub metadata: SummaryMetadata,
70}
71
72impl Summary {
73    /// Find a defect category by name
74    pub fn find_category(&self, category_name: &str) -> Option<DefectPatternSummary> {
75        self.organizational_insights
76            .top_defect_categories
77            .iter()
78            .find(|p| p.category.to_string() == category_name)
79            .map(|p| DefectPatternSummary {
80                category: p.category.to_string(),
81                frequency: p.frequency,
82                confidence: p.confidence,
83                avg_tdg_score: p.quality_signals.avg_tdg_score.unwrap_or(0.0),
84                common_patterns: Vec::new(), // Not stored in summary
85                prevention_strategies: Vec::new(), // Not stored in summary
86            })
87    }
88
89    /// Load summary from file
90    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
91        let content = std::fs::read_to_string(path)?;
92        let summary: Summary = serde_yaml::from_str(&content)?;
93        Ok(summary)
94    }
95}
96
97/// Top-level container for defect patterns
98#[derive(Debug, Clone, Serialize, Deserialize)]
99pub struct OrganizationalInsights {
100    pub top_defect_categories: Vec<DefectPattern>,
101}
102
/// Flattened, example-free view of a single defect category, intended for
/// PR review.
#[derive(Debug, Clone, PartialEq)]
pub struct DefectPatternSummary {
    /// Category name rendered as a string.
    pub category: String,
    /// Number of times the category was observed.
    pub frequency: usize,
    /// Confidence associated with the category.
    pub confidence: f32,
    /// Average TDG score recorded for the category.
    pub avg_tdg_score: f32,
    /// Recurring patterns observed for this category.
    pub common_patterns: Vec<String>,
    /// Suggested ways to prevent this category of defect.
    pub prevention_strategies: Vec<String>,
}
113
114/// Summarize organizational analysis reports
115pub struct ReportSummarizer;
116
117impl ReportSummarizer {
118    /// Summarize a full organizational report according to config
119    pub fn summarize<P: AsRef<Path>>(input: P, config: SummaryConfig) -> Result<Summary> {
120        // Load full report
121        let content = std::fs::read_to_string(input)?;
122        let report: AnalysisReport = serde_yaml::from_str(&content)?;
123
124        // Filter and sort patterns by frequency
125        let mut patterns: Vec<DefectPattern> = report
126            .defect_patterns
127            .into_iter()
128            .filter(|p| p.frequency >= config.min_frequency)
129            .collect();
130
131        // Sort by frequency descending
132        patterns.sort_by(|a, b| b.frequency.cmp(&a.frequency));
133
134        // Take top N
135        patterns.truncate(config.top_n_categories);
136
137        // Strip PII if requested
138        if config.strip_pii {
139            Self::strip_pii_from_patterns(&mut patterns);
140        }
141
142        // Remove examples unless explicitly requested
143        if !config.include_examples {
144            for pattern in &mut patterns {
145                pattern.examples.clear();
146            }
147        }
148
149        // Build summary
150        Ok(Summary {
151            organizational_insights: OrganizationalInsights {
152                top_defect_categories: patterns,
153            },
154            code_quality_thresholds: QualityThresholds::default(),
155            metadata: SummaryMetadata {
156                analysis_date: report.metadata.analysis_date,
157                repositories_analyzed: report.metadata.repositories_analyzed,
158                commits_analyzed: report.metadata.commits_analyzed,
159            },
160        })
161    }
162
163    /// Strip PII from defect patterns (author, commit hash, email)
164    fn strip_pii_from_patterns(patterns: &mut [DefectPattern]) {
165        for pattern in patterns {
166            for example in &mut pattern.examples {
167                // Clear PII fields
168                example.commit_hash = "REDACTED".to_string();
169                example.author = "REDACTED".to_string();
170            }
171        }
172    }
173
174    /// Save summary to YAML file
175    pub fn save_to_file<P: AsRef<Path>>(summary: &Summary, output: P) -> Result<()> {
176        let yaml = serde_yaml::to_string(summary)?;
177        std::fs::write(output, yaml)?;
178        Ok(())
179    }
180}
181
#[cfg(test)]
mod tests {
    use super::*;
    use crate::classifier::DefectCategory;
    use crate::report::{AnalysisMetadata, DefectInstance, QualitySignals};
    use tempfile::NamedTempFile;

    /// Build the fixture report shared by the tests.
    ///
    /// It contains three categories: `ConfigurationErrors` (frequency 25,
    /// one example carrying PII), `TypeErrors` (frequency 3, below the
    /// default `min_frequency`) and `SecurityVulnerabilities` (frequency 15).
    fn create_test_report() -> AnalysisReport {
        AnalysisReport {
            version: "1.0".to_string(),
            metadata: AnalysisMetadata {
                organization: "test-org".to_string(),
                analysis_date: "2025-11-15T12:00:00Z".to_string(),
                repositories_analyzed: 10,
                commits_analyzed: 1000,
                analyzer_version: "0.1.0".to_string(),
            },
            defect_patterns: vec![
                DefectPattern {
                    category: DefectCategory::ConfigurationErrors,
                    frequency: 25,
                    confidence: 0.85,
                    quality_signals: QualitySignals {
                        avg_tdg_score: Some(45.2),
                        max_tdg_score: Some(60.0),
                        avg_complexity: None,
                        avg_test_coverage: None,
                        satd_instances: 0,
                        avg_lines_changed: 50.0,
                        avg_files_per_commit: 3.0,
                    },
                    examples: vec![DefectInstance {
                        commit_hash: "abc123".to_string(),
                        message: "fix config bug".to_string(),
                        author: "test@example.com".to_string(),
                        timestamp: 1731662400,
                        files_affected: 3,
                        lines_added: 50,
                        lines_removed: 10,
                    }],
                },
                DefectPattern {
                    category: DefectCategory::TypeErrors,
                    frequency: 3,
                    confidence: 0.90,
                    quality_signals: QualitySignals {
                        avg_tdg_score: Some(95.0),
                        max_tdg_score: Some(98.0),
                        avg_complexity: None,
                        avg_test_coverage: None,
                        satd_instances: 0,
                        avg_lines_changed: 10.0,
                        avg_files_per_commit: 1.0,
                    },
                    examples: vec![],
                },
                DefectPattern {
                    category: DefectCategory::SecurityVulnerabilities,
                    frequency: 15,
                    confidence: 0.80,
                    quality_signals: QualitySignals {
                        avg_tdg_score: Some(55.0),
                        max_tdg_score: Some(70.0),
                        avg_complexity: None,
                        avg_test_coverage: None,
                        satd_instances: 0,
                        avg_lines_changed: 30.0,
                        avg_files_per_commit: 2.0,
                    },
                    examples: vec![],
                },
            ],
        }
    }

    /// Serialize `report` as YAML into a fresh temporary file.
    ///
    /// The returned handle must stay alive while its path is in use: the
    /// file is removed when the handle is dropped.
    fn write_report(report: &AnalysisReport) -> NamedTempFile {
        let file = NamedTempFile::new().unwrap();
        let yaml = serde_yaml::to_string(report).unwrap();
        std::fs::write(file.path(), yaml).unwrap();
        file
    }

    /// Run the summarizer over the fixture report with the given config.
    fn summarize_with(config: SummaryConfig) -> Summary {
        let report_file = write_report(&create_test_report());
        ReportSummarizer::summarize(report_file.path(), config).unwrap()
    }

    /// Fetch the retained pattern whose category renders as `name`.
    fn pattern_named<'a>(summary: &'a Summary, name: &str) -> &'a DefectPattern {
        summary
            .organizational_insights
            .top_defect_categories
            .iter()
            .find(|p| p.category.to_string() == name)
            .unwrap_or_else(|| panic!("category `{}` missing from summary", name))
    }

    #[test]
    fn test_pii_stripping_removes_sensitive_data() {
        // Examples must be kept: with the default config they are cleared,
        // and the assertions below would pass without checking anything.
        let summary = summarize_with(SummaryConfig {
            strip_pii: true,
            include_examples: true,
            ..Default::default()
        });

        let examples: Vec<_> = summary
            .organizational_insights
            .top_defect_categories
            .iter()
            .flat_map(|pattern| pattern.examples.iter())
            .collect();

        // The fixture has exactly one example among the retained categories.
        assert_eq!(examples.len(), 1);
        for example in examples {
            assert_eq!(example.commit_hash, "REDACTED");
            assert_eq!(example.author, "REDACTED");
        }
    }

    #[test]
    fn test_frequency_filtering() {
        // Filter out defects with frequency < 10
        let summary = summarize_with(SummaryConfig {
            min_frequency: 10,
            ..Default::default()
        });

        let categories: Vec<String> = summary
            .organizational_insights
            .top_defect_categories
            .iter()
            .map(|p| p.category.to_string())
            .collect();

        // Only ConfigurationErrors (25) and SecurityVulnerabilities (15) remain.
        assert_eq!(categories.len(), 2);
        assert!(categories.iter().any(|c| c == "ConfigurationErrors"));
        assert!(categories.iter().any(|c| c == "SecurityVulnerabilities"));
        assert!(!categories.iter().any(|c| c == "TypeErrors"));
    }

    #[test]
    fn test_top_n_selection() {
        // Only take top 2, with the frequency filter disabled.
        let summary = summarize_with(SummaryConfig {
            top_n_categories: 2,
            min_frequency: 0,
            ..Default::default()
        });

        let frequencies: Vec<usize> = summary
            .organizational_insights
            .top_defect_categories
            .iter()
            .map(|p| p.frequency)
            .collect();

        // Sorted by descending frequency:
        // ConfigurationErrors (25), then SecurityVulnerabilities (15).
        assert_eq!(frequencies, vec![25, 15]);
    }

    #[test]
    fn test_examples_removed_by_default() {
        let summary = summarize_with(SummaryConfig::default());

        // Guard against a vacuous pass on an empty category list.
        assert!(!summary
            .organizational_insights
            .top_defect_categories
            .is_empty());
        for pattern in &summary.organizational_insights.top_defect_categories {
            assert!(pattern.examples.is_empty());
        }
    }

    #[test]
    fn test_roundtrip_save_and_load() {
        let summary = summarize_with(SummaryConfig::default());
        let summary_file = NamedTempFile::new().unwrap();

        // Save summary
        ReportSummarizer::save_to_file(&summary, summary_file.path()).unwrap();

        // Load summary back
        let loaded_yaml = std::fs::read_to_string(summary_file.path()).unwrap();
        let loaded_summary: Summary = serde_yaml::from_str(&loaded_yaml).unwrap();

        // Verify metadata
        assert_eq!(loaded_summary.metadata.repositories_analyzed, 10);
        assert_eq!(loaded_summary.metadata.commits_analyzed, 1000);
    }

    #[test]
    fn test_summary_config_default() {
        let config = SummaryConfig::default();
        assert!(config.strip_pii);
        assert_eq!(config.top_n_categories, 10);
        assert_eq!(config.min_frequency, 5);
        assert!(!config.include_examples);
    }

    #[test]
    fn test_quality_thresholds_default() {
        let thresholds = QualityThresholds::default();
        assert_eq!(thresholds.tdg_minimum, 85.0);
        assert_eq!(thresholds.test_coverage_minimum, 0.85);
        assert_eq!(thresholds.max_function_length, 50);
        assert_eq!(thresholds.max_cyclomatic_complexity, 10);
    }

    #[test]
    fn test_summary_find_category() {
        let summary = summarize_with(SummaryConfig::default());

        // Find existing category
        let category = summary
            .find_category("ConfigurationErrors")
            .expect("ConfigurationErrors should be present in the summary");
        assert_eq!(category.category, "ConfigurationErrors");
        assert_eq!(category.frequency, 25);
        assert_eq!(category.avg_tdg_score, 45.2);
        assert!(category.common_patterns.is_empty());
        assert!(category.prevention_strategies.is_empty());

        // Find non-existent category
        assert!(summary.find_category("NonExistent").is_none());
    }

    #[test]
    fn test_summary_from_file() {
        let summary = summarize_with(SummaryConfig::default());
        let summary_file = NamedTempFile::new().unwrap();
        ReportSummarizer::save_to_file(&summary, summary_file.path()).unwrap();

        // Load using Summary::from_file
        let loaded = Summary::from_file(summary_file.path()).unwrap();
        assert_eq!(loaded.metadata.repositories_analyzed, 10);
        assert_eq!(loaded.metadata.commits_analyzed, 1000);
    }

    #[test]
    fn test_summarize_missing_input_is_error() {
        // The temporary file is deleted as soon as the handle is dropped at
        // the end of this statement, leaving a path that does not exist.
        let missing = NamedTempFile::new().unwrap().path().to_path_buf();

        let result = ReportSummarizer::summarize(&missing, SummaryConfig::default());
        assert!(result.is_err());
    }

    #[test]
    fn test_include_examples_config() {
        // Summarize with examples included
        let summary = summarize_with(SummaryConfig {
            include_examples: true,
            strip_pii: false,
            ..Default::default()
        });

        // ConfigurationErrors has 1 example
        let config_pattern = pattern_named(&summary, "ConfigurationErrors");
        assert_eq!(config_pattern.examples.len(), 1);
        assert_eq!(config_pattern.examples[0].commit_hash, "abc123");
        assert_eq!(config_pattern.examples[0].author, "test@example.com");
    }

    #[test]
    fn test_defect_pattern_summary_equality() {
        let summary1 = DefectPatternSummary {
            category: "MemorySafety".to_string(),
            frequency: 10,
            confidence: 0.85,
            avg_tdg_score: 70.0,
            common_patterns: vec!["use-after-free".to_string()],
            prevention_strategies: vec!["Use smart pointers".to_string()],
        };

        let summary2 = summary1.clone();
        assert_eq!(summary1, summary2);
    }

    #[test]
    fn test_summary_metadata_serialization() {
        let metadata = SummaryMetadata {
            analysis_date: "2025-11-24".to_string(),
            repositories_analyzed: 5,
            commits_analyzed: 500,
        };

        let yaml = serde_yaml::to_string(&metadata).unwrap();
        let deserialized: SummaryMetadata = serde_yaml::from_str(&yaml).unwrap();

        assert_eq!(deserialized.analysis_date, "2025-11-24");
        assert_eq!(deserialized.repositories_analyzed, 5);
        assert_eq!(deserialized.commits_analyzed, 500);
    }

    #[test]
    fn test_organizational_insights_serialization() {
        let insights = OrganizationalInsights {
            top_defect_categories: vec![],
        };

        let yaml = serde_yaml::to_string(&insights).unwrap();
        let deserialized: OrganizationalInsights = serde_yaml::from_str(&yaml).unwrap();

        assert!(deserialized.top_defect_categories.is_empty());
    }

    #[test]
    fn test_no_pii_stripping_when_disabled() {
        // Summarize without PII stripping
        let summary = summarize_with(SummaryConfig {
            strip_pii: false,
            include_examples: true,
            ..Default::default()
        });

        // Verify PII is NOT stripped
        let config_pattern = pattern_named(&summary, "ConfigurationErrors");
        assert_eq!(config_pattern.examples[0].commit_hash, "abc123");
        assert_eq!(config_pattern.examples[0].author, "test@example.com");
    }
}