organizational_intelligence_plugin/
cli.rs

1// CLI argument parsing for OIP
2// Following EXTREME TDD: Minimal implementation to make tests compile
3
4use clap::{Parser, Subcommand};
5use std::path::PathBuf;
6
7#[derive(Parser, Debug)]
8#[command(name = "oip")]
9#[command(about = "Organizational Intelligence Plugin - Defect Pattern Analysis", long_about = None)]
10#[command(version)]
11pub struct Cli {
12    #[command(subcommand)]
13    pub command: Commands,
14
15    /// Enable verbose logging
16    #[arg(long, global = true)]
17    pub verbose: bool,
18
19    /// Configuration file path
20    #[arg(long, global = true)]
21    pub config: Option<PathBuf>,
22}
23
24#[derive(Subcommand, Debug)]
25pub enum Commands {
26    /// Analyze GitHub organization for defect patterns
27    Analyze {
28        /// Organization name
29        #[arg(long, required = true)]
30        org: String,
31
32        /// Output file path
33        #[arg(long, short, default_value = "defects.yaml")]
34        output: PathBuf,
35
36        /// Maximum concurrent repository analysis
37        #[arg(long, default_value = "10")]
38        max_concurrent: usize,
39
40        /// Path to trained ML model (optional, uses rule-based if not provided)
41        #[arg(long)]
42        model: Option<PathBuf>,
43
44        /// Confidence threshold for ML predictions (0.0-1.0)
45        #[arg(long, default_value = "0.65")]
46        ml_confidence: f32,
47    },
48
49    /// Summarize analysis report for AI consumption (Phase 2)
50    Summarize {
51        /// Input YAML report from 'analyze' command
52        #[arg(long, short, required = true)]
53        input: PathBuf,
54
55        /// Output summary file
56        #[arg(long, short, required = true)]
57        output: PathBuf,
58
59        /// Strip PII (author names, commit hashes, email addresses)
60        #[arg(long, default_value = "true")]
61        strip_pii: bool,
62
63        /// Top N defect categories to include
64        #[arg(long, default_value = "10")]
65        top_n: usize,
66
67        /// Minimum frequency to include
68        #[arg(long, default_value = "5")]
69        min_frequency: usize,
70
71        /// Include anonymized examples (with PII redacted if strip-pii is true)
72        #[arg(long, default_value = "false")]
73        include_examples: bool,
74    },
75
76    /// Review PR with organizational context (Phase 3)
77    ReviewPr {
78        /// Baseline summary from weekly analysis
79        #[arg(long, short, required = true)]
80        baseline: PathBuf,
81
82        /// Files changed in PR (comma-separated)
83        #[arg(long, short, required = true)]
84        files: String,
85
86        /// Output format: markdown, json
87        #[arg(long, default_value = "markdown")]
88        format: String,
89
90        /// Output file (stdout if not specified)
91        #[arg(long, short)]
92        output: Option<PathBuf>,
93    },
94
95    /// Extract training data from Git repository (Phase 2 ML)
96    ExtractTrainingData {
97        /// Path to Git repository
98        #[arg(long, short, required = true)]
99        repo: PathBuf,
100
101        /// Output JSON file
102        #[arg(long, short, default_value = "training-data.json")]
103        output: PathBuf,
104
105        /// Minimum confidence threshold (0.0-1.0)
106        #[arg(long, default_value = "0.75")]
107        min_confidence: f32,
108
109        /// Maximum commits to analyze
110        #[arg(long, default_value = "1000")]
111        max_commits: usize,
112
113        /// Create train/validation/test splits
114        #[arg(long, default_value = "true")]
115        create_splits: bool,
116
117        /// Show visualization (requires --features viz)
118        #[arg(long, default_value = "false")]
119        viz: bool,
120    },
121
122    /// Train ML classifier on extracted training data (Phase 2 ML)
123    TrainClassifier {
124        /// Input training data JSON file
125        #[arg(long, short, required = true)]
126        input: PathBuf,
127
128        /// Output model file (optional)
129        #[arg(long, short)]
130        output: Option<PathBuf>,
131
132        /// Number of trees in Random Forest
133        #[arg(long, default_value = "100")]
134        n_estimators: usize,
135
136        /// Maximum tree depth
137        #[arg(long, default_value = "20")]
138        max_depth: usize,
139
140        /// Maximum TF-IDF features
141        #[arg(long, default_value = "1500")]
142        max_features: usize,
143    },
144
145    /// Export CommitFeatures to aprender-compatible format (Issue #2)
146    Export {
147        /// Path to Git repository to analyze
148        #[arg(long, short, required = true)]
149        repo: PathBuf,
150
151        /// Output file path
152        #[arg(long, short, default_value = "features.json")]
153        output: PathBuf,
154
155        /// Export format: json, binary, parquet
156        #[arg(long, short, default_value = "json")]
157        format: String,
158
159        /// Maximum commits to analyze
160        #[arg(long, default_value = "1000")]
161        max_commits: usize,
162
163        /// Minimum confidence threshold for classification (0.0-1.0)
164        #[arg(long, default_value = "0.70")]
165        min_confidence: f32,
166    },
167
168    /// Import Depyler CITL corpus for ground-truth training labels (NLP-014)
169    ImportDepyler {
170        /// Path to Depyler JSONL export file
171        #[arg(long, short, required = true)]
172        input: PathBuf,
173
174        /// Output training data JSON file
175        #[arg(long, short, default_value = "citl-training.json")]
176        output: PathBuf,
177
178        /// Minimum confidence threshold (0.0-1.0)
179        #[arg(long, default_value = "0.75")]
180        min_confidence: f32,
181
182        /// Merge with existing training data file (optional)
183        #[arg(long, short)]
184        merge: Option<PathBuf>,
185
186        /// Create train/validation/test splits
187        #[arg(long, default_value = "true")]
188        create_splits: bool,
189    },
190
191    /// Localize faults using Tarantula SBFL (Spectrum-Based Fault Localization)
192    Localize {
193        /// Path to LCOV coverage file from passing tests
194        #[arg(long, required = true)]
195        passed_coverage: PathBuf,
196
197        /// Path to LCOV coverage file from failing tests
198        #[arg(long, required = true)]
199        failed_coverage: PathBuf,
200
201        /// Number of passing tests
202        #[arg(long, default_value = "1")]
203        passed_count: usize,
204
205        /// Number of failing tests
206        #[arg(long, default_value = "1")]
207        failed_count: usize,
208
209        /// SBFL formula: tarantula, ochiai, dstar2, dstar3
210        #[arg(long, default_value = "tarantula")]
211        formula: String,
212
213        /// Top N suspicious statements to report
214        #[arg(long, default_value = "10")]
215        top_n: usize,
216
217        /// Output file path
218        #[arg(long, short, default_value = "fault-localization.yaml")]
219        output: PathBuf,
220
221        /// Output format: yaml, json, terminal
222        #[arg(long, short, default_value = "yaml")]
223        format: String,
224
225        /// Include TDG scores from pmat (requires pmat)
226        #[arg(long, default_value = "false")]
227        enrich_tdg: bool,
228
229        /// Repository path for TDG enrichment
230        #[arg(long)]
231        repo: Option<PathBuf>,
232
233        /// Enable RAG-enhanced localization with trueno-rag
234        #[arg(long, default_value = "false")]
235        rag: bool,
236
237        /// Path to bug knowledge base YAML file (for RAG)
238        #[arg(long)]
239        knowledge_base: Option<PathBuf>,
240
241        /// Fusion strategy for RAG: rrf, linear, dbsf, sbfl-only
242        #[arg(long, default_value = "rrf")]
243        fusion: String,
244
245        /// Number of similar bugs to retrieve (for RAG)
246        #[arg(long, default_value = "5")]
247        similar_bugs: usize,
248
249        /// Enable weighted ensemble model (Phase 6)
250        #[arg(long, default_value = "false")]
251        ensemble: bool,
252
253        /// Path to trained ensemble model file
254        #[arg(long)]
255        ensemble_model: Option<PathBuf>,
256
257        /// Include churn metrics from git history (for ensemble)
258        #[arg(long, default_value = "false")]
259        include_churn: bool,
260
261        /// Enable calibrated probability output (Phase 7)
262        #[arg(long, default_value = "false")]
263        calibrated: bool,
264
265        /// Path to trained calibration model file
266        #[arg(long)]
267        calibration_model: Option<PathBuf>,
268
269        /// Confidence threshold for calibrated predictions (0.0-1.0)
270        #[arg(long, default_value = "0.5")]
271        confidence_threshold: f32,
272    },
273}
274
#[cfg(test)]
mod tests {
    use super::*;
    // Brings `Cli::command()` into scope for the definition self-check below.
    use clap::CommandFactory;

    /// Compile-time sanity check: the public CLI types exist and are nameable.
    #[test]
    fn test_cli_structure_exists() {
        let _cli_type_check: Option<Cli> = None;
        let _commands_type_check: Option<Commands> = None;
    }

    /// Runs clap's own consistency checks over the whole command tree, so
    /// definition mistakes (e.g. two flags claiming the same short letter)
    /// fail here rather than the first time a user invokes the binary.
    #[test]
    fn test_cli_definition_is_consistent() {
        Cli::command().debug_assert();
    }

    /// A minimal `analyze` invocation parses and picks up the declared defaults.
    #[test]
    fn test_analyze_applies_declared_defaults() {
        let cli = Cli::try_parse_from(["oip", "analyze", "--org", "acme"])
            .expect("minimal analyze invocation should parse");

        assert!(!cli.verbose);
        assert!(cli.config.is_none());

        match cli.command {
            Commands::Analyze {
                org,
                output,
                max_concurrent,
                model,
                ml_confidence,
            } => {
                assert_eq!(org, "acme");
                assert_eq!(output, PathBuf::from("defects.yaml"));
                assert_eq!(max_concurrent, 10);
                assert!(model.is_none());
                assert!((ml_confidence - 0.65).abs() < f32::EPSILON);
            }
            other => panic!("expected Analyze, got {other:?}"),
        }
    }

    /// `--org` is mandatory for `analyze`; omitting it must be a parse error.
    #[test]
    fn test_analyze_requires_org() {
        assert!(Cli::try_parse_from(["oip", "analyze"]).is_err());
    }
}
286}