// organizational_intelligence_plugin/cli.rs
// CLI argument parsing for OIP
// Following EXTREME TDD: Minimal implementation to make tests compile
3
4use clap::{Parser, Subcommand};
5use std::path::PathBuf;
6
7#[derive(Parser, Debug)]
8#[command(name = "oip")]
9#[command(about = "Organizational Intelligence Plugin - Defect Pattern Analysis", long_about = None)]
10#[command(version)]
11pub struct Cli {
12 #[command(subcommand)]
13 pub command: Commands,
14
15 /// Enable verbose logging
16 #[arg(long, global = true)]
17 pub verbose: bool,
18
19 /// Configuration file path
20 #[arg(long, global = true)]
21 pub config: Option<PathBuf>,
22}
23
24#[derive(Subcommand, Debug)]
25pub enum Commands {
26 /// Analyze GitHub organization for defect patterns
27 Analyze {
28 /// Organization name
29 #[arg(long, required = true)]
30 org: String,
31
32 /// Output file path
33 #[arg(long, short, default_value = "defects.yaml")]
34 output: PathBuf,
35
36 /// Maximum concurrent repository analysis
37 #[arg(long, default_value = "10")]
38 max_concurrent: usize,
39
40 /// Path to trained ML model (optional, uses rule-based if not provided)
41 #[arg(long)]
42 model: Option<PathBuf>,
43
44 /// Confidence threshold for ML predictions (0.0-1.0)
45 #[arg(long, default_value = "0.65")]
46 ml_confidence: f32,
47 },
48
49 /// Summarize analysis report for AI consumption (Phase 2)
50 Summarize {
51 /// Input YAML report from 'analyze' command
52 #[arg(long, short, required = true)]
53 input: PathBuf,
54
55 /// Output summary file
56 #[arg(long, short, required = true)]
57 output: PathBuf,
58
59 /// Strip PII (author names, commit hashes, email addresses)
60 #[arg(long, default_value = "true")]
61 strip_pii: bool,
62
63 /// Top N defect categories to include
64 #[arg(long, default_value = "10")]
65 top_n: usize,
66
67 /// Minimum frequency to include
68 #[arg(long, default_value = "5")]
69 min_frequency: usize,
70
71 /// Include anonymized examples (with PII redacted if strip-pii is true)
72 #[arg(long, default_value = "false")]
73 include_examples: bool,
74 },
75
76 /// Review PR with organizational context (Phase 3)
77 ReviewPr {
78 /// Baseline summary from weekly analysis
79 #[arg(long, short, required = true)]
80 baseline: PathBuf,
81
82 /// Files changed in PR (comma-separated)
83 #[arg(long, short, required = true)]
84 files: String,
85
86 /// Output format: markdown, json
87 #[arg(long, default_value = "markdown")]
88 format: String,
89
90 /// Output file (stdout if not specified)
91 #[arg(long, short)]
92 output: Option<PathBuf>,
93 },
94
95 /// Extract training data from Git repository (Phase 2 ML)
96 ExtractTrainingData {
97 /// Path to Git repository
98 #[arg(long, short, required = true)]
99 repo: PathBuf,
100
101 /// Output JSON file
102 #[arg(long, short, default_value = "training-data.json")]
103 output: PathBuf,
104
105 /// Minimum confidence threshold (0.0-1.0)
106 #[arg(long, default_value = "0.75")]
107 min_confidence: f32,
108
109 /// Maximum commits to analyze
110 #[arg(long, default_value = "1000")]
111 max_commits: usize,
112
113 /// Create train/validation/test splits
114 #[arg(long, default_value = "true")]
115 create_splits: bool,
116
117 /// Show visualization (requires --features viz)
118 #[arg(long, default_value = "false")]
119 viz: bool,
120 },
121
122 /// Train ML classifier on extracted training data (Phase 2 ML)
123 TrainClassifier {
124 /// Input training data JSON file
125 #[arg(long, short, required = true)]
126 input: PathBuf,
127
128 /// Output model file (optional)
129 #[arg(long, short)]
130 output: Option<PathBuf>,
131
132 /// Number of trees in Random Forest
133 #[arg(long, default_value = "100")]
134 n_estimators: usize,
135
136 /// Maximum tree depth
137 #[arg(long, default_value = "20")]
138 max_depth: usize,
139
140 /// Maximum TF-IDF features
141 #[arg(long, default_value = "1500")]
142 max_features: usize,
143 },
144
145 /// Export CommitFeatures to aprender-compatible format (Issue #2)
146 Export {
147 /// Path to Git repository to analyze
148 #[arg(long, short, required = true)]
149 repo: PathBuf,
150
151 /// Output file path
152 #[arg(long, short, default_value = "features.json")]
153 output: PathBuf,
154
155 /// Export format: json, binary, parquet
156 #[arg(long, short, default_value = "json")]
157 format: String,
158
159 /// Maximum commits to analyze
160 #[arg(long, default_value = "1000")]
161 max_commits: usize,
162
163 /// Minimum confidence threshold for classification (0.0-1.0)
164 #[arg(long, default_value = "0.70")]
165 min_confidence: f32,
166 },
167
168 /// Import Depyler CITL corpus for ground-truth training labels (NLP-014)
169 ImportDepyler {
170 /// Path to Depyler JSONL export file
171 #[arg(long, short, required = true)]
172 input: PathBuf,
173
174 /// Output training data JSON file
175 #[arg(long, short, default_value = "citl-training.json")]
176 output: PathBuf,
177
178 /// Minimum confidence threshold (0.0-1.0)
179 #[arg(long, default_value = "0.75")]
180 min_confidence: f32,
181
182 /// Merge with existing training data file (optional)
183 #[arg(long, short)]
184 merge: Option<PathBuf>,
185
186 /// Create train/validation/test splits
187 #[arg(long, default_value = "true")]
188 create_splits: bool,
189 },
190
191 /// Localize faults using Tarantula SBFL (Spectrum-Based Fault Localization)
192 Localize {
193 /// Path to LCOV coverage file from passing tests
194 #[arg(long, required = true)]
195 passed_coverage: PathBuf,
196
197 /// Path to LCOV coverage file from failing tests
198 #[arg(long, required = true)]
199 failed_coverage: PathBuf,
200
201 /// Number of passing tests
202 #[arg(long, default_value = "1")]
203 passed_count: usize,
204
205 /// Number of failing tests
206 #[arg(long, default_value = "1")]
207 failed_count: usize,
208
209 /// SBFL formula: tarantula, ochiai, dstar2, dstar3
210 #[arg(long, default_value = "tarantula")]
211 formula: String,
212
213 /// Top N suspicious statements to report
214 #[arg(long, default_value = "10")]
215 top_n: usize,
216
217 /// Output file path
218 #[arg(long, short, default_value = "fault-localization.yaml")]
219 output: PathBuf,
220
221 /// Output format: yaml, json, terminal
222 #[arg(long, short, default_value = "yaml")]
223 format: String,
224
225 /// Include TDG scores from pmat (requires pmat)
226 #[arg(long, default_value = "false")]
227 enrich_tdg: bool,
228
229 /// Repository path for TDG enrichment
230 #[arg(long)]
231 repo: Option<PathBuf>,
232
233 /// Enable RAG-enhanced localization with trueno-rag
234 #[arg(long, default_value = "false")]
235 rag: bool,
236
237 /// Path to bug knowledge base YAML file (for RAG)
238 #[arg(long)]
239 knowledge_base: Option<PathBuf>,
240
241 /// Fusion strategy for RAG: rrf, linear, dbsf, sbfl-only
242 #[arg(long, default_value = "rrf")]
243 fusion: String,
244
245 /// Number of similar bugs to retrieve (for RAG)
246 #[arg(long, default_value = "5")]
247 similar_bugs: usize,
248
249 /// Enable weighted ensemble model (Phase 6)
250 #[arg(long, default_value = "false")]
251 ensemble: bool,
252
253 /// Path to trained ensemble model file
254 #[arg(long)]
255 ensemble_model: Option<PathBuf>,
256
257 /// Include churn metrics from git history (for ensemble)
258 #[arg(long, default_value = "false")]
259 include_churn: bool,
260
261 /// Enable calibrated probability output (Phase 7)
262 #[arg(long, default_value = "false")]
263 calibrated: bool,
264
265 /// Path to trained calibration model file
266 #[arg(long)]
267 calibration_model: Option<PathBuf>,
268
269 /// Confidence threshold for calibrated predictions (0.0-1.0)
270 #[arg(long, default_value = "0.5")]
271 confidence_threshold: f32,
272 },
273}
274
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: the derive-generated CLI types exist and are nameable.
    /// This is purely a compile-time sanity check — no parsing is exercised.
    #[test]
    fn test_cli_structure_exists() {
        let (_cli, _cmds): (Option<Cli>, Option<Commands>) = (None, None);
    }
}