Skip to main content

battlecommand_forge/
report.rs

1//! Pipeline run report — structured JSON capture of every stage.
2//!
3//! Generated after EVERY pipeline run (pass or fail).
4//! Saved to `.battlecommand/reports/{slug}_{timestamp}.json`.
5
6use anyhow::Result;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use std::fs;
10use std::path::{Path, PathBuf};
11
/// Directory (relative to the process working directory) that holds the
/// per-run report files and `latest.json`.
const REPORTS_DIR: &str = ".battlecommand/reports";
13
14// ─── Top-level report ───
15
16#[derive(Debug, Serialize, Deserialize)]
17pub struct PipelineReport {
18    pub version: u32,
19    pub generated_at: String,
20    pub mission: MissionMeta,
21    pub model_config: ModelConfigSnapshot,
22    pub timing: TimingSummary,
23    pub router: RouterStageReport,
24    pub architect: LlmStageReport,
25    pub tester: LlmStageReport,
26    pub rounds: Vec<RoundReport>,
27    pub result: PipelineResult,
28    pub code_metrics: CodeMetrics,
29}
30
31// ─── Sub-structures ───
32
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct MissionMeta {
35    pub mission_id: String,
36    pub prompt: String,
37    pub preset: String,
38    pub language: String,
39    pub output_dir: String,
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct ModelConfigSnapshot {
44    pub architect: RoleSnapshot,
45    pub tester: RoleSnapshot,
46    pub coder: RoleSnapshot,
47    pub security: RoleSnapshot,
48    pub critique: RoleSnapshot,
49    pub cto: RoleSnapshot,
50}
51
52#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct RoleSnapshot {
54    pub model: String,
55    pub provider: String,
56    pub context_size: u32,
57    pub max_predict: u32,
58}
59
60impl RoleSnapshot {
61    pub fn from_role(role: &crate::model_config::RoleConfig) -> Self {
62        Self {
63            model: role.model.clone(),
64            provider: role.provider.to_string(),
65            context_size: role.context_size(),
66            max_predict: role.max_predict(),
67        }
68    }
69}
70
71#[derive(Debug, Serialize, Deserialize)]
72pub struct TimingSummary {
73    pub total_secs: f64,
74    pub router_secs: f64,
75    pub architect_secs: f64,
76    pub tester_secs: f64,
77    pub rounds_secs: Vec<f64>,
78}
79
80#[derive(Debug, Clone, Serialize, Deserialize)]
81pub struct RouterStageReport {
82    pub tier: String,
83    pub duration_secs: f64,
84}
85
86#[derive(Debug, Clone, Serialize, Deserialize)]
87pub struct LlmStageReport {
88    pub model: String,
89    pub duration_secs: f64,
90    pub token_count: u64,
91    pub tok_per_sec: f64,
92    pub output_lines: u64,
93}
94
95impl Default for LlmStageReport {
96    fn default() -> Self {
97        Self {
98            model: String::new(),
99            duration_secs: 0.0,
100            token_count: 0,
101            tok_per_sec: 0.0,
102            output_lines: 0,
103        }
104    }
105}
106
107// ─── Round report (stages 4-8) ───
108
109#[derive(Debug, Clone, Serialize, Deserialize)]
110pub struct RoundReport {
111    pub round_number: usize,
112    pub coder: LlmStageReport,
113    pub verifier: VerifierReport,
114    pub security: SecurityReport,
115    pub critique: CritiqueReport,
116    pub cto: CtoReport,
117    pub final_score: f32,
118    pub critique_avg: f32,
119    pub verifier_score: f32,
120    pub feedback_to_next_round: Option<String>,
121}
122
123#[derive(Debug, Clone, Serialize, Deserialize)]
124pub struct VerifierReport {
125    pub duration_secs: f64,
126    pub avg_score: f32,
127    pub tests_passed: u32,
128    pub tests_failed: u32,
129    pub tests_run: bool,
130    pub total_lint_issues: usize,
131    pub secrets_found: bool,
132    pub file_reports: Vec<FileVerifierReport>,
133}
134
135#[derive(Debug, Clone, Serialize, Deserialize)]
136pub struct FileVerifierReport {
137    pub path: String,
138    pub score: f32,
139    pub lint_passed: bool,
140    pub lint_issues: Vec<String>,
141    pub syntax_valid: bool,
142    pub has_tests: bool,
143    pub has_docstring: bool,
144    pub has_error_handling: bool,
145    pub has_hardcoded_secrets: bool,
146}
147
148#[derive(Debug, Clone, Serialize, Deserialize)]
149pub struct SecurityReport {
150    pub model: String,
151    pub duration_secs: f64,
152    pub verdict: String,
153    pub passed: bool,
154}
155
156#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct CritiqueReport {
158    pub model: String,
159    pub duration_secs: f64,
160    pub scores: CritiqueScores,
161    pub avg: f32,
162    pub details: Vec<String>,
163}
164
165#[derive(Debug, Clone, Serialize, Deserialize)]
166pub struct CritiqueScores {
167    pub dev: f32,
168    pub arch: f32,
169    pub test: f32,
170    pub sec: f32,
171    pub docs: f32,
172}
173
174#[derive(Debug, Clone, Serialize, Deserialize)]
175pub struct CtoReport {
176    pub model: String,
177    pub duration_secs: f64,
178    pub verdict: String,
179    pub approved: bool,
180}
181
182#[derive(Debug, Serialize, Deserialize)]
183pub struct PipelineResult {
184    pub quality_gate_passed: bool,
185    pub quality_gate_threshold: f32,
186    pub best_score: f32,
187    pub best_round: usize,
188    pub total_rounds: usize,
189    pub max_rounds_allowed: usize,
190    pub output_dir: String,
191    pub files_shipped: Vec<ShippedFile>,
192}
193
194#[derive(Debug, Serialize, Deserialize)]
195pub struct ShippedFile {
196    pub path: String,
197    pub language: String,
198    pub lines: usize,
199    pub bytes: usize,
200}
201
202#[derive(Debug, Serialize, Deserialize)]
203pub struct CodeMetrics {
204    pub total_files: usize,
205    pub total_loc: usize,
206    pub test_files: usize,
207    pub languages: HashMap<String, LanguageMetrics>,
208}
209
210#[derive(Debug, Serialize, Deserialize)]
211pub struct LanguageMetrics {
212    pub files: usize,
213    pub loc: usize,
214}
215
216// ─── Builder ───
217
218pub struct ReportBuilder {
219    pub mission: Option<MissionMeta>,
220    pub model_config: Option<ModelConfigSnapshot>,
221    pub router: Option<RouterStageReport>,
222    pub architect: Option<LlmStageReport>,
223    pub tester: Option<LlmStageReport>,
224    pub rounds: Vec<RoundReport>,
225    pub start_time: std::time::Instant,
226    stage_times: StageTimes,
227}
228
/// Durations, in seconds, of the router, architect and tester stages.
struct StageTimes {
    /// Router stage duration.
    router: f64,
    /// Architect stage duration.
    architect: f64,
    /// Tester stage duration.
    tester: f64,
}
234
235impl Default for ReportBuilder {
236    fn default() -> Self {
237        Self::new()
238    }
239}
240
241impl ReportBuilder {
242    pub fn new() -> Self {
243        Self {
244            mission: None,
245            model_config: None,
246            router: None,
247            architect: None,
248            tester: None,
249            rounds: Vec::new(),
250            start_time: std::time::Instant::now(),
251            stage_times: StageTimes {
252                router: 0.0,
253                architect: 0.0,
254                tester: 0.0,
255            },
256        }
257    }
258
259    pub fn set_mission(&mut self, meta: MissionMeta) {
260        self.mission = Some(meta);
261    }
262
263    pub fn set_model_config(&mut self, config: &crate::model_config::ModelConfig) {
264        self.model_config = Some(ModelConfigSnapshot {
265            architect: RoleSnapshot::from_role(&config.architect),
266            tester: RoleSnapshot::from_role(&config.tester),
267            coder: RoleSnapshot::from_role(&config.coder),
268            security: RoleSnapshot::from_role(&config.security),
269            critique: RoleSnapshot::from_role(&config.critique),
270            cto: RoleSnapshot::from_role(&config.cto),
271        });
272    }
273
274    pub fn set_router(&mut self, tier: &str, duration_secs: f64) {
275        self.stage_times.router = duration_secs;
276        self.router = Some(RouterStageReport {
277            tier: tier.to_string(),
278            duration_secs,
279        });
280    }
281
282    pub fn set_architect(&mut self, stats: LlmStageReport) {
283        self.stage_times.architect = stats.duration_secs;
284        self.architect = Some(stats);
285    }
286
287    pub fn set_tester(&mut self, stats: LlmStageReport) {
288        self.stage_times.tester = stats.duration_secs;
289        self.tester = Some(stats);
290    }
291
292    pub fn add_round(&mut self, round: RoundReport) {
293        self.rounds.push(round);
294    }
295
296    pub fn build(
297        &self,
298        passed: bool,
299        best_score: f32,
300        best_round: usize,
301        output_dir: &Path,
302        files: &[crate::codegen::GeneratedFile],
303    ) -> PipelineReport {
304        let total_secs = self.start_time.elapsed().as_secs_f64();
305
306        let files_shipped: Vec<ShippedFile> = files
307            .iter()
308            .map(|f| ShippedFile {
309                path: f.path.display().to_string(),
310                language: f.language.clone(),
311                lines: f.content.lines().count(),
312                bytes: f.content.len(),
313            })
314            .collect();
315
316        let total_loc: usize = files_shipped.iter().map(|f| f.lines).sum();
317        let test_files = files_shipped
318            .iter()
319            .filter(|f| {
320                let p = f.path.to_lowercase();
321                p.contains("test") || p.contains("spec")
322            })
323            .count();
324
325        let mut languages: HashMap<String, LanguageMetrics> = HashMap::new();
326        for f in &files_shipped {
327            let entry = languages
328                .entry(f.language.clone())
329                .or_insert(LanguageMetrics { files: 0, loc: 0 });
330            entry.files += 1;
331            entry.loc += f.lines;
332        }
333
334        let rounds_secs: Vec<f64> = self
335            .rounds
336            .iter()
337            .map(|r| {
338                r.coder.duration_secs
339                    + r.verifier.duration_secs
340                    + r.security.duration_secs
341                    + r.critique.duration_secs
342                    + r.cto.duration_secs
343            })
344            .collect();
345
346        let total_rounds = self.rounds.len();
347        let empty_snap = RoleSnapshot {
348            model: String::new(),
349            provider: String::new(),
350            context_size: 0,
351            max_predict: 0,
352        };
353
354        PipelineReport {
355            version: 1,
356            generated_at: chrono::Utc::now().to_rfc3339(),
357            mission: self.mission.clone().unwrap_or(MissionMeta {
358                mission_id: String::new(),
359                prompt: String::new(),
360                preset: String::new(),
361                language: String::new(),
362                output_dir: output_dir.display().to_string(),
363            }),
364            model_config: self.model_config.clone().unwrap_or(ModelConfigSnapshot {
365                architect: empty_snap.clone(),
366                tester: empty_snap.clone(),
367                coder: empty_snap.clone(),
368                security: empty_snap.clone(),
369                critique: empty_snap.clone(),
370                cto: empty_snap,
371            }),
372            timing: TimingSummary {
373                total_secs,
374                router_secs: self.stage_times.router,
375                architect_secs: self.stage_times.architect,
376                tester_secs: self.stage_times.tester,
377                rounds_secs,
378            },
379            router: self.router.clone().unwrap_or(RouterStageReport {
380                tier: "unknown".to_string(),
381                duration_secs: 0.0,
382            }),
383            architect: self.architect.clone().unwrap_or_default(),
384            tester: self.tester.clone().unwrap_or_default(),
385            rounds: self.rounds.clone(),
386            result: PipelineResult {
387                quality_gate_passed: passed,
388                quality_gate_threshold: 9.2,
389                best_score,
390                best_round,
391                total_rounds,
392                max_rounds_allowed: 5,
393                output_dir: output_dir.display().to_string(),
394                files_shipped,
395            },
396            code_metrics: CodeMetrics {
397                total_files: files.len(),
398                total_loc,
399                test_files,
400                languages,
401            },
402        }
403    }
404}
405
406// ─── Save / Load / List ───
407
408pub fn save_report(report: &PipelineReport) -> Result<PathBuf> {
409    fs::create_dir_all(REPORTS_DIR)?;
410
411    let slug: String = report
412        .mission
413        .prompt
414        .to_lowercase()
415        .chars()
416        .map(|c| if c.is_alphanumeric() { c } else { '_' })
417        .take(40)
418        .collect();
419    let slug = slug.trim_matches('_').to_string();
420    let ts = chrono::Utc::now().format("%Y%m%d_%H%M%S");
421
422    let filename = format!("{}_{}.json", slug, ts);
423    let path = PathBuf::from(REPORTS_DIR).join(&filename);
424
425    let json = serde_json::to_string_pretty(report)?;
426    fs::write(&path, &json)?;
427
428    // Also write latest.json
429    let latest = PathBuf::from(REPORTS_DIR).join("latest.json");
430    fs::write(&latest, &json)?;
431
432    println!("[REPORT] Saved to {}", path.display());
433    Ok(path)
434}
435
436pub fn load_report(path: &Path) -> Result<PipelineReport> {
437    let content = fs::read_to_string(path)?;
438    let report: PipelineReport = serde_json::from_str(&content)?;
439    Ok(report)
440}
441
442pub fn list_reports() -> Result<Vec<PathBuf>> {
443    let dir = Path::new(REPORTS_DIR);
444    if !dir.exists() {
445        return Ok(Vec::new());
446    }
447    let mut reports: Vec<PathBuf> = fs::read_dir(dir)?
448        .filter_map(|e| e.ok())
449        .map(|e| e.path())
450        .filter(|p| {
451            p.extension().map(|e| e == "json").unwrap_or(false)
452                && p.file_name().map(|n| n != "latest.json").unwrap_or(true)
453        })
454        .collect();
455    reports.sort();
456    Ok(reports)
457}
458
459/// Pretty-print a report to stdout.
460pub fn print_report(report: &PipelineReport) {
461    let r = report;
462    println!();
463    println!("=== Pipeline Report (v{}) ===", r.version);
464    println!("Generated: {}", r.generated_at);
465    println!();
466
467    // Mission
468    println!("MISSION");
469    println!(
470        "  Prompt:     {}",
471        r.mission.prompt.chars().take(80).collect::<String>()
472    );
473    println!(
474        "  Preset:     {} | Language: {} | Complexity: {}",
475        r.mission.preset, r.mission.language, r.router.tier
476    );
477    println!("  Output:     {}", r.mission.output_dir);
478    println!();
479
480    // Model config
481    println!("MODELS");
482    println!(
483        "  Architect:  {} ({})",
484        r.model_config.architect.model, r.model_config.architect.provider
485    );
486    println!(
487        "  Tester:     {} ({})",
488        r.model_config.tester.model, r.model_config.tester.provider
489    );
490    println!(
491        "  Coder:      {} ({})",
492        r.model_config.coder.model, r.model_config.coder.provider
493    );
494    println!(
495        "  Security:   {} ({})",
496        r.model_config.security.model, r.model_config.security.provider
497    );
498    println!(
499        "  Critique:   {} ({})",
500        r.model_config.critique.model, r.model_config.critique.provider
501    );
502    println!(
503        "  CTO:        {} ({})",
504        r.model_config.cto.model, r.model_config.cto.provider
505    );
506    println!();
507
508    // Timing
509    println!("TIMING");
510    println!("  Total:      {:.1}s", r.timing.total_secs);
511    println!(
512        "  Router:     {:.1}s | Architect: {:.1}s | Tester: {:.1}s",
513        r.timing.router_secs, r.timing.architect_secs, r.timing.tester_secs
514    );
515    for (i, secs) in r.timing.rounds_secs.iter().enumerate() {
516        println!("  Round {}:    {:.1}s", i + 1, secs);
517    }
518    println!();
519
520    // Architect + Tester stats
521    if r.architect.duration_secs > 0.0 {
522        println!(
523            "ARCHITECT: {} | {:.1}s | {} tokens | {:.0} tok/s | {} lines",
524            r.architect.model,
525            r.architect.duration_secs,
526            r.architect.token_count,
527            r.architect.tok_per_sec,
528            r.architect.output_lines
529        );
530    }
531    if r.tester.duration_secs > 0.0 {
532        println!(
533            "TESTER:    {} | {:.1}s | {} tokens | {:.0} tok/s | {} lines",
534            r.tester.model,
535            r.tester.duration_secs,
536            r.tester.token_count,
537            r.tester.tok_per_sec,
538            r.tester.output_lines
539        );
540    }
541    println!();
542
543    // Rounds
544    for round in &r.rounds {
545        let passed = if round.final_score >= r.result.quality_gate_threshold {
546            "PASS"
547        } else {
548            "FAIL"
549        };
550        println!(
551            "ROUND {} [{} — {:.1}/10]",
552            round.round_number, passed, round.final_score
553        );
554        println!(
555            "  Coder:    {:.1}s | {} tokens | {:.0} tok/s",
556            round.coder.duration_secs, round.coder.token_count, round.coder.tok_per_sec
557        );
558        println!(
559            "  Verifier: {:.1}/10 | tests {}/{} | lint {} | secrets {}",
560            round.verifier.avg_score,
561            round.verifier.tests_passed,
562            round.verifier.tests_failed,
563            round.verifier.total_lint_issues,
564            if round.verifier.secrets_found {
565                "FOUND"
566            } else {
567                "clean"
568            }
569        );
570        println!(
571            "  Security: {} | {}",
572            if round.security.passed {
573                "PASS"
574            } else {
575                "FAIL"
576            },
577            round.security.verdict.lines().next().unwrap_or("")
578        );
579        println!(
580            "  Critique: Dev={:.1} Arch={:.1} Test={:.1} Sec={:.1} Docs={:.1} => {:.1}",
581            round.critique.scores.dev,
582            round.critique.scores.arch,
583            round.critique.scores.test,
584            round.critique.scores.sec,
585            round.critique.scores.docs,
586            round.critique.avg
587        );
588        println!(
589            "  CTO:      {} | {}",
590            if round.cto.approved {
591                "APPROVE"
592            } else {
593                "REJECT"
594            },
595            round.cto.verdict.lines().next().unwrap_or("")
596        );
597        println!(
598            "  Score:    critique {:.1} * 0.4 + verifier {:.1} * 0.6 = {:.1}",
599            round.critique_avg, round.verifier_score, round.final_score
600        );
601        if let Some(ref fb) = round.feedback_to_next_round {
602            let lines: usize = fb.lines().count();
603            println!("  Feedback: {} lines sent to next round", lines);
604        }
605        println!();
606    }
607
608    // Result
609    let gate = if r.result.quality_gate_passed {
610        "PASSED"
611    } else {
612        "FAILED"
613    };
614    println!(
615        "RESULT: {} ({:.1}/10, round {}/{})",
616        gate, r.result.best_score, r.result.best_round, r.result.total_rounds
617    );
618    println!();
619
620    // Code metrics
621    println!("CODE METRICS");
622    println!(
623        "  Files: {} | LOC: {} | Test files: {}",
624        r.code_metrics.total_files, r.code_metrics.total_loc, r.code_metrics.test_files
625    );
626    for (lang, m) in &r.code_metrics.languages {
627        println!("  {}: {} files, {} LOC", lang, m.files, m.loc);
628    }
629    println!();
630
631    // File list
632    if !r.result.files_shipped.is_empty() {
633        println!("FILES");
634        for f in &r.result.files_shipped {
635            println!("  {} ({} lines, {} bytes)", f.path, f.lines, f.bytes);
636        }
637    }
638}
639
#[cfg(test)]
mod tests {
    use super::*;

    /// A builder with nothing recorded still produces a report carrying the
    /// arguments passed to `build` and zeroed code metrics.
    #[test]
    fn test_report_builder_defaults() {
        let out_dir = Path::new("output/test");
        let report = ReportBuilder::new().build(false, 5.0, 1, out_dir, &[]);

        assert_eq!(report.version, 1);
        assert!(!report.result.quality_gate_passed);
        assert_eq!(report.result.best_score, 5.0);
        assert_eq!(report.code_metrics.total_files, 0);
    }

    /// Smoke test: listing must not panic, whether or not the reports
    /// directory exists. The result itself is deliberately ignored.
    #[test]
    fn test_list_reports_empty() {
        let _listing = list_reports();
    }
}