// battlecommand_forge/mission.rs

use crate::codegen::{self, GeneratedFile};
use crate::db;
use crate::llm::{self, LlmCallStats, LlmClient};
use crate::memory;
use crate::model_config::ModelConfig;
use crate::report::{
    self, CritiqueReport, CritiqueScores, CtoReport, FileVerifierReport, LlmStageReport,
    MissionMeta, ReportBuilder, RoundReport, SecurityReport, VerifierReport,
};
use crate::router;
use crate::verifier::{self, QualityReport};
use crate::voice;
use crate::workspace::{self, Workspace};
use anyhow::{Context, Result};
use std::fs;
use std::path::{Path, PathBuf};
use tokio::sync::mpsc;

/// Events emitted by MissionRunner for TUI consumption.
#[derive(Debug, Clone)]
pub enum TuiEvent {
    /// Structured log message with level
    Log { level: String, message: String },
    /// Pipeline stage started
    StageStarted {
        stage: String,
        step: String,
        model: String,
    },
    /// Pipeline stage completed
    StageCompleted { stage: String, status: String },
    /// Code chunk from coder (for Code tab)
    CodeChunk {
        content: String,
        model: String,
        done: bool,
    },
    /// Mission completed with final score
    MissionCompleted { score: f64, output_dir: String },
    /// Mission failed
    MissionFailed { error: String },
    /// Cost update from API call
    CostUpdate { total_usd: f64 },
    /// LLM thinking/reasoning chunk for live visualization
    ThinkingChunk {
        model: String,
        content: String,
        done: bool,
    },
}
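
// A minimal consumer sketch for the receiving half of `event_tx` (assumption:
// the real TUI renders these into widgets elsewhere; this just logs a couple
// of variants to stderr).
#[allow(dead_code)]
async fn log_tui_events(mut rx: mpsc::UnboundedReceiver<TuiEvent>) {
    while let Some(ev) = rx.recv().await {
        match ev {
            TuiEvent::Log { level, message } => eprintln!("[{level}] {message}"),
            TuiEvent::MissionCompleted { score, output_dir } => {
                eprintln!("mission done: {score:.1}/10 -> {output_dir}")
            }
            _ => {} // remaining variants drive richer TUI views
        }
    }
}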

const MAX_FIX_ROUNDS: usize = 5;
// Compile-time invariant — guarantees the fix-round loop runs at least once,
// which in turn means `best_result` is Some by the time the loop exits.
const _: () = assert!(MAX_FIX_ROUNDS >= 1, "MAX_FIX_ROUNDS must be >= 1");

/// Quality gate threshold scaled by complexity.
/// C1-C6: 9.2 (achievable with good models)
/// C7-C8: 8.5 (complex multi-file projects)
/// C9-C10: 8.0 (mega-projects)
fn quality_gate(complexity: u32) -> f32 {
    match complexity {
        0..=6 => 9.2,
        7..=8 => 8.5,
        _ => 8.0,
    }
}
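
// Illustrative boundary check of the gate tiers above; the complexity values
// are made up for the example, not taken from a real mission.
#[cfg(test)]
mod quality_gate_tests {
    use super::quality_gate;

    #[test]
    fn thresholds_relax_with_complexity() {
        assert_eq!(quality_gate(1), 9.2); // simple task: strictest gate
        assert_eq!(quality_gate(7), 8.5); // complex multi-file project
        assert_eq!(quality_gate(9), 8.0); // mega-project
    }
}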

/// Result of a single attempt through stages 4-8.
struct AttemptResult {
    files: Vec<GeneratedFile>,
    reports: Vec<QualityReport>,
    critique_scores: Vec<f32>,
    critique_details: Vec<String>,
    security_verdict: String,
    cto_verdict: String,
    verifier_score: f32,
    critique_avg: f32,
    final_score: f32,
    /// Import/test errors captured from pytest for surgical fix targeting
    test_errors: Vec<String>,
    tests_passed: u32,
    tests_failed: u32,
}

pub struct MissionRunner {
    config: ModelConfig,
    llm_architect: LlmClient,
    llm_tester: LlmClient,
    llm_coder: LlmClient,
    llm_fix_coder: LlmClient,
    llm_security: LlmClient,
    llm_critique: LlmClient,
    llm_cto: LlmClient,
    quality_bible: String,
    pub auto_mode: bool,
    /// Custom output directory override
    pub output_override: Option<PathBuf>,
    /// GitHub repo URL to clone as context
    pub repo_url: Option<String>,
    /// Local directory to use as context
    pub local_path: Option<PathBuf>,
    /// Loaded repo/project context (file tree + source) for injection into prompts
    repo_context: Option<String>,
    /// Complexity score from router (C1-C10). Determines single-shot vs staged generation.
    complexity: u32,
    /// Best score achieved across all fix rounds (for benchmark reporting).
    pub last_best_score: f64,
    /// Optional TUI event sender for live updates.
    pub event_tx: Option<mpsc::UnboundedSender<TuiEvent>>,
}
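
// Typical wiring, sketched as a comment because `ModelConfig` construction is
// not shown in this file (the loader call below is a hypothetical placeholder):
//
//     let config = ModelConfig::load("premium")?;   // hypothetical loader
//     let (tx, mut rx) = mpsc::unbounded_channel();
//     let mut runner = MissionRunner::new(config);
//     runner.auto_mode = true;      // accept/continue without prompting
//     runner.event_tx = Some(tx);   // stream TuiEvents to a TUI task
//     runner.run("build a REST API for todos").await?;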

impl MissionRunner {
    pub fn new(config: ModelConfig) -> Self {
        config.print_summary();

        let quality_bible = fs::read_to_string(".battlecommand/quality_policies.md")
            .unwrap_or_else(|_| {
                "TDD first, 90% coverage, OWASP, zero TODOs, full error handling.".to_string()
            });

        Self {
            llm_architect: LlmClient::with_limits(
                &config.architect.model,
                config.architect.context_size(),
                config.architect.max_predict(),
            ),
            llm_tester: LlmClient::with_limits(
                &config.tester.model,
                config.tester.context_size(),
                config.tester.max_predict(),
            ),
            llm_coder: LlmClient::with_limits(
                &config.coder.model,
                config.coder.context_size(),
                config.coder.max_predict(),
            ),
            llm_fix_coder: LlmClient::with_limits(
                &config.fix_coder.model,
                config.fix_coder.context_size(),
                config.fix_coder.max_predict(),
            ),
            llm_security: LlmClient::with_limits(
                &config.security.model,
                config.security.context_size(),
                config.security.max_predict(),
            ),
            llm_critique: LlmClient::with_limits(
                &config.critique.model,
                config.critique.context_size(),
                config.critique.max_predict(),
            ),
            llm_cto: LlmClient::with_limits(
                &config.cto.model,
                config.cto.context_size(),
                config.cto.max_predict(),
            ),
            config,
            quality_bible,
            auto_mode: false,
            output_override: None,
            repo_url: None,
            local_path: None,
            repo_context: None,
            complexity: 5,
            last_best_score: 0.0,
            event_tx: None,
        }
    }

    /// Returns the best score achieved in the most recent run.
    pub fn best_score(&self) -> f64 {
        self.last_best_score
    }

    /// Emit a TUI event (no-op if no listener).
    fn emit(&self, event: TuiEvent) {
        if let Some(ref tx) = self.event_tx {
            let _ = tx.send(event);
        }
    }

    fn emit_log(&self, level: &str, message: &str) {
        println!("{}", message);
        self.emit(TuiEvent::Log {
            level: level.into(),
            message: message.into(),
        });
    }

    fn emit_stage(&self, stage: &str, step: &str, model: &str) {
        self.emit(TuiEvent::StageStarted {
            stage: stage.into(),
            step: step.into(),
            model: model.into(),
        });
    }

    fn emit_stage_done(&self, stage: &str, status: &str) {
        self.emit(TuiEvent::StageCompleted {
            stage: stage.into(),
            status: status.into(),
        });
    }

    pub async fn run(&mut self, prompt: &str) -> Result<()> {
        println!();
        println!("=== BattleCommand Forge v{} ===", env!("CARGO_PKG_VERSION"));
        println!("Preset: {}", self.config.preset);
        println!();

        // Check for API keys if any role uses cloud models
        let roles = [
            (&self.config.architect, "architect"),
            (&self.config.tester, "tester"),
            (&self.config.coder, "coder"),
            (&self.config.fix_coder, "fix_coder"),
            (&self.config.security, "security"),
            (&self.config.critique, "critique"),
            (&self.config.cto, "cto"),
        ];
        let claude_roles: Vec<&str> = roles
            .iter()
            .filter(|(r, _)| r.model.starts_with("claude-"))
            .map(|(_, name)| *name)
            .collect();
        if !claude_roles.is_empty() && std::env::var("ANTHROPIC_API_KEY").is_err() {
            anyhow::bail!(
                "ANTHROPIC_API_KEY not set but {} use Claude models ({}). Set it with: export ANTHROPIC_API_KEY=sk-...",
                claude_roles.len(), claude_roles.join(", ")
            );
        }
        let grok_roles: Vec<&str> = roles
            .iter()
            .filter(|(r, _)| r.model.starts_with("grok-"))
            .map(|(_, name)| *name)
            .collect();
        if !grok_roles.is_empty() && std::env::var("XAI_API_KEY").is_err() {
            anyhow::bail!(
                "XAI_API_KEY not set but {} use Grok models ({}). Set it with: export XAI_API_KEY=xai-...",
                grok_roles.len(), grok_roles.join(", ")
            );
        }
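        // Example failure mode: with two roles on Claude models and no key set,
        // this aborts with "ANTHROPIC_API_KEY not set but 2 use Claude models
        // (coder, cto)..." (role names illustrative).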

        let pipeline_start = std::time::Instant::now();

        // Initialize report builder
        let mut rb = ReportBuilder::new();
        rb.set_model_config(&self.config);

        // Load memory context
        let memory_context = memory::load_context(prompt);
        if !memory_context.is_empty() {
            self.emit_log(
                "info",
                &format!("[MEMORY] Loaded context ({} chars)", memory_context.len()),
            );
        }

        // Stage 1: Complexity Assessment (dual: rules + AI)
        let router_start = std::time::Instant::now();
        self.emit_stage("1/9", "ROUTER", &self.config.complexity.model.clone());
        let llm_complexity = LlmClient::with_limits(
            &self.config.complexity.model,
            self.config.complexity.context_size(),
            self.config.complexity.max_predict(),
        );
        let routing = router::assess_complexity_dual(prompt, &llm_complexity).await;
        let tier = routing.tier;
        self.complexity = routing.complexity;
        self.emit_log(
            "info",
            &format!("[1/9] ROUTER: {} ({})", tier.label(), routing.reasoning),
        );
        self.emit_stage_done("1/9", &format!("C{} {}", routing.complexity, tier.label()));
        rb.set_router(
            &format!(
                "C{} {} ({})",
                routing.complexity,
                tier.label(),
                routing.source
            ),
            router_start.elapsed().as_secs_f64(),
        );

        // C1-C6: if architect is a cloud model (Grok/Claude), downgrade to the configured
        // coder's local model for detailed specs (local models produce better specs for simple tasks)
        if self.complexity < 7
            && self.config.architect.provider == crate::model_config::ModelProvider::Cloud
        {
            // Use the coder model as fallback — it's always configured and typically local
            if self.config.coder.provider == crate::model_config::ModelProvider::Local {
                let local_arch = self.config.coder.model.clone();
                println!("[DOWNGRADE] C{} detected — switching architect from {} to {} for detailed specs",
                    self.complexity, self.config.architect.model, local_arch);
                self.config.architect.model = local_arch.clone();
                self.config.architect.provider = crate::model_config::ModelProvider::Local;
                self.llm_architect = LlmClient::with_limits(
                    &local_arch,
                    self.config.architect.context_size(),
                    self.config.architect.max_predict(),
                );
            }
        }

        // C7+ complexity: upgrade coder to the fix_coder model for precision on complex projects
        // Auth/E-commerce/WebSocket consistently land at C7-C8
        if self.complexity >= 7
            && !self.config.coder.model.starts_with("claude-")
            && !self.config.coder.model.starts_with("grok-")
        {
            // Use the fix_coder model — it's always a capable cloud model in premium preset
            if self.config.fix_coder.provider == crate::model_config::ModelProvider::Cloud {
                let upgrade = self.config.fix_coder.model.clone();
                println!(
                    "[UPGRADE] C{} detected — switching coder from {} to {} for precision",
                    self.complexity, self.config.coder.model, upgrade
                );
                self.config.coder.model = upgrade.clone();
                self.config.coder.provider = crate::model_config::ModelProvider::Cloud;
                self.llm_coder = LlmClient::with_limits(
                    &upgrade,
                    self.config.coder.context_size(),
                    self.config.coder.max_predict(),
                );
            }
        }
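        // Net effect of the two adjustments above:
        //   C1-C6 + cloud architect + local coder  -> architect runs on the coder's local model
        //   C7+   + local coder + cloud fix_coder  -> coder runs on the fix_coder's cloud model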

        // Create workspace + output dir
        let mission_id = workspace::mission_id_from_prompt(prompt);
        let ws = Workspace::create(&mission_id).ok();
        if ws.is_some() {
            println!("[WORKSPACE] Created: {}", mission_id);
        }
        let output_dir = match &self.output_override {
            Some(dir) => {
                fs::create_dir_all(dir).context("Failed to create output directory")?;
                dir.clone()
            }
            None => create_output_dir(prompt)?,
        };

        let language = detect_language(prompt);
        rb.set_mission(MissionMeta {
            mission_id: mission_id.clone(),
            prompt: prompt.to_string(),
            preset: self.config.preset.to_string(),
            language: language.clone(),
            output_dir: output_dir.display().to_string(),
        });

        // Load repo/project context if --repo or --path specified
        if let Some(ref url) = self.repo_url {
            let clone_dir = output_dir.join(".repo_clone");
            crate::workspace::clone_repo(url, &clone_dir)?;
            self.repo_context = Some(crate::editor::read_project_context(&clone_dir, 50_000)?);
        } else if let Some(ref path) = self.local_path {
            self.repo_context = Some(crate::editor::read_project_context(path, 50_000)?);
        }

        // Run as single task — multi-file extraction handles project structure.
        // Decomposition caused duplicate projects; single-task + good prompts is better.
        self.run_single_task(prompt, prompt, &output_dir, &mut rb)
            .await?;

        // Save mission to history database
        let duration_secs = pipeline_start.elapsed().as_secs_f64();
        let latest_report =
            report::load_report(Path::new(".battlecommand/reports/latest.json")).ok();
        let (final_score, passed) = latest_report
            .as_ref()
            .map(|r| (r.result.best_score, r.result.quality_gate_passed))
            .unwrap_or((0.0, false));
        let files: Vec<String> = std::fs::read_dir(&output_dir)
            .map(|entries| {
                entries
                    .flatten()
                    .map(|e| e.file_name().to_string_lossy().to_string())
                    .collect()
            })
            .unwrap_or_default();
        let _ = db::save_mission(&db::MissionRecord {
            id: mission_id,
            prompt: prompt.to_string(),
            preset: self.config.preset.to_string(),
            tier: format!("C{}", self.complexity),
            subtasks: 0,
            rounds: latest_report
                .as_ref()
                .map(|r| r.result.total_rounds as u32)
                .unwrap_or(1),
            final_score,
            passed,
            model: self.config.coder.model.clone(),
            files_generated: files,
            duration_secs,
            timestamp: chrono::Utc::now().to_rfc3339(),
        });

        // Clean up build artifacts from output
        cleanup_artifacts(&output_dir);

        self.print_results(&output_dir)?;
        Ok(())
    }

    /// Run the full pipeline (stages 2-9) for a single task/subtask.
    async fn run_single_task(
        &mut self,
        _original_prompt: &str,
        task_prompt: &str,
        output_dir: &Path,
        rb: &mut ReportBuilder,
    ) -> Result<()> {
        // Stage 2: Architect
        self.emit_stage("2/9", "ARCHITECT", &self.config.architect.model.clone());
        self.emit_log(
            "info",
            "[2/9] ARCHITECT: Designing spec + file manifest + TDD plan...",
        );
        let (spec, arch_stats) = self.run_architect_with_stats(task_prompt).await?;
        self.emit_stage_done(
            "2/9",
            &format!(
                "{} lines, {:.0}s",
                arch_stats.output_lines, arch_stats.duration_secs
            ),
        );
        rb.set_architect(LlmStageReport {
            model: arch_stats.model,
            duration_secs: arch_stats.duration_secs,
            token_count: arch_stats.token_count,
            tok_per_sec: arch_stats.tok_per_sec,
            output_lines: arch_stats.output_lines,
        });
        // Offload architect before tester (if different models)
        if self.config.architect.model != self.config.tester.model {
            offload_model(&self.config.architect.model).await;
        }

        // Stage 3: Tester-first
        self.emit_stage("3/9", "TESTER", &self.config.tester.model.clone());
        self.emit_log("info", "[3/9] TESTER: Writing tests first (TDD)...");
        let (tests_raw, test_stats) = self.run_tester_with_stats(task_prompt, &spec).await?;
        self.emit_stage_done(
            "3/9",
            &format!(
                "{} lines, {:.0}s",
                test_stats.output_lines, test_stats.duration_secs
            ),
        );
        rb.set_tester(LlmStageReport {
            model: test_stats.model,
            duration_secs: test_stats.duration_secs,
            token_count: test_stats.token_count,
            tok_per_sec: test_stats.tok_per_sec,
            output_lines: test_stats.output_lines,
        });

        // Offload tester before coder (if different models)
        if self.config.tester.model != self.config.coder.model {
            offload_model(&self.config.tester.model).await;
        }

        let language = detect_language(task_prompt);

        // === FIX-PASS RETRY LOOP (Stages 4-8) ===
        let mut feedback = String::new();
        let mut previous_code = String::new();
        let mut persistent_issues: Vec<String> = Vec::new();
        let mut best_result: Option<AttemptResult> = None;
        let mut best_round: usize = 1;

        for round in 0..MAX_FIX_ROUNDS {
            if round > 0 {
                self.emit_log(
                    "warn",
                    &format!("=== FIX ROUND {}/{} ===", round + 1, MAX_FIX_ROUNDS),
                );
            }

            let (result, round_report) = self
                .attempt_round_with_report(
                    task_prompt,
                    &spec,
                    &tests_raw,
                    &language,
                    output_dir,
                    round,
                    &feedback,
                    &previous_code,
                )
                .await?;

            // ─── Show full report for this round ───
            let min_score = quality_gate(self.complexity);
            let passed_gate = result.final_score >= min_score;

            // Build a snapshot report up to this round for display
            rb.add_round(round_report.clone());
            let snap_report = rb.build(
                passed_gate,
                result.final_score,
                round + 1,
                output_dir,
                &result.files,
            );
            report::print_report(&snap_report);
            // Pop the round back — we'll re-add it (possibly with feedback) below
            rb.rounds.pop();

            if passed_gate {
                voice::quality_gate(result.final_score, true);
            }

            // ─── Human-in-the-loop approval (or auto-mode) ───
            let remaining = MAX_FIX_ROUNDS - (round + 1);

            let (accept, abort) = if self.auto_mode {
                // Auto mode: accept if passed, continue fixing if not, accept on last round
                if passed_gate {
                    println!(
                        "[AUTO] Quality gate PASSED ({:.1}/10) — accepting",
                        result.final_score
                    );
                    (true, false)
                } else if remaining > 0 {
                    println!("[AUTO] Quality gate FAILED ({:.1} < {:.1}) — continuing to fix round ({} remaining)",
                        result.final_score, min_score, remaining);
                    (false, false)
                } else {
                    println!(
                        "[AUTO] Final round complete ({:.1}/10) — accepting",
                        result.final_score
                    );
                    (true, false)
                }
            } else {
                if passed_gate {
                    println!();
                    println!("Quality gate PASSED. What would you like to do?");
                    println!("  [a] Accept and ship (default)");
                    println!(
                        "  [f] Run another fix round anyway ({} remaining)",
                        remaining
                    );
                    println!("  [q] Abort mission");
                } else if remaining > 0 {
                    println!();
                    println!(
                        "Quality gate FAILED ({:.1} < {:.1}). What would you like to do?",
                        result.final_score, min_score
                    );
                    println!(
                        "  [f] Continue to next fix round (default, {} remaining)",
                        remaining
                    );
                    println!("  [a] Accept current output as-is");
                    println!("  [q] Abort mission");
                } else {
                    println!();
                    println!(
                        "Final round complete ({:.1}/10). What would you like to do?",
                        result.final_score
                    );
                    println!("  [a] Accept current output (default)");
                    println!("  [q] Abort mission");
                }

                print!("> ");
                let _ = std::io::Write::flush(&mut std::io::stdout());
                let mut input = String::new();
                let _ = std::io::stdin().read_line(&mut input);
                let choice = input.trim().to_lowercase();

                let accept = if passed_gate {
                    choice != "f"
                } else if remaining > 0 {
                    choice == "a"
                } else {
                    choice != "q"
                };
                let abort = choice == "q";
                (accept, abort)
            };
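            // Choice semantics: pressing Enter takes the menu default shown
            // above (accept when the gate passed or on the final round,
            // otherwise another fix round); 'q' aborts in every state.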

            if abort {
                println!();
                println!("Mission aborted by user.");

                rb.add_round(round_report);
                let score = best_result
                    .as_ref()
                    .map(|b| b.final_score)
                    .unwrap_or(result.final_score);
                let report = rb.build(false, score, best_round, output_dir, &result.files);
                let _ = report::save_report(&report);

                return Ok(());
            }

            if accept {
                let accepted_passed = result.final_score >= min_score;
                rb.add_round(round_report);

                let report = rb.build(
                    accepted_passed,
                    result.final_score,
                    round + 1,
                    output_dir,
                    &result.files,
                );
                let _ = report::save_report(&report);

                if accepted_passed {
                    let code_summary: String = result
                        .files
                        .iter()
                        .map(|f| {
                            format!("{}: {} lines", f.path.display(), f.content.lines().count())
                        })
                        .collect::<Vec<_>>()
                        .join(", ");
                    let _ = memory::distill_and_save(
                        &self.llm_coder,
                        task_prompt,
                        &code_summary,
                        result.final_score,
                    )
                    .await;
                    let _ = memory::save_example(task_prompt, output_dir, &language);
                    println!("[MEMORY] Learnings saved");
                }

                self.last_best_score = result.final_score as f64;
                self.emit_log(
                    "info",
                    &format!(
                        "Output accepted ({:.1}/10). Files at: {}",
                        result.final_score,
                        output_dir.display()
                    ),
                );
                self.emit(TuiEvent::MissionCompleted {
                    score: result.final_score as f64,
                    output_dir: output_dir.display().to_string(),
                });
                voice::mission_complete(accepted_passed, result.final_score);
                return Ok(());
            }

            // Save failure patterns for future missions
            let error_keys: Vec<String> = result
                .reports
                .iter()
                .flat_map(|r| r.lint_issues.clone())
                .collect();
            memory::save_failure_patterns(&language, &error_keys, result.final_score);

            // Continue to next fix round — build feedback with v2-style truncation
            let new_issues = self.extract_issue_keys(&result);
            for issue in &new_issues {
                if !persistent_issues.contains(issue) {
                    persistent_issues.push(issue.clone());
                }
            }
            feedback = self.build_feedback_v2(&result, round, &persistent_issues);
            // Save previous code (truncated per v2 strategy — 2K chars max per file,
            // top 5 files); floor_char_boundary keeps each cut on a UTF-8 boundary.
            previous_code = result
                .files
                .iter()
                .take(5)
                .map(|f| {
                    let snippet = if f.content.len() > 2000 {
                        format!(
                            "{}...(truncated)",
                            &f.content[..f.content.floor_char_boundary(2000)]
                        )
                    } else {
                        f.content.clone()
                    };
                    format!("### {}\n```\n{}\n```", f.path.display(), snippet)
                })
                .collect::<Vec<_>>()
                .join("\n\n");
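            // Worst case this carries ~10 KB of prior code (5 files at 2 KB
            // each) into the next fix prompt.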

            let mut rr = round_report;
            rr.feedback_to_next_round = Some(feedback.clone());
            rb.add_round(rr);

            let improved = best_result
                .as_ref()
                .map(|b| result.final_score > b.final_score)
                .unwrap_or(true);
            if improved {
                best_round = round + 1;
                best_result = Some(result);
            } else if round >= 2 {
                // Fix 3: Stop early if score hasn't improved for 2 consecutive rounds
                let prev_best = best_result.as_ref().map(|b| b.final_score).unwrap_or(0.0);
                if result.final_score < prev_best - 0.1 {
                    println!("[AUTO] Score declining ({:.1} < best {:.1}) — stopping early, restoring best round {}",
                        result.final_score, prev_best, best_round);
                    break;
                }
            }

            voice::fix_round(round + 2, MAX_FIX_ROUNDS);
        }

        // Should only reach here if all rounds exhausted without accept/abort.
        // `best_result` is unconditionally set on iteration 0 of the fix-round
        // loop (see `improved` initialization above) and the compile-time
        // assertion on `MAX_FIX_ROUNDS` guarantees iteration 0 runs — so this
        // None branch is unreachable. We surface a real error rather than
        // panicking just in case future refactors change the loop shape.
        let best = match best_result {
            Some(b) => b,
            None => return Err(anyhow::anyhow!("Pipeline produced no rounds")),
        };
        println!();
        println!(
            "All {} fix rounds exhausted (best: {:.1}/10, round {})",
            MAX_FIX_ROUNDS, best.final_score, best_round
        );

        // Restore best round's files to disk (fix rounds may have degraded)
        if self.repo_context.is_none() {
            let _ = fs::remove_dir_all(output_dir);
            fs::create_dir_all(output_dir)?;
        }
        codegen::write_files(output_dir, &best.files)?;

        let report = rb.build(false, best.final_score, best_round, output_dir, &best.files);
        let _ = report::save_report(&report);

        self.last_best_score = best.final_score as f64;
        voice::mission_complete(false, best.final_score);
        Ok(())
    }

    /// Run architect with stats capture.
    async fn run_architect_with_stats(&self, prompt: &str) -> Result<(String, LlmCallStats)> {
        let mem_context = memory::load_context(prompt);
        let repo_section = if let Some(ref ctx) = self.repo_context {
            format!(
                "\n\nExisting codebase:\n{}\n\n\
                     You are EXTENDING this codebase, not creating from scratch.\n\
                     Follow existing conventions, naming patterns, and directory structure.\n\
                     Only list files that need to be CREATED or MODIFIED.",
                ctx
            )
        } else {
            String::new()
        };
        let system = format!(
            "{}\n\n{}\n\nYou are a Principal Software Architect.\n\
             Produce a clear, actionable specification with:\n\
             1. Architecture Decision Record (1 paragraph)\n\
             2. COMPLETE file manifest with exact relative paths and purpose of each file\n\
             3. TDD test plan (list every test case with expected behavior)\n\
             4. Security considerations\n\n\
             IMPORTANT: List EVERY file that needs to be created. The coder will generate\n\
             all of these files. Include config files, __init__.py, models, routes, etc.\n\
             Output structured text, NOT code.{}",
            self.quality_bible, mem_context, repo_section
        );
        self.llm_architect
            .generate_live_with_stats("ARCHITECT", &system, prompt)
            .await
    }

    /// Run tester with stats capture.
    async fn run_tester_with_stats(
        &self,
        prompt: &str,
        spec: &str,
    ) -> Result<(String, LlmCallStats)> {
        let system = format!(
            "{}\n\nYou are a Senior Test Engineer. Write COMPLETE test files.\n\
             Rules:\n\
             - Write tests FIRST, before any implementation exists\n\
             - Cover: happy path, edge cases, error cases, security\n\
             - Use the standard test framework for the language\n\
             - Aim for >= 90% coverage of the spec\n\
             - Output MULTIPLE test files if the spec has multiple modules\n\
             - For each file, start with a header: ### tests/test_<module>.py\n\
             - Then a fenced code block with the test code\n\
             - Output ALL test files, no explanations between them",
            self.quality_bible
        );
        let user_prompt = format!(
            "Based on this spec, write the complete test suite:\n\n{}\n\nOriginal request: {}",
            spec, prompt
        );
        self.llm_tester
            .generate_live_with_stats("TESTER", &system, &user_prompt)
            .await
    }

    /// Run one attempt through stages 4-8 with report capture.
    async fn attempt_round_with_report(
        &self,
        prompt: &str,
        spec: &str,
        tests_raw: &str,
        language: &str,
        output_dir: &Path,
        round: usize,
        feedback: &str,
        previous_code: &str,
    ) -> Result<(AttemptResult, RoundReport)> {
        let result = self
            .attempt_round(
                prompt,
                spec,
                tests_raw,
                language,
                output_dir,
                round,
                feedback,
                previous_code,
            )
            .await?;

        // Build round report from the result
        let file_reports: Vec<FileVerifierReport> = result
            .reports
            .iter()
            .enumerate()
            .map(|(i, r)| {
                let path = result
                    .files
                    .get(i)
                    .map(|f| f.path.display().to_string())
                    .unwrap_or_else(|| format!("file_{}", i));
                FileVerifierReport {
                    path,
                    score: r.score,
                    lint_passed: r.lint_passed,
                    lint_issues: r.lint_issues.clone(),
                    syntax_valid: r.syntax_valid,
                    has_tests: r.has_tests,
                    has_docstring: r.has_docstring,
                    has_error_handling: r.has_error_handling,
                    has_hardcoded_secrets: r.has_hardcoded_secrets,
                }
            })
            .collect();

        let total_lint_issues: usize = result.reports.iter().map(|r| r.lint_issues.len()).sum();
        let secrets_found = result.reports.iter().any(|r| r.has_hardcoded_secrets);

        let critique_scores = if result.critique_scores.len() >= 5 {
            CritiqueScores {
                dev: result.critique_scores[0],
                arch: result.critique_scores[1],
                test: result.critique_scores[2],
                sec: result.critique_scores[3],
                docs: result.critique_scores[4],
            }
        } else {
            CritiqueScores {
                dev: 7.0,
                arch: 7.0,
                test: 7.0,
                sec: 7.0,
                docs: 7.0,
            }
        };

        let cto_approved = result.cto_verdict.to_uppercase().contains("APPROVE");
        let sec_passed = !result.security_verdict.to_uppercase().contains("FAIL");

        let rr = RoundReport {
            round_number: round + 1,
            coder: LlmStageReport {
                model: self.config.coder.model.clone(),
                duration_secs: 0.0, // timing captured at LLM level
                token_count: 0,
                tok_per_sec: 0.0,
                output_lines: result
                    .files
                    .iter()
                    .map(|f| f.content.lines().count() as u64)
                    .sum(),
            },
            verifier: VerifierReport {
                duration_secs: 0.0,
                avg_score: result.verifier_score,
                tests_passed: result.tests_passed,
                tests_failed: result.tests_failed,
                tests_run: result.tests_passed > 0 || result.tests_failed > 0,
                total_lint_issues,
                secrets_found,
                file_reports,
            },
            security: SecurityReport {
                model: self.config.security.model.clone(),
                duration_secs: 0.0,
                verdict: result
                    .security_verdict
                    .lines()
                    .next()
                    .unwrap_or("")
                    .to_string(),
                passed: sec_passed,
            },
            critique: CritiqueReport {
                model: self.config.critique.model.clone(),
                duration_secs: 0.0,
                scores: critique_scores,
                avg: result.critique_avg,
                details: result.critique_details.clone(),
            },
            cto: CtoReport {
                model: self.config.cto.model.clone(),
                duration_secs: 0.0,
                verdict: result.cto_verdict.lines().next().unwrap_or("").to_string(),
                approved: cto_approved,
            },
            final_score: result.final_score,
            critique_avg: result.critique_avg,
            verifier_score: result.verifier_score,
            feedback_to_next_round: None,
        };

        Ok((result, rr))
    }

    /// Run one attempt through stages 4-8.
    async fn attempt_round(
        &self,
        prompt: &str,
        spec: &str,
        tests_raw: &str,
        language: &str,
        output_dir: &Path,
        round: usize,
        feedback: &str,
        previous_code: &str,
    ) -> Result<AttemptResult> {
        let files = if round == 0 {
            // === ROUND 1: Full generation ===
            self.single_shot_generate(prompt, spec, tests_raw, language, output_dir)
                .await?
        } else {
            // === FIX ROUNDS: surgical fix of broken files (full regen removed; see Fix 2) ===
            self.surgical_or_regen(
                prompt,
                spec,
                tests_raw,
                language,
                output_dir,
                round,
                feedback,
                previous_code,
            )
            .await?
        };

        // If fix round produced 0 files, reuse previous round's files from disk
        let files = if files.is_empty() && round > 0 {
            let disk_files = load_files_from_dir(output_dir).unwrap_or_default();
            if disk_files.is_empty() {
                println!("[FIX] No files on disk — returning previous round score");
                return Ok(AttemptResult {
                    files: vec![],
                    reports: vec![],
                    critique_scores: vec![],
                    critique_details: vec![],
                    security_verdict: String::new(),
                    cto_verdict: String::new(),
                    verifier_score: 0.0,
                    critique_avg: 0.0,
                    final_score: 0.0,
                    test_errors: vec![],
                    tests_passed: 0,
                    tests_failed: 0,
                });
            }
            disk_files
        } else {
            files
        };

        // Unload coder/fix_coder model to free VRAM for reviewer
        offload_model(&self.config.coder.model).await;
        if self.config.fix_coder.model != self.config.coder.model {
            offload_model(&self.config.fix_coder.model).await;
        }

        // Stage 5: Verifier — verify entire project (linters + tests)
        let stage = if round == 0 { "[5/9]" } else { "[FIX]" };
        self.emit_stage("5/9", "VERIFIER", "ruff+pytest");
        self.emit_log(
            "info",
            &format!(
                "{} VERIFIER: Checking {} files + running tests...",
                stage,
                files.len()
            ),
        );

        let project_report = verifier::verify_project(output_dir, language)?;

        let verifier_score = project_report.avg_score;
        let verifier_tests_passed = project_report.tests_passed;
        let verifier_tests_failed = project_report.tests_failed;
        let reports = project_report
            .file_reports
            .iter()
            .map(|(_, r)| r)
            .collect::<Vec<_>>();
        let any_secrets = reports.iter().any(|r| r.has_hardcoded_secrets);
        let total_issues: usize = reports.iter().map(|r| r.lint_issues.len()).sum();

        let verifier_summary = format!(
            "   Verifier avg: {:.1}/10 | Files: {} | Issues: {} | Secrets: {} | Tests: {}",
            verifier_score,
            reports.len(),
            total_issues,
            if any_secrets { "FOUND" } else { "clean" },
            if project_report.tests_run {
                format!(
                    "{} passed, {} failed",
                    project_report.tests_passed, project_report.tests_failed
                )
            } else {
                "not run".to_string()
            }
        );
        self.emit_log("info", &verifier_summary);
        self.emit_stage_done("5/9", &format!("{:.1}/10", verifier_score));

        // Capture test errors for surgical fix targeting
        let test_errors = project_report.test_errors.clone();

        // Convert to owned reports for AttemptResult
        let reports: Vec<verifier::QualityReport> = project_report
            .file_reports
            .into_iter()
            .map(|(_, r)| r)
            .collect();

        // Combine all code for review stages
        let all_code = files
            .iter()
            .map(|f| format!("### {}\n```\n{}\n```", f.path.display(), f.content))
            .collect::<Vec<_>>()
            .join("\n\n");
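        // `all_code` is the single review payload shared by the Security,
        // Critique, and CTO stages below.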

        // Stage 6: Security Auditor
        let stage = if round == 0 { "[6/9]" } else { "[FIX]" };
        self.emit_stage("6/9", "SECURITY", &self.config.security.model.clone());
        self.emit_log(
            "info",
            &format!("{} SECURITY: Checking for vulnerabilities...", stage),
        );
        let security_system = "/no_think\nYou are a Security Auditor. Review this code for:\n\
             1. OWASP Top 10 vulnerabilities\n\
             2. Hardcoded secrets or credentials\n\
             3. SQL injection, XSS, CSRF risks\n\
             4. Missing input validation\n\
             5. Missing rate limiting\n\
             Output a brief verdict: PASS or FAIL with specific issues.";
        let security_verdict = self
            .llm_security
            .generate("SECURITY", security_system, &all_code)
            .await
            .unwrap_or_else(|_| "REVIEW SKIPPED".to_string());

        // Stage 7: Critique Panel (5 reviewers)
        let stage = if round == 0 { "[7/9]" } else { "[FIX]" };
        self.emit_stage("7/9", "CRITIQUE", &self.config.critique.model.clone());
        self.emit_log(
            "info",
            &format!("{} CRITIQUE PANEL: 5 specialist reviews...", stage),
        );
        self.emit_stage_done("6/9", "done");
        let (critique_scores, critique_details) = self.run_critique_panel(&all_code, spec).await?;
        let critique_avg = if critique_scores.is_empty() {
            5.0
        } else {
            critique_scores.iter().sum::<f32>() / critique_scores.len() as f32
        };
        if critique_scores.len() >= 5 {
            println!(
                "   Dev={:.1} Arch={:.1} Test={:.1} Sec={:.1} Docs={:.1} => Avg={:.1}",
                critique_scores[0],
                critique_scores[1],
                critique_scores[2],
                critique_scores[3],
                critique_scores[4],
                critique_avg
            );
        } else {
            println!(
                "   Critique avg={:.1} ({} scores)",
                critique_avg,
                critique_scores.len()
            );
        }

        // Offload critique model before loading CTO (if different)
        if self.config.critique.model != self.config.cto.model {
            offload_model(&self.config.critique.model).await;
        }

        // Stage 8: CTO Final Review
        let stage = if round == 0 { "[8/9]" } else { "[FIX]" };
        self.emit_stage_done("7/9", &format!("avg={:.1}", critique_avg));
        self.emit_stage("8/9", "CTO REVIEW", &self.config.cto.model.clone());
        self.emit_log(
            "info",
            &format!("{} CTO REVIEW: Mission-level coherence...", stage),
        );
        let cto_system = "/no_think\nYou are a CTO doing a final review. Check:\n\
             1. Does the code match the original request?\n\
             2. Is it production-ready? Are all modules present?\n\
             3. Would you deploy this to customers?\n\
             Output: APPROVE or REJECT with reason (1-2 sentences).";
        let cto_prompt = format!(
            "Original request: {}\n\nGenerated code ({} files):\n{}\n\nSecurity review: {}",
            prompt,
            files.len(),
            all_code,
            security_verdict
        );
        let cto_verdict = self
            .llm_cto
            .generate("CTO", cto_system, &cto_prompt)
            .await
            .unwrap_or_else(|_| "REVIEW SKIPPED".to_string());

        // Calculate final score: critique 40% + verifier 60%
        // Verifier (tests + linting) is the real quality signal — weight it higher
        let final_score = critique_avg * 0.4 + verifier_score * 0.6;
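        // Worked example: critique_avg 8.0, verifier 9.0 -> 0.4*8.0 + 0.6*9.0 = 8.6.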
        self.emit_stage_done("8/9", "done");
        self.emit_log("info", &format!("[9/9] GATE: Score {:.1}/10", final_score));

        Ok(AttemptResult {
            files,
            reports,
            critique_scores,
            critique_details,
            security_verdict,
            cto_verdict,
            verifier_score,
            critique_avg,
            final_score,
            test_errors,
            tests_passed: verifier_tests_passed,
            tests_failed: verifier_tests_failed,
        })
    }

    /// Single-shot generation for simple tasks (C1-C7).
    /// One LLM call produces all files — better coherence, fewer import seam failures.
    async fn single_shot_generate(
        &self,
        prompt: &str,
        spec: &str,
        tests_raw: &str,
        language: &str,
        output_dir: &Path,
    ) -> Result<Vec<GeneratedFile>> {
        self.emit_stage("4/9", "CODER", &self.config.coder.model.clone());
        self.emit_log(
            "info",
            &format!(
                "[4/9] CODER: Implementing (single-shot, C{})...",
                self.complexity
            ),
        );

        let warnings = known_bad_patterns(language);
        let coder_system = format!(
            "{}\n\nYou are a Senior Software Engineer (10+ years).\n\
             Rules:\n\
             - Output EVERY file as a separate fenced code block\n\
             - Before each code block, write the ACTUAL file path as a markdown header, e.g.: ### app/main.py\n\
             - Follow SOLID principles, clean architecture\n\
             - Full error handling (no bare except, no unwrap)\n\
             - No hardcoded secrets — use environment variables\n\
             - No TODO/FIXME comments\n\
             - Include docstrings and type hints\n\
             - Do NOT reference modules you haven't created\n\
             - IMPORTANT: Use 'app/' as the Python package root (NOT 'src/'). All imports must use 'from app.xxx import yyy'.\n\
             - IMPORTANT: All __init__.py files must be EMPTY (just a docstring or blank). Do NOT put imports or re-exports in __init__.py.\n\
             - IMPORTANT: Every name used in type hints MUST be imported in that file.\n\n\
             {}\n\n{}",
            self.quality_bible, warnings, memory::load_failure_patterns(language)
        );

        let tests_compact = truncate_str(tests_raw, 4000);
        let repo_section = if let Some(ref ctx) = self.repo_context {
            format!("\n\nExisting codebase (read-only context — build on this, follow its conventions):\n{}\n", ctx)
        } else {
            String::new()
        };
        let coder_prompt = format!(
            "Implement the COMPLETE project based on this spec.\n\n\
             Spec:\n{}\n\n\
             Test plan (your code must pass these tests):\n{}\n\n\
             Original request: {}{}\n\n\
             Generate ALL files: production code, tests (conftest.py + test files), and pyproject.toml.\n\
             Output every file with its real path as a ### header before its code block (e.g., ### app/config.py, ### tests/conftest.py).",
            spec, tests_compact, prompt, repo_section
        );

        let code_raw = self
            .llm_coder
            .generate_live("CODER", &coder_system, &coder_prompt)
            .await?;
        let mut all_files = codegen::extract_files(&code_raw, language);

        // Merge tester-generated test files (don't overwrite coder's)
        let test_files = codegen::extract_files(tests_raw, language);
        for tf in &test_files {
            if !all_files.iter().any(|f| f.path == tf.path) {
                all_files.push(tf.clone());
            }
        }
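        // On path conflicts the coder's file wins; tester output only fills
        // paths the coder did not emit.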

        // Fallback: single-file extraction
        if all_files.is_empty() {
            let code = llm::extract_code(&code_raw, language);
            let tests = llm::extract_code(tests_raw, language);
            all_files.push(GeneratedFile {
                path: PathBuf::from(default_code_path(language)),
                content: code,
                language: language.to_string(),
            });
            all_files.push(GeneratedFile {
                path: PathBuf::from(default_test_path(language)),
                content: tests,
                language: language.to_string(),
            });
        }

        // Sanitize Python code: fix common import mistakes + strip __init__.py re-exports
        if language == "python" {
            sanitize_python_imports(&mut all_files);
            sanitize_init_files(&mut all_files);
        }

        self.emit_log(
            "info",
            &format!(
                "[4/9] CODER: {} files generated (single-shot)",
                all_files.len()
            ),
        );
        self.emit_stage_done("4/9", &format!("{} files", all_files.len()));
        // Send generated code to Code tab
        let code_preview: String = all_files
            .iter()
            .map(|f| format!("### {}\n{}\n", f.path.display(), f.content))
            .collect();
        self.emit(TuiEvent::CodeChunk {
            content: code_preview,
            model: self.config.coder.model.clone(),
            done: true,
        });

        // Write all files (repo mode: overwrite only, don't nuke existing dir)
        if self.repo_context.is_none() {
            let _ = fs::remove_dir_all(output_dir);
            fs::create_dir_all(output_dir)?;
        }
        codegen::write_files(output_dir, &all_files)?;
        codegen::write_boilerplate(output_dir, language, prompt)?;

        Ok(all_files)
    }

    /// Surgical fix pass: patch only the files the feedback identifies as broken
    /// (v2 general_direct_fix). If no broken files can be identified, the round
    /// is skipped and the previous code is kept; full regen was removed because
    /// it consistently degraded quality (see Fix 2 below).
1284    async fn surgical_or_regen(
1285        &self,
1286        prompt: &str,
1287        _spec: &str,
1288        _tests_raw: &str,
1289        language: &str,
1290        output_dir: &Path,
1291        _round: usize,
1292        feedback: &str,
1293        _previous_code: &str,
1294    ) -> Result<Vec<GeneratedFile>> {
1295        // Load previous files from output dir
1296        let prev_files = load_files_from_dir(output_dir)?;
1297        if prev_files.is_empty() {
1298            // No previous files — nothing to fix, skip round
1299            println!("[FIX] No previous files found — skipping round");
1300            return Ok(vec![]);
1301        }
1302
1303        // Identify which files have issues from the feedback
1304        let broken_files = identify_broken_files(feedback, &prev_files);
1305
1306        if broken_files.is_empty() {
1307            // Fix 2: Never fall back to full regen — it always degrades quality.
1308            // Skip this round and keep previous files intact.
1309            println!(
1310                "[FIX] Cannot identify specific broken files — skipping round (keeping previous code)"
1311            );
1312            Ok(prev_files)
1313        } else {
1314            // === SURGICAL FIX MODE (v2 general_direct_fix) ===
1315            println!(
1316                "[FIX] SURGICAL: {} of {} files need fixing — patching only broken files",
1317                broken_files.len(),
1318                prev_files.len()
1319            );
1320
1321            let mut files = prev_files;
1322
1323            for (file_idx, file_issues) in &broken_files {
1324                let file = &files[*file_idx];
1325
1326                // v2 size gate: skip surgical fix for large files needing structural rewrites
1327                if file.content.lines().count() > 500
1328                    && file_issues.iter().any(|i| {
1329                        i.contains("restructure") || i.contains("redesign") || i.contains("rewrite")
1330                    })
1331                {
1332                    println!(
1333                        "   [skip] {} — too large for surgical fix ({} lines)",
1334                        file.path.display(),
1335                        file.content.lines().count()
1336                    );
1337                    continue;
1338                }
1339
1340                println!(
1341                    "   [fix] {} ({} issue(s))",
1342                    file.path.display(),
1343                    file_issues.len()
1344                );
1345
1346                // Build findings text (v2 format)
1347                let findings_text: String = file_issues
1348                    .iter()
1349                    .map(|issue| format!("  - {}", issue))
1350                    .collect::<Vec<_>>()
1351                    .join("\n");
1352
1353                let fixed_content = self
1354                    .surgical_fix_file(
1355                        &file.path.display().to_string(),
1356                        &file.content,
1357                        &findings_text,
1358                        language,
1359                        prompt,
1360                    )
1361                    .await?;
1362
1363                // Replace the file content
1364                let idx = *file_idx;
1365                files[idx] = GeneratedFile {
1366                    path: file.path.clone(),
1367                    content: fixed_content,
1368                    language: language.to_string(),
1369                };
1370            }
1371
            // Sanitize known-bad Python imports, then write the updated files
1373            if language == "python" {
1374                sanitize_python_imports(&mut files);
1375                sanitize_init_files(&mut files);
1376            }
1377
1378            if self.repo_context.is_none() {
1379                let _ = fs::remove_dir_all(output_dir);
1380                fs::create_dir_all(output_dir)?;
1381            }
1382            codegen::write_files(output_dir, &files)?;
1383            codegen::write_boilerplate(output_dir, language, prompt)?;
1384
1385            Ok(files)
1386        }
1387    }
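
    // Example shape (illustrative): with broken_files = [(2, ["app/db.py:
    // ImportError ..."])], only files[2] is patched; every other file is
    // carried over byte-for-byte from the previous round.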
1388
1389    /// Fix a single file surgically (v2 general_direct_fix prompt).
1390    /// Uses generate_live for streaming output so you can see tokens flowing.
1391    async fn surgical_fix_file(
1392        &self,
1393        file_name: &str,
1394        file_content: &str,
1395        findings: &str,
1396        language: &str,
1397        _mission: &str,
1398    ) -> Result<String> {
1399        let system = format!(
1400            "{}\n\nYou are a Senior Software Engineer fixing bugs in existing code.\n\
1401             Fix the issues listed below. Preserve all working code. Only change what is broken.",
1402            self.quality_bible
1403        );
1404
1405        // Truncate file content for context (v2: 2K cap per file for retry, but surgical gets full file up to 4K)
1406        let code_for_prompt = truncate_str(file_content, 4000);
1407
1408        let user_prompt = format!(
1409            "Fix the following issues in this file.\n\n\
1410             ### {file_name}\n\
1411             ```{language}\n\
1412             {code_for_prompt}\n\
1413             ```\n\n\
1414             Issues to fix:\n\
1415             {findings}\n\n\
1416             Output the COMPLETE fixed file as a single fenced code block.\n\
1417             Fix ONLY the issues listed. Do not refactor working code.\n\
1418             If a fix requires adding imports, add them.\n\n\
1419             ### {file_name}\n\
1420             ```{language}"
1421        );
1422
1423        // Use generate_live so tokens stream to stdout
1424        let raw = self
1425            .llm_fix_coder
1426            .generate_live("  FIX", &system, &user_prompt)
1427            .await?;
1428        let mut fixed = llm::extract_code(&raw, language);
1429
1430        // Strip inner code fences from config files (LLM wraps pyproject.toml in ```toml)
1431        let config_exts = [
1432            ".toml", ".yaml", ".yml", ".json", ".ini", ".cfg", ".env", ".txt",
1433        ];
1434        if config_exts.iter().any(|ext| file_name.ends_with(ext)) && fixed.trim().starts_with("```")
1435        {
1436            if let Some(nl) = fixed.find('\n') {
1437                let after = &fixed[nl + 1..];
1438                fixed = if after.trim_end().ends_with("```") {
1439                    let end = after.rfind("```").unwrap_or(after.len());
1440                    after[..end].trim().to_string()
1441                } else {
1442                    after.trim().to_string()
1443                };
1444            }
1445        }
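
        // Illustrative example (made-up reply): a fixed pyproject.toml that
        // still arrives wrapped in an inner fence,
        //     ```toml
        //     [project]
        //     name = "demo"
        //     ```
        // is reduced by the block above to just the two TOML lines, so the
        // validation below sees raw config rather than markdown.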
1446
1447        // Content validation
1448        // 1. Too short — LLM returned stub or empty
1449        if fixed.len() < file_content.len() / 3 {
1450            println!(
1451                "   [warn] Fix for {} too short ({} vs {} chars), keeping original",
1452                file_name,
1453                fixed.len(),
1454                file_content.len()
1455            );
1456            return Ok(file_content.to_string());
1457        }
1458
1459        // 2. Reasoning leak — LLM embedded its thinking instead of code
1460        let reasoning_markers = [
1461            "looking at the error",
1462            "the issue is",
1463            "let me",
1464            "I need to",
1465            "the fix is",
1466            "we need to",
1467            "the problem is",
1468            "actually",
1469            "re-reading the instruction",
1470            "Wait -",
1471            "Hmm",
1472            "However, the instruction says",
1473            "false positive",
1474            "no actual",
1475            "nothing to fix",
1476            "no issues to fix",
1477            "examining the file",
1478        ];
        let fixed_lower = fixed.to_lowercase();
        let reasoning_count = reasoning_markers
            .iter()
            .filter(|m| fixed_lower.contains(&m.to_lowercase()))
            .count();
1483        // Config files: stricter threshold (1 marker = leak). Code files: 3 markers.
1484        let config_exts = [".toml", ".yaml", ".yml", ".json", ".ini", ".cfg"];
1485        let threshold = if config_exts.iter().any(|ext| file_name.ends_with(ext)) {
1486            1
1487        } else {
1488            3
1489        };
1490        if reasoning_count >= threshold {
1491            println!(
1492                "   [warn] Fix for {} contains LLM reasoning (not code), keeping original",
1493                file_name
1494            );
1495            return Ok(file_content.to_string());
1496        }
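
        // Illustrative trip case (made-up reply): output beginning "Looking at
        // the error, the issue is that we need to..." hits three markers and is
        // rejected for a code file; for a .toml/.yaml config a single marker is
        // already treated as a leak.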
1497
1498        Ok(fixed)
1499    }
1500
1501    /// Build compact feedback (v2 strategy: truncate everything, focus on actionable issues).
1502    fn build_feedback_v2(
1503        &self,
1504        result: &AttemptResult,
1505        round: usize,
1506        persistent_issues: &[String],
1507    ) -> String {
1508        let mut feedback = String::new();
1509
1510        // Scale truncation limits by complexity (C8+ gets 2x budget)
1511        let verifier_limit = if self.complexity >= 8 { 1500 } else { 800 };
1512        let critique_limit = if self.complexity >= 8 { 600 } else { 400 };
1513        let verdict_limit = if self.complexity >= 8 { 400 } else { 200 };
1514
1515        // Verifier issues
1516        let mut verifier_section = String::new();
1517        for (i, report) in result.reports.iter().enumerate() {
1518            if !report.lint_issues.is_empty() {
1519                let file_name = result
1520                    .files
1521                    .get(i)
1522                    .map(|f| f.path.display().to_string())
1523                    .unwrap_or_else(|| format!("file_{}", i));
1524                verifier_section.push_str(&format!("{}: ", file_name));
1525                verifier_section.push_str(&report.lint_issues.join("; "));
1526                verifier_section.push('\n');
1527            }
1528            if report.has_hardcoded_secrets {
1529                verifier_section.push_str("CRITICAL: Hardcoded secrets — use os.getenv()\n");
1530            }
1531        }
1532        if !verifier_section.is_empty() {
1533            feedback.push_str("## Verifier\n");
1534            feedback.push_str(&truncate_str(&verifier_section, verifier_limit));
1535            feedback.push('\n');
1536        }
1537
1538        // Test errors — critical for surgical fix targeting
1539        if !result.test_errors.is_empty() {
1540            feedback.push_str("## Test errors\n");
1541            let max_errors = if self.complexity >= 8 { 20 } else { 10 };
1542            for err in result.test_errors.iter().take(max_errors) {
1543                feedback.push_str(&format!("{}\n", err));
1544            }
1545            feedback.push('\n');
1546        }
1547
1548        // Critique defects — one line each, truncated
1549        let critique_text: String = result
1550            .critique_details
1551            .iter()
1552            .filter(|d| !d.is_empty())
1553            .cloned()
1554            .collect::<Vec<_>>()
1555            .join("; ");
1556        if !critique_text.is_empty() {
1557            feedback.push_str("## Critique defects\n");
1558            feedback.push_str(&truncate_str(&critique_text, critique_limit));
1559            feedback.push('\n');
1560        }
1561
1562        // CTO verdict — first line only
1563        let cto_first_line = result.cto_verdict.lines().next().unwrap_or("");
1564        feedback.push_str(&format!(
1565            "## CTO: {}\n",
1566            truncate_str(cto_first_line, verdict_limit)
1567        ));
1568
1569        // Security — first line only
1570        let sec_first_line = result.security_verdict.lines().next().unwrap_or("");
1571        feedback.push_str(&format!(
1572            "## Security: {}\n",
1573            truncate_str(sec_first_line, verdict_limit)
1574        ));
1575
1576        // Persistent issues — flag things that haven't been fixed across rounds
1577        if round >= 2 && !persistent_issues.is_empty() {
1578            feedback.push_str(&format!(
1579                "\n## PERSISTENT ISSUES (unfixed for {} rounds)\n",
1580                round
1581            ));
1582            for issue in persistent_issues.iter().take(5) {
1583                feedback.push_str(&format!("- {}\n", issue));
1584            }
1585        }
1586
1587        feedback.push_str(&format!(
1588            "\nScore: {:.1}/10 (need >= {:.1}).\n\
1589             IMPORTANT: Fix ONLY bugs (import errors, missing imports, test failures, syntax errors).\n\
1590             Do NOT add new features, middleware, auth, or rate limiting unless the original prompt asked for them.\n\
1591             Do NOT restructure working code. Preserve what works.\n",
1592            result.final_score, quality_gate(self.complexity)
1593        ));
1594
1595        feedback
1596    }
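
    // Illustrative output shape (all values made up):
    //
    //   ## Verifier
    //   app/main.py: F401 unused import; E501 line too long
    //   ## Test errors
    //   ImportError: cannot import name 'get_db' from 'app.database'
    //   ## Critique defects
    //   missing input validation; no pagination tests
    //   ## CTO: NEEDS WORK: error handling is incomplete
    //   ## Security: PASS
    //
    //   Score: 7.8/10 (need >= 9.2).
    //
    // followed by the fixed "IMPORTANT: Fix ONLY bugs" scope block.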
1597
1598    /// Extract key issue identifiers for persistent issue tracking.
1599    fn extract_issue_keys(&self, result: &AttemptResult) -> Vec<String> {
1600        let mut keys = Vec::new();
1601        for report in &result.reports {
1602            for issue in &report.lint_issues {
1603                // Normalize to short key
1604                let key = if issue.contains("hardcoded secret") || issue.contains("Hardcoded") {
1605                    "hardcoded secrets".to_string()
1606                } else if issue.contains("syntax error") {
1607                    "syntax errors".to_string()
1608                } else if issue.contains("import") {
1609                    "import errors".to_string()
1610                } else {
1611                    truncate_str(issue, 60).to_string()
1612                };
1613                if !keys.contains(&key) {
1614                    keys.push(key);
1615                }
1616            }
1617            if report.has_hardcoded_secrets && !keys.contains(&"hardcoded secrets".to_string()) {
1618                keys.push("hardcoded secrets".to_string());
1619            }
1620        }
1621        keys
1622    }
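
    // Example normalization (illustrative): "unused import `os`" and "cannot
    // import name 'get_db'" both collapse to the key "import errors", so
    // persistence is tracked per category rather than per exact message.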
1623
1624    /// Run critique panel as a SINGLE LLM call (5 scores in one response).
1625    async fn run_critique_panel(&self, code: &str, spec: &str) -> Result<(Vec<f32>, Vec<String>)> {
1626        let system = "/no_think\nYou are 5 expert reviewers in one. Score this code 0-10 on each dimension.\n\
1627            Output EXACTLY this format (one line per role, nothing else):\n\
1628            DEV: X.X | defects: ...\n\
1629            ARCH: X.X | defects: ...\n\
1630            TEST: X.X | defects: ...\n\
1631            SEC: X.X | defects: ...\n\
1632            DOCS: X.X | defects: ...\n\n\
1633            DEV = correctness, robustness\n\
1634            ARCH = architecture, SOLID, maintainability\n\
1635            TEST = test quality, coverage\n\
1636            SEC = security, OWASP, secrets\n\
1637            DOCS = documentation, readability";
1638
1639        let prompt = format!("Code:\n{}\n\nSpec:\n{}", code, spec);
1640
1641        let response = self
1642            .llm_critique
1643            .generate("  CRITIQUE", system, &prompt)
1644            .await
1645            .unwrap_or_else(|e| {
1646                eprintln!("   CRITIQUE FAILED: {}", e);
1647                "DEV: 5.0\nARCH: 5.0\nTEST: 5.0\nSEC: 5.0\nDOCS: 5.0".to_string()
1648            });
1649
1650        if response.trim().is_empty() {
1651            eprintln!("   CRITIQUE returned empty response — using default 5.0 scores");
1652            return Ok((vec![5.0f32; 5], vec![String::new(); 5]));
1653        }
1654
1655        // Parse 5 scores from the single response
1656        let mut scores = vec![5.0f32; 5];
1657        let mut details = vec![String::new(); 5];
1658        let prefixes = ["DEV", "ARCH", "TEST", "SEC", "DOCS"];
1659
1660        for line in response.lines() {
1661            // Strip markdown formatting (**, *, #, etc.) before matching
1662            let stripped: String = line.chars().filter(|c| *c != '*' && *c != '#').collect();
1663            let upper = stripped.to_uppercase();
            for (i, prefix) in prefixes.iter().enumerate() {
                // Match "DEV:", "DEV :", or "DEV=". Require the delimiter (after
                // optional spaces) so "DEVELOPER" or a "dev" inside another row's
                // defect text cannot hijack this row's score.
                if let Some(pos) = upper.find(prefix) {
                    let after = upper[pos + prefix.len()..].trim_start();
                    if after.starts_with(':') || after.starts_with('=') {
1669                        // Extract score — first number 0-10 on the line
1670                        for word in stripped.split_whitespace() {
1671                            let cleaned = word.trim_matches(|c: char| !c.is_numeric() && c != '.');
1672                            if let Ok(n) = cleaned.parse::<f32>() {
1673                                if (0.0..=10.0).contains(&n) {
1674                                    scores[i] = n;
1675                                    break;
1676                                }
1677                            }
1678                        }
1679                        // Extract defects after |
1680                        if let Some(defect_part) = line.split('|').nth(1) {
1681                            details[i] = defect_part.trim().to_string();
1682                        }
1683                    }
1684                }
1685            }
1686        }
1687
1688        // Warn if all scores stayed at default (parser couldn't extract)
1689        if scores.iter().all(|&s| s == 5.0) {
1690            eprintln!("   WARNING: Critique parser extracted no scores from {} lines — model may have used unexpected format", response.lines().count());
1691            // Print first 5 lines for debugging
1692            for (i, line) in response.lines().take(5).enumerate() {
1693                eprintln!("     line {}: {}", i + 1, line);
1694            }
1695        }
1696
1697        Ok((scores, details))
1698    }
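
    // Illustrative parse of a made-up reply line "**DEV: 8.5 | defects: no
    // input validation**": markdown is stripped before prefix matching, the
    // DEV row scores 8.5, and the text after '|' is kept as the defect detail.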
1699
1700    fn print_results(&self, output_dir: &Path) -> Result<()> {
1701        println!();
1702        println!("=== Mission Complete ===");
1703        println!("Output: {}", output_dir.display());
1704        println!();
1705        list_files(output_dir)?;
1706        Ok(())
1707    }
1708}
1709
1710// ── Helper functions ──
1711
1712/// Remove __pycache__, .pytest_cache, and other build artifacts from output.
1713fn cleanup_artifacts(dir: &Path) {
1714    let artifacts = [
1715        "__pycache__",
1716        ".pytest_cache",
1717        "__pypackages__",
1718        ".mypy_cache",
1719        "node_modules",
1720        ".venv",
1721    ];
    // walkdir() yields file paths only, so this first pass deletes files that
    // live inside artifact directories; the directories themselves are removed
    // in the second pass below.
    if let Ok(entries) = walkdir(dir) {
        for entry in entries {
            if artifacts.iter().any(|a| entry.to_string_lossy().contains(a)) {
                let _ = fs::remove_file(&entry);
            }
        }
    }
    // Second pass: remove artifact directories at this level, then recurse
    // into the remaining subdirectories.
1736    if let Ok(entries) = fs::read_dir(dir) {
1737        for entry in entries.flatten() {
1738            let name = entry.file_name().to_string_lossy().to_string();
1739            if artifacts.contains(&name.as_str()) {
1740                let _ = fs::remove_dir_all(entry.path());
1741            }
1742            if entry.path().is_dir() {
1743                cleanup_artifacts(&entry.path());
1744            }
1745        }
1746    }
1747}
1748
1749/// Offload a model from Ollama VRAM to free memory for the next stage.
1750async fn offload_model(model: &str) {
1751    let client = reqwest::Client::new();
1752    let body = serde_json::json!({"model": model, "keep_alive": 0});
    // Errors are ignored silently: the model may simply not be loaded.
    if client
        .post(format!("{}/api/generate", crate::llm::ollama_url()))
        .json(&body)
        .send()
        .await
        .is_ok()
    {
        println!("   [VRAM] Offloaded {}", model);
    }
1762}
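
// Usage sketch (illustrative; the model tag is hypothetical):
//     offload_model("qwen2.5-coder:32b").await;
// frees the coder's VRAM before the next stage loads its own model, relying on
// Ollama's documented behavior of unloading a model when keep_alive is 0.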
1763
1764// Score extraction and defect parsing are now handled inline in run_critique_panel.
1765
1766fn default_code_path(language: &str) -> &str {
1767    match language {
1768        "python" => "app/main.py",
1769        "typescript" => "src/index.ts",
1770        "javascript" => "src/index.js",
1771        "rust" => "src/main.rs",
1772        "go" => "main.go",
1773        _ => "main.py",
1774    }
1775}
1776
1777fn default_test_path(language: &str) -> &str {
1778    match language {
1779        "python" => "tests/test_main.py",
1780        "typescript" => "__tests__/index.test.ts",
1781        "javascript" => "__tests__/index.test.js",
1782        "rust" => "src/tests.rs",
1783        "go" => "main_test.go",
1784        _ => "test_main.py",
1785    }
1786}
1787
1788/// Language-specific known-bad patterns learned from previous runs.
1789/// These prevent the most common LLM mistakes for each language.
1790fn known_bad_patterns(language: &str) -> String {
1791    let common = "\
1792COMMON MISTAKES TO AVOID:\n\
1793- Service/repository methods must return ORM/database models, NOT request schemas\n\
1794- register()/create() MUST call repository.create() and return the saved model\n\
1795- Every class/function you reference MUST be imported — verify your imports\n\
1796- Route handlers that need injected services MUST use the DI pattern for your framework\n\
1797- Do NOT return password hashes in response schemas\n\
1798- No hardcoded secrets — use environment variables\n";
1799
1800    let lang_specific = match language {
1801        "python" => "\
1802Python-specific:\n\
1803- Pydantic v2: use pydantic_settings.BaseSettings (NOT pydantic.BaseSettings)\n\
1804- Pydantic v2: use model_config = ConfigDict(from_attributes=True) (NOT class Config: orm_mode = True)\n\
1805- Pydantic v2: use @field_validator (NOT @validator)\n\
1806- Pydantic v2: use .model_validate() (NOT .from_orm())\n\
1807- Pydantic v2: models are frozen by default — use model_copy(update={...}) to modify\n\
1808- python-jose: use 'from jose import jwt' (NOT 'import jwt' — that's PyJWT, different API)\n\
1809- SQLAlchemy: cast PostgresDsn to str() before passing to create_async_engine()\n\
1810- SQLAlchemy 2.0: 'from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column' (NOT from sqlalchemy.ext.declarative — that module does NOT have DeclarativeBase)\n\
1811- FastAPI DI: route params need Depends() — e.g. service: MyService = Depends(get_service)\n\
1812- FastAPI routes: always import Depends from fastapi in router files\n\
1813- pytest-asyncio: add @pytest.mark.asyncio to async test functions\n\
1814- conftest.py: set env vars BEFORE importing Settings (module-level singletons)\n\
1815- CIRCULAR IMPORTS: dependencies.py must NEVER import from routes. Routes import from dependencies, not the other way around.\n\
1816- Dependencies (get_db, get_current_user) must be in a separate file that does NOT import route modules.\n\
1817- Surgical fix must NOT create empty __init__.py files — only fix the specific file content.\n\
1818- httpx 0.28+: use AsyncClient(transport=ASGITransport(app=app), base_url='http://test') NOT AsyncClient(app=app). Import ASGITransport from httpx.\n\
1819- pydantic-settings: do NOT use Field(...) (required) for settings fields — always provide a default value. Field(...) crashes if env var is not set.\n\
1820- @property: if a Settings field is a @property, access it as attribute (settings.foo), NOT as method call (settings.foo()).\n\
1821- FastAPI routes: if router prefix is '/users', the route decorator should be @router.post('') not @router.post('/') to avoid trailing-slash 307 redirects. Or use '/users/' consistently in tests.\n\
1822- Every name used in type hints (return types, params) MUST be imported. 'def foo() -> User' requires 'from app.models.user import User'.\n\
1823- SQLAlchemy: define Base in ONE place only (models.py or db/base.py). Do NOT create a second Base in database.py with declarative_base(). Import Base from that one place everywhere.\n\
1824- SQLAlchemy: use ONLY DeclarativeBase (new style), NEVER declarative_base() (old style). Never mix both in the same project.\n\
1825- Mocking: patch targets must match import style. If code does 'import smtplib' then patch 'module.smtplib.SMTP', NOT 'module.SMTP'.\n\
1826- Pydantic v2 validation error types: 'missing' (not 'field_required'), 'value_error' (not 'value_error.email'), 'int_parsing' (not 'value_error.integer').\n\
1827- Tests: use tempfile.TemporaryDirectory() for test dirs, never hardcoded relative paths.\n\
1828- Tests: if production code uses direct instantiation (obj = Foo()), do NOT mock with context manager (__enter__/__exit__).\n\
1829- NAMING: Pydantic response schemas MUST use different names than ORM models. Use UserResponse or UserRead (NOT User). Importing both 'from models import User' and 'from schemas import User' in the same file shadows the ORM model and crashes.\n\
1830- NAMING: If you have an ORM model User and a schema User, rename the schema to UserResponse, UserRead, or UserOut.\n\
1831- Pydantic v2: do NOT use 'from pydantic.networks import Url' — Url does not exist in pydantic v2. Use HttpUrl directly from pydantic: 'from pydantic import HttpUrl'.\n\
1832- conftest.py: import async_sessionmaker from sqlalchemy.ext.asyncio (NOT from sqlalchemy.orm import sessionmaker for async).\n\
1833- SQLite: do NOT use Mapped[uuid.UUID] — SQLite has no native UUID type. Use Mapped[str] with default=lambda: str(uuid.uuid4()).\n\
1834- SQLAlchemy: do NOT set Base = None and assign later. Define Base as 'class Base(DeclarativeBase): pass' in database.py and import it everywhere.\n\
1835- conftest.py: MUST override app dependencies with test session. Use app.dependency_overrides[get_db] = get_test_db.\n\
1836- pytest-asyncio: in pyproject.toml use asyncio_mode (underscore), NOT asyncio-mode (dash).\n\
1837- Pydantic response schemas: datetime fields must be typed as datetime, NOT str. SQLAlchemy returns datetime objects.\n\
1838- SQLAlchemy: IntegrityError is in sqlalchemy.exc, NOT sqlalchemy. Use 'from sqlalchemy.exc import IntegrityError'.\n\
1839- Every test file MUST start with 'import pytest'. Never use pytest.fixture or pytest.mark without importing pytest first.\n\
1840- DEPENDENCIES: Put ALL dependencies (including pytest, pytest-asyncio, httpx, hypothesis) in [project.dependencies], NOT in [project.optional-dependencies]. The verifier installs from [project.dependencies] only.\n\
1841- DEPENDENCIES: Always include a requirements.txt with ALL deps (including test deps) as a fallback.\n\
1842- PYTEST CONFIG: pyproject.toml MUST include [tool.pytest.ini_options] with asyncio_mode = 'auto' and testpaths = ['tests']. This eliminates the need for @pytest.mark.asyncio on every test.\n\
1843- ASYNC/SYNC CONSISTENCY: If production uses create_async_engine + AsyncSession, tests MUST use httpx.AsyncClient(transport=ASGITransport(app=app)) with async fixtures. Do NOT use TestClient with async production code — TestClient triggers the lifespan which calls the async engine.\n\
1844- LIFESPAN: FastAPI lifespan handlers that create DB tables MUST wrap engine.begin() in try/except so tests with overridden databases don't crash. Or use 'if os.getenv(\"TESTING\") != \"1\"' guard.\n\
1845- TYPING: Always import Optional, List, Dict from typing for Python <3.10 compat. Or use 'from __future__ import annotations' at the top of every file.\n\
1846- TOML: pyproject.toml must NOT have duplicate keys (e.g. two 'warn_return_any = true' in [tool.mypy]). Duplicate keys cause TOML parse errors that prevent pytest from running.\n",
1847        "rust" => "\
1848Rust-specific:\n\
1849- Handle all Result/Option types — no unwrap() in production code\n\
1850- Use thiserror for custom error types\n\
1851- Ensure all public types derive necessary traits (Debug, Clone, Serialize, Deserialize)\n\
1852- Use ? operator for error propagation, not unwrap()\n",
1853        "go" => "\
1854Go-specific:\n\
1855- Always check error returns — no _ = err\n\
1856- Use context.Context as first parameter in functions that do I/O\n\
1857- Close resources with defer\n\
1858- Use interfaces for dependency injection\n",
1859        "typescript" => "\
1860TypeScript-specific:\n\
1861- Use strict mode in tsconfig.json\n\
1862- Avoid any type — use proper generics\n\
1863- Use async/await consistently, not mixed callbacks\n",
1864        _ => "",
1865    };
1866
1867    format!("{}{}", common, lang_specific)
1868}
1869
1870/// Merge new files into accumulator, skipping duplicates (first version wins).
1871#[allow(dead_code)]
1872fn merge_files(all: &mut Vec<GeneratedFile>, new: Vec<GeneratedFile>) {
1873    for f in new {
1874        if !all.iter().any(|existing| existing.path == f.path) {
1875            all.push(f);
1876        }
1877    }
1878}
1879
1880/// Auto-fix common Python import mistakes that LLMs consistently make.
/// These are deterministic string replacements: cheaper and more reliable than re-prompting.
1882fn sanitize_python_imports(files: &mut Vec<GeneratedFile>) {
1883    let fixes: &[(&str, &str)] = &[
1884        // ConfigDict must come from pydantic, not pydantic_settings
1885        (
1886            "from pydantic_settings import ConfigDict",
1887            "from pydantic import ConfigDict",
1888        ),
1889        (
1890            "from pydantic_settings import BaseSettings, ConfigDict",
1891            "from pydantic_settings import BaseSettings\nfrom pydantic import ConfigDict",
1892        ),
1893        // BaseSettings must come from pydantic_settings, not pydantic
1894        (
1895            "from pydantic import BaseSettings",
1896            "from pydantic_settings import BaseSettings",
1897        ),
1898        // DeclarativeBase must come from sqlalchemy.orm, not sqlalchemy.ext.declarative
1899        (
1900            "from sqlalchemy.ext.declarative import DeclarativeBase",
1901            "from sqlalchemy.orm import DeclarativeBase",
1902        ),
1903        // validator is v1, field_validator is v2
1904        (
1905            "from pydantic import validator",
1906            "from pydantic import field_validator",
1907        ),
1908    ];
1909
1910    for file in files.iter_mut() {
1911        if !file.path.to_string_lossy().ends_with(".py") {
1912            continue;
1913        }
1914        let mut changed = false;
1915        let mut content = file.content.clone();
1916        for (bad, good) in fixes {
1917            if content.contains(bad) {
1918                content = content.replace(bad, good);
1919                changed = true;
1920            }
1921        }
1922        if changed {
1923            file.content = content;
1924        }
1925    }
1926}
1927
1928/// Sanitize __init__.py files: strip re-exports that cause circular imports.
1929/// LLMs consistently ignore "empty __init__.py" instructions, so we enforce it.
1930fn sanitize_init_files(files: &mut Vec<GeneratedFile>) {
1931    for file in files.iter_mut() {
1932        let path_str = file.path.display().to_string();
1933        if !path_str.ends_with("__init__.py") {
1934            continue;
1935        }
1936        // Check if file has actual imports (not just docstrings/comments/__all__)
1937        let has_imports = file.content.lines().any(|line| {
1938            let trimmed = line.trim();
1939            (trimmed.starts_with("from ") || trimmed.starts_with("import "))
1940                && !trimmed.starts_with("from __future__")
1941        });
1942        if has_imports {
            // Replace with an effectively empty file (docstring/comments only).
            // Keep a leading line only if it is self-contained: a multi-line
            // docstring whose closing quotes sit on a later line would be cut
            // off here, leaving an unterminated string.
            let docstring = file
                .content
                .lines()
                .take_while(|l| {
                    let t = l.trim();
                    t.starts_with('#')
                        || t.is_empty()
                        || (t.len() >= 6 && t.starts_with("\"\"\"") && t.ends_with("\"\"\""))
                        || (t.len() >= 6 && t.starts_with("'''") && t.ends_with("'''"))
                })
                .collect::<Vec<_>>()
                .join("\n");
1955            let cleaned = if docstring.trim().is_empty() {
1956                format!(
1957                    "\"\"\"{}\"\"\"",
1958                    path_str
1959                        .replace("__init__.py", "")
1960                        .replace('/', ".")
1961                        .trim_matches('.')
1962                )
1963            } else {
1964                docstring
1965            };
1966            file.content = cleaned;
1967        }
1968    }
1969}
1970
/// Truncate a string to at most max_chars bytes, appending "..." if truncated.
fn truncate_str(s: &str, max_chars: usize) -> String {
    if s.len() <= max_chars {
        s.to_string()
    } else {
        // Back off to the nearest char boundary so the slice cannot panic on
        // multi-byte UTF-8 (stable equivalent of the nightly-only
        // str::floor_char_boundary).
        let mut end = max_chars;
        while end > 0 && !s.is_char_boundary(end) {
            end -= 1;
        }
        format!("{}...", &s[..end])
    }
}
1981
1982/// Load existing generated files from the output directory.
1983fn load_files_from_dir(dir: &Path) -> Result<Vec<GeneratedFile>> {
1984    let mut files = Vec::new();
1985    let skip = [
1986        "__pycache__",
1987        ".pytest_cache",
1988        ".pyc",
1989        ".mypy_cache",
1990        ".venv",
1991        "node_modules",
1992    ];
1993
1994    for path in walkdir(dir)? {
1995        let path_str = path.to_string_lossy();
1996        if skip.iter().any(|s| path_str.contains(s)) {
1997            continue;
1998        }
1999        // Only load source files
2000        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
2001        if ![
2002            "py", "ts", "js", "tsx", "jsx", "rs", "go", "toml", "json", "yml", "yaml", "md", "txt",
2003            "cfg", "ini", "html", "css",
2004        ]
2005        .contains(&ext)
2006        {
2007            continue;
2008        }
2009        if let Ok(content) = fs::read_to_string(&path) {
2010            let relative = path.strip_prefix(dir).unwrap_or(&path).to_path_buf();
2011            let lang = match ext {
2012                "py" => "python",
2013                "ts" | "tsx" => "typescript",
2014                "js" | "jsx" => "javascript",
2015                "rs" => "rust",
2016                "go" => "go",
2017                _ => "text",
2018            };
2019            files.push(GeneratedFile {
2020                path: relative,
2021                content,
2022                language: lang.to_string(),
2023            });
2024        }
2025    }
2026    Ok(files)
2027}
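
// Example (illustrative): an output dir holding app/main.py and
// __pycache__/main.cpython-311.pyc yields a single GeneratedFile with
// path "app/main.py", language "python", and content read from disk.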
2028
2029/// Identify which files have issues based on feedback text.
2030/// Returns Vec of (file_index, Vec<issue_string>).
2031fn identify_broken_files(feedback: &str, files: &[GeneratedFile]) -> Vec<(usize, Vec<String>)> {
2032    let mut broken: Vec<(usize, Vec<String>)> = Vec::new();
2033    let feedback_lower = feedback.to_lowercase();
2034
2035    // Step 1: Find files explicitly mentioned by path in feedback
2036    for (idx, file) in files.iter().enumerate() {
2037        let file_name = file.path.display().to_string();
2038        let mut issues = Vec::new();
2039
2040        for line in feedback.lines() {
2041            if line.contains(&file_name) {
2042                issues.push(line.trim().to_string());
2043            }
2044        }
2045
2046        if !issues.is_empty() {
2047            broken.push((idx, issues));
2048        }
2049    }
2050
2051    // Step 2: Import error chain tracing — find the SOURCE of import failures
2052    for line in feedback.lines() {
2053        let lower_line = line.to_lowercase();
2054        if !(lower_line.contains("importerror")
2055            || lower_line.contains("modulenotfounderror")
2056            || lower_line.contains("cannot import name")
2057            || lower_line.contains("circular import"))
2058        {
2059            continue;
2060        }
2061
2062        // Extract the module path (e.g., "src.database", "app.api.v1.routes.users")
2063        if let Some(module) = extract_failed_module(&lower_line) {
2064            // Convert module path to file path: "src.database" → "src/database/__init__.py" or "src/database.py"
2065            let module_as_path = module.replace('.', "/");
2066
2067            for (idx, file) in files.iter().enumerate() {
2068                if broken.iter().any(|(i, _)| *i == idx) {
2069                    continue;
2070                }
2071
2072                let file_path = file.path.display().to_string();
2073
2074                // Match: file IS the broken module (e.g., src/database/__init__.py or src/database/session.py)
2075                let file_module = file_path
2076                    .replace('/', ".")
2077                    .trim_end_matches(".py")
2078                    .replace(".__init__", "")
2079                    .to_string();
2080                if file_module == module
2081                    || file_module.starts_with(&format!("{}.", module))
2082                    || module.starts_with(&format!("{}.", file_module))
2083                {
2084                    broken.push((
2085                        idx,
2086                        vec![format!(
2087                            "Import chain error — this module is part of the broken import: {}",
2088                            line.trim()
2089                        )],
2090                    ));
2091                }
2092
2093                // Match: file path contains the module path (e.g., "src/database/session.py" contains "src/database")
2094                if file_path.starts_with(&module_as_path)
2095                    && !file_path.contains("__pycache__")
2096                    && !broken.iter().any(|(i, _)| *i == idx)
2097                {
2098                    broken.push((
2099                        idx,
2100                        vec![format!("Part of broken module {}: {}", module, line.trim())],
2101                    ));
2102                }
2103            }
2104        }
2105    }
2106
2107    // Step 2b: NameError tracing — find the specific file where the error occurred
2108    // e.g., "NameError: name 'Bookmark' is not defined" in conftest.py
2109    // Only flag the file mentioned in the error context, not every file using the name
2110    for line in feedback.lines() {
2111        if !line.contains("NameError") {
2112            continue;
2113        }
2114        if let Some(start) = line.find("name '") {
2115            if let Some(end) = line[start + 6..].find('\'') {
2116                let undefined_name = &line[start + 6..start + 6 + end];
2117                let mut found_specific = false;
2118
2119                // First: try to find the specific file mentioned in surrounding error context
2120                // pytest errors mention file paths like "tests/conftest.py:42"
2121                for ctx_line in feedback.lines() {
2122                    for (idx, file) in files.iter().enumerate() {
2123                        if broken.iter().any(|(i, _)| *i == idx) {
2124                            continue;
2125                        }
2126                        let file_name = file.path.display().to_string();
2127                        if ctx_line.contains(&file_name)
2128                            && (ctx_line.contains("Error")
2129                                || ctx_line.contains("conftest")
2130                                || ctx_line.contains("FAILED"))
2131                            && file.content.contains(undefined_name)
2132                        {
2133                            broken.push((idx, vec![format!(
2134                                    "NameError: '{}' used but not imported. Add the missing import.", undefined_name
2135                                )]));
2136                            found_specific = true;
2137                        }
2138                    }
2139                }
2140
2141                // Fallback: if no specific file found, flag ONLY test/conftest files missing the import
2142                // (NameErrors almost always originate in test code, not production code)
2143                if !found_specific {
2144                    for (idx, file) in files.iter().enumerate() {
2145                        if broken.iter().any(|(i, _)| *i == idx) {
2146                            continue;
2147                        }
2148                        let file_name = file.path.display().to_string();
2149                        if !file_name.ends_with(".py") || file_name.ends_with("__init__.py") {
2150                            continue;
2151                        }
2152                        // Only check test files and conftest as fallback
2153                        if !(file_name.contains("test") || file_name.contains("conftest")) {
2154                            continue;
2155                        }
2156                        if file.content.contains(undefined_name) {
2157                            let has_import = file.content.lines().any(|l| {
2158                                let t = l.trim();
2159                                if !(t.starts_with("from ") || t.starts_with("import ")) {
2160                                    return false;
2161                                }
2162                                t.split(|c: char| !c.is_alphanumeric() && c != '_')
2163                                    .any(|word| word == undefined_name)
2164                            });
2165                            if !has_import {
2166                                broken.push((idx, vec![format!(
2167                                    "NameError: '{}' used but not imported. Add the missing import.", undefined_name
2168                                )]));
2169                            }
2170                        }
2171                    }
2172                }
2173            }
2174        }
2175    }
2176
2177    // Step 2c: AttributeError tracing — find class definitions missing attributes
2178    // e.g., "'Settings' object has no attribute 'ALLOWED_ORIGINS'" → find file defining Settings class
2179    for line in feedback.lines() {
2180        if !line.contains("AttributeError") || !line.contains("has no attribute") {
2181            continue;
2182        }
2183        // Extract class name and attribute from "'ClassName' object has no attribute 'attr_name'"
2184        if let Some(cls_start) = line.find('\'') {
2185            if let Some(cls_end) = line[cls_start + 1..].find('\'') {
2186                let class_name = &line[cls_start + 1..cls_start + 1 + cls_end];
2187                if let Some(attr_start) = line.rfind("'") {
2188                    let before_last = &line[..attr_start];
2189                    if let Some(attr_start2) = before_last.rfind("'") {
2190                        let attr_name = &line[attr_start2 + 1..attr_start];
2191                        // Find files that define this class
2192                        for (idx, file) in files.iter().enumerate() {
2193                            if broken.iter().any(|(i, _)| *i == idx) {
2194                                continue;
2195                            }
2196                            let class_def = format!("class {}", class_name);
2197                            if file.content.contains(&class_def) {
2198                                broken.push((idx, vec![format!(
2199                                    "AttributeError: class '{}' missing attribute '{}'. Add it.", class_name, attr_name
2200                                )]));
2201                            }
2202                        }
2203                    }
2204                }
2205            }
2206        }
2207    }
2208
2209    // Step 2d: "cannot import name 'X' from 'external.package'" — find PROJECT files that have
2210    // the bad import line. The module is external (pydantic, sqlalchemy, etc.) so import chain
2211    // tracing won't match any project file. Instead, grep for the import in project code.
2212    for line in feedback.lines() {
2213        let lower_line = line.to_lowercase();
2214        if !lower_line.contains("cannot import name") {
2215            continue;
2216        }
        // Extract name and source: "cannot import name 'Url' from 'pydantic.networks'".
        // Offsets found in lower_line are applied to the original line to preserve
        // the name's case; this is safe because lowercasing ASCII error text keeps
        // byte offsets unchanged.
2218        if let Some(name_start) = lower_line.find("cannot import name '") {
2219            let after_name = &lower_line[name_start + 20..];
2220            if let Some(name_end) = after_name.find('\'') {
2221                let import_name = &line[name_start + 20..name_start + 20 + name_end];
2222                if let Some(from_start) = lower_line.find("from '") {
2223                    let after_from = &line[from_start + 6..];
2224                    if let Some(from_end) = after_from.find('\'') {
2225                        let from_module = &after_from[..from_end];
2226                        // Build the import pattern to search for in project files
2227                        let import_pattern = format!("from {} import", from_module);
2228                        for (idx, file) in files.iter().enumerate() {
2229                            if broken.iter().any(|(i, _)| *i == idx) {
2230                                continue;
2231                            }
2232                            if file.content.contains(&import_pattern)
2233                                && file.content.contains(import_name)
2234                            {
2235                                broken.push((idx, vec![format!(
2236                                    "Bad import: 'from {} import {}' — '{}' does not exist in this package. Remove or replace it.",
2237                                    from_module, import_name, import_name
2238                                )]));
2239                            }
2240                        }
2241                    }
2242                }
2243            }
2244        }
2245    }
2246
2247    // Step 3: __init__.py re-export detection — flag __init__.py files that import from submodules
2248    // (common cause of circular imports)
2249    if feedback_lower.contains("importerror")
2250        || feedback_lower.contains("circular")
2251        || feedback_lower.contains("nameerror")
2252    {
2253        for (idx, file) in files.iter().enumerate() {
2254            if broken.iter().any(|(i, _)| *i == idx) {
2255                continue;
2256            }
2257            let file_name = file.path.display().to_string();
2258            let lower_content = file.content.to_lowercase();
2259
2260            // Flag __init__.py files that re-export (these commonly cause circular imports)
2261            if file_name.ends_with("__init__.py")
2262                && !file.content.trim().is_empty()
2263                && lower_content.contains("from ")
2264                && lower_content.contains(" import ")
2265            {
2266                broken.push((idx, vec!["__init__.py re-exports cause circular imports — should be empty or contain only __all__".to_string()]));
2267            }
2268        }
2269    }
2270
2271    // Step 4: Content-based issue detection
2272    for (idx, file) in files.iter().enumerate() {
2273        if broken.iter().any(|(i, _)| *i == idx) {
2274            continue;
2275        }
2276
2277        let file_name = file.path.display().to_string();
2278        let lower_content = file.content.to_lowercase();
2279        let mut issues = Vec::new();
2280
2281        // Hardcoded secrets: only flag config/docker/env files
2282        if feedback_lower.contains("hardcoded secret") || feedback_lower.contains("secrets found") {
2283            let is_config_file = file_name.contains("config")
2284                || file_name.contains("docker")
2285                || file_name.contains("settings")
2286                || file_name.contains(".env")
2287                || file_name.contains(".yml")
2288                || file_name.contains(".yaml");
2289            if is_config_file
2290                && (lower_content.contains("password = \"")
2291                    || lower_content.contains("secret_key = \""))
2292            {
2293                issues.push("Hardcoded secrets — use environment variable references".to_string());
2294            }
2295        }
2296
2297        // Pydantic v1/v2: only flag files that actually use wrong imports
2298        if lower_content.contains("from pydantic import basesettings") {
2299            issues.push("Pydantic v1/v2 mismatch: use pydantic_settings.BaseSettings".to_string());
2300        }
2301
2302        if !issues.is_empty() {
2303            broken.push((idx, issues));
2304        }
2305    }
2306
2307    broken
2308}
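
// Worked example (illustrative): given the pytest line
//     ImportError: cannot import name 'get_db' from 'app.database'
// step 2 maps the module "app.database" onto app/database.py (or the
// app/database/ package) and flags it, even though no file path appears in
// the error text itself; step 2d additionally flags project files whose
// source contains "from app.database import" together with "get_db".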
2309
2310/// Extract the failed module path from an import error message.
2311fn extract_failed_module(error_line: &str) -> Option<String> {
    // "No module named 'app.api.v1.endpoints'" (callers pass a lowercased line).
    if let Some(pos) = error_line.find("no module named") {
        // get() rather than direct slicing so a line that ends right after the
        // marker cannot panic; quotes, spaces, and parens are trimmed below.
        let after = error_line.get(pos + 15..).unwrap_or("");
2315        let module = after
2316            .trim()
2317            .trim_matches(|c: char| c == '\'' || c == '"' || c == ' ' || c == '(');
2318        let module = module.trim_end_matches(['\'', '"', ')']);
2319        if !module.is_empty() {
2320            return Some(module.to_string());
2321        }
2322    }
2323    // "cannot import name 'X' from 'app.module'"
2324    if let Some(pos) = error_line.find("from '") {
2325        let after = &error_line[pos + 6..];
2326        if let Some(end) = after.find('\'') {
2327            return Some(after[..end].to_string());
2328        }
2329    }
2330    None
2331}
2332
2333fn detect_language(prompt: &str) -> String {
2334    let lower = prompt.to_lowercase();
2335    if lower.contains("python")
2336        || lower.contains("fastapi")
2337        || lower.contains("django")
2338        || lower.contains("flask")
2339    {
2340        "python".to_string()
2341    } else if lower.contains("typescript") || lower.contains("next.js") || lower.contains("react") {
2342        "typescript".to_string()
2343    } else if lower.contains("javascript") || lower.contains("node") || lower.contains("express") {
2344        "javascript".to_string()
2345    } else if lower.contains("rust") || lower.contains("cargo") {
2346        "rust".to_string()
2347    } else if lower.contains("go ") || lower.contains("golang") {
2348        "go".to_string()
2349    } else if lower.contains("c++") || lower.contains("cpp") || lower.contains("cmake") {
2350        "c++".to_string()
2351    } else {
2352        "python".to_string()
2353    }
2354}
2355
2356fn create_output_dir(prompt: &str) -> Result<PathBuf> {
2357    let safe_name: String = prompt
2358        .to_lowercase()
2359        .chars()
2360        .map(|c| if c.is_alphanumeric() { c } else { '_' })
2361        .collect::<String>()
2362        .chars()
2363        .take(40)
2364        .collect();
2365
2366    let mut cleaned = String::new();
2367    let mut last_was_underscore = false;
2368    for c in safe_name.chars() {
2369        if c == '_' {
2370            if !last_was_underscore {
2371                cleaned.push(c);
2372            }
2373            last_was_underscore = true;
2374        } else {
2375            cleaned.push(c);
2376            last_was_underscore = false;
2377        }
2378    }
2379    let cleaned = cleaned.trim_matches('_');
2380
2381    let dir = PathBuf::from(format!("output/{}", cleaned));
2382    fs::create_dir_all(&dir).context("Failed to create output directory")?;
2383    Ok(dir)
2384}
2385
2386fn list_files(dir: &Path) -> Result<()> {
2387    println!("Files:");
2388    let skip = ["__pycache__", ".pytest_cache", ".pyc", ".mypy_cache"];
2389    for entry in walkdir(dir)? {
2390        let path_str = entry.to_string_lossy();
2391        if skip.iter().any(|s| path_str.contains(s)) {
2392            continue;
2393        }
2394        let relative = entry.strip_prefix(dir).unwrap_or(&entry);
2395        let size = fs::metadata(&entry).map(|m| m.len()).unwrap_or(0);
2396        println!("  {} ({} bytes)", relative.display(), size);
2397    }
2398    Ok(())
2399}
2400
/// Recursively collect every file path under dir; directories are traversed
/// but not returned.
fn walkdir(dir: &Path) -> Result<Vec<PathBuf>> {
2402    let mut files = Vec::new();
2403    if dir.is_dir() {
2404        for entry in fs::read_dir(dir)? {
2405            let entry = entry?;
2406            let path = entry.path();
2407            if path.is_dir() {
2408                files.extend(walkdir(&path)?);
2409            } else {
2410                files.push(path);
2411            }
2412        }
2413    }
2414    Ok(files)
2415}
2416
2417#[cfg(test)]
2418mod tests {
2419    use super::*;
2420
2421    #[test]
2422    fn test_detect_language_python() {
2423        assert_eq!(detect_language("Build a FastAPI endpoint"), "python");
2424    }
2425
2426    #[test]
2427    fn test_detect_language_typescript() {
2428        assert_eq!(detect_language("Create a Next.js dashboard"), "typescript");
2429    }
2430
2431    #[test]
2432    fn test_detect_language_default() {
2433        assert_eq!(detect_language("build something cool"), "python");
2434    }
2435
2436    #[test]
2437    fn test_create_output_dir() {
2438        let dir = create_output_dir("Build a REST API!").unwrap();
2439        assert!(dir.to_str().unwrap().contains("build_a_rest_api"));
2440        let _ = std::fs::remove_dir_all("output");
2441    }
2442
2443    #[test]
2444    fn test_default_paths() {
2445        assert_eq!(default_code_path("python"), "app/main.py");
2446        assert_eq!(default_test_path("python"), "tests/test_main.py");
2447        assert_eq!(default_code_path("rust"), "src/main.rs");
2448    }
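
    // Illustrative regression tests for the pure helpers above: a sketch that
    // exercises only behavior visible in this file, using in-memory fixtures.

    #[test]
    fn test_truncate_str_respects_char_boundaries() {
        // "héllo" is 6 bytes; cutting at byte 2 would split the two-byte 'é'
        // without the char-boundary back-off.
        assert_eq!(truncate_str("héllo", 2), "h...");
        assert_eq!(truncate_str("hi", 10), "hi");
    }

    #[test]
    fn test_extract_failed_module_variants() {
        assert_eq!(
            extract_failed_module("modulenotfounderror: no module named 'app.api'"),
            Some("app.api".to_string())
        );
        assert_eq!(
            extract_failed_module("importerror: cannot import name 'x' from 'app.db'"),
            Some("app.db".to_string())
        );
        assert_eq!(extract_failed_module("an unrelated line"), None);
    }

    #[test]
    fn test_sanitize_python_imports_rewrites_basesettings() {
        let mut files = vec![GeneratedFile {
            path: PathBuf::from("app/config.py"),
            content: "from pydantic import BaseSettings\n".to_string(),
            language: "python".to_string(),
        }];
        sanitize_python_imports(&mut files);
        assert!(files[0]
            .content
            .contains("from pydantic_settings import BaseSettings"));
    }

    #[test]
    fn test_sanitize_init_files_strips_reexports() {
        let mut files = vec![GeneratedFile {
            path: PathBuf::from("app/__init__.py"),
            content: "from app.main import app\n".to_string(),
            language: "python".to_string(),
        }];
        sanitize_init_files(&mut files);
        assert!(!files[0].content.contains("import"));
    }

    #[test]
    fn test_identify_broken_files_by_path_mention() {
        let files = vec![GeneratedFile {
            path: PathBuf::from("app/main.py"),
            content: "print('hi')\n".to_string(),
            language: "python".to_string(),
        }];
        let broken = identify_broken_files("app/main.py: unused import", &files);
        assert_eq!(broken.len(), 1);
        assert_eq!(broken[0].0, 0);
    }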
2449}