oxi/skills/
autonomous_loop.rs

1//! Autonomous development loop skill for oxi
2//!
3//! A fully autonomous, recursive development cycle that runs
4//! **Design → Plan → Implement → Verify → Fix** in a loop until zero
5//! *genuine* issues remain. The operator issues one command and gets a
6//! finished, verified, committed result.
7//!
8//! This module provides:
9//! - [`AutonomousLoop`] — state machine that tracks loop iterations, phases,
10//!   batches, issues, and verification results.
11//! - [`LoopTask`] / [`TaskBatch`] — structured task and batch definitions
12//!   with dependency tracking.
13//! - [`Issue`] / [`IssueSeverity`] / [`IssueVerdict`] — issue tracking with
14//!   severity classification and false-positive filtering.
15//! - [`LoopPhase`] — phase enum for the six phases of the loop.
16//! - [`AutonomousLoopSkill`] — skill content generator that produces the
17//!   system-prompt instructions for the LLM-driven autonomous workflow.
18//! - [`LoopStatus`] — serializable status snapshot for diagnostics.
19
20use anyhow::{bail, Result};
21use chrono::Utc;
22use serde::{Deserialize, Serialize};
23use std::fmt;
24use std::path::{PathBuf};
25
26// ── Constants ──────────────────────────────────────────────────────────
27
/// Hard ceiling on the number of full loop iterations; the loop is forced to
/// stop once this many have been used.
pub const MAX_ITERATIONS: u8 = 8;
30
31// ── Phase ──────────────────────────────────────────────────────────────
32
33/// The phases of the autonomous development loop.
34#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
35#[serde(rename_all = "snake_case")]
36pub enum LoopPhase {
37    /// Understand requirements and produce a clear design.
38    Design,
39    /// Decompose into ordered, verifiable implementation steps.
40    Plan,
41    /// Execute the plan by batch, parallelizing independent tasks.
42    Implement,
43    /// Multi-axis verification that catches real problems.
44    Verify,
45    /// Confirm that every issue found is a REAL issue.
46    ReValidate,
47    /// Fix only the confirmed, genuine issues.
48    Fix,
49    /// Session concluded — all criteria met.
50    Done,
51}
52
53impl Default for LoopPhase {
54    fn default() -> Self {
55        LoopPhase::Design
56    }
57}
58
59impl fmt::Display for LoopPhase {
60    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
61        match self {
62            LoopPhase::Design => write!(f, "DESIGN"),
63            LoopPhase::Plan => write!(f, "PLAN"),
64            LoopPhase::Implement => write!(f, "IMPLEMENT"),
65            LoopPhase::Verify => write!(f, "VERIFY"),
66            LoopPhase::ReValidate => write!(f, "RE-VALIDATE"),
67            LoopPhase::Fix => write!(f, "FIX"),
68            LoopPhase::Done => write!(f, "DONE"),
69        }
70    }
71}
72
73impl LoopPhase {
74    /// Return all phases in loop order.
75    pub fn all() -> &'static [LoopPhase] {
76        &[
77            LoopPhase::Design,
78            LoopPhase::Plan,
79            LoopPhase::Implement,
80            LoopPhase::Verify,
81            LoopPhase::ReValidate,
82            LoopPhase::Fix,
83            LoopPhase::Done,
84        ]
85    }
86
87    /// Advance to the next phase in the standard sequence.
88    ///
89    /// The loop flow is:
90    /// ```text
91    /// Design → Plan → Implement → Verify
92    ///   ↑                              │
93    ///   └── Fix ← ReValidate ←────────┘
94    ///                                    ↘ Done (no issues)
95    /// ```
96    pub fn next(&self) -> Option<LoopPhase> {
97        match self {
98            LoopPhase::Design => Some(LoopPhase::Plan),
99            LoopPhase::Plan => Some(LoopPhase::Implement),
100            LoopPhase::Implement => Some(LoopPhase::Verify),
101            LoopPhase::Verify => Some(LoopPhase::ReValidate),
102            LoopPhase::ReValidate => Some(LoopPhase::Fix),
103            LoopPhase::Fix => Some(LoopPhase::Verify), // back to verify after fix
104            LoopPhase::Done => None,
105        }
106    }
107
108    /// Returns `true` if this phase should proceed to `Done` when no issues
109    /// are found (as opposed to continuing the loop).
110    pub fn can_exit_on_clean(&self) -> bool {
111        matches!(self, LoopPhase::Verify | LoopPhase::ReValidate)
112    }
113}
114
115// ── Task and Batch ─────────────────────────────────────────────────────
116
117/// Status of a task or batch.
118#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
119#[serde(rename_all = "snake_case")]
120pub enum TaskStatus {
121    /// Not yet started.
122    Pending,
123    /// Currently running.
124    Running,
125    /// Successfully completed.
126    Done,
127    /// Failed with an error.
128    Failed,
129}
130
131impl fmt::Display for TaskStatus {
132    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
133        match self {
134            TaskStatus::Pending => write!(f, "pending"),
135            TaskStatus::Running => write!(f, "running"),
136            TaskStatus::Done => write!(f, "done"),
137            TaskStatus::Failed => write!(f, "failed"),
138        }
139    }
140}
141
142/// A single task in the implementation plan.
143#[derive(Debug, Clone, Serialize, Deserialize)]
144pub struct LoopTask {
145    /// Unique task identifier (e.g., "T1", "T2").
146    pub id: String,
147    /// Short description of what this task accomplishes.
148    pub description: String,
149    /// File paths this task creates or modifies.
150    pub touches_files: Vec<PathBuf>,
151    /// IDs of tasks this task depends on.
152    pub depends_on: Vec<String>,
153    /// How to verify this task works.
154    pub verification: String,
155    /// Current status.
156    pub status: TaskStatus,
157    /// Commit hash after completion, if any.
158    #[serde(skip_serializing_if = "Option::is_none")]
159    pub commit_hash: Option<String>,
160}
161
162impl LoopTask {
163    /// Create a new task with the given ID and description.
164    pub fn new(id: impl Into<String>, description: impl Into<String>) -> Self {
165        Self {
166            id: id.into(),
167            description: description.into(),
168            touches_files: Vec::new(),
169            depends_on: Vec::new(),
170            verification: String::new(),
171            status: TaskStatus::Pending,
172            commit_hash: None,
173        }
174    }
175
176    /// Add a file this task touches.
177    pub fn touches(mut self, path: impl Into<PathBuf>) -> Self {
178        self.touches_files.push(path.into());
179        self
180    }
181
182    /// Declare a dependency on another task.
183    pub fn depends_on(mut self, task_id: impl Into<String>) -> Self {
184        self.depends_on.push(task_id.into());
185        self
186    }
187
188    /// Set verification method.
189    pub fn verify_with(mut self, method: impl Into<String>) -> Self {
190        self.verification = method.into();
191        self
192    }
193
194    /// Mark the task as running.
195    pub fn start(&mut self) {
196        self.status = TaskStatus::Running;
197    }
198
199    /// Mark the task as completed with an optional commit hash.
200    pub fn complete(&mut self, commit_hash: Option<String>) {
201        self.status = TaskStatus::Done;
202        self.commit_hash = commit_hash;
203    }
204
205    /// Mark the task as failed.
206    pub fn fail(&mut self) {
207        self.status = TaskStatus::Failed;
208    }
209}
210
211/// A batch of tasks that can be executed in parallel.
212#[derive(Debug, Clone, Serialize, Deserialize)]
213pub struct TaskBatch {
214    /// Batch index (0-based).
215    pub index: usize,
216    /// Tasks in this batch.
217    pub tasks: Vec<LoopTask>,
218    /// Whether tasks in this batch have file conflicts (must run sequentially).
219    pub has_conflicts: bool,
220    /// Current status of the batch.
221    pub status: TaskStatus,
222}
223
224impl TaskBatch {
225    /// Create a new batch at the given index.
226    pub fn new(index: usize) -> Self {
227        Self {
228            index,
229            tasks: Vec::new(),
230            has_conflicts: false,
231            status: TaskStatus::Pending,
232        }
233    }
234
235    /// Add a task to this batch.
236    pub fn add_task(&mut self, task: LoopTask) {
237        self.tasks.push(task);
238    }
239
240    /// Mark all tasks as running.
241    pub fn start(&mut self) {
242        self.status = TaskStatus::Running;
243        for task in &mut self.tasks {
244            task.start();
245        }
246    }
247
248    /// Mark the batch as completed.
249    pub fn complete(&mut self) {
250        self.status = TaskStatus::Done;
251    }
252
253    /// Check if all tasks in this batch are done.
254    pub fn all_done(&self) -> bool {
255        self.tasks.iter().all(|t| t.status == TaskStatus::Done)
256    }
257
258    /// Check if any task in this batch failed.
259    pub fn any_failed(&self) -> bool {
260        self.tasks.iter().any(|t| t.status == TaskStatus::Failed)
261    }
262}
263
264// ── Issue tracking ─────────────────────────────────────────────────────
265
266/// Severity of an issue found during verification.
267#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
268#[serde(rename_all = "snake_case")]
269pub enum IssueSeverity {
270    /// Formatting, naming preference — optional.
271    Nit = 0,
272    /// Style inconsistency, missing edge case — should fix.
273    Minor = 1,
274    /// Incorrect behavior, failing tests, broken feature — must fix.
275    Important = 2,
276    /// Build broken, data loss, security vulnerability — must fix now.
277    Critical = 3,
278}
279
280impl fmt::Display for IssueSeverity {
281    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
282        match self {
283            IssueSeverity::Nit => write!(f, "Nit"),
284            IssueSeverity::Minor => write!(f, "Minor"),
285            IssueSeverity::Important => write!(f, "Important"),
286            IssueSeverity::Critical => write!(f, "Critical"),
287        }
288    }
289}
290
291/// Verdict after re-validating an issue.
292#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
293#[serde(rename_all = "snake_case")]
294pub enum IssueVerdict {
295    /// Real issue — proceed to fix.
296    Confirmed,
297    /// Not actually a problem — discard.
298    FalsePositive,
299    /// Real but out of scope — log for future.
300    Deferred,
301    /// Cannot determine — needs user input.
302    NeedsContext,
303}
304
305impl fmt::Display for IssueVerdict {
306    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
307        match self {
308            IssueVerdict::Confirmed => write!(f, "CONFIRMED"),
309            IssueVerdict::FalsePositive => write!(f, "FALSE_POSITIVE"),
310            IssueVerdict::Deferred => write!(f, "DEFERRED"),
311            IssueVerdict::NeedsContext => write!(f, "NEEDS_CONTEXT"),
312        }
313    }
314}
315
316/// An issue found during verification.
317#[derive(Debug, Clone, Serialize, Deserialize)]
318pub struct Issue {
319    /// Issue number (1-based, for display).
320    pub number: usize,
321    /// One-line description.
322    pub description: String,
323    /// Severity.
324    pub severity: IssueSeverity,
325    /// Location (file:line or component name).
326    pub location: String,
327    /// Evidence (error message, test output, or concrete observation).
328    pub evidence: String,
329    /// Whether the issue is reproducible.
330    pub reproducible: bool,
331    /// Suggested fix approach.
332    pub fix_approach: String,
333    /// Verdict after re-validation.
334    #[serde(skip_serializing_if = "Option::is_none")]
335    pub verdict: Option<IssueVerdict>,
336    /// Reason for the verdict (especially for false positives).
337    #[serde(skip_serializing_if = "Option::is_none")]
338    pub verdict_reason: Option<String>,
339    /// Whether this issue has been fixed.
340    pub fixed: bool,
341    /// Commit hash of the fix, if applied.
342    #[serde(skip_serializing_if = "Option::is_none")]
343    pub fix_commit: Option<String>,
344}
345
346impl Issue {
347    /// Create a new issue.
348    pub fn new(
349        number: usize,
350        description: impl Into<String>,
351        severity: IssueSeverity,
352        location: impl Into<String>,
353    ) -> Self {
354        Self {
355            number,
356            description: description.into(),
357            severity,
358            location: location.into(),
359            evidence: String::new(),
360            reproducible: false,
361            fix_approach: String::new(),
362            verdict: None,
363            verdict_reason: None,
364            fixed: false,
365            fix_commit: None,
366        }
367    }
368
369    /// Set evidence for this issue.
370    pub fn with_evidence(mut self, evidence: impl Into<String>) -> Self {
371        self.evidence = evidence.into();
372        self
373    }
374
375    /// Set whether the issue is reproducible.
376    pub fn reproducible(mut self, yes: bool) -> Self {
377        self.reproducible = yes;
378        self
379    }
380
381    /// Set the fix approach.
382    pub fn fix_approach(mut self, approach: impl Into<String>) -> Self {
383        self.fix_approach = approach.into();
384        self
385    }
386
387    /// Record a verdict after re-validation.
388    pub fn set_verdict(&mut self, verdict: IssueVerdict, reason: impl Into<String>) {
389        self.verdict = Some(verdict);
390        self.verdict_reason = Some(reason.into());
391    }
392
393    /// Mark the issue as fixed.
394    pub fn mark_fixed(&mut self, commit_hash: Option<String>) {
395        self.fixed = true;
396        self.fix_commit = commit_hash;
397    }
398
399    /// Whether this issue must be fixed (confirmed and not yet fixed).
400    pub fn needs_fix(&self) -> bool {
401        self.verdict == Some(IssueVerdict::Confirmed) && !self.fixed
402    }
403
404    /// Whether this issue is actionable (confirmed or needs context, not fixed).
405    pub fn is_actionable(&self) -> bool {
406        matches!(
407            self.verdict,
408            Some(IssueVerdict::Confirmed) | Some(IssueVerdict::NeedsContext)
409        ) && !self.fixed
410    }
411}
412
413// ── Verification result ───────────────────────────────────────────────
414
415/// Result of a verification pass.
416#[derive(Debug, Clone, Serialize, Deserialize)]
417pub struct VerificationResult {
418    /// Whether the build succeeded.
419    pub build_passed: bool,
420    /// Whether all tests passed.
421    pub tests_passed: bool,
422    /// Whether type checking passed.
423    pub type_check_passed: bool,
424    /// Whether linting passed.
425    pub lint_passed: bool,
426    /// Issues found during this verification pass.
427    pub issues: Vec<Issue>,
428    /// Timestamp of this verification.
429    pub timestamp: String,
430}
431
432impl VerificationResult {
433    /// Create a new verification result.
434    pub fn new() -> Self {
435        Self {
436            build_passed: false,
437            tests_passed: false,
438            type_check_passed: false,
439            lint_passed: false,
440            issues: Vec::new(),
441            timestamp: Utc::now().to_rfc3339(),
442        }
443    }
444
445    /// Whether all checks passed with no issues.
446    pub fn is_clean(&self) -> bool {
447        self.build_passed
448            && self.tests_passed
449            && self.type_check_passed
450            && self.lint_passed
451            && self.issues.is_empty()
452    }
453
454    /// Whether the critical gates passed (build + tests).
455    pub fn critical_passed(&self) -> bool {
456        self.build_passed && self.tests_passed
457    }
458
459    /// Count issues by severity.
460    pub fn issue_count_by_severity(&self, severity: IssueSeverity) -> usize {
461        self.issues.iter().filter(|i| i.severity == severity).count()
462    }
463
464    /// Count confirmed issues that still need fixing.
465    pub fn confirmed_unfixed(&self) -> usize {
466        self.issues.iter().filter(|i| i.needs_fix()).count()
467    }
468}
469
470impl Default for VerificationResult {
471    fn default() -> Self {
472        Self::new()
473    }
474}
475
476// ── Loop status snapshot ───────────────────────────────────────────────
477
478/// A serializable snapshot of the autonomous loop state.
479#[derive(Debug, Clone, Serialize, Deserialize)]
480pub struct LoopStatus {
481    /// The task being executed.
482    pub task: String,
483    /// Current iteration number (1-based).
484    pub iteration: u8,
485    /// Maximum iterations allowed.
486    pub max_iterations: u8,
487    /// Current phase.
488    pub phase: LoopPhase,
489    /// Execution batches.
490    pub batches: Vec<TaskBatch>,
491    /// Issues found across all iterations.
492    pub issues: Vec<Issue>,
493    /// Latest verification result.
494    #[serde(skip_serializing_if = "Option::is_none")]
495    pub last_verification: Option<VerificationResult>,
496    /// Most recent commit hash.
497    #[serde(skip_serializing_if = "Option::is_none")]
498    pub last_commit: Option<String>,
499    /// Whether git working tree is clean.
500    pub git_clean: bool,
501    /// Any blocking condition.
502    #[serde(skip_serializing_if = "Option::is_none")]
503    pub blocker: Option<String>,
504    /// Timestamp of this status snapshot.
505    pub timestamp: String,
506}
507
508impl LoopStatus {
509    /// Render the status as a formatted string for display.
510    pub fn render(&self) -> String {
511        let mut s = String::with_capacity(2048);
512
513        s.push_str("AUTONOMOUS LOOP STATUS\n");
514        s.push_str("═══════════════════════\n");
515        s.push_str(&format!("Task: {}\n", self.task));
516        s.push_str(&format!(
517            "Iteration: {} / {}\n",
518            self.iteration, self.max_iterations
519        ));
520        s.push_str(&format!("Phase: {}\n", self.phase));
521
522        // Batch summary
523        let done_count = self
524            .batches
525            .iter()
526            .filter(|b| b.status == TaskStatus::Done)
527            .count();
528        let total_count = self.batches.len();
529        s.push_str(&format!(
530            "Batches: {} / {} done\n",
531            done_count, total_count
532        ));
533        for batch in &self.batches {
534            let task_ids: Vec<&str> = batch.tasks.iter().map(|t| t.id.as_str()).collect();
535            let mode = if batch.has_conflicts {
536                "sequential"
537            } else {
538                "parallel"
539            };
540            s.push_str(&format!(
541            "  Batch {}: [{}] ({}) — {}\n",
542            batch.index,
543            task_ids.join(", "),
544            mode,
545            batch.status
546        ));
547        }
548
549        // Issue summary
550        let total = self.issues.len();
551        let confirmed = self
552            .issues
553            .iter()
554            .filter(|i| i.verdict == Some(IssueVerdict::Confirmed))
555            .count();
556        let fixed = self.issues.iter().filter(|i| i.fixed).count();
557        s.push_str(&format!(
558            "Issues: {} found → {} confirmed → {} fixed\n",
559            total, confirmed, fixed
560        ));
561
562        // Progress bar
563        let pct = if total_count > 0 {
564            (done_count * 10) / total_count
565        } else {
566            0
567        };
568        let filled: String = "▓".repeat(pct);
569        let empty: String = "░".repeat(10 - pct);
570        s.push_str(&format!("Progress: {}{} \n", filled, empty));
571
572        // Commit and git status
573        if let Some(ref hash) = self.last_commit {
574            s.push_str(&format!("Last commit: {}\n", &hash[..7.min(hash.len())]));
575        }
576        s.push_str(&format!(
577            "Git status: {}\n",
578            if self.git_clean { "clean" } else { "dirty" }
579        ));
580
581        // Blocker
582        if let Some(ref blocker) = self.blocker {
583            s.push_str(&format!("Blocks: {}\n", blocker));
584        }
585
586        s
587    }
588}
589
590// ── Autonomous Loop state machine ──────────────────────────────────────
591
592/// The autonomous development loop state machine.
593///
594/// Tracks the full lifecycle of an autonomous development task across
595/// multiple iterations of the Design → Plan → Implement → Verify →
596/// ReValidate → Fix loop.
597///
598/// # Usage
599///
600/// ```rust,ignore
601/// let mut al = AutonomousLoop::new("Implement user authentication");
602/// al.start()?;
603///
604/// // Design phase
605/// al.advance()?; // → Plan
606///
607/// // Plan phase — add tasks and compute batches
608/// al.add_task(LoopTask::new("T1", "Create auth module").touches("src/auth.rs"));
609/// al.add_task(LoopTask::new("T2", "Add login route").depends_on("T1"));
610/// al.compute_batches()?;
611/// al.advance()?; // → Implement
612///
613/// // ... execute batches ...
614/// al.advance()?; // → Verify
615/// ```
616#[derive(Debug, Clone, Serialize, Deserialize)]
617pub struct AutonomousLoop {
618    /// Unique loop instance ID.
619    pub id: String,
620    /// The task description.
621    pub task: String,
622    /// Current iteration (1-based).
623    pub iteration: u8,
624    /// Maximum iterations allowed.
625    pub max_iterations: u8,
626    /// Current phase.
627    pub phase: LoopPhase,
628    /// Whether the loop has been started.
629    pub started: bool,
630    /// Whether the loop has been forcefully stopped.
631    pub emergency_stopped: bool,
632    /// All tasks in the plan.
633    pub tasks: Vec<LoopTask>,
634    /// Computed execution batches.
635    pub batches: Vec<TaskBatch>,
636    /// Issues found across all iterations.
637    pub issues: Vec<Issue>,
638    /// Latest verification result.
639    pub last_verification: Option<VerificationResult>,
640    /// Most recent commit hash.
641    pub last_commit: Option<String>,
642    /// Whether the git working tree is clean.
643    pub git_clean: bool,
644    /// Any blocking condition preventing progress.
645    pub blocker: Option<String>,
646    /// Creation timestamp.
647    pub created_at: String,
648    /// Last update timestamp.
649    pub updated_at: String,
650}
651
652impl AutonomousLoop {
653    /// Create a new autonomous loop for the given task.
654    pub fn new(task: impl Into<String>) -> Self {
655        Self {
656            id: uuid::Uuid::new_v4().to_string(),
657            task: task.into(),
658            iteration: 0,
659            max_iterations: MAX_ITERATIONS,
660            phase: LoopPhase::Design,
661            started: false,
662            emergency_stopped: false,
663            tasks: Vec::new(),
664            batches: Vec::new(),
665            issues: Vec::new(),
666            last_verification: None,
667            last_commit: None,
668            git_clean: true,
669            blocker: None,
670            created_at: Utc::now().to_rfc3339(),
671            updated_at: Utc::now().to_rfc3339(),
672        }
673    }
674
675    /// Set the maximum number of iterations.
676    pub fn with_max_iterations(mut self, max: u8) -> Self {
677        self.max_iterations = max.min(MAX_ITERATIONS).max(1);
678        self
679    }
680
681    /// Start the loop (transitions to iteration 1, Design phase).
682    pub fn start(&mut self) -> Result<()> {
683        if self.started {
684            bail!("Loop already started");
685        }
686        if self.emergency_stopped {
687            bail!("Loop was emergency-stopped and cannot be restarted");
688        }
689        self.started = true;
690        self.iteration = 1;
691        self.phase = LoopPhase::Design;
692        self.touch();
693        Ok(())
694    }
695
696    /// Emergency stop — halts the loop immediately.
697    pub fn emergency_stop(&mut self, reason: impl Into<String>) {
698        self.emergency_stopped = true;
699        self.blocker = Some(reason.into());
700        self.touch();
701    }
702
703    /// Advance to the next phase.
704    ///
705    /// Follows the standard loop flow:
706    /// - After ReValidate with no confirmed issues → Done
707    /// - After ReValidate with confirmed issues → Fix → Verify (new iteration)
708    /// - After Verify with no issues → Done
709    /// - After Verify with issues → ReValidate
710    pub fn advance(&mut self) -> Result<LoopPhase> {
711        if !self.started {
712            bail!("Loop has not been started");
713        }
714        if self.emergency_stopped {
715            bail!("Loop was emergency-stopped: {}", self.blocker.as_deref().unwrap_or("unknown"));
716        }
717
718        let next = match self.phase {
719            LoopPhase::Design => Some(LoopPhase::Plan),
720            LoopPhase::Plan => Some(LoopPhase::Implement),
721            LoopPhase::Implement => Some(LoopPhase::Verify),
722
723            LoopPhase::Verify => {
724                // If clean, we're done
725                if self.is_clean() {
726                    Some(LoopPhase::Done)
727                } else {
728                    Some(LoopPhase::ReValidate)
729                }
730            }
731
732            LoopPhase::ReValidate => {
733                let has_confirmed = self.issues.iter().any(|i| i.needs_fix());
734                if has_confirmed {
735                    Some(LoopPhase::Fix)
736                } else {
737                    // All issues are false positives or deferred — done!
738                    Some(LoopPhase::Done)
739                }
740            }
741
742            LoopPhase::Fix => {
743                // After fixing, increment iteration and go back to verify
744                self.iteration += 1;
745                if self.iteration > self.max_iterations {
746                    self.emergency_stop(format!(
747                        "Maximum iterations ({}) reached",
748                        self.max_iterations
749                    ));
750                    bail!(
751                        "Maximum iterations ({}) reached. Diagnostic:\n{}",
752                        self.max_iterations,
753                        self.diagnostic()
754                    );
755                }
756                Some(LoopPhase::Verify)
757            }
758
759            LoopPhase::Done => {
760                bail!("Loop is already complete");
761            }
762        };
763
764        if let Some(phase) = next {
765            self.phase = phase;
766            self.touch();
767            Ok(phase)
768        } else {
769            bail!("No valid next phase from {:?}", self.phase);
770        }
771    }
772
773    /// Jump to a specific phase (for recovery or testing).
774    pub fn set_phase(&mut self, phase: LoopPhase) {
775        self.phase = phase;
776        self.touch();
777    }
778
779    // ── Task management ─────────────────────────────────────────────
780
781    /// Add a task to the plan.
782    pub fn add_task(&mut self, task: LoopTask) {
783        self.tasks.push(task);
784        self.touch();
785    }
786
787    /// Get a task by ID.
788    pub fn get_task(&self, id: &str) -> Option<&LoopTask> {
789        self.tasks.iter().find(|t| t.id == id)
790    }
791
792    /// Get a mutable reference to a task by ID.
793    pub fn get_task_mut(&mut self, id: &str) -> Option<&mut LoopTask> {
794        self.tasks.iter_mut().find(|t| t.id == id)
795    }
796
797    // ── Batch computation ───────────────────────────────────────────
798
799    /// Compute execution batches from the current task list.
800    ///
801    /// Groups tasks into batches using topological ordering. Each batch
802    /// contains tasks whose dependencies are all in *earlier* batches.
803    /// Tasks within a batch that touch overlapping files are flagged
804    /// as having conflicts.
805    ///
806    /// Uses Kahn's algorithm for topological sorting, grouping tasks
807    /// by their dependency depth level.
808    pub fn compute_batches(&mut self) -> Result<()> {
809        if self.tasks.is_empty() {
810            self.batches.clear();
811            return Ok(());
812        }
813
814        use std::collections::{HashMap, HashSet, VecDeque};
815
816        // Build adjacency graph
817        let task_ids: HashSet<&str> = self.tasks.iter().map(|t| t.id.as_str()).collect();
818        let mut in_degree: HashMap<&str, usize> = HashMap::new();
819        let mut dependents: HashMap<&str, Vec<&str>> = HashMap::new(); // task -> tasks that depend on it
820
821        for task in &self.tasks {
822            in_degree.entry(task.id.as_str()).or_insert(0);
823            dependents.entry(task.id.as_str()).or_insert_with(Vec::new);
824
825            for dep in &task.depends_on {
826                if !task_ids.contains(dep.as_str()) {
827                    bail!(
828                        "Task '{}' depends on '{}' which does not exist",
829                        task.id, dep
830                    );
831                }
832                *in_degree.entry(task.id.as_str()).or_insert(0) += 1;
833                dependents
834                    .entry(dep.as_str())
835                    .or_insert_with(Vec::new)
836                    .push(task.id.as_str());
837            }
838        }
839
840        // Kahn's algorithm with level tracking
841        let mut queue: VecDeque<(&str, usize)> = VecDeque::new(); // (task_id, level)
842        for task in &self.tasks {
843            if task.depends_on.is_empty() {
844                queue.push_back((task.id.as_str(), 0));
845            }
846        }
847
848        let mut levels: HashMap<&str, usize> = HashMap::new();
849        let mut processed: HashSet<&str> = HashSet::new();
850
851        while let Some((task_id, level)) = queue.pop_front() {
852            if processed.contains(task_id) {
853                continue;
854            }
855            processed.insert(task_id);
856            levels.insert(task_id, level);
857
858            // Decrease in-degree of dependents
859            if let Some(deps) = dependents.get(task_id) {
860                for &dep_id in deps {
861                    let deg = in_degree.get_mut(dep_id).unwrap();
862                    *deg -= 1;
863                    if *deg == 0 {
864                        queue.push_back((dep_id, level + 1));
865                    }
866                }
867            }
868        }
869
870        // Check for circular dependencies
871        if processed.len() != self.tasks.len() {
872            let unassigned: Vec<&str> = self
873                .tasks
874                .iter()
875                .filter(|t| !processed.contains(t.id.as_str()))
876                .map(|t| t.id.as_str())
877                .collect();
878            bail!(
879                "Cannot compute batches: circular dependency detected. Unassigned tasks: {:?}",
880                unassigned
881            );
882        }
883
884        // Group tasks by level into batches
885        let max_level = levels.values().copied().max().unwrap_or(0);
886        let mut batches: Vec<TaskBatch> = Vec::new();
887
888        for level in 0..=max_level {
889            let batch_idx = level;
890            let mut batch = TaskBatch::new(batch_idx);
891
892            for task in &self.tasks {
893                let task_level = levels.get(task.id.as_str()).copied().unwrap_or(0);
894                if task_level != level {
895                    continue;
896                }
897
898                // Check for file conflicts with tasks already in this batch
899                let has_conflict = batch.tasks.iter().any(|bt| {
900                    let bt_files: HashSet<_> = bt.touches_files.iter().collect();
901                    task.touches_files.iter().any(|f| bt_files.contains(f))
902                });
903
904                if has_conflict {
905                    batch.has_conflicts = true;
906                }
907
908                batch.add_task(task.clone());
909            }
910
911            if !batch.tasks.is_empty() {
912                batches.push(batch);
913            }
914        }
915
916        self.batches = batches;
917        self.touch();
918        Ok(())
919    }
920
921    /// Get the next pending batch, if any.
922    pub fn next_pending_batch(&self) -> Option<&TaskBatch> {
923        self.batches.iter().find(|b| b.status == TaskStatus::Pending)
924    }
925
926    /// Get a mutable reference to a batch by index.
927    pub fn get_batch_mut(&mut self, index: usize) -> Option<&mut TaskBatch> {
928        self.batches.get_mut(index)
929    }
930
931    /// Count completed batches.
932    pub fn completed_batch_count(&self) -> usize {
933        self.batches.iter().filter(|b| b.status == TaskStatus::Done).count()
934    }
935
936    /// Count total batches.
937    pub fn total_batch_count(&self) -> usize {
938        self.batches.len()
939    }
940
941    // ── Issue management ────────────────────────────────────────────
942
943    /// Add an issue found during verification.
944    pub fn add_issue(&mut self, issue: Issue) {
945        self.issues.push(issue);
946        self.touch();
947    }
948
949    /// Get all confirmed, unfixed issues.
950    pub fn confirmed_issues(&self) -> Vec<&Issue> {
951        self.issues
952            .iter()
953            .filter(|i| i.needs_fix())
954            .collect()
955    }
956
957    /// Count issues by verdict.
958    pub fn issues_by_verdict(&self, verdict: IssueVerdict) -> usize {
959        self.issues
960            .iter()
961            .filter(|i| i.verdict == Some(verdict))
962            .count()
963    }
964
965    /// Count fixed issues.
966    pub fn fixed_issue_count(&self) -> usize {
967        self.issues.iter().filter(|i| i.fixed).count()
968    }
969
970    // ── Verification ────────────────────────────────────────────────
971
972    /// Record a verification result.
973    pub fn record_verification(&mut self, result: VerificationResult) {
974        // Merge issues into the global list
975        let next_number = self.issues.len() + 1;
976        for (i, mut issue) in result.issues.into_iter().enumerate() {
977            issue.number = next_number + i;
978            self.issues.push(issue);
979        }
980        // Store the result (without the moved issues)
981        let mut stored = VerificationResult {
982            issues: Vec::new(), // issues are in the global list now
983            ..result
984        };
985        stored.issues = self
986            .issues
987            .iter()
988            .filter(|i| i.number >= next_number)
989            .cloned()
990            .collect();
991        self.last_verification = Some(stored);
992        self.touch();
993    }
994
995    /// Whether the current state is "clean" (no unfixed confirmed issues,
996    /// all verification gates passed).
997    pub fn is_clean(&self) -> bool {
998        let no_unfixed = !self.issues.iter().any(|i| i.needs_fix());
999        let verify_ok = self
1000            .last_verification
1001            .as_ref()
1002            .map(|v| v.is_clean())
1003            .unwrap_or(false);
1004        no_unfixed && verify_ok
1005    }
1006
1007    // ── Git integration ─────────────────────────────────────────────
1008
1009    /// Record a commit hash.
1010    pub fn record_commit(&mut self, hash: impl Into<String>) {
1011        self.last_commit = Some(hash.into());
1012        self.touch();
1013    }
1014
1015    /// Set the git clean status.
1016    pub fn set_git_clean(&mut self, clean: bool) {
1017        self.git_clean = clean;
1018        self.touch();
1019    }
1020
1021    // ── Status and diagnostics ──────────────────────────────────────
1022
1023    /// Produce a serializable status snapshot.
1024    pub fn status(&self) -> LoopStatus {
1025        LoopStatus {
1026            task: self.task.clone(),
1027            iteration: self.iteration,
1028            max_iterations: self.max_iterations,
1029            phase: self.phase,
1030            batches: self.batches.clone(),
1031            issues: self.issues.clone(),
1032            last_verification: self.last_verification.clone(),
1033            last_commit: self.last_commit.clone(),
1034            git_clean: self.git_clean,
1035            blocker: self.blocker.clone(),
1036            timestamp: Utc::now().to_rfc3339(),
1037        }
1038    }
1039
1040    /// Produce a diagnostic report when the loop hits max iterations or
1041    /// emergency stop.
1042    pub fn diagnostic(&self) -> String {
1043        let mut s = String::with_capacity(4096);
1044
1045        s.push_str("═══ AUTONOMOUS LOOP DIAGNOSTIC ═══\n\n");
1046        s.push_str(&format!("Task: {}\n", self.task));
1047        s.push_str(&format!("Iterations used: {} / {}\n", self.iteration, self.max_iterations));
1048        s.push_str(&format!("Phase at stop: {}\n", self.phase));
1049
1050        if let Some(ref blocker) = self.blocker {
1051            s.push_str(&format!("Blocker: {}\n", blocker));
1052        }
1053
1054        s.push_str(&format!(
1055            "Emergency stopped: {}\n",
1056            self.emergency_stopped
1057        ));
1058
1059        // Batch progress
1060        s.push_str("\n── Batches ──\n");
1061        for batch in &self.batches {
1062            let task_ids: Vec<&str> = batch.tasks.iter().map(|t| t.id.as_str()).collect();
1063            s.push_str(&format!(
1064                "  Batch {}: [{}] — {}\n",
1065                batch.index,
1066                task_ids.join(", "),
1067                batch.status
1068            ));
1069            for task in &batch.tasks {
1070                s.push_str(&format!(
1071                    "    {}: {} [{}]\n",
1072                    task.id,
1073                    task.description,
1074                    task.status
1075                ));
1076            }
1077        }
1078
1079        // Issue summary
1080        s.push_str("\n── Issues ──\n");
1081        let total = self.issues.len();
1082        let confirmed = self.issues_by_verdict(IssueVerdict::Confirmed);
1083        let false_pos = self.issues_by_verdict(IssueVerdict::FalsePositive);
1084        let deferred = self.issues_by_verdict(IssueVerdict::Deferred);
1085        let fixed = self.fixed_issue_count();
1086        s.push_str(&format!(
1087            "  Total: {} | Confirmed: {} | False positives: {} | Deferred: {} | Fixed: {}\n",
1088            total, confirmed, false_pos, deferred, fixed
1089        ));
1090
1091        // Recurring issues
1092        s.push_str("\n── Unfixed Confirmed Issues ──\n");
1093        for issue in self.issues.iter().filter(|i| i.needs_fix()) {
1094            s.push_str(&format!(
1095                "  #{} [{}] {} — {}\n",
1096                issue.number, issue.severity, issue.description, issue.location
1097            ));
1098            if !issue.evidence.is_empty() {
1099                s.push_str(&format!("    Evidence: {}\n", issue.evidence));
1100            }
1101            if !issue.fix_approach.is_empty() {
1102                s.push_str(&format!("    Fix approach: {}\n", issue.fix_approach));
1103            }
1104        }
1105
1106        // Verification history
1107        if let Some(ref v) = self.last_verification {
1108            s.push_str("\n── Last Verification ──\n");
1109            s.push_str(&format!("  Build: {}\n", if v.build_passed { "✅" } else { "❌" }));
1110            s.push_str(&format!("  Tests: {}\n", if v.tests_passed { "✅" } else { "❌" }));
1111            s.push_str(&format!(
1112                "  Type check: {}\n",
1113                if v.type_check_passed { "✅" } else { "❌" }
1114            ));
1115            s.push_str(&format!("  Lint: {}\n", if v.lint_passed { "✅" } else { "❌" }));
1116        }
1117
1118        s.push('\n');
1119        s
1120    }
1121
1122    /// Update the timestamp.
1123    fn touch(&mut self) {
1124        self.updated_at = Utc::now().to_rfc3339();
1125    }
1126}
1127
1128// ── Skill prompt ───────────────────────────────────────────────────────
1129
/// Generator for the autonomous-loop skill content.
///
/// Its only job is to hand out the system-prompt instructions that walk the
/// LLM through the autonomous development workflow.
pub struct AutonomousLoopSkill;

impl AutonomousLoopSkill {
    /// Returns the complete skill instructions, as an owned string, for
    /// injection into the system prompt while the autonomous-loop skill is
    /// active.
    ///
    /// The text is a fixed Markdown document; nothing in it is computed at
    /// run time.
    pub fn skill_instructions() -> String {
        const PROMPT: &str = r#"# Autonomous Development Loop Skill

You are operating the **autonomous-loop** skill. Your goal is to execute a
fully autonomous development cycle that produces a finished, verified, and
committed result from a single task description.

## Core Principles

1. **Never stop until genuinely done.** No "I think this looks good, please review" — keep going until verification gates pass clean.
2. **Every finding must survive cross-examination.** A bug is only a bug if it can be proven. An issue is only an issue if it can be demonstrated.
3. **Every checkpoint is a save point.** Git commits at every stable state mean any step is reversible.
4. **TDD for logic.** When implementing logic, algorithms, or data transformations — write the failing test first. When implementing UI layout or configuration, TDD is optional.

## Maximum Iterations

The loop runs at most **8 full iterations**. If still failing after 8 iterations, stop and produce a diagnostic report explaining what went wrong.

## Loop Phases

```
┌─────────────────────────────────────────────────────────────────┐
│                                                                 │
│  1. DESIGN ──── 2. PLAN ──── 3. IMPLEMENT ──── 4. VERIFY       │
│                                                  │              │
│                                                  ▼              │
│                                           Issues found?         │
│                                           ┌─────┴─────┐        │
│                                           │ YES       │ NO     │
│                                           ▼           ▼        │
│                                     5. RE-VALIDATE   7. DONE   │
│                                           │                     │
│                                     Real issues?                │
│                                     ┌────┴────┐                │
│                                     │ YES     │ NO (false +)   │
│                                     ▼         ▼                │
│                                   6. FIX    Discard,           │
│                                     │       re-verify           │
│                                     ▼                           │
│                              Commit fix ──→ back to 4           │
│                                                                 │
└─────────────────────────────────────────────────────────────────┘
```

## Phase 1: DESIGN

**Goal:** Understand requirements and produce a clear design before touching code.

### Design Quality Gate

Before proceeding, evaluate:

- [ ] Spec or design doc exists for this feature?
- [ ] Design is up-to-date with current codebase state?
- [ ] Approach is defined with specific files to touch?
- [ ] Acceptance criteria are concrete and testable?
- [ ] No known gaps or ambiguities in requirements?

**If ANY of these are "no":** Stop. Use the deep-research skill to investigate
and produce a solid design before continuing.

### Steps

1. Read all relevant context — specs, existing code, AGENTS.md, project conventions
2. If no spec/design exists, produce a minimal design doc
3. If a design already exists, validate it against the current codebase state
4. Identify risks and unknowns
5. **Commit checkpoint** if you created or updated a design doc

### Exit Criteria

- [ ] Objective is clear and testable
- [ ] Approach is defined
- [ ] Files to touch are identified
- [ ] Acceptance criteria are concrete

## Phase 2: PLAN

**Goal:** Decompose into ordered, verifiable implementation steps.

### Steps

1. Break into vertical slices — each slice delivers a working, testable increment
2. Order by dependency — foundations first, consumers last
3. Each task must have:
   - Task ID (e.g., T1, T2)
   - Exact file paths
   - What it accomplishes
   - How to verify it works
   - `dependsOn` — list of task IDs
   - `touchesFiles` — files this task creates or modifies
4. Group tasks into parallel execution batches
5. Mark commit points — commit after each batch completes

### Exit Criteria

- [ ] Every task has acceptance criteria
- [ ] Every task has a verification method
- [ ] Every task has dependsOn and touchesFiles
- [ ] Tasks are grouped into execution batches
- [ ] No circular dependencies
- [ ] No task exceeds ~5 files

## Phase 3: IMPLEMENT

**Goal:** Execute the plan by batch, parallelizing independent tasks, with commits at every stable point.

### Rules

**Rule 0: Simplicity First.** Before writing code: "What is the simplest thing that could work?"

**Rule 1: Batch Execution.** Execute tasks by batch, respecting the dependency graph.

**Rule 2: Build Must Stay Green.** After each batch: build compiles, existing tests pass.

**Rule 3: Scope Discipline.** Touch only what the task requires. No unsolicited refactoring.

**Rule 4: Commit Frequently.** After every successful batch:
```
git commit -m "<type>(<scope>): <what this batch accomplishes>"
```

### Commit Message Format

Types: `feat`, `fix`, `refactor`, `test`, `docs`, `chore`
Scopes: match the module/area being changed

Examples:
- ✅ `feat(auth): add JWT token generation`
- ✅ `test(cache): add LRU eviction tests`
- ❌ `feat: implement phase 1` — too coarse

### Safety Protocol

At the START of implementation:
```bash
git add -A && git commit -m "chore: checkpoint before <feature> implementation"
```

## Phase 4: VERIFY

**Goal:** Multi-axis verification that catches real problems.

### Steps

1. Run build, test, lint:
   ```bash
   npm run build && npm test && npm run lint
   # or: cargo build && cargo test && cargo clippy
   ```
   - [ ] Build succeeds with zero errors
   - [ ] All tests pass (existing + new)
   - [ ] Zero type/lint errors

2. Walk through acceptance criteria from Phase 2:
   - [ ] Every acceptance criterion is met
   - [ ] Edge cases handled
   - [ ] Error paths handled

3. **Log any issues found:**
   ```
   ISSUE [N]: [one-line description]
     Severity: Critical | Important | Minor | Nit
     Location: file:line or component
     Evidence: [exact error message or concrete observation]
     Reproducible: YES/NO
     Fix approach: [brief description]
   ```

**Severity definitions:**
- **Critical:** Build broken, data loss, security vulnerability — must fix
- **Important:** Incorrect behavior, failing tests, broken feature — must fix
- **Minor:** Style inconsistency, missing edge case — should fix
- **Nit:** Formatting, naming preference — optional

## Phase 5: RE-VALIDATE (The False Positive Filter)

**Goal:** Confirm that every issue found in Phase 4 is a REAL issue.

### For EVERY issue:

**Step 1: Reproduce or Demonstrate**
- Build error? → Re-read the exact error message and the code
- Test failure? → Re-run the specific failing test in isolation
- Logic bug? → Trace the data flow: input → wrong output

**Step 2: Cross-Examine** — if ANY answer is "no," it's likely a false positive:

| Question | Why it matters |
|----------|---------------|
| Does this violate a project convention? | Many "issues" are intentional styles |
| Is this actually in scope? | Adjacent code may look "wrong" but isn't this change |
| Would a staff engineer flag this? | Distinguishes real from theoretical |
| Is the "correct" version actually better here? | Context-dependent patterns exist |
| Does this affect actual behavior? | Theoretical issues waste time |

**Step 3: Verdict**

| Verdict | Action |
|---------|--------|
| **CONFIRMED** | Real issue → proceed to Phase 6 |
| **FALSE_POSITIVE** | Not a problem → discard, document why |
| **DEFERRED** | Real but out of scope → log, don't fix now |
| **NEEDS_CONTEXT** | Can't determine → ask the user |

### Common False Positive Patterns

- **Over-applying best practices** on internal-only functions
- **Misunderstanding intent** (variable "unused" but used in templates)
- **Generic rules vs project context** (project disables a linter rule intentionally)
- **Theoretical concerns** ("could be slow" with bounded data)
- **Adjacent code problems** outside task scope

## Phase 6: FIX (If CONFIRMED Issues Exist)

**Goal:** Fix only the confirmed, genuine issues.

### Rules

- **One fix per commit.** Each fix is independently revertable.
- **Fix the root cause, not the symptom.** Ask "why?" at least twice.
- **Re-run specific verification after each fix.**

After all fixes committed:
```bash
npm run build && npm test && npm run lint
```

Then **return to Phase 4 (VERIFY)** for a fresh pass.

## Phase 7: DONE

**Goal:** Final confirmation that the task is genuinely complete.

### Final Verification

- [ ] Build succeeds with zero errors
- [ ] Full test suite passes
- [ ] Type check passes
- [ ] Lint passes
- [ ] All acceptance criteria met
- [ ] No uncommitted changes (`git status` is clean)
- [ ] No TODO/FIXME/HACK that should have been resolved
- [ ] No debug logging left behind

### Completion Report

```
## Task Complete: [Task Name]

### Summary
[1-2 sentences]

### Changes
- [file list with one-line descriptions]

### Commits
[newest first]

### Verification
- Build: ✅ PASS
- Tests: ✅ PASS (N tests)
- Type check: ✅ PASS
- Lint: ✅ PASS

### Issues Found & Resolved
- [Issues confirmed and fixed]

### Discarded False Positives
- [Issues discarded with reasons]
```

## Emergency Stop Conditions

Stop immediately and report if:
- Build broken and can't fix within 2 attempts
- Tests failing and fix introduces new failures
- Hit 8 loop iterations
- Fundamental design flaw discovered
- Something genuinely not understood

## Anti-Rationalization Table

| Rationalization | Reality |
|---|---|
| "Build passes, probably good enough" | Build ≠ working. Tests + type checks + acceptance criteria matter. |
| "Mental review is sufficient" | Mental review misses the same bugs introduced. |
| "Minor issues can wait" | Minor issues compound into tomorrow's bugs. |
| "Skip re-validation, issue is obvious" | False positives waste hours. 5 min cross-examination saves 30 min. |
| "Commit everything at the end" | Catastrophic failure at min 45 = losing 45 min. |
| "These issues are all real" | When finding many issues at once, false positive rate is highest. |
| "Fix all issues at once" | Batch fixes hide which fix solved which issue. |
| "Improve nearby code while here" | Every unsolicited change is a risk. Stay in scope. |

## Red Flags (Self-Monitoring)

- Skipping re-validation because "the issue is obvious"
- Committing >100 lines without a build/test check
- Finding the same issue in iteration 3 that was "fixed" in iteration 2
- Rationalizing why a failed test "doesn't count"
- Broadening scope beyond the original task
- More than 3 consecutive fix-and-reverify cycles on the same issue
"#;
        PROMPT.to_owned()
    }
}
1445
1446// ── Tests ──────────────────────────────────────────────────────────────
1447
1448#[cfg(test)]
1449mod tests {
1450    use super::*;
1451
1452    // ── LoopPhase tests ────────────────────────────────────────────
1453
1454    #[test]
1455    fn test_phase_display() {
1456        assert_eq!(format!("{}", LoopPhase::Design), "DESIGN");
1457        assert_eq!(format!("{}", LoopPhase::Plan), "PLAN");
1458        assert_eq!(format!("{}", LoopPhase::Implement), "IMPLEMENT");
1459        assert_eq!(format!("{}", LoopPhase::Verify), "VERIFY");
1460        assert_eq!(format!("{}", LoopPhase::ReValidate), "RE-VALIDATE");
1461        assert_eq!(format!("{}", LoopPhase::Fix), "FIX");
1462        assert_eq!(format!("{}", LoopPhase::Done), "DONE");
1463    }
1464
1465    #[test]
1466    fn test_phase_next() {
1467        assert_eq!(LoopPhase::Design.next(), Some(LoopPhase::Plan));
1468        assert_eq!(LoopPhase::Plan.next(), Some(LoopPhase::Implement));
1469        assert_eq!(LoopPhase::Implement.next(), Some(LoopPhase::Verify));
1470        assert_eq!(LoopPhase::Verify.next(), Some(LoopPhase::ReValidate));
1471        assert_eq!(LoopPhase::ReValidate.next(), Some(LoopPhase::Fix));
1472        assert_eq!(LoopPhase::Fix.next(), Some(LoopPhase::Verify));
1473        assert_eq!(LoopPhase::Done.next(), None);
1474    }
1475
1476    #[test]
1477    fn test_phase_can_exit_on_clean() {
1478        assert!(LoopPhase::Verify.can_exit_on_clean());
1479        assert!(LoopPhase::ReValidate.can_exit_on_clean());
1480        assert!(!LoopPhase::Design.can_exit_on_clean());
1481        assert!(!LoopPhase::Fix.can_exit_on_clean());
1482    }
1483
1484    #[test]
1485    fn test_phase_all() {
1486        let all = LoopPhase::all();
1487        assert_eq!(all.len(), 7);
1488        assert_eq!(all[0], LoopPhase::Design);
1489        assert_eq!(all[6], LoopPhase::Done);
1490    }
1491
1492    // ── TaskStatus tests ───────────────────────────────────────────
1493
1494    #[test]
1495    fn test_task_status_display() {
1496        assert_eq!(format!("{}", TaskStatus::Pending), "pending");
1497        assert_eq!(format!("{}", TaskStatus::Running), "running");
1498        assert_eq!(format!("{}", TaskStatus::Done), "done");
1499        assert_eq!(format!("{}", TaskStatus::Failed), "failed");
1500    }
1501
1502    // ── LoopTask tests ─────────────────────────────────────────────
1503
1504    #[test]
1505    fn test_task_builder() {
1506        let task = LoopTask::new("T1", "Create auth module")
1507            .touches("src/auth.rs")
1508            .touches("src/lib.rs")
1509            .depends_on("T0")
1510            .verify_with("cargo test auth");
1511
1512        assert_eq!(task.id, "T1");
1513        assert_eq!(task.description, "Create auth module");
1514        assert_eq!(task.touches_files.len(), 2);
1515        assert_eq!(task.depends_on, vec!["T0"]);
1516        assert_eq!(task.verification, "cargo test auth");
1517        assert_eq!(task.status, TaskStatus::Pending);
1518    }
1519
1520    #[test]
1521    fn test_task_lifecycle() {
1522        let mut task = LoopTask::new("T1", "Do something");
1523        assert_eq!(task.status, TaskStatus::Pending);
1524
1525        task.start();
1526        assert_eq!(task.status, TaskStatus::Running);
1527
1528        task.complete(Some("abc123".to_string()));
1529        assert_eq!(task.status, TaskStatus::Done);
1530        assert_eq!(task.commit_hash, Some("abc123".to_string()));
1531    }
1532
1533    #[test]
1534    fn test_task_fail() {
1535        let mut task = LoopTask::new("T1", "Do something");
1536        task.start();
1537        task.fail();
1538        assert_eq!(task.status, TaskStatus::Failed);
1539    }
1540
1541    // ── TaskBatch tests ────────────────────────────────────────────
1542
1543    #[test]
1544    fn test_batch_lifecycle() {
1545        let mut batch = TaskBatch::new(0);
1546        batch.add_task(LoopTask::new("T1", "Task 1"));
1547        batch.add_task(LoopTask::new("T2", "Task 2"));
1548
1549        assert!(!batch.all_done());
1550        assert!(!batch.any_failed());
1551
1552        batch.start();
1553        assert_eq!(batch.status, TaskStatus::Running);
1554        assert_eq!(batch.tasks[0].status, TaskStatus::Running);
1555
1556        batch.complete();
1557        assert_eq!(batch.status, TaskStatus::Done);
1558    }
1559
1560    #[test]
1561    fn test_batch_all_done() {
1562        let mut batch = TaskBatch::new(0);
1563        let mut t1 = LoopTask::new("T1", "Task 1");
1564        t1.complete(None);
1565        let mut t2 = LoopTask::new("T2", "Task 2");
1566        t2.complete(None);
1567        batch.add_task(t1);
1568        batch.add_task(t2);
1569        assert!(batch.all_done());
1570    }
1571
1572    #[test]
1573    fn test_batch_any_failed() {
1574        let mut batch = TaskBatch::new(0);
1575        batch.add_task(LoopTask::new("T1", "Task 1"));
1576        let mut t2 = LoopTask::new("T2", "Task 2");
1577        t2.fail();
1578        batch.add_task(t2);
1579        assert!(batch.any_failed());
1580    }
1581
1582    // ── Issue tests ────────────────────────────────────────────────
1583
1584    #[test]
1585    fn test_issue_builder() {
1586        let issue = Issue::new(1, "Build fails on ARM64", IssueSeverity::Critical, "src/build.rs:42")
1587            .with_evidence("error: unsupported target")
1588            .reproducible(true)
1589            .fix_approach("Add ARM64 target detection");
1590
1591        assert_eq!(issue.number, 1);
1592        assert_eq!(issue.severity, IssueSeverity::Critical);
1593        assert!(issue.reproducible);
1594        assert!(issue.verdict.is_none());
1595        assert!(!issue.fixed);
1596    }
1597
1598    #[test]
1599    fn test_issue_verdict() {
1600        let mut issue = Issue::new(1, "Test", IssueSeverity::Minor, "main.rs");
1601        assert!(!issue.needs_fix());
1602
1603        issue.set_verdict(IssueVerdict::Confirmed, "Reproduced locally");
1604        assert!(issue.needs_fix());
1605        assert!(issue.is_actionable());
1606
1607        issue.mark_fixed(Some("abc123".to_string()));
1608        assert!(!issue.needs_fix());
1609        assert!(issue.fixed);
1610        assert_eq!(issue.fix_commit, Some("abc123".to_string()));
1611    }
1612
1613    #[test]
1614    fn test_issue_false_positive() {
1615        let mut issue = Issue::new(1, "Test", IssueSeverity::Nit, "main.rs");
1616        issue.set_verdict(IssueVerdict::FalsePositive, "Internal function, callers trusted");
1617        assert!(!issue.needs_fix());
1618        assert!(!issue.is_actionable());
1619    }
1620
1621    #[test]
1622    fn test_issue_deferred() {
1623        let mut issue = Issue::new(1, "Test", IssueSeverity::Minor, "main.rs");
1624        issue.set_verdict(IssueVerdict::Deferred, "Out of scope for this task");
1625        assert!(!issue.needs_fix());
1626        assert!(!issue.is_actionable());
1627    }
1628
1629    #[test]
1630    fn test_severity_ordering() {
1631        assert!(IssueSeverity::Critical > IssueSeverity::Important);
1632        assert!(IssueSeverity::Important > IssueSeverity::Minor);
1633        assert!(IssueSeverity::Minor > IssueSeverity::Nit);
1634    }
1635
1636    #[test]
1637    fn test_severity_display() {
1638        assert_eq!(format!("{}", IssueSeverity::Critical), "Critical");
1639        assert_eq!(format!("{}", IssueSeverity::Important), "Important");
1640        assert_eq!(format!("{}", IssueSeverity::Minor), "Minor");
1641        assert_eq!(format!("{}", IssueSeverity::Nit), "Nit");
1642    }
1643
1644    #[test]
1645    fn test_verdict_display() {
1646        assert_eq!(format!("{}", IssueVerdict::Confirmed), "CONFIRMED");
1647        assert_eq!(format!("{}", IssueVerdict::FalsePositive), "FALSE_POSITIVE");
1648        assert_eq!(format!("{}", IssueVerdict::Deferred), "DEFERRED");
1649        assert_eq!(format!("{}", IssueVerdict::NeedsContext), "NEEDS_CONTEXT");
1650    }
1651
1652    // ── VerificationResult tests ───────────────────────────────────
1653
1654    #[test]
1655    fn test_verification_result_new() {
1656        let result = VerificationResult::new();
1657        assert!(!result.build_passed);
1658        assert!(!result.tests_passed);
1659        assert!(!result.type_check_passed);
1660        assert!(!result.lint_passed);
1661        assert!(result.issues.is_empty());
1662        assert!(!result.is_clean());
1663    }
1664
1665    #[test]
1666    fn test_verification_result_clean() {
1667        let result = VerificationResult {
1668            build_passed: true,
1669            tests_passed: true,
1670            type_check_passed: true,
1671            lint_passed: true,
1672            issues: vec![],
1673            timestamp: Utc::now().to_rfc3339(),
1674        };
1675        assert!(result.is_clean());
1676        assert!(result.critical_passed());
1677    }
1678
1679    #[test]
1680    fn test_verification_result_with_issues() {
1681        let mut result = VerificationResult {
1682            build_passed: true,
1683            tests_passed: true,
1684            type_check_passed: true,
1685            lint_passed: true,
1686            issues: vec![],
1687            timestamp: Utc::now().to_rfc3339(),
1688        };
1689        result.issues.push(Issue::new(1, "Bug", IssueSeverity::Minor, "main.rs"));
1690        assert!(!result.is_clean());
1691    }
1692
1693    #[test]
1694    fn test_verification_critical_failed() {
1695        let result = VerificationResult {
1696            build_passed: false,
1697            tests_passed: true,
1698            type_check_passed: true,
1699            lint_passed: true,
1700            issues: vec![],
1701            timestamp: Utc::now().to_rfc3339(),
1702        };
1703        assert!(!result.critical_passed());
1704    }
1705
1706    #[test]
1707    fn test_verification_issue_count_by_severity() {
1708        let mut result = VerificationResult::new();
1709        result.issues.push(Issue::new(1, "A", IssueSeverity::Critical, "a.rs"));
1710        result.issues.push(Issue::new(2, "B", IssueSeverity::Critical, "b.rs"));
1711        result.issues.push(Issue::new(3, "C", IssueSeverity::Minor, "c.rs"));
1712        assert_eq!(result.issue_count_by_severity(IssueSeverity::Critical), 2);
1713        assert_eq!(result.issue_count_by_severity(IssueSeverity::Minor), 1);
1714        assert_eq!(result.issue_count_by_severity(IssueSeverity::Nit), 0);
1715    }
1716
1717    // ── AutonomousLoop tests ───────────────────────────────────────
1718
1719    #[test]
1720    fn test_loop_new() {
1721        let al = AutonomousLoop::new("Implement auth");
1722        assert_eq!(al.task, "Implement auth");
1723        assert_eq!(al.iteration, 0);
1724        assert_eq!(al.max_iterations, MAX_ITERATIONS);
1725        assert_eq!(al.phase, LoopPhase::Design);
1726        assert!(!al.started);
1727        assert!(al.tasks.is_empty());
1728        assert!(al.issues.is_empty());
1729    }
1730
1731    #[test]
1732    fn test_loop_with_max_iterations() {
1733        let al = AutonomousLoop::new("Test").with_max_iterations(4);
1734        assert_eq!(al.max_iterations, 4);
1735    }
1736
1737    #[test]
1738    fn test_loop_with_max_iterations_clamped() {
1739        let al = AutonomousLoop::new("Test").with_max_iterations(0);
1740        assert_eq!(al.max_iterations, 1);
1741
1742        let al = AutonomousLoop::new("Test").with_max_iterations(100);
1743        assert_eq!(al.max_iterations, MAX_ITERATIONS);
1744    }
1745
1746    #[test]
1747    fn test_loop_start() {
1748        let mut al = AutonomousLoop::new("Test");
1749        al.start().unwrap();
1750        assert!(al.started);
1751        assert_eq!(al.iteration, 1);
1752        assert_eq!(al.phase, LoopPhase::Design);
1753    }
1754
1755    #[test]
1756    fn test_loop_start_twice() {
1757        let mut al = AutonomousLoop::new("Test");
1758        al.start().unwrap();
1759        assert!(al.start().is_err());
1760    }
1761
1762    #[test]
1763    fn test_loop_emergency_stop() {
1764        let mut al = AutonomousLoop::new("Test");
1765        al.start().unwrap();
1766        al.emergency_stop("Build is broken beyond repair");
1767        assert!(al.emergency_stopped);
1768        assert_eq!(al.blocker, Some("Build is broken beyond repair".to_string()));
1769    }
1770
1771    #[test]
1772    fn test_loop_advance_not_started() {
1773        let mut al = AutonomousLoop::new("Test");
1774        assert!(al.advance().is_err());
1775    }
1776
1777    #[test]
1778    fn test_loop_advance_after_emergency_stop() {
1779        let mut al = AutonomousLoop::new("Test");
1780        al.start().unwrap();
1781        al.emergency_stop("Nope");
1782        assert!(al.advance().is_err());
1783    }
1784
1785    #[test]
1786    fn test_loop_advance_design_to_plan() {
1787        let mut al = AutonomousLoop::new("Test");
1788        al.start().unwrap();
1789        let next = al.advance().unwrap();
1790        assert_eq!(next, LoopPhase::Plan);
1791        assert_eq!(al.phase, LoopPhase::Plan);
1792    }
1793
1794    #[test]
1795    fn test_loop_advance_plan_to_implement() {
1796        let mut al = AutonomousLoop::new("Test");
1797        al.start().unwrap();
1798        al.advance().unwrap(); // Design → Plan
1799        let next = al.advance().unwrap(); // Plan → Implement
1800        assert_eq!(next, LoopPhase::Implement);
1801    }
1802
1803    #[test]
1804    fn test_loop_advance_implement_to_verify() {
1805        let mut al = AutonomousLoop::new("Test");
1806        al.start().unwrap();
1807        al.advance().unwrap(); // → Plan
1808        al.advance().unwrap(); // → Implement
1809        let next = al.advance().unwrap(); // → Verify
1810        assert_eq!(next, LoopPhase::Verify);
1811    }
1812
1813    #[test]
1814    fn test_loop_advance_verify_clean_goes_to_done() {
1815        let mut al = AutonomousLoop::new("Test");
1816        al.start().unwrap();
1817        al.advance().unwrap(); // → Plan
1818        al.advance().unwrap(); // → Implement
1819        al.advance().unwrap(); // → Verify
1820
1821        // Record a clean verification
1822        al.record_verification(VerificationResult {
1823            build_passed: true,
1824            tests_passed: true,
1825            type_check_passed: true,
1826            lint_passed: true,
1827            issues: vec![],
1828            timestamp: Utc::now().to_rfc3339(),
1829        });
1830
1831        let next = al.advance().unwrap(); // → Done (clean)
1832        assert_eq!(next, LoopPhase::Done);
1833    }
1834
1835    #[test]
1836    fn test_loop_advance_verify_with_issues_goes_to_revalidate() {
1837        let mut al = AutonomousLoop::new("Test");
1838        al.start().unwrap();
1839        al.advance().unwrap(); // → Plan
1840        al.advance().unwrap(); // → Implement
1841        al.advance().unwrap(); // → Verify
1842
1843        // Record verification with an issue
1844        al.record_verification(VerificationResult {
1845            build_passed: true,
1846            tests_passed: true,
1847            type_check_passed: true,
1848            lint_passed: true,
1849            issues: vec![Issue::new(1, "A bug", IssueSeverity::Important, "main.rs")],
1850            timestamp: Utc::now().to_rfc3339(),
1851        });
1852
1853        let next = al.advance().unwrap(); // → ReValidate
1854        assert_eq!(next, LoopPhase::ReValidate);
1855    }
1856
1857    #[test]
1858    fn test_loop_advance_revalidate_confirmed_goes_to_fix() {
1859        let mut al = AutonomousLoop::new("Test");
1860        al.start().unwrap();
1861        al.advance().unwrap(); // → Plan
1862        al.advance().unwrap(); // → Implement
1863        al.advance().unwrap(); // → Verify
1864
1865        // Add an issue and confirm it
1866        let mut issue = Issue::new(1, "A bug", IssueSeverity::Important, "main.rs");
1867        issue.set_verdict(IssueVerdict::Confirmed, "Reproduced");
1868        al.add_issue(issue);
1869
1870        al.set_phase(LoopPhase::ReValidate);
1871        let next = al.advance().unwrap(); // → Fix
1872        assert_eq!(next, LoopPhase::Fix);
1873    }
1874
1875    #[test]
1876    fn test_loop_advance_revalidate_false_positive_goes_to_done() {
1877        let mut al = AutonomousLoop::new("Test");
1878        al.start().unwrap();
1879
1880        // Add a false-positive issue
1881        let mut issue = Issue::new(1, "False alarm", IssueSeverity::Nit, "main.rs");
1882        issue.set_verdict(IssueVerdict::FalsePositive, "Internal function");
1883        al.add_issue(issue);
1884
1885        al.set_phase(LoopPhase::ReValidate);
1886        let next = al.advance().unwrap(); // → Done
1887        assert_eq!(next, LoopPhase::Done);
1888    }
1889
1890    #[test]
1891    fn test_loop_advance_fix_goes_to_verify_with_increment() {
1892        let mut al = AutonomousLoop::new("Test");
1893        al.start().unwrap();
1894        assert_eq!(al.iteration, 1);
1895
1896        al.set_phase(LoopPhase::Fix);
1897        let next = al.advance().unwrap(); // → Verify, iteration++
1898        assert_eq!(next, LoopPhase::Verify);
1899        assert_eq!(al.iteration, 2);
1900    }
1901
1902    #[test]
1903    fn test_loop_advance_done_is_error() {
1904        let mut al = AutonomousLoop::new("Test");
1905        al.start().unwrap();
1906        al.set_phase(LoopPhase::Done);
1907        assert!(al.advance().is_err());
1908    }
1909
1910    #[test]
1911    fn test_loop_max_iterations_exceeded() {
1912        let mut al = AutonomousLoop::new("Test").with_max_iterations(2);
1913        al.start().unwrap();
1914        al.iteration = 2;
1915
1916        al.set_phase(LoopPhase::Fix);
1917        assert!(al.advance().is_err());
1918        assert!(al.emergency_stopped);
1919    }
1920
1921    #[test]
1922    fn test_loop_set_phase() {
1923        let mut al = AutonomousLoop::new("Test");
1924        al.set_phase(LoopPhase::Verify);
1925        assert_eq!(al.phase, LoopPhase::Verify);
1926    }
1927
1928    // ── Task management tests ──────────────────────────────────────
1929
1930    #[test]
1931    fn test_add_and_get_task() {
1932        let mut al = AutonomousLoop::new("Test");
1933        al.add_task(LoopTask::new("T1", "Create module"));
1934        al.add_task(LoopTask::new("T2", "Add tests"));
1935
1936        assert_eq!(al.tasks.len(), 2);
1937        assert_eq!(al.get_task("T1").unwrap().description, "Create module");
1938        assert_eq!(al.get_task("T2").unwrap().description, "Add tests");
1939        assert!(al.get_task("T3").is_none());
1940    }
1941
1942    #[test]
1943    fn test_get_task_mut() {
1944        let mut al = AutonomousLoop::new("Test");
1945        al.add_task(LoopTask::new("T1", "Create module"));
1946
1947        al.get_task_mut("T1").unwrap().complete(Some("abc".to_string()));
1948        assert_eq!(al.get_task("T1").unwrap().commit_hash, Some("abc".to_string()));
1949    }
1950
1951    // ── Batch computation tests ────────────────────────────────────
1952
1953    #[test]
1954    fn test_compute_batches_empty() {
1955        let mut al = AutonomousLoop::new("Test");
1956        al.compute_batches().unwrap();
1957        assert!(al.batches.is_empty());
1958    }
1959
1960    #[test]
1961    fn test_compute_batches_single_task() {
1962        let mut al = AutonomousLoop::new("Test");
1963        al.add_task(LoopTask::new("T1", "Do thing"));
1964        al.compute_batches().unwrap();
1965
1966        assert_eq!(al.batches.len(), 1);
1967        assert_eq!(al.batches[0].tasks.len(), 1);
1968        assert!(!al.batches[0].has_conflicts);
1969    }
1970
1971    #[test]
1972    fn test_compute_batches_parallel() {
1973        let mut al = AutonomousLoop::new("Test");
1974        al.add_task(LoopTask::new("T1", "Task 1"));
1975        al.add_task(LoopTask::new("T2", "Task 2"));
1976        al.compute_batches().unwrap();
1977
1978        // Both in batch 0 (no dependencies)
1979        assert_eq!(al.batches.len(), 1);
1980        assert_eq!(al.batches[0].tasks.len(), 2);
1981    }
1982
1983    #[test]
1984    fn test_compute_batches_sequential() {
1985        let mut al = AutonomousLoop::new("Test");
1986        al.add_task(LoopTask::new("T1", "Foundation"));
1987        al.add_task(
1988            LoopTask::new("T2", "Build on foundation").depends_on("T1"),
1989        );
1990        al.add_task(
1991            LoopTask::new("T3", "Final layer").depends_on("T2"),
1992        );
1993
1994        al.compute_batches().unwrap();
1995
1996        assert_eq!(al.batches.len(), 3);
1997        assert_eq!(al.batches[0].tasks[0].id, "T1");
1998        assert_eq!(al.batches[1].tasks[0].id, "T2");
1999        assert_eq!(al.batches[2].tasks[0].id, "T3");
2000    }
2001
2002    #[test]
2003    fn test_compute_batches_mixed() {
2004        let mut al = AutonomousLoop::new("Test");
2005        // T1, T2 independent → batch 0
2006        al.add_task(LoopTask::new("T1", "Independent 1"));
2007        al.add_task(LoopTask::new("T2", "Independent 2"));
2008        // T3 depends on T1, T4 depends on T2 → batch 1
2009        al.add_task(
2010            LoopTask::new("T3", "After T1").depends_on("T1"),
2011        );
2012        al.add_task(
2013            LoopTask::new("T4", "After T2").depends_on("T2"),
2014        );
2015        // T5 depends on T3, T4 → batch 2
2016        al.add_task(
2017            LoopTask::new("T5", "After T3 and T4")
2018                .depends_on("T3")
2019                .depends_on("T4"),
2020        );
2021
2022        al.compute_batches().unwrap();
2023
2024        assert_eq!(al.batches.len(), 3);
2025        assert_eq!(al.batches[0].tasks.len(), 2);
2026        assert_eq!(al.batches[1].tasks.len(), 2);
2027        assert_eq!(al.batches[2].tasks.len(), 1);
2028    }
2029
2030    #[test]
2031    fn test_compute_batches_file_conflicts() {
2032        let mut al = AutonomousLoop::new("Test");
2033        al.add_task(
2034            LoopTask::new("T1", "Touch lib").touches("src/lib.rs"),
2035        );
2036        al.add_task(
2037            LoopTask::new("T2", "Also touch lib").touches("src/lib.rs"),
2038        );
2039
2040        al.compute_batches().unwrap();
2041
2042        // Both in batch 0 but flagged as conflicting
2043        assert_eq!(al.batches.len(), 1);
2044        assert!(al.batches[0].has_conflicts);
2045    }
2046
2047    #[test]
2048    fn test_compute_batches_circular_dependency() {
2049        let mut al = AutonomousLoop::new("Test");
2050        al.add_task(
2051            LoopTask::new("T1", "Circular 1").depends_on("T2"),
2052        );
2053        al.add_task(
2054            LoopTask::new("T2", "Circular 2").depends_on("T1"),
2055        );
2056
2057        let result = al.compute_batches();
2058        assert!(result.is_err());
2059        assert!(result.unwrap_err().to_string().contains("circular dependency"));
2060    }
2061
2062    #[test]
2063    fn test_next_pending_batch() {
2064        let mut al = AutonomousLoop::new("Test");
2065        al.add_task(LoopTask::new("T1", "Task 1"));
2066        al.add_task(LoopTask::new("T2", "Task 2").depends_on("T1"));
2067        al.compute_batches().unwrap();
2068
2069        assert!(al.next_pending_batch().is_some());
2070        assert_eq!(al.next_pending_batch().unwrap().index, 0);
2071
2072        al.batches[0].status = TaskStatus::Done;
2073        assert!(al.next_pending_batch().is_some());
2074        assert_eq!(al.next_pending_batch().unwrap().index, 1);
2075
2076        al.batches[1].status = TaskStatus::Done;
2077        assert!(al.next_pending_batch().is_none());
2078    }
2079
2080    #[test]
2081    fn test_completed_batch_count() {
2082        let mut al = AutonomousLoop::new("Test");
2083        al.add_task(LoopTask::new("T1", "Task 1"));
2084        al.add_task(LoopTask::new("T2", "Task 2").depends_on("T1"));
2085        al.compute_batches().unwrap();
2086
2087        assert_eq!(al.completed_batch_count(), 0);
2088        assert_eq!(al.total_batch_count(), 2);
2089
2090        al.batches[0].status = TaskStatus::Done;
2091        assert_eq!(al.completed_batch_count(), 1);
2092    }
2093
2094    // ── Issue management tests ─────────────────────────────────────
2095
2096    #[test]
2097    fn test_add_issue() {
2098        let mut al = AutonomousLoop::new("Test");
2099        al.add_issue(Issue::new(1, "Bug", IssueSeverity::Important, "main.rs"));
2100
2101        assert_eq!(al.issues.len(), 1);
2102        assert_eq!(al.issues[0].description, "Bug");
2103    }
2104
2105    #[test]
2106    fn test_confirmed_issues() {
2107        let mut al = AutonomousLoop::new("Test");
2108
2109        // Add a confirmed issue
2110        let mut confirmed = Issue::new(1, "Real bug", IssueSeverity::Important, "main.rs");
2111        confirmed.set_verdict(IssueVerdict::Confirmed, "Reproduced");
2112        al.add_issue(confirmed);
2113
2114        // Add a false positive
2115        let mut fp = Issue::new(2, "False alarm", IssueSeverity::Nit, "lib.rs");
2116        fp.set_verdict(IssueVerdict::FalsePositive, "Internal function");
2117        al.add_issue(fp);
2118
2119        // Add a fixed issue
2120        let mut fixed = Issue::new(3, "Already fixed", IssueSeverity::Minor, "util.rs");
2121        fixed.set_verdict(IssueVerdict::Confirmed, "Was real");
2122        fixed.mark_fixed(None);
2123        al.add_issue(fixed);
2124
2125        assert_eq!(al.confirmed_issues().len(), 1);
2126        assert_eq!(al.confirmed_issues()[0].description, "Real bug");
2127    }
2128
2129    #[test]
2130    fn test_issues_by_verdict() {
2131        let mut al = AutonomousLoop::new("Test");
2132
2133        let mut i1 = Issue::new(1, "A", IssueSeverity::Minor, "a");
2134        i1.set_verdict(IssueVerdict::Confirmed, "Real");
2135        al.add_issue(i1);
2136
2137        let mut i2 = Issue::new(2, "B", IssueSeverity::Nit, "b");
2138        i2.set_verdict(IssueVerdict::FalsePositive, "Fake");
2139        al.add_issue(i2);
2140
2141        let mut i3 = Issue::new(3, "C", IssueSeverity::Minor, "c");
2142        i3.set_verdict(IssueVerdict::Confirmed, "Real");
2143        al.add_issue(i3);
2144
2145        assert_eq!(al.issues_by_verdict(IssueVerdict::Confirmed), 2);
2146        assert_eq!(al.issues_by_verdict(IssueVerdict::FalsePositive), 1);
2147        assert_eq!(al.issues_by_verdict(IssueVerdict::Deferred), 0);
2148    }
2149
2150    #[test]
2151    fn test_fixed_issue_count() {
2152        let mut al = AutonomousLoop::new("Test");
2153
2154        let mut i1 = Issue::new(1, "A", IssueSeverity::Minor, "a");
2155        i1.mark_fixed(Some("abc".to_string()));
2156        al.add_issue(i1);
2157
2158        al.add_issue(Issue::new(2, "B", IssueSeverity::Minor, "b"));
2159
2160        assert_eq!(al.fixed_issue_count(), 1);
2161    }
2162
2163    // ── Verification tests ─────────────────────────────────────────
2164
2165    #[test]
2166    fn test_record_verification() {
2167        let mut al = AutonomousLoop::new("Test");
2168        al.record_verification(VerificationResult {
2169            build_passed: true,
2170            tests_passed: true,
2171            type_check_passed: true,
2172            lint_passed: true,
2173            issues: vec![Issue::new(1, "Bug", IssueSeverity::Minor, "main.rs")],
2174            timestamp: Utc::now().to_rfc3339(),
2175        });
2176
2177        assert!(al.last_verification.is_some());
2178        assert_eq!(al.issues.len(), 1);
2179    }
2180
2181    #[test]
2182    fn test_is_clean() {
2183        let mut al = AutonomousLoop::new("Test");
2184        assert!(!al.is_clean()); // No verification yet
2185
2186        al.record_verification(VerificationResult {
2187            build_passed: true,
2188            tests_passed: true,
2189            type_check_passed: true,
2190            lint_passed: true,
2191            issues: vec![],
2192            timestamp: Utc::now().to_rfc3339(),
2193        });
2194        assert!(al.is_clean());
2195    }
2196
2197    #[test]
2198    fn test_is_dirty_with_issue() {
2199        let mut al = AutonomousLoop::new("Test");
2200        al.record_verification(VerificationResult {
2201            build_passed: true,
2202            tests_passed: true,
2203            type_check_passed: true,
2204            lint_passed: true,
2205            issues: vec![Issue::new(1, "Bug", IssueSeverity::Minor, "main.rs")],
2206            timestamp: Utc::now().to_rfc3339(),
2207        });
2208        assert!(!al.is_clean());
2209    }
2210
2211    // ── Git integration tests ──────────────────────────────────────
2212
2213    #[test]
2214    fn test_record_commit() {
2215        let mut al = AutonomousLoop::new("Test");
2216        al.record_commit("deadbeef");
2217        assert_eq!(al.last_commit, Some("deadbeef".to_string()));
2218    }
2219
2220    #[test]
2221    fn test_set_git_clean() {
2222        let mut al = AutonomousLoop::new("Test");
2223        al.set_git_clean(false);
2224        assert!(!al.git_clean);
2225        al.set_git_clean(true);
2226        assert!(al.git_clean);
2227    }
2228
2229    // ── Status and diagnostics tests ───────────────────────────────
2230
2231    #[test]
2232    fn test_status_snapshot() {
2233        let mut al = AutonomousLoop::new("Build auth system");
2234        al.start().unwrap();
2235        al.add_task(LoopTask::new("T1", "Create module"));
2236        al.compute_batches().unwrap();
2237
2238        let status = al.status();
2239        assert_eq!(status.task, "Build auth system");
2240        assert_eq!(status.iteration, 1);
2241        assert_eq!(status.phase, LoopPhase::Design);
2242        assert_eq!(status.batches.len(), 1);
2243        assert!(status.git_clean);
2244    }
2245
2246    #[test]
2247    fn test_status_render() {
2248        let mut al = AutonomousLoop::new("Test task");
2249        al.start().unwrap();
2250        al.add_task(LoopTask::new("T1", "Foundation"));
2251        al.add_task(LoopTask::new("T2", "Build on it").depends_on("T1"));
2252        al.compute_batches().unwrap();
2253        al.record_commit("abc1234");
2254
2255        let status = al.status();
2256        let rendered = status.render();
2257
2258        assert!(rendered.contains("AUTONOMOUS LOOP STATUS"));
2259        assert!(rendered.contains("Test task"));
2260        assert!(rendered.contains("DESIGN"));
2261        assert!(rendered.contains("T1"));
2262        assert!(rendered.contains("abc1234"));
2263    }
2264
2265    #[test]
2266    fn test_diagnostic() {
2267        let mut al = AutonomousLoop::new("Test task");
2268        al.start().unwrap();
2269        al.emergency_stop("Hit max iterations");
2270
2271        let diag = al.diagnostic();
2272        assert!(diag.contains("AUTONOMOUS LOOP DIAGNOSTIC"));
2273        assert!(diag.contains("Test task"));
2274        assert!(diag.contains("Hit max iterations"));
2275    }
2276
2277    #[test]
2278    fn test_diagnostic_with_issues() {
2279        let mut al = AutonomousLoop::new("Test");
2280        al.start().unwrap();
2281
2282        let mut issue = Issue::new(1, "Critical build failure", IssueSeverity::Critical, "build.rs:1");
2283        issue.set_verdict(IssueVerdict::Confirmed, "Build won't compile");
2284        issue.set_verdict(IssueVerdict::Confirmed, "Still broken");
2285        al.add_issue(issue);
2286
2287        let diag = al.diagnostic();
2288        assert!(diag.contains("Critical build failure"));
2289        assert!(diag.contains("build.rs:1"));
2290    }
2291
2292    // ── Skill prompt test ──────────────────────────────────────────
2293
2294    #[test]
2295    fn test_skill_instructions() {
2296        let prompt = AutonomousLoopSkill::skill_instructions();
2297        assert!(prompt.contains("Autonomous Development Loop"));
2298        assert!(prompt.contains("DESIGN"));
2299        assert!(prompt.contains("PLAN"));
2300        assert!(prompt.contains("IMPLEMENT"));
2301        assert!(prompt.contains("VERIFY"));
2302        assert!(prompt.contains("RE-VALIDATE"));
2303        assert!(prompt.contains("FIX"));
2304        assert!(prompt.contains("DONE"));
2305        assert!(prompt.contains("8"));
2306        assert!(prompt.contains("Emergency Stop"));
2307        assert!(prompt.contains("Anti-Rationalization"));
2308        assert!(prompt.contains("Red Flags"));
2309    }
2310
2311    // ── Full loop integration test ─────────────────────────────────
2312
2313    #[test]
2314    fn test_full_loop_happy_path() {
2315        let mut al = AutonomousLoop::new("Implement caching").with_max_iterations(3);
2316        al.start().unwrap();
2317        assert_eq!(al.iteration, 1);
2318        assert_eq!(al.phase, LoopPhase::Design);
2319
2320        // Design → Plan
2321        al.advance().unwrap();
2322        assert_eq!(al.phase, LoopPhase::Plan);
2323
2324        // Add tasks
2325        al.add_task(LoopTask::new("T1", "Create cache module").touches("src/cache.rs"));
2326        al.add_task(
2327            LoopTask::new("T2", "Add tests")
2328                .depends_on("T1")
2329                .touches("tests/cache_test.rs"),
2330        );
2331        al.compute_batches().unwrap();
2332        assert_eq!(al.total_batch_count(), 2);
2333
2334        // Plan → Implement
2335        al.advance().unwrap();
2336        assert_eq!(al.phase, LoopPhase::Implement);
2337
2338        // Implement → Verify
2339        al.advance().unwrap();
2340        assert_eq!(al.phase, LoopPhase::Verify);
2341
2342        // Record clean verification
2343        al.record_verification(VerificationResult {
2344            build_passed: true,
2345            tests_passed: true,
2346            type_check_passed: true,
2347            lint_passed: true,
2348            issues: vec![],
2349            timestamp: Utc::now().to_rfc3339(),
2350        });
2351        assert!(al.is_clean());
2352
2353        // Verify → Done
2354        al.advance().unwrap();
2355        assert_eq!(al.phase, LoopPhase::Done);
2356    }
2357
2358    #[test]
2359    fn test_full_loop_with_fix_cycle() {
2360        let mut al = AutonomousLoop::new("Fix bugs").with_max_iterations(4);
2361        al.start().unwrap();
2362
2363        // Fast forward to Verify
2364        al.set_phase(LoopPhase::Verify);
2365
2366        // Record verification with issue
2367        al.record_verification(VerificationResult {
2368            build_passed: false,
2369            tests_passed: false,
2370            type_check_passed: true,
2371            lint_passed: true,
2372            issues: vec![Issue::new(
2373                1,
2374                "Build fails",
2375                IssueSeverity::Critical,
2376                "main.rs:10",
2377            )
2378            .with_evidence("undefined variable")],
2379            timestamp: Utc::now().to_rfc3339(),
2380        });
2381        assert!(!al.is_clean());
2382
2383        // Verify → ReValidate
2384        al.advance().unwrap();
2385        assert_eq!(al.phase, LoopPhase::ReValidate);
2386
2387        // Confirm the issue
2388        al.issues[0].set_verdict(IssueVerdict::Confirmed, "Build output reproduced");
2389
2390        // ReValidate → Fix
2391        al.advance().unwrap();
2392        assert_eq!(al.phase, LoopPhase::Fix);
2393
2394        // Fix the issue
2395        al.issues[0].mark_fixed(Some("fix123".to_string()));
2396        al.record_commit("fix123");
2397
2398        // Fix → Verify (iteration 2)
2399        al.advance().unwrap();
2400        assert_eq!(al.phase, LoopPhase::Verify);
2401        assert_eq!(al.iteration, 2);
2402
2403        // Now record clean verification
2404        al.record_verification(VerificationResult {
2405            build_passed: true,
2406            tests_passed: true,
2407            type_check_passed: true,
2408            lint_passed: true,
2409            issues: vec![],
2410            timestamp: Utc::now().to_rfc3339(),
2411        });
2412
2413        // Verify → Done
2414        al.advance().unwrap();
2415        assert_eq!(al.phase, LoopPhase::Done);
2416    }
2417
2418    // ── Serialization tests ────────────────────────────────────────
2419
2420    #[test]
2421    fn test_loop_serde_roundtrip() {
2422        let mut al = AutonomousLoop::new("Serialize test");
2423        al.start().unwrap();
2424        al.add_task(LoopTask::new("T1", "Do work").touches("src/main.rs"));
2425        al.compute_batches().unwrap();
2426        al.add_issue(Issue::new(1, "Bug", IssueSeverity::Important, "main.rs"));
2427        al.record_commit("abc123");
2428
2429        let json = serde_json::to_string_pretty(&al).unwrap();
2430        let parsed: AutonomousLoop = serde_json::from_str(&json).unwrap();
2431
2432        assert_eq!(parsed.task, al.task);
2433        assert_eq!(parsed.iteration, al.iteration);
2434        assert_eq!(parsed.phase, al.phase);
2435        assert_eq!(parsed.tasks.len(), 1);
2436        assert_eq!(parsed.batches.len(), 1);
2437        assert_eq!(parsed.issues.len(), 1);
2438        assert_eq!(parsed.last_commit, Some("abc123".to_string()));
2439    }
2440
2441    #[test]
2442    fn test_status_serde_roundtrip() {
2443        let al = AutonomousLoop::new("Status test");
2444        let status = al.status();
2445
2446        let json = serde_json::to_string(&status).unwrap();
2447        let parsed: LoopStatus = serde_json::from_str(&json).unwrap();
2448
2449        assert_eq!(parsed.task, status.task);
2450        assert_eq!(parsed.iteration, status.iteration);
2451        assert_eq!(parsed.phase, status.phase);
2452    }
2453
2454    #[test]
2455    fn test_verification_result_serde_roundtrip() {
2456        let result = VerificationResult {
2457            build_passed: true,
2458            tests_passed: false,
2459            type_check_passed: true,
2460            lint_passed: false,
2461            issues: vec![Issue::new(1, "Test fail", IssueSeverity::Important, "test.rs")],
2462            timestamp: Utc::now().to_rfc3339(),
2463        };
2464
2465        let json = serde_json::to_string(&result).unwrap();
2466        let parsed: VerificationResult = serde_json::from_str(&json).unwrap();
2467
2468        assert!(parsed.build_passed);
2469        assert!(!parsed.tests_passed);
2470        assert_eq!(parsed.issues.len(), 1);
2471    }
2472
2473    #[test]
2474    fn test_issue_serde_roundtrip() {
2475        let mut issue = Issue::new(1, "Bug", IssueSeverity::Critical, "main.rs:10")
2476            .with_evidence("error: undefined")
2477            .reproducible(true)
2478            .fix_approach("Add variable declaration");
2479        issue.set_verdict(IssueVerdict::Confirmed, "Reproduced on main");
2480        issue.mark_fixed(Some("fix456".to_string()));
2481
2482        let json = serde_json::to_string(&issue).unwrap();
2483        let parsed: Issue = serde_json::from_str(&json).unwrap();
2484
2485        assert_eq!(parsed.number, 1);
2486        assert_eq!(parsed.severity, IssueSeverity::Critical);
2487        assert!(parsed.reproducible);
2488        assert_eq!(parsed.verdict, Some(IssueVerdict::Confirmed));
2489        assert!(parsed.fixed);
2490        assert_eq!(parsed.fix_commit, Some("fix456".to_string()));
2491    }
2492
2493    #[test]
2494    fn test_loop_task_serde_roundtrip() {
2495        let task = LoopTask::new("T1", "Create module")
2496            .touches("src/mod.rs")
2497            .depends_on("T0")
2498            .verify_with("cargo test");
2499
2500        let json = serde_json::to_string(&task).unwrap();
2501        let parsed: LoopTask = serde_json::from_str(&json).unwrap();
2502
2503        assert_eq!(parsed.id, "T1");
2504        assert_eq!(parsed.touches_files.len(), 1);
2505        assert_eq!(parsed.depends_on, vec!["T0"]);
2506        assert_eq!(parsed.verification, "cargo test");
2507    }
2508}
oxi/skills/autonomous_loop.rs

oxi/skills/
autonomous_loop.rs