Skip to main content

oven_cli/pipeline/
runner.rs

1use std::{
2    collections::HashSet,
3    path::{Path, PathBuf},
4    sync::Arc,
5    time::Duration,
6};
7
8use anyhow::Result;
9use tokio::{sync::Semaphore, task::JoinSet};
10use tokio_util::sync::CancellationToken;
11use tracing::{error, info, warn};
12
13use super::{
14    executor::{PipelineExecutor, PipelineOutcome},
15    graph::DependencyGraph,
16};
17use crate::{
18    agents::Complexity,
19    db::graph::NodeState,
20    git,
21    issues::PipelineIssue,
22    pipeline::{executor::generate_run_id, graph::GraphNode},
23    process::CommandRunner,
24};
25
26/// Shared mutable state for the polling scheduler.
27///
28/// The `DependencyGraph` is the single source of truth for issue states,
29/// dependency edges, and scheduling decisions.
30struct SchedulerState {
31    graph: DependencyGraph,
32    semaphore: Arc<Semaphore>,
33    tasks: JoinSet<(u32, Result<PipelineOutcome>)>,
34}
35
36/// Run the pipeline for a batch of issues using planner-driven sequencing.
37///
38/// Used for the explicit-IDs path (`oven on 42,43`). Calls the planner with no
39/// in-flight context, builds a `DependencyGraph`, then runs layers sequentially
40/// (issues within each layer run in parallel). Falls back to all-parallel if the
41/// planner fails.
42pub async fn run_batch<R: CommandRunner + 'static>(
43    executor: &Arc<PipelineExecutor<R>>,
44    issues: Vec<PipelineIssue>,
45    max_parallel: usize,
46    auto_merge: bool,
47) -> Result<()> {
48    let session_id = generate_run_id();
49    let mut graph = if let Some(plan) = executor.plan_issues(&issues, &[]).await {
50        info!(nodes = plan.nodes.len(), total = plan.total_issues, "planner produced a plan");
51        DependencyGraph::from_planner_output(&session_id, &plan, &issues)
52    } else {
53        warn!("planner failed, falling back to all-parallel execution");
54        let mut g = DependencyGraph::new(&session_id);
55        for issue in &issues {
56            g.add_node(standalone_node(issue));
57        }
58        g
59    };
60
61    save_graph(&graph, executor).await;
62
63    let semaphore = Arc::new(Semaphore::new(max_parallel));
64    let mut had_errors = false;
65
66    while !graph.all_terminal() {
67        let ready = graph.ready_issues();
68        if ready.is_empty() {
69            warn!("no ready issues but graph is not terminal, breaking to avoid infinite loop");
70            save_graph(&graph, executor).await;
71            break;
72        }
73
74        let mut tasks: JoinSet<(u32, Result<PipelineOutcome>)> = JoinSet::new();
75
76        for num in &ready {
77            graph.transition(*num, NodeState::InFlight);
78        }
79        save_graph(&graph, executor).await;
80
81        for num in ready {
82            let node = graph.node(num).expect("ready issue must exist in graph");
83            let issue = node.issue.clone().expect("batch issues have issue attached");
84            let complexity = node.complexity.parse::<Complexity>().ok();
85            let sem = Arc::clone(&semaphore);
86            let exec = Arc::clone(executor);
87
88            tasks.spawn(async move {
89                let permit = match sem.acquire_owned().await {
90                    Ok(p) => p,
91                    Err(e) => return (num, Err(anyhow::anyhow!("semaphore closed: {e}"))),
92                };
93                let result = exec.run_issue_pipeline(&issue, auto_merge, complexity).await;
94                let outcome = match result {
95                    Ok(outcome) => {
96                        if let Err(e) = exec.finalize_merge(&outcome, &issue).await {
97                            warn!(issue = num, error = %e, "finalize_merge failed");
98                        }
99                        Ok(outcome)
100                    }
101                    Err(e) => Err(e),
102                };
103                drop(permit);
104                (num, outcome)
105            });
106        }
107
108        let mut merged_target_dirs: HashSet<PathBuf> = HashSet::new();
109        while let Some(join_result) = tasks.join_next().await {
110            match join_result {
111                Ok((number, Ok(ref outcome))) => {
112                    info!(issue = number, "pipeline completed successfully");
113                    graph.set_pr_number(number, outcome.pr_number);
114                    graph.set_run_id(number, &outcome.run_id);
115                    graph.transition(number, NodeState::Merged);
116                    if auto_merge {
117                        merged_target_dirs.insert(outcome.target_dir.clone());
118                    }
119                }
120                Ok((number, Err(ref e))) => {
121                    error!(issue = number, error = %e, "pipeline failed for issue");
122                    graph.transition(number, NodeState::Failed);
123                    let blocked = graph.propagate_failure(number);
124                    for b in &blocked {
125                        warn!(issue = b, blocked_by = number, "transitively failed");
126                    }
127                    had_errors = true;
128                }
129                Err(e) => {
130                    error!(error = %e, "pipeline task panicked");
131                    had_errors = true;
132                }
133            }
134        }
135
136        // After merges land on the remote, update the local base branch so the
137        // next layer's worktrees fork from post-merge state.
138        if !merged_target_dirs.is_empty() && !graph.all_terminal() {
139            fetch_base_branches(&merged_target_dirs).await;
140        }
141
142        save_graph(&graph, executor).await;
143    }
144
145    if had_errors {
146        anyhow::bail!("one or more pipelines failed in batch");
147    }
148    Ok(())
149}
150
151/// Poll for new issues and run them through the pipeline.
152///
153/// Unlike `run_batch`, this function continuously polls for new issues even while
154/// existing pipelines are running. The `DependencyGraph` is the single source of
155/// truth: `ready_issues()` drives scheduling, `transition()` replaces manual map
156/// mutations, and `propagate_failure()` handles dependency cascades.
157pub async fn polling_loop<R: CommandRunner + 'static>(
158    executor: Arc<PipelineExecutor<R>>,
159    auto_merge: bool,
160    cancel_token: CancellationToken,
161) -> Result<()> {
162    let poll_interval = Duration::from_secs(executor.config.pipeline.poll_interval);
163    let max_parallel = executor.config.pipeline.max_parallel as usize;
164    let ready_label = executor.config.labels.ready.clone();
165
166    // Try loading an existing graph session (crash recovery), or create a new one.
167    let graph = load_or_create_graph(&executor).await;
168
169    let mut sched = SchedulerState {
170        graph,
171        semaphore: Arc::new(Semaphore::new(max_parallel)),
172        tasks: JoinSet::new(),
173    };
174
175    info!(poll_interval_secs = poll_interval.as_secs(), max_parallel, "continuous polling started");
176
177    loop {
178        tokio::select! {
179            () = cancel_token.cancelled() => {
180                info!("shutdown signal received, waiting for in-flight pipelines");
181                drain_tasks(&mut sched, &executor).await;
182                break;
183            }
184            () = tokio::time::sleep(poll_interval) => {
185                poll_and_spawn(&executor, &ready_label, &mut sched, auto_merge).await;
186            }
187            Some(result) = sched.tasks.join_next(), if !sched.tasks.is_empty() => {
188                handle_task_result(result, &mut sched.graph, &executor).await;
189            }
190        }
191    }
192
193    Ok(())
194}
195
196/// Load an existing active graph session from DB, or create a new empty one.
197async fn load_or_create_graph<R: CommandRunner>(
198    executor: &Arc<PipelineExecutor<R>>,
199) -> DependencyGraph {
200    let conn = executor.db.lock().await;
201    match crate::db::graph::get_active_session(&conn) {
202        Ok(Some(session_id)) => match DependencyGraph::from_db(&conn, &session_id) {
203            Ok(graph) => {
204                info!(session_id = %session_id, nodes = graph.node_count(), "resumed existing graph session");
205                return graph;
206            }
207            Err(e) => {
208                warn!(error = %e, "failed to load graph session, starting fresh");
209            }
210        },
211        Ok(None) => {}
212        Err(e) => {
213            warn!(error = %e, "failed to check for active graph session");
214        }
215    }
216    let session_id = generate_run_id();
217    info!(session_id = %session_id, "starting new graph session");
218    DependencyGraph::new(&session_id)
219}
220
221/// Drain remaining tasks on shutdown.
222async fn drain_tasks<R: CommandRunner>(
223    sched: &mut SchedulerState,
224    executor: &Arc<PipelineExecutor<R>>,
225) {
226    while let Some(result) = sched.tasks.join_next().await {
227        handle_task_result(result, &mut sched.graph, executor).await;
228    }
229}
230
231/// Process a completed pipeline task: update graph state and persist.
232async fn handle_task_result<R: CommandRunner>(
233    result: Result<(u32, Result<PipelineOutcome>), tokio::task::JoinError>,
234    graph: &mut DependencyGraph,
235    executor: &Arc<PipelineExecutor<R>>,
236) {
237    match result {
238        Ok((number, Ok(ref outcome))) => {
239            info!(issue = number, "pipeline completed successfully");
240            graph.set_pr_number(number, outcome.pr_number);
241            graph.set_run_id(number, &outcome.run_id);
242            graph.transition(number, NodeState::AwaitingMerge);
243        }
244        Ok((number, Err(ref e))) => {
245            error!(issue = number, error = %e, "pipeline failed for issue");
246            graph.transition(number, NodeState::Failed);
247            let blocked = graph.propagate_failure(number);
248            for b in &blocked {
249                warn!(issue = b, blocked_by = number, "transitively failed");
250            }
251        }
252        Err(e) => {
253            error!(error = %e, "pipeline task panicked");
254            return;
255        }
256    }
257    save_graph(graph, executor).await;
258}
259
260/// Check `AwaitingMerge` nodes and transition them to `Merged` or `Failed`
261/// based on the PR's actual state on GitHub.
262async fn poll_awaiting_merges<R: CommandRunner + 'static>(
263    graph: &mut DependencyGraph,
264    executor: &Arc<PipelineExecutor<R>>,
265) {
266    let awaiting = graph.awaiting_merge();
267    if awaiting.is_empty() {
268        return;
269    }
270
271    let mut merged_target_dirs: HashSet<PathBuf> = HashSet::new();
272    for num in awaiting {
273        let Some(node) = graph.node(num) else { continue };
274        let Some(pr_number) = node.pr_number else {
275            warn!(issue = num, "AwaitingMerge node has no PR number, skipping");
276            continue;
277        };
278        let run_id = node.run_id.clone().unwrap_or_default();
279        let issue = node.issue.clone();
280        let target_repo = node.target_repo.clone();
281
282        // Resolve which repo directory to query for PR state.
283        // Multi-repo PRs live in the target repo, not the god repo.
284        let pr_repo_dir = match executor.resolve_target_dir(target_repo.as_ref()) {
285            Ok((dir, _)) => dir,
286            Err(e) => {
287                warn!(issue = num, error = %e, "failed to resolve target dir for PR state check");
288                continue;
289            }
290        };
291
292        let pr_state = match executor.github.get_pr_state_in(pr_number, &pr_repo_dir).await {
293            Ok(s) => s,
294            Err(e) => {
295                warn!(issue = num, pr = pr_number, error = %e, "failed to check PR state");
296                continue;
297            }
298        };
299
300        match pr_state {
301            crate::github::PrState::Merged => {
302                info!(issue = num, pr = pr_number, "PR merged, finalizing");
303                if let Some(ref issue) = issue {
304                    match executor.reconstruct_outcome(issue, &run_id, pr_number).await {
305                        Ok(outcome) => {
306                            if let Err(e) = executor.finalize_merge(&outcome, issue).await {
307                                warn!(issue = num, error = %e, "finalize_merge after poll failed");
308                            }
309                        }
310                        Err(e) => {
311                            warn!(issue = num, error = %e, "failed to reconstruct outcome");
312                        }
313                    }
314                } else {
315                    warn!(
316                        issue = num,
317                        pr = pr_number,
318                        "node restored from DB has no PipelineIssue, \
319                         skipping finalization (labels and worktree may need manual cleanup)"
320                    );
321                }
322                graph.transition(num, NodeState::Merged);
323                merged_target_dirs.insert(pr_repo_dir);
324            }
325            crate::github::PrState::Closed => {
326                warn!(issue = num, pr = pr_number, "PR closed without merge, marking failed");
327                graph.transition(num, NodeState::Failed);
328                let blocked = graph.propagate_failure(num);
329                for b in &blocked {
330                    warn!(issue = b, blocked_by = num, "transitively failed (PR closed)");
331                }
332            }
333            crate::github::PrState::Open => {
334                // Still open, keep waiting
335            }
336        }
337    }
338
339    // After merges land, update each affected repo's base branch so the next
340    // layer's worktrees fork from post-merge state.
341    if !merged_target_dirs.is_empty() {
342        fetch_base_branches(&merged_target_dirs).await;
343    }
344
345    save_graph(graph, executor).await;
346}
347
348/// Single poll cycle: plan new issues, promote ready ones, and spawn tasks.
349async fn poll_and_spawn<R: CommandRunner + 'static>(
350    executor: &Arc<PipelineExecutor<R>>,
351    ready_label: &str,
352    sched: &mut SchedulerState,
353    auto_merge: bool,
354) {
355    // Check if any AwaitingMerge PRs have been merged
356    poll_awaiting_merges(&mut sched.graph, executor).await;
357
358    let ready_issues = match executor.issues.get_ready_issues(ready_label).await {
359        Ok(i) => i,
360        Err(e) => {
361            error!(error = %e, "failed to fetch issues");
362            return;
363        }
364    };
365
366    let ready_numbers: HashSet<u32> = ready_issues.iter().map(|i| i.number).collect();
367
368    // Clean stale nodes: remove Pending nodes whose issues disappeared from the ready list
369    clean_stale_nodes(&mut sched.graph, &ready_numbers);
370
371    // Filter to genuinely new issues not already in the graph
372    let new_issues: Vec<_> =
373        ready_issues.into_iter().filter(|i| !sched.graph.contains(i.number)).collect();
374
375    // Plan and merge new issues into the graph
376    if !new_issues.is_empty() {
377        info!(count = new_issues.len(), "found new issues to evaluate");
378        let graph_context = sched.graph.to_graph_context();
379
380        if let Some(plan) = executor.plan_issues(&new_issues, &graph_context).await {
381            info!(nodes = plan.nodes.len(), total = plan.total_issues, "planner produced a plan");
382            sched.graph.merge_planner_output(&plan, &new_issues);
383        } else {
384            warn!("planner failed, adding all new issues as independent nodes");
385            add_independent_nodes(&mut sched.graph, &new_issues);
386        }
387
388        save_graph(&sched.graph, executor).await;
389    }
390
391    // Spawn ready issues
392    let to_spawn = collect_ready_issues(&mut sched.graph);
393    if to_spawn.is_empty() {
394        if new_issues.is_empty() {
395            info!("no actionable issues, waiting");
396        }
397        return;
398    }
399
400    save_graph(&sched.graph, executor).await;
401    spawn_issues(to_spawn, executor, sched, auto_merge);
402}
403
404/// Remove graph nodes that are still `Pending` but no longer in the provider's ready list.
405fn clean_stale_nodes(graph: &mut DependencyGraph, ready_numbers: &HashSet<u32>) {
406    let stale: Vec<u32> = graph
407        .all_issues()
408        .into_iter()
409        .filter(|num| {
410            !ready_numbers.contains(num)
411                && graph.node(*num).is_some_and(|n| n.state == NodeState::Pending)
412        })
413        .collect();
414    if !stale.is_empty() {
415        info!(count = stale.len(), "removing stale pending nodes");
416        for num in stale {
417            graph.remove_node(num);
418        }
419    }
420}
421
422/// Add issues to the graph as independent nodes (no edges) when the planner fails.
423fn add_independent_nodes(graph: &mut DependencyGraph, issues: &[PipelineIssue]) {
424    for issue in issues {
425        if !graph.contains(issue.number) {
426            graph.add_node(standalone_node(issue));
427        }
428    }
429}
430
431/// Find ready issues in the graph, transition them to `InFlight`, return spawn data.
432fn collect_ready_issues(graph: &mut DependencyGraph) -> Vec<(u32, PipelineIssue, Complexity)> {
433    let ready = graph.ready_issues();
434    let mut to_spawn = Vec::new();
435
436    for num in ready {
437        let Some(node) = graph.node(num) else { continue };
438        let Some(issue) = node.issue.clone() else {
439            warn!(issue = num, "ready node has no PipelineIssue attached, skipping");
440            continue;
441        };
442        let complexity = node.complexity.parse::<Complexity>().unwrap_or(Complexity::Full);
443        graph.transition(num, NodeState::InFlight);
444        to_spawn.push((num, issue, complexity));
445    }
446
447    to_spawn
448}
449
450/// Spawn pipeline tasks for a set of issues.
451fn spawn_issues<R: CommandRunner + 'static>(
452    to_spawn: Vec<(u32, PipelineIssue, Complexity)>,
453    executor: &Arc<PipelineExecutor<R>>,
454    sched: &mut SchedulerState,
455    auto_merge: bool,
456) {
457    for (number, issue, complexity) in to_spawn {
458        let sem = Arc::clone(&sched.semaphore);
459        let exec = Arc::clone(executor);
460
461        sched.tasks.spawn(async move {
462            let permit = match sem.acquire_owned().await {
463                Ok(p) => p,
464                Err(e) => return (number, Err(anyhow::anyhow!("semaphore closed: {e}"))),
465            };
466            let outcome = exec.run_issue_pipeline(&issue, auto_merge, Some(complexity)).await;
467            drop(permit);
468            (number, outcome)
469        });
470    }
471}
472
473/// Create a `GraphNode` for an issue with no planner metadata.
474fn standalone_node(issue: &PipelineIssue) -> GraphNode {
475    GraphNode {
476        issue_number: issue.number,
477        title: issue.title.clone(),
478        area: String::new(),
479        predicted_files: Vec::new(),
480        has_migration: false,
481        complexity: Complexity::Full.to_string(),
482        state: NodeState::Pending,
483        pr_number: None,
484        run_id: None,
485        target_repo: issue.target_repo.clone(),
486        issue: Some(issue.clone()),
487    }
488}
489
490/// Update the base branch in every repo where merges landed.
491///
492/// Without this, new worktrees created for the next layer would fork from a
493/// stale local ref, causing the implementer to work against pre-merge code.
494/// In multi-repo mode, merges may land in different target repos, so we fetch
495/// the base branch in each distinct repo directory.
496async fn fetch_base_branches(repo_dirs: &HashSet<PathBuf>) {
497    for repo_dir in repo_dirs {
498        fetch_base_branch_in(repo_dir).await;
499    }
500}
501
502/// Fetch the base branch for a single repo directory.
503async fn fetch_base_branch_in(repo_dir: &Path) {
504    match git::default_branch(repo_dir).await {
505        Ok(branch) => {
506            if let Err(e) = git::fetch_branch(repo_dir, &branch).await {
507                warn!(
508                    repo = %repo_dir.display(), error = %e,
509                    "failed to fetch base branch after merge"
510                );
511            } else {
512                info!(
513                    repo = %repo_dir.display(), branch = %branch,
514                    "updated base branch after merge"
515                );
516            }
517        }
518        Err(e) => {
519            warn!(
520                repo = %repo_dir.display(), error = %e,
521                "failed to detect base branch for post-merge fetch"
522            );
523        }
524    }
525}
526
527/// Persist graph state to the database.
528async fn save_graph<R: CommandRunner>(
529    graph: &DependencyGraph,
530    executor: &Arc<PipelineExecutor<R>>,
531) {
532    let conn = executor.db.lock().await;
533    if let Err(e) = graph.save_to_db(&conn) {
534        warn!(error = %e, "failed to persist dependency graph");
535    }
536}
537
538#[cfg(test)]
539mod tests {
540    use std::path::PathBuf;
541
542    use tokio::sync::Mutex;
543
544    use super::*;
545    use crate::{
546        agents::PlannerGraphOutput,
547        config::Config,
548        github::GhClient,
549        issues::{IssueOrigin, IssueProvider, github::GithubIssueProvider},
550        process::{AgentResult, CommandOutput, MockCommandRunner},
551    };
552
553    fn mock_runner_for_batch() -> MockCommandRunner {
554        let mut mock = MockCommandRunner::new();
555        mock.expect_run_gh().returning(|_, _| {
556            Box::pin(async {
557                Ok(CommandOutput {
558                    stdout: "https://github.com/user/repo/pull/1\n".to_string(),
559                    stderr: String::new(),
560                    success: true,
561                })
562            })
563        });
564        mock.expect_run_claude().returning(|_, _, _, _, _| {
565            Box::pin(async {
566                Ok(AgentResult {
567                    cost_usd: 1.0,
568                    duration: Duration::from_secs(5),
569                    turns: 3,
570                    output: r#"{"findings":[],"summary":"clean"}"#.to_string(),
571                    session_id: "sess-1".to_string(),
572                    success: true,
573                })
574            })
575        });
576        mock
577    }
578
579    fn make_github_provider(gh: &Arc<GhClient<MockCommandRunner>>) -> Arc<dyn IssueProvider> {
580        Arc::new(GithubIssueProvider::new(Arc::clone(gh), "target_repo"))
581    }
582
583    fn make_issue(number: u32) -> PipelineIssue {
584        PipelineIssue {
585            number,
586            title: format!("Issue #{number}"),
587            body: String::new(),
588            source: IssueOrigin::Github,
589            target_repo: None,
590            author: None,
591        }
592    }
593
594    #[tokio::test]
595    async fn cancellation_stops_polling() {
596        let cancel = CancellationToken::new();
597        let runner = Arc::new(mock_runner_for_batch());
598        let github = Arc::new(GhClient::new(mock_runner_for_batch(), std::path::Path::new("/tmp")));
599        let issues = make_github_provider(&github);
600        let db = Arc::new(Mutex::new(crate::db::open_in_memory().unwrap()));
601
602        let mut config = Config::default();
603        config.pipeline.poll_interval = 3600; // very long so we don't actually poll
604
605        let executor = Arc::new(PipelineExecutor {
606            runner,
607            github,
608            issues,
609            db,
610            config,
611            cancel_token: cancel.clone(),
612            repo_dir: PathBuf::from("/tmp"),
613        });
614
615        let cancel_clone = cancel.clone();
616        let handle = tokio::spawn(async move { polling_loop(executor, false, cancel_clone).await });
617
618        // Cancel immediately
619        cancel.cancel();
620
621        let result = handle.await.unwrap();
622        assert!(result.is_ok());
623    }
624
625    #[tokio::test]
626    async fn cancellation_exits_within_timeout() {
627        let cancel = CancellationToken::new();
628        let runner = Arc::new(mock_runner_for_batch());
629        let github = Arc::new(GhClient::new(mock_runner_for_batch(), std::path::Path::new("/tmp")));
630        let issues = make_github_provider(&github);
631        let db = Arc::new(Mutex::new(crate::db::open_in_memory().unwrap()));
632
633        let mut config = Config::default();
634        config.pipeline.poll_interval = 3600;
635
636        let executor = Arc::new(PipelineExecutor {
637            runner,
638            github,
639            issues,
640            db,
641            config,
642            cancel_token: cancel.clone(),
643            repo_dir: PathBuf::from("/tmp"),
644        });
645
646        let cancel_clone = cancel.clone();
647        let handle = tokio::spawn(async move { polling_loop(executor, false, cancel_clone).await });
648
649        cancel.cancel();
650
651        let result = tokio::time::timeout(Duration::from_secs(5), handle)
652            .await
653            .expect("polling loop should exit within timeout")
654            .unwrap();
655        assert!(result.is_ok());
656    }
657
/// A successful task result stores the PR number and run id on the node and
/// moves it to `AwaitingMerge`.
#[test]
fn handle_task_success_transitions_to_awaiting_merge() {
    let runtime = tokio::runtime::Builder::new_current_thread().build().unwrap();
    runtime.block_on(async {
        let gh_client =
            Arc::new(GhClient::new(mock_runner_for_batch(), std::path::Path::new("/tmp")));
        let executor = Arc::new(PipelineExecutor {
            runner: Arc::new(mock_runner_for_batch()),
            issues: make_github_provider(&gh_client),
            github: gh_client,
            db: Arc::new(Mutex::new(crate::db::open_in_memory().unwrap())),
            config: Config::default(),
            cancel_token: CancellationToken::new(),
            repo_dir: PathBuf::from("/tmp"),
        });

        // A single node that is already running.
        let mut graph = DependencyGraph::new("test");
        graph.add_node(standalone_node(&make_issue(1)));
        graph.transition(1, NodeState::InFlight);

        let finished = PipelineOutcome {
            run_id: "run-abc".to_string(),
            pr_number: 42,
            branch: Some("oven/issue-1-abc12345".to_string()),
            worktree_path: PathBuf::from("/tmp/wt"),
            target_dir: PathBuf::from("/tmp"),
        };

        handle_task_result(Ok((1, Ok(finished))), &mut graph, &executor).await;

        let node = graph.node(1).unwrap();
        assert_eq!(node.state, NodeState::AwaitingMerge);
        assert_eq!(node.pr_number, Some(42));
        assert_eq!(node.run_id.as_deref(), Some("run-abc"));
    });
}
698
/// A failed task marks its own node `Failed` and cascades to the node that
/// depends on it.
#[test]
fn handle_task_failure_propagates_to_dependents() {
    let runtime = tokio::runtime::Builder::new_current_thread().build().unwrap();
    runtime.block_on(async {
        let gh_client =
            Arc::new(GhClient::new(mock_runner_for_batch(), std::path::Path::new("/tmp")));
        let executor = Arc::new(PipelineExecutor {
            runner: Arc::new(mock_runner_for_batch()),
            issues: make_github_provider(&gh_client),
            github: gh_client,
            db: Arc::new(Mutex::new(crate::db::open_in_memory().unwrap())),
            config: Config::default(),
            cancel_token: CancellationToken::new(),
            repo_dir: PathBuf::from("/tmp"),
        });

        // Issue 2 depends on issue 1.
        let root = crate::agents::PlannedNode {
            number: 1,
            title: "Root".to_string(),
            area: "a".to_string(),
            predicted_files: vec![],
            has_migration: false,
            complexity: Complexity::Full,
            depends_on: vec![],
            reasoning: String::new(),
        };
        let dependent = crate::agents::PlannedNode {
            number: 2,
            title: "Dep".to_string(),
            area: "b".to_string(),
            predicted_files: vec![],
            has_migration: false,
            complexity: Complexity::Full,
            depends_on: vec![1],
            reasoning: String::new(),
        };
        let plan = PlannerGraphOutput {
            nodes: vec![root, dependent],
            total_issues: 2,
            parallel_capacity: 1,
        };

        let batch = vec![make_issue(1), make_issue(2)];
        let mut graph = DependencyGraph::from_planner_output("test", &plan, &batch);
        graph.transition(1, NodeState::InFlight);

        let failure = Ok((1, Err(anyhow::anyhow!("pipeline failed"))));
        handle_task_result(failure, &mut graph, &executor).await;

        for number in [1, 2] {
            assert_eq!(graph.node(number).unwrap().state, NodeState::Failed);
        }
    });
}
761
/// Only `Pending` nodes missing from the ready list are removed.
#[test]
fn stale_node_removed_when_issue_disappears() {
    let mut graph = DependencyGraph::new("test");
    for number in [1, 2, 3] {
        graph.add_node(standalone_node(&make_issue(number)));
    }
    graph.transition(2, NodeState::InFlight);

    // The provider still lists 1 and 2; issue 3 is gone.
    let still_listed: HashSet<u32> = [1, 2].into_iter().collect();
    clean_stale_nodes(&mut graph, &still_listed);

    // 1: Pending and still listed, so it is kept.
    assert!(graph.contains(1));
    // 2: InFlight nodes are never removed by the cleanup.
    assert!(graph.contains(2));
    // 3: Pending and no longer listed, so it is removed.
    assert!(!graph.contains(3));
}
778
/// Collecting ready issues returns each of them once and marks them `InFlight`.
#[test]
fn collect_ready_issues_transitions_to_in_flight() {
    let mut graph = DependencyGraph::new("test");
    for number in [1, 2] {
        graph.add_node(standalone_node(&make_issue(number)));
    }

    let first_pass = collect_ready_issues(&mut graph);
    assert_eq!(first_pass.len(), 2);

    // Every collected node has left the ready set.
    for number in [1, 2] {
        assert_eq!(graph.node(number).unwrap().state, NodeState::InFlight);
    }

    // A second pass finds nothing left to spawn.
    let second_pass = collect_ready_issues(&mut graph);
    assert!(second_pass.is_empty());
}
795
796    #[tokio::test]
797    async fn planner_failure_falls_back_to_all_parallel() {
798        let mut mock = MockCommandRunner::new();
799        mock.expect_run_gh().returning(|_, _| {
800            Box::pin(async {
801                Ok(CommandOutput { stdout: String::new(), stderr: String::new(), success: true })
802            })
803        });
804        mock.expect_run_claude().returning(|_, _, _, _, _| {
805            Box::pin(async {
806                Ok(AgentResult {
807                    cost_usd: 0.5,
808                    duration: Duration::from_secs(2),
809                    turns: 1,
810                    output: "I don't know how to plan".to_string(),
811                    session_id: "sess-plan".to_string(),
812                    success: true,
813                })
814            })
815        });
816
817        let runner = Arc::new(mock);
818        let github = Arc::new(GhClient::new(mock_runner_for_batch(), std::path::Path::new("/tmp")));
819        let issues_provider = make_github_provider(&github);
820        let db = Arc::new(Mutex::new(crate::db::open_in_memory().unwrap()));
821
822        let executor = Arc::new(PipelineExecutor {
823            runner,
824            github,
825            issues: issues_provider,
826            db,
827            config: Config::default(),
828            cancel_token: CancellationToken::new(),
829            repo_dir: PathBuf::from("/tmp"),
830        });
831
832        let issues = vec![PipelineIssue {
833            number: 1,
834            title: "Test".to_string(),
835            body: "body".to_string(),
836            source: IssueOrigin::Github,
837            target_repo: None,
838            author: None,
839        }];
840
841        // plan_issues returns None for unparseable output
842        let plan = executor.plan_issues(&issues, &[]).await;
843        assert!(plan.is_none());
844    }
845
846    #[test]
847    fn graph_persisted_after_state_change() {
848        let rt = tokio::runtime::Builder::new_current_thread().build().unwrap();
849        rt.block_on(async {
850            let db = Arc::new(Mutex::new(crate::db::open_in_memory().unwrap()));
851            let runner = Arc::new(mock_runner_for_batch());
852            let github =
853                Arc::new(GhClient::new(mock_runner_for_batch(), std::path::Path::new("/tmp")));
854            let issues = make_github_provider(&github);
855            let executor = Arc::new(PipelineExecutor {
856                runner,
857                github,
858                issues,
859                db: Arc::clone(&db),
860                config: Config::default(),
861                cancel_token: CancellationToken::new(),
862                repo_dir: PathBuf::from("/tmp"),
863            });
864
865            let mut graph = DependencyGraph::new("persist-test");
866            graph.add_node(standalone_node(&make_issue(1)));
867            graph.transition(1, NodeState::InFlight);
868
869            let outcome = PipelineOutcome {
870                run_id: "run-1".to_string(),
871                pr_number: 10,
872                branch: Some("oven/issue-1-abc12345".to_string()),
873                worktree_path: PathBuf::from("/tmp/wt"),
874                target_dir: PathBuf::from("/tmp"),
875            };
876            handle_task_result(Ok((1, Ok(outcome))), &mut graph, &executor).await;
877
878            // Load from DB and verify
879            let loaded = DependencyGraph::from_db(&*db.lock().await, "persist-test").unwrap();
880            assert_eq!(loaded.node(1).unwrap().state, NodeState::AwaitingMerge);
881            assert_eq!(loaded.node(1).unwrap().pr_number, Some(10));
882        });
883    }
884
885    fn mock_runner_with_pr_state(state: &'static str) -> MockCommandRunner {
886        let mut mock = MockCommandRunner::new();
887        mock.expect_run_gh().returning(move |args, _| {
888            let args = args.to_vec();
889            Box::pin(async move {
890                if args.iter().any(|a| a == "view") {
891                    Ok(CommandOutput {
892                        stdout: format!(r#"{{"state":"{state}"}}"#),
893                        stderr: String::new(),
894                        success: true,
895                    })
896                } else {
897                    Ok(CommandOutput {
898                        stdout: String::new(),
899                        stderr: String::new(),
900                        success: true,
901                    })
902                }
903            })
904        });
905        mock.expect_run_claude().returning(|_, _, _, _, _| {
906            Box::pin(async {
907                Ok(AgentResult {
908                    cost_usd: 0.0,
909                    duration: Duration::from_secs(0),
910                    turns: 0,
911                    output: String::new(),
912                    session_id: String::new(),
913                    success: true,
914                })
915            })
916        });
917        mock
918    }
919
920    fn make_merge_poll_executor(state: &'static str) -> Arc<PipelineExecutor<MockCommandRunner>> {
921        let gh_mock = mock_runner_with_pr_state(state);
922        let github = Arc::new(GhClient::new(gh_mock, std::path::Path::new("/tmp")));
923        let issues = make_github_provider(&github);
924        let db = Arc::new(Mutex::new(crate::db::open_in_memory().unwrap()));
925        let runner = Arc::new(mock_runner_with_pr_state(state));
926        Arc::new(PipelineExecutor {
927            runner,
928            github,
929            issues,
930            db,
931            config: Config::default(),
932            cancel_token: CancellationToken::new(),
933            repo_dir: PathBuf::from("/tmp"),
934        })
935    }
936
937    #[test]
938    fn merge_polling_transitions_merged_pr() {
939        let rt = tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap();
940        rt.block_on(async {
941            let executor = make_merge_poll_executor("MERGED");
942
943            let mut graph = DependencyGraph::new("merge-poll-test");
944            let mut node = standalone_node(&make_issue(1));
945            node.pr_number = Some(42);
946            node.run_id = Some("run-1".to_string());
947            graph.add_node(node);
948            graph.transition(1, NodeState::AwaitingMerge);
949
950            poll_awaiting_merges(&mut graph, &executor).await;
951
952            assert_eq!(graph.node(1).unwrap().state, NodeState::Merged);
953        });
954    }
955
956    #[test]
957    fn merge_polling_transitions_node_without_issue() {
958        let rt = tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap();
959        rt.block_on(async {
960            let executor = make_merge_poll_executor("MERGED");
961
962            let mut graph = DependencyGraph::new("db-restore-test");
963            // Simulate a node restored from DB (no PipelineIssue attached)
964            let mut node = GraphNode {
965                issue_number: 1,
966                title: "Issue #1".to_string(),
967                area: "test".to_string(),
968                predicted_files: vec![],
969                has_migration: false,
970                complexity: "full".to_string(),
971                state: NodeState::Pending,
972                pr_number: Some(42),
973                run_id: Some("run-1".to_string()),
974                issue: None,
975                target_repo: None,
976            };
977            node.state = NodeState::Pending;
978            graph.add_node(node);
979            graph.transition(1, NodeState::AwaitingMerge);
980
981            poll_awaiting_merges(&mut graph, &executor).await;
982
983            // Should still transition to Merged even without issue data
984            assert_eq!(graph.node(1).unwrap().state, NodeState::Merged);
985        });
986    }
987
988    #[test]
989    fn merge_polling_handles_closed_pr() {
990        let rt = tokio::runtime::Builder::new_current_thread().build().unwrap();
991        rt.block_on(async {
992            let executor = make_merge_poll_executor("CLOSED");
993
994            let plan = PlannerGraphOutput {
995                nodes: vec![
996                    crate::agents::PlannedNode {
997                        number: 1,
998                        title: "Root".to_string(),
999                        area: "a".to_string(),
1000                        predicted_files: vec![],
1001                        has_migration: false,
1002                        complexity: Complexity::Full,
1003                        depends_on: vec![],
1004                        reasoning: String::new(),
1005                    },
1006                    crate::agents::PlannedNode {
1007                        number: 2,
1008                        title: "Dep".to_string(),
1009                        area: "b".to_string(),
1010                        predicted_files: vec![],
1011                        has_migration: false,
1012                        complexity: Complexity::Full,
1013                        depends_on: vec![1],
1014                        reasoning: String::new(),
1015                    },
1016                ],
1017                total_issues: 2,
1018                parallel_capacity: 1,
1019            };
1020            let test_issues = vec![make_issue(1), make_issue(2)];
1021            let mut graph =
1022                DependencyGraph::from_planner_output("merge-poll-close", &plan, &test_issues);
1023            graph.transition(1, NodeState::AwaitingMerge);
1024            graph.set_pr_number(1, 42);
1025            graph.set_run_id(1, "run-1");
1026
1027            poll_awaiting_merges(&mut graph, &executor).await;
1028
1029            assert_eq!(graph.node(1).unwrap().state, NodeState::Failed);
1030            // Dependent should be transitively failed
1031            assert_eq!(graph.node(2).unwrap().state, NodeState::Failed);
1032        });
1033    }
1034
1035    #[test]
1036    fn merge_unlocks_dependent() {
1037        let rt = tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap();
1038        rt.block_on(async {
1039            let executor = make_merge_poll_executor("MERGED");
1040
1041            let plan = PlannerGraphOutput {
1042                nodes: vec![
1043                    crate::agents::PlannedNode {
1044                        number: 1,
1045                        title: "Root".to_string(),
1046                        area: "a".to_string(),
1047                        predicted_files: vec![],
1048                        has_migration: false,
1049                        complexity: Complexity::Full,
1050                        depends_on: vec![],
1051                        reasoning: String::new(),
1052                    },
1053                    crate::agents::PlannedNode {
1054                        number: 2,
1055                        title: "Dep".to_string(),
1056                        area: "b".to_string(),
1057                        predicted_files: vec![],
1058                        has_migration: false,
1059                        complexity: Complexity::Full,
1060                        depends_on: vec![1],
1061                        reasoning: String::new(),
1062                    },
1063                ],
1064                total_issues: 2,
1065                parallel_capacity: 1,
1066            };
1067            let test_issues = vec![make_issue(1), make_issue(2)];
1068            let mut graph =
1069                DependencyGraph::from_planner_output("merge-unlock", &plan, &test_issues);
1070            graph.transition(1, NodeState::AwaitingMerge);
1071            graph.set_pr_number(1, 42);
1072            graph.set_run_id(1, "run-1");
1073
1074            // Before polling: node 2 is not ready (dep 1 is AwaitingMerge)
1075            assert!(graph.ready_issues().is_empty());
1076
1077            poll_awaiting_merges(&mut graph, &executor).await;
1078
1079            // After polling: node 1 merged, node 2 should now be ready
1080            assert_eq!(graph.node(1).unwrap().state, NodeState::Merged);
1081            assert_eq!(graph.ready_issues(), vec![2]);
1082        });
1083    }
1084
1085    #[tokio::test]
1086    async fn fetch_base_branches_handles_multiple_repos() {
1087        // Create two independent repos with remotes so `default_branch` and
1088        // `fetch_branch` work.
1089        async fn repo_with_remote() -> (tempfile::TempDir, tempfile::TempDir) {
1090            use tokio::process::Command as TokioCmd;
1091
1092            let dir = tempfile::tempdir().unwrap();
1093            for (args, cwd) in [
1094                (vec!["init"], dir.path()),
1095                (vec!["config", "user.email", "test@test.com"], dir.path()),
1096                (vec!["config", "user.name", "Test"], dir.path()),
1097            ] {
1098                TokioCmd::new("git").args(&args).current_dir(cwd).output().await.unwrap();
1099            }
1100            tokio::fs::write(dir.path().join("README.md"), "init").await.unwrap();
1101            TokioCmd::new("git").args(["add", "."]).current_dir(dir.path()).output().await.unwrap();
1102            TokioCmd::new("git")
1103                .args(["commit", "-m", "init"])
1104                .current_dir(dir.path())
1105                .output()
1106                .await
1107                .unwrap();
1108
1109            let remote = tempfile::tempdir().unwrap();
1110            TokioCmd::new("git")
1111                .args(["clone", "--bare", dir.path().to_string_lossy().as_ref(), "."])
1112                .current_dir(remote.path())
1113                .output()
1114                .await
1115                .unwrap();
1116            TokioCmd::new("git")
1117                .args(["remote", "add", "origin", remote.path().to_string_lossy().as_ref()])
1118                .current_dir(dir.path())
1119                .output()
1120                .await
1121                .unwrap();
1122            TokioCmd::new("git")
1123                .args(["fetch", "origin"])
1124                .current_dir(dir.path())
1125                .output()
1126                .await
1127                .unwrap();
1128
1129            (dir, remote)
1130        }
1131
1132        let (repo_a, _remote_a) = repo_with_remote().await;
1133        let (repo_b, _remote_b) = repo_with_remote().await;
1134
1135        let mut dirs = HashSet::new();
1136        dirs.insert(repo_a.path().to_path_buf());
1137        dirs.insert(repo_b.path().to_path_buf());
1138
1139        // Should not panic or error -- just fetches both repos
1140        fetch_base_branches(&dirs).await;
1141    }
1142
1143    #[tokio::test]
1144    async fn fetch_base_branches_skips_invalid_repo_gracefully() {
1145        let mut dirs = HashSet::new();
1146        dirs.insert(PathBuf::from("/tmp/nonexistent-repo-12345"));
1147
1148        // Should log a warning but not panic
1149        fetch_base_branches(&dirs).await;
1150    }
1151}