// oven_cli/pipeline/runner.rs

1use std::{
2    collections::HashSet,
3    path::{Path, PathBuf},
4    sync::Arc,
5    time::Duration,
6};
7
8use anyhow::Result;
9use tokio::{sync::Semaphore, task::JoinSet};
10use tokio_util::sync::CancellationToken;
11use tracing::{error, info, warn};
12
13use super::{
14    executor::{PipelineExecutor, PipelineOutcome},
15    graph::DependencyGraph,
16};
17use crate::{
18    agents::Complexity,
19    db::graph::NodeState,
20    git,
21    issues::PipelineIssue,
22    pipeline::{executor::generate_run_id, graph::GraphNode},
23    process::CommandRunner,
24};
25
26/// Shared mutable state for the polling scheduler.
27///
28/// The `DependencyGraph` is the single source of truth for issue states,
29/// dependency edges, and scheduling decisions.
30struct SchedulerState {
31    graph: DependencyGraph,
32    semaphore: Arc<Semaphore>,
33    tasks: JoinSet<(u32, Result<PipelineOutcome>)>,
34}
35
36/// Run the pipeline for a batch of issues using planner-driven sequencing.
37///
38/// Used for the explicit-IDs path (`oven on 42,43`). Calls the planner with no
39/// in-flight context, builds a `DependencyGraph`, then runs layers sequentially
40/// (issues within each layer run in parallel). Falls back to all-parallel if the
41/// planner fails.
42pub async fn run_batch<R: CommandRunner + 'static>(
43    executor: &Arc<PipelineExecutor<R>>,
44    issues: Vec<PipelineIssue>,
45    max_parallel: usize,
46    auto_merge: bool,
47) -> Result<()> {
48    let session_id = generate_run_id();
49    let mut graph = if let Some(plan) = executor.plan_issues(&issues, &[]).await {
50        info!(nodes = plan.nodes.len(), total = plan.total_issues, "planner produced a plan");
51        DependencyGraph::from_planner_output(&session_id, &plan, &issues)
52    } else {
53        warn!("planner failed, falling back to all-parallel execution");
54        let mut g = DependencyGraph::new(&session_id);
55        for issue in &issues {
56            g.add_node(standalone_node(issue));
57        }
58        g
59    };
60
61    save_graph(&graph, executor).await;
62
63    let semaphore = Arc::new(Semaphore::new(max_parallel));
64    let mut had_errors = false;
65
66    while !graph.all_terminal() {
67        let ready = graph.ready_issues();
68        if ready.is_empty() {
69            warn!("no ready issues but graph is not terminal, breaking to avoid infinite loop");
70            save_graph(&graph, executor).await;
71            break;
72        }
73
74        let mut tasks: JoinSet<(u32, Result<PipelineOutcome>)> = JoinSet::new();
75
76        for num in &ready {
77            graph.transition(*num, NodeState::InFlight);
78        }
79        save_graph(&graph, executor).await;
80
81        for num in ready {
82            let node = graph.node(num).expect("ready issue must exist in graph");
83            let issue = node.issue.clone().expect("batch issues have issue attached");
84            let complexity = node.complexity.parse::<Complexity>().ok();
85            let sem = Arc::clone(&semaphore);
86            let exec = Arc::clone(executor);
87
88            tasks.spawn(async move {
89                let permit = match sem.acquire_owned().await {
90                    Ok(p) => p,
91                    Err(e) => return (num, Err(anyhow::anyhow!("semaphore closed: {e}"))),
92                };
93                let result = exec.run_issue_pipeline(&issue, auto_merge, complexity).await;
94                let outcome = match result {
95                    Ok(outcome) => {
96                        if let Err(e) = exec.finalize_merge(&outcome, &issue).await {
97                            warn!(issue = num, error = %e, "finalize_merge failed");
98                        }
99                        Ok(outcome)
100                    }
101                    Err(e) => Err(e),
102                };
103                drop(permit);
104                (num, outcome)
105            });
106        }
107
108        let mut merged_target_dirs: HashSet<PathBuf> = HashSet::new();
109        while let Some(join_result) = tasks.join_next().await {
110            match join_result {
111                Ok((number, Ok(ref outcome))) => {
112                    info!(issue = number, "pipeline completed successfully");
113                    graph.set_pr_number(number, outcome.pr_number);
114                    graph.set_run_id(number, &outcome.run_id);
115                    graph.transition(number, NodeState::Merged);
116                    if auto_merge {
117                        merged_target_dirs.insert(outcome.target_dir.clone());
118                    }
119                }
120                Ok((number, Err(ref e))) => {
121                    error!(issue = number, error = %e, "pipeline failed for issue");
122                    graph.transition(number, NodeState::Failed);
123                    let blocked = graph.propagate_failure(number);
124                    for b in &blocked {
125                        warn!(issue = b, blocked_by = number, "transitively failed");
126                    }
127                    had_errors = true;
128                }
129                Err(e) => {
130                    error!(error = %e, "pipeline task panicked");
131                    had_errors = true;
132                }
133            }
134        }
135
136        // After merges land on the remote, update the local base branch so the
137        // next layer's worktrees fork from post-merge state.
138        if !merged_target_dirs.is_empty() && !graph.all_terminal() {
139            fetch_base_branches(&merged_target_dirs).await;
140        }
141
142        save_graph(&graph, executor).await;
143    }
144
145    if had_errors {
146        anyhow::bail!("one or more pipelines failed in batch");
147    }
148    Ok(())
149}
150
151/// Poll for new issues and run them through the pipeline.
152///
153/// Unlike `run_batch`, this function continuously polls for new issues even while
154/// existing pipelines are running. The `DependencyGraph` is the single source of
155/// truth: `ready_issues()` drives scheduling, `transition()` replaces manual map
156/// mutations, and `propagate_failure()` handles dependency cascades.
157pub async fn polling_loop<R: CommandRunner + 'static>(
158    executor: Arc<PipelineExecutor<R>>,
159    auto_merge: bool,
160    cancel_token: CancellationToken,
161) -> Result<()> {
162    let poll_interval = Duration::from_secs(executor.config.pipeline.poll_interval);
163    let max_parallel = executor.config.pipeline.max_parallel as usize;
164    let ready_label = executor.config.labels.ready.clone();
165
166    // Try loading an existing graph session (crash recovery), or create a new one.
167    let graph = load_or_create_graph(&executor).await;
168
169    let mut sched = SchedulerState {
170        graph,
171        semaphore: Arc::new(Semaphore::new(max_parallel)),
172        tasks: JoinSet::new(),
173    };
174
175    info!(poll_interval_secs = poll_interval.as_secs(), max_parallel, "continuous polling started");
176
177    loop {
178        tokio::select! {
179            () = cancel_token.cancelled() => {
180                info!("shutdown signal received, waiting for in-flight pipelines");
181                drain_tasks(&mut sched, &executor).await;
182                break;
183            }
184            () = tokio::time::sleep(poll_interval) => {
185                poll_and_spawn(&executor, &ready_label, &mut sched, auto_merge).await;
186            }
187            Some(result) = sched.tasks.join_next(), if !sched.tasks.is_empty() => {
188                handle_task_result(result, &mut sched.graph, &executor).await;
189            }
190        }
191    }
192
193    Ok(())
194}
195
196/// Load an existing active graph session from DB, or create a new empty one.
197async fn load_or_create_graph<R: CommandRunner>(
198    executor: &Arc<PipelineExecutor<R>>,
199) -> DependencyGraph {
200    let conn = executor.db.lock().await;
201    match crate::db::graph::get_active_session(&conn) {
202        Ok(Some(session_id)) => match DependencyGraph::from_db(&conn, &session_id) {
203            Ok(graph) => {
204                info!(session_id = %session_id, nodes = graph.node_count(), "resumed existing graph session");
205                return graph;
206            }
207            Err(e) => {
208                warn!(error = %e, "failed to load graph session, starting fresh");
209            }
210        },
211        Ok(None) => {}
212        Err(e) => {
213            warn!(error = %e, "failed to check for active graph session");
214        }
215    }
216    let session_id = generate_run_id();
217    info!(session_id = %session_id, "starting new graph session");
218    DependencyGraph::new(&session_id)
219}
220
221/// Drain remaining tasks on shutdown.
222async fn drain_tasks<R: CommandRunner>(
223    sched: &mut SchedulerState,
224    executor: &Arc<PipelineExecutor<R>>,
225) {
226    while let Some(result) = sched.tasks.join_next().await {
227        handle_task_result(result, &mut sched.graph, executor).await;
228    }
229}
230
231/// Process a completed pipeline task: update graph state and persist.
232async fn handle_task_result<R: CommandRunner>(
233    result: Result<(u32, Result<PipelineOutcome>), tokio::task::JoinError>,
234    graph: &mut DependencyGraph,
235    executor: &Arc<PipelineExecutor<R>>,
236) {
237    match result {
238        Ok((number, Ok(ref outcome))) => {
239            info!(issue = number, "pipeline completed successfully");
240            graph.set_pr_number(number, outcome.pr_number);
241            graph.set_run_id(number, &outcome.run_id);
242            graph.transition(number, NodeState::AwaitingMerge);
243        }
244        Ok((number, Err(ref e))) => {
245            error!(issue = number, error = %e, "pipeline failed for issue");
246            graph.transition(number, NodeState::Failed);
247            let blocked = graph.propagate_failure(number);
248            for b in &blocked {
249                warn!(issue = b, blocked_by = number, "transitively failed");
250            }
251        }
252        Err(e) => {
253            error!(error = %e, "pipeline task panicked");
254            return;
255        }
256    }
257    save_graph(graph, executor).await;
258}
259
260/// Check `AwaitingMerge` nodes and transition them to `Merged` or `Failed`
261/// based on the PR's actual state on GitHub.
262async fn poll_awaiting_merges<R: CommandRunner + 'static>(
263    graph: &mut DependencyGraph,
264    executor: &Arc<PipelineExecutor<R>>,
265) {
266    let awaiting = graph.awaiting_merge();
267    if awaiting.is_empty() {
268        return;
269    }
270
271    let mut merged_target_dirs: HashSet<PathBuf> = HashSet::new();
272    for num in awaiting {
273        let Some(node) = graph.node(num) else { continue };
274        let Some(pr_number) = node.pr_number else {
275            warn!(issue = num, "AwaitingMerge node has no PR number, skipping");
276            continue;
277        };
278        let run_id = node.run_id.clone().unwrap_or_default();
279        let issue = node.issue.clone();
280        let target_repo = node.target_repo.clone();
281
282        // Resolve which repo directory to query for PR state.
283        // Multi-repo PRs live in the target repo, not the god repo.
284        let pr_repo_dir = match executor.resolve_target_dir(target_repo.as_ref()) {
285            Ok((dir, _)) => dir,
286            Err(e) => {
287                warn!(issue = num, error = %e, "failed to resolve target dir for PR state check");
288                continue;
289            }
290        };
291
292        let pr_state = match executor.github.get_pr_state_in(pr_number, &pr_repo_dir).await {
293            Ok(s) => s,
294            Err(e) => {
295                warn!(issue = num, pr = pr_number, error = %e, "failed to check PR state");
296                continue;
297            }
298        };
299
300        match pr_state {
301            crate::github::PrState::Merged => {
302                info!(issue = num, pr = pr_number, "PR merged, finalizing");
303                if let Some(ref issue) = issue {
304                    match executor.reconstruct_outcome(issue, &run_id, pr_number) {
305                        Ok(outcome) => {
306                            if let Err(e) = executor.finalize_merge(&outcome, issue).await {
307                                warn!(issue = num, error = %e, "finalize_merge after poll failed");
308                            }
309                        }
310                        Err(e) => {
311                            warn!(issue = num, error = %e, "failed to reconstruct outcome");
312                        }
313                    }
314                } else {
315                    warn!(
316                        issue = num,
317                        pr = pr_number,
318                        "node restored from DB has no PipelineIssue, \
319                         skipping finalization (labels and worktree may need manual cleanup)"
320                    );
321                }
322                graph.transition(num, NodeState::Merged);
323                merged_target_dirs.insert(pr_repo_dir);
324            }
325            crate::github::PrState::Closed => {
326                warn!(issue = num, pr = pr_number, "PR closed without merge, marking failed");
327                graph.transition(num, NodeState::Failed);
328                let blocked = graph.propagate_failure(num);
329                for b in &blocked {
330                    warn!(issue = b, blocked_by = num, "transitively failed (PR closed)");
331                }
332            }
333            crate::github::PrState::Open => {
334                // Still open, keep waiting
335            }
336        }
337    }
338
339    // After merges land, update each affected repo's base branch so the next
340    // layer's worktrees fork from post-merge state.
341    if !merged_target_dirs.is_empty() {
342        fetch_base_branches(&merged_target_dirs).await;
343    }
344
345    save_graph(graph, executor).await;
346}
347
348/// Single poll cycle: plan new issues, promote ready ones, and spawn tasks.
349async fn poll_and_spawn<R: CommandRunner + 'static>(
350    executor: &Arc<PipelineExecutor<R>>,
351    ready_label: &str,
352    sched: &mut SchedulerState,
353    auto_merge: bool,
354) {
355    // Check if any AwaitingMerge PRs have been merged
356    poll_awaiting_merges(&mut sched.graph, executor).await;
357
358    let ready_issues = match executor.issues.get_ready_issues(ready_label).await {
359        Ok(i) => i,
360        Err(e) => {
361            error!(error = %e, "failed to fetch issues");
362            return;
363        }
364    };
365
366    let ready_numbers: HashSet<u32> = ready_issues.iter().map(|i| i.number).collect();
367
368    // Clean stale nodes: remove Pending nodes whose issues disappeared from the ready list
369    clean_stale_nodes(&mut sched.graph, &ready_numbers);
370
371    // Filter to genuinely new issues not already in the graph
372    let new_issues: Vec<_> =
373        ready_issues.into_iter().filter(|i| !sched.graph.contains(i.number)).collect();
374
375    // Plan and merge new issues into the graph
376    if !new_issues.is_empty() {
377        info!(count = new_issues.len(), "found new issues to evaluate");
378        let graph_context = sched.graph.to_graph_context();
379
380        if let Some(plan) = executor.plan_issues(&new_issues, &graph_context).await {
381            info!(nodes = plan.nodes.len(), total = plan.total_issues, "planner produced a plan");
382            sched.graph.merge_planner_output(&plan, &new_issues);
383        } else {
384            warn!("planner failed, adding all new issues as independent nodes");
385            add_independent_nodes(&mut sched.graph, &new_issues);
386        }
387
388        save_graph(&sched.graph, executor).await;
389    }
390
391    // Spawn ready issues
392    let to_spawn = collect_ready_issues(&mut sched.graph);
393    if to_spawn.is_empty() {
394        if new_issues.is_empty() {
395            info!("no actionable issues, waiting");
396        }
397        return;
398    }
399
400    save_graph(&sched.graph, executor).await;
401    spawn_issues(to_spawn, executor, sched, auto_merge);
402}
403
404/// Remove graph nodes that are still `Pending` but no longer in the provider's ready list.
405fn clean_stale_nodes(graph: &mut DependencyGraph, ready_numbers: &HashSet<u32>) {
406    let stale: Vec<u32> = graph
407        .all_issues()
408        .into_iter()
409        .filter(|num| {
410            !ready_numbers.contains(num)
411                && graph.node(*num).is_some_and(|n| n.state == NodeState::Pending)
412        })
413        .collect();
414    if !stale.is_empty() {
415        info!(count = stale.len(), "removing stale pending nodes");
416        for num in stale {
417            graph.remove_node(num);
418        }
419    }
420}
421
422/// Add issues to the graph as independent nodes (no edges) when the planner fails.
423fn add_independent_nodes(graph: &mut DependencyGraph, issues: &[PipelineIssue]) {
424    for issue in issues {
425        if !graph.contains(issue.number) {
426            graph.add_node(standalone_node(issue));
427        }
428    }
429}
430
431/// Find ready issues in the graph, transition them to `InFlight`, return spawn data.
432fn collect_ready_issues(graph: &mut DependencyGraph) -> Vec<(u32, PipelineIssue, Complexity)> {
433    let ready = graph.ready_issues();
434    let mut to_spawn = Vec::new();
435
436    for num in ready {
437        let Some(node) = graph.node(num) else { continue };
438        let Some(issue) = node.issue.clone() else {
439            warn!(issue = num, "ready node has no PipelineIssue attached, skipping");
440            continue;
441        };
442        let complexity = node.complexity.parse::<Complexity>().unwrap_or(Complexity::Full);
443        graph.transition(num, NodeState::InFlight);
444        to_spawn.push((num, issue, complexity));
445    }
446
447    to_spawn
448}
449
450/// Spawn pipeline tasks for a set of issues.
451fn spawn_issues<R: CommandRunner + 'static>(
452    to_spawn: Vec<(u32, PipelineIssue, Complexity)>,
453    executor: &Arc<PipelineExecutor<R>>,
454    sched: &mut SchedulerState,
455    auto_merge: bool,
456) {
457    for (number, issue, complexity) in to_spawn {
458        let sem = Arc::clone(&sched.semaphore);
459        let exec = Arc::clone(executor);
460
461        sched.tasks.spawn(async move {
462            let permit = match sem.acquire_owned().await {
463                Ok(p) => p,
464                Err(e) => return (number, Err(anyhow::anyhow!("semaphore closed: {e}"))),
465            };
466            let outcome = exec.run_issue_pipeline(&issue, auto_merge, Some(complexity)).await;
467            drop(permit);
468            (number, outcome)
469        });
470    }
471}
472
473/// Create a `GraphNode` for an issue with no planner metadata.
474fn standalone_node(issue: &PipelineIssue) -> GraphNode {
475    GraphNode {
476        issue_number: issue.number,
477        title: issue.title.clone(),
478        area: String::new(),
479        predicted_files: Vec::new(),
480        has_migration: false,
481        complexity: Complexity::Full.to_string(),
482        state: NodeState::Pending,
483        pr_number: None,
484        run_id: None,
485        target_repo: issue.target_repo.clone(),
486        issue: Some(issue.clone()),
487    }
488}
489
490/// Update the base branch in every repo where merges landed.
491///
492/// Without this, new worktrees created for the next layer would fork from a
493/// stale local ref, causing the implementer to work against pre-merge code.
494/// In multi-repo mode, merges may land in different target repos, so we fetch
495/// the base branch in each distinct repo directory.
496async fn fetch_base_branches(repo_dirs: &HashSet<PathBuf>) {
497    for repo_dir in repo_dirs {
498        fetch_base_branch_in(repo_dir).await;
499    }
500}
501
502/// Fetch the base branch for a single repo directory.
503async fn fetch_base_branch_in(repo_dir: &Path) {
504    match git::default_branch(repo_dir).await {
505        Ok(branch) => {
506            if let Err(e) = git::fetch_branch(repo_dir, &branch).await {
507                warn!(
508                    repo = %repo_dir.display(), error = %e,
509                    "failed to fetch base branch after merge"
510                );
511            } else {
512                info!(
513                    repo = %repo_dir.display(), branch = %branch,
514                    "updated base branch after merge"
515                );
516            }
517        }
518        Err(e) => {
519            warn!(
520                repo = %repo_dir.display(), error = %e,
521                "failed to detect base branch for post-merge fetch"
522            );
523        }
524    }
525}
526
527/// Persist graph state to the database.
528async fn save_graph<R: CommandRunner>(
529    graph: &DependencyGraph,
530    executor: &Arc<PipelineExecutor<R>>,
531) {
532    let conn = executor.db.lock().await;
533    if let Err(e) = graph.save_to_db(&conn) {
534        warn!(error = %e, "failed to persist dependency graph");
535    }
536}
537
538#[cfg(test)]
539mod tests {
540    use std::path::PathBuf;
541
542    use tokio::sync::Mutex;
543
544    use super::*;
545    use crate::{
546        agents::PlannerGraphOutput,
547        config::Config,
548        github::GhClient,
549        issues::{IssueOrigin, IssueProvider, github::GithubIssueProvider},
550        process::{AgentResult, CommandOutput, MockCommandRunner},
551    };
552
553    fn mock_runner_for_batch() -> MockCommandRunner {
554        let mut mock = MockCommandRunner::new();
555        mock.expect_run_gh().returning(|_, _| {
556            Box::pin(async {
557                Ok(CommandOutput {
558                    stdout: "https://github.com/user/repo/pull/1\n".to_string(),
559                    stderr: String::new(),
560                    success: true,
561                })
562            })
563        });
564        mock.expect_run_claude().returning(|_, _, _, _, _| {
565            Box::pin(async {
566                Ok(AgentResult {
567                    cost_usd: 1.0,
568                    duration: Duration::from_secs(5),
569                    turns: 3,
570                    output: r#"{"findings":[],"summary":"clean"}"#.to_string(),
571                    session_id: "sess-1".to_string(),
572                    success: true,
573                })
574            })
575        });
576        mock
577    }
578
579    fn make_github_provider(gh: &Arc<GhClient<MockCommandRunner>>) -> Arc<dyn IssueProvider> {
580        Arc::new(GithubIssueProvider::new(Arc::clone(gh), "target_repo"))
581    }
582
583    fn make_issue(number: u32) -> PipelineIssue {
584        PipelineIssue {
585            number,
586            title: format!("Issue #{number}"),
587            body: String::new(),
588            source: IssueOrigin::Github,
589            target_repo: None,
590            author: None,
591        }
592    }
593
594    #[tokio::test]
595    async fn cancellation_stops_polling() {
596        let cancel = CancellationToken::new();
597        let runner = Arc::new(mock_runner_for_batch());
598        let github = Arc::new(GhClient::new(mock_runner_for_batch(), std::path::Path::new("/tmp")));
599        let issues = make_github_provider(&github);
600        let db = Arc::new(Mutex::new(crate::db::open_in_memory().unwrap()));
601
602        let mut config = Config::default();
603        config.pipeline.poll_interval = 3600; // very long so we don't actually poll
604
605        let executor = Arc::new(PipelineExecutor {
606            runner,
607            github,
608            issues,
609            db,
610            config,
611            cancel_token: cancel.clone(),
612            repo_dir: PathBuf::from("/tmp"),
613        });
614
615        let cancel_clone = cancel.clone();
616        let handle = tokio::spawn(async move { polling_loop(executor, false, cancel_clone).await });
617
618        // Cancel immediately
619        cancel.cancel();
620
621        let result = handle.await.unwrap();
622        assert!(result.is_ok());
623    }
624
625    #[tokio::test]
626    async fn cancellation_exits_within_timeout() {
627        let cancel = CancellationToken::new();
628        let runner = Arc::new(mock_runner_for_batch());
629        let github = Arc::new(GhClient::new(mock_runner_for_batch(), std::path::Path::new("/tmp")));
630        let issues = make_github_provider(&github);
631        let db = Arc::new(Mutex::new(crate::db::open_in_memory().unwrap()));
632
633        let mut config = Config::default();
634        config.pipeline.poll_interval = 3600;
635
636        let executor = Arc::new(PipelineExecutor {
637            runner,
638            github,
639            issues,
640            db,
641            config,
642            cancel_token: cancel.clone(),
643            repo_dir: PathBuf::from("/tmp"),
644        });
645
646        let cancel_clone = cancel.clone();
647        let handle = tokio::spawn(async move { polling_loop(executor, false, cancel_clone).await });
648
649        cancel.cancel();
650
651        let result = tokio::time::timeout(Duration::from_secs(5), handle)
652            .await
653            .expect("polling loop should exit within timeout")
654            .unwrap();
655        assert!(result.is_ok());
656    }
657
658    #[test]
659    fn handle_task_success_transitions_to_awaiting_merge() {
660        let rt = tokio::runtime::Builder::new_current_thread().build().unwrap();
661        rt.block_on(async {
662            let executor = {
663                let runner = Arc::new(mock_runner_for_batch());
664                let github =
665                    Arc::new(GhClient::new(mock_runner_for_batch(), std::path::Path::new("/tmp")));
666                let issues = make_github_provider(&github);
667                let db = Arc::new(Mutex::new(crate::db::open_in_memory().unwrap()));
668                Arc::new(PipelineExecutor {
669                    runner,
670                    github,
671                    issues,
672                    db,
673                    config: Config::default(),
674                    cancel_token: CancellationToken::new(),
675                    repo_dir: PathBuf::from("/tmp"),
676                })
677            };
678
679            let mut graph = DependencyGraph::new("test");
680            graph.add_node(standalone_node(&make_issue(1)));
681            graph.transition(1, NodeState::InFlight);
682
683            let outcome = PipelineOutcome {
684                run_id: "run-abc".to_string(),
685                pr_number: 42,
686                worktree_path: PathBuf::from("/tmp/wt"),
687                target_dir: PathBuf::from("/tmp"),
688            };
689
690            handle_task_result(Ok((1, Ok(outcome))), &mut graph, &executor).await;
691
692            assert_eq!(graph.node(1).unwrap().state, NodeState::AwaitingMerge);
693            assert_eq!(graph.node(1).unwrap().pr_number, Some(42));
694            assert_eq!(graph.node(1).unwrap().run_id.as_deref(), Some("run-abc"));
695        });
696    }
697
698    #[test]
699    fn handle_task_failure_propagates_to_dependents() {
700        let rt = tokio::runtime::Builder::new_current_thread().build().unwrap();
701        rt.block_on(async {
702            let executor = {
703                let runner = Arc::new(mock_runner_for_batch());
704                let github =
705                    Arc::new(GhClient::new(mock_runner_for_batch(), std::path::Path::new("/tmp")));
706                let issues = make_github_provider(&github);
707                let db = Arc::new(Mutex::new(crate::db::open_in_memory().unwrap()));
708                Arc::new(PipelineExecutor {
709                    runner,
710                    github,
711                    issues,
712                    db,
713                    config: Config::default(),
714                    cancel_token: CancellationToken::new(),
715                    repo_dir: PathBuf::from("/tmp"),
716                })
717            };
718
719            let plan = PlannerGraphOutput {
720                nodes: vec![
721                    crate::agents::PlannedNode {
722                        number: 1,
723                        title: "Root".to_string(),
724                        area: "a".to_string(),
725                        predicted_files: vec![],
726                        has_migration: false,
727                        complexity: Complexity::Full,
728                        depends_on: vec![],
729                        reasoning: String::new(),
730                    },
731                    crate::agents::PlannedNode {
732                        number: 2,
733                        title: "Dep".to_string(),
734                        area: "b".to_string(),
735                        predicted_files: vec![],
736                        has_migration: false,
737                        complexity: Complexity::Full,
738                        depends_on: vec![1],
739                        reasoning: String::new(),
740                    },
741                ],
742                total_issues: 2,
743                parallel_capacity: 1,
744            };
745            let issues = vec![make_issue(1), make_issue(2)];
746            let mut graph = DependencyGraph::from_planner_output("test", &plan, &issues);
747            graph.transition(1, NodeState::InFlight);
748
749            handle_task_result(
750                Ok((1, Err(anyhow::anyhow!("pipeline failed")))),
751                &mut graph,
752                &executor,
753            )
754            .await;
755
756            assert_eq!(graph.node(1).unwrap().state, NodeState::Failed);
757            assert_eq!(graph.node(2).unwrap().state, NodeState::Failed);
758        });
759    }
760
761    #[test]
762    fn stale_node_removed_when_issue_disappears() {
763        let mut graph = DependencyGraph::new("test");
764        graph.add_node(standalone_node(&make_issue(1)));
765        graph.add_node(standalone_node(&make_issue(2)));
766        graph.add_node(standalone_node(&make_issue(3)));
767        graph.transition(2, NodeState::InFlight);
768
769        // Only issue 1 and 2 remain in provider; 3 disappeared
770        let ready_numbers: HashSet<u32> = HashSet::from([1, 2]);
771        clean_stale_nodes(&mut graph, &ready_numbers);
772
773        assert!(graph.contains(1)); // still Pending + in ready list
774        assert!(graph.contains(2)); // InFlight, not removed even if not in ready
775        assert!(!graph.contains(3)); // Pending + not in ready = removed
776    }
777
778    #[test]
779    fn collect_ready_issues_transitions_to_in_flight() {
780        let mut graph = DependencyGraph::new("test");
781        graph.add_node(standalone_node(&make_issue(1)));
782        graph.add_node(standalone_node(&make_issue(2)));
783
784        let spawnable = collect_ready_issues(&mut graph);
785        assert_eq!(spawnable.len(), 2);
786
787        // Both should now be InFlight
788        assert_eq!(graph.node(1).unwrap().state, NodeState::InFlight);
789        assert_eq!(graph.node(2).unwrap().state, NodeState::InFlight);
790
791        // No more ready issues
792        assert!(collect_ready_issues(&mut graph).is_empty());
793    }
794
795    #[tokio::test]
796    async fn planner_failure_falls_back_to_all_parallel() {
797        let mut mock = MockCommandRunner::new();
798        mock.expect_run_gh().returning(|_, _| {
799            Box::pin(async {
800                Ok(CommandOutput { stdout: String::new(), stderr: String::new(), success: true })
801            })
802        });
803        mock.expect_run_claude().returning(|_, _, _, _, _| {
804            Box::pin(async {
805                Ok(AgentResult {
806                    cost_usd: 0.5,
807                    duration: Duration::from_secs(2),
808                    turns: 1,
809                    output: "I don't know how to plan".to_string(),
810                    session_id: "sess-plan".to_string(),
811                    success: true,
812                })
813            })
814        });
815
816        let runner = Arc::new(mock);
817        let github = Arc::new(GhClient::new(mock_runner_for_batch(), std::path::Path::new("/tmp")));
818        let issues_provider = make_github_provider(&github);
819        let db = Arc::new(Mutex::new(crate::db::open_in_memory().unwrap()));
820
821        let executor = Arc::new(PipelineExecutor {
822            runner,
823            github,
824            issues: issues_provider,
825            db,
826            config: Config::default(),
827            cancel_token: CancellationToken::new(),
828            repo_dir: PathBuf::from("/tmp"),
829        });
830
831        let issues = vec![PipelineIssue {
832            number: 1,
833            title: "Test".to_string(),
834            body: "body".to_string(),
835            source: IssueOrigin::Github,
836            target_repo: None,
837            author: None,
838        }];
839
840        // plan_issues returns None for unparseable output
841        let plan = executor.plan_issues(&issues, &[]).await;
842        assert!(plan.is_none());
843    }
844
845    #[test]
846    fn graph_persisted_after_state_change() {
847        let rt = tokio::runtime::Builder::new_current_thread().build().unwrap();
848        rt.block_on(async {
849            let db = Arc::new(Mutex::new(crate::db::open_in_memory().unwrap()));
850            let runner = Arc::new(mock_runner_for_batch());
851            let github =
852                Arc::new(GhClient::new(mock_runner_for_batch(), std::path::Path::new("/tmp")));
853            let issues = make_github_provider(&github);
854            let executor = Arc::new(PipelineExecutor {
855                runner,
856                github,
857                issues,
858                db: Arc::clone(&db),
859                config: Config::default(),
860                cancel_token: CancellationToken::new(),
861                repo_dir: PathBuf::from("/tmp"),
862            });
863
864            let mut graph = DependencyGraph::new("persist-test");
865            graph.add_node(standalone_node(&make_issue(1)));
866            graph.transition(1, NodeState::InFlight);
867
868            let outcome = PipelineOutcome {
869                run_id: "run-1".to_string(),
870                pr_number: 10,
871                worktree_path: PathBuf::from("/tmp/wt"),
872                target_dir: PathBuf::from("/tmp"),
873            };
874            handle_task_result(Ok((1, Ok(outcome))), &mut graph, &executor).await;
875
876            // Load from DB and verify
877            let loaded = DependencyGraph::from_db(&*db.lock().await, "persist-test").unwrap();
878            assert_eq!(loaded.node(1).unwrap().state, NodeState::AwaitingMerge);
879            assert_eq!(loaded.node(1).unwrap().pr_number, Some(10));
880        });
881    }
882
883    fn mock_runner_with_pr_state(state: &'static str) -> MockCommandRunner {
884        let mut mock = MockCommandRunner::new();
885        mock.expect_run_gh().returning(move |args, _| {
886            let args = args.to_vec();
887            Box::pin(async move {
888                if args.iter().any(|a| a == "view") {
889                    Ok(CommandOutput {
890                        stdout: format!(r#"{{"state":"{state}"}}"#),
891                        stderr: String::new(),
892                        success: true,
893                    })
894                } else {
895                    Ok(CommandOutput {
896                        stdout: String::new(),
897                        stderr: String::new(),
898                        success: true,
899                    })
900                }
901            })
902        });
903        mock.expect_run_claude().returning(|_, _, _, _, _| {
904            Box::pin(async {
905                Ok(AgentResult {
906                    cost_usd: 0.0,
907                    duration: Duration::from_secs(0),
908                    turns: 0,
909                    output: String::new(),
910                    session_id: String::new(),
911                    success: true,
912                })
913            })
914        });
915        mock
916    }
917
918    fn make_merge_poll_executor(state: &'static str) -> Arc<PipelineExecutor<MockCommandRunner>> {
919        let gh_mock = mock_runner_with_pr_state(state);
920        let github = Arc::new(GhClient::new(gh_mock, std::path::Path::new("/tmp")));
921        let issues = make_github_provider(&github);
922        let db = Arc::new(Mutex::new(crate::db::open_in_memory().unwrap()));
923        let runner = Arc::new(mock_runner_with_pr_state(state));
924        Arc::new(PipelineExecutor {
925            runner,
926            github,
927            issues,
928            db,
929            config: Config::default(),
930            cancel_token: CancellationToken::new(),
931            repo_dir: PathBuf::from("/tmp"),
932        })
933    }
934
935    #[test]
936    fn merge_polling_transitions_merged_pr() {
937        let rt = tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap();
938        rt.block_on(async {
939            let executor = make_merge_poll_executor("MERGED");
940
941            let mut graph = DependencyGraph::new("merge-poll-test");
942            let mut node = standalone_node(&make_issue(1));
943            node.pr_number = Some(42);
944            node.run_id = Some("run-1".to_string());
945            graph.add_node(node);
946            graph.transition(1, NodeState::AwaitingMerge);
947
948            poll_awaiting_merges(&mut graph, &executor).await;
949
950            assert_eq!(graph.node(1).unwrap().state, NodeState::Merged);
951        });
952    }
953
954    #[test]
955    fn merge_polling_transitions_node_without_issue() {
956        let rt = tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap();
957        rt.block_on(async {
958            let executor = make_merge_poll_executor("MERGED");
959
960            let mut graph = DependencyGraph::new("db-restore-test");
961            // Simulate a node restored from DB (no PipelineIssue attached)
962            let mut node = GraphNode {
963                issue_number: 1,
964                title: "Issue #1".to_string(),
965                area: "test".to_string(),
966                predicted_files: vec![],
967                has_migration: false,
968                complexity: "full".to_string(),
969                state: NodeState::Pending,
970                pr_number: Some(42),
971                run_id: Some("run-1".to_string()),
972                issue: None,
973                target_repo: None,
974            };
975            node.state = NodeState::Pending;
976            graph.add_node(node);
977            graph.transition(1, NodeState::AwaitingMerge);
978
979            poll_awaiting_merges(&mut graph, &executor).await;
980
981            // Should still transition to Merged even without issue data
982            assert_eq!(graph.node(1).unwrap().state, NodeState::Merged);
983        });
984    }
985
986    #[test]
987    fn merge_polling_handles_closed_pr() {
988        let rt = tokio::runtime::Builder::new_current_thread().build().unwrap();
989        rt.block_on(async {
990            let executor = make_merge_poll_executor("CLOSED");
991
992            let plan = PlannerGraphOutput {
993                nodes: vec![
994                    crate::agents::PlannedNode {
995                        number: 1,
996                        title: "Root".to_string(),
997                        area: "a".to_string(),
998                        predicted_files: vec![],
999                        has_migration: false,
1000                        complexity: Complexity::Full,
1001                        depends_on: vec![],
1002                        reasoning: String::new(),
1003                    },
1004                    crate::agents::PlannedNode {
1005                        number: 2,
1006                        title: "Dep".to_string(),
1007                        area: "b".to_string(),
1008                        predicted_files: vec![],
1009                        has_migration: false,
1010                        complexity: Complexity::Full,
1011                        depends_on: vec![1],
1012                        reasoning: String::new(),
1013                    },
1014                ],
1015                total_issues: 2,
1016                parallel_capacity: 1,
1017            };
1018            let test_issues = vec![make_issue(1), make_issue(2)];
1019            let mut graph =
1020                DependencyGraph::from_planner_output("merge-poll-close", &plan, &test_issues);
1021            graph.transition(1, NodeState::AwaitingMerge);
1022            graph.set_pr_number(1, 42);
1023            graph.set_run_id(1, "run-1");
1024
1025            poll_awaiting_merges(&mut graph, &executor).await;
1026
1027            assert_eq!(graph.node(1).unwrap().state, NodeState::Failed);
1028            // Dependent should be transitively failed
1029            assert_eq!(graph.node(2).unwrap().state, NodeState::Failed);
1030        });
1031    }
1032
1033    #[test]
1034    fn merge_unlocks_dependent() {
1035        let rt = tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap();
1036        rt.block_on(async {
1037            let executor = make_merge_poll_executor("MERGED");
1038
1039            let plan = PlannerGraphOutput {
1040                nodes: vec![
1041                    crate::agents::PlannedNode {
1042                        number: 1,
1043                        title: "Root".to_string(),
1044                        area: "a".to_string(),
1045                        predicted_files: vec![],
1046                        has_migration: false,
1047                        complexity: Complexity::Full,
1048                        depends_on: vec![],
1049                        reasoning: String::new(),
1050                    },
1051                    crate::agents::PlannedNode {
1052                        number: 2,
1053                        title: "Dep".to_string(),
1054                        area: "b".to_string(),
1055                        predicted_files: vec![],
1056                        has_migration: false,
1057                        complexity: Complexity::Full,
1058                        depends_on: vec![1],
1059                        reasoning: String::new(),
1060                    },
1061                ],
1062                total_issues: 2,
1063                parallel_capacity: 1,
1064            };
1065            let test_issues = vec![make_issue(1), make_issue(2)];
1066            let mut graph =
1067                DependencyGraph::from_planner_output("merge-unlock", &plan, &test_issues);
1068            graph.transition(1, NodeState::AwaitingMerge);
1069            graph.set_pr_number(1, 42);
1070            graph.set_run_id(1, "run-1");
1071
1072            // Before polling: node 2 is not ready (dep 1 is AwaitingMerge)
1073            assert!(graph.ready_issues().is_empty());
1074
1075            poll_awaiting_merges(&mut graph, &executor).await;
1076
1077            // After polling: node 1 merged, node 2 should now be ready
1078            assert_eq!(graph.node(1).unwrap().state, NodeState::Merged);
1079            assert_eq!(graph.ready_issues(), vec![2]);
1080        });
1081    }
1082
1083    #[tokio::test]
1084    async fn fetch_base_branches_handles_multiple_repos() {
1085        // Create two independent repos with remotes so `default_branch` and
1086        // `fetch_branch` work.
1087        async fn repo_with_remote() -> (tempfile::TempDir, tempfile::TempDir) {
1088            use tokio::process::Command as TokioCmd;
1089
1090            let dir = tempfile::tempdir().unwrap();
1091            for (args, cwd) in [
1092                (vec!["init"], dir.path()),
1093                (vec!["config", "user.email", "test@test.com"], dir.path()),
1094                (vec!["config", "user.name", "Test"], dir.path()),
1095            ] {
1096                TokioCmd::new("git").args(&args).current_dir(cwd).output().await.unwrap();
1097            }
1098            tokio::fs::write(dir.path().join("README.md"), "init").await.unwrap();
1099            TokioCmd::new("git").args(["add", "."]).current_dir(dir.path()).output().await.unwrap();
1100            TokioCmd::new("git")
1101                .args(["commit", "-m", "init"])
1102                .current_dir(dir.path())
1103                .output()
1104                .await
1105                .unwrap();
1106
1107            let remote = tempfile::tempdir().unwrap();
1108            TokioCmd::new("git")
1109                .args(["clone", "--bare", dir.path().to_string_lossy().as_ref(), "."])
1110                .current_dir(remote.path())
1111                .output()
1112                .await
1113                .unwrap();
1114            TokioCmd::new("git")
1115                .args(["remote", "add", "origin", remote.path().to_string_lossy().as_ref()])
1116                .current_dir(dir.path())
1117                .output()
1118                .await
1119                .unwrap();
1120            TokioCmd::new("git")
1121                .args(["fetch", "origin"])
1122                .current_dir(dir.path())
1123                .output()
1124                .await
1125                .unwrap();
1126
1127            (dir, remote)
1128        }
1129
1130        let (repo_a, _remote_a) = repo_with_remote().await;
1131        let (repo_b, _remote_b) = repo_with_remote().await;
1132
1133        let mut dirs = HashSet::new();
1134        dirs.insert(repo_a.path().to_path_buf());
1135        dirs.insert(repo_b.path().to_path_buf());
1136
1137        // Should not panic or error -- just fetches both repos
1138        fetch_base_branches(&dirs).await;
1139    }
1140
1141    #[tokio::test]
1142    async fn fetch_base_branches_skips_invalid_repo_gracefully() {
1143        let mut dirs = HashSet::new();
1144        dirs.insert(PathBuf::from("/tmp/nonexistent-repo-12345"));
1145
1146        // Should log a warning but not panic
1147        fetch_base_branches(&dirs).await;
1148    }
1149}