cuenv_core/tasks/
graph.rs

1//! Task graph builder using petgraph
2//!
3//! This module builds directed acyclic graphs (DAGs) from task definitions
4//! to handle dependencies and determine execution order.
5
6use super::{ParallelGroup, Task, TaskDefinition, TaskGroup, Tasks};
7use crate::Result;
8use petgraph::algo::{is_cyclic_directed, toposort};
9use petgraph::graph::{DiGraph, NodeIndex};
10use petgraph::visit::IntoNodeReferences;
11use std::collections::{HashMap, HashSet};
12use tracing::debug;
13
/// A node in the task graph.
///
/// Pairs the name a task is registered under with the task itself. Values of
/// this type are the node weights of the graph held by [`TaskGraph`], and
/// clones of them are what the ordering queries return.
#[derive(Debug, Clone)]
pub struct TaskNode {
    /// Name the task is registered under. Nodes expanded from groups carry
    /// derived names of the form `prefix[index]` (sequential) or
    /// `prefix.child` (parallel).
    pub name: String,
    /// The task to execute
    pub task: Task,
}
22
23/// Task graph for dependency resolution and execution ordering
24pub struct TaskGraph {
25    /// The directed graph of tasks
26    graph: DiGraph<TaskNode, ()>,
27    /// Map from task names to node indices
28    name_to_node: HashMap<String, NodeIndex>,
29}
30
31impl TaskGraph {
32    /// Create a new empty task graph
33    pub fn new() -> Self {
34        Self {
35            graph: DiGraph::new(),
36            name_to_node: HashMap::new(),
37        }
38    }
39
40    /// Build a graph from a task definition
41    pub fn build_from_definition(
42        &mut self,
43        name: &str,
44        definition: &TaskDefinition,
45        all_tasks: &Tasks,
46    ) -> Result<Vec<NodeIndex>> {
47        match definition {
48            TaskDefinition::Single(task) => {
49                let node = self.add_task(name, task.as_ref().clone())?;
50                Ok(vec![node])
51            }
52            TaskDefinition::Group(group) => self.build_from_group(name, group, all_tasks),
53        }
54    }
55
56    /// Build a graph from a task group
57    fn build_from_group(
58        &mut self,
59        prefix: &str,
60        group: &TaskGroup,
61        all_tasks: &Tasks,
62    ) -> Result<Vec<NodeIndex>> {
63        match group {
64            TaskGroup::Sequential(tasks) => self.build_sequential_group(prefix, tasks, all_tasks),
65            TaskGroup::Parallel(group) => self.build_parallel_group(prefix, group, all_tasks),
66        }
67    }
68
69    /// Build a sequential task group (tasks run one after another)
70    fn build_sequential_group(
71        &mut self,
72        prefix: &str,
73        tasks: &[TaskDefinition],
74        all_tasks: &Tasks,
75    ) -> Result<Vec<NodeIndex>> {
76        let mut nodes = Vec::new();
77        let mut previous: Option<NodeIndex> = None;
78
79        for (i, task_def) in tasks.iter().enumerate() {
80            let task_name = format!("{}[{}]", prefix, i);
81            let task_nodes = self.build_from_definition(&task_name, task_def, all_tasks)?;
82
83            // For sequential execution, link previous task to current
84            if let Some(prev) = previous
85                && let Some(first) = task_nodes.first()
86            {
87                self.graph.add_edge(prev, *first, ());
88            }
89
90            if let Some(last) = task_nodes.last() {
91                previous = Some(*last);
92            }
93
94            nodes.extend(task_nodes);
95        }
96
97        Ok(nodes)
98    }
99
100    /// Build a parallel task group (tasks can run concurrently)
101    fn build_parallel_group(
102        &mut self,
103        prefix: &str,
104        group: &ParallelGroup,
105        all_tasks: &Tasks,
106    ) -> Result<Vec<NodeIndex>> {
107        let mut nodes = Vec::new();
108
109        for (name, task_def) in &group.tasks {
110            let task_name = format!("{}.{}", prefix, name);
111            let task_nodes = self.build_from_definition(&task_name, task_def, all_tasks)?;
112
113            // Apply group-level dependencies to each subtask
114            if !group.depends_on.is_empty() {
115                for node_idx in &task_nodes {
116                    let node = &mut self.graph[*node_idx];
117                    for dep in &group.depends_on {
118                        if !node.task.depends_on.contains(dep) {
119                            node.task.depends_on.push(dep.clone());
120                        }
121                    }
122                }
123            }
124
125            nodes.extend(task_nodes);
126        }
127
128        Ok(nodes)
129    }
130
131    /// Add a single task to the graph
132    pub fn add_task(&mut self, name: &str, task: Task) -> Result<NodeIndex> {
133        // Check if task already exists
134        if let Some(&node) = self.name_to_node.get(name) {
135            return Ok(node);
136        }
137
138        let node = TaskNode {
139            name: name.to_string(),
140            task,
141        };
142
143        let node_index = self.graph.add_node(node);
144        self.name_to_node.insert(name.to_string(), node_index);
145        debug!("Added task node '{}'", name);
146
147        Ok(node_index)
148    }
149
150    /// Add dependency edges after all tasks have been added
151    /// This ensures proper cycle detection and missing dependency validation
152    fn add_dependency_edges(&mut self) -> Result<()> {
153        let mut missing_deps = Vec::new();
154        let mut edges_to_add = Vec::new();
155
156        // Collect all dependency relationships
157        for (node_index, node) in self.graph.node_references() {
158            for dep_name in &node.task.depends_on {
159                if let Some(&dep_node_index) = self.name_to_node.get(dep_name as &str) {
160                    // Record edge to add later
161                    edges_to_add.push((dep_node_index, node_index));
162                } else {
163                    missing_deps.push((node.name.clone(), dep_name.clone()));
164                }
165            }
166        }
167
168        // Report missing dependencies
169        if !missing_deps.is_empty() {
170            let missing_list = missing_deps
171                .iter()
172                .map(|(task, dep)| format!("Task '{}' depends on missing task '{}'", task, dep))
173                .collect::<Vec<_>>()
174                .join(", ");
175            return Err(crate::Error::configuration(format!(
176                "Missing dependencies: {}",
177                missing_list
178            )));
179        }
180
181        // Add all edges
182        for (from, to) in edges_to_add {
183            self.graph.add_edge(from, to, ());
184        }
185
186        Ok(())
187    }
188
189    /// Check if the graph has cycles
190    pub fn has_cycles(&self) -> bool {
191        is_cyclic_directed(&self.graph)
192    }
193
194    /// Get topologically sorted list of tasks
195    pub fn topological_sort(&self) -> Result<Vec<TaskNode>> {
196        if self.has_cycles() {
197            return Err(crate::Error::configuration(
198                "Task dependency graph contains cycles".to_string(),
199            ));
200        }
201
202        match toposort(&self.graph, None) {
203            Ok(sorted_indices) => Ok(sorted_indices
204                .into_iter()
205                .map(|idx| self.graph[idx].clone())
206                .collect()),
207            Err(_) => Err(crate::Error::configuration(
208                "Failed to sort tasks topologically".to_string(),
209            )),
210        }
211    }
212
213    /// Get all tasks that can run in parallel (no dependencies between them)
214    pub fn get_parallel_groups(&self) -> Result<Vec<Vec<TaskNode>>> {
215        let sorted = self.topological_sort()?;
216
217        if sorted.is_empty() {
218            return Ok(vec![]);
219        }
220
221        // Group tasks by their dependency level
222        let mut groups: Vec<Vec<TaskNode>> = vec![];
223        let mut processed: HashMap<String, usize> = HashMap::new();
224
225        for task in sorted {
226            // Find the maximum level of all dependencies
227            let mut level = 0;
228            for dep in &task.task.depends_on {
229                if let Some(&dep_level) = processed.get(dep) {
230                    level = level.max(dep_level + 1);
231                }
232            }
233
234            // Add to appropriate group
235            if level >= groups.len() {
236                groups.resize(level + 1, vec![]);
237            }
238            groups[level].push(task.clone());
239            processed.insert(task.name.clone(), level);
240        }
241
242        Ok(groups)
243    }
244
245    /// Get the number of tasks in the graph
246    pub fn task_count(&self) -> usize {
247        self.graph.node_count()
248    }
249
250    /// Check if a task exists in the graph
251    pub fn contains_task(&self, name: &str) -> bool {
252        self.name_to_node.contains_key(name)
253    }
254
255    /// Build a complete graph from tasks with proper dependency resolution
256    /// This performs a two-pass build: first adding all nodes, then all edges
257    pub fn build_complete_graph(&mut self, tasks: &Tasks) -> Result<()> {
258        // First pass: Add all tasks as nodes
259        for (name, definition) in tasks.tasks.iter() {
260            match definition {
261                TaskDefinition::Single(task) => {
262                    self.add_task(name, task.as_ref().clone())?;
263                }
264                TaskDefinition::Group(_) => {
265                    // For groups, we'd need to expand them - this is more complex
266                    // and not needed for the current fix. Groups should be handled
267                    // by build_from_definition which already works correctly.
268                }
269            }
270        }
271
272        // Second pass: Add all dependency edges
273        self.add_dependency_edges()?;
274
275        Ok(())
276    }
277
278    /// Build graph for a specific task and all its transitive dependencies
279    pub fn build_for_task(&mut self, task_name: &str, all_tasks: &Tasks) -> Result<()> {
280        let mut to_process = vec![task_name.to_string()];
281        let mut processed = HashSet::new();
282
283        debug!(
284            "Building graph for '{}' with tasks {:?}",
285            task_name,
286            all_tasks.list_tasks()
287        );
288
289        // First pass: Collect all tasks that need to be included
290        while let Some(current_name) = to_process.pop() {
291            if processed.contains(&current_name) {
292                continue;
293            }
294            processed.insert(current_name.clone());
295
296            if let Some(definition) = all_tasks.get(&current_name) {
297                match definition {
298                    TaskDefinition::Single(task) => {
299                        self.add_task(&current_name, task.as_ref().clone())?;
300                        // Add dependencies to processing queue
301                        for dep in &task.depends_on {
302                            if !processed.contains(dep) {
303                                to_process.push(dep.clone());
304                            }
305                        }
306                    }
307                    TaskDefinition::Group(_) => {
308                        // Handle groups with build_from_definition
309                        let added_nodes =
310                            self.build_from_definition(&current_name, definition, all_tasks)?;
311                        // Collect dependencies from newly added tasks
312                        for node_idx in added_nodes {
313                            let node = &self.graph[node_idx];
314                            for dep in &node.task.depends_on {
315                                if !processed.contains(dep) {
316                                    to_process.push(dep.clone());
317                                }
318                            }
319                        }
320                    }
321                }
322            } else {
323                debug!("Task '{}' not found while building graph", current_name);
324            }
325        }
326
327        // Second pass: Add dependency edges
328        self.add_dependency_edges()?;
329
330        Ok(())
331    }
332}
333
334impl Default for TaskGraph {
335    fn default() -> Self {
336        Self::new()
337    }
338}
339
340#[cfg(test)]
341#[path = "graph_advanced_tests.rs"]
342mod graph_advanced_tests;
343
344#[cfg(test)]
345mod tests {
346    use super::*;
347    use crate::test_utils::create_task;
348
349    #[test]
350    fn test_task_graph_new() {
351        let graph = TaskGraph::new();
352        assert_eq!(graph.task_count(), 0);
353    }
354
355    #[test]
356    fn test_add_single_task() {
357        let mut graph = TaskGraph::new();
358        let task = create_task("test", vec![], vec![]);
359
360        let node = graph.add_task("test", task).unwrap();
361        assert!(graph.contains_task("test"));
362        assert_eq!(graph.task_count(), 1);
363
364        // Adding same task again should return same node
365        let task2 = create_task("test", vec![], vec![]);
366        let node2 = graph.add_task("test", task2).unwrap();
367        assert_eq!(node, node2);
368        assert_eq!(graph.task_count(), 1);
369    }
370
371    #[test]
372    fn test_task_dependencies() {
373        let mut graph = TaskGraph::new();
374
375        // Add tasks with dependencies
376        let task1 = create_task("task1", vec![], vec![]);
377        let task2 = create_task("task2", vec!["task1"], vec![]);
378        let task3 = create_task("task3", vec!["task1", "task2"], vec![]);
379
380        graph.add_task("task1", task1).unwrap();
381        graph.add_task("task2", task2).unwrap();
382        graph.add_task("task3", task3).unwrap();
383        graph.add_dependency_edges().unwrap(); // Add dependency edges after adding all tasks
384
385        assert_eq!(graph.task_count(), 3);
386        assert!(!graph.has_cycles());
387
388        let sorted = graph.topological_sort().unwrap();
389        assert_eq!(sorted.len(), 3);
390
391        // task1 should come before task2 and task3
392        let positions: HashMap<String, usize> = sorted
393            .iter()
394            .enumerate()
395            .map(|(i, node)| (node.name.clone(), i))
396            .collect();
397
398        assert!(positions["task1"] < positions["task2"]);
399        assert!(positions["task1"] < positions["task3"]);
400        assert!(positions["task2"] < positions["task3"]);
401    }
402
403    #[test]
404    fn test_cycle_detection() {
405        let mut graph = TaskGraph::new();
406
407        // Create a cycle: task1 -> task2 -> task3 -> task1
408        let task1 = create_task("task1", vec!["task3"], vec![]);
409        let task2 = create_task("task2", vec!["task1"], vec![]);
410        let task3 = create_task("task3", vec!["task2"], vec![]);
411
412        graph.add_task("task1", task1).unwrap();
413        graph.add_task("task2", task2).unwrap();
414        graph.add_task("task3", task3).unwrap();
415        graph.add_dependency_edges().unwrap(); // Add dependency edges after adding all tasks
416
417        assert!(graph.has_cycles());
418        assert!(graph.topological_sort().is_err());
419    }
420
421    #[test]
422    fn test_parallel_groups() {
423        let mut graph = TaskGraph::new();
424
425        // Create tasks that can run in parallel
426        // Level 0: task1, task2 (no dependencies)
427        // Level 1: task3 (depends on task1), task4 (depends on task2)
428        // Level 2: task5 (depends on task3 and task4)
429
430        let task1 = create_task("task1", vec![], vec![]);
431        let task2 = create_task("task2", vec![], vec![]);
432        let task3 = create_task("task3", vec!["task1"], vec![]);
433        let task4 = create_task("task4", vec!["task2"], vec![]);
434        let task5 = create_task("task5", vec!["task3", "task4"], vec![]);
435
436        graph.add_task("task1", task1).unwrap();
437        graph.add_task("task2", task2).unwrap();
438        graph.add_task("task3", task3).unwrap();
439        graph.add_task("task4", task4).unwrap();
440        graph.add_task("task5", task5).unwrap();
441        graph.add_dependency_edges().unwrap(); // Add dependency edges after adding all tasks
442
443        let groups = graph.get_parallel_groups().unwrap();
444
445        // Should have 3 levels
446        assert_eq!(groups.len(), 3);
447
448        // Level 0 should have 2 tasks
449        assert_eq!(groups[0].len(), 2);
450
451        // Level 1 should have 2 tasks
452        assert_eq!(groups[1].len(), 2);
453
454        // Level 2 should have 1 task
455        assert_eq!(groups[2].len(), 1);
456        assert_eq!(groups[2][0].name, "task5");
457    }
458
459    #[test]
460    fn test_build_from_sequential_group() {
461        let mut graph = TaskGraph::new();
462        let tasks = Tasks::new();
463
464        let task1 = create_task("t1", vec![], vec![]);
465        let task2 = create_task("t2", vec![], vec![]);
466
467        let group = TaskGroup::Sequential(vec![
468            TaskDefinition::Single(Box::new(task1)),
469            TaskDefinition::Single(Box::new(task2)),
470        ]);
471
472        let nodes = graph.build_from_group("seq", &group, &tasks).unwrap();
473        assert_eq!(nodes.len(), 2);
474
475        // Sequential tasks should have dependency chain
476        let sorted = graph.topological_sort().unwrap();
477        assert_eq!(sorted.len(), 2);
478        assert_eq!(sorted[0].name, "seq[0]");
479        assert_eq!(sorted[1].name, "seq[1]");
480    }
481
482    #[test]
483    fn test_build_from_parallel_group() {
484        let mut graph = TaskGraph::new();
485        let tasks = Tasks::new();
486
487        let task1 = create_task("t1", vec![], vec![]);
488        let task2 = create_task("t2", vec![], vec![]);
489
490        let mut parallel_tasks = HashMap::new();
491        parallel_tasks.insert("first".to_string(), TaskDefinition::Single(Box::new(task1)));
492        parallel_tasks.insert(
493            "second".to_string(),
494            TaskDefinition::Single(Box::new(task2)),
495        );
496
497        let group = TaskGroup::Parallel(ParallelGroup {
498            tasks: parallel_tasks,
499            depends_on: vec![],
500        });
501
502        let nodes = graph.build_from_group("par", &group, &tasks).unwrap();
503        assert_eq!(nodes.len(), 2);
504
505        // Parallel tasks should not have dependencies between them
506        assert!(!graph.has_cycles());
507
508        let groups = graph.get_parallel_groups().unwrap();
509        assert_eq!(groups.len(), 1); // All in same level
510        assert_eq!(groups[0].len(), 2); // Both can run in parallel
511    }
512
513    #[test]
514    fn test_three_way_cycle_detection() {
515        let mut graph = TaskGraph::new();
516
517        // Create cyclic dependencies: A -> B -> C -> A
518        let task_a = create_task("task_a", vec!["task_c"], vec![]);
519        let task_b = create_task("task_b", vec!["task_a"], vec![]);
520        let task_c = create_task("task_c", vec!["task_b"], vec![]);
521
522        graph.add_task("task_a", task_a).unwrap();
523        graph.add_task("task_b", task_b).unwrap();
524        graph.add_task("task_c", task_c).unwrap();
525        graph.add_dependency_edges().unwrap(); // Add dependency edges after adding all tasks
526
527        // This should create a cycle
528        assert!(graph.has_cycles());
529
530        // Should fail when trying to get parallel groups
531        assert!(graph.get_parallel_groups().is_err());
532    }
533
534    #[test]
535    fn test_self_dependency_cycle() {
536        let mut graph = TaskGraph::new();
537
538        // Create self-referencing task
539        let task = create_task("self_ref", vec!["self_ref"], vec![]);
540        graph.add_task("self_ref", task).unwrap();
541        graph.add_dependency_edges().unwrap(); // Add dependency edges after adding all tasks
542
543        assert!(graph.has_cycles());
544        assert!(graph.get_parallel_groups().is_err());
545    }
546
547    #[test]
548    fn test_complex_dependency_graph() {
549        let mut graph = TaskGraph::new();
550
551        // Create a diamond dependency pattern:
552        //     A
553        //    / \
554        //   B   C
555        //    \ /
556        //     D
557        let task_a = create_task("a", vec![], vec![]);
558        let task_b = create_task("b", vec!["a"], vec![]);
559        let task_c = create_task("c", vec!["a"], vec![]);
560        let task_d = create_task("d", vec!["b", "c"], vec![]);
561
562        graph.add_task("a", task_a).unwrap();
563        graph.add_task("b", task_b).unwrap();
564        graph.add_task("c", task_c).unwrap();
565        graph.add_task("d", task_d).unwrap();
566        graph.add_dependency_edges().unwrap(); // Add dependency edges after adding all tasks
567
568        assert!(!graph.has_cycles());
569        assert_eq!(graph.task_count(), 4);
570
571        let groups = graph.get_parallel_groups().unwrap();
572
573        // Should have 3 levels: [A], [B,C], [D]
574        assert_eq!(groups.len(), 3);
575        assert_eq!(groups[0].len(), 1); // A
576        assert_eq!(groups[1].len(), 2); // B and C can run in parallel
577        assert_eq!(groups[2].len(), 1); // D
578    }
579
580    #[test]
581    fn test_missing_dependency() {
582        let mut graph = TaskGraph::new();
583
584        // Create task with dependency that doesn't exist
585        let task = create_task("dependent", vec!["missing"], vec![]);
586        graph.add_task("dependent", task).unwrap();
587
588        // Should fail to get parallel groups due to missing dependency
589        assert!(graph.add_dependency_edges().is_err());
590    }
591
592    #[test]
593    fn test_empty_graph() {
594        let graph = TaskGraph::new();
595
596        assert_eq!(graph.task_count(), 0);
597        assert!(!graph.has_cycles());
598
599        let groups = graph.get_parallel_groups().unwrap();
600        assert!(groups.is_empty());
601    }
602
603    #[test]
604    fn test_single_task_no_deps() {
605        let mut graph = TaskGraph::new();
606
607        let task = create_task("solo", vec![], vec![]);
608        graph.add_task("solo", task).unwrap();
609
610        assert_eq!(graph.task_count(), 1);
611        assert!(!graph.has_cycles());
612
613        let groups = graph.get_parallel_groups().unwrap();
614        assert_eq!(groups.len(), 1);
615        assert_eq!(groups[0].len(), 1);
616    }
617
618    #[test]
619    fn test_linear_chain() {
620        let mut graph = TaskGraph::new();
621
622        // Create linear chain: A -> B -> C -> D
623        let task_a = create_task("a", vec![], vec![]);
624        let task_b = create_task("b", vec!["a"], vec![]);
625        let task_c = create_task("c", vec!["b"], vec![]);
626        let task_d = create_task("d", vec!["c"], vec![]);
627
628        graph.add_task("a", task_a).unwrap();
629        graph.add_task("b", task_b).unwrap();
630        graph.add_task("c", task_c).unwrap();
631        graph.add_task("d", task_d).unwrap();
632        graph.add_dependency_edges().unwrap(); // Add dependency edges after adding all tasks
633
634        assert!(!graph.has_cycles());
635        assert_eq!(graph.task_count(), 4);
636
637        let groups = graph.get_parallel_groups().unwrap();
638
639        // Should be 4 sequential groups
640        assert_eq!(groups.len(), 4);
641        for group in &groups {
642            assert_eq!(group.len(), 1);
643        }
644    }
645}