tsg_core/graph/analysis/
graph.rs

1use crate::graph::{GraphSection, PathAnalysis, TSGraph};
2use ahash::{HashMap, HashMapExt, HashSet, HashSetExt};
3use anyhow::{Context, Ok, Result};
4use bstr::BString;
5use derive_more::Display;
6use petgraph::graph::NodeIndex;
7use petgraph::visit::EdgeRef;
8use std::collections::VecDeque;
9
10/// Enumeration representing different graph topologies.
11/// The topology can be used to classify the structure of the graph.
12/// One graph only include one topology.
13#[derive(Debug, Clone, PartialEq, Eq, Display)]
14pub enum GraphTopology {
15    /// The graph is a fade-in structure.
16    FadeIn,
17    /// The graph is a fade-out structure.
18    FadeOut,
19    /// The graph is bipartite.
20    Bipartite,
21
22    /// The graph is a unique path.
23    UniquePath,
24    /// The graph is an equi-path.
25    EquiPath,
26    /// The graph is a hetero-path.
27    HeteroPath,
28
29    /// The category is not defined.
30    NotDefined,
31}
32
33pub trait GraphAnalysis {
34    fn topo(&self) -> Result<GraphTopology>;
35
36    /// Determines whether the graph is connected.
37    ///
38    /// A graph is connected if there is a path between every pair of vertices.
39    /// For directed graphs, this considers connections in both directions.
40    ///
41    /// # Returns
42    ///
43    /// * `Ok(true)` - If the graph is connected or empty
44    /// * `Ok(false)` - If the graph has disconnected components
45    /// * `Err` - If an error occurs during the analysis
46    fn is_connected(&self) -> Result<bool>;
47
48    /// Determines whether the graph contains any cycles.
49    ///
50    /// A cycle is a path that starts and ends at the same node.
51    /// This method performs a depth-first search to detect cycles.
52    ///
53    /// # Returns
54    ///
55    /// * `Ok(true)` - If the graph contains at least one cycle
56    /// * `Ok(false)` - If the graph is acyclic (contains no cycles)
57    /// * `Err` - If an error occurs during the analysis
58    fn is_cyclic(&self) -> Result<bool>;
59
60    /// Determines whether the graph contains any bubbles.
61    ///
62    /// A bubble is a subgraph that starts at a single source node, branches into multiple paths,
63    /// and then reconverges at a single sink node.
64    ///
65    /// # Returns
66    ///
67    /// * `Ok(true)` - If the graph contains at least one bubble
68    /// * `Ok(false)` - If the graph does not contain any bubbles
69    /// * `Err` - If an error occurs during the analysis
70    fn is_bubble(&self) -> Result<bool>;
71
72    /// Determines whether the graph is a directed acyclic graph (DAG).
73    ///
74    /// A graph is a DAG if it is both connected and does not contain cycles.
75    ///
76    /// # Returns
77    ///
78    /// * `Ok(true)` - If the graph is a DAG
79    /// * `Ok(false)` - If the graph is not a DAG
80    /// * `Err` - If an error occurs during the analysis
81    fn is_directed_acyclic_graph(&self) -> Result<bool> {
82        Ok(self.is_connected()? && !self.is_cyclic()?)
83    }
84
85    /// Determines whether the graph is simple.
86    ///
87    /// A graph is considered simple if the maximum path length is 1.
88    ///
89    /// # Returns
90    ///
91    /// * `Ok(true)` - If the graph is simple
92    /// * `Ok(false)` - If the graph is not simple
93    /// * `Err` - If an error occurs during the analysis
94    fn is_simple(&self) -> Result<bool>;
95
96    /// Determines whether the directed graph is a fade-in structure.
97    ///
98    /// A graph is considered a fade-in if it is simple and has multiple source nodes
99    /// but only one sink node.
100    ///
101    /// # Returns
102    ///
103    /// * `Ok(true)` - If the graph is a fade-in structure
104    /// * `Ok(false)` - If the graph is not a fade-in structure
105    /// * `Err` - If an error occurs during the analysis
106    fn is_fade_in(&self) -> Result<bool> {
107        Ok(matches!(self.topo()?, GraphTopology::FadeIn))
108    }
109
110    /// Determines whether the directed graph is a fade-out structure.
111    ///
112    /// A graph is considered a fade-out if it is simple and has only one source node
113    /// but multiple sink nodes.
114    ///
115    /// # Returns
116    ///
117    /// * `Ok(true)` - If the graph is a fade-out structure
118    /// * `Ok(false)` - If the graph is not a fade-out structure
119    /// * `Err` - If an error occurs during the analysis
120    fn is_fade_out(&self) -> Result<bool> {
121        Ok(matches!(self.topo()?, GraphTopology::FadeOut))
122    }
123
124    /// Determines whether the graph is bipartite.
125    ///
126    /// A bipartite graph is a simple graph with multiple source nodes and multiple sink nodes,
127    /// where the nodes can be divided into two disjoint sets with edges only between nodes of
128    /// different sets.
129    ///
130    /// # Returns
131    ///
132    /// * `Ok(true)` - If the graph is bipartite
133    /// * `Ok(false)` - If the graph is not bipartite
134    /// * `Err` - If an error occurs during the analysis
135    fn is_bipartite(&self) -> Result<bool> {
136        Ok(matches!(self.topo()?, GraphTopology::Bipartite))
137    }
138
139    /// Determines whether the graph contains a unique path.
140    ///
141    /// A graph has a unique path if it is not simple and there is only one path
142    /// after traversing the graph.
143    ///
144    /// # Returns
145    ///
146    /// * `Ok(true)` - If the graph contains a unique path
147    /// * `Ok(false)` - If the graph does not contain a unique path
148    /// * `Err` - If an error occurs during the analysis
149    fn is_unique_path(&self) -> Result<bool> {
150        Ok(matches!(self.topo()?, GraphTopology::UniquePath))
151    }
152
153    /// Determines whether the graph contains equi-paths.
154    ///
155    /// A graph has equi-paths if it is not simple, contains bubbles (alternative paths),
156    /// and all alternative paths have the same length.
157    ///
158    /// # Returns
159    ///
160    /// * `Ok(true)` - If the graph contains equi-paths
161    /// * `Ok(false)` - If the graph does not contain equi-paths
162    /// * `Err` - If an error occurs during the analysis
163    fn is_equi_path(&self) -> Result<bool> {
164        Ok(matches!(self.topo()?, GraphTopology::EquiPath))
165    }
166
167    /// Determines whether the graph contains hetero-paths.
168    ///
169    /// A graph has hetero-paths if it is not simple, contains bubbles (alternative paths),
170    /// and the alternative paths have different lengths.
171    ///
172    /// # Returns
173    ///
174    /// * `Ok(true)` - If the graph contains hetero-paths
175    /// * `Ok(false)` - If the graph does not contain hetero-paths
176    /// * `Err` - If an error occurs during the analysis
177    fn is_hetero_path(&self) -> Result<bool> {
178        Ok(matches!(self.topo()?, GraphTopology::HeteroPath))
179    }
180
181    /// Helper method to check if the graph matches a specific topology.
182    ///
183    /// This internal method can be used to avoid redundant calls to `topo()`
184    /// when checking for multiple topology types.
185    ///
186    /// # Parameters
187    ///
188    /// * `topology` - The GraphTopology variant to check against
189    ///
190    /// # Returns
191    ///
192    /// * `Ok(true)` - If the graph matches the specified topology
193    /// * `Ok(false)` - If the graph does not match the specified topology
194    /// * `Err` - If an error occurs during the analysis
195    fn matches_topology(&self, topology: GraphTopology) -> Result<bool> {
196        Ok(self.topo()? == topology)
197    }
198}
199
200impl GraphAnalysis for GraphSection {
201    fn is_connected(&self) -> Result<bool> {
202        if self.nodes().is_empty() {
203            return Ok(true); // Empty graph is trivially connected
204        }
205
206        // Start DFS from the first node
207        let start_node = self.node_indices.values().next().unwrap();
208        let mut visited = HashSet::new();
209
210        // Perform DFS to find all reachable nodes
211        self.dfs(*start_node, &mut visited);
212
213        // The graph is connected if all nodes are visited
214        Ok(visited.len() == self.node_indices.len())
215    }
216
217    fn is_cyclic(&self) -> Result<bool> {
218        // Track both visited nodes and nodes in the current recursion stack
219        let mut visited = HashSet::new();
220        let mut rec_stack = HashSet::new();
221
222        // Check from each node (to handle disconnected components)
223        for start_node in self.node_indices.values() {
224            if self.is_cyclic_util(*start_node, &mut visited, &mut rec_stack) {
225                return Ok(true);
226            }
227        }
228
229        Ok(false) // Updated to return Result<bool>
230    }
231
232    fn is_bubble(&self) -> Result<bool> {
233        let bubbles = self.collect_bubbles()?;
234        Ok(!bubbles.is_empty())
235    }
236
237    fn is_simple(&self) -> Result<bool> {
238        // A graph is simple if the maximum path length is 1
239        let paths = self.traverse()?;
240        let max_path_len = paths.iter().map(|path| path.len()).max().unwrap_or(0);
241        Ok(max_path_len == 2)
242    }
243
244    fn topo(&self) -> Result<GraphTopology> {
245        // Check if the graph is simple first since we need this for classification
246        let is_simple = self.is_simple()?;
247
248        // Count sources and sinks only once
249        let sources = self
250            ._graph
251            .node_indices()
252            .filter(|&n| {
253                self._graph
254                    .edges_directed(n, petgraph::Direction::Incoming)
255                    .count()
256                    == 0
257            })
258            .count();
259
260        let sinks = self
261            ._graph
262            .node_indices()
263            .filter(|&n| self._graph.edges(n).count() == 0)
264            .count();
265
266        if is_simple {
267            // Simple graph classification
268            match (sources, sinks) {
269                (s, 1) if s > 1 => Ok(GraphTopology::FadeIn),
270                (1, s) if s > 1 => Ok(GraphTopology::FadeOut),
271                (s, t) if s > 1 && t > 1 => Ok(GraphTopology::Bipartite),
272                _ => Ok(GraphTopology::NotDefined),
273            }
274        } else {
275            // Handle non-simple graph
276            if sources == 1 && sinks == 1 {
277                return Ok(GraphTopology::UniquePath);
278            }
279
280            // Only collect bubbles if needed
281            let bubbles = self.collect_bubbles()?;
282
283            if bubbles.is_empty() {
284                return Ok(GraphTopology::NotDefined);
285            }
286
287            // Check if any bubble has paths of different lengths
288            for bubble in &bubbles {
289                if bubble[0].len() != bubble[1].len() {
290                    return Ok(GraphTopology::HeteroPath);
291                }
292            }
293
294            Ok(GraphTopology::EquiPath)
295        }
296    }
297}
298
299impl GraphSection {
300    /// Performs a depth-first search (DFS) traversal of the graph.
301    ///
302    /// This method visits nodes in the graph in a depth-first manner, marking each visited node.
303    /// It considers both outgoing and incoming edges to ensure connectivity in both directions,
304    /// which is necessary for undirected connectivity analysis.
305    ///
306    /// # Parameters
307    ///
308    /// * `node` - The current node being visited in the traversal
309    /// * `visited` - A mutable HashSet tracking which nodes have been visited to avoid cycles
310    ///
311    /// # Note
312    ///
313    /// The method modifies the `visited` set in-place, adding each node encountered during traversal.
314    /// This is primarily used by the `is_connected` method to determine graph connectivity.
315    fn dfs(&self, node: NodeIndex, visited: &mut HashSet<NodeIndex>) {
316        // If already visited, return
317        if visited.contains(&node) {
318            return;
319        }
320
321        // Mark as visited
322        visited.insert(node);
323
324        // Visit all neighbors through outgoing edges
325        for edge in self._graph.edges(node) {
326            self.dfs(edge.target(), visited);
327        }
328
329        // Visit all neighbors through incoming edges
330        // This is necessary for undirected connectivity
331        for edge in self
332            ._graph
333            .edges_directed(node, petgraph::Direction::Incoming)
334        {
335            self.dfs(edge.source(), visited);
336        }
337    }
338
339    /// Helper method for cycle detection in a graph.
340    ///
341    /// This method implements a depth-first search that tracks both visited nodes
342    /// and nodes currently in the recursion stack to detect cycles.
343    ///
344    /// # Parameters
345    ///
346    /// * `node` - The current node being visited in the traversal
347    /// * `visited` - A mutable HashSet tracking all nodes that have been visited
348    /// * `rec_stack` - A mutable HashSet tracking nodes in the current recursion path
349    ///
350    /// # Returns
351    ///
352    /// `true` if a cycle is detected, `false` otherwise
353    fn is_cyclic_util(
354        &self,
355        node: NodeIndex,
356        visited: &mut HashSet<NodeIndex>,
357        rec_stack: &mut HashSet<NodeIndex>,
358    ) -> bool {
359        // If node is not visited yet, mark it visited and add to recursion stack
360        if !visited.contains(&node) {
361            visited.insert(node);
362            rec_stack.insert(node);
363
364            // Visit all neighbors through outgoing edges
365            for edge in self._graph.edges(node) {
366                let neighbor = edge.target();
367
368                // If neighbor is not visited, check if cycles exist in DFS
369                if !visited.contains(&neighbor) {
370                    if self.is_cyclic_util(neighbor, visited, rec_stack) {
371                        return true;
372                    }
373                }
374                // If neighbor is in recursion stack, we found a cycle
375                else if rec_stack.contains(&neighbor) {
376                    return true;
377                }
378            }
379        }
380
381        // Remove node from recursion stack when we're done exploring it
382        rec_stack.remove(&node);
383        false
384    }
385
386    fn collect_bubbles(&self) -> Result<Vec<Vec<Vec<NodeIndex>>>> {
387        let mut visited = HashSet::new();
388        let mut bubble_pairs = Vec::new();
389
390        for start_node in self.node_indices.values() {
391            if !visited.contains(start_node) {
392                self.find_bubbles(*start_node, &mut bubble_pairs, &mut visited)
393                    .context("Failed to find bubbles")?;
394            }
395        }
396        Ok(bubble_pairs)
397    }
398
399    fn find_bubbles(
400        &self,
401        start: NodeIndex,
402        bubbles: &mut Vec<Vec<Vec<NodeIndex>>>,
403        visited: &mut HashSet<NodeIndex>,
404    ) -> Result<()> {
405        // Get all outgoing neighbors
406        let outgoing_edges = self._graph.edges(start).collect::<Vec<_>>();
407
408        // If this node has multiple outgoing edges, it might be the start of a bubble
409        if outgoing_edges.len() >= 2 {
410            // For each pair of outgoing edges, check if they lead to the same end node
411            for i in 0..outgoing_edges.len() {
412                let path1_start = outgoing_edges[i].target();
413
414                for path2_outgoing_edge in outgoing_edges.iter().skip(i + 1) {
415                    let path2_start = path2_outgoing_edge.target();
416                    // Find bubbles from these two starting points
417                    self.find_bubble_paths(start, path1_start, path2_start, bubbles);
418                }
419            }
420
421            // Check for direct edges and alternative paths that form bubbles
422            let direct_targets: HashSet<NodeIndex> =
423                outgoing_edges.iter().map(|e| e.target()).collect();
424
425            for target in &direct_targets {
426                // For each direct target, check if there are alternative paths to it
427                self.check_alternative_paths(start, *target, &direct_targets, bubbles);
428            }
429        }
430
431        // Mark current node as visited
432        visited.insert(start);
433
434        // Continue DFS for bubble detection
435        for edge in outgoing_edges {
436            let next_node = edge.target();
437            if !visited.contains(&next_node) {
438                self.find_bubbles(next_node, bubbles, visited)?;
439            }
440        }
441        Ok(())
442    }
443
444    // Helper method to find bubble paths between two starting nodes
445    fn find_bubble_paths(
446        &self,
447        source: NodeIndex,      // The common source node
448        path1_start: NodeIndex, // First path's start node
449        path2_start: NodeIndex, // Second path's start node
450        bubbles: &mut Vec<Vec<Vec<NodeIndex>>>,
451    ) {
452        // Track visited nodes and their paths for each branch
453        let mut path1_visited = HashMap::new();
454        let mut path2_visited = HashMap::new();
455
456        // Track nodes where both paths converge (potential bubble end points)
457        let mut convergence_points = HashSet::new();
458
459        // Initialize queues for BFS
460        let mut queue1 = VecDeque::new();
461        let mut queue2 = VecDeque::new();
462
463        queue1.push_back(path1_start);
464        path1_visited.insert(path1_start, vec![source, path1_start]);
465
466        queue2.push_back(path2_start);
467        path2_visited.insert(path2_start, vec![source, path2_start]);
468
469        // BFS to find all possible convergence points
470        let max_depth = 100; // Prevent infinite loops
471        let mut depth = 0;
472
473        while (!queue1.is_empty() || !queue2.is_empty()) && depth < max_depth {
474            depth += 1;
475
476            // Process one level of path1
477            self.process_path(
478                &mut queue1,
479                &mut path1_visited,
480                &path2_visited,
481                &mut convergence_points,
482            );
483
484            // Process one level of path2
485            self.process_path(
486                &mut queue2,
487                &mut path2_visited,
488                &path1_visited,
489                &mut convergence_points,
490            );
491
492            // If we found convergence points, create bubble pairs
493            if !convergence_points.is_empty() {
494                // For each convergence point, construct a bubble pair
495                for &end_point in &convergence_points {
496                    if let Some(path1) = path1_visited.get(&end_point) {
497                        if let Some(path2) = path2_visited.get(&end_point) {
498                            // We have two paths that start at source and end at end_point
499                            // This is a proper bubble with common start and end points
500
501                            // Create a bubble pair if both paths are valid and different
502                            if path1.len() >= 3
503                                && path2.len() >= 3
504                                && path1.first() == Some(&source)
505                                && path1.last() == Some(&end_point)
506                                && path2.first() == Some(&source)
507                                && path2.last() == Some(&end_point)
508                                && path1 != path2
509                            {
510                                // Create a bubble pair as a Vec of two paths
511                                let bubble_pair = vec![path1.clone(), path2.clone()];
512                                bubbles.push(bubble_pair);
513                            }
514                        }
515                    }
516                }
517
518                // We found bubbles at this level, so we're done
519                break;
520            }
521        }
522    }
523
524    // Helper to process one level of a path during bubble search
525    fn process_path(
526        &self,
527        queue: &mut VecDeque<NodeIndex>,
528        current_visited: &mut HashMap<NodeIndex, Vec<NodeIndex>>,
529        other_visited: &HashMap<NodeIndex, Vec<NodeIndex>>,
530        convergence_points: &mut HashSet<NodeIndex>,
531    ) {
532        if queue.is_empty() {
533            return;
534        }
535
536        let node = queue.pop_front().unwrap();
537        let current_path = current_visited.get(&node).unwrap().clone();
538
539        // Check if this node has been visited in the other path - convergence point
540        if other_visited.contains_key(&node) {
541            // Found a convergence point - this is a potential bubble end point
542            convergence_points.insert(node);
543            return;
544        }
545
546        // Continue BFS
547        for edge in self._graph.edges(node) {
548            let next = edge.target();
549            if let std::collections::hash_map::Entry::Vacant(e) = current_visited.entry(next) {
550                let mut new_path = current_path.clone();
551                new_path.push(next);
552                e.insert(new_path);
553                queue.push_back(next);
554            }
555        }
556    }
557
558    /// Check for alternative paths between a start node and a target node
559    /// A true bubble must have both a common start point and a common end point
560    fn check_alternative_paths(
561        &self,
562        start: NodeIndex,
563        target: NodeIndex,
564        direct_targets: &HashSet<NodeIndex>,
565        bubbles: &mut Vec<Vec<Vec<NodeIndex>>>,
566    ) {
567        // First, check if there is a direct path from start to target
568        let direct_path = vec![start, target];
569
570        // Next, find all alternative paths from start to target
571        let mut alternative_paths = Vec::new();
572
573        // BFS to find all paths from start to target
574        let mut queue = VecDeque::new();
575        let mut paths: HashMap<NodeIndex, Vec<Vec<NodeIndex>>> = HashMap::new();
576
577        // Initialize with all direct neighbors except the target
578        for edge in self._graph.edges(start) {
579            let next = edge.target();
580            if next != target {
581                queue.push_back(next);
582                paths.insert(next, vec![vec![start, next]]);
583            }
584        }
585
586        // Track visited nodes to avoid cycles
587        let mut visited = HashSet::new();
588        visited.insert(start);
589
590        // BFS with path tracking
591        while let Some(node) = queue.pop_front() {
592            if visited.contains(&node) {
593                continue;
594            }
595
596            visited.insert(node);
597            let current_paths = paths.get(&node).unwrap().clone();
598
599            for edge in self._graph.edges(node) {
600                let next = edge.target();
601
602                // If we reached our target, we found an alternative path
603                if next == target {
604                    for path in &current_paths {
605                        let mut bubble_path = path.clone();
606                        bubble_path.push(target);
607
608                        // Only add as an alternative path if it's valid:
609                        // 1. Path must start at the start node
610                        // 2. Path must end at the target node
611                        // 3. Path must be at least 3 nodes long (start->middle->target)
612                        if bubble_path.len() >= 3
613                            && bubble_path.first() == Some(&start)
614                            && bubble_path.last() == Some(&target)
615                        {
616                            alternative_paths.push(bubble_path);
617                        }
618                    }
619                    continue;
620                }
621
622                // Skip if we've seen this node already to avoid cycles
623                if visited.contains(&next) || direct_targets.contains(&next) {
624                    continue;
625                }
626
627                // Create new paths by extending current paths
628                let mut new_paths = Vec::new();
629                for path in &current_paths {
630                    let mut new_path = path.clone();
631                    new_path.push(next);
632                    new_paths.push(new_path);
633                }
634
635                // Update or insert paths for this node
636                paths
637                    .entry(next)
638                    .and_modify(|e| e.extend(new_paths.clone()))
639                    .or_insert(new_paths.clone());
640
641                // Add to queue for further exploration
642                queue.push_back(next);
643            }
644        }
645
646        // If there's a direct path from start to target AND at least one alternative path,
647        // create bubble pairs
648        if !alternative_paths.is_empty() {
649            // For each alternative path, create a bubble pair with the direct path
650            for alt_path in alternative_paths {
651                // Create a bubble pair as a Vec of two paths
652                let bubble_pair = vec![direct_path.clone(), alt_path];
653                bubbles.push(bubble_pair);
654            }
655        }
656    }
657}
658
659pub trait TSGraphAnalysis {
660    fn summarize(&self) -> Result<BString>;
661}
662
663impl TSGraphAnalysis for TSGraph {
664    fn summarize(&self) -> Result<BString> {
665        // Pre-calculate capacity based on expected size
666        let graph_count = self.graphs.len();
667        // Estimate 30 bytes per graph entry (adjust as needed based on actual data sizes)
668        let estimated_capacity = graph_count * 30;
669
670        // Pre-allocate with capacity to avoid reallocations
671        let mut summary = Vec::with_capacity(estimated_capacity);
672        let headers = [
673            "gid",
674            "nodes",
675            "edges",
676            "paths",
677            "max_path_len",
678            "super_path",
679            "topology",
680        ];
681
682        let delimiter = ",";
683        let header_str = headers.join(delimiter) + "\n";
684        summary.extend_from_slice(header_str.as_bytes());
685
686        for (id, graph) in self.graphs.iter() {
687            let node_count = graph.nodes().len();
688            let edge_count = graph.edges().len();
689            let paths = graph.traverse()?;
690
691            let path_count = paths.len();
692            let max_path_len = paths.iter().map(|path| path.nodes.len()).max().unwrap_or(0);
693
694            let include_super_path = paths.iter().any(|path| {
695                path.is_super()
696                    .context("Failed to check super path")
697                    .unwrap()
698            });
699
700            let topo = graph.topo()?;
701
702            // Use write! to format directly into the buffer without intermediate allocations
703            use std::io::Write;
704            writeln!(
705                summary,
706                "{},{},{},{},{},{},{}",
707                id, node_count, edge_count, path_count, max_path_len, include_super_path, topo
708            )?;
709        }
710        // Convert to BString only once at the end
711        Ok(BString::from(summary))
712    }
713}
714
715#[cfg(test)]
716mod tests {
717    use super::*;
718    use crate::graph::TSGraph;
719    use std::str::FromStr;
720
721    #[test]
722    fn test_is_connected() {
723        // Create a connected graph
724        let tsg_string = r#"H	VN	1.0
725H	PN	TestGraph
726N	node1	chr1:+:100-200	read1:SO,read2:IN	ACGT
727N	node2	chr1:+:300-400	read1:SO,read3:IN
728N	node3	chr1:+:500-600	read1:SO,read4:IN
729E	edge1	node1	node2	chr1,chr1,1700,2000,INV
730E	edge2	node2	node3	chr1,chr1,1700,2000,DUP
731"#;
732
733        let tsgraph = TSGraph::from_str(tsg_string).unwrap();
734        let graph = tsgraph.default_graph().unwrap();
735        assert!(graph.is_connected().unwrap());
736
737        // Create a disconnected graph
738        let tsg_string = r#"H	VN	1.0
739H	PN	TestGraph
740N	node1	chr1:+:100-200	read1:SO,read2:IN	ACGT
741N	node2	chr1:+:300-400	read1:SO,read3:IN
742N	node3	chr1:+:500-600	read1:SO,read4:IN
743E	edge1	node1	node2	chr1,chr1,1700,2000,INV
744"#;
745
746        let tsgraph = TSGraph::from_str(tsg_string).unwrap();
747        let graph = tsgraph.default_graph().unwrap();
748        assert!(!graph.is_connected().unwrap());
749    }
750
751    #[test]
752    fn test_is_cyclic() {
753        // Create an acyclic graph
754        let tsg_string = r#"H	VN	1.0
755H	PN	TestGraph
756N	node1	chr1:+:100-200	read1:SO,read2:IN	ACGT
757N	node2	chr1:+:300-400	read1:SO,read3:IN
758N	node3	chr1:+:500-600	read1:SO,read4:IN
759E	edge1	node1	node2	chr1,chr1,1700,2000,INV
760E	edge2	node2	node3	chr1,chr1,1700,2000,DUP
761"#;
762        let tsgraph = TSGraph::from_str(tsg_string).unwrap();
763        let graph = tsgraph.default_graph().unwrap();
764        assert!(!graph.is_cyclic().unwrap());
765
766        // Create a cyclic graph
767        let tsg_string = r#"H	VN	1.0
768H	PN	TestGraph
769N	node1	chr1:+:100-200	read1:SO,read2:IN	ACGT
770N	node2	chr1:+:300-400	read1:SO,read3:IN
771N	node3	chr1:+:500-600	read1:SO,read4:IN
772E	edge1	node1	node2	chr1,chr1,1700,2000,INV
773E	edge2	node2	node3	chr1,chr1,1700,2000,DUP
774E	edge3	node3	node1	chr1,chr1,1700,2000,DUP
775"#;
776        let tsgraph = TSGraph::from_str(tsg_string).unwrap();
777        let graph = tsgraph.default_graph().unwrap();
778
779        assert!(graph.is_cyclic().unwrap());
780    }
781
782    #[test]
783    fn test_detect_bubbles() {
784        // Create a graph with a bubble
785        let tsg_string = r#"H	VN	1.0
786H	PN	TestGraph
787N	node1	chr1:+:100-200	read1:SO,read2:IN	ACGT
788N	node2	chr1:+:300-400	read1:SO,read3:IN
789N	node3	chr1:+:500-600	read1:SO,read4:IN
790N	node4	chr1:+:700-800	read1:SO,read5:IN
791E	edge1	node1	node2	chr1,chr1,1700,2000,INV
792E	edge2	node2	node3	chr1,chr1,1700,2000,DUP
793E	edge3	node2	node4	chr1,chr1,1700,2000,DUP
794E	edge4	node3	node4	chr1,chr1,1700,2000,INV
795E	edge5	node1	node3	chr1,chr1,1700,2000,INV
796"#;
797
798        let tsgraph = TSGraph::from_str(tsg_string).unwrap();
799        let graph = tsgraph.default_graph().unwrap();
800        let is_bubble = graph.is_bubble().unwrap();
801        assert!(is_bubble);
802
803        let bubbles = graph.collect_bubbles().unwrap();
804        println!("Bubbles: {:?}", bubbles);
805
806        // Verify the bubbles are detected as pairs of paths
807        assert!(!bubbles.is_empty(), "Should detect at least one bubble");
808
809        // Each bubble should be a pair of alternative paths
810        for bubble_pair in &bubbles {
811            assert_eq!(
812                bubble_pair.len(),
813                2,
814                "Each bubble should have exactly 2 paths"
815            );
816
817            // Both paths in a pair should have the same start and end nodes
818            let path1 = &bubble_pair[0];
819            let path2 = &bubble_pair[1];
820
821            assert_eq!(
822                path1.first(),
823                path2.first(),
824                "Paths should have the same start node"
825            );
826            assert_eq!(
827                path1.last(),
828                path2.last(),
829                "Paths should have the same end node"
830            );
831            assert!(path1 != path2, "The two paths should be different");
832        }
833
834        // No bubbles in a linear graph
835        let tsg_string = r#"H	VN	1.0
836H	PN	TestGraph
837N	node1	chr1:+:100-200	read1:SO,read2:IN	ACGT
838N	node2	chr1:+:300-400	read1:SO,read3:IN
839N	node3	chr1:+:500-600	read1:SO,read4:IN
840E	edge1	node1	node2	chr1,chr1,1700,2000,INV
841E	edge2	node2	node3	chr1,chr1,1700,2000,DUP
842"#;
843
844        let tsg_graph = TSGraph::from_str(tsg_string).unwrap();
845        let graph = tsg_graph.default_graph().unwrap();
846        let is_bubble = graph.is_bubble().unwrap();
847        assert!(!is_bubble, "Should not detect bubbles in a linear graph");
848    }
849
850    #[test]
851    fn test_summarize() {
852        let tsg_string = r#"H	VN	1.0
853    H	PN	TestGraph
854    G	g1
855    N	node1	chr1:+:100-200	read1:SO,read2:IN	ACGT
856    N	node2	chr1:+:300-400	read1:SO,read3:IN
857    N	node3	chr1:+:500-600	read1:SO,read4:IN
858    E	edge1	node1	node2	chr1,chr1,1700,2000,INV
859    E	edge2	node2	node3	chr1,chr1,1700,2000,DUP
860    G	g2
861    N	node4	chr2:+:100-200	read5:SO,read6:IN	ACGT
862    N	node5	chr2:+:300-400	read5:SO,read7:IN
863    E	edge3	node4	node5	chr2,chr2,1700,2000,INV
864    "#;
865
866        let tsgraph = TSGraph::from_str(tsg_string).unwrap();
867        let summary = tsgraph.summarize().unwrap();
868        let summary_str = summary.to_string();
869
870        assert!(summary_str.contains("edges"));
871        assert!(summary_str.contains("paths"));
872        assert!(summary_str.contains("max_path_len"));
873        assert!(summary_str.contains("g1"));
874        assert!(summary_str.contains("g2"));
875        assert!(summary_str.contains("edges"));
876        assert!(summary_str.contains("paths"));
877    }
878
879    #[test]
880    fn test_proper_bubble_detection() {
881        // Create a graph with the example from the prompt
882        let tsg_string = r#"H	VN	1.0
883H	PN	TestGraph
884N	node1	chr1:+:100-200	read1:SO,read2:IN	ACGT
885N	node2	chr1:+:300-400	read1:SO,read3:IN
886N	node3	chr1:+:500-600	read1:SO,read4:IN
887N	node4	chr1:+:700-800	read1:SO,read5:IN
888E	edge1	node1	node2	chr1,chr1,1700,2000,INV
889E	edge2	node2	node3	chr1,chr1,1700,2000,DUP
890E	edge3	node2	node4	chr1,chr1,1700,2000,DUP
891E	edge4	node3	node4	chr1,chr1,1700,2000,INV
892E	edge5	node1	node3	chr1,chr1,1700,2000,INV
893"#;
894
895        let tsgraph = TSGraph::from_str(tsg_string).unwrap();
896        let graph = tsgraph.default_graph().unwrap();
897
898        // Collect all bubbles in the graph
899        let bubbles = graph.collect_bubbles().unwrap();
900        // Extract node names for test validation
901        let node_names: HashMap<_, _> = graph.node_indices_to_ids();
902
903        // Print the bubbles with node names for debugging
904        println!("Found {} bubbles:", bubbles.len());
905        for (i, bubble) in bubbles.iter().enumerate() {
906            let path1 = bubble[0]
907                .iter()
908                .map(|&idx| node_names.get(&idx).unwrap().to_string())
909                .collect::<Vec<_>>()
910                .join(" -> ");
911
912            let path2 = bubble[1]
913                .iter()
914                .map(|&idx| node_names.get(&idx).unwrap().to_string())
915                .collect::<Vec<_>>()
916                .join(" -> ");
917            println!("Bubble {}: {} and {}", i + 1, path1, path2);
918        }
919
920        // Check that we only detect proper bubbles (paths with both common start and end points)
921        // We should NOT detect "node1 -> node2 -> node3" or "node2 -> node3 -> node4" as bubbles
922        // because they don't have common end points
923
924        // True bubbles should include:
925        // 1. node1 -> node2 -> node4 and node1 -> node3 -> node4 (common start node1, common end node4)
926        // 2. node1 -> node2 -> node3 and node1 -> node3 (common start node1, common end node3)
927        // 3. node2 -> node3 -> node4 and node2 -> node4 (common start node2, common end node4)
928
929        assert_eq!(bubbles.len(), 3, "Should detect exactly 3 bubbles");
930    }
931}
tsg_core/graph/analysis/graph.rs

tsg_core/graph/analysis/
graph.rs