tsg_core/graph/analysis/graph.rs
1use crate::graph::{GraphSection, PathAnalysis, TSGraph};
2use ahash::{HashMap, HashMapExt, HashSet, HashSetExt};
3use anyhow::{Context, Ok, Result};
4use bstr::BString;
5use derive_more::Display;
6use petgraph::graph::NodeIndex;
7use petgraph::visit::EdgeRef;
8use std::collections::VecDeque;
9
10/// Enumeration representing different graph topologies.
11/// The topology can be used to classify the structure of the graph.
12/// One graph only include one topology.
13#[derive(Debug, Clone, PartialEq, Eq, Display)]
14pub enum GraphTopology {
15 /// The graph is a fade-in structure.
16 FadeIn,
17 /// The graph is a fade-out structure.
18 FadeOut,
19 /// The graph is bipartite.
20 Bipartite,
21
22 /// The graph is a unique path.
23 UniquePath,
24 /// The graph is an equi-path.
25 EquiPath,
26 /// The graph is a hetero-path.
27 HeteroPath,
28
29 /// The category is not defined.
30 NotDefined,
31}
32
33pub trait GraphAnalysis {
34 fn topo(&self) -> Result<GraphTopology>;
35
36 /// Determines whether the graph is connected.
37 ///
38 /// A graph is connected if there is a path between every pair of vertices.
39 /// For directed graphs, this considers connections in both directions.
40 ///
41 /// # Returns
42 ///
43 /// * `Ok(true)` - If the graph is connected or empty
44 /// * `Ok(false)` - If the graph has disconnected components
45 /// * `Err` - If an error occurs during the analysis
46 fn is_connected(&self) -> Result<bool>;
47
48 /// Determines whether the graph contains any cycles.
49 ///
50 /// A cycle is a path that starts and ends at the same node.
51 /// This method performs a depth-first search to detect cycles.
52 ///
53 /// # Returns
54 ///
55 /// * `Ok(true)` - If the graph contains at least one cycle
56 /// * `Ok(false)` - If the graph is acyclic (contains no cycles)
57 /// * `Err` - If an error occurs during the analysis
58 fn is_cyclic(&self) -> Result<bool>;
59
60 /// Determines whether the graph contains any bubbles.
61 ///
62 /// A bubble is a subgraph that starts at a single source node, branches into multiple paths,
63 /// and then reconverges at a single sink node.
64 ///
65 /// # Returns
66 ///
67 /// * `Ok(true)` - If the graph contains at least one bubble
68 /// * `Ok(false)` - If the graph does not contain any bubbles
69 /// * `Err` - If an error occurs during the analysis
70 fn is_bubble(&self) -> Result<bool>;
71
72 /// Determines whether the graph is a directed acyclic graph (DAG).
73 ///
74 /// A graph is a DAG if it is both connected and does not contain cycles.
75 ///
76 /// # Returns
77 ///
78 /// * `Ok(true)` - If the graph is a DAG
79 /// * `Ok(false)` - If the graph is not a DAG
80 /// * `Err` - If an error occurs during the analysis
81 fn is_directed_acyclic_graph(&self) -> Result<bool> {
82 Ok(self.is_connected()? && !self.is_cyclic()?)
83 }
84
85 /// Determines whether the graph is simple.
86 ///
87 /// A graph is considered simple if the maximum path length is 1.
88 ///
89 /// # Returns
90 ///
91 /// * `Ok(true)` - If the graph is simple
92 /// * `Ok(false)` - If the graph is not simple
93 /// * `Err` - If an error occurs during the analysis
94 fn is_simple(&self) -> Result<bool>;
95
96 /// Determines whether the directed graph is a fade-in structure.
97 ///
98 /// A graph is considered a fade-in if it is simple and has multiple source nodes
99 /// but only one sink node.
100 ///
101 /// # Returns
102 ///
103 /// * `Ok(true)` - If the graph is a fade-in structure
104 /// * `Ok(false)` - If the graph is not a fade-in structure
105 /// * `Err` - If an error occurs during the analysis
106 fn is_fade_in(&self) -> Result<bool> {
107 Ok(matches!(self.topo()?, GraphTopology::FadeIn))
108 }
109
110 /// Determines whether the directed graph is a fade-out structure.
111 ///
112 /// A graph is considered a fade-out if it is simple and has only one source node
113 /// but multiple sink nodes.
114 ///
115 /// # Returns
116 ///
117 /// * `Ok(true)` - If the graph is a fade-out structure
118 /// * `Ok(false)` - If the graph is not a fade-out structure
119 /// * `Err` - If an error occurs during the analysis
120 fn is_fade_out(&self) -> Result<bool> {
121 Ok(matches!(self.topo()?, GraphTopology::FadeOut))
122 }
123
124 /// Determines whether the graph is bipartite.
125 ///
126 /// A bipartite graph is a simple graph with multiple source nodes and multiple sink nodes,
127 /// where the nodes can be divided into two disjoint sets with edges only between nodes of
128 /// different sets.
129 ///
130 /// # Returns
131 ///
132 /// * `Ok(true)` - If the graph is bipartite
133 /// * `Ok(false)` - If the graph is not bipartite
134 /// * `Err` - If an error occurs during the analysis
135 fn is_bipartite(&self) -> Result<bool> {
136 Ok(matches!(self.topo()?, GraphTopology::Bipartite))
137 }
138
139 /// Determines whether the graph contains a unique path.
140 ///
141 /// A graph has a unique path if it is not simple and there is only one path
142 /// after traversing the graph.
143 ///
144 /// # Returns
145 ///
146 /// * `Ok(true)` - If the graph contains a unique path
147 /// * `Ok(false)` - If the graph does not contain a unique path
148 /// * `Err` - If an error occurs during the analysis
149 fn is_unique_path(&self) -> Result<bool> {
150 Ok(matches!(self.topo()?, GraphTopology::UniquePath))
151 }
152
153 /// Determines whether the graph contains equi-paths.
154 ///
155 /// A graph has equi-paths if it is not simple, contains bubbles (alternative paths),
156 /// and all alternative paths have the same length.
157 ///
158 /// # Returns
159 ///
160 /// * `Ok(true)` - If the graph contains equi-paths
161 /// * `Ok(false)` - If the graph does not contain equi-paths
162 /// * `Err` - If an error occurs during the analysis
163 fn is_equi_path(&self) -> Result<bool> {
164 Ok(matches!(self.topo()?, GraphTopology::EquiPath))
165 }
166
167 /// Determines whether the graph contains hetero-paths.
168 ///
169 /// A graph has hetero-paths if it is not simple, contains bubbles (alternative paths),
170 /// and the alternative paths have different lengths.
171 ///
172 /// # Returns
173 ///
174 /// * `Ok(true)` - If the graph contains hetero-paths
175 /// * `Ok(false)` - If the graph does not contain hetero-paths
176 /// * `Err` - If an error occurs during the analysis
177 fn is_hetero_path(&self) -> Result<bool> {
178 Ok(matches!(self.topo()?, GraphTopology::HeteroPath))
179 }
180
181 /// Helper method to check if the graph matches a specific topology.
182 ///
183 /// This internal method can be used to avoid redundant calls to `topo()`
184 /// when checking for multiple topology types.
185 ///
186 /// # Parameters
187 ///
188 /// * `topology` - The GraphTopology variant to check against
189 ///
190 /// # Returns
191 ///
192 /// * `Ok(true)` - If the graph matches the specified topology
193 /// * `Ok(false)` - If the graph does not match the specified topology
194 /// * `Err` - If an error occurs during the analysis
195 fn matches_topology(&self, topology: GraphTopology) -> Result<bool> {
196 Ok(self.topo()? == topology)
197 }
198}
199
200impl GraphAnalysis for GraphSection {
201 fn is_connected(&self) -> Result<bool> {
202 if self.nodes().is_empty() {
203 return Ok(true); // Empty graph is trivially connected
204 }
205
206 // Start DFS from the first node
207 let start_node = self.node_indices.values().next().unwrap();
208 let mut visited = HashSet::new();
209
210 // Perform DFS to find all reachable nodes
211 self.dfs(*start_node, &mut visited);
212
213 // The graph is connected if all nodes are visited
214 Ok(visited.len() == self.node_indices.len())
215 }
216
217 fn is_cyclic(&self) -> Result<bool> {
218 // Track both visited nodes and nodes in the current recursion stack
219 let mut visited = HashSet::new();
220 let mut rec_stack = HashSet::new();
221
222 // Check from each node (to handle disconnected components)
223 for start_node in self.node_indices.values() {
224 if self.is_cyclic_util(*start_node, &mut visited, &mut rec_stack) {
225 return Ok(true);
226 }
227 }
228
229 Ok(false) // Updated to return Result<bool>
230 }
231
232 fn is_bubble(&self) -> Result<bool> {
233 let bubbles = self.collect_bubbles()?;
234 Ok(!bubbles.is_empty())
235 }
236
237 fn is_simple(&self) -> Result<bool> {
238 // A graph is simple if the maximum path length is 1
239 let paths = self.traverse()?;
240 let max_path_len = paths.iter().map(|path| path.len()).max().unwrap_or(0);
241 Ok(max_path_len == 2)
242 }
243
244 fn topo(&self) -> Result<GraphTopology> {
245 // Check if the graph is simple first since we need this for classification
246 let is_simple = self.is_simple()?;
247
248 // Count sources and sinks only once
249 let sources = self
250 ._graph
251 .node_indices()
252 .filter(|&n| {
253 self._graph
254 .edges_directed(n, petgraph::Direction::Incoming)
255 .count()
256 == 0
257 })
258 .count();
259
260 let sinks = self
261 ._graph
262 .node_indices()
263 .filter(|&n| self._graph.edges(n).count() == 0)
264 .count();
265
266 if is_simple {
267 // Simple graph classification
268 match (sources, sinks) {
269 (s, 1) if s > 1 => Ok(GraphTopology::FadeIn),
270 (1, s) if s > 1 => Ok(GraphTopology::FadeOut),
271 (s, t) if s > 1 && t > 1 => Ok(GraphTopology::Bipartite),
272 _ => Ok(GraphTopology::NotDefined),
273 }
274 } else {
275 // Handle non-simple graph
276 if sources == 1 && sinks == 1 {
277 return Ok(GraphTopology::UniquePath);
278 }
279
280 // Only collect bubbles if needed
281 let bubbles = self.collect_bubbles()?;
282
283 if bubbles.is_empty() {
284 return Ok(GraphTopology::NotDefined);
285 }
286
287 // Check if any bubble has paths of different lengths
288 for bubble in &bubbles {
289 if bubble[0].len() != bubble[1].len() {
290 return Ok(GraphTopology::HeteroPath);
291 }
292 }
293
294 Ok(GraphTopology::EquiPath)
295 }
296 }
297}
298
299impl GraphSection {
300 /// Performs a depth-first search (DFS) traversal of the graph.
301 ///
302 /// This method visits nodes in the graph in a depth-first manner, marking each visited node.
303 /// It considers both outgoing and incoming edges to ensure connectivity in both directions,
304 /// which is necessary for undirected connectivity analysis.
305 ///
306 /// # Parameters
307 ///
308 /// * `node` - The current node being visited in the traversal
309 /// * `visited` - A mutable HashSet tracking which nodes have been visited to avoid cycles
310 ///
311 /// # Note
312 ///
313 /// The method modifies the `visited` set in-place, adding each node encountered during traversal.
314 /// This is primarily used by the `is_connected` method to determine graph connectivity.
315 fn dfs(&self, node: NodeIndex, visited: &mut HashSet<NodeIndex>) {
316 // If already visited, return
317 if visited.contains(&node) {
318 return;
319 }
320
321 // Mark as visited
322 visited.insert(node);
323
324 // Visit all neighbors through outgoing edges
325 for edge in self._graph.edges(node) {
326 self.dfs(edge.target(), visited);
327 }
328
329 // Visit all neighbors through incoming edges
330 // This is necessary for undirected connectivity
331 for edge in self
332 ._graph
333 .edges_directed(node, petgraph::Direction::Incoming)
334 {
335 self.dfs(edge.source(), visited);
336 }
337 }
338
339 /// Helper method for cycle detection in a graph.
340 ///
341 /// This method implements a depth-first search that tracks both visited nodes
342 /// and nodes currently in the recursion stack to detect cycles.
343 ///
344 /// # Parameters
345 ///
346 /// * `node` - The current node being visited in the traversal
347 /// * `visited` - A mutable HashSet tracking all nodes that have been visited
348 /// * `rec_stack` - A mutable HashSet tracking nodes in the current recursion path
349 ///
350 /// # Returns
351 ///
352 /// `true` if a cycle is detected, `false` otherwise
353 fn is_cyclic_util(
354 &self,
355 node: NodeIndex,
356 visited: &mut HashSet<NodeIndex>,
357 rec_stack: &mut HashSet<NodeIndex>,
358 ) -> bool {
359 // If node is not visited yet, mark it visited and add to recursion stack
360 if !visited.contains(&node) {
361 visited.insert(node);
362 rec_stack.insert(node);
363
364 // Visit all neighbors through outgoing edges
365 for edge in self._graph.edges(node) {
366 let neighbor = edge.target();
367
368 // If neighbor is not visited, check if cycles exist in DFS
369 if !visited.contains(&neighbor) {
370 if self.is_cyclic_util(neighbor, visited, rec_stack) {
371 return true;
372 }
373 }
374 // If neighbor is in recursion stack, we found a cycle
375 else if rec_stack.contains(&neighbor) {
376 return true;
377 }
378 }
379 }
380
381 // Remove node from recursion stack when we're done exploring it
382 rec_stack.remove(&node);
383 false
384 }
385
386 fn collect_bubbles(&self) -> Result<Vec<Vec<Vec<NodeIndex>>>> {
387 let mut visited = HashSet::new();
388 let mut bubble_pairs = Vec::new();
389
390 for start_node in self.node_indices.values() {
391 if !visited.contains(start_node) {
392 self.find_bubbles(*start_node, &mut bubble_pairs, &mut visited)
393 .context("Failed to find bubbles")?;
394 }
395 }
396 Ok(bubble_pairs)
397 }
398
399 fn find_bubbles(
400 &self,
401 start: NodeIndex,
402 bubbles: &mut Vec<Vec<Vec<NodeIndex>>>,
403 visited: &mut HashSet<NodeIndex>,
404 ) -> Result<()> {
405 // Get all outgoing neighbors
406 let outgoing_edges = self._graph.edges(start).collect::<Vec<_>>();
407
408 // If this node has multiple outgoing edges, it might be the start of a bubble
409 if outgoing_edges.len() >= 2 {
410 // For each pair of outgoing edges, check if they lead to the same end node
411 for i in 0..outgoing_edges.len() {
412 let path1_start = outgoing_edges[i].target();
413
414 for path2_outgoing_edge in outgoing_edges.iter().skip(i + 1) {
415 let path2_start = path2_outgoing_edge.target();
416 // Find bubbles from these two starting points
417 self.find_bubble_paths(start, path1_start, path2_start, bubbles);
418 }
419 }
420
421 // Check for direct edges and alternative paths that form bubbles
422 let direct_targets: HashSet<NodeIndex> =
423 outgoing_edges.iter().map(|e| e.target()).collect();
424
425 for target in &direct_targets {
426 // For each direct target, check if there are alternative paths to it
427 self.check_alternative_paths(start, *target, &direct_targets, bubbles);
428 }
429 }
430
431 // Mark current node as visited
432 visited.insert(start);
433
434 // Continue DFS for bubble detection
435 for edge in outgoing_edges {
436 let next_node = edge.target();
437 if !visited.contains(&next_node) {
438 self.find_bubbles(next_node, bubbles, visited)?;
439 }
440 }
441 Ok(())
442 }
443
444 // Helper method to find bubble paths between two starting nodes
445 fn find_bubble_paths(
446 &self,
447 source: NodeIndex, // The common source node
448 path1_start: NodeIndex, // First path's start node
449 path2_start: NodeIndex, // Second path's start node
450 bubbles: &mut Vec<Vec<Vec<NodeIndex>>>,
451 ) {
452 // Track visited nodes and their paths for each branch
453 let mut path1_visited = HashMap::new();
454 let mut path2_visited = HashMap::new();
455
456 // Track nodes where both paths converge (potential bubble end points)
457 let mut convergence_points = HashSet::new();
458
459 // Initialize queues for BFS
460 let mut queue1 = VecDeque::new();
461 let mut queue2 = VecDeque::new();
462
463 queue1.push_back(path1_start);
464 path1_visited.insert(path1_start, vec![source, path1_start]);
465
466 queue2.push_back(path2_start);
467 path2_visited.insert(path2_start, vec![source, path2_start]);
468
469 // BFS to find all possible convergence points
470 let max_depth = 100; // Prevent infinite loops
471 let mut depth = 0;
472
473 while (!queue1.is_empty() || !queue2.is_empty()) && depth < max_depth {
474 depth += 1;
475
476 // Process one level of path1
477 self.process_path(
478 &mut queue1,
479 &mut path1_visited,
480 &path2_visited,
481 &mut convergence_points,
482 );
483
484 // Process one level of path2
485 self.process_path(
486 &mut queue2,
487 &mut path2_visited,
488 &path1_visited,
489 &mut convergence_points,
490 );
491
492 // If we found convergence points, create bubble pairs
493 if !convergence_points.is_empty() {
494 // For each convergence point, construct a bubble pair
495 for &end_point in &convergence_points {
496 if let Some(path1) = path1_visited.get(&end_point) {
497 if let Some(path2) = path2_visited.get(&end_point) {
498 // We have two paths that start at source and end at end_point
499 // This is a proper bubble with common start and end points
500
501 // Create a bubble pair if both paths are valid and different
502 if path1.len() >= 3
503 && path2.len() >= 3
504 && path1.first() == Some(&source)
505 && path1.last() == Some(&end_point)
506 && path2.first() == Some(&source)
507 && path2.last() == Some(&end_point)
508 && path1 != path2
509 {
510 // Create a bubble pair as a Vec of two paths
511 let bubble_pair = vec![path1.clone(), path2.clone()];
512 bubbles.push(bubble_pair);
513 }
514 }
515 }
516 }
517
518 // We found bubbles at this level, so we're done
519 break;
520 }
521 }
522 }
523
524 // Helper to process one level of a path during bubble search
525 fn process_path(
526 &self,
527 queue: &mut VecDeque<NodeIndex>,
528 current_visited: &mut HashMap<NodeIndex, Vec<NodeIndex>>,
529 other_visited: &HashMap<NodeIndex, Vec<NodeIndex>>,
530 convergence_points: &mut HashSet<NodeIndex>,
531 ) {
532 if queue.is_empty() {
533 return;
534 }
535
536 let node = queue.pop_front().unwrap();
537 let current_path = current_visited.get(&node).unwrap().clone();
538
539 // Check if this node has been visited in the other path - convergence point
540 if other_visited.contains_key(&node) {
541 // Found a convergence point - this is a potential bubble end point
542 convergence_points.insert(node);
543 return;
544 }
545
546 // Continue BFS
547 for edge in self._graph.edges(node) {
548 let next = edge.target();
549 if let std::collections::hash_map::Entry::Vacant(e) = current_visited.entry(next) {
550 let mut new_path = current_path.clone();
551 new_path.push(next);
552 e.insert(new_path);
553 queue.push_back(next);
554 }
555 }
556 }
557
558 /// Check for alternative paths between a start node and a target node
559 /// A true bubble must have both a common start point and a common end point
560 fn check_alternative_paths(
561 &self,
562 start: NodeIndex,
563 target: NodeIndex,
564 direct_targets: &HashSet<NodeIndex>,
565 bubbles: &mut Vec<Vec<Vec<NodeIndex>>>,
566 ) {
567 // First, check if there is a direct path from start to target
568 let direct_path = vec![start, target];
569
570 // Next, find all alternative paths from start to target
571 let mut alternative_paths = Vec::new();
572
573 // BFS to find all paths from start to target
574 let mut queue = VecDeque::new();
575 let mut paths: HashMap<NodeIndex, Vec<Vec<NodeIndex>>> = HashMap::new();
576
577 // Initialize with all direct neighbors except the target
578 for edge in self._graph.edges(start) {
579 let next = edge.target();
580 if next != target {
581 queue.push_back(next);
582 paths.insert(next, vec![vec![start, next]]);
583 }
584 }
585
586 // Track visited nodes to avoid cycles
587 let mut visited = HashSet::new();
588 visited.insert(start);
589
590 // BFS with path tracking
591 while let Some(node) = queue.pop_front() {
592 if visited.contains(&node) {
593 continue;
594 }
595
596 visited.insert(node);
597 let current_paths = paths.get(&node).unwrap().clone();
598
599 for edge in self._graph.edges(node) {
600 let next = edge.target();
601
602 // If we reached our target, we found an alternative path
603 if next == target {
604 for path in ¤t_paths {
605 let mut bubble_path = path.clone();
606 bubble_path.push(target);
607
608 // Only add as an alternative path if it's valid:
609 // 1. Path must start at the start node
610 // 2. Path must end at the target node
611 // 3. Path must be at least 3 nodes long (start->middle->target)
612 if bubble_path.len() >= 3
613 && bubble_path.first() == Some(&start)
614 && bubble_path.last() == Some(&target)
615 {
616 alternative_paths.push(bubble_path);
617 }
618 }
619 continue;
620 }
621
622 // Skip if we've seen this node already to avoid cycles
623 if visited.contains(&next) || direct_targets.contains(&next) {
624 continue;
625 }
626
627 // Create new paths by extending current paths
628 let mut new_paths = Vec::new();
629 for path in ¤t_paths {
630 let mut new_path = path.clone();
631 new_path.push(next);
632 new_paths.push(new_path);
633 }
634
635 // Update or insert paths for this node
636 paths
637 .entry(next)
638 .and_modify(|e| e.extend(new_paths.clone()))
639 .or_insert(new_paths.clone());
640
641 // Add to queue for further exploration
642 queue.push_back(next);
643 }
644 }
645
646 // If there's a direct path from start to target AND at least one alternative path,
647 // create bubble pairs
648 if !alternative_paths.is_empty() {
649 // For each alternative path, create a bubble pair with the direct path
650 for alt_path in alternative_paths {
651 // Create a bubble pair as a Vec of two paths
652 let bubble_pair = vec![direct_path.clone(), alt_path];
653 bubbles.push(bubble_pair);
654 }
655 }
656 }
657}
658
659pub trait TSGraphAnalysis {
660 fn summarize(&self) -> Result<BString>;
661}
662
663impl TSGraphAnalysis for TSGraph {
664 fn summarize(&self) -> Result<BString> {
665 // Pre-calculate capacity based on expected size
666 let graph_count = self.graphs.len();
667 // Estimate 30 bytes per graph entry (adjust as needed based on actual data sizes)
668 let estimated_capacity = graph_count * 30;
669
670 // Pre-allocate with capacity to avoid reallocations
671 let mut summary = Vec::with_capacity(estimated_capacity);
672 let headers = [
673 "gid",
674 "nodes",
675 "edges",
676 "paths",
677 "max_path_len",
678 "super_path",
679 "topology",
680 ];
681
682 let delimiter = ",";
683 let header_str = headers.join(delimiter) + "\n";
684 summary.extend_from_slice(header_str.as_bytes());
685
686 for (id, graph) in self.graphs.iter() {
687 let node_count = graph.nodes().len();
688 let edge_count = graph.edges().len();
689 let paths = graph.traverse()?;
690
691 let path_count = paths.len();
692 let max_path_len = paths.iter().map(|path| path.nodes.len()).max().unwrap_or(0);
693
694 let include_super_path = paths.iter().any(|path| {
695 path.is_super()
696 .context("Failed to check super path")
697 .unwrap()
698 });
699
700 let topo = graph.topo()?;
701
702 // Use write! to format directly into the buffer without intermediate allocations
703 use std::io::Write;
704 writeln!(
705 summary,
706 "{},{},{},{},{},{},{}",
707 id, node_count, edge_count, path_count, max_path_len, include_super_path, topo
708 )?;
709 }
710 // Convert to BString only once at the end
711 Ok(BString::from(summary))
712 }
713}
714
715#[cfg(test)]
716mod tests {
717 use super::*;
718 use crate::graph::TSGraph;
719 use std::str::FromStr;
720
721 #[test]
722 fn test_is_connected() {
723 // Create a connected graph
724 let tsg_string = r#"H VN 1.0
725H PN TestGraph
726N node1 chr1:+:100-200 read1:SO,read2:IN ACGT
727N node2 chr1:+:300-400 read1:SO,read3:IN
728N node3 chr1:+:500-600 read1:SO,read4:IN
729E edge1 node1 node2 chr1,chr1,1700,2000,INV
730E edge2 node2 node3 chr1,chr1,1700,2000,DUP
731"#;
732
733 let tsgraph = TSGraph::from_str(tsg_string).unwrap();
734 let graph = tsgraph.default_graph().unwrap();
735 assert!(graph.is_connected().unwrap());
736
737 // Create a disconnected graph
738 let tsg_string = r#"H VN 1.0
739H PN TestGraph
740N node1 chr1:+:100-200 read1:SO,read2:IN ACGT
741N node2 chr1:+:300-400 read1:SO,read3:IN
742N node3 chr1:+:500-600 read1:SO,read4:IN
743E edge1 node1 node2 chr1,chr1,1700,2000,INV
744"#;
745
746 let tsgraph = TSGraph::from_str(tsg_string).unwrap();
747 let graph = tsgraph.default_graph().unwrap();
748 assert!(!graph.is_connected().unwrap());
749 }
750
751 #[test]
752 fn test_is_cyclic() {
753 // Create an acyclic graph
754 let tsg_string = r#"H VN 1.0
755H PN TestGraph
756N node1 chr1:+:100-200 read1:SO,read2:IN ACGT
757N node2 chr1:+:300-400 read1:SO,read3:IN
758N node3 chr1:+:500-600 read1:SO,read4:IN
759E edge1 node1 node2 chr1,chr1,1700,2000,INV
760E edge2 node2 node3 chr1,chr1,1700,2000,DUP
761"#;
762 let tsgraph = TSGraph::from_str(tsg_string).unwrap();
763 let graph = tsgraph.default_graph().unwrap();
764 assert!(!graph.is_cyclic().unwrap());
765
766 // Create a cyclic graph
767 let tsg_string = r#"H VN 1.0
768H PN TestGraph
769N node1 chr1:+:100-200 read1:SO,read2:IN ACGT
770N node2 chr1:+:300-400 read1:SO,read3:IN
771N node3 chr1:+:500-600 read1:SO,read4:IN
772E edge1 node1 node2 chr1,chr1,1700,2000,INV
773E edge2 node2 node3 chr1,chr1,1700,2000,DUP
774E edge3 node3 node1 chr1,chr1,1700,2000,DUP
775"#;
776 let tsgraph = TSGraph::from_str(tsg_string).unwrap();
777 let graph = tsgraph.default_graph().unwrap();
778
779 assert!(graph.is_cyclic().unwrap());
780 }
781
782 #[test]
783 fn test_detect_bubbles() {
784 // Create a graph with a bubble
785 let tsg_string = r#"H VN 1.0
786H PN TestGraph
787N node1 chr1:+:100-200 read1:SO,read2:IN ACGT
788N node2 chr1:+:300-400 read1:SO,read3:IN
789N node3 chr1:+:500-600 read1:SO,read4:IN
790N node4 chr1:+:700-800 read1:SO,read5:IN
791E edge1 node1 node2 chr1,chr1,1700,2000,INV
792E edge2 node2 node3 chr1,chr1,1700,2000,DUP
793E edge3 node2 node4 chr1,chr1,1700,2000,DUP
794E edge4 node3 node4 chr1,chr1,1700,2000,INV
795E edge5 node1 node3 chr1,chr1,1700,2000,INV
796"#;
797
798 let tsgraph = TSGraph::from_str(tsg_string).unwrap();
799 let graph = tsgraph.default_graph().unwrap();
800 let is_bubble = graph.is_bubble().unwrap();
801 assert!(is_bubble);
802
803 let bubbles = graph.collect_bubbles().unwrap();
804 println!("Bubbles: {:?}", bubbles);
805
806 // Verify the bubbles are detected as pairs of paths
807 assert!(!bubbles.is_empty(), "Should detect at least one bubble");
808
809 // Each bubble should be a pair of alternative paths
810 for bubble_pair in &bubbles {
811 assert_eq!(
812 bubble_pair.len(),
813 2,
814 "Each bubble should have exactly 2 paths"
815 );
816
817 // Both paths in a pair should have the same start and end nodes
818 let path1 = &bubble_pair[0];
819 let path2 = &bubble_pair[1];
820
821 assert_eq!(
822 path1.first(),
823 path2.first(),
824 "Paths should have the same start node"
825 );
826 assert_eq!(
827 path1.last(),
828 path2.last(),
829 "Paths should have the same end node"
830 );
831 assert!(path1 != path2, "The two paths should be different");
832 }
833
834 // No bubbles in a linear graph
835 let tsg_string = r#"H VN 1.0
836H PN TestGraph
837N node1 chr1:+:100-200 read1:SO,read2:IN ACGT
838N node2 chr1:+:300-400 read1:SO,read3:IN
839N node3 chr1:+:500-600 read1:SO,read4:IN
840E edge1 node1 node2 chr1,chr1,1700,2000,INV
841E edge2 node2 node3 chr1,chr1,1700,2000,DUP
842"#;
843
844 let tsg_graph = TSGraph::from_str(tsg_string).unwrap();
845 let graph = tsg_graph.default_graph().unwrap();
846 let is_bubble = graph.is_bubble().unwrap();
847 assert!(!is_bubble, "Should not detect bubbles in a linear graph");
848 }
849
850 #[test]
851 fn test_summarize() {
852 let tsg_string = r#"H VN 1.0
853 H PN TestGraph
854 G g1
855 N node1 chr1:+:100-200 read1:SO,read2:IN ACGT
856 N node2 chr1:+:300-400 read1:SO,read3:IN
857 N node3 chr1:+:500-600 read1:SO,read4:IN
858 E edge1 node1 node2 chr1,chr1,1700,2000,INV
859 E edge2 node2 node3 chr1,chr1,1700,2000,DUP
860 G g2
861 N node4 chr2:+:100-200 read5:SO,read6:IN ACGT
862 N node5 chr2:+:300-400 read5:SO,read7:IN
863 E edge3 node4 node5 chr2,chr2,1700,2000,INV
864 "#;
865
866 let tsgraph = TSGraph::from_str(tsg_string).unwrap();
867 let summary = tsgraph.summarize().unwrap();
868 let summary_str = summary.to_string();
869
870 assert!(summary_str.contains("edges"));
871 assert!(summary_str.contains("paths"));
872 assert!(summary_str.contains("max_path_len"));
873 assert!(summary_str.contains("g1"));
874 assert!(summary_str.contains("g2"));
875 assert!(summary_str.contains("edges"));
876 assert!(summary_str.contains("paths"));
877 }
878
879 #[test]
880 fn test_proper_bubble_detection() {
881 // Create a graph with the example from the prompt
882 let tsg_string = r#"H VN 1.0
883H PN TestGraph
884N node1 chr1:+:100-200 read1:SO,read2:IN ACGT
885N node2 chr1:+:300-400 read1:SO,read3:IN
886N node3 chr1:+:500-600 read1:SO,read4:IN
887N node4 chr1:+:700-800 read1:SO,read5:IN
888E edge1 node1 node2 chr1,chr1,1700,2000,INV
889E edge2 node2 node3 chr1,chr1,1700,2000,DUP
890E edge3 node2 node4 chr1,chr1,1700,2000,DUP
891E edge4 node3 node4 chr1,chr1,1700,2000,INV
892E edge5 node1 node3 chr1,chr1,1700,2000,INV
893"#;
894
895 let tsgraph = TSGraph::from_str(tsg_string).unwrap();
896 let graph = tsgraph.default_graph().unwrap();
897
898 // Collect all bubbles in the graph
899 let bubbles = graph.collect_bubbles().unwrap();
900 // Extract node names for test validation
901 let node_names: HashMap<_, _> = graph.node_indices_to_ids();
902
903 // Print the bubbles with node names for debugging
904 println!("Found {} bubbles:", bubbles.len());
905 for (i, bubble) in bubbles.iter().enumerate() {
906 let path1 = bubble[0]
907 .iter()
908 .map(|&idx| node_names.get(&idx).unwrap().to_string())
909 .collect::<Vec<_>>()
910 .join(" -> ");
911
912 let path2 = bubble[1]
913 .iter()
914 .map(|&idx| node_names.get(&idx).unwrap().to_string())
915 .collect::<Vec<_>>()
916 .join(" -> ");
917 println!("Bubble {}: {} and {}", i + 1, path1, path2);
918 }
919
920 // Check that we only detect proper bubbles (paths with both common start and end points)
921 // We should NOT detect "node1 -> node2 -> node3" or "node2 -> node3 -> node4" as bubbles
922 // because they don't have common end points
923
924 // True bubbles should include:
925 // 1. node1 -> node2 -> node4 and node1 -> node3 -> node4 (common start node1, common end node4)
926 // 2. node1 -> node2 -> node3 and node1 -> node3 (common start node1, common end node3)
927 // 3. node2 -> node3 -> node4 and node2 -> node4 (common start node2, common end node4)
928
929 assert_eq!(bubbles.len(), 3, "Should detect exactly 3 bubbles");
930 }
931}