tsg_core/
graph.rs

1mod analysis;
2mod attr;
3mod edge;
4mod group;
5mod header;
6mod node;
7mod path;
8mod utils;
9
10use noodles::fasta;
11use std::fs::File;
12use std::io::{BufRead, BufReader, BufWriter};
13use std::path::Path;
14use std::str::FromStr;
15use tracing::debug;
16use tracing::warn;
17
18use ahash::{HashMap, HashMapExt, HashSet, HashSetExt};
19use anyhow::{Context, Result, anyhow};
20use bstr::{BStr, BString, ByteSlice};
21
22pub use analysis::*;
23pub use attr::*;
24pub use edge::*;
25pub use group::*;
26pub use header::*;
27pub use node::*;
28pub use path::*;
29pub use utils::*;
30
31use bon::Builder;
32use petgraph::dot::{Config, Dot};
33use petgraph::graph::{DiGraph, EdgeIndex, NodeIndex};
34use petgraph::visit::EdgeRef;
35use rayon::prelude::*;
36use serde_json::json;
37use std::collections::VecDeque;
38
39pub const DEFAULT_GRAPH_ID: &str = "G.graph";
40/// Represents a graph section within the TSG file
41#[derive(Debug, Clone, Default, Builder)]
42pub struct GraphSection {
43    pub id: BString,
44    pub attributes: HashMap<BString, Attribute>,
45    _graph: DiGraph<NodeData, EdgeData>,
46    pub node_indices: HashMap<BString, NodeIndex>,
47    pub edge_indices: HashMap<BString, EdgeIndex>,
48    pub groups: HashMap<BString, Group>,
49    pub chains: HashMap<BString, Group>,
50}
51
52impl GraphSection {
53    /// Create a new empty GraphSection
54    pub fn new(id: BString) -> Self {
55        Self {
56            id,
57            ..Default::default()
58        }
59    }
60
61    pub fn new_default_graph() -> Self {
62        Self::new(DEFAULT_GRAPH_ID.into())
63    }
64
65    pub fn node_indices_to_ids(&self) -> HashMap<NodeIndex, BString> {
66        self.node_indices
67            .par_iter()
68            .map(|(id, &idx)| (idx, id.clone()))
69            .collect()
70    }
71
72    pub fn edge_indices_to_ids(&self) -> HashMap<EdgeIndex, BString> {
73        self.edge_indices
74            .par_iter()
75            .map(|(id, &idx)| (idx, id.clone()))
76            .collect()
77    }
78
79    pub fn node_weight(&self, node_idx: NodeIndex) -> Option<&NodeData> {
80        self._graph.node_weight(node_idx)
81    }
82
83    pub fn edge_weight(&self, edge_idx: EdgeIndex) -> Option<&EdgeData> {
84        self._graph.edge_weight(edge_idx)
85    }
86
87    pub fn in_degree(&self, node_idx: NodeIndex) -> usize {
88        self._graph
89            .edges_directed(node_idx, petgraph::Direction::Incoming)
90            .count()
91    }
92
93    pub fn out_degree(&self, node_idx: NodeIndex) -> usize {
94        self._graph
95            .edges_directed(node_idx, petgraph::Direction::Outgoing)
96            .count()
97    }
98
99    /// Add a node to the graph
100    pub fn add_node(&mut self, node_data: NodeData) -> Result<NodeIndex> {
101        let id = node_data.id.clone();
102
103        // Check if node already exists
104        if let Some(&idx) = self.node_indices.get(&id) {
105            // Update node data
106            if let Some(attr) = self._graph.node_weight_mut(idx) {
107                *attr = node_data;
108                return Ok(idx);
109            }
110            return Err(anyhow!("Node with ID {} not found in graph", id));
111        }
112
113        // Add new node
114        let node_idx = self._graph.add_node(node_data);
115
116        debug!(
117            "graph {} add node {} ;len of node_indices: {}",
118            self.id,
119            id,
120            self.node_indices.len() + 1
121        );
122
123        self.node_indices.insert(id, node_idx);
124        Ok(node_idx)
125    }
126
127    /// Add an edge to the graph
128    pub fn add_edge(
129        &mut self,
130        source_id: &BStr,
131        sink_id: &BStr,
132        edge_data: EdgeData,
133    ) -> Result<EdgeIndex> {
134        let id = edge_data.id.clone();
135
136        // Get source node index or create it if it doesn't exist
137        let source_idx = match self.node_indices.get(source_id) {
138            Some(&idx) => idx,
139            None => {
140                // Create a placeholder node if it doesn't exist
141                let placeholder_node = NodeData::builder().id(source_id.to_owned()).build();
142                self.add_node(placeholder_node)?
143            }
144        };
145
146        // Get sink node index or create it if it doesn't exist
147        let sink_idx = match self.node_indices.get(sink_id) {
148            Some(&idx) => idx,
149            None => {
150                // Create a placeholder node if it doesn't exist
151                let placeholder_node = NodeData::builder().id(sink_id.to_owned()).build();
152                self.add_node(placeholder_node)?
153            }
154        };
155
156        // petgraph provide update_edge method to update edge data
157        let edge_idx = self._graph.update_edge(source_idx, sink_idx, edge_data);
158
159        self.edge_indices.insert(id, edge_idx);
160        Ok(edge_idx)
161    }
162
163    // Methods from old TSGraph that should now belong to GraphSection
164
165    /// Build graph based on the current state
166    fn ensure_graph_is_built(&mut self) -> Result<()> {
167        // If we already have nodes and edges, assume the graph is properly constructed
168        if !self.node_indices.is_empty() && !self.edge_indices.is_empty() {
169            return Ok(());
170        }
171
172        // If nodes and edges are missing, build them from chains
173        if !self.chains.is_empty() {
174            for group in self.chains.clone().values() {
175                if let Group::Chain { elements, .. } = group {
176                    // Process each element in the chain
177                    for (i, element_id) in elements.iter().enumerate() {
178                        if i % 2 == 0 {
179                            // It's a node - add it if it doesn't exist
180                            if !self.node_indices.contains_key(element_id) {
181                                // Create placeholder node
182                                let node_data = NodeData {
183                                    id: element_id.clone(),
184                                    ..Default::default()
185                                };
186                                self.add_node(node_data)?;
187                            }
188                        } else if i + 1 < elements.len() {
189                            // Prevent index out of bounds
190                            // It's an edge - add it if it doesn't exist
191                            if !self.edge_indices.contains_key(element_id) {
192                                // Get connecting nodes
193                                let source_id = &elements[i - 1];
194                                let sink_id = &elements[i + 1];
195
196                                // Create placeholder edge
197                                let edge_data = EdgeData {
198                                    id: element_id.clone(),
199                                    ..Default::default()
200                                };
201
202                                self.add_edge(source_id.as_bstr(), sink_id.as_bstr(), edge_data)?;
203                            }
204                        }
205                    }
206                }
207            }
208            return Ok(());
209        }
210
211        if self.id == DEFAULT_GRAPH_ID {
212            // ignore default graph
213            return Ok(());
214        }
215
216        if self.node_indices.is_empty() || self.edge_indices.is_empty() {
217            warn!(
218                "Graph {} has no nodes/edges defined and no chains available",
219                self.id
220            );
221        }
222
223        Ok(())
224    }
225
226    // Additional GraphSection methods...
227    pub fn node_by_idx(&self, node_idx: NodeIndex) -> Option<&NodeData> {
228        self._graph.node_weight(node_idx)
229    }
230
231    pub fn node_by_id(&self, id: &str) -> Option<&NodeData> {
232        let node_idx = self.node_indices.get(&BString::from(id))?;
233        self._graph.node_weight(*node_idx)
234    }
235
236    pub fn edge_by_id(&self, id: &str) -> Option<&EdgeData> {
237        let edge_idx = self.edge_indices.get(&BString::from(id))?;
238        self._graph.edge_weight(*edge_idx)
239    }
240
241    pub fn edge_by_idx(&self, edge_idx: EdgeIndex) -> Option<&EdgeData> {
242        self._graph.edge_weight(edge_idx)
243    }
244
245    pub fn nodes(&self) -> Vec<&NodeData> {
246        self.node_indices
247            .values()
248            .map(|&idx| self._graph.node_weight(idx).unwrap())
249            .collect()
250    }
251
252    pub fn edges(&self) -> Vec<&EdgeData> {
253        self.edge_indices
254            .values()
255            .map(|&idx| self._graph.edge_weight(idx).unwrap())
256            .collect()
257    }
258
259    /// Helper method to find a node's ID by its index
260    pub fn find_node_id_by_idx(&self, node_idx: NodeIndex) -> Option<&BString> {
261        self.node_indices
262            .par_iter()
263            .find_map_first(|(id, &idx)| if idx == node_idx { Some(id) } else { None })
264    }
265
266    // Other methods from TSGraph that make sense at the graph section level
267
268    /// Traverse the graph and return all valid paths from source nodes to sink nodes.
269    ///
270    /// A valid path must respect read continuity, especially for nodes with Intermediary (IN) reads.
271    /// For nodes with IN reads, we ensure that:
272    /// 1. The node shares at least one read with previous nodes in the path
273    /// 2. The node can connect to at least one subsequent node that shares a read with it
274    ///
275    /// Example:
276    /// n1 (r1) -> n3 (r1,r2) -> n4 (r1)
277    /// n2 (r2) -> n3 (r1,r2) -> n5 (r2)
278    ///
279    /// If n3 has IN reads, then only these paths are valid:
280    /// - n1 -> n3 -> n4 (valid because they all share read r1)
281    /// - n2 -> n3 -> n5 (valid because they all share read r2)
282    ///
283    /// These paths would be invalid:
284    /// - n1 -> n3 -> n5 (invalid because n1 and n5 don't share a common read)
285    /// - n2 -> n3 -> n4 (invalid because n2 and n4 don't share a common read)
286    pub fn traverse(&self) -> Result<Vec<TSGPath>> {
287        // Find all source nodes (nodes with no incoming edges)
288        let source_nodes: Vec<NodeIndex> = self
289            ._graph
290            .node_indices()
291            .filter(|&idx| {
292                self._graph
293                    .edges_directed(idx, petgraph::Direction::Incoming)
294                    .count()
295                    == 0
296            })
297            .collect();
298
299        if source_nodes.is_empty() {
300            return Ok(Vec::new());
301        }
302
303        let mut all_paths = Vec::new();
304        // Cache node read IDs to avoid repeated lookups
305        let mut node_read_ids_cache: HashMap<NodeIndex, HashSet<BString>> = HashMap::new();
306
307        // Pre-compute node read IDs
308        for node_idx in self._graph.node_indices() {
309            if let Some(node) = self._graph.node_weight(node_idx) {
310                let read_ids: HashSet<BString> =
311                    node.reads.par_iter().map(|r| r.id.clone()).collect();
312                node_read_ids_cache.insert(node_idx, read_ids);
313            }
314        }
315
316        // For each source node, perform a traversal
317        for &start_node in &source_nodes {
318            // Skip nodes with no reads
319            if let Some(read_set) = node_read_ids_cache.get(&start_node) {
320                if read_set.is_empty() {
321                    continue;
322                }
323
324                let mut queue = VecDeque::new();
325                // (node, path_so_far, active_reads)
326                let mut initial_path = TSGPath::builder().graph(self).build();
327                initial_path.add_node(start_node);
328
329                queue.push_back((start_node, initial_path, read_set.clone()));
330
331                while let Some((current_node, path, active_reads)) = queue.pop_front() {
332                    // Get outgoing edges
333                    let outgoing_edges: Vec<_> = self
334                        ._graph
335                        .edges_directed(current_node, petgraph::Direction::Outgoing)
336                        .collect();
337
338                    // If this is a sink node (no outgoing edges), save the path
339                    if outgoing_edges.is_empty() {
340                        path.validate()?;
341                        all_paths.push(path);
342                        continue;
343                    }
344
345                    for edge_ref in outgoing_edges {
346                        let edge_idx = edge_ref.id();
347                        let target_node = edge_ref.target();
348
349                        if let Some(target_read_ids) = node_read_ids_cache.get(&target_node) {
350                            // Calculate reads that continue from current path to target
351                            let continuing_reads: HashSet<_> = active_reads
352                                .par_iter()
353                                .filter(|id| target_read_ids.contains(*id))
354                                .cloned()
355                                .collect();
356
357                            if continuing_reads.is_empty() {
358                                // No read continuity, skip this edge
359                                continue;
360                            }
361
362                            // Check if target node has IN reads
363                            let has_in_reads =
364                                if let Some(target_data) = self._graph.node_weight(target_node) {
365                                    target_data
366                                        .reads
367                                        .par_iter()
368                                        .any(|r| r.identity == ReadIdentity::IN)
369                                } else {
370                                    false
371                                };
372
373                            if has_in_reads {
374                                // For IN nodes, check if there's a valid path forward
375                                let mut can_continue = false;
376                                let outgoing_from_target: Vec<_> = self
377                                    ._graph
378                                    .edges_directed(target_node, petgraph::Direction::Outgoing)
379                                    .map(|e| e.target())
380                                    .collect();
381
382                                for &next_node in &outgoing_from_target {
383                                    if let Some(next_read_ids) = node_read_ids_cache.get(&next_node)
384                                    {
385                                        // Check if there's at least one read that continues through
386                                        if continuing_reads
387                                            .par_iter()
388                                            .any(|id| next_read_ids.contains(id))
389                                        {
390                                            can_continue = true;
391                                            break;
392                                        }
393                                    }
394                                }
395
396                                if !can_continue && !outgoing_from_target.is_empty() {
397                                    // Has outgoing edges but no valid continuation, skip this edge
398                                    continue;
399                                }
400                            }
401
402                            // Create new path and continue traversal
403                            let mut new_path = path.clone();
404                            new_path.add_edge(edge_idx);
405                            new_path.add_node(target_node);
406                            queue.push_back((target_node, new_path, continuing_reads));
407                        }
408                    }
409                }
410            }
411        }
412
413        Ok(all_paths)
414    }
415
416    pub fn to_dot(&self, node_label: bool, edge_label: bool) -> Result<String> {
417        let mut config = vec![];
418        if node_label {
419            config.push(Config::NodeIndexLabel);
420        }
421        if edge_label {
422            config.push(Config::EdgeIndexLabel);
423        }
424
425        let dot = Dot::with_config(&self._graph, &config);
426        Ok(format!("{:?}", dot))
427    }
428
429    pub fn to_json(&self) -> Result<serde_json::Value> {
430        let mut nodes = Vec::new();
431        let mut edges = Vec::new();
432
433        // Process all nodes
434        for node_idx in self._graph.node_indices() {
435            if let Some(node) = self._graph.node_weight(node_idx) {
436                if let Ok(node_json) = node.to_json(None) {
437                    nodes.push(node_json);
438                }
439            }
440        }
441
442        // Process all edges
443        for edge_idx in self._graph.edge_indices() {
444            if let Some(edge) = self._graph.edge_weight(edge_idx) {
445                let edge_endpoints = self._graph.edge_endpoints(edge_idx);
446
447                if let Some((source, target)) = edge_endpoints {
448                    let source_id = self.find_node_id_by_idx(source);
449                    let target_id = self.find_node_id_by_idx(target);
450
451                    // get reads from source node and target node
452                    // the weight will be the intersection of reads
453                    let source_data = self.node_by_idx(source).unwrap();
454                    let target_data = self.node_by_idx(target).unwrap();
455
456                    // get the intersection of reads
457                    let source_reads = source_data
458                        .reads
459                        .iter()
460                        .map(|r| r.id.clone())
461                        .collect::<HashSet<_>>();
462                    let target_reads = target_data
463                        .reads
464                        .iter()
465                        .map(|r| r.id.clone())
466                        .collect::<HashSet<_>>();
467                    let edge_weight = source_reads
468                        .intersection(&target_reads)
469                        .collect::<HashSet<_>>()
470                        .len();
471
472                    if let (Some(source_id), Some(target_id)) = (source_id, target_id) {
473                        let edge_data = json!({
474                            "data": {
475                                "id": edge.id.to_str().unwrap(),
476                                "source": source_id.to_str().unwrap(),
477                                "target": target_id.to_str().unwrap(),
478                                "weight": edge_weight,
479                                "breakpoints": format!("{}", edge.sv)
480                            }
481                        });
482                        edges.push(edge_data);
483                    }
484                }
485            }
486        }
487
488        // Combine nodes and edges into a Cytoscape-compatible format
489        let elements = json!({
490            "directed": true,
491            "multigraph": true,
492            "elements": {
493            "nodes": nodes,
494            "edges": edges
495            }
496        });
497
498        Ok(elements)
499    }
500
501    pub fn annotate_node_with_sequence<P: AsRef<Path>>(
502        &mut self,
503        reference_genome_path: P,
504    ) -> Result<()> {
505        let mut reader = fasta::io::indexed_reader::Builder::default()
506            .build_from_path(reference_genome_path.as_ref())?;
507
508        for node_idx in self.node_indices.values() {
509            let node_data = self._graph.node_weight_mut(*node_idx).unwrap();
510
511            let region = format!(
512                "{}:{}-{}",
513                node_data.reference_id,
514                node_data.reference_start() - 1, // 0-based to 1-based
515                node_data.reference_end(),
516            )
517            .parse()?;
518            let record = reader.query(&region)?;
519            node_data.sequence = Some(record.sequence().as_ref().into());
520        }
521        Ok(())
522    }
523}
524
525/// Represents a link between elements in different graphs
526#[derive(Debug, Clone, Default, Builder)]
527pub struct InterGraphLink {
528    pub id: BString,
529    pub source_graph: BString,
530    pub source_element: BString,
531    pub target_graph: BString,
532    pub target_element: BString,
533    pub link_type: BString,
534    #[builder(default)]
535    pub attributes: HashMap<BString, Attribute>,
536}
537
538/// The complete transcript segment graph containing multiple graph sections
539#[derive(Debug, Clone, Default, Builder)]
540pub struct TSGraph {
541    pub headers: Vec<Header>,
542    pub graphs: HashMap<BString, GraphSection>,
543    pub links: Vec<InterGraphLink>,
544    current_graph_id: Option<BString>, // Tracks which graph is currently active during parsing
545}
546
547impl TSGraph {
548    /// Create a new empty TSGraph
549    pub fn new() -> Self {
550        let graph = GraphSection::new_default_graph();
551        let mut graphs = HashMap::new();
552        graphs.insert(graph.id.clone(), graph);
553        Self {
554            graphs,
555            current_graph_id: Some(DEFAULT_GRAPH_ID.into()),
556            ..Default::default()
557        }
558    }
559
560    /// Parse a header line
561    fn parse_header_line(&mut self, fields: &[&str]) -> Result<()> {
562        if fields.len() < 3 {
563            return Err(anyhow!("Invalid header line format"));
564        }
565
566        self.headers.push(Header {
567            tag: fields[1].into(),
568            value: fields[2].into(),
569        });
570
571        Ok(())
572    }
573
574    /// Parse a graph section line
575    fn parse_graph_line(&mut self, fields: &[&str]) -> Result<()> {
576        if fields.len() < 2 {
577            return Err(anyhow!("Invalid graph line format"));
578        }
579
580        let graph_id: BString = fields[1].into();
581
582        // Check if graph with this ID already exists
583        if self.graphs.contains_key(&graph_id) {
584            return Err(anyhow!(
585                "Graph with ID {} already exists",
586                graph_id.to_str().unwrap_or("")
587            ));
588        }
589
590        // Create new graph section
591        let mut graph_section = GraphSection::new(graph_id.clone());
592
593        // Parse optional attributes
594        if fields.len() > 2 {
595            for attr_str in &fields[2..] {
596                let attr = attr_str.parse::<Attribute>()?;
597                graph_section.attributes.insert(attr.tag.clone(), attr);
598            }
599        }
600
601        // Update current graph ID and add to graphs map
602        self.current_graph_id = Some(graph_id.clone());
603        self.graphs.insert(graph_id, graph_section);
604
605        Ok(())
606    }
607
608    /// Get the current graph section (or error if none is active)
609    fn current_graph_mut(&mut self) -> Result<&mut GraphSection> {
610        if let Some(graph_id) = &self.current_graph_id {
611            if let Some(graph) = self.graphs.get_mut(graph_id) {
612                return Ok(graph);
613            }
614        }
615        Err(anyhow!("No active graph section"))
616    }
617
618    /// Parse an inter-graph link line
619    fn parse_link_line(&mut self, fields: &[&str]) -> Result<()> {
620        if fields.len() < 5 {
621            return Err(anyhow!("Invalid link line format"));
622        }
623
624        let id: BString = fields[1].into();
625
626        // Parse source and target references (format: graph_id:element_id)
627        let source_ref: Vec<&str> = fields[2].splitn(2, ':').collect();
628        let target_ref: Vec<&str> = fields[3].splitn(2, ':').collect();
629
630        if source_ref.len() != 2 || target_ref.len() != 2 {
631            return Err(anyhow!("Invalid element reference format in link line"));
632        }
633
634        let source_graph: BString = source_ref[0].into();
635        let source_element: BString = source_ref[1].into();
636        let target_graph: BString = target_ref[0].into();
637        let target_element: BString = target_ref[1].into();
638
639        // Verify the referenced graphs exist
640        if !self.graphs.contains_key(&source_graph) {
641            return Err(anyhow!(
642                "Source graph {} not found",
643                source_graph.to_str().unwrap_or("")
644            ));
645        }
646        if !self.graphs.contains_key(&target_graph) {
647            return Err(anyhow!(
648                "Target graph {} not found",
649                target_graph.to_str().unwrap_or("")
650            ));
651        }
652
653        let link_type: BString = fields[4].into();
654
655        let mut link = InterGraphLink::builder()
656            .id(id)
657            .source_graph(source_graph)
658            .source_element(source_element)
659            .target_graph(target_graph)
660            .target_element(target_element)
661            .link_type(link_type)
662            .build();
663
664        // Parse optional attributes
665        if fields.len() > 5 {
666            for attr_str in &fields[5..] {
667                let attr = attr_str.parse::<Attribute>()?;
668                link.attributes.insert(attr.tag.clone(), attr);
669            }
670        }
671
672        self.links.push(link);
673        Ok(())
674    }
675
676    /// Parse a node line
677    fn parse_node_line(&mut self, fields: &str) -> Result<()> {
678        let node_data = NodeData::from_str(fields)?;
679        let graph = self.current_graph_mut()?;
680        graph.add_node(node_data)?;
681        Ok(())
682    }
683
684    /// Parse an edge line
685    fn parse_edge_line(&mut self, fields: &[&str]) -> Result<()> {
686        if fields.len() < 5 {
687            return Err(anyhow!("Invalid edge line format"));
688        }
689
690        let id: BString = fields[1].into();
691        let source_id: BString = fields[2].into();
692        let sink_id: BString = fields[3].into();
693        let sv = fields[4].parse::<StructuralVariant>()?;
694
695        let edge_data = EdgeData::builder().id(id).sv(sv).build();
696
697        let graph = self.current_graph_mut()?;
698        graph.add_edge(source_id.as_bstr(), sink_id.as_bstr(), edge_data)?;
699        Ok(())
700    }
701
702    /// Parse an unordered group line
703    fn parse_unordered_group_line(&mut self, fields: &[&str]) -> Result<()> {
704        if fields.len() < 3 {
705            return Err(anyhow!("Invalid unordered group line format"));
706        }
707
708        let id: BString = fields[1].into();
709        let graph = self.current_graph_mut()?;
710
711        // Check for duplicate group name
712        if graph.groups.contains_key(&id) {
713            return Err(anyhow!("Group with ID {} already exists", id));
714        }
715
716        // Parse element IDs (space-separated)
717        let elements_str = fields[2..].join(" ");
718        let elements = elements_str
719            .split_whitespace()
720            .map(|s| s.into())
721            .collect::<Vec<_>>();
722
723        let group = Group::Unordered {
724            id: id.clone(),
725            elements,
726            attributes: HashMap::new(),
727        };
728
729        graph.groups.insert(id, group);
730        Ok(())
731    }
732
733    /// Parse a path line
734    fn parse_path_line(&mut self, fields: &[&str]) -> Result<()> {
735        if fields.len() < 3 {
736            return Err(anyhow!("Invalid path line format"));
737        }
738
739        let id: BString = fields[1].into();
740        let graph = self.current_graph_mut()?;
741
742        // Check for duplicate group name
743        if graph.groups.contains_key(&id) {
744            return Err(anyhow!("Group with ID {} already exists", id));
745        }
746
747        // Parse oriented element IDs (space-separated)
748        let elements_str = fields[2..].join(" ");
749        let elements = elements_str
750            .split_whitespace()
751            .map(|s| s.parse::<OrientedElement>())
752            .collect::<Result<Vec<_>, _>>()?;
753
754        let group = Group::Ordered {
755            id: id.clone(),
756            elements,
757            attributes: HashMap::new(),
758        };
759
760        graph.groups.insert(id, group);
761        Ok(())
762    }
763
764    /// Parse a chain line
765    fn parse_chain_line(&mut self, fields: &[&str]) -> Result<()> {
766        if fields.len() < 3 {
767            return Err(anyhow!("Invalid chain line format"));
768        }
769
770        let id: BString = fields[1].into();
771        let graph = self.current_graph_mut()?;
772
773        // Check for duplicate group name
774        if graph.groups.contains_key(&id) {
775            return Err(anyhow!("Group with ID {} already exists", id));
776        }
777
778        // Parse element IDs (space-separated)
779        let elements_str = fields[2..].join(" ");
780        let elements: Vec<BString> = elements_str.split_whitespace().map(|s| s.into()).collect();
781
782        // Validate chain structure: must start and end with nodes, and have alternating nodes/edges
783        if elements.is_empty() {
784            return Err(anyhow!("Chain must contain at least one element"));
785        }
786
787        if elements.len() % 2 == 0 {
788            return Err(anyhow!(
789                "Chain must have an odd number of elements (starting and ending with nodes)"
790            ));
791        }
792
793        // Create the chain group
794        let group = Group::Chain {
795            id: id.clone(),
796            elements,
797            attributes: HashMap::new(),
798        };
799
800        // Store the chain in both maps
801        graph.chains.insert(id.clone(), group.clone());
802        graph.groups.insert(id, group);
803        Ok(())
804    }
805
806    /// Parse an attribute line
807    fn parse_attribute_line(&mut self, fields: &[&str]) -> Result<()> {
808        if fields.len() < 4 {
809            return Err(anyhow!("Invalid attribute line format"));
810        }
811
812        let element_type = fields[1];
813        let element_id: BString = fields[2].into();
814        let graph = self.current_graph_mut()?;
815
816        let attrs: Vec<Attribute> = fields
817            .iter()
818            .skip(3)
819            .map(|s| s.parse())
820            .collect::<Result<Vec<_>>>()
821            .context("invalidate attribute line")?;
822
823        match element_type {
824            "N" => {
825                if let Some(&node_idx) = graph.node_indices.get(&element_id) {
826                    if let Some(node_data) = graph._graph.node_weight_mut(node_idx) {
827                        for attr in attrs {
828                            let tag = attr.tag.clone();
829                            node_data.attributes.insert(tag, attr);
830                        }
831                    } else {
832                        return Err(anyhow!("Node with ID {} not found in graph", element_id));
833                    }
834                } else {
835                    return Err(anyhow!("Node with ID {} not found", element_id));
836                }
837            }
838            "E" => {
839                if let Some(&edge_idx) = graph.edge_indices.get(&element_id) {
840                    if let Some(edge_data) = graph._graph.edge_weight_mut(edge_idx) {
841                        for attr in attrs {
842                            let tag = attr.tag.clone();
843                            edge_data.attributes.insert(tag, attr);
844                        }
845                    } else {
846                        return Err(anyhow!("Edge with ID {} not found in graph", element_id));
847                    }
848                } else {
849                    return Err(anyhow!("Edge with ID {} not found", element_id));
850                }
851            }
852            "U" | "P" | "C" => {
853                if let Some(group) = graph.groups.get_mut(&element_id) {
854                    match group {
855                        Group::Unordered { attributes, .. }
856                        | Group::Ordered { attributes, .. }
857                        | Group::Chain { attributes, .. } => {
858                            for attr in attrs {
859                                let tag = attr.tag.clone();
860                                attributes.insert(tag, attr);
861                            }
862                        }
863                    }
864                } else {
865                    return Err(anyhow!("Group with ID {} not found", element_id));
866                }
867            }
868            "G" => {
869                // Handle graph attributes
870                if let Some(graph_section) = self.graphs.get_mut(&element_id) {
871                    for attr in attrs {
872                        let tag = attr.tag.clone();
873                        graph_section.attributes.insert(tag, attr);
874                    }
875                } else {
876                    return Err(anyhow!("Graph with ID {} not found", element_id));
877                }
878            }
879            _ => {
880                return Err(anyhow!("Unknown element type: {}", element_type));
881            }
882        }
883
884        Ok(())
885    }
886
887    /// Validate all graphs and their paths
888    fn validate(&self) -> Result<()> {
889        // Validate each graph section
890        for (graph_id, graph) in &self.graphs {
891            // Validate paths against the graph
892            for (id, group) in &graph.groups {
893                if let Group::Ordered { elements, .. } = group {
894                    // Validate that all elements in the path exist in the graph
895                    for element in elements {
896                        let element_exists = graph.node_indices.contains_key(&element.id)
897                            || graph.edge_indices.contains_key(&element.id)
898                            || graph.groups.contains_key(&element.id);
899
900                        if !element_exists {
901                            return Err(anyhow!(
902                                "Path {} in graph {} references non-existent element {}",
903                                id,
904                                graph_id,
905                                element.id
906                            ));
907                        }
908                    }
909                }
910            }
911        }
912
913        // Validate all inter-graph links
914        for link in &self.links {
915            // Check source element exists
916            let source_graph = self.graphs.get(&link.source_graph).ok_or_else(|| {
917                anyhow!(
918                    "Link {} references non-existent graph {}",
919                    link.id,
920                    link.source_graph
921                )
922            })?;
923
924            let source_exists = source_graph.node_indices.contains_key(&link.source_element)
925                || source_graph.edge_indices.contains_key(&link.source_element)
926                || source_graph.groups.contains_key(&link.source_element);
927
928            if !source_exists {
929                return Err(anyhow!(
930                    "Link {} references non-existent element {}:{}",
931                    link.id,
932                    link.source_graph,
933                    link.source_element
934                ));
935            }
936
937            // Check target element exists
938            let target_graph = self.graphs.get(&link.target_graph).ok_or_else(|| {
939                anyhow!(
940                    "Link {} references non-existent graph {}",
941                    link.id,
942                    link.target_graph
943                )
944            })?;
945
946            let target_exists = target_graph.node_indices.contains_key(&link.target_element)
947                || target_graph.edge_indices.contains_key(&link.target_element)
948                || target_graph.groups.contains_key(&link.target_element);
949
950            if !target_exists {
951                return Err(anyhow!(
952                    "Link {} references non-existent element {}:{}",
953                    link.id,
954                    link.target_graph,
955                    link.target_element
956                ));
957            }
958        }
959
960        Ok(())
961    }
962
963    pub fn from_reader<R: BufRead>(reader: R) -> Result<Self> {
964        let mut tsgraph = TSGraph::new();
965
966        // Create a default graph if needed for backward compatibility
967        let default_graph_id: BString = DEFAULT_GRAPH_ID.into();
968        let default_graph = GraphSection::new(default_graph_id.clone());
969        tsgraph
970            .graphs
971            .insert(default_graph_id.clone(), default_graph);
972
973        tsgraph.current_graph_id = Some(default_graph_id);
974
975        // First pass: Parse all record types
976        for line in reader.lines() {
977            let line = line?;
978            if line.is_empty() || line.starts_with('#') {
979                continue;
980            }
981            let fields: Vec<&str> = line.split_whitespace().collect();
982            if fields.is_empty() {
983                continue;
984            }
985
986            match fields[0] {
987                "H" => tsgraph.parse_header_line(&fields)?,
988                "G" => tsgraph.parse_graph_line(&fields)?,
989                "N" => tsgraph.parse_node_line(&line)?,
990                "E" => tsgraph.parse_edge_line(&fields)?,
991                "U" => tsgraph.parse_unordered_group_line(&fields)?,
992                "P" => tsgraph.parse_path_line(&fields)?,
993                "C" => tsgraph.parse_chain_line(&fields)?,
994                "A" => tsgraph.parse_attribute_line(&fields)?,
995                "L" => tsgraph.parse_link_line(&fields)?,
996                _ => {
997                    // ignore unknown record types
998                    debug!("Ignoring unknown record type: {}", fields[0]);
999                }
1000            }
1001        }
1002
1003        // Second pass: Ensure all graphs are built and validate
1004        for graph_section in tsgraph.graphs.values_mut() {
1005            // Populate chains hash map from groups if needed
1006            for (id, group) in &graph_section.groups {
1007                if let Group::Chain { .. } = group {
1008                    if !graph_section.chains.contains_key(id) {
1009                        graph_section.chains.insert(id.clone(), group.clone());
1010                    }
1011                }
1012            }
1013
1014            // Ensure graph is built
1015            graph_section.ensure_graph_is_built()?;
1016        }
1017
1018        // Validate all graphs and links
1019        tsgraph.validate()?;
1020
1021        // pop the default graph if it's empty
1022        if let Some(default_graph) = tsgraph.graph(DEFAULT_GRAPH_ID) {
1023            if default_graph.node_indices.is_empty() {
1024                tsgraph.graphs.remove(&BString::from(DEFAULT_GRAPH_ID));
1025            }
1026        }
1027        Ok(tsgraph)
1028    }
1029
1030    /// Parse a TSG file and construct a TSGraph
1031    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
1032        let file = File::open(path)?;
1033        let reader = BufReader::new(file);
1034        Self::from_reader(reader)
1035    }
1036
1037    /// Write the TSGraph to writer
1038    pub fn to_writer<W: std::io::Write>(&self, writer: &mut W) -> Result<()> {
1039        // Write global headers
1040        writeln!(writer, "# Global header")?;
1041        for header in &self.headers {
1042            writeln!(writer, "{}", header)?;
1043        }
1044
1045        let new_header = Header::builder().tag("PG").value("tsg").build();
1046        writeln!(writer, "{}", new_header)?;
1047
1048        // Write each graph section
1049        for (graph_id, graph) in &self.graphs {
1050            // Skip writing the default graph header if it's empty
1051            if graph_id == &BString::from(DEFAULT_GRAPH_ID) && graph.node_indices.is_empty() {
1052                continue;
1053            }
1054
1055            writeln!(writer, "\n# Graph: {}", graph_id)?;
1056
1057            // Write graph section header
1058            write!(writer, "G\t{}", graph_id)?;
1059            for attr in graph.attributes.values() {
1060                write!(writer, "\t{}", attr)?;
1061            }
1062            writeln!(writer)?;
1063
1064            // Write nodes
1065            writeln!(writer, "# Nodes")?;
1066            for node_idx in graph._graph.node_indices() {
1067                if let Some(node_data) = graph._graph.node_weight(node_idx) {
1068                    writeln!(writer, "{}", node_data)?;
1069                }
1070            }
1071
1072            // Write edges
1073            writeln!(writer, "# Edges")?;
1074            for edge_ref in graph._graph.edge_references() {
1075                let edge = edge_ref.weight();
1076                let source_idx = edge_ref.source();
1077                let sink_idx = edge_ref.target();
1078
1079                if let (Some(source), Some(sink)) = (
1080                    graph._graph.node_weight(source_idx),
1081                    graph._graph.node_weight(sink_idx),
1082                ) {
1083                    writeln!(
1084                        writer,
1085                        "E\t{}\t{}\t{}\t{}",
1086                        edge.id, source.id, sink.id, edge.sv
1087                    )?;
1088                }
1089            }
1090
1091            // Write groups
1092            writeln!(writer, "# Groups")?;
1093            let mut seen_chain_ids = HashSet::new();
1094
1095            for group in graph.groups.values() {
1096                match group {
1097                    Group::Unordered { id, elements, .. } => {
1098                        let elements_str: Vec<String> = elements
1099                            .par_iter()
1100                            .map(|e| e.to_str().unwrap_or("").to_string())
1101                            .collect();
1102                        writeln!(
1103                            writer,
1104                            "U\t{}\t{}",
1105                            id.to_str().unwrap_or(""),
1106                            elements_str.join(" ")
1107                        )?;
1108                    }
1109                    Group::Ordered { id, elements, .. } => {
1110                        let elements_str: Vec<String> =
1111                            elements.par_iter().map(|e| e.to_string()).collect();
1112                        writeln!(
1113                            writer,
1114                            "P\t{}\t{}",
1115                            id.to_str().unwrap_or(""),
1116                            elements_str.join(" ")
1117                        )?;
1118                    }
1119                    Group::Chain { id, elements, .. } => {
1120                        // Skip writing chains that are duplicated with groups
1121                        if seen_chain_ids.contains(id) {
1122                            continue;
1123                        }
1124                        seen_chain_ids.insert(id);
1125
1126                        let elements_str: Vec<String> = elements
1127                            .par_iter()
1128                            .map(|e| e.to_str().unwrap_or("").to_string())
1129                            .collect();
1130                        writeln!(
1131                            writer,
1132                            "C\t{}\t{}",
1133                            id.to_str().unwrap_or(""),
1134                            elements_str.join(" ")
1135                        )?;
1136                    }
1137                }
1138            }
1139
1140            // Write attributes for this graph section
1141            writeln!(writer, "# Attributes")?;
1142
1143            // Write attributes for nodes
1144            for node_idx in graph._graph.node_indices() {
1145                if let Some(node) = graph._graph.node_weight(node_idx) {
1146                    for attr in node.attributes.values() {
1147                        writeln!(writer, "A\tN\t{}\t{}", node.id, attr)?;
1148                    }
1149                }
1150            }
1151
1152            // Write attributes for edges
1153            for edge_idx in graph._graph.edge_indices() {
1154                if let Some(edge) = graph._graph.edge_weight(edge_idx) {
1155                    for attr in edge.attributes.values() {
1156                        writeln!(writer, "A\tE\t{}\t{}", edge.id, attr)?;
1157                    }
1158                }
1159            }
1160
1161            // Write attributes for groups
1162            for (id, group) in &graph.groups {
1163                let (group_type, attributes) = match group {
1164                    Group::Unordered { attributes, .. } => ("U", attributes),
1165                    Group::Ordered { attributes, .. } => ("P", attributes),
1166                    Group::Chain { attributes, .. } => ("C", attributes),
1167                };
1168
1169                for attr in attributes.values() {
1170                    writeln!(
1171                        writer,
1172                        "A\t{}\t{}\t{}",
1173                        group_type,
1174                        id.to_str().unwrap_or(""),
1175                        attr
1176                    )?;
1177                }
1178            }
1179        }
1180
1181        // Write inter-graph links
1182        if !self.links.is_empty() {
1183            writeln!(writer, "\n# Inter-graph links")?;
1184            for link in &self.links {
1185                write!(
1186                    writer,
1187                    "L\t{}\t{}:{}\t{}:{}\t{}",
1188                    link.id,
1189                    link.source_graph,
1190                    link.source_element,
1191                    link.target_graph,
1192                    link.target_element,
1193                    link.link_type
1194                )?;
1195
1196                for attr in link.attributes.values() {
1197                    write!(writer, "\t{}", attr)?;
1198                }
1199                writeln!(writer)?;
1200            }
1201        }
1202
1203        writer.flush()?;
1204        Ok(())
1205    }
1206
1207    /// Write the TSGraph to a TSG file
1208    pub fn to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
1209        let file = File::create(path)?;
1210        let mut writer = BufWriter::new(file);
1211        self.to_writer(&mut writer)
1212    }
1213
1214    // Helper methods for accessing graph elements
1215
1216    /// Get a graph section by its ID
1217    pub fn graph(&self, id: &str) -> Option<&GraphSection> {
1218        self.graphs.get(&BString::from(id))
1219    }
1220
1221    pub fn graph_mut(&mut self, id: &str) -> Option<&mut GraphSection> {
1222        self.graphs.get_mut(&BString::from(id))
1223    }
1224
1225    /// get default graph
1226    pub fn default_graph(&self) -> Option<&GraphSection> {
1227        self.graphs.get(&BString::from(DEFAULT_GRAPH_ID))
1228    }
1229
1230    pub fn default_graph_mut(&mut self) -> Option<&mut GraphSection> {
1231        self.graphs.get_mut(&BString::from(DEFAULT_GRAPH_ID))
1232    }
1233
1234    /// Get a node by its ID and graph ID
1235    pub fn node(&self, graph_id: &str, node_id: &str) -> Option<&NodeData> {
1236        let graph = self.graphs.get(&BString::from(graph_id))?;
1237        graph.node_by_id(node_id)
1238    }
1239
1240    /// Get an edge by its ID and graph ID
1241    pub fn edge(&self, graph_id: &str, edge_id: &str) -> Option<&EdgeData> {
1242        let graph = self.graphs.get(&BString::from(graph_id))?;
1243        graph.edge_by_id(edge_id)
1244    }
1245
1246    /// Get the nodes in a chain in order
1247    pub fn chain_nodes(&self, graph_id: &str, chain_id: &BStr) -> Option<Vec<NodeIndex>> {
1248        let graph = self.graphs.get(&BString::from(graph_id))?;
1249        let group = graph.chains.get(chain_id)?;
1250
1251        match group {
1252            Group::Chain { elements, .. } => {
1253                let mut nodes = Vec::with_capacity(elements.len().div_ceil(2));
1254
1255                for (i, element_id) in elements.iter().enumerate() {
1256                    if i % 2 == 0 {
1257                        // Nodes are at even positions (0, 2, 4...)
1258                        if let Some(&node_idx) = graph.node_indices.get(element_id) {
1259                            nodes.push(node_idx);
1260                        } else {
1261                            return None; // Invalid chain structure
1262                        }
1263                    }
1264                }
1265
1266                Some(nodes)
1267            }
1268            _ => None, // Not a chain
1269        }
1270    }
1271
1272    /// Get the edges in a chain in order
1273    pub fn chain_edges(&self, graph_id: &str, chain_id: &BStr) -> Option<Vec<EdgeIndex>> {
1274        let graph = self.graphs.get(&BString::from(graph_id))?;
1275        let group = graph.chains.get(chain_id)?;
1276
1277        match group {
1278            Group::Chain { elements, .. } => {
1279                let mut edges = Vec::with_capacity(elements.len() / 2);
1280
1281                for (i, element_id) in elements.iter().enumerate() {
1282                    if i % 2 == 1 {
1283                        // Edges are at odd positions (1, 3, 5...)
1284                        if let Some(&edge_idx) = graph.edge_indices.get(element_id) {
1285                            edges.push(edge_idx);
1286                        } else {
1287                            return None; // Invalid chain structure
1288                        }
1289                    }
1290                }
1291
1292                Some(edges)
1293            }
1294            _ => None, // Not a chain
1295        }
1296    }
1297
1298    /// Helper method to find a node's ID by its index
1299    pub fn find_node_id_by_idx(&self, graph_id: &str, node_idx: NodeIndex) -> Option<&BString> {
1300        let graph = self.graphs.get(&BString::from(graph_id))?;
1301        graph
1302            .node_indices
1303            .par_iter()
1304            .find_map_first(|(id, &idx)| if idx == node_idx { Some(id) } else { None })
1305    }
1306
1307    pub fn node_by_idx(&self, graph_id: &str, node_idx: NodeIndex) -> Option<&NodeData> {
1308        let graph = self.graphs.get(&BString::from(graph_id))?;
1309        graph.node_by_idx(node_idx)
1310    }
1311
1312    pub fn edge_by_idx(&self, graph_id: &str, edge_idx: EdgeIndex) -> Option<&EdgeData> {
1313        let graph = self.graphs.get(&BString::from(graph_id))?;
1314        graph.edge_by_idx(edge_idx)
1315    }
1316
1317    /// Get all nodes in the graph
1318    pub fn nodes(&self, graph_id: &str) -> Vec<&NodeData> {
1319        let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1320        graph
1321            .node_indices
1322            .values()
1323            .filter_map(|&idx| graph._graph.node_weight(idx))
1324            .collect()
1325    }
1326
1327    /// Get all edges in the graph
1328    pub fn edges(&self, graph_id: &str) -> Vec<&EdgeData> {
1329        let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1330        graph
1331            .edge_indices
1332            .values()
1333            .filter_map(|&idx| graph._graph.edge_weight(idx))
1334            .collect()
1335    }
1336
1337    /// Traverse the graph and return all valid paths from source nodes to sink nodes.
1338    pub fn traverse_by_id(&self, graph_id: &str) -> Result<Vec<TSGPath>> {
1339        let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1340        graph.traverse()
1341    }
1342
1343    /// traverse all graphs
1344    pub fn traverse_all_graphs(&self) -> Result<Vec<TSGPath>> {
1345        let all_paths = self
1346            .graphs
1347            .values()
1348            .try_fold(Vec::new(), |mut all_paths, graph| {
1349                let paths = graph.traverse()?;
1350                all_paths.extend(paths);
1351                Ok(all_paths)
1352            });
1353        all_paths
1354    }
1355
1356    pub fn to_dot_by_id(
1357        &self,
1358        graph_id: &str,
1359        node_label: bool,
1360        edge_label: bool,
1361    ) -> Result<String> {
1362        let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1363        graph.to_dot(node_label, edge_label)
1364    }
1365
1366    pub fn to_json_by_id(&self, graph_id: &str) -> Result<serde_json::Value> {
1367        let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1368        graph.to_json()
1369    }
1370}
1371
1372impl FromStr for TSGraph {
1373    type Err = anyhow::Error;
1374    fn from_str(s: &str) -> Result<Self> {
1375        let reader = BufReader::new(s.as_bytes());
1376        Self::from_reader(reader)
1377    }
1378}
1379
1380#[cfg(test)]
1381mod tests {
1382    use super::*;
1383
1384    #[test]
1385    fn test_create_empty_graph() {
1386        let graph = TSGraph::new();
1387        assert_eq!(graph.headers.len(), 0);
1388        assert_eq!(graph.default_graph().unwrap().nodes().len(), 0);
1389        assert_eq!(graph.edges(DEFAULT_GRAPH_ID).len(), 0);
1390        assert_eq!(graph.graphs.len(), 1);
1391        assert_eq!(graph.links.len(), 0);
1392    }
1393
1394    #[test]
1395    fn test_add_node() -> Result<()> {
1396        let mut graph = TSGraph::new();
1397        let node = NodeData::builder().id("node1").reference_id("chr1").build();
1398
1399        graph.default_graph_mut().unwrap().add_node(node.clone())?;
1400        assert_eq!(graph.nodes(DEFAULT_GRAPH_ID).len(), 1);
1401        assert_eq!(graph.node(DEFAULT_GRAPH_ID, "node1").unwrap().id, node.id);
1402        Ok(())
1403    }
1404
1405    #[test]
1406    fn test_add_edge() -> Result<()> {
1407        let mut graph = TSGraph::new();
1408
1409        // Add nodes first
1410        let node1 = NodeData::builder().id("node1").reference_id("chr1").build();
1411        let node2 = NodeData::builder().id("node2").reference_id("chr1").build();
1412
1413        graph.graph_mut(DEFAULT_GRAPH_ID).unwrap().add_node(node1)?;
1414        graph.graph_mut(DEFAULT_GRAPH_ID).unwrap().add_node(node2)?;
1415
1416        // Add edge
1417        let sv_from_builder = StructuralVariant::builder()
1418            .reference_name1("chr1")
1419            .reference_name2("chr1")
1420            .breakpoint1(1000)
1421            .breakpoint2(5000)
1422            .sv_type(BString::from("DEL"))
1423            .build();
1424        let edge = EdgeData::builder().id("edge1").sv(sv_from_builder).build();
1425
1426        graph.graph_mut(DEFAULT_GRAPH_ID).unwrap().add_edge(
1427            "node1".into(),
1428            "node2".into(),
1429            edge.clone(),
1430        )?;
1431
1432        assert_eq!(graph.edges(DEFAULT_GRAPH_ID).len(), 1);
1433        assert_eq!(graph.edge(DEFAULT_GRAPH_ID, "edge1").unwrap().id, edge.id);
1434
1435        Ok(())
1436    }
1437
1438    #[test]
1439    fn test_parse_header_line() -> Result<()> {
1440        let mut graph = TSGraph::new();
1441        let fields = vec!["H", "VN", "1.0"];
1442
1443        graph.parse_header_line(&fields)?;
1444
1445        assert_eq!(graph.headers.len(), 1);
1446        assert_eq!(graph.headers[0].tag, "VN");
1447        assert_eq!(graph.headers[0].value, "1.0");
1448
1449        Ok(())
1450    }
1451
1452    #[test]
1453    fn test_parse_node_line() -> Result<()> {
1454        let mut graph = TSGraph::new();
1455        let line = "N\tnode1\tchr1:+:100-200\tread1:SO,read2:IN\tACGT";
1456
1457        graph.parse_node_line(line)?;
1458
1459        // let node = graph.get_node("defaul, "node1").unwrap();
1460        let node = graph.default_graph().unwrap().node_by_id("node1").unwrap();
1461
1462        assert_eq!(node.id, "node1");
1463        assert_eq!(node.exons.exons.len(), 1);
1464        assert_eq!(node.exons.exons[0].start, 100);
1465        assert_eq!(node.exons.exons[0].end, 200);
1466        assert_eq!(
1467            node.reads,
1468            vec![
1469                ReadData::builder().id("read1").identity("SO").build(),
1470                ReadData::builder().id("read2").identity("IN").build(),
1471            ]
1472        );
1473        assert_eq!(node.sequence, Some("ACGT".into()));
1474
1475        Ok(())
1476    }
1477
1478    #[test]
1479    fn test_parse_unordered_group() -> Result<()> {
1480        let mut graph = TSGraph::new();
1481        let fields = vec!["U", "group1", "node1", "node2", "edge1"];
1482
1483        graph.parse_unordered_group_line(&fields)?;
1484
1485        let graph_section = graph.default_graph().unwrap();
1486        assert_eq!(graph_section.groups.len(), 1);
1487        if let Group::Unordered { id, elements, .. } = &graph_section.groups["group1".as_bytes()] {
1488            assert_eq!(id, "group1");
1489            assert_eq!(elements.len(), 3);
1490            assert_eq!(elements[0], "node1");
1491            assert_eq!(elements[1], "node2");
1492            assert_eq!(elements[2], "edge1");
1493        } else {
1494            panic!("Expected Unordered group");
1495        }
1496
1497        Ok(())
1498    }
1499
1500    #[test]
1501    fn test_read_from_file() -> Result<()> {
1502        let file = "tests/data/test.tsg";
1503        let graph = TSGraph::from_file(file)?;
1504
1505        assert_eq!(graph.headers.len(), 2);
1506        assert_eq!(graph.nodes(DEFAULT_GRAPH_ID).len(), 5);
1507        assert_eq!(graph.edges(DEFAULT_GRAPH_ID).len(), 4);
1508
1509        graph.to_file("tests/data/test_write.tsg")?;
1510
1511        Ok(())
1512    }
1513
1514    #[test]
1515    fn test_from_str() -> Result<()> {
1516        let tsg_string = r#"H	VN	1.0
1517    H	PN	TestGraph
1518    N	node1	chr1:+:100-200	read1:SO,read2:IN	ACGT
1519    N	node2	chr1:+:300-400	read1:SO,read2:IN
1520    N	node3	chr1:+:500-600	read2:SO,read3:IN
1521    E	edge1	node1	node2	chr1,chr1,1700,2000,INV
1522    E	edge2	node2	node3	chr1,chr1,1700,2000,DUP
1523    C	chain1	node1 edge1 node2 edge2 node3
1524    "#;
1525
1526        let graph = TSGraph::from_str(tsg_string)?;
1527
1528        // Verify headers
1529        assert_eq!(graph.headers.len(), 2);
1530        assert_eq!(graph.headers[0].tag, "VN");
1531        assert_eq!(graph.headers[0].value, "1.0");
1532        assert_eq!(graph.headers[1].tag, "PN");
1533        assert_eq!(graph.headers[1].value, "TestGraph");
1534
1535        // Verify nodes
1536        assert_eq!(graph.nodes(DEFAULT_GRAPH_ID).len(), 3);
1537        let node1 = graph.node(DEFAULT_GRAPH_ID, "node1").unwrap();
1538        assert_eq!(node1.id, "node1");
1539        assert_eq!(node1.sequence, Some("ACGT".into()));
1540
1541        // Verify edges
1542        assert_eq!(graph.edges(DEFAULT_GRAPH_ID).len(), 2);
1543        let edge1 = graph.edge(DEFAULT_GRAPH_ID, "edge1").unwrap();
1544        assert_eq!(edge1.id, "edge1");
1545
1546        // Verify groups
1547        let graph_section = graph.graph(DEFAULT_GRAPH_ID).unwrap();
1548
1549        // Verify chain
1550        if let Group::Chain { elements, .. } = &graph_section.chains["chain1".as_bytes()] {
1551            assert_eq!(elements.len(), 5);
1552            assert_eq!(elements[0], "node1");
1553            assert_eq!(elements[1], "edge1");
1554        } else {
1555            panic!("Expected Chain group");
1556        }
1557
1558        Ok(())
1559    }
1560
1561    #[test]
1562    fn test_traverse() -> Result<()> {
1563        let file = "tests/data/test.tsg";
1564        let graph = TSGraph::from_file(file)?;
1565
1566        let paths = graph.traverse_by_id(DEFAULT_GRAPH_ID)?;
1567        // assert_eq!(paths.len(), 2);
1568
1569        for path in paths {
1570            println!("{}", path);
1571        }
1572        Ok(())
1573    }
1574
1575    #[test]
1576    fn test_to_dot() -> Result<()> {
1577        let file = "tests/data/test.tsg";
1578        let graph = TSGraph::from_file(file)?;
1579
1580        let dot = graph.to_dot_by_id(DEFAULT_GRAPH_ID, true, true)?;
1581        println!("{}", dot);
1582
1583        Ok(())
1584    }
1585
1586    #[test]
1587    fn test_to_json() -> Result<()> {
1588        let file = "tests/data/test.tsg";
1589        let graph = TSGraph::from_file(file)?;
1590
1591        let json = graph.to_json_by_id(DEFAULT_GRAPH_ID)?;
1592        println!("{}", json);
1593        Ok(())
1594    }
1595}