tsg_core/
graph.rs

1mod analysis;
2mod attr;
3mod edge;
4mod group;
5mod header;
6mod node;
7mod path;
8mod utils;
9
10use noodles::fasta;
11use std::fs::File;
12use std::io::{BufRead, BufReader, BufWriter};
13use std::path::Path;
14use std::str::FromStr;
15use tracing::debug;
16use tracing::warn;
17
18use ahash::{HashMap, HashMapExt, HashSet, HashSetExt};
19use anyhow::{Context, Result, anyhow};
20use bstr::{BStr, BString, ByteSlice};
21
22pub use analysis::*;
23pub use attr::*;
24pub use edge::*;
25pub use group::*;
26pub use header::*;
27pub use node::*;
28pub use path::*;
29pub use utils::*;
30
31use bon::Builder;
32use petgraph::dot::{Config, Dot};
33use petgraph::graph::{DiGraph, EdgeIndex, NodeIndex};
34use petgraph::visit::EdgeRef;
35use rayon::prelude::*;
36use serde_json::json;
37use std::collections::VecDeque;
38
39pub const DEFAULT_GRAPH_ID: &str = "G.graph";
40/// Represents a graph section within the TSG file
41#[derive(Debug, Clone, Default, Builder)]
42pub struct GraphSection {
43    pub id: BString,
44    pub attributes: HashMap<BString, Attribute>,
45    _graph: DiGraph<NodeData, EdgeData>,
46    pub node_indices: HashMap<BString, NodeIndex>,
47    pub edge_indices: HashMap<BString, EdgeIndex>,
48    pub groups: HashMap<BString, Group>,
49    pub chains: HashMap<BString, Group>,
50}
51
52impl GraphSection {
53    /// Create a new empty GraphSection
54    pub fn new(id: BString) -> Self {
55        Self {
56            id,
57            ..Default::default()
58        }
59    }
60
61    pub fn new_default_graph() -> Self {
62        Self::new(DEFAULT_GRAPH_ID.into())
63    }
64
65    pub fn node_indices_to_ids(&self) -> HashMap<NodeIndex, BString> {
66        self.node_indices
67            .par_iter()
68            .map(|(id, &idx)| (idx, id.clone()))
69            .collect()
70    }
71
72    pub fn edge_indices_to_ids(&self) -> HashMap<EdgeIndex, BString> {
73        self.edge_indices
74            .par_iter()
75            .map(|(id, &idx)| (idx, id.clone()))
76            .collect()
77    }
78
79    pub fn node_weight(&self, node_idx: NodeIndex) -> Option<&NodeData> {
80        self._graph.node_weight(node_idx)
81    }
82
83    pub fn edge_weight(&self, edge_idx: EdgeIndex) -> Option<&EdgeData> {
84        self._graph.edge_weight(edge_idx)
85    }
86
87    pub fn in_degree(&self, node_idx: NodeIndex) -> usize {
88        self._graph
89            .edges_directed(node_idx, petgraph::Direction::Incoming)
90            .count()
91    }
92
93    pub fn out_degree(&self, node_idx: NodeIndex) -> usize {
94        self._graph
95            .edges_directed(node_idx, petgraph::Direction::Outgoing)
96            .count()
97    }
98
99    /// Add a node to the graph
100    pub fn add_node(&mut self, node_data: NodeData) -> Result<NodeIndex> {
101        let id = node_data.id.clone();
102
103        // Check if node already exists
104        if let Some(&idx) = self.node_indices.get(&id) {
105            // Update node data
106            if let Some(attr) = self._graph.node_weight_mut(idx) {
107                *attr = node_data;
108                return Ok(idx);
109            }
110            return Err(anyhow!("Node with ID {} not found in graph", id));
111        }
112
113        // Add new node
114        let node_idx = self._graph.add_node(node_data);
115
116        debug!(
117            "graph {} add node {} ;len of node_indices: {}",
118            self.id,
119            id,
120            self.node_indices.len() + 1
121        );
122
123        self.node_indices.insert(id, node_idx);
124        Ok(node_idx)
125    }
126
127    /// Add an edge to the graph
128    pub fn add_edge(
129        &mut self,
130        source_id: &BStr,
131        sink_id: &BStr,
132        edge_data: EdgeData,
133    ) -> Result<EdgeIndex> {
134        let id = edge_data.id.clone();
135
136        // Get source node index or create it if it doesn't exist
137        let source_idx = match self.node_indices.get(source_id) {
138            Some(&idx) => idx,
139            None => {
140                // Create a placeholder node if it doesn't exist
141                let placeholder_node = NodeData {
142                    id: source_id.to_owned(),
143                    ..Default::default()
144                };
145                self.add_node(placeholder_node)?
146            }
147        };
148
149        // Get sink node index or create it if it doesn't exist
150        let sink_idx = match self.node_indices.get(sink_id) {
151            Some(&idx) => idx,
152            None => {
153                // Create a placeholder node if it doesn't exist
154                let placeholder_node = NodeData {
155                    id: sink_id.to_owned(),
156                    ..Default::default()
157                };
158                self.add_node(placeholder_node)?
159            }
160        };
161
162        // petgraph provide update_edge method to update edge data
163        let edge_idx = self._graph.update_edge(source_idx, sink_idx, edge_data);
164
165        self.edge_indices.insert(id, edge_idx);
166        Ok(edge_idx)
167    }
168
169    // Methods from old TSGraph that should now belong to GraphSection
170
171    /// Build graph based on the current state
172    fn ensure_graph_is_built(&mut self) -> Result<()> {
173        // If we already have nodes and edges, assume the graph is properly constructed
174        if !self.node_indices.is_empty() && !self.edge_indices.is_empty() {
175            return Ok(());
176        }
177
178        // If nodes and edges are missing, build them from chains
179        if !self.chains.is_empty() {
180            for group in self.chains.clone().values() {
181                if let Group::Chain { elements, .. } = group {
182                    // Process each element in the chain
183                    for (i, element_id) in elements.iter().enumerate() {
184                        if i % 2 == 0 {
185                            // It's a node - add it if it doesn't exist
186                            if !self.node_indices.contains_key(element_id) {
187                                // Create placeholder node
188                                let node_data = NodeData {
189                                    id: element_id.clone(),
190                                    ..Default::default()
191                                };
192                                self.add_node(node_data)?;
193                            }
194                        } else if i + 1 < elements.len() {
195                            // Prevent index out of bounds
196                            // It's an edge - add it if it doesn't exist
197                            if !self.edge_indices.contains_key(element_id) {
198                                // Get connecting nodes
199                                let source_id = &elements[i - 1];
200                                let sink_id = &elements[i + 1];
201
202                                // Create placeholder edge
203                                let edge_data = EdgeData {
204                                    id: element_id.clone(),
205                                    ..Default::default()
206                                };
207
208                                self.add_edge(source_id.as_bstr(), sink_id.as_bstr(), edge_data)?;
209                            }
210                        }
211                    }
212                }
213            }
214            return Ok(());
215        }
216
217        if self.id == DEFAULT_GRAPH_ID {
218            // ignore default graph
219            return Ok(());
220        }
221
222        if self.node_indices.is_empty() || self.edge_indices.is_empty() {
223            warn!(
224                "Graph {} has no nodes/edges defined and no chains available",
225                self.id
226            );
227        }
228
229        Ok(())
230    }
231
232    // Additional GraphSection methods...
233    pub fn node_by_idx(&self, node_idx: NodeIndex) -> Option<&NodeData> {
234        self._graph.node_weight(node_idx)
235    }
236
237    pub fn node_by_id(&self, id: &str) -> Option<&NodeData> {
238        let node_idx = self.node_indices.get(&BString::from(id))?;
239        self._graph.node_weight(*node_idx)
240    }
241
242    pub fn edge_by_id(&self, id: &str) -> Option<&EdgeData> {
243        let edge_idx = self.edge_indices.get(&BString::from(id))?;
244        self._graph.edge_weight(*edge_idx)
245    }
246
247    pub fn edge_by_idx(&self, edge_idx: EdgeIndex) -> Option<&EdgeData> {
248        self._graph.edge_weight(edge_idx)
249    }
250
251    pub fn nodes(&self) -> Vec<&NodeData> {
252        self.node_indices
253            .values()
254            .map(|&idx| self._graph.node_weight(idx).unwrap())
255            .collect()
256    }
257
258    pub fn edges(&self) -> Vec<&EdgeData> {
259        self.edge_indices
260            .values()
261            .map(|&idx| self._graph.edge_weight(idx).unwrap())
262            .collect()
263    }
264
265    /// Helper method to find a node's ID by its index
266    pub fn find_node_id_by_idx(&self, node_idx: NodeIndex) -> Option<&BString> {
267        self.node_indices
268            .par_iter()
269            .find_map_first(|(id, &idx)| if idx == node_idx { Some(id) } else { None })
270    }
271
272    // Other methods from TSGraph that make sense at the graph section level
273
274    /// Traverse the graph and return all valid paths from source nodes to sink nodes.
275    ///
276    /// A valid path must respect read continuity, especially for nodes with Intermediary (IN) reads.
277    /// For nodes with IN reads, we ensure that:
278    /// 1. The node shares at least one read with previous nodes in the path
279    /// 2. The node can connect to at least one subsequent node that shares a read with it
280    ///
281    /// Example:
282    /// n1 (r1) -> n3 (r1,r2) -> n4 (r1)
283    /// n2 (r2) -> n3 (r1,r2) -> n5 (r2)
284    ///
285    /// If n3 has IN reads, then only these paths are valid:
286    /// - n1 -> n3 -> n4 (valid because they all share read r1)
287    /// - n2 -> n3 -> n5 (valid because they all share read r2)
288    ///
289    /// These paths would be invalid:
290    /// - n1 -> n3 -> n5 (invalid because n1 and n5 don't share a common read)
291    /// - n2 -> n3 -> n4 (invalid because n2 and n4 don't share a common read)
292    pub fn traverse(&self) -> Result<Vec<TSGPath>> {
293        // Find all source nodes (nodes with no incoming edges)
294        let source_nodes: Vec<NodeIndex> = self
295            ._graph
296            .node_indices()
297            .filter(|&idx| {
298                self._graph
299                    .edges_directed(idx, petgraph::Direction::Incoming)
300                    .count()
301                    == 0
302            })
303            .collect();
304
305        if source_nodes.is_empty() {
306            return Ok(Vec::new());
307        }
308
309        let mut all_paths = Vec::new();
310        // Cache node read IDs to avoid repeated lookups
311        let mut node_read_ids_cache: HashMap<NodeIndex, HashSet<BString>> = HashMap::new();
312
313        // Pre-compute node read IDs
314        for node_idx in self._graph.node_indices() {
315            if let Some(node) = self._graph.node_weight(node_idx) {
316                let read_ids: HashSet<BString> =
317                    node.reads.par_iter().map(|r| r.id.clone()).collect();
318                node_read_ids_cache.insert(node_idx, read_ids);
319            }
320        }
321
322        // For each source node, perform a traversal
323        for &start_node in &source_nodes {
324            // Skip nodes with no reads
325            if let Some(read_set) = node_read_ids_cache.get(&start_node) {
326                if read_set.is_empty() {
327                    continue;
328                }
329
330                let mut queue = VecDeque::new();
331                // (node, path_so_far, active_reads)
332                let mut initial_path = TSGPath::builder().graph(self).build();
333                initial_path.add_node(start_node);
334
335                queue.push_back((start_node, initial_path, read_set.clone()));
336
337                while let Some((current_node, path, active_reads)) = queue.pop_front() {
338                    // Get outgoing edges
339                    let outgoing_edges: Vec<_> = self
340                        ._graph
341                        .edges_directed(current_node, petgraph::Direction::Outgoing)
342                        .collect();
343
344                    // If this is a sink node (no outgoing edges), save the path
345                    if outgoing_edges.is_empty() {
346                        path.validate()?;
347                        all_paths.push(path);
348                        continue;
349                    }
350
351                    for edge_ref in outgoing_edges {
352                        let edge_idx = edge_ref.id();
353                        let target_node = edge_ref.target();
354
355                        if let Some(target_read_ids) = node_read_ids_cache.get(&target_node) {
356                            // Calculate reads that continue from current path to target
357                            let continuing_reads: HashSet<_> = active_reads
358                                .par_iter()
359                                .filter(|id| target_read_ids.contains(*id))
360                                .cloned()
361                                .collect();
362
363                            if continuing_reads.is_empty() {
364                                // No read continuity, skip this edge
365                                continue;
366                            }
367
368                            // Check if target node has IN reads
369                            let has_in_reads =
370                                if let Some(target_data) = self._graph.node_weight(target_node) {
371                                    target_data
372                                        .reads
373                                        .par_iter()
374                                        .any(|r| r.identity == ReadIdentity::IN)
375                                } else {
376                                    false
377                                };
378
379                            if has_in_reads {
380                                // For IN nodes, check if there's a valid path forward
381                                let mut can_continue = false;
382                                let outgoing_from_target: Vec<_> = self
383                                    ._graph
384                                    .edges_directed(target_node, petgraph::Direction::Outgoing)
385                                    .map(|e| e.target())
386                                    .collect();
387
388                                for &next_node in &outgoing_from_target {
389                                    if let Some(next_read_ids) = node_read_ids_cache.get(&next_node)
390                                    {
391                                        // Check if there's at least one read that continues through
392                                        if continuing_reads
393                                            .par_iter()
394                                            .any(|id| next_read_ids.contains(id))
395                                        {
396                                            can_continue = true;
397                                            break;
398                                        }
399                                    }
400                                }
401
402                                if !can_continue && !outgoing_from_target.is_empty() {
403                                    // Has outgoing edges but no valid continuation, skip this edge
404                                    continue;
405                                }
406                            }
407
408                            // Create new path and continue traversal
409                            let mut new_path = path.clone();
410                            new_path.add_edge(edge_idx);
411                            new_path.add_node(target_node);
412                            queue.push_back((target_node, new_path, continuing_reads));
413                        }
414                    }
415                }
416            }
417        }
418
419        Ok(all_paths)
420    }
421
422    pub fn to_dot(&self, node_label: bool, edge_label: bool) -> Result<String> {
423        let mut config = vec![];
424        if node_label {
425            config.push(Config::NodeIndexLabel);
426        }
427        if edge_label {
428            config.push(Config::EdgeIndexLabel);
429        }
430
431        let dot = Dot::with_config(&self._graph, &config);
432        Ok(format!("{:?}", dot))
433    }
434
435    pub fn to_json(&self) -> Result<serde_json::Value> {
436        let mut nodes = Vec::new();
437        let mut edges = Vec::new();
438
439        // Process all nodes
440        for node_idx in self._graph.node_indices() {
441            if let Some(node) = self._graph.node_weight(node_idx) {
442                if let Ok(node_json) = node.to_json(None) {
443                    nodes.push(node_json);
444                }
445            }
446        }
447
448        // Process all edges
449        for edge_idx in self._graph.edge_indices() {
450            if let Some(edge) = self._graph.edge_weight(edge_idx) {
451                let edge_endpoints = self._graph.edge_endpoints(edge_idx);
452
453                if let Some((source, target)) = edge_endpoints {
454                    let source_id = self.find_node_id_by_idx(source);
455                    let target_id = self.find_node_id_by_idx(target);
456
457                    // get reads from source node and target node
458                    // the weight will be the intersection of reads
459                    let source_data = self.node_by_idx(source).unwrap();
460                    let target_data = self.node_by_idx(target).unwrap();
461
462                    // get the intersection of reads
463                    let source_reads = source_data
464                        .reads
465                        .iter()
466                        .map(|r| r.id.clone())
467                        .collect::<HashSet<_>>();
468                    let target_reads = target_data
469                        .reads
470                        .iter()
471                        .map(|r| r.id.clone())
472                        .collect::<HashSet<_>>();
473                    let edge_weight = source_reads
474                        .intersection(&target_reads)
475                        .collect::<HashSet<_>>()
476                        .len();
477
478                    if let (Some(source_id), Some(target_id)) = (source_id, target_id) {
479                        let edge_data = json!({
480                            "data": {
481                                "id": edge.id.to_str().unwrap(),
482                                "source": source_id.to_str().unwrap(),
483                                "target": target_id.to_str().unwrap(),
484                                "weight": edge_weight,
485                                "breakpoints": format!("{}", edge.sv)
486                            }
487                        });
488                        edges.push(edge_data);
489                    }
490                }
491            }
492        }
493
494        // Combine nodes and edges into a Cytoscape-compatible format
495        let elements = json!({
496            "directed": true,
497            "multigraph": true,
498            "elements": {
499            "nodes": nodes,
500            "edges": edges
501            }
502        });
503
504        Ok(elements)
505    }
506
507    pub fn annotate_node_with_sequence<P: AsRef<Path>>(
508        &mut self,
509        reference_genome_path: P,
510    ) -> Result<()> {
511        let mut reader = fasta::io::indexed_reader::Builder::default()
512            .build_from_path(reference_genome_path.as_ref())?;
513
514        for node_idx in self.node_indices.values() {
515            let node_data = self._graph.node_weight_mut(*node_idx).unwrap();
516
517            let region = format!(
518                "{}:{}-{}",
519                node_data.reference_id,
520                node_data.reference_start() - 1, // 0-based to 1-based
521                node_data.reference_end(),
522            )
523            .parse()?;
524            let record = reader.query(&region)?;
525            node_data.sequence = Some(record.sequence().as_ref().into());
526        }
527        Ok(())
528    }
529}
530
531/// Represents a link between elements in different graphs
532#[derive(Debug, Clone, Default, Builder)]
533pub struct InterGraphLink {
534    pub id: BString,
535    pub source_graph: BString,
536    pub source_element: BString,
537    pub target_graph: BString,
538    pub target_element: BString,
539    pub link_type: BString,
540    pub attributes: HashMap<BString, Attribute>,
541}
542
543/// The complete transcript segment graph containing multiple graph sections
544#[derive(Debug, Clone, Default, Builder)]
545pub struct TSGraph {
546    pub headers: Vec<Header>,
547    pub graphs: HashMap<BString, GraphSection>,
548    pub links: Vec<InterGraphLink>,
549    current_graph_id: Option<BString>, // Tracks which graph is currently active during parsing
550}
551
552impl TSGraph {
553    /// Create a new empty TSGraph
554    pub fn new() -> Self {
555        let graph = GraphSection::new_default_graph();
556        let mut graphs = HashMap::new();
557        graphs.insert(graph.id.clone(), graph);
558        Self {
559            graphs,
560            current_graph_id: Some(DEFAULT_GRAPH_ID.into()),
561            ..Default::default()
562        }
563    }
564
565    /// Parse a header line
566    fn parse_header_line(&mut self, fields: &[&str]) -> Result<()> {
567        if fields.len() < 3 {
568            return Err(anyhow!("Invalid header line format"));
569        }
570
571        self.headers.push(Header {
572            tag: fields[1].into(),
573            value: fields[2].into(),
574        });
575
576        Ok(())
577    }
578
579    /// Parse a graph section line
580    fn parse_graph_line(&mut self, fields: &[&str]) -> Result<()> {
581        if fields.len() < 2 {
582            return Err(anyhow!("Invalid graph line format"));
583        }
584
585        let graph_id: BString = fields[1].into();
586
587        // Check if graph with this ID already exists
588        if self.graphs.contains_key(&graph_id) {
589            return Err(anyhow!(
590                "Graph with ID {} already exists",
591                graph_id.to_str().unwrap_or("")
592            ));
593        }
594
595        // Create new graph section
596        let mut graph_section = GraphSection::new(graph_id.clone());
597
598        // Parse optional attributes
599        if fields.len() > 2 {
600            for attr_str in &fields[2..] {
601                let attr = attr_str.parse::<Attribute>()?;
602                graph_section.attributes.insert(attr.tag.clone(), attr);
603            }
604        }
605
606        // Update current graph ID and add to graphs map
607        self.current_graph_id = Some(graph_id.clone());
608        self.graphs.insert(graph_id, graph_section);
609
610        Ok(())
611    }
612
613    /// Get the current graph section (or error if none is active)
614    fn current_graph_mut(&mut self) -> Result<&mut GraphSection> {
615        if let Some(graph_id) = &self.current_graph_id {
616            if let Some(graph) = self.graphs.get_mut(graph_id) {
617                return Ok(graph);
618            }
619        }
620        Err(anyhow!("No active graph section"))
621    }
622
623    /// Parse an inter-graph link line
624    fn parse_link_line(&mut self, fields: &[&str]) -> Result<()> {
625        if fields.len() < 5 {
626            return Err(anyhow!("Invalid link line format"));
627        }
628
629        let id: BString = fields[1].into();
630
631        // Parse source and target references (format: graph_id:element_id)
632        let source_ref: Vec<&str> = fields[2].splitn(2, ':').collect();
633        let target_ref: Vec<&str> = fields[3].splitn(2, ':').collect();
634
635        if source_ref.len() != 2 || target_ref.len() != 2 {
636            return Err(anyhow!("Invalid element reference format in link line"));
637        }
638
639        let source_graph: BString = source_ref[0].into();
640        let source_element: BString = source_ref[1].into();
641        let target_graph: BString = target_ref[0].into();
642        let target_element: BString = target_ref[1].into();
643
644        // Verify the referenced graphs exist
645        if !self.graphs.contains_key(&source_graph) {
646            return Err(anyhow!(
647                "Source graph {} not found",
648                source_graph.to_str().unwrap_or("")
649            ));
650        }
651        if !self.graphs.contains_key(&target_graph) {
652            return Err(anyhow!(
653                "Target graph {} not found",
654                target_graph.to_str().unwrap_or("")
655            ));
656        }
657
658        let link_type: BString = fields[4].into();
659
660        let mut link = InterGraphLink {
661            id,
662            source_graph,
663            source_element,
664            target_graph,
665            target_element,
666            link_type,
667            attributes: HashMap::new(),
668        };
669
670        // Parse optional attributes
671        if fields.len() > 5 {
672            for attr_str in &fields[5..] {
673                let attr = attr_str.parse::<Attribute>()?;
674                link.attributes.insert(attr.tag.clone(), attr);
675            }
676        }
677
678        self.links.push(link);
679        Ok(())
680    }
681
682    /// Parse a node line
683    fn parse_node_line(&mut self, fields: &str) -> Result<()> {
684        let node_data = NodeData::from_str(fields)?;
685        let graph = self.current_graph_mut()?;
686        graph.add_node(node_data)?;
687        Ok(())
688    }
689
690    /// Parse an edge line
691    fn parse_edge_line(&mut self, fields: &[&str]) -> Result<()> {
692        if fields.len() < 5 {
693            return Err(anyhow!("Invalid edge line format"));
694        }
695
696        let id: BString = fields[1].into();
697        let source_id: BString = fields[2].into();
698        let sink_id: BString = fields[3].into();
699        let sv = fields[4].parse::<StructuralVariant>()?;
700
701        let edge_data = EdgeData {
702            id,
703            sv,
704            attributes: HashMap::new(),
705        };
706
707        let graph = self.current_graph_mut()?;
708        graph.add_edge(source_id.as_bstr(), sink_id.as_bstr(), edge_data)?;
709        Ok(())
710    }
711
712    /// Parse an unordered group line
713    fn parse_unordered_group_line(&mut self, fields: &[&str]) -> Result<()> {
714        if fields.len() < 3 {
715            return Err(anyhow!("Invalid unordered group line format"));
716        }
717
718        let id: BString = fields[1].into();
719        let graph = self.current_graph_mut()?;
720
721        // Check for duplicate group name
722        if graph.groups.contains_key(&id) {
723            return Err(anyhow!("Group with ID {} already exists", id));
724        }
725
726        // Parse element IDs (space-separated)
727        let elements_str = fields[2..].join(" ");
728        let elements = elements_str
729            .split_whitespace()
730            .map(|s| s.into())
731            .collect::<Vec<_>>();
732
733        let group = Group::Unordered {
734            id: id.clone(),
735            elements,
736            attributes: HashMap::new(),
737        };
738
739        graph.groups.insert(id, group);
740        Ok(())
741    }
742
743    /// Parse a path line
744    fn parse_path_line(&mut self, fields: &[&str]) -> Result<()> {
745        if fields.len() < 3 {
746            return Err(anyhow!("Invalid path line format"));
747        }
748
749        let id: BString = fields[1].into();
750        let graph = self.current_graph_mut()?;
751
752        // Check for duplicate group name
753        if graph.groups.contains_key(&id) {
754            return Err(anyhow!("Group with ID {} already exists", id));
755        }
756
757        // Parse oriented element IDs (space-separated)
758        let elements_str = fields[2..].join(" ");
759        let elements = elements_str
760            .split_whitespace()
761            .map(|s| s.parse::<OrientedElement>())
762            .collect::<Result<Vec<_>, _>>()?;
763
764        let group = Group::Ordered {
765            id: id.clone(),
766            elements,
767            attributes: HashMap::new(),
768        };
769
770        graph.groups.insert(id, group);
771        Ok(())
772    }
773
774    /// Parse a chain line
775    fn parse_chain_line(&mut self, fields: &[&str]) -> Result<()> {
776        if fields.len() < 3 {
777            return Err(anyhow!("Invalid chain line format"));
778        }
779
780        let id: BString = fields[1].into();
781        let graph = self.current_graph_mut()?;
782
783        // Check for duplicate group name
784        if graph.groups.contains_key(&id) {
785            return Err(anyhow!("Group with ID {} already exists", id));
786        }
787
788        // Parse element IDs (space-separated)
789        let elements_str = fields[2..].join(" ");
790        let elements: Vec<BString> = elements_str.split_whitespace().map(|s| s.into()).collect();
791
792        // Validate chain structure: must start and end with nodes, and have alternating nodes/edges
793        if elements.is_empty() {
794            return Err(anyhow!("Chain must contain at least one element"));
795        }
796
797        if elements.len() % 2 == 0 {
798            return Err(anyhow!(
799                "Chain must have an odd number of elements (starting and ending with nodes)"
800            ));
801        }
802
803        // Create the chain group
804        let group = Group::Chain {
805            id: id.clone(),
806            elements,
807            attributes: HashMap::new(),
808        };
809
810        // Store the chain in both maps
811        graph.chains.insert(id.clone(), group.clone());
812        graph.groups.insert(id, group);
813        Ok(())
814    }
815
816    /// Parse an attribute line
817    fn parse_attribute_line(&mut self, fields: &[&str]) -> Result<()> {
818        if fields.len() < 4 {
819            return Err(anyhow!("Invalid attribute line format"));
820        }
821
822        let element_type = fields[1];
823        let element_id: BString = fields[2].into();
824        let graph = self.current_graph_mut()?;
825
826        let attrs: Vec<Attribute> = fields
827            .iter()
828            .skip(3)
829            .map(|s| s.parse())
830            .collect::<Result<Vec<_>>>()
831            .context("invalidate attribute line")?;
832
833        match element_type {
834            "N" => {
835                if let Some(&node_idx) = graph.node_indices.get(&element_id) {
836                    if let Some(node_data) = graph._graph.node_weight_mut(node_idx) {
837                        for attr in attrs {
838                            let tag = attr.tag.clone();
839                            node_data.attributes.insert(tag, attr);
840                        }
841                    } else {
842                        return Err(anyhow!("Node with ID {} not found in graph", element_id));
843                    }
844                } else {
845                    return Err(anyhow!("Node with ID {} not found", element_id));
846                }
847            }
848            "E" => {
849                if let Some(&edge_idx) = graph.edge_indices.get(&element_id) {
850                    if let Some(edge_data) = graph._graph.edge_weight_mut(edge_idx) {
851                        for attr in attrs {
852                            let tag = attr.tag.clone();
853                            edge_data.attributes.insert(tag, attr);
854                        }
855                    } else {
856                        return Err(anyhow!("Edge with ID {} not found in graph", element_id));
857                    }
858                } else {
859                    return Err(anyhow!("Edge with ID {} not found", element_id));
860                }
861            }
862            "U" | "P" | "C" => {
863                if let Some(group) = graph.groups.get_mut(&element_id) {
864                    match group {
865                        Group::Unordered { attributes, .. }
866                        | Group::Ordered { attributes, .. }
867                        | Group::Chain { attributes, .. } => {
868                            for attr in attrs {
869                                let tag = attr.tag.clone();
870                                attributes.insert(tag, attr);
871                            }
872                        }
873                    }
874                } else {
875                    return Err(anyhow!("Group with ID {} not found", element_id));
876                }
877            }
878            "G" => {
879                // Handle graph attributes
880                if let Some(graph_section) = self.graphs.get_mut(&element_id) {
881                    for attr in attrs {
882                        let tag = attr.tag.clone();
883                        graph_section.attributes.insert(tag, attr);
884                    }
885                } else {
886                    return Err(anyhow!("Graph with ID {} not found", element_id));
887                }
888            }
889            _ => {
890                return Err(anyhow!("Unknown element type: {}", element_type));
891            }
892        }
893
894        Ok(())
895    }
896
897    /// Validate all graphs and their paths
898    fn validate(&self) -> Result<()> {
899        // Validate each graph section
900        for (graph_id, graph) in &self.graphs {
901            // Validate paths against the graph
902            for (id, group) in &graph.groups {
903                if let Group::Ordered { elements, .. } = group {
904                    // Validate that all elements in the path exist in the graph
905                    for element in elements {
906                        let element_exists = graph.node_indices.contains_key(&element.id)
907                            || graph.edge_indices.contains_key(&element.id)
908                            || graph.groups.contains_key(&element.id);
909
910                        if !element_exists {
911                            return Err(anyhow!(
912                                "Path {} in graph {} references non-existent element {}",
913                                id,
914                                graph_id,
915                                element.id
916                            ));
917                        }
918                    }
919                }
920            }
921        }
922
923        // Validate all inter-graph links
924        for link in &self.links {
925            // Check source element exists
926            let source_graph = self.graphs.get(&link.source_graph).ok_or_else(|| {
927                anyhow!(
928                    "Link {} references non-existent graph {}",
929                    link.id,
930                    link.source_graph
931                )
932            })?;
933
934            let source_exists = source_graph.node_indices.contains_key(&link.source_element)
935                || source_graph.edge_indices.contains_key(&link.source_element)
936                || source_graph.groups.contains_key(&link.source_element);
937
938            if !source_exists {
939                return Err(anyhow!(
940                    "Link {} references non-existent element {}:{}",
941                    link.id,
942                    link.source_graph,
943                    link.source_element
944                ));
945            }
946
947            // Check target element exists
948            let target_graph = self.graphs.get(&link.target_graph).ok_or_else(|| {
949                anyhow!(
950                    "Link {} references non-existent graph {}",
951                    link.id,
952                    link.target_graph
953                )
954            })?;
955
956            let target_exists = target_graph.node_indices.contains_key(&link.target_element)
957                || target_graph.edge_indices.contains_key(&link.target_element)
958                || target_graph.groups.contains_key(&link.target_element);
959
960            if !target_exists {
961                return Err(anyhow!(
962                    "Link {} references non-existent element {}:{}",
963                    link.id,
964                    link.target_graph,
965                    link.target_element
966                ));
967            }
968        }
969
970        Ok(())
971    }
972
973    pub fn from_reader<R: BufRead>(reader: R) -> Result<Self> {
974        let mut tsgraph = TSGraph::new();
975
976        // Create a default graph if needed for backward compatibility
977        let default_graph_id: BString = DEFAULT_GRAPH_ID.into();
978        let default_graph = GraphSection::new(default_graph_id.clone());
979        tsgraph
980            .graphs
981            .insert(default_graph_id.clone(), default_graph);
982
983        tsgraph.current_graph_id = Some(default_graph_id);
984
985        // First pass: Parse all record types
986        for line in reader.lines() {
987            let line = line?;
988            if line.is_empty() || line.starts_with('#') {
989                continue;
990            }
991            let fields: Vec<&str> = line.split_whitespace().collect();
992            if fields.is_empty() {
993                continue;
994            }
995
996            match fields[0] {
997                "H" => tsgraph.parse_header_line(&fields)?,
998                "G" => tsgraph.parse_graph_line(&fields)?,
999                "N" => tsgraph.parse_node_line(&line)?,
1000                "E" => tsgraph.parse_edge_line(&fields)?,
1001                "U" => tsgraph.parse_unordered_group_line(&fields)?,
1002                "P" => tsgraph.parse_path_line(&fields)?,
1003                "C" => tsgraph.parse_chain_line(&fields)?,
1004                "A" => tsgraph.parse_attribute_line(&fields)?,
1005                "L" => tsgraph.parse_link_line(&fields)?,
1006                _ => {
1007                    // ignore unknown record types
1008                    debug!("Ignoring unknown record type: {}", fields[0]);
1009                }
1010            }
1011        }
1012
1013        // Second pass: Ensure all graphs are built and validate
1014        for graph_section in tsgraph.graphs.values_mut() {
1015            // Populate chains hash map from groups if needed
1016            for (id, group) in &graph_section.groups {
1017                if let Group::Chain { .. } = group {
1018                    if !graph_section.chains.contains_key(id) {
1019                        graph_section.chains.insert(id.clone(), group.clone());
1020                    }
1021                }
1022            }
1023
1024            // Ensure graph is built
1025            graph_section.ensure_graph_is_built()?;
1026        }
1027
1028        // Validate all graphs and links
1029        tsgraph.validate()?;
1030
1031        // pop the default graph if it's empty
1032        if let Some(default_graph) = tsgraph.graph(DEFAULT_GRAPH_ID) {
1033            if default_graph.node_indices.is_empty() {
1034                tsgraph.graphs.remove(&BString::from(DEFAULT_GRAPH_ID));
1035            }
1036        }
1037        Ok(tsgraph)
1038    }
1039
1040    /// Parse a TSG file and construct a TSGraph
1041    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
1042        let file = File::open(path)?;
1043        let reader = BufReader::new(file);
1044        Self::from_reader(reader)
1045    }
1046
1047    /// Write the TSGraph to writer
1048    pub fn to_writer<W: std::io::Write>(&self, writer: &mut W) -> Result<()> {
1049        // Write global headers
1050        writeln!(writer, "# Global header")?;
1051        for header in &self.headers {
1052            writeln!(writer, "{}", header)?;
1053        }
1054
1055        let new_header = Header::builder().tag("PG").value("tsg").build();
1056        writeln!(writer, "{}", new_header)?;
1057
1058        // Write each graph section
1059        for (graph_id, graph) in &self.graphs {
1060            // Skip writing the default graph header if it's empty
1061            if graph_id == &BString::from(DEFAULT_GRAPH_ID) && graph.node_indices.is_empty() {
1062                continue;
1063            }
1064
1065            writeln!(writer, "\n# Graph: {}", graph_id)?;
1066
1067            // Write graph section header
1068            write!(writer, "G\t{}", graph_id)?;
1069            for attr in graph.attributes.values() {
1070                write!(writer, "\t{}", attr)?;
1071            }
1072            writeln!(writer)?;
1073
1074            // Write nodes
1075            writeln!(writer, "# Nodes")?;
1076            for node_idx in graph._graph.node_indices() {
1077                if let Some(node_data) = graph._graph.node_weight(node_idx) {
1078                    writeln!(writer, "{}", node_data)?;
1079                }
1080            }
1081
1082            // Write edges
1083            writeln!(writer, "# Edges")?;
1084            for edge_ref in graph._graph.edge_references() {
1085                let edge = edge_ref.weight();
1086                let source_idx = edge_ref.source();
1087                let sink_idx = edge_ref.target();
1088
1089                if let (Some(source), Some(sink)) = (
1090                    graph._graph.node_weight(source_idx),
1091                    graph._graph.node_weight(sink_idx),
1092                ) {
1093                    writeln!(
1094                        writer,
1095                        "E\t{}\t{}\t{}\t{}",
1096                        edge.id, source.id, sink.id, edge.sv
1097                    )?;
1098                }
1099            }
1100
1101            // Write groups
1102            writeln!(writer, "# Groups")?;
1103            let mut seen_chain_ids = HashSet::new();
1104
1105            for group in graph.groups.values() {
1106                match group {
1107                    Group::Unordered { id, elements, .. } => {
1108                        let elements_str: Vec<String> = elements
1109                            .par_iter()
1110                            .map(|e| e.to_str().unwrap_or("").to_string())
1111                            .collect();
1112                        writeln!(
1113                            writer,
1114                            "U\t{}\t{}",
1115                            id.to_str().unwrap_or(""),
1116                            elements_str.join(" ")
1117                        )?;
1118                    }
1119                    Group::Ordered { id, elements, .. } => {
1120                        let elements_str: Vec<String> =
1121                            elements.par_iter().map(|e| e.to_string()).collect();
1122                        writeln!(
1123                            writer,
1124                            "P\t{}\t{}",
1125                            id.to_str().unwrap_or(""),
1126                            elements_str.join(" ")
1127                        )?;
1128                    }
1129                    Group::Chain { id, elements, .. } => {
1130                        // Skip writing chains that are duplicated with groups
1131                        if seen_chain_ids.contains(id) {
1132                            continue;
1133                        }
1134                        seen_chain_ids.insert(id);
1135
1136                        let elements_str: Vec<String> = elements
1137                            .par_iter()
1138                            .map(|e| e.to_str().unwrap_or("").to_string())
1139                            .collect();
1140                        writeln!(
1141                            writer,
1142                            "C\t{}\t{}",
1143                            id.to_str().unwrap_or(""),
1144                            elements_str.join(" ")
1145                        )?;
1146                    }
1147                }
1148            }
1149
1150            // Write attributes for this graph section
1151            writeln!(writer, "# Attributes")?;
1152
1153            // Write attributes for nodes
1154            for node_idx in graph._graph.node_indices() {
1155                if let Some(node) = graph._graph.node_weight(node_idx) {
1156                    for attr in node.attributes.values() {
1157                        writeln!(writer, "A\tN\t{}\t{}", node.id, attr)?;
1158                    }
1159                }
1160            }
1161
1162            // Write attributes for edges
1163            for edge_idx in graph._graph.edge_indices() {
1164                if let Some(edge) = graph._graph.edge_weight(edge_idx) {
1165                    for attr in edge.attributes.values() {
1166                        writeln!(writer, "A\tE\t{}\t{}", edge.id, attr)?;
1167                    }
1168                }
1169            }
1170
1171            // Write attributes for groups
1172            for (id, group) in &graph.groups {
1173                let (group_type, attributes) = match group {
1174                    Group::Unordered { attributes, .. } => ("U", attributes),
1175                    Group::Ordered { attributes, .. } => ("P", attributes),
1176                    Group::Chain { attributes, .. } => ("C", attributes),
1177                };
1178
1179                for attr in attributes.values() {
1180                    writeln!(
1181                        writer,
1182                        "A\t{}\t{}\t{}",
1183                        group_type,
1184                        id.to_str().unwrap_or(""),
1185                        attr
1186                    )?;
1187                }
1188            }
1189        }
1190
1191        // Write inter-graph links
1192        if !self.links.is_empty() {
1193            writeln!(writer, "\n# Inter-graph links")?;
1194            for link in &self.links {
1195                write!(
1196                    writer,
1197                    "L\t{}\t{}:{}\t{}:{}\t{}",
1198                    link.id,
1199                    link.source_graph,
1200                    link.source_element,
1201                    link.target_graph,
1202                    link.target_element,
1203                    link.link_type
1204                )?;
1205
1206                for attr in link.attributes.values() {
1207                    write!(writer, "\t{}", attr)?;
1208                }
1209                writeln!(writer)?;
1210            }
1211        }
1212
1213        writer.flush()?;
1214        Ok(())
1215    }
1216
1217    /// Write the TSGraph to a TSG file
1218    pub fn to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
1219        let file = File::create(path)?;
1220        let mut writer = BufWriter::new(file);
1221        self.to_writer(&mut writer)
1222    }
1223
1224    // Helper methods for accessing graph elements
1225
1226    /// Get a graph section by its ID
1227    pub fn graph(&self, id: &str) -> Option<&GraphSection> {
1228        self.graphs.get(&BString::from(id))
1229    }
1230
1231    pub fn graph_mut(&mut self, id: &str) -> Option<&mut GraphSection> {
1232        self.graphs.get_mut(&BString::from(id))
1233    }
1234
1235    /// get default graph
1236    pub fn default_graph(&self) -> Option<&GraphSection> {
1237        self.graphs.get(&BString::from(DEFAULT_GRAPH_ID))
1238    }
1239
1240    pub fn default_graph_mut(&mut self) -> Option<&mut GraphSection> {
1241        self.graphs.get_mut(&BString::from(DEFAULT_GRAPH_ID))
1242    }
1243
1244    /// Get a node by its ID and graph ID
1245    pub fn node(&self, graph_id: &str, node_id: &str) -> Option<&NodeData> {
1246        let graph = self.graphs.get(&BString::from(graph_id))?;
1247        graph.node_by_id(node_id)
1248    }
1249
1250    /// Get an edge by its ID and graph ID
1251    pub fn edge(&self, graph_id: &str, edge_id: &str) -> Option<&EdgeData> {
1252        let graph = self.graphs.get(&BString::from(graph_id))?;
1253        graph.edge_by_id(edge_id)
1254    }
1255
1256    /// Get the nodes in a chain in order
1257    pub fn chain_nodes(&self, graph_id: &str, chain_id: &BStr) -> Option<Vec<NodeIndex>> {
1258        let graph = self.graphs.get(&BString::from(graph_id))?;
1259        let group = graph.chains.get(chain_id)?;
1260
1261        match group {
1262            Group::Chain { elements, .. } => {
1263                let mut nodes = Vec::with_capacity(elements.len().div_ceil(2));
1264
1265                for (i, element_id) in elements.iter().enumerate() {
1266                    if i % 2 == 0 {
1267                        // Nodes are at even positions (0, 2, 4...)
1268                        if let Some(&node_idx) = graph.node_indices.get(element_id) {
1269                            nodes.push(node_idx);
1270                        } else {
1271                            return None; // Invalid chain structure
1272                        }
1273                    }
1274                }
1275
1276                Some(nodes)
1277            }
1278            _ => None, // Not a chain
1279        }
1280    }
1281
1282    /// Get the edges in a chain in order
1283    pub fn chain_edges(&self, graph_id: &str, chain_id: &BStr) -> Option<Vec<EdgeIndex>> {
1284        let graph = self.graphs.get(&BString::from(graph_id))?;
1285        let group = graph.chains.get(chain_id)?;
1286
1287        match group {
1288            Group::Chain { elements, .. } => {
1289                let mut edges = Vec::with_capacity(elements.len() / 2);
1290
1291                for (i, element_id) in elements.iter().enumerate() {
1292                    if i % 2 == 1 {
1293                        // Edges are at odd positions (1, 3, 5...)
1294                        if let Some(&edge_idx) = graph.edge_indices.get(element_id) {
1295                            edges.push(edge_idx);
1296                        } else {
1297                            return None; // Invalid chain structure
1298                        }
1299                    }
1300                }
1301
1302                Some(edges)
1303            }
1304            _ => None, // Not a chain
1305        }
1306    }
1307
1308    /// Helper method to find a node's ID by its index
1309    pub fn find_node_id_by_idx(&self, graph_id: &str, node_idx: NodeIndex) -> Option<&BString> {
1310        let graph = self.graphs.get(&BString::from(graph_id))?;
1311        graph
1312            .node_indices
1313            .par_iter()
1314            .find_map_first(|(id, &idx)| if idx == node_idx { Some(id) } else { None })
1315    }
1316
1317    pub fn node_by_idx(&self, graph_id: &str, node_idx: NodeIndex) -> Option<&NodeData> {
1318        let graph = self.graphs.get(&BString::from(graph_id))?;
1319        graph.node_by_idx(node_idx)
1320    }
1321
1322    pub fn edge_by_idx(&self, graph_id: &str, edge_idx: EdgeIndex) -> Option<&EdgeData> {
1323        let graph = self.graphs.get(&BString::from(graph_id))?;
1324        graph.edge_by_idx(edge_idx)
1325    }
1326
1327    /// Get all nodes in the graph
1328    pub fn nodes(&self, graph_id: &str) -> Vec<&NodeData> {
1329        let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1330        graph
1331            .node_indices
1332            .values()
1333            .filter_map(|&idx| graph._graph.node_weight(idx))
1334            .collect()
1335    }
1336
1337    /// Get all edges in the graph
1338    pub fn edges(&self, graph_id: &str) -> Vec<&EdgeData> {
1339        let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1340        graph
1341            .edge_indices
1342            .values()
1343            .filter_map(|&idx| graph._graph.edge_weight(idx))
1344            .collect()
1345    }
1346
1347    /// Traverse the graph and return all valid paths from source nodes to sink nodes.
1348    pub fn traverse_by_id(&self, graph_id: &str) -> Result<Vec<TSGPath>> {
1349        let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1350        graph.traverse()
1351    }
1352
1353    /// traverse all graphs
1354    pub fn traverse_all_graphs(&self) -> Result<Vec<TSGPath>> {
1355        let all_paths = self
1356            .graphs
1357            .values()
1358            .try_fold(Vec::new(), |mut all_paths, graph| {
1359                let paths = graph.traverse()?;
1360                all_paths.extend(paths);
1361                Ok(all_paths)
1362            });
1363        all_paths
1364    }
1365
1366    pub fn to_dot_by_id(
1367        &self,
1368        graph_id: &str,
1369        node_label: bool,
1370        edge_label: bool,
1371    ) -> Result<String> {
1372        let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1373        graph.to_dot(node_label, edge_label)
1374    }
1375
1376    pub fn to_json_by_id(&self, graph_id: &str) -> Result<serde_json::Value> {
1377        let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1378        graph.to_json()
1379    }
1380}
1381
1382impl FromStr for TSGraph {
1383    type Err = anyhow::Error;
1384    fn from_str(s: &str) -> Result<Self> {
1385        let reader = BufReader::new(s.as_bytes());
1386        Self::from_reader(reader)
1387    }
1388}
1389
1390#[cfg(test)]
1391mod tests {
1392    use super::*;
1393
1394    #[test]
1395    fn test_create_empty_graph() {
1396        let graph = TSGraph::new();
1397        assert_eq!(graph.headers.len(), 0);
1398        assert_eq!(graph.default_graph().unwrap().nodes().len(), 0);
1399        assert_eq!(graph.edges(DEFAULT_GRAPH_ID).len(), 0);
1400        assert_eq!(graph.graphs.len(), 1);
1401        assert_eq!(graph.links.len(), 0);
1402    }
1403
1404    #[test]
1405    fn test_add_node() -> Result<()> {
1406        let mut graph = TSGraph::new();
1407        let node = NodeData {
1408            id: "node1".into(),
1409            reference_id: "chr1".into(),
1410            ..Default::default()
1411        };
1412
1413        graph.default_graph_mut().unwrap().add_node(node.clone())?;
1414        assert_eq!(graph.nodes(DEFAULT_GRAPH_ID).len(), 1);
1415        assert_eq!(graph.node(DEFAULT_GRAPH_ID, "node1").unwrap().id, node.id);
1416        Ok(())
1417    }
1418
1419    #[test]
1420    fn test_add_edge() -> Result<()> {
1421        let mut graph = TSGraph::new();
1422
1423        // Add nodes first
1424        let node1 = NodeData {
1425            id: "node1".into(),
1426            reference_id: "chr1".into(),
1427            ..Default::default()
1428        };
1429
1430        let node2 = NodeData {
1431            id: "node2".into(),
1432            ..Default::default()
1433        };
1434
1435        graph.graph_mut(DEFAULT_GRAPH_ID).unwrap().add_node(node1)?;
1436        graph.graph_mut(DEFAULT_GRAPH_ID).unwrap().add_node(node2)?;
1437
1438        // Add edge
1439        let edge = EdgeData {
1440            id: "edge1".into(),
1441            ..Default::default()
1442        };
1443
1444        graph.graph_mut(DEFAULT_GRAPH_ID).unwrap().add_edge(
1445            "node1".into(),
1446            "node2".into(),
1447            edge.clone(),
1448        )?;
1449
1450        assert_eq!(graph.edges(DEFAULT_GRAPH_ID).len(), 1);
1451        assert_eq!(graph.edge(DEFAULT_GRAPH_ID, "edge1").unwrap().id, edge.id);
1452
1453        Ok(())
1454    }
1455
1456    #[test]
1457    fn test_parse_header_line() -> Result<()> {
1458        let mut graph = TSGraph::new();
1459        let fields = vec!["H", "VN", "1.0"];
1460
1461        graph.parse_header_line(&fields)?;
1462
1463        assert_eq!(graph.headers.len(), 1);
1464        assert_eq!(graph.headers[0].tag, "VN");
1465        assert_eq!(graph.headers[0].value, "1.0");
1466
1467        Ok(())
1468    }
1469
1470    #[test]
1471    fn test_parse_node_line() -> Result<()> {
1472        let mut graph = TSGraph::new();
1473        let line = "N\tnode1\tchr1:+:100-200\tread1:SO,read2:IN\tACGT";
1474
1475        graph.parse_node_line(line)?;
1476
1477        // let node = graph.get_node("defaul, "node1").unwrap();
1478        let node = graph.default_graph().unwrap().node_by_id("node1").unwrap();
1479
1480        assert_eq!(node.id, "node1");
1481        assert_eq!(node.exons.exons.len(), 1);
1482        assert_eq!(node.exons.exons[0].start, 100);
1483        assert_eq!(node.exons.exons[0].end, 200);
1484        assert_eq!(
1485            node.reads,
1486            vec![
1487                ReadData::builder().id("read1").identity("SO").build(),
1488                ReadData::builder().id("read2").identity("IN").build(),
1489            ]
1490        );
1491        assert_eq!(node.sequence, Some("ACGT".into()));
1492
1493        Ok(())
1494    }
1495
1496    #[test]
1497    fn test_parse_unordered_group() -> Result<()> {
1498        let mut graph = TSGraph::new();
1499        let fields = vec!["U", "group1", "node1", "node2", "edge1"];
1500
1501        graph.parse_unordered_group_line(&fields)?;
1502
1503        let graph_section = graph.default_graph().unwrap();
1504        assert_eq!(graph_section.groups.len(), 1);
1505        if let Group::Unordered { id, elements, .. } = &graph_section.groups["group1".as_bytes()] {
1506            assert_eq!(id, "group1");
1507            assert_eq!(elements.len(), 3);
1508            assert_eq!(elements[0], "node1");
1509            assert_eq!(elements[1], "node2");
1510            assert_eq!(elements[2], "edge1");
1511        } else {
1512            panic!("Expected Unordered group");
1513        }
1514
1515        Ok(())
1516    }
1517
1518    #[test]
1519    fn test_read_from_file() -> Result<()> {
1520        let file = "tests/data/test.tsg";
1521        let graph = TSGraph::from_file(file)?;
1522
1523        assert_eq!(graph.headers.len(), 2);
1524        assert_eq!(graph.nodes(DEFAULT_GRAPH_ID).len(), 5);
1525        assert_eq!(graph.edges(DEFAULT_GRAPH_ID).len(), 4);
1526
1527        graph.to_file("tests/data/test_write.tsg")?;
1528
1529        Ok(())
1530    }
1531
1532    #[test]
1533    fn test_from_str() -> Result<()> {
1534        let tsg_string = r#"H	VN	1.0
1535    H	PN	TestGraph
1536    N	node1	chr1:+:100-200	read1:SO,read2:IN	ACGT
1537    N	node2	chr1:+:300-400	read1:SO,read2:IN
1538    N	node3	chr1:+:500-600	read2:SO,read3:IN
1539    E	edge1	node1	node2	chr1,chr1,1700,2000,INV
1540    E	edge2	node2	node3	chr1,chr1,1700,2000,DUP
1541    C	chain1	node1 edge1 node2 edge2 node3
1542    "#;
1543
1544        let graph = TSGraph::from_str(tsg_string)?;
1545
1546        // Verify headers
1547        assert_eq!(graph.headers.len(), 2);
1548        assert_eq!(graph.headers[0].tag, "VN");
1549        assert_eq!(graph.headers[0].value, "1.0");
1550        assert_eq!(graph.headers[1].tag, "PN");
1551        assert_eq!(graph.headers[1].value, "TestGraph");
1552
1553        // Verify nodes
1554        assert_eq!(graph.nodes(DEFAULT_GRAPH_ID).len(), 3);
1555        let node1 = graph.node(DEFAULT_GRAPH_ID, "node1").unwrap();
1556        assert_eq!(node1.id, "node1");
1557        assert_eq!(node1.sequence, Some("ACGT".into()));
1558
1559        // Verify edges
1560        assert_eq!(graph.edges(DEFAULT_GRAPH_ID).len(), 2);
1561        let edge1 = graph.edge(DEFAULT_GRAPH_ID, "edge1").unwrap();
1562        assert_eq!(edge1.id, "edge1");
1563
1564        // Verify groups
1565        let graph_section = graph.graph(DEFAULT_GRAPH_ID).unwrap();
1566
1567        // Verify chain
1568        if let Group::Chain { elements, .. } = &graph_section.chains["chain1".as_bytes()] {
1569            assert_eq!(elements.len(), 5);
1570            assert_eq!(elements[0], "node1");
1571            assert_eq!(elements[1], "edge1");
1572        } else {
1573            panic!("Expected Chain group");
1574        }
1575
1576        Ok(())
1577    }
1578
1579    #[test]
1580    fn test_traverse() -> Result<()> {
1581        let file = "tests/data/test.tsg";
1582        let graph = TSGraph::from_file(file)?;
1583
1584        let paths = graph.traverse_by_id(DEFAULT_GRAPH_ID)?;
1585        // assert_eq!(paths.len(), 2);
1586
1587        for path in paths {
1588            println!("{}", path);
1589        }
1590        Ok(())
1591    }
1592
1593    #[test]
1594    fn test_to_dot() -> Result<()> {
1595        let file = "tests/data/test.tsg";
1596        let graph = TSGraph::from_file(file)?;
1597
1598        let dot = graph.to_dot_by_id(DEFAULT_GRAPH_ID, true, true)?;
1599        println!("{}", dot);
1600
1601        Ok(())
1602    }
1603
1604    #[test]
1605    fn test_to_json() -> Result<()> {
1606        let file = "tests/data/test.tsg";
1607        let graph = TSGraph::from_file(file)?;
1608
1609        let json = graph.to_json_by_id(DEFAULT_GRAPH_ID)?;
1610        println!("{}", json);
1611        Ok(())
1612    }
1613}