1mod analysis;
2mod attr;
3mod edge;
4mod group;
5mod header;
6mod node;
7mod path;
8mod utils;
9
10use noodles::fasta;
11use std::fs::File;
12use std::io::{BufRead, BufReader, BufWriter};
13use std::path::Path;
14use std::str::FromStr;
15use tracing::debug;
16use tracing::warn;
17
18use ahash::{HashMap, HashMapExt, HashSet, HashSetExt};
19use anyhow::{Context, Result, anyhow};
20use bstr::{BStr, BString, ByteSlice};
21
22pub use analysis::*;
23pub use attr::*;
24pub use edge::*;
25pub use group::*;
26pub use header::*;
27pub use node::*;
28pub use path::*;
29pub use utils::*;
30
31use bon::Builder;
32use petgraph::dot::{Config, Dot};
33use petgraph::graph::{DiGraph, EdgeIndex, NodeIndex};
34use petgraph::visit::EdgeRef;
35use rayon::prelude::*;
36use serde_json::json;
37use std::collections::VecDeque;
38
39pub const DEFAULT_GRAPH_ID: &str = "G.graph";
40#[derive(Debug, Clone, Default, Builder)]
42pub struct GraphSection {
43 pub id: BString,
44 pub attributes: HashMap<BString, Attribute>,
45 _graph: DiGraph<NodeData, EdgeData>,
46 pub node_indices: HashMap<BString, NodeIndex>,
47 pub edge_indices: HashMap<BString, EdgeIndex>,
48 pub groups: HashMap<BString, Group>,
49 pub chains: HashMap<BString, Group>,
50}
51
52impl GraphSection {
53 pub fn new(id: BString) -> Self {
55 Self {
56 id,
57 ..Default::default()
58 }
59 }
60
61 pub fn new_default_graph() -> Self {
62 Self::new(DEFAULT_GRAPH_ID.into())
63 }
64
65 pub fn node_indices_to_ids(&self) -> HashMap<NodeIndex, BString> {
66 self.node_indices
67 .par_iter()
68 .map(|(id, &idx)| (idx, id.clone()))
69 .collect()
70 }
71
72 pub fn edge_indices_to_ids(&self) -> HashMap<EdgeIndex, BString> {
73 self.edge_indices
74 .par_iter()
75 .map(|(id, &idx)| (idx, id.clone()))
76 .collect()
77 }
78
79 pub fn node_weight(&self, node_idx: NodeIndex) -> Option<&NodeData> {
80 self._graph.node_weight(node_idx)
81 }
82
83 pub fn edge_weight(&self, edge_idx: EdgeIndex) -> Option<&EdgeData> {
84 self._graph.edge_weight(edge_idx)
85 }
86
87 pub fn in_degree(&self, node_idx: NodeIndex) -> usize {
88 self._graph
89 .edges_directed(node_idx, petgraph::Direction::Incoming)
90 .count()
91 }
92
93 pub fn out_degree(&self, node_idx: NodeIndex) -> usize {
94 self._graph
95 .edges_directed(node_idx, petgraph::Direction::Outgoing)
96 .count()
97 }
98
99 pub fn add_node(&mut self, node_data: NodeData) -> Result<NodeIndex> {
101 let id = node_data.id.clone();
102
103 if let Some(&idx) = self.node_indices.get(&id) {
105 if let Some(attr) = self._graph.node_weight_mut(idx) {
107 *attr = node_data;
108 return Ok(idx);
109 }
110 return Err(anyhow!("Node with ID {} not found in graph", id));
111 }
112
113 let node_idx = self._graph.add_node(node_data);
115
116 debug!(
117 "graph {} add node {} ;len of node_indices: {}",
118 self.id,
119 id,
120 self.node_indices.len() + 1
121 );
122
123 self.node_indices.insert(id, node_idx);
124 Ok(node_idx)
125 }
126
127 pub fn add_edge(
129 &mut self,
130 source_id: &BStr,
131 sink_id: &BStr,
132 edge_data: EdgeData,
133 ) -> Result<EdgeIndex> {
134 let id = edge_data.id.clone();
135
136 let source_idx = match self.node_indices.get(source_id) {
138 Some(&idx) => idx,
139 None => {
140 let placeholder_node = NodeData {
142 id: source_id.to_owned(),
143 ..Default::default()
144 };
145 self.add_node(placeholder_node)?
146 }
147 };
148
149 let sink_idx = match self.node_indices.get(sink_id) {
151 Some(&idx) => idx,
152 None => {
153 let placeholder_node = NodeData {
155 id: sink_id.to_owned(),
156 ..Default::default()
157 };
158 self.add_node(placeholder_node)?
159 }
160 };
161
162 let edge_idx = self._graph.update_edge(source_idx, sink_idx, edge_data);
164
165 self.edge_indices.insert(id, edge_idx);
166 Ok(edge_idx)
167 }
168
169 fn ensure_graph_is_built(&mut self) -> Result<()> {
173 if !self.node_indices.is_empty() && !self.edge_indices.is_empty() {
175 return Ok(());
176 }
177
178 if !self.chains.is_empty() {
180 for group in self.chains.clone().values() {
181 if let Group::Chain { elements, .. } = group {
182 for (i, element_id) in elements.iter().enumerate() {
184 if i % 2 == 0 {
185 if !self.node_indices.contains_key(element_id) {
187 let node_data = NodeData {
189 id: element_id.clone(),
190 ..Default::default()
191 };
192 self.add_node(node_data)?;
193 }
194 } else if i + 1 < elements.len() {
195 if !self.edge_indices.contains_key(element_id) {
198 let source_id = &elements[i - 1];
200 let sink_id = &elements[i + 1];
201
202 let edge_data = EdgeData {
204 id: element_id.clone(),
205 ..Default::default()
206 };
207
208 self.add_edge(source_id.as_bstr(), sink_id.as_bstr(), edge_data)?;
209 }
210 }
211 }
212 }
213 }
214 return Ok(());
215 }
216
217 if self.id == DEFAULT_GRAPH_ID {
218 return Ok(());
220 }
221
222 if self.node_indices.is_empty() || self.edge_indices.is_empty() {
223 warn!(
224 "Graph {} has no nodes/edges defined and no chains available",
225 self.id
226 );
227 }
228
229 Ok(())
230 }
231
232 pub fn node_by_idx(&self, node_idx: NodeIndex) -> Option<&NodeData> {
234 self._graph.node_weight(node_idx)
235 }
236
237 pub fn node_by_id(&self, id: &str) -> Option<&NodeData> {
238 let node_idx = self.node_indices.get(&BString::from(id))?;
239 self._graph.node_weight(*node_idx)
240 }
241
242 pub fn edge_by_id(&self, id: &str) -> Option<&EdgeData> {
243 let edge_idx = self.edge_indices.get(&BString::from(id))?;
244 self._graph.edge_weight(*edge_idx)
245 }
246
247 pub fn edge_by_idx(&self, edge_idx: EdgeIndex) -> Option<&EdgeData> {
248 self._graph.edge_weight(edge_idx)
249 }
250
251 pub fn nodes(&self) -> Vec<&NodeData> {
252 self.node_indices
253 .values()
254 .map(|&idx| self._graph.node_weight(idx).unwrap())
255 .collect()
256 }
257
258 pub fn edges(&self) -> Vec<&EdgeData> {
259 self.edge_indices
260 .values()
261 .map(|&idx| self._graph.edge_weight(idx).unwrap())
262 .collect()
263 }
264
265 pub fn find_node_id_by_idx(&self, node_idx: NodeIndex) -> Option<&BString> {
267 self.node_indices
268 .par_iter()
269 .find_map_first(|(id, &idx)| if idx == node_idx { Some(id) } else { None })
270 }
271
272 pub fn traverse(&self) -> Result<Vec<TSGPath>> {
293 let source_nodes: Vec<NodeIndex> = self
295 ._graph
296 .node_indices()
297 .filter(|&idx| {
298 self._graph
299 .edges_directed(idx, petgraph::Direction::Incoming)
300 .count()
301 == 0
302 })
303 .collect();
304
305 if source_nodes.is_empty() {
306 return Ok(Vec::new());
307 }
308
309 let mut all_paths = Vec::new();
310 let mut node_read_ids_cache: HashMap<NodeIndex, HashSet<BString>> = HashMap::new();
312
313 for node_idx in self._graph.node_indices() {
315 if let Some(node) = self._graph.node_weight(node_idx) {
316 let read_ids: HashSet<BString> =
317 node.reads.par_iter().map(|r| r.id.clone()).collect();
318 node_read_ids_cache.insert(node_idx, read_ids);
319 }
320 }
321
322 for &start_node in &source_nodes {
324 if let Some(read_set) = node_read_ids_cache.get(&start_node) {
326 if read_set.is_empty() {
327 continue;
328 }
329
330 let mut queue = VecDeque::new();
331 let mut initial_path = TSGPath::builder().graph(self).build();
333 initial_path.add_node(start_node);
334
335 queue.push_back((start_node, initial_path, read_set.clone()));
336
337 while let Some((current_node, path, active_reads)) = queue.pop_front() {
338 let outgoing_edges: Vec<_> = self
340 ._graph
341 .edges_directed(current_node, petgraph::Direction::Outgoing)
342 .collect();
343
344 if outgoing_edges.is_empty() {
346 path.validate()?;
347 all_paths.push(path);
348 continue;
349 }
350
351 for edge_ref in outgoing_edges {
352 let edge_idx = edge_ref.id();
353 let target_node = edge_ref.target();
354
355 if let Some(target_read_ids) = node_read_ids_cache.get(&target_node) {
356 let continuing_reads: HashSet<_> = active_reads
358 .par_iter()
359 .filter(|id| target_read_ids.contains(*id))
360 .cloned()
361 .collect();
362
363 if continuing_reads.is_empty() {
364 continue;
366 }
367
368 let has_in_reads =
370 if let Some(target_data) = self._graph.node_weight(target_node) {
371 target_data
372 .reads
373 .par_iter()
374 .any(|r| r.identity == ReadIdentity::IN)
375 } else {
376 false
377 };
378
379 if has_in_reads {
380 let mut can_continue = false;
382 let outgoing_from_target: Vec<_> = self
383 ._graph
384 .edges_directed(target_node, petgraph::Direction::Outgoing)
385 .map(|e| e.target())
386 .collect();
387
388 for &next_node in &outgoing_from_target {
389 if let Some(next_read_ids) = node_read_ids_cache.get(&next_node)
390 {
391 if continuing_reads
393 .par_iter()
394 .any(|id| next_read_ids.contains(id))
395 {
396 can_continue = true;
397 break;
398 }
399 }
400 }
401
402 if !can_continue && !outgoing_from_target.is_empty() {
403 continue;
405 }
406 }
407
408 let mut new_path = path.clone();
410 new_path.add_edge(edge_idx);
411 new_path.add_node(target_node);
412 queue.push_back((target_node, new_path, continuing_reads));
413 }
414 }
415 }
416 }
417 }
418
419 Ok(all_paths)
420 }
421
422 pub fn to_dot(&self, node_label: bool, edge_label: bool) -> Result<String> {
423 let mut config = vec![];
424 if node_label {
425 config.push(Config::NodeIndexLabel);
426 }
427 if edge_label {
428 config.push(Config::EdgeIndexLabel);
429 }
430
431 let dot = Dot::with_config(&self._graph, &config);
432 Ok(format!("{:?}", dot))
433 }
434
435 pub fn to_json(&self) -> Result<serde_json::Value> {
436 let mut nodes = Vec::new();
437 let mut edges = Vec::new();
438
439 for node_idx in self._graph.node_indices() {
441 if let Some(node) = self._graph.node_weight(node_idx) {
442 if let Ok(node_json) = node.to_json(None) {
443 nodes.push(node_json);
444 }
445 }
446 }
447
448 for edge_idx in self._graph.edge_indices() {
450 if let Some(edge) = self._graph.edge_weight(edge_idx) {
451 let edge_endpoints = self._graph.edge_endpoints(edge_idx);
452
453 if let Some((source, target)) = edge_endpoints {
454 let source_id = self.find_node_id_by_idx(source);
455 let target_id = self.find_node_id_by_idx(target);
456
457 let source_data = self.node_by_idx(source).unwrap();
460 let target_data = self.node_by_idx(target).unwrap();
461
462 let source_reads = source_data
464 .reads
465 .iter()
466 .map(|r| r.id.clone())
467 .collect::<HashSet<_>>();
468 let target_reads = target_data
469 .reads
470 .iter()
471 .map(|r| r.id.clone())
472 .collect::<HashSet<_>>();
473 let edge_weight = source_reads
474 .intersection(&target_reads)
475 .collect::<HashSet<_>>()
476 .len();
477
478 if let (Some(source_id), Some(target_id)) = (source_id, target_id) {
479 let edge_data = json!({
480 "data": {
481 "id": edge.id.to_str().unwrap(),
482 "source": source_id.to_str().unwrap(),
483 "target": target_id.to_str().unwrap(),
484 "weight": edge_weight,
485 "breakpoints": format!("{}", edge.sv)
486 }
487 });
488 edges.push(edge_data);
489 }
490 }
491 }
492 }
493
494 let elements = json!({
496 "directed": true,
497 "multigraph": true,
498 "elements": {
499 "nodes": nodes,
500 "edges": edges
501 }
502 });
503
504 Ok(elements)
505 }
506
507 pub fn annotate_node_with_sequence<P: AsRef<Path>>(
508 &mut self,
509 reference_genome_path: P,
510 ) -> Result<()> {
511 let mut reader = fasta::io::indexed_reader::Builder::default()
512 .build_from_path(reference_genome_path.as_ref())?;
513
514 for node_idx in self.node_indices.values() {
515 let node_data = self._graph.node_weight_mut(*node_idx).unwrap();
516
517 let region = format!(
518 "{}:{}-{}",
519 node_data.reference_id,
520 node_data.reference_start() - 1, node_data.reference_end(),
522 )
523 .parse()?;
524 let record = reader.query(®ion)?;
525 node_data.sequence = Some(record.sequence().as_ref().into());
526 }
527 Ok(())
528 }
529}
530
531#[derive(Debug, Clone, Default, Builder)]
533pub struct InterGraphLink {
534 pub id: BString,
535 pub source_graph: BString,
536 pub source_element: BString,
537 pub target_graph: BString,
538 pub target_element: BString,
539 pub link_type: BString,
540 pub attributes: HashMap<BString, Attribute>,
541}
542
543#[derive(Debug, Clone, Default, Builder)]
545pub struct TSGraph {
546 pub headers: Vec<Header>,
547 pub graphs: HashMap<BString, GraphSection>,
548 pub links: Vec<InterGraphLink>,
549 current_graph_id: Option<BString>, }
551
552impl TSGraph {
553 pub fn new() -> Self {
555 let graph = GraphSection::new_default_graph();
556 let mut graphs = HashMap::new();
557 graphs.insert(graph.id.clone(), graph);
558 Self {
559 graphs,
560 current_graph_id: Some(DEFAULT_GRAPH_ID.into()),
561 ..Default::default()
562 }
563 }
564
565 fn parse_header_line(&mut self, fields: &[&str]) -> Result<()> {
567 if fields.len() < 3 {
568 return Err(anyhow!("Invalid header line format"));
569 }
570
571 self.headers.push(Header {
572 tag: fields[1].into(),
573 value: fields[2].into(),
574 });
575
576 Ok(())
577 }
578
579 fn parse_graph_line(&mut self, fields: &[&str]) -> Result<()> {
581 if fields.len() < 2 {
582 return Err(anyhow!("Invalid graph line format"));
583 }
584
585 let graph_id: BString = fields[1].into();
586
587 if self.graphs.contains_key(&graph_id) {
589 return Err(anyhow!(
590 "Graph with ID {} already exists",
591 graph_id.to_str().unwrap_or("")
592 ));
593 }
594
595 let mut graph_section = GraphSection::new(graph_id.clone());
597
598 if fields.len() > 2 {
600 for attr_str in &fields[2..] {
601 let attr = attr_str.parse::<Attribute>()?;
602 graph_section.attributes.insert(attr.tag.clone(), attr);
603 }
604 }
605
606 self.current_graph_id = Some(graph_id.clone());
608 self.graphs.insert(graph_id, graph_section);
609
610 Ok(())
611 }
612
613 fn current_graph_mut(&mut self) -> Result<&mut GraphSection> {
615 if let Some(graph_id) = &self.current_graph_id {
616 if let Some(graph) = self.graphs.get_mut(graph_id) {
617 return Ok(graph);
618 }
619 }
620 Err(anyhow!("No active graph section"))
621 }
622
623 fn parse_link_line(&mut self, fields: &[&str]) -> Result<()> {
625 if fields.len() < 5 {
626 return Err(anyhow!("Invalid link line format"));
627 }
628
629 let id: BString = fields[1].into();
630
631 let source_ref: Vec<&str> = fields[2].splitn(2, ':').collect();
633 let target_ref: Vec<&str> = fields[3].splitn(2, ':').collect();
634
635 if source_ref.len() != 2 || target_ref.len() != 2 {
636 return Err(anyhow!("Invalid element reference format in link line"));
637 }
638
639 let source_graph: BString = source_ref[0].into();
640 let source_element: BString = source_ref[1].into();
641 let target_graph: BString = target_ref[0].into();
642 let target_element: BString = target_ref[1].into();
643
644 if !self.graphs.contains_key(&source_graph) {
646 return Err(anyhow!(
647 "Source graph {} not found",
648 source_graph.to_str().unwrap_or("")
649 ));
650 }
651 if !self.graphs.contains_key(&target_graph) {
652 return Err(anyhow!(
653 "Target graph {} not found",
654 target_graph.to_str().unwrap_or("")
655 ));
656 }
657
658 let link_type: BString = fields[4].into();
659
660 let mut link = InterGraphLink {
661 id,
662 source_graph,
663 source_element,
664 target_graph,
665 target_element,
666 link_type,
667 attributes: HashMap::new(),
668 };
669
670 if fields.len() > 5 {
672 for attr_str in &fields[5..] {
673 let attr = attr_str.parse::<Attribute>()?;
674 link.attributes.insert(attr.tag.clone(), attr);
675 }
676 }
677
678 self.links.push(link);
679 Ok(())
680 }
681
682 fn parse_node_line(&mut self, fields: &str) -> Result<()> {
684 let node_data = NodeData::from_str(fields)?;
685 let graph = self.current_graph_mut()?;
686 graph.add_node(node_data)?;
687 Ok(())
688 }
689
690 fn parse_edge_line(&mut self, fields: &[&str]) -> Result<()> {
692 if fields.len() < 5 {
693 return Err(anyhow!("Invalid edge line format"));
694 }
695
696 let id: BString = fields[1].into();
697 let source_id: BString = fields[2].into();
698 let sink_id: BString = fields[3].into();
699 let sv = fields[4].parse::<StructuralVariant>()?;
700
701 let edge_data = EdgeData {
702 id,
703 sv,
704 attributes: HashMap::new(),
705 };
706
707 let graph = self.current_graph_mut()?;
708 graph.add_edge(source_id.as_bstr(), sink_id.as_bstr(), edge_data)?;
709 Ok(())
710 }
711
712 fn parse_unordered_group_line(&mut self, fields: &[&str]) -> Result<()> {
714 if fields.len() < 3 {
715 return Err(anyhow!("Invalid unordered group line format"));
716 }
717
718 let id: BString = fields[1].into();
719 let graph = self.current_graph_mut()?;
720
721 if graph.groups.contains_key(&id) {
723 return Err(anyhow!("Group with ID {} already exists", id));
724 }
725
726 let elements_str = fields[2..].join(" ");
728 let elements = elements_str
729 .split_whitespace()
730 .map(|s| s.into())
731 .collect::<Vec<_>>();
732
733 let group = Group::Unordered {
734 id: id.clone(),
735 elements,
736 attributes: HashMap::new(),
737 };
738
739 graph.groups.insert(id, group);
740 Ok(())
741 }
742
743 fn parse_path_line(&mut self, fields: &[&str]) -> Result<()> {
745 if fields.len() < 3 {
746 return Err(anyhow!("Invalid path line format"));
747 }
748
749 let id: BString = fields[1].into();
750 let graph = self.current_graph_mut()?;
751
752 if graph.groups.contains_key(&id) {
754 return Err(anyhow!("Group with ID {} already exists", id));
755 }
756
757 let elements_str = fields[2..].join(" ");
759 let elements = elements_str
760 .split_whitespace()
761 .map(|s| s.parse::<OrientedElement>())
762 .collect::<Result<Vec<_>, _>>()?;
763
764 let group = Group::Ordered {
765 id: id.clone(),
766 elements,
767 attributes: HashMap::new(),
768 };
769
770 graph.groups.insert(id, group);
771 Ok(())
772 }
773
774 fn parse_chain_line(&mut self, fields: &[&str]) -> Result<()> {
776 if fields.len() < 3 {
777 return Err(anyhow!("Invalid chain line format"));
778 }
779
780 let id: BString = fields[1].into();
781 let graph = self.current_graph_mut()?;
782
783 if graph.groups.contains_key(&id) {
785 return Err(anyhow!("Group with ID {} already exists", id));
786 }
787
788 let elements_str = fields[2..].join(" ");
790 let elements: Vec<BString> = elements_str.split_whitespace().map(|s| s.into()).collect();
791
792 if elements.is_empty() {
794 return Err(anyhow!("Chain must contain at least one element"));
795 }
796
797 if elements.len() % 2 == 0 {
798 return Err(anyhow!(
799 "Chain must have an odd number of elements (starting and ending with nodes)"
800 ));
801 }
802
803 let group = Group::Chain {
805 id: id.clone(),
806 elements,
807 attributes: HashMap::new(),
808 };
809
810 graph.chains.insert(id.clone(), group.clone());
812 graph.groups.insert(id, group);
813 Ok(())
814 }
815
816 fn parse_attribute_line(&mut self, fields: &[&str]) -> Result<()> {
818 if fields.len() < 4 {
819 return Err(anyhow!("Invalid attribute line format"));
820 }
821
822 let element_type = fields[1];
823 let element_id: BString = fields[2].into();
824 let graph = self.current_graph_mut()?;
825
826 let attrs: Vec<Attribute> = fields
827 .iter()
828 .skip(3)
829 .map(|s| s.parse())
830 .collect::<Result<Vec<_>>>()
831 .context("invalidate attribute line")?;
832
833 match element_type {
834 "N" => {
835 if let Some(&node_idx) = graph.node_indices.get(&element_id) {
836 if let Some(node_data) = graph._graph.node_weight_mut(node_idx) {
837 for attr in attrs {
838 let tag = attr.tag.clone();
839 node_data.attributes.insert(tag, attr);
840 }
841 } else {
842 return Err(anyhow!("Node with ID {} not found in graph", element_id));
843 }
844 } else {
845 return Err(anyhow!("Node with ID {} not found", element_id));
846 }
847 }
848 "E" => {
849 if let Some(&edge_idx) = graph.edge_indices.get(&element_id) {
850 if let Some(edge_data) = graph._graph.edge_weight_mut(edge_idx) {
851 for attr in attrs {
852 let tag = attr.tag.clone();
853 edge_data.attributes.insert(tag, attr);
854 }
855 } else {
856 return Err(anyhow!("Edge with ID {} not found in graph", element_id));
857 }
858 } else {
859 return Err(anyhow!("Edge with ID {} not found", element_id));
860 }
861 }
862 "U" | "P" | "C" => {
863 if let Some(group) = graph.groups.get_mut(&element_id) {
864 match group {
865 Group::Unordered { attributes, .. }
866 | Group::Ordered { attributes, .. }
867 | Group::Chain { attributes, .. } => {
868 for attr in attrs {
869 let tag = attr.tag.clone();
870 attributes.insert(tag, attr);
871 }
872 }
873 }
874 } else {
875 return Err(anyhow!("Group with ID {} not found", element_id));
876 }
877 }
878 "G" => {
879 if let Some(graph_section) = self.graphs.get_mut(&element_id) {
881 for attr in attrs {
882 let tag = attr.tag.clone();
883 graph_section.attributes.insert(tag, attr);
884 }
885 } else {
886 return Err(anyhow!("Graph with ID {} not found", element_id));
887 }
888 }
889 _ => {
890 return Err(anyhow!("Unknown element type: {}", element_type));
891 }
892 }
893
894 Ok(())
895 }
896
897 fn validate(&self) -> Result<()> {
899 for (graph_id, graph) in &self.graphs {
901 for (id, group) in &graph.groups {
903 if let Group::Ordered { elements, .. } = group {
904 for element in elements {
906 let element_exists = graph.node_indices.contains_key(&element.id)
907 || graph.edge_indices.contains_key(&element.id)
908 || graph.groups.contains_key(&element.id);
909
910 if !element_exists {
911 return Err(anyhow!(
912 "Path {} in graph {} references non-existent element {}",
913 id,
914 graph_id,
915 element.id
916 ));
917 }
918 }
919 }
920 }
921 }
922
923 for link in &self.links {
925 let source_graph = self.graphs.get(&link.source_graph).ok_or_else(|| {
927 anyhow!(
928 "Link {} references non-existent graph {}",
929 link.id,
930 link.source_graph
931 )
932 })?;
933
934 let source_exists = source_graph.node_indices.contains_key(&link.source_element)
935 || source_graph.edge_indices.contains_key(&link.source_element)
936 || source_graph.groups.contains_key(&link.source_element);
937
938 if !source_exists {
939 return Err(anyhow!(
940 "Link {} references non-existent element {}:{}",
941 link.id,
942 link.source_graph,
943 link.source_element
944 ));
945 }
946
947 let target_graph = self.graphs.get(&link.target_graph).ok_or_else(|| {
949 anyhow!(
950 "Link {} references non-existent graph {}",
951 link.id,
952 link.target_graph
953 )
954 })?;
955
956 let target_exists = target_graph.node_indices.contains_key(&link.target_element)
957 || target_graph.edge_indices.contains_key(&link.target_element)
958 || target_graph.groups.contains_key(&link.target_element);
959
960 if !target_exists {
961 return Err(anyhow!(
962 "Link {} references non-existent element {}:{}",
963 link.id,
964 link.target_graph,
965 link.target_element
966 ));
967 }
968 }
969
970 Ok(())
971 }
972
973 pub fn from_reader<R: BufRead>(reader: R) -> Result<Self> {
974 let mut tsgraph = TSGraph::new();
975
976 let default_graph_id: BString = DEFAULT_GRAPH_ID.into();
978 let default_graph = GraphSection::new(default_graph_id.clone());
979 tsgraph
980 .graphs
981 .insert(default_graph_id.clone(), default_graph);
982
983 tsgraph.current_graph_id = Some(default_graph_id);
984
985 for line in reader.lines() {
987 let line = line?;
988 if line.is_empty() || line.starts_with('#') {
989 continue;
990 }
991 let fields: Vec<&str> = line.split_whitespace().collect();
992 if fields.is_empty() {
993 continue;
994 }
995
996 match fields[0] {
997 "H" => tsgraph.parse_header_line(&fields)?,
998 "G" => tsgraph.parse_graph_line(&fields)?,
999 "N" => tsgraph.parse_node_line(&line)?,
1000 "E" => tsgraph.parse_edge_line(&fields)?,
1001 "U" => tsgraph.parse_unordered_group_line(&fields)?,
1002 "P" => tsgraph.parse_path_line(&fields)?,
1003 "C" => tsgraph.parse_chain_line(&fields)?,
1004 "A" => tsgraph.parse_attribute_line(&fields)?,
1005 "L" => tsgraph.parse_link_line(&fields)?,
1006 _ => {
1007 debug!("Ignoring unknown record type: {}", fields[0]);
1009 }
1010 }
1011 }
1012
1013 for graph_section in tsgraph.graphs.values_mut() {
1015 for (id, group) in &graph_section.groups {
1017 if let Group::Chain { .. } = group {
1018 if !graph_section.chains.contains_key(id) {
1019 graph_section.chains.insert(id.clone(), group.clone());
1020 }
1021 }
1022 }
1023
1024 graph_section.ensure_graph_is_built()?;
1026 }
1027
1028 tsgraph.validate()?;
1030
1031 if let Some(default_graph) = tsgraph.graph(DEFAULT_GRAPH_ID) {
1033 if default_graph.node_indices.is_empty() {
1034 tsgraph.graphs.remove(&BString::from(DEFAULT_GRAPH_ID));
1035 }
1036 }
1037 Ok(tsgraph)
1038 }
1039
1040 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
1042 let file = File::open(path)?;
1043 let reader = BufReader::new(file);
1044 Self::from_reader(reader)
1045 }
1046
1047 pub fn to_writer<W: std::io::Write>(&self, writer: &mut W) -> Result<()> {
1049 writeln!(writer, "# Global header")?;
1051 for header in &self.headers {
1052 writeln!(writer, "{}", header)?;
1053 }
1054
1055 let new_header = Header::builder().tag("PG").value("tsg").build();
1056 writeln!(writer, "{}", new_header)?;
1057
1058 for (graph_id, graph) in &self.graphs {
1060 if graph_id == &BString::from(DEFAULT_GRAPH_ID) && graph.node_indices.is_empty() {
1062 continue;
1063 }
1064
1065 writeln!(writer, "\n# Graph: {}", graph_id)?;
1066
1067 write!(writer, "G\t{}", graph_id)?;
1069 for attr in graph.attributes.values() {
1070 write!(writer, "\t{}", attr)?;
1071 }
1072 writeln!(writer)?;
1073
1074 writeln!(writer, "# Nodes")?;
1076 for node_idx in graph._graph.node_indices() {
1077 if let Some(node_data) = graph._graph.node_weight(node_idx) {
1078 writeln!(writer, "{}", node_data)?;
1079 }
1080 }
1081
1082 writeln!(writer, "# Edges")?;
1084 for edge_ref in graph._graph.edge_references() {
1085 let edge = edge_ref.weight();
1086 let source_idx = edge_ref.source();
1087 let sink_idx = edge_ref.target();
1088
1089 if let (Some(source), Some(sink)) = (
1090 graph._graph.node_weight(source_idx),
1091 graph._graph.node_weight(sink_idx),
1092 ) {
1093 writeln!(
1094 writer,
1095 "E\t{}\t{}\t{}\t{}",
1096 edge.id, source.id, sink.id, edge.sv
1097 )?;
1098 }
1099 }
1100
1101 writeln!(writer, "# Groups")?;
1103 let mut seen_chain_ids = HashSet::new();
1104
1105 for group in graph.groups.values() {
1106 match group {
1107 Group::Unordered { id, elements, .. } => {
1108 let elements_str: Vec<String> = elements
1109 .par_iter()
1110 .map(|e| e.to_str().unwrap_or("").to_string())
1111 .collect();
1112 writeln!(
1113 writer,
1114 "U\t{}\t{}",
1115 id.to_str().unwrap_or(""),
1116 elements_str.join(" ")
1117 )?;
1118 }
1119 Group::Ordered { id, elements, .. } => {
1120 let elements_str: Vec<String> =
1121 elements.par_iter().map(|e| e.to_string()).collect();
1122 writeln!(
1123 writer,
1124 "P\t{}\t{}",
1125 id.to_str().unwrap_or(""),
1126 elements_str.join(" ")
1127 )?;
1128 }
1129 Group::Chain { id, elements, .. } => {
1130 if seen_chain_ids.contains(id) {
1132 continue;
1133 }
1134 seen_chain_ids.insert(id);
1135
1136 let elements_str: Vec<String> = elements
1137 .par_iter()
1138 .map(|e| e.to_str().unwrap_or("").to_string())
1139 .collect();
1140 writeln!(
1141 writer,
1142 "C\t{}\t{}",
1143 id.to_str().unwrap_or(""),
1144 elements_str.join(" ")
1145 )?;
1146 }
1147 }
1148 }
1149
1150 writeln!(writer, "# Attributes")?;
1152
1153 for node_idx in graph._graph.node_indices() {
1155 if let Some(node) = graph._graph.node_weight(node_idx) {
1156 for attr in node.attributes.values() {
1157 writeln!(writer, "A\tN\t{}\t{}", node.id, attr)?;
1158 }
1159 }
1160 }
1161
1162 for edge_idx in graph._graph.edge_indices() {
1164 if let Some(edge) = graph._graph.edge_weight(edge_idx) {
1165 for attr in edge.attributes.values() {
1166 writeln!(writer, "A\tE\t{}\t{}", edge.id, attr)?;
1167 }
1168 }
1169 }
1170
1171 for (id, group) in &graph.groups {
1173 let (group_type, attributes) = match group {
1174 Group::Unordered { attributes, .. } => ("U", attributes),
1175 Group::Ordered { attributes, .. } => ("P", attributes),
1176 Group::Chain { attributes, .. } => ("C", attributes),
1177 };
1178
1179 for attr in attributes.values() {
1180 writeln!(
1181 writer,
1182 "A\t{}\t{}\t{}",
1183 group_type,
1184 id.to_str().unwrap_or(""),
1185 attr
1186 )?;
1187 }
1188 }
1189 }
1190
1191 if !self.links.is_empty() {
1193 writeln!(writer, "\n# Inter-graph links")?;
1194 for link in &self.links {
1195 write!(
1196 writer,
1197 "L\t{}\t{}:{}\t{}:{}\t{}",
1198 link.id,
1199 link.source_graph,
1200 link.source_element,
1201 link.target_graph,
1202 link.target_element,
1203 link.link_type
1204 )?;
1205
1206 for attr in link.attributes.values() {
1207 write!(writer, "\t{}", attr)?;
1208 }
1209 writeln!(writer)?;
1210 }
1211 }
1212
1213 writer.flush()?;
1214 Ok(())
1215 }
1216
1217 pub fn to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
1219 let file = File::create(path)?;
1220 let mut writer = BufWriter::new(file);
1221 self.to_writer(&mut writer)
1222 }
1223
1224 pub fn graph(&self, id: &str) -> Option<&GraphSection> {
1228 self.graphs.get(&BString::from(id))
1229 }
1230
1231 pub fn graph_mut(&mut self, id: &str) -> Option<&mut GraphSection> {
1232 self.graphs.get_mut(&BString::from(id))
1233 }
1234
1235 pub fn default_graph(&self) -> Option<&GraphSection> {
1237 self.graphs.get(&BString::from(DEFAULT_GRAPH_ID))
1238 }
1239
1240 pub fn default_graph_mut(&mut self) -> Option<&mut GraphSection> {
1241 self.graphs.get_mut(&BString::from(DEFAULT_GRAPH_ID))
1242 }
1243
1244 pub fn node(&self, graph_id: &str, node_id: &str) -> Option<&NodeData> {
1246 let graph = self.graphs.get(&BString::from(graph_id))?;
1247 graph.node_by_id(node_id)
1248 }
1249
1250 pub fn edge(&self, graph_id: &str, edge_id: &str) -> Option<&EdgeData> {
1252 let graph = self.graphs.get(&BString::from(graph_id))?;
1253 graph.edge_by_id(edge_id)
1254 }
1255
1256 pub fn chain_nodes(&self, graph_id: &str, chain_id: &BStr) -> Option<Vec<NodeIndex>> {
1258 let graph = self.graphs.get(&BString::from(graph_id))?;
1259 let group = graph.chains.get(chain_id)?;
1260
1261 match group {
1262 Group::Chain { elements, .. } => {
1263 let mut nodes = Vec::with_capacity(elements.len().div_ceil(2));
1264
1265 for (i, element_id) in elements.iter().enumerate() {
1266 if i % 2 == 0 {
1267 if let Some(&node_idx) = graph.node_indices.get(element_id) {
1269 nodes.push(node_idx);
1270 } else {
1271 return None; }
1273 }
1274 }
1275
1276 Some(nodes)
1277 }
1278 _ => None, }
1280 }
1281
1282 pub fn chain_edges(&self, graph_id: &str, chain_id: &BStr) -> Option<Vec<EdgeIndex>> {
1284 let graph = self.graphs.get(&BString::from(graph_id))?;
1285 let group = graph.chains.get(chain_id)?;
1286
1287 match group {
1288 Group::Chain { elements, .. } => {
1289 let mut edges = Vec::with_capacity(elements.len() / 2);
1290
1291 for (i, element_id) in elements.iter().enumerate() {
1292 if i % 2 == 1 {
1293 if let Some(&edge_idx) = graph.edge_indices.get(element_id) {
1295 edges.push(edge_idx);
1296 } else {
1297 return None; }
1299 }
1300 }
1301
1302 Some(edges)
1303 }
1304 _ => None, }
1306 }
1307
1308 pub fn find_node_id_by_idx(&self, graph_id: &str, node_idx: NodeIndex) -> Option<&BString> {
1310 let graph = self.graphs.get(&BString::from(graph_id))?;
1311 graph
1312 .node_indices
1313 .par_iter()
1314 .find_map_first(|(id, &idx)| if idx == node_idx { Some(id) } else { None })
1315 }
1316
1317 pub fn node_by_idx(&self, graph_id: &str, node_idx: NodeIndex) -> Option<&NodeData> {
1318 let graph = self.graphs.get(&BString::from(graph_id))?;
1319 graph.node_by_idx(node_idx)
1320 }
1321
1322 pub fn edge_by_idx(&self, graph_id: &str, edge_idx: EdgeIndex) -> Option<&EdgeData> {
1323 let graph = self.graphs.get(&BString::from(graph_id))?;
1324 graph.edge_by_idx(edge_idx)
1325 }
1326
1327 pub fn nodes(&self, graph_id: &str) -> Vec<&NodeData> {
1329 let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1330 graph
1331 .node_indices
1332 .values()
1333 .filter_map(|&idx| graph._graph.node_weight(idx))
1334 .collect()
1335 }
1336
1337 pub fn edges(&self, graph_id: &str) -> Vec<&EdgeData> {
1339 let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1340 graph
1341 .edge_indices
1342 .values()
1343 .filter_map(|&idx| graph._graph.edge_weight(idx))
1344 .collect()
1345 }
1346
1347 pub fn traverse_by_id(&self, graph_id: &str) -> Result<Vec<TSGPath>> {
1349 let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1350 graph.traverse()
1351 }
1352
1353 pub fn traverse_all_graphs(&self) -> Result<Vec<TSGPath>> {
1355 let all_paths = self
1356 .graphs
1357 .values()
1358 .try_fold(Vec::new(), |mut all_paths, graph| {
1359 let paths = graph.traverse()?;
1360 all_paths.extend(paths);
1361 Ok(all_paths)
1362 });
1363 all_paths
1364 }
1365
1366 pub fn to_dot_by_id(
1367 &self,
1368 graph_id: &str,
1369 node_label: bool,
1370 edge_label: bool,
1371 ) -> Result<String> {
1372 let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1373 graph.to_dot(node_label, edge_label)
1374 }
1375
1376 pub fn to_json_by_id(&self, graph_id: &str) -> Result<serde_json::Value> {
1377 let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1378 graph.to_json()
1379 }
1380}
1381
1382impl FromStr for TSGraph {
1383 type Err = anyhow::Error;
1384 fn from_str(s: &str) -> Result<Self> {
1385 let reader = BufReader::new(s.as_bytes());
1386 Self::from_reader(reader)
1387 }
1388}
1389
1390#[cfg(test)]
1391mod tests {
1392 use super::*;
1393
1394 #[test]
1395 fn test_create_empty_graph() {
1396 let graph = TSGraph::new();
1397 assert_eq!(graph.headers.len(), 0);
1398 assert_eq!(graph.default_graph().unwrap().nodes().len(), 0);
1399 assert_eq!(graph.edges(DEFAULT_GRAPH_ID).len(), 0);
1400 assert_eq!(graph.graphs.len(), 1);
1401 assert_eq!(graph.links.len(), 0);
1402 }
1403
1404 #[test]
1405 fn test_add_node() -> Result<()> {
1406 let mut graph = TSGraph::new();
1407 let node = NodeData {
1408 id: "node1".into(),
1409 reference_id: "chr1".into(),
1410 ..Default::default()
1411 };
1412
1413 graph.default_graph_mut().unwrap().add_node(node.clone())?;
1414 assert_eq!(graph.nodes(DEFAULT_GRAPH_ID).len(), 1);
1415 assert_eq!(graph.node(DEFAULT_GRAPH_ID, "node1").unwrap().id, node.id);
1416 Ok(())
1417 }
1418
1419 #[test]
1420 fn test_add_edge() -> Result<()> {
1421 let mut graph = TSGraph::new();
1422
1423 let node1 = NodeData {
1425 id: "node1".into(),
1426 reference_id: "chr1".into(),
1427 ..Default::default()
1428 };
1429
1430 let node2 = NodeData {
1431 id: "node2".into(),
1432 ..Default::default()
1433 };
1434
1435 graph.graph_mut(DEFAULT_GRAPH_ID).unwrap().add_node(node1)?;
1436 graph.graph_mut(DEFAULT_GRAPH_ID).unwrap().add_node(node2)?;
1437
1438 let edge = EdgeData {
1440 id: "edge1".into(),
1441 ..Default::default()
1442 };
1443
1444 graph.graph_mut(DEFAULT_GRAPH_ID).unwrap().add_edge(
1445 "node1".into(),
1446 "node2".into(),
1447 edge.clone(),
1448 )?;
1449
1450 assert_eq!(graph.edges(DEFAULT_GRAPH_ID).len(), 1);
1451 assert_eq!(graph.edge(DEFAULT_GRAPH_ID, "edge1").unwrap().id, edge.id);
1452
1453 Ok(())
1454 }
1455
1456 #[test]
1457 fn test_parse_header_line() -> Result<()> {
1458 let mut graph = TSGraph::new();
1459 let fields = vec!["H", "VN", "1.0"];
1460
1461 graph.parse_header_line(&fields)?;
1462
1463 assert_eq!(graph.headers.len(), 1);
1464 assert_eq!(graph.headers[0].tag, "VN");
1465 assert_eq!(graph.headers[0].value, "1.0");
1466
1467 Ok(())
1468 }
1469
1470 #[test]
1471 fn test_parse_node_line() -> Result<()> {
1472 let mut graph = TSGraph::new();
1473 let line = "N\tnode1\tchr1:+:100-200\tread1:SO,read2:IN\tACGT";
1474
1475 graph.parse_node_line(line)?;
1476
1477 let node = graph.default_graph().unwrap().node_by_id("node1").unwrap();
1479
1480 assert_eq!(node.id, "node1");
1481 assert_eq!(node.exons.exons.len(), 1);
1482 assert_eq!(node.exons.exons[0].start, 100);
1483 assert_eq!(node.exons.exons[0].end, 200);
1484 assert_eq!(
1485 node.reads,
1486 vec![
1487 ReadData::builder().id("read1").identity("SO").build(),
1488 ReadData::builder().id("read2").identity("IN").build(),
1489 ]
1490 );
1491 assert_eq!(node.sequence, Some("ACGT".into()));
1492
1493 Ok(())
1494 }
1495
1496 #[test]
1497 fn test_parse_unordered_group() -> Result<()> {
1498 let mut graph = TSGraph::new();
1499 let fields = vec!["U", "group1", "node1", "node2", "edge1"];
1500
1501 graph.parse_unordered_group_line(&fields)?;
1502
1503 let graph_section = graph.default_graph().unwrap();
1504 assert_eq!(graph_section.groups.len(), 1);
1505 if let Group::Unordered { id, elements, .. } = &graph_section.groups["group1".as_bytes()] {
1506 assert_eq!(id, "group1");
1507 assert_eq!(elements.len(), 3);
1508 assert_eq!(elements[0], "node1");
1509 assert_eq!(elements[1], "node2");
1510 assert_eq!(elements[2], "edge1");
1511 } else {
1512 panic!("Expected Unordered group");
1513 }
1514
1515 Ok(())
1516 }
1517
1518 #[test]
1519 fn test_read_from_file() -> Result<()> {
1520 let file = "tests/data/test.tsg";
1521 let graph = TSGraph::from_file(file)?;
1522
1523 assert_eq!(graph.headers.len(), 2);
1524 assert_eq!(graph.nodes(DEFAULT_GRAPH_ID).len(), 5);
1525 assert_eq!(graph.edges(DEFAULT_GRAPH_ID).len(), 4);
1526
1527 graph.to_file("tests/data/test_write.tsg")?;
1528
1529 Ok(())
1530 }
1531
1532 #[test]
1533 fn test_from_str() -> Result<()> {
1534 let tsg_string = r#"H VN 1.0
1535 H PN TestGraph
1536 N node1 chr1:+:100-200 read1:SO,read2:IN ACGT
1537 N node2 chr1:+:300-400 read1:SO,read2:IN
1538 N node3 chr1:+:500-600 read2:SO,read3:IN
1539 E edge1 node1 node2 chr1,chr1,1700,2000,INV
1540 E edge2 node2 node3 chr1,chr1,1700,2000,DUP
1541 C chain1 node1 edge1 node2 edge2 node3
1542 "#;
1543
1544 let graph = TSGraph::from_str(tsg_string)?;
1545
1546 assert_eq!(graph.headers.len(), 2);
1548 assert_eq!(graph.headers[0].tag, "VN");
1549 assert_eq!(graph.headers[0].value, "1.0");
1550 assert_eq!(graph.headers[1].tag, "PN");
1551 assert_eq!(graph.headers[1].value, "TestGraph");
1552
1553 assert_eq!(graph.nodes(DEFAULT_GRAPH_ID).len(), 3);
1555 let node1 = graph.node(DEFAULT_GRAPH_ID, "node1").unwrap();
1556 assert_eq!(node1.id, "node1");
1557 assert_eq!(node1.sequence, Some("ACGT".into()));
1558
1559 assert_eq!(graph.edges(DEFAULT_GRAPH_ID).len(), 2);
1561 let edge1 = graph.edge(DEFAULT_GRAPH_ID, "edge1").unwrap();
1562 assert_eq!(edge1.id, "edge1");
1563
1564 let graph_section = graph.graph(DEFAULT_GRAPH_ID).unwrap();
1566
1567 if let Group::Chain { elements, .. } = &graph_section.chains["chain1".as_bytes()] {
1569 assert_eq!(elements.len(), 5);
1570 assert_eq!(elements[0], "node1");
1571 assert_eq!(elements[1], "edge1");
1572 } else {
1573 panic!("Expected Chain group");
1574 }
1575
1576 Ok(())
1577 }
1578
1579 #[test]
1580 fn test_traverse() -> Result<()> {
1581 let file = "tests/data/test.tsg";
1582 let graph = TSGraph::from_file(file)?;
1583
1584 let paths = graph.traverse_by_id(DEFAULT_GRAPH_ID)?;
1585 for path in paths {
1588 println!("{}", path);
1589 }
1590 Ok(())
1591 }
1592
1593 #[test]
1594 fn test_to_dot() -> Result<()> {
1595 let file = "tests/data/test.tsg";
1596 let graph = TSGraph::from_file(file)?;
1597
1598 let dot = graph.to_dot_by_id(DEFAULT_GRAPH_ID, true, true)?;
1599 println!("{}", dot);
1600
1601 Ok(())
1602 }
1603
1604 #[test]
1605 fn test_to_json() -> Result<()> {
1606 let file = "tests/data/test.tsg";
1607 let graph = TSGraph::from_file(file)?;
1608
1609 let json = graph.to_json_by_id(DEFAULT_GRAPH_ID)?;
1610 println!("{}", json);
1611 Ok(())
1612 }
1613}