1mod analysis;
2mod attr;
3mod edge;
4mod group;
5mod header;
6mod node;
7mod path;
8mod utils;
9
10use noodles::fasta;
11use std::fs::File;
12use std::io::{BufRead, BufReader, BufWriter};
13use std::path::Path;
14use std::str::FromStr;
15use tracing::debug;
16use tracing::warn;
17
18use ahash::{HashMap, HashMapExt, HashSet, HashSetExt};
19use anyhow::{Context, Result, anyhow};
20use bstr::{BStr, BString, ByteSlice};
21
22pub use analysis::*;
23pub use attr::*;
24pub use edge::*;
25pub use group::*;
26pub use header::*;
27pub use node::*;
28pub use path::*;
29pub use utils::*;
30
31use bon::Builder;
32use petgraph::dot::{Config, Dot};
33use petgraph::graph::{DiGraph, EdgeIndex, NodeIndex};
34use petgraph::visit::EdgeRef;
35use rayon::prelude::*;
36use serde_json::json;
37use std::collections::VecDeque;
38
39pub const DEFAULT_GRAPH_ID: &str = "G.graph";
40#[derive(Debug, Clone, Default, Builder)]
42pub struct GraphSection {
43 pub id: BString,
44 pub attributes: HashMap<BString, Attribute>,
45 _graph: DiGraph<NodeData, EdgeData>,
46 pub node_indices: HashMap<BString, NodeIndex>,
47 pub edge_indices: HashMap<BString, EdgeIndex>,
48 pub groups: HashMap<BString, Group>,
49 pub chains: HashMap<BString, Group>,
50}
51
52impl GraphSection {
53 pub fn new(id: BString) -> Self {
55 Self {
56 id,
57 ..Default::default()
58 }
59 }
60
61 pub fn new_default_graph() -> Self {
62 Self::new(DEFAULT_GRAPH_ID.into())
63 }
64
65 pub fn node_indices_to_ids(&self) -> HashMap<NodeIndex, BString> {
66 self.node_indices
67 .par_iter()
68 .map(|(id, &idx)| (idx, id.clone()))
69 .collect()
70 }
71
72 pub fn edge_indices_to_ids(&self) -> HashMap<EdgeIndex, BString> {
73 self.edge_indices
74 .par_iter()
75 .map(|(id, &idx)| (idx, id.clone()))
76 .collect()
77 }
78
79 pub fn node_weight(&self, node_idx: NodeIndex) -> Option<&NodeData> {
80 self._graph.node_weight(node_idx)
81 }
82
83 pub fn edge_weight(&self, edge_idx: EdgeIndex) -> Option<&EdgeData> {
84 self._graph.edge_weight(edge_idx)
85 }
86
87 pub fn in_degree(&self, node_idx: NodeIndex) -> usize {
88 self._graph
89 .edges_directed(node_idx, petgraph::Direction::Incoming)
90 .count()
91 }
92
93 pub fn out_degree(&self, node_idx: NodeIndex) -> usize {
94 self._graph
95 .edges_directed(node_idx, petgraph::Direction::Outgoing)
96 .count()
97 }
98
99 pub fn add_node(&mut self, node_data: NodeData) -> Result<NodeIndex> {
101 let id = node_data.id.clone();
102
103 if let Some(&idx) = self.node_indices.get(&id) {
105 if let Some(attr) = self._graph.node_weight_mut(idx) {
107 *attr = node_data;
108 return Ok(idx);
109 }
110 return Err(anyhow!("Node with ID {} not found in graph", id));
111 }
112
113 let node_idx = self._graph.add_node(node_data);
115
116 debug!(
117 "graph {} add node {} ;len of node_indices: {}",
118 self.id,
119 id,
120 self.node_indices.len() + 1
121 );
122
123 self.node_indices.insert(id, node_idx);
124 Ok(node_idx)
125 }
126
127 pub fn add_edge(
129 &mut self,
130 source_id: &BStr,
131 sink_id: &BStr,
132 edge_data: EdgeData,
133 ) -> Result<EdgeIndex> {
134 let id = edge_data.id.clone();
135
136 let source_idx = match self.node_indices.get(source_id) {
138 Some(&idx) => idx,
139 None => {
140 let placeholder_node = NodeData::builder().id(source_id.to_owned()).build();
142 self.add_node(placeholder_node)?
143 }
144 };
145
146 let sink_idx = match self.node_indices.get(sink_id) {
148 Some(&idx) => idx,
149 None => {
150 let placeholder_node = NodeData::builder().id(sink_id.to_owned()).build();
152 self.add_node(placeholder_node)?
153 }
154 };
155
156 let edge_idx = self._graph.update_edge(source_idx, sink_idx, edge_data);
158
159 self.edge_indices.insert(id, edge_idx);
160 Ok(edge_idx)
161 }
162
163 fn ensure_graph_is_built(&mut self) -> Result<()> {
167 if !self.node_indices.is_empty() && !self.edge_indices.is_empty() {
169 return Ok(());
170 }
171
172 if !self.chains.is_empty() {
174 for group in self.chains.clone().values() {
175 if let Group::Chain { elements, .. } = group {
176 for (i, element_id) in elements.iter().enumerate() {
178 if i % 2 == 0 {
179 if !self.node_indices.contains_key(element_id) {
181 let node_data = NodeData {
183 id: element_id.clone(),
184 ..Default::default()
185 };
186 self.add_node(node_data)?;
187 }
188 } else if i + 1 < elements.len() {
189 if !self.edge_indices.contains_key(element_id) {
192 let source_id = &elements[i - 1];
194 let sink_id = &elements[i + 1];
195
196 let edge_data = EdgeData {
198 id: element_id.clone(),
199 ..Default::default()
200 };
201
202 self.add_edge(source_id.as_bstr(), sink_id.as_bstr(), edge_data)?;
203 }
204 }
205 }
206 }
207 }
208 return Ok(());
209 }
210
211 if self.id == DEFAULT_GRAPH_ID {
212 return Ok(());
214 }
215
216 if self.node_indices.is_empty() || self.edge_indices.is_empty() {
217 warn!(
218 "Graph {} has no nodes/edges defined and no chains available",
219 self.id
220 );
221 }
222
223 Ok(())
224 }
225
226 pub fn node_by_idx(&self, node_idx: NodeIndex) -> Option<&NodeData> {
228 self._graph.node_weight(node_idx)
229 }
230
231 pub fn node_by_id(&self, id: &str) -> Option<&NodeData> {
232 let node_idx = self.node_indices.get(&BString::from(id))?;
233 self._graph.node_weight(*node_idx)
234 }
235
236 pub fn edge_by_id(&self, id: &str) -> Option<&EdgeData> {
237 let edge_idx = self.edge_indices.get(&BString::from(id))?;
238 self._graph.edge_weight(*edge_idx)
239 }
240
241 pub fn edge_by_idx(&self, edge_idx: EdgeIndex) -> Option<&EdgeData> {
242 self._graph.edge_weight(edge_idx)
243 }
244
245 pub fn nodes(&self) -> Vec<&NodeData> {
246 self.node_indices
247 .values()
248 .map(|&idx| self._graph.node_weight(idx).unwrap())
249 .collect()
250 }
251
252 pub fn edges(&self) -> Vec<&EdgeData> {
253 self.edge_indices
254 .values()
255 .map(|&idx| self._graph.edge_weight(idx).unwrap())
256 .collect()
257 }
258
259 pub fn find_node_id_by_idx(&self, node_idx: NodeIndex) -> Option<&BString> {
261 self.node_indices
262 .par_iter()
263 .find_map_first(|(id, &idx)| if idx == node_idx { Some(id) } else { None })
264 }
265
266 pub fn traverse(&self) -> Result<Vec<TSGPath>> {
287 let source_nodes: Vec<NodeIndex> = self
289 ._graph
290 .node_indices()
291 .filter(|&idx| {
292 self._graph
293 .edges_directed(idx, petgraph::Direction::Incoming)
294 .count()
295 == 0
296 })
297 .collect();
298
299 if source_nodes.is_empty() {
300 return Ok(Vec::new());
301 }
302
303 let mut all_paths = Vec::new();
304 let mut node_read_ids_cache: HashMap<NodeIndex, HashSet<BString>> = HashMap::new();
306
307 for node_idx in self._graph.node_indices() {
309 if let Some(node) = self._graph.node_weight(node_idx) {
310 let read_ids: HashSet<BString> =
311 node.reads.par_iter().map(|r| r.id.clone()).collect();
312 node_read_ids_cache.insert(node_idx, read_ids);
313 }
314 }
315
316 for &start_node in &source_nodes {
318 if let Some(read_set) = node_read_ids_cache.get(&start_node) {
320 if read_set.is_empty() {
321 continue;
322 }
323
324 let mut queue = VecDeque::new();
325 let mut initial_path = TSGPath::builder().graph(self).build();
327 initial_path.add_node(start_node);
328
329 queue.push_back((start_node, initial_path, read_set.clone()));
330
331 while let Some((current_node, path, active_reads)) = queue.pop_front() {
332 let outgoing_edges: Vec<_> = self
334 ._graph
335 .edges_directed(current_node, petgraph::Direction::Outgoing)
336 .collect();
337
338 if outgoing_edges.is_empty() {
340 path.validate()?;
341 all_paths.push(path);
342 continue;
343 }
344
345 for edge_ref in outgoing_edges {
346 let edge_idx = edge_ref.id();
347 let target_node = edge_ref.target();
348
349 if let Some(target_read_ids) = node_read_ids_cache.get(&target_node) {
350 let continuing_reads: HashSet<_> = active_reads
352 .par_iter()
353 .filter(|id| target_read_ids.contains(*id))
354 .cloned()
355 .collect();
356
357 if continuing_reads.is_empty() {
358 continue;
360 }
361
362 let has_in_reads =
364 if let Some(target_data) = self._graph.node_weight(target_node) {
365 target_data
366 .reads
367 .par_iter()
368 .any(|r| r.identity == ReadIdentity::IN)
369 } else {
370 false
371 };
372
373 if has_in_reads {
374 let mut can_continue = false;
376 let outgoing_from_target: Vec<_> = self
377 ._graph
378 .edges_directed(target_node, petgraph::Direction::Outgoing)
379 .map(|e| e.target())
380 .collect();
381
382 for &next_node in &outgoing_from_target {
383 if let Some(next_read_ids) = node_read_ids_cache.get(&next_node)
384 {
385 if continuing_reads
387 .par_iter()
388 .any(|id| next_read_ids.contains(id))
389 {
390 can_continue = true;
391 break;
392 }
393 }
394 }
395
396 if !can_continue && !outgoing_from_target.is_empty() {
397 continue;
399 }
400 }
401
402 let mut new_path = path.clone();
404 new_path.add_edge(edge_idx);
405 new_path.add_node(target_node);
406 queue.push_back((target_node, new_path, continuing_reads));
407 }
408 }
409 }
410 }
411 }
412
413 Ok(all_paths)
414 }
415
416 pub fn to_dot(&self, node_label: bool, edge_label: bool) -> Result<String> {
417 let mut config = vec![];
418 if node_label {
419 config.push(Config::NodeIndexLabel);
420 }
421 if edge_label {
422 config.push(Config::EdgeIndexLabel);
423 }
424
425 let dot = Dot::with_config(&self._graph, &config);
426 Ok(format!("{:?}", dot))
427 }
428
429 pub fn to_json(&self) -> Result<serde_json::Value> {
430 let mut nodes = Vec::new();
431 let mut edges = Vec::new();
432
433 for node_idx in self._graph.node_indices() {
435 if let Some(node) = self._graph.node_weight(node_idx) {
436 if let Ok(node_json) = node.to_json(None) {
437 nodes.push(node_json);
438 }
439 }
440 }
441
442 for edge_idx in self._graph.edge_indices() {
444 if let Some(edge) = self._graph.edge_weight(edge_idx) {
445 let edge_endpoints = self._graph.edge_endpoints(edge_idx);
446
447 if let Some((source, target)) = edge_endpoints {
448 let source_id = self.find_node_id_by_idx(source);
449 let target_id = self.find_node_id_by_idx(target);
450
451 let source_data = self.node_by_idx(source).unwrap();
454 let target_data = self.node_by_idx(target).unwrap();
455
456 let source_reads = source_data
458 .reads
459 .iter()
460 .map(|r| r.id.clone())
461 .collect::<HashSet<_>>();
462 let target_reads = target_data
463 .reads
464 .iter()
465 .map(|r| r.id.clone())
466 .collect::<HashSet<_>>();
467 let edge_weight = source_reads
468 .intersection(&target_reads)
469 .collect::<HashSet<_>>()
470 .len();
471
472 if let (Some(source_id), Some(target_id)) = (source_id, target_id) {
473 let edge_data = json!({
474 "data": {
475 "id": edge.id.to_str().unwrap(),
476 "source": source_id.to_str().unwrap(),
477 "target": target_id.to_str().unwrap(),
478 "weight": edge_weight,
479 "breakpoints": format!("{}", edge.sv)
480 }
481 });
482 edges.push(edge_data);
483 }
484 }
485 }
486 }
487
488 let elements = json!({
490 "directed": true,
491 "multigraph": true,
492 "elements": {
493 "nodes": nodes,
494 "edges": edges
495 }
496 });
497
498 Ok(elements)
499 }
500
501 pub fn annotate_node_with_sequence<P: AsRef<Path>>(
502 &mut self,
503 reference_genome_path: P,
504 ) -> Result<()> {
505 let mut reader = fasta::io::indexed_reader::Builder::default()
506 .build_from_path(reference_genome_path.as_ref())?;
507
508 for node_idx in self.node_indices.values() {
509 let node_data = self._graph.node_weight_mut(*node_idx).unwrap();
510
511 let region = format!(
512 "{}:{}-{}",
513 node_data.reference_id,
514 node_data.reference_start() - 1, node_data.reference_end(),
516 )
517 .parse()?;
518 let record = reader.query(®ion)?;
519 node_data.sequence = Some(record.sequence().as_ref().into());
520 }
521 Ok(())
522 }
523}
524
525#[derive(Debug, Clone, Default, Builder)]
527pub struct InterGraphLink {
528 pub id: BString,
529 pub source_graph: BString,
530 pub source_element: BString,
531 pub target_graph: BString,
532 pub target_element: BString,
533 pub link_type: BString,
534 #[builder(default)]
535 pub attributes: HashMap<BString, Attribute>,
536}
537
538#[derive(Debug, Clone, Default, Builder)]
540pub struct TSGraph {
541 pub headers: Vec<Header>,
542 pub graphs: HashMap<BString, GraphSection>,
543 pub links: Vec<InterGraphLink>,
544 current_graph_id: Option<BString>, }
546
547impl TSGraph {
548 pub fn new() -> Self {
550 let graph = GraphSection::new_default_graph();
551 let mut graphs = HashMap::new();
552 graphs.insert(graph.id.clone(), graph);
553 Self {
554 graphs,
555 current_graph_id: Some(DEFAULT_GRAPH_ID.into()),
556 ..Default::default()
557 }
558 }
559
560 fn parse_header_line(&mut self, fields: &[&str]) -> Result<()> {
562 if fields.len() < 3 {
563 return Err(anyhow!("Invalid header line format"));
564 }
565
566 self.headers.push(Header {
567 tag: fields[1].into(),
568 value: fields[2].into(),
569 });
570
571 Ok(())
572 }
573
574 fn parse_graph_line(&mut self, fields: &[&str]) -> Result<()> {
576 if fields.len() < 2 {
577 return Err(anyhow!("Invalid graph line format"));
578 }
579
580 let graph_id: BString = fields[1].into();
581
582 if self.graphs.contains_key(&graph_id) {
584 return Err(anyhow!(
585 "Graph with ID {} already exists",
586 graph_id.to_str().unwrap_or("")
587 ));
588 }
589
590 let mut graph_section = GraphSection::new(graph_id.clone());
592
593 if fields.len() > 2 {
595 for attr_str in &fields[2..] {
596 let attr = attr_str.parse::<Attribute>()?;
597 graph_section.attributes.insert(attr.tag.clone(), attr);
598 }
599 }
600
601 self.current_graph_id = Some(graph_id.clone());
603 self.graphs.insert(graph_id, graph_section);
604
605 Ok(())
606 }
607
608 fn current_graph_mut(&mut self) -> Result<&mut GraphSection> {
610 if let Some(graph_id) = &self.current_graph_id {
611 if let Some(graph) = self.graphs.get_mut(graph_id) {
612 return Ok(graph);
613 }
614 }
615 Err(anyhow!("No active graph section"))
616 }
617
618 fn parse_link_line(&mut self, fields: &[&str]) -> Result<()> {
620 if fields.len() < 5 {
621 return Err(anyhow!("Invalid link line format"));
622 }
623
624 let id: BString = fields[1].into();
625
626 let source_ref: Vec<&str> = fields[2].splitn(2, ':').collect();
628 let target_ref: Vec<&str> = fields[3].splitn(2, ':').collect();
629
630 if source_ref.len() != 2 || target_ref.len() != 2 {
631 return Err(anyhow!("Invalid element reference format in link line"));
632 }
633
634 let source_graph: BString = source_ref[0].into();
635 let source_element: BString = source_ref[1].into();
636 let target_graph: BString = target_ref[0].into();
637 let target_element: BString = target_ref[1].into();
638
639 if !self.graphs.contains_key(&source_graph) {
641 return Err(anyhow!(
642 "Source graph {} not found",
643 source_graph.to_str().unwrap_or("")
644 ));
645 }
646 if !self.graphs.contains_key(&target_graph) {
647 return Err(anyhow!(
648 "Target graph {} not found",
649 target_graph.to_str().unwrap_or("")
650 ));
651 }
652
653 let link_type: BString = fields[4].into();
654
655 let mut link = InterGraphLink::builder()
656 .id(id)
657 .source_graph(source_graph)
658 .source_element(source_element)
659 .target_graph(target_graph)
660 .target_element(target_element)
661 .link_type(link_type)
662 .build();
663
664 if fields.len() > 5 {
666 for attr_str in &fields[5..] {
667 let attr = attr_str.parse::<Attribute>()?;
668 link.attributes.insert(attr.tag.clone(), attr);
669 }
670 }
671
672 self.links.push(link);
673 Ok(())
674 }
675
676 fn parse_node_line(&mut self, fields: &str) -> Result<()> {
678 let node_data = NodeData::from_str(fields)?;
679 let graph = self.current_graph_mut()?;
680 graph.add_node(node_data)?;
681 Ok(())
682 }
683
684 fn parse_edge_line(&mut self, fields: &[&str]) -> Result<()> {
686 if fields.len() < 5 {
687 return Err(anyhow!("Invalid edge line format"));
688 }
689
690 let id: BString = fields[1].into();
691 let source_id: BString = fields[2].into();
692 let sink_id: BString = fields[3].into();
693 let sv = fields[4].parse::<StructuralVariant>()?;
694
695 let edge_data = EdgeData::builder().id(id).sv(sv).build();
696
697 let graph = self.current_graph_mut()?;
698 graph.add_edge(source_id.as_bstr(), sink_id.as_bstr(), edge_data)?;
699 Ok(())
700 }
701
702 fn parse_unordered_group_line(&mut self, fields: &[&str]) -> Result<()> {
704 if fields.len() < 3 {
705 return Err(anyhow!("Invalid unordered group line format"));
706 }
707
708 let id: BString = fields[1].into();
709 let graph = self.current_graph_mut()?;
710
711 if graph.groups.contains_key(&id) {
713 return Err(anyhow!("Group with ID {} already exists", id));
714 }
715
716 let elements_str = fields[2..].join(" ");
718 let elements = elements_str
719 .split_whitespace()
720 .map(|s| s.into())
721 .collect::<Vec<_>>();
722
723 let group = Group::Unordered {
724 id: id.clone(),
725 elements,
726 attributes: HashMap::new(),
727 };
728
729 graph.groups.insert(id, group);
730 Ok(())
731 }
732
733 fn parse_path_line(&mut self, fields: &[&str]) -> Result<()> {
735 if fields.len() < 3 {
736 return Err(anyhow!("Invalid path line format"));
737 }
738
739 let id: BString = fields[1].into();
740 let graph = self.current_graph_mut()?;
741
742 if graph.groups.contains_key(&id) {
744 return Err(anyhow!("Group with ID {} already exists", id));
745 }
746
747 let elements_str = fields[2..].join(" ");
749 let elements = elements_str
750 .split_whitespace()
751 .map(|s| s.parse::<OrientedElement>())
752 .collect::<Result<Vec<_>, _>>()?;
753
754 let group = Group::Ordered {
755 id: id.clone(),
756 elements,
757 attributes: HashMap::new(),
758 };
759
760 graph.groups.insert(id, group);
761 Ok(())
762 }
763
764 fn parse_chain_line(&mut self, fields: &[&str]) -> Result<()> {
766 if fields.len() < 3 {
767 return Err(anyhow!("Invalid chain line format"));
768 }
769
770 let id: BString = fields[1].into();
771 let graph = self.current_graph_mut()?;
772
773 if graph.groups.contains_key(&id) {
775 return Err(anyhow!("Group with ID {} already exists", id));
776 }
777
778 let elements_str = fields[2..].join(" ");
780 let elements: Vec<BString> = elements_str.split_whitespace().map(|s| s.into()).collect();
781
782 if elements.is_empty() {
784 return Err(anyhow!("Chain must contain at least one element"));
785 }
786
787 if elements.len() % 2 == 0 {
788 return Err(anyhow!(
789 "Chain must have an odd number of elements (starting and ending with nodes)"
790 ));
791 }
792
793 let group = Group::Chain {
795 id: id.clone(),
796 elements,
797 attributes: HashMap::new(),
798 };
799
800 graph.chains.insert(id.clone(), group.clone());
802 graph.groups.insert(id, group);
803 Ok(())
804 }
805
806 fn parse_attribute_line(&mut self, fields: &[&str]) -> Result<()> {
808 if fields.len() < 4 {
809 return Err(anyhow!("Invalid attribute line format"));
810 }
811
812 let element_type = fields[1];
813 let element_id: BString = fields[2].into();
814 let graph = self.current_graph_mut()?;
815
816 let attrs: Vec<Attribute> = fields
817 .iter()
818 .skip(3)
819 .map(|s| s.parse())
820 .collect::<Result<Vec<_>>>()
821 .context("invalidate attribute line")?;
822
823 match element_type {
824 "N" => {
825 if let Some(&node_idx) = graph.node_indices.get(&element_id) {
826 if let Some(node_data) = graph._graph.node_weight_mut(node_idx) {
827 for attr in attrs {
828 let tag = attr.tag.clone();
829 node_data.attributes.insert(tag, attr);
830 }
831 } else {
832 return Err(anyhow!("Node with ID {} not found in graph", element_id));
833 }
834 } else {
835 return Err(anyhow!("Node with ID {} not found", element_id));
836 }
837 }
838 "E" => {
839 if let Some(&edge_idx) = graph.edge_indices.get(&element_id) {
840 if let Some(edge_data) = graph._graph.edge_weight_mut(edge_idx) {
841 for attr in attrs {
842 let tag = attr.tag.clone();
843 edge_data.attributes.insert(tag, attr);
844 }
845 } else {
846 return Err(anyhow!("Edge with ID {} not found in graph", element_id));
847 }
848 } else {
849 return Err(anyhow!("Edge with ID {} not found", element_id));
850 }
851 }
852 "U" | "P" | "C" => {
853 if let Some(group) = graph.groups.get_mut(&element_id) {
854 match group {
855 Group::Unordered { attributes, .. }
856 | Group::Ordered { attributes, .. }
857 | Group::Chain { attributes, .. } => {
858 for attr in attrs {
859 let tag = attr.tag.clone();
860 attributes.insert(tag, attr);
861 }
862 }
863 }
864 } else {
865 return Err(anyhow!("Group with ID {} not found", element_id));
866 }
867 }
868 "G" => {
869 if let Some(graph_section) = self.graphs.get_mut(&element_id) {
871 for attr in attrs {
872 let tag = attr.tag.clone();
873 graph_section.attributes.insert(tag, attr);
874 }
875 } else {
876 return Err(anyhow!("Graph with ID {} not found", element_id));
877 }
878 }
879 _ => {
880 return Err(anyhow!("Unknown element type: {}", element_type));
881 }
882 }
883
884 Ok(())
885 }
886
887 fn validate(&self) -> Result<()> {
889 for (graph_id, graph) in &self.graphs {
891 for (id, group) in &graph.groups {
893 if let Group::Ordered { elements, .. } = group {
894 for element in elements {
896 let element_exists = graph.node_indices.contains_key(&element.id)
897 || graph.edge_indices.contains_key(&element.id)
898 || graph.groups.contains_key(&element.id);
899
900 if !element_exists {
901 return Err(anyhow!(
902 "Path {} in graph {} references non-existent element {}",
903 id,
904 graph_id,
905 element.id
906 ));
907 }
908 }
909 }
910 }
911 }
912
913 for link in &self.links {
915 let source_graph = self.graphs.get(&link.source_graph).ok_or_else(|| {
917 anyhow!(
918 "Link {} references non-existent graph {}",
919 link.id,
920 link.source_graph
921 )
922 })?;
923
924 let source_exists = source_graph.node_indices.contains_key(&link.source_element)
925 || source_graph.edge_indices.contains_key(&link.source_element)
926 || source_graph.groups.contains_key(&link.source_element);
927
928 if !source_exists {
929 return Err(anyhow!(
930 "Link {} references non-existent element {}:{}",
931 link.id,
932 link.source_graph,
933 link.source_element
934 ));
935 }
936
937 let target_graph = self.graphs.get(&link.target_graph).ok_or_else(|| {
939 anyhow!(
940 "Link {} references non-existent graph {}",
941 link.id,
942 link.target_graph
943 )
944 })?;
945
946 let target_exists = target_graph.node_indices.contains_key(&link.target_element)
947 || target_graph.edge_indices.contains_key(&link.target_element)
948 || target_graph.groups.contains_key(&link.target_element);
949
950 if !target_exists {
951 return Err(anyhow!(
952 "Link {} references non-existent element {}:{}",
953 link.id,
954 link.target_graph,
955 link.target_element
956 ));
957 }
958 }
959
960 Ok(())
961 }
962
963 pub fn from_reader<R: BufRead>(reader: R) -> Result<Self> {
964 let mut tsgraph = TSGraph::new();
965
966 let default_graph_id: BString = DEFAULT_GRAPH_ID.into();
968 let default_graph = GraphSection::new(default_graph_id.clone());
969 tsgraph
970 .graphs
971 .insert(default_graph_id.clone(), default_graph);
972
973 tsgraph.current_graph_id = Some(default_graph_id);
974
975 for line in reader.lines() {
977 let line = line?;
978 if line.is_empty() || line.starts_with('#') {
979 continue;
980 }
981 let fields: Vec<&str> = line.split_whitespace().collect();
982 if fields.is_empty() {
983 continue;
984 }
985
986 match fields[0] {
987 "H" => tsgraph.parse_header_line(&fields)?,
988 "G" => tsgraph.parse_graph_line(&fields)?,
989 "N" => tsgraph.parse_node_line(&line)?,
990 "E" => tsgraph.parse_edge_line(&fields)?,
991 "U" => tsgraph.parse_unordered_group_line(&fields)?,
992 "P" => tsgraph.parse_path_line(&fields)?,
993 "C" => tsgraph.parse_chain_line(&fields)?,
994 "A" => tsgraph.parse_attribute_line(&fields)?,
995 "L" => tsgraph.parse_link_line(&fields)?,
996 _ => {
997 debug!("Ignoring unknown record type: {}", fields[0]);
999 }
1000 }
1001 }
1002
1003 for graph_section in tsgraph.graphs.values_mut() {
1005 for (id, group) in &graph_section.groups {
1007 if let Group::Chain { .. } = group {
1008 if !graph_section.chains.contains_key(id) {
1009 graph_section.chains.insert(id.clone(), group.clone());
1010 }
1011 }
1012 }
1013
1014 graph_section.ensure_graph_is_built()?;
1016 }
1017
1018 tsgraph.validate()?;
1020
1021 if let Some(default_graph) = tsgraph.graph(DEFAULT_GRAPH_ID) {
1023 if default_graph.node_indices.is_empty() {
1024 tsgraph.graphs.remove(&BString::from(DEFAULT_GRAPH_ID));
1025 }
1026 }
1027 Ok(tsgraph)
1028 }
1029
1030 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
1032 let file = File::open(path)?;
1033 let reader = BufReader::new(file);
1034 Self::from_reader(reader)
1035 }
1036
1037 pub fn to_writer<W: std::io::Write>(&self, writer: &mut W) -> Result<()> {
1039 writeln!(writer, "# Global header")?;
1041 for header in &self.headers {
1042 writeln!(writer, "{}", header)?;
1043 }
1044
1045 let new_header = Header::builder().tag("PG").value("tsg").build();
1046 writeln!(writer, "{}", new_header)?;
1047
1048 for (graph_id, graph) in &self.graphs {
1050 if graph_id == &BString::from(DEFAULT_GRAPH_ID) && graph.node_indices.is_empty() {
1052 continue;
1053 }
1054
1055 writeln!(writer, "\n# Graph: {}", graph_id)?;
1056
1057 write!(writer, "G\t{}", graph_id)?;
1059 for attr in graph.attributes.values() {
1060 write!(writer, "\t{}", attr)?;
1061 }
1062 writeln!(writer)?;
1063
1064 writeln!(writer, "# Nodes")?;
1066 for node_idx in graph._graph.node_indices() {
1067 if let Some(node_data) = graph._graph.node_weight(node_idx) {
1068 writeln!(writer, "{}", node_data)?;
1069 }
1070 }
1071
1072 writeln!(writer, "# Edges")?;
1074 for edge_ref in graph._graph.edge_references() {
1075 let edge = edge_ref.weight();
1076 let source_idx = edge_ref.source();
1077 let sink_idx = edge_ref.target();
1078
1079 if let (Some(source), Some(sink)) = (
1080 graph._graph.node_weight(source_idx),
1081 graph._graph.node_weight(sink_idx),
1082 ) {
1083 writeln!(
1084 writer,
1085 "E\t{}\t{}\t{}\t{}",
1086 edge.id, source.id, sink.id, edge.sv
1087 )?;
1088 }
1089 }
1090
1091 writeln!(writer, "# Groups")?;
1093 let mut seen_chain_ids = HashSet::new();
1094
1095 for group in graph.groups.values() {
1096 match group {
1097 Group::Unordered { id, elements, .. } => {
1098 let elements_str: Vec<String> = elements
1099 .par_iter()
1100 .map(|e| e.to_str().unwrap_or("").to_string())
1101 .collect();
1102 writeln!(
1103 writer,
1104 "U\t{}\t{}",
1105 id.to_str().unwrap_or(""),
1106 elements_str.join(" ")
1107 )?;
1108 }
1109 Group::Ordered { id, elements, .. } => {
1110 let elements_str: Vec<String> =
1111 elements.par_iter().map(|e| e.to_string()).collect();
1112 writeln!(
1113 writer,
1114 "P\t{}\t{}",
1115 id.to_str().unwrap_or(""),
1116 elements_str.join(" ")
1117 )?;
1118 }
1119 Group::Chain { id, elements, .. } => {
1120 if seen_chain_ids.contains(id) {
1122 continue;
1123 }
1124 seen_chain_ids.insert(id);
1125
1126 let elements_str: Vec<String> = elements
1127 .par_iter()
1128 .map(|e| e.to_str().unwrap_or("").to_string())
1129 .collect();
1130 writeln!(
1131 writer,
1132 "C\t{}\t{}",
1133 id.to_str().unwrap_or(""),
1134 elements_str.join(" ")
1135 )?;
1136 }
1137 }
1138 }
1139
1140 writeln!(writer, "# Attributes")?;
1142
1143 for node_idx in graph._graph.node_indices() {
1145 if let Some(node) = graph._graph.node_weight(node_idx) {
1146 for attr in node.attributes.values() {
1147 writeln!(writer, "A\tN\t{}\t{}", node.id, attr)?;
1148 }
1149 }
1150 }
1151
1152 for edge_idx in graph._graph.edge_indices() {
1154 if let Some(edge) = graph._graph.edge_weight(edge_idx) {
1155 for attr in edge.attributes.values() {
1156 writeln!(writer, "A\tE\t{}\t{}", edge.id, attr)?;
1157 }
1158 }
1159 }
1160
1161 for (id, group) in &graph.groups {
1163 let (group_type, attributes) = match group {
1164 Group::Unordered { attributes, .. } => ("U", attributes),
1165 Group::Ordered { attributes, .. } => ("P", attributes),
1166 Group::Chain { attributes, .. } => ("C", attributes),
1167 };
1168
1169 for attr in attributes.values() {
1170 writeln!(
1171 writer,
1172 "A\t{}\t{}\t{}",
1173 group_type,
1174 id.to_str().unwrap_or(""),
1175 attr
1176 )?;
1177 }
1178 }
1179 }
1180
1181 if !self.links.is_empty() {
1183 writeln!(writer, "\n# Inter-graph links")?;
1184 for link in &self.links {
1185 write!(
1186 writer,
1187 "L\t{}\t{}:{}\t{}:{}\t{}",
1188 link.id,
1189 link.source_graph,
1190 link.source_element,
1191 link.target_graph,
1192 link.target_element,
1193 link.link_type
1194 )?;
1195
1196 for attr in link.attributes.values() {
1197 write!(writer, "\t{}", attr)?;
1198 }
1199 writeln!(writer)?;
1200 }
1201 }
1202
1203 writer.flush()?;
1204 Ok(())
1205 }
1206
1207 pub fn to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
1209 let file = File::create(path)?;
1210 let mut writer = BufWriter::new(file);
1211 self.to_writer(&mut writer)
1212 }
1213
1214 pub fn graph(&self, id: &str) -> Option<&GraphSection> {
1218 self.graphs.get(&BString::from(id))
1219 }
1220
1221 pub fn graph_mut(&mut self, id: &str) -> Option<&mut GraphSection> {
1222 self.graphs.get_mut(&BString::from(id))
1223 }
1224
1225 pub fn default_graph(&self) -> Option<&GraphSection> {
1227 self.graphs.get(&BString::from(DEFAULT_GRAPH_ID))
1228 }
1229
1230 pub fn default_graph_mut(&mut self) -> Option<&mut GraphSection> {
1231 self.graphs.get_mut(&BString::from(DEFAULT_GRAPH_ID))
1232 }
1233
1234 pub fn node(&self, graph_id: &str, node_id: &str) -> Option<&NodeData> {
1236 let graph = self.graphs.get(&BString::from(graph_id))?;
1237 graph.node_by_id(node_id)
1238 }
1239
1240 pub fn edge(&self, graph_id: &str, edge_id: &str) -> Option<&EdgeData> {
1242 let graph = self.graphs.get(&BString::from(graph_id))?;
1243 graph.edge_by_id(edge_id)
1244 }
1245
1246 pub fn chain_nodes(&self, graph_id: &str, chain_id: &BStr) -> Option<Vec<NodeIndex>> {
1248 let graph = self.graphs.get(&BString::from(graph_id))?;
1249 let group = graph.chains.get(chain_id)?;
1250
1251 match group {
1252 Group::Chain { elements, .. } => {
1253 let mut nodes = Vec::with_capacity(elements.len().div_ceil(2));
1254
1255 for (i, element_id) in elements.iter().enumerate() {
1256 if i % 2 == 0 {
1257 if let Some(&node_idx) = graph.node_indices.get(element_id) {
1259 nodes.push(node_idx);
1260 } else {
1261 return None; }
1263 }
1264 }
1265
1266 Some(nodes)
1267 }
1268 _ => None, }
1270 }
1271
1272 pub fn chain_edges(&self, graph_id: &str, chain_id: &BStr) -> Option<Vec<EdgeIndex>> {
1274 let graph = self.graphs.get(&BString::from(graph_id))?;
1275 let group = graph.chains.get(chain_id)?;
1276
1277 match group {
1278 Group::Chain { elements, .. } => {
1279 let mut edges = Vec::with_capacity(elements.len() / 2);
1280
1281 for (i, element_id) in elements.iter().enumerate() {
1282 if i % 2 == 1 {
1283 if let Some(&edge_idx) = graph.edge_indices.get(element_id) {
1285 edges.push(edge_idx);
1286 } else {
1287 return None; }
1289 }
1290 }
1291
1292 Some(edges)
1293 }
1294 _ => None, }
1296 }
1297
1298 pub fn find_node_id_by_idx(&self, graph_id: &str, node_idx: NodeIndex) -> Option<&BString> {
1300 let graph = self.graphs.get(&BString::from(graph_id))?;
1301 graph
1302 .node_indices
1303 .par_iter()
1304 .find_map_first(|(id, &idx)| if idx == node_idx { Some(id) } else { None })
1305 }
1306
1307 pub fn node_by_idx(&self, graph_id: &str, node_idx: NodeIndex) -> Option<&NodeData> {
1308 let graph = self.graphs.get(&BString::from(graph_id))?;
1309 graph.node_by_idx(node_idx)
1310 }
1311
1312 pub fn edge_by_idx(&self, graph_id: &str, edge_idx: EdgeIndex) -> Option<&EdgeData> {
1313 let graph = self.graphs.get(&BString::from(graph_id))?;
1314 graph.edge_by_idx(edge_idx)
1315 }
1316
1317 pub fn nodes(&self, graph_id: &str) -> Vec<&NodeData> {
1319 let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1320 graph
1321 .node_indices
1322 .values()
1323 .filter_map(|&idx| graph._graph.node_weight(idx))
1324 .collect()
1325 }
1326
1327 pub fn edges(&self, graph_id: &str) -> Vec<&EdgeData> {
1329 let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1330 graph
1331 .edge_indices
1332 .values()
1333 .filter_map(|&idx| graph._graph.edge_weight(idx))
1334 .collect()
1335 }
1336
1337 pub fn traverse_by_id(&self, graph_id: &str) -> Result<Vec<TSGPath>> {
1339 let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1340 graph.traverse()
1341 }
1342
1343 pub fn traverse_all_graphs(&self) -> Result<Vec<TSGPath>> {
1345 let all_paths = self
1346 .graphs
1347 .values()
1348 .try_fold(Vec::new(), |mut all_paths, graph| {
1349 let paths = graph.traverse()?;
1350 all_paths.extend(paths);
1351 Ok(all_paths)
1352 });
1353 all_paths
1354 }
1355
1356 pub fn to_dot_by_id(
1357 &self,
1358 graph_id: &str,
1359 node_label: bool,
1360 edge_label: bool,
1361 ) -> Result<String> {
1362 let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1363 graph.to_dot(node_label, edge_label)
1364 }
1365
1366 pub fn to_json_by_id(&self, graph_id: &str) -> Result<serde_json::Value> {
1367 let graph = self.graphs.get(&BString::from(graph_id)).unwrap();
1368 graph.to_json()
1369 }
1370}
1371
1372impl FromStr for TSGraph {
1373 type Err = anyhow::Error;
1374 fn from_str(s: &str) -> Result<Self> {
1375 let reader = BufReader::new(s.as_bytes());
1376 Self::from_reader(reader)
1377 }
1378}
1379
1380#[cfg(test)]
1381mod tests {
1382 use super::*;
1383
1384 #[test]
1385 fn test_create_empty_graph() {
1386 let graph = TSGraph::new();
1387 assert_eq!(graph.headers.len(), 0);
1388 assert_eq!(graph.default_graph().unwrap().nodes().len(), 0);
1389 assert_eq!(graph.edges(DEFAULT_GRAPH_ID).len(), 0);
1390 assert_eq!(graph.graphs.len(), 1);
1391 assert_eq!(graph.links.len(), 0);
1392 }
1393
1394 #[test]
1395 fn test_add_node() -> Result<()> {
1396 let mut graph = TSGraph::new();
1397 let node = NodeData::builder().id("node1").reference_id("chr1").build();
1398
1399 graph.default_graph_mut().unwrap().add_node(node.clone())?;
1400 assert_eq!(graph.nodes(DEFAULT_GRAPH_ID).len(), 1);
1401 assert_eq!(graph.node(DEFAULT_GRAPH_ID, "node1").unwrap().id, node.id);
1402 Ok(())
1403 }
1404
1405 #[test]
1406 fn test_add_edge() -> Result<()> {
1407 let mut graph = TSGraph::new();
1408
1409 let node1 = NodeData::builder().id("node1").reference_id("chr1").build();
1411 let node2 = NodeData::builder().id("node2").reference_id("chr1").build();
1412
1413 graph.graph_mut(DEFAULT_GRAPH_ID).unwrap().add_node(node1)?;
1414 graph.graph_mut(DEFAULT_GRAPH_ID).unwrap().add_node(node2)?;
1415
1416 let sv_from_builder = StructuralVariant::builder()
1418 .reference_name1("chr1")
1419 .reference_name2("chr1")
1420 .breakpoint1(1000)
1421 .breakpoint2(5000)
1422 .sv_type(BString::from("DEL"))
1423 .build();
1424 let edge = EdgeData::builder().id("edge1").sv(sv_from_builder).build();
1425
1426 graph.graph_mut(DEFAULT_GRAPH_ID).unwrap().add_edge(
1427 "node1".into(),
1428 "node2".into(),
1429 edge.clone(),
1430 )?;
1431
1432 assert_eq!(graph.edges(DEFAULT_GRAPH_ID).len(), 1);
1433 assert_eq!(graph.edge(DEFAULT_GRAPH_ID, "edge1").unwrap().id, edge.id);
1434
1435 Ok(())
1436 }
1437
1438 #[test]
1439 fn test_parse_header_line() -> Result<()> {
1440 let mut graph = TSGraph::new();
1441 let fields = vec!["H", "VN", "1.0"];
1442
1443 graph.parse_header_line(&fields)?;
1444
1445 assert_eq!(graph.headers.len(), 1);
1446 assert_eq!(graph.headers[0].tag, "VN");
1447 assert_eq!(graph.headers[0].value, "1.0");
1448
1449 Ok(())
1450 }
1451
1452 #[test]
1453 fn test_parse_node_line() -> Result<()> {
1454 let mut graph = TSGraph::new();
1455 let line = "N\tnode1\tchr1:+:100-200\tread1:SO,read2:IN\tACGT";
1456
1457 graph.parse_node_line(line)?;
1458
1459 let node = graph.default_graph().unwrap().node_by_id("node1").unwrap();
1461
1462 assert_eq!(node.id, "node1");
1463 assert_eq!(node.exons.exons.len(), 1);
1464 assert_eq!(node.exons.exons[0].start, 100);
1465 assert_eq!(node.exons.exons[0].end, 200);
1466 assert_eq!(
1467 node.reads,
1468 vec![
1469 ReadData::builder().id("read1").identity("SO").build(),
1470 ReadData::builder().id("read2").identity("IN").build(),
1471 ]
1472 );
1473 assert_eq!(node.sequence, Some("ACGT".into()));
1474
1475 Ok(())
1476 }
1477
1478 #[test]
1479 fn test_parse_unordered_group() -> Result<()> {
1480 let mut graph = TSGraph::new();
1481 let fields = vec!["U", "group1", "node1", "node2", "edge1"];
1482
1483 graph.parse_unordered_group_line(&fields)?;
1484
1485 let graph_section = graph.default_graph().unwrap();
1486 assert_eq!(graph_section.groups.len(), 1);
1487 if let Group::Unordered { id, elements, .. } = &graph_section.groups["group1".as_bytes()] {
1488 assert_eq!(id, "group1");
1489 assert_eq!(elements.len(), 3);
1490 assert_eq!(elements[0], "node1");
1491 assert_eq!(elements[1], "node2");
1492 assert_eq!(elements[2], "edge1");
1493 } else {
1494 panic!("Expected Unordered group");
1495 }
1496
1497 Ok(())
1498 }
1499
1500 #[test]
1501 fn test_read_from_file() -> Result<()> {
1502 let file = "tests/data/test.tsg";
1503 let graph = TSGraph::from_file(file)?;
1504
1505 assert_eq!(graph.headers.len(), 2);
1506 assert_eq!(graph.nodes(DEFAULT_GRAPH_ID).len(), 5);
1507 assert_eq!(graph.edges(DEFAULT_GRAPH_ID).len(), 4);
1508
1509 graph.to_file("tests/data/test_write.tsg")?;
1510
1511 Ok(())
1512 }
1513
1514 #[test]
1515 fn test_from_str() -> Result<()> {
1516 let tsg_string = r#"H VN 1.0
1517 H PN TestGraph
1518 N node1 chr1:+:100-200 read1:SO,read2:IN ACGT
1519 N node2 chr1:+:300-400 read1:SO,read2:IN
1520 N node3 chr1:+:500-600 read2:SO,read3:IN
1521 E edge1 node1 node2 chr1,chr1,1700,2000,INV
1522 E edge2 node2 node3 chr1,chr1,1700,2000,DUP
1523 C chain1 node1 edge1 node2 edge2 node3
1524 "#;
1525
1526 let graph = TSGraph::from_str(tsg_string)?;
1527
1528 assert_eq!(graph.headers.len(), 2);
1530 assert_eq!(graph.headers[0].tag, "VN");
1531 assert_eq!(graph.headers[0].value, "1.0");
1532 assert_eq!(graph.headers[1].tag, "PN");
1533 assert_eq!(graph.headers[1].value, "TestGraph");
1534
1535 assert_eq!(graph.nodes(DEFAULT_GRAPH_ID).len(), 3);
1537 let node1 = graph.node(DEFAULT_GRAPH_ID, "node1").unwrap();
1538 assert_eq!(node1.id, "node1");
1539 assert_eq!(node1.sequence, Some("ACGT".into()));
1540
1541 assert_eq!(graph.edges(DEFAULT_GRAPH_ID).len(), 2);
1543 let edge1 = graph.edge(DEFAULT_GRAPH_ID, "edge1").unwrap();
1544 assert_eq!(edge1.id, "edge1");
1545
1546 let graph_section = graph.graph(DEFAULT_GRAPH_ID).unwrap();
1548
1549 if let Group::Chain { elements, .. } = &graph_section.chains["chain1".as_bytes()] {
1551 assert_eq!(elements.len(), 5);
1552 assert_eq!(elements[0], "node1");
1553 assert_eq!(elements[1], "edge1");
1554 } else {
1555 panic!("Expected Chain group");
1556 }
1557
1558 Ok(())
1559 }
1560
1561 #[test]
1562 fn test_traverse() -> Result<()> {
1563 let file = "tests/data/test.tsg";
1564 let graph = TSGraph::from_file(file)?;
1565
1566 let paths = graph.traverse_by_id(DEFAULT_GRAPH_ID)?;
1567 for path in paths {
1570 println!("{}", path);
1571 }
1572 Ok(())
1573 }
1574
1575 #[test]
1576 fn test_to_dot() -> Result<()> {
1577 let file = "tests/data/test.tsg";
1578 let graph = TSGraph::from_file(file)?;
1579
1580 let dot = graph.to_dot_by_id(DEFAULT_GRAPH_ID, true, true)?;
1581 println!("{}", dot);
1582
1583 Ok(())
1584 }
1585
1586 #[test]
1587 fn test_to_json() -> Result<()> {
1588 let file = "tests/data/test.tsg";
1589 let graph = TSGraph::from_file(file)?;
1590
1591 let json = graph.to_json_by_id(DEFAULT_GRAPH_ID)?;
1592 println!("{}", json);
1593 Ok(())
1594 }
1595}