1use std::collections::HashMap;
2use std::fs::File;
3use std::io::{prelude::*, BufReader};
4use std::io::{BufWriter, Write};
5use std::path::Path as file_path;
6use std::str::Split;
7use flate2::read::GzDecoder;
8
9
/// Header (`H`) record of a GFA file.
///
/// Only the first tag of the header line is kept, split into its three
/// colon-separated components.
#[derive(Debug, Clone, Default)]
pub struct Header {
    /// Tag name (first component, e.g. `VN`).
    pub tag: String,
    /// Tag type (second component, e.g. `Z`).
    pub typ: String,
    /// Tag value (third component), used as the version number.
    pub version_number: String,
}

impl Header {
    /// Serializes the header as a single `H` line terminated by a newline.
    ///
    /// The tag is always written as `VN:Z:<version_number>`.
    fn to_string1(&self) -> String {
        format!("H\tVN:Z:{}\n", self.version_number)
    }

    /// Parses a header line.
    ///
    /// The second tab-separated column is split into `tag:type:value`.
    /// Components that are missing (for example a bare `H` line) are left
    /// empty instead of causing a panic.
    fn from_string(line: &str) -> Header {
        let first_tag = line.split('\t').nth(1).unwrap_or("");
        // `splitn(3, ..)` keeps any further ':' inside the value intact.
        let mut parts = first_tag.splitn(3, ':');
        let mut next_part = || parts.next().unwrap_or("").to_string();
        let tag = next_part();
        let typ = next_part();
        let version_number = next_part();
        Header {
            tag,
            typ,
            version_number,
        }
    }
}
34
35
36
37
38
39
40
/// A single optional field of a record, stored as its three string parts.
#[derive(Debug, PartialEq, Clone)]
pub struct OptElem {
    /// Tag name.
    pub tag: String,
    /// Type component of the field.
    pub typee: String,
    /// Value component of the field.
    pub value: String,
}

impl OptElem {
    /// Serializes the field as `tag:type:value`.
    ///
    /// The components are joined with `:` because that is the separator the
    /// optional-field parser in this file splits on.
    fn to_string1(&self) -> String {
        format!("{}:{}:{}", self.tag, self.typee, self.value)
    }
}
55
56
57pub trait OptFields: Sized + Default + Clone {
59
60 fn fields(&self) -> &[OptElem];
63
64 fn parse(input: Split<&str>) -> Self;
68
69 fn new() -> Self;
70
71
72
73}
74
75
76
77impl OptFields for () {
81
82 fn fields(&self) -> &[OptElem] {
83 &[]
84 }
85
86 fn parse(_input: Split<&str> ) -> Self
87 {
88 }
89
90 fn new() -> Self {
91 }
92
93
94}
95
96
97impl OptFields for Vec<OptElem> {
99 fn fields(&self) -> &[OptElem] {
100 self.as_slice()
101 }
102 fn parse(mut input: Split<&str> ) -> Self{
103 let mut fields = Vec::new();
104
105 while let Some(value) = input.next() {
106
107 let mut parts = value.split(':');
108 let tag = parts.next().unwrap();
109 let typ = parts.next().unwrap();
110 let val = parts.next().unwrap();
111 fields.push(OptElem { tag: tag.to_string(), typee: typ.to_string(), value: val.to_string()});
112
113 }
114 fields
115 }
116
117 fn new() -> Self {
118 Vec::new()
119 }
120
121
122}
123
124
125
126#[derive(Debug)]
127pub struct Segment<T: OptFields>{
137 pub name: String,
138 pub sequence: String,
139 pub size: u32,
140 pub opt: T,
141}
142
143
144impl <T: OptFields> Segment<T> {
145
146 fn to_string(&self) -> String {
148 let a = format!("S\t{}\t{}\n", self.name, self.sequence.len());
149
150 if self.opt.fields().len() > 0 {
151 let b: Vec<String> = self.opt.fields().iter().map(|a| a.to_string1()).collect();
152 let c = b.join("\t");
153 format!("{}{}\n", a, c)
154 } else {
155 a
156 }
157 }
158
159 #[allow(dead_code)]
160 fn to_fasta(&self) -> String {
162
163 format!(">{}\n{}", self.name, self.sequence)
164 }
165}
166
167
168
169
170#[derive(Debug, PartialEq, Clone, Default)]
171pub struct Containment<T: OptFields>{
185 pub container: String,
186 pub container_orient: bool,
187 pub contained: String,
188 pub contained_orient: bool,
189 pub pos : usize, pub overlap: String,
191 pub opt: T,
192}
193
194impl <T: OptFields>Containment<T> {
195
196 #[allow(dead_code)]
197 fn to_string_link(&self) -> String {
199 let a = format!("L\t{}\t{}\t{}\t{}\t{}\n", self.container, {if self.container_orient {"+"} else {"-"}}, self.contained, {if self.contained_orient {"+"} else {"-"}}, self.overlap);
200 if self.opt.fields().len() > 0 {
201 let b: Vec<String> = self.opt.fields().iter().map(|a| a.to_string1()).collect();
202 let c = b.join("\t");
203 format!("{}{}\n", a, c)
204 } else {
205 a
206 }
207 }
208}
209
210
211
212#[derive(Debug, PartialEq, Clone, Default)]
213pub struct Link<T: OptFields>{
226 pub from: String,
227 pub from_dir: bool,
228 pub to: String,
229 pub to_dir: bool,
230 pub overlap: String,
231 pub opt: T,
232}
233
234
235
236impl <T: OptFields> Link<T> {
237
238 fn to_string_link(&self) -> String {
240 let a = format!("L\t{}\t{}\t{}\t{}\t{}\n", self.from, {if self.from_dir{"+"} else {"-"}}, self.to, {if self.to_dir{"+"} else {"-"}}, self.overlap);
241 if self.opt.fields().len() > 0 {
242 let b: Vec<String> = self.opt.fields().iter().map(|a| a.to_string1()).collect();
243 let c = b.join("\t");
244 format!("{}{}\n", a, c)
245 } else {
246 a
247 }
248 }
249}
250
251
252
/// Path (`P`) record: an ordered list of oriented segments.
#[derive(Debug)]
pub struct Path {
    /// Path name.
    pub name: String,
    /// Orientation of every step (`true` is written as `+`).
    pub dir: Vec<bool>,
    /// Segment name of every step.
    pub nodes: Vec<String>,
    /// Entries of the overlap column.
    pub overlap: Vec<String>,
}

impl Path {
    /// Serializes the path as one `P` line: name, comma-separated oriented
    /// steps and comma-separated overlaps, each separated by exactly one tab.
    fn to_string(&self) -> String {
        let steps: Vec<String> = self
            .nodes
            .iter()
            .zip(&self.dir)
            .map(|(node, &forward)| format!("{}{}", node, if forward { '+' } else { '-' }))
            .collect();
        format!(
            "P\t{}\t{}\t{}\n",
            self.name,
            steps.join(","),
            self.overlap.join(",")
        )
    }
}
282
/// Walk (`W`) record: a haplotype-aware traversal of oriented segments.
#[derive(Debug)]
pub struct Walk {
    /// Sample identifier.
    pub sample_id: String,
    /// Haplotype index.
    pub hap_index: usize,
    /// Sequence identifier.
    pub seq_id: String,
    /// Start coordinate on the sequence.
    pub seq_start: usize,
    /// End coordinate on the sequence.
    pub seq_end: usize,
    /// Segment name of every step.
    pub walk_segments: Vec<String>,
    /// Orientation of every step (`true` is written as `>`).
    pub walk_dir: Vec<bool>,
}

impl Walk {
    /// Serializes the walk as one `W` line.
    ///
    /// Each step is written as its orientation (`>` or `<`) followed
    /// directly by the segment name, with no separator between steps, as
    /// the GFA 1.1 walk syntax requires.
    #[allow(dead_code)]
    fn to_string(&self) -> String {
        let steps: String = self
            .walk_segments
            .iter()
            .zip(&self.walk_dir)
            .map(|(segment, &forward)| format!("{}{}", if forward { '>' } else { '<' }, segment))
            .collect();
        format!(
            "W\t{}\t{}\t{}\t{}\t{}\t{}\n",
            self.sample_id, self.hap_index, self.seq_id, self.seq_start, self.seq_end, steps
        )
    }
}
314
315
316
317
318
319#[derive(Debug)]
320pub struct Fragment<T: OptFields>{
321 pub sample_id: String,
322 pub external_ref: usize,
323 pub seg_begin: usize,
324 pub seg_end: usize,
325 pub frag_begin: usize,
326 pub frag_end: usize,
327 pub alignment: String,
328 pub opt: T,
329}
330
331impl <T: OptFields>Fragment<T>{
332
333 #[allow(dead_code)]
334 fn to_string(&self) -> String {
337 let a = format!("F\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n", self.sample_id, self.external_ref, self.seg_begin, self.seg_end, self.frag_begin, self.frag_end, self.alignment);
338 if self.opt.fields().len() > 0 {
339 let b: Vec<String> = self.opt.fields().iter().map(|a| a.to_string1()).collect();
340 let c = b.join("\t");
341 format!("{}{}\n", a, c)
342 } else {
343 a
344 }
345 }
346}
347
/// Ordered (`O`) or unordered (`U`) group record.
#[derive(Debug)]
pub struct Group {
    /// `true` for an ordered group (`O`), `false` for an unordered one (`U`).
    pub is_ordered: bool,
    /// Group name.
    pub name: String,
    /// Names of the group members.
    pub nodes: Vec<String>,
    /// Orientation of each member (`true` is written as `+`); only used
    /// when the group is ordered.
    pub direction: Vec<bool>,
}

impl Group {
    /// Pairs every member with its orientation sign.
    fn oriented_members(&self) -> Vec<String> {
        self.nodes
            .iter()
            .zip(&self.direction)
            .map(|(node, &forward)| format!("{}{}", node, if forward { '+' } else { '-' }))
            .collect()
    }

    /// Serializes the group as an `O` or `U` line.
    ///
    /// Columns are separated by a single tab and members by a single space,
    /// which is the separator the group parser in this file splits on.
    /// Ordered members carry their orientation, unordered ones do not.
    pub fn to_string2(&self) -> String {
        let (record, members) = if self.is_ordered {
            ('O', self.oriented_members())
        } else {
            ('U', self.nodes.clone())
        };
        format!("{}\t{}\t{}\n", record, self.name, members.join(" "))
    }

    /// Serializes the group as a `P` line.
    ///
    /// An ordered group is written as name plus comma-separated oriented
    /// members; an unordered group is written with its name only.
    pub fn to_string(&self) -> String {
        if self.is_ordered {
            format!("P\t{}\t{}\n", self.name, self.oriented_members().join(","))
        } else {
            format!("P\t{}\n", self.name)
        }
    }
}
389
390
391
392#[derive(Debug)]
393pub struct Gap<T: OptFields>{
395 pub name: String,
396 pub sid1: String,
397 pub sid1_ref: bool,
398 pub sid2: String,
399 pub sid2_ref: bool,
400 pub dist: usize,
401 pub tag: T,
402}
403
404
405impl <T: OptFields> Gap<T> {
406
407 #[allow(dead_code)]
408 fn to_string(&self) -> String {
410 let a = format!("G\t{}\t{}{}\t{}{}\t{}\n", self.name, self.sid1, {if self.sid1_ref{"+"} else {"-"}}, self.sid2, {if self.sid2_ref{"+"} else {"-"}}, self.dist);
411 if self.tag.fields().len() > 0 {
412 let b: Vec<String> = self.tag.fields().iter().map(|a| a.to_string1()).collect();
413 let c = b.join("\t");
414 format!("{}{}\n", a, c)
415 } else {
416 a
417 }
418 }
419
420}
421
422#[derive(Debug)]
423pub struct Jump<T: OptFields>{
424 pub from: String,
425 pub from_orient: bool,
426 pub to: String,
427 pub to_orient: bool,
428 pub distance: String,
429 pub opt: T,
430}
431
432impl <T: OptFields>Jump<T> {
433
434 #[allow(dead_code)]
435 fn to_string(&self) -> String {
438 let a = format!("J\t{}\t{}\t{}\t{}\t{}\n", self.from, {if self.from_orient {"+"} else {"-"}}, self.to, {if self.to_orient {"+"} else {"-"}}, self.distance);
439 if self.opt.fields().len() > 0 {
440 let b: Vec<String> = self.opt.fields().iter().map(|a| a.to_string1()).collect();
441 let c = b.join("\t");
442 format!("{}{}\n", a, c)
443 } else {
444 a
445 }
446 }
447}
448
449#[derive(Debug)]
450pub struct Edges<T: OptFields>{
455 pub id: u32,
456 pub source_name: String,
457 pub sink_name: String,
458 pub source_dir: bool,
459 pub sink_dir: bool,
460 pub source_begin: u32,
461 pub source_end: u32,
462 pub sink_begin: u32,
463 pub sink_end: u32,
464 pub ends: u8, pub alignment: String,
466 pub opts: T
467}
468
469impl <T: OptFields>Edges<T>{
470
471 #[allow(dead_code)]
472 fn to_string2(&self) -> String {
474 let mut a = format!("E\t{}\t{}\t{}\t{}\t{}\t{}\n",
475 self.id,
476 self.source_name,
477 {if self.source_dir{"+"} else {"-"}},
478 self.sink_name,
479 {if self.sink_dir{"+"} else {"-"}},
480 self.source_begin);
481 if &self.ends & &1 != 0 {
482 a.push('$');
483 }
484 use std::fmt::Write;
485
486 write!(&mut a, "\t{}", self.source_end).unwrap();
487 if self.ends & 2 != 0 {
488 a.push('$');
489 }
490
491 write!(&mut a, "\t{}", self.sink_begin).unwrap();
492 if self.ends & 4 != 0 {
493 a.push('$');
494 }
495
496 write!(&mut a, "\t{}", self.sink_end).unwrap();
497 if self.ends & 8 != 0 {
498 a.push('$');
499 }
500
501 write!(&mut a, "\t{}", self.alignment).unwrap();
502
503 if self.opts.fields().len() > 0 {
504 let b: Vec<String> = self.opts.fields().iter().map(|a| a.to_string1()).collect();
505 let c = b.join("\t");
506 format!("{}{}\n", a, c)
507 } else {
508 a
509 }
510 }
511
512}
513
514
515
516
517
518
519
520
521
522#[derive(Debug)]
523pub struct Gfa<T: OptFields>{
531
532 pub header: Header,
534 pub segments: Vec<Segment<T>>,
535 pub paths: Vec<Path>,
536 pub links: Option<Vec<Link<T>>>,
537 pub containments: Vec<Containment<T>>,
538
539 pub walk: Vec<Walk>,
541 pub jumps: Vec<Jump<T>>,
542
543 pub edges: Vec<Edges<T>>,
545 pub fragments: Vec<Fragment<T>>,
546 pub groups: Vec<Group>,
547 pub gaps: Vec<Gap<T>>,
548 pub string2index: HashMap<String, usize>,
549}
550
551
552
553impl <T: OptFields> Gfa<T>{
554 pub fn new() -> Self {
564 Self {
565 segments: Vec::new(),
566 paths: Vec::new(),
567 links: None,
568 header: Header{tag: "".to_string(), typ: "".to_string(), version_number: "".to_string()},
569 containments: Vec::new(),
570 walk: Vec::new(), jumps: Vec::new(), string2index: HashMap::new(),
573
574 edges: Vec::new(), fragments: Vec::new(), groups: Vec::new(), gaps: Vec::new(), }
580 }
581
582 pub fn check_nc(&mut self) -> Option<Vec<usize>>{
592
593 if self.segments.len() == 0 {
595 return None
596 }
597
598
599 let is_digit = self.segments.iter().map(|x| x.name.chars().map(|g| g.is_ascii_digit()).collect::<Vec<bool>>().contains(&false)).collect::<Vec<bool>>().contains(&false);
602
603 if is_digit {
605 let mut numeric_nodes = self.segments.iter().map(|x| x.name.parse::<usize>().unwrap()).collect::<Vec<usize>>();
606 numeric_nodes.sort();
607 let _f = numeric_nodes.windows(2).all(|pair| pair[1] == &pair[0] + 1);
608
609 let mm = numeric_nodes.iter().cloned().min().unwrap();
611 if mm == 1 {
612 return Some(numeric_nodes)
613 }
614 }
615 return None
616
617
618 }
619
620
621
622
623
624
625 pub fn parse_gfa_file(&mut self, file_name: &str, edges: bool) {
635
636
637 if file_path::new(file_name).exists() {
638 let file = File::open(file_name).expect("ERROR: CAN NOT READ FILE\n");
639
640 let reader: Box<dyn BufRead> = if file_name.ends_with(".gz") {
642 Box::new(BufReader::new(GzDecoder::new(file)))
643 } else {
644 Box::new(BufReader::new(file))
645 };
646 let version_number = get_version(file_name);
647
648
649 let mut nodes: Vec<Segment<T>> = Vec::new();
650 let mut links: Vec<Link<T>> = Vec::new();
651 for line in reader.lines() {
653 let l = line.unwrap();
654 let l2 = l.clone();
655 let mut a = l2.split("\t");
656 let first = a.next().unwrap();
657 let line_split: Vec<&str> = l.split("\t").collect();
658 match first {
659 "S" => {
660 let name = a.next().unwrap().parse().unwrap();
661 if version_number < 2.0 {
662 let sequence: String = a.next().unwrap().parse().unwrap();
663 let size = sequence.len() as u32;
664 nodes.push(Segment { name, sequence, size, opt: T::parse(a) });
665 } else {
666 let sequence: String = a.next().unwrap().parse().unwrap();
667 let size = a.next().unwrap().parse().unwrap();
668 nodes.push(Segment { name, sequence, size, opt: T::parse(a) });
669 }
670
671
672
673 },
674 "P" => {
675
676 let name: String = String::from(line_split[1]);
677 let dirs: Vec<bool> = line_split[2].split(",").map(|d| if &d[d.len() - 1..] == "+" { !false } else { !true }).collect();
678 let node_id: Vec<String> = line_split[2].split(",").map(|d| d[..d.len() - 1].parse().unwrap()).collect();
679 let overlap;
680 if line_split.len() > 3{
681 overlap = line_split[3].split(",").map(|d| d.parse().unwrap()).collect();
682 } else {
683 overlap = vec!["*".to_string(); node_id.len()];
684 }
685 self.paths.push(Path { name: name, dir: dirs, nodes: node_id, overlap: overlap});
686
687
688
689 },
690 "L" => {
691 if edges {
692
693 links.push(Link {from: a.next().unwrap().to_string(), from_dir: if a.next().unwrap() == "+" { !false } else { !true }, to: a.next().unwrap().to_string(), to_dir: if a.next().unwrap() == "+" { !false } else { !true }, overlap: a.next().unwrap().to_string(), opt: T::parse(a)});
695
696 }
697
698 }
699 "C" => {
700 if edges {
701 self.containments.push(Containment {container: a.next().unwrap().to_string(), container_orient: if a.next().unwrap() == "+" { !false } else { !true }, contained: a.next().unwrap().to_string(), contained_orient: if a.next().unwrap() == "+" { !false } else { !true }, overlap: a.next().unwrap().to_string(), opt: T::parse(a), pos: 0 });
702
703 }
704 }
705 "H" => {
706 let header = Header::from_string(&l);
707 self.header = header;
708 }
709 "W" => {
710 let sample_id = a.next().unwrap().to_string();
711 let hap_index = a.next().unwrap().parse().unwrap();
712 let seq_id = a.next().unwrap().to_string();
713 let seq_start = a.next().unwrap().parse().unwrap();
714 let seq_end = a.next().unwrap().parse().unwrap();
715 let walk = a.next().unwrap().to_string();
716 let dirs: Vec<bool> = walk.split(",").map(|d| if &d[d.len() - 1..] == ">" { !false } else { !true }).collect();
717 let node_id: Vec<String> = walk.split(",").map(|d| d[..d.len() - 1].parse().unwrap()).collect();
718 self.walk.push(Walk{ sample_id, hap_index, seq_id, seq_start, seq_end, walk_segments: node_id, walk_dir: dirs});
719 }
720 "J" => {
721 let from = a.next().unwrap().to_string();
722 let from_orient = if a.next().unwrap() == "+" { !false } else { !true };
723 let to = a.next().unwrap().to_string();
724 let to_orient = if a.next().unwrap() == "+" { !false } else { !true };
725 let distance = a.next().unwrap().to_string();
726 self.jumps.push(Jump{from, from_orient: from_orient, to, to_orient: to_orient, distance, opt: T::parse(a)});
727 }
728
729 "G" => {
730 let name = a.next().unwrap().to_string();
731 let sid1 = a.next().unwrap().to_string();
732 let sid2 = a.next().unwrap().to_string();
733 let dist = a.next().unwrap().parse().unwrap();
734 self.gaps.push(Gap{name, sid1, sid1_ref: false, sid2, sid2_ref: false, dist, tag: T::parse(a)});
735 }
736
737 "F" => {
738 let sample_id = a.next().unwrap().to_string();
739 let external_ref = a.next().unwrap().parse().unwrap();
740 let seg_begin = a.next().unwrap().parse().unwrap();
741 let seg_end = a.next().unwrap().parse().unwrap();
742 let frag_begin = a.next().unwrap().parse().unwrap();
743 let frag_end = a.next().unwrap().parse().unwrap();
744 let alignment = a.next().unwrap().to_string();
745 self.fragments.push(Fragment{sample_id, external_ref, seg_begin, seg_end, frag_begin, frag_end, alignment, opt: T::parse(a)});
746 }
747 "E" => {
748 let id = a.next().unwrap().parse().unwrap();
749
750 let (source_name, source_dir) = split_string(a.next().unwrap()).unwrap();
751 let (sink_name, sink_dir) = split_string(a.next().unwrap()).unwrap();
752
753 let mut end = 0;
754 let source_begin: String = a.next().unwrap().parse().unwrap();
755 end = if source_begin.ends_with("$"){end & 1} else {end};
756 let s1 = source_begin.replace("$", "").parse().unwrap();
757
758 let s2: String = a.next().unwrap().parse().unwrap();
759 end = if s2.ends_with("$"){end & 2} else {end};
760 let s2 = s2.replace("$", "").parse().unwrap();
761
762 let s3: String = a.next().unwrap().parse().unwrap();
763 end = if s3.ends_with("$"){end & 4} else {end};
764 let s3 = s3.replace("$", "").parse().unwrap();
765
766 let s4: String = a.next().unwrap().parse().unwrap();
767 end = if s4.ends_with("$"){end & 8} else {end};
768 let s4 = s4.replace("$", "").parse().unwrap();
769
770 let alignment = a.next().unwrap().to_string();
771
772 self.edges.push(Edges{id,
773 source_name: source_name.to_string(),
774 sink_name: sink_name.to_string(),
775 source_dir: source_dir,
776 sink_dir: sink_dir,
777 source_begin: s1,
778 source_end: s2,
779 sink_begin: s3,
780 sink_end: s4,
781 ends: end,
782 alignment: alignment,
783 opts: T::parse(a)});
784 }
785 "O" => {
786 let is_ordered = true;
787 let name = a.next().unwrap().to_string();
788 let nodes: Vec<(&str, bool)> = a.next().unwrap().split(" ").map(|d| split_string(d).unwrap()).collect();
789 let (nodes, direction): (Vec<&str>, Vec<bool>) = nodes.iter().cloned().unzip();
790 self.groups.push(Group{is_ordered, direction: direction, nodes: nodes.iter().map(|a| a.to_string()).collect(), name: name});
791 }
792 "U" => {
793 let is_ordered = false;
794 let name = a.next().unwrap().to_string();
795 let nodes: Vec<(&str, bool)> = a.next().unwrap().split(" ").map(|d| split_string(d).unwrap()).collect();
796 let (nodes, direction): (Vec<&str>, Vec<bool>) = nodes.iter().cloned().unzip();
797 self.groups.push(Group{is_ordered, direction: direction, nodes: nodes.iter().map(|a| a.to_string()).collect(), name: name});
798 }
799
800
801
802
803
804 _ => {
805 }
806 }
807
808 }
809 if edges {
810 self.links = Some(links);
811 }
812 self.segments.extend(nodes);
813
814 }
815
816 }
817
818 pub fn to_file(self, file_name: &str){
820 let f = File::create(file_name).expect("Unable to create file");
821 let mut f = BufWriter::new(f);
822
823 write!(f, "{}", self.header.to_string1()).expect("Not able to write");
824 for node in self.segments.iter() {
825 write!(f, "{}", node.to_string()).expect("Not able to write");
826 }
827 match &self.links {
828 Some(value) =>{
829 for edge in value.iter() {
830 write!(f, "{}", edge.to_string_link()).expect("Not able to write");
831 }
832 }
833 _ => {}
834 }
835 for path in self.paths.iter() {
836 write!(f, "{}", path.to_string()).expect("Not able to write");
837 }
838 }
839
840
841
842 pub fn convert_to_ncgraph(& self, graph: &Gfa<T>) -> NCGfa<T>{
843 let mut ncgraph: NCGfa<T> = NCGfa::new();
844 let f = ncgraph.make_mapper(graph);
845 ncgraph.convert_with_mapper(f, &graph);
846 ncgraph
847 }
848}
849
850
851
852
853
854
855#[derive(Debug, Clone)]
862pub struct NCGfa<T: OptFields>{
869 pub header: Header,
870 pub nodes: Vec<NCNode<T>>,
871 pub paths: Vec<NCPath>,
872 pub edges: Option<Vec<NCEdge<T>>>,
873 pub mapper: Vec<String>
874}
875
876
877
878#[derive(Debug, Clone)]
879pub struct NCNode<T: OptFields>{
884 pub id: u32,
885 pub seq: String,
886 pub opt: T,
887}
888
889
890impl <T: OptFields>NCNode<T> {
891
892 fn to_string(&self) -> String {
894 let a = format!("S\t{}\t{}\n", self.id, self.seq.len());
895
896 if self.opt.fields().len() > 0 {
897 let b: Vec<String> = self.opt.fields().iter().map(|a| a.to_string1()).collect();
898 let c = b.join("\t");
899 format!("{}{}\n", a, c)
900 } else {
901 a
902 }
903 }
904
905 #[allow(dead_code)]
906 fn to_fasta(&self) -> String {
908
909 format!(">{}\n{}", self.id, self.seq)
910 }
911}
912
913#[derive(Debug, PartialEq, Clone, Default)]
914pub struct NCEdge<T: OptFields>{
926 pub from: u32,
927 pub from_dir: bool,
928 pub to: u32,
929 pub to_dir: bool,
930 pub overlap: String,
931 pub opt: T,
932}
933
934
935impl <T: OptFields>NCEdge<T> {
936 fn to_string_link(&self) -> String {
938 let a = format!("L\t{}\t{}\t{}\t{}\t{}\n", self.from, {if self.from_dir{"+"} else {"-"}}, self.to, {if self.to_dir{"+"} else {"-"}}, self.overlap);
939 if self.opt.fields().len() > 0 {
940 let b: Vec<String> = self.opt.fields().iter().map(|a| a.to_string1()).collect();
941 let c = b.join("\t");
942 format!("{}{}\n", a, c)
943 } else {
944 a
945 }
946 }
947
948}
949
/// Path over numeric node identifiers.
#[derive(Debug, Clone)]
pub struct NCPath {
    /// Path name.
    pub name: String,
    /// Orientation of every step (`true` is written as `+`).
    pub dir: Vec<bool>,
    /// Node identifier of every step.
    pub nodes: Vec<u32>,
    /// Entries of the overlap column.
    pub overlap: Vec<String>,
}

impl NCPath {
    /// Serializes the path as a `P` line without an overlap column.
    ///
    /// With `Some(mapper)` every step is written as `mapper[node]`, i.e. the
    /// node identifier is used directly as an index into `mapper`; with
    /// `None` the numeric identifier itself is written.
    ///
    /// # Panics
    ///
    /// Panics when a node identifier is out of bounds for `mapper`.
    // NOTE(review): `NCGfa::get_old_node` indexes its mapper with `id - 1`,
    // whereas this method uses `id`; confirm which layout callers pass here.
    pub fn to_string(&self, mapper: &Option<Vec<&String>>) -> String {
        let steps: Vec<String> = self
            .nodes
            .iter()
            .zip(&self.dir)
            .map(|(node, &forward)| {
                let sign = if forward { '+' } else { '-' };
                match mapper {
                    Some(names) => format!("{}{}", names[*node as usize], sign),
                    None => format!("{}{}", node, sign),
                }
            })
            .collect();
        format!("P\t{}\t{}\n", self.name, steps.join(","))
    }

    /// Serializes the path as a `P` line with numeric steps and the overlap
    /// column, each separated by exactly one tab.
    fn to_string2(&self) -> String {
        let steps: Vec<String> = self
            .nodes
            .iter()
            .zip(&self.dir)
            .map(|(node, &forward)| format!("{}{}", node, if forward { '+' } else { '-' }))
            .collect();
        format!(
            "P\t{}\t{}\t{}\n",
            self.name,
            steps.join(","),
            self.overlap.join(",")
        )
    }
}
990
991impl <T: OptFields>NCGfa <T> {
992
993 pub fn new() -> Self {
1003 Self {
1004 header: Header {
1005 tag: "".to_string(),
1006 typ: "".to_string(),
1007 version_number: "".to_string(),
1008 },
1009 nodes: Vec::new(),
1010 paths: Vec::new(),
1011 edges: Option::None,
1012 mapper: Vec::new(),
1013 }
1014 }
1015
1016 pub fn parse_gfa_file_direct(&mut self, file_name: &str, edge: bool) {
1026
1027
1028 if file_path::new(file_name).exists() {
1029 let file = File::open(file_name).expect("ERROR: CAN NOT READ FILE\n");
1030
1031 let reader: Box<dyn BufRead> = if file_name.ends_with(".gz") {
1033 Box::new(BufReader::new(GzDecoder::new(file)))
1034 } else {
1035 Box::new(BufReader::new(file))
1036 };
1037
1038
1039 let mut nodes: Vec<NCNode<T>> = Vec::new();
1040 let mut edges: Vec<NCEdge<T>> = Vec::new();
1041
1042 for line in reader.lines() {
1044 let l = line.unwrap();
1045 let line_split: Vec<&str> = l.split("\t").collect();
1046 match line_split[0] {
1047 "S" => {
1048
1049 let mut a = l.split("\t");
1050 a.next();
1051
1052 nodes.push(NCNode { id: a.next().unwrap().parse().unwrap(), seq: a.next().unwrap().parse().unwrap(), opt: T::parse(a) });
1053
1054
1055 },
1056 "P" => {
1057
1058 let name: String = String::from(line_split[1]);
1059 let dirs: Vec<bool> = line_split[2].split(",").map(|d| if &d[d.len() - 1..] == "+" { !false } else { !true }).collect();
1060 let node_id: Vec<u32> = line_split[2].split(",").map(|d| d[..d.len() - 1].parse().unwrap()).collect();
1061 let overlap;
1062 if line_split.len() > 3{
1063 overlap = line_split[3].split(",").map(|d| d.parse().unwrap()).collect();
1064 } else {
1065 overlap = vec!["*".to_string(); node_id.len()];
1066 }
1067 self.paths.push(NCPath { name: name, dir: dirs, nodes: node_id, overlap: overlap});
1068
1069
1070
1071 },
1072 "L" => {
1073
1074 if edge {
1075 let mut a = l.split("\t");
1076 a.next();
1077 edges.push(NCEdge{from: a.next().unwrap().parse().unwrap(), from_dir: if a.next().unwrap() == "+" { !false } else { !true }, to: a.next().unwrap().parse().unwrap(), to_dir: if a.next().unwrap() == "+" { !false } else { !true }, overlap: a.next().unwrap().to_string(), opt: T::parse(a)});
1079
1080 }
1081
1082 }
1083 "C" => {
1084 if edge {
1085 let mut a = l.split("\t");
1086 a.next();
1087 edges.push(NCEdge{from: a.next().unwrap().parse().unwrap(), from_dir: if a.next().unwrap() == "+" { !false } else { !true }, to: a.next().unwrap().parse().unwrap(), to_dir: if a.next().unwrap() == "+" { !false } else { !true }, overlap: a.next().unwrap().to_string(), opt: T::parse(a)});
1089
1090 }
1091 }
1092 "H" => {
1093 let header = Header::from_string(&l);
1094 self.header = header;
1095 }
1096 _ => {
1097 }
1098 }
1099
1100 }
1101 nodes.sort_by_key(|a| a.id);
1102 self.nodes.extend(nodes);
1103 self.edges = Some(edges);
1104
1105 }
1106
1107 }
1108
1109
1110 pub fn parse_gfa_file_and_convert(&mut self, file_name: &str, edges: bool) {
1120
1121 let mut graph: Gfa<T> = Gfa::new();
1122 graph.parse_gfa_file(file_name, edges);
1123 let ncgraph: NCGfa<T> = graph.convert_to_ncgraph(&graph);
1124 self.header = ncgraph.header;
1125 self.nodes = ncgraph.nodes;
1126 self.edges = ncgraph.edges;
1127 self.paths = ncgraph.paths;
1128 self.mapper = ncgraph.mapper;
1129
1130 }
1131
1132
1133 pub fn make_mapper(&mut self, graph: & Gfa<T>) -> HashMap<String, usize> {
1135 let mut f = graph.segments.iter().map(|x| x.name.clone()).collect::<Vec<String>>();
1136 f.sort_by_key(|digit| digit.parse::<u32>().unwrap());
1137 let mut wrapper = HashMap::new();
1138 for (i, node) in f.iter().enumerate() {
1139 wrapper.insert(node.clone(), i+1);
1140 }
1141 wrapper
1142 }
1143
1144 pub fn convert_with_mapper(&mut self, mapper: HashMap<String, usize>, graph: &Gfa<T>){
1148 let mut nodes: Vec<NCNode<T>> = graph.segments.iter().map(|x| NCNode{id: mapper.get(&x.name).unwrap().clone() as u32, seq: x.sequence.clone(), opt: x.opt.clone()}).collect();
1149 nodes.sort_by_key(|a| a.id);
1150 self.nodes = nodes;
1151 self.edges = None;
1152 match &graph.links {
1153 Some(value) => {
1154 self.edges = Some(value.iter().map(|x| NCEdge{from: mapper.get(&x.from).unwrap().clone() as u32, from_dir: x.from_dir.clone(), to: mapper.get(&x.to).unwrap().clone() as u32, to_dir: x.to_dir.clone(), overlap: "".to_string(), opt: x.opt.clone() }).collect());
1155
1156 }
1157 _ =>{}
1158 }
1159 self.paths = graph.paths.iter().map(|x| NCPath{name: x.name.clone(), dir: x.dir.clone(), nodes: x.nodes.iter().map(|y| mapper.get(y).unwrap().clone() as u32).collect(), overlap: x.overlap.clone() }).collect();
1160 let mut test: Vec<(&usize, String)> = mapper.iter().map(|a| (a.1, a.0.clone())).collect();
1161 test.sort_by_key(|a| a.0);
1162 self.mapper = test.iter().map(|a| a.1.clone()).collect();
1163
1164 }
1165
1166 pub fn get_old_node(&self, node_id: &usize) -> &String{
1168 &self.mapper[node_id-1]
1169 }
1170
1171 pub fn to_file(self, file_name: &str){
1173 let f = File::create(file_name).expect("Unable to create file");
1174 let mut f = BufWriter::new(f);
1175
1176 write!(f, "{}", self.header.to_string1()).expect("Not able to write");
1177 for node in self.nodes.iter() {
1178 write!(f, "{}", node.to_string()).expect("Not able to write");
1179 }
1180 match &self.edges {
1181 Some(value) =>{
1182 for edge in value.iter() {
1183 write!(f, "{}", edge.to_string_link()).expect("Not able to write");
1184 }
1185 }
1186 _ => {}
1187 }
1188 for path in self.paths.iter() {
1189 write!(f, "{}", path.to_string2()).expect("Not able to write");
1190 }
1191 }
1192
1193 pub fn check_numeric(&self) -> bool{
1195 for (i, x) in self.mapper.iter().enumerate(){
1196 if (i+1).to_string() != *x{
1197 return false
1198 }
1199 }
1200 return true
1201 }
1202
1203 pub fn remove_mapper(&mut self){
1205 if self.check_numeric(){
1206 self.mapper = Vec::new();
1207 }
1208 }
1209
1210
1211}
1212
/// Returns `true` when `nodes` is non-empty and every entry is a non-empty
/// string of ASCII digits.
///
/// An empty slice, an empty entry or any entry containing a non-digit
/// character yields `false`.
pub fn vec_is_digit(nodes: &Vec<&str>) -> bool {
    !nodes.is_empty()
        && nodes
            .iter()
            .all(|node| !node.is_empty() && node.bytes().all(|b| b.is_ascii_digit()))
}
1218
/// Returns `true` when the smallest value in `node` is 1.
///
/// An empty input yields `false`.
pub fn vec_check_start(node: &Vec<usize>) -> bool {
    node.iter().min() == Some(&1)
}
1227
/// Parses every entry as `usize` and returns the values in ascending order.
///
/// # Panics
///
/// Panics when an entry cannot be parsed as `usize`.
pub fn create_sort_numeric(nodes: &Vec<&str>) -> Vec<usize> {
    let mut sorted_ids: Vec<usize> = Vec::with_capacity(nodes.len());
    for node in nodes {
        sorted_ids.push(node.parse::<usize>().unwrap());
    }
    sorted_ids.sort_unstable();
    sorted_ids
}
1234
1235
/// Returns `true` when each value is exactly one larger than the value
/// before it. Inputs with fewer than two values are always compact.
pub fn vec_is_compact(numeric_nodes: &Vec<usize>) -> bool {
    let successors = numeric_nodes.iter().skip(1);
    numeric_nodes
        .iter()
        .zip(successors)
        .all(|(previous, next)| *next == *previous + 1)
}
1240
1241fn get_version(file_path: &str) -> f32{
1242 let file = File::open(file_path).expect("ERROR: CAN NOT READ FILE\n");
1243
1244 let reader: Box<dyn BufRead> = if file_path.ends_with(".gz") {
1246 Box::new(BufReader::new(GzDecoder::new(file)))
1247 } else {
1248 Box::new(BufReader::new(file))
1249 };
1250
1251
1252 let first_line = reader.lines().next().unwrap().unwrap();
1254 let line = first_line.split("\t").nth(1).unwrap();
1255 let version_number = line.split(':').nth(2).unwrap().to_string();
1256 return version_number.parse::<f32>().unwrap();
1257
1258}
1259
/// Splits off the last character of `input_string`.
///
/// Returns the text before the last character together with `true` when
/// that character is `+` and `false` for any other character. Returns
/// `None` for an empty string. The split respects character boundaries, so
/// a multi-byte last character does not cause a panic.
fn split_string(input_string: &str) -> Option<(&str, bool)> {
    let mut chars = input_string.chars();
    let last = chars.next_back()?;
    // After `next_back` the iterator covers everything but the last char.
    Some((chars.as_str(), last == '+'))
}
1272
1273
1274
/// Common interface of the path types: access to the path name.
pub trait IsPath {
    /// Returns a reference to the name of the path.
    fn get_name(&self) -> &String;
}
1283
1284
1285
1286impl IsPath for Path{
1287 fn get_name(&self) -> &String{
1288 &self.name
1289 }
1290}
1291
1292impl IsPath for NCPath{
1293 fn get_name(&self) -> &String{
1294 &self.name
1295 }
1296}
1297
1298#[derive(Debug, Clone)]
1299pub struct Pansn<'a, T: IsPath>{
1313 pub genomes: Vec<Sample<'a, T>>,
1314}
1315
1316#[derive(Debug, Clone)]
1317pub struct Sample<'a, T: IsPath>{
1318 pub name: String,
1319 pub haplotypes: Vec<Haplotype<'a, T>>
1320
1321}
1322
1323#[derive(Debug, Clone)]
1324pub struct Haplotype<'a, T: IsPath> {
1328 pub name: String,
1329 pub paths: Vec<&'a T>
1330}
1331
1332impl <'a, T: IsPath> Pansn<'a, T> {
1333
1334 pub fn from_graph(paths: &'a Vec<T>, del: &str) -> Self{
1336 let mut genomes: Vec<Sample<'a, T>> = Vec::new();
1337
1338 if del == " " {
1340 for path in paths.iter() {
1341 genomes.push(Sample {name: path.get_name().to_string(), haplotypes: vec![Haplotype{name: path.get_name().to_string(), paths: vec![path]}]})
1342 }
1343 } else {
1344 for path in paths.iter() {
1345 let name_split: Vec<&str> = path.get_name().split(del).collect();
1346 let genome;
1347 let haplotype;
1348 if name_split.len() > 1{
1349 genome = name_split[0].to_string();
1350 haplotype = name_split[1].to_string();
1351 } else {
1352 panic!("No Pansn, remove sep or adjust gfa")
1353 }
1354 if let Some((index1, _)) = genomes.iter().enumerate().find(|(_, item)| item.name == genome) {
1356 let genome = &mut genomes[index1];
1357 if let Some((index2, _)) = genome.haplotypes.iter().enumerate().find(|(_, item)| item.name == haplotype) {
1359 let haplo = &mut genome.haplotypes[index2];
1360 haplo.paths.push(path);
1361 } else {
1362 let haplo = Haplotype{name: haplotype, paths: vec![path]};
1363 genome.haplotypes.push(haplo);
1364
1365 }
1366 } else {
1367 let haplo = Haplotype{name: haplotype, paths: vec![path]};
1368 let genome = Sample {name: genome, haplotypes: vec![haplo]};
1369 genomes.push(genome);
1370 println!("Did not find the specific string.");
1371 }
1372
1373 }
1374 }
1375 Pansn {
1376 genomes,
1377 }
1378 }
1379
1380 pub fn get_haplo_path(&self) -> Vec<(String, Vec<&'a T>)>{
1381 let mut result = Vec::new();
1382 for x in self.genomes.iter(){
1383 for y in x.haplotypes.iter(){
1384 let kk: Vec<&T> = y.paths.iter().map(|i| *i).collect();
1385 result.push((x.name.clone() + "#" + &y.name, kk));
1386 }
1387 }
1388
1389 result
1390 }
1391
1392 pub fn get_path_genome(&self) -> Vec<(String, Vec<&'a T>)>{
1393 let mut result = Vec::new();
1394 for x in self.genomes.iter(){
1395 let mut aa = Vec::new();
1396 for y in x.haplotypes.iter(){
1397 let kk: Vec<&T> = y.paths.iter().map(|i| *i).collect();
1398 aa.extend(kk);
1399 }
1400 result.push((x.name.clone(), aa));
1401 }
1402
1403 result
1404 }
1405
1406 pub fn get_paths_direct(&self) -> Vec<(String, Vec<&'a T>)>{
1407 let mut result = Vec::new();
1408 for x in self.genomes.iter(){
1409 for y in x.haplotypes.iter(){
1410 y.paths.iter().for_each(|i| result.push((i.get_name().to_string(), vec![*i])))
1411 }
1412 }
1413 return result
1414 }
1415
1416}
1417