bustools_cli/
busmerger.rs1use bustools_core::{
3 io::{BusParams, BusReader, BusWriterPlain}, iterators::CbUmiGroupIterator, merger::MultiIterator
4};
5use std::collections::HashMap;
6
7pub fn merge_busfiles_on_overlap(busfile1: &str, busfile2: &str, outfile1: &str, outfile2: &str) {
16 let params = BusParams {cb_len: 16, umi_len: 12};
27 let mut writers: HashMap<String, BusWriterPlain> = HashMap::from([
28 (
29 "f1".to_string(),
30 BusWriterPlain::new(outfile1, params.clone()),
31 ),
32 (
33 "f2".to_string(),
34 BusWriterPlain::new(outfile2, params),
35 ),
36 ]);
37
38
39 let h: HashMap<String, _> = HashMap::from([
40 ("f1".to_string(), BusReader::new(busfile1).groupby_cbumi()),
41 ("f2".to_string(), BusReader::new(busfile2).groupby_cbumi()),
42 ]);
43 let cbumi_merge_iter = MultiIterator::new(h);
44
45 for (_cbumi, record_map) in cbumi_merge_iter {
46 if record_map.len() == 2 {
48 for (name, records) in record_map {
49 let w1 = writers.get_mut(&name).unwrap();
50 w1.write_records(&records)
51 }
52 }
53 }
54
55 }
64
65#[cfg(test)]
66mod tests {
67 use super::*;
68 use bustools_core::io::{setup_busfile, BusReader, BusRecord};
69
70 fn get_records(fname: &str) -> Vec<BusRecord> {
71 let reader = BusReader::new(fname);
72 let records: Vec<BusRecord> = reader.into_iter().collect();
73 records
74 }
75
76 #[test]
77 fn test_merge() {
78 let r1 = BusRecord { CB: 0, UMI: 21, EC: 0, COUNT: 2, FLAG: 0 };
79 let r2 = BusRecord { CB: 1, UMI: 2, EC: 0, COUNT: 12, FLAG: 0 };
80 let r3 = BusRecord { CB: 1, UMI: 3, EC: 0, COUNT: 2, FLAG: 0 };
81 let r4 = BusRecord { CB: 3, UMI: 0, EC: 0, COUNT: 2, FLAG: 0 };
82 let r5 = BusRecord { CB: 3, UMI: 0, EC: 1, COUNT: 2, FLAG: 0 };
83
84 let v1 = vec![r1.clone(), r2.clone(), r3.clone(), r4.clone(), r5.clone()];
85
86 let s2 = BusRecord { CB: 1, UMI: 2, EC: 1, COUNT: 12, FLAG: 0 };
87 let s3 = BusRecord { CB: 2, UMI: 3, EC: 1, COUNT: 2, FLAG: 0 };
88 let s4 = BusRecord { CB: 3, UMI: 0, EC: 1, COUNT: 2, FLAG: 0 };
89
90 let v2 = vec![s2.clone(), s3.clone(), s4.clone()];
91
92 let (input1, _dir1) = setup_busfile(&v1); let (input2, _dir2) = setup_busfile(&v2); let output1_path = _dir1.path().join("merge1_out.bus");
99 let output1 = output1_path.to_str().unwrap();
100 let output2_path = _dir2.path().join("merge2_out.bus");
101 let output2 = output2_path.to_str().unwrap();
102
103 merge_busfiles_on_overlap(&input1, &input2, output1, output2);
104
105 assert_eq!(get_records(output1), vec![r2, r4, r5]);
106 assert_eq!(get_records(output2), vec![s2, s4]);
107 }
108}