Skip to main content

gtars_core/models/
bed_set.rs

1use crate::models::region_set::RegionSet;
2use crate::utils::read_bedset_file;
3
4use anyhow::Result;
5use md5::{Digest, Md5};
6use std::fmt::Debug;
7use std::path::{Path, PathBuf};
8
9#[derive(Clone, Debug)]
10pub struct BedSet {
11    pub region_sets: Vec<RegionSet>,
12    pub path: Option<PathBuf>,
13}
14
15pub struct BedSetIterator<'a> {
16    bed_set: &'a BedSet,
17    index: usize,
18}
19
20impl TryFrom<&Path> for BedSet {
21    type Error = anyhow::Error;
22
23    fn try_from(value: &Path) -> Result<Self> {
24        let region_sets_paths = read_bedset_file(value)?;
25        let mut region_sets = Vec::new();
26        for region_set_path in region_sets_paths {
27            let region_set = RegionSet::try_from(region_set_path)?;
28            region_sets.push(region_set);
29        }
30        Ok(BedSet {
31            region_sets,
32            path: Some(value.to_owned()),
33        })
34    }
35}
36
37impl TryFrom<&str> for BedSet {
38    type Error = anyhow::Error;
39
40    fn try_from(value: &str) -> Result<Self> {
41        BedSet::try_from(Path::new(value))
42    }
43}
44
45impl TryFrom<String> for BedSet {
46    type Error = anyhow::Error;
47
48    fn try_from(value: String) -> Result<Self> {
49        BedSet::try_from(Path::new(&value))
50    }
51}
52
53impl TryFrom<PathBuf> for BedSet {
54    type Error = anyhow::Error;
55
56    fn try_from(value: PathBuf) -> Result<Self> {
57        BedSet::try_from(value.as_path())
58    }
59}
60
61impl TryFrom<Vec<&Path>> for BedSet {
62    type Error = anyhow::Error;
63
64    fn try_from(value: Vec<&Path>) -> Result<Self> {
65        let mut region_sets = Vec::new();
66        for region_set_path in value {
67            let region_set = RegionSet::try_from(region_set_path)?;
68            region_sets.push(region_set);
69        }
70        Ok(BedSet {
71            region_sets,
72            path: None,
73        })
74    }
75}
76
77impl TryFrom<Vec<&str>> for BedSet {
78    type Error = anyhow::Error;
79
80    fn try_from(value: Vec<&str>) -> Result<Self> {
81        let mut region_sets = Vec::new();
82        for region_set_path in value {
83            let region_set = RegionSet::try_from(region_set_path)?;
84            region_sets.push(region_set);
85        }
86        Ok(BedSet {
87            region_sets,
88            path: None,
89        })
90    }
91}
92
93impl TryFrom<Vec<String>> for BedSet {
94    type Error = anyhow::Error;
95
96    fn try_from(value: Vec<String>) -> Result<Self> {
97        let mut region_sets = Vec::new();
98        for region_set_path in value {
99            let region_set = RegionSet::try_from(region_set_path)?;
100            region_sets.push(region_set);
101        }
102        Ok(BedSet {
103            region_sets,
104            path: None,
105        })
106    }
107}
108
109impl TryFrom<Vec<PathBuf>> for BedSet {
110    type Error = anyhow::Error;
111
112    fn try_from(value: Vec<PathBuf>) -> Result<Self> {
113        let mut region_sets = Vec::new();
114        for region_set_path in value {
115            let region_set = RegionSet::try_from(region_set_path)?;
116            region_sets.push(region_set);
117        }
118        Ok(BedSet {
119            region_sets,
120            path: None,
121        })
122    }
123}
124
125impl From<Vec<RegionSet>> for BedSet {
126    fn from(region_sets: Vec<RegionSet>) -> Self {
127        BedSet {
128            region_sets,
129            path: None,
130        }
131    }
132}
133
134impl<'a> Iterator for BedSetIterator<'a> {
135    type Item = &'a RegionSet;
136
137    fn next(&mut self) -> Option<Self::Item> {
138        if self.index < self.bed_set.region_sets.len() {
139            let region_set = &self.bed_set.region_sets[self.index];
140            self.index += 1;
141            Some(region_set)
142        } else {
143            None
144        }
145    }
146}
147
148impl<'a> IntoIterator for &'a BedSet {
149    type Item = &'a RegionSet;
150    type IntoIter = BedSetIterator<'a>;
151
152    fn into_iter(self) -> Self::IntoIter {
153        BedSetIterator {
154            bed_set: self,
155            index: 0,
156        }
157    }
158}
159
160impl BedSet {
161    pub fn add(&mut self, region_set: RegionSet) {
162        self.region_sets.push(region_set);
163    }
164
165    pub fn is_empty(&self) -> bool {
166        if self.region_sets.len() == 0 {
167            return true;
168        }
169        false
170    }
171
172    pub fn len(&self) -> usize {
173        self.region_sets.len()
174    }
175
176    pub fn identifier(&self) -> String {
177        let mut bedfile_ids = Vec::new();
178        for rs in &self.region_sets {
179            let id = rs.identifier();
180            bedfile_ids.push(id);
181        }
182        bedfile_ids.sort();
183        let mut hasher = Md5::new();
184        let combined = bedfile_ids.join("");
185        hasher.update(combined.as_bytes());
186
187        let hash = hasher.finalize();
188        let bedset_digest: String = format!("{:x}", hash);
189
190        bedset_digest
191    }
192}
193
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::read_dir;
    use std::io::Error;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Location of the BED fixtures, resolved relative to the current
    /// working directory of the test process.
    fn get_test_bed_dir() -> Result<PathBuf, Error> {
        // Propagate a failing `current_dir` through the declared `Result`
        // instead of panicking inside a fallible helper.
        Ok(std::env::current_dir()?.join("../tests/data/bedset"))
    }

    /// Every entry of the fixture directory, sorted so tests are deterministic.
    fn get_test_bed_paths() -> Vec<PathBuf> {
        let dir = get_test_bed_dir().expect("Failed to resolve test data directory");
        let mut paths: Vec<PathBuf> = read_dir(dir)
            .expect("Failed to read test data directory")
            .map(|entry| entry.expect("Failed to read directory entry").path())
            .collect();
        paths.sort();
        paths
    }

    /// Writes the canonicalized form of each path, one per line, to a
    /// temporary file that serves as a bedset file.
    fn write_temp_bedset_list(paths: Vec<PathBuf>) -> NamedTempFile {
        let mut temp_file = NamedTempFile::new().expect("Failed to create temporary file");

        for path in paths {
            let abs_path = std::fs::canonicalize(&path)
                .unwrap_or_else(|_| panic!("Failed to canonicalize path: {}", path.display()));
            writeln!(temp_file, "{}", abs_path.display())
                .unwrap_or_else(|_| panic!("Failed to write to temp file: {}", abs_path.display()));
        }

        temp_file
    }

    #[test]
    fn test_open_from_file_path() {
        let paths = get_test_bed_paths();
        let temp_file = write_temp_bedset_list(paths);
        assert!(BedSet::try_from(temp_file.path()).is_ok());
    }

    #[test]
    fn test_try_from_pathbuf_vec() {
        let paths = get_test_bed_paths();
        assert!(BedSet::try_from(paths).is_ok());
    }

    // Despite its name this exercises the `Vec<String>` conversion; the
    // `Vec<&str>` conversion is covered by `test_try_from_str_ref_vec`.
    #[test]
    fn test_try_from_str_vec() {
        let paths = get_test_bed_paths();
        let path_strs: Vec<String> = paths.iter().map(|p| p.to_string_lossy().into()).collect();
        assert!(BedSet::try_from(path_strs).is_ok());
    }

    #[test]
    fn test_try_from_str_ref_vec() {
        let paths = get_test_bed_paths();
        let path_strs: Vec<String> = paths.iter().map(|p| p.to_string_lossy().into()).collect();
        let refs: Vec<&str> = path_strs.iter().map(String::as_str).collect();
        assert!(BedSet::try_from(refs).is_ok());
    }

    #[test]
    fn test_try_from_path_vec() {
        let paths = get_test_bed_paths();
        let refs: Vec<&Path> = paths.iter().map(|p| p.as_path()).collect();
        assert!(BedSet::try_from(refs).is_ok());
    }

    #[test]
    fn test_bedset_add_and_len() {
        let paths = get_test_bed_paths();
        let mut bedset = BedSet::try_from(paths[..1].to_vec()).unwrap();
        let len_before = bedset.len();

        let additional = RegionSet::try_from(paths[1].as_path()).unwrap();
        bedset.add(additional);
        assert_eq!(bedset.len(), len_before + 1);
    }

    #[test]
    fn test_bedset_is_empty() {
        let paths = get_test_bed_paths();
        let bedset = BedSet::try_from(paths).unwrap();
        assert!(!bedset.is_empty());
    }

    #[test]
    fn test_empty_bedset() {
        let bedset = BedSet::from(Vec::new());
        assert!(bedset.is_empty());
        assert_eq!(bedset.len(), 0);
        assert!(bedset.path.is_none());
        assert_eq!((&bedset).into_iter().count(), 0);
    }

    #[test]
    fn test_from_vec_regionset() {
        let paths = get_test_bed_paths();
        let region_sets: Vec<RegionSet> = paths
            .iter()
            .map(|p| RegionSet::try_from(p.as_path()).unwrap())
            .collect();
        let bedset = BedSet::from(region_sets);
        assert_eq!(bedset.len(), paths.len());
    }

    #[test]
    fn test_iterator_visits_every_region_set() {
        let paths = get_test_bed_paths();
        let bedset = BedSet::try_from(paths).unwrap();
        assert_eq!((&bedset).into_iter().count(), bedset.len());
    }

    #[test]
    fn test_calculate_identifier() {
        let paths = get_test_bed_paths();
        let temp_file = write_temp_bedset_list(paths);

        let bs = BedSet::try_from(temp_file.path()).unwrap();
        assert_eq!("17a10ce63638431b34e7d044c3eac186", bs.identifier());
    }

    #[test]
    fn test_identifier_is_order_independent() {
        let paths = get_test_bed_paths();
        let mut reversed = paths.clone();
        reversed.reverse();

        let forward = BedSet::try_from(paths).unwrap();
        let backward = BedSet::try_from(reversed).unwrap();
        assert_eq!(forward.identifier(), backward.identifier());
    }
}
299}