gtars_igd/
lib.rs

1#![allow(nonstandard_style)]
2
3pub mod create;
4pub mod search;
5
6#[cfg(feature = "bloom")]
7pub mod igdbloom; // Bloom filter work is only available as library functions NOT CLI
8
9#[cfg(test)]
10mod tests {
11
12    use rstest::rstest;
13
14    use crate::create::{create_igd_f, gdata_t, igd_add, igd_save_db, igd_saveT, igd_t, parse_bed};
15    // Import get_igd_info if it is defined in another module
16    use crate::search::{
17        get_file_info_tsv, get_igd_info, get_tsv_path, igd_search, igd_t_from_disk,
18    };
19
20    use std::collections::HashMap;
21    use std::path::{Path, PathBuf};
22
23    use byteorder::{LittleEndian, ReadBytesExt};
24    use std::collections::HashSet;
25    use std::fs::OpenOptions;
26    use std::io::{BufReader, Read, Seek, SeekFrom};
27
28    // IGD TESTS
29    #[rstest]
30    fn test_igd_parse_bed_file() {
31        // Given some random line from a  bed file...
32        let bed_file_string =
33            String::from("chr1	32481	32787	SRX4150706.05_peak_1	92	.	7.69231	13.22648	9.25988	155");
34
35        //Placeholder start and end values
36        let mut start = 0;
37        let mut end = 0;
38        let mut va = 0;
39
40        let result = parse_bed(&bed_file_string, &mut start, &mut end, &mut va).unwrap(); // this will return
41
42        let unwrapped_result = result.as_str();
43
44        assert_eq!(unwrapped_result, "chr1");
45
46        // Ensure start and end is modified via parse_bed
47        assert_eq!(start, 32481);
48        assert_eq!(end, 32787);
49    }
50
51    #[rstest]
52    fn test_igd_create_short_long_regions() {
53        // Depending on start and end coordinates which are divided by nbp=16384
54        // the number of tiles per ctg are adjusted, this tests to ensure they are created appropriately
55        let tempdir = tempfile::tempdir().unwrap();
56        let path = PathBuf::from(&tempdir.path());
57        let db_path_unwrapped = path.into_os_string().into_string().unwrap();
58        let db_output_path = db_path_unwrapped;
59
60        let path_to_crate = env!("CARGO_MANIFEST_DIR");
61        let testfilelists = PathBuf::from(path_to_crate)
62            .parent()
63            .unwrap()
64            .join("tests/data/igd_file_list_01/")
65            .to_string_lossy()
66            .to_string();
67
68        let demo_name = String::from("demo");
69
70        let igd = create_igd_f(&db_output_path, &testfilelists, &demo_name);
71        assert_eq!(igd.ctg[0].name, "chr1");
72        assert_eq!(igd.ctg[1].name, "chr2");
73        assert_eq!(igd.ctg[2].name, "chr3");
74        assert_eq!(igd.nctg, 3);
75
76        assert_eq!(igd.ctg[0].mTiles, 4); // chr1 has 4 Tiles because of the 32768, and 49152 starts
77        assert_eq!(igd.ctg[1].mTiles, 1); // chr only has 1 Tile due to the 200 start
78
79        assert_eq!(igd.ctg[0].gTile[0].gList[0].start, 1); // look specific tile's start
80        assert_eq!(
81            igd.ctg[0].gTile[(igd.ctg[0].mTiles - 1) as usize].gList[0].start,
82            49152
83        ); // look specific tile's start
84
85        assert_eq!(igd.ctg[0].gTile[0].nCnts, 2); // look at nCnts
86        assert_eq!(igd.ctg[0].gTile[1].nCnts, 0); // look at nCnts
87        assert_eq!(igd.ctg[0].gTile[2].nCnts, 1); // look at nCnts
88
89        // Overall stats
90        assert_eq!(igd.total_regions, 8);
91        assert_eq!(igd.total_average, 998.0);
92        assert_eq!(igd.average_length, 124.75);
93    }
94
95    #[rstest]
96    fn test_igd_create_then_load_from_disk() {
97        // Depending on start and end coordinates which are divided by nbp=16384
98        // the number of tiles per ctg are adjusted, this tests to ensure they are created appropriately
99        let tempdir = tempfile::tempdir().unwrap();
100        let path = PathBuf::from(&tempdir.path());
101        let mut db_path_unwrapped = path.into_os_string().into_string().unwrap();
102        db_path_unwrapped.push('/');
103        let db_output_path = db_path_unwrapped.clone();
104
105        let path_to_crate = env!("CARGO_MANIFEST_DIR");
106        let testfilelists = PathBuf::from(path_to_crate)
107            .parent()
108            .unwrap()
109            .join("tests/data/igd_file_list_01/")
110            .to_string_lossy()
111            .to_string();
112
113        let demo_name = String::from("demo");
114
115        let igd_saved = create_igd_f(&db_output_path, &testfilelists, &demo_name);
116
117        println!("dboutput_path {}", db_output_path);
118
119        db_path_unwrapped.push_str("/demo.igd");
120
121        let mut hash_table: HashMap<String, i32> = HashMap::new();
122
123        // Create IGD Struct from database
124        let mut igd_from_disk: igd_t_from_disk =
125            get_igd_info(&db_path_unwrapped, &mut hash_table).expect("Could not open IGD");
126        let tsv_path = get_tsv_path(db_path_unwrapped.as_str()).unwrap();
127        get_file_info_tsv(tsv_path, &mut igd_from_disk).unwrap(); //sets igd.finfo
128
129        assert_eq!(igd_saved.ctg.len(), igd_from_disk.nCtg as usize);
130
131        assert_eq!(igd_from_disk.nFiles, 1);
132
133        assert_eq!(
134            igd_from_disk.nCnt[0].len(),
135            igd_saved.ctg[0].mTiles as usize
136        );
137        assert_eq!(
138            igd_from_disk.nCnt[1].len(),
139            igd_saved.ctg[1].mTiles as usize
140        );
141        assert_eq!(
142            igd_from_disk.nCnt[2].len(),
143            igd_saved.ctg[2].mTiles as usize
144        );
145
146        assert_eq!(igd_from_disk.nCnt[0][0], igd_saved.ctg[0].gTile[0].nCnts);
147        assert_eq!(igd_from_disk.nCnt[0][1], igd_saved.ctg[0].gTile[1].nCnts);
148        assert_eq!(igd_from_disk.nCnt[0][2], igd_saved.ctg[0].gTile[2].nCnts);
149        assert_eq!(igd_from_disk.nCnt[0][3], igd_saved.ctg[0].gTile[3].nCnts);
150
151        // Check to see if the regions on disk are the same as the original igd (minus the unused zeros)
152        let dbpath = std::path::Path::new(&db_path_unwrapped);
153        let db_file = OpenOptions::new()
154            .create(true)
155            .append(true)
156            .read(true)
157            .open(dbpath)
158            .unwrap();
159        let mut db_reader = BufReader::new(db_file);
160
161        for k in 0..3 {
162            let nCnt_len = igd_from_disk.nCnt[k].len();
163
164            for l in 0..nCnt_len {
165                let mut a: HashSet<i32> = Default::default();
166                let mut b: HashSet<i32> = Default::default();
167
168                let tmpi = igd_from_disk.nCnt[k][l]; // number of gdata_t to read
169
170                //println!("Here is k {}, l {}, and igd_from_disk.tIdx[k][l] {}",k,l, igd_from_disk.tIdx[k][l]);
171                db_reader
172                    .seek(SeekFrom::Start(igd_from_disk.tIdx[k][l] as u64)) // [k]contig [l] tile position
173                    .unwrap();
174
175                let mut gData: Vec<gdata_t> = Vec::new();
176
177                //println!("Creating gData with tmpi {}", tmpi);
178                for _j in 0..tmpi {
179                    gData.push(gdata_t::default())
180                }
181
182                for i in 0..tmpi {
183                    // number of gdata_t to read
184                    //println!("Iterating with i {} of tmpi {} ",i,tmpi);
185                    let mut buf = [0u8; 16];
186
187                    let n = db_reader.read(&mut buf).unwrap();
188
189                    if n == 0 {
190                        //println!("Breaking loop while reading tempfile");
191                        break;
192                    } else if n != 16 {
193                        //panic!("Cannot read temp file.");
194                        break;
195                    }
196
197                    let mut rdr = &buf[..] as &[u8];
198                    let idx = rdr.read_i32::<LittleEndian>().unwrap();
199                    let start = rdr.read_i32::<LittleEndian>().unwrap();
200                    let end = rdr.read_i32::<LittleEndian>().unwrap();
201                    let value = rdr.read_i32::<LittleEndian>().unwrap();
202
203                    //println!("Looping through g_datat in temp files");
204                    //println!("Chr_name: {} Filename: {}  start: {} end: {}", igd_from_disk.cName[k], igd_from_disk.file_info[idx as usize].fileName, start, end);
205
206                    gData[i as usize] = gdata_t {
207                        idx,
208                        start,
209                        end,
210                        value,
211                    };
212                }
213
214                //println!("here is k {}, l {}",k,l);
215                for g in gData.iter() {
216                    //println!("Inserting {} from gData on Disk", g.start);
217                    a.insert(g.start);
218                }
219
220                for g in igd_saved.ctg[k].gTile[l].gList.iter() {
221                    //println!("Inserting {} from original gList ", g.start);
222                    b.insert(g.start);
223                }
224                //println!("A: {:?}", a);
225                //println!("B: {:?}", b);
226                // There difference should at most be a 0 from unused tiles, therefore the difference length should at MOST be 1.
227                let diff = b.difference(&a).collect::<Vec<&i32>>();
228                //println!("Difference: {:?}", diff);
229                assert!(diff.len() <= 1)
230            }
231        }
232    }
233
234    #[rstest]
235    fn test_igd_create_removes_temp_dir() {
236        let tempdir = tempfile::tempdir().unwrap();
237        let path = PathBuf::from(&tempdir.path());
238        let mut db_path_unwrapped = path.into_os_string().into_string().unwrap();
239        db_path_unwrapped.push('/');
240        let db_output_path = db_path_unwrapped.clone();
241
242        let path_to_crate = env!("CARGO_MANIFEST_DIR");
243        let testfilelists = PathBuf::from(path_to_crate)
244            .parent()
245            .unwrap()
246            .join("tests/data/igd_file_list_01/")
247            .to_string_lossy()
248            .to_string();
249
250        let demo_name = String::from("demo");
251
252        let _igd_saved = create_igd_f(&db_output_path, &testfilelists, &demo_name);
253
254        let temp_folder = format!("{}{}", db_output_path, "data0/");
255        let path = Path::new(&temp_folder);
256
257        // Assert path does not exist
258        assert!(!path.exists());
259    }
260
261    #[rstest]
262    #[case(
263        "/../tests/data/igd_file_list_01/",
264        "/../tests/data/igd_query_files/query1.bed",
265        8,
266        8
267    )]
268    // #[case(
269    //     "/tests/data/igd_file_list_02/",
270    //     "/tests/data/igd_query_files/query2.bed",
271    //     4,
272    //     1
273    // )]
274    fn test_igd_create_then_search(
275        #[case] input: &str,
276        #[case] query_file: &str,
277        #[case] expected_regions: u32,
278        #[case] expected_hits: u32,
279    ) {
280        let tempdir = tempfile::tempdir().unwrap();
281        let path = PathBuf::from(&tempdir.path());
282        let mut db_path_unwrapped = path.into_os_string().into_string().unwrap();
283        db_path_unwrapped.push('/');
284        let db_output_path = db_path_unwrapped.clone();
285
286        let path_to_crate = env!("CARGO_MANIFEST_DIR");
287        let testfilelists = PathBuf::from(path_to_crate)
288            .parent()
289            .unwrap()
290            .join("tests")
291            .join(input.trim_start_matches('/'))
292            .to_string_lossy()
293            .to_string();
294
295        let demo_name = String::from("demo");
296
297        let _igd_saved = create_igd_f(&db_output_path, &testfilelists, &demo_name);
298
299        println!("dboutput_path {}", db_output_path);
300
301        db_path_unwrapped.push_str("/demo.igd");
302
303        let queryfile = PathBuf::from(path_to_crate)
304            .parent()
305            .unwrap()
306            .join("tests")
307            .join(query_file.trim_start_matches('/'))
308            .to_string_lossy()
309            .to_string();
310        let res = igd_search(&db_path_unwrapped, &queryfile).expect("Error during testing:");
311        let mut res_iter = res[1].split('\t');
312
313        // Skip the first two columns
314        res_iter.next().unwrap();
315
316        // Extract the third and fourth columns
317        let second_column = res_iter.next().unwrap().to_string();
318        let third_column = res_iter.next().unwrap().to_string();
319
320        println!("Number of Regions: {}", second_column);
321        println!("Number of Hits: {}", third_column);
322
323        assert_eq!(second_column, expected_regions.to_string());
324        assert_eq!(third_column, expected_hits.to_string());
325    }
326
327    #[rstest]
328    fn test_igd_add() {
329        // First create a new igd struct
330
331        let mut igd = igd_t::new();
332        // create hash table
333        let mut hash_table: HashMap<String, i32> = HashMap::new();
334
335        // Set values of struct
336        igd.gType = 1;
337        igd.nbp = 16384; // from og code tile_size = 16384;  -> this is the bin size (2^14) from the original paper
338        igd.nctg = 0;
339        igd.mctg = 32;
340        igd.total = 0;
341
342        // Given some random line from a bed file...
343        let bed_file_string =
344            String::from("chr1	32481	32787	SRX4150706.05_peak_1	92	.	7.69231	13.22648	9.25988	155");
345        //Placeholder start and end values
346        let mut start = 0;
347        let mut end = 0;
348        let mut va = 0;
349
350        // We've now parsed to get the chromosome and the new start and end of the current contig.
351        let result = parse_bed(&bed_file_string, &mut start, &mut end, &mut va).unwrap();
352        let chromosome = result;
353
354        // Add to the database (hash table)
355        igd_add(&mut igd, &mut hash_table, chromosome, start, end, 0, 0);
356    }
357
358    #[rstest]
359    fn test_igd_saving() {
360        let mut igd = igd_t::new();
361        // create hash table
362        let mut hash_table: HashMap<String, i32> = HashMap::new();
363
364        // Set values of struct
365        igd.gType = 1;
366        igd.nbp = 16384; // from og code tile_size = 16384;  -> this is the bin size (2^14) from the original paper
367        igd.nctg = 0;
368        igd.mctg = 32;
369        igd.total = 0;
370
371        // Given some random line from a bed file...
372        let bed_file_string =
373            String::from("chr1	32481	32787	SRX4150706.05_peak_1	92	.	7.69231	13.22648	9.25988	155");
374        //Placeholder start and end values
375        let mut start = 0;
376        let mut end = 0;
377        let mut va = 0;
378
379        // We've now parsed to get the chromosome and the new start and end of the current contig.
380        let result = parse_bed(&bed_file_string, &mut start, &mut end, &mut va).unwrap();
381        let chromosome = result;
382
383        // Add to the database (hash table)
384        igd_add(&mut igd, &mut hash_table, chromosome, start, end, 0, 0);
385
386        let tempdir = tempfile::tempdir().unwrap();
387        let path = PathBuf::from(&tempdir.path());
388
389        // For some reason, you cannot chain .as_string() to .unwrap() and must create a new line.
390        let db_path_unwrapped = path.into_os_string().into_string().unwrap();
391        let db_output_path = &db_path_unwrapped;
392
393        // First test igd_saveT
394        igd_saveT(&mut igd, db_output_path);
395
396        // then test saveing main databse
397
398        igd_save_db(&mut igd, db_output_path, &String::from("randomname"));
399    }
400}