Skip to main content

gtars_core/
lib.rs

//!
//! # Common, core utilities for `gtars`
//! This crate contains the core utilities used across `gtars`. Although it can be used on its own,
//! it is usually not interfaced with directly unless you are working with one of the [models].
//!
//! ## Examples
//! ### Create region set
//! ```rust
//! use std::path::Path;
//! use gtars_core::models::RegionSet;
//!
//! let path_to_tokenize_bed_file = "../tests/data/to_tokenize.bed";
//! let rs = RegionSet::try_from(Path::new(path_to_tokenize_bed_file)).unwrap();
//!
//! println!("{:?}", rs.regions);
//! ```
//!
18
19pub mod consts;
20pub mod errors;
21pub mod models;
22pub mod utils;
23
#[cfg(test)]
mod tests {
    //! Unit tests for the `Region` and `RegionSet` models.
    //!
    //! The file-based tests load BED fixtures through relative paths
    //! (`../tests/data/...`), so they depend on the working directory the
    //! test binary is run from.

    use pretty_assertions::assert_eq;
    use rstest::*;
    use tempfile::NamedTempFile;

    use super::models::{Region, RegionSet};
    use std::path::Path;

    // NOTE(review): `path_to_data`, `path_to_anndata_file`, `path_to_r2v_repo`,
    // `bb_bed_id` and `path_to_gtok_file` are not requested by any test in this
    // module; they are kept as-is but look like candidates for removal.

    /// Directory holding the shared test data.
    #[fixture]
    fn path_to_data() -> &'static str {
        "../tests/data"
    }

    /// Plain-text BED fixture used by the region-set tests.
    #[fixture]
    fn path_to_bed_file() -> &'static str {
        "../tests/data/tokenizers/peaks.bed"
    }

    /// Gzipped BED fixture; the tests expect the same region count as the plain one.
    #[fixture]
    fn path_to_bed_file_gzipped() -> &'static str {
        "../tests/data/tokenizers/peaks.bed.gz"
    }

    /// Path to an `.h5ad` fixture.
    #[fixture]
    fn path_to_anndata_file() -> &'static str {
        "../tests/data/pbmc_hg38.h5ad"
    }

    /// Repository identifier string for an r2v model.
    #[fixture]
    fn path_to_r2v_repo() -> &'static str {
        "databio/r2v-luecken2021-hg38-v2"
    }

    /// Identifier string for a BED record.
    #[fixture]
    fn bb_bed_id() -> &'static str {
        "fa09672b962809b408b356728d81640e"
    }

    /// Path to a `.gtok` token file.
    #[fixture]
    fn path_to_gtok_file() -> &'static str {
        "../tests/data/out/tokens.gtok"
    }

    /// A `Region` built from a struct literal exposes the fields it was given.
    #[rstest]
    fn test_region() {
        let region = Region {
            chr: "chr1".to_string(),
            start: 100,
            end: 200,
            rest: None,
        };

        assert_eq!(region.chr, "chr1");
        assert_eq!(region.start, 100);
        assert_eq!(region.end, 200);
    }

    /// Loading the plain BED fixture yields 25 regions.
    #[rstest]
    fn test_extract_regions_from_bed_file(path_to_bed_file: &str) {
        let path = Path::new(path_to_bed_file);
        let regions = RegionSet::try_from(path).unwrap().regions;
        // `assert_eq!` (rather than `assert!(a == b)`) reports the actual count on failure
        // and matches the other tests in this module.
        assert_eq!(regions.len(), 25);
    }

    /// Loading the gzipped BED fixture yields 25 regions.
    #[rstest]
    fn test_extract_regions_from_bed_file_gzipped(path_to_bed_file_gzipped: &str) {
        let path = Path::new(path_to_bed_file_gzipped);
        let regions = RegionSet::try_from(path).unwrap().regions;
        assert_eq!(regions.len(), 25);
    }

    /// `RegionSet::len` reports 25 for the plain BED fixture.
    #[rstest]
    fn test_region_set_from_bed(path_to_bed_file: &str) {
        let path = Path::new(path_to_bed_file);
        let rs = RegionSet::try_from(path).unwrap();

        assert_eq!(rs.len(), 25);
    }

    /// Building a `RegionSet` from the raw file bytes gives the same length as
    /// building it from the file path.
    #[rstest]
    fn test_region_set_from_bytes(path_to_bed_file: &str) {
        let path = Path::new(path_to_bed_file);
        let rs = RegionSet::try_from(path).unwrap();

        // read the whole fixture into memory in one call
        let bytes = std::fs::read(path).unwrap();

        let rs2 = RegionSet::from(bytes.as_slice());

        assert_eq!(rs2.len(), rs.len());
    }

    /// Writing a `RegionSet` with `to_bed` and reading the file back yields 25 regions.
    #[rstest]
    fn test_region_set_to_bed(path_to_bed_file: &str) {
        let path = Path::new(path_to_bed_file);
        let rs = RegionSet::try_from(path).unwrap();

        // create a temporary file and keep only its path
        let tmp_file = NamedTempFile::new().unwrap();
        let tmp_path = tmp_file.into_temp_path();
        // `TempPath` dereferences to `Path`, so borrow it directly instead of
        // round-tripping through `&str` (which would panic on a non-UTF-8 temp dir).
        let bed_path: &Path = &tmp_path;

        // write the region set to the temporary file
        rs.to_bed(bed_path).unwrap();

        // read the temporary file back in as a region set
        let rs2 = RegionSet::try_from(bed_path).unwrap();

        assert_eq!(rs2.len(), 25);
    }
}
140}