gtars_core/
lib.rs

1//!
2//! # Common, core utilities for `gtars`
//! This module contains core utilities shared across the `gtars` crate. Although it can be used directly, it is
//! usually only interfaced with when working with any of the [models].
5//!
6//! ## Examples
7//! ### Create region set
8//! ```rust
9//! use std::path::Path;
10//! use gtars_core::models::RegionSet;
11//!
12//! let path_to_tokenize_bed_file = "../tests/data/to_tokenize.bed";
13//! let rs = RegionSet::try_from(Path::new(path_to_tokenize_bed_file)).unwrap();
14//!
15//! println!("{:?}", rs.regions);
16//! ```
17//!
18
19pub mod consts;
20pub mod models;
21pub mod utils;
22
#[cfg(test)]
mod tests {
    //! Unit tests for the core models: building a `Region` by hand and loading,
    //! converting and writing a `RegionSet` from the BED fixtures.

    use pretty_assertions::assert_eq;
    use rstest::*;
    use tempfile::NamedTempFile;

    use super::models::{Region, RegionSet};
    use std::path::Path;

    /// Relative path to the shared test-data directory.
    ///
    /// Not referenced by any test in this module.
    #[fixture]
    fn path_to_data() -> &'static str {
        "../tests/data"
    }

    /// Relative path to the plain-text `peaks.bed` fixture.
    #[fixture]
    fn path_to_bed_file() -> &'static str {
        "../tests/data/tokenizers/peaks.bed"
    }

    /// Relative path to the gzip-compressed `peaks.bed.gz` fixture.
    #[fixture]
    fn path_to_bed_file_gzipped() -> &'static str {
        "../tests/data/tokenizers/peaks.bed.gz"
    }

    /// Relative path to the `pbmc_hg38.h5ad` fixture.
    ///
    /// Not referenced by any test in this module.
    #[fixture]
    fn path_to_anndata_file() -> &'static str {
        "../tests/data/pbmc_hg38.h5ad"
    }

    /// Identifier string `databio/r2v-luecken2021-hg38-v2`.
    ///
    /// NOTE(review): presumably a remote model repository name; confirm.
    /// Not referenced by any test in this module.
    #[fixture]
    fn path_to_r2v_repo() -> &'static str {
        "databio/r2v-luecken2021-hg38-v2"
    }

    /// Hex identifier string for a BED file.
    ///
    /// NOTE(review): presumably a BEDbase record id; confirm.
    /// Not referenced by any test in this module.
    #[fixture]
    fn bb_bed_id() -> &'static str {
        "fa09672b962809b408b356728d81640e"
    }

    /// Relative path to the `tokens.gtok` output file.
    ///
    /// Not referenced by any test in this module.
    #[fixture]
    fn path_to_gtok_file() -> &'static str {
        "../tests/data/out/tokens.gtok"
    }

    /// A hand-built `Region` exposes the field values it was constructed with.
    #[rstest]
    fn test_region() {
        let region = Region {
            chr: "chr1".to_string(),
            start: 100,
            end: 200,
            rest: None,
        };

        assert_eq!(region.chr, "chr1");
        assert_eq!(region.start, 100);
        assert_eq!(region.end, 200);
    }

    /// Loading the plain BED fixture yields 25 regions.
    #[rstest]
    fn test_extract_regions_from_bed_file(path_to_bed_file: &str) {
        let path = Path::new(path_to_bed_file);
        let regions = RegionSet::try_from(path).unwrap().regions;
        // `assert_eq!` (rather than `assert!(a == b)`) prints both values on failure
        // and matches the other tests in this module.
        assert_eq!(regions.len(), 25);
    }

    /// Loading the gzipped BED fixture yields the same 25 regions count.
    #[rstest]
    fn test_extract_regions_from_bed_file_gzipped(path_to_bed_file_gzipped: &str) {
        let path = Path::new(path_to_bed_file_gzipped);
        let regions = RegionSet::try_from(path).unwrap().regions;
        assert_eq!(regions.len(), 25);
    }

    /// `RegionSet::len` reports 25 for the plain BED fixture.
    #[rstest]
    fn test_region_set_from_bed(path_to_bed_file: &str) {
        let path = Path::new(path_to_bed_file);
        let rs = RegionSet::try_from(path).unwrap();

        assert_eq!(rs.len(), 25);
    }

    /// Building a `RegionSet` from the raw file bytes gives the same number of
    /// regions as building it from the file path.
    #[rstest]
    fn test_region_set_from_bytes(path_to_bed_file: &str) {
        let path = Path::new(path_to_bed_file);
        let rs = RegionSet::try_from(path).unwrap();

        // Read the whole fixture into memory in one call.
        let bytes: Vec<u8> = std::fs::read(path).unwrap();

        let rs2 = RegionSet::from(bytes.as_slice());

        assert_eq!(rs2.len(), rs.len());
    }

    /// Writing a `RegionSet` with `to_bed` and reading the file back preserves
    /// the 25 regions of the fixture.
    #[rstest]
    fn test_region_set_to_bed(path_to_bed_file: &str) {
        let path = Path::new(path_to_bed_file);
        let rs = RegionSet::try_from(path).unwrap();

        // Create a temporary file and keep only its path; the guard must stay
        // alive for the rest of the test so the path remains valid.
        let tmp_guard = NamedTempFile::new().unwrap().into_temp_path();
        // Borrow the guard as a `Path` directly instead of round-tripping through
        // `&str`, which would panic on a non-UTF-8 temporary directory.
        let tmp_path: &Path = &tmp_guard;

        // write the region set to the temporary file
        rs.to_bed(tmp_path).unwrap();

        // read the temporary file back in as a region set
        let rs2 = RegionSet::try_from(tmp_path).unwrap();

        assert_eq!(rs2.len(), 25);
    }
}