1use std::collections::HashMap;
2use std::ffi::OsStr;
3use std::fs::File;
4use std::io::prelude::*;
5use std::io::{BufRead, BufReader, Cursor};
6use std::path::{Path, PathBuf};
7use std::str::FromStr;
8
9use anyhow::{Context, Result};
10use flate2::read::{GzDecoder, MultiGzDecoder};
11use std::error::Error;
12#[cfg(feature = "http")]
13use ureq::{get, Error as UreqError};
14
15use crate::models::region::Region;
16
/// File formats recognized by this module, detected from a file's
/// extension (see [`get_file_info`] and the [`FromStr`] impl below).
#[derive(Debug, Clone)]
#[allow(clippy::upper_case_acronyms)]
pub enum FileType {
    /// Parsed from the extension string "bed" (case-insensitive).
    BED,
    /// Parsed from the extension string "bam" (case-insensitive).
    BAM,
    /// Parsed from the extension string "narrowpeak" (case-insensitive).
    NARROWPEAK,
    /// Fallback for any extension not listed above.
    UNKNOWN,
}
25
26impl FromStr for FileType {
27 type Err = String;
28
29 fn from_str(s: &str) -> Result<Self, Self::Err> {
30 match s.to_lowercase().as_str() {
31 "bed" => Ok(FileType::BED),
32 "bam" => Ok(FileType::BAM),
33 "narrowpeak" => Ok(FileType::NARROWPEAK),
34 _ => Ok(FileType::UNKNOWN), }
37 }
38}
39
/// Result of inspecting a path's filename (see [`get_file_info`]).
pub struct FileInfo {
    // Detected format; UNKNOWN when the extension is not recognized.
    pub file_type: FileType,
    // True when the filename ends in ".gz".
    pub is_gzipped: bool,
}
44
45pub fn get_file_info(path: &Path) -> FileInfo {
46 let mut file_type = FileType::UNKNOWN;
47 let mut is_gzipped = false;
48
49 if let Some(os_str_filename) = path.file_name() {
50 if let Some(filename) = os_str_filename.to_str() {
51 if filename.ends_with(".gz") {
53 is_gzipped = true;
54 if let Some(base_filename) = filename.strip_suffix(".gz") {
55 if let Some(ext) = PathBuf::from(base_filename)
57 .extension()
58 .and_then(|e| e.to_str())
59 {
60 file_type = FileType::from_str(ext).unwrap_or(FileType::UNKNOWN);
61 } else {
62 file_type = FileType::from_str(base_filename).unwrap_or(FileType::UNKNOWN);
66 }
67 }
68 } else {
69 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
71 file_type = FileType::from_str(ext).unwrap_or(FileType::UNKNOWN);
72 }
73 }
74 }
75 }
76
77 FileInfo {
78 file_type,
79 is_gzipped,
80 }
81}
82
/// Parses one tab-separated BED-like line into `(contig, start, end)`.
///
/// The contig (first field) is required; `split` always yields at least one
/// field, so this returns `Some` for any input (an empty line yields an
/// empty contig). Missing or unparseable start/end fields are reported as
/// the sentinel value `-1` rather than an error.
pub fn parse_bedlike_file(line: &str) -> Option<(String, i32, i32)> {
    let mut fields = line.split('\t');
    let ctg = fields.next()?;
    let st = fields
        .next()
        .and_then(|s| s.parse::<i32>().ok())
        .unwrap_or(-1);
    let en = fields
        .next()
        .and_then(|s| s.parse::<i32>().ok())
        .unwrap_or(-1);

    // `to_string` converts the borrowed contig directly; the original
    // `ctg.parse().unwrap()` routed an infallible &str -> String
    // conversion through FromStr and an unwrap.
    Some((ctg.to_string(), st, en))
}
104
105pub fn get_dynamic_reader(path: &Path) -> Result<BufReader<Box<dyn Read>>> {
113 let is_gzipped = path.extension() == Some(OsStr::new("gz"));
114 let file = File::open(path).with_context(|| format!("Failed to open file: {:?}", path))?;
115 let file: Box<dyn Read> = match is_gzipped {
116 true => Box::new(MultiGzDecoder::new(file)),
117 false => Box::new(file),
118 };
119
120 let reader = BufReader::new(file);
121
122 Ok(reader)
123}
124
#[cfg(feature = "http")]
/// Fetches a remote file and returns a buffered reader over its contents,
/// transparently gunzipping when the URL ends in ".gz".
///
/// `ftp://` URLs are rewritten to `http://` as a best-effort fallback; a
/// warning is emitted on stderr since FTP is not fully supported.
///
/// # Errors
/// Returns an error when the URL path is not valid UTF-8, the request
/// fails (including non-success HTTP status codes), or the response body
/// cannot be read.
pub fn get_dynamic_reader_from_url(
    url: &Path,
) -> Result<BufReader<Box<dyn std::io::Read>>, Box<dyn Error>> {
    let mut url_str = url
        .to_str()
        .ok_or_else(|| "URL path is not valid UTF-8")?
        .to_string();

    let is_ftp = url_str.starts_with("ftp://");
    if is_ftp {
        // Fix: warnings go to stderr, not stdout, so they don't corrupt
        // piped/captured program output.
        eprintln!("ftp is not fully implemented. Bugs could appear");
        url_str = url_str.replacen("ftp://", "http://", 1);
    }

    let response = match get(&url_str).call() {
        Ok(resp) => resp,
        Err(UreqError::StatusCode(code)) => {
            return Err(format!("HTTP status {} when fetching {}", code, url_str).into())
        }
        Err(e) => return Err(format!("Request error when fetching {}: {}", url_str, e).into()),
    };

    // Buffer the whole body in memory so we can wrap it in an owned,
    // seek-free Cursor regardless of compression.
    let mut bytes = Vec::new();
    response
        .into_body()
        .into_reader()
        .read_to_end(&mut bytes)
        .map_err(|e| format!("Failed reading response body from {}: {}", url_str, e))?;

    let cursor = Cursor::new(bytes);

    let is_gzipped = url_str.ends_with(".gz");

    let reader: Box<dyn std::io::Read> = match is_gzipped {
        true => Box::new(GzDecoder::new(cursor)),
        false => Box::new(cursor),
    };

    Ok(BufReader::new(reader))
}
175
176pub fn get_dynamic_reader_w_stdin(file_path_str: &str) -> Result<BufReader<Box<dyn Read>>> {
186 if file_path_str == "-" {
187 Ok(BufReader::new(Box::new(std::io::stdin()) as Box<dyn Read>))
188 } else {
189 let file_path = Path::new(file_path_str);
190 get_dynamic_reader(file_path)
191 }
192}
193
194pub fn generate_region_to_id_map(regions: &[Region]) -> HashMap<Region, u32> {
200 let mut current_id = 0;
201 let mut region_to_id: HashMap<Region, u32> = HashMap::new();
202 for region in regions.iter() {
203 region_to_id.entry(region.to_owned()).or_insert_with(|| {
204 let old_id = current_id;
205 current_id += 1;
206 old_id
207 });
208 }
209
210 region_to_id
211}
212
213pub fn generate_id_to_region_map(regions: &[Region]) -> HashMap<u32, Region> {
219 let mut current_id = 0;
220 let mut id_to_region: HashMap<u32, Region> = HashMap::new();
221
222 for region in regions.iter() {
223 id_to_region.entry(current_id).or_insert_with(|| {
224 current_id += 1;
225 region.clone()
226 });
227 }
228
229 id_to_region
230}
231
/// Assigns a dense id (0, 1, 2, ...) to each distinct region string, in
/// order of first appearance; duplicates keep the id of their first
/// occurrence.
pub fn generate_region_string_to_id_map(regions: &[String]) -> HashMap<String, u32> {
    let mut region_to_id: HashMap<String, u32> = HashMap::new();
    let mut next_id: u32 = 0;

    for name in regions {
        // Only the first occurrence of a string claims an id.
        if !region_to_id.contains_key(name) {
            region_to_id.insert(name.clone(), next_id);
            next_id += 1;
        }
    }

    region_to_id
}
250
/// Builds the inverse of [`generate_region_string_to_id_map`]: maps each
/// dense id (0, 1, 2, ... in order of first appearance) back to its
/// distinct region string.
///
/// Fix: the previous version inserted EVERY occurrence (the entry key
/// `current_id` was always fresh), so duplicate strings received extra ids
/// and the map disagreed with `generate_region_string_to_id_map`.
/// Duplicates are now skipped, keeping the two maps mutual inverses.
pub fn generate_id_to_region_string_map(regions: &[String]) -> HashMap<u32, String> {
    let mut id_to_region: HashMap<u32, String> = HashMap::new();
    // Tracks strings already assigned an id; borrows from the input slice.
    let mut seen: std::collections::HashSet<&String> = std::collections::HashSet::new();
    let mut current_id: u32 = 0;

    for region in regions.iter() {
        // `insert` returns false when the string was already present.
        if seen.insert(region) {
            id_to_region.insert(current_id, region.clone());
            current_id += 1;
        }
    }

    id_to_region
}
269
/// Reads a whitespace-separated chrom-sizes file (`<chrom> <size>` per
/// line) into a name -> size map. Blank lines are skipped.
///
/// # Panics
/// Panics when the file cannot be opened or read, when a non-blank line is
/// missing its size column, or when the size is not a valid `u32` —
/// matching the original fail-fast contract of this helper.
pub fn get_chrom_sizes<T: AsRef<Path>>(path: T) -> HashMap<String, u32> {
    let chrom_sizes_file =
        File::open(path.as_ref()).expect("Failed to open chrom sizes file.");

    let mut chrom_sizes: HashMap<String, u32> = HashMap::new();

    let file_buf = BufReader::new(chrom_sizes_file);

    for line in file_buf.lines() {
        let line_string = line.expect("Error while reading chrom sizes file");

        let mut fields = line_string.split_whitespace();

        // Fix: a blank line used to panic with an index-out-of-bounds;
        // skip it instead.
        let chrom = match fields.next() {
            Some(c) => c,
            None => continue,
        };

        // Malformed lines still fail fast, but with a message that names
        // the offending line instead of an opaque index panic.
        let size = fields
            .next()
            .unwrap_or_else(|| panic!("chrom sizes line is missing a size column: {:?}", line_string))
            .parse::<u32>()
            .unwrap_or_else(|e| panic!("invalid chrom size in line {:?}: {}", line_string, e));

        chrom_sizes.insert(chrom.to_string(), size);
    }

    chrom_sizes
}
295
296pub fn generate_ordering_map_for_universe_regions<T: AsRef<Path>>(
299 path: T,
300) -> Result<HashMap<Region, f64>> {
301 let mut map = HashMap::new();
302
303 let reader = get_dynamic_reader(path.as_ref())?;
304
305 for line in reader.lines() {
306 let line = line?;
307 let parts: Vec<&str> = line.split('\t').collect();
308
309 if parts.len() < 5 {
310 anyhow::bail!("BED file line does not have at least 5 fields: {}. It needs to have chr, start, end, name, and score.", line);
311 }
312
313 let chr = parts[0];
315 let start = parts[1].parse::<u32>().with_context(|| {
316 format!("Failed to parse start position in BED file line: {}", line)
317 })?;
318
319 let end = parts[2]
320 .parse::<u32>()
321 .with_context(|| format!("Failed to parse end position in BED file line: {}", line))?;
322
323 let score = parts[4]
324 .parse::<f64>()
325 .with_context(|| format!("Failed to parse score in BED file line: {}", line))?;
326
327 let rest = Some(parts[3..].join("\t")).filter(|s| !s.is_empty());
328
329 let region = Region {
330 chr: chr.to_owned(),
331 start,
332 end,
333 rest,
334 };
335
336 map.insert(region, score);
337 }
338
339 Ok(map)
340}
341
342pub fn read_bedset_file<P: AsRef<Path>>(file_path: P) -> Result<Vec<String>> {
343 let file = File::open(file_path)?;
344 let reader = BufReader::new(file);
345
346 let bed_identifiers = reader
347 .lines()
348 .map(|line| line.map(|s| s.trim().to_string()))
349 .collect::<Result<Vec<_>, _>>()?;
350
351 Ok(bed_identifiers)
352}
353
/// Strips every extension from a path's filename and returns the bare stem
/// (e.g. "data/sample.bed.gz" -> "sample").
///
/// Panics if the path has no file name (e.g. "" or ".."), matching the
/// original `unwrap` behavior.
pub fn remove_all_extensions(path: &Path) -> String {
    let initial_stem = path.file_stem().unwrap().to_string_lossy().to_string();
    let mut current = path.with_file_name(initial_stem);

    // Peel extensions one at a time until none remain
    // ("a.tar.gz" -> "a.tar" -> "a").
    while current.extension().is_some() {
        current = current.with_extension("");
    }

    current.file_stem().unwrap().to_string_lossy().to_string()
}