1use std::collections::HashMap;
2use std::ffi::OsStr;
3use std::fs::File;
4use std::io::prelude::*;
5#[cfg(feature = "http")]
6use std::io::Cursor;
7use std::io::{BufRead, BufReader};
8use std::path::{Path, PathBuf};
9use std::str::FromStr;
10
11use anyhow::{Context, Result};
12#[cfg(feature = "http")]
13use flate2::read::GzDecoder;
14use flate2::read::MultiGzDecoder;
15use std::error::Error;
16#[cfg(feature = "http")]
17use ureq::{get, Error as UreqError};
18
19use crate::models::region::Region;
20
/// The kind of genomic interval file a path refers to, detected from its
/// file extension (see [`get_file_info`] / the `FromStr` impl).
// PartialEq/Eq derives added so callers and tests can compare detected types
// directly instead of matching; purely additive and backward-compatible.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(clippy::upper_case_acronyms)]
pub enum FileType {
    /// Browser Extensible Data (`.bed`) interval file.
    BED,
    /// Binary Alignment Map (`.bam`) file.
    BAM,
    /// ENCODE narrowPeak (`.narrowpeak`) file.
    NARROWPEAK,
    /// Any extension that is not one of the recognized formats.
    UNKNOWN,
}
29
30impl FromStr for FileType {
31 type Err = String;
32
33 fn from_str(s: &str) -> Result<Self, Self::Err> {
34 match s.to_lowercase().as_str() {
35 "bed" => Ok(FileType::BED),
36 "bam" => Ok(FileType::BAM),
37 "narrowpeak" => Ok(FileType::NARROWPEAK),
38 _ => Ok(FileType::UNKNOWN), }
41 }
42}
43
44pub struct FileInfo {
45 pub file_type: FileType,
46 pub is_gzipped: bool,
47}
48
49pub fn get_file_info(path: &Path) -> FileInfo {
50 let mut file_type = FileType::UNKNOWN;
51 let mut is_gzipped = false;
52
53 if let Some(os_str_filename) = path.file_name() {
54 if let Some(filename) = os_str_filename.to_str() {
55 if filename.ends_with(".gz") {
57 is_gzipped = true;
58 if let Some(base_filename) = filename.strip_suffix(".gz") {
59 if let Some(ext) = PathBuf::from(base_filename)
61 .extension()
62 .and_then(|e| e.to_str())
63 {
64 file_type = FileType::from_str(ext).unwrap_or(FileType::UNKNOWN);
65 } else {
66 file_type = FileType::from_str(base_filename).unwrap_or(FileType::UNKNOWN);
70 }
71 }
72 } else {
73 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
75 file_type = FileType::from_str(ext).unwrap_or(FileType::UNKNOWN);
76 }
77 }
78 }
79 }
80
81 FileInfo {
82 file_type,
83 is_gzipped,
84 }
85}
86
/// Parses the first three tab-separated fields of a BED-like line into
/// `(chrom, start, end)`.
///
/// Start/end that are missing or not valid integers fall back to the `-1`
/// sentinel (existing callers rely on this, so it is kept rather than made
/// an `Option`). The chromosome field is whatever precedes the first tab,
/// so this returns `Some` for any input line (possibly with an empty chrom).
pub fn parse_bedlike_file(line: &str) -> Option<(String, i32, i32)> {
    let mut fields = line.split('\t');
    let ctg = fields.next()?;
    let st = fields
        .next()
        .and_then(|s| s.parse::<i32>().ok())
        .unwrap_or(-1);
    let en = fields
        .next()
        .and_then(|s| s.parse::<i32>().ok())
        .unwrap_or(-1);

    // Fix: the original did `ctg.parse().unwrap()` — a needless fallible
    // &str -> String round-trip through FromStr. `to_string` is direct and
    // infallible with identical output.
    Some((ctg.to_string(), st, en))
}
108
109pub fn get_dynamic_reader(path: &Path) -> Result<BufReader<Box<dyn Read>>> {
117 let is_gzipped = path.extension() == Some(OsStr::new("gz"));
118 let file = File::open(path).with_context(|| format!("Failed to open file: {:?}", path))?;
119 let file: Box<dyn Read> = match is_gzipped {
120 true => Box::new(MultiGzDecoder::new(file)),
121 false => Box::new(file),
122 };
123
124 let reader = BufReader::new(file);
125
126 Ok(reader)
127}
128
/// Fetches `url` over HTTP(S) and returns a buffered reader over the body.
///
/// `ftp://` URLs are rewritten to `https://` (FTP is not fully implemented).
/// Bodies whose URL ends in `.gz` are transparently decompressed.
///
/// # Errors
/// Returns an error if the URL is not valid UTF-8, the request fails or
/// returns an error status code, or the body cannot be read.
#[cfg(feature = "http")]
pub fn get_dynamic_reader_from_url(
    url: &Path,
) -> Result<BufReader<Box<dyn std::io::Read>>, Box<dyn Error>> {
    let mut url_str = url
        .to_str()
        .ok_or_else(|| "URL path is not valid UTF-8")?
        .to_string();

    let is_ftp = url_str.starts_with("ftp://");
    if is_ftp {
        println!("ftp is not fully implemented. Bugs could appear");
        url_str = url_str.replacen("ftp://", "https://", 1);
    }

    let response = match get(&url_str).call() {
        Ok(resp) => resp,
        Err(UreqError::StatusCode(code)) => {
            return Err(format!("HTTP status {} when fetching {}", code, url_str).into())
        }
        Err(e) => return Err(format!("Request error when fetching {}: {}", url_str, e).into()),
    };

    // Buffer the entire body in memory so decompression can run over a cursor.
    let mut bytes = Vec::new();
    response
        .into_body()
        .into_reader()
        .read_to_end(&mut bytes)
        .map_err(|e| format!("Failed reading response body from {}: {}", url_str, e))?;

    let cursor = Cursor::new(bytes);

    let is_gzipped = url_str.ends_with(".gz");

    // Fix: use MultiGzDecoder (like the local-file path in `get_dynamic_reader`)
    // instead of GzDecoder. GzDecoder stops after the first gzip member, silently
    // truncating multi-member files such as bgzip-compressed BED/chrom files;
    // MultiGzDecoder decodes all concatenated members.
    let reader: Box<dyn std::io::Read> = match is_gzipped {
        true => Box::new(MultiGzDecoder::new(cursor)),
        false => Box::new(cursor),
    };

    Ok(BufReader::new(reader))
}
179
180pub fn get_dynamic_reader_w_stdin(file_path_str: &str) -> Result<BufReader<Box<dyn Read>>> {
190 if file_path_str == "-" {
191 Ok(BufReader::new(Box::new(std::io::stdin()) as Box<dyn Read>))
192 } else {
193 let file_path = Path::new(file_path_str);
194 get_dynamic_reader(file_path)
195 }
196}
197
198pub fn generate_region_to_id_map(regions: &[Region]) -> HashMap<Region, u32> {
204 let mut current_id = 0;
205 let mut region_to_id: HashMap<Region, u32> = HashMap::new();
206 for region in regions.iter() {
207 region_to_id.entry(region.to_owned()).or_insert_with(|| {
208 let old_id = current_id;
209 current_id += 1;
210 old_id
211 });
212 }
213
214 region_to_id
215}
216
217pub fn generate_id_to_region_map(regions: &[Region]) -> HashMap<u32, Region> {
223 let mut current_id = 0;
224 let mut id_to_region: HashMap<u32, Region> = HashMap::new();
225
226 for region in regions.iter() {
227 id_to_region.entry(current_id).or_insert_with(|| {
228 current_id += 1;
229 region.clone()
230 });
231 }
232
233 id_to_region
234}
235
/// Assigns a dense, zero-based `u32` id to each *distinct* region string, in
/// first occurrence order; duplicates share the id of their first occurrence.
pub fn generate_region_string_to_id_map(regions: &[String]) -> HashMap<String, u32> {
    let mut region_to_id: HashMap<String, u32> = HashMap::new();

    for region in regions {
        // The map size before insertion equals the next unused dense id.
        let next_id = region_to_id.len() as u32;
        region_to_id.entry(region.clone()).or_insert(next_id);
    }

    region_to_id
}
254
/// Builds the inverse of [`generate_region_string_to_id_map`]: dense id ->
/// region string, ids assigned to distinct strings in first-occurrence order.
///
/// Fix: the previous implementation gave every *element* (duplicates included)
/// its own id, so with duplicate strings in the input this map disagreed with
/// `generate_region_string_to_id_map` and was not its inverse. Ids are now
/// deduplicated with the same first-occurrence logic. Behavior is unchanged
/// when all input strings are distinct.
pub fn generate_id_to_region_string_map(regions: &[String]) -> HashMap<u32, String> {
    // Mirror of the sibling's id assignment, keyed by &str to avoid clones.
    let mut region_to_id: HashMap<&str, u32> = HashMap::new();
    let mut id_to_region: HashMap<u32, String> = HashMap::new();

    for region in regions {
        let next_id = region_to_id.len() as u32;
        let id = *region_to_id.entry(region.as_str()).or_insert(next_id);
        // Only the first occurrence of an id stores its string.
        id_to_region.entry(id).or_insert_with(|| region.clone());
    }

    id_to_region
}
273
/// Reads a two-column chrom sizes file (`<name> <size>`, whitespace-separated)
/// into a `chrom -> size` map.
///
/// Blank lines are skipped. The signature is kept panicking (not `Result`)
/// for caller compatibility, but panics now carry the offending path/line
/// instead of an uncontextualized index-out-of-bounds or bare `unwrap`:
/// the old code indexed `parts[0]`/`parts[1]` unchecked, so a trailing blank
/// or one-column line crashed with no diagnostic.
///
/// # Panics
/// Panics if the file cannot be opened or read, a non-blank line has fewer
/// than two fields, or the size field is not a valid `u32`.
pub fn get_chrom_sizes<T: AsRef<Path>>(path: T) -> HashMap<String, u32> {
    let path = path.as_ref();
    let chrom_sizes_file = File::open(path)
        .unwrap_or_else(|e| panic!("Failed to open chrom sizes file {:?}: {}", path, e));

    let mut chrom_sizes: HashMap<String, u32> = HashMap::new();

    for (line_index, line) in BufReader::new(chrom_sizes_file).lines().enumerate() {
        let line_number = line_index + 1; // 1-based for human-readable messages
        let line = line.unwrap_or_else(|e| {
            panic!("Error while reading chrom sizes file at line {}: {}", line_number, e)
        });

        let mut fields = line.split_whitespace();
        let chrom = match fields.next() {
            Some(c) => c,
            None => continue, // blank line — skip rather than crash
        };
        let size = fields
            .next()
            .unwrap_or_else(|| {
                panic!("Missing size for chromosome {:?} on line {}", chrom, line_number)
            })
            .parse::<u32>()
            .unwrap_or_else(|e| panic!("Invalid chromosome size on line {}: {}", line_number, e));

        chrom_sizes.insert(chrom.to_string(), size);
    }

    chrom_sizes
}
299
300pub fn generate_ordering_map_for_universe_regions<T: AsRef<Path>>(
303 path: T,
304) -> Result<HashMap<Region, f64>> {
305 let mut map = HashMap::new();
306
307 let reader = get_dynamic_reader(path.as_ref())?;
308
309 for line in reader.lines() {
310 let line = line?;
311 let parts: Vec<&str> = line.split('\t').collect();
312
313 if parts.len() < 5 {
314 anyhow::bail!("BED file line does not have at least 5 fields: {}. It needs to have chr, start, end, name, and score.", line);
315 }
316
317 let chr = parts[0];
319 let start = parts[1].parse::<u32>().with_context(|| {
320 format!("Failed to parse start position in BED file line: {}", line)
321 })?;
322
323 let end = parts[2]
324 .parse::<u32>()
325 .with_context(|| format!("Failed to parse end position in BED file line: {}", line))?;
326
327 let score = parts[4]
328 .parse::<f64>()
329 .with_context(|| format!("Failed to parse score in BED file line: {}", line))?;
330
331 let rest = Some(parts[3..].join("\t")).filter(|s| !s.is_empty());
332
333 let region = Region {
334 chr: chr.to_owned(),
335 start,
336 end,
337 rest,
338 };
339
340 map.insert(region, score);
341 }
342
343 Ok(map)
344}
345
346pub fn read_bedset_file<P: AsRef<Path>>(file_path: P) -> Result<Vec<String>> {
347 let file = File::open(file_path)?;
348 let reader = BufReader::new(file);
349
350 let bed_identifiers = reader
351 .lines()
352 .map(|line| line.map(|s| s.trim().to_string()))
353 .collect::<Result<Vec<_>, _>>()?;
354
355 Ok(bed_identifiers)
356}
357
/// Strips *every* extension from `path`'s file name and returns the bare stem:
/// `data/sample.bed.gz` -> `sample`. Dot-files with a single leading dot
/// (e.g. `.bashrc`) have no extension per `Path::file_stem` and are returned
/// unchanged.
///
/// # Panics
/// Panics if `path` has no file name (e.g. `/` or `..`).
pub fn remove_all_extensions(path: &Path) -> String {
    // Start from the path with one extension already removed via file_stem.
    let mut current = path.with_file_name(path.file_stem().unwrap().to_string_lossy().to_string());

    // Keep peeling extensions until none remain.
    while current.extension().is_some() {
        current = current.with_extension("");
    }

    current.file_stem().unwrap().to_string_lossy().into_owned()
}