1use crate::{IntSpan, Range};
2use anyhow::anyhow;
3use path_clean::PathClean;
4use std::cmp::Reverse;
5use std::collections::{BTreeMap, BTreeSet, HashMap};
6use std::io::{BufRead, BufReader, BufWriter, Read, Write};
7
8pub fn reader(input: &str) -> Box<dyn BufRead> {
21 let reader: Box<dyn BufRead> = if input == "stdin" {
22 Box::new(BufReader::new(std::io::stdin()))
23 } else {
24 let path = std::path::Path::new(input);
25 let file = match std::fs::File::open(path) {
26 Err(why) => panic!("could not open {}: {}", path.display(), why),
27 Ok(file) => file,
28 };
29
30 if path.extension() == Some(std::ffi::OsStr::new("gz")) {
31 Box::new(BufReader::new(flate2::read::MultiGzDecoder::new(file)))
32 } else {
33 Box::new(BufReader::new(file))
34 }
35 };
36
37 reader
38}
39
40pub fn read_lines(input: &str) -> Vec<String> {
45 let mut reader = reader(input);
46 let mut s = String::new();
47 reader.read_to_string(&mut s).expect("Read error");
48 s.lines().map(|s| s.to_string()).collect::<Vec<String>>()
49}
50
51pub fn read_sizes(input: &str) -> BTreeMap<String, i32> {
57 let mut sizes: BTreeMap<String, i32> = BTreeMap::new();
58
59 for line in read_lines(input) {
60 let fields: Vec<&str> = line.split('\t').collect();
61 if fields.len() == 2 {
62 sizes.insert(fields[0].to_string(), fields[1].parse::<i32>().unwrap());
63 }
64 }
65
66 sizes
67}
68
69pub fn read_first_column(input: &str) -> Vec<String> {
76 let reader = reader(input);
77 let mut rows: Vec<String> = Vec::new();
78
79 for line in reader.lines() {
80 let field = line.unwrap().split('\t').next().unwrap().to_string();
81 rows.push(field);
82 }
83
84 rows
85}
86
87pub fn read_replaces(input: &str) -> BTreeMap<String, Vec<String>> {
93 let mut replaces: BTreeMap<String, Vec<String>> = BTreeMap::new();
94
95 for line in read_lines(input) {
96 let mut fields: Vec<&str> = line.split('\t').collect();
97
98 let left = fields.split_off(1);
99
100 replaces.insert(
101 fields[0].to_string(),
102 left.iter().map(|s| (*s).to_string()).collect(),
103 );
104 }
105
106 replaces
107}
108
109pub fn read_json(input: &str) -> BTreeMap<String, serde_json::Value> {
110 let mut reader = reader(input);
111 let mut s = String::new();
112 reader.read_to_string(&mut s).expect("Read error");
113
114 serde_json::from_str(&s).unwrap()
115}
116
/// Opens `output` for buffered writing.
///
/// * `"stdout"` selects the process's standard output.
/// * Any other value is treated as a file path; the file is created, or
///   truncated if it already exists.
///
/// The returned writer is buffered; callers that care about write errors
/// should call `flush()` before dropping it.
///
/// # Panics
///
/// Panics with `could not create <path>: <error>` if the file cannot be
/// created (mirrors the message style used by `reader`).
pub fn writer(output: &str) -> Box<dyn Write> {
    if output == "stdout" {
        return Box::new(BufWriter::new(std::io::stdout()));
    }

    let file = std::fs::File::create(output)
        .unwrap_or_else(|why| panic!("could not create {}: {}", output, why));

    Box::new(BufWriter::new(file))
}
126
127pub fn write_lines(output: &str, lines: &Vec<String>) -> Result<(), std::io::Error> {
128 let mut writer = writer(output);
129
130 for line in lines {
131 writer.write_all(format!("{}\n", line).as_ref())?;
132 }
133
134 Ok(())
135}
136
137pub fn write_json(
138 output: &str,
139 json: &BTreeMap<String, serde_json::Value>,
140) -> Result<(), std::io::Error> {
141 let mut writer = writer(output);
142 let mut s = serde_json::to_string_pretty(json).unwrap();
143 s.push('\n');
144 writer.write_all(s.as_bytes())?;
145
146 Ok(())
147}
148
149pub fn json2set(json: &BTreeMap<String, serde_json::Value>) -> BTreeMap<String, IntSpan> {
160 let mut set: BTreeMap<String, IntSpan> = BTreeMap::new();
161
162 for (chr, value) in json {
163 let intspan = IntSpan::from(value.as_str().unwrap());
164 set.insert(chr.into(), intspan);
165 }
166
167 set
168}
169
170pub fn set2json(set: &BTreeMap<String, IntSpan>) -> BTreeMap<String, serde_json::Value> {
186 let mut json: BTreeMap<String, serde_json::Value> = BTreeMap::new();
187
188 for (chr, value) in set {
189 let runlist = value.to_string();
190 json.insert(chr.into(), serde_json::to_value(runlist).unwrap());
191 }
192
193 json
194}
195
196pub fn set2json_m(
197 set_of: &BTreeMap<String, BTreeMap<String, IntSpan>>,
198) -> BTreeMap<String, serde_json::Value> {
199 let mut out_json: BTreeMap<String, serde_json::Value> = BTreeMap::new();
200
201 for (name, set) in set_of {
202 let json = set2json(set);
203 out_json.insert(name.to_string(), serde_json::to_value(json).unwrap());
204 }
205
206 out_json
207}
208
209pub fn json2set_m(
210 json: &BTreeMap<String, serde_json::Value>,
211) -> BTreeMap<String, BTreeMap<String, IntSpan>> {
212 let is_multi: bool = json.values().next().unwrap().is_object();
213
214 let mut s_of: BTreeMap<String, BTreeMap<String, IntSpan>> = BTreeMap::new();
215 if is_multi {
216 for (key, value) in json {
217 let string = serde_json::to_string(value).unwrap();
218 let runlist_one: BTreeMap<String, serde_json::Value> =
219 serde_json::from_str(string.as_str()).unwrap();
220 let set_one = json2set(&runlist_one);
221 s_of.insert(key.to_string(), set_one);
222 }
223 } else {
224 let set_one = json2set(json);
225 s_of.insert("__single".to_string(), set_one);
226 }
227
228 s_of
229}
230
231pub fn fill_up_m(
232 set_of: &mut BTreeMap<String, BTreeMap<String, IntSpan>>,
233 chrs: &BTreeSet<String>,
234) {
235 for set in set_of.values_mut() {
236 for chr in chrs {
237 if !set.contains_key(chr) {
238 set.insert(chr.into(), IntSpan::new());
239 }
240 }
241 }
242}
243
244pub fn fill_up_s(set: &mut BTreeMap<String, IntSpan>, chrs: &BTreeSet<String>) {
245 for chr in chrs {
246 if !set.contains_key(chr) {
247 set.insert(chr.into(), IntSpan::new());
248 }
249 }
250}
251
252pub fn chrs_in_sets(set_of: &BTreeMap<String, BTreeMap<String, IntSpan>>) -> BTreeSet<String> {
253 let mut chrs: BTreeSet<String> = BTreeSet::new();
254
255 for name in set_of.keys() {
256 for chr in set_of.get(name).unwrap().keys() {
257 chrs.insert(chr.clone());
258 }
259 }
260
261 chrs
262}
263
264pub fn build_range_of_part(line: &str, range_of_str: &mut HashMap<String, Range>) {
265 for part in line.split('\t') {
266 let range = Range::from_str(part);
267 if !range.is_valid() {
268 continue;
269 }
270
271 if !range_of_str.contains_key(part) {
272 range_of_str.insert(part.to_string(), range);
273 }
274 }
275}
276
277pub fn sort_links(lines: &[String]) -> Vec<String> {
278 let mut range_of_part: HashMap<String, Range> = HashMap::new();
280
281 let mut within_links: BTreeSet<String> = BTreeSet::new();
285 for line in lines {
286 build_range_of_part(line, &mut range_of_part);
287
288 let parts: Vec<&str> = line.split('\t').collect();
289
290 let mut valids: Vec<&str> = parts
291 .clone()
292 .into_iter()
293 .filter(|p| range_of_part.contains_key(*p))
294 .collect();
295
296 let mut invalids: Vec<&str> = parts
297 .clone()
298 .into_iter()
299 .filter(|p| !range_of_part.contains_key(*p))
300 .collect();
301
302 valids.sort_by_key(|k| range_of_part.get(*k).unwrap().strand());
304
305 valids.sort_by_key(|k| range_of_part.get(*k).unwrap().start());
307
308 valids.sort_by_key(|k| range_of_part.get(*k).unwrap().chr());
310
311 valids.append(&mut invalids);
313 let new_line: String = valids.join("\t");
314 within_links.insert(new_line);
315 }
316
317 let mut among_links: Vec<String> = within_links.into_iter().collect();
321 {
322 among_links.sort_by_cached_key(|k| {
324 let parts: Vec<&str> = k.split('\t').collect();
325 range_of_part.get(parts[0]).unwrap().strand()
326 });
327
328 among_links.sort_by_cached_key(|k| {
330 let parts: Vec<&str> = k.split('\t').collect();
331 range_of_part.get(parts[0]).unwrap().start()
332 });
333
334 among_links.sort_by_cached_key(|k| {
336 let parts: Vec<&str> = k.split('\t').collect();
337 range_of_part.get(parts[0]).unwrap().chr()
338 });
339 }
340
341 {
345 among_links.sort_by_cached_key(|k| Reverse(k.split('\t').count()));
346 }
347
348 among_links
349}
350
351pub fn get_seq_faidx(file: &str, range: &str) -> anyhow::Result<String> {
364 let mut bin = String::new();
365 for e in &["samtools"] {
366 if let Ok(pth) = which::which(e) {
367 bin = pth.to_string_lossy().to_string();
368 break;
369 }
370 }
371
372 if bin.is_empty() {
373 return Err(anyhow!("Can't find the external command"));
374 }
375
376 let mut seq = String::new();
377 let output = std::process::Command::new(bin)
378 .arg("faidx")
379 .arg(file)
380 .arg(range)
381 .output()?;
382
383 if !output.status.success() {
384 return Err(anyhow!("Command executed with failing error code"));
385 }
386
387 for line in output.stdout.lines().map_while(Result::ok) {
388 if line.starts_with('>') {
390 continue;
391 }
392
393 seq += line.as_str();
394 }
395
396 Ok(seq)
397}
398
/// Returns the file name of `path` with every extension removed, i.e. the
/// part of the final path component before its first `.`.
///
/// For example `dir/genome.fa.gz` yields `genome`.
///
/// # Errors
///
/// Returns an [`std::io::ErrorKind::InvalidInput`] error when `path` has no
/// file name (such as `""`, `/` or `..`) or when the name is not valid
/// UTF-8. These cases used to panic.
pub fn basename(path: impl AsRef<std::path::Path>) -> std::io::Result<String> {
    let path = path.as_ref();

    let stem = path
        .file_stem()
        .and_then(std::ffi::OsStr::to_str)
        .ok_or_else(|| {
            std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                format!("cannot take the basename of `{}`", path.display()),
            )
        })?;

    // `split` always yields at least one piece, so the fallback is never taken.
    let basename = stem.split('.').next().unwrap_or(stem);

    Ok(basename.to_string())
}
413
414pub fn absolute_path(path: impl AsRef<std::path::Path>) -> std::io::Result<std::path::PathBuf> {
415 let path = path.as_ref();
416
417 let absolute_path = if path.is_absolute() {
418 path.to_path_buf()
419 } else {
420 std::env::current_dir()?.join(path)
421 }
422 .clean();
423
424 Ok(absolute_path)
425}
426
#[cfg(test)]
mod read_write {
    use super::*;
    use tempfile::TempDir;

    /// Returns the path of `name` inside the temporary directory as a `String`.
    fn path_in(dir: &TempDir, name: &str) -> String {
        dir.path()
            .join(name)
            .into_os_string()
            .into_string()
            .unwrap()
    }

    #[test]
    fn test_write_lines() {
        let tmp = TempDir::new().unwrap();
        let filename = path_in(&tmp, "test.txt");

        let content = vec!["This".to_string(), "is".to_string(), "a\ntest".to_string()];
        write_lines(&filename, &content).expect("Write error");

        // The embedded newline in the third entry makes it read back as two lines.
        let count = read_lines(&filename).len();
        assert_eq!(count, 4);
    }

    #[test]
    fn test_read_write_json() {
        let tmp = TempDir::new().unwrap();
        let filename = path_in(&tmp, "test.json");

        let json = read_json("tests/spanr/Atha.json");
        write_json(&filename, &json).expect("Write error");

        let count = read_lines(&filename).len();
        assert!(count == 17 || count == 18);
    }
}
469
470pub fn ints_to_idx(str: &str) -> Vec<usize> {
471 let mut ints: Vec<i32> = vec![];
472 let parts: Vec<&str> = str.split(',').collect();
473 for p in parts {
474 let intspan = IntSpan::from(p);
475 intspan.elements().iter().for_each(|e| ints.push(*e));
476 }
477
478 ints.iter().map(|e| *e as usize).collect()
479}
480
481pub fn named_field_to_idx(
482 str: &str,
483 idx_of: &HashMap<String, usize>,
484) -> anyhow::Result<Vec<usize>> {
485 let mut ints: Vec<i32> = vec![];
486 let parts: Vec<&str> = str.split(',').collect();
487 for p in parts {
488 if IntSpan::valid(p) {
489 let intspan = IntSpan::from(p);
490 intspan.elements().iter().for_each(|e| ints.push(*e));
491 } else if idx_of.contains_key(p) {
492 ints.push(*idx_of.get(p).unwrap() as i32)
493 } else {
494 return Err(anyhow!("Field not found in file header: `{}`", p));
495 }
496 }
497
498 Ok(ints.iter().map(|e| *e as usize).collect())
499}
500
501pub fn fields_to_ints(str: &str) -> IntSpan {
502 let mut ints = IntSpan::new();
503 let parts: Vec<&str> = str.split(',').collect();
504 for p in parts {
505 ints.add_runlist(p);
506 }
507
508 ints
509}
510
511pub fn extract_rg(line: &str, opt_idx_range: usize) -> Option<Range> {
512 let parts: Vec<&str> = line.split('\t').collect();
513
514 let range = if opt_idx_range == 0 {
515 parts.iter().find_map(|part| {
516 let rg = Range::from_str(part);
517 if rg.is_valid() {
518 Some(rg)
519 } else {
520 None
521 }
522 })
523 } else {
524 let part = parts.get(opt_idx_range - 1).unwrap();
525 let rg = Range::from_str(part);
526 if rg.is_valid() {
527 Some(rg)
528 } else {
529 None
530 }
531 };
532
533 range
534}
535
/// Formats `number` with `,` as the thousands separator and exactly
/// `decimal_digits` digits after the decimal point.
///
/// The value is first rounded to `decimal_digits` places (halves round away
/// from zero) and then rendered as a whole, so a carry out of the fraction
/// reaches the integer part and integer parts beyond the `i64` range are
/// printed in full. A leading `-` is emitted whenever `number < 0.0`.
///
/// Non-finite inputs are returned in their plain `f64` display form
/// (`NaN`, `inf`, `-inf`).
pub fn format_number(number: f64, decimal_digits: usize) -> String {
    if !number.is_finite() {
        return number.to_string();
    }

    let rounded = round(number.abs(), decimal_digits);

    // Render once; the integer digits are regrouped below.
    let plain = format!("{:.1$}", rounded, decimal_digits);
    let (int_digits, fraction) = match plain.find('.') {
        Some(dot) => plain.split_at(dot),
        None => (plain.as_str(), ""),
    };

    let mut result = String::with_capacity(plain.len() + int_digits.len() / 3 + 1);
    if number < 0.0 {
        result.push('-');
    }
    for (i, digit) in int_digits.chars().enumerate() {
        // A separator goes before every digit that starts a group of three,
        // counted from the right.
        if i > 0 && (int_digits.len() - i) % 3 == 0 {
            result.push(',');
        }
        result.push(digit);
    }
    // `fraction` is either empty or starts with the decimal point.
    result.push_str(fraction);

    result
}

/// Rounds `number` to `precision` decimal places, halves away from zero.
///
/// If scaling by `10^precision` overflows, `number` is returned unchanged
/// instead of NaN or infinity.
fn round(number: f64, precision: usize) -> f64 {
    let exponent = i32::try_from(precision).unwrap_or(i32::MAX);
    let factor = 10f64.powi(exponent);
    let scaled = number * factor;

    if scaled.is_finite() {
        scaled.round() / factor
    } else {
        number
    }
}
582
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_format_number() {
        // (input, decimal digits, expected rendering)
        let cases: [(f64, usize, &str); 12] = [
            // positive values
            (1234567.89, 2, "1,234,567.89"),
            (1000.0, 0, "1,000"),
            (0.12345, 3, "0.123"),
            // negative values
            (-9876543.21, 3, "-9,876,543.210"),
            (-1000.0, 0, "-1,000"),
            (-0.98765, 4, "-0.9877"),
            // zero and negative zero
            (0.0, 2, "0.00"),
            (-0.0, 2, "0.00"),
            // large magnitudes
            (1e10, 2, "10,000,000,000.00"),
            (-1e10, 2, "-10,000,000,000.00"),
            // rounding and zero padding
            (1234.56789, 3, "1,234.568"),
            (1234.0, 5, "1,234.00000"),
        ];

        for &(number, digits, expected) in cases.iter() {
            assert_eq!(format_number(number, digits), expected);
        }
    }
}
611}