use crate::{IntSpan, Range};
use anyhow::anyhow;
use flate2::read::GzDecoder;
use path_clean::PathClean;
use serde_json::Value;
use std::cmp::Reverse;
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::ffi::OsStr;
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Read, Write};
use std::path::{Path, PathBuf};
use std::process::Command;
use std::{env, io};
/// Returns a buffered reader over `input`.
///
/// The literal string `"stdin"` selects standard input; any other value is
/// treated as a file path. Files whose extension is `.gz` are transparently
/// gunzipped.
///
/// # Panics
///
/// Panics if the file cannot be opened.
pub fn reader(input: &str) -> Box<dyn BufRead> {
    if input == "stdin" {
        return Box::new(BufReader::new(io::stdin()));
    }
    let path = Path::new(input);
    let file = File::open(path)
        .unwrap_or_else(|why| panic!("could not open {}: {}", path.display(), why));
    match path.extension() {
        // Gzipped files are decoded on the fly.
        Some(ext) if ext == OsStr::new("gz") => Box::new(BufReader::new(GzDecoder::new(file))),
        _ => Box::new(BufReader::new(file)),
    }
}
pub fn read_lines(input: &str) -> Vec<String> {
let mut reader = reader(input);
let mut s = String::new();
reader.read_to_string(&mut s).expect("Read error");
s.lines().map(|s| s.to_string()).collect::<Vec<String>>()
}
/// Parses a two-column TSV of `name<TAB>size` into a sorted map.
///
/// Lines that do not have exactly two tab-separated fields are skipped.
///
/// # Panics
///
/// Panics if a size field is not a valid `i32`.
pub fn read_sizes(input: &str) -> BTreeMap<String, i32> {
    let mut sizes = BTreeMap::new();
    for line in read_lines(input) {
        // Slice pattern matches only rows with exactly two fields.
        if let [name, size] = line.split('\t').collect::<Vec<_>>()[..] {
            sizes.insert(name.to_string(), size.parse::<i32>().unwrap());
        }
    }
    sizes
}
/// Returns the first tab-separated field of every line of `input`.
///
/// # Panics
///
/// Panics on any read error.
pub fn read_first_column(input: &str) -> Vec<String> {
    reader(input)
        .lines()
        .map(|line| {
            // `split` always yields at least one item, so `next()` is safe.
            line.unwrap().split('\t').next().unwrap().to_string()
        })
        .collect()
}
/// Parses a TSV where the first field is a key and the remaining fields are
/// its replacement values: `key<TAB>v1<TAB>v2...` → `key => [v1, v2, ...]`.
pub fn read_replaces(input: &str) -> BTreeMap<String, Vec<String>> {
    let mut replaces = BTreeMap::new();
    for line in read_lines(input) {
        let mut parts = line.split('\t').map(str::to_string);
        // `split` always yields a first item, even for an empty line.
        if let Some(key) = parts.next() {
            replaces.insert(key, parts.collect());
        }
    }
    replaces
}
pub fn read_json(input: &str) -> BTreeMap<String, Value> {
let mut reader = reader(input);
let mut s = String::new();
reader.read_to_string(&mut s).expect("Read error");
serde_json::from_str(&s).unwrap()
}
/// Returns a buffered writer for `output`.
///
/// The literal string `"stdout"` selects standard output; any other value
/// creates (or truncates) a file at that path.
///
/// # Panics
///
/// Panics if the file cannot be created.
pub fn writer(output: &str) -> Box<dyn Write> {
    match output {
        "stdout" => Box::new(BufWriter::new(io::stdout())),
        path => Box::new(BufWriter::new(File::create(path).unwrap())),
    }
}
/// Writes `lines` to `output` ("stdout" or a file path), appending a newline
/// after each entry.
///
/// The parameter is now a slice (`&[&str]`) instead of `&Vec<&str>`; existing
/// callers passing `&vec![...]` still work via deref coercion.
///
/// # Errors
///
/// Returns any I/O error raised while writing.
pub fn write_lines(output: &str, lines: &[&str]) -> Result<(), std::io::Error> {
    let mut writer = writer(output);
    for line in lines {
        // Write the line and the terminator directly; avoids allocating a
        // fresh `String` per line as `format!` would.
        writer.write_all(line.as_bytes())?;
        writer.write_all(b"\n")?;
    }
    Ok(())
}
/// Pretty-prints `json` to `output` ("stdout" or a file path), followed by a
/// trailing newline.
///
/// # Errors
///
/// Returns any I/O error raised while writing.
pub fn write_json(output: &str, json: &BTreeMap<String, Value>) -> Result<(), std::io::Error> {
    let text = serde_json::to_string_pretty(json).unwrap();
    let mut writer = writer(output);
    writer.write_all(text.as_bytes())?;
    writer.write_all(b"\n")?;
    Ok(())
}
/// Converts a `{chr: runlist-string}` JSON object into a map of `IntSpan`s.
///
/// # Panics
///
/// Panics if any value is not a JSON string.
pub fn json2set(json: &BTreeMap<String, Value>) -> BTreeMap<String, IntSpan> {
    json.iter()
        .map(|(chr, value)| (chr.clone(), IntSpan::from(value.as_str().unwrap())))
        .collect()
}
pub fn set2json(set: &BTreeMap<String, IntSpan>) -> BTreeMap<String, Value> {
let mut json: BTreeMap<String, Value> = BTreeMap::new();
for (chr, value) in set {
let runlist = value.to_string();
json.insert(chr.into(), serde_json::to_value(runlist).unwrap());
}
json
}
/// Converts a two-level map `{name: {chr: IntSpan}}` into nested JSON
/// `{name: {chr: runlist-string}}`.
pub fn set2json_m(set_of: &BTreeMap<String, BTreeMap<String, IntSpan>>) -> BTreeMap<String, Value> {
    set_of
        .iter()
        .map(|(name, set)| (name.clone(), serde_json::to_value(set2json(set)).unwrap()))
        .collect()
}
/// Converts JSON into a two-level map `{name: {chr: IntSpan}}`.
///
/// Two input shapes are accepted, detected from the first value:
/// - multi: `{name: {chr: runlist}}` — converted name by name;
/// - single: `{chr: runlist}` — wrapped under the key `"__single"`.
///
/// An empty input returns an empty map (the original implementation
/// panicked on `values().next().unwrap()`).
///
/// # Panics
///
/// Panics if a nested value does not have the expected shape.
pub fn json2set_m(json: &BTreeMap<String, Value>) -> BTreeMap<String, BTreeMap<String, IntSpan>> {
    let mut s_of: BTreeMap<String, BTreeMap<String, IntSpan>> = BTreeMap::new();
    // Guard: no entries means nothing to classify or convert.
    if json.is_empty() {
        return s_of;
    }
    let is_multi = json.values().next().unwrap().is_object();
    if is_multi {
        for (key, value) in json {
            // Deserialize directly from the Value; avoids the string
            // round-trip (to_string + from_str) of the original.
            let runlist_one: BTreeMap<String, Value> =
                serde_json::from_value(value.clone()).unwrap();
            s_of.insert(key.to_string(), json2set(&runlist_one));
        }
    } else {
        s_of.insert("__single".to_string(), json2set(json));
    }
    s_of
}
/// Ensures every inner map in `set_of` has an entry for each chromosome in
/// `chrs`, inserting an empty `IntSpan` where one is missing.
pub fn fill_up_m(
    set_of: &mut BTreeMap<String, BTreeMap<String, IntSpan>>,
    chrs: &BTreeSet<String>,
) {
    for set in set_of.values_mut() {
        for chr in chrs {
            // Entry API: single lookup instead of contains_key + insert.
            set.entry(chr.clone()).or_insert_with(IntSpan::new);
        }
    }
}
/// Ensures `set` has an entry for each chromosome in `chrs`, inserting an
/// empty `IntSpan` where one is missing.
pub fn fill_up_s(set: &mut BTreeMap<String, IntSpan>, chrs: &BTreeSet<String>) {
    for chr in chrs {
        // Entry API: single lookup instead of contains_key + insert.
        set.entry(chr.clone()).or_insert_with(IntSpan::new);
    }
}
/// Collects the union of all chromosome names appearing in any inner map.
pub fn chrs_in_sets(set_of: &BTreeMap<String, BTreeMap<String, IntSpan>>) -> BTreeSet<String> {
    set_of
        .values()
        .flat_map(|set| set.keys().cloned())
        .collect()
}
/// Parses each tab-separated part of `line` as a `Range` and caches valid,
/// previously-unseen ones in `range_of_str` keyed by their original text.
pub fn build_range_of_part(line: &str, range_of_str: &mut HashMap<String, Range>) {
    for part in line.split('\t') {
        let range = Range::from_str(part);
        // Keep only parseable ranges; first occurrence wins.
        if range.is_valid() && !range_of_str.contains_key(part) {
            range_of_str.insert(part.to_string(), range);
        }
    }
}
/// Sorts TSV "link" lines, where each line is a list of tab-separated range
/// strings.
///
/// Two levels of ordering are applied:
/// 1. *Within* each line: parts that parse as valid ranges are ordered by
///    (chr, start, strand); unparsable parts are appended, unsorted, at the
///    end of the line. Duplicate resulting lines are removed (via BTreeSet).
/// 2. *Among* lines: ordered by the first part's (chr, start, strand), then
///    by descending part count, so links with more parts come first.
///
/// The multi-key ordering is built from successive STABLE sorts keyed on the
/// least-significant key first (strand, then start, then chr) — reordering
/// these passes would change the result.
pub fn sort_links(lines: &[String]) -> Vec<String> {
    // Cache of part-string -> parsed Range, shared across all lines.
    let mut range_of_part: HashMap<String, Range> = HashMap::new();
    // BTreeSet both deduplicates and pre-sorts lexicographically; the
    // explicit sorts below impose the final order.
    let mut within_links: BTreeSet<String> = BTreeSet::new();
    for line in lines {
        build_range_of_part(line, &mut range_of_part);
        let parts: Vec<&str> = line.split('\t').collect();
        // Parts present in the cache parsed as valid ranges.
        let mut valids: Vec<&str> = parts
            .clone()
            .into_iter()
            .filter(|p| range_of_part.contains_key(*p))
            .collect();
        // Everything else is kept but not sorted.
        let mut invalids: Vec<&str> = parts
            .clone()
            .into_iter()
            .filter(|p| !range_of_part.contains_key(*p))
            .collect();
        // Stable sorts, least-significant key first => (chr, start, strand).
        valids.sort_by_key(|k| range_of_part.get(*k).unwrap().strand());
        valids.sort_by_key(|k| range_of_part.get(*k).unwrap().start());
        valids.sort_by_key(|k| range_of_part.get(*k).unwrap().chr());
        valids.append(&mut invalids);
        let new_line: String = valids.join("\t");
        within_links.insert(new_line);
    }
    let mut among_links: Vec<String> = within_links.into_iter().collect();
    {
        // Same stable-sort trick across lines, keyed on the FIRST part of
        // each line: strand, then start, then chr.
        among_links.sort_by_cached_key(|k| {
            let parts: Vec<&str> = k.split('\t').collect();
            range_of_part.get(parts[0]).unwrap().strand()
        });
        among_links.sort_by_cached_key(|k| {
            let parts: Vec<&str> = k.split('\t').collect();
            range_of_part.get(parts[0]).unwrap().start()
        });
        among_links.sort_by_cached_key(|k| {
            let parts: Vec<&str> = k.split('\t').collect();
            range_of_part.get(parts[0]).unwrap().chr()
        });
    }
    {
        // Final (most significant) pass: more parts first.
        among_links.sort_by_cached_key(|k| Reverse(k.split('\t').count()));
    }
    among_links
}
/// Extracts the sequence for `range` from an indexed FASTA `file` by invoking
/// the external `samtools faidx` command.
///
/// # Errors
///
/// Fails when `samtools` is not on `PATH`, when spawning the process fails,
/// or when the command exits with a non-zero status.
pub fn get_seq_faidx(file: &str, range: &str) -> anyhow::Result<String> {
    // Locate the external executable on PATH.
    let bin = match which::which("samtools") {
        Ok(pth) => pth.to_string_lossy().to_string(),
        Err(_) => return Err(anyhow!("Can't find the external command")),
    };
    let output = Command::new(bin)
        .arg("faidx")
        .arg(file)
        .arg(range)
        .output()?;
    if !output.status.success() {
        return Err(anyhow!("Command executed with failing error code"));
    }
    // Concatenate sequence lines, skipping FASTA header lines (">...").
    let mut seq = String::new();
    for line in output.stdout.lines().map_while(Result::ok) {
        if !line.starts_with('>') {
            seq.push_str(&line);
        }
    }
    Ok(seq)
}
/// Returns the file name of `path` stripped of ALL extensions, e.g.
/// `"dir/a.tar.gz"` → `"a"`.
///
/// # Errors
///
/// Returns `io::ErrorKind::InvalidInput` when the path has no file stem
/// (e.g. `""` or `".."`) or the stem is not valid UTF-8. The original
/// implementation `unwrap()`ed and panicked in those cases, despite the
/// `io::Result` signature.
pub fn basename(path: impl AsRef<Path>) -> io::Result<String> {
    let path = path.as_ref();
    let stem = path.file_stem().and_then(OsStr::to_str).ok_or_else(|| {
        io::Error::new(
            io::ErrorKind::InvalidInput,
            format!("no UTF-8 file stem in {}", path.display()),
        )
    })?;
    // `file_stem` removes only the last extension; drop everything after the
    // first '.' so multi-extension names reduce to the bare name.
    // `split` always yields at least one item, so `next()` cannot be None.
    let basename = stem.split('.').next().unwrap_or(stem).to_string();
    Ok(basename)
}
/// Resolves `path` to an absolute, lexically-cleaned `PathBuf`.
///
/// Relative paths are joined onto the current working directory; the result
/// is normalized with `path_clean` (no filesystem access, so symlinks are
/// not resolved).
///
/// # Errors
///
/// Returns any error from `env::current_dir()`.
pub fn absolute_path(path: impl AsRef<Path>) -> io::Result<PathBuf> {
    let path = path.as_ref();
    let joined = if path.is_absolute() {
        path.to_path_buf()
    } else {
        let cwd = env::current_dir()?;
        cwd.join(path)
    };
    Ok(joined.clean())
}
#[cfg(test)]
mod read_write {
    use super::*;
    use tempfile::TempDir;

    /// Round-trips a small set of lines through `write_lines`/`read_lines`.
    #[test]
    fn test_write_lines() {
        let tmp = TempDir::new().unwrap();
        let filename = tmp
            .path()
            .join("test.txt")
            .into_os_string()
            .into_string()
            .unwrap();
        // "a\ntest" contains an embedded newline, so 3 inputs yield 4 lines.
        write_lines(&filename, &vec!["This", "is", "a\ntest"]).expect("Write error");
        let lines = read_lines(&filename);
        assert_eq!(lines.len(), 4);
    }

    /// Round-trips a JSON fixture through `read_json`/`write_json`.
    #[test]
    fn test_read_write_json() {
        let tmp = TempDir::new().unwrap();
        let filename = tmp
            .path()
            .join("test.json")
            .into_os_string()
            .into_string()
            .unwrap();
        let json = read_json("tests/spanr/Atha.json");
        write_json(&filename, &json).expect("Write error");
        let lines = read_lines(&filename);
        // Pretty-printed line count may differ by one across serde_json
        // versions; accept either.
        assert!(lines.len() == 17 || lines.len() == 18);
    }
}