1use std::collections::HashMap;
14use std::fs::File;
15use std::io::{self, BufRead, BufReader, BufWriter, Write};
16use std::path::Path;
17
18use rsomics_common::{Result, RsomicsError};
19
20pub fn load_genome(path: &Path) -> Result<HashMap<String, u64>> {
23 let raw = std::fs::read_to_string(path)
24 .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", path.display())))?;
25 let mut map = HashMap::new();
26 for line in raw.lines() {
27 if line.is_empty() || line.starts_with('#') {
28 continue;
29 }
30 let mut parts = line.splitn(2, '\t');
31 let chrom = parts
32 .next()
33 .ok_or_else(|| RsomicsError::InvalidInput(format!("genome: bad line: {line:?}")))?;
34 let size_str = parts
35 .next()
36 .ok_or_else(|| RsomicsError::InvalidInput(format!("genome: missing size for {chrom}")))?
37 .trim();
38 let size: u64 = size_str.parse().map_err(|_| {
39 RsomicsError::InvalidInput(format!("genome: bad size {size_str:?} for {chrom}"))
40 })?;
41 map.insert(chrom.to_owned(), size);
42 }
43 Ok(map)
44}
45
46#[inline]
47fn is_header(line: &str) -> bool {
48 line.is_empty()
49 || line.starts_with('#')
50 || line.starts_with("track")
51 || line.starts_with("browser")
52}
53
54pub fn shift_reader<R: io::Read>(
60 reader: BufReader<R>,
61 shift: i64,
62 plus_shift: i64,
63 minus_shift: i64,
64 genome: Option<&HashMap<String, u64>>,
65 output: &mut dyn Write,
66) -> Result<()> {
67 let mut out = BufWriter::with_capacity(256 * 1024, output);
68 for (lineno_0, line) in reader.lines().enumerate() {
69 let line = line.map_err(RsomicsError::Io)?;
70 if is_header(&line) {
71 out.write_all(line.as_bytes()).map_err(RsomicsError::Io)?;
72 out.write_all(b"\n").map_err(RsomicsError::Io)?;
73 continue;
74 }
75 let lineno = lineno_0 + 1;
76
77 let cols: Vec<&str> = line.splitn(7, '\t').collect();
79 if cols.len() < 3 {
80 return Err(RsomicsError::InvalidInput(format!(
81 "line {lineno}: fewer than 3 fields"
82 )));
83 }
84 let chrom = cols[0];
85 let start: i64 = cols[1].parse().map_err(|_| {
86 RsomicsError::InvalidInput(format!("line {lineno}: bad start {:?}", cols[1]))
87 })?;
88 let end: i64 = cols[2].parse().map_err(|_| {
89 RsomicsError::InvalidInput(format!("line {lineno}: bad end {:?}", cols[2]))
90 })?;
91
92 let strand = if cols.len() >= 6 { cols[5] } else { "." };
94 let s = if strand == "+" {
95 plus_shift
96 } else if strand == "-" {
97 minus_shift
98 } else {
99 shift
100 };
101
102 let new_start = start + s;
103 let new_end = end + s;
104
105 let (new_start, new_end) = if let Some(g) = genome {
106 let chrom_sz = g.get(chrom).copied().unwrap_or(i64::MAX as u64) as i64;
107 let cs = new_start.max(0);
108 let ce = new_end.min(chrom_sz);
109 if cs >= ce {
110 continue;
111 }
112 (cs, ce)
113 } else {
114 (new_start, new_end)
115 };
116
117 out.write_all(chrom.as_bytes()).map_err(RsomicsError::Io)?;
119 write!(out, "\t{new_start}\t{new_end}").map_err(RsomicsError::Io)?;
120 for col in &cols[3..] {
121 out.write_all(b"\t").map_err(RsomicsError::Io)?;
122 out.write_all(col.as_bytes()).map_err(RsomicsError::Io)?;
123 }
124 out.write_all(b"\n").map_err(RsomicsError::Io)?;
125 }
126 out.flush().map_err(RsomicsError::Io)?;
127 Ok(())
128}
129
130pub fn shift(
132 path: &Path,
133 shift: i64,
134 plus_shift: i64,
135 minus_shift: i64,
136 genome: Option<&HashMap<String, u64>>,
137 output: &mut dyn Write,
138) -> Result<()> {
139 let file = File::open(path)
140 .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", path.display())))?;
141 shift_reader(
142 BufReader::new(file),
143 shift,
144 plus_shift,
145 minus_shift,
146 genome,
147 output,
148 )
149}
150
151pub fn shift_stdin(
153 shift_val: i64,
154 plus_shift: i64,
155 minus_shift: i64,
156 genome: Option<&HashMap<String, u64>>,
157 output: &mut dyn Write,
158) -> Result<()> {
159 shift_reader(
160 BufReader::new(io::stdin()),
161 shift_val,
162 plus_shift,
163 minus_shift,
164 genome,
165 output,
166 )
167}