1use std::collections::BinaryHeap;
28use std::io::{BufRead, BufReader, BufWriter, Read, Write};
29
30use rsomics_common::{Result, RsomicsError};
31
32#[derive(Debug, Clone)]
36struct Bed3 {
37 chrom: String,
38 start: i64,
39 end: i64,
40}
41
42#[derive(Debug, Eq, PartialEq)]
44struct Event {
45 coord: i64,
47 is_start: bool,
49 file_idx: usize,
51}
52
53impl Ord for Event {
57 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
58 other
60 .coord
61 .cmp(&self.coord)
62 .then(self.is_start.cmp(&other.is_start)) }
64}
65impl PartialOrd for Event {
66 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
67 Some(self.cmp(other))
68 }
69}
70
71struct FileReader<R: BufRead> {
74 reader: R,
75 peeked: Option<Bed3>,
77}
78
79impl<R: BufRead> FileReader<R> {
80 fn new(reader: R) -> Result<Self> {
81 let mut fr = FileReader {
82 reader,
83 peeked: None,
84 };
85 fr.peeked = fr.read_next()?;
86 Ok(fr)
87 }
88
89 fn read_next(&mut self) -> Result<Option<Bed3>> {
90 loop {
91 let mut line = String::new();
92 let n = self.reader.read_line(&mut line).map_err(RsomicsError::Io)?;
93 if n == 0 {
94 return Ok(None);
95 }
96 let t = line.trim_end_matches(['\n', '\r']);
97 if t.is_empty()
98 || t.starts_with('#')
99 || t.starts_with("track")
100 || t.starts_with("browser")
101 {
102 continue;
103 }
104 let mut cols = t.splitn(4, '\t');
105 let chrom = match cols.next() {
106 Some(c) => c.to_owned(),
107 None => continue,
108 };
109 let start: i64 = match cols.next().and_then(|s| s.parse().ok()) {
110 Some(v) => v,
111 None => continue,
112 };
113 let end: i64 = match cols.next().and_then(|s| s.parse().ok()) {
114 Some(v) => v,
115 None => continue,
116 };
117 return Ok(Some(Bed3 { chrom, start, end }));
118 }
119 }
120
121 fn peek(&self) -> Option<&Bed3> {
123 self.peeked.as_ref()
124 }
125
126 fn next(&mut self) -> Result<Option<Bed3>> {
128 let cur = self.peeked.take();
129 self.peeked = self.read_next()?;
130 Ok(cur)
131 }
132}
133
134pub fn multiinter<R: Read, W: Write>(
141 readers: Vec<R>,
142 names: &[String],
143 header: bool,
144 out: W,
145) -> Result<()> {
146 let n = readers.len();
147 let mut file_readers: Vec<FileReader<BufReader<R>>> = readers
148 .into_iter()
149 .map(|r| FileReader::new(BufReader::new(r)))
150 .collect::<Result<Vec<_>>>()?;
151
152 let mut writer = BufWriter::new(out);
153
154 if header {
155 write!(writer, "chrom\tstart\tend\tnum\tlist").map_err(RsomicsError::Io)?;
156 for name in names {
157 write!(writer, "\t{name}").map_err(RsomicsError::Io)?;
158 }
159 writeln!(writer).map_err(RsomicsError::Io)?;
160 }
161
162 let mut processed_chroms: Vec<String> = Vec::new();
166
167 loop {
168 let next_chrom = file_readers
170 .iter()
171 .filter_map(|fr| fr.peek().map(|r| r.chrom.as_str()))
172 .min()
173 .map(ToOwned::to_owned);
174
175 let Some(chrom) = next_chrom else {
176 break;
177 };
178
179 if !processed_chroms.contains(&chrom) {
180 processed_chroms.push(chrom.clone());
181 }
182
183 let mut queue: BinaryHeap<Event> = BinaryHeap::new();
185
186 for (file_idx, fr) in file_readers.iter_mut().enumerate() {
187 while fr.peek().is_some_and(|r| r.chrom == chrom) {
189 let rec = fr.next()?.unwrap();
190 queue.push(Event {
191 coord: rec.start,
192 is_start: true,
193 file_idx,
194 });
195 queue.push(Event {
196 coord: rec.end,
197 is_start: false,
198 file_idx,
199 });
200 }
201 }
202
203 let mut depth = vec![0u32; n];
205 let mut active_count = 0usize;
206 let mut prev_coord: i64 = 0;
207 let mut first = true;
208
209 while let Some(event) = queue.pop() {
210 let coord = event.coord;
211
212 if !first && coord > prev_coord && active_count > 0 {
216 emit_row(
217 &mut writer,
218 &chrom,
219 prev_coord,
220 coord,
221 active_count,
222 &depth,
223 names,
224 )?;
225 }
226
227 if event.is_start {
228 if depth[event.file_idx] == 0 {
229 active_count += 1;
230 }
231 depth[event.file_idx] += 1;
232 } else {
233 depth[event.file_idx] -= 1;
234 if depth[event.file_idx] == 0 {
235 active_count = active_count.saturating_sub(1);
236 }
237 }
238
239 prev_coord = coord;
240 first = false;
241 }
242 }
243
244 writer.flush().map_err(RsomicsError::Io)?;
245 Ok(())
246}
247
248fn emit_row<W: Write>(
249 w: &mut W,
250 chrom: &str,
251 start: i64,
252 end: i64,
253 active_count: usize,
254 depth: &[u32],
255 names: &[String],
256) -> Result<()> {
257 let mut list = String::new();
259 let mut first = true;
260 for (i, &d) in depth.iter().enumerate() {
261 if d > 0 {
262 if !first {
263 list.push(',');
264 }
265 list.push_str(&names[i]);
266 first = false;
267 }
268 }
269 if list.is_empty() {
270 list.push_str("none");
271 }
272
273 write!(w, "{chrom}\t{start}\t{end}\t{active_count}\t{list}").map_err(RsomicsError::Io)?;
274 for &d in depth {
275 write!(w, "\t{d}").map_err(RsomicsError::Io)?;
276 }
277 writeln!(w).map_err(RsomicsError::Io)
278}
279
280#[cfg(test)]
283mod tests {
284 use std::io::Cursor;
285
286 use super::*;
287
288 fn run(inputs: &[&str], names: &[&str]) -> String {
289 let readers: Vec<Cursor<&str>> = inputs.iter().map(|s| Cursor::new(*s)).collect();
290 let name_strings: Vec<String> = names.iter().copied().map(str::to_string).collect();
291 let mut out = Vec::new();
292 multiinter(readers, &name_strings, false, &mut out).unwrap();
293 String::from_utf8(out).unwrap()
294 }
295
296 #[test]
297 fn no_overlap_two_files() {
298 let out = run(&["chr1\t100\t200\n", "chr1\t300\t400\n"], &["f1", "f2"]);
299 let lines: Vec<&str> = out.lines().collect();
300 assert_eq!(lines.len(), 2, "lines: {out:?}");
302 assert!(lines[0].contains("\t1\tf1\t1\t0"), "row0: {}", lines[0]);
303 assert!(lines[1].contains("\t1\tf2\t0\t1"), "row1: {}", lines[1]);
304 }
305
306 #[test]
307 fn full_overlap_two_files() {
308 let out = run(&["chr1\t100\t300\n", "chr1\t100\t300\n"], &["f1", "f2"]);
309 let lines: Vec<&str> = out.lines().collect();
310 assert_eq!(lines.len(), 1, "lines: {out:?}");
311 assert!(lines[0].contains("\t2\tf1,f2\t1\t1"), "row: {}", lines[0]);
312 }
313
314 #[test]
315 fn partial_overlap_three_files() {
316 let out = run(
321 &["chr1\t100\t200\n", "chr1\t150\t250\n", "chr1\t50\t120\n"],
322 &["f1", "f2", "f3"],
323 );
324 let lines: Vec<&str> = out.lines().collect();
325 assert_eq!(lines.len(), 5, "lines: {out:?}");
326 assert!(lines[0].starts_with("chr1\t50\t100\t1\tf3"), "{}", lines[0]);
328 assert!(
330 lines[1].starts_with("chr1\t100\t120\t2\tf1,f3"),
331 "{}",
332 lines[1]
333 );
334 assert!(
336 lines[2].starts_with("chr1\t120\t150\t1\tf1"),
337 "{}",
338 lines[2]
339 );
340 assert!(
342 lines[3].starts_with("chr1\t150\t200\t2\tf1,f2"),
343 "{}",
344 lines[3]
345 );
346 assert!(
348 lines[4].starts_with("chr1\t200\t250\t1\tf2"),
349 "{}",
350 lines[4]
351 );
352 }
353
354 #[test]
355 fn header_printed_when_requested() {
356 let readers: Vec<Cursor<&str>> = vec![Cursor::new("chr1\t10\t20\n")];
357 let names = vec!["s1".to_string()];
358 let mut out = Vec::new();
359 multiinter(readers, &names, true, &mut out).unwrap();
360 let s = String::from_utf8(out).unwrap();
361 assert!(s.starts_with("chrom\tstart\tend\tnum\tlist\ts1"), "{s}");
362 }
363
364 #[test]
365 fn multi_chrom_ordering() {
366 let out = run(
367 &["chr1\t10\t20\nchr2\t10\t20\n", "chr2\t15\t25\n"],
368 &["f1", "f2"],
369 );
370 let lines: Vec<&str> = out.lines().collect();
371 assert_eq!(lines.len(), 4, "lines:\n{out}");
374 assert!(lines[0].starts_with("chr1"), "{}", lines[0]);
375 assert!(lines[1].starts_with("chr2\t10\t15"), "{}", lines[1]);
376 assert!(lines[2].starts_with("chr2\t15\t20"), "{}", lines[2]);
377 assert!(lines[3].starts_with("chr2\t20\t25"), "{}", lines[3]);
378 }
379
380 #[test]
381 fn empty_file_does_not_crash() {
382 let out = run(&["", "chr1\t100\t200\n"], &["f1", "f2"]);
383 let lines: Vec<&str> = out.lines().collect();
384 assert_eq!(lines.len(), 1);
385 assert!(lines[0].contains("\t1\tf2\t0\t1"), "{}", lines[0]);
386 }
387}