1use std::collections::BinaryHeap;
28use std::io::{BufRead, BufReader, BufWriter, Read, Write};
29
30use rsomics_common::{Result, RsomicsError};
31
32#[derive(Debug, Clone)]
36struct Bed3 {
37 chrom: String,
38 start: i64,
39 end: i64,
40}
41
42#[derive(Debug, Eq, PartialEq)]
44struct Event {
45 coord: i64,
47 is_start: bool,
49 file_idx: usize,
51}
52
53impl Ord for Event {
57 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
58 other
60 .coord
61 .cmp(&self.coord)
62 .then(self.is_start.cmp(&other.is_start)) }
64}
65impl PartialOrd for Event {
66 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
67 Some(self.cmp(other))
68 }
69}
70
71struct FileReader<R: BufRead> {
74 reader: R,
75 peeked: Option<Bed3>,
77}
78
79impl<R: BufRead> FileReader<R> {
80 fn new(reader: R) -> Result<Self> {
81 let mut fr = FileReader {
82 reader,
83 peeked: None,
84 };
85 fr.peeked = fr.read_next()?;
86 Ok(fr)
87 }
88
89 fn read_next(&mut self) -> Result<Option<Bed3>> {
90 loop {
91 let mut line = String::new();
92 let n = self.reader.read_line(&mut line).map_err(RsomicsError::Io)?;
93 if n == 0 {
94 return Ok(None);
95 }
96 let t = line.trim_end_matches(['\n', '\r']);
97 if t.is_empty()
98 || t.starts_with('#')
99 || t.starts_with("track")
100 || t.starts_with("browser")
101 {
102 continue;
103 }
104 let mut cols = t.splitn(4, '\t');
105 let chrom = match cols.next() {
106 Some(c) => c.to_owned(),
107 None => continue,
108 };
109 let start: i64 = match cols.next().and_then(|s| s.parse().ok()) {
110 Some(v) => v,
111 None => continue,
112 };
113 let end: i64 = match cols.next().and_then(|s| s.parse().ok()) {
114 Some(v) => v,
115 None => continue,
116 };
117 return Ok(Some(Bed3 { chrom, start, end }));
118 }
119 }
120
121 fn peek(&self) -> Option<&Bed3> {
123 self.peeked.as_ref()
124 }
125
126 fn next(&mut self) -> Result<Option<Bed3>> {
128 let cur = self.peeked.take();
129 self.peeked = self.read_next()?;
130 Ok(cur)
131 }
132}
133
134pub fn multiinter<R: Read, W: Write>(
141 readers: Vec<R>,
142 names: &[String],
143 header: bool,
144 out: W,
145) -> Result<()> {
146 let n = readers.len();
147 let mut file_readers: Vec<FileReader<BufReader<R>>> = readers
148 .into_iter()
149 .map(|r| FileReader::new(BufReader::new(r)))
150 .collect::<Result<Vec<_>>>()?;
151
152 let mut writer = BufWriter::new(out);
153
154 if header {
155 write!(writer, "chrom\tstart\tend\tnum\tlist").map_err(RsomicsError::Io)?;
156 for name in names {
157 write!(writer, "\t{name}").map_err(RsomicsError::Io)?;
158 }
159 writeln!(writer).map_err(RsomicsError::Io)?;
160 }
161
162 let mut processed_chroms: Vec<String> = Vec::new();
166
167 loop {
168 let next_chrom = file_readers
170 .iter()
171 .filter_map(|fr| fr.peek().map(|r| r.chrom.as_str()))
172 .min()
173 .map(ToOwned::to_owned);
174
175 let Some(chrom) = next_chrom else {
176 break;
177 };
178
179 if !processed_chroms.contains(&chrom) {
180 processed_chroms.push(chrom.clone());
181 }
182
183 let mut queue: BinaryHeap<Event> = BinaryHeap::new();
185
186 for (file_idx, fr) in file_readers.iter_mut().enumerate() {
187 while fr.peek().is_some_and(|r| r.chrom == chrom) {
189 let rec = fr.next()?.unwrap();
190 queue.push(Event {
191 coord: rec.start,
192 is_start: true,
193 file_idx,
194 });
195 queue.push(Event {
196 coord: rec.end,
197 is_start: false,
198 file_idx,
199 });
200 }
201 }
202
203 let mut depth = vec![0u32; n];
208 let mut active_count = 0usize;
209 let mut segment_start: i64 = 0;
210 let mut in_segment = false;
211
212 while let Some(event) = queue.pop() {
213 let coord = event.coord;
214
215 let presence_changes = if event.is_start {
217 depth[event.file_idx] == 0
218 } else {
219 depth[event.file_idx] == 1
220 };
221
222 if presence_changes && in_segment && coord > segment_start {
224 emit_row(
225 &mut writer,
226 &chrom,
227 segment_start,
228 coord,
229 active_count,
230 &depth,
231 names,
232 )?;
233 }
234
235 if event.is_start {
237 depth[event.file_idx] += 1;
238 if depth[event.file_idx] == 1 {
239 active_count += 1;
240 }
241 } else {
242 depth[event.file_idx] -= 1;
243 if depth[event.file_idx] == 0 {
244 active_count = active_count.saturating_sub(1);
245 }
246 }
247
248 if presence_changes {
250 if active_count > 0 {
251 segment_start = coord;
252 in_segment = true;
253 } else {
254 in_segment = false;
255 }
256 }
257 }
258 }
259
260 writer.flush().map_err(RsomicsError::Io)?;
261 Ok(())
262}
263
264fn emit_row<W: Write>(
265 w: &mut W,
266 chrom: &str,
267 start: i64,
268 end: i64,
269 active_count: usize,
270 depth: &[u32],
271 names: &[String],
272) -> Result<()> {
273 let mut list = String::new();
275 let mut first = true;
276 for (i, &d) in depth.iter().enumerate() {
277 if d > 0 {
278 if !first {
279 list.push(',');
280 }
281 list.push_str(&names[i]);
282 first = false;
283 }
284 }
285 if list.is_empty() {
286 list.push_str("none");
287 }
288
289 write!(w, "{chrom}\t{start}\t{end}\t{active_count}\t{list}").map_err(RsomicsError::Io)?;
290 for &d in depth {
291 write!(w, "\t{}", u32::from(d > 0)).map_err(RsomicsError::Io)?;
292 }
293 writeln!(w).map_err(RsomicsError::Io)
294}
295
296#[cfg(test)]
299mod tests {
300 use std::io::Cursor;
301
302 use super::*;
303
304 fn run(inputs: &[&str], names: &[&str]) -> String {
305 let readers: Vec<Cursor<&str>> = inputs.iter().map(|s| Cursor::new(*s)).collect();
306 let name_strings: Vec<String> = names.iter().copied().map(str::to_string).collect();
307 let mut out = Vec::new();
308 multiinter(readers, &name_strings, false, &mut out).unwrap();
309 String::from_utf8(out).unwrap()
310 }
311
312 #[test]
313 fn no_overlap_two_files() {
314 let out = run(&["chr1\t100\t200\n", "chr1\t300\t400\n"], &["f1", "f2"]);
315 let lines: Vec<&str> = out.lines().collect();
316 assert_eq!(lines.len(), 2, "lines: {out:?}");
318 assert!(lines[0].contains("\t1\tf1\t1\t0"), "row0: {}", lines[0]);
319 assert!(lines[1].contains("\t1\tf2\t0\t1"), "row1: {}", lines[1]);
320 }
321
322 #[test]
323 fn full_overlap_two_files() {
324 let out = run(&["chr1\t100\t300\n", "chr1\t100\t300\n"], &["f1", "f2"]);
325 let lines: Vec<&str> = out.lines().collect();
326 assert_eq!(lines.len(), 1, "lines: {out:?}");
327 assert!(lines[0].contains("\t2\tf1,f2\t1\t1"), "row: {}", lines[0]);
328 }
329
330 #[test]
331 fn partial_overlap_three_files() {
332 let out = run(
337 &["chr1\t100\t200\n", "chr1\t150\t250\n", "chr1\t50\t120\n"],
338 &["f1", "f2", "f3"],
339 );
340 let lines: Vec<&str> = out.lines().collect();
341 assert_eq!(lines.len(), 5, "lines: {out:?}");
342 assert!(lines[0].starts_with("chr1\t50\t100\t1\tf3"), "{}", lines[0]);
344 assert!(
346 lines[1].starts_with("chr1\t100\t120\t2\tf1,f3"),
347 "{}",
348 lines[1]
349 );
350 assert!(
352 lines[2].starts_with("chr1\t120\t150\t1\tf1"),
353 "{}",
354 lines[2]
355 );
356 assert!(
358 lines[3].starts_with("chr1\t150\t200\t2\tf1,f2"),
359 "{}",
360 lines[3]
361 );
362 assert!(
364 lines[4].starts_with("chr1\t200\t250\t1\tf2"),
365 "{}",
366 lines[4]
367 );
368 }
369
370 #[test]
371 fn header_printed_when_requested() {
372 let readers: Vec<Cursor<&str>> = vec![Cursor::new("chr1\t10\t20\n")];
373 let names = vec!["s1".to_string()];
374 let mut out = Vec::new();
375 multiinter(readers, &names, true, &mut out).unwrap();
376 let s = String::from_utf8(out).unwrap();
377 assert!(s.starts_with("chrom\tstart\tend\tnum\tlist\ts1"), "{s}");
378 }
379
380 #[test]
381 fn multi_chrom_ordering() {
382 let out = run(
383 &["chr1\t10\t20\nchr2\t10\t20\n", "chr2\t15\t25\n"],
384 &["f1", "f2"],
385 );
386 let lines: Vec<&str> = out.lines().collect();
387 assert_eq!(lines.len(), 4, "lines:\n{out}");
390 assert!(lines[0].starts_with("chr1"), "{}", lines[0]);
391 assert!(lines[1].starts_with("chr2\t10\t15"), "{}", lines[1]);
392 assert!(lines[2].starts_with("chr2\t15\t20"), "{}", lines[2]);
393 assert!(lines[3].starts_with("chr2\t20\t25"), "{}", lines[3]);
394 }
395
396 #[test]
397 fn empty_file_does_not_crash() {
398 let out = run(&["", "chr1\t100\t200\n"], &["f1", "f2"]);
399 let lines: Vec<&str> = out.lines().collect();
400 assert_eq!(lines.len(), 1);
401 assert!(lines[0].contains("\t1\tf2\t0\t1"), "{}", lines[0]);
402 }
403}