1use rsomics_common::{Result, RsomicsError};
2use std::collections::HashMap;
3use std::fs::File;
4use std::io::{BufRead, BufReader, BufWriter, Write};
5use std::path::Path;
6
7pub fn inner_join(left: &Path, right: &Path, key_col: &str, output: &mut dyn Write) -> Result<u64> {
8 let (rh, rdata) = load_tsv(right, key_col)?;
9 let src_file = File::open(left)
10 .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", left.display())))?;
11 let buf_reader = BufReader::new(src_file);
12 let mut out = BufWriter::new(output);
13 let mut lines = buf_reader.lines();
14
15 let hdr_line = lines
16 .next()
17 .ok_or_else(|| RsomicsError::InvalidInput("empty left".into()))?
18 .map_err(RsomicsError::Io)?;
19 let lcols: Vec<&str> = hdr_line.split('\t').collect();
20 let lki = lcols
21 .iter()
22 .position(|c| *c == key_col)
23 .ok_or_else(|| RsomicsError::InvalidInput(format!("'{key_col}' not in left")))?;
24
25 write!(out, "{hdr_line}").map_err(RsomicsError::Io)?;
26 for h in &rh {
27 if *h != key_col {
28 write!(out, "\t{h}").map_err(RsomicsError::Io)?;
29 }
30 }
31 writeln!(out).map_err(RsomicsError::Io)?;
32
33 let mut count = 0u64;
34 for line in lines {
35 let line = line.map_err(RsomicsError::Io)?;
36 let fields: Vec<&str> = line.split('\t').collect();
37 let key = fields.get(lki).copied().unwrap_or("");
38 if let Some(rrow) = rdata.get(key) {
39 write!(out, "{line}").map_err(RsomicsError::Io)?;
40 for (i, val) in rrow.iter().enumerate() {
41 if rh[i] != key_col {
42 write!(out, "\t{val}").map_err(RsomicsError::Io)?;
43 }
44 }
45 writeln!(out).map_err(RsomicsError::Io)?;
46 count += 1;
47 }
48 }
49 out.flush().map_err(RsomicsError::Io)?;
50 Ok(count)
51}
52
/// Rows of a TSV file indexed by the value found in their key column; each entry holds the
/// row's tab-separated fields in file order.
type RowMap = HashMap<String, Vec<String>>;
54
55fn load_tsv(path: &Path, key_col_name: &str) -> Result<(Vec<String>, RowMap)> {
56 let file = File::open(path)
57 .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", path.display())))?;
58 let reader = BufReader::new(file);
59 let mut lines = reader.lines();
60
61 let header = lines
62 .next()
63 .ok_or_else(|| RsomicsError::InvalidInput("empty".into()))?
64 .map_err(RsomicsError::Io)?;
65 let cols: Vec<String> = header.split('\t').map(String::from).collect();
66 let ki = cols
67 .iter()
68 .position(|c| c == key_col_name)
69 .ok_or_else(|| RsomicsError::InvalidInput(format!("'{key_col_name}' not in right")))?;
70
71 let mut data = HashMap::new();
72 for line in lines {
73 let line = line.map_err(RsomicsError::Io)?;
74 let fields: Vec<String> = line.split('\t').map(String::from).collect();
75 let key = fields.get(ki).cloned().unwrap_or_default();
76 data.insert(key, fields);
77 }
78 Ok((cols, data))
79}