rsomics_vcf_reheader/
lib.rs1use std::collections::HashMap;
2use std::io::{self, BufRead, BufReader, BufWriter, Write};
3use std::path::Path;
4
5use rsomics_common::{Result, RsomicsError};
6
7pub fn reheader_replace(
10 input: &mut dyn io::Read,
11 header_file: &Path,
12 output: &mut dyn io::Write,
13) -> Result<u64> {
14 let new_header = std::fs::read_to_string(header_file).map_err(RsomicsError::Io)?;
15 let mut out = BufWriter::new(output);
16
17 for line in new_header.lines() {
19 out.write_all(line.as_bytes()).map_err(RsomicsError::Io)?;
20 out.write_all(b"\n").map_err(RsomicsError::Io)?;
21 }
22
23 let mut reader = BufReader::new(input);
25 let mut line = String::new();
26 let mut records: u64 = 0;
27 loop {
28 line.clear();
29 let n = reader.read_line(&mut line).map_err(RsomicsError::Io)?;
30 if n == 0 {
31 break;
32 }
33 let trimmed = line.trim_end_matches(['\n', '\r']);
34 if trimmed.is_empty() {
35 continue;
36 }
37 if trimmed.starts_with('#') {
38 continue;
40 }
41 out.write_all(trimmed.as_bytes())
42 .map_err(RsomicsError::Io)?;
43 out.write_all(b"\n").map_err(RsomicsError::Io)?;
44 records += 1;
45 }
46
47 out.flush().map_err(RsomicsError::Io)?;
48 Ok(records)
49}
50
51fn parse_samples_file(path: &Path) -> Result<(Vec<String>, HashMap<String, String>)> {
60 let content = std::fs::read_to_string(path).map_err(RsomicsError::Io)?;
61 let mut positional: Vec<String> = Vec::new();
62 let mut map: HashMap<String, String> = HashMap::new();
63 let mut map_mode = false;
64
65 for raw in content.lines() {
66 let line = raw.trim();
67 if line.is_empty() {
68 continue;
69 }
70 let mut parts = line.splitn(2, |c: char| c.is_whitespace());
71 let first = parts.next().unwrap_or("").trim();
72 let second = parts.next().map(|s| s.trim());
73 if let Some(new_name) = second
74 && !new_name.is_empty()
75 {
76 map_mode = true;
77 map.insert(first.to_owned(), new_name.to_owned());
78 continue;
79 }
80 positional.push(first.to_owned());
81 }
82
83 if map_mode {
84 Ok((Vec::new(), map))
85 } else {
86 Ok((positional, HashMap::new()))
87 }
88}
89
90pub fn reheader_samples(
93 input: &mut dyn io::Read,
94 samples_file: &Path,
95 output: &mut dyn io::Write,
96) -> Result<u64> {
97 let (positional, map) = parse_samples_file(samples_file)?;
98 let mut out = BufWriter::new(output);
99 let mut reader = BufReader::new(input);
100 let mut line = String::new();
101 let mut records: u64 = 0;
102
103 loop {
104 line.clear();
105 let n = reader.read_line(&mut line).map_err(RsomicsError::Io)?;
106 if n == 0 {
107 break;
108 }
109 let trimmed = line.trim_end_matches(['\n', '\r']);
110 if trimmed.is_empty() {
111 continue;
112 }
113
114 if trimmed.starts_with('#') {
115 if trimmed.starts_with("#CHROM") {
116 let cols: Vec<&str> = trimmed.split('\t').collect();
118 let fixed_cols = if cols.len() > 9 {
119 let fixed: Vec<String> = cols[9..]
120 .iter()
121 .enumerate()
122 .map(|(i, old)| {
123 if !map.is_empty() {
124 map.get(*old).map(String::as_str).unwrap_or(old).to_owned()
125 } else if i < positional.len() {
126 positional[i].clone()
127 } else {
128 old.to_string()
129 }
130 })
131 .collect();
132 [
133 &cols[..9],
134 fixed
135 .iter()
136 .map(String::as_str)
137 .collect::<Vec<_>>()
138 .as_slice(),
139 ]
140 .concat()
141 .join("\t")
142 } else {
143 trimmed.to_owned()
144 };
145 out.write_all(fixed_cols.as_bytes())
146 .map_err(RsomicsError::Io)?;
147 } else {
148 out.write_all(trimmed.as_bytes())
149 .map_err(RsomicsError::Io)?;
150 }
151 out.write_all(b"\n").map_err(RsomicsError::Io)?;
152 } else {
153 out.write_all(trimmed.as_bytes())
154 .map_err(RsomicsError::Io)?;
155 out.write_all(b"\n").map_err(RsomicsError::Io)?;
156 records += 1;
157 }
158 }
159
160 out.flush().map_err(RsomicsError::Io)?;
161 Ok(records)
162}
163
164pub fn passthrough(input: &mut dyn io::Read, output: &mut dyn io::Write) -> Result<u64> {
166 let mut out = BufWriter::new(output);
167 let mut reader = BufReader::new(input);
168 let mut line = String::new();
169 let mut records: u64 = 0;
170 loop {
171 line.clear();
172 let n = reader.read_line(&mut line).map_err(RsomicsError::Io)?;
173 if n == 0 {
174 break;
175 }
176 let trimmed = line.trim_end_matches(['\n', '\r']);
177 if trimmed.is_empty() {
178 continue;
179 }
180 out.write_all(trimmed.as_bytes())
181 .map_err(RsomicsError::Io)?;
182 out.write_all(b"\n").map_err(RsomicsError::Io)?;
183 if !trimmed.starts_with('#') {
184 records += 1;
185 }
186 }
187 out.flush().map_err(RsomicsError::Io)?;
188 Ok(records)
189}