1use std::cmp::Ordering;
2use std::io::{self, Write};
3
4#[derive(Debug, Clone, Copy, PartialEq, Eq)]
6pub enum OrderCheck {
7 Default,
9 Strict,
11 None,
13}
14
15pub struct CommConfig {
17 pub suppress_col1: bool,
18 pub suppress_col2: bool,
19 pub suppress_col3: bool,
20 pub case_insensitive: bool,
21 pub order_check: OrderCheck,
22 pub output_delimiter: Option<Vec<u8>>,
23 pub total: bool,
24 pub zero_terminated: bool,
25}
26
27impl Default for CommConfig {
28 fn default() -> Self {
29 Self {
30 suppress_col1: false,
31 suppress_col2: false,
32 suppress_col3: false,
33 case_insensitive: false,
34 order_check: OrderCheck::Default,
35 output_delimiter: None,
36 total: false,
37 zero_terminated: false,
38 }
39 }
40}
41
42pub struct CommResult {
44 pub count1: usize,
45 pub count2: usize,
46 pub count3: usize,
47 pub had_order_error: bool,
48}
49
50#[inline]
52fn compare_lines(a: &[u8], b: &[u8], case_insensitive: bool) -> Ordering {
53 if case_insensitive {
54 for (&ca, &cb) in a.iter().zip(b.iter()) {
55 match ca.to_ascii_lowercase().cmp(&cb.to_ascii_lowercase()) {
56 Ordering::Equal => continue,
57 other => return other,
58 }
59 }
60 a.len().cmp(&b.len())
61 } else {
62 a.cmp(b)
63 }
64}
65
66fn split_lines<'a>(data: &'a [u8], delim: u8) -> Vec<&'a [u8]> {
69 if data.is_empty() {
70 return Vec::new();
71 }
72 let count = memchr::memchr_iter(delim, data).count();
73 let has_trailing = data.last() == Some(&delim);
74 let cap = if has_trailing { count } else { count + 1 };
75 let mut lines = Vec::with_capacity(cap);
76 let mut start = 0;
77 for pos in memchr::memchr_iter(delim, data) {
78 lines.push(&data[start..pos]);
79 start = pos + 1;
80 }
81 if start < data.len() {
82 lines.push(&data[start..]);
83 }
84 lines
85}
86
87pub fn comm(
89 data1: &[u8],
90 data2: &[u8],
91 config: &CommConfig,
92 tool_name: &str,
93 out: &mut impl Write,
94) -> io::Result<CommResult> {
95 let delim = if config.zero_terminated { b'\0' } else { b'\n' };
96 let sep = config.output_delimiter.as_deref().unwrap_or(b"\t");
97
98 let prefix2: Vec<u8> = if !config.suppress_col1 {
102 sep.to_vec()
103 } else {
104 Vec::new()
105 };
106 let mut prefix3: Vec<u8> = Vec::new();
107 if !config.suppress_col1 {
108 prefix3.extend_from_slice(sep);
109 }
110 if !config.suppress_col2 {
111 prefix3.extend_from_slice(sep);
112 }
113
114 let lines1 = split_lines(data1, delim);
115 let lines2 = split_lines(data2, delim);
116
117 let mut i1 = 0usize;
118 let mut i2 = 0usize;
119 let mut count1 = 0usize;
120 let mut count2 = 0usize;
121 let mut count3 = 0usize;
122 let mut had_order_error = false;
123 let mut warned1 = false;
124 let mut warned2 = false;
125 let ci = config.case_insensitive;
126
127 let mut buf = Vec::with_capacity(data1.len() + data2.len());
128
129 macro_rules! check_order {
131 ($warned:ident, $lines:ident, $idx:ident, $file_num:expr) => {
132 if config.order_check != OrderCheck::None
133 && !$warned
134 && $idx > 0
135 && compare_lines($lines[$idx], $lines[$idx - 1], ci) == Ordering::Less
136 {
137 had_order_error = true;
138 $warned = true;
139 eprintln!("{}: file {} is not in sorted order", tool_name, $file_num);
140 if config.order_check == OrderCheck::Strict {
141 out.write_all(&buf)?;
142 return Ok(CommResult {
143 count1,
144 count2,
145 count3,
146 had_order_error,
147 });
148 }
149 }
150 };
151 }
152
153 while i1 < lines1.len() && i2 < lines2.len() {
154 match compare_lines(lines1[i1], lines2[i2], ci) {
155 Ordering::Less => {
156 check_order!(warned1, lines1, i1, 1);
158 if !config.suppress_col1 {
159 buf.extend_from_slice(lines1[i1]);
160 buf.push(delim);
161 }
162 count1 += 1;
163 i1 += 1;
164 }
165 Ordering::Greater => {
166 check_order!(warned2, lines2, i2, 2);
168 if !config.suppress_col2 {
169 buf.extend_from_slice(&prefix2);
170 buf.extend_from_slice(lines2[i2]);
171 buf.push(delim);
172 }
173 count2 += 1;
174 i2 += 1;
175 }
176 Ordering::Equal => {
177 if !config.suppress_col3 {
179 buf.extend_from_slice(&prefix3);
180 buf.extend_from_slice(lines1[i1]);
181 buf.push(delim);
182 }
183 count3 += 1;
184 i1 += 1;
185 i2 += 1;
186 }
187 }
188 }
189
190 while i1 < lines1.len() {
192 if config.order_check != OrderCheck::None
193 && !warned1
194 && i1 > 0
195 && compare_lines(lines1[i1], lines1[i1 - 1], ci) == Ordering::Less
196 {
197 had_order_error = true;
198 warned1 = true;
199 eprintln!("{}: file 1 is not in sorted order", tool_name);
200 if config.order_check == OrderCheck::Strict {
201 out.write_all(&buf)?;
202 return Ok(CommResult {
203 count1,
204 count2,
205 count3,
206 had_order_error,
207 });
208 }
209 }
210 if !config.suppress_col1 {
211 buf.extend_from_slice(lines1[i1]);
212 buf.push(delim);
213 }
214 count1 += 1;
215 i1 += 1;
216 }
217
218 while i2 < lines2.len() {
220 if config.order_check != OrderCheck::None
221 && !warned2
222 && i2 > 0
223 && compare_lines(lines2[i2], lines2[i2 - 1], ci) == Ordering::Less
224 {
225 had_order_error = true;
226 warned2 = true;
227 eprintln!("{}: file 2 is not in sorted order", tool_name);
228 if config.order_check == OrderCheck::Strict {
229 out.write_all(&buf)?;
230 return Ok(CommResult {
231 count1,
232 count2,
233 count3,
234 had_order_error,
235 });
236 }
237 }
238 if !config.suppress_col2 {
239 buf.extend_from_slice(&prefix2);
240 buf.extend_from_slice(lines2[i2]);
241 buf.push(delim);
242 }
243 count2 += 1;
244 i2 += 1;
245 }
246
247 if config.total {
249 write!(&mut buf, "{}", count1)?;
250 buf.extend_from_slice(sep);
251 write!(&mut buf, "{}", count2)?;
252 buf.extend_from_slice(sep);
253 write!(&mut buf, "{}", count3)?;
254 buf.extend_from_slice(sep);
255 buf.extend_from_slice(b"total");
256 buf.push(delim);
257 }
258
259 if had_order_error && config.order_check == OrderCheck::Default {
261 eprintln!("{}: input is not in sorted order", tool_name);
262 }
263
264 out.write_all(&buf)?;
265 Ok(CommResult {
266 count1,
267 count2,
268 count3,
269 had_order_error,
270 })
271}