1use memchr::memchr_iter;
2use std::io::{self, BufRead, Write};
3
4pub struct CutConfig<'a> {
6 pub mode: CutMode,
7 pub ranges: &'a [Range],
8 pub complement: bool,
9 pub delim: u8,
10 pub output_delim: &'a [u8],
11 pub suppress_no_delim: bool,
12 pub line_delim: u8,
13}
14
/// A closed interval of 1-based positions (fields or bytes).
///
/// An open-ended selection such as `3-` is represented with `end == usize::MAX`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Range {
    /// First selected position (1-based, inclusive).
    pub start: usize,
    /// Last selected position (1-based, inclusive).
    pub end: usize,
}
21
22pub fn parse_ranges(spec: &str) -> Result<Vec<Range>, String> {
25 let mut ranges = Vec::new();
26
27 for part in spec.split(',') {
28 let part = part.trim();
29 if part.is_empty() {
30 continue;
31 }
32
33 if let Some(idx) = part.find('-') {
34 let left = &part[..idx];
35 let right = &part[idx + 1..];
36
37 let start = if left.is_empty() {
38 1
39 } else {
40 left.parse::<usize>()
41 .map_err(|_| format!("invalid range: '{}'", part))?
42 };
43
44 let end = if right.is_empty() {
45 usize::MAX
46 } else {
47 right
48 .parse::<usize>()
49 .map_err(|_| format!("invalid range: '{}'", part))?
50 };
51
52 if start == 0 {
53 return Err("fields and positions are numbered from 1".to_string());
54 }
55 if start > end {
56 return Err(format!("invalid decreasing range: '{}'", part));
57 }
58
59 ranges.push(Range { start, end });
60 } else {
61 let n = part
62 .parse::<usize>()
63 .map_err(|_| format!("invalid field: '{}'", part))?;
64 if n == 0 {
65 return Err("fields and positions are numbered from 1".to_string());
66 }
67 ranges.push(Range { start: n, end: n });
68 }
69 }
70
71 if ranges.is_empty() {
72 return Err("you must specify a list of bytes, characters, or fields".to_string());
73 }
74
75 ranges.sort_by_key(|r| (r.start, r.end));
77 let mut merged = vec![ranges[0].clone()];
78 for r in &ranges[1..] {
79 let last = merged.last_mut().unwrap();
80 if r.start <= last.end.saturating_add(1) {
81 last.end = last.end.max(r.end);
82 } else {
83 merged.push(r.clone());
84 }
85 }
86
87 Ok(merged)
88}
89
90#[inline(always)]
92fn in_ranges(ranges: &[Range], pos: usize) -> bool {
93 for r in ranges {
94 if pos < r.start {
95 return false; }
97 if pos <= r.end {
98 return true;
99 }
100 }
101 false
102}
103
104#[inline]
107pub fn cut_fields(
108 line: &[u8],
109 delim: u8,
110 ranges: &[Range],
111 complement: bool,
112 output_delim: &[u8],
113 suppress_no_delim: bool,
114 out: &mut impl Write,
115) -> io::Result<()> {
116 if memchr::memchr(delim, line).is_none() {
118 if !suppress_no_delim {
119 out.write_all(line)?;
120 }
121 return Ok(());
122 }
123
124 let mut field_num: usize = 1;
126 let mut field_start: usize = 0;
127 let mut first_output = true;
128
129 for delim_pos in memchr_iter(delim, line) {
130 let selected = in_ranges(ranges, field_num) != complement;
131 if selected {
132 if !first_output {
133 out.write_all(output_delim)?;
134 }
135 out.write_all(&line[field_start..delim_pos])?;
136 first_output = false;
137 }
138 field_start = delim_pos + 1;
139 field_num += 1;
140 }
141
142 let selected = in_ranges(ranges, field_num) != complement;
144 if selected {
145 if !first_output {
146 out.write_all(output_delim)?;
147 }
148 out.write_all(&line[field_start..])?;
149 }
150
151 Ok(())
152}
153
154#[inline]
156pub fn cut_bytes(
157 line: &[u8],
158 ranges: &[Range],
159 complement: bool,
160 output_delim: &[u8],
161 out: &mut impl Write,
162) -> io::Result<()> {
163 let mut first_range = true;
164
165 if complement {
166 let mut in_excluded = false;
168 for (i, &b) in line.iter().enumerate() {
169 let pos = i + 1;
170 if in_ranges(ranges, pos) {
171 if in_excluded {
172 first_range = false;
173 }
174 in_excluded = false;
175 } else {
176 if !in_excluded && !first_range && !output_delim.is_empty() {
177 out.write_all(output_delim)?;
178 }
179 out.write_all(&[b])?;
180 in_excluded = true;
181 }
182 }
183 } else {
184 for r in ranges {
186 let start = r.start.saturating_sub(1); let end = r.end.min(line.len()); if start >= line.len() {
189 break;
190 }
191 if !first_range && !output_delim.is_empty() {
192 out.write_all(output_delim)?;
193 }
194 out.write_all(&line[start..end])?;
195 first_range = false;
196 }
197 }
198 Ok(())
199}
200
201pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
204 let mut start = 0;
205
206 for end_pos in memchr_iter(cfg.line_delim, data) {
207 let line = &data[start..end_pos];
208 process_one_line(line, cfg, out)?;
209 out.write_all(&[cfg.line_delim])?;
210 start = end_pos + 1;
211 }
212
213 if start < data.len() {
215 let line = &data[start..];
216 process_one_line(line, cfg, out)?;
217 out.write_all(b"\n")?;
218 }
219
220 Ok(())
221}
222
223pub fn process_cut_reader<R: BufRead>(
225 mut reader: R,
226 cfg: &CutConfig,
227 out: &mut impl Write,
228) -> io::Result<()> {
229 let mut buf = Vec::new();
230
231 loop {
232 buf.clear();
233 let n = reader.read_until(cfg.line_delim, &mut buf)?;
234 if n == 0 {
235 break;
236 }
237
238 let has_delim = buf.last() == Some(&cfg.line_delim);
239 let line = if has_delim {
240 &buf[..buf.len() - 1]
241 } else {
242 &buf[..]
243 };
244
245 process_one_line(line, cfg, out)?;
246
247 if has_delim {
248 out.write_all(&[cfg.line_delim])?;
249 } else if !line.is_empty() {
250 out.write_all(b"\n")?;
251 }
252 }
253
254 Ok(())
255}
256
257#[inline]
258fn process_one_line(line: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
259 match cfg.mode {
260 CutMode::Fields => cut_fields(
261 line,
262 cfg.delim,
263 cfg.ranges,
264 cfg.complement,
265 cfg.output_delim,
266 cfg.suppress_no_delim,
267 out,
268 ),
269 CutMode::Bytes | CutMode::Characters => {
270 cut_bytes(line, cfg.ranges, cfg.complement, cfg.output_delim, out)
271 }
272 }
273}
274
/// How positions in a range list are interpreted when cutting a record.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CutMode {
    /// Positions index individual bytes of the record.
    Bytes,
    /// Positions index characters; the per-line dispatcher currently routes
    /// this to the same byte-based cutter as [`CutMode::Bytes`].
    Characters,
    /// Positions index delimiter-separated fields of the record.
    Fields,
}