1#[derive(Debug, Clone, Default)]
8pub struct KeyOpts {
9 pub numeric: bool,
10 pub general_numeric: bool,
11 pub human_numeric: bool,
12 pub month: bool,
13 pub version: bool,
14 pub random: bool,
15 pub reverse: bool,
16 pub ignore_leading_blanks: bool,
17 pub dictionary_order: bool,
18 pub ignore_case: bool,
19 pub ignore_nonprinting: bool,
20}
21
22impl KeyOpts {
23 pub fn has_sort_type(&self) -> bool {
25 self.numeric
26 || self.general_numeric
27 || self.human_numeric
28 || self.month
29 || self.version
30 || self.random
31 }
32
33 pub fn has_any_option(&self) -> bool {
35 self.has_sort_type()
36 || self.ignore_case
37 || self.dictionary_order
38 || self.ignore_nonprinting
39 || self.ignore_leading_blanks
40 || self.reverse
41 }
42
43 pub fn parse_flags(&mut self, flags: &str) {
45 for c in flags.chars() {
46 match c {
47 'b' => self.ignore_leading_blanks = true,
48 'd' => self.dictionary_order = true,
49 'f' => self.ignore_case = true,
50 'g' => self.general_numeric = true,
51 'h' => self.human_numeric = true,
52 'i' => self.ignore_nonprinting = true,
53 'M' => self.month = true,
54 'n' => self.numeric = true,
55 'R' => self.random = true,
56 'r' => self.reverse = true,
57 'V' => self.version = true,
58 _ => {}
59 }
60 }
61 }
62
63 pub fn validate(&self) -> Result<(), String> {
75 let mut active: Vec<char> = Vec::new();
78 if self.dictionary_order {
79 active.push('d');
80 }
81 if self.general_numeric {
82 active.push('g');
83 }
84 if self.human_numeric {
85 active.push('h');
86 }
87 if self.ignore_nonprinting {
88 active.push('i');
89 }
90 if self.month {
91 active.push('M');
92 }
93 if self.numeric {
94 active.push('n');
95 }
96 if self.random {
97 active.push('R');
98 }
99 if self.version {
100 active.push('V');
101 }
102
103 let is_numeric_type = |c: char| matches!(c, 'g' | 'h' | 'M' | 'n');
108 let incompatible_with_numeric = |c: char| matches!(c, 'd' | 'i' | 'R' | 'V');
109
110 for i in 0..active.len() {
112 for j in (i + 1)..active.len() {
113 let a = active[i];
114 let b = active[j];
115 let conflict = (is_numeric_type(a) && is_numeric_type(b))
116 || (is_numeric_type(a) && incompatible_with_numeric(b))
117 || (incompatible_with_numeric(a) && is_numeric_type(b));
118 if conflict {
119 return Err(format!("options '-{}{}' are incompatible", a, b));
120 }
121 }
122 }
123
124 Ok(())
125 }
126}
127
128#[derive(Debug, Clone)]
130pub struct KeyDef {
131 pub start_field: usize,
132 pub start_char: usize,
133 pub end_field: usize,
134 pub end_char: usize,
135 pub opts: KeyOpts,
136}
137
138impl KeyDef {
139 pub fn parse(spec: &str) -> Result<KeyDef, String> {
141 let parts: Vec<&str> = spec.splitn(2, ',').collect();
142
143 let (start_field, start_char, start_opts) = parse_field_spec(parts[0])?;
144
145 let (end_field, end_char, end_opts) = if parts.len() > 1 {
146 parse_field_spec(parts[1])?
147 } else {
148 (0, 0, String::new())
149 };
150
151 let mut opts = KeyOpts::default();
152 opts.parse_flags(&start_opts);
153 opts.parse_flags(&end_opts);
154
155 if start_field == 0 {
156 return Err("field number is zero: invalid field specification".to_string());
157 }
158
159 if start_char == 0 && parts[0].contains('.') {
162 return Err(format!(
163 "character offset is zero: invalid field specification '{}'",
164 spec
165 ));
166 }
167
168 opts.validate()?;
170
171 Ok(KeyDef {
172 start_field,
173 start_char,
174 end_field,
175 end_char,
176 opts,
177 })
178 }
179}
180
181fn parse_field_spec(s: &str) -> Result<(usize, usize, String), String> {
183 let mut field_str = String::new();
184 let mut char_str = String::new();
185 let mut opts = String::new();
186 let mut in_char = false;
187
188 for c in s.chars() {
189 if c == '.' && !in_char && opts.is_empty() {
190 in_char = true;
191 } else if c.is_ascii_digit() && opts.is_empty() {
192 if in_char {
193 char_str.push(c);
194 } else {
195 field_str.push(c);
196 }
197 } else if c.is_ascii_alphabetic() {
198 opts.push(c);
199 } else {
200 return Err(format!("invalid character '{}' in key spec", c));
201 }
202 }
203
204 let field = if field_str.is_empty() {
205 0
206 } else {
207 field_str
208 .parse::<usize>()
209 .map_err(|_| "invalid field number".to_string())?
210 };
211
212 let char_pos = if char_str.is_empty() {
213 0
214 } else {
215 char_str
216 .parse::<usize>()
217 .map_err(|_| "invalid character position".to_string())?
218 };
219
220 Ok((field, char_pos, opts))
221}
222
223#[inline]
229fn find_nth_field(line: &[u8], n: usize, separator: Option<u8>) -> (usize, usize) {
230 match separator {
231 Some(sep) => {
232 if n < 4 {
236 find_nth_field_memchr(line, n, sep)
237 } else {
238 find_nth_field_iter(line, n, sep)
239 }
240 }
241 None => {
242 let mut field = 0;
243 let mut i = 0;
244 let len = line.len();
245
246 while i < len {
247 let field_start = i;
248 while i < len && is_blank(line[i]) {
250 i += 1;
251 }
252 while i < len && !is_blank(line[i]) {
254 i += 1;
255 }
256 if field == n {
257 return (field_start, i);
258 }
259 field += 1;
260 }
261
262 (line.len(), line.len())
263 }
264 }
265}
266
267#[inline(always)]
270fn find_nth_field_memchr(line: &[u8], n: usize, sep: u8) -> (usize, usize) {
271 let mut start = 0;
272 for _ in 0..n {
274 match memchr::memchr(sep, &line[start..]) {
275 Some(pos) => start = start + pos + 1,
276 None => return (line.len(), line.len()),
277 }
278 }
279 match memchr::memchr(sep, &line[start..]) {
281 Some(pos) => (start, start + pos),
282 None => (start, line.len()),
283 }
284}
285
286#[inline]
288fn find_nth_field_iter(line: &[u8], n: usize, sep: u8) -> (usize, usize) {
289 let mut field = 0;
290 let mut start = 0;
291 for pos in memchr::memchr_iter(sep, line) {
292 if field == n {
293 return (start, pos);
294 }
295 field += 1;
296 start = pos + 1;
297 }
298 if field == n {
299 (start, line.len())
300 } else {
301 (line.len(), line.len())
302 }
303}
304
305#[inline]
306fn is_blank(b: u8) -> bool {
307 b == b' ' || b == b'\t'
308}
309
310#[inline]
312fn is_blank_z(b: u8) -> bool {
313 b == b' ' || b == b'\t' || b == b'\n'
314}
315
316#[inline]
318fn skip_blanks_from_fn(line: &[u8], from: usize, end: usize, blank_fn: fn(u8) -> bool) -> usize {
319 let mut i = from;
320 while i < end && blank_fn(line[i]) {
321 i += 1;
322 }
323 i
324}
325
326#[inline]
328fn find_nth_field_z(
329 line: &[u8],
330 n: usize,
331 separator: Option<u8>,
332 zero_terminated: bool,
333) -> (usize, usize) {
334 if !zero_terminated || separator.is_some() {
335 return find_nth_field(line, n, separator);
336 }
337 let mut field = 0;
339 let mut i = 0;
340 let len = line.len();
341
342 while i < len {
343 let field_start = i;
344 while i < len && is_blank_z(line[i]) {
345 i += 1;
346 }
347 while i < len && !is_blank_z(line[i]) {
348 i += 1;
349 }
350 if field == n {
351 return (field_start, i);
352 }
353 field += 1;
354 }
355 (line.len(), line.len())
356}
357
358pub fn extract_key<'a>(
366 line: &'a [u8],
367 key: &KeyDef,
368 separator: Option<u8>,
369 ignore_leading_blanks: bool,
370) -> &'a [u8] {
371 extract_key_z(line, key, separator, ignore_leading_blanks, false)
372}
373
374pub fn extract_key_z<'a>(
378 line: &'a [u8],
379 key: &KeyDef,
380 separator: Option<u8>,
381 ignore_leading_blanks: bool,
382 zero_terminated: bool,
383) -> &'a [u8] {
384 let sf = key.start_field.saturating_sub(1);
385 let (sf_start, sf_end) = find_nth_field_z(line, sf, separator, zero_terminated);
386
387 if sf_start >= line.len() {
388 return b"";
389 }
390
391 let blank_fn: fn(u8) -> bool = if zero_terminated && separator.is_none() {
392 is_blank_z
393 } else {
394 is_blank
395 };
396
397 let start_byte = if key.start_char > 0 {
398 let effective_start = if ignore_leading_blanks {
399 skip_blanks_from_fn(line, sf_start, sf_end, blank_fn)
400 } else {
401 sf_start
402 };
403 let field_len = sf_end - effective_start;
404 let char_offset = (key.start_char - 1).min(field_len);
405 effective_start + char_offset
406 } else {
407 sf_start
408 };
409
410 let end_byte = if key.end_field > 0 {
411 let ef = key.end_field.saturating_sub(1);
412 let (ef_start, ef_end) = find_nth_field_z(line, ef, separator, zero_terminated);
413 if key.end_char > 0 {
414 let effective_start = if ignore_leading_blanks {
415 skip_blanks_from_fn(line, ef_start, ef_end, blank_fn)
416 } else {
417 ef_start
418 };
419 let field_len = ef_end - effective_start;
420 let char_offset = key.end_char.min(field_len);
421 effective_start + char_offset
422 } else {
423 ef_end
424 }
425 } else {
426 line.len()
427 };
428
429 if start_byte >= end_byte || start_byte >= line.len() {
430 return b"";
431 }
432
433 &line[start_byte..end_byte.min(line.len())]
434}