/// Ordering options attached to a sort key, one boolean per option letter.
///
/// The letter-to-field mapping is the one implemented by
/// [`KeyOpts::parse_flags`].
#[derive(Debug, Clone, Default)]
pub struct KeyOpts {
    /// Set by the `n` flag.
    pub numeric: bool,
    /// Set by the `g` flag.
    pub general_numeric: bool,
    /// Set by the `h` flag.
    pub human_numeric: bool,
    /// Set by the `M` flag.
    pub month: bool,
    /// Set by the `V` flag.
    pub version: bool,
    /// Set by the `R` flag.
    pub random: bool,
    /// Set by the `r` flag.
    pub reverse: bool,
    /// Set by the `b` flag.
    pub ignore_leading_blanks: bool,
    /// Set by the `d` flag.
    pub dictionary_order: bool,
    /// Set by the `f` flag.
    pub ignore_case: bool,
    /// Set by the `i` flag.
    pub ignore_nonprinting: bool,
}

impl KeyOpts {
    /// Returns `true` when any of the sort-type options (`numeric`,
    /// `general_numeric`, `human_numeric`, `month`, `version`, `random`)
    /// is enabled.
    pub fn has_sort_type(&self) -> bool {
        [
            self.numeric,
            self.general_numeric,
            self.human_numeric,
            self.month,
            self.version,
            self.random,
        ]
        .contains(&true)
    }

    /// Returns `true` when at least one option of any kind is enabled,
    /// i.e. a sort type or one of the modifier options.
    pub fn has_any_option(&self) -> bool {
        let has_modifier = [
            self.ignore_case,
            self.dictionary_order,
            self.ignore_nonprinting,
            self.ignore_leading_blanks,
            self.reverse,
        ]
        .contains(&true);

        has_modifier || self.has_sort_type()
    }

    /// Enables the option matching each letter of `flags`.
    ///
    /// Options are only ever switched on, never off, so the method can be
    /// called repeatedly to accumulate flags from several sources.
    pub fn parse_flags(&mut self, flags: &str) {
        for letter in flags.chars() {
            let slot = match letter {
                'b' => &mut self.ignore_leading_blanks,
                'd' => &mut self.dictionary_order,
                'f' => &mut self.ignore_case,
                'g' => &mut self.general_numeric,
                'h' => &mut self.human_numeric,
                'i' => &mut self.ignore_nonprinting,
                'M' => &mut self.month,
                'n' => &mut self.numeric,
                'R' => &mut self.random,
                'r' => &mut self.reverse,
                'V' => &mut self.version,
                // Letters without a matching option are skipped silently.
                _ => continue,
            };
            *slot = true;
        }
    }
}
63
64#[derive(Debug, Clone)]
66pub struct KeyDef {
67 pub start_field: usize,
68 pub start_char: usize,
69 pub end_field: usize,
70 pub end_char: usize,
71 pub opts: KeyOpts,
72}
73
74impl KeyDef {
75 pub fn parse(spec: &str) -> Result<KeyDef, String> {
77 let parts: Vec<&str> = spec.splitn(2, ',').collect();
78
79 let (start_field, start_char, start_opts) = parse_field_spec(parts[0])?;
80
81 let (end_field, end_char, end_opts) = if parts.len() > 1 {
82 parse_field_spec(parts[1])?
83 } else {
84 (0, 0, String::new())
85 };
86
87 let mut opts = KeyOpts::default();
88 opts.parse_flags(&start_opts);
89 opts.parse_flags(&end_opts);
90
91 if start_field == 0 {
92 return Err("field number is zero: invalid field specification".to_string());
93 }
94
95 Ok(KeyDef {
96 start_field,
97 start_char,
98 end_field,
99 end_char,
100 opts,
101 })
102 }
103}
104
/// Splits one key position `F[.C][OPTS]` into its field number, character
/// position and trailing option letters.
///
/// A missing field number or character position is reported as `0`.
///
/// # Errors
///
/// Returns an error message for the first character that does not fit the
/// `digits [. digits] letters` shape, or when one of the numbers does not fit
/// in a `usize`. Invalid characters are reported before numeric problems.
fn parse_field_spec(s: &str) -> Result<(usize, usize, String), String> {
    /// Byte length of the run of ASCII digits at the start of `text`.
    fn digit_run(text: &str) -> usize {
        text.bytes().take_while(u8::is_ascii_digit).count()
    }

    /// Parses a digit run, mapping the empty run to zero.
    fn number(digits: &str, error: &str) -> Result<usize, String> {
        if digits.is_empty() {
            return Ok(0);
        }
        digits.parse::<usize>().map_err(|_| error.to_string())
    }

    // ASCII digits are single bytes, so these split points are always valid
    // character boundaries.
    let (field_digits, after_field) = s.split_at(digit_run(s));
    let (char_digits, flags) = match after_field.strip_prefix('.') {
        Some(after_dot) => after_dot.split_at(digit_run(after_dot)),
        None => ("", after_field),
    };

    // Whatever follows the numbers must consist of ASCII letters only.
    if let Some(bad) = flags.chars().find(|c| !c.is_ascii_alphabetic()) {
        return Err(format!("invalid character '{}' in key spec", bad));
    }

    let field = number(field_digits, "invalid field number")?;
    let char_pos = number(char_digits, "invalid character position")?;

    Ok((field, char_pos, flags.to_string()))
}
146
147#[inline]
153fn find_nth_field(line: &[u8], n: usize, separator: Option<u8>) -> (usize, usize) {
154 match separator {
155 Some(sep) => {
156 if n < 4 {
160 find_nth_field_memchr(line, n, sep)
161 } else {
162 find_nth_field_iter(line, n, sep)
163 }
164 }
165 None => {
166 let mut field = 0;
167 let mut i = 0;
168 let len = line.len();
169
170 while i < len {
171 let field_start = i;
172 while i < len && is_blank(line[i]) {
174 i += 1;
175 }
176 while i < len && !is_blank(line[i]) {
178 i += 1;
179 }
180 if field == n {
181 return (field_start, i);
182 }
183 field += 1;
184 }
185
186 (line.len(), line.len())
187 }
188 }
189}
190
191#[inline(always)]
194fn find_nth_field_memchr(line: &[u8], n: usize, sep: u8) -> (usize, usize) {
195 let mut start = 0;
196 for _ in 0..n {
198 match memchr::memchr(sep, &line[start..]) {
199 Some(pos) => start = start + pos + 1,
200 None => return (line.len(), line.len()),
201 }
202 }
203 match memchr::memchr(sep, &line[start..]) {
205 Some(pos) => (start, start + pos),
206 None => (start, line.len()),
207 }
208}
209
210#[inline]
212fn find_nth_field_iter(line: &[u8], n: usize, sep: u8) -> (usize, usize) {
213 let mut field = 0;
214 let mut start = 0;
215 for pos in memchr::memchr_iter(sep, line) {
216 if field == n {
217 return (start, pos);
218 }
219 field += 1;
220 start = pos + 1;
221 }
222 if field == n {
223 (start, line.len())
224 } else {
225 (line.len(), line.len())
226 }
227}
228
/// Returns `true` for the two blank bytes: space and horizontal tab.
#[inline]
fn is_blank(b: u8) -> bool {
    matches!(b, b' ' | b'\t')
}
233
234pub fn extract_key<'a>(line: &'a [u8], key: &KeyDef, separator: Option<u8>) -> &'a [u8] {
237 let sf = key.start_field.saturating_sub(1);
238 let (sf_start, sf_end) = find_nth_field(line, sf, separator);
239
240 if sf_start >= line.len() {
241 return b"";
242 }
243
244 let start_byte = if key.start_char > 0 {
245 let field_len = sf_end - sf_start;
246 let char_offset = (key.start_char - 1).min(field_len);
247 sf_start + char_offset
248 } else {
249 sf_start
250 };
251
252 let end_byte = if key.end_field > 0 {
253 let ef = key.end_field.saturating_sub(1);
254 let (ef_start, ef_end) = find_nth_field(line, ef, separator);
255 if key.end_char > 0 {
256 let field_len = ef_end - ef_start;
257 let char_offset = key.end_char.min(field_len);
258 ef_start + char_offset
259 } else {
260 ef_end
261 }
262 } else {
263 line.len()
264 };
265
266 if start_byte >= end_byte || start_byte >= line.len() {
267 return b"";
268 }
269
270 &line[start_byte..end_byte.min(line.len())]
271}