1#[derive(Debug, Clone, Default)]
8pub struct KeyOpts {
9 pub numeric: bool,
10 pub general_numeric: bool,
11 pub human_numeric: bool,
12 pub month: bool,
13 pub version: bool,
14 pub random: bool,
15 pub reverse: bool,
16 pub ignore_leading_blanks: bool,
17 pub dictionary_order: bool,
18 pub ignore_case: bool,
19 pub ignore_nonprinting: bool,
20}
21
22impl KeyOpts {
23 pub fn has_sort_type(&self) -> bool {
25 self.numeric
26 || self.general_numeric
27 || self.human_numeric
28 || self.month
29 || self.version
30 || self.random
31 }
32
33 pub fn has_any_option(&self) -> bool {
35 self.has_sort_type()
36 || self.ignore_case
37 || self.dictionary_order
38 || self.ignore_nonprinting
39 || self.ignore_leading_blanks
40 || self.reverse
41 }
42
43 pub fn parse_flags(&mut self, flags: &str) {
45 for c in flags.chars() {
46 match c {
47 'b' => self.ignore_leading_blanks = true,
48 'd' => self.dictionary_order = true,
49 'f' => self.ignore_case = true,
50 'g' => self.general_numeric = true,
51 'h' => self.human_numeric = true,
52 'i' => self.ignore_nonprinting = true,
53 'M' => self.month = true,
54 'n' => self.numeric = true,
55 'R' => self.random = true,
56 'r' => self.reverse = true,
57 'V' => self.version = true,
58 _ => {}
59 }
60 }
61 }
62}
63
64#[derive(Debug, Clone)]
66pub struct KeyDef {
67 pub start_field: usize,
68 pub start_char: usize,
69 pub end_field: usize,
70 pub end_char: usize,
71 pub opts: KeyOpts,
72}
73
74impl KeyDef {
75 pub fn parse(spec: &str) -> Result<KeyDef, String> {
77 let parts: Vec<&str> = spec.splitn(2, ',').collect();
78
79 let (start_field, start_char, start_opts) = parse_field_spec(parts[0])?;
80
81 let (end_field, end_char, end_opts) = if parts.len() > 1 {
82 parse_field_spec(parts[1])?
83 } else {
84 (0, 0, String::new())
85 };
86
87 let mut opts = KeyOpts::default();
88 opts.parse_flags(&start_opts);
89 opts.parse_flags(&end_opts);
90
91 if start_field == 0 {
92 return Err("field number is zero: invalid field specification".to_string());
93 }
94
95 Ok(KeyDef {
96 start_field,
97 start_char,
98 end_field,
99 end_char,
100 opts,
101 })
102 }
103}
104
105fn parse_field_spec(s: &str) -> Result<(usize, usize, String), String> {
107 let mut field_str = String::new();
108 let mut char_str = String::new();
109 let mut opts = String::new();
110 let mut in_char = false;
111
112 for c in s.chars() {
113 if c == '.' && !in_char && opts.is_empty() {
114 in_char = true;
115 } else if c.is_ascii_digit() && opts.is_empty() {
116 if in_char {
117 char_str.push(c);
118 } else {
119 field_str.push(c);
120 }
121 } else if c.is_ascii_alphabetic() {
122 opts.push(c);
123 } else {
124 return Err(format!("invalid character '{}' in key spec", c));
125 }
126 }
127
128 let field = if field_str.is_empty() {
129 0
130 } else {
131 field_str
132 .parse::<usize>()
133 .map_err(|_| "invalid field number".to_string())?
134 };
135
136 let char_pos = if char_str.is_empty() {
137 0
138 } else {
139 char_str
140 .parse::<usize>()
141 .map_err(|_| "invalid character position".to_string())?
142 };
143
144 Ok((field, char_pos, opts))
145}
146
147#[inline]
151fn find_nth_field(line: &[u8], n: usize, separator: Option<u8>) -> (usize, usize) {
152 match separator {
153 Some(sep) => {
154 let mut field = 0;
156 let mut start = 0;
157 for pos in memchr::memchr_iter(sep, line) {
158 if field == n {
159 return (start, pos);
160 }
161 field += 1;
162 start = pos + 1;
163 }
164 if field == n {
165 (start, line.len())
166 } else {
167 (line.len(), line.len())
168 }
169 }
170 None => {
171 let mut field = 0;
172 let mut i = 0;
173 let len = line.len();
174
175 while i < len {
176 let field_start = i;
177 while i < len && is_blank(line[i]) {
179 i += 1;
180 }
181 while i < len && !is_blank(line[i]) {
183 i += 1;
184 }
185 if field == n {
186 return (field_start, i);
187 }
188 field += 1;
189 }
190
191 (line.len(), line.len())
192 }
193 }
194}
195
196#[inline]
197fn is_blank(b: u8) -> bool {
198 b == b' ' || b == b'\t'
199}
200
201pub fn extract_key<'a>(line: &'a [u8], key: &KeyDef, separator: Option<u8>) -> &'a [u8] {
204 let sf = key.start_field.saturating_sub(1);
205 let (sf_start, sf_end) = find_nth_field(line, sf, separator);
206
207 if sf_start >= line.len() {
208 return b"";
209 }
210
211 let start_byte = if key.start_char > 0 {
212 let field_len = sf_end - sf_start;
213 let char_offset = (key.start_char - 1).min(field_len);
214 sf_start + char_offset
215 } else {
216 sf_start
217 };
218
219 let end_byte = if key.end_field > 0 {
220 let ef = key.end_field.saturating_sub(1);
221 let (ef_start, ef_end) = find_nth_field(line, ef, separator);
222 if key.end_char > 0 {
223 let field_len = ef_end - ef_start;
224 let char_offset = key.end_char.min(field_len);
225 ef_start + char_offset
226 } else {
227 ef_end
228 }
229 } else {
230 line.len()
231 };
232
233 if start_byte >= end_byte || start_byte >= line.len() {
234 return b"";
235 }
236
237 &line[start_byte..end_byte.min(line.len())]
238}