/// Ordering options attached to a sort key.
///
/// Every field corresponds to one single-letter flag recognised by
/// [`KeyOpts::parse_flags`]; all of them start out `false`.
#[derive(Debug, Clone, Default)]
pub struct KeyOpts {
    /// Set by the `n` flag.
    pub numeric: bool,
    /// Set by the `g` flag.
    pub general_numeric: bool,
    /// Set by the `h` flag.
    pub human_numeric: bool,
    /// Set by the `M` flag.
    pub month: bool,
    /// Set by the `V` flag.
    pub version: bool,
    /// Set by the `R` flag.
    pub random: bool,
    /// Set by the `r` flag.
    pub reverse: bool,
    /// Set by the `b` flag.
    pub ignore_leading_blanks: bool,
    /// Set by the `d` flag.
    pub dictionary_order: bool,
    /// Set by the `f` flag.
    pub ignore_case: bool,
    /// Set by the `i` flag.
    pub ignore_nonprinting: bool,
}

impl KeyOpts {
    /// Returns `true` if at least one of the sort-type flags
    /// (`n`, `g`, `h`, `M`, `V`, `R`) is set.
    pub fn has_sort_type(&self) -> bool {
        let sort_types = [
            self.numeric,
            self.general_numeric,
            self.human_numeric,
            self.month,
            self.version,
            self.random,
        ];
        sort_types.contains(&true)
    }

    /// Returns `true` if any flag at all is set, sort type or modifier.
    pub fn has_any_option(&self) -> bool {
        let modifiers = [
            self.ignore_case,
            self.dictionary_order,
            self.ignore_nonprinting,
            self.ignore_leading_blanks,
            self.reverse,
        ];
        self.has_sort_type() || modifiers.contains(&true)
    }

    /// Switches on the option for every recognised letter in `flags`.
    ///
    /// Letters that are not recognised are skipped; flags are only ever
    /// turned on, never cleared.
    pub fn parse_flags(&mut self, flags: &str) {
        for letter in flags.chars() {
            let slot: &mut bool = match letter {
                'b' => &mut self.ignore_leading_blanks,
                'd' => &mut self.dictionary_order,
                'f' => &mut self.ignore_case,
                'g' => &mut self.general_numeric,
                'h' => &mut self.human_numeric,
                'i' => &mut self.ignore_nonprinting,
                'M' => &mut self.month,
                'n' => &mut self.numeric,
                'R' => &mut self.random,
                'r' => &mut self.reverse,
                'V' => &mut self.version,
                _ => continue,
            };
            *slot = true;
        }
    }

    /// Checks that the enabled options can be combined.
    ///
    /// `g`, `h`, `M` and `n` exclude each other, and each of them also
    /// excludes `d`, `i`, `R` and `V`.
    ///
    /// # Errors
    ///
    /// Returns a message naming the first conflicting pair, scanning the
    /// enabled letters in the order `d g h i M n R V`.
    pub fn validate(&self) -> Result<(), String> {
        // The table order fixes which pair is reported when several conflict.
        let table = [
            (self.dictionary_order, 'd'),
            (self.general_numeric, 'g'),
            (self.human_numeric, 'h'),
            (self.ignore_nonprinting, 'i'),
            (self.month, 'M'),
            (self.numeric, 'n'),
            (self.random, 'R'),
            (self.version, 'V'),
        ];
        let enabled: Vec<char> = table
            .iter()
            .filter(|(on, _)| *on)
            .map(|&(_, letter)| letter)
            .collect();

        let numeric_kind = |c: char| matches!(c, 'g' | 'h' | 'M' | 'n');
        let excludes_numeric = |c: char| matches!(c, 'd' | 'i' | 'R' | 'V');
        let clash = |a: char, b: char| {
            (numeric_kind(a) && (numeric_kind(b) || excludes_numeric(b)))
                || (excludes_numeric(a) && numeric_kind(b))
        };

        for (idx, &first) in enabled.iter().enumerate() {
            let later = &enabled[idx + 1..];
            if let Some(&second) = later.iter().find(|&&other| clash(first, other)) {
                return Err(format!("options '-{}{}' are incompatible", first, second));
            }
        }
        Ok(())
    }
}
/// A parsed sort-key definition of the form `START[,END]`.
#[derive(Debug, Clone)]
pub struct KeyDef {
    /// 1-based number of the field where the key starts (never 0 after
    /// a successful [`KeyDef::parse`]).
    pub start_field: usize,
    /// 1-based character offset inside the start field, or 0 when the
    /// spec gave no offset.
    pub start_char: usize,
    /// 1-based number of the field where the key ends, or 0 when the spec
    /// had no `,END` part.
    pub end_field: usize,
    /// Character offset inside the end field, or 0 when none was given.
    pub end_char: usize,
    /// Union of the option letters found on the start and end positions.
    pub opts: KeyOpts,
}

impl KeyDef {
    /// Parses a key specification such as `2`, `2.3n` or `1,3r`.
    ///
    /// The text before the first comma is the start position and the rest
    /// (if any) is the end position; each is handed to `parse_field_spec`.
    /// Option letters from both positions are merged into one [`KeyOpts`].
    ///
    /// # Errors
    ///
    /// Returns a message when either position is malformed, when the start
    /// field is zero or missing, when the start position carries an explicit
    /// character offset of zero, when an end position is present but its
    /// field number is zero or missing, or when the merged options are
    /// mutually incompatible.
    pub fn parse(spec: &str) -> Result<KeyDef, String> {
        let mut halves = spec.splitn(2, ',');
        let start_spec = halves.next().unwrap_or("");
        let end_spec = halves.next();

        let (start_field, start_char, start_opts) = parse_field_spec(start_spec)?;
        let (end_field, end_char, end_opts) = match end_spec {
            Some(end) => parse_field_spec(end)?,
            None => (0, 0, String::new()),
        };

        let mut opts = KeyOpts::default();
        opts.parse_flags(&start_opts);
        opts.parse_flags(&end_opts);

        if start_field == 0 {
            return Err("field number is zero: invalid field specification".to_string());
        }
        // A zero offset is only an error when it was written out ("1.0" or
        // "1."); a bare "1" also yields 0 and is fine.
        if start_char == 0 && start_spec.contains('.') {
            return Err(format!(
                "character offset is zero: invalid field specification '{}'",
                spec
            ));
        }
        // `end_field == 0` is reserved to mean "no end position". A spec that
        // does contain a comma must therefore name a real (>= 1) end field;
        // otherwise "1,0" or "1," would silently behave like plain "1".
        if end_spec.is_some() && end_field == 0 {
            return Err(format!(
                "field number is zero: invalid field specification '{}'",
                spec
            ));
        }

        opts.validate()?;
        Ok(KeyDef {
            start_field,
            start_char,
            end_field,
            end_char,
            opts,
        })
    }
}
/// Parses one key position of the form `[FIELD][.CHAR][FLAGS]`.
///
/// Returns `(field, char_pos, flags)`. A missing `FIELD` or `CHAR` is
/// reported as `0`; `flags` is the trailing run of option letters, unchanged.
///
/// # Errors
///
/// * `invalid character '<c>' in key spec` for the first character that does
///   not fit the grammar, including a letter that is not one of the option
///   flags `b d f g h i M n R r V`.
/// * `invalid field number` / `invalid character position` when the digits do
///   not fit in a `usize`.
fn parse_field_spec(s: &str) -> Result<(usize, usize, String), String> {
    // Must stay in sync with the letters handled by `KeyOpts::parse_flags`;
    // anything else would be dropped there without any diagnostic.
    const KEY_FLAGS: &str = "bdfghiMnRrV";

    // Splits `text` into its leading run of ASCII digits and the remainder.
    fn split_digits(text: &str) -> (&str, &str) {
        let end = text
            .find(|c: char| !c.is_ascii_digit())
            .unwrap_or(text.len());
        text.split_at(end)
    }

    // An empty digit run means "not given" and maps to 0.
    fn number_or_zero(digits: &str, error: &str) -> Result<usize, String> {
        if digits.is_empty() {
            Ok(0)
        } else {
            digits.parse::<usize>().map_err(|_| error.to_string())
        }
    }

    let (field_digits, rest) = split_digits(s);
    let (char_digits, flags) = if rest.starts_with('.') {
        split_digits(&rest[1..])
    } else {
        ("", rest)
    };

    // Everything after the numbers has to be a known flag letter. This also
    // catches a second '.', digits after flags, and stray punctuation.
    if let Some(bad) = flags.chars().find(|c| !KEY_FLAGS.contains(*c)) {
        return Err(format!("invalid character '{}' in key spec", bad));
    }

    let field = number_or_zero(field_digits, "invalid field number")?;
    let char_pos = number_or_zero(char_digits, "invalid character position")?;
    Ok((field, char_pos, flags.to_string()))
}
/// Locates field `n` (0-based) in `line` and returns its `(start, end)` byte
/// range, `end` exclusive.
///
/// With a separator byte the work is delegated to one of the two
/// separator-based scanners. Without one, a field is a run of non-blank bytes
/// together with the blanks (space or tab) that precede it.
///
/// Returns `(line.len(), line.len())` when the line has no such field.
#[inline]
fn find_nth_field(line: &[u8], n: usize, separator: Option<u8>) -> (usize, usize) {
    let len = line.len();
    match separator {
        // Low field indices use the repeated-`memchr` scanner, higher ones
        // the iterator-based scanner (presumably a performance heuristic).
        Some(sep) if n < 4 => find_nth_field_memchr(line, n, sep),
        Some(sep) => find_nth_field_iter(line, n, sep),
        None => {
            let mut cursor = 0;
            let mut index = 0;
            while cursor < len {
                let begin = cursor;
                // Leading blanks belong to the field they precede.
                cursor += line[cursor..]
                    .iter()
                    .take_while(|&&b| is_blank(b))
                    .count();
                cursor += line[cursor..]
                    .iter()
                    .take_while(|&&b| !is_blank(b))
                    .count();
                if index == n {
                    return (begin, cursor);
                }
                index += 1;
            }
            (len, len)
        }
    }
}
/// Finds field `n` (0-based) of a `sep`-delimited line by calling
/// `memchr::memchr` once per separator that has to be skipped.
///
/// Returns the `(start, end)` byte range of the field, `end` exclusive, or
/// `(line.len(), line.len())` when fewer than `n` separators exist.
#[inline(always)]
fn find_nth_field_memchr(line: &[u8], n: usize, sep: u8) -> (usize, usize) {
    let total = line.len();
    let mut begin = 0;
    let mut to_skip = n;
    while to_skip > 0 {
        begin += match memchr::memchr(sep, &line[begin..]) {
            // Step past the separator itself.
            Some(offset) => offset + 1,
            None => return (total, total),
        };
        to_skip -= 1;
    }
    // The field runs up to the next separator, or to the end of the line.
    let end = memchr::memchr(sep, &line[begin..]).map_or(total, |offset| begin + offset);
    (begin, end)
}
/// Finds field `n` (0-based) of a `sep`-delimited line using a single
/// `memchr::memchr_iter` pass over the separators.
///
/// Returns the `(start, end)` byte range of the field, `end` exclusive, or
/// `(line.len(), line.len())` when fewer than `n` separators exist.
#[inline]
fn find_nth_field_iter(line: &[u8], n: usize, sep: u8) -> (usize, usize) {
    let total = line.len();
    let mut separators = memchr::memchr_iter(sep, line);
    let mut begin = 0;
    // Consume the `n` separators that precede the wanted field.
    for _ in 0..n {
        match separators.next() {
            Some(at) => begin = at + 1,
            None => return (total, total),
        }
    }
    // The next separator, if any, closes the field.
    match separators.next() {
        Some(at) => (begin, at),
        None => (begin, total),
    }
}
/// Returns `true` for the blank bytes: space and horizontal tab.
#[inline]
fn is_blank(b: u8) -> bool {
    matches!(b, b' ' | b'\t')
}
/// Returns `true` for space, horizontal tab and newline.
#[inline]
fn is_blank_z(b: u8) -> bool {
    matches!(b, b' ' | b'\t' | b'\n')
}
/// Returns the index of the first byte in `line[from..end]` for which
/// `blank_fn` is `false`.
///
/// Yields `end` when every byte in the range is blank, and `from` when the
/// range is empty (`from >= end`).
#[inline]
fn skip_blanks_from_fn(line: &[u8], from: usize, end: usize, blank_fn: fn(u8) -> bool) -> usize {
    (from..end)
        .find(|&idx| !blank_fn(line[idx]))
        // No non-blank found: either the whole range was blank (stop at
        // `end`) or the range was empty (stay at `from`).
        .unwrap_or_else(|| from.max(end))
}
/// Variant of `find_nth_field` that also handles zero-terminated input.
///
/// When `zero_terminated` is set and there is no explicit separator, newline
/// counts as a blank in addition to space and tab. In every other case the
/// call is forwarded to `find_nth_field` unchanged.
///
/// Returns the `(start, end)` byte range of field `n` (0-based), `end`
/// exclusive, or `(line.len(), line.len())` when the line has no such field.
#[inline]
fn find_nth_field_z(
    line: &[u8],
    n: usize,
    separator: Option<u8>,
    zero_terminated: bool,
) -> (usize, usize) {
    if separator.is_some() || !zero_terminated {
        return find_nth_field(line, n, separator);
    }

    let len = line.len();
    let mut cursor = 0;
    let mut index = 0;
    while cursor < len {
        let begin = cursor;
        // Leading blanks are part of the field they precede.
        cursor = line[cursor..]
            .iter()
            .position(|&b| !is_blank_z(b))
            .map_or(len, |offset| cursor + offset);
        cursor = line[cursor..]
            .iter()
            .position(|&b| is_blank_z(b))
            .map_or(len, |offset| cursor + offset);
        if index == n {
            return (begin, cursor);
        }
        index += 1;
    }
    (len, len)
}
/// Returns the part of `line` selected by `key`.
///
/// Convenience wrapper around `extract_key_z` that passes `false` for
/// `zero_terminated`; all other arguments are forwarded unchanged.
pub fn extract_key<'a>(
line: &'a [u8],
key: &KeyDef,
separator: Option<u8>,
ignore_leading_blanks: bool,
) -> &'a [u8] {
extract_key_z(line, key, separator, ignore_leading_blanks, false)
}
/// Returns the byte slice of `line` selected by `key`.
///
/// * The slice starts at `key.start_field` (1-based). If `key.start_char` is
///   non-zero the start moves `start_char - 1` bytes into that field.
/// * The slice ends at the end of the line when `key.end_field` is 0.
///   Otherwise it ends at the end of that field, or `key.end_char` bytes into
///   it when `end_char` is non-zero.
/// * Character offsets are clamped to the field they refer to. When
///   `ignore_leading_blanks` is set they are counted from the first non-blank
///   byte of the field; newline counts as blank only when `zero_terminated`
///   is set and no separator is given.
///
/// An empty slice is returned when the start field does not exist or the
/// computed range is empty.
pub fn extract_key_z<'a>(
    line: &'a [u8],
    key: &KeyDef,
    separator: Option<u8>,
    ignore_leading_blanks: bool,
    zero_terminated: bool,
) -> &'a [u8] {
    let total = line.len();

    let first_index = key.start_field.saturating_sub(1);
    let (first_lo, first_hi) = find_nth_field_z(line, first_index, separator, zero_terminated);
    if first_lo >= total {
        return b"";
    }

    let blank_fn: fn(u8) -> bool = if separator.is_none() && zero_terminated {
        is_blank_z
    } else {
        is_blank
    };

    // Position reached by moving at most `count` bytes into the field
    // `[lo, hi)`, optionally after stepping over its leading blanks.
    let advance_into = |lo: usize, hi: usize, count: usize| -> usize {
        let base = if ignore_leading_blanks {
            skip_blanks_from_fn(line, lo, hi, blank_fn)
        } else {
            lo
        };
        base + count.min(hi - base)
    };

    let start_byte = match key.start_char {
        0 => first_lo,
        offset => advance_into(first_lo, first_hi, offset - 1),
    };

    let end_byte = match key.end_field {
        0 => total,
        field => {
            let (last_lo, last_hi) = find_nth_field_z(line, field - 1, separator, zero_terminated);
            match key.end_char {
                0 => last_hi,
                offset => advance_into(last_lo, last_hi, offset),
            }
        }
    };

    if start_byte >= end_byte || start_byte >= total {
        return b"";
    }
    &line[start_byte..end_byte.min(total)]
}