use crate::types::{Command, Opt, OptName};
use bstr::ByteSlice;
use ecow::{EcoString, EcoVec};
use memchr::memchr;
use std::collections::HashSet;
/// Stateless namespace for post-processing helpers: cleaning up the options of a
/// [`Command`] tree and normalizing bullets, Unicode spaces and tabs in text.
pub struct Postprocessor;
impl Postprocessor {
pub fn fix_command(mut cmd: Command) -> Command {
cmd.options = Self::deduplicate_options(cmd.options);
cmd.options = Self::filter_invalid_options(cmd.options);
cmd.subcommands = cmd.subcommands.into_iter().map(Self::fix_command).collect();
cmd
}
/// Drops repeated options, keeping the first occurrence of each.
///
/// Two options are duplicates when both their `names` and their `argument`
/// compare equal. `description` is not part of the key, so the description of
/// the first occurrence is the one that survives. Relative order is preserved.
fn deduplicate_options(options: EcoVec<Opt>) -> EcoVec<Opt> {
    let mut seen: HashSet<(EcoVec<OptName>, EcoString), foldhash::fast::RandomState> =
        HashSet::with_capacity_and_hasher(
            options.len(),
            foldhash::fast::RandomState::default(),
        );
    // The input is owned, so consume it and hand each surviving option straight
    // to the result rather than iterating by reference and cloning every `Opt`.
    // Only the key parts are cloned, which the set needs to own anyway.
    options
        .into_iter()
        .filter(|opt| seen.insert((opt.names.clone(), opt.argument.clone())))
        .collect()
}
fn filter_invalid_options(options: EcoVec<Opt>) -> EcoVec<Opt> {
options
.into_iter()
.filter(|opt| {
!opt.names.is_empty() && !opt.names[0].raw.is_empty() && !opt.description.is_empty()
})
.collect()
}
pub fn remove_bullets(text: &str) -> EcoString {
let bytes = text.as_bytes();
let has_asterisk = memchr(b'*', bytes).is_some();
let has_dash = memchr(b'-', bytes).is_some();
let has_bullet_utf8 = memchr(0xE2, bytes).is_some();
if !has_asterisk && !has_dash && !has_bullet_utf8 {
return EcoString::from(text);
}
let mut result = String::with_capacity(text.len());
let mut first = true;
for line in bytes.lines() {
if !first {
result.push('\n');
}
first = false;
let line_str = unsafe { std::str::from_utf8_unchecked(line) };
let trimmed = line_str.trim_start();
let prefix_len = line_str.len() - trimmed.len();
let trimmed_bytes = trimmed.as_bytes();
if trimmed_bytes.len() >= 2 {
let is_bullet = match trimmed_bytes[0] {
b'*' | b'-' => trimmed_bytes[1].is_ascii_whitespace(),
0xE2 if trimmed_bytes.len() >= 4
&& trimmed_bytes[1] == 0x80
&& trimmed_bytes[2] == 0xA2 =>
{
trimmed_bytes[3].is_ascii_whitespace()
}
_ => false,
};
if is_bullet {
result.push_str(&line_str[..prefix_len]);
let skip = if trimmed_bytes[0] == 0xE2 { 4 } else { 2 };
result.push_str(trimmed[skip..].trim_start());
continue;
}
}
result.push_str(line_str);
}
EcoString::from(result)
}
/// Replaces a fixed set of Unicode space characters with ASCII spaces.
///
/// The characters handled are U+00A0, U+2002, U+2003, U+2009 and U+202F; every
/// other character is copied through unchanged. When none of them occurs the
/// input is returned as-is.
pub fn unicode_spaces_to_ascii(text: &str) -> EcoString {
    /// ASCII replacement for `c`, or `None` when `c` is not one of the handled spaces.
    fn ascii_replacement(c: char) -> Option<&'static str> {
        match c {
            '\u{00A0}' | '\u{202F}' => Some(" "),
            '\u{2009}' => Some(" "),
            '\u{2002}' => Some(" "),
            '\u{2003}' => Some(" "),
            _ => None,
        }
    }

    // Cheap byte-level pre-check: the handled characters are encoded with a
    // 0xC2 or 0xE2 lead byte (0x80 appears as a continuation byte), so text
    // without any of these bytes cannot contain them.
    let raw = text.as_bytes();
    let worth_scanning = [0x80u8, 0xC2, 0xE2]
        .iter()
        .any(|&byte| memchr::memchr(byte, raw).is_some());
    if !worth_scanning {
        return EcoString::from(text);
    }

    // Those bytes also occur in unrelated characters, so confirm a real hit
    // before allocating the output buffer.
    if !text.chars().any(|c| ascii_replacement(c).is_some()) {
        return EcoString::from(text);
    }

    let mut converted = String::with_capacity(text.len() + text.len() / 8);
    for c in text.chars() {
        match ascii_replacement(c) {
            Some(space) => converted.push_str(space),
            None => converted.push(c),
        }
    }
    EcoString::from(converted)
}
/// Replaces every tab character in `text` with `spaces` repetitions of the
/// filler string.
///
/// This is a plain substitution; tabs are not aligned to tab stops. Text
/// without any tab is returned unchanged.
pub fn convert_tabs_to_spaces(text: &str, spaces: usize) -> EcoString {
    match memchr(b'\t', text.as_bytes()) {
        None => EcoString::from(text),
        Some(_) => {
            let filler = " ".repeat(spaces);
            // Re-join the tab-separated pieces with the filler between them.
            let mut pieces = text.split('\t');
            let mut expanded = String::with_capacity(text.len());
            if let Some(head) = pieces.next() {
                expanded.push_str(head);
            }
            for piece in pieces {
                expanded.push_str(&filler);
                expanded.push_str(piece);
            }
            EcoString::from(expanded)
        }
    }
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::OptName;
use crate::types::OptNameType;
use ecow::EcoString;
#[test]
fn test_deduplicate_options() {
    // Builds a fresh `-v` / "verbose" option each time it is called.
    let make_verbose = || {
        let mut names = EcoVec::new();
        names.push(OptName::new(EcoString::from("-v"), OptNameType::ShortType));
        Opt {
            names,
            argument: EcoString::new(),
            description: EcoString::from("verbose"),
        }
    };

    // Two identical options must collapse into one.
    let mut duplicated = EcoVec::new();
    for _ in 0..2 {
        duplicated.push(make_verbose());
    }

    let deduplicated = Postprocessor::deduplicate_options(duplicated);
    assert_eq!(deduplicated.len(), 1);
}
#[test]
fn test_remove_bullets() {
    // All three marker kinds are stripped and the lines stay separate.
    let text = "• Item one\n* Item two\n- Item three";
    let result = Postprocessor::remove_bullets(text);
    assert!(!result.contains("•"));
    assert_eq!(result.as_str(), "Item one\nItem two\nItem three");

    // Indentation is kept, and a marker that is not followed by whitespace
    // (a long flag, emphasis) is not treated as a bullet.
    let mixed = "  - nested\n--flag stays\n*emphasis*";
    let result = Postprocessor::remove_bullets(mixed);
    assert_eq!(result.as_str(), "  nested\n--flag stays\n*emphasis*");

    // Text without any marker character comes back untouched.
    let plain = "nothing to strip here";
    assert_eq!(Postprocessor::remove_bullets(plain).as_str(), plain);
}
#[test]
fn test_unicode_and_tabs_helpers() {
    // Step 1: Unicode spaces become ASCII spaces, the tab is left alone.
    let input = "\u{00A0}foo\u{2002}bar\u{2003}baz\tend";
    let normalized = Postprocessor::unicode_spaces_to_ascii(input);
    assert_eq!(normalized.as_str(), " foo bar baz\tend");

    // Step 2: the remaining tab is expanded to spaces.
    let expanded = Postprocessor::convert_tabs_to_spaces(&normalized, 4);
    assert!(expanded.chars().all(|c| c != '\t'));
    assert!(expanded.ends_with(" end"));
}
#[test]
fn test_fix_command_filters_and_deduplicates() {
    // Fixtures: one well-formed option and one that has neither name nor description.
    let mut verbose_names = EcoVec::new();
    verbose_names.push(OptName::new(EcoString::from("-v"), OptNameType::ShortType));
    let valid_opt = Opt {
        names: verbose_names,
        argument: EcoString::new(),
        description: EcoString::from("verbose"),
    };
    let invalid_opt = Opt {
        names: EcoVec::new(),
        argument: EcoString::new(),
        description: EcoString::new(),
    };

    // Child command with a single valid option.
    let mut child_options = EcoVec::new();
    child_options.push(valid_opt.clone());
    let child = Command {
        name: EcoString::from("child"),
        description: EcoString::new(),
        usage: EcoString::new(),
        options: child_options,
        subcommands: EcoVec::new(),
        version: EcoString::new(),
    };

    // Root command: the valid option twice plus the invalid one.
    let mut root_options = EcoVec::new();
    root_options.push(valid_opt.clone());
    root_options.push(valid_opt);
    root_options.push(invalid_opt);
    let mut root_children = EcoVec::new();
    root_children.push(child);
    let root = Command {
        name: EcoString::from("root"),
        description: EcoString::new(),
        usage: EcoString::new(),
        options: root_options,
        subcommands: root_children,
        version: EcoString::new(),
    };

    let fixed = Postprocessor::fix_command(root);

    // Duplicate and invalid options are gone; the child is processed as well.
    assert_eq!(fixed.options.len(), 1);
    assert_eq!(fixed.subcommands.len(), 1);
    assert_eq!(fixed.subcommands[0].options.len(), 1);
}
}