use std::mem;
use std::vec::IntoIter as VecIntoIter;
use itertools::{Either, Itertools};
use regex::Regex;
use crate::error::Error as FError;
/// One member of a glob character class (the text between `[` and `]`).
#[derive(Debug)]
enum ClassItem {
/// A single character.
Char(char),
/// A range written as `start-end` in the pattern, stored as `(start, end)`.
Range(char, char),
}
/// The contents of a character class gathered so far while parsing.
#[derive(Debug)]
struct ClassAccumulator {
/// Whether the class started with `!` and so matches everything NOT listed.
negated: bool,
/// The characters and ranges seen so far, in pattern order.
items: Vec<ClassItem>,
}
/// The state of the glob pattern parser between two steps.
#[derive(Debug)]
enum State {
/// Nothing has been emitted yet; the next step emits the `^` anchor.
Start,
/// The closing `$` anchor has been emitted; there is nothing more to do.
End,
/// Outside of any class or alternation.
Literal,
/// A backslash was seen outside of a class or alternation.
Escape,
/// A `[` was just seen; the next character decides how the class begins.
ClassStart,
/// Inside a character class, with the items gathered so far.
Class(ClassAccumulator),
/// Inside a class right after `<char>-`; the `char` is the start of the range.
ClassRange(ClassAccumulator, char),
/// Inside a class, a `-` was seen right after a complete range.
ClassRangeDash(ClassAccumulator),
/// A backslash was seen inside a character class.
ClassEscape(ClassAccumulator),
/// Inside `{...}`: the alternative being built and the completed ones.
Alternate(String, Vec<String>),
/// A backslash was seen inside `{...}`.
AlternateEscape(String, Vec<String>),
}
impl Default for State {
fn default() -> Self {
Self::Start
}
}
/// Quote a character so that it is taken literally inside a regex
/// character class.
///
/// Besides `]` and `\`, the following must be quoted as well:
/// - `^` would negate the class if it ended up as the first member;
/// - `[` would open a nested class;
/// - `-`, `&` and `~` could combine with a neighbour into the `--`, `&&`
///   or `~~` set operators, or be misread as a range separator.
///
/// All other characters are returned unchanged.
fn escape_in_class(chr: char) -> String {
    match chr {
        '[' | ']' | '\\' | '^' | '-' | '&' | '~' => format!("\\{}", chr),
        _ => chr.to_string(),
    }
}
/// Quote a character so that it is taken literally outside of a regex
/// character class.
///
/// Every bracket is quoted in both its opening and closing form: an
/// unquoted `)` in particular would produce an unbalanced group and make
/// the resulting expression invalid. All other characters are returned
/// unchanged.
fn escape(chr: char) -> String {
    match chr {
        '[' | ']' | '{' | '}' | '(' | ')' | '|' | '^' | '$' | '.' | '*' | '?' | '+' | '\\' => {
            format!("\\{}", chr)
        }
        _ => chr.to_string(),
    }
}
/// Translate the letter that follows a backslash into the control
/// character it stands for.
///
/// The recognised letters are `a` (bell), `b` (backspace), `e` (escape),
/// `f` (form feed), `n` (line feed), `r` (carriage return), `t` (tab) and
/// `v` (vertical tab). Any other character is returned as it is.
const fn map_letter_escape(chr: char) -> char {
    match chr {
        'a' => '\u{07}',
        'b' => '\u{08}',
        't' => '\t',
        'n' => '\n',
        'v' => '\u{0b}',
        'f' => '\u{0c}',
        'r' => '\r',
        'e' => '\u{1b}',
        _ => chr,
    }
}
/// Produce the regex text for a character that followed a backslash in the
/// pattern: resolve letter escapes first, then quote the result if needed.
fn escape_special(chr: char) -> String {
    let resolved = map_letter_escape(chr);
    escape(resolved)
}
/// Iterator adaptor over class items that rewrites each item so that it no
/// longer covers the `/` character.
struct ExcIter<I>
where
I: Iterator<Item = ClassItem>,
{
/// The class items to be rewritten.
it: I,
}
/// For every source item, yield the zero, one or two items that cover the
/// same characters except for `/`.
impl<I> Iterator for ExcIter<I>
where
    I: Iterator<Item = ClassItem>,
{
    type Item = VecIntoIter<ClassItem>;

    fn next(&mut self) -> Option<Self::Item> {
        // What is left of a range below the slash; '.' is the character
        // right before '/'.
        let below = |start: char| {
            if start == '.' {
                ClassItem::Char('.')
            } else {
                ClassItem::Range(start, '.')
            }
        };
        // What is left of a range above the slash; '0' is the character
        // right after '/'.
        let above = |end: char| {
            if end == '0' {
                ClassItem::Char('0')
            } else {
                ClassItem::Range('0', end)
            }
        };

        let cls = self.it.next()?;
        let pieces = match cls {
            // A lone slash is dropped altogether.
            ClassItem::Char('/') => Vec::new(),
            ClassItem::Char(_) => vec![cls],
            // Ranges that end or start exactly at the slash are trimmed.
            ClassItem::Range(start, '/') => vec![below(start)],
            ClassItem::Range('/', end) => vec![above(end)],
            // Ranges that do not touch the slash pass through unchanged.
            ClassItem::Range(start, end) if start > '/' || end < '/' => vec![cls],
            // Ranges that straddle the slash are split in two.
            ClassItem::Range(start, end) => vec![below(start), above(end)],
        };
        Some(pieces.into_iter())
    }
}
/// Remove `/` from a non-negated class so that the class never matches it.
///
/// # Panics
///
/// Panics if the class is negated.
fn handle_slash_exclude(acc: ClassAccumulator) -> ClassAccumulator {
    assert!(!acc.negated);
    let ClassAccumulator { negated, items } = acc;
    let rewritten = ExcIter {
        it: items.into_iter(),
    };
    ClassAccumulator {
        negated,
        items: rewritten.flatten().collect(),
    }
}
fn handle_slash_include(mut acc: ClassAccumulator) -> ClassAccumulator {
assert!(acc.negated);
let slash_found = acc.items.iter().any(|item| match *item {
ClassItem::Char('/') => true,
ClassItem::Char(_) => false,
ClassItem::Range(start, end) => start <= '/' && end >= '/',
});
if !slash_found {
acc.items.push(ClassItem::Char('/'));
}
acc
}
/// Adjust a class so that it can never match `/`: drop the slash from a
/// regular class, or add it to the exclusion list of a negated one.
fn handle_slash(acc: ClassAccumulator) -> ClassAccumulator {
    if !acc.negated {
        return handle_slash_exclude(acc);
    }
    handle_slash_include(acc)
}
fn close_class(glob_acc: ClassAccumulator) -> String {
let acc = handle_slash(glob_acc);
let (chars_vec, classes_vec): (Vec<_>, Vec<_>) =
acc.items.into_iter().partition_map(|item| match item {
ClassItem::Char(chr) => Either::Left(chr),
ClassItem::Range(start, end) => Either::Right((start, end)),
});
let (chars, final_dash) = {
let mut has_dash = false;
let res = chars_vec
.into_iter()
.filter(|chr| {
if *chr == '-' {
has_dash = true;
false
} else {
true
}
})
.sorted_unstable()
.dedup()
.map(escape_in_class);
(res, if has_dash { "-" } else { "" })
};
let classes = classes_vec
.into_iter()
.sorted_unstable()
.dedup()
.map(|cls| format!("{}-{}", escape_in_class(cls.0), escape_in_class(cls.1)));
format!(
"[{}{}{}]",
if acc.negated { "^" } else { "" },
chars.chain(classes).collect::<String>(),
final_dash,
)
}
/// Render the alternatives of a `{a,b,...}` group as a regex group.
///
/// Each alternative is quoted character by character; the results are
/// sorted, deduplicated, joined with `|` and wrapped in parentheses.
fn close_alternate(gathered: Vec<String>) -> String {
    let mut branches: Vec<String> = gathered
        .into_iter()
        .map(|item| item.chars().map(escape).collect::<String>())
        .collect();
    branches.sort_unstable();
    branches.dedup();
    format!("({})", branches.join("|"))
}
/// A state machine that reads a glob pattern one character at a time and
/// yields the pieces of the equivalent regular expression.
struct GlobIterator<I: Iterator<Item = char>> {
/// The characters of the glob pattern that have not been consumed yet.
pattern: I,
/// What the parser is in the middle of.
state: State,
}
/// The outcome of one parser step: a regex fragment to emit (`Some`),
/// nothing to emit yet (`None`), or a parse error.
type StringResult = Result<Option<String>, FError>;
impl<I> GlobIterator<I>
where
I: Iterator<Item = char>,
{
fn handle_start(&mut self) -> String {
self.state = State::Literal;
"^".to_owned()
}
fn handle_literal(&mut self) -> Option<String> {
match self.pattern.next() {
None => {
self.state = State::End;
Some("$".to_owned())
}
Some(chr) => {
let (new_state, res) = match chr {
'\\' => (State::Escape, None),
'[' => (State::ClassStart, None),
'{' => (State::Alternate(String::new(), Vec::new()), None),
'?' => (State::Literal, Some("[^/]".to_owned())),
'*' => (State::Literal, Some(".*".to_owned())),
']' | '}' | '.' => (State::Literal, Some(format!("\\{}", chr))),
_ => (State::Literal, Some(format!("{}", chr))),
};
self.state = new_state;
res
}
}
}
fn handle_escape(&mut self) -> StringResult {
match self.pattern.next() {
Some(chr) => {
self.state = State::Literal;
Ok(Some(escape_special(chr)))
}
None => Err(FError::BareEscape),
}
}
fn handle_class_start(&mut self) -> StringResult {
match self.pattern.next() {
Some(chr) => {
self.state = match chr {
'!' => State::Class(ClassAccumulator {
negated: true,
items: Vec::new(),
}),
'-' => State::Class(ClassAccumulator {
negated: false,
items: vec![ClassItem::Char('-')],
}),
']' => State::Class(ClassAccumulator {
negated: false,
items: vec![ClassItem::Char(']')],
}),
'\\' => State::ClassEscape(ClassAccumulator {
negated: false,
items: Vec::new(),
}),
other => State::Class(ClassAccumulator {
negated: false,
items: vec![ClassItem::Char(other)],
}),
};
Ok(None)
}
None => Err(FError::UnclosedClass),
}
}
fn handle_class(&mut self, mut acc: ClassAccumulator) -> StringResult {
match self.pattern.next() {
Some(chr) => Ok(match chr {
']' => {
if acc.items.is_empty() {
acc.items.push(ClassItem::Char(']'));
self.state = State::Class(acc);
None
} else {
self.state = State::Literal;
Some(close_class(acc))
}
}
'-' => match acc.items.pop() {
None => {
acc.items.push(ClassItem::Char('-'));
self.state = State::Class(acc);
None
}
Some(ClassItem::Range(start, end)) => {
acc.items.push(ClassItem::Range(start, end));
self.state = State::ClassRangeDash(acc);
None
}
Some(ClassItem::Char(start)) => {
self.state = State::ClassRange(acc, start);
None
}
},
'\\' => {
self.state = State::ClassEscape(acc);
None
}
other => {
acc.items.push(ClassItem::Char(other));
self.state = State::Class(acc);
None
}
}),
None => Err(FError::UnclosedClass),
}
}
fn handle_class_escape(&mut self, mut acc: ClassAccumulator) -> StringResult {
match self.pattern.next() {
Some(chr) => {
acc.items.push(ClassItem::Char(map_letter_escape(chr)));
self.state = State::Class(acc);
Ok(None)
}
None => Err(FError::UnclosedClass),
}
}
fn handle_class_range(&mut self, mut acc: ClassAccumulator, start: char) -> StringResult {
match self.pattern.next() {
Some(chr) => match chr {
'\\' => Err(FError::NotImplemented(format!(
"FIXME: handle class range end escape with {:?} start {:?}",
acc, start
))),
']' => {
acc.items.push(ClassItem::Char(start));
acc.items.push(ClassItem::Char('-'));
self.state = State::Literal;
Ok(Some(close_class(acc)))
}
end if start > end => Err(FError::ReversedRange(start, end)),
end if start == end => {
acc.items.push(ClassItem::Char(start));
self.state = State::Class(acc);
Ok(None)
}
end => {
acc.items.push(ClassItem::Range(start, end));
self.state = State::Class(acc);
Ok(None)
}
},
None => Err(FError::UnclosedClass),
}
}
#[allow(clippy::panic_in_result_fn)]
#[allow(clippy::unreachable)]
fn handle_class_range_dash(&mut self, mut acc: ClassAccumulator) -> StringResult {
match self.pattern.next() {
Some(chr) => {
if chr == ']' {
acc.items.push(ClassItem::Char('-'));
self.state = State::Literal;
Ok(Some(close_class(acc)))
} else if let Some(ClassItem::Range(start, end)) = acc.items.pop() {
Err(FError::RangeAfterRange(start, end))
} else {
unreachable!()
}
}
None => Err(FError::UnclosedClass),
}
}
fn handle_alternate(&mut self, mut current: String, mut gathered: Vec<String>) -> StringResult {
match self.pattern.next() {
Some(chr) => match chr {
',' => {
gathered.push(current);
self.state = State::Alternate(String::new(), gathered);
Ok(None)
}
'}' => {
self.state = State::Literal;
if current.is_empty() && gathered.is_empty() {
Ok(Some(r"\{\}".to_owned()))
} else {
gathered.push(current);
Ok(Some(close_alternate(gathered)))
}
}
'\\' => {
self.state = State::AlternateEscape(current, gathered);
Ok(None)
}
'[' => Err(FError::NotImplemented(
"FIXME: alternate character class".to_owned(),
)),
other => {
current.push(other);
self.state = State::Alternate(current, gathered);
Ok(None)
}
},
None => Err(FError::UnclosedAlternation),
}
}
fn handle_alternate_escape(
&mut self,
mut current: String,
gathered: Vec<String>,
) -> StringResult {
match self.pattern.next() {
Some(chr) => {
current.push(map_letter_escape(chr));
self.state = State::Alternate(current, gathered);
Ok(None)
}
None => Err(FError::UnclosedAlternation),
}
}
}
/// Drive the parser: every call performs one step of the state machine.
///
/// An item is `Ok(Some(fragment))` when the step produced regex text,
/// `Ok(None)` when it only consumed input, and `Err(_)` when the pattern is
/// malformed. The iterator ends after the closing anchor has been emitted,
/// and also after the first error.
impl<I> Iterator for GlobIterator<I>
where
    I: Iterator<Item = char>,
{
    type Item = StringResult;

    fn next(&mut self) -> Option<Self::Item> {
        // Moving the state out leaves the default `Start` state behind;
        // every path below must store the real follow-up state.
        let res = match mem::take(&mut self.state) {
            State::Start => Ok(Some(self.handle_start())),
            State::End => {
                // Stay finished instead of starting over on the next call.
                self.state = State::End;
                return None;
            }
            State::Literal => Ok(self.handle_literal()),
            State::Escape => self.handle_escape(),
            State::ClassStart => self.handle_class_start(),
            State::Class(acc) => self.handle_class(acc),
            State::ClassEscape(acc) => self.handle_class_escape(acc),
            State::ClassRange(acc, start) => self.handle_class_range(acc, start),
            State::ClassRangeDash(acc) => self.handle_class_range_dash(acc),
            State::Alternate(current, gathered) => self.handle_alternate(current, gathered),
            State::AlternateEscape(current, gathered) => {
                self.handle_alternate_escape(current, gathered)
            }
        };
        if res.is_err() {
            // The handlers do not store a state on failure; without this
            // the parser would restart from `^` on the rest of the pattern.
            self.state = State::End;
        }
        Some(res)
    }
}
#[allow(clippy::missing_inline_in_public_items)]
pub fn glob_to_regex(pattern: &str) -> Result<Regex, FError> {
let parser = GlobIterator {
pattern: pattern.chars(),
state: State::Start,
};
let re_pattern = parser.flatten_ok().collect::<Result<Vec<_>, _>>()?.join("");
Regex::new(&re_pattern).map_err(|err| FError::InvalidRegex(re_pattern, err.to_string()))
}