use const_format::formatcp;
use itertools::{Itertools as _, Position};
use regex::Regex;
use std::borrow::{Borrow, Cow};
use crate::token::Token;
use crate::PositionExt as _;
#[cfg(windows)]
const SEPARATOR_CLASS_EXPRESSION: &str = "/\\\\";
#[cfg(unix)]
const SEPARATOR_CLASS_EXPRESSION: &str = "/";
#[cfg(not(any(windows, unix)))]
const SEPARATOR_CLASS_EXPRESSION: &str = main_separator_class_expression();
#[cfg(not(any(windows, unix)))]
const fn main_separator_class_expression() -> &'static str {
use std::path::MAIN_SEPARATOR;
const fn escape(x: char) -> &'static str {
match x {
'\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{' | '}' | '^' | '$'
| '#' | '&' | '-' | '~' => "\\",
_ => "",
}
}
formatcp!("{0}{1}", escape(MAIN_SEPARATOR), MAIN_SEPARATOR)
}
macro_rules! sepexpr {
($fmt:expr) => {
formatcp!($fmt, formatcp!("[{0}]", SEPARATOR_CLASS_EXPRESSION))
};
}
macro_rules! nsepexpr {
($fmt:expr) => {
formatcp!($fmt, formatcp!("[^{0}]", SEPARATOR_CLASS_EXPRESSION))
};
}
trait Escaped {
fn escaped(&self) -> String;
}
impl Escaped for char {
fn escaped(&self) -> String {
regex::escape(&self.to_string())
}
}
impl Escaped for str {
fn escaped(&self) -> String {
regex::escape(self)
}
}
#[derive(Clone, Copy, Debug)]
enum Grouping {
Capture,
NonCapture,
}
impl Grouping {
pub fn push_str(&self, pattern: &mut String, encoding: &str) {
self.push_with(pattern, || encoding.into());
}
pub fn push_with<'p, F>(&self, pattern: &mut String, f: F)
where
F: Fn() -> Cow<'p, str>,
{
match self {
Grouping::Capture => pattern.push('('),
Grouping::NonCapture => pattern.push_str("(?:"),
}
pattern.push_str(f().as_ref());
pattern.push(')');
}
}
pub fn compile<'t, A, T>(tokens: impl IntoIterator<Item = T>) -> Regex
where
T: Borrow<Token<'t, A>>,
{
let mut pattern = String::new();
pattern.push('^');
encode(Grouping::Capture, None, &mut pattern, tokens);
pattern.push('$');
Regex::new(&pattern).expect("glob compilation failed")
}
fn encode<'t, A, T>(
grouping: Grouping,
superposition: Option<Position<()>>,
pattern: &mut String,
tokens: impl IntoIterator<Item = T>,
) where
T: Borrow<Token<'t, A>>,
{
use itertools::Position::{First, Last, Middle, Only};
use crate::token::Archetype::{Character, Range};
use crate::token::Evaluation::{Eager, Lazy};
use crate::token::TokenKind::{Alternative, Class, Literal, Repetition, Separator, Wildcard};
use crate::token::Wildcard::{One, Tree, ZeroOrMore};
const NULL_CHARACTER_CLASS: &str = nsepexpr!("[\\x00&&{0}]");
fn encode_intermediate_tree(grouping: Grouping, pattern: &mut String) {
pattern.push_str(sepexpr!("(?:{0}|{0}"));
grouping.push_str(pattern, sepexpr!(".*{0}"));
pattern.push(')');
}
for token in tokens.into_iter().with_position() {
match token.interior_borrow().map(Token::kind).as_tuple() {
(_, Literal(literal)) => {
if literal.is_case_insensitive() {
pattern.push_str("(?i)");
}
else {
pattern.push_str("(?-i)");
}
pattern.push_str(&literal.text().escaped());
},
(_, Separator) => pattern.push_str(sepexpr!("{0}")),
(position, Alternative(alternative)) => {
let encodings: Vec<_> = alternative
.branches()
.iter()
.map(|tokens| {
let mut pattern = String::new();
pattern.push_str("(?:");
encode(
Grouping::NonCapture,
superposition.or(Some(position)),
&mut pattern,
tokens.iter(),
);
pattern.push(')');
pattern
})
.collect();
grouping.push_str(pattern, &encodings.join("|"));
},
(position, Repetition(repetition)) => {
let encoding = {
let (lower, upper) = repetition.bounds();
let mut pattern = String::new();
pattern.push_str("(?:");
encode(
Grouping::NonCapture,
superposition.or(Some(position)),
&mut pattern,
repetition.tokens().iter(),
);
pattern.push_str(&if let Some(upper) = upper {
format!("){{{},{}}}", lower, upper)
}
else {
format!("){{{},}}", lower)
});
pattern
};
grouping.push_str(pattern, &encoding);
},
(_, Class(class)) => {
grouping.push_with(pattern, || {
let mut pattern = String::new();
pattern.push('[');
if class.is_negated() {
pattern.push('^');
}
for archetype in class.archetypes() {
match archetype {
Character(literal) => pattern.push_str(&literal.escaped()),
Range(left, right) => {
pattern.push_str(&left.escaped());
pattern.push('-');
pattern.push_str(&right.escaped());
},
}
}
pattern.push_str(nsepexpr!("&&{0}]"));
if Regex::new(&pattern).is_ok() {
pattern.into()
}
else {
NULL_CHARACTER_CLASS.into()
}
});
},
(_, Wildcard(One)) => grouping.push_str(pattern, nsepexpr!("{0}")),
(_, Wildcard(ZeroOrMore(Eager))) => grouping.push_str(pattern, nsepexpr!("{0}*")),
(_, Wildcard(ZeroOrMore(Lazy))) => grouping.push_str(pattern, nsepexpr!("{0}*?")),
(First(_), Wildcard(Tree { is_rooted })) => match superposition {
Some(Middle(_) | Last(_)) => {
encode_intermediate_tree(grouping, pattern);
},
_ => {
if *is_rooted {
grouping.push_str(pattern, sepexpr!("{0}.*{0}?"));
}
else {
pattern.push_str(sepexpr!("(?:{0}?|"));
grouping.push_str(pattern, sepexpr!(".*{0}"));
pattern.push(')');
}
},
},
(Middle(_), Wildcard(Tree { .. })) => {
encode_intermediate_tree(grouping, pattern);
},
(Last(_), Wildcard(Tree { .. })) => match superposition {
Some(First(_) | Middle(_)) => {
encode_intermediate_tree(grouping, pattern);
},
_ => {
pattern.push_str(sepexpr!("(?:{0}?|{0}"));
grouping.push_str(pattern, ".*");
pattern.push(')');
},
},
(Only(_), Wildcard(Tree { .. })) => grouping.push_str(pattern, ".*"),
}
}
}