#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub(crate) struct StateId(pub u32);
impl From<StateId> for usize {
fn from(id: StateId) -> usize {
id.0 as usize
}
}
#[derive(Debug, Clone)]
pub(crate) struct State {
pub(crate) terminal_for_patterns: Option<Vec<usize>>,
pub(crate) transitions: Vec<Transition>,
pub(crate) epsilon_transition: Option<StateId>,
}
impl State {
pub(crate) fn new() -> Self {
Self {
terminal_for_patterns: None,
transitions: Vec::new(),
epsilon_transition: None,
}
}
pub(crate) fn add_transition(&mut self, transition: Transition) {
self.transitions.push(transition);
}
pub(crate) fn mark_as_terminal(&mut self, pattern_id: usize) {
if let Some(patterns) = &mut self.terminal_for_patterns {
patterns.push(pattern_id);
} else {
self.terminal_for_patterns = Some(vec![pattern_id]);
}
}
}
#[derive(Clone)]
pub(crate) struct Nfa {
states: Vec<State>,
}
impl Nfa {
pub(crate) const START_STATE: StateId = StateId(0);
pub(crate) fn new() -> Self {
let states = vec![State::new()];
Self { states }
}
pub(crate) fn add_state(&mut self) -> StateId {
let id = self.states.len();
let state = State::new();
self.states.push(state);
StateId(id as u32)
}
pub(crate) fn state(&self, id: StateId) -> &State {
&self.states[usize::from(id)]
}
pub(crate) fn state_mut(&mut self, id: StateId) -> &mut State {
&mut self.states[usize::from(id)]
}
pub(crate) fn initial_states(&self) -> Vec<StateId> {
let mut states = vec![Self::START_STATE];
if let Some(epsilon_node_id) = self.state(Self::START_STATE).epsilon_transition {
states.push(epsilon_node_id);
}
states
}
pub(crate) fn transitions_from(&self, state_id: StateId) -> impl Iterator<Item = &Transition> {
self.state(state_id).transitions.iter()
}
pub(crate) fn epsilon_transitions_from(&self, state_id: StateId) -> Option<StateId> {
self.state(state_id).epsilon_transition
}
#[cfg(test)]
pub(crate) fn states_iter(&self) -> std::slice::Iter<'_, State> {
self.states.iter()
}
}
#[derive(Debug, Clone)]
pub(crate) struct Transition {
pub(crate) path_segment: String,
pub(crate) target: StateId,
condition: TransitionCondition,
}
impl Transition {
pub(crate) fn new(path_segment: String, target: StateId) -> Transition {
let condition = TransitionCondition::new(&path_segment);
Self {
path_segment,
condition,
target,
}
}
pub(crate) fn is_match(&self, candidate: &str) -> bool {
self.condition.is_match(&self.path_segment, candidate)
}
}
#[derive(Debug, Clone)]
enum TransitionCondition {
Unconditional,
Literal,
Prefix,
Suffix,
Contains,
Regex(regex::Regex),
}
impl TransitionCondition {
fn new(glob: &str) -> Self {
if glob == "*" {
return Self::Unconditional;
}
if glob.contains('\\') {
return Self::Regex(pattern_to_regex(glob));
}
let (leading_star, trailing_star, internal_wildcards) = wildcard_locations(glob);
match (leading_star, trailing_star, internal_wildcards) {
(false, false, false) => Self::Literal,
(false, true, false) => Self::Prefix,
(true, false, false) => Self::Suffix,
(true, true, false) => Self::Contains,
_ => Self::Regex(pattern_to_regex(glob)),
}
}
fn is_match(&self, pattern: &str, candidate: &str) -> bool {
match self {
Self::Unconditional => true,
Self::Literal => pattern == candidate,
Self::Prefix => candidate.starts_with(&pattern[0..pattern.len() - 1]),
Self::Suffix => candidate.ends_with(&pattern[1..]),
Self::Contains => memchr::memmem::find(
candidate.as_bytes(),
&pattern.as_bytes()[1..pattern.len() - 1],
)
.is_some(),
Self::Regex(re) => re.is_match(candidate),
}
}
}
fn pattern_to_regex(pattern: &str) -> regex::Regex {
let mut regex = String::new();
regex.push_str(r#"\A"#);
let mut escape = false;
for c in pattern.chars() {
if escape {
if regex_syntax::is_meta_character(c) {
regex.push('\\');
}
regex.push(c);
escape = false;
continue;
}
match c {
'*' => regex.push_str(r#"[^/]*"#),
'?' => regex.push_str(r#"[^/]"#),
'\\' => escape = true,
_ => {
if regex_syntax::is_meta_character(c) {
regex.push('\\');
}
regex.push(c);
}
}
}
regex.push_str(r#"\z"#);
regex::Regex::new(®ex).unwrap_or_else(|_| panic!("invalid regex: {}", regex))
}
fn wildcard_locations(pattern: &str) -> (bool, bool, bool) {
let mut chars = pattern.chars();
let first = chars.next();
let last = chars.next_back();
let mut prev = first;
let mut internal_wildcard = false;
for c in chars {
if (c == '*' || c == '?') && prev != Some('\\') {
internal_wildcard = true;
}
prev = Some(c);
}
(
first.map(|c| c == '*' || c == '?').unwrap_or(false),
last.map(|c| c == '*' || c == '?').unwrap_or(false) && prev != Some('\\'),
internal_wildcard,
)
}