use serde_derive::{Deserialize, Serialize};
/// A single path rule handed to the compiler.
///
/// Each variant borrows the pattern text it applies to; the variant itself
/// records whether paths matching that pattern are permitted or refused.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Rule<'a> {
    /// Paths matching the wrapped pattern are allowed.
    Allow(&'a str),
    /// Paths matching the wrapped pattern are disallowed.
    Disallow(&'a str),
}

impl<'a> Rule<'a> {
    /// Returns the pattern string carried by the rule, regardless of variant.
    fn inner(&self) -> &str {
        match *self {
            Rule::Allow(pattern) | Rule::Disallow(pattern) => pattern,
        }
    }
}
/// Condition attached to a transition between two automaton states.
#[derive(Debug, PartialEq, Clone, Copy, Serialize, Deserialize)]
enum Edge {
/// Followed when the next path character equals the stored character.
MatchChar(char),
/// Followed for any path character; produced by `*` in a rule and used as
/// the first (fallback) transition of every state. Also accepted once the
/// path has been fully consumed.
MatchAny,
/// Never followed while characters remain; only considered once the path
/// has been fully consumed. Produced by `$` in a rule.
MatchEow,
}
/// An outgoing edge paired with the index of the state it leads to.
#[derive(Debug, PartialEq, Clone, Copy, Serialize, Deserialize)]
struct Transition(Edge, usize);
/// Verdict associated with an automaton state.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
enum State {
/// Finishing in this state means the path is allowed.
Allow,
/// Finishing in this state means the path is disallowed.
Disallow,
/// Marks a prefix that is not itself a complete rule. `compile` replaces it
/// with the inherited verdict before storing the state, and `allow` treats
/// encountering it as unreachable.
Intermediate,
}
/// A compiled rule set: a state machine built by `Cylon::compile` and queried
/// with `Cylon::allow`.
#[derive(Debug, Serialize, Deserialize)]
pub struct Cylon {
/// Verdict of each state, indexed by state number.
states: Vec<State>,
/// Outgoing transitions of each state, indexed by state number.
transitions: Vec<Vec<Transition>>,
}
impl Cylon {
    /// Returns `true` when the compiled rules allow `path`.
    ///
    /// Matching starts at the root state (index 2). For every character of
    /// `path` the transitions of the current state are scanned from the most
    /// recently added to the oldest, and the first one that accepts the
    /// character is followed. Once the path is exhausted, one more step is
    /// taken along the newest end-of-word or wildcard transition, and the
    /// verdict of the state reached is returned.
    ///
    /// # Panics
    ///
    /// Panics if the automaton does not have the layout `compile` produces,
    /// e.g. a state without a `MatchAny` fallback or a transition pointing
    /// past the end of the state table.
    pub fn allow(&self, path: &str) -> bool {
        // States 0 and 1 are the allow/disallow sinks; 2 is the root.
        let last_state = path.chars().fold(2, |state, path_char| {
            self.transitions[state]
                .iter()
                // Later transitions are more specific than the fallback at index 0.
                .rev()
                .find(|transition| match transition {
                    Transition(Edge::MatchAny, ..) => true,
                    // End-of-word can never consume a character.
                    Transition(Edge::MatchEow, ..) => false,
                    Transition(Edge::MatchChar(edge_char), ..) => *edge_char == path_char,
                })
                .map(|Transition(.., next_state)| *next_state)
                .expect("every compiled state starts with a MatchAny fallback transition")
        });

        // The path is exhausted: follow an explicit `$` edge if the state has
        // one, otherwise the newest wildcard edge.
        let final_state = self.transitions[last_state]
            .iter()
            .rev()
            .find(|transition| match transition {
                Transition(Edge::MatchEow, ..) | Transition(Edge::MatchAny, ..) => true,
                Transition(Edge::MatchChar(..), ..) => false,
            })
            .map(|Transition(.., next_state)| *next_state)
            .unwrap_or(last_state);

        match self.states[final_state] {
            State::Allow => true,
            State::Disallow => false,
            State::Intermediate => unreachable!(),
        }
    }

    /// Compiles `rules` into a state machine.
    ///
    /// The rules are sorted by pattern and every distinct pattern prefix
    /// becomes one state, created breadth-first. State 0 is the "allow" sink,
    /// state 1 the "disallow" sink and state 2 the root (empty prefix). In a
    /// pattern, `*` becomes a wildcard edge and `$` an end-of-word edge; every
    /// other character must match literally.
    ///
    /// Prefixes are extended one `char` at a time, so patterns containing
    /// multi-byte UTF-8 characters are handled without slicing inside a
    /// character.
    pub fn compile(mut rules: Vec<Rule>) -> Self {
        let mut transitions: Vec<Vec<Transition>> = vec![
            vec![Transition(Edge::MatchAny, 0)],
            vec![Transition(Edge::MatchAny, 1)],
        ];
        let mut states: Vec<State> = vec![State::Allow, State::Disallow];

        // Sorting groups rules that share a prefix next to each other, which
        // the duplicate check on `curr_prefix` below relies on.
        rules.sort_by(|a, b| Ord::cmp(a.inner(), b.inner()));

        // Work list of (prefix, inherited wildcard state, parent state index,
        // verdict of the prefix). A deque keeps popping from the front O(1).
        let mut queue: std::collections::VecDeque<(&str, usize, usize, State)> =
            std::collections::VecDeque::new();
        queue.push_back(("", 0, 0, State::Intermediate));

        while let Some((parent_prefix, inherited_wildcard, parent_state, state)) =
            queue.pop_front()
        {
            // Index the state being built will occupy once it is pushed.
            let this_state = transitions.len();
            let last_char = parent_prefix.chars().next_back();

            // Sink that unmatched input falls through to from this state.
            let wildcard_state = match state {
                State::Allow => 0,
                // A `$`-terminated disallow only applies at end of input, so
                // longer paths keep the verdict inherited from the parent.
                State::Disallow if last_char == Some('$') => inherited_wildcard,
                State::Disallow => 1,
                State::Intermediate => inherited_wildcard,
            };

            // A `*` state loops on itself; any other state falls back to the sink.
            let fallback_target = if last_char == Some('*') {
                this_state
            } else {
                wildcard_state
            };
            let mut t = vec![Transition(Edge::MatchAny, fallback_target)];

            let mut curr_prefix = "";
            for rule in &rules {
                let path = rule.inner();
                if !path.starts_with(parent_prefix) || path == parent_prefix {
                    continue;
                }

                // Extend the prefix by one whole character. `parent_prefix` is
                // a proper prefix of `path`, so a next character always exists
                // and the slice ends on a character boundary.
                let edge_char = match path[parent_prefix.len()..].chars().next() {
                    Some(c) => c,
                    None => continue,
                };
                let child_prefix = &path[..parent_prefix.len() + edge_char.len_utf8()];

                // Rules are sorted, so equal child prefixes are adjacent.
                if curr_prefix == child_prefix {
                    continue;
                }
                curr_prefix = child_prefix;

                let eow = child_prefix == path;
                let child_state = match (rule, eow) {
                    (Rule::Allow(..), true) => State::Allow,
                    (Rule::Disallow(..), true) => State::Disallow,
                    _ => State::Intermediate,
                };

                queue.push_back((child_prefix, wildcard_state, this_state, child_state));
                // States are numbered in queue order: everything already
                // queued is created after the current state and before the child.
                let child_index = this_state + queue.len();

                let transition = Transition(
                    match edge_char {
                        '*' => Edge::MatchAny,
                        '$' => Edge::MatchEow,
                        c => Edge::MatchChar(c),
                    },
                    child_index,
                );

                // A wildcard may match nothing at all, so whatever follows a
                // `*` must also be reachable directly from the state before it.
                if last_char == Some('*') {
                    transitions[parent_state].push(transition);
                }
                t.push(transition);
            }

            // Intermediate prefixes take the verdict of the sink they fall back to.
            let resolved = match state {
                State::Allow | State::Disallow => state,
                State::Intermediate => states[wildcard_state],
            };
            states.push(resolved);
            transitions.push(t);
        }

        Self {
            states,
            transitions,
        }
    }
}
// Unit tests covering both the compiled transition tables and the
// allow/disallow answers produced from them.
#[cfg(test)]
mod tests {
use super::*;
// Shorthand for building an expected `Transition`:
//   t!('*' => n)  is a wildcard edge to state n,
//   t!('$' => n)  is an end-of-word edge to state n,
//   t!(c => n)    is a literal-character edge to state n.
// The '*' and '$' arms come first so those literals are not treated as
// ordinary characters.
macro_rules! t {
('*' => $x:expr) => {
Transition(Edge::MatchAny, $x)
};
('$' => $x:expr) => {
Transition(Edge::MatchEow, $x)
};
($x:expr => $y:expr) => {
Transition(Edge::MatchChar($x), $y)
};
}
// Plain prefixes: states 0 and 1 are the allow/disallow sinks, state 2 is
// the root, and each further state corresponds to one rule prefix.
#[test]
fn test_compile() {
let rules = vec![
Rule::Disallow("/"),
Rule::Allow("/a"),
Rule::Allow("/abc"),
Rule::Allow("/b"),
];
let expect_transitions = vec![
vec![t!('*' => 0)],
vec![t!('*' => 1)],
vec![t!('*' => 0), t!('/' => 3)],
vec![t!('*' => 1), t!('a' => 4), t!('b' => 5)],
vec![t!('*' => 0), t!('b' => 6)],
vec![t!('*' => 0)],
vec![t!('*' => 0), t!('c' => 7)],
vec![t!('*' => 0)],
];
let expect_states = vec![
State::Allow,
State::Disallow,
State::Allow,
State::Disallow,
State::Allow,
State::Allow,
State::Allow,
State::Allow,
];
let actual = Cylon::compile(rules);
assert_eq!(actual.transitions, expect_transitions);
assert_eq!(actual.states, expect_states);
}
// A `*` state (4) loops on itself, and the edge that follows the wildcard
// ('.') is also added to the state before it (3).
#[test]
fn test_compile_with_wildcard() {
let rules = vec![Rule::Disallow("/"), Rule::Allow("/a"), Rule::Allow("/*.b")];
let expect_transitions = vec![
vec![t!('*' => 0)],
vec![t!('*' => 1)],
vec![t!('*' => 0), t!('/' => 3)],
vec![t!('*' => 1), t!('*' => 4), t!('a' => 5), t!('.' => 6)],
vec![t!('*' => 4), t!('.' => 6)],
vec![t!('*' => 0)],
vec![t!('*' => 1), t!('b' => 7)],
vec![t!('*' => 0)],
];
let expect_states = vec![
State::Allow,
State::Disallow,
State::Allow,
State::Disallow,
State::Disallow,
State::Allow,
State::Disallow,
State::Allow,
];
let actual = Cylon::compile(rules);
assert_eq!(actual.transitions, expect_transitions);
assert_eq!(actual.states, expect_states);
}
// Wildcard followed by a single terminating character.
#[test]
fn test_compile_tricky_wildcard() {
let rules = vec![Rule::Disallow("/"), Rule::Allow("/*.")];
let expect_transitions = vec![
vec![t!('*' => 0)],
vec![t!('*' => 1)],
vec![t!('*' => 0), t!('/' => 3)],
vec![t!('*' => 1), t!('*' => 4), t!('.' => 5)],
vec![t!('*' => 4), t!('.' => 5)],
vec![t!('*' => 0)],
];
let expect_states = vec![
State::Allow,
State::Disallow,
State::Allow,
State::Disallow,
State::Disallow,
State::Allow,
];
let actual = Cylon::compile(rules);
assert_eq!(actual.transitions, expect_transitions);
assert_eq!(actual.states, expect_states);
}
// `$` compiles to an end-of-word edge. The `$`-terminated disallow state (6)
// keeps the inherited allow fallback, while the state for "/x$y" (8) falls
// back to the disallow sink.
#[test]
fn test_compile_with_eow() {
let rules = vec![
Rule::Allow("/"),
Rule::Disallow("/a$"),
Rule::Disallow("/x$y"),
];
let expect_transitions = vec![
vec![t!('*' => 0)],
vec![t!('*' => 1)],
vec![t!('*' => 0), t!('/' => 3)],
vec![t!('*' => 0), t!('a' => 4), t!('x' => 5)],
vec![t!('*' => 0), t!('$' => 6)],
vec![t!('*' => 0), t!('$' => 7)],
vec![t!('*' => 0)],
vec![t!('*' => 0), t!('y' => 8)],
vec![t!('*' => 1)],
];
let expect_states = vec![
State::Allow,
State::Disallow,
State::Allow,
State::Allow,
State::Allow,
State::Allow,
State::Disallow,
State::Allow,
State::Disallow,
];
let actual = Cylon::compile(rules);
assert_eq!(actual.transitions, expect_transitions);
assert_eq!(actual.states, expect_states);
}
// Allow rules on longer prefixes override the disallow on "/".
#[test]
fn test_allow() {
let rules = vec![
Rule::Disallow("/"),
Rule::Allow("/a"),
Rule::Allow("/abc"),
Rule::Allow("/b"),
];
let machine = Cylon::compile(rules);
assert_eq!(false, machine.allow("/"));
assert_eq!(true, machine.allow("/a"));
assert_eq!(true, machine.allow("/a/b"));
assert_eq!(true, machine.allow("/a"));
assert_eq!(true, machine.allow("/abc"));
assert_eq!(true, machine.allow("/abc/def"));
assert_eq!(true, machine.allow("/b"));
assert_eq!(true, machine.allow("/b/c"));
}
// `*` in the middle and at the end of a disallow rule.
#[test]
fn test_allow_match_any() {
let rules = vec![
Rule::Allow("/"),
Rule::Disallow("/secret/*.txt"),
Rule::Disallow("/private/*"),
];
let machine = Cylon::compile(rules);
assert_eq!(true, machine.allow("/"));
assert_eq!(true, machine.allow("/abc"));
assert_eq!(false, machine.allow("/secret/abc.txt"));
assert_eq!(false, machine.allow("/secret/123.txt"));
assert_eq!(true, machine.allow("/secret/abc.csv"));
assert_eq!(true, machine.allow("/secret/123.csv"));
assert_eq!(false, machine.allow("/private/abc.txt"));
assert_eq!(false, machine.allow("/private/123.txt"));
assert_eq!(false, machine.allow("/private/abc.csv"));
assert_eq!(false, machine.allow("/private/123.csv"));
}
// `$` only matches the end of the path: longer paths stay allowed, and a
// literal '$' inside the path does not trigger the rule.
#[test]
fn test_allow_match_eow() {
let rules = vec![
Rule::Allow("/"),
Rule::Disallow("/ignore$"),
Rule::Disallow("/foo$bar"),
];
let machine = Cylon::compile(rules);
assert_eq!(true, machine.allow("/"));
assert_eq!(true, machine.allow("/abc"));
assert_eq!(false, machine.allow("/ignore"));
assert_eq!(true, machine.allow("/ignoreabc"));
assert_eq!(true, machine.allow("/ignore/abc"));
assert_eq!(true, machine.allow("/foo"));
assert_eq!(true, machine.allow("/foo$bar"));
}
// Mix of literal, wildcard and end-of-word rules that overlap each other.
#[test]
fn test_allow_more_complicated() {
let rules = vec![
Rule::Allow("/"),
Rule::Disallow("/a$"),
Rule::Disallow("/abc"),
Rule::Allow("/abc/*"),
Rule::Disallow("/foo/bar"),
Rule::Allow("/*/bar"),
Rule::Disallow("/www/*/images"),
Rule::Allow("/www/public/images"),
];
let machine = Cylon::compile(rules);
assert_eq!(true, machine.allow("/"));
assert_eq!(true, machine.allow("/directory"));
assert_eq!(false, machine.allow("/a"));
assert_eq!(true, machine.allow("/ab"));
assert_eq!(false, machine.allow("/abc"));
assert_eq!(true, machine.allow("/abc/123"));
assert_eq!(true, machine.allow("/foo"));
assert_eq!(true, machine.allow("/foobar"));
assert_eq!(false, machine.allow("/foo/bar"));
assert_eq!(false, machine.allow("/foo/bar/baz"));
assert_eq!(true, machine.allow("/baz/bar"));
assert_eq!(false, machine.allow("/www/cat/images"));
assert_eq!(true, machine.allow("/www/public/images"));
}
// Table of pattern/path pairs; each section compiles one allow pattern on
// top of a blanket disallow of "/".
#[test]
fn test_matches() {
// Plain prefix.
let machine = Cylon::compile(vec![Rule::Disallow("/"), Rule::Allow("/fish")]);
assert_eq!(true, machine.allow("/fish"));
assert_eq!(true, machine.allow("/fish.html"));
assert_eq!(true, machine.allow("/fish/salmon.html"));
assert_eq!(true, machine.allow("/fishheads.html"));
assert_eq!(true, machine.allow("/fishheads/yummy.html"));
assert_eq!(true, machine.allow("/fish.php?id=anything"));
assert_eq!(false, machine.allow("/Fish.asp"));
assert_eq!(false, machine.allow("/catfish"));
assert_eq!(false, machine.allow("/?id=fish"));
// Trailing wildcard: same expectations as the plain prefix above.
let machine = Cylon::compile(vec![Rule::Disallow("/"), Rule::Allow("/fish*")]);
assert_eq!(true, machine.allow("/fish"));
assert_eq!(true, machine.allow("/fish.html"));
assert_eq!(true, machine.allow("/fish/salmon.html"));
assert_eq!(true, machine.allow("/fishheads.html"));
assert_eq!(true, machine.allow("/fishheads/yummy.html"));
assert_eq!(true, machine.allow("/fish.php?id=anything"));
assert_eq!(false, machine.allow("/Fish.asp"));
assert_eq!(false, machine.allow("/catfish"));
assert_eq!(false, machine.allow("/?id=fish"));
// Trailing slash must be present in the path.
let machine = Cylon::compile(vec![Rule::Disallow("/"), Rule::Allow("/fish/")]);
assert_eq!(true, machine.allow("/fish/"));
assert_eq!(true, machine.allow("/fish/?id=anything"));
assert_eq!(true, machine.allow("/fish/salmon.htm"));
assert_eq!(false, machine.allow("/fish"));
assert_eq!(false, machine.allow("/fish.html"));
assert_eq!(false, machine.allow("/Fish/Salmon.asp"));
// Leading wildcard followed by a literal suffix, matched anywhere in the path.
let machine = Cylon::compile(vec![Rule::Disallow("/"), Rule::Allow("/*.php")]);
assert_eq!(true, machine.allow("/filename.php"));
assert_eq!(true, machine.allow("/folder/filename.php"));
assert_eq!(true, machine.allow("/folder/filename.php?parameters"));
assert_eq!(true, machine.allow("/folder/any.php.file.html"));
assert_eq!(true, machine.allow("/filename.php/"));
assert_eq!(false, machine.allow("/"));
assert_eq!(false, machine.allow("/windows.PHP"));
// Same suffix anchored to the end of the path with `$`.
let machine = Cylon::compile(vec![Rule::Disallow("/"), Rule::Allow("/*.php$")]);
assert_eq!(true, machine.allow("/filename.php"));
assert_eq!(true, machine.allow("/folder/filename.php"));
assert_eq!(false, machine.allow("/filename.php?parameters"));
assert_eq!(false, machine.allow("/filename.php/"));
assert_eq!(false, machine.allow("/filename.php5"));
assert_eq!(false, machine.allow("/windows.PHP"));
// Wildcard between a literal prefix and a literal suffix.
let machine = Cylon::compile(vec![Rule::Disallow("/"), Rule::Allow("/fish*.php")]);
assert_eq!(true, machine.allow("/fish.php"));
assert_eq!(true, machine.allow("/fishheads/catfish.php?parameters"));
assert_eq!(false, machine.allow("/Fish.PHP"));
}
}