#![allow(non_snake_case)]
use crate::{core::*, types::*};
use nom::branch::alt;
use nom::bytes::complete::{tag, take_while};
use nom::character::complete::char;
use nom::combinator::{map, opt};
use nom::multi::{many0, many1};
use nom::sequence::tuple;
use nom::IResult;
/// Parses one or more entries and returns the rules found, in input order.
///
/// Each entry is either a full [`rule`] or a "filler" line made of optional
/// `WSP` followed by [`c_nl`]. Filler lines are consumed but add nothing to
/// the returned vector.
pub fn rulelist(input: &[u8]) -> IResult<&[u8], Vec<Rule>> {
    let filler = map(tuple((many0(WSP), c_nl)), |_| None);
    let entries = many1(alt((map(rule, Some), filler)));
    let (rest, parsed) = entries(input)?;
    // Dropping the `None` placeholders keeps only the real rules.
    let rules = parsed.into_iter().flatten().collect();
    Ok((rest, rules))
}
pub fn rule(input: &[u8]) -> IResult<&[u8], Rule> {
let parser = tuple((rulename, defined_as, elements, c_nl));
let (input, (name, definition, elements, _)) = parser(input)?;
Ok((input, Rule::new(&name, elements).definition(definition)))
}
/// Parses a rule name: one `ALPHA` followed by any number of bytes that are
/// `ALPHA`, `DIGIT` or `-`, returned as an owned `String`.
pub fn rulename(input: &[u8]) -> IResult<&[u8], String> {
    let is_tail_byte = |b: u8| is_ALPHA(b) || is_DIGIT(b) || b == b'-';
    let (input, (first, rest)) = tuple((ALPHA, take_while(is_tail_byte)))(input)?;
    let mut name = String::with_capacity(rest.len() + 1);
    name.push(char::from(first as u8));
    name.extend(rest.iter().map(|&b| char::from(b)));
    Ok((input, name))
}
/// Parses the definition operator, surrounded by optional [`c_wsp`] runs.
///
/// `=/` yields `Definition::Incremental` and `=` yields `Definition::Basic`.
pub fn defined_as(input: &[u8]) -> IResult<&[u8], Definition> {
    let (input, _) = many0(c_wsp)(input)?;
    // "=/" must be tried first: "=" is a prefix of it and would match too early.
    let (input, kind) = alt((
        map(tag("=/"), |_| Definition::Incremental),
        map(tag("="), |_| Definition::Basic),
    ))(input)?;
    let (input, _) = many0(c_wsp)(input)?;
    Ok((input, kind))
}
pub fn elements(input: &[u8]) -> IResult<&[u8], Node> {
let parser = tuple((alternation, many0(WSP)));
let (input, (alternation, _)) = parser(input)?;
Ok((input, alternation))
}
/// Parses one whitespace unit: either a single `WSP`, or a [`c_nl`]
/// immediately followed by a `WSP`. The matched text is discarded.
pub fn c_wsp(input: &[u8]) -> IResult<&[u8], ()> {
    let plain = map(WSP, |_| ());
    let continued = map(tuple((c_nl, WSP)), |_| ());
    alt((plain, continued))(input)
}
/// Parses the end of a line: either a [`comment`] (which includes its own
/// line ending) or a bare `CRLF`. The matched text is discarded.
pub fn c_nl(input: &[u8]) -> IResult<&[u8], ()> {
    let bare_newline = map(CRLF, |_| ());
    alt((comment, bare_newline))(input)
}
pub fn comment(input: &[u8]) -> IResult<&[u8], ()> {
let valid = |x| is_WSP(x) || is_VCHAR(x);
let (input, (_, _, _)) = tuple((char(';'), take_while(valid), CRLF))(input)?;
Ok((input, ()))
}
/// Parses one or more [`concatenation`]s separated by `/` (with optional
/// [`c_wsp`] on either side of the slash).
///
/// A single concatenation is returned as-is; two or more are wrapped in
/// `Node::Alternation`.
pub fn alternation(input: &[u8]) -> IResult<&[u8], Node> {
    let slash_then_item = tuple((many0(c_wsp), char('/'), many0(c_wsp), concatenation));
    let (input, (first, rest)) = tuple((concatenation, many0(slash_then_item)))(input)?;
    // No separators seen: avoid wrapping a lone node in a one-element alternation.
    if rest.is_empty() {
        return Ok((input, first));
    }
    let mut branches = Vec::with_capacity(rest.len() + 1);
    branches.push(first);
    branches.extend(rest.into_iter().map(|(_, _, _, node)| node));
    Ok((input, Node::Alternation(branches)))
}
/// Parses one or more [`repetition`]s separated by at least one [`c_wsp`].
///
/// A single repetition is returned as-is; two or more are wrapped in
/// `Node::Concatenation`.
pub fn concatenation(input: &[u8]) -> IResult<&[u8], Node> {
    let gap_then_item = tuple((many1(c_wsp), repetition));
    let (input, (first, rest)) = tuple((repetition, many0(gap_then_item)))(input)?;
    // Nothing followed the first item: return it without a wrapper node.
    if rest.is_empty() {
        return Ok((input, first));
    }
    let mut parts = Vec::with_capacity(rest.len() + 1);
    parts.push(first);
    parts.extend(rest.into_iter().map(|(_, node)| node));
    Ok((input, Node::Concatenation(parts)))
}
pub fn repetition(input: &[u8]) -> IResult<&[u8], Node> {
let parser = tuple((opt(repeat), element));
let (input, (repeat, node)) = parser(input)?;
if let Some(repeat) = repeat {
Ok((input, Node::Repetition(Repetition::new(repeat, node))))
} else {
Ok((input, node))
}
}
pub fn repeat(input: &[u8]) -> IResult<&[u8], Repeat> {
let parser = alt((
map(
tuple((many0(DIGIT), char('*'), many0(DIGIT))),
|(min, _, max)| {
let min = if !min.is_empty() {
Some(usize::from_str_radix(&min.into_iter().collect::<String>(), 10).unwrap())
} else {
None
};
let max = if !max.is_empty() {
Some(usize::from_str_radix(&max.into_iter().collect::<String>(), 10).unwrap())
} else {
None
};
Repeat::with(min, max)
},
),
map(many1(DIGIT), |min| {
let min = usize::from_str_radix(&min.into_iter().collect::<String>(), 10).unwrap();
Repeat::with(Some(min), Some(min))
}),
));
let (input, repeat) = parser(input)?;
Ok((input, repeat))
}
/// Parses a single element, trying in order: [`rulename`], [`group`],
/// [`option`], [`char_val`], [`num_val`] and [`prose_val`].
///
/// `group` and `option` already produce a `Node`; the other results are
/// wrapped in the matching `Node` variant.
pub fn element(input: &[u8]) -> IResult<&[u8], Node> {
    alt((
        map(rulename, Node::Rulename),
        group,
        option,
        map(char_val, Node::CharVal),
        map(num_val, Node::NumVal),
        map(prose_val, Node::ProseVal),
    ))(input)
}
pub fn group(input: &[u8]) -> IResult<&[u8], Node> {
let parser = tuple((
char('('),
many0(c_wsp),
alternation,
many0(c_wsp),
char(')'),
));
let (input, (_, _, alternation, _, _)) = parser(input)?;
Ok((input, Node::Group(Box::new(alternation))))
}
pub fn option(input: &[u8]) -> IResult<&[u8], Node> {
let parser = tuple((
char('['),
many0(c_wsp),
alternation,
many0(c_wsp),
char(']'),
));
let (input, (_, _, alternation, _, _)) = parser(input)?;
Ok((input, Node::Optional(Box::new(alternation))))
}
pub fn char_val(input: &[u8]) -> IResult<&[u8], String> {
let char_val_chars = |x| match x {
0x20..=0x21 | 0x23..=0x7E => true,
_ => false,
};
let (input, (_, val, _)) = tuple((DQUOTE, take_while(char_val_chars), DQUOTE))(input)?;
Ok((input, val.iter().map(|b| *b as char).collect()))
}
/// Parses a numeric value: `%` followed by a [`bin_val`], [`dec_val`] or
/// [`hex_val`], tried in that order.
pub fn num_val(input: &[u8]) -> IResult<&[u8], Range> {
    let (input, _) = char('%')(input)?;
    alt((bin_val, dec_val, hex_val))(input)
}
pub fn bin_val(input: &[u8]) -> IResult<&[u8], Range> {
let (input, _) = char('b')(input)?;
let (input, start) = map(many1(BIT), |val| {
u32::from_str_radix(&val.into_iter().collect::<String>(), 2).expect("should never happen")
})(input)?;
let (input, compl) = opt(alt((
map(many1(tuple((char('.'), many1(BIT)))), |pairs| {
let mut all = vec![start];
for (_, val) in pairs.into_iter() {
all.push(
u32::from_str_radix(&val.into_iter().collect::<String>(), 2)
.expect("should never happen"),
)
}
Range::OneOf(all)
}),
map(tuple((char('-'), many1(BIT))), |(_, end)| {
Range::Range(
start,
u32::from_str_radix(&end.into_iter().collect::<String>(), 2)
.expect("should never happen"),
)
}),
)))(input)?;
if let Some(r) = compl {
Ok((input, r))
} else {
Ok((input, Range::OneOf(vec![start])))
}
}
pub fn dec_val(input: &[u8]) -> IResult<&[u8], Range> {
let (input, _) = char('d')(input)?;
let (input, start) = map(many1(DIGIT), |val| {
u32::from_str_radix(&val.into_iter().collect::<String>(), 10).unwrap()
})(input)?;
let (input, compl) = opt(alt((
map(many1(tuple((char('.'), many1(DIGIT)))), |pairs| {
let mut all = vec![start];
for (_, val) in pairs.into_iter() {
all.push(u32::from_str_radix(&val.into_iter().collect::<String>(), 10).unwrap())
}
Range::OneOf(all)
}),
map(tuple((char('-'), many1(DIGIT))), |(_, end)| {
Range::Range(
start,
u32::from_str_radix(&end.into_iter().collect::<String>(), 10).unwrap(),
)
}),
)))(input)?;
if let Some(r) = compl {
Ok((input, r))
} else {
Ok((input, Range::OneOf(vec![start])))
}
}
pub fn hex_val(input: &[u8]) -> IResult<&[u8], Range> {
let (input, _) = char('x')(input)?;
let (input, start) = map(many1(HEXDIG), |val| {
u32::from_str_radix(&val.into_iter().collect::<String>(), 16).unwrap()
})(input)?;
let (input, compl) = opt(alt((
map(many1(tuple((char('.'), many1(HEXDIG)))), |pairs| {
let mut all = vec![start];
for (_, val) in pairs.into_iter() {
all.push(u32::from_str_radix(&val.into_iter().collect::<String>(), 16).unwrap())
}
Range::OneOf(all)
}),
map(tuple((char('-'), many1(HEXDIG))), |(_, end)| {
Range::Range(
start,
u32::from_str_radix(&end.into_iter().collect::<String>(), 16).unwrap(),
)
}),
)))(input)?;
if let Some(r) = compl {
Ok((input, r))
} else {
Ok((input, Range::OneOf(vec![start])))
}
}
/// Parses a prose value enclosed in `<` and `>` and returns the text between
/// the angle brackets.
///
/// The text may contain bytes `0x20..=0x7E` except `>` (`0x3E`).
pub fn prose_val(input: &[u8]) -> IResult<&[u8], String> {
    let is_prose_byte = |b: u8| b != 0x3E && (0x20..=0x7E).contains(&b);
    let (input, _) = char('<')(input)?;
    let (input, raw) = take_while(is_prose_byte)(input)?;
    let (input, _) = char('>')(input)?;
    Ok((input, raw.iter().copied().map(char::from).collect()))
}
// Unit tests and quickcheck generators for the parsers defined in this file.
#[cfg(test)]
mod tests {
use super::*;
use quickcheck::{Arbitrary, Gen};
use quickcheck_macros::quickcheck;
use rand::{distributions::Distribution, seq::SliceRandom, Rng};
/// Samples single characters from ASCII letters, digits and `-`.
struct RulenameDistribution;
impl Distribution<char> for RulenameDistribution {
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> char {
*b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-"
.choose(rng)
.unwrap() as char
}
}
/// Generates a rule with a random name, random node tree and random definition kind.
impl Arbitrary for Rule {
fn arbitrary<G: Gen>(g: &mut G) -> Self {
let name: String = std::iter::repeat(())
.map(|()| g.sample(RulenameDistribution))
.take(7)
.collect();
// Prefix with "a" so the name never starts with a digit or '-'.
let name = String::from("a") + &name;
Rule::new(&name, Node::arbitrary(g)).definition(Definition::arbitrary(g))
}
}
/// Generates a random node; container variants recurse into `Node::arbitrary`.
impl Arbitrary for Node {
fn arbitrary<G: Gen>(g: &mut G) -> Self {
// The same generated text serves as payload for Rulename, CharVal and ProseVal.
let name: String = std::iter::repeat(())
.map(|()| g.sample(RulenameDistribution))
.take(7)
.collect();
let name = String::from("a") + &name;
// One arm per Node variant; the range 0..9 matches the nine arms below.
match g.gen_range(0, 9) {
0 => Node::Alternation(vec![Node::arbitrary(g), Node::arbitrary(g)]),
1 => Node::Concatenation(vec![Node::arbitrary(g), Node::arbitrary(g)]),
2 => Node::Repetition(Repetition::new(Repeat::arbitrary(g), Node::arbitrary(g))),
3 => Node::Rulename(name),
4 => Node::Group(Box::<Node>::arbitrary(g)),
5 => Node::Optional(Box::<Node>::arbitrary(g)),
6 => Node::CharVal(name),
7 => Node::NumVal(Range::arbitrary(g)),
8 => Node::ProseVal(name),
_ => unreachable!(),
}
}
}
/// Picks either `Basic` or `Incremental`.
impl Arbitrary for Definition {
fn arbitrary<G: Gen>(g: &mut G) -> Self {
use Definition::*;
[Basic, Incremental].choose(g).unwrap().clone()
}
}
/// Generates a repeat specifier with independently random optional bounds.
impl Arbitrary for Repeat {
fn arbitrary<G: Gen>(g: &mut G) -> Self {
Repeat::with(Option::<usize>::arbitrary(g), Option::<usize>::arbitrary(g))
}
}
/// Generates either a random value list or a random (start, end) range.
impl Arbitrary for Range {
fn arbitrary<G: Gen>(g: &mut G) -> Self {
use super::Range::*;
// Both candidates are built first; one of them is then chosen.
[
OneOf(Vec::<u32>::arbitrary(g)),
Range(u32::arbitrary(g), u32::arbitrary(g)),
]
.choose(g)
.unwrap()
.clone()
}
}
// Each case pairs rule text with the expected `Rule`; the parser must consume the whole input.
#[test]
fn test_rules() {
let tests = vec![
("a = A\n", Rule::new("a", Node::Rulename("A".into()))),
(
"B = A / B\n",
Rule::new(
"B",
Node::Alternation(vec![Node::Rulename("A".into()), Node::Rulename("B".into())]),
),
),
(
"c = (A / B)\n",
Rule::new(
"c",
Node::Group(Box::new(Node::Alternation(vec![
Node::Rulename("A".into()),
Node::Rulename("B".into()),
]))),
),
),
(
"D = <this is prose>\n",
Rule::new("D", Node::ProseVal("this is prose".into())),
),
(
"xXx = ((A B))\n",
Rule::new(
"xXx",
Node::Group(Box::new(Node::Group(Box::new(Node::Concatenation(vec![
Node::Rulename("A".into()),
Node::Rulename("B".into()),
]))))),
),
),
(
"a = 0*15\"-\"\n",
Rule::new(
"a",
Node::Repetition(Repetition::new(
Repeat::with(Some(0), Some(15)),
Node::CharVal("-".into()),
)),
),
),
(
"a = *\"-\"\n",
Rule::new(
"a",
Node::Repetition(Repetition::new(Repeat::new(), Node::CharVal("-".into()))),
),
),
];
for (test, expected) in tests {
let (remaining, got) = rule(test.as_bytes()).unwrap();
assert!(remaining.is_empty());
assert_eq!(got, expected);
}
}
// Rule names with upper/lower case letters and an embedded hyphen.
#[test]
fn test_rulename() {
assert_eq!(rulename(b"a").unwrap().1, "a");
assert_eq!(rulename(b"A").unwrap().1, "A");
assert_eq!(rulename(b"ab").unwrap().1, "ab");
assert_eq!(rulename(b"Ab").unwrap().1, "Ab");
assert_eq!(rulename(b"A-b").unwrap().1, "A-b");
}
// Only checks that the input is fully consumed; the parsed node is printed, not asserted.
#[test]
fn test_alternation() {
let (remaining, res) = alternation(b"A / \"xxx\"").unwrap();
assert!(remaining.len() == 0);
println!("{:?}", res);
}
// Only checks that the input is fully consumed; the parsed node is printed, not asserted.
#[test]
fn test_repetition() {
let (remaining, res) = repetition(b"1*1A").unwrap();
assert!(remaining.len() == 0);
println!("{:?}", res);
}
// The same three values written in binary, decimal and hexadecimal must parse identically.
#[test]
fn test_num_val() {
let expected = Range::OneOf(vec![0x00, 0x0A, 0xff]);
let got1 = num_val(b"%b0.1010.11111111");
let got2 = num_val(b"%d0.10.255");
let got3 = num_val(b"%x0.A.ff");
assert_eq!(expected, got1.unwrap().1);
assert_eq!(expected, got2.unwrap().1);
assert_eq!(expected, got3.unwrap().1);
}
// Covers both the dotted series form and the dashed range form.
#[test]
fn test_bin_val() {
let expected = Range::OneOf(vec![0x00, 0x03, 0xff]);
let got = bin_val(b"b00.11.11111111");
assert_eq!(expected, got.unwrap().1);
let expected = Range::Range(0, 255);
let got = bin_val(b"b00-11111111");
assert_eq!(expected, got.unwrap().1)
}
// Covers both the dotted series form and the dashed range form.
#[test]
fn test_dec_val() {
let expected = Range::OneOf(vec![0, 42, 255]);
let got = dec_val(b"d0.42.255");
assert_eq!(expected, got.unwrap().1);
let expected = Range::Range(0, 255);
let got = dec_val(b"d0-255");
assert_eq!(expected, got.unwrap().1)
}
// Covers both the dotted series form and the dashed range form.
#[test]
fn test_hex_val() {
let expected = Range::OneOf(vec![0xCA, 0xFF, 0xEE]);
let got = hex_val(b"xCA.FF.EE");
assert_eq!(expected, got.unwrap().1);
let expected = Range::Range(0, 255);
let got = hex_val(b"x00-FF");
assert_eq!(expected, got.unwrap().1)
}
// The returned text excludes the surrounding angle brackets.
#[test]
fn test_prose_val() {
assert_eq!("Hello, World!", prose_val(b"<Hello, World!>").unwrap().1)
}
// Rules written with "=/" must carry `Definition::Incremental`.
#[test]
fn test_definition() {
let tests = vec![
(
"a =/ A\n",
Rule::new("a", Node::Rulename("A".into())).definition(Definition::Incremental),
),
(
"B =/ A / B\n",
Rule::new(
"B",
Node::Alternation(vec![Node::Rulename("A".into()), Node::Rulename("B".into())]),
)
.definition(Definition::Incremental),
),
];
for (test, expected) in tests {
let (remaining, got) = rule(test.as_bytes()).unwrap();
assert!(remaining.is_empty());
assert_eq!(got, expected);
}
}
// Exploratory: prints generated rules whose printed form fails to parse.
// Contains no assertion, so a parse error alone does not fail this test.
#[quickcheck]
fn test_explore_nesting(test: Rule) {
let printed = test.to_string() + "\n";
if let Err(_) = rule(printed.as_bytes()) {
println!("# Found interesting rule:");
println!("{}", test);
println!("{:#?}", test);
}
}
// Builds a repetition nested inside a repetition and prints it; nothing is asserted.
#[test]
fn test_repetition_repetition() {
let rule = Rule::new(
"rule",
Node::Repetition(Repetition::new(
Repeat::with(Some(1), Some(12)),
Node::Repetition(Repetition::new(
Repeat::with(Some(1), Some(2)),
Node::ProseVal("test".into()),
)),
)),
);
println!("{}", rule);
}
}