use crate::bytes::token::Token;
use crate::errors::ParseResult;
use crate::matcher::Match;
use crate::peek::{peek, PeekResult, Peekable};
use crate::recognizer::Recognizable;
use crate::scanner::Scanner;
/// Tells whether the bytes sitting right before the scanner's cursor are
/// recognized as `escape_token`.
///
/// The scanner is taken by value, so the rewind done here happens on this
/// function's own `Scanner` value.
///
/// Returns `Ok(false)` when fewer than `escape_token.size()` bytes precede the
/// cursor, or when the bytes found there are not recognized as the escape
/// token; returns `Ok(true)` otherwise.
///
/// # Errors
///
/// Propagates any error raised while recognizing `escape_token`.
fn is_escaped<'a, T, V>(mut scanner: Scanner<'a, u8>, escape_token: T) -> ParseResult<bool>
where
    T: Recognizable<'a, u8, V> + Copy,
{
    let escape_len = escape_token.size();

    // Not enough room before the cursor to hold an escape token.
    if scanner.current_position() < escape_len {
        return Ok(false);
    }

    // Step back onto the candidate escape token and try to recognize it.
    scanner.rewind(escape_len);
    let recognized = escape_token.recognize(&mut scanner)?;
    Ok(recognized.is_some())
}
/// Performs one step of a balanced-group scan, updating `balance` in place.
///
/// The step tries, in this order:
///
/// 1. `peek(start, scanner)`: on a hit the scanner advances by the reported
///    `end_slice`, and `balance` is incremented unless the token is preceded
///    by `escape_token`;
/// 2. `peek(end, scanner)`: on a hit the scanner advances the same way, and
///    `balance` is decremented unless the token is preceded by `escape_token`;
/// 3. otherwise the scanner advances by a single byte.
///
/// # Errors
///
/// Propagates any error raised by `peek` or by the escape check.
///
/// NOTE(review): `balance` is a `usize`; callers are expected to stop calling
/// once it reaches 0, since decrementing from 0 would underflow.
///
/// NOTE(review): `start` is peeked before `end`. If the `Peekable`
/// implementation of the tokens searches ahead of the cursor rather than
/// matching only at the cursor, a later `start` would be consumed before an
/// earlier `end` — confirm against the `Peekable` impl in use.
pub fn match_for_balanced_group<'a, T1, T2, T3, V3>(
    scanner: &mut Scanner<'a, u8>,
    balance: &mut usize,
    start: T1,
    end: T2,
    escape_token: T3,
) -> ParseResult<()>
where
    T1: Peekable<'a, u8> + Match<u8> + Copy,
    T2: Peekable<'a, u8> + Match<u8> + Copy,
    T3: Recognizable<'a, u8, V3> + Copy,
{
    // Opening token: consume it and open one more level unless it is escaped.
    if let Some(opening) = peek(start, scanner)? {
        scanner.bump_by(opening.end_slice);
        // Work on a copy positioned on the first byte of the token.
        let mut probe = scanner.clone();
        probe.rewind(start.size());
        let escaped = is_escaped(probe, escape_token)?;
        if !escaped {
            *balance += 1
        }
        return Ok(());
    }

    // Closing token: consume it and close one level unless it is escaped.
    if let Some(closing) = peek(end, scanner)? {
        scanner.bump_by(closing.end_slice);
        let mut probe = scanner.clone();
        probe.rewind(end.size());
        let escaped = is_escaped(probe, escape_token)?;
        if !escaped {
            *balance -= 1;
        }
    } else {
        // Neither token was found: move on by one byte.
        scanner.bump_by(1);
    }

    Ok(())
}
/// Builds a matcher for a balanced, possibly nested, group located at the very
/// beginning of its input.
///
/// The returned closure:
///
/// 1. requires `input` to begin with `start`, otherwise it yields
///    `PeekResult::NotFound`;
/// 2. repeatedly calls [`match_for_balanced_group`] until the balance counter
///    (which starts at 1 for the opening token just consumed) drops to 0;
/// 3. yields `PeekResult::Found` whose `end_slice` is the scanner position
///    reached at that point, with `start.size()` / `end.size()` as the
///    delimiter sizes.
///
/// When the input is exhausted while at least one group is still open, the
/// group is unterminated and the closure yields `PeekResult::NotFound`
/// instead of scanning past the end of the data.
///
/// # Errors
///
/// Propagates any error raised while recognizing `start` or while scanning.
pub fn match_group<'a, T1, T2, T3, V3>(
    start: T1,
    end: T2,
    escape_token: T3,
) -> impl Fn(&'a [u8]) -> ParseResult<PeekResult> + 'a
where
    T1: Peekable<'a, u8> + Match<u8> + Copy + 'a,
    T2: Peekable<'a, u8> + Match<u8> + Copy + 'a,
    T3: Recognizable<'a, u8, V3> + Copy + 'a,
{
    move |input: &'a [u8]| {
        let mut scanner = Scanner::new(input);

        // The group must open at the first byte of the input.
        if start.recognize(&mut scanner)?.is_none() {
            return Ok(PeekResult::NotFound);
        }

        // The opening token consumed above accounts for one open level.
        let mut balance: usize = 1;
        while balance != 0 {
            // Nothing left to scan but the group is still open: without this
            // guard the scan would keep bumping the cursor past the input.
            if scanner.remaining().is_empty() {
                return Ok(PeekResult::NotFound);
            }
            match_for_balanced_group(&mut scanner, &mut balance, start, end, escape_token)?;
        }

        // A cursor still at offset 1 cannot span both an opening and a closing
        // token (assuming non-empty tokens), so it is reported as no match.
        if scanner.current_position() == 1 {
            return Ok(PeekResult::NotFound);
        }

        Ok(PeekResult::Found {
            end_slice: scanner.current_position(),
            start_element_size: start.size(),
            end_element_size: end.size(),
        })
    }
}
/// Builds a matcher for a group opened and closed by the same `token`
/// (for instance a quoted string), located at the very beginning of its input.
///
/// The returned closure:
///
/// 1. yields `PeekResult::NotFound` when `input` is shorter than two tokens or
///    does not begin with `token`;
/// 2. peeks for further occurrences of `token`, ignoring every occurrence that
///    is preceded by `escape_token`;
/// 3. yields `PeekResult::Found` at the first non-escaped occurrence, with
///    `end_slice` set to the scanner position just after it and `token.size()`
///    as both delimiter sizes;
/// 4. yields `PeekResult::NotFound` when no closing occurrence exists.
///
/// # Errors
///
/// Propagates any error raised while recognizing or peeking the tokens.
pub fn match_for_delimited_group<'a, T, T2>(
    token: T,
    escape_token: T2,
) -> impl Fn(&'a [u8]) -> ParseResult<PeekResult> + 'a
where
    T: Peekable<'a, u8> + Copy + 'a + Match<u8>,
    T2: Peekable<'a, u8> + Copy + 'a + Match<u8>,
{
    move |input: &'a [u8]| {
        // An opening and a closing delimiter need at least two tokens' worth of bytes.
        if input.len() < token.size() * 2 {
            return Ok(PeekResult::NotFound);
        }

        let mut scanner = Scanner::new(input);
        if token.recognize(&mut scanner)?.is_none() {
            return Ok(PeekResult::NotFound);
        }

        while !scanner.remaining().is_empty() {
            let peeking = match peek(token, &mut scanner)? {
                Some(peeking) => peeking,
                // No further delimiter: the group is not closed.
                None => break,
            };
            scanner.bump_by(peeking.end_slice);

            // Inspect what precedes the delimiter on a copy of the scanner,
            // rewound onto the delimiter's first byte.
            let mut rewind_scanner = scanner.clone();
            rewind_scanner.rewind(token.size());
            if !is_escaped(rewind_scanner, escape_token)? {
                return Ok(PeekResult::Found {
                    end_slice: scanner.current_position(),
                    start_element_size: token.size(),
                    end_element_size: token.size(),
                });
            }
            // Escaped delimiter: the cursor already sits just past it, so the
            // search resumes from here. Skipping an additional byte would miss
            // a closing delimiter that immediately follows (e.g. `'a\''`) and
            // could move the cursor beyond the end of the input.
        }

        Ok(PeekResult::NotFound)
    }
}
/// Kinds of delimited groups that can be peeked from a byte scanner.
///
/// Each variant is turned into a concrete matcher by `GroupKind::matcher`;
/// every matcher uses `Token::Backslash` as its escape token.
pub enum GroupKind {
/// Balanced group opened by `Token::OpenParen` and closed by `Token::CloseParen`.
Parenthesis,
/// Group opened and closed by `Token::Quote`.
Quotes,
/// Group opened and closed by `Token::DoubleQuote`.
DoubleQuotes,
}
/// Boxed matcher closure: takes the bytes to inspect and returns the
/// `PeekResult` describing the group found in them, if any.
type GroupMatcher<'a> = Box<dyn Fn(&'a [u8]) -> ParseResult<PeekResult> + 'a>;
impl GroupKind {
    /// Builds the boxed matcher implementing this kind of group.
    ///
    /// * `Parenthesis` uses [`match_group`] with `Token::OpenParen` and
    ///   `Token::CloseParen`;
    /// * `Quotes` and `DoubleQuotes` use [`match_for_delimited_group`] with
    ///   `Token::Quote` and `Token::DoubleQuote` respectively.
    ///
    /// All of them treat `Token::Backslash` as the escape token.
    fn matcher<'a>(&self) -> GroupMatcher<'a> {
        let escape = Token::Backslash;
        match self {
            Self::Quotes => Box::new(match_for_delimited_group(Token::Quote, escape)),
            Self::DoubleQuotes => {
                Box::new(match_for_delimited_group(Token::DoubleQuote, escape))
            }
            Self::Parenthesis => {
                Box::new(match_group(Token::OpenParen, Token::CloseParen, escape))
            }
        }
    }
}
impl<'a> Peekable<'a, u8> for GroupKind {
    /// Runs the matcher associated with this group kind on the bytes the
    /// scanner has not consumed yet.
    ///
    /// The scanner is only read; the result reported by the matcher is
    /// returned unchanged.
    fn peek(&self, data: &Scanner<'a, u8>) -> ParseResult<PeekResult> {
        let group_matcher = self.matcher();
        let unread = data.remaining();
        group_matcher(unread)
    }
}
#[cfg(test)]
mod tests {
    use crate::bytes::components::groups::{match_for_delimited_group, match_group, GroupKind};
    use crate::bytes::token::Token;
    use crate::errors::ParseResult;
    use crate::peek::{peek, PeekResult, Peeking};
    use crate::scanner::Scanner;

    /// A nested group followed by an escaped closing parenthesis is matched up
    /// to the first non-escaped closing parenthesis that balances it.
    #[test]
    fn test_match_group() {
        let data = "( 5 + 3 - ( 10 * 8 ) \\)) + 54";
        let result =
            match_group(Token::OpenParen, Token::CloseParen, Token::Backslash)(data.as_bytes())
                .expect("failed to parse");
        assert_eq!(
            result,
            PeekResult::Found {
                end_slice: 24,
                start_element_size: 1,
                end_element_size: 1
            }
        );
        assert_eq!(&data[..24].as_bytes(), b"( 5 + 3 - ( 10 * 8 ) \\))");
    }

    /// Escaped opening and closing parentheses do not affect the balance.
    #[test]
    fn test_match_group2() -> ParseResult<()> {
        let data = "( 5 + 3 - \\( ( 10 * 8 \\)) \\)) + 54";
        let mut tokenizer = Scanner::new(data.as_bytes());
        let result = peek(GroupKind::Parenthesis, &mut tokenizer)?;
        // A missing group must fail the test instead of skipping the assertion.
        let peeked = result.expect("no group found");
        assert_eq!(peeked.peeked_slice(), b" 5 + 3 - \\( ( 10 * 8 \\)) \\)");
        Ok(())
    }

    /// Input that does not start with the opening token is not a group.
    #[test]
    fn test_non_match_group() {
        let data = "4 + ( 5 + 3 - ( 10 * 8 ) \\)) + 54";
        let result =
            match_group(Token::OpenParen, Token::CloseParen, Token::Backslash)(data.as_bytes())
                .expect("failed to parse");
        assert_eq!(result, PeekResult::NotFound);
    }

    /// Peeking a parenthesis group reports the full group, delimiters included.
    #[test]
    fn test_match_group_delimited() {
        let data = b"( 5 + 3 - ( 10 * 8 ) ) + 54";
        let mut tokenizer = Scanner::new(data);
        let result = peek(GroupKind::Parenthesis, &mut tokenizer).expect("failed to parse");
        assert_eq!(
            result,
            Some(Peeking {
                start_element_size: 1,
                end_element_size: 1,
                data: &data[0..22],
                end_slice: 22
            })
        );
        assert_eq!(&data[..22], b"( 5 + 3 - ( 10 * 8 ) )");
    }

    /// The peeked slice of a parenthesis group excludes both delimiters.
    #[test]
    fn test_match_group_delimited2() {
        let data = b"( 5 + 3 - ( 10 * 8 ) ) + 54";
        let mut tokenizer = Scanner::new(data);
        let peeked = peek(GroupKind::Parenthesis, &mut tokenizer)
            .expect("failed to parse")
            .expect("no group found");
        assert_eq!(peeked.peeked_slice(), b" 5 + 3 - ( 10 * 8 ) ");
    }

    /// The peeked slice of a quoted group excludes both quotes.
    #[test]
    fn test_match_quotes2() {
        let data = b"'hello world' data";
        let mut tokenizer = Scanner::new(data);
        let peeked = peek(GroupKind::Quotes, &mut tokenizer)
            .expect("failed to parse")
            .expect("no group found");
        assert_eq!(peeked.peeked_slice(), b"hello world");
    }

    /// An escaped quote inside a quoted group does not close the group.
    #[test]
    fn test_match_quotes3() {
        let data = "'I\\'m a quoted data' - 'yes me too'";
        let mut tokenizer = Scanner::new(data.as_bytes());
        let peeked = peek(GroupKind::Quotes, &mut tokenizer)
            .expect("failed to parse")
            .expect("no group found");
        assert_eq!(peeked.peeked_slice(), b"I\\'m a quoted data");
    }

    /// Single- and double-quoted groups, through the raw matcher and through `peek`.
    #[test]
    fn test_match_quotes() {
        let data = b"'hello world' data";
        let result = match_for_delimited_group(Token::Quote, Token::Backslash)(data)
            .expect("failed to parse");
        assert_eq!(
            result,
            PeekResult::Found {
                end_slice: 13,
                start_element_size: 1,
                end_element_size: 1
            }
        );
        assert_eq!(&data[..13], b"'hello world'");

        // Offsets are byte offsets: each `é` below occupies two bytes.
        let data = r#"'hello world l\'éléphant' data"#;
        let result = match_for_delimited_group(Token::Quote, Token::Backslash)(data.as_bytes())
            .expect("failed to parse");
        assert_eq!(
            result,
            PeekResult::Found {
                end_slice: 27,
                start_element_size: 1,
                end_element_size: 1
            }
        );
        assert_eq!(&data[..27], r#"'hello world l\'éléphant'"#);

        let data = "\"hello world\" data";
        let result =
            match_for_delimited_group(Token::DoubleQuote, Token::Backslash)(data.as_bytes())
                .expect("failed to parse");
        assert_eq!(
            result,
            PeekResult::Found {
                end_slice: 13,
                start_element_size: 1,
                end_element_size: 1
            }
        );
        assert_eq!(&data[..13], "\"hello world\"");

        let data = r#""hello world" data"#;
        let result =
            match_for_delimited_group(Token::DoubleQuote, Token::Backslash)(data.as_bytes())
                .expect("failed to parse");
        assert_eq!(
            result,
            PeekResult::Found {
                end_slice: 13,
                start_element_size: 1,
                end_element_size: 1
            }
        );
        assert_eq!(&data[..13], r#""hello world""#);

        let data = r#""hello world" data"#;
        let mut tokenizer = Scanner::new(data.as_bytes());
        let peeked = peek(GroupKind::DoubleQuotes, &mut tokenizer)
            .expect("failed to parse")
            .expect("failed to parse");
        assert_eq!(peeked.peeked_slice(), b"hello world");

        // An empty double-quoted group is still a group.
        let data = r#""""#;
        let mut tokenizer = Scanner::new(data.as_bytes());
        let peeked = peek(GroupKind::DoubleQuotes, &mut tokenizer)
            .expect("failed to parse")
            .expect("failed to parse");
        assert_eq!(peeked.peeked_slice(), b"");
    }
}