use super::Read;
use super::{Identifier, Text};
use memchr::{memchr2_iter, memchr3_iter};
use std::str::{from_utf8, from_utf8_unchecked};
use crate::error::{Error, ErrorCode};
use crate::parse::BibtexParse;
use crate::validate::IDENTIFIER_ALLOWED;
/// Scans forward from `pos` for the `@` that opens the next entry.
///
/// Everything that is not an `@` is skipped. A `%` starts a comment that runs
/// up to and including the next `\n`, so an `@` inside such a comment is not
/// treated as an entry start.
///
/// Returns `(index just past the '@', true)` when an entry start is found, and
/// `(input.len(), false)` when the end of the input is reached first (including
/// the case where `pos` already lies at or beyond the end).
pub fn next_entry_or_eof(input: &[u8], mut pos: usize) -> (usize, bool) {
    while let Some(&byte) = input.get(pos) {
        // Step past the byte we are about to classify.
        pos += 1;

        if byte == b'@' {
            return (pos, true);
        }

        if byte == b'%' {
            // Jump to just after the terminating newline; a comment that is
            // never terminated swallows the remainder of the input.
            match input[pos..].iter().position(|&b| b == b'\n') {
                Some(offset) => pos += offset + 1,
                None => return (input.len(), false),
            }
        }
    }

    (input.len(), false)
}
/// Skips whitespace and `%` line comments starting at `pos`.
///
/// Whitespace here means tab, line feed, form feed, carriage return and space.
/// A `%` comment extends up to and including the next `\n`.
///
/// Returns the index of the first byte that is neither whitespace nor part of
/// a comment, or `input.len()` if the input is exhausted first (including when
/// `pos` already lies at or beyond the end).
pub fn comment(input: &[u8], mut pos: usize) -> usize {
    while let Some(&byte) = input.get(pos) {
        if matches!(byte, b'\t' | b'\n' | b'\x0C' | b'\r' | b' ') {
            pos += 1;
        } else if byte == b'%' {
            // The comment body starts right after the '%'.
            let body = pos + 1;
            match input[body..].iter().position(|&b| b == b'\n') {
                // Resume immediately after the newline that ends the comment.
                Some(offset) => pos = body + offset + 1,
                // Unterminated comment: it consumes the rest of the input.
                None => return input.len(),
            }
        } else {
            return pos;
        }
    }

    input.len()
}
/// Reads the longest run of identifier bytes beginning at `start`.
///
/// A byte belongs to the identifier when its entry in `IDENTIFIER_ALLOWED` is
/// set.
///
/// Returns the index just past the identifier together with the identifier
/// itself, borrowed from `input`.
///
/// # Errors
///
/// Yields a syntax error with `ErrorCode::Empty` when no identifier byte is
/// found at `start` (this includes `start` lying at or beyond the end of
/// `input`). A UTF-8 validation failure on the matched bytes is propagated
/// through the `?` conversion into `Error`.
pub fn identifier(input: &[u8], start: usize) -> Result<(usize, Identifier<&str>), Error> {
    // Count the allowed bytes; an out-of-range `start` counts as an empty run.
    let run = input.get(start..).map_or(0, |tail| {
        tail.iter()
            .take_while(|&&byte| IDENTIFIER_ALLOWED[usize::from(byte)])
            .count()
    });

    if run == 0 {
        return Err(Error::syntax(ErrorCode::Empty));
    }

    let stop = start + run;
    let name = from_utf8(&input[start..stop])?;
    Ok((stop, Identifier(name)))
}
/// Reads the longest run of ASCII digits beginning at `start`.
///
/// Returns the index just past the digits together with the digits as a
/// string slice borrowed from `input`.
///
/// # Errors
///
/// Yields a syntax error with `ErrorCode::Empty` when there is no digit at
/// `start` (this includes `start` lying at or beyond the end of `input`).
pub fn number(input: &[u8], start: usize) -> Result<(usize, &str), Error> {
    // Count the leading digits; an out-of-range `start` counts as an empty run.
    let digits = input
        .get(start..)
        .map_or(0, |tail| tail.iter().take_while(|b| b.is_ascii_digit()).count());

    if digits == 0 {
        return Err(Error::syntax(ErrorCode::Empty));
    }

    let stop = start + digits;
    // SAFETY: every byte in `start..stop` passed `is_ascii_digit`, and a
    // sequence of ASCII bytes is always valid UTF-8.
    let text = unsafe { from_utf8_unchecked(&input[start..stop]) };
    Ok((stop, text))
}
pub fn balanced(input: &[u8], start: usize) -> Result<(usize, &[u8]), Error> {
let mut bracket_depth = 0;
for offset in memchr2_iter(b'{', b'}', &input[start..]) {
let end = start + offset;
if input[end] == b'{' {
bracket_depth += 1
} else {
if bracket_depth == 0 {
return Ok((end, &input[start..end]));
}
bracket_depth -= 1;
}
}
Err(Error::syntax(ErrorCode::UnterminatedTextToken))
}
pub fn protected(until: u8) -> impl FnMut(&[u8], usize) -> Result<(usize, &[u8]), Error> {
move |input: &[u8], start: usize| {
let mut bracket_depth = 0;
for offset in memchr3_iter(until, b'{', b'}', &input[start..]) {
let end = start + offset;
match input[end] {
b if b == until => {
if bracket_depth == 0 {
return Ok((end, &input[start..end]));
}
}
b'{' => bracket_depth += 1,
_ => {
if bracket_depth == 0 {
return Err(Error::syntax(ErrorCode::UnexpectedClosingBracket));
}
bracket_depth -= 1;
}
}
}
Err(Error::syntax(ErrorCode::UnterminatedTextToken))
}
}
// Expands the `read_impl!` macro from the sibling `create_input_impl` module
// for `[u8]` input, passing `SliceReader`, `Bytes` and `std::convert::identity`.
// NOTE(review): presumably this generates the `SliceReader` type and its `Read`
// implementation on top of the scanning functions above — confirm against the
// macro definition.
super::create_input_impl::read_impl!([u8], SliceReader, Bytes, std::convert::identity);
// Unit tests for the byte-slice scanning helpers defined in this file.
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    #[test]
    fn test_next_entry_or_eof() {
        assert_eq!(next_entry_or_eof(b"junk", 0), (4, false));
        assert_eq!(next_entry_or_eof(b"junk", 2), (4, false));
        assert_eq!(next_entry_or_eof(b"", 0), (0, false));
        // The `@` sits at index 2, so the reported position is 3.
        assert_eq!(next_entry_or_eof(b"x @art", 2), (3, true));
        // `@` inside a `%` comment is ignored; the one after the newline counts.
        assert_eq!(next_entry_or_eof(b"%@@\n@a", 0), (5, true));
        assert_eq!(next_entry_or_eof(b"\nignored @a", 0), (10, true));
        // An unterminated comment swallows the rest of the input.
        assert_eq!(next_entry_or_eof(b"%@a", 0), (3, false));
    }

    #[test]
    fn test_comment() {
        // Layout: '%' at 0, whitespace at 1..=3, 'a' at 4, '\n' at 5,
        // ' ' at 6, 'a' at 7.
        assert_eq!(comment(b"% \t a\n ab", 0), 7);
        assert_eq!(comment(b"% \t a\n ab", 1), 4);
        // Layout: '\t' at 0, ' ' at 1, '%' at 2, '\n' at 3, 'a' at 4.
        assert_eq!(comment(b"\t %\na", 1), 4);
        assert_eq!(comment(b"\x09\x0a\x0c\x0d\x20b", 0), 5);
        assert_eq!(comment(b"\x09\x0a\x0c\x0d\x20b", 2), 5);
        // Comment bodies may contain arbitrary (non-UTF-8) bytes.
        assert_eq!(comment(b"%\xa8!\xfd!\x7f!\nc", 0), 8);
        // Vertical tab is not treated as whitespace.
        assert_eq!(comment(b"\x0b", 0), 0);
        assert_eq!(comment(b"", 0), 0);
    }

    #[test]
    fn test_number() {
        assert_eq!(number(b"123abc", 0), Ok((3, "123")));
        assert_eq!(number(b"a42", 1), Ok((3, "42")));
        assert_eq!(number(b"abc", 0), Err(Error::syntax(ErrorCode::Empty)));
        assert_eq!(number(b"", 0), Err(Error::syntax(ErrorCode::Empty)));
    }

    #[test]
    fn test_protected() {
        assert_eq!(protected(b'"')(b"cap\"rest", 0), Ok((3, &b"cap"[..])));
        assert_eq!(protected(b'"')(b"cap\"rest", 1), Ok((3, &b"ap"[..])));
        // A terminator nested inside braces does not end the token.
        assert_eq!(protected(b'"')(b"a{\"}\"rest", 0), Ok((4, &b"a{\"}"[..])));
        assert_eq!(
            protected(b'"')(b"a{{\"} \"}\"rest", 0),
            Ok((8, &b"a{{\"} \"}"[..]))
        );
        assert_eq!(
            protected(b'"')(b"{\"", 0),
            Err(Error::syntax(ErrorCode::UnterminatedTextToken))
        );
        assert_eq!(
            protected(b'"')(b"}\"", 0),
            Err(Error::syntax(ErrorCode::UnexpectedClosingBracket))
        );
    }

    #[test]
    fn test_balanced() {
        assert_eq!(balanced(b"url}abc", 0), Ok((3, &b"url"[..])));
        assert_eq!(balanced("u{}rl}🍄c".as_bytes(), 0), Ok((5, &b"u{}rl"[..])));
        assert_eq!(balanced(b"u{{}}rl}abc", 1), Ok((7, &b"{{}}rl"[..])));
        assert_eq!(
            balanced(b"none", 0),
            Err(Error::syntax(ErrorCode::UnterminatedTextToken))
        );
        assert_eq!(
            balanced(b"{no}e", 0),
            Err(Error::syntax(ErrorCode::UnterminatedTextToken))
        );
    }

    proptest! {
        // `number` uses an unchecked UTF-8 conversion, so make sure arbitrary
        // printable input never makes it panic.
        #[test]
        fn no_panic(s in "\\PC*") {
            let _ = number(s.as_bytes(), 0);
        }
    }
}