use std::borrow::Cow;
use std::marker::PhantomData;
use ::ScanError;
/// Conversion of a value into a scan cursor.
///
/// The lifetime `'a` bounds the produced cursor (`Output: 'a`).
pub trait IntoScanCursor<'a>: Sized {
    /// The cursor type produced by the conversion.
    type Output: 'a + ScanCursor<'a>;
    /// Consumes `self` and returns the corresponding cursor.
    fn into_scan_cursor(self) -> Self::Output;
}
/// Anything that already is a `ScanCursor` converts into itself.
impl<'a, T> IntoScanCursor<'a> for T where T: 'a + ScanCursor<'a> {
    type Output = Self;
    fn into_scan_cursor(self) -> Self::Output {
        // Identity conversion: the value is returned unchanged.
        self
    }
}
/// A string slice is scanned through a `StrCursor` using the default
/// policy type parameters.
impl<'a> IntoScanCursor<'a> for &'a str {
    type Output = StrCursor<'a>;
    fn into_scan_cursor(self) -> Self::Output {
        // The new cursor starts at offset zero over the whole slice.
        StrCursor::new(self)
    }
}
/// A borrowed `String` is scanned through a `StrCursor` over its contents.
impl<'a> IntoScanCursor<'a> for &'a String {
    type Output = StrCursor<'a>;
    fn into_scan_cursor(self) -> Self::Output {
        // `&String` is coerced to the `&str` that `StrCursor::new` expects.
        StrCursor::new(self)
    }
}
/// A borrowed `Cow<str>` is scanned through a `StrCursor` over its contents.
impl<'a> IntoScanCursor<'a> for &'a Cow<'a, str> {
    type Output = StrCursor<'a>;
    fn into_scan_cursor(self) -> Self::Output {
        // `&Cow<str>` is coerced to the `&str` that `StrCursor::new` expects.
        StrCursor::new(self)
    }
}
/// A position within some input that is being scanned.
///
/// The fallible methods consume the cursor by value. On failure they return
/// a cursor next to the `ScanError`; the `StrCursor` implementation in this
/// file returns the original, un-advanced cursor there.
pub trait ScanCursor<'a>: 'a + Sized + Clone {
    /// Input type handed to the closures given to `try_scan` and
    /// `try_scan_raw`.
    type ScanInput: ScanInput<'a>;
    /// Asserts that the input has ended.
    ///
    /// `StrCursor` considers the input ended when everything that remains is
    /// skipped by its `SkipSpace` policy.
    fn try_end(self) -> Result<(), (ScanError, Self)>;
    /// Runs `f` on the input and, on success, returns its output together
    /// with an advanced cursor.
    ///
    /// `f` returns the scanned value and a `usize`; `StrCursor` interprets the
    /// latter as the number of bytes consumed from the input `f` was given,
    /// and skips leading space (per its `SkipSpace` policy) before calling `f`.
    fn try_scan<F, Out>(self, f: F) -> Result<(Out, Self), (ScanError, Self)>
    where F: FnOnce(Self::ScanInput) -> Result<(Out, usize), ScanError>;
    /// Like `try_scan`; in the `StrCursor` implementation the input is passed
    /// to `f` as-is, without skipping anything first.
    fn try_scan_raw<F, Out>(self, f: F) -> Result<(Out, Self), (ScanError, Self)>
    where F: FnOnce(Self::ScanInput) -> Result<(Out, usize), ScanError>;
    /// Matches the literal `lit` against the input and returns the cursor
    /// advanced past it.
    fn try_match_literal(self, lit: &str) -> Result<Self, (ScanError, Self)>;
    /// Returns the input as a string slice; for `StrCursor` this is the part
    /// that has not been consumed yet.
    fn as_str(self) -> &'a str;
    /// Returns the cursor's offset; for `StrCursor` this is a byte count.
    fn offset(&self) -> usize;
}
/// The view of the input that is handed to scanning closures.
pub trait ScanInput<'a>: 'a + Sized + Clone {
    /// Cursor type produced by `to_cursor`.
    type ScanCursor: ScanCursor<'a>;
    /// String comparison policy associated with this input.
    type StrCompare: StrCompare;
    /// Returns the input as a string slice.
    fn as_str(&self) -> &'a str;
    /// Builds a new input of the same type from `subslice`.
    ///
    /// The `StrCursor` implementation requires `subslice` to lie within
    /// `self.as_str()` and panics otherwise.
    fn from_subslice(&self, subslice: &'a str) -> Self;
    /// Creates a cursor over this input.
    fn to_cursor(&self) -> Self::ScanCursor;
}
/// Cursor over a string slice.
///
/// The three type parameters are policies used by the `ScanCursor`
/// implementation: `Cmp` compares words, `Space` decides how whitespace is
/// skipped and matched, and `Word` decides where a word ends.
#[derive(Debug)]
pub struct StrCursor<'a, Cmp=ExactCompare, Space=IgnoreSpace, Word=Wordish>
where
    Cmp: StrCompare,
    Space: SkipSpace,
    Word: SliceWord,
{
    /// Number of bytes this cursor has advanced; `new` starts it at zero.
    offset: usize,
    /// The input that has not been consumed yet.
    slice: &'a str,
    /// Ties the policy type parameters to the struct without storing values.
    _marker: PhantomData<(Cmp, Space, Word)>,
}
// `StrCursor` is `Copy`: it only holds a `usize`, a `&str` and a marker.
// NOTE(review): this is presumably written by hand (rather than derived) so
// that no `Copy` bound is required of the policy parameters — confirm.
impl<'a, Cmp, Space, Word>
Copy for StrCursor<'a, Cmp, Space, Word>
where
    Cmp: StrCompare,
    Space: SkipSpace,
    Word: SliceWord,
{}
// Cloning is a plain bitwise copy, relying on the `Copy` implementation.
impl<'a, Cmp, Space, Word>
Clone for StrCursor<'a, Cmp, Space, Word>
where
    Cmp: StrCompare,
    Space: SkipSpace,
    Word: SliceWord,
{
    fn clone(&self) -> Self {
        *self
    }
}
impl<'a, Cmp, Space, Word>
StrCursor<'a, Cmp, Space, Word>
where
    Cmp: StrCompare,
    Space: SkipSpace,
    Word: SliceWord,
{
    /// Creates a cursor positioned at offset zero over all of `slice`.
    pub fn new(slice: &'a str) -> Self {
        StrCursor { offset: 0, slice: slice, _marker: PhantomData }
    }

    /// Returns a cursor moved forward by `bytes` bytes.
    ///
    /// Panics if `bytes` is past the end of the remaining input or does not
    /// fall on a character boundary (string slicing rules).
    fn advance_by(self, bytes: usize) -> Self {
        let moved_offset = self.offset + bytes;
        let remaining = &self.slice[bytes..];
        StrCursor { offset: moved_offset, slice: remaining, _marker: PhantomData }
    }

    /// Returns the number of bytes this cursor has advanced so far.
    fn offset(self) -> usize {
        let StrCursor { offset, .. } = self;
        offset
    }
}
impl<'a, Cmp, Space, Word>
ScanCursor<'a> for StrCursor<'a, Cmp, Space, Word>
where
    Cmp: StrCompare,
    Space: SkipSpace,
    Word: SliceWord,
{
    type ScanInput = Self;

    /// Succeeds when the `Space` policy can skip everything that remains.
    fn try_end(self) -> Result<(), (ScanError, Self)> {
        let skipped = Space::skip_space(self.slice);
        if skipped != self.slice.len() {
            return Err((ScanError::expected_end().add_offset(self.offset()), self));
        }
        Ok(())
    }

    /// Skips leading space per the `Space` policy, then runs `f`.
    ///
    /// On success the cursor is advanced past the skipped space plus the
    /// byte count reported by `f`. On failure the error is offset to where
    /// `f` started and the original cursor is returned.
    fn try_scan<F, Out>(self, f: F) -> Result<(Out, Self), (ScanError, Self)>
    where F: FnOnce(Self::ScanInput) -> Result<(Out, usize), ScanError> {
        let start = self.advance_by(Space::skip_space(self.slice));
        f(start)
            .map(|(out, used)| (out, start.advance_by(used)))
            .map_err(|err| (err.add_offset(start.offset()), self))
    }

    /// Runs `f` on the input exactly as it is; nothing is skipped first.
    fn try_scan_raw<F, Out>(self, f: F) -> Result<(Out, Self), (ScanError, Self)>
    where F: FnOnce(Self::ScanInput) -> Result<(Out, usize), ScanError> {
        f(self)
            .map(|(out, used)| (out, self.advance_by(used)))
            .map_err(|err| (err.add_offset(self.offset()), self))
    }

    /// Matches `lit` against the input as alternating runs of space and
    /// words.
    ///
    /// Space runs are matched by the `Space` policy, words are cut out by the
    /// `Word` policy and compared with `Cmp`. On a mismatch the original
    /// cursor is returned together with an error offset to where matching
    /// stopped.
    ///
    /// Panics if, after matching spaces, the `Word` policy cannot slice a
    /// non-empty word off the front of the literal.
    fn try_match_literal(self, lit: &str) -> Result<Self, (ScanError, Self)> {
        let base = self.offset();
        // Bytes of `self.slice` accepted so far.
        let mut consumed = Space::skip_space(self.slice);
        let mut rest = &self.slice[consumed..];
        let mut pat = lit;

        while !pat.is_empty() {
            // First the (possibly empty) run of space on both sides.
            let (in_space, pat_space) = match Space::match_spaces(rest, pat) {
                Ok(lens) => lens,
                Err(off) => return Err((
                    ScanError::literal_mismatch()
                        .add_offset(base + consumed + off),
                    self
                )),
            };
            rest = &rest[in_space..];
            consumed += in_space;
            pat = &pat[pat_space..];

            // The literal may end in space; nothing left to compare then.
            if pat.is_empty() { break; }

            // Then one word from each side. The literal is sliced first so a
            // malformed literal panics before the input is looked at.
            let pat_word = match Word::slice_word(pat) {
                Some(0) | None => panic!("literal {:?} begins with a non-space, non-word", pat),
                Some(n) => &pat[..n],
            };
            let in_word = match Word::slice_word(rest) {
                Some(n) if Cmp::compare(&rest[..n], pat_word) => &rest[..n],
                // No word in the input, or the words differ.
                _ => return Err((
                    ScanError::literal_mismatch()
                        .add_offset(base + consumed),
                    self
                )),
            };

            rest = &rest[in_word.len()..];
            consumed += in_word.len();
            pat = &pat[pat_word.len()..];
        }

        Ok(self.advance_by(consumed))
    }

    /// Returns the input that has not been consumed yet.
    fn as_str(self) -> &'a str {
        self.slice
    }

    /// Returns the number of bytes this cursor has advanced so far.
    fn offset(&self) -> usize {
        self.offset
    }
}
impl<'a, Cmp, Space, Word>
ScanInput<'a> for StrCursor<'a, Cmp, Space, Word>
where
    Cmp: StrCompare,
    Space: SkipSpace,
    Word: SliceWord,
{
    type ScanCursor = Self;
    type StrCompare = Cmp;

    /// Returns the input that has not been consumed yet.
    fn as_str(&self) -> &'a str {
        self.slice
    }

    /// Builds a cursor over `subslice`, keeping the offset consistent with
    /// this cursor.
    ///
    /// Panics when `subslice_offset_stable` cannot locate `subslice` inside
    /// this cursor's remaining input.
    fn from_subslice(&self, subslice: &'a str) -> Self {
        use ::util::StrUtil;

        let delta = self.slice.subslice_offset_stable(subslice)
            .expect("called `StrCursor::from_subslice` with disjoint subslice");
        let offset = self.offset + delta;
        StrCursor { offset: offset, slice: subslice, _marker: PhantomData }
    }

    /// Creates a fresh cursor over the remaining input.
    ///
    /// The accumulated offset is dropped: the result starts at offset zero.
    fn to_cursor(&self) -> Self::ScanCursor {
        StrCursor { offset: 0, slice: self.slice, _marker: PhantomData }
    }
}
/// A plain string slice used directly as scan input, with exact comparison.
impl<'a> ScanInput<'a> for &'a str {
    type ScanCursor = StrCursor<'a>;
    type StrCompare = ExactCompare;
    /// Returns the slice itself.
    fn as_str(&self) -> &'a str {
        *self
    }
    /// Returns `subslice` unchanged; unlike the `StrCursor` implementation,
    /// no containment check is performed here.
    fn from_subslice(&self, subslice: &'a str) -> Self {
        subslice
    }
    /// Creates a `StrCursor` (default policies) over this slice.
    fn to_cursor(&self) -> Self::ScanCursor {
        self.into_scan_cursor()
    }
}
/// Splits off the leading whitespace of `s`.
///
/// Returns the remainder of `s` after its leading whitespace together with
/// the byte length of the whitespace that was skipped.
fn skip_space(s: &str) -> (&str, usize) {
    // The whitespace prefix ends at the first non-whitespace character, or
    // at the end of the string when there is none.
    let prefix_len = s
        .find(|c: char| !c.is_whitespace())
        .unwrap_or(s.len());
    let (_, remainder) = s.split_at(prefix_len);
    (remainder, prefix_len)
}
/// Policy deciding how whitespace is treated while scanning.
pub trait SkipSpace: 'static {
    /// Matches the leading space of `a` against the leading space of `b`.
    ///
    /// `StrCursor::try_match_literal` passes the input as `a` and the literal
    /// as `b`. On success the result holds the number of bytes to drop from
    /// `a` and from `b` respectively; on failure it holds a byte offset into
    /// `a` that the caller adds to the reported error position.
    fn match_spaces(a: &str, b: &str) -> Result<(usize, usize), usize>;
    /// Returns how many leading bytes of `a` should be skipped before
    /// scanning.
    fn skip_space(a: &str) -> usize;
}
/// Whitespace policy under which input whitespace has to equal the pattern's
/// whitespace exactly, character for character.
#[derive(Debug)]
pub enum ExactSpace {}

impl SkipSpace for ExactSpace {
    /// Compares the leading whitespace of `a` (input) with the start of `b`
    /// (pattern), one character at a time.
    ///
    /// Returns `Ok((a_len, b_len))` with the number of bytes matched on each
    /// side, or `Err(offset)` with the byte offset into `a` at which the two
    /// sides disagree. Whitespace that the pattern requires but the input
    /// lacks is a disagreement. Surplus input whitespace after the pattern
    /// has ended is not.
    fn match_spaces(a: &str, b: &str) -> Result<(usize, usize), usize> {
        let mut acs = a.char_indices();
        let mut bcs = b.char_indices();
        // Byte lengths matched so far on each side.
        let (mut last_ai, mut last_bi) = (0, 0);
        loop {
            match (acs.next(), bcs.next()) {
                (Some((ai, ac)), Some((bi, bc))) => {
                    if ac.is_whitespace() {
                        if ac != bc {
                            return Err(ai);
                        }
                        last_ai = ai + ac.len_utf8();
                        last_bi = bi + bc.len_utf8();
                    } else if bc.is_whitespace() {
                        // The pattern still demands whitespace that the input
                        // does not have. Reporting success here would leave
                        // whitespace at the head of the pattern, which a
                        // word slicer such as `NonSpace` cannot slice.
                        return Err(ai);
                    } else {
                        return Ok((ai, bi));
                    }
                },
                // The input ran out while the pattern still demands
                // whitespace.
                (None, Some((_, bc))) if bc.is_whitespace() => return Err(last_ai),
                // Either side ended without an outstanding whitespace
                // requirement.
                _ => return Ok((last_ai, last_bi)),
            }
        }
    }

    /// Never skips anything: all whitespace is significant under this
    /// policy.
    fn skip_space(_: &str) -> usize {
        0
    }
}
// Checks `ExactSpace::match_spaces` on equal, truncated and differing
// whitespace runs.
#[cfg(test)]
#[test]
fn test_exact_space() {
    use self::ExactSpace as ES;
    assert_eq!(ES::match_spaces("", ""), Ok((0, 0)));
    assert_eq!(ES::match_spaces(" ", " "), Ok((1, 1)));
    assert_eq!(ES::match_spaces(" x", " x"), Ok((1, 1)));
    assert_eq!(ES::match_spaces(" ", " x"), Ok((1, 1)));
    assert_eq!(ES::match_spaces(" x", " "), Ok((1, 1)));
    // The pattern is three spaces, so the input's tab at byte 1 is compared
    // against a space and reported as the mismatch. With a shorter pattern
    // the comparison would stop before ever reaching the tab.
    assert_eq!(ES::match_spaces(" \t ", "   "), Err(1));
}
/// Whitespace policy under which any non-empty run of whitespace matches any
/// other non-empty run, but "some whitespace" never matches "none".
#[derive(Debug)]
pub enum FuzzySpace {}

impl SkipSpace for FuzzySpace {
    /// Measures the leading whitespace of `inp` and `pat`.
    ///
    /// Returns both byte lengths when the two sides agree on whether there
    /// is any whitespace at all, and `Err(0)` when exactly one side has some.
    fn match_spaces(inp: &str, pat: &str) -> Result<(usize, usize), usize> {
        let inp_len = skip_space(inp).1;
        let pat_len = skip_space(pat).1;

        let inp_has_space = inp_len != 0;
        let pat_has_space = pat_len != 0;
        if inp_has_space != pat_has_space {
            return Err(0);
        }
        Ok((inp_len, pat_len))
    }

    /// Never skips anything up front.
    fn skip_space(_: &str) -> usize {
        0
    }
}
// Checks `FuzzySpace::match_spaces`: runs of differing length or kind match,
// presence versus absence of whitespace does not.
#[cfg(test)]
#[test]
fn test_fuzzy_space() {
    use self::FuzzySpace as FS;
    assert_eq!(FS::match_spaces("x", "x"), Ok((0, 0)));
    assert_eq!(FS::match_spaces(" x", " x"), Ok((1, 1)));
    // Two spaces in the input against one in the pattern.
    assert_eq!(FS::match_spaces("  x", " x"), Ok((2, 1)));
    // One space in the input against two in the pattern.
    assert_eq!(FS::match_spaces(" x", "  x"), Ok((1, 2)));
    assert_eq!(FS::match_spaces("\tx", " x"), Ok((1, 1)));
    assert_eq!(FS::match_spaces(" x", "\tx"), Ok((1, 1)));
    assert_eq!(FS::match_spaces("x", " x"), Err(0));
    assert_eq!(FS::match_spaces(" x", "x"), Err(0));
}
/// Whitespace policy that ignores all whitespace except `'\r'` and `'\n'`.
#[derive(Debug)]
pub enum IgnoreNonLine {}

impl SkipSpace for IgnoreNonLine {
    /// Skips the leading non-line-break whitespace on both sides; this never
    /// fails.
    fn match_spaces(a: &str, b: &str) -> Result<(usize, usize), usize> {
        Ok((skip_space_non_line(a), skip_space_non_line(b)))
    }

    /// Returns the byte length of the leading non-line-break whitespace.
    fn skip_space(s: &str) -> usize {
        skip_space_non_line(s)
    }
}
/// Returns the byte length of the leading run of whitespace in `s`, where
/// `'\r'` and `'\n'` end the run instead of belonging to it.
fn skip_space_non_line(s: &str) -> usize {
    // The run ends at the first line break or non-whitespace character; if
    // there is none, the whole string is skippable.
    s.find(|c: char| c == '\r' || c == '\n' || !c.is_whitespace())
        .unwrap_or(s.len())
}
/// Whitespace policy that ignores all whitespace.
#[derive(Debug)]
pub enum IgnoreSpace {}

impl SkipSpace for IgnoreSpace {
    /// Skips the leading whitespace on both sides; this never fails.
    fn match_spaces(a: &str, b: &str) -> Result<(usize, usize), usize> {
        Ok((skip_space(a).1, skip_space(b).1))
    }

    /// Returns the byte length of the leading whitespace of `s`.
    fn skip_space(s: &str) -> usize {
        // This calls the module-level `skip_space` helper, not this method.
        let (_, prefix_len) = skip_space(s);
        prefix_len
    }
}
/// Policy deciding where the word at the start of a string ends.
pub trait SliceWord: 'static {
    /// Returns the byte length of the word at the start of `s`, or `None`
    /// when no word can be sliced off.
    fn slice_word(s: &str) -> Option<usize>;
}
/// Word policy that delegates to `slice_non_space`: a word is a run of
/// non-whitespace characters.
#[derive(Debug)]
pub enum NonSpace {}
impl SliceWord for NonSpace {
    fn slice_word(s: &str) -> Option<usize> {
        slice_non_space(s)
    }
}
/// Word policy that delegates to `slice_wordish`: a word is a run of `PERLW`
/// characters or, failing that, a single character.
#[derive(Debug)]
pub enum Wordish {}
impl SliceWord for Wordish {
    fn slice_word(s: &str) -> Option<usize> {
        slice_wordish(s)
    }
}
/// Policy deciding when two strings are considered equal.
pub trait StrCompare: 'static {
    /// Returns `true` when `a` and `b` are equal under this policy.
    fn compare(a: &str, b: &str) -> bool;
}
/// Comparison policy requiring the two strings to be exactly equal.
#[derive(Debug)]
pub enum ExactCompare {}
impl StrCompare for ExactCompare {
    fn compare(a: &str, b: &str) -> bool {
        a == b
    }
}
/// Comparison policy that ignores case by lowercasing both strings,
/// character by character, before comparing them.
#[derive(Debug)]
pub enum IgnoreCase {}

impl StrCompare for IgnoreCase {
    /// Returns `true` when the lowercased character sequences of `a` and `b`
    /// are equal in both length and content.
    fn compare(a: &str, b: &str) -> bool {
        let lowered_a = a.chars().flat_map(char::to_lowercase);
        let lowered_b = b.chars().flat_map(char::to_lowercase);
        lowered_a.eq(lowered_b)
    }
}
// Checks `IgnoreCase::compare` on ASCII input and on the capital/small
// sharp s pair.
#[cfg(test)]
#[test]
fn test_ignore_case() {
    use self::IgnoreCase as IC;
    assert_eq!(IC::compare("hi", "hi"), true);
    assert_eq!(IC::compare("Hi", "hI"), true);
    assert_eq!(IC::compare("hI", "Hi"), true);
    // These two expect capital sharp s to lowercase to small sharp s.
    assert_eq!(IC::compare("ẞß", "ßẞ"), true);
    assert_eq!(IC::compare("ßẞ", "ẞß"), true);
}
/// Comparison policy that ignores case after decomposing both strings with
/// NFD normalization.
///
/// Only available with the `unicode-normalization` feature.
#[cfg(feature="unicode-normalization")]
#[derive(Debug)]
pub enum IgnoreCaseNormalized {}

#[cfg(feature="unicode-normalization")]
impl StrCompare for IgnoreCaseNormalized {
    /// Returns `true` when `a` and `b`, each decomposed with `nfd()` and then
    /// lowercased character by character, yield equal sequences.
    fn compare(a: &str, b: &str) -> bool {
        use unicode_normalization::UnicodeNormalization;

        let folded_a = a.nfd().flat_map(char::to_lowercase);
        let folded_b = b.nfd().flat_map(char::to_lowercase);
        folded_a.eq(folded_b)
    }
}
// Checks `IgnoreCaseNormalized::compare` on ASCII case differences and on
// precomposed versus combining-accent spellings.
#[cfg(feature="unicode-normalization")]
#[cfg(test)]
#[test]
fn test_ignore_case_normalized() {
    use self::IgnoreCaseNormalized as ICN;
    assert_eq!(ICN::compare("hi", "hi"), true);
    assert_eq!(ICN::compare("Hi", "hI"), true);
    assert_eq!(ICN::compare("hI", "Hi"), true);
    // Precomposed "é" versus "e" followed by U+0301 (combining acute).
    assert_eq!(ICN::compare("café", "cafe\u{301}"), true);
    assert_eq!(ICN::compare("cafe\u{301}", "café"), true);
    // The same, combined with case differences.
    assert_eq!(ICN::compare("CafÉ", "CafE\u{301}"), true);
    assert_eq!(ICN::compare("CAFÉ", "cafe\u{301}"), true);
}
/// Comparison policy that ignores ASCII case differences only.
#[derive(Debug)]
pub enum IgnoreAsciiCase {}
impl StrCompare for IgnoreAsciiCase {
    fn compare(a: &str, b: &str) -> bool {
        // NOTE(review): newer toolchains provide `eq_ignore_ascii_case` as an
        // inherent `str` method, so this import is presumably kept only for
        // older compilers — confirm before removing it.
        use std::ascii::AsciiExt;
        a.eq_ignore_ascii_case(b)
    }
}
/// Comparison policy that compares the NFD-normalized forms of both strings.
///
/// Only available with the `unicode-normalization` feature.
#[cfg(feature="unicode-normalization")]
#[derive(Debug)]
pub enum Normalized {}

#[cfg(feature="unicode-normalization")]
impl StrCompare for Normalized {
    /// Returns `true` when `a.nfd()` and `b.nfd()` yield equal character
    /// sequences.
    fn compare(a: &str, b: &str) -> bool {
        use unicode_normalization::UnicodeNormalization;

        a.nfd().eq(b.nfd())
    }
}
// Checks `Normalized::compare` on identical input and on precomposed versus
// combining-accent spellings.
#[cfg(feature="unicode-normalization")]
#[cfg(test)]
#[test]
fn test_normalized() {
    use self::Normalized as N;
    assert_eq!(N::compare("hi", "hi"), true);
    // Precomposed "é" versus "e" followed by U+0301 (combining acute).
    assert_eq!(N::compare("café", "cafe\u{301}"), true);
    assert_eq!(N::compare("cafe\u{301}", "café"), true);
}
/// Returns the byte length of the leading run of characters in `s` that are
/// not in the `White_Space` table, or `None` when that run is empty.
fn slice_non_space(s: &str) -> Option<usize> {
    use ::util::TableUtil;
    use ::unicode::property::White_Space_table as WS;

    // Find the last character of the run, then convert its start index into
    // the end index of the run.
    let last_in_run = s.char_indices()
        .take_while(|&(_, c)| !WS.span_table_contains(&c))
        .last();
    last_in_run.map(|(start, c)| start + c.len_utf8())
}
/// Returns the byte length of the "word" at the start of `s`.
///
/// A word is the leading run of characters found in the `PERLW` table. When
/// that run is empty, the first character of `s` counts as a word on its own.
/// Returns `None` only for an empty string.
fn slice_wordish(s: &str) -> Option<usize> {
    use ::util::TableUtil;
    use ::unicode::regex::PERLW;

    s.char_indices()
        .take_while(|&(_, c)| PERLW.span_table_contains(&c))
        .last()
        .map(|(start, c)| start + c.len_utf8())
        // No word characters up front: fall back to a single character.
        .or_else(|| s.chars().next().map(|c| c.len_utf8()))
}