#![deny(missing_docs)]
extern crate range;
use std::fmt::{ Display, Formatter };
use std::fmt::Error as FormatError;
use range::Range;
/// A position in source text: the text that remains to be read together
/// with the offset at which that text starts.
#[derive(Copy, Clone, Debug)]
pub struct ReadToken<'a> {
/// The source text that has not been consumed yet.
pub src: &'a str,
/// Offset of the start of `src`; `consume` advances it by the number of
/// bytes it slices off.
pub offset: usize,
}
impl<'a> ReadToken<'a> {
/// Creates a read token for `src`, whose first byte is located at `offset`.
pub fn new(src: &'a str, offset: usize) -> ReadToken<'a> {
    ReadToken { src, offset }
}
/// Returns a token advanced past the first `n` bytes of the source.
///
/// Consuming zero bytes returns the token unchanged. Panics if `n` is out
/// of bounds or does not fall on a character boundary.
pub fn consume(self, n: usize) -> ReadToken<'a> {
    match n {
        0 => self,
        _ => {
            let rest = &self.src[n..];
            let advanced = self.offset + n;
            ReadToken { src: rest, offset: advanced }
        }
    }
}
/// Copies the first `n` bytes of the source into an owned `String`.
///
/// Panics if `n` is out of bounds or does not fall on a character boundary.
pub fn raw_string(&self, n: usize) -> String {
    let head: &str = &self.src[..n];
    String::from(head)
}
/// Tells whether the text from the start of this token up to the end of
/// `range` finishes with a newline that is followed only by whitespace.
fn ended_with_newline(&self, range: Range) -> bool {
    let end = range.next_offset() - self.offset;
    // The last character that is either a newline or not whitespace decides
    // the answer; whitespace after it is ignored.
    self.src[..end]
        .chars()
        .rev()
        .find(|&c| c == '\n' || !c.is_whitespace())
        .map_or(false, |c| c == '\n')
}
/// Reads items that must be separated by at least one newline.
///
/// Blank lines and whitespace-only text are skipped. Whenever
/// non-whitespace text is found, `f` is called with a token positioned at
/// the start of the current line (leading whitespace on that line is not
/// skipped). `f` returns the range it read, or `None` to stop reading.
///
/// Returns `Ok` with the range from this token to where reading stopped.
/// Returns `Err` with an empty range at the offending offset when
/// non-whitespace text follows an item that did not end with a newline.
pub fn lines<F>(&self, mut f: F) -> Result<Range, Range>
    where F: FnMut(&ReadToken) -> Option<Range>
{
    let mut read_token = *self;
    // True at the start and after every newline: an item may only begin
    // when the previous one was terminated by a newline.
    let mut new_lines = true;
    loop {
        // First character that is a newline or not whitespace. Using the
        // byte index reported by `char_indices` keeps the arithmetic correct
        // for multi-byte whitespace such as U+00A0.
        let stop = read_token.src.char_indices()
            .find(|&(_, c)| c == '\n' || !c.is_whitespace());
        match stop {
            None => {
                // Only whitespace (or nothing) remains: account for it and stop.
                read_token.offset += read_token.src.len();
                break;
            }
            Some((i, '\n')) => {
                // Skip the blank remainder of the line, including the newline.
                read_token = read_token.consume(i + 1);
                new_lines = true;
            }
            Some(_) => {
                if !new_lines {
                    return Err(Range::empty(read_token.offset));
                }
                match f(&read_token) {
                    None => break,
                    Some(range) => {
                        new_lines = read_token.ended_with_newline(range);
                        read_token = read_token.consume(range.length);
                    }
                }
            }
        }
    }
    Ok(read_token.subtract(self))
}
/// Returns the range that starts at `rhs` and ends at this token.
///
/// `rhs` is expected to be at or before this token; otherwise the length
/// computation underflows.
#[inline(always)]
pub fn subtract(&self, rhs: &Self) -> Range {
    let begin = rhs.offset;
    let length = self.offset - begin;
    Range::new(begin, length)
}
/// Returns an empty range located at this token's offset.
#[inline(always)]
pub fn start(&self) -> Range {
    let at = self.offset;
    Range::empty(at)
}
/// Returns a range of `n` bytes starting at this token's offset, without
/// consuming anything.
#[inline(always)]
pub fn peek(&self, n: usize) -> Range {
    let from = self.offset;
    Range::new(from, n)
}
/// Reads an expected tag.
///
/// Returns the range covering `tag` when the source starts with it,
/// otherwise `None`.
pub fn tag(&self, tag: &str) -> Option<Range> {
    if !self.src.starts_with(tag) {
        return None;
    }
    Some(self.peek(tag.len()))
}
/// Reads until whitespace or one of the characters in `any` is found.
///
/// Returns the range up to (not including) the stopping character. The
/// second value is the byte index within `any` of the matched character,
/// or `None` when reading stopped at whitespace or at the end of the source.
pub fn until_any_or_whitespace(&self, any: &str) -> (Range, Option<usize>) {
    for (i, c) in self.src.char_indices() {
        // Whitespace takes precedence over a match in `any`.
        if c.is_whitespace() {
            return (self.peek(i), None);
        }
        if let Some(j) = any.find(c) {
            return (self.peek(i), Some(j));
        }
    }
    (self.peek(self.src.len()), None)
}
/// Reads until one of the characters in `any` is found.
///
/// Returns the range up to (not including) the matched character together
/// with the byte index of that character within `any`. When nothing
/// matches, the range covers the whole remaining source and the index is
/// `None`.
pub fn until_any(&self, any: &str) -> (Range, Option<usize>) {
    let hit = self.src
        .char_indices()
        .filter_map(|(i, c)| any.find(c).map(|j| (i, j)))
        .next();
    match hit {
        Some((i, j)) => (self.peek(i), Some(j)),
        None => (self.peek(self.src.len()), None),
    }
}
/// Returns the range of leading whitespace; it is empty when the source
/// does not start with whitespace.
pub fn whitespace(&self) -> Range {
    let length = self.src
        .find(|c: char| !c.is_whitespace())
        .unwrap_or(self.src.len());
    self.peek(length)
}
/// Reads a double-quoted string literal at the start of the source.
///
/// A backslash escapes the character that follows it, so `\"` does not
/// terminate the literal. Returns the range including both quotes, or
/// `None` when the source does not start with `"` or the closing quote is
/// missing.
pub fn string(&self) -> Option<Range> {
    if !self.src.starts_with('"') {
        return None;
    }
    let mut escaped = false;
    // Skip the opening quote and look for the first unescaped closing quote.
    for (i, c) in self.src.char_indices().skip(1) {
        if escaped {
            escaped = false;
            continue;
        }
        match c {
            '\\' => escaped = true,
            '"' => return Some(self.peek(i + 1)),
            _ => {}
        }
    }
    None
}
/// Reads the range of a number at the start of the source.
///
/// Accepts an optional leading `+`/`-`, decimal digits, at most one `.`,
/// at most one `e`/`E` (not as first character) and one sign after the
/// exponent marker. `_` is accepted after the first digit when
/// `settings.allow_underscore` is set. Returns `None` when no character
/// could be read. The range is only a candidate: `parse_number` performs
/// the actual validation.
pub fn number(&self, settings: &NumberSettings) -> Option<Range> {
    let is_sign = |c: char| c == '+' || c == '-';
    let mut seen_digit = false;
    let mut seen_point = false;
    let mut seen_exponent = false;
    let mut exponent_sign_checked = false;
    // Byte index where the number ends; defaults to the whole source.
    let mut end = self.src.len();
    for (i, c) in self.src.char_indices() {
        // A sign is only allowed as the very first character.
        if i == 0 && is_sign(c) { continue; }
        if c.is_digit(10) {
            seen_digit = true;
            continue;
        }
        if c == '_' && seen_digit && settings.allow_underscore { continue; }
        if c == '.' && !seen_point {
            seen_point = true;
            continue;
        }
        if (c == 'e' || c == 'E') && !seen_exponent && i > 0 {
            seen_exponent = true;
            continue;
        }
        // The first other character after the exponent marker may be a sign.
        if seen_exponent && !exponent_sign_checked {
            exponent_sign_checked = true;
            if is_sign(c) { continue; }
        }
        end = i;
        break;
    }
    if end > 0 { Some(self.peek(end)) } else { None }
}
/// Decodes the four hexadecimal digits at the start of the source (the
/// part following `\u`) into a character.
///
/// # Errors
///
/// * `ExpectedFourHexadecimals` when fewer than four characters remain;
///   the range covers the remaining source.
/// * `ExpectedHexadecimal` when a character is not a hexadecimal digit;
///   the range points at that character.
/// * `ExpectedValidUnicode` when the code is not a valid `char`; the range
///   covers the four digits.
fn parse_unicode(&self) -> Result<char, Range<ParseStringError>> {
    use std::char;
    let mut code: u32 = 0;
    // Byte index just past the last digit read so far.
    let mut end = 0;
    let mut char_indices = self.src.char_indices();
    for _ in 0..4 {
        let (i, ch) = match char_indices.next() {
            Some(x) => x,
            None => {
                return Err(Range::new(self.offset, self.src.len())
                    .wrap(ParseStringError::ExpectedFourHexadecimals));
            }
        };
        let digit = match ch.to_digit(16) {
            Some(x) => x,
            None => {
                return Err(Range::new(self.offset + i, 1)
                    .wrap(ParseStringError::ExpectedHexadecimal));
            }
        };
        code = (code << 4) | digit;
        // End of this digit, so the error range below spans all digits.
        end = i + ch.len_utf8();
    }
    match char::from_u32(code) {
        Some(x) => Ok(x),
        None => Err(Range::new(self.offset, end)
            .wrap(ParseStringError::ExpectedValidUnicode)),
    }
}
/// Decodes the string literal occupying the first `n` bytes of the source.
///
/// `n` is the length of the literal including both quotes, as returned by
/// `string`. Supported escapes are `\"`, `\\`, `\/`, `\b`, `\f`, `\n`,
/// `\r`, `\t` and `\uXXXX`.
///
/// # Errors
///
/// Returns the range of the offending text wrapped around a
/// `ParseStringError` when an escape sequence is invalid.
///
/// # Panics
///
/// Panics when `n` is too small to hold both quotes or does not fall on a
/// character boundary.
pub fn parse_string(&self, n: usize)
    -> Result<String, Range<ParseStringError>> {
    let mut out = String::with_capacity(n - 2);
    let mut escaped = false;
    // Number of upcoming characters already decoded as part of `\uXXXX`.
    let mut pending_hex = 0;
    for (i, c) in self.src[1..n - 1].char_indices() {
        if pending_hex > 0 {
            pending_hex -= 1;
            continue;
        }
        if !escaped {
            if c == '\\' {
                escaped = true;
            } else {
                out.push(c);
            }
            continue;
        }
        escaped = false;
        let decoded = match c {
            '"' | '\\' | '/' => c,
            'b' => '\u{0008}',
            'f' => '\u{000c}',
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            'u' => {
                // `i` is relative to the text after the opening quote, so
                // the hexadecimal digits start at byte `2 + i` of the source.
                let start = 2 + i;
                let hex = ReadToken::new(&self.src[start..], self.offset + start);
                let ch = hex.parse_unicode()?;
                pending_hex = 4;
                ch
            }
            _ => {
                return Err(Range::new(self.offset + 1 + i, 1)
                    .wrap(ParseStringError::ExpectedValidEscapeCharacter));
            }
        };
        out.push(decoded);
    }
    Ok(out)
}
/// Parses the first `n` bytes of the source as an `f64`.
///
/// The accepted form is an optional `+`/`-` sign, integer digits, an
/// optional `.` followed by fraction digits and an optional `e`/`E`
/// exponent with an optional sign, for example `20.3e-4`. `_` is skipped
/// among digits when `settings.allow_underscore` is set.
///
/// # Errors
///
/// * `ExpectedDigits` when the text is empty or consists of a sign only.
/// * `Invalid` when an unexpected character is found or the exponent has
///   no digits.
/// * `OverflowInfinity` / `OverflowNegInfinity` when the digits can no
///   longer be accumulated into the significand without loss.
///
/// # Panics
///
/// Panics when `n` is out of bounds or not on a character boundary.
pub fn parse_number(&self, settings: &NumberSettings, n: usize)
    -> Result<f64, ParseNumberError> {
    // Splits off the first character and returns it with the rest.
    #[inline(always)]
    fn slice_shift_char(src: &str) -> Option<(char, &str)> {
        src.chars().next().map(|ch| (ch, &src[ch.len_utf8()..]))
    }
    // Parses the exponent digits. Saturates instead of overflowing and
    // rejects text that contains no digit at all.
    #[inline(always)]
    fn parse_u64(settings: &NumberSettings, src: &str) -> Result<u64, ()> {
        let mut res: u64 = 0;
        let mut has_digit = false;
        for c in src.chars() {
            if settings.allow_underscore && c == '_' { continue; }
            match to_digit(c) {
                Some(digit) => {
                    res = res.saturating_mul(10).saturating_add(digit as u64);
                    has_digit = true;
                }
                None => return Err(()),
            }
        }
        if has_digit { Ok(res) } else { Err(()) }
    }
    // Converts an ASCII decimal digit to its value.
    #[inline(always)]
    fn to_digit(c: char) -> Option<u32> {
        if c >= '0' && c <= '9' { Some(c as u32 - '0' as u32) }
        else { None }
    }

    let radix: u32 = 10;
    let src = &self.src[..n];
    // Strip the sign; `number` accepts both `+` and `-`, so both are
    // handled here.
    let (is_positive, src) = match slice_shift_char(src) {
        None => {
            return Err(ParseNumberError::ExpectedDigits);
        }
        Some(('-', rest)) | Some(('+', rest)) if rest.is_empty() => {
            return Err(ParseNumberError::ExpectedDigits);
        }
        Some(('-', rest)) => (false, rest),
        Some(('+', rest)) => (true, rest),
        Some(_) => (true, src),
    };

    // The significand is accumulated with the sign applied, so that
    // negative zero is preserved.
    let mut sig = if is_positive { 0.0 } else { -0.0 };
    let mut prev_sig = sig;
    // Every character accepted before the exponent marker is ASCII, so the
    // byte index of the marker plus one is where the exponent starts.
    let mut cs = src.char_indices();
    let mut exp_info = None::<(char, usize)>;

    // Integer part.
    for (i, c) in cs.by_ref() {
        if settings.allow_underscore && c == '_' { continue; }
        match to_digit(c) {
            Some(digit) => {
                sig = sig * (radix as f64);
                if is_positive {
                    sig = sig + ((digit as isize) as f64);
                } else {
                    sig = sig - ((digit as isize) as f64);
                }
                // Detect overflow by checking that the step can be undone.
                if prev_sig != 0.0 {
                    if is_positive && sig <= prev_sig
                        { return Err(ParseNumberError::OverflowInfinity); }
                    if !is_positive && sig >= prev_sig
                        { return Err(ParseNumberError::OverflowNegInfinity); }
                    if is_positive && (prev_sig != (sig - digit as f64) / radix as f64)
                        { return Err(ParseNumberError::OverflowInfinity); }
                    if !is_positive && (prev_sig != (sig + digit as f64) / radix as f64)
                        { return Err(ParseNumberError::OverflowNegInfinity); }
                }
                prev_sig = sig;
            },
            None => match c {
                'e' | 'E' | 'p' | 'P' => {
                    exp_info = Some((c, i + 1));
                    break;
                },
                '.' => {
                    break;
                },
                _ => {
                    return Err(ParseNumberError::Invalid);
                },
            },
        }
    }

    // Fractional part, unless the exponent marker was already reached.
    if exp_info.is_none() {
        let mut power = 1.0;
        for (i, c) in cs.by_ref() {
            if settings.allow_underscore && c == '_' { continue; }
            match to_digit(c) {
                Some(digit) => {
                    power = power / (radix as f64);
                    sig = if is_positive {
                        sig + (digit as f64) * power
                    } else {
                        sig - (digit as f64) * power
                    };
                    if is_positive && sig < prev_sig
                        { return Err(ParseNumberError::OverflowInfinity); }
                    if !is_positive && sig > prev_sig
                        { return Err(ParseNumberError::OverflowNegInfinity); }
                    prev_sig = sig;
                },
                None => match c {
                    'e' | 'E' | 'p' | 'P' => {
                        exp_info = Some((c, i + 1));
                        break;
                    },
                    _ => {
                        return Err(ParseNumberError::Invalid);
                    },
                },
            }
        }
    }

    // Exponent.
    let exp = match exp_info {
        Some((c, offset)) => {
            let base: f64 = match c {
                'E' | 'e' if radix == 10 => 10.0,
                _ => return Err(ParseNumberError::Invalid),
            };
            let exp_src = &src[offset..];
            let (exp_positive, digits) = match slice_shift_char(exp_src) {
                Some(('-', rest)) => (false, rest),
                Some(('+', rest)) => (true, rest),
                Some(_) => (true, exp_src),
                None => return Err(ParseNumberError::Invalid),
            };
            let magnitude = match parse_u64(settings, digits) {
                Ok(x) => x,
                Err(()) => return Err(ParseNumberError::Invalid),
            };
            // Clamp instead of wrapping: a huge exponent must not turn
            // into a small or negative one.
            let power = if magnitude > i32::max_value() as u64 {
                i32::max_value()
            } else {
                magnitude as i32
            };
            if exp_positive { base.powi(power) } else { 1.0 / base.powi(power) }
        },
        None => 1.0,
    };
    Ok(sig * exp)
}
}
/// Errors that can occur while decoding a string literal.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ParseStringError {
    /// Fewer than four characters were available after `\u`.
    ExpectedFourHexadecimals,
    /// A character after `\u` was not a hexadecimal digit.
    ExpectedHexadecimal,
    /// The four hexadecimal digits do not encode a valid character.
    ExpectedValidUnicode,
    /// A backslash was followed by an unsupported character.
    ExpectedValidEscapeCharacter,
}

impl Display for ParseStringError {
    /// Writes the human-readable message for this error.
    fn fmt(&self, fmt: &mut Formatter) -> Result<(), FormatError> {
        let message = match *self {
            ParseStringError::ExpectedFourHexadecimals =>
                "Expected four hexadecimals xxxx 0-9A-F",
            ParseStringError::ExpectedHexadecimal =>
                "Expected hexadecimal 0-9A-F",
            ParseStringError::ExpectedValidUnicode =>
                "Expected valid unicode",
            ParseStringError::ExpectedValidEscapeCharacter =>
                "Expected valid escape character '\"\\/bfnrtu'",
        };
        fmt.write_str(message)
    }
}
/// Settings controlling how numbers are read and parsed.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct NumberSettings {
/// Whether `_` is skipped among the digits of a number, as in `2_000`.
pub allow_underscore: bool,
}
/// Errors that can occur while parsing a number.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ParseNumberError {
    /// The text was empty or contained no digits after the sign.
    ExpectedDigits,
    /// The text does not follow the expected number format.
    Invalid,
    /// The number overflowed toward positive infinity.
    OverflowInfinity,
    /// The number overflowed toward negative infinity.
    OverflowNegInfinity,
}

impl Display for ParseNumberError {
    /// Writes the human-readable message for this error.
    fn fmt(&self, fmt: &mut Formatter) -> Result<(), FormatError> {
        let message = match *self {
            ParseNumberError::ExpectedDigits =>
                "Expected digits",
            ParseNumberError::Invalid =>
                "Expected valid number format, for example `20.3e-4`",
            ParseNumberError::OverflowInfinity =>
                "Number overflowed toward positive infinity",
            ParseNumberError::OverflowNegInfinity =>
                "Number overflowed toward negative infinity",
        };
        fmt.write_str(message)
    }
}
#[cfg(test)]
mod tests {
use super::*;
use range::Range;
// `tag` matches a prefix and reports its length in bytes.
#[test]
pub fn test_token() {
    let text = "one day, a nice day";
    let token = ReadToken::new(text, 0);
    assert_eq!(token.tag("one"), Some(Range::new(0, 3)));
    assert_eq!(token.tag("two"), None);

    // "°" is two bytes long in UTF-8, so the tag spans three bytes.
    let text = "°a";
    let token = ReadToken::new(text, 0);
    assert_eq!(token.tag("°a"), Some(Range::new(0, 3)));
}
// Reading stops at whitespace (no index) or at a listed character (index).
#[test]
pub fn test_until_any_or_whitespace() {
    let text = "one day, a nice day";
    // (start offset, expected result)
    let cases: [(usize, (Range, Option<usize>)); 3] = [
        (0, (Range::new(0, 3), None)),
        (3, (Range::empty(3), None)),
        (4, (Range::new(4, 3), Some(0))),
    ];
    for &(start, expected) in cases.iter() {
        let res = ReadToken::new(&text[start..], start)
            .until_any_or_whitespace(",");
        assert_eq!(res, expected);
    }
}
// Reading stops only at a listed character, or at the end of the source.
#[test]
pub fn test_until_any() {
    let text = "one day, a nice day";
    // (start offset, expected result)
    let cases: [(usize, (Range, Option<usize>)); 3] = [
        (0, (Range::new(0, 7), Some(0))),
        (3, (Range::new(3, 4), Some(0))),
        (8, (Range::new(8, 11), None)),
    ];
    for &(start, expected) in cases.iter() {
        let res = ReadToken::new(&text[start..], start).until_any(",");
        assert_eq!(res, expected);
    }
}
// Leading whitespace is reported as a range; three spaces give length 3.
#[test]
pub fn test_whitespace() {
    // Three leading spaces, matching the expected range length below.
    let text = "   123";
    let res = ReadToken::new(&text, 0).whitespace();
    assert_eq!(res, Range::new(0, 3));
}
// `string` finds the literal's range and `parse_string` decodes it.
#[test]
pub fn test_string() {
    // (source, None when no string is read, or Some((range length, decoded text)))
    let cases: [(&str, Option<(usize, &str)>); 7] = [
        (r#""hello""#, Some((7, "hello"))),
        (r#""he\"llo""#, Some((9, r#"he"llo"#))),
        (r#""he"llo""#, Some((4, "he"))),
        ("\"\\u20AC\"", Some((8, "€"))),
        ("\"😎\"", Some((6, "😎"))),
        (r#""hello\""#, None),
        (r#""\\""#, Some((4, r#"\"#))),
    ];
    for &(text, expected) in cases.iter() {
        let token = ReadToken::new(text, 0);
        let res = token.string();
        match expected {
            None => assert_eq!(res, None),
            Some((length, decoded)) => {
                assert_eq!(res, Some(Range::new(0, length)));
                let txt = token.parse_string(res.unwrap().length).unwrap();
                assert_eq!(txt, decoded);
            }
        }
    }
}
// Numbers without underscores: parsed values and detected ranges.
#[test]
pub fn test_number() {
    let settings = NumberSettings { allow_underscore: false };

    // Every value goes through `parse_number`, including "2.5e2".
    let values: [(&str, f64); 7] = [
        ("20", 20.0),
        ("-20", -20.0),
        ("2e2", 2e2),
        ("2.5", 2.5),
        ("2.5e2", 2.5e2),
        ("2.5E2", 2.5E2),
        ("2.5E-2", 2.5E-2),
    ];
    for &(text, expected) in values.iter() {
        let res: f64 = ReadToken::new(text, 0)
            .parse_number(&settings, text.len()).unwrap();
        assert_eq!(res, expected);
    }

    // Length of the range reported by `number`.
    let ranges: [(&str, usize); 7] = [
        ("20", 2),
        ("-20", 3),
        ("2e2", 3),
        ("2.5", 3),
        ("2.5e2", 5),
        ("2.5E2", 5),
        ("2.5E-2", 6),
    ];
    for &(text, length) in ranges.iter() {
        let res = ReadToken::new(text, 0).number(&settings);
        assert_eq!(res, Some(Range::new(0, length)));
    }

    // A lone exponent marker is not a number.
    let text = "e";
    let res = ReadToken::new(text, 0).number(&settings);
    assert_eq!(res, None);
    let res = ReadToken::new(text, 0).parse_number(&settings, 1);
    assert_eq!(res, Err(ParseNumberError::Invalid));
}
// Numbers with underscores allowed: parsed values and detected ranges.
#[test]
pub fn test_underscore_number() {
    let settings = NumberSettings { allow_underscore: true };

    let values: [(&str, f64); 7] = [
        ("2_0", 20.0),
        ("-2_0", -20.0),
        ("2_e2_", 2e2),
        ("2_.5_", 2.5),
        ("2_.5_e2_", 2.5e2),
        ("2_.5_E2_", 2.5E2),
        ("2_.5_E-2_", 2.5E-2),
    ];
    for &(text, expected) in values.iter() {
        let res: f64 = ReadToken::new(text, 0)
            .parse_number(&settings, text.chars().count()).unwrap();
        assert_eq!(res, expected);
    }

    // Expected range length from `number`, or None when nothing is read.
    let ranges: [(&str, Option<usize>); 10] = [
        ("20", Some(2)),
        ("-20", Some(3)),
        ("2e2", Some(3)),
        ("2.5", Some(3)),
        ("2.5e2", Some(5)),
        ("2.5E2", Some(5)),
        ("2.5E-2", Some(6)),
        ("_2.5E-2", None),
        ("2_.5E-2", Some(7)),
        ("2_000_000.5E-2", Some(14)),
    ];
    for &(text, length) in ranges.iter() {
        let res = ReadToken::new(text, 0).number(&settings);
        assert_eq!(res, length.map(|n| Range::new(0, n)));
    }
}
}