use super::*;
use std::str::from_utf8_unchecked;
/// Parser over a byte string, bound to the [`UnitSystem`] used to turn the
/// parsed tree into a [`Quantity`].
///
/// `'s` is the lifetime of the borrowed unit system, `'t` the lifetime of the
/// borrowed input text.
pub struct Parser<'s, 't, T> {
/// Unit system handed to the AST when it is converted into a quantity.
system: &'s UnitSystem<T>,
/// Raw input bytes being parsed.
txt: &'t [u8],
/// Index in `txt` of the next byte to consume.
pos: usize,
/// Behaviour switches; see [`ParserOptions`].
options: ParserOptions,
}
/// Switches that tune how a `Parser` treats its input.
#[derive(Clone, Copy, Debug)]
struct ParserOptions {
/// When set, a root-level parse fails if any input is left unconsumed.
exhaustive: bool,
/// When set, ASCII whitespace is skipped between tokens and tolerated
/// inside annotations.
ignore_ws: bool,
}
impl Default for ParserOptions {
fn default() -> Self {
ParserOptions {
exhaustive: true,
ignore_ws: true,
}
}
}
impl<'s, 't, T> Parser<'s, 't, T> {
pub fn exhaustive(mut self, exhaustive: bool) -> Self {
assert!(self.pos == 0, "parser already consumed");
self.options.exhaustive = exhaustive;
self
}
pub fn ignore_ws(mut self, ignore_ws: bool) -> Self {
assert!(self.pos == 0, "parser already consumed");
self.options.ignore_ws = ignore_ws;
self
}
/// Parses an optional leading number followed by a unit expression.
///
/// When a number is present it is parsed as `T` and passed, together with the
/// unit system, to the tree's `make_quantity`; when it is absent the tree's
/// `as_unit_quantity` is used instead.
///
/// # Errors
///
/// Returns "unable to parse value" (with the `FromStr` error as cause) when
/// the numeric prefix is rejected by `T::from_str`, and forwards any error
/// raised while parsing or converting the unit expression.
///
/// # Panics
///
/// Panics if the parser has already advanced past the start of its input.
pub fn parse_value(&mut self) -> UcumResult<'t, Quantity<T>>
where
    T: Clone
        + Div<T, Output = T>
        + From<i32>
        + From<T>
        + FromStr
        + Mul<T, Output = T>
        + MulAssign,
    <T as FromStr>::Err: std::error::Error + 'static,
{
    assert!(self.pos == 0, "parser already consumed");
    self.maybe_consume_whitespace();

    let raw = self.consume_value();
    // SAFETY: `consume_value` only ever advances over ASCII digits, '+', '-',
    // '.', 'e' and 'E', so the returned slice is pure ASCII and valid UTF-8.
    let literal: &str = unsafe { from_utf8_unchecked(raw) };

    // An empty literal means "no value given"; it must not be fed to `parse`.
    let attempt = if literal.is_empty() {
        None
    } else {
        Some(literal.parse::<T>())
    };
    let value: Option<T> = match attempt {
        None => None,
        Some(Ok(number)) => Some(number),
        Some(Err(cause)) => {
            return self.err("unable to parse value").with_cause(Box::new(cause));
        }
    };

    let tree = self.ast(true)?;
    match value {
        Some(number) => tree.make_quantity(number, self.system),
        None => tree.as_unit_quantity(self.system),
    }
}
/// Parses the input as a unit expression (no leading number) and converts the
/// resulting tree with `as_unit_quantity`, using this parser's unit system.
///
/// # Errors
///
/// Forwards any error raised while parsing or converting the expression.
///
/// # Panics
///
/// Panics if the parser has already advanced past the start of its input.
pub fn parse_unit(&mut self) -> UcumResult<'t, Quantity<T>>
where
    T: Clone
        + Div<T, Output = T>
        + From<i32>
        + From<T>
        + FromStr
        + Mul<T, Output = T>
        + MulAssign,
    <T as FromStr>::Err: std::error::Error + 'static,
{
    assert!(self.pos == 0, "parser already consumed");
    let tree = self.ast(true)?;
    tree.as_unit_quantity(self.system)
}
/// Parses the input as a unit expression and returns the syntax tree without
/// resolving it against the unit system.
///
/// # Panics
///
/// Panics if the parser has already advanced past the start of its input.
pub fn parse_to_ast(&mut self) -> UcumResult<'t, Box<AST<'t>>> {
    assert!(self.pos == 0, "parser already consumed");
    let tree = self.ast(true);
    tree
}
pub fn remaining(&self) -> &[u8] {
&self.txt[self.pos..]
}
pub(crate) fn new(system: &'s UnitSystem<T>, txt: &'t [u8]) -> Self {
Parser {
system,
txt,
pos: 0,
options: ParserOptions::default(),
}
}
/// Builds an `Err` carrying `message`, the full input text and the current
/// position.
fn err<U>(&self, message: &'static str) -> UcumResult<'t, U> {
    let failure = UcumError::new(message, self.txt, self.pos);
    Err(failure)
}
/// Parses a term: one component followed by any number of `.component`
/// (product) or `/component` (division) pairs, combined left to right.
///
/// A term that starts with `/` is treated as if it were preceded by the
/// factor `1`.
///
/// `root` selects how the term must end:
/// * `true`  - top-level term; with the `exhaustive` option set, any input
///   left over is an error.
/// * `false` - term opened by `(`; it must be followed by `)`, which is
///   consumed.
///
/// When `ignore_ws` is set, whitespace is skipped after *every* component,
/// including the first one, so `"2 .m"` and `"(2 )"` are accepted just like
/// `"m.2 .s"` and `"(m )"`.
fn ast(&mut self, root: bool) -> UcumResult<'t, Box<AST<'t>>> {
    let txt_len = self.txt.len();
    self.maybe_consume_whitespace();
    if self.pos >= txt_len {
        return self.err("unexpected end of string while looking for term");
    }

    let mut ast = if self.txt[self.pos] == b'/' {
        // Leading solidus: leave it for the loop below and use 1 as numerator.
        AST::factor(&b"1"[..])
    } else {
        self.ast_component()?
    };
    // Factors, parenthesised groups and annotations do not skip the whitespace
    // that follows them; do it here so that the operator lookup below behaves
    // the same after the first component as after all the others.
    self.maybe_consume_whitespace();

    while self.pos < txt_len {
        let operator = self.txt[self.pos];
        if operator != b'.' && operator != b'/' {
            break;
        }
        self.pos += 1;
        self.maybe_consume_whitespace();
        let rhs = self.ast_component()?;
        ast = if operator == b'.' {
            AST::product(ast, rhs)
        } else {
            AST::division(ast, rhs)
        };
        self.maybe_consume_whitespace();
    }

    if root {
        if self.options.exhaustive && self.pos < txt_len {
            let c = self.txt[self.pos];
            return self.err(match c {
                MIN_CHAR..=MAX_CHAR => "spurious characters",
                _ => "invalid character",
            });
        }
    } else if self.pos >= txt_len || self.txt[self.pos] != b')' {
        return self.err("expected closing parenthesis ')'");
    } else {
        // Consume the ')' that closes this nested term.
        self.pos += 1;
    }
    Ok(ast)
}
/// Parses a single component of a term. Alternatives are tried in this order:
///
/// 1. `(` term `)` - a nested term;
/// 2. a unit symbol, optionally followed by an exponent and an annotation;
/// 3. a factor (run of digits);
/// 4. a bare annotation, returned as a unit with an empty symbol and
///    exponent 1.
///
/// # Errors
///
/// Fails at end of input, when none of the alternatives matches, when the
/// exponent is `0`, or when a sub-parser reports an error.
fn ast_component(&mut self) -> UcumResult<'t, Box<AST<'t>>> {
    self.maybe_consume_whitespace();

    let first = match self.txt.get(self.pos) {
        Some(&byte) => byte,
        None => return self.err("unexpected end of string while looking for component"),
    };
    if first == b'(' {
        self.pos += 1;
        return self.ast(false);
    }

    let unit = self.consume_unit()?;
    if !unit.is_empty() {
        self.maybe_consume_whitespace();
        let exponent = self.consume_exponent()?;
        if exponent == 0 {
            return self.err("exponent can not be 0");
        }
        self.maybe_consume_whitespace();
        let annotation = self.consume_annotation()?;
        return Ok(AST::unit(unit, exponent, annotation));
    }

    let factor = self.consume_factor();
    if !factor.is_empty() {
        return Ok(AST::factor(factor));
    }

    // Neither unit nor factor: the only thing left is a bare annotation.
    match self.consume_annotation()? {
        None => self.err("expected component (unit, annotation, factor or parenthesis)"),
        annotation => Ok(AST::unit(unit, 1, annotation)),
    }
}
/// Advances past any run of ASCII whitespace at the current position.
/// Does nothing when the `ignore_ws` option is off.
fn maybe_consume_whitespace(&mut self) {
    if !self.options.ignore_ws {
        return;
    }
    let blanks = self
        .txt
        .iter()
        .skip(self.pos)
        .take_while(|byte| byte.is_ascii_whitespace())
        .count();
    self.pos += blanks;
}
/// Consumes the longest numeric literal starting at the current position and
/// returns its bytes; returns an empty slice (consuming nothing) when there
/// is none.
///
/// Accepted shape: optional sign, digits with an optional `.` anywhere among
/// them (at least one digit overall), then optionally `e`/`E`, an optional
/// sign and at least one digit. An exponent marker that is not followed by
/// digits is left unconsumed, e.g. `42e-` yields `42`.
///
/// Input starting with `10*` or `10^` is never treated as a number; it is
/// left for `consume_unit`, which recognises those prefixes.
fn consume_value(&mut self) -> &[u8] {
let txt_len = self.txt.len();
// Nothing left to read: return an empty slice.
if self.pos == txt_len {
return &self.txt[txt_len..];
}
// "10*" / "10^" are unit prefixes, not the value 10 followed by something.
if self.txt[self.pos..].starts_with(b"10*") || self.txt[self.pos..].starts_with(b"10^") {
return &self.txt[txt_len..];
}
// States of the hand-written scanner. Each state names what is expected
// for the byte at `self.pos`.
enum State {
// First byte of the literal.
Start,
// Inside the integer part (also right after a leading sign).
IntPart,
// After the decimal point.
DecPart,
// Right after the exponent marker `e`/`E`.
Exp,
// Inside the exponent digits (also right after the exponent sign).
ExpInt,
// Scanning is over; decide whether to roll back, then stop.
EndNotEmpty,
};
use State::*;
let start_pos = self.pos;
let mut state = Start;
// Position to fall back to when the part currently being scanned turns
// out to contain no digit: the start of the literal, or the position of
// the exponent marker once the mantissa has been accepted.
let mut checkpoint = self.pos;
// Digits seen in the current part (mantissa, then exponent).
let mut digits = 0;
loop {
// Compute the next state from the byte at `self.pos`.
state = match state {
Start => match self.txt[self.pos] {
b'-' | b'+' => IntPart,
b'0'..=b'9' => {
digits += 1;
IntPart
}
b'.' => DecPart,
// Not the start of a number: nothing has been consumed yet.
_ => break,
},
IntPart => match self.txt[self.pos] {
b'0'..=b'9' => {
digits += 1;
IntPart
}
b'.' => DecPart,
b'e' | b'E' => Exp,
_ => EndNotEmpty,
},
DecPart => match self.txt[self.pos] {
b'0'..=b'9' => {
digits += 1;
DecPart
}
b'e' | b'E' => Exp,
_ => EndNotEmpty,
},
Exp => match self.txt[self.pos] {
b'-' | b'+' => ExpInt,
b'0'..=b'9' => {
digits += 1;
ExpInt
}
_ => EndNotEmpty,
},
ExpInt => match self.txt[self.pos] {
b'0'..=b'9' => {
digits += 1;
ExpInt
}
_ => EndNotEmpty,
},
EndNotEmpty => {
// The last part had no digit: drop it (sign/point only, or a
// dangling exponent marker).
if digits == 0 {
self.pos = checkpoint;
}
break;
}
};
match state {
// Just read the exponent marker.
Exp => {
if digits == 0 {
// No mantissa digit before the marker: not a number at all.
self.pos = start_pos;
break;
} else {
// Mantissa is valid; remember where the marker is so that an
// exponent without digits can be discarded later.
checkpoint = self.pos;
digits = 0;
}
}
// The byte at `self.pos` does not belong to the literal: loop again
// without consuming it so the `EndNotEmpty` arm above runs.
EndNotEmpty => continue,
_ => {}
}
// Consume the byte that was just classified.
self.pos += 1;
// End of input reached: finish on the next iteration without reading.
if self.pos == txt_len {
state = EndNotEmpty;
}
}
&self.txt[start_pos..self.pos]
}
/// Consumes a unit symbol starting at the current position and returns its
/// bytes (possibly empty).
///
/// A leading `10*` or `10^` is taken as part of the symbol. Scanning then
/// continues until end of input, a digit, one of `{ } ( ) ] = + - . / "`, or
/// a byte outside `MIN_CHAR..=MAX_CHAR`. A `[` starts a bracketed section
/// that is consumed as a whole, so the stop characters may appear inside it.
///
/// # Errors
///
/// Forwards the error of `consume_square_brackets` for a malformed bracketed
/// section.
fn consume_unit(&mut self) -> UcumResult<'t, &'t [u8]> {
    let begin = self.pos;
    let head = &self.txt[begin..];
    if head.starts_with(b"10*") || head.starts_with(b"10^") {
        self.pos += 3;
    }

    while let Some(&byte) = self.txt.get(self.pos) {
        match byte {
            b'[' => self.consume_square_brackets()?,
            b'0'..=b'9' => break,
            b'{' | b'}' | b'(' | b')' | b']' => break,
            b'=' | b'+' | b'-' | b'.' | b'/' | b'"' => break,
            _ if byte < MIN_CHAR || byte > MAX_CHAR => break,
            _ => self.pos += 1,
        }
    }
    Ok(&self.txt[begin..self.pos])
}
/// Consumes a `[...]` section; the current position must be on the opening
/// `[` (checked in debug builds only). On success the position is just past
/// the closing `]`.
///
/// # Errors
///
/// Fails on a nested `[`, on a byte outside `MIN_CHAR..=MAX_CHAR`, or when
/// the input ends before `]`.
fn consume_square_brackets(&mut self) -> UcumResult<'t, ()> {
    let txt_len = self.txt.len();
    debug_assert!(self.pos < txt_len && self.txt[self.pos] == b'[');
    self.pos += 1;

    while let Some(&byte) = self.txt.get(self.pos) {
        match byte {
            b']' => {
                self.pos += 1;
                return Ok(());
            }
            b'[' => return self.err("square brackets must not be nested"),
            MIN_CHAR..=MAX_CHAR => self.pos += 1,
            _ => return self.err("invalid character in square brackets"),
        }
    }
    self.err("missing closing square bracket ']'")
}
/// Consumes a run of ASCII digits at the current position and returns it
/// (empty when the next byte is not a digit).
fn consume_factor(&mut self) -> &'t [u8] {
    let txt = self.txt;
    let begin = self.pos;
    let run = txt
        .iter()
        .skip(begin)
        .take_while(|byte| byte.is_ascii_digit())
        .count();
    self.pos = begin + run;
    &txt[begin..self.pos]
}
/// Consumes an optional exponent: a sign or digit followed by digits.
///
/// Returns `1` without consuming anything when the next byte is neither a
/// sign nor a digit (or at end of input).
///
/// # Errors
///
/// Returns "invalid exponent" (positioned at the start of the exponent) when
/// the consumed text does not parse as an `i8`, e.g. a bare sign or a value
/// out of range.
fn consume_exponent(&mut self) -> UcumResult<'t, i8> {
    let begin = self.pos;
    let lead = match self.txt.get(begin) {
        Some(&byte) => byte,
        None => return Ok(1),
    };
    if !(lead == b'-' || lead == b'+' || lead.is_ascii_digit()) {
        return Ok(1);
    }

    let tail = self
        .txt
        .iter()
        .skip(begin + 1)
        .take_while(|byte| byte.is_ascii_digit())
        .count();
    self.pos = begin + 1 + tail;

    // SAFETY: the slice holds one ASCII sign or digit followed only by ASCII
    // digits, hence it is valid UTF-8.
    let literal: &str = unsafe { from_utf8_unchecked(&self.txt[begin..self.pos]) };
    literal.parse::<i8>().map_err(|cause| {
        UcumError::new("invalid exponent", self.txt, begin).with_cause(Box::new(cause))
    })
}
/// Consumes an optional `{...}` annotation.
///
/// Returns `None` without consuming anything when the next byte is not `{`;
/// otherwise returns the bytes between the braces and leaves the position
/// just past `}`. Allowed content is any byte in `MIN_CHAR..=MAX_CHAR`
/// except braces, plus ASCII whitespace when `ignore_ws` is set.
///
/// # Errors
///
/// Fails on a nested `{`, on a disallowed byte, or when the input ends
/// before `}`.
fn consume_annotation(&mut self) -> UcumResult<'t, Option<&'t [u8]>> {
    if self.txt.get(self.pos) != Some(&b'{') {
        return Ok(None);
    }
    self.pos += 1;
    let begin = self.pos;

    while let Some(&byte) = self.txt.get(self.pos) {
        if byte == b'}' {
            let end = self.pos;
            self.pos += 1;
            return Ok(Some(&self.txt[begin..end]));
        }
        if byte == b'{' {
            return self.err("curly braces must not be nested");
        }
        let printable = MIN_CHAR <= byte && byte <= MAX_CHAR;
        let tolerated_blank = self.options.ignore_ws && byte.is_ascii_whitespace();
        if !(printable || tolerated_blank) {
            return self.err("invalid character in annotation");
        }
        self.pos += 1;
    }
    self.err("missing closing curly bracket '}'")
}
}
/// Lowest byte accepted in unit symbols, bracketed sections and annotations:
/// ASCII `!` (33).
const MIN_CHAR: u8 = b'!';
/// Highest byte accepted in unit symbols, bracketed sections and annotations:
/// ASCII `~` (126).
const MAX_CHAR: u8 = b'~';
// Unit tests for the individual `consume_*` scanners and for `ast`.
// Each `test_case` line is `input[, expected]; "label"`.
#[cfg(test)]
mod test {
use super::*;
use test_case::test_case;
// Every input is a (possibly empty) number followed by the one-byte unit `m`.
// `consume_value` must return exactly the numeric prefix, both when the unit
// follows it and when the number is the whole input.
#[test_case("m"; "empty")]
#[test_case("42m"; "42")]
#[test_case("+42m"; "p42")]
#[test_case("-42m"; "m42")]
#[test_case("0042m"; "0042")]
#[test_case("3.14m"; "3p14")]
#[test_case("+3.14m"; "p3p14")]
#[test_case("-3.14m"; "m3p14")]
#[test_case("1.m"; "1p")]
#[test_case(".5m"; "p5")]
#[test_case("+.5m"; "pp5")]
#[test_case("-.5m"; "mp5")]
#[test_case("9e12m"; "9e12")]
#[test_case("9E12m"; "9ee12")]
#[test_case("9e+12m"; "9ep12")]
#[test_case("9e-12m"; "9em12")]
#[test_case("3.14e3m"; "3p14e3")]
#[test_case("-3.14e+3m"; "-3p14ep3")]
#[test_case("+987.6543e-210m"; "p987p6543em210")]
fn consume_value_ok(txt: &str) {
let txt = txt.as_bytes();
// Strip the trailing `m` to get the expected numeric literal.
let number = &txt[..txt.len() - 1];
let units = UnitSystem::<f64>::default();
let mut p = units.parser(txt);
assert_eq!(p.consume_value(), number);
let mut p = units.parser(number);
assert_eq!(p.consume_value(), number);
}
// Inputs of which only a prefix (possibly empty) forms a valid number.
#[test_case("42 ", "42" ; "space")]
#[test_case("42é", "42" ; "out of range")]
#[test_case("2+2", "2" ; "2 plus 2")]
#[test_case("1.2.3", "1.2" ; "1p2p3")]
#[test_case("1.2+3", "1.2" ; "1p2 plus 3")]
#[test_case("1e2.3", "1e2" ; "1e2p3")]
#[test_case(".", "" ; "point")]
#[test_case("+", "" ; "plus")]
#[test_case("-", "" ; "minus")]
#[test_case("+.", "" ; "plus point")]
#[test_case("-.", "" ; "minus point")]
#[test_case("e12", "" ; "number less exponent")]
#[test_case(".e12", "" ; "point number less exponent")]
#[test_case("+.e12", "" ; "plus point number less exponent")]
#[test_case("42e", "42" ; "42e")]
#[test_case("42e-", "42" ; "42em")]
fn consume_value_partial(txt: &str, exp: &str) {
let txt = txt.as_bytes();
let exp = exp.as_bytes();
let units = UnitSystem::<f64>::default();
let mut p = units.parser(txt);
assert_eq!(p.consume_value(), exp);
}
// Inputs that `consume_unit` must consume entirely.
#[test_case(""; "empty")]
#[test_case("m")]
#[test_case("kg")]
#[test_case("K")]
#[test_case("rad")]
#[test_case("%"; "percent")]
#[test_case("10*"; "ten star")]
#[test_case("10^"; "ten carret")]
#[test_case("[pi]")]
#[test_case("ab[c+ef]")]
#[test_case("ab[{]")]
fn consume_unit_ok(txt: &str) {
let txt = txt.as_bytes();
let units = UnitSystem::<f64>::default();
let mut p = units.parser(txt);
assert_eq!(p.consume_unit().unwrap(), txt);
}
// `consume_unit` must stop in front of the delimiter, and the delimiter
// itself must not start a unit (second assertion).
#[test_case("m ", "m"; "space")]
#[test_case("m2", "m"; "digit")]
#[test_case("m+", "m"; "plus")]
#[test_case("m-", "m"; "minus")]
#[test_case("m.", "m"; "period")]
#[test_case("m/", "m"; "solidus")]
#[test_case("m{", "m"; "curly open")]
#[test_case("m}", "m"; "curly close")]
#[test_case("m(", "m"; "paren open")]
#[test_case("m)", "m"; "paren close")]
#[test_case("m]", "m"; "sqare close")]
#[test_case("m=", "m"; "equal")]
#[test_case("m\"", "m"; "quote")]
#[test_case("mé", "m"; "invalid")]
fn consume_unit_partial<'a>(txt: &'a str, exp: &str) {
let txt = txt.as_bytes();
let exp = exp.as_bytes();
let units = UnitSystem::<f64>::default();
let mut p = units.parser(txt);
assert_eq!(p.consume_unit().unwrap(), exp);
let units = UnitSystem::<f64>::default();
let mut p = units.parser(&txt[exp.len()..]);
assert_eq!(p.consume_unit().unwrap(), &b""[..]);
}
// Malformed bracketed sections must be reported as errors.
#[test_case("m["; "unclosed square bracket")]
#[test_case("m[a[b]c]"; "nested square brackets")]
#[test_case("m[é"; "invalid character")]
fn consume_unit_err(txt: &str) {
let txt = txt.as_bytes();
let units = UnitSystem::<f64>::default();
let mut p = units.parser(txt);
assert!(p.consume_unit().is_err());
}
// A missing exponent defaults to 1; otherwise the signed integer is returned.
#[test_case("", 1; "empty")]
#[test_case("a", 1; "spurious")]
#[test_case("{", 1; "annotation")]
#[test_case(" ", 1; "space")]
#[test_case("2", 2)]
#[test_case("42", 42)]
#[test_case("+42", 42; "plus 42")]
#[test_case("-42", -42; "minus 42")]
fn consume_exponent_ok(txt: &str, exp: i8) {
let txt = txt.as_bytes();
let units = UnitSystem::<f64>::default();
let mut p = units.parser(txt);
assert_eq!(p.consume_exponent().unwrap(), exp);
}
// A sign without digits is not a valid exponent.
#[test_case("+"; "plus")]
#[test_case("-"; "minus")]
fn consume_exponent_err(txt: &str) {
let txt = txt.as_bytes();
let units = UnitSystem::<f64>::default();
let mut p = units.parser(txt);
assert!(p.consume_exponent().is_err());
}
// The annotation content is everything between the outer braces.
#[test_case("{}"; "empty braces")]
#[test_case("{a}")]
#[test_case("{vol}")]
#[test_case("{a+- \"=][)(}"; "special characters")]
fn consume_annotation_ok<'a>(txt: &'a str) {
let txt = txt.as_bytes();
// Expected content: the input without its first and last byte.
let exp = &txt[1..txt.len() - 1];
let units = UnitSystem::<f64>::default();
let mut p = units.parser(txt);
assert_eq!(p.consume_annotation().unwrap(), Some(exp));
}
// Input that does not start with `{` yields no annotation and no error.
#[test_case(""; "empty")]
#[test_case(" "; "space")]
#[test_case("."; "period")]
#[test_case("/"; "solidus")]
#[test_case("a"; "letter")]
fn consume_annotation_empty<'a>(txt: &'a str) {
let txt = txt.as_bytes();
let units = UnitSystem::<f64>::default();
let mut p = units.parser(txt);
assert_eq!(p.consume_annotation().unwrap(), None);
}
// Malformed annotations must be reported as errors.
// NOTE(review): the first two labels say "square bracket(s)" although the
// inputs exercise curly braces; the labels are kept as they name the tests.
#[test_case("{"; "unclosed square bracket")]
#[test_case("{a{b}c}"; "nested square brackets")]
#[test_case("{aé"; "invalid character")]
fn consume_annotation_err(txt: &str) {
let txt = txt.as_bytes();
let units = UnitSystem::<f64>::default();
let mut p = units.parser(txt);
assert!(p.consume_annotation().is_err());
}
// Full expressions parsed with a non-exhaustive parser: trailing input that
// is not part of the expression is ignored rather than rejected.
#[test_case("42", AST::factor(b"42"); "42")]
#[test_case("m", AST::unit(b"m", 1, None); "m")]
#[test_case("m2", AST::unit(b"m", 2, None); "m2")]
#[test_case(" m 2 ", AST::unit(b"m", 2, None); "m2 with spaces")]
#[test_case("s-1", AST::unit(b"s", -1, None); "s minus 1")]
#[test_case("m.s", AST::product(AST::unit(b"m", 1, None), AST::unit(b"s", 1, None)); "m s")]
#[test_case("m/s", AST::division(AST::unit(b"m", 1, None), AST::unit(b"s", 1, None)); "m per s")]
#[test_case("/s", AST::division(AST::factor(b"1"), AST::unit(b"s", 1, None)); "per s")]
#[test_case("rad/s", AST::division(AST::unit(b"rad", 1, None), AST::unit(b"s", 1, None)); "rad per s")]
#[test_case("[pi].rad", AST::product(AST::unit(b"[pi]", 1, None), AST::unit(b"rad", 1, None)); "pi rad")]
#[test_case("2.[pi].rad", AST::product(AST::product(AST::factor(b"2"), AST::unit(b"[pi]", 1, None)), AST::unit(b"rad", 1, None)); "2pi rad")]
#[test_case("% {vol}", AST::unit(b"%", 1, Some(b"vol")); "percent vol")]
#[test_case("{pcs}", AST::unit(b"", 1, Some(b"pcs")); "pcs")]
#[test_case("1/2.3", AST::product(AST::division(AST::factor(b"1"), AST::factor(b"2")), AST::factor(b"3")); "half three")]
#[test_case("1/(2.[pi])", AST::division(AST::factor(b"1"), AST::product(AST::factor(b"2"), AST::unit(b"[pi]", 1, None))); "one on 2pi")]
#[test_case("(((1)/(2)).((3)))", AST::product(AST::division(AST::factor(b"1"), AST::factor(b"2")), AST::factor(b"3")); "many parenthesis")]
#[test_case("m a", AST::unit(b"m", 1, None); "spurious letters after space")]
#[test_case("m)", AST::unit(b"m", 1, None); "spurious closing paren")]
#[test_case("m 2 {a} x", AST::unit(b"m", 2, Some(b"a")); "spurious after complex unit")]
#[test_case("2 {a} x", AST::factor(b"2"); "spurious annotation after factor")]
fn ast<'a>(txt: &'a str, ast: Box<AST<'a>>) {
let txt = txt.as_bytes();
let units = UnitSystem::<f64>::default();
let mut p = units.parser(txt).exhaustive(false);
assert_eq!(p.parse_to_ast().unwrap(), ast);
}
// Expressions that are invalid even when trailing input is tolerated.
#[test_case("(42"; "missing closing parenthesis at eof")]
#[test_case("(42a"; "missing closing parenthesis at spurious")]
#[test_case("m-"; "wrong exponent")]
#[test_case("m{{}}"; "wrong annotation")]
#[test_case("m[[]]"; "wrong square brackets")]
fn ast_err<'a>(txt: &'a str) {
let txt = txt.as_bytes();
let units = UnitSystem::<f64>::default();
let mut p = units.parser(txt).exhaustive(false);
assert!(p.parse_to_ast().is_err());
}
}