use super::rules::{FnRule, MatchBounds, MatchResult, MyResult, RuleResult, TokenDesc};
use core::borrow::BorrowMut;
use nom::{
self, alt, char, map, map_res, named, named_args, opt, pair, preceded, recognize, tag,
take_while, tuple, types::CompleteStr, ErrorKind,
};
use chrono::{DateTime, TimeZone};
use strsim::damerau_levenshtein;
pub use super::errors::{intersection_error, DateTimeError, SemanticError};
/// Edit distance between a tokenized word and a pattern, as produced by
/// `damerau_levenshtein`. `Dist(0)` means an exact match is required/achieved.
///
/// A plain `usize` wrapper is totally ordered and trivially copyable, so we
/// derive `Eq`, `Ord` and `Copy` in addition to the original traits; this is
/// backward compatible and lets callers compare/sort distances without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Dist(pub usize);
// Picks the maximal fuzzy-match distance for a parser: `Dist(0)` (literal
// match only) when `exact_match` is set, otherwise the caller-provided
// `$max_dist`.
macro_rules! set {
    ( max_dist = $max_dist: expr, $exact_match: expr ) => {
        if $exact_match {
            crate::rules::combinators::Dist(0)
        } else {
            $max_dist
        }
    };
}
// Generates a `named_args!` parser function that recognizes a fixed word and
// yields it as a `TokenDesc`.
//
// Two forms:
//  * single pattern:  define!(name: (token, priority), "repr", max_dist);
//  * alternatives:    define!(name: [(tok, p), "a", d] | [(tok, p), "b", d]);
//
// Every generated parser takes an `exact_match` flag; when it is true, `set!`
// forces the allowed distance to Dist(0) so only literal matches succeed.
macro_rules! define {
// Single-pattern form: one word, one token.
( $func_name: ident: ($token: expr, $p: expr), $repr: expr, $max_dist: expr ) => (
named_args!(pub $func_name<'a>(exact_match: bool)<CompleteStr<'a>, TokenDesc>,
call!(crate::rules::combinators::recognize_word, CompleteStr($repr), set!(max_dist=$max_dist,
exact_match), crate::rules::tokens::PToken::PToken($token, $p))
);
);
// Alternatives form: the first matching pattern wins (nom's alt! semantics).
( $func_name: ident: $([($token: expr, $p: expr), $repr: expr, $max_dist: expr])|* ) => (
named_args!(pub $func_name<'a>(exact_match: bool)<CompleteStr<'a>, TokenDesc>,
alt!(
$(call!(crate::rules::combinators::recognize_word, CompleteStr($repr), set!(max_dist=$max_dist,
exact_match), crate::rules::tokens::PToken::PToken($token, $p))) |*
)
);
);
}
// Generates a parser function that recognizes the single character `$repr`
// (skipping ignorable prefix characters via recognize_symbol) and yields a
// `Token::Char` token with priority `$p` at distance 0.
// NOTE(review): `Token` is not imported in this module; it must be in scope
// at the macro expansion site -- confirm.
macro_rules! define_char {
( $func_name: ident: $p: expr, $repr: expr ) => {
fn $func_name(input: CompleteStr) -> crate::rules::rules::MyResult {
if let Ok((tail, _)) = crate::rules::combinators::recognize_symbol(input, $repr) {
return Ok((
tail,
TokenDesc::new(
crate::rules::tokens::PToken::PToken(Token::Char, $p),
crate::rules::combinators::Dist(0),
),
));
}
// Character not found: report a generic UNKNOWN parse error.
crate::rules::combinators::wrap_error(input, crate::rules::errors::UNKNOWN)
}
};
}
// Generates a parser function that recognizes a (possibly signed) integer and
// wraps it via `$ctor` into a token with priority `$p` at distance 0.
// NOTE(review): `recognize_int` already yields i32 (see its named! signature),
// so the `n as i32` cast below is a no-op kept for clarity/safety.
macro_rules! define_num {
( $func_name: ident: ($ctor: expr, $p: expr) ) => {
fn $func_name(input: CompleteStr) -> crate::rules::rules::MyResult {
if let Ok((tail, n)) = crate::rules::combinators::recognize_int(input) {
return Ok((
tail,
TokenDesc::new(
crate::rules::tokens::PToken::PToken($ctor(n as i32), $p),
crate::rules::combinators::Dist(0),
),
));
}
// No integer at this position: report a generic UNKNOWN parse error.
crate::rules::combinators::wrap_error(input, crate::rules::errors::UNKNOWN)
}
};
}
// Generates a parser that runs every listed parser against the same input via
// `best_fit` and keeps the single unambiguous closest match (by edit distance).
macro_rules! combine {
( $func_name: ident => $($f: ident) |* ) => (
named_args!(pub $func_name<'a>(exact_match: bool)<CompleteStr<'a>, TokenDesc>,
call!(crate::rules::combinators::best_fit, exact_match, vec![$(&$f),*])
);
);
}
// Generates the `interpret` entry point for a rule module.
//
// The expansion expects the surrounding module to provide:
//  * `parse(CompleteStr, bool)` returning `(tail, (skipped, tokens))`, and
//  * `make_time(&RuleResult, DateTime<Tz>, &str)` building the time context.
// `$n` is the number of token positions the rule's parser produces.
macro_rules! make_interpreter {
( positions = $n: expr ) => {
use tuple::TupleElements;
pub(crate) fn interpret<Tz: TimeZone>(
input: &str,
exact_match: bool,
tz: DateTime<Tz>,
) -> Result<RuleResult, crate::rules::errors::SemanticError> {
let mut res = RuleResult::new();
match parse(CompleteStr(input), exact_match) {
Ok((tail, (skipped, tt))) => {
// Absolute bounds of the match inside `input`; `skipped` holds
// lengths of symbols consumed before the match started.
let bounds =
crate::rules::combinators::match_bounds(skipped.iter().sum(), input, tail);
res.set_bounds(Some(bounds));
// Copy each recognized token into the result, position by position.
for idx in 0..$n {
res.set_token(tt.get(idx).unwrap());
}
res.set_tail(*tail);
// Interpret only the matched slice of the input string.
match make_time(&res, tz, &input[bounds.start_idx..bounds.end_idx]) {
Ok(ctx) => res.set_context(ctx),
Err(mut err) => {
// Attach location info so callers can report where the
// semantic interpretation failed.
err.set_bounds(bounds);
err.set_tail(tail);
return Err(err);
}
}
}
// ManyTill failure means nothing matched at all: return an empty
// result carrying the unparsed input as the tail.
Err(nom::Err::Error(nom::Context::Code(ref input, nom::ErrorKind::ManyTill))) => {
res.set_tail(input);
}
// parse() is expected to fail only with ManyTill -- TODO confirm.
_ => unreachable!(),
}
Ok(res)
}
};
}
/// True for characters the tokenizer may skip over: anything that is neither
/// alphanumeric nor one of the structural separators '/', ':' and '-'.
fn is_ignorable(c: char) -> bool {
    match c {
        '/' | ':' | '-' => false,
        other => !other.is_alphanumeric(),
    }
}
// Consumes a (possibly empty) run of ignorable characters from the front of
// the input, returning the consumed prefix.
named!(trim<CompleteStr, CompleteStr>,
take_while!(is_ignorable)
);
/// True for characters that may appear inside a tokenized word:
/// '.', ':' and alphanumerics.
fn is_word_symbol(c: char) -> bool {
    match c {
        '.' | ':' => true,
        other => other.is_alphanumeric(),
    }
}
// Reads the next word: skips ignorable characters, then takes the longest run
// of word symbols ('.', ':', alphanumerics).
named!(tokenize_word<CompleteStr, CompleteStr>,
preceded!(trim, take_while!(is_word_symbol))
);
// Measures how many bytes the next token occupies, including the ignorable
// characters surrounding it on both sides.
named!(pub(crate) tokenize_count_symbols<CompleteStr, usize>,
map!(tuple!(trim, take_while!(|c: char| c == '.' || c == ':' || c.is_alphanumeric()), trim),
|(prefix, word, suffix)| {
prefix.len() + word.len() + suffix.len()
})
);
// Parses an optional sign followed by digits into an i32, skipping ignorable
// prefix characters. "-" negates the value; "+" and no sign yield it as-is.
named!(pub(crate) recognize_int<CompleteStr, i32>,
map!(
preceded!(trim, pair!(
opt!(alt!(tag!("+") | tag!("-"))),
map_res!(recognize!(nom::digit), |s: CompleteStr| s.parse::<i32>()))
),
|(sign, value): (Option<nom::types::CompleteStr<'_>>, i32)| {
// Only an explicit "-" flips the sign; anything else maps to +1.
sign.and_then(|s| if s == CompleteStr("-") { Some(-1) } else { None }).unwrap_or(1) * value
})
);
// Recognizes the single character `c`, allowing ignorable characters before it.
named_args!(pub recognize_symbol<'a>(c: char)<CompleteStr<'a>, char>,
preceded!(trim, char!(c))
);
/// Parser that always succeeds without consuming any input, producing the
/// placeholder `Stub` token at distance 0.
pub(crate) fn stub(input: CompleteStr) -> MyResult {
    let placeholder = TokenDesc::new(
        crate::rules::tokens::PToken::Stub,
        crate::rules::combinators::Dist(0),
    );
    Ok((input, placeholder))
}
// Builds a nom error carrying `error_code` as a Custom error kind, positioned
// at `input` (the unconsumed text where recognition failed).
#[inline]
pub(crate) fn wrap_error(input: CompleteStr, error_code: u32) -> MyResult {
Err(nom::Err::Error(nom::simple_errors::Context::Code(
input,
ErrorKind::Custom(error_code),
)))
}
/// Reads the next word from `input` and matches it against `pattern`.
///
/// Dots are stripped from the tokenized word before comparison, so
/// abbreviations such as "p.m." compare equal to "pm". When `max_dist` is
/// `Dist(0)` an exact comparison is performed; otherwise the
/// Damerau-Levenshtein distance is computed and the match succeeds when it
/// does not exceed `max_dist`. On success, returns the unconsumed tail and a
/// `TokenDesc` carrying `token` plus the achieved distance; otherwise wraps
/// an UNKNOWN error around the original input.
///
/// Fix: the original bound `word` mutably and went through
/// `BorrowMut::borrow_mut` just to call `replace`; `replace` only reads the
/// string (auto-deref through `CompleteStr`), so the `mut` binding and the
/// `BorrowMut` detour were needless.
pub(crate) fn recognize_word<'a>(
    input: CompleteStr<'a>,
    pattern: CompleteStr<'a>,
    max_dist: crate::rules::combinators::Dist,
    token: super::tokens::PToken,
) -> MyResult<'a> {
    if let Ok((tail, word)) = tokenize_word(input) {
        // Normalize: drop '.' so dotted abbreviations match their plain form.
        let normalized_word = word.replace(".", "");
        if max_dist == crate::rules::combinators::Dist(0) {
            // Exact match required.
            if normalized_word == *pattern {
                return Ok((
                    tail,
                    TokenDesc::new(token, crate::rules::combinators::Dist(0)),
                ));
            }
        } else {
            // Fuzzy match within the allowed edit distance.
            let dist = Dist(damerau_levenshtein(&normalized_word, *pattern));
            if dist <= max_dist {
                return Ok((tail, TokenDesc::new(token, dist)));
            }
        }
    }
    wrap_error(input, crate::rules::errors::UNKNOWN)
}
/// Runs every combinator against the same `input` and returns the match with
/// the smallest edit distance — but only when that minimum is achieved by
/// exactly one combinator. Ambiguity (several candidates at the same minimal
/// distance) or zero successful candidates yields an UNKNOWN error.
pub(crate) fn best_fit<'a>(
    input: CompleteStr<'a>,
    exact_match: bool,
    combinators: Vec<&dyn Fn(CompleteStr<'a>, bool) -> MyResult<'a>>,
) -> MyResult<'a> {
    // Best distance seen so far; starts "infinitely" far away.
    let mut best_dist = Dist(std::usize::MAX);
    let mut best_token = crate::rules::tokens::PToken::Stub;
    let mut best_tail = CompleteStr("");
    // How many candidates currently share `best_dist`.
    let mut hits_at_best = 0;
    for parser in combinators.into_iter() {
        let (tail, TokenDesc { token, dist }) = match parser(input, exact_match) {
            Ok(parsed) => parsed,
            Err(_) => continue,
        };
        if dist < best_dist {
            // Strictly better candidate: restart the tie count.
            best_dist = dist;
            best_token = token;
            best_tail = tail;
            hits_at_best = 1;
        } else if dist == best_dist {
            hits_at_best += 1;
        }
    }
    if hits_at_best == 1 {
        Ok((best_tail, TokenDesc::new(best_token, best_dist)))
    } else {
        wrap_error(input, crate::rules::errors::UNKNOWN)
    }
}
// Walks `matched_tokens` (assumed sorted by start index; see the sort in
// apply_generic) and collapses every run of mutually overlapping matches into
// a single intersection error, forwarding non-overlapping items unchanged.
pub(crate) fn remove_overlapped<'a>(
source_str: &'a str,
matched_tokens: &'a [Result<MatchResult, SemanticError<'a>>],
) -> Vec<Result<MatchResult, SemanticError<'a>>> {
let mut result: Vec<Result<MatchResult, SemanticError>> = Vec::new();
// Accumulated bounds of the current overlapping run, if one is open.
let mut overlap: Option<MatchBounds> = None;
// Start/end of the most recently accepted window within source_str.
let mut min_idx = 0;
let mut max_idx = None;
// Previous item, held back until we know its successor does not overlap it.
let mut prev_elem = None;
let mut f = |item: &'a Result<MatchResult, SemanticError>, start_idx, end_idx| {
if max_idx.map_or(false, |x| x >= start_idx) {
// Current item begins before the previous one ended: extend (or open)
// the overlap run.
overlap = match overlap {
None => Some(MatchBounds::new(min_idx, end_idx)),
Some(bounds) => Some(MatchBounds::new(bounds.start_idx, end_idx)),
};
} else if overlap.is_some() {
// The overlap run just ended; emit one error covering the whole run.
result.push(Err(intersection_error(
&source_str[overlap.unwrap().start_idx..overlap.unwrap().end_idx],
)));
overlap = None;
} else if prev_elem.is_some() {
// Previous item turned out to be overlap-free; keep it.
result.push(prev_elem.take().unwrap());
}
// Advance the window only when this item extends past the current end.
if max_idx.map_or(true, |x| end_idx > x) {
min_idx = start_idx;
max_idx = Some(end_idx);
prev_elem = Some(item.clone());
}
};
let mut last_item = None;
for item in matched_tokens.iter() {
match item {
Ok(token) => f(&item, token.get_start_idx(), token.get_end_idx()),
Err(token) => f(&item, token.get_start_idx(), token.get_end_idx()),
}
last_item = Some(item);
}
// Flush whatever is pending: either the trailing item or a still-open
// overlap run (never both).
if overlap.is_none() {
if last_item.is_some() {
result.push(last_item.unwrap().clone());
}
} else {
result.push(Err(intersection_error(
&source_str[overlap.unwrap().start_idx..overlap.unwrap().end_idx],
)));
}
result
}
// Applies every rule to `source_str`, re-running each rule on the tail left by
// its previous match until the rule stops matching, then sorts all matches by
// start position and collapses overlapping ones into errors.
#[inline]
pub(crate) fn apply_generic<'a, Tz: TimeZone + 'a>(
date_time: DateTime<Tz>,
source_str: &'a str,
rules: &'a [FnRule<Tz>],
exact_match: bool,
) -> Vec<Result<MatchResult, DateTimeError>> {
let mut matched_tokens = Vec::new();
for rule in rules {
let mut input = source_str;
// Byte offset of `input` within `source_str`, used to translate the
// per-parse bounds back into absolute indices.
let mut end_of_last_match_idx = 0;
loop {
match rule(input, exact_match, date_time.clone()) {
Ok(RuleResult {
tail,
bounds: Some(bounds),
context,
..
}) => {
matched_tokens.push(Ok(MatchResult::new(
context,
end_of_last_match_idx + bounds.start_idx,
end_of_last_match_idx + bounds.end_idx,
)));
// Continue scanning after the just-matched region.
end_of_last_match_idx += bounds.end_idx;
input = tail;
}
// No bounds => the rule matched nothing more; move to next rule.
Ok(RuleResult { bounds: None, .. }) => {
break;
}
Err(err) => {
// Record the semantic error and keep scanning from where the
// rule stopped. NOTE(review): the error's indices are not
// offset by end_of_last_match_idx here -- confirm bounds
// inside errors are already absolute.
input = err.get_tail();
matched_tokens.push(Err(err));
}
}
}
}
// Order by start index so remove_overlapped can detect overlaps linearly.
matched_tokens.sort_by_key(|k| match k {
Ok(x) => x.get_start_idx(),
Err(x) => x.get_start_idx(),
});
let tmp = remove_overlapped(source_str, &matched_tokens);
// Convert internal SemanticErrors into the public DateTimeError type.
tmp.iter()
.map(|item| match item {
Err(x) => Err(x.extract_error()),
Ok(x) => Ok(*x),
})
.collect()
}
/// Computes the bounds of a match inside `input`: it starts after the
/// `prefix_len` skipped bytes and ends where the unconsumed `tail` begins.
#[inline]
pub(crate) fn match_bounds(
    prefix_len: usize,
    input: &str,
    tail: CompleteStr,
) -> crate::rules::rules::MatchBounds {
    // Everything except the tail was consumed by the match.
    let consumed = input.len() - tail.len();
    crate::rules::rules::MatchBounds::new(prefix_len, consumed)
}