use crate::types::*;
use crate::utils::parallelism::MaybeParallelRefIterator;
use crate::{rule::id::Selector, rule::MatchSentence, rule::Rule, tokenizer::Tokenizer, Error};
use fs_err::File;
use serde::{Deserialize, Serialize};
use std::{
io::{BufReader, Read, Write},
iter::FromIterator,
path::Path,
};
/// Language-specific options for a rule set.
///
/// The type is (de)serializable through serde; `ids` and `ignore_ids` fall back to an
/// empty list when absent from the serialized input, while `allow_errors` has no
/// serde default and therefore must be present.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct RulesLangOptions {
// NOTE(review): presumably controls whether rules that fail to build are tolerated —
// the consumer is not visible in this file, confirm at the usage site.
pub allow_errors: bool,
// Selectors of rules to consider. NOTE(review): the meaning of an empty list
// (e.g. "use all rules") is decided by the consumer — confirm.
#[serde(default)]
pub ids: Vec<Selector>,
// Selectors of rules to ignore. NOTE(review): presumably applied on top of `ids` — confirm.
#[serde(default)]
pub ignore_ids: Vec<Selector>,
}
impl Default for RulesLangOptions {
fn default() -> Self {
RulesLangOptions {
allow_errors: true,
ids: Vec::new(),
ignore_ids: Vec::new(),
}
}
}
/// An ordered collection of [`Rule`]s.
///
/// The collection is (de)serializable through serde and defaults to an empty rule list.
#[derive(Serialize, Deserialize, Default)]
pub struct Rules {
// The rules in application order; a rule's position in this vector is used as a
// tie-breaker when suggestions of several rules start at the same character.
pub(crate) rules: Vec<Rule>,
}
impl Rules {
    /// Loads a rule set from the bincode-encoded file at path `p`.
    ///
    /// # Errors
    /// Fails if the file cannot be opened or its content cannot be deserialized.
    pub fn new<P: AsRef<Path>>(p: P) -> Result<Self, Error> {
        let file = File::open(p.as_ref())?;
        let loaded: Rules = bincode::deserialize_from(BufReader::new(file))?;
        Ok(loaded)
    }

    /// Deserializes a rule set from a reader yielding bincode-encoded data.
    ///
    /// # Errors
    /// Fails if the data cannot be deserialized.
    pub fn from_reader<R: Read>(reader: R) -> Result<Self, Error> {
        let loaded = bincode::deserialize_from(reader)?;
        Ok(loaded)
    }

    /// Serializes this rule set into `writer` using bincode.
    ///
    /// # Errors
    /// Fails if serialization or writing fails.
    pub fn to_writer<W: Write>(&self, writer: W) -> Result<(), Error> {
        bincode::serialize_into(writer, &self)?;
        Ok(())
    }

    /// Returns all rules as a shared slice.
    pub fn rules(&self) -> &[Rule] {
        self.rules.as_slice()
    }

    /// Returns all rules as a mutable slice.
    pub fn rules_mut(&mut self) -> &mut [Rule] {
        self.rules.as_mut_slice()
    }

    /// Returns an iterator over the rules whose id is matched by `selector`.
    pub fn select<'a>(&'a self, selector: &'a Selector) -> RulesIter<'a> {
        let inner = self.rules.iter();
        RulesIter {
            selector: Some(selector),
            inner,
        }
    }

    /// Returns an iterator over mutable references to the rules whose id is matched by `selector`.
    pub fn select_mut<'a>(&'a mut self, selector: &'a Selector) -> RulesIterMut<'a> {
        let inner = self.rules.iter_mut();
        RulesIterMut {
            selector: Some(selector),
            inner,
        }
    }

    /// Applies every enabled rule to `sentence` and returns the non-overlapping suggestions.
    ///
    /// Suggestions are ordered by the character start of their span. When several
    /// suggestions start at the same character, the one produced by the rule with the
    /// higher index comes first. Walking that order, a suggestion is kept only if none of
    /// the characters it covers is already covered by a previously kept suggestion.
    pub fn apply(&self, sentence: &Sentence) -> Vec<Suggestion> {
        let sentence = MatchSentence::new(sentence);

        // Gather the suggestions of all enabled rules, tagged with the rule's index.
        let mut candidates: Vec<(usize, Suggestion)> = self
            .rules
            .maybe_par_iter()
            .enumerate()
            .filter(|(_, rule)| rule.enabled())
            .flat_map(|(index, rule)| {
                rule.apply(&sentence)
                    .into_iter()
                    .map(|suggestion| (index, suggestion))
                    .collect::<Vec<_>>()
            })
            .collect();

        // Ascending start position first, descending rule index second.
        candidates.sort_by(|(index_a, a), (index_b, b)| {
            let start_a = a.span().char().start;
            let start_b = b.span().char().start;
            start_a.cmp(&start_b).then_with(|| index_b.cmp(index_a))
        });

        // One flag per character of the sentence text; `true` means already covered.
        let mut taken = vec![false; sentence.text().chars().count()];
        let mut accepted = Vec::new();

        for (_, suggestion) in candidates {
            // Shift the span by the sentence start so it indexes into `taken`
            // (presumably turning a text-relative span into a sentence-relative one).
            let span = suggestion.span().clone().lshift(sentence.span().start());
            if taken[span.char().clone()].iter().any(|&used| used) {
                continue;
            }
            for slot in &mut taken[span.char().clone()] {
                *slot = true;
            }
            accepted.push(suggestion);
        }

        accepted
    }

    /// Computes suggestions for `text` by applying the rules to each sentence that
    /// `tokenizer` produces. An empty `text` yields no suggestions.
    pub fn suggest(&self, text: &str, tokenizer: &Tokenizer) -> Vec<Suggestion> {
        if text.is_empty() {
            return Vec::new();
        }

        tokenizer
            .pipe(text)
            .into_iter()
            .flat_map(|sentence| self.apply(&sentence))
            .collect()
    }

    /// Returns `text` with the suggestions from [`Rules::suggest`] applied via
    /// [`apply_suggestions`].
    pub fn correct(&self, text: &str, tokenizer: &Tokenizer) -> String {
        apply_suggestions(text, &self.suggest(text, tokenizer))
    }
}
/// Applies `suggestions` to `text` and returns the corrected text.
///
/// For every suggestion the characters covered by its span are replaced with the
/// suggestion's first replacement. Spans are interpreted as character (not byte)
/// indices into the original `text`; a running offset shifts each span by the length
/// change caused by the suggestions applied before it. The suggestions are therefore
/// expected to be ordered by ascending start and to not overlap.
///
/// Suggestions that carry no replacement are skipped instead of causing a panic.
///
/// # Panics
/// Panics if a (shifted) span lies outside of the text.
pub fn apply_suggestions(text: &str, suggestions: &[Suggestion]) -> String {
    // Net length change (in characters) introduced by the suggestions applied so far.
    let mut offset: isize = 0;
    let mut chars: Vec<char> = text.chars().collect();

    for suggestion in suggestions {
        // Only the first replacement is applied; without one there is nothing to do.
        let replacement = match suggestion.replacements().first() {
            Some(replacement) => replacement,
            None => continue,
        };

        let start = (suggestion.span().char().start as isize + offset) as usize;
        let end = (suggestion.span().char().end as isize + offset) as usize;

        let len_before = chars.len();
        // The returned `Splice` is dropped at the end of the statement, which performs
        // the replacement in place.
        chars.splice(start..end, replacement.chars());

        // The buffer grew/shrank by exactly (replacement length - span length).
        offset += chars.len() as isize - len_before as isize;
    }

    chars.into_iter().collect()
}
/// Iterator over shared references to rules, optionally filtered by a [`Selector`].
pub struct RulesIter<'a> {
// When `Some`, only rules whose id is matched by the selector are yielded;
// when `None`, every rule is yielded.
selector: Option<&'a Selector>,
// The underlying iterator over the rule slice.
inner: std::slice::Iter<'a, Rule>,
}
impl<'a> Iterator for RulesIter<'a> {
    type Item = &'a Rule;

    /// Advances to the next rule accepted by the selector; without a selector every
    /// rule is accepted.
    fn next(&mut self) -> Option<Self::Item> {
        let selector = self.selector;
        for rule in self.inner.by_ref() {
            let accepted = match selector {
                Some(sel) => sel.is_match(rule.id()),
                None => true,
            };
            if accepted {
                return Some(rule);
            }
        }
        None
    }
}
/// Iterator over mutable references to rules, optionally filtered by a [`Selector`].
pub struct RulesIterMut<'a> {
// When `Some`, only rules whose id is matched by the selector are yielded;
// when `None`, every rule is yielded.
selector: Option<&'a Selector>,
// The underlying mutable iterator over the rule slice.
inner: std::slice::IterMut<'a, Rule>,
}
impl<'a> Iterator for RulesIterMut<'a> {
    type Item = &'a mut Rule;

    /// Advances to the next rule accepted by the selector; without a selector every
    /// rule is accepted.
    fn next(&mut self) -> Option<Self::Item> {
        let selector = self.selector;
        for rule in self.inner.by_ref() {
            let accepted = match selector {
                Some(sel) => sel.is_match(rule.id()),
                None => true,
            };
            if accepted {
                return Some(rule);
            }
        }
        None
    }
}
impl IntoIterator for Rules {
type Item = Rule;
type IntoIter = std::vec::IntoIter<Rule>;
fn into_iter(self) -> Self::IntoIter {
self.rules.into_iter()
}
}
impl<R: Into<Rule>> FromIterator<R> for Rules {
    /// Builds a rule collection from any iterator whose items convert into [`Rule`],
    /// keeping the iteration order.
    fn from_iter<I: IntoIterator<Item = R>>(iter: I) -> Self {
        Self {
            rules: iter.into_iter().map(Into::into).collect(),
        }
    }
}