use super::SingleTokenPattern;
use smallvec::SmallVec;
use crate::{CharString, Token, char_ext::CharExt};
/// A collection of words, each stored as a sequence of `char`s, that can be
/// used as a [`SingleTokenPattern`] to test whether a token is one of them.
#[derive(Debug, Default, Clone)]
pub struct WordSet {
/// The stored words. `add` and `add_chars` only push entries that are not
/// already present, so this behaves like a small insertion-ordered set.
words: SmallVec<[CharString; 4]>,
}
impl WordSet {
    /// Inserts `word` into the set unless an identical entry is already stored.
    ///
    /// The duplicate check is an exact, character-for-character comparison, so
    /// entries that differ only in letter case are stored separately.
    pub fn add(&mut self, word: &str) {
        // Check first so that the character buffer is only allocated when the
        // word actually needs to be inserted.
        if !self.contains(word) {
            self.words.push(word.chars().collect());
        }
    }

    /// Inserts the word spelled by `chars` unless an identical entry is
    /// already stored.
    ///
    /// Behaves like [`Self::add`], but takes an already-decoded character slice.
    pub fn add_chars(&mut self, chars: &[char]) {
        if !self.words.iter().any(|i| i.as_ref() == chars) {
            self.words.push(chars.into());
        }
    }

    /// Returns `true` if `word` is stored in the set.
    ///
    /// This is an exact comparison: unlike the `SingleTokenPattern`
    /// implementation for this type, it neither normalizes characters nor
    /// ignores letter case.
    pub fn contains(&self, word: &str) -> bool {
        // Compare the character streams directly instead of collecting `word`
        // into a temporary buffer just to test for equality.
        self.words
            .iter()
            .any(|stored| stored.iter().copied().eq(word.chars()))
    }

    /// Builds a set containing every string in `words`; duplicates are
    /// inserted only once.
    ///
    /// The strings are copied into the set, so they only need to live for the
    /// duration of the call.
    pub fn new(words: &[&str]) -> Self {
        let mut set = Self::default();
        for word in words {
            set.add(word);
        }
        set
    }
}
impl SingleTokenPattern for WordSet {
    /// Returns `true` when `token` is a word token whose text in `source`
    /// equals one of the stored words.
    ///
    /// Both sides are passed through `CharExt::normalized` and then compared
    /// ASCII-case-insensitively, character by character. The tests in this
    /// file rely on that to match mixed-case input and a typographic
    /// apostrophe against a plain one.
    fn matches_token(&self, token: &Token, source: &[char]) -> bool {
        // Only word tokens can ever match; everything else is rejected early.
        if !token.kind.is_word() {
            return false;
        }

        let candidate = token.get_ch(source);

        self.words.iter().any(|entry| {
            // `zip` stops at the shorter side, so the lengths must agree for
            // the pairwise comparison below to mean "the whole word matches".
            candidate.len() == entry.len()
                && candidate
                    .iter()
                    .map(CharExt::normalized)
                    .zip(entry.iter().map(CharExt::normalized))
                    .all(|(lhs, rhs)| lhs.eq_ignore_ascii_case(&rhs))
        })
    }
}
#[cfg(test)]
mod tests {
    use super::WordSet;
    use crate::{Document, Span, patterns::DocPattern};

    /// Words from the set are located in a plain lowercase sentence.
    #[test]
    fn fruit() {
        let fruits = WordSet::new(&["banana", "apple", "orange"]);
        let document = Document::new_markdown_default_curated("I ate a banana and an apple today.");

        let expected = vec![Span::new(6, 7), Span::new(12, 13)];
        assert_eq!(fruits.find_all_matches_in_doc(&document), expected);
    }

    /// Letter case in the document does not affect which tokens match.
    #[test]
    fn fruit_whack_capitalization() {
        let fruits = WordSet::new(&["banana", "apple", "orange"]);
        let document = Document::new_markdown_default_curated("I Ate A bAnaNa And aN apPlE today.");

        let expected = vec![Span::new(6, 7), Span::new(12, 13)];
        assert_eq!(fruits.find_all_matches_in_doc(&document), expected);
    }

    /// A word added with a plain apostrophe matches text written with a
    /// typographic one.
    #[test]
    fn supports_typographic_apostrophes() {
        let contractions = WordSet::new(&["They're"]);
        let document = Document::new_markdown_default_curated("They’re");

        let expected = vec![Span::new(0, 1)];
        assert_eq!(contractions.find_all_matches_in_doc(&document), expected);
    }
}