use crate::word::Lexeme;
use std::collections::HashMap;
use std::sync::LazyLock;
/// Process-wide built-in lexicon; `make_builtin` runs once, on first access.
static LEXICON: LazyLock<Lexicon> = LazyLock::new(make_builtin);
/// Builds the built-in lexicon from the CSV resource embedded at compile time.
///
/// Each line of `../res/english.csv` is parsed into a [`Lexeme`] and inserted
/// in file order.
///
/// # Panics
///
/// Panics if any line cannot be converted into a [`Lexeme`]; the message
/// reports the 1-based line number and the offending text.
fn make_builtin() -> Lexicon {
    let source = include_str!("../res/english.csv");
    let mut lexicon = Lexicon::default();
    // Pair every line with its 1-based number for the panic message.
    for (line, line_no) in source.lines().zip(1usize..) {
        let Ok(lexeme) = Lexeme::try_from(line) else {
            panic!("Bad word on line {}: `{line}`", line_no);
        };
        lexicon.insert(lexeme);
    }
    lexicon
}
pub fn builtin() -> &'static Lexicon {
&LEXICON
}
/// Returns `true` if `c` is one of the characters treated as an apostrophe.
///
/// Recognized code points:
/// - U+0027 APOSTROPHE
/// - U+02BC MODIFIER LETTER APOSTROPHE
/// - U+2019 RIGHT SINGLE QUOTATION MARK
/// - U+FF07 FULLWIDTH APOSTROPHE
pub fn is_apostrophe(c: char) -> bool {
    matches!(c, '\u{0027}' | '\u{02BC}' | '\u{2019}' | '\u{FF07}')
}
/// Normalizes `word` into the canonical form used for lexicon lookups.
///
/// Every character accepted by [`is_apostrophe`] becomes a plain ASCII `'`;
/// every other character is replaced by its lowercase mapping (which may be
/// more than one character).
pub fn make_word(word: &str) -> String {
    let mut normalized = String::with_capacity(word.len());
    for ch in word.chars() {
        if is_apostrophe(ch) {
            normalized.push('\'');
        } else {
            // `char::to_lowercase` yields an iterator because a single
            // character can lowercase to several.
            normalized.extend(ch.to_lowercase());
        }
    }
    normalized
}
/// A collection of lexemes plus an index from word form to the lexemes
/// having that form.
#[derive(Default, Clone)]
pub struct Lexicon {
/// All lexemes, in insertion order.
words: Vec<Lexeme>,
/// Maps a lowercase word form to the indices (into `words`) of the lexemes
/// that have it.
forms: HashMap<String, Vec<usize>>,
}
impl IntoIterator for Lexicon {
type Item = Lexeme;
type IntoIter = std::vec::IntoIter<Self::Item>;
fn into_iter(mut self) -> Self::IntoIter {
self.words.sort();
self.words.into_iter()
}
}
impl Lexicon {
    /// Creates an empty lexicon.
    pub fn new() -> Self {
        Self::default()
    }

    /// Adds `word` to the lexicon and indexes each of its forms.
    pub fn insert(&mut self, word: Lexeme) {
        // Forms are indexed *before* the push so that `insert_form` can use
        // `self.words.len()` as the index the lexeme is about to occupy.
        for form in word.forms() {
            self.insert_form(form);
        }
        self.words.push(word);
    }

    /// Records that the lexeme about to be pushed has the form `word`.
    ///
    /// The key is normalized with [`make_word`], the same normalization used
    /// by [`Lexicon::contains`] and [`Lexicon::word_entries`]. Looking up and
    /// storing under the same key means a form with uppercase letters or a
    /// typographic apostrophe appends to the existing index list instead of
    /// replacing it or becoming unreachable.
    fn insert_form(&mut self, word: &str) {
        let n = self.words.len();
        self.forms.entry(make_word(word)).or_default().push(n);
    }

    /// Returns `true` if any lexeme has `word` as one of its forms.
    ///
    /// `word` is normalized with [`make_word`] before the lookup.
    pub fn contains(&self, word: &str) -> bool {
        self.forms.contains_key(&make_word(word))
    }

    /// Returns every lexeme that has `word` as one of its forms.
    ///
    /// `word` is normalized with [`make_word`] before the lookup. The result
    /// is empty when the form is unknown.
    pub fn word_entries(&self, word: &str) -> Vec<&Lexeme> {
        self.forms
            .get(&make_word(word))
            .map(|indices| indices.iter().map(|&i| &self.words[i]).collect())
            .unwrap_or_default()
    }

    /// Iterates over all indexed word forms, in arbitrary order.
    pub fn forms(&self) -> impl Iterator<Item = &String> {
        self.forms.keys()
    }

    /// Iterates over all lexemes in insertion order.
    pub fn iter(&self) -> impl Iterator<Item = &Lexeme> {
        self.words.iter()
    }
}