use std::collections::HashMap;
use uuid::Uuid;
use crate::config::Config;
use crate::language_entry::{self, DictionaryEntry};
use crate::store::Store;
use crate::store::hierarchy::Hierarchy;
use crate::store::node::NodeKind;
/// Index from a surface form to the dictionary entry it belongs to.
///
/// [`LanguageEntryIndex::lookup`] lowercases the query before probing the
/// map, so entries stored under lowercase keys are found regardless of the
/// query's letter case.
#[derive(Debug, Default, Clone)]
pub(crate) struct LanguageEntryIndex {
    /// Dictionary entries keyed by surface form.
    forms: HashMap<String, DictionaryEntry>,
}

impl LanguageEntryIndex {
    /// Returns `true` when no form has been indexed.
    pub(crate) fn is_empty(&self) -> bool {
        self.forms.is_empty()
    }

    /// Looks up the entry registered for `form`.
    ///
    /// The query is lowercased first; no trimming or other normalisation is
    /// applied. Returns `None` when the lowercased form is not a key.
    pub(crate) fn lookup(&self, form: &str) -> Option<&DictionaryEntry> {
        let key = form.to_lowercase();
        self.forms.get(key.as_str())
    }
}
/// Builds the lexicon and the language-entry index from the node hierarchy.
///
/// The work happens in this order:
///
/// 1. `hierarchy` is scanned for nodes whose `system_tag` equals one of the
///    places / characters / notes / artefacts / languages tags; the id of
///    each is remembered (if several nodes carry the same tag, the last one
///    iterated wins).
/// 2. Stemmer algorithms are resolved from `cfg` (see `stemmer_algorithms`).
/// 3. The roots that were found are registered as books, in the order
///    Place, Character, Artefact, Note.
/// 4. Under the languages root, every grandchild titled "Dictionary"
///    (ASCII case-insensitive) is registered as a `Language` book and kept
///    as a dictionary root.
/// 5. The lexicon is built from the books and algorithms; paragraphs below
///    the dictionary roots are then parsed into `LanguageEntryIndex`, and
///    the surface forms collected on the way are handed to
///    `Lexicon::add_extra_forms`.
///
/// Returns the lexicon together with the populated index.
pub(super) fn build_lexicon(
    hierarchy: &Hierarchy,
    cfg: &Config,
    store: &Store,
) -> (super::lexicon::Lexicon, LanguageEntryIndex) {
    use super::lexicon::LexCategory;

    // Locate the system roots by their tag.
    let mut places: Option<Uuid> = None;
    let mut characters: Option<Uuid> = None;
    let mut notes: Option<Uuid> = None;
    let mut artefacts: Option<Uuid> = None;
    let mut languages: Option<Uuid> = None;
    for node in hierarchy.iter() {
        match node.system_tag.as_deref() {
            Some(crate::store::SYSTEM_TAG_PLACES) => places = Some(node.id),
            Some(crate::store::SYSTEM_TAG_CHARACTERS) => characters = Some(node.id),
            Some(crate::store::SYSTEM_TAG_NOTES) => notes = Some(node.id),
            Some(crate::store::SYSTEM_TAG_ARTEFACTS) => artefacts = Some(node.id),
            Some(crate::store::SYSTEM_TAG_LANGUAGES) => languages = Some(node.id),
            _ => {}
        }
    }

    let algos = stemmer_algorithms(cfg);

    // The order of `books` is kept exactly as before (Place, Character,
    // Artefact, Note, then dictionaries) in case `Lexicon::build` is
    // order-sensitive.
    let mut books: Vec<(Uuid, LexCategory)> = Vec::new();
    books.extend(places.map(|id| (id, LexCategory::Place)));
    books.extend(characters.map(|id| (id, LexCategory::Character)));
    books.extend(artefacts.map(|id| (id, LexCategory::Artefact)));
    books.extend(notes.map(|id| (id, LexCategory::Note)));

    // Languages root -> language book -> chapter titled "Dictionary".
    let mut dictionary_roots: Vec<Uuid> = Vec::new();
    if let Some(lang_root) = languages {
        for lang_book in hierarchy.children_of(Some(lang_root)) {
            for chapter in hierarchy.children_of(Some(lang_book.id)) {
                if chapter.title.eq_ignore_ascii_case("Dictionary") {
                    books.push((chapter.id, LexCategory::Language));
                    dictionary_roots.push(chapter.id);
                }
            }
        }
    }

    let mut lexicon = super::lexicon::Lexicon::build(hierarchy, &books, algos);
    let mut index = LanguageEntryIndex::default();
    let extras = index_dictionary_entries(hierarchy, store, &dictionary_roots, &mut index);
    lexicon.add_extra_forms(extras);
    (lexicon, index)
}

/// Resolves the stemmer algorithms requested by `cfg`.
///
/// A non-blank `cfg.language` decides alone: a recognised name yields exactly
/// one algorithm, an unrecognised one logs a warning and yields none (the
/// `editor.stemming.languages` list is not consulted in that case).
/// Otherwise every name in `cfg.editor.stemming.languages` is parsed; unknown
/// names are logged and skipped.
fn stemmer_algorithms(cfg: &Config) -> Vec<rust_stemmers::Algorithm> {
    if !cfg.language.trim().is_empty() {
        return match crate::config::parse_stemmer_language(&cfg.language) {
            Some(algo) => vec![algo],
            None => {
                tracing::warn!(
                    "language `{}` is not a known Snowball algorithm — \
                     stemmer disabled (falling back to exact-phrase matching)",
                    cfg.language
                );
                Vec::new()
            }
        };
    }

    cfg.editor
        .stemming
        .languages
        .iter()
        .filter_map(|name| {
            let algo = crate::config::parse_stemmer_language(name);
            if algo.is_none() {
                tracing::warn!(
                    "editor.stemming.languages: unknown language `{name}` — skipped"
                );
            }
            algo
        })
        .collect()
}

/// Parses every paragraph below the given dictionary roots into `index`.
///
/// For each parsed entry the node title (trimmed, lowercased) is inserted
/// into the index, replacing any entry already stored under that key. Each
/// non-empty lowercased surface form is inserted only if its key is still
/// free. Surface forms whose key differs from the title key are additionally
/// returned, tagged `LexCategory::Language`, so the caller can feed them to
/// the lexicon.
///
/// Nodes that are missing, are not paragraphs, have no readable content, are
/// not valid UTF-8, or contain no entry are skipped silently; a parse error
/// is logged as a warning and the node is skipped.
fn index_dictionary_entries(
    hierarchy: &Hierarchy,
    store: &Store,
    dictionary_roots: &[Uuid],
    index: &mut LanguageEntryIndex,
) -> Vec<(String, super::lexicon::LexCategory)> {
    use super::lexicon::LexCategory;

    let mut extras: Vec<(String, LexCategory)> = Vec::new();
    for root in dictionary_roots {
        for id in hierarchy.collect_subtree(*root) {
            // The "Dictionary" chapter itself is not an entry.
            if id == *root {
                continue;
            }
            let Some(node) = hierarchy.get(id) else {
                continue;
            };
            if node.kind != NodeKind::Paragraph {
                continue;
            }
            // Best effort: absent content and store errors are both skipped.
            let body = match store.get_content(id) {
                Ok(Some(bytes)) => bytes,
                _ => continue,
            };
            // The parser takes `&str`, so non-UTF-8 content cannot be parsed.
            let body_str = match std::str::from_utf8(&body) {
                Ok(text) => text,
                Err(_) => continue,
            };
            let entry = match language_entry::parse(body_str) {
                Ok(Some(parsed)) => parsed,
                // The parser found no entry in this paragraph.
                Ok(None) => continue,
                Err(err) => {
                    tracing::warn!(
                        "language entry `{}` HJSON parse failed: {}",
                        node.title,
                        err
                    );
                    continue;
                }
            };

            // Computed once per node and reused for every surface form below
            // instead of re-trimming and re-lowercasing the title each time.
            let title_lc = node.title.trim().to_lowercase();
            if !title_lc.is_empty() {
                // The title takes precedence over whatever held this key.
                index.forms.insert(title_lc.clone(), entry.clone());
            }
            for form in entry.surface_forms() {
                let key = form.to_lowercase();
                if key.is_empty() {
                    continue;
                }
                if key != title_lc {
                    extras.push((form.to_string(), LexCategory::Language));
                }
                // First writer wins for non-title forms.
                index.forms.entry(key).or_insert_with(|| entry.clone());
            }
        }
    }
    extras
}
#[derive(Debug, Clone, Copy)]
pub(super) enum LexiconKind {
Places,
Characters,
Notes,
Artefacts,
}
impl LexiconKind {
pub(super) fn label(self) -> &'static str {
match self {
LexiconKind::Places => "Place",
LexiconKind::Characters => "Character",
LexiconKind::Notes => "Note",
LexiconKind::Artefacts => "Artefact",
}
}
pub(super) fn system_tag(self) -> &'static str {
match self {
LexiconKind::Places => crate::store::SYSTEM_TAG_PLACES,
LexiconKind::Characters => crate::store::SYSTEM_TAG_CHARACTERS,
LexiconKind::Notes => crate::store::SYSTEM_TAG_NOTES,
LexiconKind::Artefacts => crate::store::SYSTEM_TAG_ARTEFACTS,
}
}
}