use std::collections::BTreeMap;
use serde::Deserialize;
/// A single dictionary entry deserialized from an HJSON object.
///
/// Every field is marked `#[serde(default)]`, so a key that is missing from
/// the input leaves the field at its `Default` value (empty string or empty
/// map) instead of failing deserialization.
#[derive(Debug, Clone, Default, Deserialize)]
// NOTE(review): the reason for silencing `dead_code` is not visible in this
// file — presumably some fields are not read yet; confirm before removing.
#[allow(dead_code)] pub struct DictionaryEntry {
/// The headword; `surface_forms` trims it and lists it first.
#[serde(default)]
pub word: String,
/// Read from the input key `type` (`type` is a reserved word in Rust, hence
/// the rename). Presumably the part of speech, e.g. `interjection`.
#[serde(rename = "type", default)]
pub pos: String,
/// Translation text for the entry.
#[serde(default)]
pub translation: String,
/// Example text for the entry; may be empty.
#[serde(default)]
pub example: String,
/// Map from an inflection label (e.g. `plural`) to the inflected form.
/// The values feed `surface_forms`; the keys are not interpreted here.
#[serde(default)]
pub inflection: BTreeMap<String, String>,
}
impl DictionaryEntry {
    /// Lists every non-blank written form of this entry.
    ///
    /// The headword comes first, followed by the inflection values in the
    /// map's key order. Each form is trimmed of surrounding whitespace, and
    /// forms that are empty after trimming are left out. Duplicates are kept.
    pub fn surface_forms(&self) -> Vec<&str> {
        std::iter::once(self.word.as_str())
            .chain(self.inflection.values().map(String::as_str))
            .map(str::trim)
            .filter(|form| !form.is_empty())
            .collect()
    }
}
pub fn parse(body: &str) -> Result<Option<DictionaryEntry>, String> {
parse_with::<DictionaryEntry>(
body,
|e| format!("dictionary entry HJSON parse failed: {e}"),
)
}
/// Language-level metadata deserialized from an HJSON object.
///
/// Every field is marked `#[serde(default)]`, so missing keys deserialize to
/// an empty string or empty list rather than causing an error.
#[derive(Debug, Clone, Default, Deserialize)]
// NOTE(review): the reason for silencing `dead_code` is not visible in this
// file — presumably most fields are not read yet; confirm before removing.
#[allow(dead_code)] pub struct MetaOverview {
/// Name of the language (e.g. `Quenya`).
#[serde(default)]
pub name: String,
/// Free-form kind label (e.g. `constructed`); not interpreted in this file.
#[serde(default)]
pub language_kind: String,
/// Not interpreted in this file; presumably the language family — confirm.
#[serde(default)]
pub family: String,
/// Not interpreted in this file; presumably an ISO language code — confirm.
#[serde(default)]
pub iso_code: String,
/// Alphabet buckets in declaration order. An entry may hold several
/// characters (e.g. `Aa`); `bucket_for_word` matches against any of them.
#[serde(default)]
pub alphabet: Vec<String>,
/// Not interpreted in this file; accepted values are not visible here.
#[serde(default)]
pub reading_direction: String,
/// Not interpreted in this file; presumably names a stemmer — confirm.
#[serde(default)]
pub stemmer: String,
/// Not interpreted in this file; presumably a reference to an example
/// corpus — confirm against the consumer.
#[serde(default)]
pub example_corpus_ref: String,
}
pub fn parse_meta_overview(body: &str) -> Result<Option<MetaOverview>, String> {
parse_with::<MetaOverview>(
body,
|e| format!("meta overview HJSON parse failed: {e}"),
)
}
/// Shared parsing routine behind the public `parse*` functions.
///
/// Steps, in order:
/// 1. A blank `body` yields `Ok(None)`.
/// 2. The whole `body` is tried as HJSON; on success that value is returned.
///    The error from this attempt is deliberately discarded.
/// 3. Otherwise the first fenced `hjson` block is extracted. No block means
///    `Ok(None)`; a block that fails to parse is reported through `err`.
fn parse_with<T: serde::de::DeserializeOwned>(
    body: &str,
    err: impl Fn(serde_hjson::Error) -> String,
) -> Result<Option<T>, String> {
    if body.trim().is_empty() {
        return Ok(None);
    }
    match serde_hjson::from_str::<T>(body) {
        Ok(whole) => Ok(Some(whole)),
        Err(_) => match extract_hjson_block(body) {
            None => Ok(None),
            Some(block) => serde_hjson::from_str::<T>(block).map(Some).map_err(err),
        },
    }
}
impl MetaOverview {
    /// Finds the alphabet bucket that `word` belongs to.
    ///
    /// The first non-whitespace character of `word` is lowercased and looked
    /// up, case-insensitively, inside each `alphabet` entry in declaration
    /// order. The first entry containing it is returned as written.
    ///
    /// Returns `None` when `word` has no non-whitespace character, or when no
    /// entry contains the character (including an empty alphabet).
    pub fn bucket_for_word(&self, word: &str) -> Option<&str> {
        let initial = word.chars().find(|c| !c.is_whitespace())?;
        // Lowercasing a single char may expand to several (e.g. 'İ' becomes
        // "i\u{307}"), so the needle is a string and is matched as a
        // substring. For one-char needles this equals a per-char comparison.
        let needle: String = initial.to_lowercase().collect();
        self.alphabet
            .iter()
            .find(|entry| entry.to_lowercase().contains(needle.as_str()))
            .map(String::as_str)
    }
}
/// Returns the text between the first opening `hjson` fence and the next bare
/// closing fence in `body`, or `None` if no such pair exists.
///
/// * An opening fence is a line that, after trimming surrounding whitespace,
///   is exactly three backticks followed by `hjson`, optionally followed by a
///   space and further text.
/// * A closing fence is a line that trims to exactly three backticks; a fence
///   carrying any other text does not close the block.
///
/// The returned slice borrows from `body`. It starts right after the opening
/// fence's line ending and stops right before the closing fence line, so it
/// keeps the line ending of the last content line.
fn extract_hjson_block(body: &str) -> Option<&str> {
    // Byte offset of the end of the line currently being examined.
    let mut offset = 0usize;
    // Byte offset just past the opening fence line, once one has been seen.
    let mut content_start: Option<usize> = None;
    for raw in body.split_inclusive('\n') {
        let line_start = offset;
        offset += raw.len();
        let fence = raw.trim();
        match content_start {
            None => {
                if fence == "```hjson" || fence.starts_with("```hjson ") {
                    content_start = Some(offset);
                }
            }
            // Both offsets sit on line boundaries, so the slice is always valid.
            Some(start) if fence == "```" => return Some(&body[start..line_start]),
            Some(_) => {}
        }
    }
    None
}
// Unit tests for the HJSON parsing helpers and the small methods on the
// parsed types. The multi-line string fixtures are whitespace-sensitive.
#[cfg(test)]
mod tests {
use super::*;
// Fixture: heading line, one fenced hjson block, then free-form notes.
const SEEDED_BODY: &str = "\
= aiya
```hjson
{
word: \"aiya\"
type: \"interjection\"
translation: \"hail\"
example: \"Aiya Eärendil!\"
}
```
# Free-form notes
Greeting used by elves of Aman.
";
// Fixture: same entry as above, plus a nested `inflection` map.
const SEEDED_WITH_INFLECTION: &str = "\
= aiya
```hjson
{
word: \"aiya\"
type: \"interjection\"
translation: \"hail\"
example: \"Aiya Eärendil!\"
inflection: {
plural: \"aiyar\"
emphatic: \"aiyala\"
}
}
```
";
// A body that is a bare HJSON object (no fence) is parsed directly.
#[test]
fn parses_pure_hjson_entry() {
let body = r#"{
word: "aiya"
type: "interjection"
translation: "hail"
example: ""
}
"#;
let entry = parse(body).unwrap().unwrap();
assert_eq!(entry.word, "aiya");
assert_eq!(entry.pos, "interjection");
assert_eq!(entry.translation, "hail");
}
// Same as above for the meta overview, including the list-valued field.
#[test]
fn parses_pure_hjson_meta_overview() {
let body = r#"{
name: "Quenya"
language_kind: "constructed"
alphabet: ["A", "B", "C"]
}
"#;
let meta = parse_meta_overview(body).unwrap().unwrap();
assert_eq!(meta.name, "Quenya");
assert_eq!(meta.language_kind, "constructed");
assert_eq!(meta.alphabet, vec!["A".to_string(), "B".to_string(), "C".to_string()]);
}
// Fields come from the fenced block; an absent `inflection` key defaults
// to an empty map.
#[test]
fn parses_core_fields_from_seeded_body() {
let entry = parse(SEEDED_BODY).unwrap().expect("hjson block present");
assert_eq!(entry.word, "aiya");
assert_eq!(entry.pos, "interjection");
assert_eq!(entry.translation, "hail");
assert_eq!(entry.example, "Aiya Eärendil!");
assert!(entry.inflection.is_empty());
}
// The nested `inflection` object deserializes into the label -> form map.
#[test]
fn parses_inflection_map() {
let entry = parse(SEEDED_WITH_INFLECTION).unwrap().unwrap();
assert_eq!(entry.inflection.get("plural"), Some(&"aiyar".to_string()));
assert_eq!(entry.inflection.get("emphatic"), Some(&"aiyala".to_string()));
}
// `surface_forms` returns the headword together with every inflected form.
#[test]
fn surface_forms_includes_lemma_and_inflections() {
let entry = parse(SEEDED_WITH_INFLECTION).unwrap().unwrap();
let forms = entry.surface_forms();
assert!(forms.contains(&"aiya"));
assert!(forms.contains(&"aiyar"));
assert!(forms.contains(&"aiyala"));
}
// An empty inflection value (`dual`) must not appear among the forms.
#[test]
fn surface_forms_filters_empty_inflection_values() {
let body = "\
```hjson
{
word: \"aiya\"
type: \"interjection\"
translation: \"hail\"
example: \"\"
inflection: { plural: \"aiyar\", dual: \"\" }
}
```";
let entry = parse(body).unwrap().unwrap();
let forms = entry.surface_forms();
assert_eq!(forms.len(), 2, "got: {forms:?}");
assert!(forms.contains(&"aiya"));
assert!(forms.contains(&"aiyar"));
}
// Free-form text without any HJSON is reported as `Ok(None)`, not an error.
#[test]
fn no_block_returns_none() {
let body = "= aiya\n\nJust a free-form description, no HJSON.\n";
assert!(parse(body).unwrap().is_none());
}
// A fence that carries other text (here `typst`) does not close the hjson
// block, so no block is found and the result is `Ok(None)`.
#[test]
fn close_fence_must_be_bare() {
let body = "\
```hjson
{ word: \"aiya\" }
```typst
unmatched
";
assert!(parse(body).unwrap().is_none());
}
// Bucket lookup is case-insensitive and returns `None` for letters that
// are not part of the declared alphabet.
#[test]
fn meta_overview_alphabet_buckets_first_char() {
let body = "\
```hjson
{
name: \"Quenya\"
alphabet: [\"Aa\", \"Bb\", \"Cc\"]
}
```";
let meta = parse_meta_overview(body).unwrap().unwrap();
assert_eq!(meta.bucket_for_word("aiya"), Some("Aa"));
assert_eq!(meta.bucket_for_word("Bran"), Some("Bb"));
assert_eq!(meta.bucket_for_word("zzz"), None,
"word's first char not in the declared alphabet → None (signal fall-back)");
}
// Alphabet entries may be whole names; a word matches the first entry that
// contains its initial letter.
#[test]
fn meta_overview_alphabet_multichar_buckets() {
let body = "\
```hjson
{
name: \"BiblicalHebrew\"
alphabet: [\"Aleph\", \"Beth\", \"Gimel\"]
}
```";
let meta = parse_meta_overview(body).unwrap().unwrap();
assert_eq!(meta.bucket_for_word("Avraham"), Some("Aleph"));
assert_eq!(meta.bucket_for_word("Beriah"), Some("Beth"));
}
// With an empty alphabet no word can be bucketed.
#[test]
fn meta_overview_empty_alphabet_returns_none() {
let body = "\
```hjson
{
name: \"BareBones\"
alphabet: []
}
```";
let meta = parse_meta_overview(body).unwrap().unwrap();
assert_eq!(meta.bucket_for_word("anything"), None);
}
// A fenced block that is present but not valid HJSON (unterminated string)
// surfaces as `Err` rather than `Ok(None)`.
#[test]
fn malformed_hjson_reports_error() {
let body = "\
```hjson
{ word: \"aiya
```
";
assert!(parse(body).is_err());
}
}