use std::path::Path;
use crate::config::Config;
use crate::error::{Error, Result};
use crate::project::ProjectLayout;
use crate::store::hierarchy::Hierarchy;
use crate::store::{
InsertPosition, NodeKind, Store, SYSTEM_TAG_CHARACTERS, SYSTEM_TAG_LANGUAGES,
SYSTEM_TAG_PLACES,
};
use super::{LanguageCommand, LanguageExportFormat};
/// Dispatches a parsed `language` subcommand to its handler in this module.
///
/// Each arm destructures the variant's fields and forwards them, borrowing
/// owned strings and turning optional ones into `Option<&str>` with
/// `as_deref`. The handler's `Result` is returned unchanged.
///
/// `AddWord` is the only arm with logic of its own: when `import` is set the
/// CSV importer runs and the single-word arguments are not consulted;
/// otherwise `word`, `r#type` and `translation` are each required and a
/// missing one yields `Error::Config`.
pub fn run(project: &Path, cmd: LanguageCommand) -> Result<()> {
match cmd {
LanguageCommand::Init { name } => init(project, &name),
LanguageCommand::AddWord {
language,
word,
r#type,
translation,
example,
import,
new,
force,
} => {
// Bulk import takes precedence over the single-word arguments.
if let Some(csv_path) = import {
import_dictionary_csv(project, &language, &csv_path, new, force)
} else {
let word = word.ok_or_else(|| {
Error::Config(
"missing <WORD> — pass a word argument OR use --import <PATH>"
.into(),
)
})?;
let pos = r#type.ok_or_else(|| {
Error::Config(
"missing --type — pass a part-of-speech OR use --import".into(),
)
})?;
let translation = translation.ok_or_else(|| {
Error::Config(
"missing --translation — pass a working-language gloss OR use --import"
.into(),
)
})?;
add_word(
project,
&language,
&word,
&pos,
&translation,
example.as_deref(),
)
}
}
LanguageCommand::Doctor { language, json } => doctor(project, &language, json),
LanguageCommand::Export {
language,
format,
output,
} => export(project, &language, format, output.as_deref()),
LanguageCommand::List => list(project),
LanguageCommand::RemoveWord { language, word } => {
remove_word(project, &language, &word)
}
LanguageCommand::DefineRule {
language,
rule_id,
category,
} => define_rule(project, &language, &rule_id, &category),
LanguageCommand::GenerateWord {
language,
role,
count,
} => generate_word(project, &language, &role, count),
LanguageCommand::Syllabify { language, word } => {
syllabify_word(project, &language, &word)
}
LanguageCommand::Ipa { language, word } => ipa_surface(project, &language, &word),
LanguageCommand::Stress { language, word } => stress_word(project, &language, &word),
LanguageCommand::Romanize {
language,
text,
scheme,
reverse,
} => romanize_text(project, &language, &text, scheme.as_deref(), reverse),
LanguageCommand::Tone { language, tones } => tone_sandhi(project, &language, &tones),
LanguageCommand::Audit { language, json } => audit(project, &language, json),
LanguageCommand::LinkPlace {
place,
language,
secondary,
} => link_place(project, &place, &language, secondary),
LanguageCommand::LinkCharacter {
character,
language,
proficiency,
} => link_character(project, &character, &language, &proficiency),
LanguageCommand::Speakers { language } => speakers(project, &language),
LanguageCommand::ScanManuscript { language, json } => {
scan_manuscript(project, &language, json)
}
LanguageCommand::Paradigm {
language,
root,
template,
gloss,
} => paradigm(project, &language, &root, &template, gloss.as_deref()),
LanguageCommand::Gloss { language, text } => gloss_text(project, &language, &text),
LanguageCommand::Grammar { language, set, json } => {
grammar_questionnaire(project, &language, set.as_deref(), json)
}
LanguageCommand::IdiomAdd {
language,
form,
literal,
meaning,
register,
} => idiom_add(
project,
&language,
&form,
literal.as_deref(),
&meaning,
register.as_deref(),
),
LanguageCommand::MetaphorAdd {
language,
source,
target,
example,
} => metaphor_add(project, &language, &source, &target, example.as_deref()),
LanguageCommand::Idioms { language } => idioms_list(project, &language),
LanguageCommand::Reconstruct {
forms,
gloss,
provider,
} => reconstruct(project, &forms, gloss.as_deref(), provider.as_deref()),
LanguageCommand::RealismCheck { language, provider } => {
realism_check(project, &language, provider.as_deref())
}
LanguageCommand::FamilyTree => family_tree(project),
LanguageCommand::Cognates { proto, form } => cognates(project, &proto, &form),
LanguageCommand::SoundChange { language, form } => {
sound_change(project, &language, &form)
}
LanguageCommand::DeriveLexicon { language, yes } => {
derive_lexicon_cmd(project, &language, yes)
}
LanguageCommand::Derive {
language,
root,
gloss,
pos,
yes,
} => derive(project, &language, &root, gloss.as_deref(), pos.as_deref(), yes),
LanguageCommand::Query {
language,
register,
domain,
era,
pos,
text,
json,
} => query(
project,
&language,
register.as_deref(),
domain.as_deref(),
era.as_deref(),
pos.as_deref(),
text.as_deref(),
json,
),
LanguageCommand::GenerateLexicon {
language,
topic,
count,
era,
register,
provider,
semantic,
semantic_threshold,
yes,
} => generate_lexicon(
project,
&language,
topic.as_deref(),
count,
era.as_deref(),
register.as_deref(),
provider.as_deref(),
semantic,
semantic_threshold,
yes,
),
}
}
/// System prompt for LLM-backed lexicon generation.
///
/// Tells the model to answer with a single JSON object of the shape
/// `{"entries":[{form, gloss, pos, example, register, domain}]}`, to pick
/// every `form` from the supplied candidate list only, and never to give two
/// entries the same meaning.
///
/// NOTE(review): the consumer is not visible in this chunk — presumably the
/// `generate-lexicon` flow; confirm before changing the JSON shape.
const LEXGEN_SYSTEM: &str = "You are a meticulous lexicographer for a constructed language. \
Reply with a SINGLE JSON object and nothing else — no prose, no preamble, no markdown fences. \
Shape: {\"entries\":[{\"form\":\"…\",\"gloss\":\"…\",\"pos\":\"…\",\"example\":\"…\",\"register\":\"…\",\
\"domain\":[\"…\"]}]}. Choose each `form` ONLY from the provided candidate list (never invent a \
form). Never assign two entries the same meaning. Keep `pos` a short lowercase tag \
(noun/verb/adjective/…). `register` is one short tag (neutral/formal/vulgar/sacred/archaic); \
`domain` is one or two short semantic-domain tags.";
/// Resolves a user-typed `name` to the stored title of a node under the
/// system book tagged `system_tag`.
///
/// Titles are compared ASCII case-insensitively; the first match among the
/// ids returned by `collect_subtree` wins. Returns `None` when no book
/// carries the tag or no node in that subtree has the title.
fn resolve_system_node(hierarchy: &Hierarchy, system_tag: &str, name: &str) -> Option<String> {
    let system_book = hierarchy.iter().find(|node| {
        node.kind == NodeKind::Book && node.system_tag.as_deref() == Some(system_tag)
    })?;

    for id in hierarchy.collect_subtree(system_book.id) {
        // Ids that no longer resolve to a node are skipped.
        let Some(candidate) = hierarchy.get(id) else {
            continue;
        };
        if candidate.title.eq_ignore_ascii_case(name) {
            return Some(candidate.title.clone());
        }
    }
    None
}
/// Links a place to a language, as its primary language or (with
/// `secondary`) as an additional one, and saves the link table.
///
/// The place name is canonicalised against the Places system book when a
/// match exists; otherwise a note is printed and the name is recorded as
/// typed.
///
/// # Errors
/// Propagates failures from `open_lang_book`, and wraps load/save failures
/// of the link table in `Error::Io`.
fn link_place(project: &Path, place: &str, language: &str, secondary: bool) -> Result<()> {
    use crate::conlang::links::ConlangLinks;

    let (store, hierarchy, lang_book) = open_lang_book(project, language)?;

    let place_name =
        resolve_system_node(&hierarchy, SYSTEM_TAG_PLACES, place).unwrap_or_else(|| {
            eprintln!("note: no Place named `{place}` found — recording the link anyway");
            place.to_string()
        });

    let root = store.project_root();
    let mut links = ConlangLinks::load(root).map_err(Error::Io)?;
    let language_title = &lang_book.title;
    if secondary {
        links.add_place_secondary(&place_name, language_title);
        eprintln!("{place_name} → secondary language {}", lang_book.title);
    } else {
        links.set_place_primary(&place_name, language_title);
        eprintln!("{place_name} → primary language {}", lang_book.title);
    }
    links.save(root).map_err(Error::Io)?;
    Ok(())
}
fn link_character(project: &Path, character: &str, language: &str, proficiency: &str) -> Result<()> {
use crate::conlang::links::{ConlangLinks, Level};
let level = Level::parse(proficiency).ok_or_else(|| {
Error::Config(format!(
"unknown proficiency `{proficiency}` — use native | fluent | conversational | broken | reading_only"
))
})?;
let (store, hierarchy, lang_book) = open_lang_book(project, language)?;
let char_name = match resolve_system_node(&hierarchy, SYSTEM_TAG_CHARACTERS, character) {
Some(canonical) => canonical,
None => {
eprintln!("note: no Character named `{character}` found — recording the link anyway");
character.to_string()
}
};
let root = store.project_root();
let mut links = ConlangLinks::load(root).map_err(Error::Io)?;
links.set_character_proficiency(&char_name, &lang_book.title, level);
links.save(root).map_err(Error::Io)?;
eprintln!("{char_name} → {} ({})", lang_book.title, level.as_str());
Ok(())
}
/// Finds the first non-empty morphology block stored under the language's
/// `Morphology` or `Grammar` chapters.
///
/// Paragraphs are visited in hierarchy order. A paragraph is accepted when
/// it parses as morphology HJSON and declares at least one morpheme,
/// paradigm or derivation; unparsable or empty blocks are skipped.
///
/// # Errors
/// Propagates content-read failures from the store.
fn load_morphology(
    store: &Store,
    hierarchy: &Hierarchy,
    lang_book: &crate::store::node::Node,
) -> Result<Option<crate::conlang::types::morphology::Morphology>> {
    use crate::conlang::types::morphology::Morphology;

    for chapter in hierarchy.children_of(Some(lang_book.id)) {
        if chapter.kind != NodeKind::Chapter {
            continue;
        }
        let title = &chapter.title;
        let is_candidate =
            title.eq_ignore_ascii_case("Morphology") || title.eq_ignore_ascii_case("Grammar");
        if !is_candidate {
            continue;
        }

        for para in hierarchy.children_of(Some(chapter.id)) {
            if para.kind != NodeKind::Paragraph {
                continue;
            }
            let Some(bytes) = store.get_content(para.id)? else {
                continue;
            };
            let text = String::from_utf8_lossy(&bytes);
            let Ok(Some(morph)) = Morphology::from_hjson(&text) else {
                continue;
            };
            let declares_nothing = morph.morphemes.is_empty()
                && morph.paradigms.is_empty()
                && morph.derivations.is_empty();
            if !declares_nothing {
                return Ok(Some(morph));
            }
        }
    }
    Ok(None)
}
/// Returns the first diachronics block found in the language's `Phonology`
/// chapter, if any.
///
/// Best-effort: paragraphs whose content cannot be read or does not parse as
/// a diachronics block are silently skipped, so this currently never returns
/// `Err`.
fn load_diachronics(
    store: &Store,
    hierarchy: &Hierarchy,
    lang_book: &crate::store::node::Node,
) -> Result<Option<crate::conlang::types::diachronic::Diachronics>> {
    use crate::conlang::types::diachronic::Diachronics;

    let phonology_chapter = hierarchy
        .children_of(Some(lang_book.id))
        .into_iter()
        .find(|n| n.kind == NodeKind::Chapter && n.title.eq_ignore_ascii_case("Phonology"));
    let Some(chapter) = phonology_chapter else {
        return Ok(None);
    };

    let found = hierarchy
        .children_of(Some(chapter.id))
        .into_iter()
        .filter(|para| para.kind == NodeKind::Paragraph)
        .filter_map(|para| store.get_content(para.id).ok().flatten())
        .find_map(|bytes| {
            Diachronics::from_hjson(&String::from_utf8_lossy(&bytes))
                .ok()
                .flatten()
        });
    Ok(found)
}
/// System prompt used by `reconstruct`: asks the model to apply the
/// comparative method to a set of cognate forms, give one asterisk-marked
/// proto-form, list the sound correspondences it relied on, and justify the
/// result briefly in plain text.
const RECONSTRUCT_SYSTEM: &str = "You are a historical linguist applying the comparative method. \
Given cognate forms from related daughter languages, propose the single most plausible proto-form. \
Mark the proto-form with a leading asterisk. Then list the key regular sound correspondences you \
relied on, and justify the reconstruction in 2–3 sentences. Be concise; output plain text.";
/// System prompt used by `realism_check`: asks the model to judge whether a
/// chain of sound changes is typologically plausible, flag unnatural rules,
/// and finish with a plausible / mixed / implausible verdict in plain text.
const REALISM_SYSTEM: &str = "You are a historical phonologist. Assess whether a chain of diachronic \
sound changes is typologically plausible — i.e. whether each change is a naturally attested type \
(lenition, assimilation, final devoicing, palatalization, epenthesis, …) and whether the ordering \
is reasonable. Flag any rule that is unnatural or unattested, and give an overall verdict \
(plausible / mixed / implausible). Be concise; output plain text.";
fn reconstruct(
project: &Path,
forms: &str,
gloss: Option<&str>,
provider: Option<&str>,
) -> Result<()> {
let layout = ProjectLayout::new(project);
layout.require_initialized()?;
let cfg = Config::load_layered(&layout.config_path())?;
let ai = crate::ai::AiClient::from_config(&cfg.llm)?;
let (model, _env) = ai.resolve_provider(&cfg.llm, provider)?;
eprintln!("inkhaven language reconstruct · model: {model}");
let gloss_clause = gloss.map(|g| format!(" meaning '{g}'")).unwrap_or_default();
let prompt = format!(
"Cognate daughter forms{gloss_clause}: {forms}.\n\nReconstruct the proto-form."
);
let raw = crate::ai::stream::collect_blocking(
ai.client.clone(),
model.to_string(),
Some(RECONSTRUCT_SYSTEM.to_string()),
prompt,
)
.map_err(|e| Error::Store(format!("inference error: {e}")))?;
println!("{}", raw.trim());
Ok(())
}
fn realism_check(project: &Path, language: &str, provider: Option<&str>) -> Result<()> {
let (store, hierarchy, lang_book) = open_lang_book(project, language)?;
let dia = load_diachronics(&store, &hierarchy, &lang_book)?.ok_or_else(|| {
Error::Config(format!("language `{language}` has no diachronics chain to check"))
})?;
let cfg = Config::load_layered(&ProjectLayout::new(project).config_path())?;
let ai = crate::ai::AiClient::from_config(&cfg.llm)?;
let (model, _env) = ai.resolve_provider(&cfg.llm, provider)?;
eprintln!("inkhaven language realism-check · {language} · model: {model}");
let rules_text = dia
.rules
.iter()
.enumerate()
.map(|(i, r)| format!("{}. {}", i + 1, r.source))
.collect::<Vec<_>>()
.join("\n");
let proto = dia.proto.as_deref().unwrap_or("the proto-language");
let prompt = format!(
"Sound-change chain deriving {language} from {proto} (applied in order):\n{rules_text}\n\n\
Assess the plausibility, rule by rule, then give an overall verdict."
);
let raw = crate::ai::stream::collect_blocking(
ai.client.clone(),
model.to_string(),
Some(REALISM_SYSTEM.to_string()),
prompt,
)
.map_err(|e| Error::Store(format!("inference error: {e}")))?;
println!("{}", raw.trim());
Ok(())
}
/// Returns clones of every `Book` that is a direct child of the Languages
/// system book, or an empty list when that system book does not exist.
fn all_language_books(hierarchy: &Hierarchy) -> Vec<crate::store::node::Node> {
    hierarchy
        .iter()
        .find(|n| {
            n.kind == NodeKind::Book && n.system_tag.as_deref() == Some(SYSTEM_TAG_LANGUAGES)
        })
        .map(|lang_root| {
            hierarchy
                .children_of(Some(lang_root.id))
                .into_iter()
                .filter(|child| child.kind == NodeKind::Book)
                .cloned()
                .collect::<Vec<_>>()
        })
        .unwrap_or_default()
}
/// Prints the language family tree built from each language's declared
/// proto-language.
///
/// Every language book is paired with the `proto` from its diachronics
/// block (or `None`), and the pairs are handed to the tree renderer. Prints
/// a hint instead when the project has no languages.
fn family_tree(project: &Path) -> Result<()> {
    let layout = ProjectLayout::new(project);
    layout.require_initialized()?;
    let cfg = Config::load_layered(&layout.config_path())?;
    let store = Store::open(layout, &cfg)?;
    let hierarchy = Hierarchy::load(&store)?;

    let langs = all_language_books(&hierarchy);
    if langs.is_empty() {
        println!("no languages yet — `inkhaven language init <name>`");
        return Ok(());
    }

    let pairs = langs
        .iter()
        .map(|book| -> Result<(String, Option<String>)> {
            let proto = load_diachronics(&store, &hierarchy, book)?.and_then(|d| d.proto);
            Ok((book.title.clone(), proto))
        })
        .collect::<Result<Vec<_>>>()?;

    print!("{}", crate::conlang::diachronic::family::render_tree(&pairs));
    Ok(())
}
/// Prints the cognate set of a proto-form: its reflex in every language
/// whose diachronics block names `proto` as its parent.
///
/// The proto-language's phonology (default when absent) and each daughter's
/// own rule chain are used to derive the reflex. Output is sorted by
/// language name, then reflex.
fn cognates(project: &Path, proto: &str, form: &str) -> Result<()> {
    let (store, hierarchy, proto_book) = open_lang_book(project, proto)?;
    let proto_phon = load_phonology(&store, &hierarchy, &proto_book)?.unwrap_or_default();

    let mut reflexes: Vec<(String, String)> = Vec::new();
    for daughter in all_language_books(&hierarchy) {
        if daughter.id == proto_book.id {
            continue;
        }
        let Some(dia) = load_diachronics(&store, &hierarchy, &daughter)? else {
            continue;
        };
        let descends_from_proto = match dia.proto.as_deref() {
            Some(parent) => parent.eq_ignore_ascii_case(&proto_book.title),
            None => false,
        };
        if !descends_from_proto {
            continue;
        }
        let reflex = crate::conlang::diachronic::apply::derive_form(&proto_phon, &dia.rules, form);
        reflexes.push((daughter.title.clone(), reflex));
    }
    reflexes.sort();

    println!("cognate set · *{form} ({})", proto_book.title);
    if reflexes.is_empty() {
        println!(" (no daughter languages declare {} as their proto)", proto_book.title);
        return Ok(());
    }
    for (name, reflex) in &reflexes {
        println!(" {:<16} {reflex}", name);
    }
    Ok(())
}
fn resolve_proto(
store: &Store,
hierarchy: &Hierarchy,
dia: &crate::conlang::types::diachronic::Diachronics,
daughter: &str,
) -> Result<(crate::store::node::Node, crate::conlang::Phonology, String)> {
let proto_name = dia.proto.clone().ok_or_else(|| {
Error::Config(format!(
"language `{daughter}`'s diachronics block has no `proto` — name the parent language"
))
})?;
let lang_root = hierarchy
.iter()
.find(|n| n.kind == NodeKind::Book && n.system_tag.as_deref() == Some(SYSTEM_TAG_LANGUAGES))
.ok_or_else(|| Error::Store("Language system book missing".into()))?;
let proto_book = hierarchy
.children_of(Some(lang_root.id))
.into_iter()
.find(|n| n.kind == NodeKind::Book && n.title.eq_ignore_ascii_case(&proto_name))
.cloned()
.ok_or_else(|| {
Error::Config(format!(
"proto-language `{proto_name}` not found — `inkhaven language init {proto_name}` first"
))
})?;
let proto_phon = load_phonology(store, hierarchy, &proto_book)?.unwrap_or_default();
Ok((proto_book, proto_phon, proto_name))
}
fn sound_change(project: &Path, language: &str, form: &str) -> Result<()> {
let (store, hierarchy, daughter_book) = open_lang_book(project, language)?;
let dia = load_diachronics(&store, &hierarchy, &daughter_book)?.ok_or_else(|| {
Error::Config(format!(
"language `{language}` has no diachronics — add a `{{ diachronics: {{ proto, rules }} }}` \
block to its Phonology chapter"
))
})?;
let (_proto_book, proto_phon, proto_name) = resolve_proto(&store, &hierarchy, &dia, language)?;
let daughter = crate::conlang::diachronic::apply::derive_form(&proto_phon, &dia.rules, form);
println!("{form} > {daughter} (from {proto_name}, {} rule(s))", dia.rules.len());
Ok(())
}
fn derive_lexicon_cmd(project: &Path, language: &str, yes: bool) -> Result<()> {
let (store, hierarchy, daughter_book) = open_lang_book(project, language)?;
let dia = load_diachronics(&store, &hierarchy, &daughter_book)?.ok_or_else(|| {
Error::Config(format!("language `{language}` has no diachronics block"))
})?;
let (proto_book, proto_phon, proto_name) = resolve_proto(&store, &hierarchy, &dia, language)?;
let proto_entries = load_dictionary(&store, &hierarchy, &proto_book)?;
if proto_entries.is_empty() {
eprintln!("note: proto `{proto_name}` has no dictionary entries to derive from");
}
let derived =
crate::conlang::diachronic::apply::derive_lexicon(&proto_phon, &dia.rules, &proto_entries);
println!(
"derive {language} from {proto_name} · {} rule(s) · {} entr(y/ies):",
dia.rules.len(),
derived.len()
);
for d in &derived {
println!(" {:<14} > {:<14} {}", d.proto_form, d.form, d.gloss);
}
if yes {
let cfg = Config::load_layered(&ProjectLayout::new(project).config_path())?;
let mut added = 0usize;
for d in &derived {
let entry = ImportEntry {
word: d.form.clone(),
pos: d.pos.clone(),
translation: d.gloss.clone(),
etymology: format!("from {proto_name} {} via sound change", d.proto_form),
..Default::default()
};
match add_imported_dictionary_entry(&store, &cfg, &daughter_book, &entry) {
Ok(_) => added += 1,
Err(e) => eprintln!(" skipped {}: {e}", d.form),
}
}
eprintln!("\nadded {added} derived entr(y/ies) to {language}'s Dictionary");
} else {
eprintln!("\n(dry run — re-run with --yes to add the {} derived entr(y/ies))", derived.len());
}
Ok(())
}
/// Loads the idiom/metaphor block from the language's `Grammar` chapter.
///
/// Returns the parsed block with the paragraph node it came from, so a
/// caller can overwrite that node. When there is no Grammar chapter or no
/// paragraph parses, returns an empty `Expressions` and `None`. Unreadable
/// or unparsable paragraphs are skipped, so this currently never returns
/// `Err`.
fn load_expressions(
    store: &Store,
    hierarchy: &Hierarchy,
    lang_book: &crate::store::node::Node,
) -> Result<(crate::conlang::types::expression::Expressions, Option<crate::store::node::Node>)> {
    use crate::conlang::types::expression::Expressions;

    let grammar_chapter = hierarchy
        .children_of(Some(lang_book.id))
        .into_iter()
        .find(|n| n.kind == NodeKind::Chapter && n.title.eq_ignore_ascii_case("Grammar"));
    let Some(chapter) = grammar_chapter else {
        return Ok((Expressions::default(), None));
    };

    let hit = hierarchy
        .children_of(Some(chapter.id))
        .into_iter()
        .filter(|para| para.kind == NodeKind::Paragraph)
        .find_map(|para| {
            let bytes = store.get_content(para.id).ok().flatten()?;
            let parsed = Expressions::from_hjson(&String::from_utf8_lossy(&bytes))
                .ok()
                .flatten()?;
            Some((parsed, Some(para.clone())))
        });

    Ok(hit.unwrap_or_else(|| (Expressions::default(), None)))
}
fn save_expressions(
project: &Path,
store: &Store,
lang_book: &crate::store::node::Node,
node: Option<crate::store::node::Node>,
expr: &crate::conlang::types::expression::Expressions,
) -> Result<()> {
let cfg = Config::load_layered(&ProjectLayout::new(project).config_path())?;
let body = serde_json::to_string_pretty(expr)
.map_err(|e| Error::Store(format!("serializing expressions: {e}")))?;
upsert_grammar_paragraph(store, &cfg, lang_book, "expressions", node, &body)
}
/// Appends an idiom to the language's expressions block and saves it.
///
/// All text fields are trimmed. A missing `literal` is stored as an empty
/// string, and a missing `register` as an empty list.
fn idiom_add(
    project: &Path,
    language: &str,
    form: &str,
    literal: Option<&str>,
    meaning: &str,
    register: Option<&str>,
) -> Result<()> {
    use crate::conlang::types::expression::Idiom;

    let form = form.trim();
    let idiom = Idiom {
        form: form.to_string(),
        literal: literal.map_or_else(String::new, |l| l.trim().to_string()),
        meaning: meaning.trim().to_string(),
        register: match register {
            Some(r) => vec![r.trim().to_string()],
            None => Vec::new(),
        },
    };

    let (store, hierarchy, lang_book) = open_lang_book(project, language)?;
    let (mut expr, node) = load_expressions(&store, &hierarchy, &lang_book)?;
    expr.idioms.push(idiom);
    save_expressions(project, &store, &lang_book, node, &expr)?;

    eprintln!("{language}: added idiom `{}` ({} total)", form, expr.idioms.len());
    Ok(())
}
/// Appends a conceptual metaphor (`source` → `target`) to the language's
/// expressions block and saves it.
///
/// Text fields are trimmed. `example`, when present, becomes the single
/// entry of `examples`; `note` is left empty.
fn metaphor_add(
    project: &Path,
    language: &str,
    source: &str,
    target: &str,
    example: Option<&str>,
) -> Result<()> {
    use crate::conlang::types::expression::Metaphor;

    let source = source.trim();
    let target = target.trim();
    let metaphor = Metaphor {
        source: source.to_string(),
        target: target.to_string(),
        examples: match example {
            Some(e) => vec![e.trim().to_string()],
            None => Vec::new(),
        },
        note: String::new(),
    };

    let (store, hierarchy, lang_book) = open_lang_book(project, language)?;
    let (mut expr, node) = load_expressions(&store, &hierarchy, &lang_book)?;
    expr.metaphors.push(metaphor);
    save_expressions(project, &store, &lang_book, node, &expr)?;

    eprintln!(
        "{language}: declared metaphor {} → {} ({} total)",
        source,
        target,
        expr.metaphors.len()
    );
    Ok(())
}
/// Prints the language's idioms and metaphors to stdout.
///
/// Idioms show form, meaning, an optional `[register,…]` tag and — when
/// non-blank — the literal reading on the following line. Metaphors show
/// `source → target` plus any examples joined with `; `.
fn idioms_list(project: &Path, language: &str) -> Result<()> {
    let (store, hierarchy, lang_book) = open_lang_book(project, language)?;
    let (expr, _) = load_expressions(&store, &hierarchy, &lang_book)?;

    let (no_idioms, no_metaphors) = (expr.idioms.is_empty(), expr.metaphors.is_empty());
    if no_idioms && no_metaphors {
        println!("{language}: no idioms or metaphors yet");
        return Ok(());
    }

    if !no_idioms {
        println!("idioms ({}):", expr.idioms.len());
        for idiom in &expr.idioms {
            let reg = match idiom.register.is_empty() {
                true => String::new(),
                false => format!(" [{}]", idiom.register.join(",")),
            };
            println!(" {} — {}{}", idiom.form, idiom.meaning, reg);
            if !idiom.literal.trim().is_empty() {
                println!(" (lit. {})", idiom.literal);
            }
        }
    }

    if !no_metaphors {
        println!("\nmetaphors ({}):", expr.metaphors.len());
        for metaphor in &expr.metaphors {
            let ex = match metaphor.examples.is_empty() {
                true => String::new(),
                false => format!(" e.g. {}", metaphor.examples.join("; ")),
            };
            println!(" {} → {}{}", metaphor.source, metaphor.target, ex);
        }
    }
    Ok(())
}
/// Loads the typology answers (`GrammarSpec`) from the language's `Grammar`
/// chapter.
///
/// Returns the first paragraph that parses as a grammar spec, with its node,
/// so a caller can overwrite that node. Falls back to a default spec and
/// `None` when there is no Grammar chapter or nothing parses. Unreadable
/// paragraphs are skipped, so this currently never returns `Err`.
fn load_grammar_spec(
    store: &Store,
    hierarchy: &Hierarchy,
    lang_book: &crate::store::node::Node,
) -> Result<(crate::conlang::types::grammar::GrammarSpec, Option<crate::store::node::Node>)> {
    use crate::conlang::types::grammar::GrammarSpec;

    let grammar_chapter = hierarchy
        .children_of(Some(lang_book.id))
        .into_iter()
        .find(|n| n.kind == NodeKind::Chapter && n.title.eq_ignore_ascii_case("Grammar"));
    let Some(chapter) = grammar_chapter else {
        return Ok((GrammarSpec::default(), None));
    };

    let hit = hierarchy
        .children_of(Some(chapter.id))
        .into_iter()
        .filter(|para| para.kind == NodeKind::Paragraph)
        .find_map(|para| {
            let bytes = store.get_content(para.id).ok().flatten()?;
            let spec = GrammarSpec::from_hjson(&String::from_utf8_lossy(&bytes))
                .ok()
                .flatten()?;
            Some((spec, Some(para.clone())))
        });

    Ok(hit.unwrap_or_else(|| (GrammarSpec::default(), None)))
}
fn grammar_questionnaire(
project: &Path,
language: &str,
set: Option<&str>,
json: bool,
) -> Result<()> {
use crate::conlang::grammar;
let (store, hierarchy, lang_book) = open_lang_book(project, language)?;
let (mut spec, node) = load_grammar_spec(&store, &hierarchy, &lang_book)?;
if let Some(kv) = set {
let (feat, val) = kv
.split_once('=')
.ok_or_else(|| Error::Config("use --set <feature>=<value>".into()))?;
let f = grammar::feature(feat.trim()).ok_or_else(|| {
Error::Config(format!("unknown feature `{}` — run `language grammar` to list them", feat.trim()))
})?;
let val = val.trim();
if !f.is_valid(val) {
return Err(Error::Config(format!(
"`{val}` is not a valid value for `{}` — options: {}",
f.id,
f.values()
)));
}
spec.grammar.insert(f.id.to_string(), val.to_lowercase());
let cfg = Config::load_layered(&ProjectLayout::new(project).config_path())?;
let body = serde_json::to_string_pretty(&spec)
.map_err(|e| Error::Store(format!("serializing grammar: {e}")))?;
upsert_grammar_paragraph(&store, &cfg, &lang_book, "typology", node, &body)?;
eprintln!("{language}: set {} = {}", f.id, val.to_lowercase());
return Ok(());
}
if json {
println!(
"{}",
serde_json::to_string_pretty(&spec.grammar)
.map_err(|e| Error::Store(format!("serializing grammar: {e}")))?
);
return Ok(());
}
let total = grammar::catalog().len();
let answered = grammar::catalog().iter().filter(|f| spec.grammar.contains_key(f.id)).count();
println!("grammar · {language} · {answered}/{total} feature(s) set\n");
for f in grammar::catalog() {
match spec.grammar.get(f.id) {
Some(v) => println!(" ✓ {:<16} {}", f.id, v),
None => println!(" · {:<16} {}", f.id, f.question),
}
}
eprintln!("\nset an answer: inkhaven language grammar {language} --set <feature>=<value>");
eprintln!("(see the options for a feature in `Documentation/CONLANG.md` or `--help`)");
Ok(())
}
fn upsert_grammar_paragraph(
store: &Store,
cfg: &Config,
lang_book: &crate::store::node::Node,
para_title: &str,
node: Option<crate::store::node::Node>,
body: &str,
) -> Result<()> {
let mut target = match node {
Some(n) => n,
None => {
let hierarchy = Hierarchy::load(store)?;
let chapter = hierarchy
.children_of(Some(lang_book.id))
.into_iter()
.find(|n| n.kind == NodeKind::Chapter && n.title.eq_ignore_ascii_case("Grammar"))
.cloned()
.ok_or_else(|| Error::Config("no Grammar chapter to store the block in".into()))?;
store.create_node(
cfg,
&hierarchy,
NodeKind::Paragraph,
para_title,
Some(&chapter),
None,
InsertPosition::End,
)?
}
};
target.content_type = Some("hjson".to_string());
if let Some(rel) = &target.file {
let abs = store.project_root().join(rel);
std::fs::write(&abs, body.as_bytes())
.map_err(|e| Error::Store(format!("write {para_title}: {e}")))?;
}
store
.update_paragraph_content(&mut target, body.as_bytes())
.map_err(|e| Error::Store(format!("update {para_title}: {e}")))?;
Ok(())
}
/// Applies the language's derivation rules to `root` and lists the results.
///
/// `gloss` defaults to the root itself and `pos` to the empty string. The
/// derived forms are always printed. They are added to the Dictionary only
/// when `yes` is set; otherwise this is a dry run. Entries that fail to
/// import are reported and skipped.
///
/// # Errors
/// `Error::Config` when the language has no morphology block or declares no
/// derivation rules.
fn derive(
    project: &Path,
    language: &str,
    root: &str,
    gloss: Option<&str>,
    pos: Option<&str>,
    yes: bool,
) -> Result<()> {
    let (store, hierarchy, lang_book) = open_lang_book(project, language)?;
    let phon = load_phonology(&store, &hierarchy, &lang_book)?.unwrap_or_default();
    let Some(morph) = load_morphology(&store, &hierarchy, &lang_book)? else {
        return Err(Error::Config(format!(
            "language `{language}` has no morphology — add `derivations` HJSON under its `Grammar` chapter"
        )));
    };
    if morph.derivations.is_empty() {
        return Err(Error::Config(format!(
            "language `{language}` declares no derivation rules"
        )));
    }

    let root_gloss = gloss.unwrap_or(root);
    let root_pos = pos.unwrap_or("");
    let derived =
        crate::conlang::morphology::derive::generate(&phon, &morph, root, root_gloss, root_pos);
    if derived.is_empty() {
        let shown_pos = if root_pos.is_empty() { "(unspecified pos)" } else { root_pos };
        eprintln!("no derivation rules apply to a `{}` root", shown_pos);
        return Ok(());
    }

    println!("derivations of {root} ({root_gloss}):");
    for item in &derived {
        let pos_suffix = match item.pos.is_empty() {
            true => String::new(),
            false => format!(" {}", item.pos),
        };
        println!(" {:<18} {:<26} [{}]{}", item.form, item.gloss, item.rule, pos_suffix);
    }

    if !yes {
        eprintln!("\n(dry run — re-run with --yes to add the {} derived form(s))", derived.len());
        return Ok(());
    }

    let cfg = Config::load_layered(&ProjectLayout::new(project).config_path())?;
    let mut added = 0usize;
    for item in &derived {
        let entry = ImportEntry {
            word: item.form.clone(),
            pos: item.pos.clone(),
            translation: item.gloss.clone(),
            etymology: format!("derived from {root} via {}", item.rule),
            ..Default::default()
        };
        if let Err(e) = add_imported_dictionary_entry(&store, &cfg, &lang_book, &entry) {
            eprintln!(" skipped {}: {e}", item.form);
        } else {
            added += 1;
        }
    }
    eprintln!("\nadded {added} derived entr(y/ies) to {language}'s Dictionary");
    Ok(())
}
/// Prints a two-line interlinear gloss of `text`: surface words on top,
/// glosses underneath, column-aligned.
///
/// Each column is as wide as the longer of word and gloss (counted in
/// `char`s) plus two spaces. Unknown words are glossed as `?`. A summary of
/// how many words were glossed goes to stderr. Prints nothing when the text
/// yields no items.
fn gloss_text(project: &Path, language: &str, text: &str) -> Result<()> {
    let (store, hierarchy, lang_book) = open_lang_book(project, language)?;
    let phon = load_phonology(&store, &hierarchy, &lang_book)?.unwrap_or_default();
    let morph = load_morphology(&store, &hierarchy, &lang_book)?.unwrap_or_default();
    let entries = load_dictionary(&store, &hierarchy, &lang_book)?;

    let index = crate::conlang::morphology::gloss::build_index(&phon, &morph, &entries);
    let items = index.gloss_text(text);
    if items.is_empty() {
        return Ok(());
    }

    let mut surface_line = String::new();
    let mut gloss_line = String::new();
    let mut matched = 0usize;
    for item in &items {
        let gloss = match &item.gloss {
            Some(found) => {
                matched += 1;
                found.clone()
            }
            None => "?".to_string(),
        };
        let width = item.surface.chars().count().max(gloss.chars().count()) + 2;
        surface_line += &format!("{:<width$}", item.surface, width = width);
        gloss_line += &format!("{:<width$}", gloss, width = width);
    }

    println!("{}", surface_line.trim_end());
    println!("{}", gloss_line.trim_end());
    eprintln!("\n{matched} / {} word(s) glossed", items.len());
    Ok(())
}
fn paradigm(
project: &Path,
language: &str,
root: &str,
template: &str,
gloss: Option<&str>,
) -> Result<()> {
let (store, hierarchy, lang_book) = open_lang_book(project, language)?;
let phonology = load_phonology(&store, &hierarchy, &lang_book)?.ok_or_else(|| {
Error::Config(format!("language `{language}` has no phoneme block"))
})?;
let morph = load_morphology(&store, &hierarchy, &lang_book)?.ok_or_else(|| {
Error::Config(format!(
"language `{language}` has no morphology yet — add a `morphemes` / `paradigms` HJSON \
paragraph under its `Grammar` chapter"
))
})?;
let tmpl = morph.paradigm(template).ok_or_else(|| {
Error::Config(format!(
"language `{language}` has no paradigm template `{template}` (have: {})",
morph.paradigms.iter().map(|p| p.name.as_str()).collect::<Vec<_>>().join(", ")
))
})?;
let root_gloss = gloss.unwrap_or(root);
let rows = crate::conlang::morphology::paradigm::generate(
&phonology, &morph, tmpl, root, root_gloss,
);
println!("paradigm `{}` of {root} ({root_gloss}) · {} cell(s)", tmpl.name, rows.len());
for r in &rows {
let feats = r
.features
.iter()
.map(|(k, v)| format!("{k}={v}"))
.collect::<Vec<_>>()
.join(" ");
println!(" {:<18} {:<24} {}", r.form, r.gloss, feats);
}
Ok(())
}
/// Scans the manuscript for words that look like they belong to `language`
/// but have no dictionary entry.
///
/// Every paragraph that is not under a system-tagged ancestor is split into
/// Unicode words and handed, together with the language's phonology and the
/// lower-cased set of known surface forms, to
/// `crate::conlang::lexicon::scan_undefined`. The report is printed as
/// pretty JSON when `json` is set, otherwise as a human-readable list.
/// Paragraphs whose content cannot be read or is not valid UTF-8 are skipped.
///
/// # Errors
/// `Error::Config` when the language has no phoneme block; `Error::Store`
/// when the report cannot be serialised; plus dictionary-load errors.
fn scan_manuscript(project: &Path, language: &str, json: bool) -> Result<()> {
    use std::collections::HashSet;
    use unicode_segmentation::UnicodeSegmentation;

    let (store, hierarchy, lang_book) = open_lang_book(project, language)?;
    let phonology = load_phonology(&store, &hierarchy, &lang_book)?.ok_or_else(|| {
        Error::Config(format!(
            "language `{language}` has no phoneme block — the scan needs the inventory to tell \
             conlang words from prose"
        ))
    })?;

    let entries = load_dictionary(&store, &hierarchy, &lang_book)?;
    let known: HashSet<String> = entries
        .iter()
        .flat_map(|e| e.surface_forms().into_iter().map(|s| s.to_lowercase()))
        .collect();
    if known.is_empty() {
        eprintln!("note: {language} has no dictionary entries yet — nothing anchors the scan");
    }

    let mut paragraphs: Vec<Vec<String>> = Vec::new();
    for node in hierarchy.iter() {
        if node.kind != NodeKind::Paragraph {
            continue;
        }

        // Walk up the parent chain: a paragraph counts as system content when
        // it or any ancestor carries a system tag.
        let mut cursor = Some(node.id);
        let mut is_system = false;
        while let Some(id) = cursor {
            match hierarchy.get(id) {
                Some(n) if n.system_tag.is_some() => {
                    is_system = true;
                    break;
                }
                Some(n) => cursor = n.parent_id,
                None => break,
            }
        }
        if is_system {
            continue;
        }

        let Ok(Some(bytes)) = store.get_content(node.id) else { continue };
        let Ok(body) = std::str::from_utf8(&bytes) else { continue };
        paragraphs.push(body.unicode_words().map(String::from).collect());
    }

    // Fix: the source had `¶graphs` here, an HTML-entity mangling of
    // `&paragraphs`, which is not valid Rust.
    let report = crate::conlang::lexicon::scan_undefined(&phonology, &known, &paragraphs);

    if json {
        println!(
            "{}",
            serde_json::to_string_pretty(&report)
                .map_err(|e| Error::Store(format!("serializing scan: {e}")))?
        );
        return Ok(());
    }

    println!(
        "scan {language} · {} paragraph(s), {} in a conlang context",
        report.paragraphs_scanned, report.conlang_paragraphs
    );
    if report.candidates.is_empty() {
        println!(" ✓ no undefined conlang words found");
        return Ok(());
    }
    println!("\n candidate undefined words ({}):", report.candidates.len());
    for c in &report.candidates {
        println!(" {:<16} ×{}", c.word, c.count);
    }
    eprintln!("\n(heuristic — `add-word` the real ones, fix the typos)");
    Ok(())
}
/// Prints the places and characters linked to a language.
///
/// Places are listed by name; characters are listed with their proficiency
/// level. Prints a hint instead when nothing is linked yet.
///
/// # Errors
/// `Error::Io` when the link table cannot be loaded, plus anything
/// `open_lang_book` reports.
fn speakers(project: &Path, language: &str) -> Result<()> {
    use crate::conlang::links::ConlangLinks;

    let (store, _hierarchy, lang_book) = open_lang_book(project, language)?;
    let links = ConlangLinks::load(store.project_root()).map_err(Error::Io)?;
    let (places, characters) = links.speakers_of(&lang_book.title);

    println!("speakers of {}", lang_book.title);

    let (any_places, any_characters) = (!places.is_empty(), !characters.is_empty());
    if !(any_places || any_characters) {
        println!(" (none linked yet — see `inkhaven language link-place` / `link-character`)");
        return Ok(());
    }

    if any_places {
        println!("\n places ({}):", places.len());
        for place in &places {
            println!(" {place}");
        }
    }
    if any_characters {
        println!("\n characters ({}):", characters.len());
        for (name, level) in &characters {
            println!(" {name:<20} {level}");
        }
    }
    Ok(())
}
#[allow(clippy::too_many_arguments)]
fn query(
project: &Path,
language: &str,
register: Option<&str>,
domain: Option<&str>,
era: Option<&str>,
pos: Option<&str>,
text: Option<&str>,
json: bool,
) -> Result<()> {
let (store, hierarchy, lang_book) = open_lang_book(project, language)?;
let entries = load_dictionary(&store, &hierarchy, &lang_book)?;
let f = crate::conlang::lexicon::Filter { register, domain, era, pos, text };
let matches = crate::conlang::lexicon::filter(&entries, &f);
if json {
println!(
"{}",
serde_json::to_string_pretty(&matches)
.map_err(|e| Error::Store(format!("serializing query: {e}")))?
);
return Ok(());
}
println!("{} / {} entr(y/ies) match", matches.len(), entries.len());
for e in &matches {
let mut tags = Vec::new();
if !e.registers.is_empty() {
tags.push(format!("[{}]", e.registers.join(",")));
}
if !e.domain.is_empty() {
tags.push(format!("{{{}}}", e.domain.join(",")));
}
if let Some(era) = &e.era {
tags.push(format!("<{era}>"));
}
let pos = if e.pos.trim().is_empty() { String::new() } else { format!(" ({})", e.pos) };
println!(
" {:<16} {}{}{}",
e.word,
e.translation,
pos,
if tags.is_empty() { String::new() } else { format!(" {}", tags.join(" ")) }
);
}
Ok(())
}
/// Asks the configured LLM to propose dictionary entries for `language`,
/// filters the proposals, prints the outcome and — with `yes` — writes the
/// kept entries into the language's Dictionary.
///
/// * `topic` / `era` / `register` are forwarded into the prompt; `era` is also
///   stamped onto every entry that gets written.
/// * `provider` is passed to `resolve_provider` to pick the model.
/// * `semantic` enables the embedding-based filter, with
///   `semantic_threshold` handed to `semantic_filter`.
/// * Without `yes` nothing is written (dry run).
///
/// Errors when the language has no phonology block, no `root` templates, when
/// no candidate forms can be generated, or when inference fails. A model reply
/// that cannot be parsed is only reported on stderr; the function then
/// returns `Ok(())`.
#[allow(clippy::too_many_arguments)]
fn generate_lexicon(
project: &Path,
language: &str,
topic: Option<&str>,
count: usize,
era: Option<&str>,
register: Option<&str>,
provider: Option<&str>,
semantic: bool,
semantic_threshold: f32,
yes: bool,
) -> Result<()> {
use crate::conlang::generate::lexicon as lexgen;
let (store, hierarchy, lang_book) = open_lang_book(project, language)?;
let cfg = Config::load_layered(&ProjectLayout::new(project).config_path())?;
let phonology = load_phonology(&store, &hierarchy, &lang_book)?.ok_or_else(|| {
Error::Config(format!(
"language `{language}` has no phoneme block — add `phonemes` / `classes` / `templates` \
HJSON under its `Phonology` chapter first"
))
})?;
if phonology.templates_for(crate::conlang::TemplateRole::Root).is_empty() {
return Err(Error::Config(format!(
"language `{language}` declares no `root` templates — needed to generate forms"
)));
}
let existing = load_dictionary(&store, &hierarchy, &lang_book)?;
// Candidate forms are built locally from the phonology and the existing
// dictionary; the prompt tells the model to choose forms only from this pool.
let pool = lexgen::build_pool(&phonology, &existing, count);
if pool.is_empty() {
return Err(Error::Config(
"could not generate any valid candidate forms — loosen the phonotactic constraints".into(),
));
}
let ai = crate::ai::AiClient::from_config(&cfg.llm)?;
let (model, _env) = ai.resolve_provider(&cfg.llm, provider)?;
// A blank working language in the config falls back to "english".
let work_lang = if cfg.language.trim().is_empty() { "english" } else { cfg.language.trim() };
eprintln!(
"inkhaven language generate-lexicon · {language} · model: {model} · glosses in {work_lang}"
);
let prompt = build_lexgen_prompt(language, topic, count, era, register, work_lang, &pool);
let raw = crate::ai::stream::collect_blocking(
ai.client.clone(),
model.to_string(),
Some(LEXGEN_SYSTEM.to_string()),
prompt,
)
.map_err(|e| Error::Store(format!("inference error: {e}")))?;
let proposals = match lexgen::parse_proposals(&raw) {
Ok(p) => p,
Err(why) => {
// Unparseable reply: dump the raw text for the user and stop without
// returning an error.
eprintln!("could not parse model reply: {why}\n---- raw ----\n{raw}\n---- end ----");
return Ok(());
}
};
// First gate: `dedup` splits the proposals into kept ones and rejected ones
// (each rejected proposal is paired with a reason).
let (mut kept, rejected) = lexgen::dedup(&phonology, &existing, proposals);
let mut near_synonyms: Vec<(lexgen::LexProposal, f32)> = Vec::new();
// Optional second gate: embed the existing and the proposed glosses and let
// `semantic_filter` split the kept proposals using `semantic_threshold`.
if semantic && !kept.is_empty() {
// Existing entries with a blank translation are left out of the comparison set.
let existing_glosses: Vec<&str> = existing
.iter()
.map(|e| e.translation.trim())
.filter(|g| !g.is_empty())
.collect();
let kept_glosses: Vec<&str> = kept.iter().map(|p| p.gloss.trim()).collect();
// Skip the embedding call entirely when there is nothing to compare against.
let existing_vecs = if existing_glosses.is_empty() {
Vec::new()
} else {
store.embed_batch(&existing_glosses)?
};
let kept_vecs = store.embed_batch(&kept_glosses)?;
let (sem_kept, sem_rejected) =
lexgen::semantic_filter(kept, &existing_vecs, &kept_vecs, semantic_threshold);
kept = sem_kept;
near_synonyms = sem_rejected;
}
// Kept proposals go to stdout; both rejection lists go to stderr.
// The count in this header covers the dedup gate only, not near-synonyms.
println!(
"proposed {} entr(y/ies) for {language}{} ({} rejected by the dedup gate):",
kept.len(),
topic.map(|t| format!(" · topic: {t}")).unwrap_or_default(),
rejected.len()
);
for p in &kept {
let pos = if p.pos.trim().is_empty() { "?" } else { p.pos.trim() };
println!(" {:<16} {} ({})", p.form, p.gloss, pos);
}
if !rejected.is_empty() {
eprintln!("\nrejected:");
for (p, reason) in &rejected {
eprintln!(" {:<16} {} — {}", p.form, p.gloss, reason.as_str());
}
}
if !near_synonyms.is_empty() {
eprintln!("\nrejected (near-synonyms, cosine > {semantic_threshold:.2}):");
for (p, sim) in &near_synonyms {
eprintln!(" {:<16} {} — too close ({sim:.2})", p.form, p.gloss);
}
}
if yes {
let mut added = 0usize;
for p in &kept {
// A blank part of speech is stored as "noun" (the listing above shows "?").
// `era` comes from the CLI argument; register and domain come from the proposal.
let entry = ImportEntry {
word: p.form.trim().to_string(),
pos: if p.pos.trim().is_empty() { "noun".into() } else { p.pos.trim().to_string() },
translation: p.gloss.trim().to_string(),
example: p.example.trim().to_string(),
register: p.register.trim().to_string(),
domain: p.domain.iter().map(|d| d.trim().to_string()).filter(|d| !d.is_empty()).collect(),
era: era.unwrap_or("").trim().to_string(),
..Default::default()
};
// A failed insert is reported and skipped; it does not abort the batch.
match add_imported_dictionary_entry(&store, &cfg, &lang_book, &entry) {
Ok(_) => added += 1,
Err(e) => eprintln!(" skipped {}: {e}", p.form),
}
}
eprintln!("\nadded {added} entr(y/ies) to {language}'s Dictionary");
} else {
eprintln!(
"\n(dry run — re-run with --yes to add the {} kept entr(y/ies))",
kept.len()
);
}
Ok(())
}
/// Builds the user prompt for lexicon generation.
///
/// The prompt opens with a constraints sentence (language, entry count and
/// semantic domain, plus optional era and register clauses), followed by the
/// task instructions and the quoted candidate forms from `pool`. A missing
/// `topic` falls back to "core everyday life".
fn build_lexgen_prompt(
    language: &str,
    topic: Option<&str>,
    count: usize,
    era: Option<&str>,
    register: Option<&str>,
    work_lang: &str,
    pool: &[String],
) -> String {
    let domain = match topic {
        Some(named) => named,
        None => "core everyday life",
    };

    // Each candidate form is wrapped in double quotes and comma-separated.
    let mut quoted_forms: Vec<String> = Vec::with_capacity(pool.len());
    for form in pool {
        quoted_forms.push(format!("\"{form}\""));
    }
    let candidates = quoted_forms.join(", ");

    // Optional clauses carry their own leading space so absent ones vanish cleanly.
    let era_clause = era
        .map(|e| format!(" In-world era: {e}."))
        .unwrap_or_default();
    let register_clause = register
        .map(|r| format!(" Register: {r}."))
        .unwrap_or_default();
    let headline = format!(
        "Language: {language}. Produce {count} dictionary entries for the semantic domain: {domain}."
    );
    let constraints = format!("{headline}{era_clause}{register_clause}");

    format!(
        "{constraints}\n\n\
         Pick a coherent set of {count} concepts a culture needs for this domain, then assign each \
         a distinct `form` chosen ONLY from the candidate list below. Write every `gloss` and \
         `example` in {work_lang}. Do not repeat a meaning. Keep `pos` a short lowercase tag. Tag \
         each entry with a `register` and one or two `domain` tags appropriate to its concept.\n\n\
         Candidate forms (choose from these): [{candidates}]\n\n\
         Reply with the JSON object only."
    )
}
fn tone_sandhi(project: &Path, language: &str, tones: &str) -> Result<()> {
let (_store, phonology) = open_phonology(project, language)?;
let system = phonology.tone.as_ref().ok_or_else(|| {
Error::Config(format!(
"language `{language}` declares no `tone` system in its Phonology block"
))
})?;
let input: Vec<String> = tones.split_whitespace().map(String::from).collect();
let surface = crate::conlang::phonology::tone_eval::apply_sandhi(system, &input);
println!("{}", surface.join(" "));
Ok(())
}
fn romanize_text(
project: &Path,
language: &str,
text: &str,
scheme: Option<&str>,
reverse: bool,
) -> Result<()> {
use crate::conlang::phonology::romanize;
let (_store, phonology) = open_phonology(project, language)?;
let scheme_ref = phonology.scheme(scheme).ok_or_else(|| {
Error::Config(match scheme {
Some(s) => format!("language `{language}` has no romanization scheme `{s}`"),
None => format!(
"language `{language}` declares no romanization schemes — add a `romanizations` \
block to its Phonology, or rely on the per-phoneme `romanize` field"
),
})
})?;
if reverse {
let seq = romanize::deromanize(scheme_ref, &phonology, text);
println!("/{}/", seq.join(""));
} else {
let seq: Vec<String> = text.split_whitespace().map(String::from).collect();
println!("{}", romanize::romanize(scheme_ref, &phonology, &seq));
}
Ok(())
}
/// Prints `word` split into syllables (joined by `.`) with `ˈ` in front of
/// the syllable that carries primary stress.
///
/// Each phoneme is shown by its grapheme when the phonology knows it, and by
/// the raw segment otherwise. Fails with a config error when the phonology
/// declares no `stress` rule.
fn stress_word(project: &Path, language: &str, word: &str) -> Result<()> {
    use crate::conlang::phonology::{stress_eval, syllable};

    let (_store, phonology) = open_phonology(project, language)?;
    let rule = phonology.stress.clone().ok_or_else(|| {
        Error::Config(format!(
            "language `{language}` declares no `stress` rule in its Phonology block \
             (e.g. `stress: \"penultimate\"`)"
        ))
    })?;

    let segments = phonology.segment(word);
    let syllables = syllable::syllabify(&phonology, &segments);
    let stressed = stress_eval::primary_stress(&rule, &syllables);

    let mut rendered: Vec<String> = Vec::with_capacity(syllables.len());
    for (index, syl) in syllables.iter().enumerate() {
        // Onset, nucleus and coda are concatenated in that order.
        let mut body = String::new();
        for ipa in syl.onset.iter().chain(&syl.nucleus).chain(&syl.coda) {
            match phonology.phoneme(ipa) {
                Some(p) => body.push_str(&p.grapheme().to_string()),
                None => body.push_str(ipa),
            }
        }
        if stressed == Some(index) {
            body.insert(0, 'ˈ');
        }
        rendered.push(body);
    }

    println!("{}", rendered.join("."));
    Ok(())
}
/// Prints three views of `word`: the underlying segments, the surface form
/// returned by `surface_form`, and the surface form spelled with graphemes.
///
/// A surface segment the phonology does not know is printed as-is in the
/// romanized line.
fn ipa_surface(project: &Path, language: &str, word: &str) -> Result<()> {
    use crate::conlang::phonology::allophony_eval;

    let (_store, phonology) = open_phonology(project, language)?;
    let underlying = phonology.segment(word);
    let surface = allophony_eval::surface_form(&phonology, &underlying);

    let mut romanized = String::new();
    for ipa in surface.iter() {
        match phonology.phoneme(ipa) {
            Some(p) => romanized.push_str(&p.grapheme().to_string()),
            None => romanized.push_str(ipa),
        }
    }

    println!("underlying /{}/", underlying.join(""));
    println!("surface [{}]", surface.join(""));
    println!("romanized {}", romanized);
    Ok(())
}
/// Segments `word`, splits it into syllables and prints the rendered
/// syllabification to stdout; a syllable / phoneme count goes to stderr.
fn syllabify_word(project: &Path, language: &str, word: &str) -> Result<()> {
    use crate::conlang::phonology::syllable;

    let (_store, phonology) = open_phonology(project, language)?;
    let segments = phonology.segment(word);
    let syllables = syllable::syllabify(&phonology, &segments);

    println!("{}", syllable::render(&phonology, &syllables));
    eprintln!(
        "{word} → {} syllable(s), {} phoneme(s)",
        syllables.len(),
        segments.len()
    );
    Ok(())
}
fn generate_word(project: &Path, language: &str, role: &str, count: usize) -> Result<()> {
let role = crate::conlang::TemplateRole::parse(role).ok_or_else(|| {
Error::Config(format!(
"unknown role `{role}` — use root | prefix | suffix | infix | circumfix | compound"
))
})?;
let (_store, phonology) = open_phonology(project, language)?;
if phonology.templates_for(role).is_empty() {
return Err(Error::Config(format!(
"language `{language}` declares no `{}` templates in its Phonology block",
role.as_str()
)));
}
let words = crate::conlang::generate::word::generate_words(&phonology, role, count);
if words.is_empty() {
eprintln!(
"no words satisfied the constraints in {} attempts — loosen the phonotactic constraints",
count
);
return Ok(());
}
for w in &words {
println!("{w}");
}
eprintln!(
"generated {} / {} requested `{}` word(s) for {}",
words.len(),
count,
role.as_str(),
language
);
Ok(())
}
fn open_lang_book(
project: &Path,
language: &str,
) -> Result<(Store, Hierarchy, crate::store::node::Node)> {
let layout = ProjectLayout::new(project);
layout.require_initialized()?;
let cfg = Config::load_layered(&layout.config_path())?;
let store = Store::open(layout, &cfg)?;
let hierarchy = Hierarchy::load(&store)?;
let lang_root = hierarchy
.iter()
.find(|n| {
n.kind == NodeKind::Book && n.system_tag.as_deref() == Some(SYSTEM_TAG_LANGUAGES)
})
.ok_or_else(|| {
Error::Store("Language system book missing — re-open the project to seed it".into())
})?
.clone();
let lang_book = hierarchy
.children_of(Some(lang_root.id))
.into_iter()
.find(|n| n.kind == NodeKind::Book && n.title.eq_ignore_ascii_case(language))
.cloned()
.ok_or_else(|| {
Error::Config(format!(
"language `{language}` not found — run `inkhaven language init {language}` first"
))
})?;
Ok((store, hierarchy, lang_book))
}
fn open_phonology(project: &Path, language: &str) -> Result<(Store, crate::conlang::Phonology)> {
let (store, hierarchy, lang_book) = open_lang_book(project, language)?;
let phonology = load_phonology(&store, &hierarchy, &lang_book)?.ok_or_else(|| {
Error::Config(format!(
"language `{language}` has no phoneme block yet — add `phonemes` / `classes` / \
`templates` HJSON under its `Phonology` chapter (see Documentation/PROPOSALS/LANG-1_PLAN.md)"
))
})?;
Ok((store, phonology))
}
/// Collects every parseable dictionary entry stored under the language's
/// `Dictionary` chapter.
///
/// Walks the chapter's whole subtree and looks only at paragraph nodes.
/// Paragraphs whose content cannot be read, or whose body does not parse into
/// an entry, are skipped silently. A language without a `Dictionary` chapter
/// yields an empty list.
fn load_dictionary(
    store: &Store,
    hierarchy: &Hierarchy,
    lang_book: &crate::store::node::Node,
) -> Result<Vec<crate::language_entry::DictionaryEntry>> {
    let dictionary = hierarchy
        .children_of(Some(lang_book.id))
        .into_iter()
        .find(|n| n.kind == NodeKind::Chapter && n.title.eq_ignore_ascii_case("Dictionary"))
        .cloned();
    let Some(dictionary) = dictionary else {
        return Ok(Vec::new());
    };

    let mut entries = Vec::new();
    for id in hierarchy.collect_subtree(dictionary.id) {
        let paragraph = match hierarchy.get(id) {
            Some(node) if node.kind == NodeKind::Paragraph => node,
            _ => continue,
        };
        // Best-effort: read errors and missing content are both skipped.
        let bytes = match store.get_content(paragraph.id) {
            Ok(Some(bytes)) => bytes,
            _ => continue,
        };
        let text = String::from_utf8_lossy(&bytes);
        match crate::language_entry::parse(&text) {
            Ok(Some(entry)) => entries.push(entry),
            _ => {}
        }
    }
    Ok(entries)
}
fn audit(project: &Path, language: &str, json: bool) -> Result<()> {
let (store, hierarchy, lang_book) = open_lang_book(project, language)?;
let phonology = load_phonology(&store, &hierarchy, &lang_book)?.unwrap_or_default();
let entries = load_dictionary(&store, &hierarchy, &lang_book)?;
let report = crate::conlang::lexicon::analyze(&phonology, &entries);
if json {
println!("{}", serde_json::to_string_pretty(&report).map_err(|e| {
Error::Store(format!("serializing lexicon report: {e}"))
})?);
return Ok(());
}
println!("lexicon audit · {language} · {} entr(y/ies)", report.total);
if report.issue_count() == 0 {
println!(" ✓ no issues");
return Ok(());
}
if !report.phonotactic_violations.is_empty() {
println!("\n ⚠ phonotactic violations ({}):", report.phonotactic_violations.len());
for v in &report.phonotactic_violations {
println!(" {} (/{}/) breaks the language's constraints", v.headword, v.underlying);
}
}
if !report.homophones.is_empty() {
println!("\n ⚠ homophones ({} group(s)):", report.homophones.len());
for c in &report.homophones {
let m = c.members.iter().map(|m| format!("{} ({})", m.headword, m.gloss)).collect::<Vec<_>>();
println!(" [{}] {}", c.key, m.join(", "));
}
}
if !report.duplicate_meanings.is_empty() {
println!("\n ⚠ duplicate meanings ({} group(s)):", report.duplicate_meanings.len());
for c in &report.duplicate_meanings {
let m = c.members.iter().map(|m| m.headword.clone()).collect::<Vec<_>>();
println!(" \"{}\" — {}", c.key, m.join(", "));
}
}
Ok(())
}
fn load_phonology(
store: &Store,
hierarchy: &Hierarchy,
lang_book: &crate::store::node::Node,
) -> Result<Option<crate::conlang::Phonology>> {
let Some(chapter) = hierarchy
.children_of(Some(lang_book.id))
.into_iter()
.find(|n| n.kind == NodeKind::Chapter && n.title.eq_ignore_ascii_case("Phonology"))
.cloned()
else {
return Ok(None);
};
for para in hierarchy.children_of(Some(chapter.id)) {
if para.kind != NodeKind::Paragraph {
continue;
}
let Some(bytes) = store.get_content(para.id)? else {
continue;
};
let body = String::from_utf8_lossy(&bytes);
match crate::conlang::Phonology::from_hjson(&body) {
Ok(Some(p)) if !p.phonemes.is_empty() => return Ok(Some(p)),
Ok(_) => continue,
Err(e) => return Err(Error::Config(e)),
}
}
Ok(None)
}
/// Titles of the chapters `scaffold_language_chapters` creates, in this
/// order, under every new language book. The "Meta" chapter additionally gets
/// a seeded `overview` paragraph.
const STANDARD_CHAPTERS: &[&str] = &[
"Meta",
"Dictionary",
"Grammar",
"Phonology",
"Sample texts",
];
/// HJSON template that `scaffold_language_chapters` writes into the
/// `overview` paragraph of a new language's `Meta` chapter. The text is
/// stored verbatim, comments included, so every byte of the literal is
/// user-visible content.
const META_OVERVIEW_BODY: &str = "{
// ──────────────────────────────────────────────────
// IDENTITY
// ──────────────────────────────────────────────────
// Display name for the language.
name: \"\"
// Sibling languages (e.g. Elvish, Romance, Slavic).
// Phase D.2 will use this for cross-language family
// browsing in the sidebar.
family: \"\"
// \"constructed\" | \"natural\" — drives default
// assumptions in the AI translator. Constructed
// languages get stricter adherence to the explicit
// rules below; natural languages let the LLM lean
// more on its pretraining.
language_kind: constructed
// Optional ISO 639-3 code (e.g. \"qya\" for Quenya).
// Used by the multilingual prompt resolver when
// mixing this language with the project's working
// language flow.
iso_code: \"\"
// ──────────────────────────────────────────────────
// ORTHOGRAPHY
// ──────────────────────────────────────────────────
// Alphabet entries in canonical order. For non-
// Latin orthographies, override with the author's
// declared groupings:
// * paired-case Latin: [\"Aa\", \"Bb\", \"Cc\"]
// * Hebrew letter names: [\"Aleph\", \"Beth\", \"Gimel\"]
// * Greek: [\"Α\", \"Β\", \"Γ\"]
// * Cyrillic: [\"А\", \"Б\", \"В\"]
// * Polish digraphs: [\"A\", \"Cz\", \"Dz\", \"Sz\"]
// Drives Dictionary bucket auto-creation in
// `inkhaven language add-word` and the in-TUI `+`
// chord.
alphabet: [\"A\", \"B\", \"C\", \"D\", \"E\", \"F\", \"G\", \"H\", \"I\",
\"J\", \"K\", \"L\", \"M\", \"N\", \"O\", \"P\", \"Q\", \"R\",
\"S\", \"T\", \"U\", \"V\", \"W\", \"X\", \"Y\", \"Z\"]
// \"ltr\" (default) | \"rtl\" | \"ttb\" (top-to-bottom)
reading_direction: ltr
// Script / writing system name (Latin, Cyrillic,
// Tengwar, Devanagari, …). Free-form; informational.
script: \"\"
// ──────────────────────────────────────────────────
// LINGUISTIC SHAPE — quick-reference summary the
// AI translator reads before composing prompts.
// ──────────────────────────────────────────────────
// Word order: SVO | SOV | VSO | VOS | OSV | OVS | free
word_order: \"\"
// Morphological type: isolating | agglutinative |
// fusional | polysynthetic | mixed
morphology: \"\"
// Tonal: true | false (informational only).
tonal: false
// Has grammatical case (declension)?
has_cases: false
// Has grammatical gender?
has_gender: false
// ──────────────────────────────────────────────────
// RUNTIME / TOOLING
// ──────────────────────────────────────────────────
// Optional Snowball stemmer algo name (\"english\",
// \"russian\", \"french\", \"spanish\", \"german\").
// Rare for conlangs — leave empty to let the
// lexicon overlay rely on the dictionary
// `inflection` paradigm fields instead.
stemmer: \"\"
// Free-form citation for the canonical sample
// corpus the LLM should treat as authoritative
// (Tolkien's Etymologies, Klingon Dictionary, etc.).
example_corpus_ref: \"\"
// ──────────────────────────────────────────────────
// NOTES
// ──────────────────────────────────────────────────
// Worldbuilding context — who speaks the language,
// where, in what era, what register. Read by the
// human author; the LLM only consumes the
// structured fields above when composing
// translation prompts.
notes: \"\"
}
";
fn init(project: &Path, name: &str) -> Result<()> {
let layout = ProjectLayout::new(project);
layout.require_initialized()?;
let cfg = Config::load_layered(&layout.config_path())?;
let store = Store::open(layout, &cfg)?;
let hierarchy = Hierarchy::load(&store)?;
let lang_book = hierarchy
.iter()
.find(|n| {
n.kind == NodeKind::Book
&& n.system_tag.as_deref() == Some(SYSTEM_TAG_LANGUAGES)
})
.cloned()
.ok_or_else(|| {
Error::Store(
"Language system book missing — re-open the project to seed it"
.into(),
)
})?;
if hierarchy
.children_of(Some(lang_book.id))
.iter()
.any(|n| n.title.eq_ignore_ascii_case(name))
{
return Err(Error::Config(format!(
"language `{name}` already exists under Language"
)));
}
let hierarchy = Hierarchy::load(&store)?;
let per_lang = store.create_node(
&cfg,
&hierarchy,
NodeKind::Book,
name,
Some(&lang_book),
None,
InsertPosition::End,
)?;
eprintln!(
"created language book `{name}` at {}",
hierarchy.slug_path(&per_lang),
);
scaffold_language_chapters(&store, &cfg, &per_lang, |chapter_title| {
eprintln!(" · {chapter_title}");
})?;
eprintln!("\nNext steps:");
eprintln!(
" · edit `Language/{name}/Meta/overview` to set the alphabet + metadata"
);
eprintln!(
" · add dictionary entries under `Language/{name}/Dictionary` (`inkhaven language add-word`)"
);
eprintln!(
" · add grammar rules under `Language/{name}/Grammar` for the AI translation flow"
);
Ok(())
}
/// Creates the `STANDARD_CHAPTERS` under `per_lang`, in order, and seeds the
/// `Meta` chapter with an `overview` paragraph holding `META_OVERVIEW_BODY`.
///
/// `on_chapter` is called with each chapter title right after that chapter
/// has been created. The hierarchy is reloaded before every node creation so
/// each call sees the nodes created before it.
pub(crate) fn scaffold_language_chapters(
    store: &Store,
    cfg: &Config,
    per_lang: &crate::store::node::Node,
    mut on_chapter: impl FnMut(&str),
) -> Result<()> {
    for &title in STANDARD_CHAPTERS {
        let snapshot = Hierarchy::load(store)?;
        let chapter = store.create_node(
            cfg,
            &snapshot,
            NodeKind::Chapter,
            title,
            Some(per_lang),
            None,
            InsertPosition::End,
        )?;
        on_chapter(title);

        if title != "Meta" {
            continue;
        }

        let snapshot = Hierarchy::load(store)?;
        let mut overview = store.create_node(
            cfg,
            &snapshot,
            NodeKind::Paragraph,
            "overview",
            Some(&chapter),
            None,
            InsertPosition::End,
        )?;
        overview.content_type = Some(String::from("hjson"));

        let seed = META_OVERVIEW_BODY.as_bytes();
        // The template is written to the node's file (when it has one) and
        // then also pushed through the store.
        if let Some(rel) = overview.file.as_ref() {
            let target = store.project_root().join(rel);
            std::fs::write(&target, seed)
                .map_err(|e| Error::Store(format!("write overview: {e}")))?;
        }
        store
            .update_paragraph_content(&mut overview, seed)
            .map_err(|e| Error::Store(format!("seed overview: {e}")))?;
    }
    Ok(())
}
fn add_word(
project: &Path,
language: &str,
word: &str,
pos: &str,
translation: &str,
example: Option<&str>,
) -> Result<()> {
let layout = ProjectLayout::new(project);
layout.require_initialized()?;
let cfg = Config::load_layered(&layout.config_path())?;
let store = Store::open(layout, &cfg)?;
let hierarchy = Hierarchy::load(&store)?;
let lang_root = hierarchy
.iter()
.find(|n| {
n.kind == NodeKind::Book
&& n.system_tag.as_deref() == Some(SYSTEM_TAG_LANGUAGES)
})
.ok_or_else(|| {
Error::Store(
"Language system book missing — re-open the project to seed it"
.into(),
)
})?
.clone();
let lang_book = hierarchy
.children_of(Some(lang_root.id))
.into_iter()
.find(|n| {
n.kind == NodeKind::Book && n.title.eq_ignore_ascii_case(language)
})
.cloned()
.ok_or_else(|| {
Error::Config(format!(
"language `{language}` not found — run `inkhaven language init {language}` first"
))
})?;
let (entry, bucket) = add_dictionary_entry_impl(
&store,
&cfg,
&lang_book,
word,
pos,
translation,
example,
)?;
let _ = entry;
eprintln!(
"added `{word}` to `{language}/Dictionary/{bucket}` ({pos} · {translation})"
);
Ok(())
}
/// Creates a dictionary entry for `word` from the four basic fields.
///
/// The entry body comes from `seed_dictionary_entry_body`; placement and
/// storage are handled by `create_dictionary_entry`. Returns the new
/// paragraph node and the alphabet bucket it was filed under.
pub(crate) fn add_dictionary_entry_impl(
    store: &Store,
    cfg: &Config,
    lang_book: &crate::store::node::Node,
    word: &str,
    pos: &str,
    translation: &str,
    example: Option<&str>,
) -> Result<(crate::store::node::Node, String)> {
    let seeded = seed_dictionary_entry_body(word, pos, translation, example);
    create_dictionary_entry(store, cfg, lang_book, word, seeded.as_str())
}
/// One dictionary entry in the shape accepted by
/// `add_imported_dictionary_entry`.
///
/// `build_imported_entry_body` turns it into an HJSON body: `word`, `pos`
/// (written as `type`) and `translation` are always emitted; every other
/// field is emitted only when non-empty. Callers can fill the fields they
/// have and take the rest from `Default`.
#[derive(Debug, Default, Clone)]
pub(crate) struct ImportEntry {
// Headword; also used as the title of the created paragraph.
pub word: String,
// Part of speech — written under the HJSON key `type`.
pub pos: String,
// Written under the HJSON key `translation`.
pub translation: String,
// Single example — HJSON key `example`.
pub example: String,
// HJSON key `pronunciation`.
pub pronunciation: String,
// HJSON key `etymology`.
pub etymology: String,
// Written as a one-line array under `related`.
pub related: Vec<String>,
// Written as a nested object under `inflection`, one line per key
// (BTreeMap, so keys come out sorted).
pub inflection: std::collections::BTreeMap<String, String>,
// Written as a multi-line array under `examples`.
pub examples: Vec<String>,
// HJSON key `register`.
pub register: String,
// HJSON key `era`.
pub era: String,
// HJSON key `notes`.
pub notes: String,
// Written as a one-line array under `domain`.
pub domain: Vec<String>,
}
/// Creates a dictionary entry from a fully populated `ImportEntry`.
///
/// The body is rendered by `build_imported_entry_body`; placement and storage
/// are handled by `create_dictionary_entry`. Returns the new paragraph node
/// and the alphabet bucket it was filed under.
pub(crate) fn add_imported_dictionary_entry(
    store: &Store,
    cfg: &Config,
    lang_book: &crate::store::node::Node,
    entry: &ImportEntry,
) -> Result<(crate::store::node::Node, String)> {
    let rendered = build_imported_entry_body(entry);
    let headword = &entry.word;
    create_dictionary_entry(store, cfg, lang_book, headword, &rendered)
}
/// Files `word` under the right alphabet bucket of the language's
/// `Dictionary` chapter and stores `body` as the entry's HJSON content.
///
/// Steps:
/// 1. locate the `Dictionary` chapter of `lang_book`;
/// 2. pick the bucket — from the language's Meta overview when
///    `derive_alphabet_bucket` yields one, else from `alphabet_bucket`;
/// 3. reuse the bucket subchapter (exact title match) or create it;
/// 4. refuse a word whose title already exists in that bucket
///    (ASCII case-insensitive);
/// 5. create the paragraph, mark it `hjson`, write and register the body.
///
/// Returns the created paragraph node and the bucket title.
///
/// The hierarchy is loaded once and reloaded only after a bucket subchapter
/// has actually been created — the only point at which the loaded snapshot
/// goes stale before the entry itself is created.
fn create_dictionary_entry(
    store: &Store,
    cfg: &Config,
    lang_book: &crate::store::node::Node,
    word: &str,
    body: &str,
) -> Result<(crate::store::node::Node, String)> {
    let mut hierarchy = Hierarchy::load(store)?;

    let dictionary = hierarchy
        .children_of(Some(lang_book.id))
        .into_iter()
        .find(|n| n.kind == NodeKind::Chapter && n.title.eq_ignore_ascii_case("Dictionary"))
        .cloned()
        .ok_or_else(|| {
            Error::Config(format!(
                "language `{}` has no `Dictionary` chapter — likely scaffolded with a pre-Phase-A inkhaven",
                lang_book.title
            ))
        })?;

    let bucket = derive_alphabet_bucket(store, &hierarchy, lang_book, word)?
        .or_else(|| alphabet_bucket(word))
        .ok_or_else(|| {
            Error::Config(format!("could not derive alphabet bucket from `{word}`"))
        })?;

    let existing_bucket = hierarchy
        .children_of(Some(dictionary.id))
        .into_iter()
        .find(|n| n.kind == NodeKind::Subchapter && n.title == bucket)
        .cloned();
    let subchapter = match existing_bucket {
        Some(existing) => existing,
        None => {
            let created = store.create_node(
                cfg,
                &hierarchy,
                NodeKind::Subchapter,
                &bucket,
                Some(&dictionary),
                None,
                InsertPosition::End,
            )?;
            // The snapshot predates the new subchapter; refresh it so the
            // entry below is created against the current tree.
            hierarchy = Hierarchy::load(store)?;
            created
        }
    };

    let already_defined = hierarchy
        .children_of(Some(subchapter.id))
        .iter()
        .any(|n| n.title.eq_ignore_ascii_case(word));
    if already_defined {
        return Err(Error::Config(format!(
            "word `{word}` already defined under `{}/Dictionary/{bucket}`",
            lang_book.title
        )));
    }

    let mut entry = store.create_node(
        cfg,
        &hierarchy,
        NodeKind::Paragraph,
        word,
        Some(&subchapter),
        None,
        InsertPosition::End,
    )?;
    entry.content_type = Some("hjson".to_string());

    // The body is written to the node's file (when it has one) and then also
    // pushed through the store.
    if let Some(rel) = &entry.file {
        let abs = store.project_root().join(rel);
        std::fs::write(&abs, body.as_bytes())
            .map_err(|e| Error::Store(format!("write entry: {e}")))?;
    }
    store
        .update_paragraph_content(&mut entry, body.as_bytes())
        .map_err(|e| Error::Store(format!("seed entry: {e}")))?;

    Ok((entry, bucket))
}
/// Renders an `ImportEntry` as the HJSON body of a dictionary paragraph.
///
/// `word`, `type` (from `entry.pos`) and `translation` are always written.
/// The remaining fields are written only when non-empty, in this order:
/// `example`, `examples`, `pronunciation`, `etymology`, `related`,
/// `inflection`, `register`, `era`, `notes`, `domain`. All values go through
/// `escape_hjson`.
///
/// Inflection keys come from imported data. A key that is empty or contains
/// anything besides letters, digits, `_`, `-` or `.` is written as a quoted,
/// escaped string, because an unquoted HJSON member name may not contain
/// whitespace or `{}[],:`. Plain keys are written bare, as before.
fn build_imported_entry_body(entry: &ImportEntry) -> String {
    /// Appends one ` key: "value"` line with the value escaped.
    fn push_field(out: &mut String, key: &str, value: &str) {
        out.push_str(&format!(" {key}: \"{}\"\n", escape_hjson(value)));
    }

    /// Like `push_field`, but writes nothing for an empty value.
    fn push_optional(out: &mut String, key: &str, value: &str) {
        if !value.is_empty() {
            push_field(out, key, value);
        }
    }

    /// Renders `items` as the inside of a one-line array: `"a", "b"`.
    fn quoted_list(items: &[String]) -> String {
        items
            .iter()
            .map(|item| format!("\"{}\"", escape_hjson(item)))
            .collect::<Vec<_>>()
            .join(", ")
    }

    /// Returns `key` as it must appear as a member name: bare when that is
    /// safe, otherwise quoted and escaped.
    fn inflection_key(key: &str) -> String {
        let bare_ok = !key.is_empty()
            && key
                .chars()
                .all(|c| c.is_alphanumeric() || matches!(c, '_' | '-' | '.'));
        if bare_ok {
            key.to_string()
        } else {
            format!("\"{}\"", escape_hjson(key))
        }
    }

    let mut out = String::from("{\n");
    push_field(&mut out, "word", &entry.word);
    push_field(&mut out, "type", &entry.pos);
    push_field(&mut out, "translation", &entry.translation);
    push_optional(&mut out, "example", &entry.example);

    if !entry.examples.is_empty() {
        out.push_str(" examples: [\n");
        for ex in &entry.examples {
            out.push_str(&format!(" \"{}\"\n", escape_hjson(ex)));
        }
        out.push_str(" ]\n");
    }

    push_optional(&mut out, "pronunciation", &entry.pronunciation);
    push_optional(&mut out, "etymology", &entry.etymology);

    if !entry.related.is_empty() {
        out.push_str(&format!(" related: [{}]\n", quoted_list(&entry.related)));
    }

    if !entry.inflection.is_empty() {
        out.push_str(" inflection: {\n");
        for (key, value) in &entry.inflection {
            out.push_str(&format!(
                " {}: \"{}\"\n",
                inflection_key(key),
                escape_hjson(value)
            ));
        }
        out.push_str(" }\n");
    }

    push_optional(&mut out, "register", &entry.register);
    push_optional(&mut out, "era", &entry.era);
    push_optional(&mut out, "notes", &entry.notes);

    if !entry.domain.is_empty() {
        out.push_str(&format!(" domain: [{}]\n", quoted_list(&entry.domain)));
    }

    out.push_str("}\n");
    out
}
/// HJSON template for a new grammar-rule paragraph. The text is meant to be
/// stored verbatim, comments included, so every byte of the literal is
/// user-visible content.
// NOTE(review): the code that writes this template is outside this chunk —
// presumably the `define-rule` command; confirm before relying on that.
pub(crate) const GRAMMAR_RULE_SEED_BODY: &str = "{
// ──────────────────────────────────────────────────
// IDENTITY
// ──────────────────────────────────────────────────
// Identifier the AI translation prompt references
// in applied-rules lists. Lowercase + hyphens.
// Example: \"noun-case-system\",
// \"verb-tense-aspect\", \"reduplication\".
rule_id: \"\"
// Human-readable title for the rule card renderer.
title: \"\"
// Category — drives Phase D.2 grammar export
// sectioning AND the in-prompt grouping.
// morphology — word-formation, inflection
// syntax — clause structure, word order
// phonology — sound rules
// orthography — spelling conventions
// semantics — meaning relationships
// pragmatics — usage / discourse rules
category: \"\"
// ──────────────────────────────────────────────────
// RULE BODY — read by both the LLM and the human.
// Plain text inside an HJSON multi-line string;
// tabular layouts work fine.
// ──────────────────────────────────────────────────
rule: '''
Describe the rule here. This text is fed
verbatim to the AI translator at translation
time, so be explicit:
* State the input → output transformation.
* Show the morpheme boundaries (- or .).
* Show ALL exceptions inline so the LLM
doesn't have to guess.
Example layout for a case system:
NOM: zero suffix. aran (king)
ACC: -n. aran → aranin
DAT: -en. aran → aranen
GEN: -o. aran → arano
'''
// ──────────────────────────────────────────────────
// FEW-SHOT EXAMPLES — bundled into the translation
// prompt envelope so the LLM sees the rule applied.
// ──────────────────────────────────────────────────
examples: [
// { source: \"\", target: \"\", gloss: \"\" }
// { source: \"\", target: \"\", gloss: \"\" }
]
// ──────────────────────────────────────────────────
// RAG TRIGGERING — when this rule should be
// included in the translation prompt envelope.
// ──────────────────────────────────────────────────
// Plain-language condition the LLM evaluates
// against the source sentence. Tight applies_when
// keeps the prompt focused (Phase C envelope
// includes only matching rules; default cap is 6).
applies_when: \"\"
// Sibling rules this one builds on, by rule_id.
// The RAG layer pulls dependent rules
// automatically. Example: a verb-conjugation
// rule depends on the stem-formation rule.
depends_on: []
// Rules that conflict with this one — only one
// should fire per translation pass. Phase D.2
// `language doctor` will surface conflicting
// pairs that lack an `applies_when` disambiguator.
conflicts_with: []
// ──────────────────────────────────────────────────
// METADATA / NOTES
// ──────────────────────────────────────────────────
// Productivity — how broadly the rule applies.
// \"core\" — fires on most sentences
// \"common\" — fires on a recognisable
// subset of constructions
// \"specialised\" — narrow / register-bound
// \"vestigial\" — historical residue only
productivity: \"\"
// Register / style restrictions, if any:
// formal | informal | literary | sacred | archaic.
register: \"\"
// Author's notes — historical motivation,
// worldbuilding rationale, comparison to natural-
// language analogues. Not read by the LLM.
notes: \"\"
}
";
/// HJSON template for a new phonology-rule paragraph. The text is meant to be
/// stored verbatim, comments included, so every byte of the literal is
/// user-visible content.
// NOTE(review): the code that writes this template is outside this chunk —
// presumably the `define-rule` command; confirm before relying on that.
pub(crate) const PHONOLOGY_RULE_SEED_BODY: &str = "{
// ──────────────────────────────────────────────────
// IDENTITY
// ──────────────────────────────────────────────────
// Identifier — lowercase + hyphens. Referenced by
// grammar rules' `depends_on` field and by the
// phonotactic generator (`Ctrl+B Shift+W` in the
// Language book — Phase D.2).
// Examples: \"consonant-inventory\",
// \"vowel-harmony\", \"syllable-template\",
// \"intervocalic-voicing\".
rule_id: \"\"
// Human-readable title for the rule card renderer.
title: \"\"
// Category — drives Phase D.2 phonology export
// sectioning AND the phonotactic generator's
// weighting.
// consonants — IPA inventory of consonants
// vowels — IPA inventory of vowels
// phonotactics — allowed onset / nucleus / coda
// syllable — syllable template (CV, CVC, …)
// stress — stress placement rule
// tone — tonal system / pitch rules
// sound-changes — historical or allophonic shifts
// prosody — intonation / rhythm patterns
category: \"\"
// ──────────────────────────────────────────────────
// RULE BODY — read by both the LLM and the human.
// ──────────────────────────────────────────────────
rule: '''
Describe the rule here. Use IPA inside
/slashes/ for phonemic and [brackets] for
phonetic.
Example layouts:
Phonotactic template:
ONSET: zero | C | CC (only stop+liquid)
NUCLEUS: V | VV (long vowels)
CODA: zero | C | CC (limited to /s, n, r, l/)
Sound change:
/s/ → [z] / V_V (intervocalic voicing)
Vowel harmony:
Front vowels {i, e} co-occur in roots;
back vowels {a, o, u} co-occur in roots;
suffixes harmonise with the root.
'''
// ──────────────────────────────────────────────────
// INVENTORIES — for consonants / vowels categories.
// ──────────────────────────────────────────────────
// List of phonemes (IPA strings). Optional; used
// by the phonotactic generator to constrain output.
// phonemes: []
// Allophonic variants by environment. Map of
// phoneme → list of (environment, realisation).
// allophones: {}
// ──────────────────────────────────────────────────
// ENVIRONMENT — for sound-changes / allophony.
// ──────────────────────────────────────────────────
// Where the rule applies (LLM evaluates against the
// source's phonetic context).
// environment: \"\"
// ──────────────────────────────────────────────────
// EXAMPLES — IPA pairs showing the rule in action.
// ──────────────────────────────────────────────────
examples: [
// { input: \"\", output: \"\", gloss: \"\" }
]
// Known exceptions — words / morphemes where the
// rule does NOT apply.
exceptions: []
// ──────────────────────────────────────────────────
// NOTES
// ──────────────────────────────────────────────────
// Register / style restrictions, if any.
register: \"\"
// Author's notes — historical motivation, source
// dialect, comparison to natural-language analogues.
notes: \"\"
}
";
/// Fallback bucket for a word: its first non-whitespace character,
/// uppercased.
///
/// Returns `None` for an empty or all-whitespace word. The result can be
/// longer than one character when the uppercase mapping expands
/// (e.g. `ß` → `SS`).
fn alphabet_bucket(word: &str) -> Option<String> {
    word.trim_start()
        .chars()
        .next()
        .map(|first| first.to_uppercase().collect())
}
fn derive_alphabet_bucket(
store: &Store,
hierarchy: &Hierarchy,
lang_book: &crate::store::node::Node,
word: &str,
) -> Result<Option<String>> {
let Some(meta_chapter) = hierarchy
.children_of(Some(lang_book.id))
.into_iter()
.find(|n| {
n.kind == NodeKind::Chapter && n.title.eq_ignore_ascii_case("Meta")
})
.cloned()
else {
return Ok(None);
};
let Some(overview) = hierarchy
.children_of(Some(meta_chapter.id))
.into_iter()
.find(|n| {
n.kind == NodeKind::Paragraph && n.title.eq_ignore_ascii_case("overview")
})
.cloned()
else {
return Ok(None);
};
let Some(bytes) = store.get_content(overview.id)? else {
return Ok(None);
};
let body = std::str::from_utf8(&bytes).map_err(|e| {
Error::Config(format!("Meta/overview body is not UTF-8: {e}"))
})?;
let meta = match crate::language_entry::parse_meta_overview(body)
.map_err(Error::Config)?
{
Some(m) => m,
None => return Ok(None),
};
Ok(meta.bucket_for_word(word).map(|s| s.to_string()))
}
/// Builds the HJSON text used to seed a new dictionary entry.
///
/// `word`, `pos`, `translation` and `example` are passed through
/// `escape_hjson` and interpolated into the `word`, `type`, `translation`
/// and `example` fields respectively. A missing `example` becomes an empty
/// string, and a supplied one is trimmed first. Every other field in the
/// template is emitted as a commented-out placeholder, except `notes`,
/// which is emitted as an empty string.
fn seed_dictionary_entry_body(
word: &str,
pos: &str,
translation: &str,
example: Option<&str>,
) -> String {
// `None` and whitespace-only examples both collapse to "".
let example_value = example.unwrap_or("").trim();
// Each template line ends in `\`, so the newline and any leading
// whitespace of the following source line are not part of the output.
format!(
"{{\n \
// ──────────────────────────────────────────────────\n \
// CORE — required for the entry to function as a\n \
// lexicon-overlay target + translation-prompt source.\n \
// ──────────────────────────────────────────────────\n \
\n \
word: \"{word}\"\n \
\n \
// Part of speech. Free-form string; the\n \
// proposal suggests: noun | verb | adjective |\n \
// adverb | pronoun | preposition | conjunction |\n \
// interjection | particle. Language-specific\n \
// categories (\"classifier\", \"evidential\",\n \
// \"applicative\") are fine.\n \
type: \"{pos}\"\n \
\n \
// Working-language gloss — what this word\n \
// means in the project's `language` (the value\n \
// the AI translator maps to/from).\n \
translation: \"{translation}\"\n \
\n \
// Canonical sample sentence the author wants\n \
// frozen into the entry. Becomes few-shot\n \
// anchor data in the translation prompt.\n \
example: \"{example}\"\n \
\n \
// ──────────────────────────────────────────────────\n \
// OPTIONAL — uncomment and fill the ones you need.\n \
// Each is consumed by either the translation\n \
// prompt envelope (Phase C) or the future\n \
// dictionary card renderer (Phase D.2).\n \
// ──────────────────────────────────────────────────\n \
\n \
// Additional example sentences beyond the\n \
// canonical one. Phase C translation flow\n \
// uses every example as few-shot data.\n \
// examples: [\n \
// \"\"\n \
// \"\"\n \
// ]\n \
\n \
// IPA transcription (between slashes for\n \
// phonemic, brackets for phonetic).\n \
// pronunciation: \"\"\n \
\n \
// Etymology / derivation. Plain text or\n \
// [[wikilink]] style cross-reference to a\n \
// proto-form entry.\n \
// etymology: \"\"\n \
\n \
// Cross-references to sibling entries — other\n \
// words in this language that share roots,\n \
// contrast in register, or commonly co-occur.\n \
// related: []\n \
\n \
// Paradigm forms. Every VALUE here gets\n \
// added to the lexicon overlay so inflected\n \
// words light up in prose alongside the\n \
// lemma. KEY names are free-form and feed\n \
// the translation prompt as paradigm hints.\n \
// inflection: {{\n \
// plural: \"\"\n \
// genitive: \"\"\n \
// accusative: \"\"\n \
// dative: \"\"\n \
// ablative: \"\"\n \
// }}\n \
\n \
// Register / style: formal | informal |\n \
// archaic | literary | colloquial | sacred.\n \
// register: \"\"\n \
\n \
// Era — when the word entered the language.\n \
// Useful for historical-fiction projects.\n \
// era: \"\"\n \
\n \
// Auto-tracked count of mentions in the\n \
// manuscript. Phase D.2 `language doctor`\n \
// updates this; leave 0 for now.\n \
// frequency: 0\n \
\n \
// Free-form usage notes — register cues,\n \
// taboos, mnemonic etymology, whatever\n \
// helps you remember the word.\n \
notes: \"\"\n\
}}\n",
// User-supplied values are escaped so quotes and backslashes cannot
// terminate the surrounding HJSON string early.
word = escape_hjson(word),
pos = escape_hjson(pos),
translation = escape_hjson(translation),
example = escape_hjson(example_value),
)
}
/// Escapes `s` for embedding between double quotes in an HJSON document.
///
/// Double-quoted HJSON strings follow JSON string rules, so besides `\` and
/// `"` this also escapes control characters: a raw newline, tab or carriage
/// return inside the quotes would otherwise end or corrupt the value.
/// Common controls use their short escapes (`\n`, `\r`, `\t`); any other
/// character below U+0020 is written as `\u00XX`. All other characters are
/// copied unchanged.
fn escape_hjson(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as u32)),
            c => out.push(c),
        }
    }
    out
}
fn doctor(project: &Path, language: &str, json: bool) -> Result<()> {
use crate::store::node::NodeKind;
let layout = ProjectLayout::new(project);
layout.require_initialized()?;
let cfg = Config::load_layered(&layout.config_path())?;
let store = Store::open(layout, &cfg)?;
let hierarchy = Hierarchy::load(&store)?;
let lang_root = hierarchy
.iter()
.find(|n| {
n.kind == NodeKind::Book
&& n.system_tag.as_deref() == Some(SYSTEM_TAG_LANGUAGES)
})
.cloned()
.ok_or_else(|| {
Error::Store(
"Language system book missing — re-open the project to seed it".into(),
)
})?;
let lang_book = hierarchy
.children_of(Some(lang_root.id))
.into_iter()
.find(|n| {
n.kind == NodeKind::Book && n.title.eq_ignore_ascii_case(language)
})
.cloned()
.ok_or_else(|| {
Error::Config(format!(
"language `{language}` not found — run `inkhaven language init {language}` first"
))
})?;
let chapters = hierarchy.children_of(Some(lang_book.id));
let mut dict_entries: Vec<(String, crate::language_entry::DictionaryEntry)> =
Vec::new();
let mut dict_unparseable = 0usize;
let mut grammar_count = 0usize;
let mut phonology_count = 0usize;
let mut sample_count = 0usize;
let mut meta: Option<crate::language_entry::MetaOverview> = None;
for chapter in &chapters {
let title_lc = chapter.title.to_lowercase();
let paragraphs: Vec<_> = hierarchy
.collect_subtree(chapter.id)
.into_iter()
.filter_map(|id| hierarchy.get(id))
.filter(|n| n.kind == NodeKind::Paragraph)
.cloned()
.collect();
match title_lc.as_str() {
"dictionary" => {
for p in ¶graphs {
let Ok(Some(bytes)) = store.get_content(p.id) else {
continue;
};
let Ok(body) = std::str::from_utf8(&bytes) else {
continue;
};
match crate::language_entry::parse(body) {
Ok(Some(e)) => dict_entries.push((p.title.clone(), e)),
Ok(None) => dict_unparseable += 1,
Err(_) => dict_unparseable += 1,
}
}
}
"grammar" => grammar_count = paragraphs.len(),
"phonology" => phonology_count = paragraphs.len(),
"sample texts" => sample_count = paragraphs.len(),
"meta" => {
for p in ¶graphs {
if p.title.eq_ignore_ascii_case("overview") {
let Ok(Some(bytes)) = store.get_content(p.id) else {
continue;
};
if let Ok(body) = std::str::from_utf8(&bytes) {
if let Ok(Some(m)) =
crate::language_entry::parse_meta_overview(body)
{
meta = Some(m);
}
}
}
}
}
_ => {}
}
}
let total_entries = dict_entries.len();
let with_examples = dict_entries
.iter()
.filter(|(_, e)| !e.example.trim().is_empty())
.count();
let with_inflection = dict_entries
.iter()
.filter(|(_, e)| !e.inflection.is_empty())
.count();
let missing_examples = total_entries.saturating_sub(with_examples);
let missing_inflection = total_entries.saturating_sub(with_inflection);
use unicode_segmentation::UnicodeSegmentation;
let dictionary_translations: std::collections::HashSet<String> = dict_entries
.iter()
.filter_map(|(_, e)| {
let t = e.translation.trim().to_lowercase();
if t.is_empty() { None } else { Some(t) }
})
.collect();
let mut manuscript_words: std::collections::HashSet<String> =
std::collections::HashSet::new();
for node in hierarchy.iter() {
if node.kind != NodeKind::Paragraph {
continue;
}
let mut cursor = Some(node.id);
let mut is_system = false;
while let Some(id) = cursor {
if let Some(n) = hierarchy.get(id) {
if n.system_tag.is_some() {
is_system = true;
break;
}
cursor = n.parent_id;
} else {
break;
}
}
if is_system {
continue;
}
if let Ok(Some(bytes)) = store.get_content(node.id) {
if let Ok(body) = std::str::from_utf8(&bytes) {
for w in UnicodeSegmentation::unicode_words(body) {
let lc = w.to_lowercase();
if lc.chars().count() < 2 {
continue;
}
manuscript_words.insert(lc);
}
}
}
}
let manuscript_word_count = manuscript_words.len();
let undefined_words: Vec<String> = manuscript_words
.difference(&dictionary_translations)
.cloned()
.collect();
if json {
use serde_json::{json, Map, Value};
let mut sorted_undefined: Vec<String> =
undefined_words.iter().take(50).cloned().collect();
sorted_undefined.sort();
let example_pct = if total_entries > 0 {
with_examples * 100 / total_entries
} else {
0
};
let inflection_pct = if total_entries > 0 {
with_inflection * 100 / total_entries
} else {
0
};
let coverage_pct = if manuscript_word_count > 0 {
manuscript_word_count.saturating_sub(undefined_words.len()) * 100
/ manuscript_word_count
} else {
0
};
let mut report = Map::new();
report.insert("language".into(), Value::String(lang_book.title.clone()));
report.insert(
"meta".into(),
meta.as_ref()
.map(|m| json!({
"name": m.name,
"language_kind": m.language_kind,
"family": m.family,
"iso_code": m.iso_code,
"alphabet_count": m.alphabet.len(),
"reading_direction": m.reading_direction,
}))
.unwrap_or(Value::Null),
);
report.insert(
"chapters".into(),
json!({
"dictionary_parseable": total_entries,
"dictionary_unparseable": dict_unparseable,
"grammar": grammar_count,
"phonology": phonology_count,
"sample_texts": sample_count,
}),
);
report.insert(
"coverage".into(),
json!({
"with_example": with_examples,
"with_example_pct": example_pct,
"with_paradigm": with_inflection,
"with_paradigm_pct": inflection_pct,
"missing_example": missing_examples,
"missing_paradigm": missing_inflection,
}),
);
report.insert(
"manuscript_gap".into(),
json!({
"unique_words": manuscript_word_count,
"uncovered_count": undefined_words.len(),
"coverage_pct": coverage_pct,
"uncovered_sample": sorted_undefined,
}),
);
let s = serde_json::to_string_pretty(&Value::Object(report))
.map_err(|e| Error::Config(format!("json serialise: {e}")))?;
println!("{s}");
return Ok(());
}
println!("Language doctor — `{}`", lang_book.title);
println!();
if let Some(m) = meta.as_ref() {
if !m.name.is_empty() {
println!(" name : {}", m.name);
}
if !m.language_kind.is_empty() {
println!(" kind : {}", m.language_kind);
}
if !m.family.is_empty() {
println!(" family : {}", m.family);
}
if !m.iso_code.is_empty() {
println!(" iso_code : {}", m.iso_code);
}
if !m.alphabet.is_empty() {
println!(" alphabet : {} entries", m.alphabet.len());
}
if !m.reading_direction.is_empty() {
println!(" direction : {}", m.reading_direction);
}
println!();
} else {
println!(" Meta/overview : MISSING or unparseable");
println!();
}
println!("Chapters");
println!(" Dictionary : {total_entries} parseable entries");
if dict_unparseable > 0 {
println!(
" {dict_unparseable} unparseable (no HJSON block — pre-Phase-B authoring)"
);
}
println!(" Grammar : {grammar_count} rules");
println!(" Phonology : {phonology_count} rules");
println!(" Sample texts : {sample_count} samples");
println!();
println!("Dictionary coverage");
if total_entries > 0 {
let example_pct = with_examples * 100 / total_entries;
let inflection_pct = with_inflection * 100 / total_entries;
println!(
" with example : {with_examples}/{total_entries} ({example_pct}%)"
);
println!(
" with paradigm : {with_inflection}/{total_entries} ({inflection_pct}%)"
);
if missing_examples > 0 {
println!(" missing example: {missing_examples}");
}
if missing_inflection > 0 {
println!(
" missing paradigm: {missing_inflection} (overlay won't catch inflected forms)"
);
}
} else {
println!(" no dictionary entries yet — try `inkhaven language add-word`");
}
println!();
println!("Manuscript gap analysis");
println!(" unique words (≥2 chars) in manuscript prose: {manuscript_word_count}");
let undefined_count = undefined_words.len();
if total_entries > 0 {
let covered = manuscript_word_count.saturating_sub(undefined_count);
let pct = if manuscript_word_count > 0 {
covered * 100 / manuscript_word_count
} else {
0
};
println!(" covered by dictionary: {covered}/{manuscript_word_count} ({pct}%)");
if undefined_count > 0 {
println!(" uncovered words (sample, max 15):");
let mut sample: Vec<&String> = undefined_words.iter().take(15).collect();
sample.sort();
for w in sample {
println!(" · {w}");
}
if undefined_count > 15 {
println!(" ... and {} more", undefined_count - 15);
}
}
} else {
println!(" (skipping — no dictionary entries to compare against)");
}
Ok(())
}
fn export(
project: &Path,
language: &str,
format: LanguageExportFormat,
output: Option<&Path>,
) -> Result<()> {
use crate::store::node::NodeKind;
let layout = ProjectLayout::new(project);
layout.require_initialized()?;
let cfg = Config::load_layered(&layout.config_path())?;
let store = Store::open(layout, &cfg)?;
let hierarchy = Hierarchy::load(&store)?;
let lang_root = hierarchy
.iter()
.find(|n| {
n.kind == NodeKind::Book
&& n.system_tag.as_deref() == Some(SYSTEM_TAG_LANGUAGES)
})
.cloned()
.ok_or_else(|| {
Error::Store(
"Language system book missing — re-open the project to seed it".into(),
)
})?;
let lang_book = hierarchy
.children_of(Some(lang_root.id))
.into_iter()
.find(|n| {
n.kind == NodeKind::Book && n.title.eq_ignore_ascii_case(language)
})
.cloned()
.ok_or_else(|| {
Error::Config(format!(
"language `{language}` not found"
))
})?;
let chapters = hierarchy.children_of(Some(lang_book.id));
let mut entries: Vec<(String, crate::language_entry::DictionaryEntry)> = Vec::new();
let mut meta: Option<crate::language_entry::MetaOverview> = None;
let mut grammar_bodies: Vec<(String, String)> = Vec::new();
let mut phonology_bodies: Vec<(String, String)> = Vec::new();
let mut sample_bodies: Vec<(String, String)> = Vec::new();
for chapter in &chapters {
let title_lc = chapter.title.to_lowercase();
match title_lc.as_str() {
"dictionary" => {
for id in hierarchy.collect_subtree(chapter.id) {
let Some(n) = hierarchy.get(id) else { continue; };
if n.kind != NodeKind::Paragraph {
continue;
}
let Ok(Some(bytes)) = store.get_content(n.id) else { continue; };
let Ok(body) = std::str::from_utf8(&bytes) else { continue; };
if let Ok(Some(e)) = crate::language_entry::parse(body) {
entries.push((n.title.clone(), e));
}
}
}
"grammar" | "phonology" | "sample texts" => {
let bucket = match title_lc.as_str() {
"grammar" => &mut grammar_bodies,
"phonology" => &mut phonology_bodies,
_ => &mut sample_bodies,
};
for n in hierarchy
.children_of(Some(chapter.id))
.into_iter()
.filter(|n| n.kind == NodeKind::Paragraph)
{
if let Ok(Some(bytes)) = store.get_content(n.id) {
if let Ok(body) = std::str::from_utf8(&bytes) {
bucket.push((n.title.clone(), body.to_string()));
}
}
}
}
"meta" => {
if let Some(overview) = hierarchy
.children_of(Some(chapter.id))
.into_iter()
.find(|n| {
n.kind == NodeKind::Paragraph
&& n.title.eq_ignore_ascii_case("overview")
})
{
if let Ok(Some(bytes)) = store.get_content(overview.id) {
if let Ok(body) = std::str::from_utf8(&bytes) {
if let Ok(Some(m)) =
crate::language_entry::parse_meta_overview(body)
{
meta = Some(m);
}
}
}
}
}
_ => {}
}
}
entries.sort_by(|a, b| a.0.to_lowercase().cmp(&b.0.to_lowercase()));
let rendered: Vec<u8> = match format {
LanguageExportFormat::Json => render_json(
&lang_book.title,
meta.as_ref(),
&entries,
&grammar_bodies,
&phonology_bodies,
&sample_bodies,
)?,
LanguageExportFormat::Anki => render_anki(&entries)?,
LanguageExportFormat::DictionaryTwocol => render_dictionary_twocol(
&lang_book.title,
meta.as_ref(),
&entries,
),
LanguageExportFormat::Csv => render_csv(&entries),
LanguageExportFormat::Grammar => render_grammar(
&lang_book.title,
&grammar_bodies,
&phonology_bodies,
),
LanguageExportFormat::Phrasebook => render_phrasebook(
&lang_book.title,
&sample_bodies,
),
};
match (output, format) {
(Some(path), _) => {
crate::io_atomic::write(path, &rendered).map_err(|e| {
Error::Config(format!("write {}: {e}", path.display()))
})?;
eprintln!("wrote {} bytes to {}", rendered.len(), path.display());
}
(None, LanguageExportFormat::DictionaryTwocol)
| (None, LanguageExportFormat::Grammar)
| (None, LanguageExportFormat::Phrasebook) => {
return Err(Error::Config(
"this export format needs --output <path.typ> — \
the Typst renderer doesn't stream to stdout"
.into(),
));
}
(None, _) => {
use std::io::Write;
std::io::stdout()
.write_all(&rendered)
.map_err(|e| Error::Config(format!("stdout write: {e}")))?;
}
}
Ok(())
}
/// Serialises a whole language book as pretty-printed JSON followed by a
/// trailing newline.
///
/// The root object holds `language`, an optional `meta` object (only when
/// `meta` is `Some`), a `dictionary` array, and `grammar`, `phonology` and
/// `sample_texts` arrays of `{ title, body }` objects.
///
/// # Errors
/// Returns `Error::Config` when serialisation fails.
fn render_json(
    language_name: &str,
    meta: Option<&crate::language_entry::MetaOverview>,
    entries: &[(String, crate::language_entry::DictionaryEntry)],
    grammar: &[(String, String)],
    phonology: &[(String, String)],
    samples: &[(String, String)],
) -> Result<Vec<u8>> {
    use serde_json::{json, Map, Value};

    // Turns (title, body) pairs into an array of `{ title, body }` objects.
    let titled_bodies = |items: &[(String, String)]| -> Value {
        Value::Array(
            items
                .iter()
                .map(|(title, body)| json!({ "title": title, "body": body }))
                .collect(),
        )
    };

    let mut root = Map::new();
    root.insert("language".into(), Value::String(language_name.to_string()));

    if let Some(overview) = meta {
        let meta_json = json!({
            "name": overview.name,
            "language_kind": overview.language_kind,
            "family": overview.family,
            "iso_code": overview.iso_code,
            "alphabet": overview.alphabet,
            "reading_direction": overview.reading_direction,
            "stemmer": overview.stemmer,
            "example_corpus_ref": overview.example_corpus_ref,
        });
        root.insert("meta".into(), meta_json);
    }

    let mut dictionary: Vec<Value> = Vec::with_capacity(entries.len());
    for (title, entry) in entries {
        dictionary.push(json!({
            "title": title,
            "word": entry.word,
            "type": entry.pos,
            "translation": entry.translation,
            "example": entry.example,
            "inflection": entry.inflection,
        }));
    }
    root.insert("dictionary".into(), Value::Array(dictionary));

    root.insert("grammar".into(), titled_bodies(grammar));
    root.insert("phonology".into(), titled_bodies(phonology));
    root.insert("sample_texts".into(), titled_bodies(samples));

    let mut bytes = serde_json::to_vec_pretty(&Value::Object(root))
        .map_err(|e| Error::Config(format!("json serialise: {e}")))?;
    bytes.push(b'\n');
    Ok(bytes)
}
/// Renders dictionary entries as CSV for Anki import.
///
/// Columns, in order: `word`, `translation`, `type`, `example`,
/// `inflection`. Inflection forms are flattened to `key=value` pairs
/// joined with `"; "`. Every cell goes through `csv_field`.
///
/// The paragraph title in each tuple is ignored. The function currently
/// always returns `Ok`.
fn render_anki(
    entries: &[(String, crate::language_entry::DictionaryEntry)],
) -> Result<Vec<u8>> {
    let mut csv = String::from("word,translation,type,example,inflection\n");
    for (_, entry) in entries {
        let forms = entry
            .inflection
            .iter()
            .map(|(name, form)| format!("{name}={form}"))
            .collect::<Vec<_>>()
            .join("; ");
        let cells = [
            csv_field(&entry.word),
            csv_field(&entry.translation),
            csv_field(&entry.pos),
            csv_field(&entry.example),
            csv_field(&forms),
        ];
        csv.push_str(&cells.join(","));
        csv.push('\n');
    }
    Ok(csv.into_bytes())
}
/// Formats `s` as a single CSV cell.
///
/// The value is wrapped in double quotes, with embedded quotes doubled,
/// when it contains a comma, a double quote, a line feed or a carriage
/// return. A bare `\r` must be quoted too: CSV readers treat it as (part
/// of) a record terminator, so leaving it unquoted would split the row.
/// Any other value is returned unchanged.
fn csv_field(s: &str) -> String {
    let needs_quoting = s.contains(|c: char| matches!(c, ',' | '"' | '\n' | '\r'));
    if needs_quoting {
        format!("\"{}\"", s.replace('"', "\"\""))
    } else {
        s.to_string()
    }
}
/// Renders the dictionary as a two-column Typst document.
///
/// Entries are grouped by the uppercased first non-whitespace character of
/// their paragraph title (`"?"` when the title has none); groups appear in
/// `BTreeMap` key order and entries keep their incoming order inside a
/// group. Each entry prints its title in bold, the part of speech in
/// italics and the translation, followed by optional example and
/// inflection lines.
///
/// All user-authored text (language name, meta kind/family, bucket
/// heading, entry title, part of speech, translation, example, forms) is
/// passed through `typst_escape` so characters such as `*`, `#` or `[`
/// in the data cannot be interpreted as Typst markup.
fn render_dictionary_twocol(
    language_name: &str,
    meta: Option<&crate::language_entry::MetaOverview>,
    entries: &[(String, crate::language_entry::DictionaryEntry)],
) -> Vec<u8> {
    let mut by_bucket: std::collections::BTreeMap<
        String,
        Vec<&(String, crate::language_entry::DictionaryEntry)>,
    > = std::collections::BTreeMap::new();
    for entry in entries {
        let bucket = entry
            .0
            .chars()
            .find(|c| !c.is_whitespace())
            .map(|c| c.to_uppercase().to_string())
            .unwrap_or_else(|| "?".into());
        by_bucket.entry(bucket).or_default().push(entry);
    }

    let mut s = String::new();
    s.push_str("#set page(paper: \"a4\", columns: 2)\n");
    s.push_str("#set text(font: \"New Computer Modern\", size: 10pt)\n");
    s.push_str("#set par(justify: true)\n");
    s.push('\n');
    s.push_str(&format!(
        "#align(center)[= {} dictionary]\n",
        typst_escape(language_name)
    ));

    // Optional italic subtitle: "<kind> · <family>", either part may be absent.
    if let Some(m) = meta {
        if !m.language_kind.is_empty() || !m.family.is_empty() {
            s.push_str("#align(center)[#text(style: \"italic\")[");
            if !m.language_kind.is_empty() {
                s.push_str(&typst_escape(&m.language_kind));
            }
            if !m.family.is_empty() {
                if !m.language_kind.is_empty() {
                    s.push_str(" · ");
                }
                s.push_str(&typst_escape(&m.family));
            }
            s.push_str("]]\n");
        }
    }
    s.push('\n');

    for (bucket, group) in &by_bucket {
        // The bucket is derived from user data and may itself be a markup
        // character (e.g. a title starting with `#`), so escape it.
        s.push_str(&format!(
            "#align(center)[#text(size: 14pt, weight: \"bold\")[— {} —]]\n",
            typst_escape(bucket)
        ));
        s.push('\n');
        for (title, e) in group {
            s.push_str(&format!(
                "*{}* #text(style: \"italic\")[{}] {}\n",
                typst_escape(title),
                typst_escape(&e.pos),
                typst_escape(&e.translation),
            ));
            if !e.example.trim().is_empty() {
                s.push_str(&format!(
                    " #pad(left: 2em)[#text(style: \"italic\")[{}]]\n",
                    typst_escape(e.example.trim()),
                ));
            }
            if !e.inflection.is_empty() {
                let pretty: Vec<String> = e
                    .inflection
                    .iter()
                    .map(|(k, v)| format!("{k}: {v}"))
                    .collect();
                s.push_str(&format!(
                    " #pad(left: 2em)[#text(size: 8pt)[forms — {}]]\n",
                    typst_escape(&pretty.join(", ")),
                ));
            }
            s.push('\n');
        }
    }
    s.into_bytes()
}
/// Escapes `s` so it is rendered as literal text in Typst markup mode.
///
/// Each markup-significant character is prefixed with a backslash:
/// * `*` `_` — strong / emphasis delimiters;
/// * `#` — starts a code expression;
/// * `[` `]` — content block delimiters;
/// * `\` — the escape character itself;
/// * `$` — math mode delimiter;
/// * `` ` `` — raw text delimiter;
/// * `@` `<` — reference and label syntax;
/// * `~` — non-breaking space shorthand;
/// * `/` — `//` and `/*` start comments, which would silently drop text.
///
/// Characters that are only special at the start of a line (`=`, `-`,
/// `+`) are left alone.
fn typst_escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        if matches!(
            c,
            '*' | '_' | '#' | '[' | ']' | '\\' | '$' | '`' | '@' | '<' | '~' | '/'
        ) {
            out.push('\\');
        }
        out.push(c);
    }
    out
}
/// Renders dictionary entries as plain CSV.
///
/// Columns, in order: `word`, `type`, `translation`, `example`,
/// `inflection`. The inflection cell is produced by `join_inflection`,
/// and every cell goes through `csv_field`. The paragraph title in each
/// tuple is ignored.
fn render_csv(entries: &[(String, crate::language_entry::DictionaryEntry)]) -> Vec<u8> {
    let mut csv = String::from("word,type,translation,example,inflection\n");
    for (_, entry) in entries {
        let forms = join_inflection(&entry.inflection);
        let cells = [
            csv_field(&entry.word),
            csv_field(&entry.pos),
            csv_field(&entry.translation),
            csv_field(&entry.example),
            csv_field(&forms),
        ];
        csv.push_str(&cells.join(","));
        csv.push('\n');
    }
    csv.into_bytes()
}
/// Flattens an inflection table into `key=value` pairs, sorted as whole
/// strings and joined with `;`. An empty table yields an empty string.
fn join_inflection(inflection: &std::collections::BTreeMap<String, String>) -> String {
    let mut pairs: Vec<String> = Vec::with_capacity(inflection.len());
    for (name, form) in inflection {
        pairs.push(format!("{name}={form}"));
    }
    pairs.sort();
    pairs.join(";")
}
/// Renders grammar and phonology rules as a Typst reference document.
///
/// Layout: page/heading/text setup, a centred title, an outline, a page
/// break, then a "Grammar rules" section grouped by each rule's `category`
/// field ("Uncategorised" when absent) in `BTreeMap` key order. For every
/// grammar rule the `rule` text and the `examples` list are printed when
/// present. A "Phonology rules" section follows only when
/// `phonology_bodies` is non-empty, printing each rule's `rule` and
/// `pattern` fields.
///
/// Fields are pulled out of the raw HJSON bodies with the line-based
/// `extract_hjson_string_field` / `extract_hjson_examples` helpers. All
/// extracted text is escaped with `typst_escape`, except `pattern`, which
/// is rendered as inline raw text.
fn render_grammar(
    language_title: &str,
    grammar_bodies: &[(String, String)],
    phonology_bodies: &[(String, String)],
) -> Vec<u8> {
    let mut out = String::new();
    out.push_str("#set page(paper: \"a4\", margin: 2cm)\n");
    out.push_str("#set heading(numbering: \"1.\")\n");
    out.push_str("#set text(font: (\"New Computer Modern\", \"DejaVu Serif\"), size: 11pt)\n");
    out.push_str(&format!(
        "#align(center)[#text(20pt, weight: \"bold\")[{} — grammar reference]]\n\n",
        typst_escape(language_title),
    ));
    out.push_str("#outline()\n\n");
    out.push_str("#pagebreak()\n\n");

    let mut by_category: std::collections::BTreeMap<String, Vec<&(String, String)>> =
        std::collections::BTreeMap::new();
    for entry in grammar_bodies {
        let cat = extract_hjson_string_field(&entry.1, "category")
            .unwrap_or_else(|| "Uncategorised".to_string());
        by_category.entry(cat).or_default().push(entry);
    }

    out.push_str("= Grammar rules\n\n");
    for (cat, rules) in &by_category {
        out.push_str(&format!("== {}\n\n", typst_escape(cat)));
        for (title, body) in rules {
            out.push_str(&format!("=== {}\n\n", typst_escape(title)));
            if let Some(rule) = extract_hjson_string_field(body, "rule") {
                out.push_str(&format!("*Rule:* {}\n\n", typst_escape(&rule)));
            }
            let examples = extract_hjson_examples(body).unwrap_or_default();
            if !examples.is_empty() {
                out.push_str("*Examples:*\n\n");
                for ex in &examples {
                    out.push_str(&format!("- {}\n", typst_escape(ex)));
                }
                out.push('\n');
            }
        }
    }

    if !phonology_bodies.is_empty() {
        out.push_str("\n= Phonology rules\n\n");
        for (title, body) in phonology_bodies {
            out.push_str(&format!("== {}\n\n", typst_escape(title)));
            if let Some(rule) = extract_hjson_string_field(body, "rule") {
                out.push_str(&format!("*Rule:* {}\n\n", typst_escape(&rule)));
            }
            if let Some(pattern) = extract_hjson_string_field(body, "pattern") {
                // Backslash escapes do not work inside a backtick raw
                // span, so a pattern that itself contains a backtick
                // would end the span early. Such patterns go through the
                // `raw` function with a quoted string argument instead.
                let rendered = if pattern.contains('`') {
                    format!(
                        "#raw(\"{}\")",
                        pattern.replace('\\', "\\\\").replace('"', "\\\"")
                    )
                } else {
                    format!("`{pattern}`")
                };
                out.push_str(&format!("*Pattern:* {rendered}\n\n"));
            }
        }
    }
    out.into_bytes()
}
/// Renders the sample texts as a two-column Typst phrasebook.
///
/// After the page setup and centred title, each sample becomes a level-2
/// heading followed by a two-cell grid: "Gloss" (the `gloss` field, else
/// `translation`, else the whole raw body) and "Original" (the `original`
/// field, else `text`, else a fixed placeholder). When there are no
/// samples a single italic notice is emitted instead.
fn render_phrasebook(
    language_title: &str,
    sample_bodies: &[(String, String)],
) -> Vec<u8> {
    let header = format!(
        "#align(center)[#text(20pt, weight: \"bold\")[{} — phrasebook]]\n\n",
        typst_escape(language_title),
    );
    let mut doc = String::new();
    doc.push_str("#set page(paper: \"a4\", margin: 2cm)\n");
    doc.push_str("#set text(font: (\"New Computer Modern\", \"DejaVu Serif\"), size: 11pt)\n");
    doc.push_str(&header);

    if sample_bodies.is_empty() {
        doc.push_str("_No sample texts in the project yet._\n");
        return doc.into_bytes();
    }

    for (title, body) in sample_bodies {
        // Prefer the dedicated field, then its alternative spelling.
        let gloss = match extract_hjson_string_field(body, "gloss") {
            Some(found) => Some(found),
            None => extract_hjson_string_field(body, "translation"),
        };
        let original = match extract_hjson_string_field(body, "original") {
            Some(found) => Some(found),
            None => extract_hjson_string_field(body, "text"),
        };

        let gloss_cell = typst_escape(gloss.as_deref().unwrap_or(body));
        let original_cell = typst_escape(original.as_deref().unwrap_or("(no original supplied)"));

        doc.push_str(&format!("== {}\n\n", typst_escape(title)));
        doc.push_str("#grid(columns: (1fr, 1fr), gutter: 1em,\n");
        doc.push_str(&format!(
            " [#text(weight: \"semibold\")[Gloss]\\\n{}],\n",
            gloss_cell,
        ));
        doc.push_str(&format!(
            " [#text(weight: \"semibold\")[Original]\\\n{}],\n",
            original_cell,
        ));
        doc.push_str(")\n\n");
    }
    doc.into_bytes()
}
/// Extracts the string value of a top-of-line `field:` entry from an HJSON
/// body using a line scan (no full HJSON parse).
///
/// The first line whose trimmed start begins with `field:` decides the
/// result:
/// * a `'''` multi-line string is collected up to the closing `'''`; each
///   line is trimmed and the lines are joined with `\n`;
/// * any other value has surrounding whitespace and `"` / `'` quote
///   characters stripped.
///
/// Returns `None` when no such line exists or the value is empty.
/// Commented-out fields (`// field: …`) do not match, because the line
/// must start with the field name itself. Escape sequences inside the
/// value are not decoded.
fn extract_hjson_string_field(body: &str, field: &str) -> Option<String> {
    const TRIPLE: &str = "'''";
    let needle = format!("{field}:");
    let mut lines = body.lines();
    while let Some(line) = lines.next() {
        let Some(rest) = line.trim_start().strip_prefix(needle.as_str()) else {
            continue;
        };
        let rest = rest.trim();

        if let Some(opened) = rest.strip_prefix(TRIPLE) {
            let mut collected: Vec<&str> = Vec::new();
            if let Some(end) = opened.find(TRIPLE) {
                // Opened and closed on the same line.
                collected.push(opened[..end].trim());
            } else {
                if !opened.trim().is_empty() {
                    collected.push(opened.trim());
                }
                // Consume following lines up to the closing delimiter; an
                // unterminated block runs to the end of the body.
                for next in lines.by_ref() {
                    match next.find(TRIPLE) {
                        Some(end) => {
                            let head = next[..end].trim();
                            if !head.is_empty() {
                                collected.push(head);
                            }
                            break;
                        }
                        None => collected.push(next.trim()),
                    }
                }
            }
            let joined = collected.join("\n");
            let value = joined.trim();
            return if value.is_empty() {
                None
            } else {
                Some(value.to_string())
            };
        }

        let value = rest.trim_matches('"').trim_matches('\'').trim();
        return if value.is_empty() {
            None
        } else {
            Some(value.to_string())
        };
    }
    None
}
/// Extracts the `examples: [...]` list from an HJSON body using a line
/// scan (no full HJSON parse).
///
/// Returns `None` when no line starts with `examples:`. Otherwise:
/// * a one-line list (`examples: ["a", "b"]`) is split on commas;
/// * a multi-line list is read one item per line until a line starting
///   with `]`; blank lines and comment lines (`//` or `#`) are skipped,
///   and trailing commas and surrounding whitespace are removed;
/// * if the value does not start with `[`, an empty list is returned.
///
/// Items have surrounding `"` / `'` quotes stripped, and empty items are
/// dropped. Items that themselves contain commas are not supported in the
/// one-line form.
fn extract_hjson_examples(body: &str) -> Option<Vec<String>> {
    /// Strips whitespace and quote characters; `None` for an empty item.
    fn unquote(raw: &str) -> Option<String> {
        let cleaned = raw.trim().trim_matches('"').trim_matches('\'');
        if cleaned.is_empty() {
            None
        } else {
            Some(cleaned.to_string())
        }
    }

    let mut lines = body.lines().map(str::trim);
    let rest = lines
        .by_ref()
        .find_map(|line| line.strip_prefix("examples:"))?
        .trim();

    if !rest.starts_with('[') {
        return Some(Vec::new());
    }
    if rest.ends_with(']') {
        // `rest` starts with '[' and ends with ']', so it has at least two
        // bytes and the slice below cannot go out of range.
        let inner = &rest[1..rest.len() - 1];
        return Some(inner.split(',').filter_map(unquote).collect());
    }

    let mut items = Vec::new();
    // Anything after the opening '[' on the same line counts as the first
    // candidate item.
    let opening_remainder = rest[1..].trim();
    for line in std::iter::once(opening_remainder).chain(lines) {
        if line.starts_with(']') {
            break;
        }
        if line.is_empty() || line.starts_with("//") || line.starts_with('#') {
            continue;
        }
        let entry = line.trim_end_matches(',').trim_end();
        if let Some(item) = unquote(entry) {
            items.push(item);
        }
    }
    Some(items)
}
fn define_rule(
project: &Path,
language: &str,
rule_id: &str,
category: &str,
) -> Result<()> {
let category_norm = category.to_lowercase();
if category_norm != "grammar" && category_norm != "phonology" {
return Err(Error::Config(format!(
"--category must be `grammar` or `phonology` (got `{category}`)"
)));
}
let layout = ProjectLayout::new(project);
layout.require_initialized()?;
let cfg = Config::load_layered(&layout.config_path())?;
let store = Store::open(layout.clone(), &cfg)?;
let hierarchy = Hierarchy::load(&store)?;
use crate::store::node::NodeKind;
let lang_root = hierarchy
.iter()
.find(|n| {
n.kind == NodeKind::Book
&& n.system_tag.as_deref() == Some(SYSTEM_TAG_LANGUAGES)
})
.cloned()
.ok_or_else(|| {
Error::Store(
"Language system book missing — re-open the project to seed it".into(),
)
})?;
let lang_book = hierarchy
.children_of(Some(lang_root.id))
.into_iter()
.find(|n| {
n.kind == NodeKind::Book && n.title.eq_ignore_ascii_case(language)
})
.cloned()
.ok_or_else(|| {
Error::Config(format!("language `{language}` not found"))
})?;
let category_chapter = hierarchy
.children_of(Some(lang_book.id))
.into_iter()
.find(|n| n.title.eq_ignore_ascii_case(&category_norm))
.cloned()
.ok_or_else(|| {
Error::Config(format!(
"`{category_norm}` chapter not found under language `{language}` — \
was it scaffolded? Try `inkhaven language init {language}`"
))
})?;
let existing = hierarchy
.collect_subtree(category_chapter.id)
.into_iter()
.filter_map(|id| hierarchy.get(id).cloned())
.find(|n| {
n.kind == NodeKind::Paragraph
&& n.slug.eq_ignore_ascii_case(rule_id)
});
let seed = if let Some(node) = &existing {
match store.get_content(node.id) {
Ok(Some(b)) => String::from_utf8_lossy(&b).into_owned(),
_ => String::new(),
}
} else {
rule_template(rule_id, &category_norm)
};
let edited = open_in_editor(&seed, &format!("{rule_id}-{category_norm}"))?;
if let Some(node) = existing {
let mut n = node;
store
.update_paragraph_content(&mut n, edited.as_bytes())
.map_err(|e| Error::Store(format!("save rule: {e}")))?;
if let Some(rel) = &n.file {
crate::io_atomic::write(&store.project_root().join(rel), edited.as_bytes())
.map_err(Error::Io)?;
}
eprintln!("updated rule `{rule_id}` under {category_norm}");
} else {
let mut created = store
.create_node(
&cfg,
&hierarchy,
NodeKind::Paragraph,
rule_id,
Some(&category_chapter),
None,
crate::store::InsertPosition::End,
)
.map_err(|e| Error::Store(format!("create rule paragraph: {e}")))?;
if let Some(rel) = &created.file {
crate::io_atomic::write(
&store.project_root().join(rel),
edited.as_bytes(),
)
.map_err(Error::Io)?;
store
.update_paragraph_content(&mut created, edited.as_bytes())
.map_err(|e| Error::Store(format!("save rule: {e}")))?;
}
eprintln!("created rule `{rule_id}` under {category_norm}");
}
Ok(())
}
/// Produces the starter HJSON body for a new rule.
///
/// `rule_id` is interpolated verbatim into the `rule_id` field. `category`
/// only selects the placeholder `examples` list: the grammar wording for
/// exactly `"grammar"`, the phoneme wording for anything else. The
/// `category` field in the output is always left empty for the author to
/// fill in.
fn rule_template(rule_id: &str, category: &str) -> String {
    let sample_block = match category {
        "grammar" => {
            "[\n \"example 1 in invented language — translation\",\n \"example 2 — translation\"\n ]"
        }
        _ => "[\n \"phoneme example 1\",\n \"phoneme example 2\"\n ]",
    };
    format!(
        "{{\n rule_id: \"{id}\"\n category: \"\"\n rule: \"\"\n examples: {examples}\n applies_when: \"\"\n depends_on: []\n}}\n",
        id = rule_id,
        examples = sample_block,
    )
}
fn open_in_editor(seed: &str, label: &str) -> Result<String> {
let editor = std::env::var("EDITOR").unwrap_or_else(|_| {
if cfg!(windows) {
"notepad".into()
} else {
"vi".into()
}
});
let tmp_dir = std::env::temp_dir();
let tmp_path = tmp_dir.join(format!(
"inkhaven-define-rule-{}-{}.hjson",
std::process::id(),
label
));
std::fs::write(&tmp_path, seed.as_bytes()).map_err(Error::Io)?;
let status = std::process::Command::new(&editor)
.arg(&tmp_path)
.status()
.map_err(|e| Error::Config(format!("spawn `{editor}`: {e}")))?;
if !status.success() {
let _ = std::fs::remove_file(&tmp_path);
return Err(Error::Config(format!(
"editor `{editor}` exited with status {status}"
)));
}
let body = std::fs::read_to_string(&tmp_path).map_err(Error::Io)?;
let _ = std::fs::remove_file(&tmp_path);
Ok(body)
}
fn import_dictionary_csv(
project: &Path,
language: &str,
csv_path: &Path,
new: bool,
force: bool,
) -> Result<()> {
use crate::store::node::NodeKind;
let layout = ProjectLayout::new(project);
layout.require_initialized()?;
let cfg = Config::load_layered(&layout.config_path())?;
let store = Store::open(layout.clone(), &cfg)?;
let hierarchy = Hierarchy::load(&store)?;
let lang_root = hierarchy
.iter()
.find(|n| {
n.kind == NodeKind::Book
&& n.system_tag.as_deref() == Some(SYSTEM_TAG_LANGUAGES)
})
.ok_or_else(|| {
Error::Store(
"Language system book missing — re-open the project to seed it".into(),
)
})?
.clone();
let lang_book = hierarchy
.children_of(Some(lang_root.id))
.into_iter()
.find(|n| {
n.kind == NodeKind::Book && n.title.eq_ignore_ascii_case(language)
})
.cloned()
.ok_or_else(|| {
Error::Config(format!(
"language `{language}` not found — run `inkhaven language init {language}` first"
))
})?;
let raw = std::fs::read_to_string(csv_path).map_err(|e| {
Error::Config(format!(
"could not read CSV file {}: {e}",
csv_path.display()
))
})?;
let rows = parse_csv(&raw)
.map_err(|e| Error::Config(format!("CSV parse error: {e}")))?;
let mut rows = rows.into_iter();
let header = rows
.next()
.ok_or_else(|| Error::Config("CSV is empty (no header row)".into()))?;
let columns = resolve_csv_columns(&header)?;
let data_rows: Vec<Vec<String>> = rows.collect();
if !force {
let meta = read_meta_overview(&store, &hierarchy, &lang_book)?;
let phoneme_inventories =
collect_phonology_inventories(&store, &hierarchy, &lang_book)?;
let alphabet: Vec<String> = meta
.as_ref()
.map(|m| m.alphabet.clone())
.unwrap_or_default();
let mut violations: Vec<String> = Vec::new();
for (row_idx, row) in data_rows.iter().enumerate() {
let display_row = row_idx + 2;
let word = row
.get(columns.word)
.cloned()
.unwrap_or_default()
.trim()
.to_string();
if word.is_empty() || word.starts_with('#') {
continue;
}
if !alphabet.is_empty() {
if let Some(bad) = first_unknown_letter(&word, &alphabet) {
violations.push(format!(
"row {display_row}: `{word}` contains `{bad}` not in Meta/overview.alphabet"
));
continue; }
}
if !phoneme_inventories.is_empty() {
if let Some(bad) = first_unknown_letter(&word, &phoneme_inventories) {
violations.push(format!(
"row {display_row}: `{word}` contains `{bad}` not in any Phonology inventory"
));
}
}
}
if !violations.is_empty() {
eprintln!(
"Pre-flight validation failed — {} violation(s) found:\n",
violations.len()
);
for v in &violations {
eprintln!(" · {v}");
}
eprintln!(
"\nFix by either:\n \
· updating Meta/overview.alphabet to include the missing characters, OR\n \
· updating a Phonology rule's `phonemes` list to include them, OR\n \
· correcting the CSV, OR\n \
· re-running with --force to bypass validation."
);
return Err(Error::Config(format!(
"import aborted — {} alphabet/phonology violation(s)",
violations.len()
)));
}
}
if new {
wipe_dictionary(&store, &hierarchy, &lang_book, language)?;
}
let mut imported = 0usize;
let mut skipped_blank = 0usize;
let mut skipped_comment = 0usize;
let mut skipped_duplicate = 0usize;
let mut failed = 0usize;
for (row_idx, row) in data_rows.into_iter().enumerate() {
let display_row = row_idx + 2;
let entry = match build_import_entry_from_row(&columns, &row) {
Ok(e) => e,
Err(e) => {
eprintln!("row {display_row}: {e} — skipped");
failed += 1;
continue;
}
};
let trimmed = entry.word.trim();
if trimmed.is_empty() {
skipped_blank += 1;
continue;
}
if trimmed.starts_with('#') {
skipped_comment += 1;
continue;
}
match add_imported_dictionary_entry(&store, &cfg, &lang_book, &entry) {
Ok((_, bucket)) => {
eprintln!("imported `{}` → {language}/Dictionary/{bucket}", entry.word);
imported += 1;
}
Err(e) => {
let msg = e.to_string();
if msg.contains("already defined") {
eprintln!("row {display_row}: `{}` already exists — skipped", entry.word);
skipped_duplicate += 1;
} else {
eprintln!("row {display_row}: import `{}` failed: {msg}", entry.word);
failed += 1;
}
}
}
}
eprintln!();
eprintln!("Import summary for `{language}`");
eprintln!(" imported: {imported}");
if skipped_blank > 0 {
eprintln!(" skipped (blank): {skipped_blank}");
}
if skipped_comment > 0 {
eprintln!(" skipped (#): {skipped_comment}");
}
if skipped_duplicate > 0 {
eprintln!(" skipped (dup): {skipped_duplicate}");
}
if failed > 0 {
eprintln!(" failed: {failed}");
}
Ok(())
}
/// Column indices resolved from the header row of a dictionary CSV.
///
/// The three plain `usize` fields are the required columns. Every
/// `Option<usize>` field is `None` when the header has no column of that name.
struct CsvColumns {
/// Index of the required `word` column.
word: usize,
/// Index of the required `type` column (the part of speech).
pos: usize,
/// Index of the required `translation` column.
translation: usize,
/// Index of the optional `example` column.
example: Option<usize>,
/// Index of the optional `pronunciation` column.
pronunciation: Option<usize>,
/// Index of the optional `etymology` column.
etymology: Option<usize>,
/// Index of the optional `related` column (cell is split on `;` by the importer).
related: Option<usize>,
/// Index of the optional `inflection` column (cell holds `key=value` pairs separated by `;`).
inflection: Option<usize>,
/// Index of the optional `examples` column (cell is split on `|` by the importer).
examples: Option<usize>,
/// Index of the optional `register` column.
register: Option<usize>,
/// Index of the optional `era` column.
era: Option<usize>,
/// Index of the optional `notes` column.
notes: Option<usize>,
}
fn resolve_csv_columns(header: &[String]) -> Result<CsvColumns> {
let lookup = |name: &str| -> Option<usize> {
header.iter().position(|h| h.trim().eq_ignore_ascii_case(name))
};
let word = lookup("word").ok_or_else(|| {
Error::Config("CSV missing required column `word`".into())
})?;
let pos = lookup("type").ok_or_else(|| {
Error::Config("CSV missing required column `type`".into())
})?;
let translation = lookup("translation").ok_or_else(|| {
Error::Config("CSV missing required column `translation`".into())
})?;
Ok(CsvColumns {
word,
pos,
translation,
example: lookup("example"),
pronunciation: lookup("pronunciation"),
etymology: lookup("etymology"),
related: lookup("related"),
inflection: lookup("inflection"),
examples: lookup("examples"),
register: lookup("register"),
era: lookup("era"),
notes: lookup("notes"),
})
}
/// Builds an `ImportEntry` from one CSV data row using the resolved columns.
///
/// A cell that is missing — because the row is shorter than the header or the
/// optional column is absent — is treated as the empty string. Scalar fields
/// are whitespace-trimmed. `related` is split on `;`, `examples` on `|`, and
/// `inflection` is parsed as `key=value` pairs. `domain` is always empty.
///
/// The current implementation never returns `Err`; the `Result` return type is
/// part of the signature callers match on.
fn build_import_entry_from_row(
    cols: &CsvColumns,
    row: &[String],
) -> std::result::Result<ImportEntry, String> {
    // Raw text of the cell at `idx`, or "" when the row has no such cell.
    let cell = |idx: usize| row.get(idx).map(String::as_str).unwrap_or("");
    // Same, for a column that may not exist in the header at all.
    let optional = |idx: Option<usize>| idx.map(cell).unwrap_or("");
    let trimmed = |text: &str| text.trim().to_string();

    Ok(ImportEntry {
        word: trimmed(cell(cols.word)),
        pos: trimmed(cell(cols.pos)),
        translation: trimmed(cell(cols.translation)),
        example: trimmed(optional(cols.example)),
        pronunciation: trimmed(optional(cols.pronunciation)),
        etymology: trimmed(optional(cols.etymology)),
        related: split_semicolon(optional(cols.related)),
        inflection: parse_inflection_field(optional(cols.inflection)),
        examples: split_pipe(optional(cols.examples)),
        register: trimmed(optional(cols.register)),
        era: trimmed(optional(cols.era)),
        notes: trimmed(optional(cols.notes)),
        domain: Vec::new(),
    })
}
/// Parses an inflection cell of the form `key=value;key=value;…`.
///
/// Each `;`-separated chunk is split at its first `=`; key and value are
/// trimmed. Chunks without `=`, or with an empty key or value, are skipped.
/// When a key repeats, the last value wins.
fn parse_inflection_field(
    raw: &str,
) -> std::collections::BTreeMap<String, String> {
    let mut forms = std::collections::BTreeMap::new();
    let pairs = raw
        .split(';')
        .filter_map(|chunk| chunk.trim().split_once('='));
    for (name, form) in pairs {
        let (name, form) = (name.trim(), form.trim());
        if name.is_empty() || form.is_empty() {
            continue;
        }
        forms.insert(name.to_string(), form.to_string());
    }
    forms
}
/// Splits `raw` on `|`, trims each piece, and drops pieces that are empty
/// after trimming.
fn split_pipe(raw: &str) -> Vec<String> {
    let mut pieces = Vec::new();
    for piece in raw.split('|') {
        let piece = piece.trim();
        if !piece.is_empty() {
            pieces.push(piece.to_owned());
        }
    }
    pieces
}
/// Splits `raw` on `;`, trims each piece, and drops pieces that are empty
/// after trimming.
fn split_semicolon(raw: &str) -> Vec<String> {
    let mut pieces = Vec::new();
    for piece in raw.split(';') {
        let piece = piece.trim();
        if !piece.is_empty() {
            pieces.push(piece.to_owned());
        }
    }
    pieces
}
fn read_meta_overview(
store: &Store,
hierarchy: &Hierarchy,
lang_book: &crate::store::node::Node,
) -> Result<Option<crate::language_entry::MetaOverview>> {
use crate::store::node::NodeKind;
let Some(meta_chapter) = hierarchy
.children_of(Some(lang_book.id))
.into_iter()
.find(|n| {
n.kind == NodeKind::Chapter && n.title.eq_ignore_ascii_case("Meta")
})
.cloned()
else {
return Ok(None);
};
let Some(overview) = hierarchy
.children_of(Some(meta_chapter.id))
.into_iter()
.find(|n| {
n.kind == NodeKind::Paragraph && n.title.eq_ignore_ascii_case("overview")
})
.cloned()
else {
return Ok(None);
};
let Some(bytes) = store.get_content(overview.id)? else {
return Ok(None);
};
let body = match std::str::from_utf8(&bytes) {
Ok(s) => s,
Err(_) => return Ok(None),
};
Ok(crate::language_entry::parse_meta_overview(body)
.map_err(Error::Config)?)
}
/// Gathers the `phonemes` lists of every paragraph under a language's
/// `Phonology` chapter into one flat list, in subtree order.
///
/// Collection is best-effort: a paragraph is skipped when its content cannot be
/// read, is absent, is not valid UTF-8, or does not deserialize as HJSON. A
/// paragraph that parses but has no `phonemes` key contributes nothing.
///
/// Returns an empty list when the language has no `Phonology` chapter (title
/// matched ASCII-case-insensitively). The current implementation never returns
/// `Err`.
fn collect_phonology_inventories(
    store: &Store,
    hierarchy: &Hierarchy,
    lang_book: &crate::store::node::Node,
) -> Result<Vec<String>> {
    use crate::store::node::NodeKind;
    use serde::Deserialize;

    /// The only part of a phonology rule body this function reads.
    #[derive(Deserialize)]
    struct PhonologyRule {
        #[serde(default)]
        phonemes: Vec<String>,
    }

    let phonology_id = hierarchy
        .children_of(Some(lang_book.id))
        .into_iter()
        .find(|node| {
            node.kind == NodeKind::Chapter
                && node.title.eq_ignore_ascii_case("Phonology")
        })
        .map(|node| node.id);
    let Some(phonology_id) = phonology_id else {
        return Ok(Vec::new());
    };

    let inventory = hierarchy
        .collect_subtree(phonology_id)
        .into_iter()
        .filter(|&id| {
            matches!(hierarchy.get(id), Some(node) if node.kind == NodeKind::Paragraph)
        })
        .filter_map(|id| store.get_content(id).ok().flatten())
        .filter_map(|bytes| -> Option<PhonologyRule> {
            let body = std::str::from_utf8(&bytes).ok()?;
            serde_hjson::from_str(body).ok()
        })
        .flat_map(|rule| rule.phonemes)
        .collect();
    Ok(inventory)
}
/// Returns the first character of `word` that no `inventory` entry covers.
///
/// Whitespace and ASCII punctuation in `word` are ignored. Every other
/// character is lowercased and counts as covered when its lowercase form
/// occurs as a substring of at least one lowercased inventory entry, so
/// multi-character entries such as `"Aa"` cover each of their letters.
///
/// Returns `None` when every checked character is covered. The offending
/// character is returned in its original case.
fn first_unknown_letter(word: &str, inventory: &[String]) -> Option<char> {
    let known: Vec<String> = inventory.iter().map(|entry| entry.to_lowercase()).collect();
    word.chars()
        .filter(|ch| !(ch.is_whitespace() || ch.is_ascii_punctuation()))
        .find(|ch| {
            let folded: String = ch.to_lowercase().collect();
            !known.iter().any(|entry| entry.contains(folded.as_str()))
        })
}
fn wipe_dictionary(
store: &Store,
hierarchy: &Hierarchy,
lang_book: &crate::store::node::Node,
language: &str,
) -> Result<()> {
use crate::store::node::NodeKind;
let dictionary = hierarchy
.children_of(Some(lang_book.id))
.into_iter()
.find(|n| {
n.kind == NodeKind::Chapter && n.title.eq_ignore_ascii_case("Dictionary")
})
.cloned()
.ok_or_else(|| {
Error::Config(format!(
"language `{language}` has no Dictionary chapter to wipe"
))
})?;
let buckets: Vec<_> =
hierarchy.children_of(Some(dictionary.id)).into_iter().cloned().collect();
let bucket_count = buckets.len();
let mut entry_count = 0usize;
let dummy_layout = ProjectLayout::new(store.project_root());
for bucket in buckets.into_iter().rev() {
let fresh = Hierarchy::load(store)?;
let ids = fresh.collect_subtree(bucket.id);
entry_count += ids.len().saturating_sub(1);
let Some(refreshed_bucket) = fresh.get(bucket.id) else { continue; };
let fs_rel = fresh.fs_path(refreshed_bucket, &dummy_layout);
store
.delete_subtree(&fs_rel, &ids)
.map_err(|e| Error::Store(format!("wipe bucket `{}`: {e}", bucket.title)))?;
}
eprintln!(
"--new: wiped {entry_count} existing entries across {bucket_count} buckets from `{language}/Dictionary`"
);
Ok(())
}
/// Parses CSV text into rows of unescaped field strings.
///
/// Behaviour:
///
/// * A leading UTF-8 byte-order mark (U+FEFF) is discarded. Spreadsheet
///   exports commonly emit one, and it would otherwise become part of the
///   first header cell.
/// * Fields are separated by `,`; rows end at `\n`, `\r\n`, or a lone `\r`.
/// * A `"` switches quoted mode on or off. Inside quotes, commas and line
///   breaks are literal and `""` produces a single `"`.
/// * A final row without a trailing line break is still returned.
/// * Fields are not trimmed; a blank line yields a row with one empty field.
///
/// # Errors
///
/// Returns a message when the input ends while still inside a quoted field.
fn parse_csv(raw: &str) -> std::result::Result<Vec<Vec<String>>, String> {
    let raw = raw.strip_prefix('\u{feff}').unwrap_or(raw);

    let mut rows: Vec<Vec<String>> = Vec::new();
    let mut row: Vec<String> = Vec::new();
    let mut field = String::new();
    let mut in_quoted = false;
    let mut chars = raw.chars().peekable();

    while let Some(c) = chars.next() {
        if in_quoted {
            match c {
                // `""` is an escaped quote; a single `"` closes the quoted run.
                '"' if chars.peek() == Some(&'"') => {
                    chars.next();
                    field.push('"');
                }
                '"' => in_quoted = false,
                _ => field.push(c),
            }
            continue;
        }
        match c {
            '"' => in_quoted = true,
            ',' => row.push(std::mem::take(&mut field)),
            '\r' | '\n' => {
                // Treat `\r\n` as one row terminator.
                if c == '\r' && chars.peek() == Some(&'\n') {
                    chars.next();
                }
                row.push(std::mem::take(&mut field));
                rows.push(std::mem::take(&mut row));
            }
            _ => field.push(c),
        }
    }

    if in_quoted {
        return Err("unclosed quote at end of file".into());
    }
    // Flush a last row that was not terminated by a line break.
    if !field.is_empty() || !row.is_empty() {
        row.push(field);
        rows.push(row);
    }
    Ok(rows)
}
fn list(project: &Path) -> Result<()> {
use crate::store::node::NodeKind;
let layout = ProjectLayout::new(project);
layout.require_initialized()?;
let cfg = Config::load_layered(&layout.config_path())?;
let store = Store::open(layout, &cfg)?;
let hierarchy = Hierarchy::load(&store)?;
let lang_root = hierarchy
.iter()
.find(|n| {
n.kind == NodeKind::Book
&& n.system_tag.as_deref() == Some(SYSTEM_TAG_LANGUAGES)
})
.cloned()
.ok_or_else(|| {
Error::Store(
"Language system book missing — re-open the project to seed it".into(),
)
})?;
let languages = hierarchy.children_of(Some(lang_root.id));
if languages.is_empty() {
eprintln!("no languages defined — run `inkhaven language init <name>`");
return Ok(());
}
let mut rows: Vec<(String, usize, usize, usize, usize)> =
Vec::with_capacity(languages.len());
for lang in &languages {
let chapters = hierarchy.children_of(Some(lang.id));
let mut entries = 0usize;
let mut grammar = 0usize;
let mut phonology = 0usize;
let mut samples = 0usize;
for chapter in &chapters {
let title_lc = chapter.title.to_lowercase();
let paragraph_count = hierarchy
.collect_subtree(chapter.id)
.into_iter()
.filter_map(|id| hierarchy.get(id))
.filter(|n| n.kind == NodeKind::Paragraph)
.count();
match title_lc.as_str() {
"dictionary" => entries = paragraph_count,
"grammar" => grammar = paragraph_count,
"phonology" => phonology = paragraph_count,
"sample texts" => samples = paragraph_count,
_ => {}
}
}
rows.push((lang.title.clone(), entries, grammar, phonology, samples));
}
let max_name = rows.iter().map(|r| r.0.chars().count()).max().unwrap_or(8);
let name_w = max_name.max(8);
println!(
" {:<width$} {:>6} {:>7} {:>9} {:>7}",
"name", "words", "grammar", "phonology", "samples",
width = name_w,
);
println!(
" {}",
"-".repeat(name_w + 36)
);
for (name, entries, grammar, phonology, samples) in &rows {
println!(
" {:<width$} {:>6} {:>7} {:>9} {:>7}",
name, entries, grammar, phonology, samples,
width = name_w,
);
}
Ok(())
}
fn remove_word(project: &Path, language: &str, word: &str) -> Result<()> {
use crate::store::node::NodeKind;
let layout = ProjectLayout::new(project);
layout.require_initialized()?;
let cfg = Config::load_layered(&layout.config_path())?;
let store = Store::open(layout.clone(), &cfg)?;
let hierarchy = Hierarchy::load(&store)?;
let lang_root = hierarchy
.iter()
.find(|n| {
n.kind == NodeKind::Book
&& n.system_tag.as_deref() == Some(SYSTEM_TAG_LANGUAGES)
})
.ok_or_else(|| {
Error::Store(
"Language system book missing — re-open the project to seed it".into(),
)
})?
.clone();
let lang_book = hierarchy
.children_of(Some(lang_root.id))
.into_iter()
.find(|n| {
n.kind == NodeKind::Book && n.title.eq_ignore_ascii_case(language)
})
.cloned()
.ok_or_else(|| {
Error::Config(format!("language `{language}` not found"))
})?;
let dictionary = hierarchy
.children_of(Some(lang_book.id))
.into_iter()
.find(|n| {
n.kind == NodeKind::Chapter
&& n.title.eq_ignore_ascii_case("Dictionary")
})
.cloned()
.ok_or_else(|| {
Error::Config(format!(
"language `{language}` has no Dictionary chapter"
))
})?;
let bucket = derive_alphabet_bucket(&store, &hierarchy, &lang_book, word)?
.or_else(|| alphabet_bucket(word))
.ok_or_else(|| {
Error::Config(format!("could not derive alphabet bucket from `{word}`"))
})?;
let subchapter = hierarchy
.children_of(Some(dictionary.id))
.into_iter()
.find(|n| {
n.kind == NodeKind::Subchapter
&& n.title.eq_ignore_ascii_case(&bucket)
})
.cloned()
.ok_or_else(|| {
Error::Config(format!(
"no bucket subchapter `{bucket}` under `{language}/Dictionary` — `{word}` isn't defined"
))
})?;
let entry = hierarchy
.children_of(Some(subchapter.id))
.into_iter()
.find(|n| {
n.kind == NodeKind::Paragraph
&& n.title.eq_ignore_ascii_case(word)
})
.cloned()
.ok_or_else(|| {
Error::Config(format!(
"word `{word}` not found under `{language}/Dictionary/{bucket}`"
))
})?;
let ids = hierarchy.collect_subtree(entry.id);
let fs_rel = entry
.file
.as_ref()
.map(std::path::PathBuf::from)
.unwrap_or_default();
store
.delete_subtree(&fs_rel, &ids)
.map_err(|e| Error::Store(format!("delete entry: {e}")))?;
eprintln!(
"removed `{word}` from `{language}/Dictionary/{bucket}`"
);
Ok(())
}
// Unit tests for the pure helpers of this module: alphabet validation, CSV
// parsing and column resolution, import-entry rendering, seed bodies, export
// renderers, and HJSON extraction/escaping. None of them open a project store.
#[cfg(test)]
mod tests {
use super::*;
// The seeded chapter list must stay in this exact order.
#[test]
fn standard_chapters_match_proposal() {
assert_eq!(
STANDARD_CHAPTERS,
&["Meta", "Dictionary", "Grammar", "Phonology", "Sample texts"]
);
}
// --- first_unknown_letter ---
// Matching ignores case, ASCII punctuation and whitespace.
#[test]
fn first_unknown_letter_passes_when_all_chars_in_inventory() {
let inv = vec!["A".into(), "B".into(), "C".into()];
assert_eq!(first_unknown_letter("abc", &inv), None);
assert_eq!(first_unknown_letter("ABC", &inv), None);
assert_eq!(first_unknown_letter("a-b'c", &inv), None);
assert_eq!(first_unknown_letter("a b c", &inv), None);
}
// Only the first offending character is reported.
#[test]
fn first_unknown_letter_returns_first_violation() {
let inv = vec!["A".into(), "B".into()];
assert_eq!(first_unknown_letter("abz", &inv), Some('z'));
assert_eq!(first_unknown_letter("xyz", &inv), Some('x'));
}
// An inventory entry such as "Aa" covers each letter it contains.
#[test]
fn first_unknown_letter_handles_multichar_inventory_entries() {
let inv = vec!["Aa".into(), "Bb".into(), "Cc".into()];
assert_eq!(first_unknown_letter("aBc", &inv), None);
assert_eq!(first_unknown_letter("aBz", &inv), Some('z'));
}
// Case folding must also work outside ASCII (uppercase inventory, lowercase word).
#[test]
fn first_unknown_letter_handles_non_latin() {
let inv = vec!["А".into(), "Б".into()];
assert_eq!(first_unknown_letter("аб", &inv), None);
assert_eq!(first_unknown_letter("абя", &inv), Some('я'));
}
// --- parse_csv ---
// Quoted fields may contain commas, and `""` unescapes to a single quote.
#[test]
fn csv_parser_handles_quoted_fields() {
let csv = "word,type,translation\n\
atal,noun,river\n\
sora,verb,\"to flow, swiftly\"\n\
nan,pronoun,\"\"\"you\"\"\"\n";
let rows = parse_csv(csv).unwrap();
assert_eq!(rows.len(), 4);
assert_eq!(rows[0], vec!["word", "type", "translation"]);
assert_eq!(rows[1], vec!["atal", "noun", "river"]);
assert_eq!(rows[2], vec!["sora", "verb", "to flow, swiftly"]);
assert_eq!(rows[3], vec!["nan", "pronoun", "\"you\""]);
}
// A line break inside quotes stays in the field instead of ending the row.
#[test]
fn csv_parser_handles_newlines_in_quoted_fields() {
let csv = "word,notes\natal,\"line1\nline2\"\n";
let rows = parse_csv(csv).unwrap();
assert_eq!(rows.len(), 2);
assert_eq!(rows[1], vec!["atal", "line1\nline2"]);
}
// CRLF counts as one row terminator; the last row needs no terminator.
#[test]
fn csv_parser_handles_crlf_and_missing_trailing_newline() {
let csv = "a,b\r\nc,d";
let rows = parse_csv(csv).unwrap();
assert_eq!(rows.len(), 2);
assert_eq!(rows[0], vec!["a", "b"]);
assert_eq!(rows[1], vec!["c", "d"]);
}
#[test]
fn csv_parser_errors_on_unclosed_quote() {
assert!(parse_csv("word\n\"unclosed").is_err());
}
// --- inflection / list-cell helpers ---
#[test]
fn inflection_parser_extracts_pairs() {
let m = parse_inflection_field("nominative=atal;genitive=atale;plural=atatal");
assert_eq!(m.len(), 3);
assert_eq!(m.get("nominative"), Some(&"atal".to_string()));
assert_eq!(m.get("genitive"), Some(&"atale".to_string()));
assert_eq!(m.get("plural"), Some(&"atatal".to_string()));
}
// Chunks without `=` are dropped; surrounding whitespace is ignored.
#[test]
fn inflection_parser_tolerates_whitespace_and_skips_malformed() {
let m = parse_inflection_field(" plural = atatal ; bad-no-equals ; genitive=atale ");
assert_eq!(m.len(), 2);
assert!(m.contains_key("plural"));
assert!(m.contains_key("genitive"));
}
#[test]
fn split_helpers_filter_empty_tokens() {
assert_eq!(
split_pipe("a|b||c"),
vec!["a".to_string(), "b".to_string(), "c".to_string()]
);
assert_eq!(
split_semicolon("a;b;;c"),
vec!["a".to_string(), "b".to_string(), "c".to_string()]
);
}
// --- resolve_csv_columns ---
#[test]
fn resolve_csv_columns_requires_word_type_translation() {
let header = vec!["word".into(), "type".into(), "translation".into()];
let cols = resolve_csv_columns(&header).unwrap();
assert_eq!(cols.word, 0);
assert_eq!(cols.pos, 1);
assert_eq!(cols.translation, 2);
assert!(cols.example.is_none());
}
// Header lacks `translation`, so resolution must fail.
#[test]
fn resolve_csv_columns_errors_on_missing_required() {
let header = vec!["word".into(), "type".into()];
assert!(resolve_csv_columns(&header).is_err());
}
#[test]
fn resolve_csv_columns_is_case_insensitive_and_order_independent() {
let header = vec![
"Notes".into(),
"Translation".into(),
"TYPE".into(),
"Word".into(),
"inflection".into(),
];
let cols = resolve_csv_columns(&header).unwrap();
assert_eq!(cols.word, 3);
assert_eq!(cols.pos, 2);
assert_eq!(cols.translation, 1);
assert_eq!(cols.notes, Some(0));
assert_eq!(cols.inflection, Some(4));
}
// --- build_imported_entry_body ---
// Optional fields left at their defaults must not appear in the body.
#[test]
fn imported_entry_body_skips_empty_optionals() {
let entry = ImportEntry {
word: "atal".into(),
pos: "noun".into(),
translation: "river".into(),
..Default::default()
};
let body = build_imported_entry_body(&entry);
assert!(body.contains("word:"));
assert!(body.contains("type:"));
assert!(body.contains("translation:"));
assert!(!body.contains("example:"));
assert!(!body.contains("pronunciation:"));
assert!(!body.contains("notes:"));
assert!(!body.contains("inflection:"));
}
// The rendered body must also parse as HJSON.
#[test]
fn imported_entry_body_emits_inflection_and_examples() {
let mut entry = ImportEntry {
word: "atal".into(),
pos: "noun".into(),
translation: "river".into(),
..Default::default()
};
entry.inflection.insert("plural".into(), "atatal".into());
entry.inflection.insert("genitive".into(), "atale".into());
entry.examples = vec!["Atal sora-mi.".into(), "Atal kima.".into()];
let body = build_imported_entry_body(&entry);
assert!(body.contains("inflection: {"));
assert!(body.contains("plural: \"atatal\""));
assert!(body.contains("genitive: \"atale\""));
assert!(body.contains("examples: ["));
assert!(body.contains("\"Atal sora-mi.\""));
let parsed: serde_hjson::Value =
serde_hjson::from_str(&body).expect("imported entry body must parse");
let _ = parsed;
}
// --- seed bodies: each must be valid HJSON ---
#[test]
fn meta_overview_seed_parses() {
let _: serde_hjson::Value = serde_hjson::from_str(META_OVERVIEW_BODY)
.expect("META_OVERVIEW_BODY must be valid HJSON");
}
#[test]
fn dictionary_entry_seed_parses() {
let body = seed_dictionary_entry_body(
"aiya", "interjection", "hail", Some("Aiya!"),
);
let _: serde_hjson::Value = serde_hjson::from_str(&body)
.expect("dictionary entry seed must be valid HJSON");
}
#[test]
fn grammar_rule_seed_parses() {
let _: serde_hjson::Value = serde_hjson::from_str(GRAMMAR_RULE_SEED_BODY)
.expect("GRAMMAR_RULE_SEED_BODY must be valid HJSON");
}
#[test]
fn phonology_rule_seed_parses() {
let _: serde_hjson::Value = serde_hjson::from_str(PHONOLOGY_RULE_SEED_BODY)
.expect("PHONOLOGY_RULE_SEED_BODY must be valid HJSON");
}
#[test]
fn meta_overview_body_contains_alphabet_field() {
assert!(META_OVERVIEW_BODY.contains("alphabet:"));
assert!(META_OVERVIEW_BODY.contains("language_kind:"));
}
// --- alphabet_bucket ---
#[test]
fn alphabet_bucket_uppercases_first_char() {
assert_eq!(alphabet_bucket("aiya"), Some("A".to_string()));
assert_eq!(alphabet_bucket("Bran"), Some("B".to_string()));
assert_eq!(alphabet_bucket(" zeta"), Some("Z".to_string()));
}
#[test]
fn alphabet_bucket_handles_non_latin() {
assert_eq!(alphabet_bucket("ярости"), Some("Я".to_string()));
assert_eq!(alphabet_bucket("αυτός"), Some("Α".to_string()));
}
#[test]
fn alphabet_bucket_returns_none_for_whitespace() {
assert_eq!(alphabet_bucket(""), None);
assert_eq!(alphabet_bucket(" "), None);
}
#[test]
fn seed_dictionary_entry_includes_core_fields() {
let body = seed_dictionary_entry_body(
"aiya",
"interjection",
"hail",
Some("Aiya Eärendil!"),
);
assert!(body.contains("word:"));
assert!(body.contains("type:"));
assert!(body.contains("translation:"));
assert!(body.contains("example:"));
assert!(body.contains("aiya"));
assert!(body.contains("interjection"));
assert!(body.contains("hail"));
assert!(body.contains("Aiya Eärendil!"));
}
// --- export helpers ---
// Fields are quoted only when they contain a comma, a quote or a line break.
#[test]
fn csv_field_quotes_when_needed() {
assert_eq!(csv_field("aiya"), "aiya");
assert_eq!(csv_field("hail, friend"), "\"hail, friend\"");
assert_eq!(csv_field("he said \"hi\""), "\"he said \"\"hi\"\"\"");
assert_eq!(csv_field("line1\nline2"), "\"line1\nline2\"");
}
#[test]
fn typst_escape_handles_markup_chars() {
assert_eq!(typst_escape("plain"), "plain");
assert_eq!(typst_escape("a*b"), "a\\*b");
assert_eq!(typst_escape("[bracket]"), "\\[bracket\\]");
assert_eq!(typst_escape("#hash"), "\\#hash");
assert_eq!(typst_escape("with_under"), "with\\_under");
assert_eq!(typst_escape("ñ'olor"), "ñ'olor");
}
// Note the Anki header puts `translation` before `type`, unlike the CSV export.
#[test]
fn render_anki_emits_header_row() {
let out = render_anki(&[]).unwrap();
let s = String::from_utf8(out).unwrap();
assert!(s.starts_with("word,translation,type,example,inflection\n"));
}
#[test]
fn render_anki_renders_entry_row() {
let mut entry = crate::language_entry::DictionaryEntry::default();
entry.word = "aiya".into();
entry.translation = "hail".into();
entry.pos = "interjection".into();
entry.example = "Aiya Eärendil!".into();
let out = render_anki(&[("aiya".into(), entry)]).unwrap();
let s = String::from_utf8(out).unwrap();
let lines: Vec<&str> = s.lines().collect();
assert_eq!(lines.len(), 2, "got: {s:?}");
assert!(lines[1].contains("aiya"));
assert!(lines[1].contains("hail"));
assert!(lines[1].contains("interjection"));
assert!(lines[1].contains("Aiya Eärendil!"));
}
#[test]
fn render_csv_emits_header_row() {
let out = render_csv(&[]);
let s = String::from_utf8(out).unwrap();
assert!(s.starts_with("word,type,translation,example,inflection\n"));
}
// Inflection pairs are expected as `key=value` joined by `;`, in key order.
#[test]
fn render_csv_round_trip_columns_match_in_memory_struct() {
let mut entry = crate::language_entry::DictionaryEntry::default();
entry.word = "stelle".into();
entry.pos = "noun".into();
entry.translation = "star".into();
entry.example = "Le stelle brillano.".into();
entry.inflection.insert("plural".into(), "stelle".into());
entry
.inflection
.insert("singular".into(), "stella".into());
let out = render_csv(&[("stelle".into(), entry)]);
let s = String::from_utf8(out).unwrap();
let lines: Vec<&str> = s.lines().collect();
assert_eq!(lines.len(), 2);
assert!(
lines[1].contains("plural=stelle;singular=stella"),
"unexpected inflection serialisation: {}",
lines[1]
);
assert!(lines[1].contains("stelle,noun,star,Le stelle brillano."));
}
#[test]
fn render_csv_quotes_fields_with_commas_and_quotes() {
let mut entry = crate::language_entry::DictionaryEntry::default();
entry.word = "salve".into();
entry.pos = "interjection".into();
entry.translation = "hello, hi".into(); entry.example = "She said \"salve\".".into(); let out = render_csv(&[("salve".into(), entry)]);
let s = String::from_utf8(out).unwrap();
let lines: Vec<&str> = s.lines().collect();
assert!(
lines[1].contains("\"hello, hi\""),
"comma field should be quoted: {}",
lines[1]
);
assert!(
lines[1].contains("\"She said \"\"salve\"\".\""),
"quote field should escape inner quotes: {}",
lines[1]
);
}
// --- HJSON field extraction ---
#[test]
fn extract_hjson_finds_simple_string_field() {
let body = "{\n rule: \"i becomes y before vowel\"\n category: \"phonology\"\n}";
assert_eq!(
extract_hjson_string_field(body, "rule"),
Some("i becomes y before vowel".into())
);
assert_eq!(
extract_hjson_string_field(body, "category"),
Some("phonology".into())
);
assert_eq!(extract_hjson_string_field(body, "missing"), None);
}
// An empty string value is reported the same as a missing field.
#[test]
fn extract_hjson_skips_empty_fields() {
let body = "{\n rule: \"\"\n category: \"grammar\"\n}";
assert_eq!(extract_hjson_string_field(body, "rule"), None);
assert_eq!(
extract_hjson_string_field(body, "category"),
Some("grammar".into())
);
}
#[test]
fn extract_hjson_examples_inline_array() {
let body = "{\n examples: [\"one\", \"two\", \"three\"]\n}";
let got = extract_hjson_examples(body).unwrap();
assert_eq!(got, vec!["one", "two", "three"]);
}
#[test]
fn extract_hjson_examples_block_form() {
let body = "{\n examples: [\n \"alpha\",\n \"beta\"\n ]\n}";
let got = extract_hjson_examples(body).unwrap();
assert_eq!(got, vec!["alpha", "beta"]);
}
// --- rule templates ---
#[test]
fn rule_template_includes_id_and_grammar_examples() {
let t = rule_template("noun-cases", "grammar");
assert!(t.contains("rule_id: \"noun-cases\""));
assert!(t.contains("invented language"));
}
#[test]
fn rule_template_uses_phonology_examples_when_category_phonology() {
let t = rule_template("vowel-shift", "phonology");
assert!(t.contains("rule_id: \"vowel-shift\""));
assert!(t.contains("phoneme example"));
}
// --- two-column Typst dictionary export ---
#[test]
fn render_dictionary_twocol_groups_by_alphabet() {
let mut a_entry = crate::language_entry::DictionaryEntry::default();
a_entry.word = "aiya".into();
a_entry.pos = "interj.".into();
a_entry.translation = "hail".into();
let mut b_entry = crate::language_entry::DictionaryEntry::default();
b_entry.word = "bara".into();
b_entry.pos = "noun".into();
b_entry.translation = "fire".into();
let out = render_dictionary_twocol(
"Quenya",
None,
&[("aiya".into(), a_entry), ("bara".into(), b_entry)],
);
let s = String::from_utf8(out).unwrap();
assert!(s.contains("— A —"), "got: {s}");
assert!(s.contains("— B —"), "got: {s}");
assert!(s.contains("#set page(paper: \"a4\", columns: 2)"));
assert!(s.contains("*aiya*"));
assert!(s.contains("*bara*"));
assert!(s.contains("Quenya dictionary"));
}
// Quotes and backslashes must both be backslash-escaped.
#[test]
fn escape_hjson_handles_quotes_and_backslashes() {
assert_eq!(escape_hjson(r#"he said "hi""#), r#"he said \"hi\""#);
assert_eq!(escape_hjson(r"a\b"), r"a\\b");
}
}