use super::super::roles::{
ROLE_DEFINITION_COMMAND, ROLE_IMPLEMENTATION_LANGUAGE_NOUN,
ROLE_IMPLEMENTATION_LANGUAGE_PREPOSITION, ROLE_INTERROGATIVE_OPENER,
ROLE_LINKS_NOTATION_FORMAT, ROLE_MECHANISM_INQUIRY, ROLE_PROCEDURAL_REQUEST,
ROLE_PROGRAM_ARTIFACT, ROLE_PROGRAM_MODIFICATION, ROLE_TRANSLATION_ACTION,
};
use super::*;
/// Language codes for which every meaning in the lexicon is required to carry a lexeme.
const SUPPORTED_LANGUAGES: [&str; 4] = ["en", "ru", "hi", "zh"];
/// The lexicon must hold at least ten meanings, and every meaning needs a slug, a
/// non-blank gloss, a non-blank wiktionary reference and at least one role.
#[test]
fn lexicon_is_non_empty_and_well_formed() {
    let catalog = lexicon();
    let total = catalog.meanings.len();
    assert!(total >= 10, "expected a real lexicon");

    for entry in catalog.meanings.iter() {
        let slug = &entry.slug;
        assert!(!slug.is_empty(), "meaning needs a slug");

        // Whitespace-only text counts as missing, hence the trim before the check.
        let gloss_is_blank = entry.gloss.trim().is_empty();
        assert!(!gloss_is_blank, "{} needs a conceptual gloss", slug);

        let source_is_blank = entry.wiktionary.trim().is_empty();
        assert!(
            !source_is_blank,
            "{} must be grounded in real lexical data (wiktionary)",
            slug
        );

        let has_roles = !entry.roles.is_empty();
        assert!(has_roles, "{} must declare at least one semantic role", slug);
    }
}
/// Every meaning must list at least one `defined_by` target, and each target must be
/// the slug of a meaning that exists in the same lexicon.
#[test]
fn every_meaning_is_self_describing() {
    let catalog = lexicon();
    let known_slugs: BTreeSet<&str> = catalog
        .meanings
        .iter()
        .map(|entry| entry.slug.as_str())
        .collect();

    for entry in catalog.meanings.iter() {
        let has_definition = !entry.defined_by.is_empty();
        assert!(
            has_definition,
            "{} must be defined by other meanings",
            entry.slug
        );

        for target in entry.defined_by.iter() {
            let resolves = known_slugs.contains(target.as_str());
            assert!(
                resolves,
                "{} is defined_by `{target}`, which is not itself a defined meaning",
                entry.slug
            );
        }
    }
}
/// Each meaning must expose a lexeme for every entry of `SUPPORTED_LANGUAGES`, and
/// none of its lexemes may have an empty word list.
#[test]
fn every_meaning_covers_all_supported_languages() {
    let catalog = lexicon();
    for entry in catalog.meanings.iter() {
        let covered = entry.languages();
        for language in SUPPORTED_LANGUAGES {
            let is_covered = covered.contains(language);
            assert!(
                is_covered,
                "{} is missing a `{language}` lexeme — meanings must translate to every supported language",
                entry.slug
            );
        }

        for group in &entry.lexemes {
            let has_words = !group.words.is_empty();
            assert!(
                has_words,
                "{} / {} lexeme must list at least one surface word",
                entry.slug,
                group.language
            );
        }
    }
}
/// For the first meaning in the lexicon, `words()` and `word_forms()` must enumerate
/// the same surface texts in the same order, `describe_word` must resolve a recorded
/// surface, and it must return `None` for a surface that is not recorded.
#[test]
fn word_forms_round_trip_and_describe_word_resolves() {
    let lex = lexicon();
    // `.first()` + `expect` instead of `[0]`: an empty lexicon then fails with a message
    // naming the broken precondition rather than a bare index-out-of-bounds panic.
    let meaning = lex
        .meanings
        .first()
        .expect("the lexicon must contain at least one meaning");

    let from_words: Vec<&str> = meaning.words().collect();
    let from_forms: Vec<&str> = meaning.word_forms().map(|f| f.text.as_str()).collect();
    assert_eq!(
        from_words, from_forms,
        "words() and word_forms() must enumerate the same surfaces in order"
    );

    let first = from_words
        .first()
        .copied()
        .expect("the first meaning must record at least one surface word");
    assert!(
        meaning.describe_word(first).is_some(),
        "describe_word must resolve a recorded surface form"
    );

    // The probe starts with a NUL character so it cannot collide with a real surface.
    assert!(
        meaning
            .describe_word("\u{0}-definitely-not-a-recorded-surface")
            .is_none(),
        "describe_word must return None for an unknown surface"
    );
}
/// At least one word form anywhere in the lexicon must carry a non-blank description.
#[test]
fn descriptions_are_parsed_from_the_seed() {
    let catalog = lexicon();

    let mut described = 0usize;
    for entry in catalog.meanings.iter() {
        for form in entry.word_forms() {
            if !form.description.trim().is_empty() {
                described += 1;
            }
        }
    }

    assert!(
        described > 0,
        "the parser must read `description` children off the seed"
    );
}
/// Every word form of every lexeme must have a non-blank description. On failure the
/// message reports how many are missing and shows up to five of them.
#[test]
fn every_word_form_is_described() {
    let catalog = lexicon();

    let mut undescribed: Vec<String> = Vec::new();
    for entry in &catalog.meanings {
        for group in &entry.lexemes {
            for surface in &group.words {
                if !surface.description.trim().is_empty() {
                    continue;
                }
                let location = format!(
                    "{} / {} / {}",
                    entry.slug, group.language, surface.text
                );
                undescribed.push(location);
            }
        }
    }

    assert!(
        undescribed.is_empty(),
        "{} word form(s) lack a description, e.g. {}",
        undescribed.len(),
        undescribed
            .iter()
            .take(5)
            .map(String::as_str)
            .collect::<Vec<&str>>()
            .join(" | ")
    );
}
/// Both the program-artifact and the program-modification roles must yield at least
/// one surface word.
#[test]
fn program_roles_are_populated() {
    let catalog = lexicon();

    let artifact_words = catalog.words_for_role(ROLE_PROGRAM_ARTIFACT);
    assert!(
        !artifact_words.is_empty(),
        "program_artifact role must have surface words"
    );

    let modification_words = catalog.words_for_role(ROLE_PROGRAM_MODIFICATION);
    assert!(
        !modification_words.is_empty(),
        "program_modification role must have surface words"
    );
}
/// Checks `slot()`, `before_slot()` and `after_slot()` for a surface with no `…`, a
/// trailing `…`, a leading `…` and an inner `…`.
#[test]
fn word_form_slot_is_derived_from_the_ellipsis_marker() {
    /// Builds a word form that carries only its surface text.
    fn surface(text: &str) -> WordForm {
        WordForm {
            text: text.to_string(),
            description: String::new(),
            action: String::new(),
        }
    }

    // (surface text, expected slot, expected text before the slot, expected text after it)
    let cases = [
        ("how it works", Slot::Bare, "how it works", ""),
        ("how does …", Slot::Prefix, "how does ", ""),
        ("… как работает", Slot::Suffix, "", " как работает"),
        ("how … works", Slot::Circumfix, "how ", " works"),
    ];

    for (text, expected_slot, expected_before, expected_after) in cases {
        let form = surface(text);
        assert_eq!(form.slot(), expected_slot);
        assert_eq!(form.before_slot(), expected_before);
        assert_eq!(form.after_slot(), expected_after);
    }
}
/// The mechanism-inquiry role must supply forms of all four slot kinds, including the
/// specific English and Russian surfaces checked below. The procedural-request role
/// must supply only prefix forms, including the `how to do …`, `how to …` and
/// `如何做 …` openers with the expected `action` values.
#[test]
fn how_cluster_roles_populate_and_classify() {
    let lex = lexicon();

    let mech = lex.role_word_forms(ROLE_MECHANISM_INQUIRY);
    let mech_is_populated = !mech.is_empty();
    assert!(
        mech_is_populated,
        "mechanism_inquiry must contribute surface forms"
    );

    // These four carry no custom message, so the expression text itself is what a
    // failure prints; they are intentionally left in their literal form.
    assert!(mech.iter().any(|f| f.slot() == Slot::Bare));
    assert!(mech.iter().any(|f| f.slot() == Slot::Prefix));
    assert!(mech.iter().any(|f| f.slot() == Slot::Suffix));
    assert!(mech.iter().any(|f| f.slot() == Slot::Circumfix));

    let has_bare_phrase = mech
        .iter()
        .any(|form| form.slot() == Slot::Bare && form.text == "how it works");
    assert!(
        has_bare_phrase,
        "the bare English how-it-works phrase must be present"
    );

    let has_how_does_prefix = mech
        .iter()
        .any(|form| form.slot() == Slot::Prefix && form.before_slot() == "how does ");
    assert!(
        has_how_does_prefix,
        "the `how does …` prefix surface must be present"
    );

    let has_how_works_circumfix = mech.iter().any(|form| {
        form.slot() == Slot::Circumfix
            && form.before_slot() == "how "
            && form.after_slot() == " works"
    });
    assert!(
        has_how_works_circumfix,
        "the `how … works` circumfix surface must be present"
    );

    let has_russian_suffix = mech
        .iter()
        .any(|form| form.slot() == Slot::Suffix && form.after_slot() == " как работает");
    assert!(
        has_russian_suffix,
        "the `… как работает` suffix surface must be present"
    );

    let procedural = lex.role_word_forms(ROLE_PROCEDURAL_REQUEST);
    let procedural_is_populated = !procedural.is_empty();
    assert!(
        procedural_is_populated,
        "procedural_request must contribute surface forms"
    );

    let only_prefixes = procedural.iter().all(|form| form.slot() == Slot::Prefix);
    assert!(
        only_prefixes,
        "every procedural surface positions the task after the slot"
    );

    // True when some procedural form has exactly this opener and this `action` value.
    let has_opener = |before: &str, action: &str| {
        procedural
            .iter()
            .any(|form| form.before_slot() == before && form.action == action)
    };
    assert!(
        has_opener("how to do ", "do"),
        "`how to do …` must name the do action explicitly"
    );
    assert!(
        has_opener("how to ", ""),
        "`how to …` must leave the action to the task"
    );
    assert!(
        has_opener("如何做 ", "do"),
        "the Chinese `如何做 …` surface must carry its trailing space and do action"
    );
}
/// Pins `mentions_role` on Russian and Chinese input: "отмени сортировку" must count as
/// a program-modification mention, "отменительный разговор" must not, and the unspaced
/// Chinese "取消排序" must count for both the modification and the artifact roles.
/// NOTE(review): presumably the Russian surface being matched is "отмени", which only
/// begins the longer word in the rejected phrase — confirm against the lexicon seed.
#[test]
fn mentions_role_honours_cjk_and_token_boundaries() {
let lex = lexicon();
assert!(lex.mentions_role(ROLE_PROGRAM_MODIFICATION, "отмени сортировку"));
assert!(!lex.mentions_role(ROLE_PROGRAM_MODIFICATION, "отменительный разговор"));
assert!(lex.mentions_role(ROLE_PROGRAM_MODIFICATION, "取消排序"));
assert!(lex.mentions_role(ROLE_PROGRAM_ARTIFACT, "取消排序"));
}
/// Contrasts `mentions_role_raw` with `mentions_role`: the raw variant must accept
/// "отменительный разговор", which `mentions_role` rejects, must still accept
/// "отмени сортировку", and must reject the unrelated phrase "привет мир".
#[test]
fn mentions_role_raw_matches_inflected_stems() {
let lex = lexicon();
assert!(lex.mentions_role_raw(ROLE_PROGRAM_MODIFICATION, "отменительный разговор"));
assert!(!lex.mentions_role(ROLE_PROGRAM_MODIFICATION, "отменительный разговор"));
assert!(lex.mentions_role_raw(ROLE_PROGRAM_MODIFICATION, "отмени сортировку"));
assert!(!lex.mentions_role_raw(ROLE_PROGRAM_MODIFICATION, "привет мир"));
}
/// Checks that `words_for_role_in_languages` filters translation-action words by the
/// requested languages: the `en`/`ru` query must contain the English and Russian words
/// and exclude the Chinese one, and the `hi`/`zh` query must contain the Hindi and
/// Chinese words and exclude the English one.
#[test]
fn words_for_role_partition_by_language() {
let lex = lexicon();
let head_initial = lex.words_for_role_in_languages(ROLE_TRANSLATION_ACTION, &["en", "ru"]);
assert!(head_initial.iter().any(|w| w == "translate"));
assert!(head_initial.iter().any(|w| w == "переведи"));
assert!(head_initial.iter().any(|w| w == "опиши"));
assert!(!head_initial.iter().any(|w| w == "翻译"));
let head_final = lex.words_for_role_in_languages(ROLE_TRANSLATION_ACTION, &["hi", "zh"]);
assert!(head_final.iter().any(|w| w == "翻译"));
assert!(head_final.iter().any(|w| w == "अनुवाद"));
assert!(!head_final.iter().any(|w| w == "translate"));
}
/// The implementation-language preposition and noun roles must expose the expected
/// English and Russian surfaces. Each role must be carried by exactly one meaning, and
/// that meaning must reach the link root.
#[test]
fn implementation_language_marker_roles_expose_head_initial_surfaces() {
    let lex = lexicon();

    let prepositions =
        lex.words_for_role_in_languages(ROLE_IMPLEMENTATION_LANGUAGE_PREPOSITION, &["en", "ru"]);
    let has_english_preposition = prepositions.iter().any(|word| word == "in");
    assert!(
        has_english_preposition,
        "English target preposition surface missing, got: {prepositions:?}"
    );
    let has_russian_preposition = prepositions.iter().any(|word| word == "на");
    assert!(
        has_russian_preposition,
        "Russian target preposition surface missing, got: {prepositions:?}"
    );

    let nouns = lex.words_for_role_in_languages(ROLE_IMPLEMENTATION_LANGUAGE_NOUN, &["en", "ru"]);
    let has_english_noun = nouns.iter().any(|word| word == "language");
    assert!(
        has_english_noun,
        "English language-noun surface missing, got: {nouns:?}"
    );
    let has_russian_noun = nouns.iter().any(|word| word == "языке");
    assert!(
        has_russian_noun,
        "Russian language-noun surface missing, got: {nouns:?}"
    );

    for role in [
        ROLE_IMPLEMENTATION_LANGUAGE_PREPOSITION,
        ROLE_IMPLEMENTATION_LANGUAGE_NOUN,
    ] {
        let carriers: Vec<&Meaning> = lex.meanings_with_role(role).collect();
        for carrier in &carriers {
            assert!(
                lex.reaches_root(&carrier.slug),
                "meaning {} (role {role}) must reduce to the link root",
                carrier.slug
            );
        }
        assert_eq!(
            carriers.len(),
            1,
            "exactly one meaning should carry role {role}"
        );
    }
}
/// Pins the exact surface sets of the definition-command role (English) and the
/// links-notation-format role (English and Russian). Each role must be carried by
/// exactly one meaning, and that meaning must reach the link root.
#[test]
fn define_in_links_roles_expose_the_scanned_surfaces() {
    let lex = lexicon();

    // Sorted before comparing, so only set membership is pinned, not lexicon order.
    let mut verbs = lex.words_for_role_in_languages(ROLE_DEFINITION_COMMAND, &["en"]);
    verbs.sort();
    let expected_verbs = vec![String::from("define")];
    assert_eq!(
        verbs, expected_verbs,
        "English define-command surface set drifted from the original gate"
    );

    let mut markers = lex.words_for_role_in_languages(ROLE_LINKS_NOTATION_FORMAT, &["en", "ru"]);
    markers.sort();
    let expected_markers = vec![String::from("links notation"), String::from("в links")];
    assert_eq!(
        markers, expected_markers,
        "English/Russian links-notation marker set drifted from the original gate"
    );

    for role in [ROLE_DEFINITION_COMMAND, ROLE_LINKS_NOTATION_FORMAT] {
        let carriers: Vec<&Meaning> = lex.meanings_with_role(role).collect();
        for carrier in &carriers {
            assert!(
                lex.reaches_root(&carrier.slug),
                "meaning {} (role {role}) must reduce to the link root",
                carrier.slug
            );
        }
        assert_eq!(
            carriers.len(),
            1,
            "exactly one meaning should carry role {role}"
        );
    }
}
/// Pins the exact English and Russian interrogative-opener sets and checks that the
/// listed Hindi and Chinese question words are absent from the `en`/`ru` query. The
/// role must be carried by exactly one meaning, and that meaning must reach the link
/// root.
#[test]
fn interrogative_opener_role_exposes_head_initial_question_words() {
    let lex = lexicon();

    let mut english = lex.words_for_role_in_languages(ROLE_INTERROGATIVE_OPENER, &["en"]);
    english.sort();
    let expected_english: Vec<String> = ["how", "what", "when", "where", "which", "who", "why"]
        .iter()
        .map(|word| (*word).to_owned())
        .collect();
    assert_eq!(
        english, expected_english,
        "English interrogative-opener set drifted from the original classifier list"
    );

    let mut russian = lex.words_for_role_in_languages(ROLE_INTERROGATIVE_OPENER, &["ru"]);
    russian.sort();
    let expected_russian: Vec<String> = ["где", "как", "когда", "кто", "почему", "что"]
        .iter()
        .map(|word| (*word).to_owned())
        .collect();
    assert_eq!(
        russian, expected_russian,
        "Russian interrogative-opener set drifted from the original classifier list"
    );

    let head_initial = lex.words_for_role_in_languages(ROLE_INTERROGATIVE_OPENER, &["en", "ru"]);
    // Despite the binding's name, the probe list mixes Hindi and Chinese words.
    for cjk in ["क्या", "什么", "कौन", "谁"] {
        let leaked = head_initial.iter().any(|word| word == cjk);
        assert!(
            !leaked,
            "head-final surface {cjk} leaked into the head-initial partition"
        );
    }

    let carriers: Vec<&Meaning> = lex.meanings_with_role(ROLE_INTERROGATIVE_OPENER).collect();
    for carrier in &carriers {
        assert!(
            lex.reaches_root(&carrier.slug),
            "meaning {} (role interrogative_opener) must reduce to the link root",
            carrier.slug
        );
    }
    assert_eq!(
        carriers.len(),
        1,
        "exactly one meaning should carry role interrogative_opener"
    );
}
/// `first_role_language` must report the language of the translation command found
/// in the text, and `None` when the text has no translation command.
#[test]
fn first_role_language_reads_the_command_language() {
    let catalog = lexicon();
    let priority = ["ru", "hi", "zh"];

    // (input text, expected language code)
    let cases = [
        ("переведи apple", Some("ru")),
        ("apple का अनुवाद करो", Some("hi")),
        ("把 apple 翻译成中文", Some("zh")),
        ("what is apple", None),
    ];

    for (text, expected) in cases {
        assert_eq!(
            catalog.first_role_language(ROLE_TRANSLATION_ACTION, text, &priority),
            expected
        );
    }
}
/// Exactly one meaning may carry the ontology-root role; it must be `link` and list
/// itself in `defined_by`. The type sub-root and every ontology category must reach
/// that root, and at least two categories must exist.
#[test]
fn the_ontology_has_a_single_link_root() {
    let lex = lexicon();

    let root_candidates: Vec<&Meaning> = lex.meanings_with_role(ROLE_ONTOLOGY_ROOT).collect();
    let root_count = root_candidates.len();
    assert_eq!(
        root_count, 1,
        "the merged ontology must have exactly one root, found {}",
        root_count
    );

    // Safe to index: the assertion above guarantees exactly one candidate.
    let link_root = root_candidates[0];
    assert_eq!(link_root.slug, "link", "the ontology root must be `link`");

    let is_self_rooted = link_root.defined_by.iter().any(|target| target == "link");
    assert!(
        is_self_rooted,
        "the root `link` must be defined by itself (self-rooted)"
    );

    let type_root = lex
        .ontology_type_root()
        .expect("a type-system sub-root (role ontology_type) must exist");
    let type_root_is_rooted = lex.reaches_root(&type_root.slug);
    assert!(
        type_root_is_rooted,
        "the type sub-root must reduce to the link root"
    );

    let top_level: Vec<&Meaning> = lex.ontology_categories().collect();
    let category_count = top_level.len();
    assert!(
        category_count >= 2,
        "the ontology must define top-level categories under the root, found {}",
        category_count
    );
    for category in &top_level {
        assert!(
            lex.reaches_root(&category.slug),
            "ontology category {} must reduce to the link root",
            category.slug
        );
    }
}
/// An ontology root must exist, and `reaches_root` must hold for every meaning's slug.
#[test]
fn every_meaning_reaches_the_link_root() {
    let catalog = lexicon();

    let root = catalog.ontology_root();
    assert!(root.is_some(), "an ontology root must exist");

    for entry in catalog.meanings.iter() {
        let is_rooted = catalog.reaches_root(&entry.slug);
        assert!(
            is_rooted,
            "{} does not reach the `link` ontology root via defined_by",
            entry.slug
        );
    }
}