use std::borrow::Cow;
use serde::{Deserialize, Serialize};
/// A linguistic query, expressed as data so it can be serialized, compared,
/// and routed to a resolver.
///
/// [`answer_from_data`] is the resolver visible in this file; it only handles
/// `ExamplesForPhoneme` and a restricted form of `ComparativeAnalysis`, and
/// returns `None` for everything else.
///
/// The enum is `#[non_exhaustive]`: code outside this crate must keep a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum LanguageQuery {
/// Query carrying a piece of free-form `text`. Built by [`identify`].
/// [`answer_from_data`] returns `None` for this variant.
IdentifyLanguage { text: String },
/// Query about a phonology `topic` for a given `language`. Built by
/// [`explain_phonology`]. [`answer_from_data`] returns `None` for this
/// variant.
ExplainPhonology { language: String, topic: String },
/// Query about one phoneme of one language.
///
/// [`answer_from_data`] resolves this by looking `language` up in the
/// phoneme registry and checking whether the inventory contains `ipa`.
ExamplesForPhoneme {
// Key passed to `crate::registry::phonemes`.
language: String,
// Phoneme symbol passed to the inventory's `has` / `find`.
ipa: String,
// NOTE(review): presumably the number of examples wanted; it is not read
// by `answer_from_data` in this file — confirm against other consumers.
count: u8,
},
/// Comparison of several languages along one [`ComparisonAspect`]. Built
/// by [`compare`].
///
/// [`answer_from_data`] only answers this when `aspect` is
/// `ComparisonAspect::Phonology` and `languages` has exactly two entries.
ComparativeAnalysis {
// Each entry is used as a key for `crate::registry::phonemes`.
languages: Vec<String>,
aspect: ComparisonAspect,
},
/// Query pairing a `word` with a `source_language`.
/// [`answer_from_data`] returns `None` for this variant.
EtymologyLookup {
word: String,
source_language: String,
},
}
/// The dimension along which a `LanguageQuery::ComparativeAnalysis` compares
/// languages.
///
/// Of these, only `Phonology` is handled by [`answer_from_data`]; the other
/// aspects make it return `None`.
///
/// The enum is `#[non_exhaustive]`: code outside this crate must keep a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum ComparisonAspect {
/// Answered by [`answer_from_data`] from phoneme inventories (two languages only).
Phonology,
/// Not handled by [`answer_from_data`].
Grammar,
/// Not handled by [`answer_from_data`].
Script,
/// Not handled by [`answer_from_data`].
Lexicon,
}
/// The answer to a [`LanguageQuery`].
///
/// Only `PartialEq` (not `Eq`) is derived because `confidence` is an `f64`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct QueryResponse {
/// Optional identifier for the query. [`answer_from_data`] always leaves
/// this as `None`.
pub query_id: Option<Cow<'static, str>>,
/// Where the answer came from.
pub source: ResponseSource,
/// Human-readable answer text.
pub content: String,
/// Optional confidence value. [`answer_from_data`] always sets `Some(1.0)`.
// NOTE(review): presumably a 0.0..=1.0 scale — confirm; the range is not
// enforced anywhere in this file.
pub confidence: Option<f64>,
/// Optional machine-readable payload. [`answer_from_data`] fills this with
/// the JSON form of the inventory's `find` result when the phoneme is
/// present, and leaves it `None` otherwise.
pub structured_data: Option<serde_json::Value>,
}
/// Provenance tag stored in [`QueryResponse::source`].
///
/// The enum is `#[non_exhaustive]`: code outside this crate must keep a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum ResponseSource {
/// Set by [`answer_from_data`] on every response it builds from the
/// phoneme registry.
VarnaData,
// NOTE(review): presumably marks answers produced by a language model; it
// is not constructed in this file — confirm against the producer.
LlmGenerated,
// NOTE(review): presumably marks answers mixing registry data with
// generated text; it is not constructed in this file — confirm.
Hybrid,
}
/// Builds a [`LanguageQuery::IdentifyLanguage`] query for `text`.
///
/// Accepts anything convertible into a `String` (`&str`, `String`, ...), so
/// callers do not have to allocate up front.
#[must_use]
pub fn identify(text: impl Into<String>) -> LanguageQuery {
    let text: String = text.into();
    LanguageQuery::IdentifyLanguage { text }
}
/// Builds a [`LanguageQuery::ExplainPhonology`] query.
///
/// Both `language` and `topic` accept anything convertible into a `String`;
/// they are stored in the fields of the same name.
#[must_use]
pub fn explain_phonology(language: impl Into<String>, topic: impl Into<String>) -> LanguageQuery {
    // Convert first (language, then topic) so the variant can use field shorthand.
    let (language, topic): (String, String) = (language.into(), topic.into());
    LanguageQuery::ExplainPhonology { language, topic }
}
#[must_use]
pub fn compare(languages: Vec<String>, aspect: ComparisonAspect) -> LanguageQuery {
LanguageQuery::ComparativeAnalysis { languages, aspect }
}
/// Tries to answer `query` using only the phoneme registry.
///
/// Two query shapes are handled:
///
/// * `ExamplesForPhoneme` — reports whether `ipa` is in the language's
///   inventory. When it is, the inventory's `find` result is attached as
///   `structured_data` (or `None` if it cannot be converted to JSON). The
///   `count` field is ignored.
/// * `ComparativeAnalysis` with `ComparisonAspect::Phonology` and exactly two
///   languages — reports the consonant and vowel counts of both inventories.
///
/// Every response built here has `source: ResponseSource::VarnaData`,
/// `confidence: Some(1.0)` and `query_id: None`.
///
/// Returns `None` for any other query, and also when
/// `crate::registry::phonemes` has no entry for a requested language.
#[must_use]
pub fn answer_from_data(query: &LanguageQuery) -> Option<QueryResponse> {
    tracing::trace!("attempting data-only query resolution");

    // All registry-backed answers share the same envelope; only the text and
    // the optional payload differ.
    let from_registry = |content: String, structured_data: Option<serde_json::Value>| {
        QueryResponse {
            query_id: None,
            source: ResponseSource::VarnaData,
            content,
            confidence: Some(1.0),
            structured_data,
        }
    };

    match query {
        LanguageQuery::ExamplesForPhoneme { language, ipa, .. } => {
            let inv = crate::registry::phonemes(language)?;
            let lang = &inv.language_name;

            // Absent phoneme: a definite "no", with no payload.
            if !inv.has(ipa) {
                return Some(from_registry(
                    format!("/{ipa}/ is not in the {lang} phoneme inventory"),
                    None,
                ));
            }

            let c = inv.consonant_count();
            let v = inv.vowel_count();
            let content = format!("/{ipa}/ is present in {lang} ({c}C + {v}V inventory)");
            // A JSON conversion failure degrades to "no payload" rather than
            // dropping the whole answer.
            let payload = serde_json::to_value(inv.find(ipa)).ok();
            Some(from_registry(content, payload))
        }
        LanguageQuery::ComparativeAnalysis {
            languages,
            aspect: ComparisonAspect::Phonology,
        } => match languages.as_slice() {
            // Only pairwise comparisons can be answered from data alone.
            [first, second] => {
                let left = crate::registry::phonemes(first)?;
                let right = crate::registry::phonemes(second)?;
                let content = format!(
                    "{} has {}C+{}V, {} has {}C+{}V",
                    left.language_name,
                    left.consonant_count(),
                    left.vowel_count(),
                    right.language_name,
                    right.consonant_count(),
                    right.vowel_count(),
                );
                Some(from_registry(content, None))
            }
            _ => None,
        },
        _ => None,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture: an `ExamplesForPhoneme` query asking for three examples.
    fn phoneme_query(language: &str, ipa: &str) -> LanguageQuery {
        LanguageQuery::ExamplesForPhoneme {
            language: String::from(language),
            ipa: String::from(ipa),
            count: 3,
        }
    }

    /// `identify` yields the `IdentifyLanguage` variant.
    #[test]
    fn test_identify_query() {
        let query = identify("hello world");
        assert!(matches!(query, LanguageQuery::IdentifyLanguage { .. }));
    }

    /// `explain_phonology` yields the `ExplainPhonology` variant.
    #[test]
    fn test_explain_query() {
        let query = explain_phonology("en", "vowel reduction");
        assert!(matches!(query, LanguageQuery::ExplainPhonology { .. }));
    }

    /// `compare` yields the `ComparativeAnalysis` variant.
    #[test]
    fn test_compare_query() {
        let languages = vec![String::from("en"), String::from("de")];
        let query = compare(languages, ComparisonAspect::Phonology);
        assert!(matches!(query, LanguageQuery::ComparativeAnalysis { .. }));
    }

    /// A phoneme found in the inventory is reported as present, from data,
    /// with full confidence.
    #[test]
    fn test_answer_from_data_phoneme_exists() {
        let query = phoneme_query("en", "θ");
        let response = answer_from_data(&query).unwrap();
        assert_eq!(response.source, ResponseSource::VarnaData);
        assert!(response.content.contains("present"));
        assert_eq!(response.confidence, Some(1.0));
    }

    /// A phoneme missing from the inventory still gets a data answer, saying so.
    #[test]
    fn test_answer_from_data_phoneme_missing() {
        let query = phoneme_query("en", "ʀ");
        let response = answer_from_data(&query).unwrap();
        assert!(response.content.contains("not in"));
    }

    /// A two-language phonology comparison names both languages.
    #[test]
    fn test_answer_from_data_comparison() {
        let languages = vec![String::from("en"), String::from("ja")];
        let query = compare(languages, ComparisonAspect::Phonology);
        let response = answer_from_data(&query).unwrap();
        assert!(response.content.contains("English"));
        assert!(response.content.contains("Japanese"));
    }

    /// Queries outside the data-only subset are left unanswered.
    #[test]
    fn test_answer_from_data_needs_llm() {
        let query = identify("some text");
        assert!(answer_from_data(&query).is_none());
    }

    /// A query survives a JSON round trip unchanged.
    #[test]
    fn test_query_serde_roundtrip() {
        let original = explain_phonology("ru", "palatalization");
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: LanguageQuery = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }

    /// A response keeps its source and content across a JSON round trip.
    #[test]
    fn test_response_serde_roundtrip() {
        let original = QueryResponse {
            query_id: Some(Cow::Borrowed("test-1")),
            source: ResponseSource::VarnaData,
            content: String::from("test response"),
            confidence: Some(0.95),
            structured_data: None,
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: QueryResponse = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original.source, decoded.source);
        assert_eq!(original.content, decoded.content);
    }
}