use crate::state::*;
use axum::http::StatusCode;
use std::collections::HashMap;
pub fn build_intent_labels(router: µresolve::Resolver) -> String {
let mut ids = router.intent_ids();
ids.sort();
ids.iter()
.map(|id| {
let desc = router.intent(id).map(|i| i.description).unwrap_or_default();
if desc.is_empty() {
format!("- {} [NO DESCRIPTION — cannot classify reliably]", id)
} else {
format!("- {} ({})", id, desc)
}
})
.collect::<Vec<_>>()
.join("\n")
}
fn intent_phrases_context(
router: µresolve::Resolver,
intent_ids: &[String],
cap: usize,
) -> String {
intent_ids
.iter()
.map(|id| {
let info = router.intent(id);
let desc = info.as_ref().map(|i| i.description.as_str()).unwrap_or("");
let phrases = router.training(id).unwrap_or_default();
let shown: Vec<&String> = phrases.iter().rev().take(cap).collect();
let desc_str = if desc.is_empty() {
String::new()
} else {
format!(" ({})", desc)
};
format!(" {}{}: {:?}", id, desc_str, shown)
})
.collect::<Vec<_>>()
.join("\n")
}
/// Returns the slice of `text` running from the first `open` to the last `close`
/// (both inclusive), or `None` when either delimiter is missing or the last
/// `close` does not come after the first `open`.
fn delimited_span(text: &str, open: char, close: char) -> Option<&str> {
    let start = text.find(open)?;
    let end = text.rfind(close)?;
    // Both delimiters are single-byte ASCII at every call site, so the inclusive
    // range always lands on char boundaries. Requiring `start < end` avoids the
    // slice panic on reversed input such as "} {".
    (start < end).then(|| &text[start..=end])
}

/// Extracts the JSON payload from a free-form LLM reply.
///
/// Strategy, in order:
/// 1. If the reply contains a ``` fence, look inside the first fenced block
///    (skipping the language tag line). Whichever of `[` / `{` appears first
///    decides whether an array or an object is extracted.
/// 2. Otherwise (or if the fenced block held no usable JSON), scan the whole
///    trimmed reply: if the last `]` comes after the last `}` the payload is
///    taken to be an array, else an object.
/// 3. If nothing matches, the trimmed input is returned unchanged so the caller's
///    JSON parser can report the error.
///
/// The returned slice always borrows from `text`; no validation of the JSON
/// itself is performed.
pub fn extract_json(text: &str) -> &str {
    let trimmed = text.trim();

    if let Some(fence_start) = trimmed.find("```") {
        let after_fence = &trimmed[fence_start + 3..];
        // Skip the rest of the fence line (e.g. the "json" language tag).
        let content_start = after_fence.find('\n').map_or(0, |i| i + 1);
        let body = &after_fence[content_start..];
        let body = body.find("```").map_or(body, |end| &body[..end]).trim();

        let prefer_array = match (body.find('['), body.find('{')) {
            (Some(a), Some(o)) => a <= o,
            (Some(_), None) => true,
            _ => false,
        };
        if prefer_array {
            if let Some(json) = delimited_span(body, '[', ']') {
                return json;
            }
        }
        if let Some(json) = delimited_span(body, '{', '}') {
            return json;
        }
    }

    // No usable fenced block: the outermost container is the one that closes last.
    let prefer_array = match (trimmed.rfind(']'), trimmed.rfind('}')) {
        (Some(array_end), Some(object_end)) => array_end > object_end,
        (Some(_), None) => true,
        _ => false,
    };
    let (open, close) = if prefer_array { ('[', ']') } else { ('{', '}') };
    delimited_span(trimmed, open, close).unwrap_or(trimmed)
}
/// Sends `prompt` to the configured LLM and returns the reply text.
///
/// The model comes from the `LLM_MODEL` environment variable. When that is
/// unset, a default is chosen from `LLM_PROVIDER` (itself defaulting to
/// `"anthropic"`): `gemini-2.5-flash` for `"gemini"`, and
/// `claude-haiku-4-5-20251001` for every other provider value.
///
/// # Errors
/// Propagates the `(StatusCode, String)` error produced by `call_llm_with_model`.
pub async fn call_llm(
    state: &ServerState,
    prompt: &str,
    max_tokens: u32,
) -> Result<String, (StatusCode, String)> {
    let model = match std::env::var("LLM_MODEL") {
        Ok(explicit) => explicit,
        Err(_) => {
            let provider =
                std::env::var("LLM_PROVIDER").unwrap_or_else(|_| "anthropic".to_string());
            let fallback = if provider == "gemini" {
                "gemini-2.5-flash"
            } else {
                "claude-haiku-4-5-20251001"
            };
            fallback.to_string()
        }
    };
    call_llm_with_model(state, prompt, max_tokens, &model).await
}
/// Calls the LLM once and, if the failure looks transient, waits three seconds
/// and retries exactly one more time.
///
/// A failure is treated as transient when the status is 429 or 503, or when the
/// error message mentions `429`, `503`, `rate`, `quota` or `overloaded`. The
/// message check is needed because `call_llm_once` reports upstream HTTP errors
/// as `BAD_GATEWAY` with the upstream status embedded in the text.
///
/// A missing API key is also reported as 503 by `call_llm_once`, but waiting
/// cannot fix a configuration error, so no retry (and no sleep) happens when
/// `state.llm_key` is unset.
///
/// # Errors
/// Returns the first error when it is not retryable, otherwise the result of the
/// second attempt.
async fn call_llm_with_model(
    state: &ServerState,
    prompt: &str,
    max_tokens: u32,
    model: &str,
) -> Result<String, (StatusCode, String)> {
    let (status, msg) = match call_llm_once(state, prompt, max_tokens, model).await {
        Ok(text) => return Ok(text),
        Err(failure) => failure,
    };

    let looks_transient = status == StatusCode::TOO_MANY_REQUESTS
        || msg.contains("429")
        || msg.contains("rate")
        || msg.contains("quota")
        || status == StatusCode::SERVICE_UNAVAILABLE
        || msg.contains("503")
        || msg.contains("overloaded");

    // Without a key the second attempt would fail identically after a pointless 3s wait.
    if state.llm_key.is_some() && looks_transient {
        eprintln!("[llm] rate limited, waiting 3s then retrying once");
        tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;
        call_llm_once(state, prompt, max_tokens, model).await
    } else {
        Err((status, msg))
    }
}
/// Performs a single LLM request against the provider named by `LLM_PROVIDER`
/// (default `"anthropic"`) and returns the trimmed reply text.
///
/// * `"gemini"` — posts to the `generateContent` endpoint for `model`, with at
///   least 512 output tokens, temperature 0.3 and a thinking budget of 0.
/// * `"anthropic"` — posts to `LLM_API_URL` (default: the Anthropic messages
///   endpoint) using the `x-api-key` header.
/// * anything else — posts an OpenAI-style chat-completions body to
///   `LLM_API_URL` (default: the OpenAI endpoint) using a bearer token.
///
/// # Errors
/// * `SERVICE_UNAVAILABLE` when `state.llm_key` is not set.
/// * `BAD_GATEWAY` when the request fails, the provider answers with a
///   non-success status (the upstream status and body are put in the message),
///   the body is not JSON, or the expected text field is absent.
async fn call_llm_once(
    state: &ServerState,
    prompt: &str,
    max_tokens: u32,
    model: &str,
) -> Result<String, (StatusCode, String)> {
    let key = state.llm_key.as_ref().ok_or_else(|| {
        (
            StatusCode::SERVICE_UNAVAILABLE,
            "LLM_API_KEY not set. Add it to .env file.".to_string(),
        )
    })?;
    let provider = std::env::var("LLM_PROVIDER").unwrap_or_else(|_| "anthropic".to_string());
    // Gemini requests are never sent with fewer than 512 output tokens.
    let effective_max = if provider == "gemini" {
        max_tokens.max(512)
    } else {
        max_tokens
    };
    let resp = match provider.as_str() {
        "gemini" => {
            // NOTE(review): the key travels in the URL query string; if the HTTP
            // client's error text includes the URL, the "LLM request failed"
            // message below could expose it — confirm.
            let url = format!(
                "https://generativelanguage.googleapis.com/v1beta/models/{}:generateContent?key={}",
                model, key
            );
            let body = serde_json::json!({
                "contents": [{"parts": [{"text": prompt}]}],
                "generationConfig": {
                    "maxOutputTokens": effective_max,
                    "temperature": 0.3,
                    "thinkingConfig": { "thinkingBudget": 0 }
                }
            });
            state
                .http
                .post(&url)
                .header("content-type", "application/json")
                .json(&body)
                .send()
                .await
        }
        "anthropic" => {
            let url = std::env::var("LLM_API_URL")
                .unwrap_or_else(|_| "https://api.anthropic.com/v1/messages".to_string());
            let body = serde_json::json!({
                "model": model,
                "max_tokens": max_tokens,
                "messages": [{"role": "user", "content": prompt}],
            });
            state
                .http
                .post(&url)
                .header("x-api-key", key)
                .header("anthropic-version", "2023-06-01")
                .header("content-type", "application/json")
                .json(&body)
                .send()
                .await
        }
        _ => {
            let url = std::env::var("LLM_API_URL")
                .unwrap_or_else(|_| "https://api.openai.com/v1/chat/completions".to_string());
            let body = serde_json::json!({
                "model": model,
                "max_tokens": max_tokens,
                "messages": [{"role": "user", "content": prompt}],
            });
            state
                .http
                .post(&url)
                .header("Authorization", format!("Bearer {}", key))
                .header("content-type", "application/json")
                .json(&body)
                .send()
                .await
        }
    }
    .map_err(|e| {
        (
            StatusCode::BAD_GATEWAY,
            format!("LLM request failed: {}", e),
        )
    })?;

    if !resp.status().is_success() {
        // The upstream status is embedded in the message; the caller's retry
        // heuristic looks for "429"/"503" there.
        let status = resp.status().as_u16();
        let text = resp.text().await.unwrap_or_default();
        return Err((
            StatusCode::BAD_GATEWAY,
            format!("LLM API {}: {}", status, text),
        ));
    }

    let data: serde_json::Value = resp
        .json()
        .await
        .map_err(|e| (StatusCode::BAD_GATEWAY, format!("Bad response: {}", e)))?;

    match provider.as_str() {
        "gemini" => data["candidates"][0]["content"]["parts"][0]["text"]
            .as_str()
            .map(|s| s.trim().to_string())
            .ok_or_else(|| {
                let raw = serde_json::to_string(&data).unwrap_or_default();
                // Truncate by characters: a byte-index slice panics when the cut
                // falls inside a multi-byte UTF-8 sequence.
                let preview: String = raw.chars().take(200).collect();
                (
                    StatusCode::BAD_GATEWAY,
                    format!("Invalid JSON from LLM: {}", preview),
                )
            }),
        "anthropic" => data["content"][0]["text"]
            .as_str()
            .map(|s| s.trim().to_string())
            .ok_or_else(|| (StatusCode::BAD_GATEWAY, "No text in response".to_string())),
        _ => data["choices"][0]["message"]["content"]
            .as_str()
            .map(|s| s.trim().to_string())
            .ok_or_else(|| (StatusCode::BAD_GATEWAY, "No text in response".to_string())),
    }
}
/// Outcome of one `phrase_pipeline` run.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct PhrasePipelineResult {
/// `(intent_id, phrase)` pairs for which the resolver reported the phrase as added.
pub added: Vec<(String, String)>,
/// Phrases that were not accepted. `phrase_pipeline` currently always leaves this empty.
/// NOTE(review): tuple layout is presumably `(intent, phrase, reason)` — confirm against consumers.
pub blocked: Vec<(String, String, String)>,
/// `phrase_pipeline` currently always sets this to 0.
pub initially_blocked: usize,
/// `phrase_pipeline` currently always sets this to 0.
pub recovered_by_retry: usize,
/// `phrase_pipeline` currently always leaves this empty.
pub suggestions: Vec<(String, String)>, }
/// Adds candidate phrases to their intents in namespace `app_id`.
///
/// Each phrase is trimmed and skipped when empty, then offered to the resolver
/// via `add_phrase_checked` with language `lang`. Phrases the resolver reports as
/// added are collected as `(intent_id, phrase)` pairs; if at least one was added,
/// `maybe_commit` is called for the namespace. When the namespace does not exist
/// nothing is added.
///
/// `_auto_apply_retry` is accepted but not used. The `blocked`,
/// `initially_blocked`, `recovered_by_retry` and `suggestions` fields of the
/// result are always empty / zero.
pub async fn phrase_pipeline(
    state: &AppState,
    app_id: &str,
    phrases_by_intent: &HashMap<String, Vec<String>>,
    _auto_apply_retry: bool,
    lang: &str,
) -> PhrasePipelineResult {
    let mut added: Vec<(String, String)> = Vec::new();

    if let Some(handle) = state.engine.try_namespace(app_id) {
        for (intent_id, phrases) in phrases_by_intent {
            let cleaned = phrases
                .iter()
                .map(|raw| raw.trim().to_string())
                .filter(|candidate| !candidate.is_empty());
            for candidate in cleaned {
                let outcome = handle
                    .with_resolver_mut(|r| r.add_phrase_checked(intent_id, &candidate, lang));
                if outcome.added {
                    added.push((intent_id.clone(), candidate));
                }
            }
        }
        // Only commit when something actually changed.
        if !added.is_empty() {
            maybe_commit(state, app_id);
        }
    }

    PhrasePipelineResult {
        added,
        blocked: Vec::new(),
        initially_blocked: 0,
        recovered_by_retry: 0,
        suggestions: vec![],
    }
}
/// Element type of `FullReviewResult::phrases_blocked`.
/// NOTE(review): no value of this type is constructed in the visible part of this
/// file; the field meanings below are read off the field names — confirm.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct BlockedPhrase {
/// Presumably the id of the intent the phrase was proposed for.
pub intent: String,
/// Presumably the phrase text that was rejected.
pub phrase: String,
/// Presumably a human-readable explanation of the rejection.
pub reason: String,
}
/// Result of reviewing one routed query (see `full_review`), consumed by `apply_review`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct FullReviewResult {
/// Intents judged to be correctly expressed by the query.
pub correct_intents: Vec<String>,
/// Detected intents judged to be false positives.
pub wrong_detections: Vec<String>,
/// Intents the query expresses that were not detected.
pub missed_intents: Vec<String>,
/// Language codes for the query; `full_review` falls back to `["en"]` when none is reported.
pub languages: Vec<String>,
/// Set to `true` by `full_review` when there are no wrong or missed intents, or when
/// the confidence short-circuit trusted the routing.
pub detection_perfect: bool,
/// Intent id → phrases proposed for addition; `apply_review` feeds these to `phrase_pipeline`.
pub phrases_to_add: HashMap<String, Vec<String>>,
/// Always empty in the results built by the visible code in this file.
pub phrases_blocked: Vec<BlockedPhrase>,
/// Short human-readable note; empty when the review went through phrase generation.
pub summary: String,
/// Pairs read by `apply_review` as `(intent, span text)`; defaults to empty when
/// the field is absent during deserialization.
#[serde(default)]
pub spans_to_learn: Vec<(String, String)>, }
/// Reviews how `query` was routed in namespace `app_id` and decides what, if
/// anything, should be learned from it.
///
/// * With `ground_truth`, the correct / wrong / missed sets are computed by set
///   comparison against `detected` and no classification LLM call is made; the
///   language is assumed to be `"en"`.
/// * Otherwise, if the configured `review_skip_threshold` is positive and the best
///   L2 score among the detected intents reaches it, the routing is trusted as-is.
/// * Otherwise "Turn 1" asks the LLM which intents are correct, wrong or missed,
///   which language the query is in, and whether it is out of scope.
///
/// When the detection is imperfect the sets are handed to
/// `full_review_from_sets` for phrase generation.
///
/// # Errors
/// Returns a message when the Turn 1 LLM call fails or its reply is not valid
/// JSON, or whatever `full_review_from_sets` reports.
///
/// # Panics
/// Panics if the `ui_settings` lock is poisoned.
pub async fn full_review(
    state: &AppState,
    app_id: &str,
    query: &str,
    detected: &[String],
    ground_truth: Option<&[String]>,
) -> Result<FullReviewResult, String> {
    // Assembles a result that schedules no phrases or spans for learning.
    fn nothing_to_learn(
        correct_intents: Vec<String>,
        wrong_detections: Vec<String>,
        missed_intents: Vec<String>,
        languages: Vec<String>,
        detection_perfect: bool,
        summary: String,
    ) -> FullReviewResult {
        FullReviewResult {
            correct_intents,
            wrong_detections,
            missed_intents,
            languages,
            detection_perfect,
            phrases_to_add: HashMap::new(),
            phrases_blocked: Vec::new(),
            summary,
            spans_to_learn: vec![],
        }
    }

    // Reads `key` as a JSON array and keeps its string elements; `None` when the
    // value under `key` is not an array.
    fn string_list(value: &serde_json::Value, key: &str) -> Option<Vec<String>> {
        value[key].as_array().map(|items| {
            items
                .iter()
                .filter_map(|item| item.as_str().map(String::from))
                .collect()
        })
    }

    // --- Ground truth supplied: pure set arithmetic, no Turn 1 ---------------
    if let Some(expected) = ground_truth {
        use std::collections::HashSet;
        let expected_ids: HashSet<&str> = expected.iter().map(|s| s.as_str()).collect();
        let detected_ids: HashSet<&str> = detected.iter().map(|s| s.as_str()).collect();

        let (correct_intents, wrong_detections): (Vec<String>, Vec<String>) = detected
            .iter()
            .cloned()
            .partition(|id| expected_ids.contains(id.as_str()));
        let missed_intents: Vec<String> = expected
            .iter()
            .filter(|id| !detected_ids.contains(id.as_str()))
            .cloned()
            .collect();

        eprintln!("[full_review] ground_truth provided — skipping Turn 1. correct={:?} wrong={:?} missed={:?}",
            correct_intents, wrong_detections, missed_intents);

        let languages = vec!["en".to_string()];
        if wrong_detections.is_empty() && missed_intents.is_empty() {
            eprintln!("[full_review] detection perfect");
            return Ok(nothing_to_learn(
                correct_intents,
                wrong_detections,
                missed_intents,
                languages,
                true,
                "Detection correct, no changes needed.".to_string(),
            ));
        }
        return full_review_from_sets(
            state,
            app_id,
            query,
            correct_intents,
            wrong_detections,
            missed_intents,
            languages,
        )
        .await;
    }

    // --- Confidence short-circuit ---------------------------------------------
    let skip_threshold = state.ui_settings.read().unwrap().review_skip_threshold;
    if skip_threshold > 0.0 && !detected.is_empty() {
        let top_score = match state.engine.try_namespace(app_id) {
            Some(handle) => handle.with_resolver(|router| {
                let pre = router.l1().preprocess(query);
                let (all_scores, _) =
                    router
                        .l2()
                        .score_multi_normalized(&pre.expanded, 0.0, 100.0);
                // Best score among the intents that were actually detected.
                detected
                    .iter()
                    .filter_map(|id| all_scores.iter().find(|(s, _)| s == id).map(|(_, sc)| *sc))
                    .fold(0.0f32, f32::max)
            }),
            None => 0.0,
        };
        if top_score >= skip_threshold {
            eprintln!("[full_review] confidence short-circuit: top score {:.2} >= threshold {:.2} — skipping Turn 1", top_score, skip_threshold);
            return Ok(nothing_to_learn(
                detected.to_vec(),
                vec![],
                vec![],
                vec!["en".to_string()],
                true,
                format!(
                    "High confidence ({:.0}%) — routing trusted, Turn 1 skipped.",
                    top_score * 100.0
                ),
            ));
        }
    }

    // --- Turn 1: gather prompt context -----------------------------------------
    let intent_labels = state
        .engine
        .try_namespace(app_id)
        .map(|h| h.with_resolver(|r| build_intent_labels(r)))
        .unwrap_or_default();

    let detected_with_scores: String = match state.engine.try_namespace(app_id) {
        Some(handle) => handle.with_resolver(|router| {
            let pre = router.l1().preprocess(query);
            let (all_scores, _) = router
                .l2()
                .score_multi_normalized(&pre.expanded, 0.0, 100.0);
            if detected.is_empty() {
                return "(none detected)".to_string();
            }
            let score_by_id: HashMap<&str, f32> =
                all_scores.iter().map(|(id, s)| (id.as_str(), *s)).collect();
            let lines: Vec<String> = detected
                .iter()
                .map(|id| {
                    let score = score_by_id.get(id.as_str()).copied().unwrap_or(0.0);
                    format!(" {} (L2 score: {:.2})", id, score)
                })
                .collect();
            lines.join("\n")
        }),
        // No namespace: list the detected ids without scores.
        None if detected.is_empty() => "(none detected)".to_string(),
        None => detected
            .iter()
            .map(|id| format!(" {}", id))
            .collect::<Vec<_>>()
            .join("\n"),
    };

    let l1_context: String = state
        .engine
        .try_namespace(app_id)
        .map(|h| {
            h.with_resolver(|router| {
                let pre = router.l1().preprocess(query);
                if pre.was_modified && !pre.injected.is_empty() {
                    format!("Resolver expanded query via synonyms: injected {:?} → processed as \"{}\"\n", pre.injected, pre.expanded)
                } else {
                    String::new()
                }
            })
        })
        .unwrap_or_default();

    let turn1_prompt = format!(
        "Customer query: \"{query}\"\n\
         {l1_context}\
         Resolver detected:\n{detected_with_scores}\n\n\
         Available intents:\n{intent_labels}\n\n\
         Which intents does this query EXPLICITLY express? Only literal, not implied.\n\
         Which detected intents are WRONG (false positives)?\n\
         Which intents does the query express that were NOT detected (missed)?\n\
         What language is the query in?\n\
         Is this query completely irrelevant to all available intents? (pure filler, wrong domain, only pronouns with no actionable signal)\n\
         Respond with ONLY JSON:\n\
         {{\"correct_intents\": [\"intent_id\"], \"wrong_detections\": [\"intent_id\"], \"missed_intents\": [\"intent_id\"], \"languages\": [\"en\"], \"out_of_scope\": false}}\n"
    );

    // --- Turn 1: ask and parse ---------------------------------------------------
    let t1_response = call_llm(state, &turn1_prompt, 256)
        .await
        .map_err(|e| format!("Turn 1 failed: {}", e.1))?;
    let t1_parsed: serde_json::Value = serde_json::from_str(extract_json(&t1_response))
        .map_err(|e| format!("Turn 1 parse failed: {}", e))?;

    let correct_intents = string_list(&t1_parsed, "correct_intents").unwrap_or_default();
    let wrong_detections = string_list(&t1_parsed, "wrong_detections").unwrap_or_default();
    let missed_intents = string_list(&t1_parsed, "missed_intents").unwrap_or_default();
    let languages =
        string_list(&t1_parsed, "languages").unwrap_or_else(|| vec!["en".to_string()]);
    let out_of_scope = t1_parsed["out_of_scope"].as_bool().unwrap_or(false);

    eprintln!(
        "[full_review] Turn 1: correct={:?}, wrong={:?}, missed={:?}, langs={:?}, out_of_scope={}",
        correct_intents, wrong_detections, missed_intents, languages, out_of_scope
    );

    if out_of_scope {
        eprintln!("[full_review] Query out of scope for namespace — skipping all learning");
        return Ok(nothing_to_learn(
            vec![],
            vec![],
            vec![],
            languages,
            false,
            "Query out of scope for this namespace — no learning applied.".to_string(),
        ));
    }

    if wrong_detections.is_empty() && missed_intents.is_empty() {
        eprintln!("[full_review] Detection perfect — skipping Turns 2+3");
        return Ok(nothing_to_learn(
            correct_intents,
            wrong_detections,
            missed_intents,
            languages,
            true,
            "Detection correct, no changes needed.".to_string(),
        ));
    }

    full_review_from_sets(
        state,
        app_id,
        query,
        correct_intents,
        wrong_detections,
        missed_intents,
        languages,
    )
    .await
}
async fn full_review_from_sets(
state: &AppState,
app_id: &str,
query: &str,
correct_intents: Vec<String>,
wrong_detections: Vec<String>,
missed_intents: Vec<String>,
languages: Vec<String>,
) -> Result<FullReviewResult, String> {
if missed_intents.is_empty() {
eprintln!(
"[full_review] no missed intents — skipping Turn 2 (no phrase-generation needed)"
);
return Ok(FullReviewResult {
correct_intents,
wrong_detections,
missed_intents,
languages,
detection_perfect: false,
phrases_to_add: HashMap::new(),
phrases_blocked: Vec::new(),
summary: String::new(),
spans_to_learn: Vec::new(),
});
}
let all_relevant_intents: Vec<String> = missed_intents
.iter()
.chain(correct_intents.iter())
.cloned()
.collect::<std::collections::LinkedList<_>>()
.into_iter()
.collect::<std::collections::HashSet<_>>()
.into_iter()
.collect();
let existing_phrases: String = state
.engine
.try_namespace(app_id)
.map(|h| h.with_resolver(|r| intent_phrases_context(r, &all_relevant_intents, 15)))
.unwrap_or_default();
let detected_lang = languages.first().map(|s| s.as_str()).unwrap_or("en");
let lang_instruction = if detected_lang == "en" {
String::new()
} else {
format!("\nThe query is in \"{detected_lang}\". Generate phrases in \"{detected_lang}\" for missed intents.\n")
};
let missed_labels: String = state
.engine
.try_namespace(app_id)
.map(|h| {
h.with_resolver(|router| {
missed_intents
.iter()
.map(|id| {
let desc = router.intent(id).map(|i| i.description).unwrap_or_default();
let count = router.training(id).unwrap_or_default().len();
let coverage = if count >= 20 {
format!(" [{} phrases — well covered, be very targeted]", count)
} else if count >= 10 {
format!(" [{} phrases — add vocabulary not yet represented]", count)
} else {
format!(" [{} phrases — add diverse new vocabulary]", count)
};
if desc.is_empty() {
format!(" - {}{}", id, coverage)
} else {
format!(" - {} ({}){}", id, desc, coverage)
}
})
.collect::<Vec<_>>()
.join("\n")
})
})
.unwrap_or_else(|| {
missed_intents
.iter()
.map(|id| format!(" - {}", id))
.collect::<Vec<_>>()
.join("\n")
});
let turn2_prompt = format!(
"{guidelines}\n\n\
Customer query: \"{query}\"\n\n\
Intents that need more training coverage:\n{missed_labels}\n\n\
Phrases already in the system for these intents (do not duplicate):\n{existing_phrases}\n\
{lang_instruction}\
Respond with ONLY JSON:\n\
{{\"phrases_by_intent\": {{\"intent_id\": \"extracted span\"}}}}\n",
guidelines = microresolve::phrase::REVIEW_FIX_GUIDELINES,
query = query,
);
let t2_response = call_llm(state, &turn2_prompt, 150)
.await
.map_err(|e| format!("Turn 2 failed: {}", e.1))?;
let t2_parsed: serde_json::Value =
serde_json::from_str(extract_json(&t2_response)).map_err(|e| {
eprintln!(
"[full_review] Turn 2 parse error: {}. Raw: {}",
e,
&t2_response[..t2_response.len().min(300)]
);
format!("Turn 2 parse failed: {}", e)
})?;
let mut phrases_to_add: HashMap<String, Vec<String>> = HashMap::new();
let phrases_blocked = Vec::new();
let spans_to_learn: Vec<(String, String)> = Vec::new();
if let Some(sbi) = t2_parsed
.get("phrases_by_intent")
.and_then(|v| v.as_object())
{
if let Some(h) = state.engine.try_namespace(app_id) {
for (intent_id, phrase_val) in sbi {
let exists = h.with_resolver(|r| r.training(intent_id).is_some());
if !exists {
eprintln!("[auto-learn/guard] skipping LLM-hallucinated intent '{}' (not in namespace)", intent_id);
continue;
}
let phrase_str = if let Some(s) = phrase_val.as_str() {
Some(s.to_string())
} else if let Some(arr) = phrase_val.as_array() {
arr.first().and_then(|v| v.as_str()).map(|s| s.to_string())
} else {
None
};
if let Some(s) = phrase_str {
let s = s.trim().to_string();
if s.is_empty() {
continue;
}
let check = h.with_resolver(|r| r.check_phrase(intent_id, &s));
if !check.redundant
&& check.warning.as_deref() != Some("No content terms after tokenization")
{
phrases_to_add.entry(intent_id.clone()).or_default().push(s);
}
}
}
}
}
eprintln!(
"[full_review] Turn 2: phrases_to_add={:?}, blocked={}",
phrases_to_add,
phrases_blocked.len()
);
let summary = String::new();
Ok(FullReviewResult {
correct_intents,
wrong_detections,
missed_intents,
languages,
detection_perfect: false,
phrases_to_add,
phrases_blocked,
summary,
spans_to_learn,
})
}
/// Applies a finished review to namespace `app_id` and returns how many phrases
/// were added.
///
/// Steps:
/// 1. `phrases_to_add` (if any) go through `phrase_pipeline`, using the first
///    reported language or `"en"`.
/// 2. If the review names any correct, missed or wrong intent, the resolver's
///    `apply_review_local` is run with the spans to learn and the wrong
///    detections, and the namespace is flushed.
/// 3. If there are missed intents, query tokens that have no entry in the L1
///    edge map are handed to `learn_l1_morphology`.
///
/// When the namespace cannot be found in step 2, the function returns early with
/// the count from step 1.
pub async fn apply_review(
    state: &AppState,
    app_id: &str,
    result: &FullReviewResult,
    original_query: &str,
) -> usize {
    let added = if result.phrases_to_add.is_empty() {
        0
    } else {
        let lang = result.languages.first().map(|s| s.as_str()).unwrap_or("en");
        let pipeline_result =
            phrase_pipeline(state, app_id, &result.phrases_to_add, true, lang).await;
        pipeline_result.added.len()
    };

    let has_learning = !(result.correct_intents.is_empty()
        && result.missed_intents.is_empty()
        && result.wrong_detections.is_empty());
    if !has_learning {
        return added;
    }

    // The namespace handle stays inside this block so it is released before the
    // morphology step awaits.
    let query_tokens: Vec<String> = {
        let Some(h) = state.engine.try_namespace(app_id) else {
            return added;
        };
        let no_phrases: std::collections::HashMap<String, Vec<String>> =
            std::collections::HashMap::new();
        h.with_resolver_mut(|router| {
            router.apply_review_local(
                &no_phrases,
                &result.spans_to_learn,
                &result.wrong_detections,
                original_query,
                0.1,
            );
        });
        if !result.wrong_detections.is_empty() {
            eprintln!(
                "[auto-learn/L2b] shrink weights on query tokens for wrong intents: {:?}",
                result.wrong_detections
            );
        }
        for (span_intent, span_text) in &result.spans_to_learn {
            eprintln!(
                "[auto-learn/query] span '{}' → '{}'",
                span_text, span_intent
            );
        }
        let normalized = h.with_resolver(|r| r.l1().preprocess(original_query).expanded);
        let tokens = microresolve::tokenizer::tokenize(&normalized);
        match h.flush() {
            Err(e) => eprintln!("[auto-learn/L2] flush error: {}", e),
            Ok(_) => eprintln!("[auto-learn/L2] state persisted for '{}'", app_id),
        }
        tokens
    };

    if result.missed_intents.is_empty() {
        return added;
    }

    // Keep only the tokens that have no entry in the L1 edge map yet.
    let new_to_l1: Vec<String> = state
        .engine
        .try_namespace(app_id)
        .map(|h| {
            h.with_resolver(|r| {
                query_tokens
                    .iter()
                    .filter(|w| !r.l1().edges.contains_key(w.as_str()))
                    .cloned()
                    .collect()
            })
        })
        .unwrap_or_else(|| query_tokens.clone());

    if new_to_l1.is_empty() {
        eprintln!("[auto-learn/L1] skipping — all words already in L1");
    } else {
        eprintln!("[auto-learn/L1] morphology discovery: {:?}", new_to_l1);
        learn_l1_morphology(state, app_id, &new_to_l1, original_query).await;
    }
    added
}
/// Asks the LLM for inflected forms of `new_words` and records each one as a
/// morphological L1 edge (`variant → canonical`, weight 0.97) in namespace
/// `app_id`.
///
/// Does nothing when no LLM key is configured. Variants are trimmed and
/// lower-cased; empty variants and variants equal to their canonical word are
/// skipped. The namespace is flushed once if at least one edge was added.
/// Failures (LLM error, unparsable reply, flush error) are logged to stderr and
/// otherwise ignored — this step is best-effort.
async fn learn_l1_morphology(
    state: &AppState,
    app_id: &str,
    new_words: &[String],
    context_query: &str,
) {
    if state.llm_key.is_none() {
        return;
    }
    let words_str = new_words.join(", ");
    let prompt = format!(
        "These words appeared in a user query (\"{context_query}\") and were just learned by an intent classification engine:\n\
         Words: [{words_str}]\n\n\
         For each word, list ONLY morphological variants (inflected forms) that users would naturally type:\n\
         - verb forms: -ing, -ed, -s, -ion, -er suffixes\n\
         - Do NOT include synonyms or semantically related words — only inflected forms of the same word\n\
         - Do NOT include the word itself\n\
         - Skip words that have no useful variants (e.g. nouns like 'team')\n\n\
         Respond with ONLY JSON:\n\
         {{\"variants\": {{\"canonical_word\": [\"variant1\", \"variant2\"]}}}}\n\
         Example: {{\"variants\": {{\"ping\": [\"pinging\", \"pinged\", \"pings\", \"pinged\"]}}}}"
    );

    let response = match call_llm(state, &prompt, 400).await {
        Ok(text) => text,
        Err((_, e)) => {
            eprintln!("[auto-learn/L1] LLM call failed: {}", e);
            return;
        }
    };

    let parsed = match serde_json::from_str::<serde_json::Value>(extract_json(&response)) {
        Ok(value) => value,
        Err(e) => {
            // Truncate by characters: a byte-index slice panics when the cut
            // falls inside a multi-byte UTF-8 sequence.
            let preview: String = response.chars().take(200).collect();
            eprintln!("[auto-learn/L1] parse error: {} — raw: {}", e, preview);
            return;
        }
    };

    let Some(variants_map) = parsed["variants"].as_object() else {
        return;
    };
    let Some(h) = state.engine.try_namespace(app_id) else {
        return;
    };

    let mut learned = 0usize;
    for (canonical, var_list) in variants_map {
        let Some(arr) = var_list.as_array() else {
            continue;
        };
        // Non-string array elements are ignored.
        for raw_variant in arr.iter().filter_map(|v| v.as_str()) {
            let variant = raw_variant.trim().to_lowercase();
            if variant.is_empty() || variant == canonical.as_str() {
                continue;
            }
            h.with_resolver_mut(|r| {
                r.l1_mut().add(
                    &variant,
                    canonical,
                    0.97,
                    microresolve::scoring::EdgeKind::Morphological,
                )
            });
            eprintln!(
                "[auto-learn/L1] {} → {} (morphological)",
                variant, canonical
            );
            learned += 1;
        }
    }

    if learned == 0 {
        eprintln!(
            "[auto-learn/L1] no morphological variants found for {:?}",
            new_words
        );
        return;
    }
    match h.flush() {
        Err(e) => eprintln!("[auto-learn/L1] flush error: {}", e),
        Ok(_) => eprintln!(
            "[auto-learn/L1] {} edges added, state persisted",
            learned
        ),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A reply that is nothing but a fenced JSON array yields the array.
    #[test]
    fn test_extract_json_fenced_array() {
        let llm_reply = "```json\n[\n {\"from\": \"x\", \"to\": \"y\"}\n]\n```";
        let extracted = extract_json(llm_reply);
        assert!(
            extracted.starts_with('['),
            "expected array, got: {:?}",
            extracted
        );
    }

    /// Braces in the prose before the fence must not be mistaken for the payload.
    #[test]
    fn test_extract_json_preamble_then_fence() {
        let llm_reply = "Here are edges for {cancel_sub, create_repo}:\n```json\n[\n {\"from\": \"x\"}\n]\n```";
        let extracted = extract_json(llm_reply);
        assert!(
            extracted.starts_with('['),
            "expected array, got: {:?}",
            extracted
        );
    }
}