use crate::pipeline::*;
use crate::state::*;
use axum::{
extract::{Query, State},
http::{HeaderMap, StatusCode},
routing::{get, post},
Json,
};
use std::collections::HashMap;
/// Indexes every accepted `(intent_id, phrase)` pair into the namespace for
/// `app_id`, rebuilds that namespace's caches, and flushes it.
///
/// Returns without doing anything when `accepted` is empty or when
/// `try_namespace` yields no handle for `app_id`. A flush failure is only
/// logged to stderr; it is not propagated.
///
/// # Panics
///
/// Panics if `index_phrase` or `rebuild_caches` returns an error.
pub fn seed_into_l2(state: &AppState, app_id: &str, accepted: &[(String, String)]) {
    if accepted.is_empty() {
        return;
    }
    let handle = match state.engine.try_namespace(app_id) {
        Some(handle) => handle,
        None => return,
    };

    for pair in accepted {
        let (intent_id, phrase) = pair;
        handle
            .index_phrase(intent_id, phrase)
            .expect("server is standalone; ConnectMode unreachable");
    }
    handle
        .rebuild_caches()
        .expect("server is standalone; ConnectMode unreachable");

    let seeded = accepted.len();
    eprintln!(
        "[import/L2] seeded {} phrases into count model for '{}'",
        seeded, app_id
    );

    // Best effort: a failed flush is reported but does not abort the import.
    if let Err(flush_err) = handle.flush() {
        eprintln!("[import/L2] flush error for {}: {}", app_id, flush_err);
    }
}
/// Builds the router that exposes every `/api/import/*` endpoint defined in
/// this file.
pub fn routes() -> axum::Router<AppState> {
    // OpenAPI spec import endpoints.
    let spec_routes = axum::Router::new()
        .route("/api/import/parse", post(import_parse))
        .route("/api/import/apply", post(import_apply))
        .route("/api/import/params", get(import_params));

    // MCP / tool-definition import endpoints, registered in the same order
    // as before.
    spec_routes
        .route("/api/import/mcp/search", get(mcp_search))
        .route("/api/import/mcp/fetch", get(mcp_fetch))
        .route("/api/import/mcp/parse", post(mcp_parse))
        .route("/api/import/mcp/apply", post(mcp_apply))
}
/// Derives LLM batching parameters from the number of configured languages.
///
/// Returns `(batch_size, max_tokens, tokens_per_intent)` where:
/// - `tokens_per_intent` is `100 * max(num_langs, 1) + 30`,
/// - `max_tokens` is the fixed per-call budget of 8192,
/// - `batch_size` is how many intents fit in that budget, clamped to `1..=10`.
///
/// All arithmetic saturates, so an absurdly large `num_langs` (the value can
/// come straight from a query string) yields `tokens_per_intent == u32::MAX`
/// and `batch_size == 1` instead of overflowing.
pub fn seed_gen_params(num_langs: usize) -> (usize, u32, u32) {
    const MAX_TOKENS: u32 = 8192;

    // Zero languages is treated as one; values beyond u32 saturate rather
    // than being truncated by an `as` cast.
    let n = u32::try_from(num_langs.max(1)).unwrap_or(u32::MAX);
    let tokens_per_intent = n.saturating_mul(100).saturating_add(30);

    // `tokens_per_intent` is at least 130, so the quotient is at most 63 and
    // the cast to usize is lossless.
    let batch_size = ((MAX_TOKENS / tokens_per_intent) as usize).clamp(1, 10);

    (batch_size, MAX_TOKENS, tokens_per_intent)
}
/// Query-string parameters accepted by `import_params`.
#[derive(serde::Deserialize)]
pub struct ImportParamsQuery {
/// Number of languages; passed to `seed_gen_params` and used for the
/// `phrases_per_tool` estimate.
num_langs: usize,
/// Number of tools to estimate for; falls back to `default_tool_count()`
/// when the parameter is absent.
#[serde(default = "default_tool_count")]
num_tools: usize,
}
/// Serde default for `ImportParamsQuery::num_tools` when the query string
/// omits it.
fn default_tool_count() -> usize {
    const DEFAULT_TOOL_COUNT: usize = 10;
    DEFAULT_TOOL_COUNT
}
/// Estimates batching and token usage for seeding `num_tools` tools across
/// `num_langs` languages.
///
/// The response is a JSON object with `batch_size`, `max_tokens_per_call`,
/// `tokens_per_tool`, `total_batches`, the input/output/total token
/// estimates, and `phrases_per_tool` (`num_langs * 5`).
///
/// Both query values are caller-controlled, so the token totals are computed
/// in `u64` with saturating arithmetic; oversized inputs produce a capped
/// estimate instead of a truncated cast or an arithmetic overflow.
pub async fn import_params(Query(q): Query<ImportParamsQuery>) -> Json<serde_json::Value> {
    let (batch_size, max_tokens, tokens_per_tool) = seed_gen_params(q.num_langs);
    // `seed_gen_params` clamps `batch_size` to at least 1, so this never
    // divides by zero.
    let total_batches = q.num_tools.div_ceil(batch_size);

    let widen = |v: usize| u64::try_from(v).unwrap_or(u64::MAX);

    let expected_output_per_batch =
        widen(batch_size).saturating_mul(u64::from(tokens_per_tool));
    let total_output_tokens = widen(total_batches).saturating_mul(expected_output_per_batch);
    // 300 input tokens per tool plus 200 of fixed prompt overhead per batch.
    let total_input_tokens = widen(q.num_tools)
        .saturating_mul(300)
        .saturating_add(widen(total_batches).saturating_mul(200));
    let total_tokens = total_input_tokens.saturating_add(total_output_tokens);

    Json(serde_json::json!({
        "batch_size": batch_size,
        "max_tokens_per_call": max_tokens,
        "tokens_per_tool": tokens_per_tool,
        "total_batches": total_batches,
        "total_output_tokens": total_output_tokens,
        "total_input_tokens": total_input_tokens,
        "total_tokens": total_tokens,
        "phrases_per_tool": q.num_langs.saturating_mul(5),
    }))
}
/// Query-string parameters accepted by `mcp_search`.
#[derive(serde::Deserialize)]
pub struct McpSearchParams {
/// Search text; percent-encoded and forwarded as the upstream `q` parameter.
q: String,
/// Forwarded as the upstream `pageSize`; falls back to `default_page_size()`
/// when absent.
#[serde(default = "default_page_size")]
limit: usize,
}
/// Serde default for `McpSearchParams::limit` when the query string omits it.
pub fn default_page_size() -> usize {
    const DEFAULT_PAGE_SIZE: usize = 20;
    DEFAULT_PAGE_SIZE
}
/// Proxies a server search to the Smithery registry and returns the upstream
/// JSON body unchanged.
///
/// The search text is percent-encoded with `urlencoding` and sent as `q`;
/// `limit` is sent as `pageSize`.
///
/// # Errors
///
/// Responds with `502 Bad Gateway` when the request cannot be sent, when
/// Smithery answers with a non-success status, or when the body is not valid
/// JSON.
pub async fn mcp_search(
    State(state): State<AppState>,
    Query(params): Query<McpSearchParams>,
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
    let url = format!(
        "https://api.smithery.ai/servers?q={}&pageSize={}",
        urlencoding(&params.q),
        params.limit
    );

    let resp = state.http.get(&url).send().await.map_err(|e| {
        (
            StatusCode::BAD_GATEWAY,
            format!("Smithery fetch failed: {}", e),
        )
    })?;

    let upstream_status = resp.status();
    if !upstream_status.is_success() {
        return Err((
            StatusCode::BAD_GATEWAY,
            format!("Smithery returned {}", upstream_status),
        ));
    }

    let data: serde_json::Value = resp.json().await.map_err(|e| {
        (
            StatusCode::BAD_GATEWAY,
            format!("Smithery parse failed: {}", e),
        )
    })?;
    Ok(Json(data))
}
/// Query-string parameters accepted by `mcp_fetch`.
#[derive(serde::Deserialize)]
pub struct McpFetchParams {
/// Server name; percent-encoded and used as the final path segment of the
/// upstream URL.
name: String,
}
/// Fetches a single server record from the Smithery registry by name and
/// returns the upstream JSON body unchanged.
///
/// The name is percent-encoded with `urlencoding` before being placed in the
/// URL path, so names containing `/` or `@` stay within one path segment.
///
/// # Errors
///
/// Responds with `502 Bad Gateway` when the request cannot be sent, when
/// Smithery answers with a non-success status, or when the body is not valid
/// JSON.
pub async fn mcp_fetch(
    State(state): State<AppState>,
    Query(params): Query<McpFetchParams>,
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
    let url = format!(
        "https://api.smithery.ai/servers/{}",
        urlencoding(&params.name)
    );

    let resp = state.http.get(&url).send().await.map_err(|e| {
        (
            StatusCode::BAD_GATEWAY,
            format!("Smithery fetch failed: {}", e),
        )
    })?;

    let upstream_status = resp.status();
    if !upstream_status.is_success() {
        return Err((
            StatusCode::BAD_GATEWAY,
            format!("Smithery returned {} for '{}'", upstream_status, params.name),
        ));
    }

    let data: serde_json::Value = resp.json().await.map_err(|e| {
        (
            StatusCode::BAD_GATEWAY,
            format!("Smithery parse failed: {}", e),
        )
    })?;
    Ok(Json(data))
}
/// Percent-encodes `s` for use inside a URL query value or path segment.
///
/// ASCII letters, ASCII digits, `-`, `_` and `.` are copied through. Every
/// other byte of the UTF-8 encoding is written as `%XX` with uppercase hex
/// digits, so a non-ASCII character becomes one escape per UTF-8 byte
/// (`é` -> `%C3%A9`) rather than being truncated to a single byte or passed
/// through raw.
fn urlencoding(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    let mut encoded = String::with_capacity(s.len());
    for byte in s.bytes() {
        if byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.') {
            encoded.push(char::from(byte));
        } else {
            encoded.push('%');
            encoded.push(char::from(HEX[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX[usize::from(byte & 0x0F)]));
        }
    }
    encoded
}
/// JSON body accepted by `import_parse`.
#[derive(serde::Deserialize)]
pub struct ImportParseRequest {
/// Raw spec text handed to `microresolve::import::parse_spec`.
spec: String,
}
/// Parses the submitted spec and returns a preview of it: title, version,
/// description, the operation count, the tag list, and one summary object per
/// operation.
///
/// When the spec declares no top-level tags, the tag list is the sorted,
/// de-duplicated union of the tags found on the operations.
///
/// # Errors
///
/// Responds with `400 Bad Request` carrying the parser's message when
/// `parse_spec` rejects the input.
pub async fn import_parse(
    Json(req): Json<ImportParseRequest>,
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
    let parsed = match microresolve::import::parse_spec(&req.spec) {
        Ok(spec) => spec,
        Err(e) => return Err((StatusCode::BAD_REQUEST, e)),
    };

    let mut operations: Vec<serde_json::Value> = Vec::new();
    for op in parsed.operations.iter() {
        let parameters: Vec<serde_json::Value> = op
            .parameters
            .iter()
            .map(|p| {
                serde_json::json!({
                    "name": p.name,
                    "in": p.location,
                    "required": p.required,
                })
            })
            .collect();
        operations.push(serde_json::json!({
            "id": op.id,
            "name": op.name,
            "method": op.method,
            "path": op.path,
            "summary": op.summary,
            "description": op.description,
            "tags": op.tags,
            "parameters": parameters,
            "has_body": op.request_body.is_some(),
        }));
    }

    let tags: Vec<String> = if !parsed.tags.is_empty() {
        parsed.tags.clone()
    } else {
        // An ordered set gives the unique tags already sorted.
        let unique: std::collections::BTreeSet<String> = parsed
            .operations
            .iter()
            .flat_map(|op| op.tags.clone())
            .collect();
        unique.into_iter().collect()
    };

    let preview = serde_json::json!({
        "title": parsed.title,
        "version": parsed.version,
        "description": parsed.description,
        "total_operations": parsed.operations.len(),
        "tags": tags,
        "operations": operations,
    });
    Ok(Json(preview))
}
/// JSON body accepted by `import_apply`.
#[derive(serde::Deserialize)]
pub struct ImportApplyRequest {
/// Raw spec text handed to `microresolve::import::parse_spec`.
spec: String,
/// Operation ids to import; matched against each parsed operation's `id`.
selected: Vec<String>,
/// Optional prefix; when non-empty, intent names become `domain:name`.
#[serde(default)]
domain: String,
}
pub async fn import_apply(
State(state): State<AppState>,
headers: HeaderMap,
Json(req): Json<ImportApplyRequest>,
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
let app_id = app_id_from_headers(&headers);
let parsed =
microresolve::import::parse_spec(&req.spec).map_err(|e| (StatusCode::BAD_REQUEST, e))?;
let selected_set: std::collections::HashSet<&str> =
req.selected.iter().map(|s| s.as_str()).collect();
let selected_ops: Vec<µresolve::import::openapi::ParsedOperation> = parsed
.operations
.iter()
.filter(|op| selected_set.contains(op.id.as_str()))
.collect();
if selected_ops.is_empty() {
return Err((
StatusCode::BAD_REQUEST,
"No operations selected".to_string(),
));
}
{
let h = state.engine.namespace(&app_id);
for op in &selected_ops {
let base_name =
microresolve::import::to_snake_case(op.operation_id.as_deref().unwrap_or(&op.id));
let intent_name = if req.domain.is_empty() {
base_name
} else {
format!("{}:{}", req.domain, base_name)
};
let name_words = op.name.to_lowercase();
if name_words.is_empty() {
continue;
}
let _ = h.add_intent(&intent_name, &[name_words.as_str()][..]);
let description = op
.summary
.as_deref()
.or(Some(op.name.as_str()))
.unwrap_or("");
let endpoint = format!("{} {}", op.method, op.path);
let schema = serde_json::json!({
"method": op.method,
"path": op.path,
"operation_id": op.operation_id,
"summary": op.summary,
"tags": op.tags,
"parameters": op.parameters.iter().map(|p| serde_json::json!({
"name": p.name,
"in": p.location,
"required": p.required,
})).collect::<Vec<_>>(),
"has_body": op.request_body.is_some(),
});
let _ = h.update_intent(
&intent_name,
microresolve::IntentEdit {
description: if description.is_empty() {
None
} else {
Some(description.to_string())
},
source: Some(
microresolve::IntentSource::new("openapi").with_label(parsed.title.clone()),
),
schema: Some(schema),
target: Some(
microresolve::IntentTarget::new("api_endpoint")
.with_handler(endpoint.clone()),
),
..Default::default()
},
);
}
maybe_commit(&state, &app_id);
}
let mut total_added = 0usize;
let mut total_blocked = 0usize;
let mut per_intent_added: HashMap<String, usize> = HashMap::new();
let mut per_intent_blocked: HashMap<String, usize> = HashMap::new();
let mut per_intent_recovered: HashMap<String, usize> = HashMap::new();
let mut all_accepted: Vec<(String, String)> = Vec::new();
if state.llm_key.is_some() {
let setting_langs = state.ui_settings.read().unwrap().languages.clone();
let (batch_size, max_tokens, _) = seed_gen_params(setting_langs.len());
for batch in selected_ops.chunks(batch_size) {
let ops_desc: Vec<String> = batch
.iter()
.map(|op| {
let base = microresolve::import::to_snake_case(
op.operation_id.as_deref().unwrap_or(&op.id),
);
let intent_name = if req.domain.is_empty() {
base
} else {
format!("{}:{}", req.domain, base)
};
format!(
"- {} ({}): {} — {}",
intent_name,
op.method,
op.summary.as_deref().unwrap_or(&op.name),
if op.description.len() > 100 {
&op.description[..100]
} else {
&op.description
}
)
})
.collect();
let existing_seeds: String = state
.engine
.try_namespace(&app_id)
.map(|h| {
batch
.iter()
.map(|op| {
let base = microresolve::import::to_snake_case(
op.operation_id.as_deref().unwrap_or(&op.id),
);
let name = if req.domain.is_empty() {
base
} else {
format!("{}:{}", req.domain, base)
};
let seeds = h.training(&name).unwrap_or_default();
format!(" {}: {:?}", name, seeds.iter().take(3).collect::<Vec<_>>())
})
.collect::<Vec<_>>()
.join("\n")
})
.unwrap_or_default();
let langs: Vec<&str> = setting_langs.iter().map(|s| s.as_str()).collect();
let is_multilang = langs.len() > 1;
let example_intent = {
let op = &batch[0];
let base = microresolve::import::to_snake_case(
op.operation_id.as_deref().unwrap_or(&op.id),
);
if req.domain.is_empty() {
base
} else {
format!("{}:{}", req.domain, base)
}
};
let (lang_instruction, response_format) = if is_multilang {
let lang_list = langs.join(", ");
let lang_keys: String = langs
.iter()
.map(|l| format!("\"{}\": [\"phrase1\", \"phrase2\"]", l))
.collect::<Vec<_>>()
.join(", ");
(
format!("Generate training phrases in ALL of these languages: {}. Include phrases for EACH language.\n\n", lang_list),
format!("Respond with ONLY valid JSON (no extra text):\n{{\"phrases_by_intent\": {{\"{}\": {{{}}}}}}}\n", example_intent, lang_keys),
)
} else {
(String::new(), format!("Respond with ONLY valid JSON (no extra text):\n{{\"phrases_by_intent\": {{\"{}\": [\"phrase1\", \"phrase2\", \"phrase3\"]}}}}\n", example_intent))
};
let prompt = format!(
"Generate 10 diverse training phrases for each API operation below.\n\
You have the full operation context — use it to generate TARGETED phrases.\n\
These phrases train both a keyword router and a statistical count model.\n\
Phrases must use vocabulary specific to THIS operation, not generic API terms.\n\n\
{}Operations:\n{}\n\n\
Current phrases (avoid duplicating these):\n{}\n\n\
{}\n\n\
For each operation, generate phrases a developer or user would say when they want this action.\n\
Use imperative commands and short tool-distinctive requests. Avoid leading with generic conversational stems.\n\n\
{}",
lang_instruction, ops_desc.join("\n"), existing_seeds, microresolve::phrase::PHRASE_QUALITY_RULES, response_format
);
let llm_result = call_llm(&state, &prompt, max_tokens).await;
if let Err((status, msg)) = &llm_result {
eprintln!(
"[import] LLM seed generation failed ({}): {}",
status,
msg.chars().take(300).collect::<String>()
);
}
if let Ok(response) = llm_result {
if let Ok(seeds_json) =
serde_json::from_str::<serde_json::Value>(extract_json(&response))
{
if let Some(sbi) = seeds_json
.get("phrases_by_intent")
.or_else(|| seeds_json.get("seeds_by_intent"))
.and_then(|v| v.as_object())
{
if is_multilang {
for lang in &langs {
let lang_map: HashMap<String, Vec<String>> = sbi
.iter()
.filter_map(|(intent, by_lang)| {
by_lang.get(lang).and_then(|v| v.as_array()).map(|arr| {
(
intent.clone(),
arr.iter()
.filter_map(|s| s.as_str().map(String::from))
.collect(),
)
})
})
.collect();
if !lang_map.is_empty() {
let result =
phrase_pipeline(&state, &app_id, &lang_map, true, lang)
.await;
for (id, _) in &result.added {
*per_intent_added.entry(id.clone()).or_default() += 1;
}
for (id, _, _) in &result.blocked {
*per_intent_blocked.entry(id.clone()).or_default() += 1;
}
if result.recovered_by_retry > 0 {
*per_intent_recovered
.entry(
lang_map.keys().next().cloned().unwrap_or_default(),
)
.or_default() += result.recovered_by_retry;
}
all_accepted.extend(result.added.iter().cloned());
total_added += result.added.len();
total_blocked += result.blocked.len();
}
}
} else {
let phrases_map: HashMap<String, Vec<String>> = sbi
.iter()
.filter_map(|(k, v)| {
v.as_array().map(|arr| {
(
k.clone(),
arr.iter()
.filter_map(|s| s.as_str().map(String::from))
.collect(),
)
})
})
.collect();
let result =
phrase_pipeline(&state, &app_id, &phrases_map, true, langs[0])
.await;
for (id, _) in &result.added {
*per_intent_added.entry(id.clone()).or_default() += 1;
}
for (id, _, _) in &result.blocked {
*per_intent_blocked.entry(id.clone()).or_default() += 1;
}
all_accepted.extend(result.added.iter().cloned());
total_added += result.added.len();
total_blocked += result.blocked.len();
}
}
}
}
}
}
seed_into_l2(&state, &app_id, &all_accepted);
let intent_names: Vec<String> = selected_ops
.iter()
.map(|op| {
let base =
microresolve::import::to_snake_case(op.operation_id.as_deref().unwrap_or(&op.id));
if req.domain.is_empty() {
base
} else {
format!("{}:{}", req.domain, base)
}
})
.collect();
let per_intent: Vec<serde_json::Value> = intent_names.iter().map(|name| {
let added = per_intent_added.get(name).copied().unwrap_or(0);
let blocked = per_intent_blocked.get(name).copied().unwrap_or(0);
let recovered = per_intent_recovered.get(name).copied().unwrap_or(0);
serde_json::json!({ "name": name, "phrases_added": added, "blocked": blocked, "recovered": recovered })
}).collect();
let l2_words = state
.engine
.try_namespace(&app_id)
.map(|h| h.vocab_size())
.unwrap_or(0);
Ok(Json(serde_json::json!({
"title": parsed.title,
"version": parsed.version,
"imported": intent_names.len(),
"phrases_added": total_added,
"phrases_blocked": total_blocked,
"vocab_size": l2_words,
"intents": intent_names,
"per_intent": per_intent,
})))
}
/// JSON body accepted by `mcp_parse`.
#[derive(serde::Deserialize)]
pub struct McpParseRequest {
/// Tool definitions as a JSON string: either an array of tools or an object
/// with a `tools` array.
tools_json: String,
}
/// Converts a tool definition into the `{name, description, inputSchema}`
/// shape used by the rest of this file.
///
/// - A tool with `"type": "function"` and a `function` object takes its
///   name, description and `parameters` from that object.
/// - A tool with an `args_schema` key takes its schema from that key.
/// - Anything else is returned as an unchanged clone.
///
/// A missing name or schema becomes `null`; a missing description becomes
/// the empty string.
fn normalize_tool(tool: &serde_json::Value) -> serde_json::Value {
    // Clone of `src[key]`, or `fallback` when the key is absent.
    let field_or = |src: &serde_json::Value, key: &str, fallback: serde_json::Value| {
        src.get(key).cloned().unwrap_or(fallback)
    };

    let function_body = match tool.get("type").and_then(|t| t.as_str()) {
        Some("function") => tool.get("function"),
        _ => None,
    };
    if let Some(func) = function_body {
        return serde_json::json!({
            "name": field_or(func, "name", serde_json::Value::Null),
            "description": field_or(func, "description", serde_json::json!("")),
            "inputSchema": field_or(func, "parameters", serde_json::Value::Null),
        });
    }

    match tool.get("args_schema") {
        Some(args_schema) => serde_json::json!({
            "name": field_or(tool, "name", serde_json::Value::Null),
            "description": field_or(tool, "description", serde_json::json!("")),
            "inputSchema": args_schema.clone(),
        }),
        None => tool.clone(),
    }
}
/// Classifies a raw tool definition by shape: `"function"` when its `type`
/// field is the string `"function"`, `"langchain"` when it has an
/// `args_schema` key, and `"mcp"` otherwise. The `type` check wins when both
/// apply.
fn detect_source_type(tool: &serde_json::Value) -> &'static str {
    let declared_type = tool.get("type").and_then(|t| t.as_str());
    match (declared_type, tool.get("args_schema")) {
        (Some("function"), _) => "function",
        (_, Some(_)) => "langchain",
        _ => "mcp",
    }
}
fn extract_tools(
parsed: &serde_json::Value,
) -> Result<Vec<serde_json::Value>, (StatusCode, String)> {
let raw = if let Some(arr) = parsed.as_array() {
arr.clone()
} else if let Some(arr) = parsed.get("tools").and_then(|t| t.as_array()) {
arr.clone()
} else {
return Err((StatusCode::BAD_REQUEST,
"Expected array of tools or {\"tools\": [...]}. Supports MCP, OpenAI function calling, and LangChain tool formats.".to_string()));
};
Ok(raw.iter().map(normalize_tool).collect())
}
/// Extracts the tool list from `parsed`, returning each tool normalized with
/// `normalize_tool` and paired with the tag from `detect_source_type`
/// (computed on the raw, un-normalized definition).
///
/// Accepts either a top-level array or an object with a `tools` array. The
/// array is borrowed rather than cloned up front; each tool is cloned only
/// once, inside `normalize_tool`.
///
/// # Errors
///
/// Responds with `400 Bad Request` when `parsed` has neither shape.
fn extract_tools_with_source(
    parsed: &serde_json::Value,
) -> Result<Vec<(serde_json::Value, &'static str)>, (StatusCode, String)> {
    let located = parsed
        .as_array()
        .or_else(|| parsed.get("tools").and_then(|t| t.as_array()));
    let raw: &[serde_json::Value] = match located {
        Some(arr) => arr.as_slice(),
        None => {
            return Err((StatusCode::BAD_REQUEST,
                "Expected array of tools or {\"tools\": [...]}. Supports MCP, OpenAI function calling, and LangChain tool formats.".to_string()));
        }
    };
    Ok(raw
        .iter()
        .map(|t| (normalize_tool(t), detect_source_type(t)))
        .collect())
}
/// Parses submitted tool definitions and returns a preview: the tool count
/// and, per tool, its name, description, parameter names, required
/// parameters, and read-only hint.
///
/// `has_input` is true only when the normalized tool carries a non-null
/// `inputSchema`. `normalize_tool` inserts `inputSchema: null` for tools that
/// declare no parameters, so testing for the key alone would report every
/// such tool as having input.
///
/// # Errors
///
/// Responds with `400 Bad Request` when `tools_json` is not valid JSON or
/// does not contain a tool array.
pub async fn mcp_parse(
    Json(req): Json<McpParseRequest>,
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
    let parsed: serde_json::Value = serde_json::from_str(&req.tools_json)
        .map_err(|e| (StatusCode::BAD_REQUEST, format!("Invalid JSON: {}", e)))?;
    let tools_array = extract_tools(&parsed)?;

    let tools: Vec<serde_json::Value> = tools_array
        .iter()
        .map(|tool| {
            let name = tool
                .get("name")
                .and_then(|v| v.as_str())
                .unwrap_or("unnamed");
            let description = tool
                .get("description")
                .and_then(|v| v.as_str())
                .unwrap_or("");

            // Looked up once and reused for the three schema-derived fields.
            let schema = tool.get("inputSchema");
            let has_input = schema.is_some_and(|s| !s.is_null());
            let params: Vec<String> = schema
                .and_then(|s| s.get("properties"))
                .and_then(|p| p.as_object())
                .map(|props| props.keys().cloned().collect())
                .unwrap_or_default();
            let required: Vec<String> = schema
                .and_then(|s| s.get("required"))
                .and_then(|r| r.as_array())
                .map(|arr| {
                    arr.iter()
                        .filter_map(|v| v.as_str().map(String::from))
                        .collect()
                })
                .unwrap_or_default();

            let read_only = tool
                .get("annotations")
                .and_then(|a| a.get("readOnlyHint"))
                .and_then(|v| v.as_bool())
                .unwrap_or(false);

            serde_json::json!({
                "name": name,
                "description": description,
                "has_input": has_input,
                "params": params,
                "required_params": required,
                "read_only": read_only,
            })
        })
        .collect();

    Ok(Json(serde_json::json!({
        "total_tools": tools.len(),
        "tools": tools,
    })))
}
/// JSON body accepted by `mcp_apply`.
#[derive(serde::Deserialize)]
pub struct McpApplyRequest {
/// Tool definitions as a JSON string: either an array of tools or an object
/// with a `tools` array.
tools_json: String,
/// Tool names to import; matched against each normalized tool's `name`.
selected: Vec<String>,
/// Optional prefix; when non-empty, intent names become `domain:name`.
#[serde(default)]
domain: String,
}
pub async fn mcp_apply(
State(state): State<AppState>,
headers: HeaderMap,
Json(req): Json<McpApplyRequest>,
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
let app_id = app_id_from_headers(&headers);
let parsed: serde_json::Value = serde_json::from_str(&req.tools_json)
.map_err(|e| (StatusCode::BAD_REQUEST, format!("Invalid JSON: {}", e)))?;
let tools_with_source = extract_tools_with_source(&parsed)?;
let selected_set: std::collections::HashSet<&str> =
req.selected.iter().map(|s| s.as_str()).collect();
let selected_tools: Vec<(&serde_json::Value, &str)> = tools_with_source
.iter()
.filter(|(t, _)| {
t.get("name")
.and_then(|n| n.as_str())
.map(|n| selected_set.contains(n))
.unwrap_or(false)
})
.map(|(t, s)| (t, *s))
.collect();
if selected_tools.is_empty() {
return Err((StatusCode::BAD_REQUEST, "No tools selected".to_string()));
}
{
let h = state.engine.namespace(&app_id);
for (tool, source_type) in &selected_tools {
let base_name = tool
.get("name")
.and_then(|v| v.as_str())
.unwrap_or("unnamed");
let name = if req.domain.is_empty() {
base_name.to_string()
} else {
format!("{}:{}", req.domain, base_name)
};
let description = tool
.get("description")
.and_then(|v| v.as_str())
.unwrap_or("");
let name_words = base_name.replace('_', " ");
let _ = h.add_intent(&name, &[name_words.as_str()][..]);
let _ = h.update_intent(
&name,
microresolve::IntentEdit {
description: if description.is_empty() {
None
} else {
Some(description.to_string())
},
source: Some(microresolve::IntentSource::new(*source_type)),
schema: Some((*tool).clone()),
target: Some(microresolve::IntentTarget::new(if *source_type == "mcp" {
"mcp_server"
} else {
"handler"
})),
..Default::default()
},
);
}
maybe_commit(&state, &app_id);
}
let mut total_added = 0usize;
let mut total_blocked = 0usize;
let mut per_intent_added: HashMap<String, usize> = HashMap::new();
let mut per_intent_blocked: HashMap<String, usize> = HashMap::new();
let mut per_intent_recovered: HashMap<String, usize> = HashMap::new();
let mut all_accepted: Vec<(String, String)> = Vec::new();
if state.llm_key.is_some() {
let setting_langs = state.ui_settings.read().unwrap().languages.clone();
let (batch_size, max_tokens, _) = seed_gen_params(setting_langs.len());
for batch in selected_tools.chunks(batch_size) {
let tools_desc: Vec<String> = batch
.iter()
.map(|(t, _)| {
let base = t.get("name").and_then(|v| v.as_str()).unwrap_or("?");
let name = if req.domain.is_empty() {
base.to_string()
} else {
format!("{}:{}", req.domain, base)
};
let desc = t.get("description").and_then(|v| v.as_str()).unwrap_or("");
format!("- {} : {}", name, &desc[..desc.len().min(100)])
})
.collect();
let existing_seeds: String = state
.engine
.try_namespace(&app_id)
.map(|h| {
batch
.iter()
.map(|(t, _)| {
let base = t.get("name").and_then(|v| v.as_str()).unwrap_or("?");
let name = if req.domain.is_empty() {
base.to_string()
} else {
format!("{}:{}", req.domain, base)
};
let seeds = h.training(&name).unwrap_or_default();
format!(" {}: {:?}", name, seeds.iter().take(3).collect::<Vec<_>>())
})
.collect::<Vec<_>>()
.join("\n")
})
.unwrap_or_default();
let langs: Vec<&str> = setting_langs.iter().map(|s| s.as_str()).collect();
let is_multilang = langs.len() > 1;
let example_intent = {
let base = batch[0]
.0
.get("name")
.and_then(|v| v.as_str())
.unwrap_or("tool");
if req.domain.is_empty() {
base.to_string()
} else {
format!("{}:{}", req.domain, base)
}
};
let (lang_instruction, response_format) = if is_multilang {
let lang_list = langs.join(", ");
let lang_keys: String = langs
.iter()
.map(|l| format!("\"{}\": [\"phrase1\", \"phrase2\"]", l))
.collect::<Vec<_>>()
.join(", ");
(
format!("Generate training phrases in ALL of these languages: {}. Include phrases for EACH language.\n\n", lang_list),
format!("Respond with ONLY valid JSON (no extra text):\n{{\"phrases_by_intent\": {{\"{}\": {{{}}}}}}}\n", example_intent, lang_keys),
)
} else {
(String::new(), format!("Respond with ONLY valid JSON (no extra text):\n{{\"phrases_by_intent\": {{\"{}\": [\"phrase1\", \"phrase2\", \"phrase3\"]}}}}\n", example_intent))
};
let prompt = format!(
"Generate 10 diverse training phrases for each MCP tool below.\n\
You have the full tool description — use it to generate TARGETED phrases.\n\
These phrases train both a keyword router and a statistical count model.\n\
Phrases must use vocabulary specific to THIS tool, not generic action terms.\n\n\
{}Tools:\n{}\n\n\
Current phrases (avoid duplicating):\n{}\n\n\
{}\n\n\
For each tool, generate phrases a user would say when they want this action.\n\
Use imperative commands and short tool-distinctive requests. Avoid leading with generic conversational stems.\n\n\
{}",
lang_instruction, tools_desc.join("\n"), existing_seeds, microresolve::phrase::PHRASE_QUALITY_RULES, response_format
);
let llm_result = call_llm(&state, &prompt, max_tokens).await;
if let Err((status, msg)) = &llm_result {
eprintln!(
"[import] LLM seed generation failed ({}): {}",
status,
msg.chars().take(300).collect::<String>()
);
}
if let Ok(response) = llm_result {
if let Ok(seeds_json) =
serde_json::from_str::<serde_json::Value>(extract_json(&response))
{
if let Some(sbi) = seeds_json
.get("phrases_by_intent")
.or_else(|| seeds_json.get("seeds_by_intent"))
.and_then(|v| v.as_object())
{
if is_multilang {
for lang in &langs {
let lang_map: HashMap<String, Vec<String>> = sbi
.iter()
.filter_map(|(intent, by_lang)| {
by_lang.get(lang).and_then(|v| v.as_array()).map(|arr| {
(
intent.clone(),
arr.iter()
.filter_map(|s| s.as_str().map(String::from))
.collect(),
)
})
})
.collect();
if !lang_map.is_empty() {
let result =
phrase_pipeline(&state, &app_id, &lang_map, true, lang)
.await;
for (id, _) in &result.added {
*per_intent_added.entry(id.clone()).or_default() += 1;
}
for (id, _, _) in &result.blocked {
*per_intent_blocked.entry(id.clone()).or_default() += 1;
}
if result.recovered_by_retry > 0 {
*per_intent_recovered
.entry(
lang_map.keys().next().cloned().unwrap_or_default(),
)
.or_default() += result.recovered_by_retry;
}
all_accepted.extend(result.added.iter().cloned());
total_added += result.added.len();
total_blocked += result.blocked.len();
}
}
} else {
let phrases_map: HashMap<String, Vec<String>> = sbi
.iter()
.filter_map(|(k, v)| {
v.as_array().map(|arr| {
(
k.clone(),
arr.iter()
.filter_map(|s| s.as_str().map(String::from))
.collect(),
)
})
})
.collect();
let result =
phrase_pipeline(&state, &app_id, &phrases_map, true, langs[0])
.await;
for (id, _) in &result.added {
*per_intent_added.entry(id.clone()).or_default() += 1;
}
for (id, _, _) in &result.blocked {
*per_intent_blocked.entry(id.clone()).or_default() += 1;
}
all_accepted.extend(result.added.iter().cloned());
total_added += result.added.len();
total_blocked += result.blocked.len();
}
}
}
}
}
}
seed_into_l2(&state, &app_id, &all_accepted);
let tool_names: Vec<String> = selected_tools
.iter()
.filter_map(|(t, _)| {
t.get("name").and_then(|v| v.as_str()).map(|base| {
if req.domain.is_empty() {
base.to_string()
} else {
format!("{}:{}", req.domain, base)
}
})
})
.collect();
let per_intent: Vec<serde_json::Value> = tool_names.iter().map(|name| {
let added = per_intent_added.get(name).copied().unwrap_or(0);
let blocked = per_intent_blocked.get(name).copied().unwrap_or(0);
let recovered = per_intent_recovered.get(name).copied().unwrap_or(0);
serde_json::json!({ "name": name, "phrases_added": added, "blocked": blocked, "recovered": recovered })
}).collect();
let l2_words = state
.engine
.try_namespace(&app_id)
.map(|h| h.vocab_size())
.unwrap_or(0);
Ok(Json(serde_json::json!({
"imported": tool_names.len(),
"phrases_added": total_added,
"phrases_blocked": total_blocked,
"vocab_size": l2_words,
"intents": tool_names,
"per_intent": per_intent,
})))
}