use std::collections::BTreeMap;
pub(crate) mod bm25;
pub(crate) mod regex;
/// A tool reduced to the text fields that search strategies match against.
#[derive(Clone, Debug)]
pub(crate) struct ToolCandidate {
    /// Tool name.
    pub name: String,
    /// Free-form tool description; may be empty.
    pub description: String,
    /// Text fragments describing the tool's parameters.
    pub param_text: Vec<String>,
    /// Additional search terms attached to the tool.
    pub tags: Vec<String>,
}

impl ToolCandidate {
    /// Builds the searchable text for this tool.
    ///
    /// The name, the description, every parameter fragment and every tag are
    /// concatenated in that order, separated by single spaces. Empty fields
    /// still contribute their separator.
    pub(crate) fn corpus_text(&self) -> String {
        let fixed = [self.name.as_str(), self.description.as_str()];
        let variable = self.param_text.iter().chain(self.tags.iter());
        fixed
            .into_iter()
            .chain(variable.map(String::as_str))
            .collect::<Vec<&str>>()
            .join(" ")
    }
}
pub(crate) fn candidates_from_native(native_tools: &[serde_json::Value]) -> Vec<ToolCandidate> {
native_tools
.iter()
.filter_map(candidate_from_native_entry)
.collect()
}
/// Builds a [`ToolCandidate`] from one native tool definition.
///
/// Two layouts are recognized:
/// - no top-level `"type"` key: `name`, `description` and `input_schema` are
///   read from the tool object itself;
/// - a top-level `"type"` key: `name`, `description` and `parameters` are read
///   from the nested `"function"` object.
///
/// Returns `None` when the name is missing or not a string, or when a typed
/// entry has no `"function"` object. A missing or non-string description
/// becomes the empty string.
///
/// If `_mcp_server` is present as a string (on the tool or on its `function`
/// object), both `mcp:<server>` and `<server>` are added as tags.
fn candidate_from_native_entry(tool: &serde_json::Value) -> Option<ToolCandidate> {
    // Pick the object that carries the tool fields and the key under which its
    // schema lives, so the field extraction below is written only once.
    let (spec, schema_key) = if tool.get("type").is_none() {
        (tool, "input_schema")
    } else {
        (tool.get("function")?, "parameters")
    };

    let name = spec.get("name")?.as_str()?.to_string();
    let description = spec
        .get("description")
        .and_then(|v| v.as_str())
        .unwrap_or_default()
        .to_string();

    // The schema is only read, so borrow it instead of deep-cloning the JSON.
    let param_text = extract_param_text(spec.get(schema_key));

    let mut tags = Vec::new();
    let server = tool
        .get("_mcp_server")
        .or_else(|| tool.get("function").and_then(|f| f.get("_mcp_server")))
        .and_then(|v| v.as_str());
    if let Some(server) = server {
        tags.push(format!("mcp:{server}"));
        tags.push(server.to_string());
    }

    Some(ToolCandidate {
        name,
        description,
        param_text,
        tags,
    })
}
fn extract_param_text(schema: Option<&serde_json::Value>) -> Vec<String> {
let Some(schema) = schema else {
return Vec::new();
};
let Some(properties) = schema.get("properties").and_then(|v| v.as_object()) else {
return Vec::new();
};
let mut out = Vec::new();
for (name, prop) in properties {
let description = prop
.get("description")
.and_then(|v| v.as_str())
.unwrap_or_default();
if description.is_empty() {
out.push(name.clone());
} else {
out.push(format!("{name}: {description}"));
}
}
out
}
/// Minimum number of characters a trimmed query must contain; `run_in_tree`
/// returns an empty outcome with a diagnostic for anything shorter.
pub(crate) const MIN_QUERY_CHARS: usize = 1;
/// Default cap on the number of returned tool names.
// NOTE(review): not referenced in this file's visible code; presumably used by
// callers that do not pass an explicit `max_results` — confirm at call sites.
pub(crate) const DEFAULT_MAX_RESULTS: usize = 20;
/// Result of a tool search: the selected tool names plus an optional note.
#[derive(Clone, Debug)]
pub(crate) struct SearchOutcome {
    /// Names of the tools selected by the search.
    pub tool_names: Vec<String>,
    /// Optional human-readable message accompanying the result.
    pub diagnostic: Option<String>,
}

impl SearchOutcome {
    /// Converts the outcome into a JSON object.
    ///
    /// The object always has a `tool_names` array of strings and, when a
    /// diagnostic is set, a `diagnostic` string.
    pub(crate) fn into_tool_result(self) -> serde_json::Value {
        let names: Vec<serde_json::Value> = self
            .tool_names
            .into_iter()
            .map(serde_json::Value::String)
            .collect();

        // Entries are staged in a BTreeMap and then collected into the JSON
        // object, so they reach the object in sorted key order.
        let mut fields = BTreeMap::from([(
            "tool_names".to_string(),
            serde_json::Value::Array(names),
        )]);
        if let Some(message) = self.diagnostic {
            fields.insert("diagnostic".to_string(), serde_json::Value::String(message));
        }

        serde_json::Value::Object(fields.into_iter().collect())
    }

    /// Creates an outcome with no tool names and the given diagnostic message.
    pub(crate) fn empty(diagnostic: impl Into<String>) -> Self {
        let diagnostic = Some(diagnostic.into());
        Self {
            tool_names: Vec::new(),
            diagnostic,
        }
    }
}
/// Runs the selected in-tree search strategy over `candidates`.
///
/// The query is trimmed first. If fewer than `MIN_QUERY_CHARS` characters
/// remain, no search is performed and an empty outcome carrying a diagnostic
/// is returned. Otherwise the trimmed query, the candidates and `max_results`
/// are forwarded unchanged to `bm25::search` or `regex::search`.
pub(crate) fn run_in_tree(
    strategy: InTreeStrategy,
    query: &str,
    candidates: &[ToolCandidate],
    max_results: usize,
) -> SearchOutcome {
    let query = query.trim();
    let long_enough = query.chars().count() >= MIN_QUERY_CHARS;

    if long_enough {
        match strategy {
            InTreeStrategy::Bm25 => bm25::search(query, candidates, max_results),
            InTreeStrategy::Regex => regex::search(query, candidates, max_results),
        }
    } else {
        SearchOutcome::empty("empty query; specify search terms or a regex pattern")
    }
}
/// Selects which in-tree search implementation `run_in_tree` dispatches to.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum InTreeStrategy {
/// Dispatches to `bm25::search`.
Bm25,
/// Dispatches to `regex::search`.
Regex,
}
#[cfg(test)]
mod tests;