agentroot_core/llm/
http_query_parser.rs

1//! HTTP-based query parser using external LLM service
2
3use super::{ChatMessage, LLMClient, ParsedQuery, SearchType, TemporalFilter};
4use crate::config::LLMServiceConfig;
5use crate::error::{AgentRootError, Result};
6use chrono::{Duration, Utc};
7use std::sync::Arc;
8
9/// Query parser using external HTTP LLM service
10pub struct HttpQueryParser {
11    client: Arc<dyn LLMClient>,
12}
13
14impl HttpQueryParser {
15    /// Create from LLM client
16    pub fn new(client: Arc<dyn LLMClient>) -> Self {
17        Self { client }
18    }
19
20    /// Create from configuration
21    pub fn from_config(config: LLMServiceConfig) -> Result<Self> {
22        let client = super::VLLMClient::new(config)?;
23        Ok(Self {
24            client: Arc::new(client),
25        })
26    }
27
28    /// Create from environment variables
29    pub fn from_env() -> Result<Self> {
30        let client = super::VLLMClient::from_env()?;
31        Ok(Self {
32            client: Arc::new(client),
33        })
34    }
35
36    /// Parse natural language query
37    pub async fn parse(&self, query: &str) -> Result<ParsedQuery> {
38        let prompt = build_query_parsing_prompt(query);
39
40        let messages = vec![
41            ChatMessage::system(
42                "You are a search query parser. Extract structured information from user queries. \
43                 Output ONLY valid JSON with these fields: \
44                 search_terms (string), temporal_filter (object or null), metadata_filters (array), \
45                 search_type (bm25/vector/hybrid), confidence (0.0-1.0)"
46            ),
47            ChatMessage::user(prompt),
48        ];
49
50        let response = self.client.chat_completion(messages).await?;
51
52        parse_query_response(&response, query)
53    }
54}
55
/// Builds the user-message prompt asking the LLM to parse `query` into JSON.
///
/// The prompt lists the expected output fields, gives three worked examples,
/// and embeds the query inside a double-quoted `Query: "..."` line.
///
/// The query is untrusted user text, so it is escaped with
/// `escape_prompt_field` before being embedded. Without that, a query
/// containing `"`, `\` or a line break could terminate the quoted field early
/// and add extra lines to the prompt. Queries without such characters are
/// embedded unchanged.
fn build_query_parsing_prompt(query: &str) -> String {
    let query = escape_prompt_field(query);

    format!(
        r#"Parse this search query and extract structured information:

Query: "{}"

Output JSON with:
- search_terms: main keywords (string)
- temporal_filter: {{"description": "...", "relative_hours": N}} or null
- metadata_filters: [{{"field": "...", "value": "...", "operator": "contains"}}] or []
- search_type: "bm25" | "vector" | "hybrid"
- confidence: 0.0-1.0

Examples:
Input: "files that were edit recently"
Output: {{"search_terms": "files", "temporal_filter": {{"description": "recently", "relative_hours": 24}}, "metadata_filters": [], "search_type": "hybrid", "confidence": 0.9}}

Input: "rust code by Alice from last week"
Output: {{"search_terms": "rust code", "temporal_filter": {{"description": "last week", "relative_hours": 168}}, "metadata_filters": [{{"field": "author", "value": "Alice", "operator": "contains"}}], "search_type": "hybrid", "confidence": 0.95}}

Input: "python functions"
Output: {{"search_terms": "python functions", "temporal_filter": null, "metadata_filters": [], "search_type": "hybrid", "confidence": 0.85}}

Now parse the query above. Output only JSON:"#,
        query
    )
}

/// Escapes text so it stays inside a single double-quoted prompt line.
///
/// `"` and `\` get a leading backslash; newline, carriage return and tab
/// become `\n`, `\r` and `\t`; any other control character becomes a
/// `\uXXXX` escape. All other characters are copied through unchanged.
fn escape_prompt_field(raw: &str) -> String {
    let mut escaped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        match ch {
            '"' => escaped.push_str("\\\""),
            '\\' => escaped.push_str("\\\\"),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            // Control characters are all below U+00A0, so four hex digits suffice.
            other if other.is_control() => {
                escaped.push_str(&format!("\\u{:04x}", other as u32));
            }
            other => escaped.push(other),
        }
    }
    escaped
}
83
84fn parse_query_response(response: &str, original_query: &str) -> Result<ParsedQuery> {
85    // Extract JSON from response (handle markdown code blocks and extra text)
86    let json_str = if let Some(start) = response.find('{') {
87        if let Some(end) = response.rfind('}') {
88            &response[start..=end]
89        } else {
90            response
91        }
92    } else {
93        // No JSON found, use fallback
94        return Ok(ParsedQuery {
95            search_terms: original_query.to_string(),
96            temporal_filter: None,
97            metadata_filters: vec![],
98            search_type: SearchType::Hybrid,
99            confidence: 0.5,
100        });
101    };
102
103    // Parse JSON
104    let parsed_json: serde_json::Value = serde_json::from_str(json_str).map_err(|e| {
105        tracing::warn!("Failed to parse query JSON: {}, using fallback", e);
106        AgentRootError::Llm(format!("JSON parse error: {}", e))
107    })?;
108
109    let search_terms = parsed_json["search_terms"]
110        .as_str()
111        .unwrap_or(original_query)
112        .to_string();
113
114    let temporal_filter = if let Some(tf) = parsed_json.get("temporal_filter") {
115        if !tf.is_null() {
116            let hours = tf["relative_hours"].as_i64().unwrap_or(24);
117            let description = tf["description"].as_str().unwrap_or("").to_string();
118            let now = Utc::now();
119            let start = now - Duration::hours(hours);
120            Some(TemporalFilter {
121                start: Some(start.to_rfc3339()),
122                end: Some(now.to_rfc3339()),
123                description,
124            })
125        } else {
126            None
127        }
128    } else {
129        None
130    };
131
132    let metadata_filters = if let Some(filters) = parsed_json["metadata_filters"].as_array() {
133        filters
134            .iter()
135            .filter_map(|f| {
136                Some(super::MetadataFilterHint {
137                    field: f["field"].as_str()?.to_string(),
138                    value: f["value"].as_str()?.to_string(),
139                    operator: f["operator"].as_str().unwrap_or("contains").to_string(),
140                })
141            })
142            .collect()
143    } else {
144        vec![]
145    };
146
147    let search_type = match parsed_json["search_type"].as_str() {
148        Some("bm25") => SearchType::Bm25,
149        Some("vector") => SearchType::Vector,
150        Some("hybrid") => SearchType::Hybrid,
151        _ => SearchType::Hybrid,
152    };
153
154    let confidence = parsed_json["confidence"].as_f64().unwrap_or(0.8);
155
156    Ok(ParsedQuery {
157        search_terms,
158        temporal_filter,
159        metadata_filters,
160        search_type,
161        confidence,
162    })
163}