/// System prompt for the query-expansion step.
///
/// Instructs the model to turn a query (a symbol name or a natural-language
/// phrase) into a JSON array of 5-15 related symbol/function names, and to
/// return nothing but that array. The text is sent verbatim, so any edit here
/// changes model behavior.
pub const QUERY_EXPANSION_SYSTEM: &str = r#"You are a code symbol query expander. Given a query (either a symbol name or natural language), return a JSON array of related symbol/function names that might be relevant in a codebase.

Rules:
1. Return 5-15 symbol names as a JSON array of strings
2. Include the original query term if it's a valid symbol name
3. Include common variations: camelCase, snake_case, abbreviated forms
4. Include semantically related concepts
5. For natural language queries, extract the key concepts
6. Return ONLY the JSON array, no explanation

Examples:
- "auth" -> ["auth", "authenticate", "login", "logout", "session", "token", "verify", "validateToken", "checkAuth", "isAuthenticated"]
- "how does user creation work" -> ["createUser", "newUser", "registerUser", "addUser", "signup", "register", "userCreation", "insertUser", "saveUser"]
- "validateEmail" -> ["validateEmail", "checkEmail", "isValidEmail", "emailValidator", "verifyEmail", "emailValidation", "isEmail", "parseEmail"]"#;
30
/// System prompt for the trace-reranking step.
///
/// Instructs the model to order numbered invocation paths by relevance to a
/// query and to answer with only a JSON array of path indices, most relevant
/// first (for example `[2, 0, 5, 1, 3, 4]`).
pub const TRACE_RERANK_SYSTEM: &str = r#"You are a code trace reranker. Given a query and numbered invocation paths, return ONLY a JSON array of path indices ordered by relevance to the query.

Consider these factors when ranking:
1. How directly the path relates to the query concept
2. Paths through main/entry point functions are often more important
3. Shorter, more direct paths may be more relevant
4. Paths involving the queried concept in business logic rank higher

Return the JSON array of indices, most relevant first. Example: [2, 0, 5, 1, 3, 4]"#;
42
/// System prompt for natural-language query analysis.
///
/// Instructs the model to answer with only a JSON object holding three keys:
/// `"intent"` (one of `"trace"`, `"refs"`, `"flow"`, `"impact"`,
/// `"dead_code"`), `"symbols"` (array of symbol names) and `"filters"`
/// (optional; `null` in two of the examples below).
pub const NL_QUERY_SYSTEM: &str = r#"You are a code query analyzer. Given a natural language question about code, extract the key concepts and return a JSON object with:

1. "intent": The type of query (one of: "trace", "refs", "flow", "impact", "dead_code")
2. "symbols": Array of symbol names to search for
3. "filters": Optional filters like file patterns or kinds

Return ONLY the JSON object.

Examples:
- "how is the login function called?" -> {"intent": "trace", "symbols": ["login", "authenticate", "doLogin"], "filters": null}
- "what reads the userId variable?" -> {"intent": "refs", "symbols": ["userId", "user_id"], "filters": {"kind": "read"}}
- "what would break if I change validateUser?" -> {"intent": "impact", "symbols": ["validateUser"], "filters": null}"#;
57
/// Builds the user message for the query-expansion request.
///
/// The query is embedded verbatim between double quotes (no escaping is
/// applied) and followed by a reminder to return only the JSON array.
pub fn build_expansion_prompt(query: &str) -> String {
    let segments = [
        "Expand this code query into related symbol names: \"",
        query,
        "\"\n\nReturn ONLY the JSON array.",
    ];
    segments.concat()
}
69
/// Builds the user message for the trace-reranking request.
///
/// The message starts with the query, then lists every entry of `paths`
/// under a `--- Path N ---` header where `N` is the zero-based position in
/// the slice, and ends with a reminder to return only the JSON array of
/// indices.
pub fn build_trace_rerank_prompt(query: &str, paths: &[String]) -> String {
    let header = format!("Query: {}\n\nInvocation paths:\n", query);

    // One section per path; the number shown is the slice index.
    let sections: String = paths
        .iter()
        .enumerate()
        .map(|(idx, path)| format!("\n--- Path {} ---\n{}\n", idx, path))
        .collect();

    [
        header.as_str(),
        sections.as_str(),
        "\nReturn ONLY the JSON array of indices ordered by relevance.",
    ]
    .concat()
}
79
/// Builds the user message for the natural-language query analysis request.
///
/// The query is embedded verbatim between double quotes (no escaping is
/// applied) and followed by a reminder to return only the JSON object.
pub fn build_nl_query_prompt(query: &str) -> String {
    let segments = [
        "Analyze this natural language code query: \"",
        query,
        "\"\n\nReturn ONLY the JSON object with intent, symbols, and filters.",
    ];
    segments.concat()
}
87
88use serde::Deserialize;
93
/// A query together with the symbol names it was expanded into.
#[derive(Debug, Deserialize)]
pub struct ExpandedQuery {
    /// The query text as originally given.
    ///
    /// Marked `#[serde(skip)]`: it is never read from the payload, so a
    /// freshly deserialized value has an empty string here.
    #[serde(skip)]
    pub original: String,
    /// Symbol names read from the `symbols` key of the payload.
    pub symbols: Vec<String>,
}
103
/// Structured result of analyzing a natural-language code query.
///
/// Mirrors the JSON object requested by `NL_QUERY_SYSTEM`.
#[derive(Debug, Deserialize)]
pub struct NlQueryAnalysis {
    /// Kind of query. `NL_QUERY_SYSTEM` asks for one of `"trace"`, `"refs"`,
    /// `"flow"`, `"impact"` or `"dead_code"`; this type stores whatever
    /// string was returned and does not validate it.
    pub intent: String,
    /// Symbol names to search for.
    pub symbols: Vec<String>,
    /// Optional extra filters; `None` when the payload has `null` here.
    pub filters: Option<NlQueryFilters>,
}
114
/// Optional filters attached to an [`NlQueryAnalysis`].
#[derive(Debug, Deserialize)]
pub struct NlQueryFilters {
    /// Kind filter; the example in `NL_QUERY_SYSTEM` uses `"read"`.
    /// NOTE(review): the full set of accepted values is not visible here.
    pub kind: Option<String>,
    /// File pattern filter.
    /// NOTE(review): presumably a glob or path pattern; confirm against the consumer.
    pub file_pattern: Option<String>,
}
123
124pub fn parse_expansion_response(response: &str) -> Vec<String> {
130 let text = response.trim();
131
132 if let Ok(symbols) = serde_json::from_str::<Vec<String>>(text) {
134 return symbols;
135 }
136
137 if let Some(start) = text.find('[') {
139 if let Some(end) = text.rfind(']') {
140 let json_str = &text[start..=end];
141 if let Ok(symbols) = serde_json::from_str::<Vec<String>>(json_str) {
142 return symbols;
143 }
144 }
145 }
146
147 Vec::new()
149}
150
151pub fn parse_rerank_response(response: &str, count: usize) -> Vec<usize> {
153 let text = response.trim();
154
155 if let Ok(indices) = serde_json::from_str::<Vec<usize>>(text) {
157 return indices.into_iter().filter(|&i| i < count).collect();
158 }
159
160 if let Some(start) = text.find('[') {
162 if let Some(end) = text.rfind(']') {
163 let json_str = &text[start..=end];
164 if let Ok(indices) = serde_json::from_str::<Vec<usize>>(json_str) {
165 return indices.into_iter().filter(|&i| i < count).collect();
166 }
167 }
168 }
169
170 (0..count).collect()
172}
173
174pub fn parse_nl_query_response(response: &str) -> Option<NlQueryAnalysis> {
176 let text = response.trim();
177
178 if let Ok(analysis) = serde_json::from_str::<NlQueryAnalysis>(text) {
180 return Some(analysis);
181 }
182
183 if let Some(start) = text.find('{') {
185 if let Some(end) = text.rfind('}') {
186 let json_str = &text[start..=end];
187 if let Ok(analysis) = serde_json::from_str::<NlQueryAnalysis>(json_str) {
188 return Some(analysis);
189 }
190 }
191 }
192
193 None
194}
195
/// Heuristically decides whether `query` is natural language rather than a
/// bare symbol name.
///
/// Matching is case-insensitive and works on whitespace-separated words:
/// * fewer than two words is never natural language;
/// * a query whose first word is a question/command word (`how`, `what`,
///   `show`, `find`, ...) is natural language;
/// * otherwise it is natural language when it has at least three words and
///   at least one of them is a common filler word (`the`, `to`, `all`, ...).
///
/// The first word is compared as a whole word, not as a string prefix, so
/// identifiers that merely begin with a question word (`isValid`,
/// `findUser`, `list_users`) do not trigger the rule. Punctuation glued to
/// the first word is ignored, so `what's` and `how?` still count. Leading
/// whitespace does not affect the result.
pub fn is_natural_language_query(query: &str) -> bool {
    const QUESTION_WORDS: &[&str] = &[
        "how", "what", "where", "when", "why", "which", "who", "does", "is", "are", "can", "show",
        "find", "list",
    ];
    const COMMON_WORDS: &[&str] = &[
        "the", "a", "an", "to", "from", "in", "of", "for", "with", "by", "all", "every", "any",
    ];

    // Lowercase once; case changes never add or remove whitespace, so the
    // word boundaries match those of the original query.
    let query_lower = query.to_lowercase();
    let words: Vec<&str> = query_lower.split_whitespace().collect();

    if words.len() < 2 {
        return false;
    }

    // Keep only the identifier-like head of the first word: "what's" -> "what",
    // "how?" -> "how", while "isvalid" and "list_users" stay intact.
    let leading = words[0]
        .split(|c: char| !(c.is_alphanumeric() || c == '_'))
        .next()
        .unwrap_or("");
    if QUESTION_WORDS.contains(&leading) {
        return true;
    }

    words.len() >= 3 && words.iter().any(|word| COMMON_WORDS.contains(word))
}
230
/// Unit tests for the query classifier and the response parsers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Multi-word questions/commands are natural language; single
    /// identifiers are not.
    #[test]
    fn test_is_natural_language_query() {
        assert!(is_natural_language_query("how does auth work"));
        assert!(is_natural_language_query("what calls the login function"));
        assert!(is_natural_language_query("show all references to userId"));
        assert!(!is_natural_language_query("validateEmail"));
        assert!(!is_natural_language_query("auth"));
        assert!(!is_natural_language_query("UserService"));
    }

    /// A reply that is exactly a JSON array parses directly.
    #[test]
    fn test_parse_expansion_response() {
        let response = r#"["auth", "login", "authenticate"]"#;
        let symbols = parse_expansion_response(response);
        assert_eq!(symbols, vec!["auth", "login", "authenticate"]);
    }

    /// A JSON array preceded by explanatory text is still recovered.
    #[test]
    fn test_parse_expansion_response_with_text() {
        let response = r#"Here are the related symbols: ["auth", "login", "authenticate"]"#;
        let symbols = parse_expansion_response(response);
        assert_eq!(symbols, vec!["auth", "login", "authenticate"]);
    }

    /// In-range indices are returned in the order the model gave them.
    #[test]
    fn test_parse_rerank_response() {
        let response = "[2, 0, 1, 3]";
        let indices = parse_rerank_response(response, 5);
        assert_eq!(indices, vec![2, 0, 1, 3]);
    }

    /// Indices that are not below `count` are dropped.
    #[test]
    fn test_parse_rerank_response_filters_invalid() {
        let response = "[2, 0, 10, 1, 3]";
        let indices = parse_rerank_response(response, 5);
        // 10 is out of range for count = 5, so it is removed.
        assert_eq!(indices, vec![2, 0, 1, 3]);
    }

    /// A well-formed analysis object deserializes into `NlQueryAnalysis`.
    #[test]
    fn test_parse_nl_query_response() {
        let response = r#"{"intent": "trace", "symbols": ["login"], "filters": null}"#;
        let analysis = parse_nl_query_response(response).unwrap();
        assert_eq!(analysis.intent, "trace");
        assert_eq!(analysis.symbols, vec!["login"]);
    }
}