greppy/ai/
trace_prompts.rs

1//! Trace-specific AI prompts
2//!
3//! Provides prompts for AI-enhanced trace operations:
4//! - Query expansion: "auth" -> [auth, login, authenticate, session, token]
5//! - Natural language parsing: "how does auth work" -> [authenticate, login, session]
6//! - Result reranking: prioritize most relevant invocation paths
7//!
8//! @module ai/trace_prompts
9
10// =============================================================================
11// SYSTEM PROMPTS
12// =============================================================================
13
/// System prompt for query expansion.
///
/// Instructs the model to turn a query (a symbol name or a natural language
/// phrase) into a JSON array of 5-15 related symbol/function names, covering
/// the original term when it is a valid symbol, casing/abbreviation
/// variations, and semantically related concepts.
///
/// The prompt demands that the reply be ONLY the JSON array, and includes
/// three worked examples to anchor that output format.
pub const QUERY_EXPANSION_SYSTEM: &str = r#"You are a code symbol query expander. Given a query (either a symbol name or natural language), return a JSON array of related symbol/function names that might be relevant in a codebase.

Rules:
1. Return 5-15 symbol names as a JSON array of strings
2. Include the original query term if it's a valid symbol name
3. Include common variations: camelCase, snake_case, abbreviated forms
4. Include semantically related concepts
5. For natural language queries, extract the key concepts
6. Return ONLY the JSON array, no explanation

Examples:
- "auth" -> ["auth", "authenticate", "login", "logout", "session", "token", "verify", "validateToken", "checkAuth", "isAuthenticated"]
- "how does user creation work" -> ["createUser", "newUser", "registerUser", "addUser", "signup", "register", "userCreation", "insertUser", "saveUser"]
- "validateEmail" -> ["validateEmail", "checkEmail", "isValidEmail", "emailValidator", "verifyEmail", "emailValidation", "isEmail", "parseEmail"]"#;
30
/// System prompt for reranking trace results.
///
/// Instructs the model to take a query plus numbered invocation paths and
/// reply with ONLY a JSON array of path indices, most relevant first.
///
/// The prompt lists the ranking factors the model should weigh: how directly
/// a path relates to the query, whether it passes through main/entry point
/// functions, path length, and whether the queried concept appears in
/// business logic. The embedded example (`[2, 0, 5, 1, 3, 4]`) contains index
/// `0`, i.e. the indices are shown as zero-based.
pub const TRACE_RERANK_SYSTEM: &str = r#"You are a code trace reranker. Given a query and numbered invocation paths, return ONLY a JSON array of path indices ordered by relevance to the query.

Consider these factors when ranking:
1. How directly the path relates to the query concept
2. Paths through main/entry point functions are often more important
3. Shorter, more direct paths may be more relevant
4. Paths involving the queried concept in business logic rank higher

Return the JSON array of indices, most relevant first. Example: [2, 0, 5, 1, 3, 4]"#;
42
/// System prompt for natural language query understanding.
///
/// Instructs the model to analyze a natural language question about code and
/// reply with ONLY a JSON object containing:
///
/// - `intent`: one of `trace`, `refs`, `flow`, `impact`, `dead_code`
/// - `symbols`: an array of symbol names to search for
/// - `filters`: optional filters (the examples show `null` or an object such
///   as `{"kind": "read"}`)
///
/// Three worked examples are embedded to anchor the output shape.
pub const NL_QUERY_SYSTEM: &str = r#"You are a code query analyzer. Given a natural language question about code, extract the key concepts and return a JSON object with:

1. "intent": The type of query (one of: "trace", "refs", "flow", "impact", "dead_code")
2. "symbols": Array of symbol names to search for
3. "filters": Optional filters like file patterns or kinds

Return ONLY the JSON object.

Examples:
- "how is the login function called?" -> {"intent": "trace", "symbols": ["login", "authenticate", "doLogin"], "filters": null}
- "what reads the userId variable?" -> {"intent": "refs", "symbols": ["userId", "user_id"], "filters": {"kind": "read"}}
- "what would break if I change validateUser?" -> {"intent": "impact", "symbols": ["validateUser"], "filters": null}"#;
57
58// =============================================================================
59// USER PROMPT BUILDERS
60// =============================================================================
61
/// Assemble the user message for a query-expansion request.
///
/// The `query` is inserted verbatim between double quotes (no escaping is
/// applied), followed by a blank line and a reminder that the reply must be
/// only the JSON array.
pub fn build_expansion_prompt(query: &str) -> String {
    const LEAD_IN: &str = "Expand this code query into related symbol names: \"";
    const CLOSING: &str = "\"\n\nReturn ONLY the JSON array.";

    let mut message = String::with_capacity(LEAD_IN.len() + query.len() + CLOSING.len());
    message.push_str(LEAD_IN);
    message.push_str(query);
    message.push_str(CLOSING);
    message
}
69
/// Assemble the user message for a trace-rerank request.
///
/// The message starts with the `query`, then lists every entry of `paths`
/// under a `--- Path N ---` heading where `N` is the entry's zero-based
/// position in the slice, and ends with a reminder to reply with only the
/// JSON array of indices.
pub fn build_trace_rerank_prompt(query: &str, paths: &[String]) -> String {
    let header = format!("Query: {}\n\nInvocation paths:\n", query);

    // One numbered section per path, concatenated in slice order.
    let sections: String = paths
        .iter()
        .enumerate()
        .map(|(index, path)| format!("\n--- Path {} ---\n{}\n", index, path))
        .collect();

    [
        header.as_str(),
        sections.as_str(),
        "\nReturn ONLY the JSON array of indices ordered by relevance.",
    ]
    .concat()
}
79
/// Assemble the user message for a natural-language query analysis request.
///
/// The `query` is inserted verbatim between double quotes (no escaping is
/// applied), followed by a blank line and a reminder that the reply must be
/// only the JSON object with `intent`, `symbols`, and `filters`.
pub fn build_nl_query_prompt(query: &str) -> String {
    [
        "Analyze this natural language code query: \"",
        query,
        "\"\n\nReturn ONLY the JSON object with intent, symbols, and filters.",
    ]
    .concat()
}
87
88// =============================================================================
89// RESPONSE TYPES
90// =============================================================================
91
92use serde::Deserialize;
93
/// Response from query expansion.
///
/// Pairs the query that was expanded with the symbol names produced for it.
/// When deserialized, only `symbols` is read from the input.
#[derive(Debug, Deserialize)]
pub struct ExpandedQuery {
    /// The original query.
    ///
    /// Marked `#[serde(skip)]`, so it is never read from the response JSON;
    /// presumably the caller fills it in after parsing (TODO confirm).
    #[serde(skip)]
    pub original: String,
    /// Expanded symbol names to search for.
    pub symbols: Vec<String>,
}
103
/// Response from natural language query analysis.
///
/// Field names match the JSON keys requested by `NL_QUERY_SYSTEM`
/// (`intent`, `symbols`, `filters`).
#[derive(Debug, Deserialize)]
pub struct NlQueryAnalysis {
    /// The detected intent, kept as the raw string from the response.
    /// The system prompt asks for one of `trace`, `refs`, `flow`, `impact`,
    /// `dead_code`, but nothing in this type enforces that set.
    pub intent: String,
    /// Symbols to search for.
    pub symbols: Vec<String>,
    /// Optional filters; `None` when the response carries `null` here.
    pub filters: Option<NlQueryFilters>,
}
114
/// Filters extracted from a natural language query.
///
/// Both fields are optional, so a filter object may carry either one, both,
/// or neither.
#[derive(Debug, Deserialize)]
pub struct NlQueryFilters {
    /// Filter by reference kind (read, write, call, etc.), kept as a raw string.
    pub kind: Option<String>,
    /// Filter by file pattern, kept as a raw string.
    pub file_pattern: Option<String>,
}
123
124// =============================================================================
125// HELPER FUNCTIONS
126// =============================================================================
127
128/// Parse expanded query symbols from AI response
129pub fn parse_expansion_response(response: &str) -> Vec<String> {
130    let text = response.trim();
131
132    // Try direct parse
133    if let Ok(symbols) = serde_json::from_str::<Vec<String>>(text) {
134        return symbols;
135    }
136
137    // Try to find JSON array in the text
138    if let Some(start) = text.find('[') {
139        if let Some(end) = text.rfind(']') {
140            let json_str = &text[start..=end];
141            if let Ok(symbols) = serde_json::from_str::<Vec<String>>(json_str) {
142                return symbols;
143            }
144        }
145    }
146
147    // Fallback: return empty
148    Vec::new()
149}
150
151/// Parse reranked indices from AI response
152pub fn parse_rerank_response(response: &str, count: usize) -> Vec<usize> {
153    let text = response.trim();
154
155    // Try direct parse
156    if let Ok(indices) = serde_json::from_str::<Vec<usize>>(text) {
157        return indices.into_iter().filter(|&i| i < count).collect();
158    }
159
160    // Try to find JSON array in the text
161    if let Some(start) = text.find('[') {
162        if let Some(end) = text.rfind(']') {
163            let json_str = &text[start..=end];
164            if let Ok(indices) = serde_json::from_str::<Vec<usize>>(json_str) {
165                return indices.into_iter().filter(|&i| i < count).collect();
166            }
167        }
168    }
169
170    // Fallback: return original order
171    (0..count).collect()
172}
173
174/// Parse natural language query analysis from AI response
175pub fn parse_nl_query_response(response: &str) -> Option<NlQueryAnalysis> {
176    let text = response.trim();
177
178    // Try direct parse
179    if let Ok(analysis) = serde_json::from_str::<NlQueryAnalysis>(text) {
180        return Some(analysis);
181    }
182
183    // Try to find JSON object in the text
184    if let Some(start) = text.find('{') {
185        if let Some(end) = text.rfind('}') {
186            let json_str = &text[start..=end];
187            if let Ok(analysis) = serde_json::from_str::<NlQueryAnalysis>(json_str) {
188                return Some(analysis);
189            }
190        }
191    }
192
193    None
194}
195
/// Check if a query looks like natural language (vs a symbol name).
///
/// Heuristics, applied to the lowercased, whitespace-split query:
///
/// 1. Fewer than two words is never natural language.
/// 2. If the first word *is* a question/command word (`how`, `what`, `show`,
///    `find`, ...), the query is natural language. The first word is compared
///    as a whole word, after dropping anything from an apostrophe onward and
///    trimming surrounding punctuation, so `"how's"` and `"what,"` match while
///    identifiers such as `isValid`, `cancelOrder`, or `list_users` do not.
/// 3. Otherwise, the query is natural language when it has at least three
///    words and at least one of them is a common English function word
///    (`the`, `to`, `of`, ...).
///
/// Leading and trailing whitespace in `query` does not affect the result.
pub fn is_natural_language_query(query: &str) -> bool {
    const QUESTION_WORDS: &[&str] = &[
        "how", "what", "where", "when", "why", "which", "who", "does", "is", "are", "can", "show",
        "find", "list",
    ];
    const COMMON_WORDS: &[&str] = &[
        "the", "a", "an", "to", "from", "in", "of", "for", "with", "by", "all", "every", "any",
    ];

    // Lowercase once, then split; lowercasing does not alter whitespace, so the
    // word boundaries are the same as in the original query.
    let lowered = query.to_lowercase();
    let words: Vec<&str> = lowered.split_whitespace().collect();

    if words.len() < 2 {
        return false;
    }

    // Normalise the first word: cut contractions ("how's" -> "how") and strip
    // surrounding punctuation ("what," -> "what"). Underscores are kept so
    // snake_case identifiers stay intact and do not match a question word.
    let first_word = words[0]
        .split(|c: char| c == '\'' || c == '\u{2019}')
        .next()
        .unwrap_or("")
        .trim_matches(|c: char| !c.is_alphanumeric() && c != '_');

    // Whole-word comparison: a prefix test would misfire on identifiers that
    // merely begin with "is", "can", "list", etc.
    if QUESTION_WORDS.contains(&first_word) {
        return true;
    }

    // Longer phrases containing an English function word.
    words.len() >= 3 && words.iter().any(|word| COMMON_WORDS.contains(word))
}
230
/// Unit tests for the heuristics and response parsers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    /// Multi-word questions/commands are natural language; single
    /// identifiers are not.
    #[test]
    fn test_is_natural_language_query() {
        assert!(is_natural_language_query("how does auth work"));
        assert!(is_natural_language_query("what calls the login function"));
        assert!(is_natural_language_query("show all references to userId"));
        assert!(!is_natural_language_query("validateEmail"));
        assert!(!is_natural_language_query("auth"));
        assert!(!is_natural_language_query("UserService"));
    }

    /// A reply that is exactly a JSON array parses directly.
    #[test]
    fn test_parse_expansion_response() {
        let response = r#"["auth", "login", "authenticate"]"#;
        let symbols = parse_expansion_response(response);
        assert_eq!(symbols, vec!["auth", "login", "authenticate"]);
    }

    /// A JSON array preceded by prose is still extracted.
    #[test]
    fn test_parse_expansion_response_with_text() {
        let response = r#"Here are the related symbols: ["auth", "login", "authenticate"]"#;
        let symbols = parse_expansion_response(response);
        assert_eq!(symbols, vec!["auth", "login", "authenticate"]);
    }

    /// In-range indices are returned in the model's order; an index the
    /// model left out (4 here) is not appended.
    #[test]
    fn test_parse_rerank_response() {
        let response = "[2, 0, 1, 3]";
        let indices = parse_rerank_response(response, 5);
        assert_eq!(indices, vec![2, 0, 1, 3]);
    }

    /// Indices at or beyond `count` are dropped.
    #[test]
    fn test_parse_rerank_response_filters_invalid() {
        let response = "[2, 0, 10, 1, 3]";
        let indices = parse_rerank_response(response, 5);
        assert_eq!(indices, vec![2, 0, 1, 3]); // 10 filtered out
    }

    /// A well-formed analysis object with `filters: null` deserializes.
    #[test]
    fn test_parse_nl_query_response() {
        let response = r#"{"intent": "trace", "symbols": ["login"], "filters": null}"#;
        let analysis = parse_nl_query_response(response).unwrap();
        assert_eq!(analysis.intent, "trace");
        assert_eq!(analysis.symbols, vec!["login"]);
    }
}