Skip to main content

ygrep_core/search/
results.rs

1use serde::{Deserialize, Serialize};
2
3/// Type of match for a search hit
4#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
5pub enum MatchType {
6    /// Matched via BM25 text search
7    Text,
8    /// Matched via semantic vector search
9    Semantic,
10    /// Matched by both text and semantic search
11    Hybrid,
12}
13
14impl std::fmt::Display for MatchType {
15    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
16        match self {
17            MatchType::Text => write!(f, "text"),
18            MatchType::Semantic => write!(f, "semantic"),
19            MatchType::Hybrid => write!(f, "hybrid"),
20        }
21    }
22}
23
24/// Result of a search operation
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct SearchResult {
27    /// Search hits
28    pub hits: Vec<SearchHit>,
29    /// Total number of results (may be more than hits if limited)
30    pub total: usize,
31    /// Query execution time in milliseconds
32    pub query_time_ms: u64,
33    /// Number of hits from text search
34    #[serde(default)]
35    pub text_hits: usize,
36    /// Number of hits from semantic search
37    #[serde(default)]
38    pub semantic_hits: usize,
39}
40
41/// A single search hit
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct SearchHit {
44    /// File path (relative to workspace)
45    pub path: String,
46    /// Line range (start-end)
47    pub line_start: u64,
48    pub line_end: u64,
49    /// Content snippet
50    pub snippet: String,
51    /// Relevance score (0.0-1.0)
52    pub score: f32,
53    /// Whether this is a chunk or full document
54    pub is_chunk: bool,
55    /// Document ID
56    pub doc_id: String,
57    /// Type of match (text, semantic, or hybrid)
58    #[serde(default = "default_match_type")]
59    pub match_type: MatchType,
60    /// 0-based offset of the matching line within the snippet
61    #[serde(default)]
62    pub match_line_in_snippet: usize,
63}
64
65fn default_match_type() -> MatchType {
66    MatchType::Text
67}
68
69impl SearchHit {
70    /// Format line range as string (e.g., "10-25")
71    pub fn lines_str(&self) -> String {
72        if self.line_start == self.line_end {
73            format!("{}", self.line_start)
74        } else {
75            format!("{}-{}", self.line_start, self.line_end)
76        }
77    }
78}
79
80impl SearchResult {
81    /// Create an empty result
82    pub fn empty() -> Self {
83        Self {
84            hits: vec![],
85            total: 0,
86            query_time_ms: 0,
87            text_hits: 0,
88            semantic_hits: 0,
89        }
90    }
91
92    /// Check if there are any results
93    pub fn is_empty(&self) -> bool {
94        self.hits.is_empty()
95    }
96
97    /// Format search type summary (e.g., "5 text + 3 semantic" or "text")
98    fn search_type_summary(&self) -> String {
99        if self.text_hits > 0 && self.semantic_hits > 0 {
100            format!("{} text + {} semantic", self.text_hits, self.semantic_hits)
101        } else if self.semantic_hits > 0 {
102            "semantic".to_string()
103        } else {
104            "text".to_string()
105        }
106    }
107
108    /// Normalize score for display (RRF scores are tiny ~0.01, we want 0-100 range)
109    fn display_score(score: f32) -> f32 {
110        // RRF scores max out around 0.016 for K=60, scale to 0-100
111        // A document appearing in both BM25 and vector results at rank 1 would be ~0.033
112        (score * 3000.0).min(99.9)
113    }
114
115    /// Format results for AI-optimized output (minimal tokens, maximum density)
116    pub fn format_ai(&self) -> String {
117        let mut output = String::new();
118
119        // Header with count and search type breakdown
120        output.push_str(&format!(
121            "# {} results ({})\n\n",
122            self.hits.len(),
123            self.search_type_summary()
124        ));
125
126        for hit in &self.hits {
127            // Single line format: path:line (score%) [match_type]
128            let score_pct = Self::display_score(hit.score);
129            let match_indicator = match hit.match_type {
130                MatchType::Hybrid => " +",   // both text and semantic
131                MatchType::Semantic => " ~", // semantic only
132                MatchType::Text => "",       // text only (default, no indicator)
133            };
134            // Report the actual matching line, not the snippet start
135            let reported_line = hit.line_start + hit.match_line_in_snippet as u64;
136            output.push_str(&format!(
137                "{}:{} ({:.0}%){}\n",
138                hit.path, reported_line, score_pct, match_indicator
139            ));
140
141            // Show the actual matching line from the snippet, not the first line
142            if let Some(match_line) = hit.snippet.lines().nth(hit.match_line_in_snippet) {
143                let trimmed = match_line.trim();
144                let preview = if trimmed.len() > 100 {
145                    let boundary = trimmed.floor_char_boundary(100);
146                    format!("{}...", &trimmed[..boundary])
147                } else {
148                    trimmed.to_string()
149                };
150                output.push_str(&format!("  {}\n", preview));
151            }
152            output.push('\n');
153        }
154
155        output
156    }
157
158    /// Format results as JSON (includes all metadata)
159    pub fn format_json(&self) -> String {
160        serde_json::to_string_pretty(self).unwrap_or_else(|_| "{}".to_string())
161    }
162
163    /// Format results for human-readable output (more context, line numbers)
164    pub fn format_pretty(&self) -> String {
165        let mut output = String::new();
166
167        // Header with breakdown
168        let type_info = if self.text_hits > 0 || self.semantic_hits > 0 {
169            format!(" ({})", self.search_type_summary())
170        } else {
171            String::new()
172        };
173        output.push_str(&format!("# {} results{}\n\n", self.hits.len(), type_info));
174
175        for hit in &self.hits {
176            // Header: path:line_range
177            output.push_str(&format!("{}:{}\n", hit.path, hit.lines_str()));
178
179            // Show first few lines of snippet with line numbers
180            for (i, line) in hit.snippet.lines().take(3).enumerate() {
181                let line_num = hit.line_start + i as u64;
182                let trimmed = line.trim();
183                let preview = if trimmed.len() > 80 {
184                    let boundary = trimmed.floor_char_boundary(80);
185                    format!("{}...", &trimmed[..boundary])
186                } else {
187                    trimmed.to_string()
188                };
189                output.push_str(&format!("  {}: {}\n", line_num, preview));
190            }
191            output.push('\n');
192        }
193
194        output
195    }
196}
197
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a hit on lines 1-5 with a fixed three-line snippet.
    fn make_hit(path: &str, score: f32, match_type: MatchType) -> SearchHit {
        SearchHit {
            path: path.to_string(),
            line_start: 1,
            line_end: 5,
            snippet: "fn example() {\n    // code\n}".to_string(),
            score,
            is_chunk: false,
            doc_id: "test".to_string(),
            match_type,
            match_line_in_snippet: 0,
        }
    }

    /// Wrap hits in a result whose counters are derived from the hits'
    /// match types (hybrid hits count towards both counters).
    fn make_result(hits: Vec<SearchHit>) -> SearchResult {
        let text_hits = hits
            .iter()
            .filter(|h| matches!(h.match_type, MatchType::Text | MatchType::Hybrid))
            .count();
        let semantic_hits = hits
            .iter()
            .filter(|h| matches!(h.match_type, MatchType::Semantic | MatchType::Hybrid))
            .count();
        let total = hits.len();
        SearchResult {
            hits,
            total,
            query_time_ms: 10,
            text_hits,
            semantic_hits,
        }
    }

    #[test]
    fn test_lines_str() {
        let hit = SearchHit {
            path: "test.rs".to_string(),
            line_start: 10,
            line_end: 25,
            snippet: "content".to_string(),
            score: 0.8,
            is_chunk: false,
            doc_id: "abc123".to_string(),
            match_type: MatchType::Text,
            match_line_in_snippet: 0,
        };
        assert_eq!(hit.lines_str(), "10-25");

        let single_line = SearchHit {
            line_start: 5,
            line_end: 5,
            ..hit.clone()
        };
        assert_eq!(single_line.lines_str(), "5");
    }

    #[test]
    fn test_format_ai() {
        let result = SearchResult {
            hits: vec![SearchHit {
                path: "src/main.rs".to_string(),
                line_start: 1,
                line_end: 10,
                snippet: "fn main() {\n    println!(\"hello\");\n}".to_string(),
                score: 0.01,
                is_chunk: false,
                doc_id: "abc".to_string(),
                match_type: MatchType::Text,
                match_line_in_snippet: 0,
            }],
            total: 1,
            query_time_ms: 15,
            text_hits: 1,
            semantic_hits: 0,
        };

        let output = result.format_ai();
        assert!(output.contains("# 1 results"));
        assert!(output.contains("src/main.rs:1"));
        assert!(output.contains("(30%)"));
    }

    #[test]
    fn test_format_ai_reports_matching_line() {
        // The header line and the preview must both follow the matching
        // line inside the snippet, not the snippet's first line.
        let hit = SearchHit {
            match_line_in_snippet: 1,
            ..make_hit("src/lib.rs", 0.01, MatchType::Text)
        };
        let output = make_result(vec![hit]).format_ai();
        assert!(output.contains("src/lib.rs:2 (30%)\n  // code\n"));
    }

    #[test]
    fn test_format_json_valid() {
        let result = make_result(vec![make_hit("src/main.rs", 0.01, MatchType::Text)]);
        let json = result.format_json();
        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
        assert!(parsed.get("hits").unwrap().is_array());
        assert_eq!(parsed["total"], 1);
    }

    #[test]
    fn test_format_pretty_includes_path_and_line_numbers() {
        let result = make_result(vec![make_hit("src/lib.rs", 0.01, MatchType::Text)]);
        let output = result.format_pretty();
        assert!(output.contains("src/lib.rs:1"));
        assert!(output.contains("1: fn example()"));
    }

    #[test]
    fn test_empty_result_formatting() {
        let result = SearchResult::empty();
        assert!(result.is_empty());

        let ai = result.format_ai();
        assert!(ai.contains("# 0 results"));

        let pretty = result.format_pretty();
        assert!(pretty.contains("# 0 results"));

        let json = result.format_json();
        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed["hits"].as_array().unwrap().len(), 0);
    }

    #[test]
    fn test_format_ai_match_type_indicators() {
        let result = make_result(vec![
            make_hit("src/hybrid.rs", 0.02, MatchType::Hybrid),
            make_hit("src/semantic.rs", 0.01, MatchType::Semantic),
            make_hit("src/text.rs", 0.01, MatchType::Text),
        ]);
        let output = result.format_ai();

        // Each assertion is tied to its own hit so an indicator attached to
        // the wrong entry cannot slip through.
        // Hybrid gets " +" indicator
        assert!(output.contains("src/hybrid.rs:1 (60%) +\n"));
        // Semantic gets " ~" indicator
        assert!(output.contains("src/semantic.rs:1 (30%) ~\n"));
        // Text gets no indicator: the line ends right after "%)"
        assert!(output.contains("src/text.rs:1 (30%)\n"));
    }
}
332}