searchfox_lib/
search.rs

1use crate::client::SearchfoxClient;
2use crate::types::{File, SearchfoxResponse};
3use anyhow::Result;
4use log::{debug, warn};
5use reqwest::Url;
6
/// Selects which result categories a search keeps.
///
/// The category names this filter understands are the literal strings
/// `"test"`, `"generated"` and `"normal"`, as compared in `should_include`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CategoryFilter {
    /// Keep every category.
    All,
    /// Drop the `"test"` category, keep everything else.
    ExcludeTests,
    /// Drop the `"generated"` category, keep everything else.
    ExcludeGenerated,
    /// Drop both the `"test"` and the `"generated"` categories.
    ExcludeTestsAndGenerated,
    /// Keep only the `"test"` category.
    OnlyTests,
    /// Keep only the `"generated"` category.
    OnlyGenerated,
    /// Keep only the `"normal"` category.
    OnlyNormal,
}
17
18impl CategoryFilter {
19    pub fn should_include(&self, category: &str) -> bool {
20        match self {
21            CategoryFilter::All => true,
22            CategoryFilter::ExcludeTests => category != "test",
23            CategoryFilter::ExcludeGenerated => category != "generated",
24            CategoryFilter::ExcludeTestsAndGenerated => {
25                category != "test" && category != "generated"
26            }
27            CategoryFilter::OnlyTests => category == "test",
28            CategoryFilter::OnlyGenerated => category == "generated",
29            CategoryFilter::OnlyNormal => category == "normal",
30        }
31    }
32}
33
34#[derive(Debug, Clone)]
35pub struct SearchOptions {
36    pub query: Option<String>,
37    pub path: Option<String>,
38    pub case: bool,
39    pub regexp: bool,
40    pub limit: usize,
41    pub context: Option<usize>,
42    pub symbol: Option<String>,
43    pub id: Option<String>,
44    pub cpp: bool,
45    pub c_lang: bool,
46    pub webidl: bool,
47    pub js: bool,
48    pub category_filter: CategoryFilter,
49}
50
51impl Default for SearchOptions {
52    fn default() -> Self {
53        Self {
54            query: None,
55            path: None,
56            case: false,
57            regexp: false,
58            limit: 50,
59            context: None,
60            symbol: None,
61            id: None,
62            cpp: false,
63            c_lang: false,
64            webidl: false,
65            js: false,
66            category_filter: CategoryFilter::All,
67        }
68    }
69}
70
71impl SearchOptions {
72    /// Check if this search is expensive (doesn't use searchfox's index)
73    pub fn is_expensive_search(&self) -> bool {
74        // Only symbol: and id: prefixes use the optimized index
75        if self.symbol.is_some() || self.id.is_some() {
76            return false;
77        }
78
79        if let Some(query) = &self.query {
80            // Check if query contains indexed prefixes
81            !query.contains("symbol:") && !query.contains("id:")
82        } else {
83            false
84        }
85    }
86
87    pub fn matches_language_filter(&self, path: &str) -> bool {
88        if !self.cpp && !self.c_lang && !self.webidl && !self.js {
89            return true;
90        }
91
92        let path_lower = path.to_lowercase();
93
94        if self.cpp
95            && (path_lower.ends_with(".cc")
96                || path_lower.ends_with(".cpp")
97                || path_lower.ends_with(".h")
98                || path_lower.ends_with(".hh")
99                || path_lower.ends_with(".hpp"))
100        {
101            return true;
102        }
103
104        if self.c_lang && (path_lower.ends_with(".c") || path_lower.ends_with(".h")) {
105            return true;
106        }
107
108        if self.webidl && path_lower.ends_with(".webidl") {
109            return true;
110        }
111
112        if self.js
113            && (path_lower.ends_with(".js")
114                || path_lower.ends_with(".mjs")
115                || path_lower.ends_with(".ts")
116                || path_lower.ends_with(".cjs")
117                || path_lower.ends_with(".jsx")
118                || path_lower.ends_with(".tsx"))
119        {
120            return true;
121        }
122
123        false
124    }
125
126    pub fn build_query(&self) -> String {
127        if let Some(symbol) = &self.symbol {
128            format!("symbol:{symbol}")
129        } else if let Some(id) = &self.id {
130            format!("id:{id}")
131        } else if let Some(q) = &self.query {
132            if q.contains("path:")
133                || q.contains("pathre:")
134                || q.contains("symbol:")
135                || q.contains("id:")
136                || q.contains("text:")
137                || q.contains("re:")
138            {
139                q.clone()
140            } else if let Some(context) = self.context {
141                format!("context:{context} text:{q}")
142            } else {
143                q.clone()
144            }
145        } else {
146            String::new()
147        }
148    }
149}
150
/// One hit returned by `SearchfoxClient::search`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SearchResult {
    /// Path of the file the hit belongs to, as reported by the server.
    pub path: String,
    /// Line number of the hit; `0` for path-only searches, which report
    /// files rather than lines.
    pub line_number: usize,
    /// Matching line with trailing whitespace removed; empty for path-only
    /// searches.
    pub line: String,
}
156
157impl SearchfoxClient {
158    /// Warns about expensive searches to stderr (for library users)
159    pub fn warn_if_expensive_search(&self, options: &SearchOptions) {
160        if options.is_expensive_search() {
161            if let Some(query) = &options.query {
162                eprintln!("⚠️  WARNING: Expensive full-text search detected");
163                eprintln!("Query '{}' doesn't use searchfox's optimized index", query);
164                eprintln!("Consider using symbol: or id: prefixes, or use ripgrep locally");
165                eprintln!("For LLM tools: Use find_and_display_definition() for definitions");
166            }
167        }
168    }
169
170    pub async fn search(&self, options: &SearchOptions) -> Result<Vec<SearchResult>> {
171        let query = options.build_query();
172
173        let mut url = Url::parse(&format!("https://searchfox.org/{}/search", self.repo))?;
174        url.query_pairs_mut()
175            .append_pair("q", &query)
176            .append_pair("case", if options.case { "true" } else { "false" })
177            .append_pair("regexp", if options.regexp { "true" } else { "false" });
178        if let Some(path) = &options.path {
179            url.query_pairs_mut().append_pair("path", path);
180        }
181
182        let response = self.get(url).await?;
183
184        if !response.status().is_success() {
185            anyhow::bail!("Request failed: {}", response.status());
186        }
187
188        let response_text = response.text().await?;
189        let json: SearchfoxResponse = serde_json::from_str(&response_text)?;
190
191        let mut results = Vec::new();
192        let mut count = 0;
193
194        for (key, value) in &json {
195            if key.starts_with('*') {
196                continue;
197            }
198
199            if !options.category_filter.should_include(key) {
200                continue;
201            }
202
203            if let Some(files_array) = value.as_array() {
204                for file in files_array {
205                    let file: File = match serde_json::from_value(file.clone()) {
206                        Ok(f) => f,
207                        Err(e) => {
208                            warn!("Failed to parse file JSON: {e}");
209                            continue;
210                        }
211                    };
212
213                    if !options.matches_language_filter(&file.path) {
214                        continue;
215                    }
216
217                    if options.path.is_some()
218                        && options.query.is_none()
219                        && options.symbol.is_none()
220                        && options.id.is_none()
221                    {
222                        if count >= options.limit {
223                            break;
224                        }
225                        results.push(SearchResult {
226                            path: file.path.clone(),
227                            line_number: 0,
228                            line: String::new(),
229                        });
230                        count += 1;
231                    } else {
232                        for line in file.lines {
233                            if count >= options.limit {
234                                break;
235                            }
236                            results.push(SearchResult {
237                                path: file.path.clone(),
238                                line_number: line.lno,
239                                line: line.line.trim_end().to_string(),
240                            });
241                            count += 1;
242                        }
243                    }
244                }
245            } else if let Some(obj) = value.as_object() {
246                for (_category, file_list) in obj {
247                    if let Some(files) = file_list.as_array() {
248                        for file in files {
249                            let file: File = match serde_json::from_value(file.clone()) {
250                                Ok(f) => f,
251                                Err(_) => continue,
252                            };
253
254                            if !options.matches_language_filter(&file.path) {
255                                continue;
256                            }
257
258                            if options.path.is_some()
259                                && options.query.is_none()
260                                && options.symbol.is_none()
261                                && options.id.is_none()
262                            {
263                                if count >= options.limit {
264                                    break;
265                                }
266                                results.push(SearchResult {
267                                    path: file.path.clone(),
268                                    line_number: 0,
269                                    line: String::new(),
270                                });
271                                count += 1;
272                            } else {
273                                for line in file.lines {
274                                    if count >= options.limit {
275                                        break;
276                                    }
277                                    results.push(SearchResult {
278                                        path: file.path.clone(),
279                                        line_number: line.lno,
280                                        line: line.line.trim_end().to_string(),
281                                    });
282                                    count += 1;
283                                }
284                            }
285                        }
286                    }
287                }
288            }
289
290            if count >= options.limit {
291                break;
292            }
293        }
294
295        Ok(results)
296    }
297
298    pub async fn find_symbol_locations(
299        &self,
300        symbol: &str,
301        path_filter: Option<&str>,
302        options: &SearchOptions,
303    ) -> Result<Vec<(String, usize)>> {
304        let query = format!("id:{symbol}");
305        let mut url = Url::parse(&format!("https://searchfox.org/{}/search", self.repo))?;
306        url.query_pairs_mut().append_pair("q", &query);
307        if let Some(path) = path_filter {
308            url.query_pairs_mut().append_pair("path", path);
309        }
310
311        let response = self.get(url).await?;
312
313        if !response.status().is_success() {
314            anyhow::bail!("Request failed: {}", response.status());
315        }
316
317        let response_text = response.text().await?;
318        let json: SearchfoxResponse = serde_json::from_str(&response_text)?;
319        let mut file_locations = Vec::new();
320
321        debug!("Analyzing search results...");
322
323        for (key, value) in &json {
324            if key.starts_with('*') {
325                continue;
326            }
327
328            if let Some(files_array) = value.as_array() {
329                debug!("Found {} files in array for key {}", files_array.len(), key);
330                for file in files_array {
331                    match serde_json::from_value::<File>(file.clone()) {
332                        Ok(file) => {
333                            if !options.matches_language_filter(&file.path) {
334                                continue;
335                            }
336
337                            debug!(
338                                "Processing file: {} with {} lines",
339                                file.path,
340                                file.lines.len()
341                            );
342                            for line in file.lines {
343                                if crate::utils::is_potential_definition(&line, symbol) {
344                                    debug!(
345                                        "Found potential definition: {}:{} - {}",
346                                        file.path,
347                                        line.lno,
348                                        line.line.trim()
349                                    );
350                                    file_locations.push((file.path.clone(), line.lno));
351                                }
352                            }
353                        }
354                        Err(e) => {
355                            warn!("Failed to parse file JSON: {e}");
356                        }
357                    }
358                }
359            } else if let Some(categories) = value.as_object() {
360                let symbol_name = symbol.strip_prefix("id:").unwrap_or(symbol);
361                let is_method_search = symbol_name.contains("::");
362
363                if !is_method_search {
364                    let class_def_key = format!("Definitions ({symbol_name})");
365                    if let Some(files_array) =
366                        categories.get(&class_def_key).and_then(|v| v.as_array())
367                    {
368                        for file in files_array {
369                            match serde_json::from_value::<File>(file.clone()) {
370                                Ok(file) => {
371                                    if !options.matches_language_filter(&file.path) {
372                                        continue;
373                                    }
374
375                                    for line in file.lines {
376                                        if line.line.contains("class ")
377                                            || line.line.contains("struct ")
378                                        {
379                                            debug!(
380                                                "Found class/struct definition: {}:{} - {}",
381                                                file.path,
382                                                line.lno,
383                                                line.line.trim()
384                                            );
385                                            file_locations.push((file.path.clone(), line.lno));
386                                        }
387                                    }
388                                }
389                                Err(_) => continue,
390                            }
391                        }
392                    }
393                }
394
395                let search_order = if is_method_search {
396                    vec!["Definitions", "Declarations"]
397                } else {
398                    vec!["Declarations", "Definitions"]
399                };
400
401                for search_type in search_order {
402                    for (category_name, category_value) in categories {
403                        if !is_method_search {
404                            let class_def_key = format!("Definitions ({symbol_name})");
405                            if category_name == &class_def_key {
406                                continue;
407                            }
408                        }
409
410                        if category_name.contains(search_type)
411                            && (category_name.contains(symbol_name)
412                                || category_name
413                                    .to_lowercase()
414                                    .contains(&symbol_name.to_lowercase()))
415                        {
416                            if let Some(files_array) = category_value.as_array() {
417                                for file in files_array {
418                                    match serde_json::from_value::<File>(file.clone()) {
419                                        Ok(file) => {
420                                            if !options.matches_language_filter(&file.path) {
421                                                continue;
422                                            }
423
424                                            for line in file.lines {
425                                                if let Some(upsearch) = &line.upsearch {
426                                                    if upsearch.starts_with("symbol:_Z") {
427                                                        return Ok(vec![(
428                                                            file.path.clone(),
429                                                            line.lno,
430                                                        )]);
431                                                    }
432                                                }
433                                                file_locations.push((file.path.clone(), line.lno));
434                                            }
435                                        }
436                                        Err(_) => continue,
437                                    }
438                                }
439                            }
440                        }
441                    }
442
443                    if !file_locations.is_empty() {
444                        break;
445                    }
446                }
447            }
448        }
449
450        Ok(file_locations)
451    }
452}