searchfox_lib/
search.rs

1use crate::client::SearchfoxClient;
2use crate::types::{File, SearchfoxResponse};
3use anyhow::Result;
4use log::{debug, warn};
5use reqwest::Url;
6
/// Options describing one searchfox query plus the client-side filtering
/// applied to its results.
#[derive(Debug, Clone)]
pub struct SearchOptions {
    /// Free-form query text; may already contain searchfox terms such as
    /// `path:` or `symbol:`.
    pub query: Option<String>,
    /// Path filter, sent to the server as the `path` URL parameter.
    pub path: Option<String>,
    /// Value of the `case` URL parameter.
    pub case: bool,
    /// Value of the `regexp` URL parameter.
    pub regexp: bool,
    /// Maximum number of results to return.
    pub limit: usize,
    /// Number of context lines requested through a `context:` term.
    pub context: Option<usize>,
    /// Symbol to look up through a `symbol:` query.
    pub symbol: Option<String>,
    /// Identifier to look up through an `id:` query.
    pub id: Option<String>,
    /// Keep C++ files (`.cc`, `.cpp`, `.h`, `.hh`, `.hpp`).
    pub cpp: bool,
    /// Keep C files (`.c`, `.h`).
    pub c_lang: bool,
    /// Keep WebIDL files (`.webidl`).
    pub webidl: bool,
    /// Keep JavaScript/TypeScript files (`.js`, `.mjs`, `.ts`, `.cjs`, `.jsx`, `.tsx`).
    pub js: bool,
}

impl Default for SearchOptions {
    /// No query, no filters, and a result limit of 50.
    fn default() -> Self {
        Self {
            query: None,
            path: None,
            case: false,
            regexp: false,
            limit: 50,
            context: None,
            symbol: None,
            id: None,
            cpp: false,
            c_lang: false,
            webidl: false,
            js: false,
        }
    }
}

impl SearchOptions {
    /// Returns `true` when any whitespace-separated term of `query` starts
    /// with one of `prefixes`.
    ///
    /// Matching at term boundaries (instead of anywhere in the string) keeps
    /// ordinary text such as `Grid::Layout` or `core:x` from being mistaken
    /// for an `id:` or `re:` term.
    fn has_prefixed_term(query: &str, prefixes: &[&str]) -> bool {
        query
            .split_whitespace()
            .any(|term| prefixes.iter().any(|prefix| term.starts_with(*prefix)))
    }

    /// Check if this search is expensive (doesn't use searchfox's index).
    ///
    /// A search is considered cheap when `symbol` or `id` is set, or when the
    /// query text contains a `symbol:` / `id:` term. A search with no query
    /// text at all is never reported as expensive.
    pub fn is_expensive_search(&self) -> bool {
        // Only symbol: and id: prefixes use the optimized index
        if self.symbol.is_some() || self.id.is_some() {
            return false;
        }

        match &self.query {
            Some(query) => !Self::has_prefixed_term(query, &["symbol:", "id:"]),
            None => false,
        }
    }

    /// Returns `true` when `path` passes the enabled language filters.
    ///
    /// With no language flag set every path is accepted. Otherwise the path
    /// must end (case-insensitively) with an extension belonging to at least
    /// one enabled language.
    pub fn matches_language_filter(&self, path: &str) -> bool {
        if !(self.cpp || self.c_lang || self.webidl || self.js) {
            return true;
        }

        let lowered = path.to_lowercase();
        let has_extension =
            |extensions: &[&str]| extensions.iter().any(|ext| lowered.ends_with(*ext));

        (self.cpp && has_extension(&[".cc", ".cpp", ".h", ".hh", ".hpp"]))
            || (self.c_lang && has_extension(&[".c", ".h"]))
            || (self.webidl && has_extension(&[".webidl"]))
            || (self.js && has_extension(&[".js", ".mjs", ".ts", ".cjs", ".jsx", ".tsx"]))
    }

    /// Builds the text sent as the `q` parameter.
    ///
    /// Precedence: `symbol` (as `symbol:<name>`), then `id` (as `id:<name>`),
    /// then `query`. A query that already carries an explicit term
    /// (`path:`, `pathre:`, `symbol:`, `id:`, `text:`, `re:`) is passed
    /// through untouched; a plain query is wrapped as
    /// `context:<n> text:<query>` when `context` is set. With nothing set the
    /// result is an empty string.
    pub fn build_query(&self) -> String {
        if let Some(symbol) = &self.symbol {
            return format!("symbol:{symbol}");
        }
        if let Some(id) = &self.id {
            return format!("id:{id}");
        }

        let Some(q) = &self.query else {
            return String::new();
        };

        let has_explicit_term = Self::has_prefixed_term(
            q,
            &["path:", "pathre:", "symbol:", "id:", "text:", "re:"],
        );

        match self.context {
            Some(context) if !has_explicit_term => format!("context:{context} text:{q}"),
            _ => q.clone(),
        }
    }
}
121
/// One hit produced by a search.
///
/// For path-only searches only `path` is meaningful: `line_number` is `0`
/// and `line` is empty.
#[derive(Debug, Clone)]
pub struct SearchResult {
    /// Path of the matching file, as reported by the server.
    pub path: String,
    /// Line number of the match (`0` for path-only results).
    pub line_number: usize,
    /// Text of the matching line with trailing whitespace removed.
    pub line: String,
}
127
128impl SearchfoxClient {
129    /// Warns about expensive searches to stderr (for library users)
130    pub fn warn_if_expensive_search(&self, options: &SearchOptions) {
131        if options.is_expensive_search() {
132            if let Some(query) = &options.query {
133                eprintln!("⚠️  WARNING: Expensive full-text search detected");
134                eprintln!("Query '{}' doesn't use searchfox's optimized index", query);
135                eprintln!("Consider using symbol: or id: prefixes, or use ripgrep locally");
136                eprintln!("For LLM tools: Use find_and_display_definition() for definitions");
137            }
138        }
139    }
140
141    pub async fn search(&self, options: &SearchOptions) -> Result<Vec<SearchResult>> {
142        let query = options.build_query();
143
144        let mut url = Url::parse(&format!("https://searchfox.org/{}/search", self.repo))?;
145        url.query_pairs_mut()
146            .append_pair("q", &query)
147            .append_pair("case", if options.case { "true" } else { "false" })
148            .append_pair("regexp", if options.regexp { "true" } else { "false" });
149        if let Some(path) = &options.path {
150            url.query_pairs_mut().append_pair("path", path);
151        }
152
153        let response = self.get(url).await?;
154
155        if !response.status().is_success() {
156            anyhow::bail!("Request failed: {}", response.status());
157        }
158
159        let response_text = response.text().await?;
160        let json: SearchfoxResponse = serde_json::from_str(&response_text)?;
161
162        let mut results = Vec::new();
163        let mut count = 0;
164
165        for (key, value) in &json {
166            if key.starts_with('*') {
167                continue;
168            }
169
170            if let Some(files_array) = value.as_array() {
171                for file in files_array {
172                    let file: File = match serde_json::from_value(file.clone()) {
173                        Ok(f) => f,
174                        Err(e) => {
175                            warn!("Failed to parse file JSON: {e}");
176                            continue;
177                        }
178                    };
179
180                    if !options.matches_language_filter(&file.path) {
181                        continue;
182                    }
183
184                    if options.path.is_some()
185                        && options.query.is_none()
186                        && options.symbol.is_none()
187                        && options.id.is_none()
188                    {
189                        if count >= options.limit {
190                            break;
191                        }
192                        results.push(SearchResult {
193                            path: file.path.clone(),
194                            line_number: 0,
195                            line: String::new(),
196                        });
197                        count += 1;
198                    } else {
199                        for line in file.lines {
200                            if count >= options.limit {
201                                break;
202                            }
203                            results.push(SearchResult {
204                                path: file.path.clone(),
205                                line_number: line.lno,
206                                line: line.line.trim_end().to_string(),
207                            });
208                            count += 1;
209                        }
210                    }
211                }
212            } else if let Some(obj) = value.as_object() {
213                for (_category, file_list) in obj {
214                    if let Some(files) = file_list.as_array() {
215                        for file in files {
216                            let file: File = match serde_json::from_value(file.clone()) {
217                                Ok(f) => f,
218                                Err(_) => continue,
219                            };
220
221                            if !options.matches_language_filter(&file.path) {
222                                continue;
223                            }
224
225                            if options.path.is_some()
226                                && options.query.is_none()
227                                && options.symbol.is_none()
228                                && options.id.is_none()
229                            {
230                                if count >= options.limit {
231                                    break;
232                                }
233                                results.push(SearchResult {
234                                    path: file.path.clone(),
235                                    line_number: 0,
236                                    line: String::new(),
237                                });
238                                count += 1;
239                            } else {
240                                for line in file.lines {
241                                    if count >= options.limit {
242                                        break;
243                                    }
244                                    results.push(SearchResult {
245                                        path: file.path.clone(),
246                                        line_number: line.lno,
247                                        line: line.line.trim_end().to_string(),
248                                    });
249                                    count += 1;
250                                }
251                            }
252                        }
253                    }
254                }
255            }
256
257            if count >= options.limit {
258                break;
259            }
260        }
261
262        Ok(results)
263    }
264
265    pub async fn find_symbol_locations(
266        &self,
267        symbol: &str,
268        path_filter: Option<&str>,
269        options: &SearchOptions,
270    ) -> Result<Vec<(String, usize)>> {
271        let query = format!("id:{symbol}");
272        let mut url = Url::parse(&format!("https://searchfox.org/{}/search", self.repo))?;
273        url.query_pairs_mut().append_pair("q", &query);
274        if let Some(path) = path_filter {
275            url.query_pairs_mut().append_pair("path", path);
276        }
277
278        let response = self.get(url).await?;
279
280        if !response.status().is_success() {
281            anyhow::bail!("Request failed: {}", response.status());
282        }
283
284        let response_text = response.text().await?;
285        let json: SearchfoxResponse = serde_json::from_str(&response_text)?;
286        let mut file_locations = Vec::new();
287
288        debug!("Analyzing search results...");
289
290        for (key, value) in &json {
291            if key.starts_with('*') {
292                continue;
293            }
294
295            if let Some(files_array) = value.as_array() {
296                debug!("Found {} files in array for key {}", files_array.len(), key);
297                for file in files_array {
298                    match serde_json::from_value::<File>(file.clone()) {
299                        Ok(file) => {
300                            if !options.matches_language_filter(&file.path) {
301                                continue;
302                            }
303
304                            debug!(
305                                "Processing file: {} with {} lines",
306                                file.path,
307                                file.lines.len()
308                            );
309                            for line in file.lines {
310                                if crate::utils::is_potential_definition(&line, symbol) {
311                                    debug!(
312                                        "Found potential definition: {}:{} - {}",
313                                        file.path,
314                                        line.lno,
315                                        line.line.trim()
316                                    );
317                                    file_locations.push((file.path.clone(), line.lno));
318                                }
319                            }
320                        }
321                        Err(e) => {
322                            warn!("Failed to parse file JSON: {e}");
323                        }
324                    }
325                }
326            } else if let Some(categories) = value.as_object() {
327                let symbol_name = symbol.strip_prefix("id:").unwrap_or(symbol);
328                let is_method_search = symbol_name.contains("::");
329
330                if !is_method_search {
331                    let class_def_key = format!("Definitions ({symbol_name})");
332                    if let Some(files_array) =
333                        categories.get(&class_def_key).and_then(|v| v.as_array())
334                    {
335                        for file in files_array {
336                            match serde_json::from_value::<File>(file.clone()) {
337                                Ok(file) => {
338                                    if !options.matches_language_filter(&file.path) {
339                                        continue;
340                                    }
341
342                                    for line in file.lines {
343                                        if line.line.contains("class ")
344                                            || line.line.contains("struct ")
345                                        {
346                                            debug!(
347                                                "Found class/struct definition: {}:{} - {}",
348                                                file.path,
349                                                line.lno,
350                                                line.line.trim()
351                                            );
352                                            file_locations.push((file.path.clone(), line.lno));
353                                        }
354                                    }
355                                }
356                                Err(_) => continue,
357                            }
358                        }
359                    }
360                }
361
362                let search_order = if is_method_search {
363                    vec!["Definitions", "Declarations"]
364                } else {
365                    vec!["Declarations", "Definitions"]
366                };
367
368                for search_type in search_order {
369                    for (category_name, category_value) in categories {
370                        if !is_method_search {
371                            let class_def_key = format!("Definitions ({symbol_name})");
372                            if category_name == &class_def_key {
373                                continue;
374                            }
375                        }
376
377                        if category_name.contains(search_type)
378                            && (category_name.contains(symbol_name)
379                                || category_name
380                                    .to_lowercase()
381                                    .contains(&symbol_name.to_lowercase()))
382                        {
383                            if let Some(files_array) = category_value.as_array() {
384                                for file in files_array {
385                                    match serde_json::from_value::<File>(file.clone()) {
386                                        Ok(file) => {
387                                            if !options.matches_language_filter(&file.path) {
388                                                continue;
389                                            }
390
391                                            for line in file.lines {
392                                                if let Some(upsearch) = &line.upsearch {
393                                                    if upsearch.starts_with("symbol:_Z") {
394                                                        return Ok(vec![(
395                                                            file.path.clone(),
396                                                            line.lno,
397                                                        )]);
398                                                    }
399                                                }
400                                                file_locations.push((file.path.clone(), line.lno));
401                                            }
402                                        }
403                                        Err(_) => continue,
404                                    }
405                                }
406                            }
407                        }
408                    }
409
410                    if !file_locations.is_empty() {
411                        break;
412                    }
413                }
414            }
415        }
416
417        Ok(file_locations)
418    }
419}