Skip to main content

gobby_code/search/fts/
counts.rs

1use postgres::Client;
2
3use crate::config::Context;
4
5use super::common::{
6    PgParam, SymbolFilters, escape_like, param_refs, push_param, push_path_filter,
7    push_symbol_filters, push_visible_project_file_filter, query_count, sanitize_pg_search_query,
8};
9
10pub fn count_text(
11    conn: &mut Client,
12    query: &str,
13    project_id: &str,
14    language: Option<&str>,
15    paths: &[String],
16) -> usize {
17    if query.trim().is_empty() {
18        return 0;
19    }
20
21    let bm25_query = sanitize_pg_search_query(query);
22    // Intentional fallback: when BM25 sanitization empties the query, use
23    // count_symbols_by_name_like, which may count LIKE matches BM25 filtered out.
24    if bm25_query.is_empty() {
25        return count_symbols_by_name_like(conn, query, project_id, language, paths);
26    }
27
28    let mut params = Vec::new();
29    let query_placeholder = push_param(&mut params, bm25_query);
30    let project_placeholder = push_param(&mut params, project_id.to_string());
31    let mut conditions = vec![
32        format!(
33            "(cs.name @@@ {q} OR cs.qualified_name @@@ {q} OR cs.signature @@@ {q} OR cs.docstring @@@ {q} OR cs.summary @@@ {q})",
34            q = query_placeholder
35        ),
36        format!("cs.project_id = {project_placeholder}"),
37    ];
38    let path_filter_fallback = push_symbol_filters(
39        &mut conditions,
40        &mut params,
41        "cs",
42        SymbolFilters {
43            kind: None,
44            language,
45            paths,
46        },
47    );
48    if path_filter_fallback {
49        return count_symbol_file_path_rows(conn, conditions, params, paths).unwrap_or(0);
50    }
51    let refs = param_refs(&params);
52    let sql = format!(
53        "SELECT COUNT(*)::BIGINT AS count
54         FROM code_symbols cs
55         JOIN code_indexed_files cf
56           ON cf.project_id = cs.project_id AND cf.file_path = cs.file_path
57         WHERE {}",
58        conditions.join(" AND ")
59    );
60    match conn.query_one(&sql, &refs) {
61        Ok(row) => row.try_get::<_, i64>("count").unwrap_or(0) as usize,
62        Err(error) => {
63            log::warn!("BM25 symbol count failed; falling back to LIKE count: {error}");
64            count_symbols_by_name_like(conn, query, project_id, language, paths)
65        }
66    }
67}
68
69fn count_symbols_by_name_like(
70    conn: &mut Client,
71    query: &str,
72    project_id: &str,
73    language: Option<&str>,
74    paths: &[String],
75) -> usize {
76    let escaped_query = escape_like(query);
77    let pattern = format!("%{escaped_query}%");
78    let mut params = Vec::new();
79    let project_placeholder = push_param(&mut params, project_id.to_string());
80    let name_placeholder = push_param(&mut params, pattern.clone());
81    let qualified_placeholder = push_param(&mut params, pattern.clone());
82    let signature_placeholder = push_param(&mut params, pattern.clone());
83    let docstring_placeholder = push_param(&mut params, pattern.clone());
84    let summary_placeholder = push_param(&mut params, pattern);
85    let mut conditions = vec![
86        format!("cs.project_id = {project_placeholder}"),
87        format!(
88            "(cs.name LIKE {name_placeholder} ESCAPE '\\' OR cs.qualified_name LIKE {qualified_placeholder} ESCAPE '\\' OR cs.signature LIKE {signature_placeholder} ESCAPE '\\' OR cs.docstring LIKE {docstring_placeholder} ESCAPE '\\' OR cs.summary LIKE {summary_placeholder} ESCAPE '\\')"
89        ),
90    ];
91    let path_filter_fallback = push_symbol_filters(
92        &mut conditions,
93        &mut params,
94        "cs",
95        SymbolFilters {
96            kind: None,
97            language,
98            paths,
99        },
100    );
101    if path_filter_fallback {
102        return count_symbol_file_path_rows(conn, conditions, params, paths).unwrap_or(0);
103    }
104    let refs = param_refs(&params);
105    let sql = format!(
106        "SELECT COUNT(*)::BIGINT AS count
107         FROM code_symbols cs
108         JOIN code_indexed_files cf
109           ON cf.project_id = cs.project_id AND cf.file_path = cs.file_path
110         WHERE {}",
111        conditions.join(" AND ")
112    );
113    conn.query_one(&sql, &refs)
114        .ok()
115        .and_then(|row| row.try_get::<_, i64>("count").ok())
116        .unwrap_or(0) as usize
117}
118
119/// Count matching content chunks using pg_search BM25, with LIKE fallback.
120pub fn count_content(
121    conn: &mut Client,
122    query: &str,
123    project_id: &str,
124    language: Option<&str>,
125    paths: &[String],
126) -> usize {
127    if query.trim().is_empty() {
128        return 0;
129    }
130
131    let bm25_query = sanitize_pg_search_query(query);
132    if bm25_query.is_empty() {
133        return count_content_like(conn, query, project_id, language, paths);
134    }
135    let mut params = Vec::new();
136    let query_placeholder = push_param(&mut params, bm25_query);
137    let project_placeholder = push_param(&mut params, project_id.to_string());
138    let mut conditions = vec![
139        format!("c.content @@@ {query_placeholder}"),
140        format!("c.project_id = {project_placeholder}"),
141    ];
142    if let Some(lang) = language {
143        let placeholder = push_param(&mut params, lang.to_string());
144        conditions.push(format!("c.language = {placeholder}"));
145    }
146    push_path_filter(&mut conditions, &mut params, "c", paths);
147    let refs = param_refs(&params);
148    let sql = format!(
149        "SELECT COUNT(*)::BIGINT AS count
150         FROM code_content_chunks c
151         JOIN code_indexed_files cf
152           ON cf.project_id = c.project_id AND cf.file_path = c.file_path
153         WHERE {}",
154        conditions.join(" AND ")
155    );
156    match conn.query_one(&sql, &refs) {
157        Ok(row) => row.try_get::<_, i64>("count").unwrap_or(0) as usize,
158        Err(error) => {
159            log::warn!("BM25 content count failed; falling back to LIKE count: {error}");
160            count_content_like(conn, query, project_id, language, paths)
161        }
162    }
163}
164
165fn count_content_like(
166    conn: &mut Client,
167    query: &str,
168    project_id: &str,
169    language: Option<&str>,
170    paths: &[String],
171) -> usize {
172    let escaped_query = escape_like(query);
173    let like_query = format!("%{escaped_query}%");
174    let mut params = Vec::new();
175    let project_placeholder = push_param(&mut params, project_id.to_string());
176    let like_placeholder = push_param(&mut params, like_query);
177    let mut conditions = vec![
178        format!("c.project_id = {project_placeholder}"),
179        format!("c.content LIKE {like_placeholder} ESCAPE '\\'"),
180    ];
181    if let Some(lang) = language {
182        let placeholder = push_param(&mut params, lang.to_string());
183        conditions.push(format!("c.language = {placeholder}"));
184    }
185    push_path_filter(&mut conditions, &mut params, "c", paths);
186    let refs = param_refs(&params);
187    let sql = format!(
188        "SELECT COUNT(*)::BIGINT AS count
189         FROM code_content_chunks c
190         JOIN code_indexed_files cf
191           ON cf.project_id = c.project_id AND cf.file_path = c.file_path
192         WHERE {}",
193        conditions.join(" AND ")
194    );
195    conn.query_one(&sql, &refs)
196        .ok()
197        .and_then(|row| row.try_get::<_, i64>("count").ok())
198        .unwrap_or(0) as usize
199}
200
201fn count_visible_symbols_by_conditions(
202    conn: &mut Client,
203    ctx: &Context,
204    mut conditions: Vec<String>,
205    mut params: Vec<PgParam>,
206    language: Option<&str>,
207    paths: &[String],
208) -> Result<usize, postgres::Error> {
209    let path_filter_fallback = push_symbol_filters(
210        &mut conditions,
211        &mut params,
212        "cs",
213        SymbolFilters {
214            kind: None,
215            language,
216            paths,
217        },
218    );
219    push_visible_project_file_filter(&mut conditions, &mut params, "cs", "cf", ctx);
220    if path_filter_fallback {
221        return count_symbol_file_path_rows(conn, conditions, params, paths);
222    }
223    let sql = format!(
224        "SELECT COUNT(*)::BIGINT AS count
225         FROM code_symbols cs
226         JOIN code_indexed_files cf
227           ON cf.project_id = cs.project_id AND cf.file_path = cs.file_path
228         WHERE {}",
229        conditions.join(" AND ")
230    );
231    query_count(conn, &sql, &params)
232}
233
234fn count_symbol_file_path_rows(
235    conn: &mut Client,
236    mut conditions: Vec<String>,
237    mut params: Vec<PgParam>,
238    paths: &[String],
239) -> Result<usize, postgres::Error> {
240    push_pg_regex_path_filter(&mut conditions, &mut params, "cs", paths);
241    let sql = format!(
242        "SELECT COUNT(*)::BIGINT AS count
243         FROM code_symbols cs
244         JOIN code_indexed_files cf
245           ON cf.project_id = cs.project_id AND cf.file_path = cs.file_path
246         WHERE {}",
247        conditions.join(" AND ")
248    );
249    query_count(conn, &sql, &params)
250}
251
252fn push_pg_regex_path_filter(
253    conditions: &mut Vec<String>,
254    params: &mut Vec<PgParam>,
255    alias: &str,
256    paths: &[String],
257) {
258    if paths.is_empty() {
259        return;
260    }
261    let regexes = paths
262        .iter()
263        .filter_map(|path| match glob_to_pg_regex(path) {
264            Some(regex) => Some(regex),
265            None => {
266                log::warn!("omitting invalid post-query count path glob `{path}`");
267                None
268            }
269        })
270        .collect::<Vec<_>>();
271    if regexes.is_empty() {
272        conditions.push("FALSE".to_string());
273        return;
274    }
275    let placeholder = push_param(params, regexes);
276    conditions.push(format!("{alias}.file_path ~ ANY({placeholder}::TEXT[])"));
277}
278
/// Convert the post-query count path glob subset to an anchored PostgreSQL
/// regular expression.
///
/// Supported syntax is `*` for one path segment fragment, `**` (or a longer
/// run of stars) for any path depth, `?` for one non-slash character, and
/// bracket classes. Inside a class, a leading `!` or `^` negates it, a `]`
/// directly after the opening (or after the negation) is a literal member,
/// and `\` is escaped so PostgreSQL does not read it as a regex escape.
///
/// Returns `None` when a bracket class is never closed, which includes the
/// empty forms `[]` and `[!]`. Those previously produced a regex that
/// PostgreSQL rejects, failing the whole count query.
fn glob_to_pg_regex(pattern: &str) -> Option<String> {
    let mut regex = String::from("^");
    let mut chars = pattern.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '*' => {
                // Collapse a run of stars; two or more cross directory levels.
                let mut star_count = 1usize;
                while chars.next_if_eq(&'*').is_some() {
                    star_count += 1;
                }
                regex.push_str(if star_count > 1 { ".*" } else { "[^/]*" });
            }
            '?' => regex.push_str("[^/]"),
            '[' => {
                regex.push('[');
                // Glob negation is `!`; `^` already means negation in regex.
                if chars.next_if(|&c| c == '!' || c == '^').is_some() {
                    regex.push('^');
                }
                // A `]` in first position is a class member, not the
                // terminator, in both glob and POSIX bracket syntax.
                if chars.next_if_eq(&']').is_some() {
                    regex.push(']');
                }
                let mut closed = false;
                for class_ch in chars.by_ref() {
                    match class_ch {
                        ']' => {
                            regex.push(']');
                            closed = true;
                            break;
                        }
                        // Backslash introduces an escape inside PostgreSQL
                        // ARE bracket expressions, so double it to keep it
                        // literal.
                        '\\' => regex.push_str("\\\\"),
                        other => regex.push(other),
                    }
                }
                if !closed {
                    return None;
                }
            }
            '\\' => regex.push_str("\\\\"),
            '.' | '+' | '(' | ')' | '|' | '^' | '$' | '{' | '}' | ']' => {
                regex.push('\\');
                regex.push(ch);
            }
            other => regex.push(other),
        }
    }
    regex.push('$');
    Some(regex)
}
330
331fn count_symbols_fts_visible(
332    conn: &mut Client,
333    bm25_query: &str,
334    ctx: &Context,
335    language: Option<&str>,
336    paths: &[String],
337) -> Result<usize, postgres::Error> {
338    let mut params = Vec::new();
339    let query_placeholder = push_param(&mut params, bm25_query.to_string());
340    let conditions = vec![format!(
341        "(cs.name @@@ {q} OR cs.qualified_name @@@ {q} OR cs.signature @@@ {q} OR cs.docstring @@@ {q} OR cs.summary @@@ {q})",
342        q = query_placeholder
343    )];
344    count_visible_symbols_by_conditions(conn, ctx, conditions, params, language, paths)
345}
346
347fn count_symbols_by_name_like_visible(
348    conn: &mut Client,
349    query: &str,
350    ctx: &Context,
351    language: Option<&str>,
352    paths: &[String],
353) -> usize {
354    let escaped_query = escape_like(query);
355    let pattern = format!("%{escaped_query}%");
356    let mut params = Vec::new();
357    let name_placeholder = push_param(&mut params, pattern.clone());
358    let qualified_placeholder = push_param(&mut params, pattern.clone());
359    let signature_placeholder = push_param(&mut params, pattern.clone());
360    let docstring_placeholder = push_param(&mut params, pattern.clone());
361    let summary_placeholder = push_param(&mut params, pattern);
362    let conditions = vec![format!(
363        "(cs.name LIKE {name_placeholder} ESCAPE '\\' OR cs.qualified_name LIKE {qualified_placeholder} ESCAPE '\\' OR cs.signature LIKE {signature_placeholder} ESCAPE '\\' OR cs.docstring LIKE {docstring_placeholder} ESCAPE '\\' OR cs.summary LIKE {summary_placeholder} ESCAPE '\\')"
364    )];
365    count_visible_symbols_by_conditions(conn, ctx, conditions, params, language, paths).unwrap_or(0)
366}
367
368fn push_content_filters(
369    conditions: &mut Vec<String>,
370    params: &mut Vec<PgParam>,
371    alias: &str,
372    language: Option<&str>,
373    paths: &[String],
374) {
375    if let Some(lang) = language {
376        let placeholder = push_param(params, lang.to_string());
377        conditions.push(format!("{alias}.language = {placeholder}"));
378    }
379    push_path_filter(conditions, params, alias, paths);
380}
381
382fn count_visible_content_by_conditions(
383    conn: &mut Client,
384    ctx: &Context,
385    mut conditions: Vec<String>,
386    mut params: Vec<PgParam>,
387    language: Option<&str>,
388    paths: &[String],
389) -> Result<usize, postgres::Error> {
390    push_content_filters(&mut conditions, &mut params, "c", language, paths);
391    push_visible_project_file_filter(&mut conditions, &mut params, "c", "cf", ctx);
392    let sql = format!(
393        "SELECT COUNT(*)::BIGINT AS count
394         FROM code_content_chunks c
395         JOIN code_indexed_files cf
396           ON cf.project_id = c.project_id AND cf.file_path = c.file_path
397         WHERE {}",
398        conditions.join(" AND ")
399    );
400    query_count(conn, &sql, &params)
401}
402
403fn count_content_bm25_visible(
404    conn: &mut Client,
405    bm25_query: &str,
406    ctx: &Context,
407    language: Option<&str>,
408    paths: &[String],
409) -> Result<usize, postgres::Error> {
410    let mut params = Vec::new();
411    let query_placeholder = push_param(&mut params, bm25_query.to_string());
412    let conditions = vec![format!("c.content @@@ {query_placeholder}")];
413    count_visible_content_by_conditions(conn, ctx, conditions, params, language, paths)
414}
415
416fn count_content_like_visible(
417    conn: &mut Client,
418    query: &str,
419    ctx: &Context,
420    language: Option<&str>,
421    paths: &[String],
422) -> usize {
423    let escaped_query = escape_like(query);
424    let like_query = format!("%{escaped_query}%");
425    let mut params = Vec::new();
426    let like_placeholder = push_param(&mut params, like_query);
427    let conditions = vec![format!("c.content LIKE {like_placeholder} ESCAPE '\\'")];
428    count_visible_content_by_conditions(conn, ctx, conditions, params, language, paths).unwrap_or(0)
429}
430
431pub fn count_text_visible(
432    conn: &mut Client,
433    query: &str,
434    ctx: &Context,
435    language: Option<&str>,
436    paths: &[String],
437) -> usize {
438    if query.trim().is_empty() {
439        return 0;
440    }
441
442    let bm25_query = sanitize_pg_search_query(query);
443    if bm25_query.is_empty() {
444        return count_symbols_by_name_like_visible(conn, query, ctx, language, paths);
445    }
446
447    match count_symbols_fts_visible(conn, &bm25_query, ctx, language, paths) {
448        Ok(count) => count,
449        Err(error) => {
450            log::warn!("visible BM25 symbol count failed; falling back to LIKE count: {error}");
451            count_symbols_by_name_like_visible(conn, query, ctx, language, paths)
452        }
453    }
454}
455
456pub fn count_content_visible(
457    conn: &mut Client,
458    query: &str,
459    ctx: &Context,
460    language: Option<&str>,
461    paths: &[String],
462) -> usize {
463    if query.trim().is_empty() {
464        return 0;
465    }
466
467    let bm25_query = sanitize_pg_search_query(query);
468    if bm25_query.is_empty() {
469        return count_content_like_visible(conn, query, ctx, language, paths);
470    }
471
472    match count_content_bm25_visible(conn, &bm25_query, ctx, language, paths) {
473        Ok(count) => count,
474        Err(error) => {
475            log::warn!("visible BM25 content count failed; falling back to LIKE count: {error}");
476            count_content_like_visible(conn, query, ctx, language, paths)
477        }
478    }
479}
480
#[cfg(test)]
mod tests {
    use super::glob_to_pg_regex;

    /// Each glob must translate to the paired anchored regex, or to `None`
    /// when the glob is rejected.
    #[test]
    fn glob_to_pg_regex_anchors_and_escapes_patterns() {
        let cases: [(&str, Option<&str>); 6] = [
            ("*.rs", Some("^[^/]*\\.rs$")),
            ("src/foo?.[ch]", Some("^src/foo[^/]\\.[ch]$")),
            ("src/literal].rs", Some("^src/literal\\]\\.rs$")),
            ("src/**/*.rs", Some("^src/.*/[^/]*\\.rs$")),
            ("src/***/main.rs", Some("^src/.*/main\\.rs$")),
            ("src/[", None),
        ];
        for (glob, expected) in cases {
            assert_eq!(glob_to_pg_regex(glob).as_deref(), expected);
        }
    }
}