Skip to main content

gobby_code/search/fts/
counts.rs

1use postgres::Client;
2
3use crate::config::Context;
4
5use super::common::{
6    PgParam, SymbolFilters, param_refs, push_param, push_path_filter, push_symbol_filters,
7    push_visible_project_file_filter, query_count, sanitize_pg_search_query,
8};
9
10pub fn count_text(
11    conn: &mut Client,
12    query: &str,
13    project_id: &str,
14    language: Option<&str>,
15    paths: &[String],
16) -> usize {
17    if query.trim().is_empty() {
18        return 0;
19    }
20
21    let bm25_query = sanitize_pg_search_query(query);
22    if bm25_query.is_empty() {
23        log::warn!("BM25 symbol count skipped because query contains no pg_search terms");
24        return 0;
25    }
26
27    let mut params = Vec::new();
28    let query_placeholder = push_param(&mut params, bm25_query);
29    let project_placeholder = push_param(&mut params, project_id.to_string());
30    let mut conditions = vec![
31        format!(
32            "(cs.name @@@ {q} OR cs.qualified_name @@@ {q} OR cs.signature @@@ {q} OR cs.docstring @@@ {q} OR cs.summary @@@ {q})",
33            q = query_placeholder
34        ),
35        format!("cs.project_id = {project_placeholder}"),
36    ];
37    let path_filter_requires_post_filter = push_symbol_filters(
38        &mut conditions,
39        &mut params,
40        "cs",
41        SymbolFilters {
42            kind: None,
43            language,
44            paths,
45        },
46    );
47    if path_filter_requires_post_filter {
48        return count_symbol_file_path_rows(conn, conditions, params, paths).unwrap_or(0);
49    }
50    let refs = param_refs(&params);
51    let sql = format!(
52        "SELECT COUNT(*)::BIGINT AS count
53         FROM code_symbols cs
54         JOIN code_indexed_files cf
55           ON cf.project_id = cs.project_id AND cf.file_path = cs.file_path
56         WHERE {}",
57        conditions.join(" AND ")
58    );
59    match conn.query_one(&sql, &refs) {
60        Ok(row) => row.try_get::<_, i64>("count").unwrap_or(0) as usize,
61        Err(error) => {
62            log::error!("BM25 symbol count failed; pg_search is required: {error}");
63            0
64        }
65    }
66}
67
68/// Count matching content chunks using pg_search BM25.
69pub fn count_content(
70    conn: &mut Client,
71    query: &str,
72    project_id: &str,
73    language: Option<&str>,
74    paths: &[String],
75) -> usize {
76    if query.trim().is_empty() {
77        return 0;
78    }
79
80    let bm25_query = sanitize_pg_search_query(query);
81    if bm25_query.is_empty() {
82        log::warn!("BM25 content count skipped because query contains no pg_search terms");
83        return 0;
84    }
85    let mut params = Vec::new();
86    let query_placeholder = push_param(&mut params, bm25_query);
87    let project_placeholder = push_param(&mut params, project_id.to_string());
88    let mut conditions = vec![
89        format!("c.content @@@ {query_placeholder}"),
90        format!("c.project_id = {project_placeholder}"),
91    ];
92    if let Some(lang) = language {
93        let placeholder = push_param(&mut params, lang.to_string());
94        conditions.push(format!("c.language = {placeholder}"));
95    }
96    push_path_filter(&mut conditions, &mut params, "c", paths);
97    let refs = param_refs(&params);
98    let sql = format!(
99        "SELECT COUNT(*)::BIGINT AS count
100         FROM code_content_chunks c
101         JOIN code_indexed_files cf
102           ON cf.project_id = c.project_id AND cf.file_path = c.file_path
103         WHERE {}",
104        conditions.join(" AND ")
105    );
106    match conn.query_one(&sql, &refs) {
107        Ok(row) => row.try_get::<_, i64>("count").unwrap_or(0) as usize,
108        Err(error) => {
109            log::error!("BM25 content count failed; pg_search is required: {error}");
110            0
111        }
112    }
113}
114
115fn count_visible_symbols_by_conditions(
116    conn: &mut Client,
117    ctx: &Context,
118    mut conditions: Vec<String>,
119    mut params: Vec<PgParam>,
120    language: Option<&str>,
121    paths: &[String],
122) -> Result<usize, postgres::Error> {
123    let path_filter_requires_post_filter = push_symbol_filters(
124        &mut conditions,
125        &mut params,
126        "cs",
127        SymbolFilters {
128            kind: None,
129            language,
130            paths,
131        },
132    );
133    push_visible_project_file_filter(&mut conditions, &mut params, "cs", "cf", ctx);
134    if path_filter_requires_post_filter {
135        return count_symbol_file_path_rows(conn, conditions, params, paths);
136    }
137    let sql = format!(
138        "SELECT COUNT(*)::BIGINT AS count
139         FROM code_symbols cs
140         JOIN code_indexed_files cf
141           ON cf.project_id = cs.project_id AND cf.file_path = cs.file_path
142         WHERE {}",
143        conditions.join(" AND ")
144    );
145    query_count(conn, &sql, &params)
146}
147
148fn count_symbol_file_path_rows(
149    conn: &mut Client,
150    mut conditions: Vec<String>,
151    mut params: Vec<PgParam>,
152    paths: &[String],
153) -> Result<usize, postgres::Error> {
154    push_pg_regex_path_filter(&mut conditions, &mut params, "cs", paths);
155    let sql = format!(
156        "SELECT COUNT(*)::BIGINT AS count
157         FROM code_symbols cs
158         JOIN code_indexed_files cf
159           ON cf.project_id = cs.project_id AND cf.file_path = cs.file_path
160         WHERE {}",
161        conditions.join(" AND ")
162    );
163    query_count(conn, &sql, &params)
164}
165
166fn push_pg_regex_path_filter(
167    conditions: &mut Vec<String>,
168    params: &mut Vec<PgParam>,
169    alias: &str,
170    paths: &[String],
171) {
172    if paths.is_empty() {
173        return;
174    }
175    let regexes = paths
176        .iter()
177        .filter_map(|path| match glob_to_pg_regex(path) {
178            Some(regex) => Some(regex),
179            None => {
180                log::warn!("omitting invalid post-query count path glob `{path}`");
181                None
182            }
183        })
184        .collect::<Vec<_>>();
185    if regexes.is_empty() {
186        conditions.push("FALSE".to_string());
187        return;
188    }
189    let placeholder = push_param(params, regexes);
190    conditions.push(format!("{alias}.file_path ~ ANY({placeholder}::TEXT[])"));
191}
192
/// Convert a post-query count path glob into an anchored PostgreSQL regex.
///
/// Supported glob syntax:
/// - `*` matches any run of non-`/` characters (one path segment fragment);
/// - two or more consecutive `*` match anything, including `/`;
/// - `?` matches exactly one non-`/` character;
/// - `[...]` is a bracket class, with `[!...]` or `[^...]` negating it. A `]`
///   placed first in the class (after the negation marker, if any) is a
///   literal member, and a backslash inside the class is a literal backslash.
///
/// Every other character is matched literally; regex metacharacters are
/// escaped. Outside a class a backslash is a literal backslash, not a glob
/// escape.
///
/// Returns `None` when a bracket class is never closed. This includes `[]`
/// and `[!]`, which would otherwise become `[]` / `[^]` and be rejected by
/// PostgreSQL when the query runs.
///
/// NOTE(review): `**` is emitted as a bare `.*`, so `src/**/*.rs` still
/// requires a `/` after the `**` part and does not match `src/main.rs`.
/// Bracket classes are passed through and can therefore match `/`.
fn glob_to_pg_regex(pattern: &str) -> Option<String> {
    let mut regex = String::with_capacity(pattern.len() + 2);
    regex.push('^');
    let mut chars = pattern.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '*' => {
                // Collapse any run of stars; a run longer than one crosses `/`.
                let mut crosses_segments = false;
                while chars.next_if_eq(&'*').is_some() {
                    crosses_segments = true;
                }
                regex.push_str(if crosses_segments { ".*" } else { "[^/]*" });
            }
            '?' => regex.push_str("[^/]"),
            '[' => {
                regex.push('[');
                if chars.next_if(|c| matches!(*c, '!' | '^')).is_some() {
                    regex.push('^');
                }
                // A `]` in first position is a member of the class, not its
                // terminator. PostgreSQL bracket expressions use the same
                // rule, so it is copied through unchanged.
                if chars.next_if_eq(&']').is_some() {
                    regex.push(']');
                }
                let mut closed = false;
                for class_ch in chars.by_ref() {
                    match class_ch {
                        ']' => {
                            regex.push(']');
                            closed = true;
                            break;
                        }
                        // Backslash introduces an escape inside PostgreSQL
                        // ARE bracket expressions; keep it literal, as it is
                        // outside classes.
                        '\\' => regex.push_str("\\\\"),
                        other => regex.push(other),
                    }
                }
                if !closed {
                    return None;
                }
            }
            '\\' => regex.push_str("\\\\"),
            '.' | '+' | '(' | ')' | '|' | '^' | '$' | '{' | '}' | ']' => {
                regex.push('\\');
                regex.push(ch);
            }
            ch => regex.push(ch),
        }
    }
    regex.push('$');
    Some(regex)
}
244
245fn count_symbols_fts_visible(
246    conn: &mut Client,
247    bm25_query: &str,
248    ctx: &Context,
249    language: Option<&str>,
250    paths: &[String],
251) -> Result<usize, postgres::Error> {
252    let mut params = Vec::new();
253    let query_placeholder = push_param(&mut params, bm25_query.to_string());
254    let conditions = vec![format!(
255        "(cs.name @@@ {q} OR cs.qualified_name @@@ {q} OR cs.signature @@@ {q} OR cs.docstring @@@ {q} OR cs.summary @@@ {q})",
256        q = query_placeholder
257    )];
258    count_visible_symbols_by_conditions(conn, ctx, conditions, params, language, paths)
259}
260
261fn push_content_filters(
262    conditions: &mut Vec<String>,
263    params: &mut Vec<PgParam>,
264    alias: &str,
265    language: Option<&str>,
266    paths: &[String],
267) {
268    if let Some(lang) = language {
269        let placeholder = push_param(params, lang.to_string());
270        conditions.push(format!("{alias}.language = {placeholder}"));
271    }
272    push_path_filter(conditions, params, alias, paths);
273}
274
275fn count_visible_content_by_conditions(
276    conn: &mut Client,
277    ctx: &Context,
278    mut conditions: Vec<String>,
279    mut params: Vec<PgParam>,
280    language: Option<&str>,
281    paths: &[String],
282) -> Result<usize, postgres::Error> {
283    push_content_filters(&mut conditions, &mut params, "c", language, paths);
284    push_visible_project_file_filter(&mut conditions, &mut params, "c", "cf", ctx);
285    let sql = format!(
286        "SELECT COUNT(*)::BIGINT AS count
287         FROM code_content_chunks c
288         JOIN code_indexed_files cf
289           ON cf.project_id = c.project_id AND cf.file_path = c.file_path
290         WHERE {}",
291        conditions.join(" AND ")
292    );
293    query_count(conn, &sql, &params)
294}
295
296fn count_content_bm25_visible(
297    conn: &mut Client,
298    bm25_query: &str,
299    ctx: &Context,
300    language: Option<&str>,
301    paths: &[String],
302) -> Result<usize, postgres::Error> {
303    let mut params = Vec::new();
304    let query_placeholder = push_param(&mut params, bm25_query.to_string());
305    let conditions = vec![format!("c.content @@@ {query_placeholder}")];
306    count_visible_content_by_conditions(conn, ctx, conditions, params, language, paths)
307}
308
309pub fn count_text_visible(
310    conn: &mut Client,
311    query: &str,
312    ctx: &Context,
313    language: Option<&str>,
314    paths: &[String],
315) -> usize {
316    if query.trim().is_empty() {
317        return 0;
318    }
319
320    let bm25_query = sanitize_pg_search_query(query);
321    if bm25_query.is_empty() {
322        log::warn!("visible BM25 symbol count skipped because query contains no pg_search terms");
323        return 0;
324    }
325
326    match count_symbols_fts_visible(conn, &bm25_query, ctx, language, paths) {
327        Ok(count) => count,
328        Err(error) => {
329            log::error!("visible BM25 symbol count failed; pg_search is required: {error}");
330            0
331        }
332    }
333}
334
335pub fn count_content_visible(
336    conn: &mut Client,
337    query: &str,
338    ctx: &Context,
339    language: Option<&str>,
340    paths: &[String],
341) -> usize {
342    if query.trim().is_empty() {
343        return 0;
344    }
345
346    let bm25_query = sanitize_pg_search_query(query);
347    if bm25_query.is_empty() {
348        log::warn!("visible BM25 content count skipped because query contains no pg_search terms");
349        return 0;
350    }
351
352    match count_content_bm25_visible(conn, &bm25_query, ctx, language, paths) {
353        Ok(count) => count,
354        Err(error) => {
355            log::error!("visible BM25 content count failed; pg_search is required: {error}");
356            0
357        }
358    }
359}
360
#[cfg(test)]
mod tests {
    use super::glob_to_pg_regex;

    /// Each glob must translate to the exact anchored, escaped regex listed
    /// next to it; `None` marks a glob that must be rejected.
    #[test]
    fn glob_to_pg_regex_anchors_and_escapes_patterns() {
        let cases: [(&str, Option<&str>); 6] = [
            ("*.rs", Some("^[^/]*\\.rs$")),
            ("src/foo?.[ch]", Some("^src/foo[^/]\\.[ch]$")),
            ("src/literal].rs", Some("^src/literal\\]\\.rs$")),
            ("src/**/*.rs", Some("^src/.*/[^/]*\\.rs$")),
            ("src/***/main.rs", Some("^src/.*/main\\.rs$")),
            ("src/[", None),
        ];

        for &(glob, expected) in &cases {
            assert_eq!(glob_to_pg_regex(glob).as_deref(), expected, "glob `{glob}`");
        }
    }
}