Skip to main content

gobby_code/search/
fts.rs

1//! PostgreSQL pg_search BM25 query sanitization and execution.
2//!
3//! The module name stays `fts` to keep command wiring stable; runtime keyword
4//! search is pg_search BM25 against Gobby's PostgreSQL hub.
5
6use std::collections::HashSet;
7
8use postgres::Client;
9use postgres::types::ToSql;
10
11use crate::db;
12use crate::models::{ContentSearchHit, SearchResult, Symbol};
13
/// Boxed, type-erased query parameter, so values of different types can share one `Vec`.
type PgParam = Box<dyn ToSql + Sync>;
15
/// A symbol that user input resolved to without ambiguity.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolvedGraphSymbol {
    /// Copy of the matched symbol's `id` field.
    pub id: String,
    /// Copy of the matched symbol's `name` field.
    pub display_name: String,
}
21
/// Optional filters layered on top of a symbol query's base conditions.
#[derive(Debug, Clone, Copy, Default)]
struct SymbolFilters<'a> {
    /// When set, adds an equality condition on the `kind` column.
    kind: Option<&'a str>,
    /// When set, adds an equality condition on the `language` column.
    language: Option<&'a str>,
    /// Path patterns; turned into `file_path LIKE` prefix predicates when every
    /// pattern starts with a literal prefix.
    paths: &'a [String],
}
28
/// Cap of 10,000 rows.
///
/// NOTE(review): presumably the fetch limit callers use when path filters need
/// post-query glob matching — confirm against the call sites.
pub const FILTERED_FETCH_CAP: usize = 10_000;
30
31fn push_param<T>(params: &mut Vec<PgParam>, value: T) -> String
32where
33    T: ToSql + Sync + 'static,
34{
35    params.push(Box::new(value));
36    format!("${}", params.len())
37}
38
39fn param_refs(params: &[PgParam]) -> Vec<&(dyn ToSql + Sync)> {
40    params
41        .iter()
42        .map(|param| param.as_ref() as &(dyn ToSql + Sync))
43        .collect()
44}
45
/// Prefix every LIKE metacharacter (`%`, `_`) and every backslash with a backslash.
///
/// All other characters are copied through unchanged.
fn escape_like(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            if ch == '\\' || ch == '%' || ch == '_' {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
57
58/// Extract a SQL LIKE prefix from a glob pattern for index-assisted pre-filtering.
59fn glob_to_like_prefix(pattern: &str) -> Option<String> {
60    let prefix: String = pattern
61        .chars()
62        .take_while(|c| !matches!(c, '*' | '?' | '['))
63        .collect();
64    if prefix.is_empty() {
65        None
66    } else {
67        Some(format!("{}%", escape_like(&prefix)))
68    }
69}
70
/// Report whether `path` contains any of the glob metacharacters `*`, `?`, or `[`.
fn has_glob_meta(path: &str) -> bool {
    const GLOB_META: &[char] = &['*', '?', '['];
    path.contains(GLOB_META)
}
74
75pub fn expand_paths(paths: &[String]) -> Vec<String> {
76    let mut expanded = Vec::new();
77    let mut seen = HashSet::new();
78    for path in paths {
79        let trimmed = path.trim().trim_end_matches('/');
80        if trimmed.is_empty() {
81            continue;
82        }
83
84        let patterns = if has_glob_meta(trimmed) {
85            vec![trimmed.to_string()]
86        } else {
87            vec![trimmed.to_string(), format!("{trimmed}/**")]
88        };
89        for pattern in patterns {
90            if seen.insert(pattern.clone()) {
91                expanded.push(pattern);
92            }
93        }
94    }
95    expanded
96}
97
98pub fn compile_patterns(paths: &[String]) -> anyhow::Result<Vec<glob::Pattern>> {
99    paths
100        .iter()
101        .map(|path| {
102            glob::Pattern::new(path).map_err(|e| anyhow::anyhow!("invalid path glob `{path}`: {e}"))
103        })
104        .collect()
105}
106
107fn path_like_prefixes(paths: &[String]) -> Option<Vec<String>> {
108    if paths.is_empty() {
109        return Some(Vec::new());
110    }
111
112    let mut prefixes = Vec::with_capacity(paths.len());
113    for path in paths {
114        prefixes.push(glob_to_like_prefix(path)?);
115    }
116    Some(prefixes)
117}
118
119pub fn path_filter_falls_back(paths: &[String]) -> bool {
120    !paths.is_empty() && path_like_prefixes(paths).is_none()
121}
122
123fn push_path_filter(
124    conditions: &mut Vec<String>,
125    params: &mut Vec<PgParam>,
126    alias: &str,
127    paths: &[String],
128) -> bool {
129    let Some(prefixes) = path_like_prefixes(paths) else {
130        for path in paths
131            .iter()
132            .filter(|path| glob_to_like_prefix(path).is_none())
133        {
134            log::warn!(
135                "omitting SQL path filter for alias `{alias}` because path filter `{path}` cannot be converted to a LIKE prefix; relying on post-query glob matching",
136            );
137        }
138        return true;
139    };
140    if prefixes.is_empty() {
141        return false;
142    }
143
144    let predicates = prefixes
145        .into_iter()
146        .map(|prefix| {
147            let placeholder = push_param(params, prefix);
148            format!("{alias}.file_path LIKE {placeholder} ESCAPE '\\'")
149        })
150        .collect::<Vec<_>>();
151    conditions.push(format!("({})", predicates.join(" OR ")));
152    false
153}
154
155fn push_symbol_filters(
156    conditions: &mut Vec<String>,
157    params: &mut Vec<PgParam>,
158    alias: &str,
159    filters: SymbolFilters<'_>,
160) {
161    if let Some(kind) = filters.kind {
162        let placeholder = push_param(params, kind.to_string());
163        conditions.push(format!("{alias}.kind = {placeholder}"));
164    }
165    if let Some(language) = filters.language {
166        let placeholder = push_param(params, language.to_string());
167        conditions.push(format!("{alias}.language = {placeholder}"));
168    }
169    push_path_filter(conditions, params, alias, filters.paths);
170}
171
172fn append_unique_symbols(
173    out: &mut Vec<Symbol>,
174    seen: &mut HashSet<String>,
175    symbols: Vec<Symbol>,
176    limit: usize,
177) {
178    for symbol in symbols {
179        if seen.insert(symbol.id.clone()) {
180            out.push(symbol);
181            if out.len() >= limit {
182                return;
183            }
184        }
185    }
186}
187
188fn query_symbols_by_conditions(
189    conn: &mut Client,
190    mut conditions: Vec<String>,
191    mut params: Vec<PgParam>,
192    filters: SymbolFilters<'_>,
193    limit: usize,
194    order_by: &str,
195) -> Vec<Symbol> {
196    push_symbol_filters(&mut conditions, &mut params, "cs", filters);
197    let limit_placeholder = push_param(&mut params, limit as i64);
198    let where_clause = conditions.join(" AND ");
199    let columns = db::symbol_select_columns("cs");
200    let sql = format!(
201        "SELECT {columns}
202         FROM code_symbols cs
203         JOIN code_indexed_files cf
204           ON cf.project_id = cs.project_id AND cf.file_path = cs.file_path
205         WHERE {where_clause}
206         ORDER BY {order_by}
207         LIMIT {limit_placeholder}"
208    );
209    let refs = param_refs(&params);
210    conn.query(&sql, &refs)
211        .ok()
212        .map(|rows| {
213            rows.iter()
214                .filter_map(|row| Symbol::from_row(row).ok())
215                .collect()
216        })
217        .unwrap_or_default()
218}
219
/// Reduce user input to plain search terms for pg_search's BM25 query DSL.
///
/// Only alphanumeric characters, `_`, and `-` survive. Every other character
/// acts as a separator. The remaining terms are joined with single spaces, so
/// the result is empty when the input has no usable characters.
pub fn sanitize_pg_search_query(query: &str) -> String {
    let is_separator = |ch: char| !(ch.is_alphanumeric() || ch == '_' || ch == '-');

    let mut sanitized = String::with_capacity(query.len());
    for term in query.split(is_separator).filter(|term| !term.is_empty()) {
        if !sanitized.is_empty() {
            sanitized.push(' ');
        }
        sanitized.push_str(term);
    }
    sanitized
}
238
239/// BM25 search across symbol names, qualified names, signatures, docstrings, and summaries.
240pub fn search_symbols_fts(
241    conn: &mut Client,
242    query: &str,
243    project_id: &str,
244    kind: Option<&str>,
245    language: Option<&str>,
246    paths: &[String],
247    limit: usize,
248) -> Vec<Symbol> {
249    let bm25_query = sanitize_pg_search_query(query);
250    if bm25_query.is_empty() || limit == 0 {
251        return Vec::new();
252    }
253
254    let mut params = Vec::new();
255    let query_placeholder = push_param(&mut params, bm25_query);
256    let project_placeholder = push_param(&mut params, project_id.to_string());
257    let conditions = vec![
258        format!(
259            "(cs.name @@@ {q} OR cs.qualified_name @@@ {q} OR cs.signature @@@ {q} OR cs.docstring @@@ {q} OR cs.summary @@@ {q})",
260            q = query_placeholder
261        ),
262        format!("cs.project_id = {project_placeholder}"),
263    ];
264    let filters = SymbolFilters {
265        kind,
266        language,
267        paths,
268    };
269    query_symbols_by_conditions(
270        conn,
271        conditions,
272        params,
273        filters,
274        limit,
275        "pdb.score(cs.id) DESC, cs.id ASC",
276    )
277}
278
279/// Fallback LIKE search on symbol names.
280pub fn search_symbols_by_name(
281    conn: &mut Client,
282    query: &str,
283    project_id: &str,
284    kind: Option<&str>,
285    language: Option<&str>,
286    paths: &[String],
287    limit: usize,
288) -> Vec<Symbol> {
289    if query.trim().is_empty() || limit == 0 {
290        return Vec::new();
291    }
292    let escaped_query = escape_like(query);
293    let pattern = format!("%{escaped_query}%");
294    let mut params = Vec::new();
295    let project_placeholder = push_param(&mut params, project_id.to_string());
296    let name_placeholder = push_param(&mut params, pattern.clone());
297    let qualified_placeholder = push_param(&mut params, pattern);
298    let conditions = vec![
299        format!("cs.project_id = {project_placeholder}"),
300        format!(
301            "(cs.name LIKE {name_placeholder} ESCAPE '\\' OR cs.qualified_name LIKE {qualified_placeholder} ESCAPE '\\')"
302        ),
303    ];
304    query_symbols_by_conditions(
305        conn,
306        conditions,
307        params,
308        SymbolFilters {
309            kind,
310            language,
311            paths,
312        },
313        limit,
314        "cs.name ASC, cs.file_path ASC, cs.line_start ASC",
315    )
316}
317
318pub fn search_symbols_exact_first(
319    conn: &mut Client,
320    query: &str,
321    project_id: &str,
322    kind: Option<&str>,
323    language: Option<&str>,
324    paths: &[String],
325    limit: usize,
326) -> Vec<Symbol> {
327    if query.trim().is_empty() || limit == 0 {
328        return Vec::new();
329    }
330
331    let mut results = Vec::new();
332    let mut seen = HashSet::new();
333    let filters = SymbolFilters {
334        kind,
335        language,
336        paths,
337    };
338
339    let mut params = Vec::new();
340    let project = push_param(&mut params, project_id.to_string());
341    let name = push_param(&mut params, query.to_string());
342    let qualified = push_param(&mut params, query.to_string());
343    let exact = query_symbols_by_conditions(
344        conn,
345        vec![
346            format!("cs.project_id = {project}"),
347            format!("(cs.name = {name} OR cs.qualified_name = {qualified})"),
348        ],
349        params,
350        filters,
351        limit,
352        "cs.file_path ASC, cs.line_start ASC",
353    );
354    append_unique_symbols(&mut results, &mut seen, exact, limit);
355    if results.len() >= limit {
356        return results;
357    }
358
359    let mut params = Vec::new();
360    let project = push_param(&mut params, project_id.to_string());
361    let name = push_param(&mut params, query.to_string());
362    let qualified = push_param(&mut params, query.to_string());
363    let ci_exact = query_symbols_by_conditions(
364        conn,
365        vec![
366            format!("cs.project_id = {project}"),
367            format!(
368                "(lower(cs.name) = lower({name}) OR lower(cs.qualified_name) = lower({qualified}))"
369            ),
370        ],
371        params,
372        filters,
373        limit,
374        "cs.file_path ASC, cs.line_start ASC",
375    );
376    append_unique_symbols(&mut results, &mut seen, ci_exact, limit);
377    if results.len() >= limit {
378        return results;
379    }
380
381    let prefix_pattern = format!("{}%", escape_like(query));
382    let mut params = Vec::new();
383    let project = push_param(&mut params, project_id.to_string());
384    let name = push_param(&mut params, prefix_pattern.clone());
385    let qualified = push_param(&mut params, prefix_pattern);
386    let prefix_matches = query_symbols_by_conditions(
387        conn,
388        vec![
389            format!("cs.project_id = {project}"),
390            format!(
391                "(cs.name LIKE {name} ESCAPE '\\' OR cs.qualified_name LIKE {qualified} ESCAPE '\\')"
392            ),
393        ],
394        params,
395        filters,
396        limit,
397        "cs.name ASC, cs.file_path ASC, cs.line_start ASC",
398    );
399    append_unique_symbols(&mut results, &mut seen, prefix_matches, limit);
400    if results.len() >= limit {
401        return results;
402    }
403
404    let contains = search_symbols_by_name(conn, query, project_id, kind, language, paths, limit);
405    append_unique_symbols(&mut results, &mut seen, contains, limit);
406    if results.len() >= limit {
407        return results;
408    }
409
410    let fts = search_symbols_fts(conn, query, project_id, kind, language, paths, limit);
411    append_unique_symbols(&mut results, &mut seen, fts, limit);
412
413    results
414}
415
416fn exact_symbol_matches(
417    conn: &mut Client,
418    project_id: &str,
419    column: &str,
420    input: &str,
421    limit: usize,
422) -> Vec<Symbol> {
423    if !matches!(column, "id" | "qualified_name" | "name") {
424        return Vec::new();
425    }
426    let columns = db::symbol_select_columns("");
427    let sql = format!(
428        "SELECT {columns}
429         FROM code_symbols
430         WHERE project_id = $1 AND {column} = $2
431         ORDER BY file_path ASC, line_start ASC
432         LIMIT $3"
433    );
434    conn.query(&sql, &[&project_id, &input, &(limit as i64)])
435        .ok()
436        .map(|rows| {
437            rows.iter()
438                .filter_map(|row| Symbol::from_row(row).ok())
439                .collect()
440        })
441        .unwrap_or_default()
442}
443
444fn suggestion_label(symbol: &Symbol) -> String {
445    format!(
446        "{} ({}:{})",
447        symbol.qualified_name, symbol.file_path, symbol.line_start
448    )
449}
450
451fn resolved_symbol(symbol: &Symbol) -> ResolvedGraphSymbol {
452    ResolvedGraphSymbol {
453        id: symbol.id.clone(),
454        display_name: symbol.name.clone(),
455    }
456}
457
458fn resolve_from_candidates(candidates: Vec<Symbol>) -> (Option<ResolvedGraphSymbol>, Vec<String>) {
459    match candidates.len() {
460        0 => (None, vec![]),
461        1 => (Some(resolved_symbol(&candidates[0])), vec![]),
462        _ => {
463            let mut suggestions = Vec::new();
464            let mut seen = HashSet::new();
465            for symbol in &candidates {
466                let label = suggestion_label(symbol);
467                if seen.insert(label.clone()) {
468                    suggestions.push(label);
469                }
470            }
471            (None, suggestions)
472        }
473    }
474}
475
476/// Resolve user input to a canonical symbol id for graph queries.
477///
478/// Resolution is fail-closed: ambiguous matches return `None` with suggestions.
479pub fn resolve_graph_symbol(
480    conn: &mut Client,
481    input: &str,
482    project_id: &str,
483) -> (Option<ResolvedGraphSymbol>, Vec<String>) {
484    let ids = exact_symbol_matches(conn, project_id, "id", input, 2);
485    let (resolved, suggestions) = resolve_from_candidates(ids);
486    if resolved.is_some() || !suggestions.is_empty() {
487        return (resolved, suggestions);
488    }
489
490    let qualified = exact_symbol_matches(conn, project_id, "qualified_name", input, 6);
491    let (resolved, suggestions) = resolve_from_candidates(qualified);
492    if resolved.is_some() || !suggestions.is_empty() {
493        return (resolved, suggestions);
494    }
495
496    let exact = exact_symbol_matches(conn, project_id, "name", input, 6);
497    let (resolved, suggestions) = resolve_from_candidates(exact);
498    if resolved.is_some() || !suggestions.is_empty() {
499        return (resolved, suggestions);
500    }
501
502    let like_matches = search_symbols_by_name(conn, input, project_id, None, None, &[], 6);
503    let (resolved, suggestions) = resolve_from_candidates(like_matches);
504    if resolved.is_some() || !suggestions.is_empty() {
505        return (resolved, suggestions);
506    }
507
508    let fts_results = search_symbols_fts(conn, input, project_id, None, None, &[], 6);
509    resolve_from_candidates(fts_results)
510}
511
512/// Count matching symbols using pg_search BM25, with LIKE fallback.
513pub fn count_text(
514    conn: &mut Client,
515    query: &str,
516    project_id: &str,
517    language: Option<&str>,
518    paths: &[String],
519) -> usize {
520    if query.trim().is_empty() {
521        return 0;
522    }
523
524    let bm25_query = sanitize_pg_search_query(query);
525    // Intentional fallback: when BM25 sanitization empties the query, use
526    // count_symbols_by_name_like, which may count LIKE matches BM25 filtered out.
527    if bm25_query.is_empty() {
528        return count_symbols_by_name_like(conn, query, project_id, language, paths);
529    }
530
531    let mut params = Vec::new();
532    let query_placeholder = push_param(&mut params, bm25_query);
533    let project_placeholder = push_param(&mut params, project_id.to_string());
534    let mut conditions = vec![
535        format!(
536            "(cs.name @@@ {q} OR cs.qualified_name @@@ {q} OR cs.signature @@@ {q} OR cs.docstring @@@ {q} OR cs.summary @@@ {q})",
537            q = query_placeholder
538        ),
539        format!("cs.project_id = {project_placeholder}"),
540    ];
541    push_symbol_filters(
542        &mut conditions,
543        &mut params,
544        "cs",
545        SymbolFilters {
546            kind: None,
547            language,
548            paths,
549        },
550    );
551    let refs = param_refs(&params);
552    let sql = format!(
553        "SELECT COUNT(*)::BIGINT AS count
554         FROM code_symbols cs
555         JOIN code_indexed_files cf
556           ON cf.project_id = cs.project_id AND cf.file_path = cs.file_path
557         WHERE {}",
558        conditions.join(" AND ")
559    );
560    let count = conn
561        .query_one(&sql, &refs)
562        .ok()
563        .and_then(|row| row.try_get::<_, i64>("count").ok())
564        .unwrap_or(0);
565    if count > 0 {
566        return count as usize;
567    }
568
569    count_symbols_by_name_like(conn, query, project_id, language, paths)
570}
571
572fn count_symbols_by_name_like(
573    conn: &mut Client,
574    query: &str,
575    project_id: &str,
576    language: Option<&str>,
577    paths: &[String],
578) -> usize {
579    let escaped_query = escape_like(query);
580    let pattern = format!("%{escaped_query}%");
581    let mut params = Vec::new();
582    let project_placeholder = push_param(&mut params, project_id.to_string());
583    let name_placeholder = push_param(&mut params, pattern.clone());
584    let qualified_placeholder = push_param(&mut params, pattern);
585    let mut conditions = vec![
586        format!("cs.project_id = {project_placeholder}"),
587        format!(
588            "(cs.name LIKE {name_placeholder} ESCAPE '\\' OR cs.qualified_name LIKE {qualified_placeholder} ESCAPE '\\')"
589        ),
590    ];
591    push_symbol_filters(
592        &mut conditions,
593        &mut params,
594        "cs",
595        SymbolFilters {
596            kind: None,
597            language,
598            paths,
599        },
600    );
601    let refs = param_refs(&params);
602    let sql = format!(
603        "SELECT COUNT(*)::BIGINT AS count
604         FROM code_symbols cs
605         JOIN code_indexed_files cf
606           ON cf.project_id = cs.project_id AND cf.file_path = cs.file_path
607         WHERE {}",
608        conditions.join(" AND ")
609    );
610    conn.query_one(&sql, &refs)
611        .ok()
612        .and_then(|row| row.try_get::<_, i64>("count").ok())
613        .unwrap_or(0) as usize
614}
615
616/// Count matching content chunks using pg_search BM25, with LIKE fallback.
617pub fn count_content(
618    conn: &mut Client,
619    query: &str,
620    project_id: &str,
621    language: Option<&str>,
622    paths: &[String],
623) -> usize {
624    if query.trim().is_empty() {
625        return 0;
626    }
627
628    let bm25_query = sanitize_pg_search_query(query);
629    if bm25_query.is_empty() {
630        return count_content_like(conn, query, project_id, language, paths);
631    }
632    let mut params = Vec::new();
633    let query_placeholder = push_param(&mut params, bm25_query);
634    let project_placeholder = push_param(&mut params, project_id.to_string());
635    let mut conditions = vec![
636        format!("c.content @@@ {query_placeholder}"),
637        format!("c.project_id = {project_placeholder}"),
638    ];
639    if let Some(lang) = language {
640        let placeholder = push_param(&mut params, lang.to_string());
641        conditions.push(format!("c.language = {placeholder}"));
642    }
643    push_path_filter(&mut conditions, &mut params, "c", paths);
644    let refs = param_refs(&params);
645    let sql = format!(
646        "SELECT COUNT(*)::BIGINT AS count
647         FROM code_content_chunks c
648         JOIN code_indexed_files cf
649           ON cf.project_id = c.project_id AND cf.file_path = c.file_path
650         WHERE {}",
651        conditions.join(" AND ")
652    );
653    let count = conn
654        .query_one(&sql, &refs)
655        .ok()
656        .and_then(|row| row.try_get::<_, i64>("count").ok())
657        .unwrap_or(0);
658    if count > 0 {
659        return count as usize;
660    }
661
662    count_content_like(conn, query, project_id, language, paths)
663}
664
665fn count_content_like(
666    conn: &mut Client,
667    query: &str,
668    project_id: &str,
669    language: Option<&str>,
670    paths: &[String],
671) -> usize {
672    let escaped_query = escape_like(query);
673    let like_query = format!("%{escaped_query}%");
674    let mut params = Vec::new();
675    let project_placeholder = push_param(&mut params, project_id.to_string());
676    let like_placeholder = push_param(&mut params, like_query);
677    let mut conditions = vec![
678        format!("c.project_id = {project_placeholder}"),
679        format!("c.content LIKE {like_placeholder} ESCAPE '\\'"),
680    ];
681    if let Some(lang) = language {
682        let placeholder = push_param(&mut params, lang.to_string());
683        conditions.push(format!("c.language = {placeholder}"));
684    }
685    push_path_filter(&mut conditions, &mut params, "c", paths);
686    let refs = param_refs(&params);
687    let sql = format!(
688        "SELECT COUNT(*)::BIGINT AS count
689         FROM code_content_chunks c
690         JOIN code_indexed_files cf
691           ON cf.project_id = c.project_id AND cf.file_path = c.file_path
692         WHERE {}",
693        conditions.join(" AND ")
694    );
695    conn.query_one(&sql, &refs)
696        .ok()
697        .and_then(|row| row.try_get::<_, i64>("count").ok())
698        .unwrap_or(0) as usize
699}
700
701/// Full-text search for symbols: BM25 with LIKE fallback.
702pub fn search_text(
703    conn: &mut Client,
704    query: &str,
705    project_id: &str,
706    language: Option<&str>,
707    paths: &[String],
708    limit: usize,
709) -> Vec<SearchResult> {
710    let mut results = search_symbols_fts(conn, query, project_id, None, language, paths, limit);
711    if results.is_empty() {
712        results = search_symbols_by_name(conn, query, project_id, None, language, paths, limit);
713    }
714    results.into_iter().map(|s| s.to_brief()).collect()
715}
716
717/// Full-text search across file content chunks.
718pub fn search_content(
719    conn: &mut Client,
720    query: &str,
721    project_id: &str,
722    language: Option<&str>,
723    paths: &[String],
724    limit: usize,
725) -> Vec<ContentSearchHit> {
726    if query.trim().is_empty() || limit == 0 {
727        return Vec::new();
728    }
729
730    let bm25_query = sanitize_pg_search_query(query);
731    if bm25_query.is_empty() {
732        return search_content_like(conn, query, project_id, language, paths, limit);
733    }
734    let mut params = Vec::new();
735    let query_placeholder = push_param(&mut params, bm25_query);
736    let project_placeholder = push_param(&mut params, project_id.to_string());
737    let mut conditions = vec![
738        format!("c.content @@@ {query_placeholder}"),
739        format!("c.project_id = {project_placeholder}"),
740    ];
741    if let Some(lang) = language {
742        let placeholder = push_param(&mut params, lang.to_string());
743        conditions.push(format!("c.language = {placeholder}"));
744    }
745    push_path_filter(&mut conditions, &mut params, "c", paths);
746    let limit_placeholder = push_param(&mut params, limit as i64);
747    let refs = param_refs(&params);
748    let sql = format!(
749        "SELECT c.file_path,
750                c.line_start::BIGINT AS line_start,
751                c.line_end::BIGINT AS line_end,
752                c.language,
753                c.content
754         FROM code_content_chunks c
755         JOIN code_indexed_files cf
756           ON cf.project_id = c.project_id AND cf.file_path = c.file_path
757         WHERE {}
758         ORDER BY pdb.score(c.id) DESC, c.id ASC
759         LIMIT {limit_placeholder}",
760        conditions.join(" AND ")
761    );
762
763    let hits: Vec<ContentSearchHit> = conn
764        .query(&sql, &refs)
765        .ok()
766        .map(|rows| {
767            rows.iter()
768                .filter_map(|row| {
769                    let content: String = row.try_get("content").ok()?;
770                    Some(ContentSearchHit {
771                        file_path: row.try_get("file_path").ok()?,
772                        line_start: row.try_get::<_, i64>("line_start").ok()? as usize,
773                        line_end: row.try_get::<_, i64>("line_end").ok()? as usize,
774                        snippet: make_snippet(&content, query),
775                        language: row.try_get("language").ok()?,
776                    })
777                })
778                .collect()
779        })
780        .unwrap_or_default();
781
782    if !hits.is_empty() {
783        return hits;
784    }
785
786    search_content_like(conn, query, project_id, language, paths, limit)
787}
788
789fn search_content_like(
790    conn: &mut Client,
791    query: &str,
792    project_id: &str,
793    language: Option<&str>,
794    paths: &[String],
795    limit: usize,
796) -> Vec<ContentSearchHit> {
797    let escaped_query = escape_like(query);
798    let like_query = format!("%{escaped_query}%");
799    let mut params = Vec::new();
800    let project_placeholder = push_param(&mut params, project_id.to_string());
801    let like_placeholder = push_param(&mut params, like_query);
802    let mut conditions = vec![
803        format!("c.project_id = {project_placeholder}"),
804        format!("c.content LIKE {like_placeholder} ESCAPE '\\'"),
805    ];
806    if let Some(lang) = language {
807        let placeholder = push_param(&mut params, lang.to_string());
808        conditions.push(format!("c.language = {placeholder}"));
809    }
810    push_path_filter(&mut conditions, &mut params, "c", paths);
811    let limit_placeholder = push_param(&mut params, limit as i64);
812    let refs = param_refs(&params);
813    let sql = format!(
814        "SELECT c.file_path,
815                c.line_start::BIGINT AS line_start,
816                c.line_end::BIGINT AS line_end,
817                c.language,
818                c.content
819         FROM code_content_chunks c
820         JOIN code_indexed_files cf
821           ON cf.project_id = c.project_id AND cf.file_path = c.file_path
822         WHERE {}
823         ORDER BY c.file_path ASC, c.line_start ASC
824         LIMIT {limit_placeholder}",
825        conditions.join(" AND ")
826    );
827
828    conn.query(&sql, &refs)
829        .ok()
830        .map(|rows| {
831            rows.iter()
832                .filter_map(|row| {
833                    let content: String = row.try_get("content").ok()?;
834                    Some(ContentSearchHit {
835                        file_path: row.try_get("file_path").ok()?,
836                        line_start: row.try_get::<_, i64>("line_start").ok()? as usize,
837                        line_end: row.try_get::<_, i64>("line_end").ok()? as usize,
838                        snippet: make_snippet(&content, query),
839                        language: row.try_get("language").ok()?,
840                    })
841                })
842                .collect()
843        })
844        .unwrap_or_default()
845}
846
/// Build a short excerpt of `content` around the first query term found in it.
///
/// Matching is case-insensitive and runs in two passes:
///
/// 1. whitespace-separated terms exactly as typed (e.g. `foo::bar`);
/// 2. the same query split on every character that is not alphanumeric, `_`,
///    or `-`, so `foo::bar` or `(needle)` still locate `foo` / `needle`.
///
/// The second pass only matters when the first finds nothing. Previously such
/// queries fell back to the start of the content, which could leave the actual
/// hit out of the snippet.
///
/// The excerpt starts up to 60 characters before the match and ends up to 120
/// characters after its start. When no term is found, it is the first 120
/// characters of `content`. Offsets are counted in `char`s, not bytes.
fn make_snippet(content: &str, query: &str) -> String {
    let (lower_content, lower_byte_to_original_char) = lowercase_with_original_char_map(content);

    let typed_terms = query.split_whitespace().map(str::to_lowercase);
    let word_terms = query
        .split(|ch: char| !(ch.is_alphanumeric() || matches!(ch, '_' | '-')))
        .filter(|term| !term.is_empty())
        .map(str::to_lowercase);

    // `find` returns a byte offset into the lowercased text; the map translates
    // it back to a character index in the original content.
    let match_at = typed_terms
        .chain(word_terms)
        .find_map(|term| lower_content.find(&term))
        .and_then(|byte_index| lower_byte_to_original_char.get(byte_index).copied())
        .unwrap_or(0);

    let start = match_at.saturating_sub(60);
    // `take` stops at the end of the content, so no explicit clamp is needed.
    content
        .chars()
        .skip(start)
        .take(match_at + 120 - start)
        .collect()
}

/// Lowercase `content` and record, for every byte of the lowercased text, the
/// index of the original character that produced it.
///
/// Lowercasing can change a character's encoded length or expand it into
/// several characters, so byte offsets in the lowercased text do not line up
/// with the original. The returned vector has one entry per byte of the
/// lowercased string.
fn lowercase_with_original_char_map(content: &str) -> (String, Vec<usize>) {
    let mut lower = String::with_capacity(content.len());
    let mut lower_byte_to_original_char = Vec::with_capacity(content.len());
    for (original_char_index, ch) in content.chars().enumerate() {
        lower.extend(ch.to_lowercase());
        // Pad the map up to the new byte length with this character's index.
        lower_byte_to_original_char.resize(lower.len(), original_char_index);
    }
    (lower, lower_byte_to_original_char)
}
884
// Unit tests for the pure helpers in this module; none of them open a database connection.
#[cfg(test)]
mod tests {
    use super::*;

    // Punctuation becomes a separator; `_` and `-` stay inside terms.
    #[test]
    fn sanitize_pg_search_query_matches_gobby_rules() {
        assert_eq!(
            sanitize_pg_search_query("foo::bar baz-qux _id + \"drop\""),
            "foo bar baz-qux _id drop"
        );
    }

    // Input made only of separators sanitizes to the empty string.
    #[test]
    fn sanitize_pg_search_query_drops_empty_queries() {
        assert_eq!(sanitize_pg_search_query(":: + ()"), "");
    }

    // The literal head of the glob is LIKE-escaped before `%` is appended.
    #[test]
    fn glob_to_like_prefix_escapes_like_wildcards() {
        assert_eq!(
            glob_to_like_prefix("src/foo_bar/*.rs").as_deref(),
            Some("src/foo\\_bar/%")
        );
    }

    // Bare paths gain a `/**` sibling; duplicates after trimming are dropped.
    #[test]
    fn expand_paths_trims_skips_empty_and_expands_bare_paths() {
        let paths = vec![
            " src/gobby ".to_string(),
            "".to_string(),
            "crates/**/*.rs".to_string(),
            "src/gobby/".to_string(),
        ];

        assert_eq!(
            expand_paths(&paths),
            vec!["src/gobby", "src/gobby/**", "crates/**/*.rs"]
        );
    }

    // An unclosed character class is rejected, and the error names the pattern.
    #[test]
    fn compile_patterns_reports_invalid_glob() {
        let err = compile_patterns(&["src/[".to_string()])
            .expect_err("invalid glob should fail")
            .to_string();

        assert!(err.contains("invalid path glob `src/[`"));
    }

    // One pattern without a literal prefix makes the whole list fall back.
    #[test]
    fn path_like_prefixes_escape_and_require_all_patterns() {
        let paths = vec![
            "src/foo_bar".to_string(),
            "src/foo_bar/**".to_string(),
            "src/100%/**".to_string(),
        ];
        assert_eq!(
            path_like_prefixes(&paths).expect("prefixes"),
            vec!["src/foo\\_bar%", "src/foo\\_bar/%", "src/100\\%/%"]
        );

        let mixed = vec!["src/**".to_string(), "*.rs".to_string()];
        assert!(path_like_prefixes(&mixed).is_none());
        assert!(path_filter_falls_back(&mixed));
        assert!(!path_filter_falls_back(&paths));
    }

    // The match sits beyond the first 120 characters, so the window must move to include it.
    #[test]
    fn snippet_centers_first_matching_token() {
        let content = "before ".repeat(20) + "target call here";
        let snippet = make_snippet(&content, "target");

        assert!(snippet.contains("target call here"));
        assert!(snippet.len() <= 180);
    }

    // Multi-byte characters, and characters whose lowercase form has a different
    // length (U+0130), must not skew the snippet window.
    #[test]
    fn snippet_handles_unicode_before_match() {
        let content = "é".repeat(80) + " target call here";
        let snippet = make_snippet(&content, "target");

        assert!(snippet.contains("target call here"));
        assert!(snippet.chars().count() <= 180);

        let content = "\u{0130}".repeat(80) + " target call here";
        let snippet = make_snippet(&content, "target");

        assert!(snippet.contains("target call here"));
        assert!(snippet.chars().count() <= 180);
    }
}