1use std::collections::HashSet;
7
8use postgres::Client;
9use postgres::types::ToSql;
10
11use crate::db;
12use crate::models::{ContentSearchHit, SearchResult, Symbol};
13
14type PgParam = Box<dyn ToSql + Sync>;
15
/// Outcome of resolving user input to exactly one symbol.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ResolvedGraphSymbol {
    /// Identifier of the matched symbol (copied from `Symbol::id`).
    pub id: String,
    /// Name to present for the match (copied from `Symbol::name`).
    pub display_name: String,
}
21
/// Optional narrowing criteria shared by the symbol queries.
///
/// The type is `Copy`, so one value can be handed to several query stages.
#[derive(Clone, Copy, Debug, Default)]
struct SymbolFilters<'a> {
    /// When set, rows must have this exact `kind`.
    kind: Option<&'a str>,
    /// When set, rows must have this exact `language`.
    language: Option<&'a str>,
    /// Path patterns; an empty slice adds no path restriction.
    paths: &'a [String],
}
28
/// Row cap for filtered fetches.
///
/// NOTE(review): not referenced in the visible part of this file; presumably
/// used by callers that post-filter results by glob — confirm at call sites.
pub const FILTERED_FETCH_CAP: usize = 10_000;
30
31fn push_param<T>(params: &mut Vec<PgParam>, value: T) -> String
32where
33 T: ToSql + Sync + 'static,
34{
35 params.push(Box::new(value));
36 format!("${}", params.len())
37}
38
39fn param_refs(params: &[PgParam]) -> Vec<&(dyn ToSql + Sync)> {
40 params
41 .iter()
42 .map(|param| param.as_ref() as &(dyn ToSql + Sync))
43 .collect()
44}
45
/// Escapes `s` for literal use inside a SQL `LIKE` pattern.
///
/// A backslash is inserted before each `\`, `%` and `_`; every other
/// character is copied unchanged.
fn escape_like(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            if ch == '\\' || ch == '%' || ch == '_' {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
57
58fn glob_to_like_prefix(pattern: &str) -> Option<String> {
60 let prefix: String = pattern
61 .chars()
62 .take_while(|c| !matches!(c, '*' | '?' | '['))
63 .collect();
64 if prefix.is_empty() {
65 None
66 } else {
67 Some(format!("{}%", escape_like(&prefix)))
68 }
69}
70
/// Reports whether `path` contains any of the glob metacharacters `*`, `?`, `[`.
fn has_glob_meta(path: &str) -> bool {
    path.contains(|ch: char| matches!(ch, '*' | '?' | '['))
}
74
75pub fn expand_paths(paths: &[String]) -> Vec<String> {
76 let mut expanded = Vec::new();
77 let mut seen = HashSet::new();
78 for path in paths {
79 let trimmed = path.trim().trim_end_matches('/');
80 if trimmed.is_empty() {
81 continue;
82 }
83
84 let patterns = if has_glob_meta(trimmed) {
85 vec![trimmed.to_string()]
86 } else {
87 vec![trimmed.to_string(), format!("{trimmed}/**")]
88 };
89 for pattern in patterns {
90 if seen.insert(pattern.clone()) {
91 expanded.push(pattern);
92 }
93 }
94 }
95 expanded
96}
97
98pub fn compile_patterns(paths: &[String]) -> anyhow::Result<Vec<glob::Pattern>> {
99 paths
100 .iter()
101 .map(|path| {
102 glob::Pattern::new(path).map_err(|e| anyhow::anyhow!("invalid path glob `{path}`: {e}"))
103 })
104 .collect()
105}
106
107fn path_like_prefixes(paths: &[String]) -> Option<Vec<String>> {
108 if paths.is_empty() {
109 return Some(Vec::new());
110 }
111
112 let mut prefixes = Vec::with_capacity(paths.len());
113 for path in paths {
114 prefixes.push(glob_to_like_prefix(path)?);
115 }
116 Some(prefixes)
117}
118
119pub fn path_filter_falls_back(paths: &[String]) -> bool {
120 !paths.is_empty() && path_like_prefixes(paths).is_none()
121}
122
123fn push_path_filter(
124 conditions: &mut Vec<String>,
125 params: &mut Vec<PgParam>,
126 alias: &str,
127 paths: &[String],
128) -> bool {
129 let Some(prefixes) = path_like_prefixes(paths) else {
130 for path in paths
131 .iter()
132 .filter(|path| glob_to_like_prefix(path).is_none())
133 {
134 log::warn!(
135 "omitting SQL path filter for alias `{alias}` because path filter `{path}` cannot be converted to a LIKE prefix; relying on post-query glob matching",
136 );
137 }
138 return true;
139 };
140 if prefixes.is_empty() {
141 return false;
142 }
143
144 let predicates = prefixes
145 .into_iter()
146 .map(|prefix| {
147 let placeholder = push_param(params, prefix);
148 format!("{alias}.file_path LIKE {placeholder} ESCAPE '\\'")
149 })
150 .collect::<Vec<_>>();
151 conditions.push(format!("({})", predicates.join(" OR ")));
152 false
153}
154
155fn push_symbol_filters(
156 conditions: &mut Vec<String>,
157 params: &mut Vec<PgParam>,
158 alias: &str,
159 filters: SymbolFilters<'_>,
160) {
161 if let Some(kind) = filters.kind {
162 let placeholder = push_param(params, kind.to_string());
163 conditions.push(format!("{alias}.kind = {placeholder}"));
164 }
165 if let Some(language) = filters.language {
166 let placeholder = push_param(params, language.to_string());
167 conditions.push(format!("{alias}.language = {placeholder}"));
168 }
169 push_path_filter(conditions, params, alias, filters.paths);
170}
171
172fn append_unique_symbols(
173 out: &mut Vec<Symbol>,
174 seen: &mut HashSet<String>,
175 symbols: Vec<Symbol>,
176 limit: usize,
177) {
178 for symbol in symbols {
179 if seen.insert(symbol.id.clone()) {
180 out.push(symbol);
181 if out.len() >= limit {
182 return;
183 }
184 }
185 }
186}
187
188fn query_symbols_by_conditions(
189 conn: &mut Client,
190 mut conditions: Vec<String>,
191 mut params: Vec<PgParam>,
192 filters: SymbolFilters<'_>,
193 limit: usize,
194 order_by: &str,
195) -> Vec<Symbol> {
196 push_symbol_filters(&mut conditions, &mut params, "cs", filters);
197 let limit_placeholder = push_param(&mut params, limit as i64);
198 let where_clause = conditions.join(" AND ");
199 let columns = db::symbol_select_columns("cs");
200 let sql = format!(
201 "SELECT {columns}
202 FROM code_symbols cs
203 JOIN code_indexed_files cf
204 ON cf.project_id = cs.project_id AND cf.file_path = cs.file_path
205 WHERE {where_clause}
206 ORDER BY {order_by}
207 LIMIT {limit_placeholder}"
208 );
209 let refs = param_refs(¶ms);
210 conn.query(&sql, &refs)
211 .ok()
212 .map(|rows| {
213 rows.iter()
214 .filter_map(|row| Symbol::from_row(row).ok())
215 .collect()
216 })
217 .unwrap_or_default()
218}
219
/// Reduces free-form input to a plain, space-separated token string.
///
/// Every character that is not alphanumeric, a space, `_` or `-` is replaced
/// by a space; the result is split on whitespace and re-joined with single
/// spaces. Input with no surviving tokens yields an empty string.
pub fn sanitize_pg_search_query(query: &str) -> String {
    let mut cleaned = String::with_capacity(query.len());
    for ch in query.chars() {
        let keep = ch.is_alphanumeric() || ch == ' ' || ch == '_' || ch == '-';
        cleaned.push(if keep { ch } else { ' ' });
    }
    // `split_whitespace` never yields empty tokens, so no extra filtering is needed.
    let tokens: Vec<&str> = cleaned.split_whitespace().collect();
    tokens.join(" ")
}
238
239pub fn search_symbols_fts(
241 conn: &mut Client,
242 query: &str,
243 project_id: &str,
244 kind: Option<&str>,
245 language: Option<&str>,
246 paths: &[String],
247 limit: usize,
248) -> Vec<Symbol> {
249 let bm25_query = sanitize_pg_search_query(query);
250 if bm25_query.is_empty() || limit == 0 {
251 return Vec::new();
252 }
253
254 let mut params = Vec::new();
255 let query_placeholder = push_param(&mut params, bm25_query);
256 let project_placeholder = push_param(&mut params, project_id.to_string());
257 let conditions = vec![
258 format!(
259 "(cs.name @@@ {q} OR cs.qualified_name @@@ {q} OR cs.signature @@@ {q} OR cs.docstring @@@ {q} OR cs.summary @@@ {q})",
260 q = query_placeholder
261 ),
262 format!("cs.project_id = {project_placeholder}"),
263 ];
264 let filters = SymbolFilters {
265 kind,
266 language,
267 paths,
268 };
269 query_symbols_by_conditions(
270 conn,
271 conditions,
272 params,
273 filters,
274 limit,
275 "pdb.score(cs.id) DESC, cs.id ASC",
276 )
277}
278
279pub fn search_symbols_by_name(
281 conn: &mut Client,
282 query: &str,
283 project_id: &str,
284 kind: Option<&str>,
285 language: Option<&str>,
286 paths: &[String],
287 limit: usize,
288) -> Vec<Symbol> {
289 if query.trim().is_empty() || limit == 0 {
290 return Vec::new();
291 }
292 let escaped_query = escape_like(query);
293 let pattern = format!("%{escaped_query}%");
294 let mut params = Vec::new();
295 let project_placeholder = push_param(&mut params, project_id.to_string());
296 let name_placeholder = push_param(&mut params, pattern.clone());
297 let qualified_placeholder = push_param(&mut params, pattern);
298 let conditions = vec![
299 format!("cs.project_id = {project_placeholder}"),
300 format!(
301 "(cs.name LIKE {name_placeholder} ESCAPE '\\' OR cs.qualified_name LIKE {qualified_placeholder} ESCAPE '\\')"
302 ),
303 ];
304 query_symbols_by_conditions(
305 conn,
306 conditions,
307 params,
308 SymbolFilters {
309 kind,
310 language,
311 paths,
312 },
313 limit,
314 "cs.name ASC, cs.file_path ASC, cs.line_start ASC",
315 )
316}
317
318pub fn search_symbols_exact_first(
319 conn: &mut Client,
320 query: &str,
321 project_id: &str,
322 kind: Option<&str>,
323 language: Option<&str>,
324 paths: &[String],
325 limit: usize,
326) -> Vec<Symbol> {
327 if query.trim().is_empty() || limit == 0 {
328 return Vec::new();
329 }
330
331 let mut results = Vec::new();
332 let mut seen = HashSet::new();
333 let filters = SymbolFilters {
334 kind,
335 language,
336 paths,
337 };
338
339 let mut params = Vec::new();
340 let project = push_param(&mut params, project_id.to_string());
341 let name = push_param(&mut params, query.to_string());
342 let qualified = push_param(&mut params, query.to_string());
343 let exact = query_symbols_by_conditions(
344 conn,
345 vec![
346 format!("cs.project_id = {project}"),
347 format!("(cs.name = {name} OR cs.qualified_name = {qualified})"),
348 ],
349 params,
350 filters,
351 limit,
352 "cs.file_path ASC, cs.line_start ASC",
353 );
354 append_unique_symbols(&mut results, &mut seen, exact, limit);
355 if results.len() >= limit {
356 return results;
357 }
358
359 let mut params = Vec::new();
360 let project = push_param(&mut params, project_id.to_string());
361 let name = push_param(&mut params, query.to_string());
362 let qualified = push_param(&mut params, query.to_string());
363 let ci_exact = query_symbols_by_conditions(
364 conn,
365 vec![
366 format!("cs.project_id = {project}"),
367 format!(
368 "(lower(cs.name) = lower({name}) OR lower(cs.qualified_name) = lower({qualified}))"
369 ),
370 ],
371 params,
372 filters,
373 limit,
374 "cs.file_path ASC, cs.line_start ASC",
375 );
376 append_unique_symbols(&mut results, &mut seen, ci_exact, limit);
377 if results.len() >= limit {
378 return results;
379 }
380
381 let prefix_pattern = format!("{}%", escape_like(query));
382 let mut params = Vec::new();
383 let project = push_param(&mut params, project_id.to_string());
384 let name = push_param(&mut params, prefix_pattern.clone());
385 let qualified = push_param(&mut params, prefix_pattern);
386 let prefix_matches = query_symbols_by_conditions(
387 conn,
388 vec![
389 format!("cs.project_id = {project}"),
390 format!(
391 "(cs.name LIKE {name} ESCAPE '\\' OR cs.qualified_name LIKE {qualified} ESCAPE '\\')"
392 ),
393 ],
394 params,
395 filters,
396 limit,
397 "cs.name ASC, cs.file_path ASC, cs.line_start ASC",
398 );
399 append_unique_symbols(&mut results, &mut seen, prefix_matches, limit);
400 if results.len() >= limit {
401 return results;
402 }
403
404 let contains = search_symbols_by_name(conn, query, project_id, kind, language, paths, limit);
405 append_unique_symbols(&mut results, &mut seen, contains, limit);
406 if results.len() >= limit {
407 return results;
408 }
409
410 let fts = search_symbols_fts(conn, query, project_id, kind, language, paths, limit);
411 append_unique_symbols(&mut results, &mut seen, fts, limit);
412
413 results
414}
415
416fn exact_symbol_matches(
417 conn: &mut Client,
418 project_id: &str,
419 column: &str,
420 input: &str,
421 limit: usize,
422) -> Vec<Symbol> {
423 if !matches!(column, "id" | "qualified_name" | "name") {
424 return Vec::new();
425 }
426 let columns = db::symbol_select_columns("");
427 let sql = format!(
428 "SELECT {columns}
429 FROM code_symbols
430 WHERE project_id = $1 AND {column} = $2
431 ORDER BY file_path ASC, line_start ASC
432 LIMIT $3"
433 );
434 conn.query(&sql, &[&project_id, &input, &(limit as i64)])
435 .ok()
436 .map(|rows| {
437 rows.iter()
438 .filter_map(|row| Symbol::from_row(row).ok())
439 .collect()
440 })
441 .unwrap_or_default()
442}
443
444fn suggestion_label(symbol: &Symbol) -> String {
445 format!(
446 "{} ({}:{})",
447 symbol.qualified_name, symbol.file_path, symbol.line_start
448 )
449}
450
451fn resolved_symbol(symbol: &Symbol) -> ResolvedGraphSymbol {
452 ResolvedGraphSymbol {
453 id: symbol.id.clone(),
454 display_name: symbol.name.clone(),
455 }
456}
457
458fn resolve_from_candidates(candidates: Vec<Symbol>) -> (Option<ResolvedGraphSymbol>, Vec<String>) {
459 match candidates.len() {
460 0 => (None, vec![]),
461 1 => (Some(resolved_symbol(&candidates[0])), vec![]),
462 _ => {
463 let mut suggestions = Vec::new();
464 let mut seen = HashSet::new();
465 for symbol in &candidates {
466 let label = suggestion_label(symbol);
467 if seen.insert(label.clone()) {
468 suggestions.push(label);
469 }
470 }
471 (None, suggestions)
472 }
473 }
474}
475
476pub fn resolve_graph_symbol(
480 conn: &mut Client,
481 input: &str,
482 project_id: &str,
483) -> (Option<ResolvedGraphSymbol>, Vec<String>) {
484 let ids = exact_symbol_matches(conn, project_id, "id", input, 2);
485 let (resolved, suggestions) = resolve_from_candidates(ids);
486 if resolved.is_some() || !suggestions.is_empty() {
487 return (resolved, suggestions);
488 }
489
490 let qualified = exact_symbol_matches(conn, project_id, "qualified_name", input, 6);
491 let (resolved, suggestions) = resolve_from_candidates(qualified);
492 if resolved.is_some() || !suggestions.is_empty() {
493 return (resolved, suggestions);
494 }
495
496 let exact = exact_symbol_matches(conn, project_id, "name", input, 6);
497 let (resolved, suggestions) = resolve_from_candidates(exact);
498 if resolved.is_some() || !suggestions.is_empty() {
499 return (resolved, suggestions);
500 }
501
502 let like_matches = search_symbols_by_name(conn, input, project_id, None, None, &[], 6);
503 let (resolved, suggestions) = resolve_from_candidates(like_matches);
504 if resolved.is_some() || !suggestions.is_empty() {
505 return (resolved, suggestions);
506 }
507
508 let fts_results = search_symbols_fts(conn, input, project_id, None, None, &[], 6);
509 resolve_from_candidates(fts_results)
510}
511
512pub fn count_text(
514 conn: &mut Client,
515 query: &str,
516 project_id: &str,
517 language: Option<&str>,
518 paths: &[String],
519) -> usize {
520 if query.trim().is_empty() {
521 return 0;
522 }
523
524 let bm25_query = sanitize_pg_search_query(query);
525 if bm25_query.is_empty() {
528 return count_symbols_by_name_like(conn, query, project_id, language, paths);
529 }
530
531 let mut params = Vec::new();
532 let query_placeholder = push_param(&mut params, bm25_query);
533 let project_placeholder = push_param(&mut params, project_id.to_string());
534 let mut conditions = vec![
535 format!(
536 "(cs.name @@@ {q} OR cs.qualified_name @@@ {q} OR cs.signature @@@ {q} OR cs.docstring @@@ {q} OR cs.summary @@@ {q})",
537 q = query_placeholder
538 ),
539 format!("cs.project_id = {project_placeholder}"),
540 ];
541 push_symbol_filters(
542 &mut conditions,
543 &mut params,
544 "cs",
545 SymbolFilters {
546 kind: None,
547 language,
548 paths,
549 },
550 );
551 let refs = param_refs(¶ms);
552 let sql = format!(
553 "SELECT COUNT(*)::BIGINT AS count
554 FROM code_symbols cs
555 JOIN code_indexed_files cf
556 ON cf.project_id = cs.project_id AND cf.file_path = cs.file_path
557 WHERE {}",
558 conditions.join(" AND ")
559 );
560 let count = conn
561 .query_one(&sql, &refs)
562 .ok()
563 .and_then(|row| row.try_get::<_, i64>("count").ok())
564 .unwrap_or(0);
565 if count > 0 {
566 return count as usize;
567 }
568
569 count_symbols_by_name_like(conn, query, project_id, language, paths)
570}
571
572fn count_symbols_by_name_like(
573 conn: &mut Client,
574 query: &str,
575 project_id: &str,
576 language: Option<&str>,
577 paths: &[String],
578) -> usize {
579 let escaped_query = escape_like(query);
580 let pattern = format!("%{escaped_query}%");
581 let mut params = Vec::new();
582 let project_placeholder = push_param(&mut params, project_id.to_string());
583 let name_placeholder = push_param(&mut params, pattern.clone());
584 let qualified_placeholder = push_param(&mut params, pattern);
585 let mut conditions = vec![
586 format!("cs.project_id = {project_placeholder}"),
587 format!(
588 "(cs.name LIKE {name_placeholder} ESCAPE '\\' OR cs.qualified_name LIKE {qualified_placeholder} ESCAPE '\\')"
589 ),
590 ];
591 push_symbol_filters(
592 &mut conditions,
593 &mut params,
594 "cs",
595 SymbolFilters {
596 kind: None,
597 language,
598 paths,
599 },
600 );
601 let refs = param_refs(¶ms);
602 let sql = format!(
603 "SELECT COUNT(*)::BIGINT AS count
604 FROM code_symbols cs
605 JOIN code_indexed_files cf
606 ON cf.project_id = cs.project_id AND cf.file_path = cs.file_path
607 WHERE {}",
608 conditions.join(" AND ")
609 );
610 conn.query_one(&sql, &refs)
611 .ok()
612 .and_then(|row| row.try_get::<_, i64>("count").ok())
613 .unwrap_or(0) as usize
614}
615
616pub fn count_content(
618 conn: &mut Client,
619 query: &str,
620 project_id: &str,
621 language: Option<&str>,
622 paths: &[String],
623) -> usize {
624 if query.trim().is_empty() {
625 return 0;
626 }
627
628 let bm25_query = sanitize_pg_search_query(query);
629 if bm25_query.is_empty() {
630 return count_content_like(conn, query, project_id, language, paths);
631 }
632 let mut params = Vec::new();
633 let query_placeholder = push_param(&mut params, bm25_query);
634 let project_placeholder = push_param(&mut params, project_id.to_string());
635 let mut conditions = vec![
636 format!("c.content @@@ {query_placeholder}"),
637 format!("c.project_id = {project_placeholder}"),
638 ];
639 if let Some(lang) = language {
640 let placeholder = push_param(&mut params, lang.to_string());
641 conditions.push(format!("c.language = {placeholder}"));
642 }
643 push_path_filter(&mut conditions, &mut params, "c", paths);
644 let refs = param_refs(¶ms);
645 let sql = format!(
646 "SELECT COUNT(*)::BIGINT AS count
647 FROM code_content_chunks c
648 JOIN code_indexed_files cf
649 ON cf.project_id = c.project_id AND cf.file_path = c.file_path
650 WHERE {}",
651 conditions.join(" AND ")
652 );
653 let count = conn
654 .query_one(&sql, &refs)
655 .ok()
656 .and_then(|row| row.try_get::<_, i64>("count").ok())
657 .unwrap_or(0);
658 if count > 0 {
659 return count as usize;
660 }
661
662 count_content_like(conn, query, project_id, language, paths)
663}
664
665fn count_content_like(
666 conn: &mut Client,
667 query: &str,
668 project_id: &str,
669 language: Option<&str>,
670 paths: &[String],
671) -> usize {
672 let escaped_query = escape_like(query);
673 let like_query = format!("%{escaped_query}%");
674 let mut params = Vec::new();
675 let project_placeholder = push_param(&mut params, project_id.to_string());
676 let like_placeholder = push_param(&mut params, like_query);
677 let mut conditions = vec![
678 format!("c.project_id = {project_placeholder}"),
679 format!("c.content LIKE {like_placeholder} ESCAPE '\\'"),
680 ];
681 if let Some(lang) = language {
682 let placeholder = push_param(&mut params, lang.to_string());
683 conditions.push(format!("c.language = {placeholder}"));
684 }
685 push_path_filter(&mut conditions, &mut params, "c", paths);
686 let refs = param_refs(¶ms);
687 let sql = format!(
688 "SELECT COUNT(*)::BIGINT AS count
689 FROM code_content_chunks c
690 JOIN code_indexed_files cf
691 ON cf.project_id = c.project_id AND cf.file_path = c.file_path
692 WHERE {}",
693 conditions.join(" AND ")
694 );
695 conn.query_one(&sql, &refs)
696 .ok()
697 .and_then(|row| row.try_get::<_, i64>("count").ok())
698 .unwrap_or(0) as usize
699}
700
701pub fn search_text(
703 conn: &mut Client,
704 query: &str,
705 project_id: &str,
706 language: Option<&str>,
707 paths: &[String],
708 limit: usize,
709) -> Vec<SearchResult> {
710 let mut results = search_symbols_fts(conn, query, project_id, None, language, paths, limit);
711 if results.is_empty() {
712 results = search_symbols_by_name(conn, query, project_id, None, language, paths, limit);
713 }
714 results.into_iter().map(|s| s.to_brief()).collect()
715}
716
717pub fn search_content(
719 conn: &mut Client,
720 query: &str,
721 project_id: &str,
722 language: Option<&str>,
723 paths: &[String],
724 limit: usize,
725) -> Vec<ContentSearchHit> {
726 if query.trim().is_empty() || limit == 0 {
727 return Vec::new();
728 }
729
730 let bm25_query = sanitize_pg_search_query(query);
731 if bm25_query.is_empty() {
732 return search_content_like(conn, query, project_id, language, paths, limit);
733 }
734 let mut params = Vec::new();
735 let query_placeholder = push_param(&mut params, bm25_query);
736 let project_placeholder = push_param(&mut params, project_id.to_string());
737 let mut conditions = vec![
738 format!("c.content @@@ {query_placeholder}"),
739 format!("c.project_id = {project_placeholder}"),
740 ];
741 if let Some(lang) = language {
742 let placeholder = push_param(&mut params, lang.to_string());
743 conditions.push(format!("c.language = {placeholder}"));
744 }
745 push_path_filter(&mut conditions, &mut params, "c", paths);
746 let limit_placeholder = push_param(&mut params, limit as i64);
747 let refs = param_refs(¶ms);
748 let sql = format!(
749 "SELECT c.file_path,
750 c.line_start::BIGINT AS line_start,
751 c.line_end::BIGINT AS line_end,
752 c.language,
753 c.content
754 FROM code_content_chunks c
755 JOIN code_indexed_files cf
756 ON cf.project_id = c.project_id AND cf.file_path = c.file_path
757 WHERE {}
758 ORDER BY pdb.score(c.id) DESC, c.id ASC
759 LIMIT {limit_placeholder}",
760 conditions.join(" AND ")
761 );
762
763 let hits: Vec<ContentSearchHit> = conn
764 .query(&sql, &refs)
765 .ok()
766 .map(|rows| {
767 rows.iter()
768 .filter_map(|row| {
769 let content: String = row.try_get("content").ok()?;
770 Some(ContentSearchHit {
771 file_path: row.try_get("file_path").ok()?,
772 line_start: row.try_get::<_, i64>("line_start").ok()? as usize,
773 line_end: row.try_get::<_, i64>("line_end").ok()? as usize,
774 snippet: make_snippet(&content, query),
775 language: row.try_get("language").ok()?,
776 })
777 })
778 .collect()
779 })
780 .unwrap_or_default();
781
782 if !hits.is_empty() {
783 return hits;
784 }
785
786 search_content_like(conn, query, project_id, language, paths, limit)
787}
788
789fn search_content_like(
790 conn: &mut Client,
791 query: &str,
792 project_id: &str,
793 language: Option<&str>,
794 paths: &[String],
795 limit: usize,
796) -> Vec<ContentSearchHit> {
797 let escaped_query = escape_like(query);
798 let like_query = format!("%{escaped_query}%");
799 let mut params = Vec::new();
800 let project_placeholder = push_param(&mut params, project_id.to_string());
801 let like_placeholder = push_param(&mut params, like_query);
802 let mut conditions = vec![
803 format!("c.project_id = {project_placeholder}"),
804 format!("c.content LIKE {like_placeholder} ESCAPE '\\'"),
805 ];
806 if let Some(lang) = language {
807 let placeholder = push_param(&mut params, lang.to_string());
808 conditions.push(format!("c.language = {placeholder}"));
809 }
810 push_path_filter(&mut conditions, &mut params, "c", paths);
811 let limit_placeholder = push_param(&mut params, limit as i64);
812 let refs = param_refs(¶ms);
813 let sql = format!(
814 "SELECT c.file_path,
815 c.line_start::BIGINT AS line_start,
816 c.line_end::BIGINT AS line_end,
817 c.language,
818 c.content
819 FROM code_content_chunks c
820 JOIN code_indexed_files cf
821 ON cf.project_id = c.project_id AND cf.file_path = c.file_path
822 WHERE {}
823 ORDER BY c.file_path ASC, c.line_start ASC
824 LIMIT {limit_placeholder}",
825 conditions.join(" AND ")
826 );
827
828 conn.query(&sql, &refs)
829 .ok()
830 .map(|rows| {
831 rows.iter()
832 .filter_map(|row| {
833 let content: String = row.try_get("content").ok()?;
834 Some(ContentSearchHit {
835 file_path: row.try_get("file_path").ok()?,
836 line_start: row.try_get::<_, i64>("line_start").ok()? as usize,
837 line_end: row.try_get::<_, i64>("line_end").ok()? as usize,
838 snippet: make_snippet(&content, query),
839 language: row.try_get("language").ok()?,
840 })
841 })
842 .collect()
843 })
844 .unwrap_or_default()
845}
846
/// Cuts a snippet of at most 180 characters out of `content`, positioned
/// around the first query token found in it.
///
/// Tokens are the whitespace-separated words of `query`, tried in order and
/// matched case-insensitively. The snippet starts up to 60 characters before
/// the match and ends up to 120 characters after its start. If no token is
/// found, the snippet is the first 120 characters of `content`.
///
/// All offsets are counted in `char`s, so multi-byte text is never split.
fn make_snippet(content: &str, query: &str) -> String {
    let (lower_content, lower_byte_to_original_char) = lowercase_with_original_char_map(content);

    let match_at = query
        .split_whitespace()
        .find_map(|token| {
            // `str::to_lowercase` is context-sensitive (a word-final `Σ`
            // becomes `ς`), whereas the content is folded one `char` at a
            // time (`Σ` always becomes `σ`). Try the contextual form first,
            // then the per-char form, so an upper-case word matches itself.
            let contextual = token.to_lowercase();
            lower_content.find(&contextual).or_else(|| {
                let per_char: String = token.chars().flat_map(char::to_lowercase).collect();
                if per_char == contextual {
                    None
                } else {
                    lower_content.find(&per_char)
                }
            })
        })
        .and_then(|byte_index| lower_byte_to_original_char.get(byte_index).copied())
        .unwrap_or(0);

    let start = match_at.saturating_sub(60);
    let end = (match_at + 120).min(content.chars().count());
    content.chars().skip(start).take(end - start).collect()
}

/// Lower-cases `content` one `char` at a time and records where each byte of
/// the result came from.
///
/// Returns the lower-cased string and a vector with one entry per byte of
/// that string. Each entry is the index, in `char`s, of the original
/// character that produced the byte. This lets a byte offset found in the
/// lower-cased text be mapped back to a character position in `content`,
/// even when lower-casing changes the encoded length.
fn lowercase_with_original_char_map(content: &str) -> (String, Vec<usize>) {
    let mut lower = String::with_capacity(content.len());
    let mut lower_byte_to_original_char = Vec::with_capacity(content.len());
    for (original_char_index, ch) in content.chars().enumerate() {
        for lower_ch in ch.to_lowercase() {
            lower_byte_to_original_char.extend(std::iter::repeat_n(
                original_char_index,
                lower_ch.len_utf8(),
            ));
            lower.push(lower_ch);
        }
    }
    (lower, lower_byte_to_original_char)
}
884
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned `String`s the path helpers take.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    // Punctuation becomes whitespace; `_` and `-` survive; runs collapse.
    #[test]
    fn sanitize_pg_search_query_matches_gobby_rules() {
        let sanitized = sanitize_pg_search_query("foo::bar baz-qux _id + \"drop\"");
        assert_eq!(sanitized, "foo bar baz-qux _id drop");
    }

    // Input made only of stripped characters leaves nothing to search for.
    #[test]
    fn sanitize_pg_search_query_drops_empty_queries() {
        let sanitized = sanitize_pg_search_query(":: + ()");
        assert_eq!(sanitized, "");
    }

    // `_` in the literal head must be escaped so LIKE treats it literally.
    #[test]
    fn glob_to_like_prefix_escapes_like_wildcards() {
        let prefix = glob_to_like_prefix("src/foo_bar/*.rs");
        assert_eq!(prefix.as_deref(), Some("src/foo\\_bar/%"));
    }

    // Bare paths gain a `/**` companion; globs pass through; duplicates that
    // appear after trimming are dropped.
    #[test]
    fn expand_paths_trims_skips_empty_and_expands_bare_paths() {
        let inputs = owned(&[" src/gobby ", "", "crates/**/*.rs", "src/gobby/"]);
        let expanded = expand_paths(&inputs);

        assert_eq!(
            expanded,
            vec!["src/gobby", "src/gobby/**", "crates/**/*.rs"]
        );
    }

    // The error message must name the offending pattern.
    #[test]
    fn compile_patterns_reports_invalid_glob() {
        let message = compile_patterns(&owned(&["src/["]))
            .expect_err("invalid glob should fail")
            .to_string();

        assert!(message.contains("invalid path glob `src/[`"));
    }

    // Every pattern needs a literal head; one bare glob disables the filter.
    #[test]
    fn path_like_prefixes_escape_and_require_all_patterns() {
        let convertible = owned(&["src/foo_bar", "src/foo_bar/**", "src/100%/**"]);
        let prefixes = path_like_prefixes(&convertible).expect("prefixes");
        assert_eq!(
            prefixes,
            vec!["src/foo\\_bar%", "src/foo\\_bar/%", "src/100\\%/%"]
        );

        let mixed = owned(&["src/**", "*.rs"]);
        assert!(path_like_prefixes(&mixed).is_none());
        assert!(path_filter_falls_back(&mixed));
        assert!(!path_filter_falls_back(&convertible));
    }

    // The match sits far enough into the content that the window must shift.
    #[test]
    fn snippet_centers_first_matching_token() {
        let content = format!("{}target call here", "before ".repeat(20));
        let snippet = make_snippet(&content, "target");

        assert!(snippet.contains("target call here"));
        assert!(snippet.len() <= 180);
    }

    // Multi-byte prefixes, including one whose lowercase form is longer than
    // the original (U+0130), must not skew the window.
    #[test]
    fn snippet_handles_unicode_before_match() {
        for filler in ["é", "\u{0130}"] {
            let content = filler.repeat(80) + " target call here";
            let snippet = make_snippet(&content, "target");

            assert!(snippet.contains("target call here"));
            assert!(snippet.chars().count() <= 180);
        }
    }
}