1use postgres::Client;
2use postgres::Row;
3
4use crate::config::{Context, ProjectIndexScope};
5use crate::models::ContentSearchHit;
6use crate::visibility::TOMBSTONE_LANGUAGE;
7
8use super::common::{
9 PgParam, bm25_score_expr, param_refs, push_param, push_path_filter, sanitize_pg_search_query,
10 trusted_row_id,
11};
12
13fn content_bm25_order_by_sql(tiebreakers: &[&str]) -> String {
14 let row_id = trusted_row_id("c.id");
15 let mut order_by = format!("{} DESC", bm25_score_expr(&row_id));
16 for tiebreaker in tiebreakers {
17 order_by.push_str(", ");
18 order_by.push_str(tiebreaker);
19 }
20 order_by
21}
22
23pub fn search_content(
25 conn: &mut Client,
26 query: &str,
27 project_id: &str,
28 language: Option<&str>,
29 paths: &[String],
30 limit: usize,
31) -> Vec<ContentSearchHit> {
32 if query.trim().is_empty() || limit == 0 {
33 return Vec::new();
34 }
35
36 let bm25_query = sanitize_pg_search_query(query);
37 if bm25_query.is_empty() {
38 eprintln!(
39 "gcode: content BM25 search skipped because query contains no pg_search terms; use `gcode grep` for exact text"
40 );
41 return Vec::new();
42 }
43
44 let mut params = Vec::new();
45 let query_placeholder = push_param(&mut params, bm25_query);
46 let project_placeholder = push_param(&mut params, project_id.to_string());
47 let mut conditions = vec![
48 format!("c.content @@@ {query_placeholder}"),
49 format!("c.project_id = {project_placeholder}"),
50 ];
51 if let Some(lang) = language {
52 let placeholder = push_param(&mut params, lang.to_string());
53 conditions.push(format!("c.language = {placeholder}"));
54 }
55 push_path_filter(&mut conditions, &mut params, "c", paths);
56 let limit_placeholder = push_param(&mut params, limit as i64);
57 let order_by = content_bm25_order_by_sql(&["c.id ASC"]);
58 let refs = param_refs(¶ms);
59 let sql = format!(
60 "SELECT c.file_path,
61 c.line_start::BIGINT AS line_start,
62 c.line_end::BIGINT AS line_end,
63 c.language,
64 c.content
65 FROM code_content_chunks c
66 JOIN code_indexed_files cf
67 ON cf.project_id = c.project_id AND cf.file_path = c.file_path
68 WHERE {}
69 ORDER BY {order_by}
70 LIMIT {limit_placeholder}",
71 conditions.join(" AND ")
72 );
73
74 match conn.query(&sql, &refs) {
75 Ok(rows) => content_hits_from_rows(&rows, query),
76 Err(error) => {
77 eprintln!("gcode: content BM25 search failed; pg_search is required: {error}");
78 Vec::new()
79 }
80 }
81}
82
83pub fn search_content_visible(
84 conn: &mut Client,
85 query: &str,
86 ctx: &Context,
87 language: Option<&str>,
88 paths: &[String],
89 limit: usize,
90) -> Vec<ContentSearchHit> {
91 if query.trim().is_empty() || limit == 0 {
92 return Vec::new();
93 }
94
95 let bm25_query = sanitize_pg_search_query(query);
96 if bm25_query.is_empty() {
97 eprintln!(
98 "gcode: visible content BM25 search skipped because query contains no pg_search terms; use `gcode grep` for exact text"
99 );
100 return Vec::new();
101 }
102
103 let mut params = Vec::new();
104 let visible_files_sql = visible_files_sql(ctx, &mut params);
105 let query_placeholder = push_param(&mut params, bm25_query);
106 let mut conditions = vec![format!("c.content @@@ {query_placeholder}")];
107 if let Some(lang) = language {
108 let placeholder = push_param(&mut params, lang.to_string());
109 conditions.push(format!("c.language = {placeholder}"));
110 }
111 push_path_filter(&mut conditions, &mut params, "c", paths);
112 let limit_placeholder = push_param(&mut params, limit as i64);
113 let order_by = content_bm25_order_by_sql(&["c.project_id ASC", "c.id ASC"]);
114 let refs = param_refs(¶ms);
115 let sql = format!(
116 "WITH visible_files AS ({visible_files_sql})
117 SELECT c.file_path,
118 c.line_start::BIGINT AS line_start,
119 c.line_end::BIGINT AS line_end,
120 c.language,
121 c.content
122 FROM code_content_chunks c
123 JOIN visible_files vf
124 ON vf.project_id = c.project_id AND vf.file_path = c.file_path
125 WHERE {}
126 ORDER BY {order_by}
127 LIMIT {limit_placeholder}",
128 conditions.join(" AND ")
129 );
130
131 match conn.query(&sql, &refs) {
132 Ok(rows) => content_hits_from_rows(&rows, query),
133 Err(error) => {
134 eprintln!("gcode: visible content BM25 search failed; pg_search is required: {error}");
135 Vec::new()
136 }
137 }
138}
139
140fn visible_files_sql(ctx: &Context, params: &mut Vec<PgParam>) -> String {
141 match &ctx.index_scope {
142 ProjectIndexScope::Single => {
143 let project_placeholder = push_param(params, ctx.project_id.clone());
144 let tombstone_placeholder = push_param(params, TOMBSTONE_LANGUAGE.to_string());
145 format!(
146 "SELECT file_path, project_id
147 FROM code_indexed_files
148 WHERE project_id = {project_placeholder}
149 AND language != {tombstone_placeholder}"
150 )
151 }
152 ProjectIndexScope::Overlay {
153 overlay_project_id,
154 parent_project_id,
155 ..
156 } => {
157 let overlay_placeholder = push_param(params, overlay_project_id.clone());
158 let parent_placeholder = push_param(params, parent_project_id.clone());
159 let tombstone_placeholder = push_param(params, TOMBSTONE_LANGUAGE.to_string());
160 format!(
161 "SELECT file_path, project_id
162 FROM code_indexed_files
163 WHERE project_id = {overlay_placeholder}
164 AND language != {tombstone_placeholder}
165 UNION ALL
166 SELECT pf.file_path, pf.project_id
167 FROM code_indexed_files pf
168 WHERE pf.project_id = {parent_placeholder}
169 AND pf.language != {tombstone_placeholder}
170 AND NOT EXISTS (
171 SELECT 1 FROM code_indexed_files of
172 WHERE of.project_id = {overlay_placeholder}
173 AND of.file_path = pf.file_path
174 )"
175 )
176 }
177 }
178}
179
180fn content_hits_from_rows(rows: &[Row], query: &str) -> Vec<ContentSearchHit> {
181 let tokens = snippet_tokens(query);
182 rows.iter()
183 .filter_map(|row| {
184 let content: String = row.try_get("content").ok()?;
185 let line_start = usize::try_from(row.try_get::<_, i64>("line_start").ok()?).ok()?;
186 let line_end = usize::try_from(row.try_get::<_, i64>("line_end").ok()?).ok()?;
187 Some(ContentSearchHit {
188 file_path: row.try_get("file_path").ok()?,
189 line_start,
190 line_end,
191 snippet: make_snippet_with_tokens(&content, &tokens),
192 language: row.try_get("language").ok()?,
193 })
194 })
195 .collect()
196}
197
/// Test-only convenience wrapper: tokenizes `query` and returns the snippet of
/// `content` positioned around those tokens.
#[cfg(test)]
pub(super) fn make_snippet(content: &str, query: &str) -> String {
    make_snippet_with_tokens(content, &snippet_tokens(query))
}
203
/// Splits `query` on whitespace and lowercases each term for snippet matching.
///
/// Terms are lowercased one `char` at a time, which is the same context-free
/// mapping `lowercase_with_original_char_map` applies to the content. Using
/// `str::to_lowercase` here would apply its word-final sigma rule (a trailing
/// `Σ` becomes `ς` instead of `σ`), so a query could fail to match the very
/// same upper-case text in the content.
///
/// `split_whitespace` never yields empty items, so no empty token is produced.
fn snippet_tokens(query: &str) -> Vec<String> {
    query
        .split_whitespace()
        .map(|term| term.chars().flat_map(char::to_lowercase).collect())
        .collect()
}
211
212fn make_snippet_with_tokens(content: &str, tokens: &[String]) -> String {
213 let (lower_content, lower_byte_to_original_char) = lowercase_with_original_char_map(content);
214 let match_at = tokens
215 .iter()
216 .filter_map(|token| {
217 lower_content
218 .find(token)
219 .and_then(|byte_index| lower_byte_to_original_char.get(byte_index).copied())
220 })
221 .min();
222 let match_at = match_at.unwrap_or(0);
223 let start = match_at.saturating_sub(60);
224 let content_len = content.chars().count();
225 let end = match_at.saturating_add(120).min(content_len);
226 content.chars().skip(start).take(end - start).collect()
227}
228
/// Lowercases `content` char by char and records where each output byte came
/// from.
///
/// Returns the lowercased text together with a vector holding one entry per
/// byte of that text; each entry is the index (counted in chars) of the
/// original character whose lowercase form produced the byte. A character
/// whose lowercase form spans several bytes or several chars contributes one
/// entry per byte.
fn lowercase_with_original_char_map(content: &str) -> (String, Vec<usize>) {
    let capacity = content.len().saturating_mul(2);
    let mut lowered = String::with_capacity(capacity);
    let mut origin_of_byte: Vec<usize> = Vec::with_capacity(capacity);
    for (char_index, original) in content.chars().enumerate() {
        lowered.extend(original.to_lowercase());
        // Grow the map to the new byte length; every added slot belongs to
        // the character that was just lowercased.
        origin_of_byte.resize(lowered.len(), char_index);
    }
    (lowered, origin_of_byte)
}
245
#[cfg(test)]
mod tests {
    use super::*;

    /// Score function the generated SQL must call.
    const EXPECTED_SCORE_CALL: &str = "pdb.score(c.id)";
    /// Score function the generated SQL must not reference.
    const FORBIDDEN_SCORE_CALL: &str = "pg_search.score";

    /// Asserts that `sql` scores through `pdb.score` and never `pg_search.score`.
    fn assert_uses_pdb_score(sql: &str) {
        assert!(sql.contains(EXPECTED_SCORE_CALL));
        assert!(!sql.contains(FORBIDDEN_SCORE_CALL));
    }

    /// Ordering used by the single-project search.
    #[test]
    fn content_bm25_order_by_uses_pdb_score() {
        let order_by = content_bm25_order_by_sql(&["c.id ASC"]);

        assert_eq!(order_by, "pdb.score(c.id) DESC, c.id ASC");
        assert_uses_pdb_score(&order_by);
    }

    /// Ordering used by the visibility-aware search.
    #[test]
    fn visible_content_bm25_order_by_uses_pdb_score() {
        let order_by = content_bm25_order_by_sql(&["c.project_id ASC", "c.id ASC"]);

        assert_eq!(
            order_by,
            "pdb.score(c.id) DESC, c.project_id ASC, c.id ASC"
        );
        assert_uses_pdb_score(&order_by);
    }
}