1use postgres::GenericClient;
2use reqwest::StatusCode;
3use serde::{Deserialize, Serialize};
4use serde_json::{Map, Value, json};
5use std::fmt;
6use std::time::Duration;
7
8use crate::config::{
9 CODE_SYMBOL_COLLECTION_PREFIX, CodeVectorSettings, Context, EmbeddingConfig, QdrantConfig,
10};
11use crate::db;
12use crate::models::{ProjectionMetadata, ProjectionProvenance, Symbol};
13use gobby_core::degradation::ServiceState;
14use gobby_core::qdrant::{CollectionScope, SearchRequest, UpsertRequest};
15
/// Distance metric name used when building the expected vector schema for
/// code-symbol collections.
pub const VECTOR_DISTANCE_COSINE: &str = "Cosine";
/// Text that is embedded once to discover the embedding dimension when no
/// explicit `vector_dim` setting is available.
const DIMENSION_PROBE_TEXT: &str = "dimension_probe";
/// Timeout applied to the blocking HTTP clients built for embedding and
/// Qdrant requests.
const HTTP_TIMEOUT: Duration = Duration::from_secs(10);
20
/// Parameters for a semantic search over one project's code-symbol vectors.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CodeSymbolVectorSearchRequest {
    /// Project whose collection is searched.
    pub project_id: String,
    /// Free-text query; it is embedded before the vector search runs.
    pub query: String,
    /// Maximum number of hits requested from the vector search.
    pub limit: usize,
    /// Prefix combined with `project_id` to form the collection name.
    pub collection_prefix: String,
}
28
/// One result of a code-symbol vector search.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CodeSymbolVectorSearchHit {
    /// Identifier of the matched point as returned by the vector search.
    pub symbol_id: String,
    /// Score reported for the hit, widened to `f64`.
    pub score: f64,
}
34
/// Payload stored alongside each code-symbol vector.
///
/// Optional fields are omitted from the serialized form when they are `None`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CodeSymbolVectorPayload {
    // --- Symbol identity and location, copied from the `Symbol` ---
    pub project_id: String,
    pub file_path: String,
    pub symbol_id: String,
    pub name: String,
    pub kind: String,
    pub language: String,
    pub line_start: usize,
    pub line_end: usize,
    pub byte_start: usize,
    pub byte_end: usize,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub signature: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub docstring: Option<String>,
    // --- Projection metadata describing where the payload came from ---
    pub provenance: ProjectionProvenance,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence: Option<f64>,
    pub source_system: String,
    pub source_file_path: String,
    pub source_line: usize,
    pub source_line_start: usize,
    pub source_line_end: usize,
    pub source_byte_start: usize,
    pub source_byte_end: usize,
    pub source_symbol_id: String,
    /// Optional enrichment; absent from the serialized payload when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub summary: Option<String>,
}
65
66impl CodeSymbolVectorPayload {
67 pub fn from_symbol(symbol: &Symbol) -> Self {
68 let metadata = ProjectionMetadata::gcode_extracted()
69 .with_source_file_path(&symbol.file_path)
70 .with_source_line(symbol.line_start)
71 .with_source_symbol_id(&symbol.id);
72
73 Self {
74 project_id: symbol.project_id.clone(),
75 file_path: symbol.file_path.clone(),
76 symbol_id: symbol.id.clone(),
77 name: symbol.name.clone(),
78 kind: symbol.kind.clone(),
79 language: symbol.language.clone(),
80 line_start: symbol.line_start,
81 line_end: symbol.line_end,
82 byte_start: symbol.byte_start,
83 byte_end: symbol.byte_end,
84 signature: symbol.signature.clone(),
85 docstring: symbol.docstring.clone(),
86 provenance: metadata.provenance,
87 confidence: metadata.confidence,
88 source_system: metadata.source_system,
89 source_file_path: metadata
90 .source_file_path
91 .unwrap_or_else(|| symbol.file_path.clone()),
92 source_line: metadata.source_line.unwrap_or(symbol.line_start),
93 source_line_start: symbol.line_start,
94 source_line_end: symbol.line_end,
95 source_byte_start: symbol.byte_start,
96 source_byte_end: symbol.byte_end,
97 source_symbol_id: metadata
98 .source_symbol_id
99 .unwrap_or_else(|| symbol.id.clone()),
100 summary: symbol.summary.clone(),
101 }
102 }
103}
104
/// Lifecycle operation being reported; serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CodeSymbolVectorLifecycleAction {
    Ensure,
    SyncFile,
    Clear,
    Rebuild,
}
113
/// Describes which collection a lifecycle action targets for a project.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CodeSymbolVectorLifecycleStatus {
    pub project_id: String,
    /// Collection name derived from the prefix and `project_id`.
    pub collection: String,
    pub action: CodeSymbolVectorLifecycleAction,
}
120
/// Vector schema (dimension and distance metric) expected for a collection.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct VectorCollectionSchema {
    /// Number of components in each vector.
    pub size: usize,
    /// Distance metric name, e.g. `VECTOR_DISTANCE_COSINE`.
    pub distance: String,
}
126
/// Vector schema as read back from an existing collection.
///
/// Each field is `None` when the corresponding value could not be read from
/// the collection description.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ExistingVectorCollectionSchema {
    size: Option<usize>,
    distance: Option<String>,
}
132
/// Result summary returned by the lifecycle operations.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CodeSymbolVectorLifecycleOutput {
    pub project_id: String,
    pub collection: String,
    pub action: CodeSymbolVectorLifecycleAction,
    /// File the operation was scoped to, if any.
    pub file_path: Option<String>,
    /// Number of symbols handed to the operation.
    pub symbols: usize,
    /// Number of vectors submitted for upsert.
    pub vectors_upserted: usize,
    /// Number of delete operations issued (not the number of points removed).
    pub vectors_deleted: usize,
    /// Human-readable one-line summary of the two counters above.
    pub summary: String,
}
144
/// Failures that the vector lifecycle commands can report.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VectorLifecycleError {
    /// No usable Qdrant configuration was supplied.
    MissingQdrantConfig,
    /// No usable embedding configuration was supplied.
    MissingEmbeddingConfig,
    /// The embedding endpoint answered with a non-success HTTP status.
    EmbeddingHttp {
        status: u16,
        body: String,
    },
    /// The embedding request or its response could not be processed.
    EmbeddingResponse(String),
    /// Qdrant answered `operation` with a non-success HTTP status.
    QdrantHttp {
        operation: &'static str,
        status: u16,
        body: String,
    },
    /// A Qdrant operation failed without a usable HTTP response.
    QdrantOperation(String),
    /// The existing collection's vector schema differs from the expected one.
    DimensionMismatch {
        collection: String,
        expected_size: usize,
        found_size: Option<usize>,
        expected_distance: &'static str,
        found_distance: Option<String>,
    },
}

impl fmt::Display for VectorLifecycleError {
    /// Renders the user-facing message for each variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::MissingQdrantConfig => {
                f.write_str("Qdrant config is required for vector lifecycle commands")
            }
            Self::MissingEmbeddingConfig => {
                f.write_str("embedding config is required for vector lifecycle commands")
            }
            Self::EmbeddingHttp { status, body } => {
                write!(f, "embedding request failed: HTTP {status}: {body}")
            }
            Self::EmbeddingResponse(reason) => {
                write!(f, "embedding response was invalid: {reason}")
            }
            Self::QdrantHttp {
                operation,
                status,
                body,
            } => write!(f, "Qdrant {operation} failed: HTTP {status}: {body}"),
            Self::QdrantOperation(reason) => write!(f, "Qdrant operation failed: {reason}"),
            Self::DimensionMismatch {
                collection,
                expected_size,
                found_size,
                expected_distance,
                found_distance,
            } => {
                // Values that could not be read from the collection print as "unknown".
                let found_size = match found_size {
                    Some(size) => size.to_string(),
                    None => "unknown".to_string(),
                };
                let found_distance = found_distance.as_deref().unwrap_or("unknown");
                write!(
                    f,
                    "Qdrant collection `{collection}` has incompatible vector schema: expected size {expected_size} distance {expected_distance}, found size {found_size} distance {found_distance}. Refusing to migrate, drop, or recreate the collection."
                )
            }
        }
    }
}

impl std::error::Error for VectorLifecycleError {}
210
/// Manages the Qdrant collection holding one project's code-symbol vectors.
#[derive(Debug)]
pub struct CodeSymbolVectorLifecycle {
    project_id: String,
    /// Collection name derived from the code-symbol prefix and `project_id`.
    collection: String,
    qdrant: QdrantConfig,
    embedding: EmbeddingConfig,
    settings: CodeVectorSettings,
    /// Embedding dimension discovered by probing; cached after the first probe.
    probed_vector_size: Option<usize>,
    /// Blocking HTTP client reused for direct Qdrant REST calls.
    client: reqwest::blocking::Client,
}
221
222pub fn collection_name(collection_prefix: &str, project_id: &str) -> String {
223 let collection = format!("{collection_prefix}{project_id}");
224 gobby_core::qdrant::collection_name("gcode", CollectionScope::Custom(&collection))
225}
226
227pub fn delete_project_collection(
228 qdrant: &QdrantConfig,
229 project_id: &str,
230) -> Result<bool, VectorLifecycleError> {
231 let client = qdrant_http_client()?;
232 let collection = collection_name(CODE_SYMBOL_COLLECTION_PREFIX, project_id);
233 delete_qdrant_collection(&client, qdrant, &collection)
234}
235
236pub fn delete_file_vectors(
237 qdrant: &QdrantConfig,
238 project_id: &str,
239 file_path: &str,
240) -> Result<bool, VectorLifecycleError> {
241 let client = qdrant_http_client()?;
242 let collection = collection_name(CODE_SYMBOL_COLLECTION_PREFIX, project_id);
243 delete_vectors_for_filter(&client, qdrant, &collection, project_id, Some(file_path))
244}
245
246pub fn delete_code_symbol_collections_with_prefix(
247 qdrant: &QdrantConfig,
248) -> Result<Vec<String>, VectorLifecycleError> {
249 let client = qdrant_http_client()?;
250 let resp = qdrant_request_for_config(&client, qdrant, reqwest::Method::GET, "/collections")?
251 .send()
252 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
253 let status = resp.status();
254 if !status.is_success() {
255 return Err(qdrant_http_error("list collections", status, resp));
256 }
257
258 let data: Value = resp
259 .json()
260 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
261 let collections = parse_collection_names(&data)
262 .into_iter()
263 .filter(|name| name.starts_with(CODE_SYMBOL_COLLECTION_PREFIX))
264 .collect::<Vec<_>>();
265
266 let mut deleted = Vec::new();
267 for collection in collections {
268 if delete_qdrant_collection(&client, qdrant, &collection)? {
269 deleted.push(collection);
270 }
271 }
272 Ok(deleted)
273}
274
/// Resolves the Qdrant configuration for lifecycle commands.
///
/// Delegates directly to `gobby_core::config::resolve_qdrant_config` and
/// returns whatever it yields for `source`.
pub fn resolve_lifecycle_qdrant_config(
    source: &mut impl gobby_core::config::ConfigSource,
) -> Option<QdrantConfig> {
    gobby_core::config::resolve_qdrant_config(source)
}
280
281pub fn lifecycle_status(
282 project_id: impl Into<String>,
283 collection_prefix: &str,
284 action: CodeSymbolVectorLifecycleAction,
285) -> CodeSymbolVectorLifecycleStatus {
286 let project_id = project_id.into();
287 CodeSymbolVectorLifecycleStatus {
288 collection: collection_name(collection_prefix, &project_id),
289 project_id,
290 action,
291 }
292}
293
294pub fn embed_text(config: &EmbeddingConfig, text: &str) -> Result<Vec<f32>, VectorLifecycleError> {
295 let client = reqwest::blocking::Client::builder()
296 .timeout(HTTP_TIMEOUT)
297 .build()
298 .map_err(|err| VectorLifecycleError::EmbeddingResponse(err.to_string()))?;
299
300 let body = json!({
301 "model": config.model,
302 "input": text,
303 });
304
305 let url = format!("{}/embeddings", config.api_base.trim_end_matches('/'));
306 let mut req = client.post(&url).json(&body);
307
308 if let Some(key) = &config.api_key {
309 req = req.header("Authorization", format!("Bearer {key}"));
310 }
311
312 let resp = req
313 .send()
314 .map_err(|err| VectorLifecycleError::EmbeddingResponse(err.to_string()))?;
315 if !resp.status().is_success() {
316 let status = resp.status().as_u16();
317 let body = resp.text().unwrap_or_default();
318 return Err(VectorLifecycleError::EmbeddingHttp { status, body });
319 }
320
321 let data: Value = resp
322 .json()
323 .map_err(|err| VectorLifecycleError::EmbeddingResponse(err.to_string()))?;
324 let embedding: Vec<f32> = data
325 .get("data")
326 .and_then(Value::as_array)
327 .and_then(|values| values.first())
328 .and_then(|value| value.get("embedding"))
329 .and_then(Value::as_array)
330 .ok_or_else(|| {
331 VectorLifecycleError::EmbeddingResponse("missing data[0].embedding array".to_string())
332 })?
333 .iter()
334 .map(|value| {
335 value.as_f64().map(|f| f as f32).ok_or_else(|| {
336 VectorLifecycleError::EmbeddingResponse(
337 "embedding array contains a non-number".to_string(),
338 )
339 })
340 })
341 .collect::<Result<Vec<_>, _>>()?;
342
343 if embedding.is_empty() {
344 Err(VectorLifecycleError::EmbeddingResponse(
345 "embedding vector was empty".to_string(),
346 ))
347 } else {
348 Ok(embedding)
349 }
350}
351
352pub fn embed_query(config: &EmbeddingConfig, text: &str) -> Option<Vec<f32>> {
353 embed_text(config, &format!("search_query: {text}")).ok()
354}
355
356pub fn vector_text_for_symbol(symbol: &Symbol) -> String {
357 let mut lines = vec![
358 format!("name: {}", symbol.name),
359 format!("qualified_name: {}", symbol.qualified_name),
360 format!("kind: {}", symbol.kind),
361 format!("language: {}", symbol.language),
362 format!("file_path: {}", symbol.file_path),
363 format!("range: {}-{}", symbol.line_start, symbol.line_end),
364 ];
365 if let Some(signature) = symbol
366 .signature
367 .as_deref()
368 .filter(|value| !value.trim().is_empty())
369 {
370 lines.push(format!("signature: {signature}"));
371 }
372 if let Some(docstring) = symbol
373 .docstring
374 .as_deref()
375 .filter(|value| !value.trim().is_empty())
376 {
377 lines.push(format!("docstring: {docstring}"));
378 }
379 if let Some(summary) = symbol
380 .summary
381 .as_deref()
382 .filter(|value| !value.trim().is_empty())
383 {
384 lines.push(format!("summary: {summary}"));
385 }
386 lines.join("\n")
387}
388
389pub fn vector_search(
390 config: &QdrantConfig,
391 collection: &str,
392 query_vector: &[f32],
393 limit: usize,
394) -> anyhow::Result<Vec<(String, f64)>> {
395 let request = SearchRequest {
396 vector: query_vector.to_vec(),
397 limit,
398 filter: None,
399 };
400 let (hits, _) = gobby_core::qdrant::with_qdrant(Some(config), Vec::new(), |config| {
401 gobby_core::qdrant::search(config, collection, request)
402 })?;
403 Ok(hits
404 .into_iter()
405 .map(|hit| (hit.id, f64::from(hit.score)))
406 .collect())
407}
408
409impl CodeSymbolVectorLifecycle {
410 pub fn new(
411 project_id: String,
412 qdrant: QdrantConfig,
413 embedding: EmbeddingConfig,
414 settings: CodeVectorSettings,
415 ) -> Result<Self, VectorLifecycleError> {
416 if qdrant
417 .url
418 .as_deref()
419 .filter(|url| !url.trim().is_empty())
420 .is_none()
421 {
422 return Err(VectorLifecycleError::MissingQdrantConfig);
423 }
424 if embedding.api_base.trim().is_empty() {
425 return Err(VectorLifecycleError::MissingEmbeddingConfig);
426 }
427
428 let collection = collection_name(CODE_SYMBOL_COLLECTION_PREFIX, &project_id);
429 let client = reqwest::blocking::Client::builder()
430 .timeout(HTTP_TIMEOUT)
431 .build()
432 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
433 Ok(Self {
434 project_id,
435 collection,
436 qdrant,
437 embedding,
438 settings,
439 probed_vector_size: None,
440 client,
441 })
442 }
443
    /// Returns the name of the collection this lifecycle manages.
    pub fn collection(&self) -> &str {
        &self.collection
    }
447
448 pub fn ensure_collection(&mut self) -> Result<VectorCollectionSchema, VectorLifecycleError> {
449 let expected = self.expected_schema()?;
450 self.require_qdrant_boundary()?;
451 match self.get_collection_schema()? {
452 Some(found) => self.ensure_compatible_schema(expected, found),
453 None => {
454 self.create_collection(&expected)?;
455 Ok(expected)
456 }
457 }
458 }
459
460 pub fn sync_file_symbols(
461 &mut self,
462 file_path: &str,
463 symbols: &[Symbol],
464 ) -> Result<CodeSymbolVectorLifecycleOutput, VectorLifecycleError> {
465 self.ensure_collection()?;
466 let points = self.points_for_symbols(symbols)?;
467 let point_ids = point_ids(&points);
468 self.upsert_points(points)?;
469 self.delete_stale_vectors(Some(file_path), &point_ids)?;
470
471 Ok(self.output(
472 CodeSymbolVectorLifecycleAction::SyncFile,
473 Some(file_path.to_string()),
474 symbols.len(),
475 symbols.len(),
476 1,
477 ))
478 }
479
480 pub fn clear_project_vectors(
481 &mut self,
482 ) -> Result<CodeSymbolVectorLifecycleOutput, VectorLifecycleError> {
483 self.require_qdrant_boundary()?;
484 let deleted = match self.get_collection_schema()? {
485 Some(found) => {
486 if let Some(size) = self.settings.vector_dim {
487 self.ensure_compatible_schema(
488 VectorCollectionSchema {
489 size,
490 distance: VECTOR_DISTANCE_COSINE.to_string(),
491 },
492 found,
493 )?;
494 }
495 self.delete_vectors(None)?;
496 1
497 }
498 None => 0,
499 };
500
501 Ok(self.output(CodeSymbolVectorLifecycleAction::Clear, None, 0, 0, deleted))
502 }
503
504 pub fn rebuild_symbols(
505 &mut self,
506 symbols: &[Symbol],
507 ) -> Result<CodeSymbolVectorLifecycleOutput, VectorLifecycleError> {
508 self.ensure_collection()?;
509 let points = self.points_for_symbols(symbols)?;
510 let point_ids = point_ids(&points);
511 self.upsert_points(points)?;
512 self.delete_stale_vectors(None, &point_ids)?;
513
514 Ok(self.output(
515 CodeSymbolVectorLifecycleAction::Rebuild,
516 None,
517 symbols.len(),
518 symbols.len(),
519 1,
520 ))
521 }
522
523 fn output(
524 &self,
525 action: CodeSymbolVectorLifecycleAction,
526 file_path: Option<String>,
527 symbols: usize,
528 vectors_upserted: usize,
529 vectors_deleted: usize,
530 ) -> CodeSymbolVectorLifecycleOutput {
531 CodeSymbolVectorLifecycleOutput {
532 project_id: self.project_id.clone(),
533 collection: self.collection.clone(),
534 action,
535 file_path,
536 symbols,
537 vectors_upserted,
538 vectors_deleted,
539 summary: format!(
540 "{vectors_upserted} vector(s) upserted, {vectors_deleted} delete operation(s) issued"
541 ),
542 }
543 }
544
545 fn expected_schema(&mut self) -> Result<VectorCollectionSchema, VectorLifecycleError> {
546 let size = match self.settings.vector_dim {
547 Some(size) => size,
548 None => match self.probed_vector_size {
549 Some(size) => size,
550 None => {
551 let size = embed_text(&self.embedding, DIMENSION_PROBE_TEXT)?.len();
552 self.probed_vector_size = Some(size);
553 size
554 }
555 },
556 };
557
558 Ok(VectorCollectionSchema {
559 size,
560 distance: VECTOR_DISTANCE_COSINE.to_string(),
561 })
562 }
563
564 fn require_qdrant_boundary(&self) -> Result<(), VectorLifecycleError> {
565 let ((), state) = gobby_core::qdrant::with_qdrant(Some(&self.qdrant), (), |_| Ok(()))
566 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
567 match state {
568 ServiceState::Available => Ok(()),
569 ServiceState::NotConfigured => Err(VectorLifecycleError::MissingQdrantConfig),
570 other => Err(VectorLifecycleError::QdrantOperation(format!(
571 "unexpected Qdrant service state: {other:?}"
572 ))),
573 }
574 }
575
576 fn ensure_compatible_schema(
577 &self,
578 expected: VectorCollectionSchema,
579 found: ExistingVectorCollectionSchema,
580 ) -> Result<VectorCollectionSchema, VectorLifecycleError> {
581 if found.size == Some(expected.size)
582 && found.distance.as_deref() == Some(&expected.distance)
583 {
584 return Ok(VectorCollectionSchema {
585 size: expected.size,
586 distance: expected.distance,
587 });
588 }
589
590 Err(VectorLifecycleError::DimensionMismatch {
591 collection: self.collection.clone(),
592 expected_size: expected.size,
593 found_size: found.size,
594 expected_distance: VECTOR_DISTANCE_COSINE,
595 found_distance: found.distance,
596 })
597 }
598
599 fn get_collection_schema(
600 &self,
601 ) -> Result<Option<ExistingVectorCollectionSchema>, VectorLifecycleError> {
602 let resp = self
603 .qdrant_request(
604 reqwest::Method::GET,
605 &format!("/collections/{}", self.collection),
606 )?
607 .send()
608 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
609 let status = resp.status();
610 if status == StatusCode::NOT_FOUND {
611 return Ok(None);
612 }
613 if !status.is_success() {
614 return Err(qdrant_http_error("get collection", status, resp));
615 }
616
617 let data: Value = resp
618 .json()
619 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
620 Ok(parse_collection_schema(&data))
621 }
622
623 fn create_collection(
624 &self,
625 schema: &VectorCollectionSchema,
626 ) -> Result<(), VectorLifecycleError> {
627 let body = json!({
628 "vectors": {
629 "size": schema.size,
630 "distance": schema.distance,
631 },
632 });
633 let resp = self
634 .qdrant_request(
635 reqwest::Method::PUT,
636 &format!("/collections/{}", self.collection),
637 )?
638 .json(&body)
639 .send()
640 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
641 if !resp.status().is_success() {
642 return Err(qdrant_http_error("create collection", resp.status(), resp));
643 }
644 Ok(())
645 }
646
647 fn delete_vectors(&self, file_path: Option<&str>) -> Result<(), VectorLifecycleError> {
648 delete_vectors_for_filter(
649 &self.client,
650 &self.qdrant,
651 &self.collection,
652 &self.project_id,
653 file_path,
654 )
655 .map(|_| ())
656 }
657
658 fn delete_stale_vectors(
659 &self,
660 file_path: Option<&str>,
661 keep_point_ids: &[String],
662 ) -> Result<(), VectorLifecycleError> {
663 delete_vectors_for_filter_excluding_ids(
664 &self.client,
665 &self.qdrant,
666 &self.collection,
667 &self.project_id,
668 file_path,
669 keep_point_ids,
670 )
671 .map(|_| ())
672 }
673
674 fn upsert_points(&self, points: Vec<UpsertRequest>) -> Result<(), VectorLifecycleError> {
675 if points.is_empty() {
676 return Ok(());
677 }
678 let ((), state) = gobby_core::qdrant::with_qdrant(Some(&self.qdrant), (), |config| {
679 gobby_core::qdrant::upsert(config, &self.collection, points)
680 })
681 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
682 match state {
683 ServiceState::Available => Ok(()),
684 ServiceState::NotConfigured => Err(VectorLifecycleError::MissingQdrantConfig),
685 other => Err(VectorLifecycleError::QdrantOperation(format!(
686 "unexpected Qdrant service state: {other:?}"
687 ))),
688 }
689 }
690
691 fn points_for_symbols(
692 &self,
693 symbols: &[Symbol],
694 ) -> Result<Vec<UpsertRequest>, VectorLifecycleError> {
695 symbols
696 .iter()
697 .map(|symbol| {
698 let vector = embed_text(&self.embedding, &vector_text_for_symbol(symbol))?;
699 let payload = payload_map(CodeSymbolVectorPayload::from_symbol(symbol))?;
700 Ok(UpsertRequest {
701 id: symbol.id.clone(),
702 vector,
703 payload,
704 })
705 })
706 .collect()
707 }
708
    /// Starts a Qdrant REST request for `path` using this lifecycle's HTTP
    /// client and Qdrant configuration.
    fn qdrant_request(
        &self,
        method: reqwest::Method,
        path: &str,
    ) -> Result<reqwest::blocking::RequestBuilder, VectorLifecycleError> {
        qdrant_request_for_config(&self.client, &self.qdrant, method, path)
    }
716}
717
/// Loads the symbols stored for one file of a project from `code_symbols`.
///
/// Rows are ordered by file path, byte offset and id, and each row is
/// converted with `Symbol::from_row`.
///
/// # Errors
/// Fails when the query fails or any row cannot be converted.
pub fn fetch_symbols_for_file(
    conn: &mut impl GenericClient,
    project_id: &str,
    file_path: &str,
) -> anyhow::Result<Vec<Symbol>> {
    let columns = db::symbol_select_columns("");
    conn.query(
        &format!(
            "SELECT {columns} FROM code_symbols
             WHERE project_id = $1 AND file_path = $2
             ORDER BY file_path, byte_start, id"
        ),
        &[&project_id, &file_path],
    )?
    .into_iter()
    .map(|row| Symbol::from_row(&row))
    .collect()
}
736
/// Loads every symbol stored for a project from `code_symbols`.
///
/// Rows are ordered by file path, byte offset and id, and each row is
/// converted with `Symbol::from_row`.
///
/// # Errors
/// Fails when the query fails or any row cannot be converted.
pub fn fetch_symbols_for_project(
    conn: &mut impl GenericClient,
    project_id: &str,
) -> anyhow::Result<Vec<Symbol>> {
    let columns = db::symbol_select_columns("");
    conn.query(
        &format!(
            "SELECT {columns} FROM code_symbols
             WHERE project_id = $1
             ORDER BY file_path, byte_start, id"
        ),
        &[&project_id],
    )?
    .into_iter()
    .map(|row| Symbol::from_row(&row))
    .collect()
}
754
755fn payload_map(
756 payload: CodeSymbolVectorPayload,
757) -> Result<Map<String, Value>, VectorLifecycleError> {
758 match serde_json::to_value(payload)
759 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?
760 {
761 Value::Object(map) => Ok(map),
762 _ => Err(VectorLifecycleError::QdrantOperation(
763 "vector payload did not serialize to an object".to_string(),
764 )),
765 }
766}
767
768fn point_ids(points: &[UpsertRequest]) -> Vec<String> {
769 points.iter().map(|point| point.id.clone()).collect()
770}
771
772fn parse_collection_schema(data: &Value) -> Option<ExistingVectorCollectionSchema> {
773 let vectors = data.pointer("/result/config/params/vectors")?;
774 let size = vectors
775 .get("size")
776 .and_then(Value::as_u64)
777 .map(|size| size as usize);
778 let distance = vectors
779 .get("distance")
780 .and_then(Value::as_str)
781 .map(str::to_string);
782 Some(ExistingVectorCollectionSchema { size, distance })
783}
784
785fn parse_collection_names(data: &Value) -> Vec<String> {
786 data.pointer("/result/collections")
787 .and_then(Value::as_array)
788 .map(|collections| {
789 collections
790 .iter()
791 .filter_map(|collection| {
792 collection
793 .get("name")
794 .and_then(Value::as_str)
795 .map(str::to_string)
796 })
797 .collect()
798 })
799 .unwrap_or_default()
800}
801
802fn qdrant_http_client() -> Result<reqwest::blocking::Client, VectorLifecycleError> {
803 reqwest::blocking::Client::builder()
804 .timeout(HTTP_TIMEOUT)
805 .build()
806 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))
807}
808
809fn qdrant_request_for_config(
810 client: &reqwest::blocking::Client,
811 qdrant: &QdrantConfig,
812 method: reqwest::Method,
813 path: &str,
814) -> Result<reqwest::blocking::RequestBuilder, VectorLifecycleError> {
815 let base = qdrant
816 .url
817 .as_deref()
818 .ok_or(VectorLifecycleError::MissingQdrantConfig)?
819 .trim_end_matches('/');
820 let url = format!("{base}{path}");
821 let mut req = client.request(method, url);
822 if let Some(key) = &qdrant.api_key {
823 req = req.header("api-key", key);
824 }
825 Ok(req)
826}
827
828fn delete_qdrant_collection(
829 client: &reqwest::blocking::Client,
830 qdrant: &QdrantConfig,
831 collection: &str,
832) -> Result<bool, VectorLifecycleError> {
833 let resp = qdrant_request_for_config(
834 client,
835 qdrant,
836 reqwest::Method::DELETE,
837 &format!("/collections/{collection}"),
838 )?
839 .send()
840 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
841 let status = resp.status();
842 if status == StatusCode::NOT_FOUND {
843 return Ok(false);
844 }
845 if !status.is_success() {
846 return Err(qdrant_http_error("delete collection", status, resp));
847 }
848 Ok(true)
849}
850
/// Deletes all vectors of `project_id` in `collection`, optionally restricted
/// to `file_path`.
///
/// Same as `delete_vectors_for_filter_excluding_ids` with an empty keep list.
fn delete_vectors_for_filter(
    client: &reqwest::blocking::Client,
    qdrant: &QdrantConfig,
    collection: &str,
    project_id: &str,
    file_path: Option<&str>,
) -> Result<bool, VectorLifecycleError> {
    delete_vectors_for_filter_excluding_ids(client, qdrant, collection, project_id, file_path, &[])
}
860
861fn delete_vectors_for_filter_excluding_ids(
862 client: &reqwest::blocking::Client,
863 qdrant: &QdrantConfig,
864 collection: &str,
865 project_id: &str,
866 file_path: Option<&str>,
867 keep_point_ids: &[String],
868) -> Result<bool, VectorLifecycleError> {
869 let mut must = vec![json!({
870 "key": "project_id",
871 "match": {"value": project_id},
872 })];
873 if let Some(file_path) = file_path {
874 must.push(json!({
875 "key": "file_path",
876 "match": {"value": file_path},
877 }));
878 }
879 let mut filter = json!({ "must": must });
880 if !keep_point_ids.is_empty()
881 && let Some(filter) = filter.as_object_mut()
882 {
883 filter.insert(
884 "must_not".to_string(),
885 json!([{ "has_id": keep_point_ids }]),
886 );
887 }
888 let body = json!({ "filter": filter });
889 let resp = qdrant_request_for_config(
890 client,
891 qdrant,
892 reqwest::Method::POST,
893 &format!("/collections/{collection}/points/delete"),
894 )?
895 .json(&body)
896 .send()
897 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
898 let status = resp.status();
899 if status == StatusCode::NOT_FOUND {
900 return Ok(false);
901 }
902 if !status.is_success() {
903 return Err(qdrant_http_error("delete points", status, resp));
904 }
905 Ok(true)
906}
907
908fn qdrant_http_error(
909 operation: &'static str,
910 status: StatusCode,
911 resp: reqwest::blocking::Response,
912) -> VectorLifecycleError {
913 VectorLifecycleError::QdrantHttp {
914 operation,
915 status: status.as_u16(),
916 body: resp.text().unwrap_or_default(),
917 }
918}
919
920pub fn search_code_symbols(
921 ctx: &Context,
922 request: &CodeSymbolVectorSearchRequest,
923) -> Vec<CodeSymbolVectorSearchHit> {
924 let qdrant_config = match &ctx.qdrant {
925 Some(c) => c,
926 None => return vec![],
927 };
928
929 let embedding_config = match &ctx.embedding {
930 Some(c) => c,
931 None => return vec![],
932 };
933
934 let embedding = match embed_query(embedding_config, &request.query) {
935 Some(e) => e,
936 None => return vec![],
937 };
938
939 let collection = collection_name(&request.collection_prefix, &request.project_id);
940 vector_search(qdrant_config, &collection, &embedding, request.limit)
941 .unwrap_or_default()
942 .into_iter()
943 .map(|(symbol_id, score)| CodeSymbolVectorSearchHit { symbol_id, score })
944 .collect()
945}
946
947pub fn semantic_search(ctx: &Context, query: &str, limit: usize) -> Vec<(String, f64)> {
948 if ctx.qdrant.is_none() {
949 return vec![];
950 }
951
952 let request = CodeSymbolVectorSearchRequest {
953 project_id: ctx.project_id.clone(),
954 query: query.to_string(),
955 limit,
956 collection_prefix: CODE_SYMBOL_COLLECTION_PREFIX.to_string(),
957 };
958
959 search_code_symbols(ctx, &request)
960 .into_iter()
961 .map(|hit| (hit.symbol_id, hit.score))
962 .collect()
963}
964
965#[cfg(test)]
966mod tests {
967 use super::*;
968 use crate::config::{CodeVectorSettings, QdrantConfig};
969 use crate::models::{SOURCE_SYSTEM_GCODE, Symbol};
970 use serde_json::{Value, json};
971 use std::io::{Read, Write};
972 use std::net::TcpListener;
973 use std::thread;
974
975 fn test_symbol(summary: Option<String>) -> Symbol {
976 Symbol {
977 id: "symbol-1".to_string(),
978 project_id: "project-1".to_string(),
979 file_path: "src/lib.rs".to_string(),
980 name: "run".to_string(),
981 qualified_name: "crate::run".to_string(),
982 kind: "function".to_string(),
983 language: "rust".to_string(),
984 byte_start: 10,
985 byte_end: 40,
986 line_start: 3,
987 line_end: 5,
988 signature: None,
989 docstring: None,
990 parent_symbol_id: None,
991 content_hash: "hash".to_string(),
992 summary,
993 created_at: String::new(),
994 updated_at: String::new(),
995 }
996 }
997
998 #[test]
999 fn payloads_carry_provenance_metadata() {
1000 let payload = CodeSymbolVectorPayload::from_symbol(&test_symbol(Some("does work".into())));
1001
1002 assert_eq!(payload.provenance, ProjectionProvenance::Extracted);
1003 assert_eq!(payload.confidence, Some(1.0));
1004 assert_eq!(payload.source_system, SOURCE_SYSTEM_GCODE);
1005 assert_eq!(payload.source_file_path, "src/lib.rs");
1006 assert_eq!(payload.source_line_start, 3);
1007 assert_eq!(payload.source_line_end, 5);
1008 assert_eq!(payload.source_byte_start, 10);
1009 assert_eq!(payload.source_byte_end, 40);
1010 assert_eq!(payload.source_line, 3);
1011 assert_eq!(payload.source_symbol_id, "symbol-1");
1012 assert_eq!(payload.summary.as_deref(), Some("does work"));
1013 assert_eq!(payload.signature, None);
1014 assert_eq!(payload.docstring, None);
1015
1016 let value = serde_json::to_value(payload).expect("payload serializes");
1017 assert_eq!(value["provenance"], "EXTRACTED");
1018 assert_eq!(value["confidence"], 1.0);
1019 assert_eq!(value["source_system"], SOURCE_SYSTEM_GCODE);
1020 assert_eq!(value["source_file_path"], "src/lib.rs");
1021 assert_eq!(value["source_line_start"], 3);
1022 assert_eq!(value["source_line_end"], 5);
1023 assert_eq!(value["source_byte_start"], 10);
1024 assert_eq!(value["source_byte_end"], 40);
1025 assert_eq!(value["source_symbol_id"], "symbol-1");
1026 }
1027
1028 #[test]
1029 fn summaries_are_optional_enrichment() {
1030 let symbol = test_symbol(None);
1031 let payload = CodeSymbolVectorPayload::from_symbol(&symbol);
1032 let vector_text = vector_text_for_symbol(&symbol);
1033 let value = serde_json::to_value(payload).expect("payload serializes");
1034
1035 assert!(value.get("summary").is_none());
1036 assert!(vector_text.contains("name: run"));
1037 assert!(!vector_text.contains("summary:"));
1038 }
1039
1040 #[test]
1041 fn collection_name_compatibility() {
1042 assert_eq!(
1043 collection_name(CODE_SYMBOL_COLLECTION_PREFIX, "project-1"),
1044 "code_symbols_project-1"
1045 );
1046 }
1047
1048 #[test]
1049 fn delete_project_collection_targets_only_project_collection() {
1050 let (qdrant_url, handle) = spawn_http_responses(vec![(200, json!({"result": true}))]);
1051 let deleted = delete_project_collection(
1052 &QdrantConfig {
1053 url: Some(qdrant_url),
1054 api_key: Some("qdrant-key".to_string()),
1055 },
1056 "project-1",
1057 )
1058 .expect("delete collection");
1059 let requests = handle.join().expect("qdrant requests");
1060
1061 assert!(deleted);
1062 assert_eq!(requests.len(), 1);
1063 assert!(requests[0].contains("DELETE /collections/code_symbols_project-1 HTTP/1.1"));
1064 assert!(requests[0].contains("api-key: qdrant-key"));
1065 assert!(!requests[0].contains("project-2"));
1066 }
1067
/// Deleting the vectors of one file must be a single authenticated
/// `points/delete` call whose body filters on both `project_id` and
/// `file_path`. Only a Qdrant mock is started; no embedding server is set up.
#[test]
fn delete_file_vectors_filters_by_project_and_file_without_embedding() {
    let script = vec![(200, json!({"result": {"operation_id": 1}}))];
    let (qdrant_url, server) = spawn_http_responses(script);
    let qdrant = QdrantConfig {
        url: Some(qdrant_url),
        api_key: Some("qdrant-key".to_string()),
    };

    let was_deleted =
        delete_file_vectors(&qdrant, "project-1", "src/lib.rs").expect("delete vectors");
    let captured = server.join().expect("qdrant requests");

    assert!(was_deleted);
    assert_eq!(captured.len(), 1);

    let only_request = &captured[0];
    // Endpoint and authentication header.
    assert!(
        only_request.contains("POST /collections/code_symbols_project-1/points/delete HTTP/1.1")
    );
    assert!(only_request.contains("api-key: qdrant-key"));
    // Filter conditions carried in the JSON body.
    assert!(only_request.contains(r#""key":"project_id""#));
    assert!(only_request.contains(r#""value":"project-1""#));
    assert!(only_request.contains(r#""key":"file_path""#));
    assert!(only_request.contains(r#""value":"src/lib.rs""#));
}
1094
/// Clearing a project's vectors must only read the project's own collection
/// and delete points filtered by `project_id`: no collection listing, no
/// collection deletion, and nothing mentioning "memory".
#[test]
fn clear_project_vectors_does_not_touch_memory_vector_collections() {
    // Scripted Qdrant replies: collection info first, then the delete acknowledgement.
    let collection_info = json!({"result": {"config": {"params": {"vectors": {"size": 3, "distance": "Cosine"}}}}});
    let delete_ack = json!({"result": {"operation_id": 1}});
    let (qdrant_url, server) = spawn_http_responses(vec![(200, collection_info), (200, delete_ack)]);

    let qdrant = QdrantConfig {
        url: Some(qdrant_url),
        api_key: None,
    };
    let embedding = EmbeddingConfig {
        api_base: "http://127.0.0.1:9/v1".to_string(),
        model: "unused".to_string(),
        api_key: None,
    };
    let settings = CodeVectorSettings {
        vector_dim: Some(3),
    };
    let mut lifecycle =
        CodeSymbolVectorLifecycle::new("project-1".to_string(), qdrant, embedding, settings)
            .expect("lifecycle");

    let cleared = lifecycle.clear_project_vectors().expect("clear vectors");
    let captured = server.join().expect("qdrant requests");

    assert_eq!(cleared.vectors_deleted, 1);
    assert_eq!(captured.len(), 2);

    let (lookup, purge) = (&captured[0], &captured[1]);
    assert!(lookup.contains("GET /collections/code_symbols_project-1 HTTP/1.1"));
    assert!(purge.contains("POST /collections/code_symbols_project-1/points/delete HTTP/1.1"));
    assert!(purge.contains(r#""key":"project_id""#));
    assert!(purge.contains(r#""value":"project-1""#));
    assert!(!purge.contains(r#""key":"file_path""#));

    // Negative checks across every captured request.
    assert!(!captured.iter().any(|request| request.contains("memory")));
    assert!(
        !captured
            .iter()
            .any(|request| request.contains("GET /collections HTTP/1.1"))
    );
    assert!(
        !captured
            .iter()
            .any(|request| request.contains("DELETE /collections/"))
    );
}
1145
/// Given a collection listing that mixes code-symbol collections with an
/// unrelated one, only the `code_symbols_`-named collections may be deleted,
/// in listing order.
#[test]
fn delete_prefixed_collections_deletes_only_code_symbol_collections() {
    // First reply: the collection listing; the next two acknowledge the deletes.
    let listing = json!({
        "result": {
            "collections": [
                {"name": "code_symbols_project-1"},
                {"name": "memory_vectors"},
                {"name": "code_symbols_project-2"}
            ]
        }
    });
    let (qdrant_url, server) = spawn_http_responses(vec![
        (200, listing),
        (200, json!({"result": true})),
        (200, json!({"result": true})),
    ]);
    let qdrant = QdrantConfig {
        url: Some(qdrant_url),
        api_key: None,
    };

    let deleted =
        delete_code_symbol_collections_with_prefix(&qdrant).expect("delete prefixed collections");
    let captured = server.join().expect("qdrant requests");

    let expected_names = vec![
        String::from("code_symbols_project-1"),
        String::from("code_symbols_project-2"),
    ];
    assert_eq!(deleted, expected_names);

    assert_eq!(captured.len(), 3);
    assert!(captured[0].contains("GET /collections HTTP/1.1"));
    assert!(captured[1].contains("DELETE /collections/code_symbols_project-1 HTTP/1.1"));
    assert!(captured[2].contains("DELETE /collections/code_symbols_project-2 HTTP/1.1"));
    assert!(
        !captured
            .iter()
            .any(|request| request.contains("DELETE /collections/memory_vectors"))
    );
}
1188
/// Round-trips one embedding call against a mock server: checks the decoded
/// vector as well as the path, bearer token, model and input sent on the wire.
#[test]
fn embedding_request_response() {
    let reply = json!({"data": [{"embedding": [0.25, 0.5, 0.75]}]});
    let (base_url, server) = spawn_http_responses(vec![(200, reply)]);
    let config = EmbeddingConfig {
        api_base: format!("{base_url}/v1"),
        model: "embed-small".to_string(),
        api_key: Some("embedding-key".to_string()),
    };

    let embedding = embed_text(&config, "dimension_probe").expect("embedding response");
    let captured = server.join().expect("server thread");

    // Response decoding.
    assert_eq!(embedding, vec![0.25, 0.5, 0.75]);

    // Request shape.
    assert_eq!(captured.len(), 1);
    let sent = &captured[0];
    assert!(sent.contains("POST /v1/embeddings HTTP/1.1"));
    assert!(sent.contains("authorization: Bearer embedding-key"));
    assert!(sent.contains(r#""model":"embed-small""#));
    assert!(sent.contains(r#""input":"dimension_probe""#));
}
1211
/// Exercises `ensure_collection` in two configurations:
///
/// 1. `vector_dim: None` with an embedding mock that returns a 3-element
///    vector; the test expects a size-3 Cosine collection to be created via
///    `PUT`, and exactly one embedding request across two `ensure_collection`
///    calls.
/// 2. `vector_dim: Some(1536)` with an embedding endpoint that is never
///    served by a mock; the test expects the `PUT` body to carry size 1536.
#[test]
fn ensure_collection_resolves_vector_size_and_distance() {
    // --- Scenario 1: size comes from the embedding response -----------------
    let probe_reply = json!({"data": [{"embedding": [0.1, 0.2, 0.3]}]});
    let (embedding_url, embedding_server) = spawn_http_responses(vec![(200, probe_reply)]);
    // Qdrant script: collection missing, create acknowledged, then existing schema.
    let (qdrant_url, qdrant_server) = spawn_http_responses(vec![
        (404, json!({"status": "not found"})),
        (200, json!({"result": true})),
        (
            200,
            json!({"result": {"config": {"params": {"vectors": {"size": 3, "distance": "Cosine"}}}}}),
        ),
    ]);

    let probed_qdrant = QdrantConfig {
        url: Some(qdrant_url),
        api_key: None,
    };
    let probed_embedding = EmbeddingConfig {
        api_base: format!("{embedding_url}/v1"),
        model: "embed-small".to_string(),
        api_key: None,
    };
    let mut lifecycle = CodeSymbolVectorLifecycle::new(
        "project-1".to_string(),
        probed_qdrant,
        probed_embedding,
        CodeVectorSettings { vector_dim: None },
    )
    .expect("lifecycle");

    let created = lifecycle.ensure_collection().expect("create collection");
    let reused = lifecycle.ensure_collection().expect("reuse collection");
    let embedding_requests = embedding_server.join().expect("embedding requests");
    let qdrant_requests = qdrant_server.join().expect("qdrant requests");

    assert_eq!(created.size, 3);
    assert_eq!(created.distance, VECTOR_DISTANCE_COSINE);
    assert_eq!(reused.size, 3);
    assert_eq!(embedding_requests.len(), 1, "dimension probe is cached");

    let create_call = &qdrant_requests[1];
    assert!(create_call.contains("PUT /collections/code_symbols_project-1 HTTP/1.1"));
    assert!(create_call.contains(r#""size":3"#));
    assert!(create_call.contains(r#""distance":"Cosine""#));

    // --- Scenario 2: size comes from explicit settings ----------------------
    let (explicit_qdrant_url, explicit_server) = spawn_http_responses(vec![
        (404, json!({"status": "not found"})),
        (200, json!({"result": true})),
    ]);
    let explicit_qdrant = QdrantConfig {
        url: Some(explicit_qdrant_url),
        api_key: None,
    };
    // No mock is started for this embedding endpoint in this scenario.
    let unused_embedding = EmbeddingConfig {
        api_base: "http://127.0.0.1:9/v1".to_string(),
        model: "unused".to_string(),
        api_key: None,
    };
    let explicit_settings = CodeVectorSettings {
        vector_dim: Some(1536),
    };
    let mut explicit = CodeSymbolVectorLifecycle::new(
        "project-1".to_string(),
        explicit_qdrant,
        unused_embedding,
        explicit_settings,
    )
    .expect("lifecycle with explicit size");

    let schema = explicit.ensure_collection().expect("explicit size create");
    let explicit_requests = explicit_server.join().expect("explicit qdrant requests");

    assert_eq!(schema.size, 1536);
    assert!(explicit_requests[1].contains(r#""size":1536"#));
}
1280
/// Architectural guard: walks every `.rs` file under the crate's `src`
/// directory and fails if any file other than `vector/code_symbols.rs`
/// contains one of the Qdrant lifecycle REST path fragments.
#[test]
fn lifecycle_http_scoped_to_module() {
    /// Recursively scans `dir`, pushing each offending `.rs` file onto `found`.
    fn scan(dir: &std::path::Path, found: &mut Vec<std::path::PathBuf>) {
        // Path fragments that identify lifecycle REST calls.
        const LIFECYCLE_REST: [&str; 4] = [
            "/points/delete",
            "points/delete",
            "collections/{collection}",
            "/collections/{collection}",
        ];

        for entry in std::fs::read_dir(dir).expect("read source directory") {
            let candidate = entry.expect("source entry").path();
            if candidate.is_dir() {
                scan(&candidate, found);
            } else if candidate.extension().and_then(|ext| ext.to_str()) == Some("rs") {
                let text = std::fs::read_to_string(&candidate).expect("read source file");
                let mentions_rest = LIFECYCLE_REST.iter().any(|needle| text.contains(needle));
                // The one module allowed to contain these fragments is exempt.
                if mentions_rest && !candidate.ends_with("vector/code_symbols.rs") {
                    found.push(candidate);
                }
            }
        }
    }

    let crate_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
    let mut offenders = Vec::new();
    scan(&crate_root.join("src"), &mut offenders);

    assert!(
        offenders.is_empty(),
        "Qdrant lifecycle REST must stay scoped to vector/code_symbols.rs: {offenders:?}"
    );
}
1319
/// Source-level guard: the text of `code_symbols.rs` must mention each of
/// the `gobby_core` Qdrant entry points listed below.
///
/// NOTE(review): `include_str!` embeds the whole file. If this test module
/// lives in `code_symbols.rs` itself, the literals below also occur in the
/// test's own source, so these checks would pass regardless of the
/// production code — confirm, and consider restricting the search to the
/// non-test portion of the file.
#[test]
fn routes_through_gobby_core_qdrant() {
    let module_text = include_str!("code_symbols.rs");

    // Configuration and client plumbing.
    assert!(module_text.contains("gobby_core::config::resolve_qdrant_config"));
    assert!(module_text.contains("gobby_core::qdrant::with_qdrant"));
    // Collection naming.
    assert!(module_text.contains("gobby_core::qdrant::collection_name"));
    assert!(module_text.contains("CollectionScope::Custom"));
    // Data-plane operations.
    assert!(module_text.contains("gobby_core::qdrant::search"));
    assert!(module_text.contains("gobby_core::qdrant::upsert"));
}
1330
1331 fn spawn_http_responses(
1332 responses: Vec<(u16, Value)>,
1333 ) -> (String, thread::JoinHandle<Vec<String>>) {
1334 let listener = TcpListener::bind("127.0.0.1:0").expect("bind test server");
1335 let addr = listener.local_addr().expect("local addr");
1336 let handle = thread::spawn(move || {
1337 let mut requests = Vec::new();
1338 for (status, body) in responses {
1339 let (mut stream, _) = listener.accept().expect("accept request");
1340 requests.push(read_http_request(&mut stream));
1341
1342 let body = body.to_string();
1343 write!(
1344 stream,
1345 "HTTP/1.1 {status} OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}",
1346 body.len()
1347 )
1348 .expect("write response");
1349 }
1350 requests
1351 });
1352
1353 (format!("http://{addr}"), handle)
1354 }
1355
/// Reads a single HTTP request from `stream` and returns it as text.
///
/// Bytes are accumulated until the header terminator (`\r\n\r\n`) has been
/// seen and at least `Content-Length` body bytes have followed it, or until
/// the stream reports end-of-file. A request without a (parsable)
/// `Content-Length` header is treated as having an empty body.
///
/// The `Content-Length` header is matched case-insensitively and tolerates
/// any amount of whitespace (including none) around the colon and value, so
/// `Content-Length:5` is honoured just like `content-length: 5`.
///
/// Invalid UTF-8 is replaced lossily. Bytes already read past the declared
/// body length are kept in the returned text.
///
/// # Panics
///
/// Panics if reading from `stream` fails.
fn read_http_request(stream: &mut impl Read) -> String {
    /// Extracts the declared body length from a raw header section, or 0 when
    /// no parsable `Content-Length` header is present.
    fn declared_body_len(header_bytes: &[u8]) -> usize {
        String::from_utf8_lossy(header_bytes)
            .lines()
            .find_map(|line| {
                // The request line and malformed lines have no usable `name: value` pair.
                let (name, value) = line.split_once(':')?;
                if name.trim().eq_ignore_ascii_case("content-length") {
                    value.trim().parse::<usize>().ok()
                } else {
                    None
                }
            })
            .unwrap_or(0)
    }

    let mut request = Vec::new();
    let mut buffer = [0; 4096];
    // Total request size (headers + terminator + body), known once headers are complete.
    let mut expected_len: Option<usize> = None;

    loop {
        let n = stream.read(&mut buffer).expect("read request");
        if n == 0 {
            // Peer closed the connection; return whatever arrived.
            break;
        }
        request.extend_from_slice(&buffer[..n]);

        if expected_len.is_none() {
            expected_len = request
                .windows(4)
                .position(|window| window == b"\r\n\r\n")
                .map(|header_end| header_end + 4 + declared_body_len(&request[..header_end]));
        }

        if expected_len.is_some_and(|total| request.len() >= total) {
            break;
        }
    }

    String::from_utf8_lossy(&request).into_owned()
}
1393}