1use postgres::GenericClient;
2use reqwest::StatusCode;
3use serde::{Deserialize, Serialize};
4use serde_json::{Map, Value, json};
5use std::fmt;
6use std::time::Duration;
7
8use crate::config::{
9 CODE_SYMBOL_COLLECTION_PREFIX, CodeVectorSettings, Context, EmbeddingConfig, QdrantConfig,
10};
11use crate::db;
12use crate::models::{ProjectionMetadata, ProjectionProvenance, Symbol};
13use gobby_core::degradation::ServiceState;
14use gobby_core::qdrant::{CollectionScope, SearchRequest, UpsertRequest};
15
/// Distance metric name written into new Qdrant collections and required of
/// existing ones when their vector schema is validated.
pub const VECTOR_DISTANCE_COSINE: &str = "Cosine";
/// Text that is embedded once to discover the embedding dimension when no
/// explicit `vector_dim` setting is supplied.
const DIMENSION_PROBE_TEXT: &str = "dimension_probe";
/// Timeout applied to every blocking HTTP client built in this module.
const HTTP_TIMEOUT: Duration = Duration::from_secs(10);
20
21#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
22pub struct CodeSymbolVectorSearchRequest {
23 pub project_id: String,
24 pub query: String,
25 pub limit: usize,
26 pub collection_prefix: String,
27}
28
29#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
30pub struct CodeSymbolVectorSearchHit {
31 pub symbol_id: String,
32 pub score: f64,
33}
34
35#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
36pub struct CodeSymbolVectorPayload {
37 pub project_id: String,
38 pub file_path: String,
39 pub symbol_id: String,
40 pub name: String,
41 pub kind: String,
42 pub language: String,
43 pub line_start: usize,
44 pub line_end: usize,
45 pub byte_start: usize,
46 pub byte_end: usize,
47 #[serde(skip_serializing_if = "Option::is_none")]
48 pub signature: Option<String>,
49 #[serde(skip_serializing_if = "Option::is_none")]
50 pub docstring: Option<String>,
51 pub provenance: ProjectionProvenance,
52 #[serde(skip_serializing_if = "Option::is_none")]
53 pub confidence: Option<f64>,
54 pub source_system: String,
55 pub source_file_path: String,
56 pub source_line: usize,
57 pub source_line_start: usize,
58 pub source_line_end: usize,
59 pub source_byte_start: usize,
60 pub source_byte_end: usize,
61 pub source_symbol_id: String,
62 #[serde(skip_serializing_if = "Option::is_none")]
63 pub summary: Option<String>,
64}
65
66impl CodeSymbolVectorPayload {
67 pub fn from_symbol(symbol: &Symbol) -> Self {
68 let metadata = ProjectionMetadata::gcode_extracted()
69 .with_source_file_path(&symbol.file_path)
70 .with_source_line(symbol.line_start)
71 .with_source_symbol_id(&symbol.id);
72
73 Self {
74 project_id: symbol.project_id.clone(),
75 file_path: symbol.file_path.clone(),
76 symbol_id: symbol.id.clone(),
77 name: symbol.name.clone(),
78 kind: symbol.kind.clone(),
79 language: symbol.language.clone(),
80 line_start: symbol.line_start,
81 line_end: symbol.line_end,
82 byte_start: symbol.byte_start,
83 byte_end: symbol.byte_end,
84 signature: symbol.signature.clone(),
85 docstring: symbol.docstring.clone(),
86 provenance: metadata.provenance,
87 confidence: metadata.confidence,
88 source_system: metadata.source_system,
89 source_file_path: metadata
90 .source_file_path
91 .unwrap_or_else(|| symbol.file_path.clone()),
92 source_line: metadata.source_line.unwrap_or(symbol.line_start),
93 source_line_start: symbol.line_start,
94 source_line_end: symbol.line_end,
95 source_byte_start: symbol.byte_start,
96 source_byte_end: symbol.byte_end,
97 source_symbol_id: metadata
98 .source_symbol_id
99 .unwrap_or_else(|| symbol.id.clone()),
100 summary: symbol.summary.clone(),
101 }
102 }
103}
104
105#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
106#[serde(rename_all = "snake_case")]
107pub enum CodeSymbolVectorLifecycleAction {
108 Ensure,
109 SyncFile,
110 Clear,
111 Rebuild,
112}
113
114#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
115pub struct CodeSymbolVectorLifecycleStatus {
116 pub project_id: String,
117 pub collection: String,
118 pub action: CodeSymbolVectorLifecycleAction,
119}
120
121#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
122pub struct VectorCollectionSchema {
123 pub size: usize,
124 pub distance: String,
125}
126
/// Vector schema as read back from an existing collection; either part may be
/// missing when the response did not contain it in the expected form.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ExistingVectorCollectionSchema {
    /// Vector dimension, if it could be read.
    size: Option<usize>,
    /// Distance metric name, if it could be read.
    distance: Option<String>,
}
132
133#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
134pub struct CodeSymbolVectorLifecycleOutput {
135 pub project_id: String,
136 pub collection: String,
137 pub action: CodeSymbolVectorLifecycleAction,
138 pub file_path: Option<String>,
139 pub symbols: usize,
140 pub vectors_upserted: usize,
141 pub vectors_deleted: usize,
142 pub summary: String,
143}
144
/// Errors raised by vector lifecycle operations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VectorLifecycleError {
    /// No usable Qdrant configuration (for example a missing URL).
    MissingQdrantConfig,
    /// No usable embedding configuration (for example a blank API base).
    MissingEmbeddingConfig,
    /// The embedding endpoint answered with a non-success HTTP status.
    EmbeddingHttp {
        status: u16,
        body: String,
    },
    /// The embedding request failed or its response could not be interpreted.
    EmbeddingResponse(String),
    /// Qdrant answered `operation` with a non-success HTTP status.
    QdrantHttp {
        operation: &'static str,
        status: u16,
        body: String,
    },
    /// A Qdrant request failed for a reason other than an HTTP status.
    QdrantOperation(String),
    /// The existing collection's vector schema differs from the expected one;
    /// `found_*` is `None` when the value could not be read.
    DimensionMismatch {
        collection: String,
        expected_size: usize,
        found_size: Option<usize>,
        expected_distance: &'static str,
        found_distance: Option<String>,
    },
}
168
169impl fmt::Display for VectorLifecycleError {
170 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
171 match self {
172 Self::MissingQdrantConfig => {
173 write!(f, "Qdrant config is required for vector lifecycle commands")
174 }
175 Self::MissingEmbeddingConfig => write!(
176 f,
177 "embedding config is required for vector lifecycle commands"
178 ),
179 Self::EmbeddingHttp { status, body } => {
180 write!(f, "embedding request failed: HTTP {status}: {body}")
181 }
182 Self::EmbeddingResponse(reason) => {
183 write!(f, "embedding response was invalid: {reason}")
184 }
185 Self::QdrantHttp {
186 operation,
187 status,
188 body,
189 } => write!(f, "Qdrant {operation} failed: HTTP {status}: {body}"),
190 Self::QdrantOperation(reason) => write!(f, "Qdrant operation failed: {reason}"),
191 Self::DimensionMismatch {
192 collection,
193 expected_size,
194 found_size,
195 expected_distance,
196 found_distance,
197 } => write!(
198 f,
199 "Qdrant collection `{collection}` has incompatible vector schema: expected size {expected_size} distance {expected_distance}, found size {} distance {}. Refusing to migrate, drop, or recreate the collection.",
200 found_size
201 .map(|value| value.to_string())
202 .unwrap_or_else(|| "unknown".to_string()),
203 found_distance.as_deref().unwrap_or("unknown")
204 ),
205 }
206 }
207}
208
209impl std::error::Error for VectorLifecycleError {}
210
211#[derive(Debug)]
212pub struct CodeSymbolVectorLifecycle {
213 project_id: String,
214 collection: String,
215 qdrant: QdrantConfig,
216 embedding: EmbeddingConfig,
217 settings: CodeVectorSettings,
218 probed_vector_size: Option<usize>,
219 client: reqwest::blocking::Client,
220}
221
222pub fn collection_name(collection_prefix: &str, project_id: &str) -> String {
223 let collection = format!("{collection_prefix}{project_id}");
224 gobby_core::qdrant::collection_name("gcode", CollectionScope::Custom(&collection))
225}
226
227pub fn delete_project_collection(
228 qdrant: &QdrantConfig,
229 project_id: &str,
230) -> Result<bool, VectorLifecycleError> {
231 let client = qdrant_http_client()?;
232 let collection = collection_name(CODE_SYMBOL_COLLECTION_PREFIX, project_id);
233 delete_qdrant_collection(&client, qdrant, &collection)
234}
235
236pub fn delete_file_vectors(
237 qdrant: &QdrantConfig,
238 project_id: &str,
239 file_path: &str,
240) -> Result<bool, VectorLifecycleError> {
241 let client = qdrant_http_client()?;
242 let collection = collection_name(CODE_SYMBOL_COLLECTION_PREFIX, project_id);
243 delete_vectors_for_filter(&client, qdrant, &collection, project_id, Some(file_path))
244}
245
246pub fn delete_code_symbol_collections_with_prefix(
247 qdrant: &QdrantConfig,
248) -> Result<Vec<String>, VectorLifecycleError> {
249 let client = qdrant_http_client()?;
250 let resp = qdrant_request_for_config(&client, qdrant, reqwest::Method::GET, "/collections")?
251 .send()
252 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
253 let status = resp.status();
254 if !status.is_success() {
255 return Err(qdrant_http_error("list collections", status, resp));
256 }
257
258 let data: Value = resp
259 .json()
260 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
261 let collections = parse_collection_names(&data)
262 .into_iter()
263 .filter(|name| name.starts_with(CODE_SYMBOL_COLLECTION_PREFIX))
264 .collect::<Vec<_>>();
265
266 let mut deleted = Vec::new();
267 for collection in collections {
268 if delete_qdrant_collection(&client, qdrant, &collection)? {
269 deleted.push(collection);
270 }
271 }
272 Ok(deleted)
273}
274
275pub fn resolve_lifecycle_qdrant_config(
276 source: &mut impl gobby_core::config::ConfigSource,
277) -> Option<QdrantConfig> {
278 gobby_core::config::resolve_qdrant_config(source)
279}
280
281pub fn lifecycle_status(
282 project_id: impl Into<String>,
283 collection_prefix: &str,
284 action: CodeSymbolVectorLifecycleAction,
285) -> CodeSymbolVectorLifecycleStatus {
286 let project_id = project_id.into();
287 CodeSymbolVectorLifecycleStatus {
288 collection: collection_name(collection_prefix, &project_id),
289 project_id,
290 action,
291 }
292}
293
294pub fn embed_text(config: &EmbeddingConfig, text: &str) -> Result<Vec<f32>, VectorLifecycleError> {
295 let client = reqwest::blocking::Client::builder()
296 .timeout(HTTP_TIMEOUT)
297 .build()
298 .map_err(|err| VectorLifecycleError::EmbeddingResponse(err.to_string()))?;
299
300 let body = json!({
301 "model": config.model,
302 "input": text,
303 });
304
305 let url = format!("{}/embeddings", config.api_base.trim_end_matches('/'));
306 let mut req = client.post(&url).json(&body);
307
308 if let Some(key) = &config.api_key {
309 req = req.header("Authorization", format!("Bearer {key}"));
310 }
311
312 let resp = req
313 .send()
314 .map_err(|err| VectorLifecycleError::EmbeddingResponse(err.to_string()))?;
315 if !resp.status().is_success() {
316 let status = resp.status().as_u16();
317 let body = resp.text().unwrap_or_default();
318 return Err(VectorLifecycleError::EmbeddingHttp { status, body });
319 }
320
321 let data: Value = resp
322 .json()
323 .map_err(|err| VectorLifecycleError::EmbeddingResponse(err.to_string()))?;
324 let embedding: Vec<f32> = data
325 .get("data")
326 .and_then(Value::as_array)
327 .and_then(|values| values.first())
328 .and_then(|value| value.get("embedding"))
329 .and_then(Value::as_array)
330 .ok_or_else(|| {
331 VectorLifecycleError::EmbeddingResponse("missing data[0].embedding array".to_string())
332 })?
333 .iter()
334 .map(|value| {
335 value.as_f64().map(|f| f as f32).ok_or_else(|| {
336 VectorLifecycleError::EmbeddingResponse(
337 "embedding array contains a non-number".to_string(),
338 )
339 })
340 })
341 .collect::<Result<Vec<_>, _>>()?;
342
343 if embedding.is_empty() {
344 Err(VectorLifecycleError::EmbeddingResponse(
345 "embedding vector was empty".to_string(),
346 ))
347 } else {
348 Ok(embedding)
349 }
350}
351
352pub fn embed_query(config: &EmbeddingConfig, text: &str) -> Option<Vec<f32>> {
353 embed_text(config, &format!("search_query: {text}")).ok()
354}
355
356pub fn vector_text_for_symbol(symbol: &Symbol) -> String {
357 let mut lines = vec![
358 format!("name: {}", symbol.name),
359 format!("qualified_name: {}", symbol.qualified_name),
360 format!("kind: {}", symbol.kind),
361 format!("language: {}", symbol.language),
362 format!("file_path: {}", symbol.file_path),
363 format!("range: {}-{}", symbol.line_start, symbol.line_end),
364 ];
365 if let Some(signature) = symbol
366 .signature
367 .as_deref()
368 .filter(|value| !value.trim().is_empty())
369 {
370 lines.push(format!("signature: {signature}"));
371 }
372 if let Some(docstring) = symbol
373 .docstring
374 .as_deref()
375 .filter(|value| !value.trim().is_empty())
376 {
377 lines.push(format!("docstring: {docstring}"));
378 }
379 if let Some(summary) = symbol
380 .summary
381 .as_deref()
382 .filter(|value| !value.trim().is_empty())
383 {
384 lines.push(format!("summary: {summary}"));
385 }
386 lines.join("\n")
387}
388
389pub fn vector_search(
390 config: &QdrantConfig,
391 collection: &str,
392 query_vector: &[f32],
393 limit: usize,
394) -> anyhow::Result<Vec<(String, f64)>> {
395 let request = SearchRequest {
396 vector: query_vector.to_vec(),
397 limit,
398 filter: None,
399 };
400 let (hits, _) = gobby_core::qdrant::with_qdrant(Some(config), Vec::new(), |config| {
401 gobby_core::qdrant::search(config, collection, request)
402 })?;
403 Ok(hits
404 .into_iter()
405 .map(|hit| (hit.id, f64::from(hit.score)))
406 .collect())
407}
408
409impl CodeSymbolVectorLifecycle {
410 pub fn new(
411 project_id: String,
412 qdrant: QdrantConfig,
413 embedding: EmbeddingConfig,
414 settings: CodeVectorSettings,
415 ) -> Result<Self, VectorLifecycleError> {
416 if qdrant
417 .url
418 .as_deref()
419 .filter(|url| !url.trim().is_empty())
420 .is_none()
421 {
422 return Err(VectorLifecycleError::MissingQdrantConfig);
423 }
424 if embedding.api_base.trim().is_empty() {
425 return Err(VectorLifecycleError::MissingEmbeddingConfig);
426 }
427
428 let collection = collection_name(CODE_SYMBOL_COLLECTION_PREFIX, &project_id);
429 let client = reqwest::blocking::Client::builder()
430 .timeout(HTTP_TIMEOUT)
431 .build()
432 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
433 Ok(Self {
434 project_id,
435 collection,
436 qdrant,
437 embedding,
438 settings,
439 probed_vector_size: None,
440 client,
441 })
442 }
443
444 pub fn collection(&self) -> &str {
445 &self.collection
446 }
447
448 pub fn ensure_collection(&mut self) -> Result<VectorCollectionSchema, VectorLifecycleError> {
449 let expected = self.expected_schema()?;
450 self.require_qdrant_boundary()?;
451 match self.get_collection_schema()? {
452 Some(found) => self.ensure_compatible_schema(expected, found),
453 None => {
454 self.create_collection(&expected)?;
455 Ok(expected)
456 }
457 }
458 }
459
460 pub fn sync_file_symbols(
461 &mut self,
462 file_path: &str,
463 symbols: &[Symbol],
464 ) -> Result<CodeSymbolVectorLifecycleOutput, VectorLifecycleError> {
465 self.ensure_collection()?;
466 let points = self.points_for_symbols(symbols)?;
467 self.delete_vectors(Some(file_path))?;
468 self.upsert_points(points)?;
469
470 Ok(self.output(
471 CodeSymbolVectorLifecycleAction::SyncFile,
472 Some(file_path.to_string()),
473 symbols.len(),
474 symbols.len(),
475 1,
476 ))
477 }
478
479 pub fn clear_project_vectors(
480 &mut self,
481 ) -> Result<CodeSymbolVectorLifecycleOutput, VectorLifecycleError> {
482 let expected = self.expected_schema()?;
483 self.require_qdrant_boundary()?;
484 let deleted = match self.get_collection_schema()? {
485 Some(found) => {
486 self.ensure_compatible_schema(expected, found)?;
487 self.delete_vectors(None)?;
488 1
489 }
490 None => 0,
491 };
492
493 Ok(self.output(CodeSymbolVectorLifecycleAction::Clear, None, 0, 0, deleted))
494 }
495
496 pub fn rebuild_symbols(
497 &mut self,
498 symbols: &[Symbol],
499 ) -> Result<CodeSymbolVectorLifecycleOutput, VectorLifecycleError> {
500 self.ensure_collection()?;
501 let points = self.points_for_symbols(symbols)?;
502 self.delete_vectors(None)?;
503 self.upsert_points(points)?;
504
505 Ok(self.output(
506 CodeSymbolVectorLifecycleAction::Rebuild,
507 None,
508 symbols.len(),
509 symbols.len(),
510 1,
511 ))
512 }
513
514 fn output(
515 &self,
516 action: CodeSymbolVectorLifecycleAction,
517 file_path: Option<String>,
518 symbols: usize,
519 vectors_upserted: usize,
520 vectors_deleted: usize,
521 ) -> CodeSymbolVectorLifecycleOutput {
522 CodeSymbolVectorLifecycleOutput {
523 project_id: self.project_id.clone(),
524 collection: self.collection.clone(),
525 action,
526 file_path,
527 symbols,
528 vectors_upserted,
529 vectors_deleted,
530 summary: format!(
531 "{vectors_upserted} vector(s) upserted, {vectors_deleted} delete operation(s) issued"
532 ),
533 }
534 }
535
536 fn expected_schema(&mut self) -> Result<VectorCollectionSchema, VectorLifecycleError> {
537 let size = match self.settings.vector_dim {
538 Some(size) => size,
539 None => match self.probed_vector_size {
540 Some(size) => size,
541 None => {
542 let size = embed_text(&self.embedding, DIMENSION_PROBE_TEXT)?.len();
543 self.probed_vector_size = Some(size);
544 size
545 }
546 },
547 };
548
549 Ok(VectorCollectionSchema {
550 size,
551 distance: VECTOR_DISTANCE_COSINE.to_string(),
552 })
553 }
554
555 fn require_qdrant_boundary(&self) -> Result<(), VectorLifecycleError> {
556 let ((), state) = gobby_core::qdrant::with_qdrant(Some(&self.qdrant), (), |_| Ok(()))
557 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
558 match state {
559 ServiceState::Available => Ok(()),
560 ServiceState::NotConfigured => Err(VectorLifecycleError::MissingQdrantConfig),
561 other => Err(VectorLifecycleError::QdrantOperation(format!(
562 "unexpected Qdrant service state: {other:?}"
563 ))),
564 }
565 }
566
567 fn ensure_compatible_schema(
568 &self,
569 expected: VectorCollectionSchema,
570 found: ExistingVectorCollectionSchema,
571 ) -> Result<VectorCollectionSchema, VectorLifecycleError> {
572 if found.size == Some(expected.size)
573 && found.distance.as_deref() == Some(&expected.distance)
574 {
575 return Ok(VectorCollectionSchema {
576 size: expected.size,
577 distance: expected.distance,
578 });
579 }
580
581 Err(VectorLifecycleError::DimensionMismatch {
582 collection: self.collection.clone(),
583 expected_size: expected.size,
584 found_size: found.size,
585 expected_distance: VECTOR_DISTANCE_COSINE,
586 found_distance: found.distance,
587 })
588 }
589
590 fn get_collection_schema(
591 &self,
592 ) -> Result<Option<ExistingVectorCollectionSchema>, VectorLifecycleError> {
593 let resp = self
594 .qdrant_request(
595 reqwest::Method::GET,
596 &format!("/collections/{}", self.collection),
597 )?
598 .send()
599 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
600 let status = resp.status();
601 if status == StatusCode::NOT_FOUND {
602 return Ok(None);
603 }
604 if !status.is_success() {
605 return Err(qdrant_http_error("get collection", status, resp));
606 }
607
608 let data: Value = resp
609 .json()
610 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
611 Ok(parse_collection_schema(&data))
612 }
613
614 fn create_collection(
615 &self,
616 schema: &VectorCollectionSchema,
617 ) -> Result<(), VectorLifecycleError> {
618 let body = json!({
619 "vectors": {
620 "size": schema.size,
621 "distance": schema.distance,
622 },
623 });
624 let resp = self
625 .qdrant_request(
626 reqwest::Method::PUT,
627 &format!("/collections/{}", self.collection),
628 )?
629 .json(&body)
630 .send()
631 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
632 if !resp.status().is_success() {
633 return Err(qdrant_http_error("create collection", resp.status(), resp));
634 }
635 Ok(())
636 }
637
638 fn delete_vectors(&self, file_path: Option<&str>) -> Result<(), VectorLifecycleError> {
639 delete_vectors_for_filter(
640 &self.client,
641 &self.qdrant,
642 &self.collection,
643 &self.project_id,
644 file_path,
645 )
646 .map(|_| ())
647 }
648
649 fn upsert_points(&self, points: Vec<UpsertRequest>) -> Result<(), VectorLifecycleError> {
650 if points.is_empty() {
651 return Ok(());
652 }
653 let ((), state) = gobby_core::qdrant::with_qdrant(Some(&self.qdrant), (), |config| {
654 gobby_core::qdrant::upsert(config, &self.collection, points)
655 })
656 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
657 match state {
658 ServiceState::Available => Ok(()),
659 ServiceState::NotConfigured => Err(VectorLifecycleError::MissingQdrantConfig),
660 other => Err(VectorLifecycleError::QdrantOperation(format!(
661 "unexpected Qdrant service state: {other:?}"
662 ))),
663 }
664 }
665
666 fn points_for_symbols(
667 &self,
668 symbols: &[Symbol],
669 ) -> Result<Vec<UpsertRequest>, VectorLifecycleError> {
670 symbols
671 .iter()
672 .map(|symbol| {
673 let vector = embed_text(&self.embedding, &vector_text_for_symbol(symbol))?;
674 let payload = payload_map(CodeSymbolVectorPayload::from_symbol(symbol))?;
675 Ok(UpsertRequest {
676 id: symbol.id.clone(),
677 vector,
678 payload,
679 })
680 })
681 .collect()
682 }
683
684 fn qdrant_request(
685 &self,
686 method: reqwest::Method,
687 path: &str,
688 ) -> Result<reqwest::blocking::RequestBuilder, VectorLifecycleError> {
689 qdrant_request_for_config(&self.client, &self.qdrant, method, path)
690 }
691}
692
693pub fn fetch_symbols_for_file(
694 conn: &mut impl GenericClient,
695 project_id: &str,
696 file_path: &str,
697) -> anyhow::Result<Vec<Symbol>> {
698 let columns = db::symbol_select_columns("");
699 conn.query(
700 &format!(
701 "SELECT {columns} FROM code_symbols
702 WHERE project_id = $1 AND file_path = $2
703 ORDER BY file_path, byte_start, id"
704 ),
705 &[&project_id, &file_path],
706 )?
707 .into_iter()
708 .map(|row| Symbol::from_row(&row))
709 .collect()
710}
711
712pub fn fetch_symbols_for_project(
713 conn: &mut impl GenericClient,
714 project_id: &str,
715) -> anyhow::Result<Vec<Symbol>> {
716 let columns = db::symbol_select_columns("");
717 conn.query(
718 &format!(
719 "SELECT {columns} FROM code_symbols
720 WHERE project_id = $1
721 ORDER BY file_path, byte_start, id"
722 ),
723 &[&project_id],
724 )?
725 .into_iter()
726 .map(|row| Symbol::from_row(&row))
727 .collect()
728}
729
730fn payload_map(
731 payload: CodeSymbolVectorPayload,
732) -> Result<Map<String, Value>, VectorLifecycleError> {
733 match serde_json::to_value(payload)
734 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?
735 {
736 Value::Object(map) => Ok(map),
737 _ => Err(VectorLifecycleError::QdrantOperation(
738 "vector payload did not serialize to an object".to_string(),
739 )),
740 }
741}
742
743fn parse_collection_schema(data: &Value) -> Option<ExistingVectorCollectionSchema> {
744 let vectors = data.pointer("/result/config/params/vectors")?;
745 let size = vectors
746 .get("size")
747 .and_then(Value::as_u64)
748 .map(|size| size as usize);
749 let distance = vectors
750 .get("distance")
751 .and_then(Value::as_str)
752 .map(str::to_string);
753 Some(ExistingVectorCollectionSchema { size, distance })
754}
755
756fn parse_collection_names(data: &Value) -> Vec<String> {
757 data.pointer("/result/collections")
758 .and_then(Value::as_array)
759 .map(|collections| {
760 collections
761 .iter()
762 .filter_map(|collection| {
763 collection
764 .get("name")
765 .and_then(Value::as_str)
766 .map(str::to_string)
767 })
768 .collect()
769 })
770 .unwrap_or_default()
771}
772
773fn qdrant_http_client() -> Result<reqwest::blocking::Client, VectorLifecycleError> {
774 reqwest::blocking::Client::builder()
775 .timeout(HTTP_TIMEOUT)
776 .build()
777 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))
778}
779
780fn qdrant_request_for_config(
781 client: &reqwest::blocking::Client,
782 qdrant: &QdrantConfig,
783 method: reqwest::Method,
784 path: &str,
785) -> Result<reqwest::blocking::RequestBuilder, VectorLifecycleError> {
786 let base = qdrant
787 .url
788 .as_deref()
789 .ok_or(VectorLifecycleError::MissingQdrantConfig)?
790 .trim_end_matches('/');
791 let url = format!("{base}{path}");
792 let mut req = client.request(method, url);
793 if let Some(key) = &qdrant.api_key {
794 req = req.header("api-key", key);
795 }
796 Ok(req)
797}
798
799fn delete_qdrant_collection(
800 client: &reqwest::blocking::Client,
801 qdrant: &QdrantConfig,
802 collection: &str,
803) -> Result<bool, VectorLifecycleError> {
804 let resp = qdrant_request_for_config(
805 client,
806 qdrant,
807 reqwest::Method::DELETE,
808 &format!("/collections/{collection}"),
809 )?
810 .send()
811 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
812 let status = resp.status();
813 if status == StatusCode::NOT_FOUND {
814 return Ok(false);
815 }
816 if !status.is_success() {
817 return Err(qdrant_http_error("delete collection", status, resp));
818 }
819 Ok(true)
820}
821
822fn delete_vectors_for_filter(
823 client: &reqwest::blocking::Client,
824 qdrant: &QdrantConfig,
825 collection: &str,
826 project_id: &str,
827 file_path: Option<&str>,
828) -> Result<bool, VectorLifecycleError> {
829 let mut must = vec![json!({
830 "key": "project_id",
831 "match": {"value": project_id},
832 })];
833 if let Some(file_path) = file_path {
834 must.push(json!({
835 "key": "file_path",
836 "match": {"value": file_path},
837 }));
838 }
839 let body = json!({
840 "filter": {
841 "must": must,
842 },
843 });
844 let resp = qdrant_request_for_config(
845 client,
846 qdrant,
847 reqwest::Method::POST,
848 &format!("/collections/{collection}/points/delete"),
849 )?
850 .json(&body)
851 .send()
852 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
853 let status = resp.status();
854 if status == StatusCode::NOT_FOUND {
855 return Ok(false);
856 }
857 if !status.is_success() {
858 return Err(qdrant_http_error("delete points", status, resp));
859 }
860 Ok(true)
861}
862
863fn qdrant_http_error(
864 operation: &'static str,
865 status: StatusCode,
866 resp: reqwest::blocking::Response,
867) -> VectorLifecycleError {
868 VectorLifecycleError::QdrantHttp {
869 operation,
870 status: status.as_u16(),
871 body: resp.text().unwrap_or_default(),
872 }
873}
874
875pub fn search_code_symbols(
876 ctx: &Context,
877 request: &CodeSymbolVectorSearchRequest,
878) -> Vec<CodeSymbolVectorSearchHit> {
879 let qdrant_config = match &ctx.qdrant {
880 Some(c) => c,
881 None => return vec![],
882 };
883
884 let embedding_config = match &ctx.embedding {
885 Some(c) => c,
886 None => return vec![],
887 };
888
889 let embedding = match embed_query(embedding_config, &request.query) {
890 Some(e) => e,
891 None => return vec![],
892 };
893
894 let collection = collection_name(&request.collection_prefix, &request.project_id);
895 vector_search(qdrant_config, &collection, &embedding, request.limit)
896 .unwrap_or_default()
897 .into_iter()
898 .map(|(symbol_id, score)| CodeSymbolVectorSearchHit { symbol_id, score })
899 .collect()
900}
901
902pub fn semantic_search(ctx: &Context, query: &str, limit: usize) -> Vec<(String, f64)> {
903 if ctx.qdrant.is_none() {
904 return vec![];
905 }
906
907 let request = CodeSymbolVectorSearchRequest {
908 project_id: ctx.project_id.clone(),
909 query: query.to_string(),
910 limit,
911 collection_prefix: CODE_SYMBOL_COLLECTION_PREFIX.to_string(),
912 };
913
914 search_code_symbols(ctx, &request)
915 .into_iter()
916 .map(|hit| (hit.symbol_id, hit.score))
917 .collect()
918}
919
920#[cfg(test)]
921mod tests {
922 use super::*;
923 use crate::config::{CodeVectorSettings, QdrantConfig};
924 use crate::models::{SOURCE_SYSTEM_GCODE, Symbol};
925 use serde_json::{Value, json};
926 use std::io::{Read, Write};
927 use std::net::TcpListener;
928 use std::thread;
929
930 fn test_symbol(summary: Option<String>) -> Symbol {
931 Symbol {
932 id: "symbol-1".to_string(),
933 project_id: "project-1".to_string(),
934 file_path: "src/lib.rs".to_string(),
935 name: "run".to_string(),
936 qualified_name: "crate::run".to_string(),
937 kind: "function".to_string(),
938 language: "rust".to_string(),
939 byte_start: 10,
940 byte_end: 40,
941 line_start: 3,
942 line_end: 5,
943 signature: None,
944 docstring: None,
945 parent_symbol_id: None,
946 content_hash: "hash".to_string(),
947 summary,
948 created_at: String::new(),
949 updated_at: String::new(),
950 }
951 }
952
953 #[test]
954 fn payloads_carry_provenance_metadata() {
955 let payload = CodeSymbolVectorPayload::from_symbol(&test_symbol(Some("does work".into())));
956
957 assert_eq!(payload.provenance, ProjectionProvenance::Extracted);
958 assert_eq!(payload.confidence, Some(1.0));
959 assert_eq!(payload.source_system, SOURCE_SYSTEM_GCODE);
960 assert_eq!(payload.source_file_path, "src/lib.rs");
961 assert_eq!(payload.source_line_start, 3);
962 assert_eq!(payload.source_line_end, 5);
963 assert_eq!(payload.source_byte_start, 10);
964 assert_eq!(payload.source_byte_end, 40);
965 assert_eq!(payload.source_line, 3);
966 assert_eq!(payload.source_symbol_id, "symbol-1");
967 assert_eq!(payload.summary.as_deref(), Some("does work"));
968 assert_eq!(payload.signature, None);
969 assert_eq!(payload.docstring, None);
970
971 let value = serde_json::to_value(payload).expect("payload serializes");
972 assert_eq!(value["provenance"], "EXTRACTED");
973 assert_eq!(value["confidence"], 1.0);
974 assert_eq!(value["source_system"], SOURCE_SYSTEM_GCODE);
975 assert_eq!(value["source_file_path"], "src/lib.rs");
976 assert_eq!(value["source_line_start"], 3);
977 assert_eq!(value["source_line_end"], 5);
978 assert_eq!(value["source_byte_start"], 10);
979 assert_eq!(value["source_byte_end"], 40);
980 assert_eq!(value["source_symbol_id"], "symbol-1");
981 }
982
983 #[test]
984 fn summaries_are_optional_enrichment() {
985 let symbol = test_symbol(None);
986 let payload = CodeSymbolVectorPayload::from_symbol(&symbol);
987 let vector_text = vector_text_for_symbol(&symbol);
988 let value = serde_json::to_value(payload).expect("payload serializes");
989
990 assert!(value.get("summary").is_none());
991 assert!(vector_text.contains("name: run"));
992 assert!(!vector_text.contains("summary:"));
993 }
994
995 #[test]
996 fn collection_name_compatibility() {
997 assert_eq!(
998 collection_name(CODE_SYMBOL_COLLECTION_PREFIX, "project-1"),
999 "code_symbols_project-1"
1000 );
1001 }
1002
1003 #[test]
1004 fn delete_project_collection_targets_only_project_collection() {
1005 let (qdrant_url, handle) = spawn_http_responses(vec![(200, json!({"result": true}))]);
1006 let deleted = delete_project_collection(
1007 &QdrantConfig {
1008 url: Some(qdrant_url),
1009 api_key: Some("qdrant-key".to_string()),
1010 },
1011 "project-1",
1012 )
1013 .expect("delete collection");
1014 let requests = handle.join().expect("qdrant requests");
1015
1016 assert!(deleted);
1017 assert_eq!(requests.len(), 1);
1018 assert!(requests[0].contains("DELETE /collections/code_symbols_project-1 HTTP/1.1"));
1019 assert!(requests[0].contains("api-key: qdrant-key"));
1020 assert!(!requests[0].contains("project-2"));
1021 }
1022
1023 #[test]
1024 fn delete_file_vectors_filters_by_project_and_file_without_embedding() {
1025 let (qdrant_url, handle) =
1026 spawn_http_responses(vec![(200, json!({"result": {"operation_id": 1}}))]);
1027 let deleted = delete_file_vectors(
1028 &QdrantConfig {
1029 url: Some(qdrant_url),
1030 api_key: Some("qdrant-key".to_string()),
1031 },
1032 "project-1",
1033 "src/lib.rs",
1034 )
1035 .expect("delete vectors");
1036 let requests = handle.join().expect("qdrant requests");
1037
1038 assert!(deleted);
1039 assert_eq!(requests.len(), 1);
1040 assert!(
1041 requests[0].contains("POST /collections/code_symbols_project-1/points/delete HTTP/1.1")
1042 );
1043 assert!(requests[0].contains("api-key: qdrant-key"));
1044 assert!(requests[0].contains(r#""key":"project_id""#));
1045 assert!(requests[0].contains(r#""value":"project-1""#));
1046 assert!(requests[0].contains(r#""key":"file_path""#));
1047 assert!(requests[0].contains(r#""value":"src/lib.rs""#));
1048 }
1049
/// Clearing a project's vectors is expected to look up the project's own
/// collection and then delete points by project filter only: no file filter,
/// no collection listing, no collection deletion, and nothing mentioning
/// "memory".
#[test]
fn clear_project_vectors_does_not_touch_memory_vector_collections() {
    // Scripted Qdrant replies: collection info first, then the delete acknowledgement.
    let collection_info = json!({"result": {"config": {"params": {"vectors": {"size": 3, "distance": "Cosine"}}}}});
    let delete_ack = json!({"result": {"operation_id": 1}});
    let (qdrant_url, server) =
        spawn_http_responses(vec![(200, collection_info), (200, delete_ack)]);

    let qdrant = QdrantConfig {
        url: Some(qdrant_url),
        api_key: None,
    };
    // NOTE(review): no test server listens on this address, so presumably any
    // embedding call would fail the test — confirm.
    let embedding = EmbeddingConfig {
        api_base: "http://127.0.0.1:9/v1".to_string(),
        model: "unused".to_string(),
        api_key: None,
    };
    let settings = CodeVectorSettings {
        vector_dim: Some(3),
    };
    let mut lifecycle =
        CodeSymbolVectorLifecycle::new("project-1".to_string(), qdrant, embedding, settings)
            .expect("lifecycle");

    let cleared = lifecycle.clear_project_vectors().expect("clear vectors");
    let requests = server.join().expect("qdrant requests");

    assert_eq!(cleared.vectors_deleted, 1);
    assert_eq!(requests.len(), 2);

    let (lookup, deletion) = (&requests[0], &requests[1]);
    assert!(lookup.contains("GET /collections/code_symbols_project-1 HTTP/1.1"));
    assert!(
        deletion.contains("POST /collections/code_symbols_project-1/points/delete HTTP/1.1")
    );
    assert!(deletion.contains(r#""key":"project_id""#));
    assert!(deletion.contains(r#""value":"project-1""#));
    assert!(!deletion.contains(r#""key":"file_path""#));

    // None of the captured requests may contain any of these fragments.
    for forbidden in ["memory", "GET /collections HTTP/1.1", "DELETE /collections/"] {
        assert!(
            !requests.iter().any(|request| request.contains(forbidden)),
            "no request may contain {forbidden:?}"
        );
    }
}
1100
/// Given a collection listing that mixes `code_symbols_*` collections with an
/// unrelated `memory_vectors` collection, only the `code_symbols_*` ones may
/// be deleted, in listing order.
#[test]
fn delete_prefixed_collections_deletes_only_code_symbol_collections() {
    let listing = json!({
        "result": {
            "collections": [
                {"name": "code_symbols_project-1"},
                {"name": "memory_vectors"},
                {"name": "code_symbols_project-2"}
            ]
        }
    });
    // One listing reply followed by one acknowledgement per expected deletion.
    let (qdrant_url, server) = spawn_http_responses(vec![
        (200, listing),
        (200, json!({"result": true})),
        (200, json!({"result": true})),
    ]);
    let qdrant = QdrantConfig {
        url: Some(qdrant_url),
        api_key: None,
    };

    let deleted = delete_code_symbol_collections_with_prefix(&qdrant)
        .expect("delete prefixed collections");
    let requests = server.join().expect("qdrant requests");

    let expected_deleted = vec![
        "code_symbols_project-1".to_string(),
        "code_symbols_project-2".to_string(),
    ];
    assert_eq!(deleted, expected_deleted);
    assert_eq!(requests.len(), 3);

    let expected_request_lines = [
        "GET /collections HTTP/1.1",
        "DELETE /collections/code_symbols_project-1 HTTP/1.1",
        "DELETE /collections/code_symbols_project-2 HTTP/1.1",
    ];
    for (request, request_line) in requests.iter().zip(expected_request_lines) {
        assert!(
            request.contains(request_line),
            "expected request containing {request_line:?}"
        );
    }
    assert!(
        !requests
            .iter()
            .any(|request| request.contains("DELETE /collections/memory_vectors"))
    );
}
1143
/// `embed_text` is expected to POST the model and input to `/v1/embeddings`
/// with a bearer token and return the embedding from the first `data` entry.
#[test]
fn embedding_request_response() {
    let reply = json!({"data": [{"embedding": [0.25, 0.5, 0.75]}]});
    let (base_url, server) = spawn_http_responses(vec![(200, reply)]);
    let config = EmbeddingConfig {
        api_base: format!("{base_url}/v1"),
        model: "embed-small".to_string(),
        api_key: Some("embedding-key".to_string()),
    };

    let embedding = embed_text(&config, "dimension_probe").expect("embedding response");
    let requests = server.join().expect("server thread");

    assert_eq!(embedding, vec![0.25, 0.5, 0.75]);
    assert_eq!(requests.len(), 1);

    let request = &requests[0];
    let expected_fragments = [
        "POST /v1/embeddings HTTP/1.1",
        "authorization: Bearer embedding-key",
        r#""model":"embed-small""#,
        r#""input":"dimension_probe""#,
    ];
    for fragment in expected_fragments {
        assert!(
            request.contains(fragment),
            "request is missing {fragment:?}"
        );
    }
}
1166
/// Covers two ways the collection's vector size is resolved:
///
/// 1. With no configured dimension, a single embedding request is made and the
///    length of its result (3) is used; the second `ensure_collection` call
///    must not trigger another embedding request.
/// 2. With an explicit `vector_dim`, that value (1536) is sent in the create
///    request.
#[test]
fn ensure_collection_resolves_vector_size_and_distance() {
    // --- Dimension taken from the embedding reply ---------------------------
    let probe_reply = json!({"data": [{"embedding": [0.1, 0.2, 0.3]}]});
    let (embedding_url, embedding_server) = spawn_http_responses(vec![(200, probe_reply)]);

    // Qdrant script: collection missing, create acknowledged, then the
    // collection's schema for the follow-up call.
    let existing_schema = json!({"result": {"config": {"params": {"vectors": {"size": 3, "distance": "Cosine"}}}}});
    let (qdrant_url, qdrant_server) = spawn_http_responses(vec![
        (404, json!({"status": "not found"})),
        (200, json!({"result": true})),
        (200, existing_schema),
    ]);

    let probed_qdrant = QdrantConfig {
        url: Some(qdrant_url),
        api_key: None,
    };
    let probed_embedding = EmbeddingConfig {
        api_base: format!("{embedding_url}/v1"),
        model: "embed-small".to_string(),
        api_key: None,
    };
    let mut lifecycle = CodeSymbolVectorLifecycle::new(
        "project-1".to_string(),
        probed_qdrant,
        probed_embedding,
        CodeVectorSettings { vector_dim: None },
    )
    .expect("lifecycle");

    let created = lifecycle.ensure_collection().expect("create collection");
    let reused = lifecycle.ensure_collection().expect("reuse collection");
    let embedding_requests = embedding_server.join().expect("embedding requests");
    let qdrant_requests = qdrant_server.join().expect("qdrant requests");

    assert_eq!(created.size, 3);
    assert_eq!(created.distance, VECTOR_DISTANCE_COSINE);
    assert_eq!(reused.size, 3);
    assert_eq!(embedding_requests.len(), 1, "dimension probe is cached");

    let create_request = &qdrant_requests[1];
    assert!(create_request.contains("PUT /collections/code_symbols_project-1 HTTP/1.1"));
    assert!(create_request.contains(r#""size":3"#));
    assert!(create_request.contains(r#""distance":"Cosine""#));

    // --- Dimension supplied explicitly --------------------------------------
    let (explicit_qdrant_url, explicit_server) = spawn_http_responses(vec![
        (404, json!({"status": "not found"})),
        (200, json!({"result": true})),
    ]);
    let explicit_qdrant = QdrantConfig {
        url: Some(explicit_qdrant_url),
        api_key: None,
    };
    // NOTE(review): no test server backs this endpoint, so presumably the
    // explicit-dimension path must not call it — confirm.
    let unused_embedding = EmbeddingConfig {
        api_base: "http://127.0.0.1:9/v1".to_string(),
        model: "unused".to_string(),
        api_key: None,
    };
    let mut explicit = CodeSymbolVectorLifecycle::new(
        "project-1".to_string(),
        explicit_qdrant,
        unused_embedding,
        CodeVectorSettings {
            vector_dim: Some(1536),
        },
    )
    .expect("lifecycle with explicit size");

    let schema = explicit.ensure_collection().expect("explicit size create");
    let explicit_requests = explicit_server.join().expect("explicit qdrant requests");

    assert_eq!(schema.size, 1536);
    assert!(explicit_requests[1].contains(r#""size":1536"#));
}
1235
/// Walks every `.rs` file under the crate's `src` directory and fails if any
/// file other than `vector/code_symbols.rs` contains one of the lifecycle REST
/// path fragments listed below.
#[test]
fn lifecycle_http_scoped_to_module() {
    /// Path fragments that may only appear in the allowed file.
    const LIFECYCLE_REST: [&str; 4] = [
        "/points/delete",
        "points/delete",
        "collections/{collection}",
        "/collections/{collection}",
    ];

    /// Recursively scans `dir`, appending every offending `.rs` file to `offenders`.
    fn collect_offenders(dir: &std::path::Path, offenders: &mut Vec<std::path::PathBuf>) {
        for entry in std::fs::read_dir(dir).expect("read source directory") {
            let candidate = entry.expect("source entry").path();
            if candidate.is_dir() {
                collect_offenders(&candidate, offenders);
            } else if candidate.extension().and_then(|ext| ext.to_str()) == Some("rs") {
                let source = std::fs::read_to_string(&candidate).expect("read source file");
                let mentions_lifecycle_rest =
                    LIFECYCLE_REST.iter().any(|needle| source.contains(needle));
                if mentions_lifecycle_rest && !candidate.ends_with("vector/code_symbols.rs") {
                    offenders.push(candidate);
                }
            }
        }
    }

    let src_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("src");
    let mut offenders = Vec::new();
    collect_offenders(&src_dir, &mut offenders);

    assert!(
        offenders.is_empty(),
        "Qdrant lifecycle REST must stay scoped to vector/code_symbols.rs: {offenders:?}"
    );
}
1274
/// Asserts that the non-test part of this file references the shared
/// `gobby_core` Qdrant helpers by path.
///
/// `include_str!` embeds the whole file, including this test. Searching the
/// full text would therefore always succeed, because the expected paths appear
/// below as string literals. To keep the check meaningful, only the text that
/// precedes the last `cfg(test)` attribute is inspected.
///
/// NOTE(review): assumes the test module is the last `cfg(test)`-gated item in
/// the file; if no such attribute is found the whole file is searched.
#[test]
fn routes_through_gobby_core_qdrant() {
    let source = include_str!("code_symbols.rs");

    // Assembled with `concat!` so the marker never occurs verbatim inside this
    // test's own text, which would defeat the split.
    let test_attribute = concat!("#[cfg(", "test)]");
    let production = source
        .rfind(test_attribute)
        .map_or(source, |start| &source[..start]);

    let required_paths = [
        "gobby_core::config::resolve_qdrant_config",
        "gobby_core::qdrant::with_qdrant",
        "gobby_core::qdrant::collection_name",
        "CollectionScope::Custom",
        "gobby_core::qdrant::search",
        "gobby_core::qdrant::upsert",
    ];
    for required in required_paths {
        assert!(
            production.contains(required),
            "non-test code in code_symbols.rs must reference `{required}`"
        );
    }
}
1285
1286 fn spawn_http_responses(
1287 responses: Vec<(u16, Value)>,
1288 ) -> (String, thread::JoinHandle<Vec<String>>) {
1289 let listener = TcpListener::bind("127.0.0.1:0").expect("bind test server");
1290 let addr = listener.local_addr().expect("local addr");
1291 let handle = thread::spawn(move || {
1292 let mut requests = Vec::new();
1293 for (status, body) in responses {
1294 let (mut stream, _) = listener.accept().expect("accept request");
1295 requests.push(read_http_request(&mut stream));
1296
1297 let body = body.to_string();
1298 write!(
1299 stream,
1300 "HTTP/1.1 {status} OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}",
1301 body.len()
1302 )
1303 .expect("write response");
1304 }
1305 requests
1306 });
1307
1308 (format!("http://{addr}"), handle)
1309 }
1310
/// Reads a single HTTP request from `stream` and returns it as text (invalid
/// UTF-8 is replaced lossily).
///
/// Reading stops as soon as the header block plus the number of body bytes
/// announced by `Content-Length` has been received, or when the peer closes
/// the stream. A request without a parseable `Content-Length` header is
/// treated as having no body.
///
/// The header name is matched case-insensitively and whitespace around the
/// name and value is ignored, so `Content-Length:5` and `content-length:  5 `
/// are both honoured.
///
/// # Panics
/// Panics if reading from `stream` fails.
fn read_http_request(stream: &mut impl Read) -> String {
    const HEADER_TERMINATOR: &[u8] = b"\r\n\r\n";

    let mut request = Vec::new();
    let mut buffer = [0u8; 4096];
    // Total request size (headers + terminator + body), known once the header
    // block has been fully received.
    let mut expected_len: Option<usize> = None;

    loop {
        let n = stream.read(&mut buffer).expect("read request");
        if n == 0 {
            break;
        }
        request.extend_from_slice(&buffer[..n]);

        if expected_len.is_none() {
            expected_len = request
                .windows(HEADER_TERMINATOR.len())
                .position(|window| window == HEADER_TERMINATOR)
                .map(|header_end| {
                    let headers = String::from_utf8_lossy(&request[..header_end]);
                    let content_len = headers
                        .lines()
                        .find_map(|line| {
                            let (name, value) = line.split_once(':')?;
                            if name.trim().eq_ignore_ascii_case("content-length") {
                                value.trim().parse::<usize>().ok()
                            } else {
                                None
                            }
                        })
                        .unwrap_or(0);
                    header_end + HEADER_TERMINATOR.len() + content_len
                });
        }

        if expected_len.is_some_and(|len| request.len() >= len) {
            break;
        }
    }

    String::from_utf8_lossy(&request).into_owned()
}
1348}