1use postgres::GenericClient;
2use reqwest::StatusCode;
3use serde::{Deserialize, Serialize};
4use serde_json::{Map, Value, json};
5use std::fmt;
6use std::time::Duration;
7
8use crate::config::{
9 CODE_SYMBOL_COLLECTION_PREFIX, CodeVectorSettings, Context, EmbeddingConfig, QdrantConfig,
10};
11use crate::db;
12use crate::models::{ProjectionMetadata, ProjectionProvenance, Symbol};
13use gobby_core::degradation::ServiceState;
14use gobby_core::qdrant::{CollectionScope, SearchRequest, UpsertRequest};
15
/// Distance metric name written into, and expected from, code-symbol vector collections.
pub const VECTOR_DISTANCE_COSINE: &str = "Cosine";
/// Text embedded to discover the vector size when no explicit dimension is configured.
const DIMENSION_PROBE_TEXT: &str = "dimension_probe";
/// Timeout applied to every blocking HTTP client built in this module.
const HTTP_TIMEOUT: Duration = Duration::from_secs(10);
20
21#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
22pub struct CodeSymbolVectorSearchRequest {
23 pub project_id: String,
24 pub query: String,
25 pub limit: usize,
26 pub collection_prefix: String,
27}
28
29#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
30pub struct CodeSymbolVectorSearchHit {
31 pub symbol_id: String,
32 pub score: f64,
33}
34
35#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
36pub struct CodeSymbolVectorPayload {
37 pub project_id: String,
38 pub file_path: String,
39 pub symbol_id: String,
40 pub name: String,
41 pub kind: String,
42 pub language: String,
43 pub line_start: usize,
44 pub line_end: usize,
45 pub byte_start: usize,
46 pub byte_end: usize,
47 #[serde(skip_serializing_if = "Option::is_none")]
48 pub signature: Option<String>,
49 #[serde(skip_serializing_if = "Option::is_none")]
50 pub docstring: Option<String>,
51 pub provenance: ProjectionProvenance,
52 #[serde(skip_serializing_if = "Option::is_none")]
53 pub confidence: Option<f64>,
54 pub source_system: String,
55 pub source_file_path: String,
56 pub source_line: usize,
57 pub source_line_start: usize,
58 pub source_line_end: usize,
59 pub source_byte_start: usize,
60 pub source_byte_end: usize,
61 pub source_symbol_id: String,
62 #[serde(skip_serializing_if = "Option::is_none")]
63 pub summary: Option<String>,
64}
65
66impl CodeSymbolVectorPayload {
67 pub fn from_symbol(symbol: &Symbol) -> Self {
68 let metadata = ProjectionMetadata::gcode_extracted()
69 .with_source_file_path(&symbol.file_path)
70 .with_source_line(symbol.line_start)
71 .with_source_symbol_id(&symbol.id);
72
73 Self {
74 project_id: symbol.project_id.clone(),
75 file_path: symbol.file_path.clone(),
76 symbol_id: symbol.id.clone(),
77 name: symbol.name.clone(),
78 kind: symbol.kind.clone(),
79 language: symbol.language.clone(),
80 line_start: symbol.line_start,
81 line_end: symbol.line_end,
82 byte_start: symbol.byte_start,
83 byte_end: symbol.byte_end,
84 signature: symbol.signature.clone(),
85 docstring: symbol.docstring.clone(),
86 provenance: metadata.provenance,
87 confidence: metadata.confidence,
88 source_system: metadata.source_system,
89 source_file_path: metadata
90 .source_file_path
91 .unwrap_or_else(|| symbol.file_path.clone()),
92 source_line: metadata.source_line.unwrap_or(symbol.line_start),
93 source_line_start: symbol.line_start,
94 source_line_end: symbol.line_end,
95 source_byte_start: symbol.byte_start,
96 source_byte_end: symbol.byte_end,
97 source_symbol_id: metadata
98 .source_symbol_id
99 .unwrap_or_else(|| symbol.id.clone()),
100 summary: symbol.summary.clone(),
101 }
102 }
103}
104
105#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
106#[serde(rename_all = "snake_case")]
107pub enum CodeSymbolVectorLifecycleAction {
108 Ensure,
109 SyncFile,
110 Clear,
111 Rebuild,
112}
113
114#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
115pub struct CodeSymbolVectorLifecycleStatus {
116 pub project_id: String,
117 pub collection: String,
118 pub action: CodeSymbolVectorLifecycleAction,
119}
120
121#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
122pub struct VectorCollectionSchema {
123 pub size: usize,
124 pub distance: String,
125}
126
/// Vector schema read back from an existing collection.
///
/// Each field is `None` when the collection response did not carry a usable value for it.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ExistingVectorCollectionSchema {
    /// Reported vector size, if present.
    size: Option<usize>,
    /// Reported distance metric name, if present.
    distance: Option<String>,
}
132
133#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
134pub struct CodeSymbolVectorLifecycleOutput {
135 pub project_id: String,
136 pub collection: String,
137 pub action: CodeSymbolVectorLifecycleAction,
138 pub file_path: Option<String>,
139 pub symbols: usize,
140 pub vectors_upserted: usize,
141 pub vectors_deleted: usize,
142 pub summary: String,
143}
144
/// Errors produced by the vector lifecycle operations in this module.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VectorLifecycleError {
    /// No usable Qdrant configuration is available.
    MissingQdrantConfig,
    /// No usable embedding configuration is available.
    MissingEmbeddingConfig,
    /// The embedding endpoint answered with a non-success HTTP status.
    EmbeddingHttp {
        /// HTTP status code of the response.
        status: u16,
        /// Response body text.
        body: String,
    },
    /// The embedding request failed or its response could not be interpreted.
    EmbeddingResponse(String),
    /// Qdrant answered with a non-success HTTP status.
    QdrantHttp {
        /// Short name of the operation that failed.
        operation: &'static str,
        /// HTTP status code of the response.
        status: u16,
        /// Response body text.
        body: String,
    },
    /// A Qdrant operation failed for a reason other than an HTTP status.
    QdrantOperation(String),
    /// An existing collection's vector schema differs from the expected one.
    DimensionMismatch {
        /// Name of the offending collection.
        collection: String,
        /// Vector size that was expected.
        expected_size: usize,
        /// Vector size that was found, if it could be read.
        found_size: Option<usize>,
        /// Distance metric that was expected.
        expected_distance: &'static str,
        /// Distance metric that was found, if it could be read.
        found_distance: Option<String>,
    },
}

impl fmt::Display for VectorLifecycleError {
    /// Renders the user-facing message for each error variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::MissingQdrantConfig => {
                f.write_str("Qdrant config is required for vector lifecycle commands")
            }
            Self::MissingEmbeddingConfig => {
                f.write_str("embedding config is required for vector lifecycle commands")
            }
            Self::EmbeddingHttp { status, body } => {
                write!(f, "embedding request failed: HTTP {status}: {body}")
            }
            Self::EmbeddingResponse(reason) => {
                write!(f, "embedding response was invalid: {reason}")
            }
            Self::QdrantHttp {
                operation,
                status,
                body,
            } => {
                write!(f, "Qdrant {operation} failed: HTTP {status}: {body}")
            }
            Self::QdrantOperation(reason) => {
                write!(f, "Qdrant operation failed: {reason}")
            }
            Self::DimensionMismatch {
                collection,
                expected_size,
                found_size,
                expected_distance,
                found_distance,
            } => {
                // Values that could not be read are shown as "unknown".
                let found_size = match found_size {
                    Some(size) => size.to_string(),
                    None => String::from("unknown"),
                };
                let found_distance = match found_distance {
                    Some(distance) => distance.as_str(),
                    None => "unknown",
                };
                write!(
                    f,
                    "Qdrant collection `{collection}` has incompatible vector schema: expected size {expected_size} distance {expected_distance}, found size {found_size} distance {found_distance}. Refusing to migrate, drop, or recreate the collection."
                )
            }
        }
    }
}

impl std::error::Error for VectorLifecycleError {}
210
211#[derive(Debug)]
212pub struct CodeSymbolVectorLifecycle {
213 project_id: String,
214 collection: String,
215 qdrant: QdrantConfig,
216 embedding: EmbeddingConfig,
217 settings: CodeVectorSettings,
218 probed_vector_size: Option<usize>,
219 client: reqwest::blocking::Client,
220}
221
222pub fn collection_name(collection_prefix: &str, project_id: &str) -> String {
223 let collection = format!("{collection_prefix}{project_id}");
224 gobby_core::qdrant::collection_name("gcode", CollectionScope::Custom(&collection))
225}
226
227pub fn delete_project_collection(
228 qdrant: &QdrantConfig,
229 project_id: &str,
230) -> Result<bool, VectorLifecycleError> {
231 let client = qdrant_http_client()?;
232 let collection = collection_name(CODE_SYMBOL_COLLECTION_PREFIX, project_id);
233 delete_qdrant_collection(&client, qdrant, &collection)
234}
235
236pub fn delete_code_symbol_collections_with_prefix(
237 qdrant: &QdrantConfig,
238) -> Result<Vec<String>, VectorLifecycleError> {
239 let client = qdrant_http_client()?;
240 let resp = qdrant_request_for_config(&client, qdrant, reqwest::Method::GET, "/collections")?
241 .send()
242 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
243 let status = resp.status();
244 if !status.is_success() {
245 return Err(qdrant_http_error("list collections", status, resp));
246 }
247
248 let data: Value = resp
249 .json()
250 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
251 let collections = parse_collection_names(&data)
252 .into_iter()
253 .filter(|name| name.starts_with(CODE_SYMBOL_COLLECTION_PREFIX))
254 .collect::<Vec<_>>();
255
256 let mut deleted = Vec::new();
257 for collection in collections {
258 if delete_qdrant_collection(&client, qdrant, &collection)? {
259 deleted.push(collection);
260 }
261 }
262 Ok(deleted)
263}
264
265pub fn resolve_lifecycle_qdrant_config(
266 source: &mut impl gobby_core::config::ConfigSource,
267) -> Option<QdrantConfig> {
268 gobby_core::config::resolve_qdrant_config(source)
269}
270
271pub fn lifecycle_status(
272 project_id: impl Into<String>,
273 collection_prefix: &str,
274 action: CodeSymbolVectorLifecycleAction,
275) -> CodeSymbolVectorLifecycleStatus {
276 let project_id = project_id.into();
277 CodeSymbolVectorLifecycleStatus {
278 collection: collection_name(collection_prefix, &project_id),
279 project_id,
280 action,
281 }
282}
283
284pub fn embed_text(config: &EmbeddingConfig, text: &str) -> Result<Vec<f32>, VectorLifecycleError> {
285 let client = reqwest::blocking::Client::builder()
286 .timeout(HTTP_TIMEOUT)
287 .build()
288 .map_err(|err| VectorLifecycleError::EmbeddingResponse(err.to_string()))?;
289
290 let body = json!({
291 "model": config.model,
292 "input": text,
293 });
294
295 let url = format!("{}/embeddings", config.api_base.trim_end_matches('/'));
296 let mut req = client.post(&url).json(&body);
297
298 if let Some(key) = &config.api_key {
299 req = req.header("Authorization", format!("Bearer {key}"));
300 }
301
302 let resp = req
303 .send()
304 .map_err(|err| VectorLifecycleError::EmbeddingResponse(err.to_string()))?;
305 if !resp.status().is_success() {
306 let status = resp.status().as_u16();
307 let body = resp.text().unwrap_or_default();
308 return Err(VectorLifecycleError::EmbeddingHttp { status, body });
309 }
310
311 let data: Value = resp
312 .json()
313 .map_err(|err| VectorLifecycleError::EmbeddingResponse(err.to_string()))?;
314 let embedding: Vec<f32> = data
315 .get("data")
316 .and_then(Value::as_array)
317 .and_then(|values| values.first())
318 .and_then(|value| value.get("embedding"))
319 .and_then(Value::as_array)
320 .ok_or_else(|| {
321 VectorLifecycleError::EmbeddingResponse("missing data[0].embedding array".to_string())
322 })?
323 .iter()
324 .map(|value| {
325 value.as_f64().map(|f| f as f32).ok_or_else(|| {
326 VectorLifecycleError::EmbeddingResponse(
327 "embedding array contains a non-number".to_string(),
328 )
329 })
330 })
331 .collect::<Result<Vec<_>, _>>()?;
332
333 if embedding.is_empty() {
334 Err(VectorLifecycleError::EmbeddingResponse(
335 "embedding vector was empty".to_string(),
336 ))
337 } else {
338 Ok(embedding)
339 }
340}
341
342pub fn embed_query(config: &EmbeddingConfig, text: &str) -> Option<Vec<f32>> {
343 embed_text(config, &format!("search_query: {text}")).ok()
344}
345
346pub fn vector_text_for_symbol(symbol: &Symbol) -> String {
347 let mut lines = vec![
348 format!("name: {}", symbol.name),
349 format!("qualified_name: {}", symbol.qualified_name),
350 format!("kind: {}", symbol.kind),
351 format!("language: {}", symbol.language),
352 format!("file_path: {}", symbol.file_path),
353 format!("range: {}-{}", symbol.line_start, symbol.line_end),
354 ];
355 if let Some(signature) = symbol
356 .signature
357 .as_deref()
358 .filter(|value| !value.trim().is_empty())
359 {
360 lines.push(format!("signature: {signature}"));
361 }
362 if let Some(docstring) = symbol
363 .docstring
364 .as_deref()
365 .filter(|value| !value.trim().is_empty())
366 {
367 lines.push(format!("docstring: {docstring}"));
368 }
369 if let Some(summary) = symbol
370 .summary
371 .as_deref()
372 .filter(|value| !value.trim().is_empty())
373 {
374 lines.push(format!("summary: {summary}"));
375 }
376 lines.join("\n")
377}
378
379pub fn vector_search(
380 config: &QdrantConfig,
381 collection: &str,
382 query_vector: &[f32],
383 limit: usize,
384) -> anyhow::Result<Vec<(String, f64)>> {
385 let request = SearchRequest {
386 vector: query_vector.to_vec(),
387 limit,
388 filter: None,
389 };
390 let (hits, _) = gobby_core::qdrant::with_qdrant(Some(config), Vec::new(), |config| {
391 gobby_core::qdrant::search(config, collection, request)
392 })?;
393 Ok(hits
394 .into_iter()
395 .map(|hit| (hit.id, f64::from(hit.score)))
396 .collect())
397}
398
399impl CodeSymbolVectorLifecycle {
400 pub fn new(
401 project_id: String,
402 qdrant: QdrantConfig,
403 embedding: EmbeddingConfig,
404 settings: CodeVectorSettings,
405 ) -> Result<Self, VectorLifecycleError> {
406 if qdrant
407 .url
408 .as_deref()
409 .filter(|url| !url.trim().is_empty())
410 .is_none()
411 {
412 return Err(VectorLifecycleError::MissingQdrantConfig);
413 }
414 if embedding.api_base.trim().is_empty() {
415 return Err(VectorLifecycleError::MissingEmbeddingConfig);
416 }
417
418 let collection = collection_name(CODE_SYMBOL_COLLECTION_PREFIX, &project_id);
419 let client = reqwest::blocking::Client::builder()
420 .timeout(HTTP_TIMEOUT)
421 .build()
422 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
423 Ok(Self {
424 project_id,
425 collection,
426 qdrant,
427 embedding,
428 settings,
429 probed_vector_size: None,
430 client,
431 })
432 }
433
434 pub fn collection(&self) -> &str {
435 &self.collection
436 }
437
438 pub fn ensure_collection(&mut self) -> Result<VectorCollectionSchema, VectorLifecycleError> {
439 let expected = self.expected_schema()?;
440 self.require_qdrant_boundary()?;
441 match self.get_collection_schema()? {
442 Some(found) => self.ensure_compatible_schema(expected, found),
443 None => {
444 self.create_collection(&expected)?;
445 Ok(expected)
446 }
447 }
448 }
449
450 pub fn sync_file_symbols(
451 &mut self,
452 file_path: &str,
453 symbols: &[Symbol],
454 ) -> Result<CodeSymbolVectorLifecycleOutput, VectorLifecycleError> {
455 self.ensure_collection()?;
456 let points = self.points_for_symbols(symbols)?;
457 self.delete_vectors(Some(file_path))?;
458 self.upsert_points(points)?;
459
460 Ok(self.output(
461 CodeSymbolVectorLifecycleAction::SyncFile,
462 Some(file_path.to_string()),
463 symbols.len(),
464 symbols.len(),
465 1,
466 ))
467 }
468
469 pub fn clear_project_vectors(
470 &mut self,
471 ) -> Result<CodeSymbolVectorLifecycleOutput, VectorLifecycleError> {
472 let expected = self.expected_schema()?;
473 self.require_qdrant_boundary()?;
474 let deleted = match self.get_collection_schema()? {
475 Some(found) => {
476 self.ensure_compatible_schema(expected, found)?;
477 self.delete_vectors(None)?;
478 1
479 }
480 None => 0,
481 };
482
483 Ok(self.output(CodeSymbolVectorLifecycleAction::Clear, None, 0, 0, deleted))
484 }
485
486 pub fn rebuild_symbols(
487 &mut self,
488 symbols: &[Symbol],
489 ) -> Result<CodeSymbolVectorLifecycleOutput, VectorLifecycleError> {
490 self.ensure_collection()?;
491 let points = self.points_for_symbols(symbols)?;
492 self.delete_vectors(None)?;
493 self.upsert_points(points)?;
494
495 Ok(self.output(
496 CodeSymbolVectorLifecycleAction::Rebuild,
497 None,
498 symbols.len(),
499 symbols.len(),
500 1,
501 ))
502 }
503
504 fn output(
505 &self,
506 action: CodeSymbolVectorLifecycleAction,
507 file_path: Option<String>,
508 symbols: usize,
509 vectors_upserted: usize,
510 vectors_deleted: usize,
511 ) -> CodeSymbolVectorLifecycleOutput {
512 CodeSymbolVectorLifecycleOutput {
513 project_id: self.project_id.clone(),
514 collection: self.collection.clone(),
515 action,
516 file_path,
517 symbols,
518 vectors_upserted,
519 vectors_deleted,
520 summary: format!(
521 "{vectors_upserted} vector(s) upserted, {vectors_deleted} delete operation(s) issued"
522 ),
523 }
524 }
525
526 fn expected_schema(&mut self) -> Result<VectorCollectionSchema, VectorLifecycleError> {
527 let size = match self.settings.vector_dim {
528 Some(size) => size,
529 None => match self.probed_vector_size {
530 Some(size) => size,
531 None => {
532 let size = embed_text(&self.embedding, DIMENSION_PROBE_TEXT)?.len();
533 self.probed_vector_size = Some(size);
534 size
535 }
536 },
537 };
538
539 Ok(VectorCollectionSchema {
540 size,
541 distance: VECTOR_DISTANCE_COSINE.to_string(),
542 })
543 }
544
545 fn require_qdrant_boundary(&self) -> Result<(), VectorLifecycleError> {
546 let ((), state) = gobby_core::qdrant::with_qdrant(Some(&self.qdrant), (), |_| Ok(()))
547 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
548 match state {
549 ServiceState::Available => Ok(()),
550 ServiceState::NotConfigured => Err(VectorLifecycleError::MissingQdrantConfig),
551 other => Err(VectorLifecycleError::QdrantOperation(format!(
552 "unexpected Qdrant service state: {other:?}"
553 ))),
554 }
555 }
556
557 fn ensure_compatible_schema(
558 &self,
559 expected: VectorCollectionSchema,
560 found: ExistingVectorCollectionSchema,
561 ) -> Result<VectorCollectionSchema, VectorLifecycleError> {
562 if found.size == Some(expected.size)
563 && found.distance.as_deref() == Some(&expected.distance)
564 {
565 return Ok(VectorCollectionSchema {
566 size: expected.size,
567 distance: expected.distance,
568 });
569 }
570
571 Err(VectorLifecycleError::DimensionMismatch {
572 collection: self.collection.clone(),
573 expected_size: expected.size,
574 found_size: found.size,
575 expected_distance: VECTOR_DISTANCE_COSINE,
576 found_distance: found.distance,
577 })
578 }
579
580 fn get_collection_schema(
581 &self,
582 ) -> Result<Option<ExistingVectorCollectionSchema>, VectorLifecycleError> {
583 let resp = self
584 .qdrant_request(
585 reqwest::Method::GET,
586 &format!("/collections/{}", self.collection),
587 )?
588 .send()
589 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
590 let status = resp.status();
591 if status == StatusCode::NOT_FOUND {
592 return Ok(None);
593 }
594 if !status.is_success() {
595 return Err(qdrant_http_error("get collection", status, resp));
596 }
597
598 let data: Value = resp
599 .json()
600 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
601 Ok(parse_collection_schema(&data))
602 }
603
604 fn create_collection(
605 &self,
606 schema: &VectorCollectionSchema,
607 ) -> Result<(), VectorLifecycleError> {
608 let body = json!({
609 "vectors": {
610 "size": schema.size,
611 "distance": schema.distance,
612 },
613 });
614 let resp = self
615 .qdrant_request(
616 reqwest::Method::PUT,
617 &format!("/collections/{}", self.collection),
618 )?
619 .json(&body)
620 .send()
621 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
622 if !resp.status().is_success() {
623 return Err(qdrant_http_error("create collection", resp.status(), resp));
624 }
625 Ok(())
626 }
627
628 fn delete_vectors(&self, file_path: Option<&str>) -> Result<(), VectorLifecycleError> {
629 let mut must = vec![json!({
630 "key": "project_id",
631 "match": {"value": self.project_id},
632 })];
633 if let Some(file_path) = file_path {
634 must.push(json!({
635 "key": "file_path",
636 "match": {"value": file_path},
637 }));
638 }
639 let body = json!({
640 "filter": {
641 "must": must,
642 },
643 });
644 let resp = self
645 .qdrant_request(
646 reqwest::Method::POST,
647 &format!("/collections/{}/points/delete", self.collection),
648 )?
649 .json(&body)
650 .send()
651 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
652 if !resp.status().is_success() {
653 return Err(qdrant_http_error("delete points", resp.status(), resp));
654 }
655 Ok(())
656 }
657
658 fn upsert_points(&self, points: Vec<UpsertRequest>) -> Result<(), VectorLifecycleError> {
659 if points.is_empty() {
660 return Ok(());
661 }
662 let ((), state) = gobby_core::qdrant::with_qdrant(Some(&self.qdrant), (), |config| {
663 gobby_core::qdrant::upsert(config, &self.collection, points)
664 })
665 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
666 match state {
667 ServiceState::Available => Ok(()),
668 ServiceState::NotConfigured => Err(VectorLifecycleError::MissingQdrantConfig),
669 other => Err(VectorLifecycleError::QdrantOperation(format!(
670 "unexpected Qdrant service state: {other:?}"
671 ))),
672 }
673 }
674
675 fn points_for_symbols(
676 &self,
677 symbols: &[Symbol],
678 ) -> Result<Vec<UpsertRequest>, VectorLifecycleError> {
679 symbols
680 .iter()
681 .map(|symbol| {
682 let vector = embed_text(&self.embedding, &vector_text_for_symbol(symbol))?;
683 let payload = payload_map(CodeSymbolVectorPayload::from_symbol(symbol))?;
684 Ok(UpsertRequest {
685 id: symbol.id.clone(),
686 vector,
687 payload,
688 })
689 })
690 .collect()
691 }
692
693 fn qdrant_request(
694 &self,
695 method: reqwest::Method,
696 path: &str,
697 ) -> Result<reqwest::blocking::RequestBuilder, VectorLifecycleError> {
698 qdrant_request_for_config(&self.client, &self.qdrant, method, path)
699 }
700}
701
702pub fn fetch_symbols_for_file(
703 conn: &mut impl GenericClient,
704 project_id: &str,
705 file_path: &str,
706) -> anyhow::Result<Vec<Symbol>> {
707 let columns = db::symbol_select_columns("");
708 conn.query(
709 &format!(
710 "SELECT {columns} FROM code_symbols
711 WHERE project_id = $1 AND file_path = $2
712 ORDER BY file_path, byte_start, id"
713 ),
714 &[&project_id, &file_path],
715 )?
716 .into_iter()
717 .map(|row| Symbol::from_row(&row))
718 .collect()
719}
720
721pub fn fetch_symbols_for_project(
722 conn: &mut impl GenericClient,
723 project_id: &str,
724) -> anyhow::Result<Vec<Symbol>> {
725 let columns = db::symbol_select_columns("");
726 conn.query(
727 &format!(
728 "SELECT {columns} FROM code_symbols
729 WHERE project_id = $1
730 ORDER BY file_path, byte_start, id"
731 ),
732 &[&project_id],
733 )?
734 .into_iter()
735 .map(|row| Symbol::from_row(&row))
736 .collect()
737}
738
739fn payload_map(
740 payload: CodeSymbolVectorPayload,
741) -> Result<Map<String, Value>, VectorLifecycleError> {
742 match serde_json::to_value(payload)
743 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?
744 {
745 Value::Object(map) => Ok(map),
746 _ => Err(VectorLifecycleError::QdrantOperation(
747 "vector payload did not serialize to an object".to_string(),
748 )),
749 }
750}
751
752fn parse_collection_schema(data: &Value) -> Option<ExistingVectorCollectionSchema> {
753 let vectors = data.pointer("/result/config/params/vectors")?;
754 let size = vectors
755 .get("size")
756 .and_then(Value::as_u64)
757 .map(|size| size as usize);
758 let distance = vectors
759 .get("distance")
760 .and_then(Value::as_str)
761 .map(str::to_string);
762 Some(ExistingVectorCollectionSchema { size, distance })
763}
764
765fn parse_collection_names(data: &Value) -> Vec<String> {
766 data.pointer("/result/collections")
767 .and_then(Value::as_array)
768 .map(|collections| {
769 collections
770 .iter()
771 .filter_map(|collection| {
772 collection
773 .get("name")
774 .and_then(Value::as_str)
775 .map(str::to_string)
776 })
777 .collect()
778 })
779 .unwrap_or_default()
780}
781
782fn qdrant_http_client() -> Result<reqwest::blocking::Client, VectorLifecycleError> {
783 reqwest::blocking::Client::builder()
784 .timeout(HTTP_TIMEOUT)
785 .build()
786 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))
787}
788
789fn qdrant_request_for_config(
790 client: &reqwest::blocking::Client,
791 qdrant: &QdrantConfig,
792 method: reqwest::Method,
793 path: &str,
794) -> Result<reqwest::blocking::RequestBuilder, VectorLifecycleError> {
795 let base = qdrant
796 .url
797 .as_deref()
798 .ok_or(VectorLifecycleError::MissingQdrantConfig)?
799 .trim_end_matches('/');
800 let url = format!("{base}{path}");
801 let mut req = client.request(method, url);
802 if let Some(key) = &qdrant.api_key {
803 req = req.header("api-key", key);
804 }
805 Ok(req)
806}
807
808fn delete_qdrant_collection(
809 client: &reqwest::blocking::Client,
810 qdrant: &QdrantConfig,
811 collection: &str,
812) -> Result<bool, VectorLifecycleError> {
813 let resp = qdrant_request_for_config(
814 client,
815 qdrant,
816 reqwest::Method::DELETE,
817 &format!("/collections/{collection}"),
818 )?
819 .send()
820 .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
821 let status = resp.status();
822 if status == StatusCode::NOT_FOUND {
823 return Ok(false);
824 }
825 if !status.is_success() {
826 return Err(qdrant_http_error("delete collection", status, resp));
827 }
828 Ok(true)
829}
830
831fn qdrant_http_error(
832 operation: &'static str,
833 status: StatusCode,
834 resp: reqwest::blocking::Response,
835) -> VectorLifecycleError {
836 VectorLifecycleError::QdrantHttp {
837 operation,
838 status: status.as_u16(),
839 body: resp.text().unwrap_or_default(),
840 }
841}
842
843pub fn search_code_symbols(
844 ctx: &Context,
845 request: &CodeSymbolVectorSearchRequest,
846) -> Vec<CodeSymbolVectorSearchHit> {
847 let qdrant_config = match &ctx.qdrant {
848 Some(c) => c,
849 None => return vec![],
850 };
851
852 let embedding_config = match &ctx.embedding {
853 Some(c) => c,
854 None => return vec![],
855 };
856
857 let embedding = match embed_query(embedding_config, &request.query) {
858 Some(e) => e,
859 None => return vec![],
860 };
861
862 let collection = collection_name(&request.collection_prefix, &request.project_id);
863 vector_search(qdrant_config, &collection, &embedding, request.limit)
864 .unwrap_or_default()
865 .into_iter()
866 .map(|(symbol_id, score)| CodeSymbolVectorSearchHit { symbol_id, score })
867 .collect()
868}
869
870pub fn semantic_search(ctx: &Context, query: &str, limit: usize) -> Vec<(String, f64)> {
871 if ctx.qdrant.is_none() {
872 return vec![];
873 }
874
875 let request = CodeSymbolVectorSearchRequest {
876 project_id: ctx.project_id.clone(),
877 query: query.to_string(),
878 limit,
879 collection_prefix: CODE_SYMBOL_COLLECTION_PREFIX.to_string(),
880 };
881
882 search_code_symbols(ctx, &request)
883 .into_iter()
884 .map(|hit| (hit.symbol_id, hit.score))
885 .collect()
886}
887
888#[cfg(test)]
889mod tests {
890 use super::*;
891 use crate::config::{CodeVectorSettings, QdrantConfig};
892 use crate::models::{SOURCE_SYSTEM_GCODE, Symbol};
893 use serde_json::{Value, json};
894 use std::io::{Read, Write};
895 use std::net::TcpListener;
896 use std::thread;
897
898 fn test_symbol(summary: Option<String>) -> Symbol {
899 Symbol {
900 id: "symbol-1".to_string(),
901 project_id: "project-1".to_string(),
902 file_path: "src/lib.rs".to_string(),
903 name: "run".to_string(),
904 qualified_name: "crate::run".to_string(),
905 kind: "function".to_string(),
906 language: "rust".to_string(),
907 byte_start: 10,
908 byte_end: 40,
909 line_start: 3,
910 line_end: 5,
911 signature: None,
912 docstring: None,
913 parent_symbol_id: None,
914 content_hash: "hash".to_string(),
915 summary,
916 created_at: String::new(),
917 updated_at: String::new(),
918 }
919 }
920
921 #[test]
922 fn payloads_carry_provenance_metadata() {
923 let payload = CodeSymbolVectorPayload::from_symbol(&test_symbol(Some("does work".into())));
924
925 assert_eq!(payload.provenance, ProjectionProvenance::Extracted);
926 assert_eq!(payload.confidence, Some(1.0));
927 assert_eq!(payload.source_system, SOURCE_SYSTEM_GCODE);
928 assert_eq!(payload.source_file_path, "src/lib.rs");
929 assert_eq!(payload.source_line_start, 3);
930 assert_eq!(payload.source_line_end, 5);
931 assert_eq!(payload.source_byte_start, 10);
932 assert_eq!(payload.source_byte_end, 40);
933 assert_eq!(payload.source_line, 3);
934 assert_eq!(payload.source_symbol_id, "symbol-1");
935 assert_eq!(payload.summary.as_deref(), Some("does work"));
936 assert_eq!(payload.signature, None);
937 assert_eq!(payload.docstring, None);
938
939 let value = serde_json::to_value(payload).expect("payload serializes");
940 assert_eq!(value["provenance"], "EXTRACTED");
941 assert_eq!(value["confidence"], 1.0);
942 assert_eq!(value["source_system"], SOURCE_SYSTEM_GCODE);
943 assert_eq!(value["source_file_path"], "src/lib.rs");
944 assert_eq!(value["source_line_start"], 3);
945 assert_eq!(value["source_line_end"], 5);
946 assert_eq!(value["source_byte_start"], 10);
947 assert_eq!(value["source_byte_end"], 40);
948 assert_eq!(value["source_symbol_id"], "symbol-1");
949 }
950
951 #[test]
952 fn summaries_are_optional_enrichment() {
953 let symbol = test_symbol(None);
954 let payload = CodeSymbolVectorPayload::from_symbol(&symbol);
955 let vector_text = vector_text_for_symbol(&symbol);
956 let value = serde_json::to_value(payload).expect("payload serializes");
957
958 assert!(value.get("summary").is_none());
959 assert!(vector_text.contains("name: run"));
960 assert!(!vector_text.contains("summary:"));
961 }
962
963 #[test]
964 fn collection_name_compatibility() {
965 assert_eq!(
966 collection_name(CODE_SYMBOL_COLLECTION_PREFIX, "project-1"),
967 "code_symbols_project-1"
968 );
969 }
970
971 #[test]
972 fn delete_project_collection_targets_only_project_collection() {
973 let (qdrant_url, handle) = spawn_http_responses(vec![(200, json!({"result": true}))]);
974 let deleted = delete_project_collection(
975 &QdrantConfig {
976 url: Some(qdrant_url),
977 api_key: Some("qdrant-key".to_string()),
978 },
979 "project-1",
980 )
981 .expect("delete collection");
982 let requests = handle.join().expect("qdrant requests");
983
984 assert!(deleted);
985 assert_eq!(requests.len(), 1);
986 assert!(requests[0].contains("DELETE /collections/code_symbols_project-1 HTTP/1.1"));
987 assert!(requests[0].contains("api-key: qdrant-key"));
988 assert!(!requests[0].contains("project-2"));
989 }
990
991 #[test]
992 fn delete_prefixed_collections_deletes_only_code_symbol_collections() {
993 let (qdrant_url, handle) = spawn_http_responses(vec![
994 (
995 200,
996 json!({
997 "result": {
998 "collections": [
999 {"name": "code_symbols_project-1"},
1000 {"name": "memory_vectors"},
1001 {"name": "code_symbols_project-2"}
1002 ]
1003 }
1004 }),
1005 ),
1006 (200, json!({"result": true})),
1007 (200, json!({"result": true})),
1008 ]);
1009 let deleted = delete_code_symbol_collections_with_prefix(&QdrantConfig {
1010 url: Some(qdrant_url),
1011 api_key: None,
1012 })
1013 .expect("delete prefixed collections");
1014 let requests = handle.join().expect("qdrant requests");
1015
1016 assert_eq!(
1017 deleted,
1018 vec![
1019 "code_symbols_project-1".to_string(),
1020 "code_symbols_project-2".to_string()
1021 ]
1022 );
1023 assert_eq!(requests.len(), 3);
1024 assert!(requests[0].contains("GET /collections HTTP/1.1"));
1025 assert!(requests[1].contains("DELETE /collections/code_symbols_project-1 HTTP/1.1"));
1026 assert!(requests[2].contains("DELETE /collections/code_symbols_project-2 HTTP/1.1"));
1027 assert!(
1028 requests
1029 .iter()
1030 .all(|request| !request.contains("DELETE /collections/memory_vectors"))
1031 );
1032 }
1033
1034 #[test]
1035 fn embedding_request_response() {
1036 let (base_url, handle) = spawn_http_responses(vec![(
1037 200,
1038 json!({"data": [{"embedding": [0.25, 0.5, 0.75]}]}),
1039 )]);
1040 let config = EmbeddingConfig {
1041 api_base: format!("{base_url}/v1"),
1042 model: "embed-small".to_string(),
1043 api_key: Some("embedding-key".to_string()),
1044 };
1045
1046 let embedding = embed_text(&config, "dimension_probe").expect("embedding response");
1047 let requests = handle.join().expect("server thread");
1048
1049 assert_eq!(embedding, vec![0.25, 0.5, 0.75]);
1050 assert_eq!(requests.len(), 1);
1051 assert!(requests[0].contains("POST /v1/embeddings HTTP/1.1"));
1052 assert!(requests[0].contains("authorization: Bearer embedding-key"));
1053 assert!(requests[0].contains(r#""model":"embed-small""#));
1054 assert!(requests[0].contains(r#""input":"dimension_probe""#));
1055 }
1056
/// Covers both ways `ensure_collection` can arrive at a vector size.
///
/// 1. With `vector_dim: None`, the embedding server is expected to be contacted exactly
///    once across two `ensure_collection` calls, and the collection-creating `PUT` must
///    carry the size of the returned embedding (3) and the cosine distance.
/// 2. With `vector_dim: Some(1536)`, the creating `PUT` must carry that size. The
///    embedding endpoint in this phase points at an address no test server listens on,
///    so presumably a probe attempt would make the call fail (not asserted directly).
#[test]
fn ensure_collection_resolves_vector_size_and_distance() {
    // --- Phase 1: size discovered through the embedding endpoint -------------------
    let probe_reply = (200, json!({"data": [{"embedding": [0.1, 0.2, 0.3]}]}));
    let (embedding_url, embedding_server) = spawn_http_responses(vec![probe_reply]);

    let qdrant_script = vec![
        (404, json!({"status": "not found"})),
        (200, json!({"result": true})),
        (
            200,
            json!({"result": {"config": {"params": {"vectors": {"size": 3, "distance": "Cosine"}}}}}),
        ),
    ];
    let (qdrant_url, qdrant_server) = spawn_http_responses(qdrant_script);

    let probing_qdrant = QdrantConfig {
        url: Some(qdrant_url),
        api_key: None,
    };
    let probing_embedding = EmbeddingConfig {
        api_base: format!("{embedding_url}/v1"),
        model: "embed-small".to_string(),
        api_key: None,
    };
    let mut lifecycle = CodeSymbolVectorLifecycle::new(
        "project-1".to_string(),
        probing_qdrant,
        probing_embedding,
        CodeVectorSettings { vector_dim: None },
    )
    .expect("lifecycle");

    let created = lifecycle.ensure_collection().expect("create collection");
    let reused = lifecycle.ensure_collection().expect("reuse collection");

    // Both servers finish once their scripted responses are used up.
    let embedding_requests = embedding_server.join().expect("embedding requests");
    let qdrant_requests = qdrant_server.join().expect("qdrant requests");

    assert_eq!(created.size, 3);
    assert_eq!(created.distance, VECTOR_DISTANCE_COSINE);
    assert_eq!(reused.size, 3);
    assert_eq!(embedding_requests.len(), 1, "dimension probe is cached");

    let create_request = &qdrant_requests[1];
    assert!(create_request.contains("PUT /collections/code_symbols_project-1 HTTP/1.1"));
    assert!(create_request.contains(r#""size":3"#));
    assert!(create_request.contains(r#""distance":"Cosine""#));

    // --- Phase 2: size supplied explicitly through settings ------------------------
    let explicit_script = vec![
        (404, json!({"status": "not found"})),
        (200, json!({"result": true})),
    ];
    let (explicit_qdrant_url, explicit_server) = spawn_http_responses(explicit_script);

    let explicit_qdrant = QdrantConfig {
        url: Some(explicit_qdrant_url),
        api_key: None,
    };
    let unused_embedding = EmbeddingConfig {
        api_base: "http://127.0.0.1:9/v1".to_string(),
        model: "unused".to_string(),
        api_key: None,
    };
    let mut explicit = CodeSymbolVectorLifecycle::new(
        "project-1".to_string(),
        explicit_qdrant,
        unused_embedding,
        CodeVectorSettings {
            vector_dim: Some(1536),
        },
    )
    .expect("lifecycle with explicit size");

    let schema = explicit.ensure_collection().expect("explicit size create");
    let explicit_requests = explicit_server.join().expect("explicit qdrant requests");

    assert_eq!(schema.size, 1536);
    assert!(explicit_requests[1].contains(r#""size":1536"#));
}
1125
/// Guards against Qdrant lifecycle REST paths leaking into other modules.
///
/// Walks every `.rs` file under `$CARGO_MANIFEST_DIR/src` and fails if any file other
/// than `vector/code_symbols.rs` contains one of the lifecycle path fragments.
#[test]
fn lifecycle_http_scoped_to_module() {
    /// Recursively scans `dir`, appending each offending file path to `offenders`.
    fn visit(dir: &std::path::Path, offenders: &mut Vec<std::path::PathBuf>) {
        // Path fragments that identify lifecycle REST calls.
        const LIFECYCLE_REST: [&str; 4] = [
            "/points/delete",
            "points/delete",
            "collections/{collection}",
            "/collections/{collection}",
        ];

        for entry in std::fs::read_dir(dir).expect("read source directory") {
            let candidate = entry.expect("source entry").path();

            if candidate.is_dir() {
                visit(&candidate, offenders);
            } else if candidate.extension().and_then(|ext| ext.to_str()) == Some("rs") {
                let source = std::fs::read_to_string(&candidate).expect("read source file");
                let mentions_lifecycle = LIFECYCLE_REST
                    .iter()
                    .any(|needle| source.contains(needle));

                // This module is the one place allowed to mention the paths.
                if mentions_lifecycle && !candidate.ends_with("vector/code_symbols.rs") {
                    offenders.push(candidate);
                }
            }
        }
    }

    let src_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("src");
    let mut offenders = Vec::new();
    visit(&src_dir, &mut offenders);

    assert!(
        offenders.is_empty(),
        "Qdrant lifecycle REST must stay scoped to vector/code_symbols.rs: {offenders:?}"
    );
}
1164
/// Asserts that the text of `code_symbols.rs` mentions each `gobby_core` entry point
/// this module is expected to route through.
///
/// NOTE(review): if this test module is itself part of `code_symbols.rs`, the string
/// literals below also appear in the included text and would satisfy the checks on
/// their own — confirm the file layout, and consider restricting the search to the
/// non-test portion of the file.
#[test]
fn routes_through_gobby_core_qdrant() {
    let module_text = include_str!("code_symbols.rs");

    // Configuration and connection plumbing.
    assert!(module_text.contains("gobby_core::config::resolve_qdrant_config"));
    assert!(module_text.contains("gobby_core::qdrant::with_qdrant"));

    // Collection naming.
    assert!(module_text.contains("gobby_core::qdrant::collection_name"));
    assert!(module_text.contains("CollectionScope::Custom"));

    // Data-plane operations.
    assert!(module_text.contains("gobby_core::qdrant::search"));
    assert!(module_text.contains("gobby_core::qdrant::upsert"));
}
1175
1176 fn spawn_http_responses(
1177 responses: Vec<(u16, Value)>,
1178 ) -> (String, thread::JoinHandle<Vec<String>>) {
1179 let listener = TcpListener::bind("127.0.0.1:0").expect("bind test server");
1180 let addr = listener.local_addr().expect("local addr");
1181 let handle = thread::spawn(move || {
1182 let mut requests = Vec::new();
1183 for (status, body) in responses {
1184 let (mut stream, _) = listener.accept().expect("accept request");
1185 requests.push(read_http_request(&mut stream));
1186
1187 let body = body.to_string();
1188 write!(
1189 stream,
1190 "HTTP/1.1 {status} OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}",
1191 body.len()
1192 )
1193 .expect("write response");
1194 }
1195 requests
1196 });
1197
1198 (format!("http://{addr}"), handle)
1199 }
1200
/// Reads a single HTTP/1.1 request from `stream` and returns it as lossily decoded text.
///
/// Reading stops as soon as the header terminator (`\r\n\r\n`) has been seen and the
/// number of body bytes announced by `Content-Length` has arrived, or when the peer
/// closes the stream, whichever happens first. A missing or unparsable
/// `Content-Length` header is treated as an empty body.
///
/// # Panics
///
/// Panics if reading from `stream` fails.
fn read_http_request(stream: &mut impl Read) -> String {
    let mut request = Vec::new();
    let mut buffer = [0; 4096];
    // Total request size (headers + terminator + body); known once headers are complete.
    let mut expected_len: Option<usize> = None;

    loop {
        let n = stream.read(&mut buffer).expect("read request");
        if n == 0 {
            // Peer closed the stream; return whatever was received.
            break;
        }
        request.extend_from_slice(&buffer[..n]);

        if expected_len.is_none() {
            expected_len = request
                .windows(4)
                .position(|window| window == b"\r\n\r\n")
                .map(|header_end| {
                    header_end + 4 + declared_content_length(&request[..header_end])
                });
        }

        if expected_len.is_some_and(|total| request.len() >= total) {
            break;
        }
    }

    String::from_utf8_lossy(&request).into_owned()
}

/// Extracts the `Content-Length` value from a raw header block, defaulting to 0.
///
/// The header name is matched case-insensitively and whitespace around the value is
/// ignored, so `Content-Length:5` and `content-length:  5 ` are both accepted.
fn declared_content_length(header_block: &[u8]) -> usize {
    String::from_utf8_lossy(header_block)
        .lines()
        .find_map(|line| {
            let (name, value) = line.split_once(':')?;
            if name.trim().eq_ignore_ascii_case("content-length") {
                value.trim().parse::<usize>().ok()
            } else {
                None
            }
        })
        .unwrap_or(0)
}
1238}