Skip to main content

gobby_code/vector/
code_symbols.rs

1use postgres::GenericClient;
2use reqwest::StatusCode;
3use serde::{Deserialize, Serialize};
4use serde_json::{Map, Value, json};
5use std::fmt;
6use std::time::Duration;
7
8use crate::config::{
9    CODE_SYMBOL_COLLECTION_PREFIX, CodeVectorSettings, Context, EmbeddingConfig, QdrantConfig,
10};
11use crate::db;
12use crate::models::{ProjectionMetadata, ProjectionProvenance, Symbol};
13use gobby_core::degradation::ServiceState;
14use gobby_core::qdrant::{CollectionScope, SearchRequest, UpsertRequest};
15
// Keep code-symbol collections compatible with the Python daemon's Qdrant schema.
/// Distance metric name written into `vectors.distance` when a collection is created and
/// reported as the expected metric when an existing collection does not match.
pub const VECTOR_DISTANCE_COSINE: &str = "Cosine";
/// Text embedded once to discover the embedding dimension when `vector_dim` is not configured.
const DIMENSION_PROBE_TEXT: &str = "dimension_probe";
/// Request timeout for the blocking HTTP clients built by `embed_text`,
/// `CodeSymbolVectorLifecycle::new` and `qdrant_http_client`.
const HTTP_TIMEOUT: Duration = Duration::from_secs(10);
20
/// Parameters for one semantic search over a project's code-symbol vectors.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CodeSymbolVectorSearchRequest {
    /// Project whose collection is searched; combined with `collection_prefix` to name it.
    pub project_id: String,
    /// Free-text query; it is embedded before being sent to Qdrant.
    pub query: String,
    /// Maximum number of hits requested from the vector search.
    pub limit: usize,
    /// Prefix placed in front of `project_id` when building the collection name.
    pub collection_prefix: String,
}
28
/// One result of a code-symbol vector search.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CodeSymbolVectorSearchHit {
    /// Id of the matching point, which is the symbol id used when the point was upserted.
    pub symbol_id: String,
    /// Score reported by the vector search, widened to `f64`.
    pub score: f64,
}
34
/// Payload stored alongside each code-symbol vector.
///
/// Built from a [`Symbol`] by [`CodeSymbolVectorPayload::from_symbol`] and serialized to a JSON
/// object before upserting. Optional fields are omitted from the JSON when they are `None`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CodeSymbolVectorPayload {
    /// Owning project; point deletion filters on this key.
    pub project_id: String,
    /// File containing the symbol; per-file deletion filters on this key.
    pub file_path: String,
    pub symbol_id: String,
    pub name: String,
    pub kind: String,
    pub language: String,
    pub line_start: usize,
    pub line_end: usize,
    pub byte_start: usize,
    pub byte_end: usize,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub signature: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub docstring: Option<String>,
    /// Provenance taken from the projection metadata built for the symbol.
    pub provenance: ProjectionProvenance,
    /// Confidence taken from the projection metadata, when it carries one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence: Option<f64>,
    /// Source system taken from the projection metadata.
    pub source_system: String,
    /// Metadata source path, falling back to the symbol's own `file_path`.
    pub source_file_path: String,
    /// Metadata source line, falling back to the symbol's `line_start`.
    pub source_line: usize,
    // The four range fields below repeat the symbol's own line and byte range.
    pub source_line_start: usize,
    pub source_line_end: usize,
    pub source_byte_start: usize,
    pub source_byte_end: usize,
    /// Metadata source symbol id, falling back to the symbol's own `id`.
    pub source_symbol_id: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub summary: Option<String>,
}
65
66impl CodeSymbolVectorPayload {
67    pub fn from_symbol(symbol: &Symbol) -> Self {
68        let metadata = ProjectionMetadata::gcode_extracted()
69            .with_source_file_path(&symbol.file_path)
70            .with_source_line(symbol.line_start)
71            .with_source_symbol_id(&symbol.id);
72
73        Self {
74            project_id: symbol.project_id.clone(),
75            file_path: symbol.file_path.clone(),
76            symbol_id: symbol.id.clone(),
77            name: symbol.name.clone(),
78            kind: symbol.kind.clone(),
79            language: symbol.language.clone(),
80            line_start: symbol.line_start,
81            line_end: symbol.line_end,
82            byte_start: symbol.byte_start,
83            byte_end: symbol.byte_end,
84            signature: symbol.signature.clone(),
85            docstring: symbol.docstring.clone(),
86            provenance: metadata.provenance,
87            confidence: metadata.confidence,
88            source_system: metadata.source_system,
89            source_file_path: metadata
90                .source_file_path
91                .unwrap_or_else(|| symbol.file_path.clone()),
92            source_line: metadata.source_line.unwrap_or(symbol.line_start),
93            source_line_start: symbol.line_start,
94            source_line_end: symbol.line_end,
95            source_byte_start: symbol.byte_start,
96            source_byte_end: symbol.byte_end,
97            source_symbol_id: metadata
98                .source_symbol_id
99                .unwrap_or_else(|| symbol.id.clone()),
100            summary: symbol.summary.clone(),
101        }
102    }
103}
104
/// Lifecycle operation reported in status and output records.
///
/// Serialized in `snake_case` (for example `sync_file`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CodeSymbolVectorLifecycleAction {
    Ensure,
    /// Replace the vectors of a single file.
    SyncFile,
    /// Delete the project's vectors.
    Clear,
    /// Delete the project's vectors and upsert a fresh set.
    Rebuild,
}
113
/// Identifies which collection a lifecycle action targets; built by `lifecycle_status`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CodeSymbolVectorLifecycleStatus {
    pub project_id: String,
    /// Collection name derived from the collection prefix and `project_id`.
    pub collection: String,
    pub action: CodeSymbolVectorLifecycleAction,
}
120
/// Vector parameters a collection is expected to have (and is created with).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct VectorCollectionSchema {
    /// Vector dimension.
    pub size: usize,
    /// Distance metric name, e.g. `"Cosine"`.
    pub distance: String,
}
126
/// Vector parameters parsed from a Qdrant "get collection" response.
///
/// Each field is `None` when the response did not carry a usable value for it.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ExistingVectorCollectionSchema {
    size: Option<usize>,
    distance: Option<String>,
}
132
/// Result record returned by the lifecycle operations on `CodeSymbolVectorLifecycle`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CodeSymbolVectorLifecycleOutput {
    pub project_id: String,
    pub collection: String,
    pub action: CodeSymbolVectorLifecycleAction,
    /// File the action was scoped to; `None` for project-wide actions.
    pub file_path: Option<String>,
    /// Number of symbols handed to the operation.
    pub symbols: usize,
    pub vectors_upserted: usize,
    /// Number of delete operations issued, not the number of points removed.
    pub vectors_deleted: usize,
    /// Human-readable one-line recap of the two counters above.
    pub summary: String,
}
144
/// Failures produced by the vector lifecycle and its embedding / Qdrant helpers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VectorLifecycleError {
    /// The Qdrant URL is missing or blank, or the Qdrant service reports `NotConfigured`.
    MissingQdrantConfig,
    /// The embedding API base URL is blank.
    MissingEmbeddingConfig,
    /// The embeddings endpoint answered with a non-success HTTP status.
    EmbeddingHttp {
        status: u16,
        body: String,
    },
    /// The embedding request could not be sent, or its response could not be interpreted.
    EmbeddingResponse(String),
    /// A Qdrant REST call answered with a non-success HTTP status.
    QdrantHttp {
        /// Short label of the failing operation, e.g. `"get collection"`.
        operation: &'static str,
        status: u16,
        body: String,
    },
    /// Any other Qdrant failure: transport errors, undecodable bodies, unexpected service state.
    QdrantOperation(String),
    /// An existing collection's vector size or distance differs from what is expected.
    DimensionMismatch {
        collection: String,
        expected_size: usize,
        /// `None` when the existing size could not be determined.
        found_size: Option<usize>,
        expected_distance: &'static str,
        /// `None` when the existing distance could not be determined.
        found_distance: Option<String>,
    },
}
168
169impl fmt::Display for VectorLifecycleError {
170    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
171        match self {
172            Self::MissingQdrantConfig => {
173                write!(f, "Qdrant config is required for vector lifecycle commands")
174            }
175            Self::MissingEmbeddingConfig => write!(
176                f,
177                "embedding config is required for vector lifecycle commands"
178            ),
179            Self::EmbeddingHttp { status, body } => {
180                write!(f, "embedding request failed: HTTP {status}: {body}")
181            }
182            Self::EmbeddingResponse(reason) => {
183                write!(f, "embedding response was invalid: {reason}")
184            }
185            Self::QdrantHttp {
186                operation,
187                status,
188                body,
189            } => write!(f, "Qdrant {operation} failed: HTTP {status}: {body}"),
190            Self::QdrantOperation(reason) => write!(f, "Qdrant operation failed: {reason}"),
191            Self::DimensionMismatch {
192                collection,
193                expected_size,
194                found_size,
195                expected_distance,
196                found_distance,
197            } => write!(
198                f,
199                "Qdrant collection `{collection}` has incompatible vector schema: expected size {expected_size} distance {expected_distance}, found size {} distance {}. Refusing to migrate, drop, or recreate the collection.",
200                found_size
201                    .map(|value| value.to_string())
202                    .unwrap_or_else(|| "unknown".to_string()),
203                found_distance.as_deref().unwrap_or("unknown")
204            ),
205        }
206    }
207}
208
// Marker impl so `VectorLifecycleError` can be used wherever a `std::error::Error` is expected.
impl std::error::Error for VectorLifecycleError {}
210
/// Manages one project's code-symbol vector collection in Qdrant: ensuring it exists,
/// syncing a file's vectors, clearing and rebuilding.
#[derive(Debug)]
pub struct CodeSymbolVectorLifecycle {
    /// Project whose symbols are stored; also the `project_id` filter value on deletes.
    project_id: String,
    /// Collection name derived from the code-symbol prefix and `project_id`.
    collection: String,
    qdrant: QdrantConfig,
    embedding: EmbeddingConfig,
    settings: CodeVectorSettings,
    /// Dimension discovered by a probe embedding, cached so the probe runs at most once.
    probed_vector_size: Option<usize>,
    /// Blocking HTTP client used for the direct Qdrant REST calls.
    client: reqwest::blocking::Client,
}
221
222pub fn collection_name(collection_prefix: &str, project_id: &str) -> String {
223    let collection = format!("{collection_prefix}{project_id}");
224    gobby_core::qdrant::collection_name("gcode", CollectionScope::Custom(&collection))
225}
226
227pub fn delete_project_collection(
228    qdrant: &QdrantConfig,
229    project_id: &str,
230) -> Result<bool, VectorLifecycleError> {
231    let client = qdrant_http_client()?;
232    let collection = collection_name(CODE_SYMBOL_COLLECTION_PREFIX, project_id);
233    delete_qdrant_collection(&client, qdrant, &collection)
234}
235
236pub fn delete_code_symbol_collections_with_prefix(
237    qdrant: &QdrantConfig,
238) -> Result<Vec<String>, VectorLifecycleError> {
239    let client = qdrant_http_client()?;
240    let resp = qdrant_request_for_config(&client, qdrant, reqwest::Method::GET, "/collections")?
241        .send()
242        .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
243    let status = resp.status();
244    if !status.is_success() {
245        return Err(qdrant_http_error("list collections", status, resp));
246    }
247
248    let data: Value = resp
249        .json()
250        .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
251    let collections = parse_collection_names(&data)
252        .into_iter()
253        .filter(|name| name.starts_with(CODE_SYMBOL_COLLECTION_PREFIX))
254        .collect::<Vec<_>>();
255
256    let mut deleted = Vec::new();
257    for collection in collections {
258        if delete_qdrant_collection(&client, qdrant, &collection)? {
259            deleted.push(collection);
260        }
261    }
262    Ok(deleted)
263}
264
265pub fn resolve_lifecycle_qdrant_config(
266    source: &mut impl gobby_core::config::ConfigSource,
267) -> Option<QdrantConfig> {
268    gobby_core::config::resolve_qdrant_config(source)
269}
270
271pub fn lifecycle_status(
272    project_id: impl Into<String>,
273    collection_prefix: &str,
274    action: CodeSymbolVectorLifecycleAction,
275) -> CodeSymbolVectorLifecycleStatus {
276    let project_id = project_id.into();
277    CodeSymbolVectorLifecycleStatus {
278        collection: collection_name(collection_prefix, &project_id),
279        project_id,
280        action,
281    }
282}
283
284pub fn embed_text(config: &EmbeddingConfig, text: &str) -> Result<Vec<f32>, VectorLifecycleError> {
285    let client = reqwest::blocking::Client::builder()
286        .timeout(HTTP_TIMEOUT)
287        .build()
288        .map_err(|err| VectorLifecycleError::EmbeddingResponse(err.to_string()))?;
289
290    let body = json!({
291        "model": config.model,
292        "input": text,
293    });
294
295    let url = format!("{}/embeddings", config.api_base.trim_end_matches('/'));
296    let mut req = client.post(&url).json(&body);
297
298    if let Some(key) = &config.api_key {
299        req = req.header("Authorization", format!("Bearer {key}"));
300    }
301
302    let resp = req
303        .send()
304        .map_err(|err| VectorLifecycleError::EmbeddingResponse(err.to_string()))?;
305    if !resp.status().is_success() {
306        let status = resp.status().as_u16();
307        let body = resp.text().unwrap_or_default();
308        return Err(VectorLifecycleError::EmbeddingHttp { status, body });
309    }
310
311    let data: Value = resp
312        .json()
313        .map_err(|err| VectorLifecycleError::EmbeddingResponse(err.to_string()))?;
314    let embedding: Vec<f32> = data
315        .get("data")
316        .and_then(Value::as_array)
317        .and_then(|values| values.first())
318        .and_then(|value| value.get("embedding"))
319        .and_then(Value::as_array)
320        .ok_or_else(|| {
321            VectorLifecycleError::EmbeddingResponse("missing data[0].embedding array".to_string())
322        })?
323        .iter()
324        .map(|value| {
325            value.as_f64().map(|f| f as f32).ok_or_else(|| {
326                VectorLifecycleError::EmbeddingResponse(
327                    "embedding array contains a non-number".to_string(),
328                )
329            })
330        })
331        .collect::<Result<Vec<_>, _>>()?;
332
333    if embedding.is_empty() {
334        Err(VectorLifecycleError::EmbeddingResponse(
335            "embedding vector was empty".to_string(),
336        ))
337    } else {
338        Ok(embedding)
339    }
340}
341
342pub fn embed_query(config: &EmbeddingConfig, text: &str) -> Option<Vec<f32>> {
343    embed_text(config, &format!("search_query: {text}")).ok()
344}
345
346pub fn vector_text_for_symbol(symbol: &Symbol) -> String {
347    let mut lines = vec![
348        format!("name: {}", symbol.name),
349        format!("qualified_name: {}", symbol.qualified_name),
350        format!("kind: {}", symbol.kind),
351        format!("language: {}", symbol.language),
352        format!("file_path: {}", symbol.file_path),
353        format!("range: {}-{}", symbol.line_start, symbol.line_end),
354    ];
355    if let Some(signature) = symbol
356        .signature
357        .as_deref()
358        .filter(|value| !value.trim().is_empty())
359    {
360        lines.push(format!("signature: {signature}"));
361    }
362    if let Some(docstring) = symbol
363        .docstring
364        .as_deref()
365        .filter(|value| !value.trim().is_empty())
366    {
367        lines.push(format!("docstring: {docstring}"));
368    }
369    if let Some(summary) = symbol
370        .summary
371        .as_deref()
372        .filter(|value| !value.trim().is_empty())
373    {
374        lines.push(format!("summary: {summary}"));
375    }
376    lines.join("\n")
377}
378
379pub fn vector_search(
380    config: &QdrantConfig,
381    collection: &str,
382    query_vector: &[f32],
383    limit: usize,
384) -> anyhow::Result<Vec<(String, f64)>> {
385    let request = SearchRequest {
386        vector: query_vector.to_vec(),
387        limit,
388        filter: None,
389    };
390    let (hits, _) = gobby_core::qdrant::with_qdrant(Some(config), Vec::new(), |config| {
391        gobby_core::qdrant::search(config, collection, request)
392    })?;
393    Ok(hits
394        .into_iter()
395        .map(|hit| (hit.id, f64::from(hit.score)))
396        .collect())
397}
398
399impl CodeSymbolVectorLifecycle {
400    pub fn new(
401        project_id: String,
402        qdrant: QdrantConfig,
403        embedding: EmbeddingConfig,
404        settings: CodeVectorSettings,
405    ) -> Result<Self, VectorLifecycleError> {
406        if qdrant
407            .url
408            .as_deref()
409            .filter(|url| !url.trim().is_empty())
410            .is_none()
411        {
412            return Err(VectorLifecycleError::MissingQdrantConfig);
413        }
414        if embedding.api_base.trim().is_empty() {
415            return Err(VectorLifecycleError::MissingEmbeddingConfig);
416        }
417
418        let collection = collection_name(CODE_SYMBOL_COLLECTION_PREFIX, &project_id);
419        let client = reqwest::blocking::Client::builder()
420            .timeout(HTTP_TIMEOUT)
421            .build()
422            .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
423        Ok(Self {
424            project_id,
425            collection,
426            qdrant,
427            embedding,
428            settings,
429            probed_vector_size: None,
430            client,
431        })
432    }
433
434    pub fn collection(&self) -> &str {
435        &self.collection
436    }
437
438    pub fn ensure_collection(&mut self) -> Result<VectorCollectionSchema, VectorLifecycleError> {
439        let expected = self.expected_schema()?;
440        self.require_qdrant_boundary()?;
441        match self.get_collection_schema()? {
442            Some(found) => self.ensure_compatible_schema(expected, found),
443            None => {
444                self.create_collection(&expected)?;
445                Ok(expected)
446            }
447        }
448    }
449
450    pub fn sync_file_symbols(
451        &mut self,
452        file_path: &str,
453        symbols: &[Symbol],
454    ) -> Result<CodeSymbolVectorLifecycleOutput, VectorLifecycleError> {
455        self.ensure_collection()?;
456        let points = self.points_for_symbols(symbols)?;
457        self.delete_vectors(Some(file_path))?;
458        self.upsert_points(points)?;
459
460        Ok(self.output(
461            CodeSymbolVectorLifecycleAction::SyncFile,
462            Some(file_path.to_string()),
463            symbols.len(),
464            symbols.len(),
465            1,
466        ))
467    }
468
469    pub fn clear_project_vectors(
470        &mut self,
471    ) -> Result<CodeSymbolVectorLifecycleOutput, VectorLifecycleError> {
472        let expected = self.expected_schema()?;
473        self.require_qdrant_boundary()?;
474        let deleted = match self.get_collection_schema()? {
475            Some(found) => {
476                self.ensure_compatible_schema(expected, found)?;
477                self.delete_vectors(None)?;
478                1
479            }
480            None => 0,
481        };
482
483        Ok(self.output(CodeSymbolVectorLifecycleAction::Clear, None, 0, 0, deleted))
484    }
485
486    pub fn rebuild_symbols(
487        &mut self,
488        symbols: &[Symbol],
489    ) -> Result<CodeSymbolVectorLifecycleOutput, VectorLifecycleError> {
490        self.ensure_collection()?;
491        let points = self.points_for_symbols(symbols)?;
492        self.delete_vectors(None)?;
493        self.upsert_points(points)?;
494
495        Ok(self.output(
496            CodeSymbolVectorLifecycleAction::Rebuild,
497            None,
498            symbols.len(),
499            symbols.len(),
500            1,
501        ))
502    }
503
504    fn output(
505        &self,
506        action: CodeSymbolVectorLifecycleAction,
507        file_path: Option<String>,
508        symbols: usize,
509        vectors_upserted: usize,
510        vectors_deleted: usize,
511    ) -> CodeSymbolVectorLifecycleOutput {
512        CodeSymbolVectorLifecycleOutput {
513            project_id: self.project_id.clone(),
514            collection: self.collection.clone(),
515            action,
516            file_path,
517            symbols,
518            vectors_upserted,
519            vectors_deleted,
520            summary: format!(
521                "{vectors_upserted} vector(s) upserted, {vectors_deleted} delete operation(s) issued"
522            ),
523        }
524    }
525
526    fn expected_schema(&mut self) -> Result<VectorCollectionSchema, VectorLifecycleError> {
527        let size = match self.settings.vector_dim {
528            Some(size) => size,
529            None => match self.probed_vector_size {
530                Some(size) => size,
531                None => {
532                    let size = embed_text(&self.embedding, DIMENSION_PROBE_TEXT)?.len();
533                    self.probed_vector_size = Some(size);
534                    size
535                }
536            },
537        };
538
539        Ok(VectorCollectionSchema {
540            size,
541            distance: VECTOR_DISTANCE_COSINE.to_string(),
542        })
543    }
544
545    fn require_qdrant_boundary(&self) -> Result<(), VectorLifecycleError> {
546        let ((), state) = gobby_core::qdrant::with_qdrant(Some(&self.qdrant), (), |_| Ok(()))
547            .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
548        match state {
549            ServiceState::Available => Ok(()),
550            ServiceState::NotConfigured => Err(VectorLifecycleError::MissingQdrantConfig),
551            other => Err(VectorLifecycleError::QdrantOperation(format!(
552                "unexpected Qdrant service state: {other:?}"
553            ))),
554        }
555    }
556
557    fn ensure_compatible_schema(
558        &self,
559        expected: VectorCollectionSchema,
560        found: ExistingVectorCollectionSchema,
561    ) -> Result<VectorCollectionSchema, VectorLifecycleError> {
562        if found.size == Some(expected.size)
563            && found.distance.as_deref() == Some(&expected.distance)
564        {
565            return Ok(VectorCollectionSchema {
566                size: expected.size,
567                distance: expected.distance,
568            });
569        }
570
571        Err(VectorLifecycleError::DimensionMismatch {
572            collection: self.collection.clone(),
573            expected_size: expected.size,
574            found_size: found.size,
575            expected_distance: VECTOR_DISTANCE_COSINE,
576            found_distance: found.distance,
577        })
578    }
579
580    fn get_collection_schema(
581        &self,
582    ) -> Result<Option<ExistingVectorCollectionSchema>, VectorLifecycleError> {
583        let resp = self
584            .qdrant_request(
585                reqwest::Method::GET,
586                &format!("/collections/{}", self.collection),
587            )?
588            .send()
589            .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
590        let status = resp.status();
591        if status == StatusCode::NOT_FOUND {
592            return Ok(None);
593        }
594        if !status.is_success() {
595            return Err(qdrant_http_error("get collection", status, resp));
596        }
597
598        let data: Value = resp
599            .json()
600            .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
601        Ok(parse_collection_schema(&data))
602    }
603
604    fn create_collection(
605        &self,
606        schema: &VectorCollectionSchema,
607    ) -> Result<(), VectorLifecycleError> {
608        let body = json!({
609            "vectors": {
610                "size": schema.size,
611                "distance": schema.distance,
612            },
613        });
614        let resp = self
615            .qdrant_request(
616                reqwest::Method::PUT,
617                &format!("/collections/{}", self.collection),
618            )?
619            .json(&body)
620            .send()
621            .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
622        if !resp.status().is_success() {
623            return Err(qdrant_http_error("create collection", resp.status(), resp));
624        }
625        Ok(())
626    }
627
628    fn delete_vectors(&self, file_path: Option<&str>) -> Result<(), VectorLifecycleError> {
629        let mut must = vec![json!({
630            "key": "project_id",
631            "match": {"value": self.project_id},
632        })];
633        if let Some(file_path) = file_path {
634            must.push(json!({
635                "key": "file_path",
636                "match": {"value": file_path},
637            }));
638        }
639        let body = json!({
640            "filter": {
641                "must": must,
642            },
643        });
644        let resp = self
645            .qdrant_request(
646                reqwest::Method::POST,
647                &format!("/collections/{}/points/delete", self.collection),
648            )?
649            .json(&body)
650            .send()
651            .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
652        if !resp.status().is_success() {
653            return Err(qdrant_http_error("delete points", resp.status(), resp));
654        }
655        Ok(())
656    }
657
658    fn upsert_points(&self, points: Vec<UpsertRequest>) -> Result<(), VectorLifecycleError> {
659        if points.is_empty() {
660            return Ok(());
661        }
662        let ((), state) = gobby_core::qdrant::with_qdrant(Some(&self.qdrant), (), |config| {
663            gobby_core::qdrant::upsert(config, &self.collection, points)
664        })
665        .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
666        match state {
667            ServiceState::Available => Ok(()),
668            ServiceState::NotConfigured => Err(VectorLifecycleError::MissingQdrantConfig),
669            other => Err(VectorLifecycleError::QdrantOperation(format!(
670                "unexpected Qdrant service state: {other:?}"
671            ))),
672        }
673    }
674
675    fn points_for_symbols(
676        &self,
677        symbols: &[Symbol],
678    ) -> Result<Vec<UpsertRequest>, VectorLifecycleError> {
679        symbols
680            .iter()
681            .map(|symbol| {
682                let vector = embed_text(&self.embedding, &vector_text_for_symbol(symbol))?;
683                let payload = payload_map(CodeSymbolVectorPayload::from_symbol(symbol))?;
684                Ok(UpsertRequest {
685                    id: symbol.id.clone(),
686                    vector,
687                    payload,
688                })
689            })
690            .collect()
691    }
692
693    fn qdrant_request(
694        &self,
695        method: reqwest::Method,
696        path: &str,
697    ) -> Result<reqwest::blocking::RequestBuilder, VectorLifecycleError> {
698        qdrant_request_for_config(&self.client, &self.qdrant, method, path)
699    }
700}
701
702pub fn fetch_symbols_for_file(
703    conn: &mut impl GenericClient,
704    project_id: &str,
705    file_path: &str,
706) -> anyhow::Result<Vec<Symbol>> {
707    let columns = db::symbol_select_columns("");
708    conn.query(
709        &format!(
710            "SELECT {columns} FROM code_symbols
711             WHERE project_id = $1 AND file_path = $2
712             ORDER BY file_path, byte_start, id"
713        ),
714        &[&project_id, &file_path],
715    )?
716    .into_iter()
717    .map(|row| Symbol::from_row(&row))
718    .collect()
719}
720
721pub fn fetch_symbols_for_project(
722    conn: &mut impl GenericClient,
723    project_id: &str,
724) -> anyhow::Result<Vec<Symbol>> {
725    let columns = db::symbol_select_columns("");
726    conn.query(
727        &format!(
728            "SELECT {columns} FROM code_symbols
729             WHERE project_id = $1
730             ORDER BY file_path, byte_start, id"
731        ),
732        &[&project_id],
733    )?
734    .into_iter()
735    .map(|row| Symbol::from_row(&row))
736    .collect()
737}
738
739fn payload_map(
740    payload: CodeSymbolVectorPayload,
741) -> Result<Map<String, Value>, VectorLifecycleError> {
742    match serde_json::to_value(payload)
743        .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?
744    {
745        Value::Object(map) => Ok(map),
746        _ => Err(VectorLifecycleError::QdrantOperation(
747            "vector payload did not serialize to an object".to_string(),
748        )),
749    }
750}
751
752fn parse_collection_schema(data: &Value) -> Option<ExistingVectorCollectionSchema> {
753    let vectors = data.pointer("/result/config/params/vectors")?;
754    let size = vectors
755        .get("size")
756        .and_then(Value::as_u64)
757        .map(|size| size as usize);
758    let distance = vectors
759        .get("distance")
760        .and_then(Value::as_str)
761        .map(str::to_string);
762    Some(ExistingVectorCollectionSchema { size, distance })
763}
764
765fn parse_collection_names(data: &Value) -> Vec<String> {
766    data.pointer("/result/collections")
767        .and_then(Value::as_array)
768        .map(|collections| {
769            collections
770                .iter()
771                .filter_map(|collection| {
772                    collection
773                        .get("name")
774                        .and_then(Value::as_str)
775                        .map(str::to_string)
776                })
777                .collect()
778        })
779        .unwrap_or_default()
780}
781
782fn qdrant_http_client() -> Result<reqwest::blocking::Client, VectorLifecycleError> {
783    reqwest::blocking::Client::builder()
784        .timeout(HTTP_TIMEOUT)
785        .build()
786        .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))
787}
788
789fn qdrant_request_for_config(
790    client: &reqwest::blocking::Client,
791    qdrant: &QdrantConfig,
792    method: reqwest::Method,
793    path: &str,
794) -> Result<reqwest::blocking::RequestBuilder, VectorLifecycleError> {
795    let base = qdrant
796        .url
797        .as_deref()
798        .ok_or(VectorLifecycleError::MissingQdrantConfig)?
799        .trim_end_matches('/');
800    let url = format!("{base}{path}");
801    let mut req = client.request(method, url);
802    if let Some(key) = &qdrant.api_key {
803        req = req.header("api-key", key);
804    }
805    Ok(req)
806}
807
808fn delete_qdrant_collection(
809    client: &reqwest::blocking::Client,
810    qdrant: &QdrantConfig,
811    collection: &str,
812) -> Result<bool, VectorLifecycleError> {
813    let resp = qdrant_request_for_config(
814        client,
815        qdrant,
816        reqwest::Method::DELETE,
817        &format!("/collections/{collection}"),
818    )?
819    .send()
820    .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
821    let status = resp.status();
822    if status == StatusCode::NOT_FOUND {
823        return Ok(false);
824    }
825    if !status.is_success() {
826        return Err(qdrant_http_error("delete collection", status, resp));
827    }
828    Ok(true)
829}
830
831fn qdrant_http_error(
832    operation: &'static str,
833    status: StatusCode,
834    resp: reqwest::blocking::Response,
835) -> VectorLifecycleError {
836    VectorLifecycleError::QdrantHttp {
837        operation,
838        status: status.as_u16(),
839        body: resp.text().unwrap_or_default(),
840    }
841}
842
843pub fn search_code_symbols(
844    ctx: &Context,
845    request: &CodeSymbolVectorSearchRequest,
846) -> Vec<CodeSymbolVectorSearchHit> {
847    let qdrant_config = match &ctx.qdrant {
848        Some(c) => c,
849        None => return vec![],
850    };
851
852    let embedding_config = match &ctx.embedding {
853        Some(c) => c,
854        None => return vec![],
855    };
856
857    let embedding = match embed_query(embedding_config, &request.query) {
858        Some(e) => e,
859        None => return vec![],
860    };
861
862    let collection = collection_name(&request.collection_prefix, &request.project_id);
863    vector_search(qdrant_config, &collection, &embedding, request.limit)
864        .unwrap_or_default()
865        .into_iter()
866        .map(|(symbol_id, score)| CodeSymbolVectorSearchHit { symbol_id, score })
867        .collect()
868}
869
870pub fn semantic_search(ctx: &Context, query: &str, limit: usize) -> Vec<(String, f64)> {
871    if ctx.qdrant.is_none() {
872        return vec![];
873    }
874
875    let request = CodeSymbolVectorSearchRequest {
876        project_id: ctx.project_id.clone(),
877        query: query.to_string(),
878        limit,
879        collection_prefix: CODE_SYMBOL_COLLECTION_PREFIX.to_string(),
880    };
881
882    search_code_symbols(ctx, &request)
883        .into_iter()
884        .map(|hit| (hit.symbol_id, hit.score))
885        .collect()
886}
887
888#[cfg(test)]
889mod tests {
890    use super::*;
891    use crate::config::{CodeVectorSettings, QdrantConfig};
892    use crate::models::{SOURCE_SYSTEM_GCODE, Symbol};
893    use serde_json::{Value, json};
894    use std::io::{Read, Write};
895    use std::net::TcpListener;
896    use std::thread;
897
898    fn test_symbol(summary: Option<String>) -> Symbol {
899        Symbol {
900            id: "symbol-1".to_string(),
901            project_id: "project-1".to_string(),
902            file_path: "src/lib.rs".to_string(),
903            name: "run".to_string(),
904            qualified_name: "crate::run".to_string(),
905            kind: "function".to_string(),
906            language: "rust".to_string(),
907            byte_start: 10,
908            byte_end: 40,
909            line_start: 3,
910            line_end: 5,
911            signature: None,
912            docstring: None,
913            parent_symbol_id: None,
914            content_hash: "hash".to_string(),
915            summary,
916            created_at: String::new(),
917            updated_at: String::new(),
918        }
919    }
920
/// A payload built from a symbol carries provenance and source-location metadata, both on the
/// struct and in its serialized JSON form.
#[test]
fn payloads_carry_provenance_metadata() {
    let symbol = test_symbol(Some(String::from("does work")));
    let payload = CodeSymbolVectorPayload::from_symbol(&symbol);

    // Struct-level view.
    assert_eq!(payload.provenance, ProjectionProvenance::Extracted);
    assert_eq!(payload.confidence, Some(1.0));
    assert_eq!(payload.source_system, SOURCE_SYSTEM_GCODE);
    assert_eq!(payload.source_file_path, "src/lib.rs");
    assert_eq!(payload.source_line_start, 3);
    assert_eq!(payload.source_line_end, 5);
    assert_eq!(payload.source_byte_start, 10);
    assert_eq!(payload.source_byte_end, 40);
    assert_eq!(payload.source_line, 3);
    assert_eq!(payload.source_symbol_id, "symbol-1");
    assert_eq!(payload.summary.as_deref(), Some("does work"));
    assert_eq!(payload.signature, None);
    assert_eq!(payload.docstring, None);

    // Serialized view.
    let serialized = serde_json::to_value(payload).expect("payload serializes");
    assert_eq!(serialized["provenance"], "EXTRACTED");
    assert_eq!(serialized["confidence"], 1.0);
    assert_eq!(serialized["source_system"], SOURCE_SYSTEM_GCODE);
    assert_eq!(serialized["source_file_path"], "src/lib.rs");
    let expected_numbers: [(&str, i32); 4] = [
        ("source_line_start", 3),
        ("source_line_end", 5),
        ("source_byte_start", 10),
        ("source_byte_end", 40),
    ];
    for (key, expected) in expected_numbers {
        assert_eq!(serialized[key], expected, "unexpected value for `{key}`");
    }
    assert_eq!(serialized["source_symbol_id"], "symbol-1");
}
950
/// A symbol without a summary yields no `summary` key in the payload JSON and no `summary:`
/// section in the text that gets embedded.
#[test]
fn summaries_are_optional_enrichment() {
    let bare = test_symbol(None);

    let serialized = serde_json::to_value(CodeSymbolVectorPayload::from_symbol(&bare))
        .expect("payload serializes");
    assert!(serialized.get("summary").is_none());

    let embedded_text = vector_text_for_symbol(&bare);
    assert!(embedded_text.contains("name: run"));
    assert!(!embedded_text.contains("summary:"));
}
962
/// Pins the collection name produced for the code-symbol prefix and a project id.
#[test]
fn collection_name_compatibility() {
    let name = collection_name(CODE_SYMBOL_COLLECTION_PREFIX, "project-1");
    assert_eq!(name, "code_symbols_project-1");
}
970
/// Deleting one project's collection issues a single authenticated `DELETE` for that project.
#[test]
fn delete_project_collection_targets_only_project_collection() {
    let (qdrant_url, server) = spawn_http_responses(vec![(200, json!({"result": true}))]);
    let config = QdrantConfig {
        url: Some(qdrant_url),
        api_key: Some("qdrant-key".to_string()),
    };

    let deleted = delete_project_collection(&config, "project-1").expect("delete collection");
    let requests = server.join().expect("qdrant requests");

    assert!(deleted);
    assert_eq!(requests.len(), 1);

    let request = &requests[0];
    assert!(request.contains("DELETE /collections/code_symbols_project-1 HTTP/1.1"));
    assert!(request.contains("api-key: qdrant-key"));
    assert!(!request.contains("project-2"));
}
990
/// Prefix-based cleanup lists all collections, then deletes only the `code_symbols_*` ones.
#[test]
fn delete_prefixed_collections_deletes_only_code_symbol_collections() {
    // The stub first serves a listing that mixes code-symbol collections with an unrelated one,
    // then acknowledges two further requests.
    let listing = json!({
        "result": {
            "collections": [
                {"name": "code_symbols_project-1"},
                {"name": "memory_vectors"},
                {"name": "code_symbols_project-2"}
            ]
        }
    });
    let (qdrant_url, server) = spawn_http_responses(vec![
        (200, listing),
        (200, json!({"result": true})),
        (200, json!({"result": true})),
    ]);
    let config = QdrantConfig {
        url: Some(qdrant_url),
        api_key: None,
    };

    let deleted = delete_code_symbol_collections_with_prefix(&config)
        .expect("delete prefixed collections");
    let requests = server.join().expect("qdrant requests");

    let expected: Vec<String> = ["code_symbols_project-1", "code_symbols_project-2"]
        .into_iter()
        .map(String::from)
        .collect();
    assert_eq!(deleted, expected);

    assert_eq!(requests.len(), 3);
    assert!(requests[0].contains("GET /collections HTTP/1.1"));
    assert!(requests[1].contains("DELETE /collections/code_symbols_project-1 HTTP/1.1"));
    assert!(requests[2].contains("DELETE /collections/code_symbols_project-2 HTTP/1.1"));

    let touched_unrelated = requests
        .iter()
        .any(|request| request.contains("DELETE /collections/memory_vectors"));
    assert!(!touched_unrelated);
}
1033
/// `embed_text` posts the model and input to `/v1/embeddings` with a bearer token and returns
/// the embedding from the response body.
#[test]
fn embedding_request_response() {
    let canned = json!({"data": [{"embedding": [0.25, 0.5, 0.75]}]});
    let (base_url, server) = spawn_http_responses(vec![(200, canned)]);
    let config = EmbeddingConfig {
        api_base: format!("{base_url}/v1"),
        model: "embed-small".to_string(),
        api_key: Some("embedding-key".to_string()),
    };

    let embedding = embed_text(&config, "dimension_probe").expect("embedding response");
    let requests = server.join().expect("server thread");

    assert_eq!(embedding, vec![0.25, 0.5, 0.75]);
    assert_eq!(requests.len(), 1);

    let request = &requests[0];
    let expected_fragments = [
        "POST /v1/embeddings HTTP/1.1",
        "authorization: Bearer embedding-key",
        r#""model":"embed-small""#,
        r#""input":"dimension_probe""#,
    ];
    for fragment in expected_fragments {
        assert!(request.contains(fragment), "request is missing {fragment:?}");
    }
}
1056
/// `ensure_collection` reports the vector size and distance of the collection, taking the size
/// either from an embedding probe or from an explicitly configured dimension.
#[test]
fn ensure_collection_resolves_vector_size_and_distance() {
    // --- Case 1: no configured dimension; an embedding stub returns a 3-element vector. ---
    let probe_response = json!({"data": [{"embedding": [0.1, 0.2, 0.3]}]});
    let (embedding_url, embedding_server) = spawn_http_responses(vec![(200, probe_response)]);

    // Qdrant stub replies, in order: not found, an acknowledgement, then a collection
    // description with size 3 and Cosine distance.
    let described = json!({
        "result": {"config": {"params": {"vectors": {"size": 3, "distance": "Cosine"}}}}
    });
    let (qdrant_url, qdrant_server) = spawn_http_responses(vec![
        (404, json!({"status": "not found"})),
        (200, json!({"result": true})),
        (200, described),
    ]);

    let probed_qdrant = QdrantConfig {
        url: Some(qdrant_url),
        api_key: None,
    };
    let probed_embedding = EmbeddingConfig {
        api_base: format!("{embedding_url}/v1"),
        model: "embed-small".to_string(),
        api_key: None,
    };
    let mut lifecycle = CodeSymbolVectorLifecycle::new(
        "project-1".to_string(),
        probed_qdrant,
        probed_embedding,
        CodeVectorSettings { vector_dim: None },
    )
    .expect("lifecycle");

    let created = lifecycle.ensure_collection().expect("create collection");
    let reused = lifecycle.ensure_collection().expect("reuse collection");
    let embedding_requests = embedding_server.join().expect("embedding requests");
    let qdrant_requests = qdrant_server.join().expect("qdrant requests");

    assert_eq!(created.size, 3);
    assert_eq!(created.distance, VECTOR_DISTANCE_COSINE);
    assert_eq!(reused.size, 3);
    assert_eq!(embedding_requests.len(), 1, "dimension probe is cached");

    let create_request = &qdrant_requests[1];
    assert!(create_request.contains("PUT /collections/code_symbols_project-1 HTTP/1.1"));
    assert!(create_request.contains(r#""size":3"#));
    assert!(create_request.contains(r#""distance":"Cosine""#));

    // --- Case 2: explicit dimension; no embedding stub is started for this lifecycle. ---
    let (explicit_qdrant_url, explicit_server) = spawn_http_responses(vec![
        (404, json!({"status": "not found"})),
        (200, json!({"result": true})),
    ]);
    let explicit_qdrant = QdrantConfig {
        url: Some(explicit_qdrant_url),
        api_key: None,
    };
    let unused_embedding = EmbeddingConfig {
        api_base: "http://127.0.0.1:9/v1".to_string(),
        model: "unused".to_string(),
        api_key: None,
    };
    let mut explicit = CodeSymbolVectorLifecycle::new(
        "project-1".to_string(),
        explicit_qdrant,
        unused_embedding,
        CodeVectorSettings {
            vector_dim: Some(1536),
        },
    )
    .expect("lifecycle with explicit size");

    let schema = explicit.ensure_collection().expect("explicit size create");
    let explicit_requests = explicit_server.join().expect("explicit qdrant requests");

    assert_eq!(schema.size, 1536);
    assert!(explicit_requests[1].contains(r#""size":1536"#));
}
1125
/// Scans every `.rs` file under the crate's `src` directory and fails if any file other than
/// `vector/code_symbols.rs` mentions one of the Qdrant lifecycle REST path fragments.
#[test]
fn lifecycle_http_scoped_to_module() {
    // Walks `dir` recursively and records offending Rust sources in `offenders`.
    fn visit(dir: &std::path::Path, offenders: &mut Vec<std::path::PathBuf>) {
        const LIFECYCLE_REST: [&str; 4] = [
            "/points/delete",
            "points/delete",
            "collections/{collection}",
            "/collections/{collection}",
        ];

        for entry in std::fs::read_dir(dir).expect("read source directory") {
            let path = entry.expect("source entry").path();
            if path.is_dir() {
                visit(&path, offenders);
            } else if path.extension().is_some_and(|ext| ext == "rs") {
                let source = std::fs::read_to_string(&path).expect("read source file");
                let mentions_lifecycle = LIFECYCLE_REST
                    .iter()
                    .any(|needle| source.contains(needle));
                if mentions_lifecycle && !path.ends_with("vector/code_symbols.rs") {
                    offenders.push(path);
                }
            }
        }
    }

    let src_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("src");
    let mut offenders = Vec::new();
    visit(&src_dir, &mut offenders);

    assert!(
        offenders.is_empty(),
        "Qdrant lifecycle REST must stay scoped to vector/code_symbols.rs: {offenders:?}"
    );
}
1164
/// Guards that the production code in this file reaches Qdrant through `gobby_core` helpers.
///
/// The check is textual: it looks for the helper paths in this file's own source. Only the text
/// ahead of the first test-only attribute is inspected, because the expected paths are spelled
/// out below as string literals and would otherwise always be found in the file, making every
/// assertion pass regardless of what the production code does.
#[test]
fn routes_through_gobby_core_qdrant() {
    let source = include_str!("code_symbols.rs");
    // Assumes the first `#[cfg(test)]` in the file is the one that opens the test module.
    let production = source
        .split_once("#[cfg(test)]")
        .map_or(source, |(before_tests, _)| before_tests);

    let required_paths = [
        "gobby_core::config::resolve_qdrant_config",
        "gobby_core::qdrant::with_qdrant",
        "gobby_core::qdrant::collection_name",
        "CollectionScope::Custom",
        "gobby_core::qdrant::search",
        "gobby_core::qdrant::upsert",
    ];
    for required in required_paths {
        assert!(
            production.contains(required),
            "production code must reference `{required}`"
        );
    }
}
1175
1176    fn spawn_http_responses(
1177        responses: Vec<(u16, Value)>,
1178    ) -> (String, thread::JoinHandle<Vec<String>>) {
1179        let listener = TcpListener::bind("127.0.0.1:0").expect("bind test server");
1180        let addr = listener.local_addr().expect("local addr");
1181        let handle = thread::spawn(move || {
1182            let mut requests = Vec::new();
1183            for (status, body) in responses {
1184                let (mut stream, _) = listener.accept().expect("accept request");
1185                requests.push(read_http_request(&mut stream));
1186
1187                let body = body.to_string();
1188                write!(
1189                    stream,
1190                    "HTTP/1.1 {status} OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}",
1191                    body.len()
1192                )
1193                .expect("write response");
1194            }
1195            requests
1196        });
1197
1198        (format!("http://{addr}"), handle)
1199    }
1200
/// Reads one HTTP request from `stream` and returns it as lossily decoded UTF-8 text.
///
/// Reading stops once the header block plus the number of body bytes announced by
/// `Content-Length` has arrived, or when the stream reports end of input. The header name is
/// matched case-insensitively and whitespace around its value is ignored, so `Content-Length:5`
/// and `content-length:   5` are both honoured. A request without a parseable `Content-Length`
/// is treated as having no body.
///
/// # Panics
///
/// Panics if reading from `stream` fails.
fn read_http_request(stream: &mut impl Read) -> String {
    // Returns the body length announced in a raw header block, or 0 when none is usable.
    fn announced_body_len(header_block: &[u8]) -> usize {
        String::from_utf8_lossy(header_block)
            .lines()
            .find_map(|line| {
                let (name, value) = line.split_once(':')?;
                if name.eq_ignore_ascii_case("content-length") {
                    value.trim().parse::<usize>().ok()
                } else {
                    None
                }
            })
            .unwrap_or(0)
    }

    let mut request = Vec::new();
    let mut buffer = [0; 4096];
    // Total request size (headers + separator + body), known once the headers are complete.
    let mut expected_len = None;

    loop {
        let n = stream.read(&mut buffer).expect("read request");
        if n == 0 {
            break;
        }
        request.extend_from_slice(&buffer[..n]);

        if expected_len.is_none() {
            expected_len = request
                .windows(4)
                .position(|window| window == b"\r\n\r\n")
                .map(|header_end| header_end + 4 + announced_body_len(&request[..header_end]));
        }

        if expected_len.is_some_and(|total| request.len() >= total) {
            break;
        }
    }

    String::from_utf8_lossy(&request).into_owned()
}
1238}