Skip to main content

gobby_code/vector/
code_symbols.rs

1use postgres::GenericClient;
2use reqwest::StatusCode;
3use serde::{Deserialize, Serialize};
4use serde_json::{Map, Value, json};
5use std::fmt;
6use std::time::Duration;
7
8use crate::config::{
9    CODE_SYMBOL_COLLECTION_PREFIX, CodeVectorSettings, Context, EmbeddingConfig, QdrantConfig,
10};
11use crate::db;
12use crate::models::{ProjectionMetadata, ProjectionProvenance, Symbol};
13use gobby_core::degradation::ServiceState;
14use gobby_core::qdrant::{CollectionScope, SearchRequest, UpsertRequest};
15
// Keep code-symbol collections compatible with the Python daemon's Qdrant schema.
/// Distance metric name used when creating and validating code-symbol collections.
pub const VECTOR_DISTANCE_COSINE: &str = "Cosine";
/// Timeout applied to every blocking HTTP client built in this module.
const HTTP_TIMEOUT: Duration = Duration::from_secs(10);
/// Text that is embedded once to discover the vector size when none is configured.
const DIMENSION_PROBE_TEXT: &str = "dimension_probe";
20
21#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
22pub struct CodeSymbolVectorSearchRequest {
23    pub project_id: String,
24    pub query: String,
25    pub limit: usize,
26    pub collection_prefix: String,
27}
28
29#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
30pub struct CodeSymbolVectorSearchHit {
31    pub symbol_id: String,
32    pub score: f64,
33}
34
35#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
36pub struct CodeSymbolVectorPayload {
37    pub project_id: String,
38    pub file_path: String,
39    pub symbol_id: String,
40    pub name: String,
41    pub kind: String,
42    pub language: String,
43    pub line_start: usize,
44    pub line_end: usize,
45    pub byte_start: usize,
46    pub byte_end: usize,
47    #[serde(skip_serializing_if = "Option::is_none")]
48    pub signature: Option<String>,
49    #[serde(skip_serializing_if = "Option::is_none")]
50    pub docstring: Option<String>,
51    pub provenance: ProjectionProvenance,
52    #[serde(skip_serializing_if = "Option::is_none")]
53    pub confidence: Option<f64>,
54    pub source_system: String,
55    pub source_file_path: String,
56    pub source_line: usize,
57    pub source_line_start: usize,
58    pub source_line_end: usize,
59    pub source_byte_start: usize,
60    pub source_byte_end: usize,
61    pub source_symbol_id: String,
62    #[serde(skip_serializing_if = "Option::is_none")]
63    pub summary: Option<String>,
64}
65
66impl CodeSymbolVectorPayload {
67    pub fn from_symbol(symbol: &Symbol) -> Self {
68        let metadata = ProjectionMetadata::gcode_extracted()
69            .with_source_file_path(&symbol.file_path)
70            .with_source_line(symbol.line_start)
71            .with_source_symbol_id(&symbol.id);
72
73        Self {
74            project_id: symbol.project_id.clone(),
75            file_path: symbol.file_path.clone(),
76            symbol_id: symbol.id.clone(),
77            name: symbol.name.clone(),
78            kind: symbol.kind.clone(),
79            language: symbol.language.clone(),
80            line_start: symbol.line_start,
81            line_end: symbol.line_end,
82            byte_start: symbol.byte_start,
83            byte_end: symbol.byte_end,
84            signature: symbol.signature.clone(),
85            docstring: symbol.docstring.clone(),
86            provenance: metadata.provenance,
87            confidence: metadata.confidence,
88            source_system: metadata.source_system,
89            source_file_path: metadata
90                .source_file_path
91                .unwrap_or_else(|| symbol.file_path.clone()),
92            source_line: metadata.source_line.unwrap_or(symbol.line_start),
93            source_line_start: symbol.line_start,
94            source_line_end: symbol.line_end,
95            source_byte_start: symbol.byte_start,
96            source_byte_end: symbol.byte_end,
97            source_symbol_id: metadata
98                .source_symbol_id
99                .unwrap_or_else(|| symbol.id.clone()),
100            summary: symbol.summary.clone(),
101        }
102    }
103}
104
105#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
106#[serde(rename_all = "snake_case")]
107pub enum CodeSymbolVectorLifecycleAction {
108    Ensure,
109    SyncFile,
110    Clear,
111    Rebuild,
112}
113
114#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
115pub struct CodeSymbolVectorLifecycleStatus {
116    pub project_id: String,
117    pub collection: String,
118    pub action: CodeSymbolVectorLifecycleAction,
119}
120
121#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
122pub struct VectorCollectionSchema {
123    pub size: usize,
124    pub distance: String,
125}
126
/// Vector configuration read back from an existing collection.
///
/// Either field is `None` when the collection description did not carry a usable value.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ExistingVectorCollectionSchema {
    /// Vector size reported for the collection, if any.
    size: Option<usize>,
    /// Distance metric reported for the collection, if any.
    distance: Option<String>,
}
132
133#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
134pub struct CodeSymbolVectorLifecycleOutput {
135    pub project_id: String,
136    pub collection: String,
137    pub action: CodeSymbolVectorLifecycleAction,
138    pub file_path: Option<String>,
139    pub symbols: usize,
140    pub vectors_upserted: usize,
141    pub vectors_deleted: usize,
142    pub summary: String,
143}
144
/// Failures that vector lifecycle operations can report.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VectorLifecycleError {
    /// No usable Qdrant configuration was supplied.
    MissingQdrantConfig,
    /// No usable embedding configuration was supplied.
    MissingEmbeddingConfig,
    /// The embedding endpoint answered with a non-success HTTP status.
    EmbeddingHttp {
        status: u16,
        body: String,
    },
    /// The embedding request or its response could not be used; carries the reason.
    EmbeddingResponse(String),
    /// Qdrant answered `operation` with a non-success HTTP status.
    QdrantHttp {
        operation: &'static str,
        status: u16,
        body: String,
    },
    /// A Qdrant call failed outside of an HTTP status error; carries the reason.
    QdrantOperation(String),
    /// An existing collection's vector schema differs from the expected one.
    DimensionMismatch {
        collection: String,
        expected_size: usize,
        found_size: Option<usize>,
        expected_distance: &'static str,
        found_distance: Option<String>,
    },
}

impl fmt::Display for VectorLifecycleError {
    /// Writes the user-facing message for each error variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::MissingQdrantConfig => {
                f.write_str("Qdrant config is required for vector lifecycle commands")
            }
            Self::MissingEmbeddingConfig => {
                f.write_str("embedding config is required for vector lifecycle commands")
            }
            Self::EmbeddingHttp { status, body } => {
                write!(f, "embedding request failed: HTTP {status}: {body}")
            }
            Self::EmbeddingResponse(reason) => {
                write!(f, "embedding response was invalid: {reason}")
            }
            Self::QdrantHttp {
                operation,
                status,
                body,
            } => {
                write!(f, "Qdrant {operation} failed: HTTP {status}: {body}")
            }
            Self::QdrantOperation(reason) => {
                write!(f, "Qdrant operation failed: {reason}")
            }
            Self::DimensionMismatch {
                collection,
                expected_size,
                found_size,
                expected_distance,
                found_distance,
            } => {
                // Values missing from the existing collection are rendered as "unknown".
                let size_text =
                    found_size.map_or_else(|| "unknown".to_string(), |size| size.to_string());
                let distance_text = match found_distance {
                    Some(distance) => distance.as_str(),
                    None => "unknown",
                };
                write!(
                    f,
                    "Qdrant collection `{collection}` has incompatible vector schema: expected size {expected_size} distance {expected_distance}, found size {} distance {}. Refusing to migrate, drop, or recreate the collection.",
                    size_text, distance_text
                )
            }
        }
    }
}

impl std::error::Error for VectorLifecycleError {}
210
211#[derive(Debug)]
212pub struct CodeSymbolVectorLifecycle {
213    project_id: String,
214    collection: String,
215    qdrant: QdrantConfig,
216    embedding: EmbeddingConfig,
217    settings: CodeVectorSettings,
218    probed_vector_size: Option<usize>,
219    client: reqwest::blocking::Client,
220}
221
222pub fn collection_name(collection_prefix: &str, project_id: &str) -> String {
223    let collection = format!("{collection_prefix}{project_id}");
224    gobby_core::qdrant::collection_name("gcode", CollectionScope::Custom(&collection))
225}
226
227pub fn delete_project_collection(
228    qdrant: &QdrantConfig,
229    project_id: &str,
230) -> Result<bool, VectorLifecycleError> {
231    let client = qdrant_http_client()?;
232    let collection = collection_name(CODE_SYMBOL_COLLECTION_PREFIX, project_id);
233    delete_qdrant_collection(&client, qdrant, &collection)
234}
235
236pub fn delete_file_vectors(
237    qdrant: &QdrantConfig,
238    project_id: &str,
239    file_path: &str,
240) -> Result<bool, VectorLifecycleError> {
241    let client = qdrant_http_client()?;
242    let collection = collection_name(CODE_SYMBOL_COLLECTION_PREFIX, project_id);
243    delete_vectors_for_filter(&client, qdrant, &collection, project_id, Some(file_path))
244}
245
246pub fn delete_code_symbol_collections_with_prefix(
247    qdrant: &QdrantConfig,
248) -> Result<Vec<String>, VectorLifecycleError> {
249    let client = qdrant_http_client()?;
250    let resp = qdrant_request_for_config(&client, qdrant, reqwest::Method::GET, "/collections")?
251        .send()
252        .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
253    let status = resp.status();
254    if !status.is_success() {
255        return Err(qdrant_http_error("list collections", status, resp));
256    }
257
258    let data: Value = resp
259        .json()
260        .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
261    let collections = parse_collection_names(&data)
262        .into_iter()
263        .filter(|name| name.starts_with(CODE_SYMBOL_COLLECTION_PREFIX))
264        .collect::<Vec<_>>();
265
266    let mut deleted = Vec::new();
267    for collection in collections {
268        if delete_qdrant_collection(&client, qdrant, &collection)? {
269            deleted.push(collection);
270        }
271    }
272    Ok(deleted)
273}
274
275pub fn resolve_lifecycle_qdrant_config(
276    source: &mut impl gobby_core::config::ConfigSource,
277) -> Option<QdrantConfig> {
278    gobby_core::config::resolve_qdrant_config(source)
279}
280
281pub fn lifecycle_status(
282    project_id: impl Into<String>,
283    collection_prefix: &str,
284    action: CodeSymbolVectorLifecycleAction,
285) -> CodeSymbolVectorLifecycleStatus {
286    let project_id = project_id.into();
287    CodeSymbolVectorLifecycleStatus {
288        collection: collection_name(collection_prefix, &project_id),
289        project_id,
290        action,
291    }
292}
293
294pub fn embed_text(config: &EmbeddingConfig, text: &str) -> Result<Vec<f32>, VectorLifecycleError> {
295    let client = reqwest::blocking::Client::builder()
296        .timeout(HTTP_TIMEOUT)
297        .build()
298        .map_err(|err| VectorLifecycleError::EmbeddingResponse(err.to_string()))?;
299
300    let body = json!({
301        "model": config.model,
302        "input": text,
303    });
304
305    let url = format!("{}/embeddings", config.api_base.trim_end_matches('/'));
306    let mut req = client.post(&url).json(&body);
307
308    if let Some(key) = &config.api_key {
309        req = req.header("Authorization", format!("Bearer {key}"));
310    }
311
312    let resp = req
313        .send()
314        .map_err(|err| VectorLifecycleError::EmbeddingResponse(err.to_string()))?;
315    if !resp.status().is_success() {
316        let status = resp.status().as_u16();
317        let body = resp.text().unwrap_or_default();
318        return Err(VectorLifecycleError::EmbeddingHttp { status, body });
319    }
320
321    let data: Value = resp
322        .json()
323        .map_err(|err| VectorLifecycleError::EmbeddingResponse(err.to_string()))?;
324    let embedding: Vec<f32> = data
325        .get("data")
326        .and_then(Value::as_array)
327        .and_then(|values| values.first())
328        .and_then(|value| value.get("embedding"))
329        .and_then(Value::as_array)
330        .ok_or_else(|| {
331            VectorLifecycleError::EmbeddingResponse("missing data[0].embedding array".to_string())
332        })?
333        .iter()
334        .map(|value| {
335            value.as_f64().map(|f| f as f32).ok_or_else(|| {
336                VectorLifecycleError::EmbeddingResponse(
337                    "embedding array contains a non-number".to_string(),
338                )
339            })
340        })
341        .collect::<Result<Vec<_>, _>>()?;
342
343    if embedding.is_empty() {
344        Err(VectorLifecycleError::EmbeddingResponse(
345            "embedding vector was empty".to_string(),
346        ))
347    } else {
348        Ok(embedding)
349    }
350}
351
352pub fn embed_query(config: &EmbeddingConfig, text: &str) -> Option<Vec<f32>> {
353    embed_text(config, &format!("search_query: {text}")).ok()
354}
355
356pub fn vector_text_for_symbol(symbol: &Symbol) -> String {
357    let mut lines = vec![
358        format!("name: {}", symbol.name),
359        format!("qualified_name: {}", symbol.qualified_name),
360        format!("kind: {}", symbol.kind),
361        format!("language: {}", symbol.language),
362        format!("file_path: {}", symbol.file_path),
363        format!("range: {}-{}", symbol.line_start, symbol.line_end),
364    ];
365    if let Some(signature) = symbol
366        .signature
367        .as_deref()
368        .filter(|value| !value.trim().is_empty())
369    {
370        lines.push(format!("signature: {signature}"));
371    }
372    if let Some(docstring) = symbol
373        .docstring
374        .as_deref()
375        .filter(|value| !value.trim().is_empty())
376    {
377        lines.push(format!("docstring: {docstring}"));
378    }
379    if let Some(summary) = symbol
380        .summary
381        .as_deref()
382        .filter(|value| !value.trim().is_empty())
383    {
384        lines.push(format!("summary: {summary}"));
385    }
386    lines.join("\n")
387}
388
389pub fn vector_search(
390    config: &QdrantConfig,
391    collection: &str,
392    query_vector: &[f32],
393    limit: usize,
394) -> anyhow::Result<Vec<(String, f64)>> {
395    let request = SearchRequest {
396        vector: query_vector.to_vec(),
397        limit,
398        filter: None,
399    };
400    let (hits, _) = gobby_core::qdrant::with_qdrant(Some(config), Vec::new(), |config| {
401        gobby_core::qdrant::search(config, collection, request)
402    })?;
403    Ok(hits
404        .into_iter()
405        .map(|hit| (hit.id, f64::from(hit.score)))
406        .collect())
407}
408
409impl CodeSymbolVectorLifecycle {
410    pub fn new(
411        project_id: String,
412        qdrant: QdrantConfig,
413        embedding: EmbeddingConfig,
414        settings: CodeVectorSettings,
415    ) -> Result<Self, VectorLifecycleError> {
416        if qdrant
417            .url
418            .as_deref()
419            .filter(|url| !url.trim().is_empty())
420            .is_none()
421        {
422            return Err(VectorLifecycleError::MissingQdrantConfig);
423        }
424        if embedding.api_base.trim().is_empty() {
425            return Err(VectorLifecycleError::MissingEmbeddingConfig);
426        }
427
428        let collection = collection_name(CODE_SYMBOL_COLLECTION_PREFIX, &project_id);
429        let client = reqwest::blocking::Client::builder()
430            .timeout(HTTP_TIMEOUT)
431            .build()
432            .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
433        Ok(Self {
434            project_id,
435            collection,
436            qdrant,
437            embedding,
438            settings,
439            probed_vector_size: None,
440            client,
441        })
442    }
443
444    pub fn collection(&self) -> &str {
445        &self.collection
446    }
447
448    pub fn ensure_collection(&mut self) -> Result<VectorCollectionSchema, VectorLifecycleError> {
449        let expected = self.expected_schema()?;
450        self.require_qdrant_boundary()?;
451        match self.get_collection_schema()? {
452            Some(found) => self.ensure_compatible_schema(expected, found),
453            None => {
454                self.create_collection(&expected)?;
455                Ok(expected)
456            }
457        }
458    }
459
460    pub fn sync_file_symbols(
461        &mut self,
462        file_path: &str,
463        symbols: &[Symbol],
464    ) -> Result<CodeSymbolVectorLifecycleOutput, VectorLifecycleError> {
465        self.ensure_collection()?;
466        let points = self.points_for_symbols(symbols)?;
467        self.delete_vectors(Some(file_path))?;
468        self.upsert_points(points)?;
469
470        Ok(self.output(
471            CodeSymbolVectorLifecycleAction::SyncFile,
472            Some(file_path.to_string()),
473            symbols.len(),
474            symbols.len(),
475            1,
476        ))
477    }
478
479    pub fn clear_project_vectors(
480        &mut self,
481    ) -> Result<CodeSymbolVectorLifecycleOutput, VectorLifecycleError> {
482        let expected = self.expected_schema()?;
483        self.require_qdrant_boundary()?;
484        let deleted = match self.get_collection_schema()? {
485            Some(found) => {
486                self.ensure_compatible_schema(expected, found)?;
487                self.delete_vectors(None)?;
488                1
489            }
490            None => 0,
491        };
492
493        Ok(self.output(CodeSymbolVectorLifecycleAction::Clear, None, 0, 0, deleted))
494    }
495
496    pub fn rebuild_symbols(
497        &mut self,
498        symbols: &[Symbol],
499    ) -> Result<CodeSymbolVectorLifecycleOutput, VectorLifecycleError> {
500        self.ensure_collection()?;
501        let points = self.points_for_symbols(symbols)?;
502        self.delete_vectors(None)?;
503        self.upsert_points(points)?;
504
505        Ok(self.output(
506            CodeSymbolVectorLifecycleAction::Rebuild,
507            None,
508            symbols.len(),
509            symbols.len(),
510            1,
511        ))
512    }
513
514    fn output(
515        &self,
516        action: CodeSymbolVectorLifecycleAction,
517        file_path: Option<String>,
518        symbols: usize,
519        vectors_upserted: usize,
520        vectors_deleted: usize,
521    ) -> CodeSymbolVectorLifecycleOutput {
522        CodeSymbolVectorLifecycleOutput {
523            project_id: self.project_id.clone(),
524            collection: self.collection.clone(),
525            action,
526            file_path,
527            symbols,
528            vectors_upserted,
529            vectors_deleted,
530            summary: format!(
531                "{vectors_upserted} vector(s) upserted, {vectors_deleted} delete operation(s) issued"
532            ),
533        }
534    }
535
536    fn expected_schema(&mut self) -> Result<VectorCollectionSchema, VectorLifecycleError> {
537        let size = match self.settings.vector_dim {
538            Some(size) => size,
539            None => match self.probed_vector_size {
540                Some(size) => size,
541                None => {
542                    let size = embed_text(&self.embedding, DIMENSION_PROBE_TEXT)?.len();
543                    self.probed_vector_size = Some(size);
544                    size
545                }
546            },
547        };
548
549        Ok(VectorCollectionSchema {
550            size,
551            distance: VECTOR_DISTANCE_COSINE.to_string(),
552        })
553    }
554
555    fn require_qdrant_boundary(&self) -> Result<(), VectorLifecycleError> {
556        let ((), state) = gobby_core::qdrant::with_qdrant(Some(&self.qdrant), (), |_| Ok(()))
557            .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
558        match state {
559            ServiceState::Available => Ok(()),
560            ServiceState::NotConfigured => Err(VectorLifecycleError::MissingQdrantConfig),
561            other => Err(VectorLifecycleError::QdrantOperation(format!(
562                "unexpected Qdrant service state: {other:?}"
563            ))),
564        }
565    }
566
567    fn ensure_compatible_schema(
568        &self,
569        expected: VectorCollectionSchema,
570        found: ExistingVectorCollectionSchema,
571    ) -> Result<VectorCollectionSchema, VectorLifecycleError> {
572        if found.size == Some(expected.size)
573            && found.distance.as_deref() == Some(&expected.distance)
574        {
575            return Ok(VectorCollectionSchema {
576                size: expected.size,
577                distance: expected.distance,
578            });
579        }
580
581        Err(VectorLifecycleError::DimensionMismatch {
582            collection: self.collection.clone(),
583            expected_size: expected.size,
584            found_size: found.size,
585            expected_distance: VECTOR_DISTANCE_COSINE,
586            found_distance: found.distance,
587        })
588    }
589
590    fn get_collection_schema(
591        &self,
592    ) -> Result<Option<ExistingVectorCollectionSchema>, VectorLifecycleError> {
593        let resp = self
594            .qdrant_request(
595                reqwest::Method::GET,
596                &format!("/collections/{}", self.collection),
597            )?
598            .send()
599            .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
600        let status = resp.status();
601        if status == StatusCode::NOT_FOUND {
602            return Ok(None);
603        }
604        if !status.is_success() {
605            return Err(qdrant_http_error("get collection", status, resp));
606        }
607
608        let data: Value = resp
609            .json()
610            .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
611        Ok(parse_collection_schema(&data))
612    }
613
614    fn create_collection(
615        &self,
616        schema: &VectorCollectionSchema,
617    ) -> Result<(), VectorLifecycleError> {
618        let body = json!({
619            "vectors": {
620                "size": schema.size,
621                "distance": schema.distance,
622            },
623        });
624        let resp = self
625            .qdrant_request(
626                reqwest::Method::PUT,
627                &format!("/collections/{}", self.collection),
628            )?
629            .json(&body)
630            .send()
631            .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
632        if !resp.status().is_success() {
633            return Err(qdrant_http_error("create collection", resp.status(), resp));
634        }
635        Ok(())
636    }
637
638    fn delete_vectors(&self, file_path: Option<&str>) -> Result<(), VectorLifecycleError> {
639        delete_vectors_for_filter(
640            &self.client,
641            &self.qdrant,
642            &self.collection,
643            &self.project_id,
644            file_path,
645        )
646        .map(|_| ())
647    }
648
649    fn upsert_points(&self, points: Vec<UpsertRequest>) -> Result<(), VectorLifecycleError> {
650        if points.is_empty() {
651            return Ok(());
652        }
653        let ((), state) = gobby_core::qdrant::with_qdrant(Some(&self.qdrant), (), |config| {
654            gobby_core::qdrant::upsert(config, &self.collection, points)
655        })
656        .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
657        match state {
658            ServiceState::Available => Ok(()),
659            ServiceState::NotConfigured => Err(VectorLifecycleError::MissingQdrantConfig),
660            other => Err(VectorLifecycleError::QdrantOperation(format!(
661                "unexpected Qdrant service state: {other:?}"
662            ))),
663        }
664    }
665
666    fn points_for_symbols(
667        &self,
668        symbols: &[Symbol],
669    ) -> Result<Vec<UpsertRequest>, VectorLifecycleError> {
670        symbols
671            .iter()
672            .map(|symbol| {
673                let vector = embed_text(&self.embedding, &vector_text_for_symbol(symbol))?;
674                let payload = payload_map(CodeSymbolVectorPayload::from_symbol(symbol))?;
675                Ok(UpsertRequest {
676                    id: symbol.id.clone(),
677                    vector,
678                    payload,
679                })
680            })
681            .collect()
682    }
683
684    fn qdrant_request(
685        &self,
686        method: reqwest::Method,
687        path: &str,
688    ) -> Result<reqwest::blocking::RequestBuilder, VectorLifecycleError> {
689        qdrant_request_for_config(&self.client, &self.qdrant, method, path)
690    }
691}
692
693pub fn fetch_symbols_for_file(
694    conn: &mut impl GenericClient,
695    project_id: &str,
696    file_path: &str,
697) -> anyhow::Result<Vec<Symbol>> {
698    let columns = db::symbol_select_columns("");
699    conn.query(
700        &format!(
701            "SELECT {columns} FROM code_symbols
702             WHERE project_id = $1 AND file_path = $2
703             ORDER BY file_path, byte_start, id"
704        ),
705        &[&project_id, &file_path],
706    )?
707    .into_iter()
708    .map(|row| Symbol::from_row(&row))
709    .collect()
710}
711
712pub fn fetch_symbols_for_project(
713    conn: &mut impl GenericClient,
714    project_id: &str,
715) -> anyhow::Result<Vec<Symbol>> {
716    let columns = db::symbol_select_columns("");
717    conn.query(
718        &format!(
719            "SELECT {columns} FROM code_symbols
720             WHERE project_id = $1
721             ORDER BY file_path, byte_start, id"
722        ),
723        &[&project_id],
724    )?
725    .into_iter()
726    .map(|row| Symbol::from_row(&row))
727    .collect()
728}
729
730fn payload_map(
731    payload: CodeSymbolVectorPayload,
732) -> Result<Map<String, Value>, VectorLifecycleError> {
733    match serde_json::to_value(payload)
734        .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?
735    {
736        Value::Object(map) => Ok(map),
737        _ => Err(VectorLifecycleError::QdrantOperation(
738            "vector payload did not serialize to an object".to_string(),
739        )),
740    }
741}
742
743fn parse_collection_schema(data: &Value) -> Option<ExistingVectorCollectionSchema> {
744    let vectors = data.pointer("/result/config/params/vectors")?;
745    let size = vectors
746        .get("size")
747        .and_then(Value::as_u64)
748        .map(|size| size as usize);
749    let distance = vectors
750        .get("distance")
751        .and_then(Value::as_str)
752        .map(str::to_string);
753    Some(ExistingVectorCollectionSchema { size, distance })
754}
755
756fn parse_collection_names(data: &Value) -> Vec<String> {
757    data.pointer("/result/collections")
758        .and_then(Value::as_array)
759        .map(|collections| {
760            collections
761                .iter()
762                .filter_map(|collection| {
763                    collection
764                        .get("name")
765                        .and_then(Value::as_str)
766                        .map(str::to_string)
767                })
768                .collect()
769        })
770        .unwrap_or_default()
771}
772
773fn qdrant_http_client() -> Result<reqwest::blocking::Client, VectorLifecycleError> {
774    reqwest::blocking::Client::builder()
775        .timeout(HTTP_TIMEOUT)
776        .build()
777        .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))
778}
779
780fn qdrant_request_for_config(
781    client: &reqwest::blocking::Client,
782    qdrant: &QdrantConfig,
783    method: reqwest::Method,
784    path: &str,
785) -> Result<reqwest::blocking::RequestBuilder, VectorLifecycleError> {
786    let base = qdrant
787        .url
788        .as_deref()
789        .ok_or(VectorLifecycleError::MissingQdrantConfig)?
790        .trim_end_matches('/');
791    let url = format!("{base}{path}");
792    let mut req = client.request(method, url);
793    if let Some(key) = &qdrant.api_key {
794        req = req.header("api-key", key);
795    }
796    Ok(req)
797}
798
799fn delete_qdrant_collection(
800    client: &reqwest::blocking::Client,
801    qdrant: &QdrantConfig,
802    collection: &str,
803) -> Result<bool, VectorLifecycleError> {
804    let resp = qdrant_request_for_config(
805        client,
806        qdrant,
807        reqwest::Method::DELETE,
808        &format!("/collections/{collection}"),
809    )?
810    .send()
811    .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
812    let status = resp.status();
813    if status == StatusCode::NOT_FOUND {
814        return Ok(false);
815    }
816    if !status.is_success() {
817        return Err(qdrant_http_error("delete collection", status, resp));
818    }
819    Ok(true)
820}
821
822fn delete_vectors_for_filter(
823    client: &reqwest::blocking::Client,
824    qdrant: &QdrantConfig,
825    collection: &str,
826    project_id: &str,
827    file_path: Option<&str>,
828) -> Result<bool, VectorLifecycleError> {
829    let mut must = vec![json!({
830        "key": "project_id",
831        "match": {"value": project_id},
832    })];
833    if let Some(file_path) = file_path {
834        must.push(json!({
835            "key": "file_path",
836            "match": {"value": file_path},
837        }));
838    }
839    let body = json!({
840        "filter": {
841            "must": must,
842        },
843    });
844    let resp = qdrant_request_for_config(
845        client,
846        qdrant,
847        reqwest::Method::POST,
848        &format!("/collections/{collection}/points/delete"),
849    )?
850    .json(&body)
851    .send()
852    .map_err(|err| VectorLifecycleError::QdrantOperation(err.to_string()))?;
853    let status = resp.status();
854    if status == StatusCode::NOT_FOUND {
855        return Ok(false);
856    }
857    if !status.is_success() {
858        return Err(qdrant_http_error("delete points", status, resp));
859    }
860    Ok(true)
861}
862
863fn qdrant_http_error(
864    operation: &'static str,
865    status: StatusCode,
866    resp: reqwest::blocking::Response,
867) -> VectorLifecycleError {
868    VectorLifecycleError::QdrantHttp {
869        operation,
870        status: status.as_u16(),
871        body: resp.text().unwrap_or_default(),
872    }
873}
874
875pub fn search_code_symbols(
876    ctx: &Context,
877    request: &CodeSymbolVectorSearchRequest,
878) -> Vec<CodeSymbolVectorSearchHit> {
879    let qdrant_config = match &ctx.qdrant {
880        Some(c) => c,
881        None => return vec![],
882    };
883
884    let embedding_config = match &ctx.embedding {
885        Some(c) => c,
886        None => return vec![],
887    };
888
889    let embedding = match embed_query(embedding_config, &request.query) {
890        Some(e) => e,
891        None => return vec![],
892    };
893
894    let collection = collection_name(&request.collection_prefix, &request.project_id);
895    vector_search(qdrant_config, &collection, &embedding, request.limit)
896        .unwrap_or_default()
897        .into_iter()
898        .map(|(symbol_id, score)| CodeSymbolVectorSearchHit { symbol_id, score })
899        .collect()
900}
901
902pub fn semantic_search(ctx: &Context, query: &str, limit: usize) -> Vec<(String, f64)> {
903    if ctx.qdrant.is_none() {
904        return vec![];
905    }
906
907    let request = CodeSymbolVectorSearchRequest {
908        project_id: ctx.project_id.clone(),
909        query: query.to_string(),
910        limit,
911        collection_prefix: CODE_SYMBOL_COLLECTION_PREFIX.to_string(),
912    };
913
914    search_code_symbols(ctx, &request)
915        .into_iter()
916        .map(|hit| (hit.symbol_id, hit.score))
917        .collect()
918}
919
920#[cfg(test)]
921mod tests {
922    use super::*;
923    use crate::config::{CodeVectorSettings, QdrantConfig};
924    use crate::models::{SOURCE_SYSTEM_GCODE, Symbol};
925    use serde_json::{Value, json};
926    use std::io::{Read, Write};
927    use std::net::TcpListener;
928    use std::thread;
929
930    fn test_symbol(summary: Option<String>) -> Symbol {
931        Symbol {
932            id: "symbol-1".to_string(),
933            project_id: "project-1".to_string(),
934            file_path: "src/lib.rs".to_string(),
935            name: "run".to_string(),
936            qualified_name: "crate::run".to_string(),
937            kind: "function".to_string(),
938            language: "rust".to_string(),
939            byte_start: 10,
940            byte_end: 40,
941            line_start: 3,
942            line_end: 5,
943            signature: None,
944            docstring: None,
945            parent_symbol_id: None,
946            content_hash: "hash".to_string(),
947            summary,
948            created_at: String::new(),
949            updated_at: String::new(),
950        }
951    }
952
953    #[test]
954    fn payloads_carry_provenance_metadata() {
955        let payload = CodeSymbolVectorPayload::from_symbol(&test_symbol(Some("does work".into())));
956
957        assert_eq!(payload.provenance, ProjectionProvenance::Extracted);
958        assert_eq!(payload.confidence, Some(1.0));
959        assert_eq!(payload.source_system, SOURCE_SYSTEM_GCODE);
960        assert_eq!(payload.source_file_path, "src/lib.rs");
961        assert_eq!(payload.source_line_start, 3);
962        assert_eq!(payload.source_line_end, 5);
963        assert_eq!(payload.source_byte_start, 10);
964        assert_eq!(payload.source_byte_end, 40);
965        assert_eq!(payload.source_line, 3);
966        assert_eq!(payload.source_symbol_id, "symbol-1");
967        assert_eq!(payload.summary.as_deref(), Some("does work"));
968        assert_eq!(payload.signature, None);
969        assert_eq!(payload.docstring, None);
970
971        let value = serde_json::to_value(payload).expect("payload serializes");
972        assert_eq!(value["provenance"], "EXTRACTED");
973        assert_eq!(value["confidence"], 1.0);
974        assert_eq!(value["source_system"], SOURCE_SYSTEM_GCODE);
975        assert_eq!(value["source_file_path"], "src/lib.rs");
976        assert_eq!(value["source_line_start"], 3);
977        assert_eq!(value["source_line_end"], 5);
978        assert_eq!(value["source_byte_start"], 10);
979        assert_eq!(value["source_byte_end"], 40);
980        assert_eq!(value["source_symbol_id"], "symbol-1");
981    }
982
983    #[test]
984    fn summaries_are_optional_enrichment() {
985        let symbol = test_symbol(None);
986        let payload = CodeSymbolVectorPayload::from_symbol(&symbol);
987        let vector_text = vector_text_for_symbol(&symbol);
988        let value = serde_json::to_value(payload).expect("payload serializes");
989
990        assert!(value.get("summary").is_none());
991        assert!(vector_text.contains("name: run"));
992        assert!(!vector_text.contains("summary:"));
993    }
994
995    #[test]
996    fn collection_name_compatibility() {
997        assert_eq!(
998            collection_name(CODE_SYMBOL_COLLECTION_PREFIX, "project-1"),
999            "code_symbols_project-1"
1000        );
1001    }
1002
1003    #[test]
1004    fn delete_project_collection_targets_only_project_collection() {
1005        let (qdrant_url, handle) = spawn_http_responses(vec![(200, json!({"result": true}))]);
1006        let deleted = delete_project_collection(
1007            &QdrantConfig {
1008                url: Some(qdrant_url),
1009                api_key: Some("qdrant-key".to_string()),
1010            },
1011            "project-1",
1012        )
1013        .expect("delete collection");
1014        let requests = handle.join().expect("qdrant requests");
1015
1016        assert!(deleted);
1017        assert_eq!(requests.len(), 1);
1018        assert!(requests[0].contains("DELETE /collections/code_symbols_project-1 HTTP/1.1"));
1019        assert!(requests[0].contains("api-key: qdrant-key"));
1020        assert!(!requests[0].contains("project-2"));
1021    }
1022
1023    #[test]
1024    fn delete_file_vectors_filters_by_project_and_file_without_embedding() {
1025        let (qdrant_url, handle) =
1026            spawn_http_responses(vec![(200, json!({"result": {"operation_id": 1}}))]);
1027        let deleted = delete_file_vectors(
1028            &QdrantConfig {
1029                url: Some(qdrant_url),
1030                api_key: Some("qdrant-key".to_string()),
1031            },
1032            "project-1",
1033            "src/lib.rs",
1034        )
1035        .expect("delete vectors");
1036        let requests = handle.join().expect("qdrant requests");
1037
1038        assert!(deleted);
1039        assert_eq!(requests.len(), 1);
1040        assert!(
1041            requests[0].contains("POST /collections/code_symbols_project-1/points/delete HTTP/1.1")
1042        );
1043        assert!(requests[0].contains("api-key: qdrant-key"));
1044        assert!(requests[0].contains(r#""key":"project_id""#));
1045        assert!(requests[0].contains(r#""value":"project-1""#));
1046        assert!(requests[0].contains(r#""key":"file_path""#));
1047        assert!(requests[0].contains(r#""value":"src/lib.rs""#));
1048    }
1049
1050    #[test]
1051    fn clear_project_vectors_does_not_touch_memory_vector_collections() {
1052        let (qdrant_url, handle) = spawn_http_responses(vec![
1053            (
1054                200,
1055                json!({"result": {"config": {"params": {"vectors": {"size": 3, "distance": "Cosine"}}}}}),
1056            ),
1057            (200, json!({"result": {"operation_id": 1}})),
1058        ]);
1059        let mut lifecycle = CodeSymbolVectorLifecycle::new(
1060            "project-1".to_string(),
1061            QdrantConfig {
1062                url: Some(qdrant_url),
1063                api_key: None,
1064            },
1065            EmbeddingConfig {
1066                api_base: "http://127.0.0.1:9/v1".to_string(),
1067                model: "unused".to_string(),
1068                api_key: None,
1069            },
1070            CodeVectorSettings {
1071                vector_dim: Some(3),
1072            },
1073        )
1074        .expect("lifecycle");
1075
1076        let cleared = lifecycle.clear_project_vectors().expect("clear vectors");
1077        let requests = handle.join().expect("qdrant requests");
1078
1079        assert_eq!(cleared.vectors_deleted, 1);
1080        assert_eq!(requests.len(), 2);
1081        assert!(requests[0].contains("GET /collections/code_symbols_project-1 HTTP/1.1"));
1082        assert!(
1083            requests[1].contains("POST /collections/code_symbols_project-1/points/delete HTTP/1.1")
1084        );
1085        assert!(requests[1].contains(r#""key":"project_id""#));
1086        assert!(requests[1].contains(r#""value":"project-1""#));
1087        assert!(!requests[1].contains(r#""key":"file_path""#));
1088        assert!(requests.iter().all(|request| !request.contains("memory")));
1089        assert!(
1090            requests
1091                .iter()
1092                .all(|request| !request.contains("GET /collections HTTP/1.1"))
1093        );
1094        assert!(
1095            requests
1096                .iter()
1097                .all(|request| !request.contains("DELETE /collections/"))
1098        );
1099    }
1100
1101    #[test]
1102    fn delete_prefixed_collections_deletes_only_code_symbol_collections() {
1103        let (qdrant_url, handle) = spawn_http_responses(vec![
1104            (
1105                200,
1106                json!({
1107                    "result": {
1108                        "collections": [
1109                            {"name": "code_symbols_project-1"},
1110                            {"name": "memory_vectors"},
1111                            {"name": "code_symbols_project-2"}
1112                        ]
1113                    }
1114                }),
1115            ),
1116            (200, json!({"result": true})),
1117            (200, json!({"result": true})),
1118        ]);
1119        let deleted = delete_code_symbol_collections_with_prefix(&QdrantConfig {
1120            url: Some(qdrant_url),
1121            api_key: None,
1122        })
1123        .expect("delete prefixed collections");
1124        let requests = handle.join().expect("qdrant requests");
1125
1126        assert_eq!(
1127            deleted,
1128            vec![
1129                "code_symbols_project-1".to_string(),
1130                "code_symbols_project-2".to_string()
1131            ]
1132        );
1133        assert_eq!(requests.len(), 3);
1134        assert!(requests[0].contains("GET /collections HTTP/1.1"));
1135        assert!(requests[1].contains("DELETE /collections/code_symbols_project-1 HTTP/1.1"));
1136        assert!(requests[2].contains("DELETE /collections/code_symbols_project-2 HTTP/1.1"));
1137        assert!(
1138            requests
1139                .iter()
1140                .all(|request| !request.contains("DELETE /collections/memory_vectors"))
1141        );
1142    }
1143
1144    #[test]
1145    fn embedding_request_response() {
1146        let (base_url, handle) = spawn_http_responses(vec![(
1147            200,
1148            json!({"data": [{"embedding": [0.25, 0.5, 0.75]}]}),
1149        )]);
1150        let config = EmbeddingConfig {
1151            api_base: format!("{base_url}/v1"),
1152            model: "embed-small".to_string(),
1153            api_key: Some("embedding-key".to_string()),
1154        };
1155
1156        let embedding = embed_text(&config, "dimension_probe").expect("embedding response");
1157        let requests = handle.join().expect("server thread");
1158
1159        assert_eq!(embedding, vec![0.25, 0.5, 0.75]);
1160        assert_eq!(requests.len(), 1);
1161        assert!(requests[0].contains("POST /v1/embeddings HTTP/1.1"));
1162        assert!(requests[0].contains("authorization: Bearer embedding-key"));
1163        assert!(requests[0].contains(r#""model":"embed-small""#));
1164        assert!(requests[0].contains(r#""input":"dimension_probe""#));
1165    }
1166
    /// Exercises `ensure_collection` in two scenarios against mock servers:
    /// first with no configured `vector_dim` (the size comes from a single
    /// embedding request), then with an explicit `vector_dim` of 1536.
    #[test]
    fn ensure_collection_resolves_vector_size_and_distance() {
        // Scenario 1: the embedding mock serves exactly one 3-element vector.
        let (embedding_url, embedding_handle) = spawn_http_responses(vec![(
            200,
            json!({"data": [{"embedding": [0.1, 0.2, 0.3]}]}),
        )]);
        // Qdrant mock replies, in order: collection missing, create acknowledged,
        // then collection info for the second `ensure_collection` call.
        let (qdrant_url, qdrant_handle) = spawn_http_responses(vec![
            (404, json!({"status": "not found"})),
            (200, json!({"result": true})),
            (
                200,
                json!({"result": {"config": {"params": {"vectors": {"size": 3, "distance": "Cosine"}}}}}),
            ),
        ]);
        let mut lifecycle = CodeSymbolVectorLifecycle::new(
            "project-1".to_string(),
            QdrantConfig {
                url: Some(qdrant_url),
                api_key: None,
            },
            EmbeddingConfig {
                api_base: format!("{embedding_url}/v1"),
                model: "embed-small".to_string(),
                api_key: None,
            },
            CodeVectorSettings { vector_dim: None },
        )
        .expect("lifecycle");

        let created = lifecycle.ensure_collection().expect("create collection");
        let reused = lifecycle.ensure_collection().expect("reuse collection");
        // Joining only returns once each mock has served all of its canned replies.
        let embedding_requests = embedding_handle.join().expect("embedding requests");
        let qdrant_requests = qdrant_handle.join().expect("qdrant requests");

        assert_eq!(created.size, 3);
        assert_eq!(created.distance, VECTOR_DISTANCE_COSINE);
        assert_eq!(reused.size, 3);
        // Two `ensure_collection` calls, but only one embedding request.
        assert_eq!(embedding_requests.len(), 1, "dimension probe is cached");
        // The second Qdrant request is the collection creation.
        assert!(qdrant_requests[1].contains("PUT /collections/code_symbols_project-1 HTTP/1.1"));
        assert!(qdrant_requests[1].contains(r#""size":3"#));
        assert!(qdrant_requests[1].contains(r#""distance":"Cosine""#));

        // Scenario 2: explicit vector size; only two Qdrant replies are needed.
        let (explicit_qdrant_url, explicit_handle) = spawn_http_responses(vec![
            (404, json!({"status": "not found"})),
            (200, json!({"result": true})),
        ]);
        let mut explicit = CodeSymbolVectorLifecycle::new(
            "project-1".to_string(),
            QdrantConfig {
                url: Some(explicit_qdrant_url),
                api_key: None,
            },
            // NOTE(review): no embedding mock is started for this scenario, so this
            // endpoint is presumably never contacted when `vector_dim` is set — confirm.
            EmbeddingConfig {
                api_base: "http://127.0.0.1:9/v1".to_string(),
                model: "unused".to_string(),
                api_key: None,
            },
            CodeVectorSettings {
                vector_dim: Some(1536),
            },
        )
        .expect("lifecycle with explicit size");

        let schema = explicit.ensure_collection().expect("explicit size create");
        let explicit_requests = explicit_handle.join().expect("explicit qdrant requests");
        assert_eq!(schema.size, 1536);
        assert!(explicit_requests[1].contains(r#""size":1536"#));
    }
1235
1236    #[test]
1237    fn lifecycle_http_scoped_to_module() {
1238        let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
1239        let src_dir = manifest_dir.join("src");
1240        let mut offenders = Vec::new();
1241
1242        fn visit(path: &std::path::Path, offenders: &mut Vec<std::path::PathBuf>) {
1243            for entry in std::fs::read_dir(path).expect("read source directory") {
1244                let entry = entry.expect("source entry");
1245                let path = entry.path();
1246                if path.is_dir() {
1247                    visit(&path, offenders);
1248                    continue;
1249                }
1250                if path.extension().and_then(|ext| ext.to_str()) != Some("rs") {
1251                    continue;
1252                }
1253                let source = std::fs::read_to_string(&path).expect("read source file");
1254                let lifecycle_rest = [
1255                    "/points/delete",
1256                    "points/delete",
1257                    "collections/{collection}",
1258                    "/collections/{collection}",
1259                ];
1260                if lifecycle_rest.iter().any(|needle| source.contains(needle))
1261                    && !path.ends_with("vector/code_symbols.rs")
1262                {
1263                    offenders.push(path);
1264                }
1265            }
1266        }
1267
1268        visit(&src_dir, &mut offenders);
1269        assert!(
1270            offenders.is_empty(),
1271            "Qdrant lifecycle REST must stay scoped to vector/code_symbols.rs: {offenders:?}"
1272        );
1273    }
1274
1275    #[test]
1276    fn routes_through_gobby_core_qdrant() {
1277        let source = include_str!("code_symbols.rs");
1278        assert!(source.contains("gobby_core::config::resolve_qdrant_config"));
1279        assert!(source.contains("gobby_core::qdrant::with_qdrant"));
1280        assert!(source.contains("gobby_core::qdrant::collection_name"));
1281        assert!(source.contains("CollectionScope::Custom"));
1282        assert!(source.contains("gobby_core::qdrant::search"));
1283        assert!(source.contains("gobby_core::qdrant::upsert"));
1284    }
1285
1286    fn spawn_http_responses(
1287        responses: Vec<(u16, Value)>,
1288    ) -> (String, thread::JoinHandle<Vec<String>>) {
1289        let listener = TcpListener::bind("127.0.0.1:0").expect("bind test server");
1290        let addr = listener.local_addr().expect("local addr");
1291        let handle = thread::spawn(move || {
1292            let mut requests = Vec::new();
1293            for (status, body) in responses {
1294                let (mut stream, _) = listener.accept().expect("accept request");
1295                requests.push(read_http_request(&mut stream));
1296
1297                let body = body.to_string();
1298                write!(
1299                    stream,
1300                    "HTTP/1.1 {status} OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}",
1301                    body.len()
1302                )
1303                .expect("write response");
1304            }
1305            requests
1306        });
1307
1308        (format!("http://{addr}"), handle)
1309    }
1310
1311    fn read_http_request(stream: &mut impl Read) -> String {
1312        let mut request = Vec::new();
1313        let mut buffer = [0; 4096];
1314        let mut expected_len = None;
1315
1316        loop {
1317            let n = stream.read(&mut buffer).expect("read request");
1318            if n == 0 {
1319                break;
1320            }
1321            request.extend_from_slice(&buffer[..n]);
1322
1323            if expected_len.is_none()
1324                && let Some(header_end) =
1325                    request.windows(4).position(|window| window == b"\r\n\r\n")
1326            {
1327                let headers = String::from_utf8_lossy(&request[..header_end]);
1328                let content_len = headers
1329                    .lines()
1330                    .find_map(|line| {
1331                        line.to_ascii_lowercase()
1332                            .strip_prefix("content-length: ")
1333                            .and_then(|value| value.parse::<usize>().ok())
1334                    })
1335                    .unwrap_or(0);
1336                expected_len = Some(header_end + 4 + content_len);
1337            }
1338
1339            if let Some(expected_len) = expected_len
1340                && request.len() >= expected_len
1341            {
1342                break;
1343            }
1344        }
1345
1346        String::from_utf8_lossy(&request).into_owned()
1347    }
1348}