Skip to main content

gobby_code/
models.rs

1use postgres::Row;
2use serde::{Deserialize, Serialize};
3use uuid::Uuid;
4
5use crate::utils::i64_to_usize;
6
7/// Stable namespace for deterministic symbol UUIDs.
8/// Must match Python: uuid.UUID("c0de1de0-0000-4000-8000-000000000000")
9pub const CODE_INDEX_UUID_NAMESPACE: Uuid = Uuid::from_bytes([
10    0xc0, 0xde, 0x1d, 0xe0, 0x00, 0x00, 0x40, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
11]);
12
13pub const SOURCE_SYSTEM_GCODE: &str = "gcode";
14
15/// Producer confidence classification for graph and vector projection facts.
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
17#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
18pub enum ProjectionProvenance {
19    Extracted,
20    Inferred,
21    Ambiguous,
22}
23
24impl ProjectionProvenance {
25    pub fn from_wire_value(value: &str) -> Option<Self> {
26        match value {
27            "EXTRACTED" | "extracted" => Some(Self::Extracted),
28            "INFERRED" | "inferred" => Some(Self::Inferred),
29            "AMBIGUOUS" | "ambiguous" => Some(Self::Ambiguous),
30            _ => None,
31        }
32    }
33}
34
35/// Optional provenance attached to graph results and projection payloads.
36#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
37pub struct ProjectionMetadata {
38    pub provenance: ProjectionProvenance,
39    #[serde(skip_serializing_if = "Option::is_none")]
40    pub confidence: Option<f64>,
41    pub source_system: String,
42    #[serde(skip_serializing_if = "Option::is_none")]
43    pub source_file_path: Option<String>,
44    #[serde(skip_serializing_if = "Option::is_none")]
45    pub source_line: Option<usize>,
46    #[serde(skip_serializing_if = "Option::is_none")]
47    pub source_symbol_id: Option<String>,
48    #[serde(skip_serializing_if = "Option::is_none")]
49    pub matching_method: Option<String>,
50}
51
52impl ProjectionMetadata {
53    pub fn new(provenance: ProjectionProvenance, source_system: impl Into<String>) -> Self {
54        Self {
55            provenance,
56            confidence: None,
57            source_system: source_system.into(),
58            source_file_path: None,
59            source_line: None,
60            source_symbol_id: None,
61            matching_method: None,
62        }
63    }
64
65    pub fn gcode_extracted() -> Self {
66        Self::new(ProjectionProvenance::Extracted, SOURCE_SYSTEM_GCODE).with_confidence(Some(1.0))
67    }
68
69    pub fn inferred(source_system: impl Into<String>, confidence: Option<f64>) -> Self {
70        Self::new(ProjectionProvenance::Inferred, source_system).with_confidence(confidence)
71    }
72
73    pub fn ambiguous(source_system: impl Into<String>, confidence: Option<f64>) -> Self {
74        Self::new(ProjectionProvenance::Ambiguous, source_system).with_confidence(confidence)
75    }
76
77    pub fn with_confidence(mut self, confidence: Option<f64>) -> Self {
78        self.confidence = confidence;
79        self
80    }
81
82    pub fn with_source_file_path(mut self, file_path: impl Into<String>) -> Self {
83        self.source_file_path = Some(file_path.into());
84        self
85    }
86
87    pub fn with_source_line(mut self, line: usize) -> Self {
88        self.source_line = Some(line);
89        self
90    }
91
92    pub fn with_source_symbol_id(mut self, symbol_id: impl Into<String>) -> Self {
93        self.source_symbol_id = Some(symbol_id.into());
94        self
95    }
96
97    pub fn with_matching_method(mut self, matching_method: impl Into<String>) -> Self {
98        self.matching_method = Some(matching_method.into());
99        self
100    }
101
102    pub fn is_hypothesis(&self) -> bool {
103        matches!(
104            self.provenance,
105            ProjectionProvenance::Inferred | ProjectionProvenance::Ambiguous
106        )
107    }
108}
109
110/// A code symbol extracted from AST parsing.
111#[derive(Debug, Clone, Serialize, Deserialize)]
112pub struct Symbol {
113    pub id: String,
114    pub project_id: String,
115    pub file_path: String,
116    pub name: String,
117    pub qualified_name: String,
118    pub kind: String,
119    pub language: String,
120    pub byte_start: usize,
121    pub byte_end: usize,
122    pub line_start: usize,
123    pub line_end: usize,
124    #[serde(skip_serializing_if = "Option::is_none")]
125    pub signature: Option<String>,
126    #[serde(skip_serializing_if = "Option::is_none")]
127    pub docstring: Option<String>,
128    #[serde(skip_serializing_if = "Option::is_none")]
129    pub parent_symbol_id: Option<String>,
130    #[serde(default)]
131    pub content_hash: String,
132    #[serde(skip_serializing_if = "Option::is_none")]
133    pub summary: Option<String>,
134    #[serde(default)]
135    pub created_at: String,
136    #[serde(default)]
137    pub updated_at: String,
138}
139
140impl Symbol {
141    /// Generate deterministic UUID5 for a symbol.
142    /// Must produce identical IDs to Python Symbol.make_id().
143    pub fn make_id(
144        project_id: &str,
145        file_path: &str,
146        name: &str,
147        kind: &str,
148        byte_start: usize,
149    ) -> String {
150        let key = format!("{project_id}:{file_path}:{name}:{kind}:{byte_start}");
151        Uuid::new_v5(&CODE_INDEX_UUID_NAMESPACE, key.as_bytes()).to_string()
152    }
153
154    /// Read a Symbol from a PostgreSQL row.
155    ///
156    /// Callers should select via `crate::db::symbol_select_columns()` so integer
157    /// and timestamp fields are cast to stable Rust-readable types.
158    pub fn from_row(row: &Row) -> anyhow::Result<Self> {
159        Ok(Self {
160            id: row.try_get("id")?,
161            project_id: row.try_get("project_id")?,
162            file_path: row.try_get("file_path")?,
163            name: row.try_get("name")?,
164            qualified_name: row.try_get("qualified_name")?,
165            kind: row.try_get("kind")?,
166            language: row.try_get("language")?,
167            byte_start: i64_to_usize(row.try_get("byte_start")?, "byte_start")?,
168            byte_end: i64_to_usize(row.try_get("byte_end")?, "byte_end")?,
169            line_start: i64_to_usize(row.try_get("line_start")?, "line_start")?,
170            line_end: i64_to_usize(row.try_get("line_end")?, "line_end")?,
171            signature: row.try_get("signature")?,
172            docstring: row.try_get("docstring")?,
173            parent_symbol_id: row.try_get("parent_symbol_id")?,
174            content_hash: row
175                .try_get::<_, Option<String>>("content_hash")?
176                .unwrap_or_default(),
177            summary: row.try_get("summary")?,
178            created_at: row
179                .try_get::<_, Option<String>>("created_at")?
180                .unwrap_or_default(),
181            updated_at: row
182                .try_get::<_, Option<String>>("updated_at")?
183                .unwrap_or_default(),
184        })
185    }
186
187    /// Slim representation for outline output.
188    pub fn to_outline(&self) -> OutlineSymbol {
189        OutlineSymbol {
190            id: self.id.clone(),
191            name: self.name.clone(),
192            kind: self.kind.clone(),
193            line_start: self.line_start,
194            line_end: self.line_end,
195            signature: self.signature.clone(),
196        }
197    }
198
199    /// Brief dict-like representation for search results.
200    pub fn to_brief(&self) -> SearchResult {
201        SearchResult {
202            id: self.id.clone(),
203            name: self.name.clone(),
204            qualified_name: self.qualified_name.clone(),
205            kind: self.kind.clone(),
206            language: self.language.clone(),
207            file_path: self.file_path.clone(),
208            line_start: self.line_start,
209            line_end: self.line_end,
210            score: 0.0,
211            rrf_score: None,
212            summary: self.summary.clone(),
213            signature: self.signature.clone(),
214            sources: None,
215        }
216    }
217}
218
219pub fn make_unresolved_callee_id(project_id: &str, callee_name: &str) -> String {
220    let key = format!("unresolved:{project_id}:{callee_name}");
221    Uuid::new_v5(&CODE_INDEX_UUID_NAMESPACE, key.as_bytes()).to_string()
222}
223
224pub fn make_external_symbol_id(
225    project_id: &str,
226    callee_name: &str,
227    module: Option<&str>,
228) -> String {
229    let module_key = module.unwrap_or_default();
230    let key = format!("external:{project_id}:{module_key}:{callee_name}");
231    Uuid::new_v5(&CODE_INDEX_UUID_NAMESPACE, key.as_bytes()).to_string()
232}
233
234/// Metadata for an indexed file.
235#[derive(Debug, Clone, Serialize, Deserialize)]
236pub struct IndexedFile {
237    pub id: String,
238    pub project_id: String,
239    pub file_path: String,
240    pub language: String,
241    pub content_hash: String,
242    pub symbol_count: usize,
243    pub byte_size: usize,
244    pub indexed_at: String,
245}
246
247impl IndexedFile {
248    pub fn make_id(project_id: &str, file_path: &str) -> String {
249        let key = format!("{project_id}:{file_path}");
250        Uuid::new_v5(&CODE_INDEX_UUID_NAMESPACE, key.as_bytes()).to_string()
251    }
252}
253
254/// A chunk of file content for FTS search.
255#[derive(Debug, Clone, Serialize, Deserialize)]
256pub struct ContentChunk {
257    pub id: String,
258    pub project_id: String,
259    pub file_path: String,
260    pub chunk_index: usize,
261    pub line_start: usize,
262    pub line_end: usize,
263    pub content: String,
264    pub language: String,
265    pub created_at: String,
266}
267
268impl ContentChunk {
269    pub fn make_id(project_id: &str, file_path: &str, chunk_index: usize) -> String {
270        let key = format!("{project_id}:{file_path}:chunk:{chunk_index}");
271        Uuid::new_v5(&CODE_INDEX_UUID_NAMESPACE, key.as_bytes()).to_string()
272    }
273}
274
/// An import found while parsing a file's AST: `file_path` imports
/// `module_name`.
#[derive(Debug, Clone)]
pub struct ImportRelation {
    /// Path of the file that contains the import.
    pub file_path: String,
    /// Name of the imported module, as written to this record by the parser.
    pub module_name: String,
}
281
/// How the callee of a [`CallRelation`] has been classified.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CallTargetKind {
    /// Set by [`CallRelation::with_symbol_target`], which also records the
    /// callee's symbol ID.
    Symbol,
    /// Initial state of every relation built by [`CallRelation::new`].
    Unresolved,
    /// Set by [`CallRelation::with_external_target`], which also records the
    /// external module.
    External,
}

impl CallTargetKind {
    /// Lower-case text label of the variant: `"symbol"`, `"unresolved"` or
    /// `"external"`.
    pub fn as_str(self) -> &'static str {
        match self {
            CallTargetKind::Symbol => "symbol",
            CallTargetKind::Unresolved => "unresolved",
            CallTargetKind::External => "external",
        }
    }
}
299
300/// Call relationship extracted from AST.
301#[derive(Debug, Clone)]
302pub struct CallRelation {
303    pub caller_symbol_id: String,
304    pub callee_symbol_id: Option<String>,
305    pub callee_name: String,
306    pub callee_target_kind: CallTargetKind,
307    pub callee_external_module: Option<String>,
308    pub file_path: String,
309    pub line: usize,
310}
311
312impl CallRelation {
313    pub fn new(
314        caller_symbol_id: String,
315        callee_name: String,
316        file_path: String,
317        line: usize,
318    ) -> Self {
319        Self {
320            caller_symbol_id,
321            callee_symbol_id: None,
322            callee_name,
323            callee_target_kind: CallTargetKind::Unresolved,
324            callee_external_module: None,
325            file_path,
326            line,
327        }
328    }
329
330    pub fn with_symbol_target(mut self, callee_symbol_id: String) -> Self {
331        self.callee_symbol_id = Some(callee_symbol_id);
332        self.callee_target_kind = CallTargetKind::Symbol;
333        self
334    }
335
336    pub fn with_external_target(
337        mut self,
338        callee_name: String,
339        callee_external_module: String,
340    ) -> Self {
341        self.callee_name = callee_name;
342        self.callee_target_kind = CallTargetKind::External;
343        self.callee_external_module = Some(callee_external_module);
344        self
345    }
346}
347
348/// Project index statistics.
349#[derive(Debug, Clone, Serialize, Deserialize)]
350pub struct IndexedProject {
351    pub id: String,
352    pub root_path: String,
353    pub total_files: usize,
354    pub total_symbols: usize,
355    pub last_indexed_at: String,
356    pub index_duration_ms: u64,
357    #[serde(skip_serializing_if = "Option::is_none")]
358    pub total_eligible_files: Option<usize>,
359}
360
361/// Search result with score.
362#[derive(Debug, Clone, Serialize, Deserialize)]
363pub struct SearchResult {
364    pub id: String,
365    pub name: String,
366    pub qualified_name: String,
367    pub kind: String,
368    pub language: String,
369    pub file_path: String,
370    pub line_start: usize,
371    pub line_end: usize,
372    pub score: f64,
373    #[serde(skip_serializing_if = "Option::is_none")]
374    pub rrf_score: Option<f64>,
375    #[serde(skip_serializing_if = "Option::is_none")]
376    pub summary: Option<String>,
377    #[serde(skip_serializing_if = "Option::is_none")]
378    pub signature: Option<String>,
379    #[serde(skip_serializing_if = "Option::is_none")]
380    pub sources: Option<Vec<String>>,
381}
382
383/// Graph query result (callers, usages).
384#[derive(Debug, Clone, Serialize, Deserialize)]
385pub struct GraphResult {
386    pub id: String,
387    pub name: String,
388    pub file_path: String,
389    pub line: usize,
390    #[serde(skip_serializing_if = "Option::is_none")]
391    pub relation: Option<String>,
392    #[serde(skip_serializing_if = "Option::is_none")]
393    pub distance: Option<usize>,
394    #[serde(default, skip_serializing_if = "Option::is_none")]
395    pub metadata: Option<ProjectionMetadata>,
396}
397
398/// Result of parsing a single file.
399pub struct ParseResult {
400    pub symbols: Vec<Symbol>,
401    pub imports: Vec<ImportRelation>,
402    pub calls: Vec<CallRelation>,
403    /// Raw file bytes — carried through for body snippet extraction at embedding time.
404    pub source: Vec<u8>,
405}
406
407/// Aggregate result of indexing a directory.
408#[derive(Debug, Clone, Serialize, Deserialize)]
409pub struct IndexResult {
410    pub project_id: String,
411    pub files_indexed: usize,
412    pub files_skipped: usize,
413    pub symbols_found: usize,
414    pub errors: Vec<String>,
415    pub duration_ms: u64,
416}
417
418/// Paginated response envelope for JSON output.
419/// Hoists `project_id` to avoid repeating it on every result.
420#[derive(Debug, Clone, Serialize)]
421pub struct PagedResponse<T: Serialize> {
422    pub project_id: String,
423    pub total: usize,
424    pub offset: usize,
425    pub limit: usize,
426    pub results: Vec<T>,
427    #[serde(skip_serializing_if = "Option::is_none")]
428    pub hint: Option<String>,
429}
430
431/// Slim symbol for outline output — only what agents need.
432#[derive(Debug, Clone, Serialize)]
433pub struct OutlineSymbol {
434    pub id: String,
435    pub name: String,
436    pub kind: String,
437    pub line_start: usize,
438    pub line_end: usize,
439    #[serde(skip_serializing_if = "Option::is_none")]
440    pub signature: Option<String>,
441}
442
443/// Content search hit from FTS.
444#[derive(Debug, Clone, Serialize, Deserialize)]
445pub struct ContentSearchHit {
446    pub file_path: String,
447    pub line_start: usize,
448    pub line_end: usize,
449    pub snippet: String,
450    #[serde(skip_serializing_if = "Option::is_none")]
451    pub language: Option<String>,
452}
453
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the namespace and `Symbol::make_id` against IDs computed by the
    /// Python implementation.
    #[test]
    fn symbol_make_id_matches_python_uuid5_golden_vectors() {
        assert_eq!(
            CODE_INDEX_UUID_NAMESPACE.to_string(),
            "c0de1de0-0000-4000-8000-000000000000"
        );

        // (project_id, file_path, name, kind, byte_start, expected id)
        let golden: [(&str, &str, &str, &str, usize, &str); 4] = [
            (
                "proj1",
                "src/main.py",
                "foo",
                "function",
                42,
                "403e2117-92e7-5390-ad83-226629486481",
            ),
            (
                "3bf57fe7-2a0c-4074-8912-a83d9cd4df01",
                "crates/gcode/src/models.rs",
                "Symbol",
                "struct",
                111,
                "d28e80d3-a95e-5c2a-91c3-92551f75a2b1",
            ),
            (
                "proj-with-dashes",
                "src/lib.rs",
                "Widget::render",
                "method",
                0,
                "44da4f31-7218-5b3b-97c4-5a5eca9f0451",
            ),
            (
                "overlay:child",
                "nested/path/file.ts",
                "HTTPClient.new",
                "class",
                987654321,
                "f9531553-f2a7-5425-b487-6fb5b31d57bb",
            ),
        ];

        for &(project_id, file_path, name, kind, byte_start, expected) in golden.iter() {
            let actual = Symbol::make_id(project_id, file_path, name, kind, byte_start);
            assert_eq!(
                actual, expected,
                "Python UUID5 parity failed for {project_id}:{file_path}:{name}:{kind}:{byte_start}"
            );
        }
    }

    /// Pins the unresolved-callee and external-symbol IDs against IDs
    /// computed by the Python implementation.
    #[test]
    fn unresolved_and_external_ids_match_python_uuid5_golden_vectors() {
        let unresolved = make_unresolved_callee_id("proj1", "missing_func");
        assert_eq!(unresolved, "42693df1-99e6-5daa-be29-3535096cd2b5");

        let external_with_module = make_external_symbol_id("proj1", "get", Some("requests"));
        assert_eq!(external_with_module, "7c7e6ebe-47c6-5a3d-a83d-d5160f10cb74");

        let external_without_module = make_external_symbol_id("proj1", "println", None);
        assert_eq!(
            external_without_module,
            "c6b97498-448e-5ef1-9cb5-ab1cf37b6596"
        );
    }

    /// `with_symbol_target` stores the callee ID and switches the kind to
    /// `Symbol`.
    #[test]
    fn test_call_relation_promotes_symbol_targets() {
        let unresolved = CallRelation::new(
            "caller-id".to_string(),
            "foo".to_string(),
            "src/main.py".to_string(),
            12,
        );
        let call = unresolved.with_symbol_target("callee-id".to_string());

        assert_eq!(call.callee_symbol_id.as_deref(), Some("callee-id"));
        assert_eq!(call.callee_target_kind, CallTargetKind::Symbol);
    }

    /// JSON without a `metadata` key still parses, and serializing the result
    /// does not introduce the key.
    #[test]
    fn graph_result_metadata_remains_optional_in_json_contract() {
        let without_metadata = serde_json::json!({
            "id": "sym-1",
            "name": "foo",
            "file_path": "src/main.rs",
            "line": 10
        });

        let parsed: GraphResult = serde_json::from_value(without_metadata)
            .expect("graph result JSON parses without metadata");
        assert!(parsed.metadata.is_none());

        let serialized = serde_json::to_value(&parsed).expect("graph result serializes");
        assert!(serialized.get("metadata").is_none());
    }

    /// Property: whatever the other fields hold, `metadata: None` never
    /// produces a `metadata` key in the serialized JSON.
    #[test]
    fn graph_result_without_metadata_omits_metadata_when_serialized() {
        // Printable-ASCII strings of bounded length for the text fields.
        let id_strategy =
            proptest::string::string_regex("[ -~]{0,32}").expect("valid id regex");
        let name_strategy =
            proptest::string::string_regex("[ -~]{0,32}").expect("valid name regex");
        let path_strategy =
            proptest::string::string_regex("[ -~]{0,64}").expect("valid path regex");
        let relation_strategy = proptest::option::of(
            proptest::string::string_regex("[ -~]{0,32}").expect("valid relation regex"),
        );
        let distance_strategy = proptest::option::of(0usize..1_000);

        let strategy = (
            id_strategy,
            name_strategy,
            path_strategy,
            0usize..1_000_000,
            relation_strategy,
            distance_strategy,
        );

        let mut runner = proptest::test_runner::TestRunner::default();
        runner
            .run(
                &strategy,
                |(id, name, file_path, line, relation, distance)| {
                    let result = GraphResult {
                        id,
                        name,
                        file_path,
                        line,
                        relation,
                        distance,
                        metadata: None,
                    };

                    let serialized =
                        serde_json::to_value(&result).expect("graph result serializes");
                    assert_eq!(serialized.get("metadata"), None);

                    Ok(())
                },
            )
            .expect("metadata omission property holds");
    }
}