Skip to main content

contextqa_core/
entity.rs

1//! Entity model using SCIP-style human-readable, globally-unique symbol strings.
2//!
3//! Identity format: `scip:<manager>/<package>/<version>/<path>#<symbol>`
4//! Example: `scip:npm/my-app/1.0/src/auth/service.ts/AuthService#validateToken`
5//!
6//! This approach is drawn from Sourcegraph's SCIP protocol — because each document
7//! is self-contained with symbol strings rather than graph-local IDs, individual
8//! files can be re-indexed independently without global coordination.
9//! Reference: https://github.com/sourcegraph/scip
10
11use serde::{Deserialize, Serialize};
12
13/// SCIP-style globally unique identifier for any code entity.
14///
15/// Format: `scip:<manager>/<package>/<version>/<path>#<symbol>`
16/// The string encoding ensures files can be re-indexed independently.
17#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
18pub struct EntityId(pub String);
19
20impl EntityId {
21    /// Create a new SCIP-style entity ID.
22    ///
23    /// # Arguments
24    /// - `manager` - Package manager (e.g., "npm", "pip", "cargo", "local")
25    /// - `package` - Package name (e.g., "my-app")
26    /// - `version` - Version string (e.g., "1.0.0")
27    /// - `path` - File path relative to package root
28    /// - `symbol` - Symbol name within the file (function, class, etc.)
29    pub fn new(manager: &str, package: &str, version: &str, path: &str, symbol: &str) -> Self {
30        Self(format!(
31            "scip:{manager}/{package}/{version}/{path}#{symbol}"
32        ))
33    }
34
35    /// Create a simple local entity ID (for single-project analysis).
36    pub fn local(path: &str, symbol: &str) -> Self {
37        Self(format!("scip:local/project/0.0.0/{path}#{symbol}"))
38    }
39
40    /// Extract the file path component from the SCIP ID.
41    pub fn file_path(&self) -> Option<&str> {
42        let after_version = self.0.split('/').skip(3).collect::<Vec<_>>().join("/");
43        let path = after_version.split('#').next()?;
44        // Return from the original string to avoid allocation
45        let start = self.0.find(path)?;
46        let end = self.0.find('#').unwrap_or(self.0.len());
47        Some(&self.0[start..end])
48    }
49
50    /// Extract the symbol name from the SCIP ID.
51    pub fn symbol_name(&self) -> Option<&str> {
52        self.0.split('#').nth(1)
53    }
54
55    pub fn as_str(&self) -> &str {
56        &self.0
57    }
58}
59
60impl std::fmt::Display for EntityId {
61    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
62        write!(f, "{}", self.0)
63    }
64}
65
66/// The kind of code entity in the context graph.
67///
68/// This union-type approach (inspired by Glean's `code.Entity` sum type)
69/// provides a unified view across languages while carrying language-specific
70/// metadata in each variant.
71#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
72pub enum EntityKind {
73    /// A function or method definition
74    Function {
75        is_async: bool,
76        parameter_count: usize,
77        return_type: Option<String>,
78    },
79    /// An API endpoint (REST, GraphQL, gRPC)
80    ApiEndpoint {
81        method: String,  // GET, POST, etc.
82        route: String,   // /api/v1/users
83        handler: String, // function that handles the route
84    },
85    /// A data model / struct / class / table
86    DataModel { fields: Vec<String> },
87    /// A feature or capability (extracted from tickets/docs)
88    Feature {
89        description: String,
90        source: String, // "jira", "docs", etc.
91    },
92    /// A test case
93    TestCase {
94        test_type: TestType,
95        targets: Vec<EntityId>, // what entities this test covers
96    },
97    /// A requirement (from Jira, docs, specs)
98    Requirement {
99        ticket_id: Option<String>,
100        acceptance_criteria: Vec<String>,
101    },
102    /// A module or file-level entity
103    Module {
104        language: String,
105        exports: Vec<String>,
106    },
107}
108
109#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
110pub enum TestType {
111    Unit,
112    Integration,
113    E2E,
114    Property,
115}
116
117/// A node in the context graph — an entity with its identity and metadata.
118#[derive(Debug, Clone, Serialize, Deserialize)]
119pub struct UcmEntity {
120    /// SCIP-style globally unique identifier
121    pub id: EntityId,
122    /// What kind of entity this is
123    pub kind: EntityKind,
124    /// Human-readable name
125    pub name: String,
126    /// Source file path (relative to project root)
127    pub file_path: String,
128    /// Line range in source file [start, end]
129    pub line_range: Option<(usize, usize)>,
130    /// Programming language
131    pub language: String,
132    /// When this entity was first discovered
133    pub discovered_at: chrono::DateTime<chrono::Utc>,
134    /// Which ingestion source discovered it
135    pub discovery_source: DiscoverySource,
136}
137
138/// How an entity was discovered — determines base confidence and decay rate.
139#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
140pub enum DiscoverySource {
141    /// Extracted from source code via tree-sitter AST parsing
142    StaticAnalysis,
143    /// Inferred from git diff comparison
144    GitDiff,
145    /// Extracted from Jira/ticket system
146    TicketSystem,
147    /// Observed in API traffic logs
148    ApiTraffic,
149    /// Loaded from historical context snapshot
150    HistoricalContext,
151    /// Manually specified
152    Manual,
153}
154
155impl UcmEntity {
156    pub fn new(
157        id: EntityId,
158        kind: EntityKind,
159        name: impl Into<String>,
160        file_path: impl Into<String>,
161        language: impl Into<String>,
162        source: DiscoverySource,
163    ) -> Self {
164        Self {
165            id,
166            kind,
167            name: name.into(),
168            file_path: file_path.into(),
169            line_range: None,
170            language: language.into(),
171            discovered_at: chrono::Utc::now(),
172            discovery_source: source,
173        }
174    }
175
176    pub fn with_line_range(mut self, start: usize, end: usize) -> Self {
177        self.line_range = Some((start, end));
178        self
179    }
180}
181
#[cfg(test)]
mod tests {
    use super::*;

    /// Canonical well-formed ID shared by the tests below.
    fn sample_id() -> EntityId {
        EntityId::new(
            "npm",
            "my-app",
            "1.0.0",
            "src/auth/service.ts",
            "validateToken",
        )
    }

    #[test]
    fn test_scip_id_construction() {
        assert_eq!(
            sample_id().as_str(),
            "scip:npm/my-app/1.0.0/src/auth/service.ts#validateToken"
        );
    }

    #[test]
    fn test_scip_id_local() {
        // Pin the exact string, not just a substring, so format drift is caught.
        let id = EntityId::local("src/main.rs", "main");
        assert_eq!(id.as_str(), "scip:local/project/0.0.0/src/main.rs#main");
    }

    #[test]
    fn test_entity_symbol_name() {
        assert_eq!(sample_id().symbol_name(), Some("validateToken"));
    }

    #[test]
    fn test_entity_file_path() {
        assert_eq!(sample_id().file_path(), Some("src/auth/service.ts"));

        let local = EntityId::local("src/main.rs", "main");
        assert_eq!(local.file_path(), Some("src/main.rs"));
    }

    #[test]
    fn test_entity_id_display_matches_as_str() {
        let id = sample_id();
        assert_eq!(id.to_string(), id.as_str());
    }
}