contextqa_core/entity.rs
1//! Entity model using SCIP-style human-readable, globally-unique symbol strings.
2//!
3//! Identity format: `scip:<manager>/<package>/<version>/<path>#<symbol>`
4//! Example: `scip:npm/my-app/1.0/src/auth/service.ts/AuthService#validateToken`
5//!
6//! This approach is drawn from Sourcegraph's SCIP protocol — because each document
7//! is self-contained with symbol strings rather than graph-local IDs, individual
8//! files can be re-indexed independently without global coordination.
9//! Reference: https://github.com/sourcegraph/scip
10
11use serde::{Deserialize, Serialize};
12
13/// SCIP-style globally unique identifier for any code entity.
14///
15/// Format: `scip:<manager>/<package>/<version>/<path>#<symbol>`
16/// The string encoding ensures files can be re-indexed independently.
17#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
18pub struct EntityId(pub String);
19
20impl EntityId {
21 /// Create a new SCIP-style entity ID.
22 ///
23 /// # Arguments
24 /// - `manager` - Package manager (e.g., "npm", "pip", "cargo", "local")
25 /// - `package` - Package name (e.g., "my-app")
26 /// - `version` - Version string (e.g., "1.0.0")
27 /// - `path` - File path relative to package root
28 /// - `symbol` - Symbol name within the file (function, class, etc.)
29 pub fn new(manager: &str, package: &str, version: &str, path: &str, symbol: &str) -> Self {
30 Self(format!(
31 "scip:{manager}/{package}/{version}/{path}#{symbol}"
32 ))
33 }
34
35 /// Create a simple local entity ID (for single-project analysis).
36 pub fn local(path: &str, symbol: &str) -> Self {
37 Self(format!("scip:local/project/0.0.0/{path}#{symbol}"))
38 }
39
40 /// Extract the file path component from the SCIP ID.
41 pub fn file_path(&self) -> Option<&str> {
42 let after_version = self.0.split('/').skip(3).collect::<Vec<_>>().join("/");
43 let path = after_version.split('#').next()?;
44 // Return from the original string to avoid allocation
45 let start = self.0.find(path)?;
46 let end = self.0.find('#').unwrap_or(self.0.len());
47 Some(&self.0[start..end])
48 }
49
50 /// Extract the symbol name from the SCIP ID.
51 pub fn symbol_name(&self) -> Option<&str> {
52 self.0.split('#').nth(1)
53 }
54
55 pub fn as_str(&self) -> &str {
56 &self.0
57 }
58}
59
60impl std::fmt::Display for EntityId {
61 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
62 write!(f, "{}", self.0)
63 }
64}
65
66/// The kind of code entity in the context graph.
67///
68/// This union-type approach (inspired by Glean's `code.Entity` sum type)
69/// provides a unified view across languages while carrying language-specific
70/// metadata in each variant.
71#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
72pub enum EntityKind {
73 /// A function or method definition
74 Function {
75 is_async: bool,
76 parameter_count: usize,
77 return_type: Option<String>,
78 },
79 /// An API endpoint (REST, GraphQL, gRPC)
80 ApiEndpoint {
81 method: String, // GET, POST, etc.
82 route: String, // /api/v1/users
83 handler: String, // function that handles the route
84 },
85 /// A data model / struct / class / table
86 DataModel { fields: Vec<String> },
87 /// A feature or capability (extracted from tickets/docs)
88 Feature {
89 description: String,
90 source: String, // "jira", "docs", etc.
91 },
92 /// A test case
93 TestCase {
94 test_type: TestType,
95 targets: Vec<EntityId>, // what entities this test covers
96 },
97 /// A requirement (from Jira, docs, specs)
98 Requirement {
99 ticket_id: Option<String>,
100 acceptance_criteria: Vec<String>,
101 },
102 /// A module or file-level entity
103 Module {
104 language: String,
105 exports: Vec<String>,
106 },
107}
108
109#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
110pub enum TestType {
111 Unit,
112 Integration,
113 E2E,
114 Property,
115}
116
117/// A node in the context graph — an entity with its identity and metadata.
118#[derive(Debug, Clone, Serialize, Deserialize)]
119pub struct UcmEntity {
120 /// SCIP-style globally unique identifier
121 pub id: EntityId,
122 /// What kind of entity this is
123 pub kind: EntityKind,
124 /// Human-readable name
125 pub name: String,
126 /// Source file path (relative to project root)
127 pub file_path: String,
128 /// Line range in source file [start, end]
129 pub line_range: Option<(usize, usize)>,
130 /// Programming language
131 pub language: String,
132 /// When this entity was first discovered
133 pub discovered_at: chrono::DateTime<chrono::Utc>,
134 /// Which ingestion source discovered it
135 pub discovery_source: DiscoverySource,
136}
137
138/// How an entity was discovered — determines base confidence and decay rate.
139#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
140pub enum DiscoverySource {
141 /// Extracted from source code via tree-sitter AST parsing
142 StaticAnalysis,
143 /// Inferred from git diff comparison
144 GitDiff,
145 /// Extracted from Jira/ticket system
146 TicketSystem,
147 /// Observed in API traffic logs
148 ApiTraffic,
149 /// Loaded from historical context snapshot
150 HistoricalContext,
151 /// Manually specified
152 Manual,
153}
154
155impl UcmEntity {
156 pub fn new(
157 id: EntityId,
158 kind: EntityKind,
159 name: impl Into<String>,
160 file_path: impl Into<String>,
161 language: impl Into<String>,
162 source: DiscoverySource,
163 ) -> Self {
164 Self {
165 id,
166 kind,
167 name: name.into(),
168 file_path: file_path.into(),
169 line_range: None,
170 language: language.into(),
171 discovered_at: chrono::Utc::now(),
172 discovery_source: source,
173 }
174 }
175
176 pub fn with_line_range(mut self, start: usize, end: usize) -> Self {
177 self.line_range = Some((start, end));
178 self
179 }
180}
181
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture: the npm-style ID used by several tests.
    fn sample_id() -> EntityId {
        EntityId::new(
            "npm",
            "my-app",
            "1.0.0",
            "src/auth/service.ts",
            "validateToken",
        )
    }

    #[test]
    fn test_scip_id_construction() {
        assert_eq!(
            sample_id().as_str(),
            "scip:npm/my-app/1.0.0/src/auth/service.ts#validateToken"
        );
    }

    #[test]
    fn test_scip_id_local() {
        // Pin the whole string, not just a substring, so format drift is caught.
        let id = EntityId::local("src/main.rs", "main");
        assert_eq!(id.as_str(), "scip:local/project/0.0.0/src/main.rs#main");
    }

    #[test]
    fn test_entity_symbol_name() {
        assert_eq!(sample_id().symbol_name(), Some("validateToken"));
        // An ID without a `#` separator has no symbol.
        assert_eq!(EntityId("scip:local/x".to_string()).symbol_name(), None);
    }

    #[test]
    fn test_entity_file_path() {
        assert_eq!(sample_id().file_path(), Some("src/auth/service.ts"));
        let local = EntityId::local("src/main.rs", "main");
        assert_eq!(local.file_path(), Some("src/main.rs"));
    }

    #[test]
    fn test_display_matches_as_str() {
        let id = sample_id();
        assert_eq!(id.to_string(), id.as_str());
    }

    #[test]
    fn test_entity_line_range_builder() {
        let entity = UcmEntity::new(
            EntityId::local("src/main.rs", "main"),
            EntityKind::Module {
                language: "rust".to_string(),
                exports: Vec::new(),
            },
            "main",
            "src/main.rs",
            "rust",
            DiscoverySource::StaticAnalysis,
        );
        assert_eq!(entity.name, "main");
        assert_eq!(entity.file_path, "src/main.rs");
        assert_eq!(entity.language, "rust");
        assert_eq!(entity.discovery_source, DiscoverySource::StaticAnalysis);
        assert_eq!(entity.line_range, None);

        let entity = entity.with_line_range(1, 10);
        assert_eq!(entity.line_range, Some((1, 10)));
    }
}
218}