// codelens_engine/symbols/types.rs

1use crate::db::IndexDb;
2use serde::{Deserialize, Serialize};
3
4/// Structural ownership category derived from file path.
5/// Used by the ranker to disambiguate same-name symbols across
6/// crate boundaries without hardcoding specific symbol names.
7#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
8#[serde(rename_all = "snake_case")]
9pub enum SymbolProvenance {
10    /// Core engine implementation (codelens-engine/src/)
11    #[default]
12    EngineCore,
13    /// MCP tool handler (codelens-mcp/src/tools/)
14    McpTool,
15    /// MCP dispatch/protocol layer (codelens-mcp/src/dispatch/, protocol.rs)
16    McpInfra,
17    /// TUI surface layer (codelens-tui/src/)
18    TuiSurface,
19    /// Test code (**/tests/, *_tests.rs)
20    Test,
21    /// Benchmark/script code (benchmarks/, scripts/)
22    Benchmark,
23}
24
25impl SymbolProvenance {
26    /// Derive provenance from a relative file path using package/path structure.
27    ///
28    /// Classification is based on stable path-role conventions rather than
29    /// repository-specific crate names:
30    /// - Test: `/tests/`, `_tests.rs`, `/integration_tests/`
31    /// - Benchmark: `benchmarks/`, `scripts/`, `models/`
32    /// - TuiSurface: `src/ui/`, `src/tui/`, `src/cli/`, `src/app/` or package
33    ///   names ending in `-ui`, `-tui`, `-cli`, `-app`
34    /// - McpTool: `src/tools/` directory (tool handler convention)
35    /// - McpInfra: `src/dispatch/`, `src/server/`, `src/runtime/`,
36    ///   `protocol.rs`, `transport.rs`, or package names ending in `-mcp`
37    /// - EngineCore: everything else (library source)
38    pub fn from_path(path: &str) -> Self {
39        let normalized = path.replace('\\', "/");
40
41        // Test detection (universal pattern)
42        if normalized.contains("/tests/")
43            || normalized.contains("/tests.")
44            || normalized.ends_with("_tests.rs")
45            || normalized.contains("/integration_tests/")
46            || normalized.contains("/test_helpers")
47        {
48            return Self::Test;
49        }
50        // Benchmark/scripts (universal pattern)
51        if normalized.starts_with("benchmarks/")
52            || normalized.starts_with("scripts/")
53            || normalized.starts_with("models/")
54        {
55            return Self::Benchmark;
56        }
57
58        let segments: Vec<&str> = normalized
59            .split('/')
60            .filter(|segment| !segment.is_empty())
61            .collect();
62        let src_idx = segments.iter().rposition(|segment| *segment == "src");
63        let package_name = src_idx
64            .and_then(|idx| idx.checked_sub(1))
65            .and_then(|idx| segments.get(idx))
66            .copied()
67            .unwrap_or_default();
68        let after_src = src_idx.map(|idx| &segments[idx + 1..]).unwrap_or(&[][..]);
69        let first_after_src = after_src.first().copied().unwrap_or_default();
70        let file_name = segments.last().copied().unwrap_or_default();
71
72        if first_after_src == "tools" {
73            return Self::McpTool;
74        }
75
76        if matches!(first_after_src, "ui" | "tui" | "cli" | "app")
77            || matches!(file_name, "ui.rs" | "tui.rs" | "cli.rs" | "app.rs")
78            || package_name.ends_with("-ui")
79            || package_name.ends_with("_ui")
80            || package_name.ends_with("-tui")
81            || package_name.ends_with("_tui")
82            || package_name.ends_with("-cli")
83            || package_name.ends_with("_cli")
84            || package_name.ends_with("-app")
85            || package_name.ends_with("_app")
86        {
87            return Self::TuiSurface;
88        }
89
90        if matches!(
91            first_after_src,
92            "dispatch" | "server" | "runtime" | "transport"
93        ) || matches!(file_name, "protocol.rs" | "transport.rs" | "runtime.rs")
94            || package_name.ends_with("-mcp")
95            || package_name.ends_with("_mcp")
96        {
97            return Self::McpInfra;
98        }
99
100        // Default: library/engine core
101        Self::EngineCore
102    }
103
104    /// Ranking penalty/boost for "implementation" queries.
105    /// Positive = prefer, negative = demote.
106    pub fn impl_query_prior(self) -> f64 {
107        match self {
108            Self::EngineCore => 6.0,
109            Self::McpTool => -4.0,
110            Self::McpInfra => -2.0,
111            Self::TuiSurface => -8.0,
112            Self::Test => -12.0,
113            Self::Benchmark => -14.0,
114        }
115    }
116}
117
118#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
119#[serde(rename_all = "snake_case")]
120pub enum SymbolKind {
121    File,
122    Class,
123    Interface,
124    Enum,
125    Module,
126    Method,
127    Function,
128    Property,
129    Variable,
130    TypeAlias,
131    Unknown,
132}
133
134impl SymbolKind {
135    pub fn as_label(&self) -> &'static str {
136        match self {
137            SymbolKind::File => "file",
138            SymbolKind::Class => "class",
139            SymbolKind::Interface => "interface",
140            SymbolKind::Enum => "enum",
141            SymbolKind::Module => "module",
142            SymbolKind::Method => "method",
143            SymbolKind::Function => "function",
144            SymbolKind::Property => "property",
145            SymbolKind::Variable => "variable",
146            SymbolKind::TypeAlias => "type_alias",
147            SymbolKind::Unknown => "unknown",
148        }
149    }
150
151    pub fn from_str_label(s: &str) -> SymbolKind {
152        match s {
153            "class" => SymbolKind::Class,
154            "interface" => SymbolKind::Interface,
155            "enum" => SymbolKind::Enum,
156            "module" => SymbolKind::Module,
157            "method" => SymbolKind::Method,
158            "function" => SymbolKind::Function,
159            "property" => SymbolKind::Property,
160            "variable" => SymbolKind::Variable,
161            "type_alias" => SymbolKind::TypeAlias,
162            _ => SymbolKind::Unknown,
163        }
164    }
165}
166
167#[derive(Debug, Clone, Serialize)]
168pub struct SymbolInfo {
169    pub name: String,
170    pub kind: SymbolKind,
171    pub file_path: String,
172    pub line: usize,
173    pub column: usize,
174    pub signature: String,
175    pub name_path: String,
176    pub id: String,
177    /// Structural ownership derived from file path. Not stored in DB —
178    /// computed on construction. Used by ranker for disambiguation.
179    #[serde(skip)]
180    pub provenance: SymbolProvenance,
181    #[serde(skip_serializing_if = "Option::is_none")]
182    pub body: Option<String>,
183    #[serde(default, skip_serializing_if = "Vec::is_empty")]
184    pub children: Vec<SymbolInfo>,
185    /// Byte offsets for batch body extraction (not serialized to API output).
186    /// u32 saves 8 bytes per symbol vs usize; sufficient for files up to 4GB.
187    #[serde(skip)]
188    pub start_byte: u32,
189    #[serde(skip)]
190    pub end_byte: u32,
191}
192
193/// Construct a stable symbol ID: `{file_path}#{kind}:{name_path}`
194///
195/// Uses `String::with_capacity` to allocate the exact final size in
196/// one shot, avoiding the internal reallocation that `format!()` may
197/// do when it starts from an empty buffer and grows.
198pub fn make_symbol_id(file_path: &str, kind: &SymbolKind, name_path: &str) -> String {
199    let label = kind.as_label();
200    let mut id = String::with_capacity(file_path.len() + 1 + label.len() + 1 + name_path.len());
201    id.push_str(file_path);
202    id.push('#');
203    id.push_str(label);
204    id.push(':');
205    id.push_str(name_path);
206    id
207}
208
/// Parse a stable symbol ID. Returns `(file_path, kind_label, name_path)` or `None`.
///
/// The input is split at the FIRST `#`, and the remainder at the first `:`
/// that follows it, so `name_path` may itself contain `#` or `:`.
/// Returns `None` when either separator is missing or when any of the
/// three parts is empty. The kind label is returned as-is and is not
/// validated against known kinds.
pub fn parse_symbol_id(input: &str) -> Option<(&str, &str, &str)> {
    let (file_path, rest) = input.split_once('#')?;
    let (kind, name_path) = rest.split_once(':')?;
    if file_path.is_empty() || kind.is_empty() || name_path.is_empty() {
        return None;
    }
    Some((file_path, kind, name_path))
}
222
223#[derive(Debug, Clone, Serialize)]
224pub struct IndexStats {
225    pub indexed_files: usize,
226    pub supported_files: usize,
227    pub stale_files: usize,
228}
229
230#[derive(Debug, Clone, Serialize)]
231pub struct RankedContextEntry {
232    pub name: String,
233    pub kind: String,
234    pub file: String,
235    pub line: usize,
236    pub signature: String,
237    #[serde(skip_serializing_if = "Option::is_none")]
238    pub body: Option<String>,
239    pub relevance_score: i32,
240}
241
242#[derive(Debug, Clone, Serialize)]
243pub struct RankedContextResult {
244    pub query: String,
245    pub symbols: Vec<RankedContextEntry>,
246    pub count: usize,
247    pub token_budget: usize,
248    pub chars_used: usize,
249}
250
251#[derive(Debug, Clone, Serialize, Deserialize)]
252pub(crate) struct ParsedSymbol {
253    pub name: String,
254    pub kind: SymbolKind,
255    pub file_path: String,
256    pub line: usize,
257    pub column: usize,
258    pub start_byte: u32,
259    pub end_byte: u32,
260    pub signature: String,
261    pub body: Option<String>,
262    pub name_path: String,
263    pub children: Vec<ParsedSymbol>,
264}
265
266/// Read-only DB access — either an owned read-only connection or a borrowed writer guard.
267pub(crate) enum ReadDb<'a> {
268    Owned(IndexDb),
269    Writer(std::sync::MutexGuard<'a, IndexDb>),
270}
271
#[cfg(test)]
mod tests {
    use super::SymbolProvenance;

    #[test]
    fn provenance_detects_tool_handlers_by_src_role() {
        assert_eq!(
            SymbolProvenance::from_path("crates/agent-runtime/src/tools/symbols.rs"),
            SymbolProvenance::McpTool
        );
    }

    #[test]
    fn provenance_detects_infra_by_package_or_runtime_path() {
        assert_eq!(
            SymbolProvenance::from_path("crates/agent-mcp/src/state.rs"),
            SymbolProvenance::McpInfra
        );
        assert_eq!(
            SymbolProvenance::from_path("workspace/runtime/src/dispatch/router.rs"),
            SymbolProvenance::McpInfra
        );
    }

    #[test]
    fn provenance_detects_surface_by_package_or_surface_file() {
        assert_eq!(
            SymbolProvenance::from_path("crates/project-tui/src/app.rs"),
            SymbolProvenance::TuiSurface
        );
        assert_eq!(
            SymbolProvenance::from_path("packages/client-ui/src/lib.rs"),
            SymbolProvenance::TuiSurface
        );
    }

    #[test]
    fn provenance_defaults_to_engine_core_for_plain_source() {
        assert_eq!(
            SymbolProvenance::from_path("crates/foo-core/src/lib.rs"),
            SymbolProvenance::EngineCore
        );
        assert_eq!(
            SymbolProvenance::from_path("src/service.py"),
            SymbolProvenance::EngineCore
        );
    }

    // Test and benchmark rules run before any `src`-based rule.
    #[test]
    fn provenance_detects_nested_tests_and_root_benchmarks() {
        assert_eq!(
            SymbolProvenance::from_path("crates/foo/tests/smoke.rs"),
            SymbolProvenance::Test
        );
        assert_eq!(
            SymbolProvenance::from_path("crates/foo/src/parser_tests.rs"),
            SymbolProvenance::Test
        );
        assert_eq!(
            SymbolProvenance::from_path("benchmarks/run.py"),
            SymbolProvenance::Benchmark
        );
    }
}
320
321/// Intermediate result of analyzing a single file.
322/// Decouples parse phase from DB write phase, enabling:
323/// - Parallel parse (rayon) → sequential DB commit
324/// - Failure tracking without losing previously indexed data
325/// - Future: async pipeline stages
326pub(crate) struct AnalyzedFile {
327    pub relative_path: String,
328    pub mtime: i64,
329    pub content_hash: String,
330    pub size_bytes: i64,
331    pub language_ext: String,
332    pub symbols: Vec<ParsedSymbol>,
333    pub imports: Vec<crate::db::NewImport>,
334    pub calls: Vec<crate::db::NewCall>,
335}
336
337impl std::ops::Deref for ReadDb<'_> {
338    type Target = IndexDb;
339    fn deref(&self) -> &IndexDb {
340        match self {
341            ReadDb::Owned(db) => db,
342            ReadDb::Writer(guard) => guard,
343        }
344    }
345}