Skip to main content

codelens_engine/symbols/
types.rs

1use crate::db::IndexDb;
2use serde::{Deserialize, Serialize};
3
/// Structural ownership category derived from file path.
/// Used by the ranker to disambiguate same-name symbols across
/// crate boundaries without hardcoding specific symbol names.
///
/// Variants serialize under snake_case names (`engine_core`, `mcp_tool`, ...)
/// because of `rename_all = "snake_case"`. `EngineCore` is the `Default`
/// variant. The path examples on each variant are illustrative.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum SymbolProvenance {
    /// Core engine implementation (codelens-engine/src/)
    #[default]
    EngineCore,
    /// MCP tool handler (codelens-mcp/src/tools/)
    McpTool,
    /// MCP dispatch/protocol layer (codelens-mcp/src/dispatch/, protocol.rs)
    McpInfra,
    /// TUI surface layer (codelens-tui/src/)
    TuiSurface,
    /// Test code (**/tests/, *_tests.rs)
    Test,
    /// Benchmark/script code (benchmarks/, scripts/)
    Benchmark,
}
24
25impl SymbolProvenance {
26    /// Derive provenance from a relative file path using package/path structure.
27    ///
28    /// Classification is based on stable path-role conventions rather than
29    /// repository-specific crate names:
30    /// - Test: `/tests/`, `_tests.rs`, `/integration_tests/`
31    /// - Benchmark: `benchmarks/`, `scripts/`, `models/`
32    /// - TuiSurface: `src/ui/`, `src/tui/`, `src/cli/`, `src/app/` or package
33    ///   names ending in `-ui`, `-tui`, `-cli`, `-app`
34    /// - McpTool: `src/tools/` directory (tool handler convention)
35    /// - McpInfra: `src/dispatch/`, `src/server/`, `src/runtime/`,
36    ///   `protocol.rs`, `transport.rs`, or package names ending in `-mcp`
37    /// - EngineCore: everything else (library source)
38    pub fn from_path(path: &str) -> Self {
39        let normalized = path.replace('\\', "/");
40
41        // Test detection (universal pattern)
42        if normalized.contains("/tests/")
43            || normalized.contains("/tests.")
44            || normalized.ends_with("_tests.rs")
45            || normalized.contains("/integration_tests/")
46            || normalized.contains("/test_helpers")
47        {
48            return Self::Test;
49        }
50        // Benchmark/scripts (universal pattern)
51        if normalized.starts_with("benchmarks/")
52            || normalized.starts_with("scripts/")
53            || normalized.starts_with("models/")
54        {
55            return Self::Benchmark;
56        }
57
58        let segments: Vec<&str> = normalized
59            .split('/')
60            .filter(|segment| !segment.is_empty())
61            .collect();
62        let src_idx = segments.iter().rposition(|segment| *segment == "src");
63        let package_name = src_idx
64            .and_then(|idx| idx.checked_sub(1))
65            .and_then(|idx| segments.get(idx))
66            .copied()
67            .unwrap_or_default();
68        let after_src = src_idx.map(|idx| &segments[idx + 1..]).unwrap_or(&[][..]);
69        let first_after_src = after_src.first().copied().unwrap_or_default();
70        let file_name = segments.last().copied().unwrap_or_default();
71
72        if first_after_src == "tools" {
73            return Self::McpTool;
74        }
75
76        if matches!(first_after_src, "ui" | "tui" | "cli" | "app")
77            || matches!(file_name, "ui.rs" | "tui.rs" | "cli.rs" | "app.rs")
78            || package_name.ends_with("-ui")
79            || package_name.ends_with("_ui")
80            || package_name.ends_with("-tui")
81            || package_name.ends_with("_tui")
82            || package_name.ends_with("-cli")
83            || package_name.ends_with("_cli")
84            || package_name.ends_with("-app")
85            || package_name.ends_with("_app")
86        {
87            return Self::TuiSurface;
88        }
89
90        if matches!(
91            first_after_src,
92            "dispatch" | "server" | "runtime" | "transport"
93        ) || matches!(file_name, "protocol.rs" | "transport.rs" | "runtime.rs")
94            || package_name.ends_with("-mcp")
95            || package_name.ends_with("_mcp")
96        {
97            return Self::McpInfra;
98        }
99
100        // Default: library/engine core
101        Self::EngineCore
102    }
103
104    /// Ranking penalty/boost for "implementation" queries.
105    /// Positive = prefer, negative = demote.
106    pub fn impl_query_prior(self) -> f64 {
107        match self {
108            Self::EngineCore => 6.0,
109            Self::McpTool => -4.0,
110            Self::McpInfra => -2.0,
111            Self::TuiSurface => -8.0,
112            Self::Test => -12.0,
113            Self::Benchmark => -14.0,
114        }
115    }
116}
117
/// Language-agnostic category of an indexed symbol.
///
/// Variants serialize under snake_case names (e.g. `type_alias`) because of
/// `rename_all = "snake_case"`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub enum SymbolKind {
    File,
    Class,
    Interface,
    Enum,
    Module,
    Method,
    Function,
    Property,
    Variable,
    TypeAlias,
    /// Catch-all for kinds that do not map onto any other variant.
    Unknown,
}
133
134impl SymbolKind {
135    pub fn as_label(&self) -> &'static str {
136        match self {
137            SymbolKind::File => "file",
138            SymbolKind::Class => "class",
139            SymbolKind::Interface => "interface",
140            SymbolKind::Enum => "enum",
141            SymbolKind::Module => "module",
142            SymbolKind::Method => "method",
143            SymbolKind::Function => "function",
144            SymbolKind::Property => "property",
145            SymbolKind::Variable => "variable",
146            SymbolKind::TypeAlias => "type_alias",
147            SymbolKind::Unknown => "unknown",
148        }
149    }
150
151    pub fn from_str_label(s: &str) -> SymbolKind {
152        match s {
153            "class" => SymbolKind::Class,
154            "interface" => SymbolKind::Interface,
155            "enum" => SymbolKind::Enum,
156            "module" => SymbolKind::Module,
157            "method" => SymbolKind::Method,
158            "function" => SymbolKind::Function,
159            "property" => SymbolKind::Property,
160            "variable" => SymbolKind::Variable,
161            "type_alias" => SymbolKind::TypeAlias,
162            _ => SymbolKind::Unknown,
163        }
164    }
165}
166
/// Serializable description of a single symbol, optionally with nested
/// child symbols.
///
/// Only `Serialize` is derived. Fields marked `#[serde(skip)]` are kept in
/// memory but never appear in serialized output.
#[derive(Debug, Clone, Serialize)]
pub struct SymbolInfo {
    /// Symbol name.
    pub name: String,
    /// Category of the symbol.
    pub kind: SymbolKind,
    /// Path of the file containing the symbol.
    pub file_path: String,
    /// Line of the symbol's declaration.
    /// NOTE(review): 0- vs 1-based is not visible here — confirm at the producer.
    pub line: usize,
    /// Column of the symbol's declaration (same base caveat as `line`).
    pub column: usize,
    /// Signature text for the symbol.
    pub signature: String,
    /// Path of names locating the symbol within its file.
    /// NOTE(review): separator/format is defined by the producer — confirm.
    pub name_path: String,
    /// Symbol identifier; presumably built by `make_symbol_id` — verify at
    /// construction sites.
    pub id: String,
    /// Structural ownership derived from file path. Not stored in DB —
    /// computed on construction. Used by ranker for disambiguation.
    #[serde(skip)]
    pub provenance: SymbolProvenance,
    /// Source text of the symbol body; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub body: Option<String>,
    /// Nested symbols; omitted from output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub children: Vec<SymbolInfo>,
    /// Byte offsets for batch body extraction (not serialized to API output).
    /// u32 saves 8 bytes per symbol vs usize; sufficient for files up to 4GB.
    #[serde(skip)]
    pub start_byte: u32,
    /// End byte offset paired with `start_byte` (not serialized).
    #[serde(skip)]
    pub end_byte: u32,
    /// Inclusive end line of the symbol's span. Populated from
    /// tree-sitter's `Node::end_position().row` at extraction time.
    /// Defaults to `line` when the span is unknown (e.g. symbols
    /// materialised from DB rows that predate the `end_line` column
    /// or tests that build `SymbolInfo` by hand). Used by the P1-4
    /// per-symbol LSP boost to judge whether a ref line plausibly
    /// lives *inside* the symbol's body rather than just near its
    /// declaration.
    #[serde(skip)]
    pub end_line: usize,
}
202
203/// Construct a stable symbol ID: `{file_path}#{kind}:{name_path}`
204///
205/// Uses `String::with_capacity` to allocate the exact final size in
206/// one shot, avoiding the internal reallocation that `format!()` may
207/// do when it starts from an empty buffer and grows.
208pub fn make_symbol_id(file_path: &str, kind: &SymbolKind, name_path: &str) -> String {
209    let label = kind.as_label();
210    let mut id = String::with_capacity(file_path.len() + 1 + label.len() + 1 + name_path.len());
211    id.push_str(file_path);
212    id.push('#');
213    id.push_str(label);
214    id.push(':');
215    id.push_str(name_path);
216    id
217}
218
/// Split a stable symbol ID into `(file_path, kind_label, name_path)`.
///
/// The file path is everything before the first `#`; the kind label runs
/// from there to the next `:`; the name path is the remainder (and may
/// itself contain `:`). Returns `None` when either delimiter is missing or
/// when any of the three parts is empty.
pub fn parse_symbol_id(input: &str) -> Option<(&str, &str, &str)> {
    let (file_path, rest) = input.split_once('#')?;
    let (kind, name_path) = rest.split_once(':')?;

    let has_empty_part = [file_path, kind, name_path]
        .iter()
        .any(|part| part.is_empty());
    if has_empty_part {
        None
    } else {
        Some((file_path, kind, name_path))
    }
}
232
/// Serializable file counters describing the state of the index.
///
/// NOTE(review): the exact definition of each counter lives with the code
/// that fills this struct — confirm there before relying on it.
#[derive(Debug, Clone, Serialize)]
pub struct IndexStats {
    /// Count of indexed files.
    pub indexed_files: usize,
    /// Count of supported files.
    pub supported_files: usize,
    /// Count of stale files.
    pub stale_files: usize,
}
239
/// One symbol in a ranked-context result, flattened for serialization.
#[derive(Debug, Clone, Serialize)]
pub struct RankedContextEntry {
    /// Symbol name.
    pub name: String,
    /// Symbol kind as a string; presumably a `SymbolKind` label — verify
    /// against the code that builds entries.
    pub kind: String,
    /// Path of the file containing the symbol.
    pub file: String,
    /// Line of the symbol's declaration.
    pub line: usize,
    /// Signature text for the symbol.
    pub signature: String,
    /// Symbol body text; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub body: Option<String>,
    /// Integer relevance score assigned to this entry.
    pub relevance_score: i32,
}
251
/// Serializable result of a ranked-context query: the kept symbols plus
/// bookkeeping about the budget and what was pruned.
#[derive(Debug, Clone, Serialize)]
pub struct RankedContextResult {
    /// The query this result answers.
    pub query: String,
    /// Kept entries.
    pub symbols: Vec<RankedContextEntry>,
    /// Entry count; presumably `symbols.len()` — verify at the producer.
    pub count: usize,
    /// Budget the result was built against.
    pub token_budget: usize,
    /// Characters consumed by the kept entries.
    pub chars_used: usize,
    /// Number of candidate symbols dropped by `prune_to_budget`.
    /// 0 when every candidate fit in the budget.
    pub pruned_count: usize,
    /// Relevance score of the lowest-ranked kept entry.
    /// Agents can use this to tell "we almost lost relevant context"
    /// from "only junk got dropped".
    /// NOTE(review): this is `f64` while `RankedContextEntry::relevance_score`
    /// is `i32` — confirm the two are on the same scale.
    pub last_kept_score: f64,
}
267
/// Crate-internal symbol record with (de)serialization support.
///
/// Carries the same positional and textual fields as `SymbolInfo`, minus
/// `id` and `provenance`, and nests children of its own type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct ParsedSymbol {
    /// Symbol name.
    pub name: String,
    /// Category of the symbol.
    pub kind: SymbolKind,
    /// Path of the file containing the symbol.
    pub file_path: String,
    /// Line of the symbol's declaration.
    pub line: usize,
    /// Column of the symbol's declaration.
    pub column: usize,
    /// Start byte offset of the symbol.
    pub start_byte: u32,
    /// End byte offset of the symbol.
    pub end_byte: u32,
    /// Inclusive end line of the symbol's span. Populated from
    /// tree-sitter's `Node::end_position().row` at parse time.
    /// Symbols materialised from DB rows (which pre-date the column)
    /// fall back to `line` so downstream `end_line >= line` invariants
    /// hold.
    pub end_line: usize,
    /// Signature text for the symbol.
    pub signature: String,
    /// Symbol body text, when available.
    pub body: Option<String>,
    /// Path of names locating the symbol within its file.
    pub name_path: String,
    /// Nested symbols.
    pub children: Vec<ParsedSymbol>,
}
288
/// Read-only DB access — either an owned read-only connection or a borrowed writer guard.
///
/// The lifetime `'a` is the borrow of the mutex that the `Writer` guard
/// was locked from.
pub(crate) enum ReadDb<'a> {
    /// An `IndexDb` owned by this value.
    Owned(IndexDb),
    /// A held lock on a mutex-protected `IndexDb`; the lock is released when
    /// this value is dropped.
    Writer(std::sync::MutexGuard<'a, IndexDb>),
}
294
#[cfg(test)]
mod tests {
    use super::SymbolProvenance;

    /// Check that each `(path, expected)` pair classifies as expected,
    /// naming the offending path on failure.
    fn assert_classified(cases: &[(&str, SymbolProvenance)]) {
        for (path, expected) in cases {
            assert_eq!(
                SymbolProvenance::from_path(path),
                *expected,
                "unexpected provenance for {}",
                path
            );
        }
    }

    /// `src/tools/` marks a tool handler regardless of the package name.
    #[test]
    fn provenance_detects_tool_handlers_by_src_role() {
        assert_classified(&[(
            "crates/agent-runtime/src/tools/symbols.rs",
            SymbolProvenance::McpTool,
        )]);
    }

    /// Infra is recognised by a `-mcp` package or a `src/dispatch/` directory.
    #[test]
    fn provenance_detects_infra_by_package_or_runtime_path() {
        assert_classified(&[
            ("crates/agent-mcp/src/state.rs", SymbolProvenance::McpInfra),
            (
                "workspace/runtime/src/dispatch/router.rs",
                SymbolProvenance::McpInfra,
            ),
        ]);
    }

    /// Surface code is recognised by a `-tui`/`-ui` package or an `app.rs` file.
    #[test]
    fn provenance_detects_surface_by_package_or_surface_file() {
        assert_classified(&[
            ("crates/project-tui/src/app.rs", SymbolProvenance::TuiSurface),
            ("packages/client-ui/src/lib.rs", SymbolProvenance::TuiSurface),
        ]);
    }

    /// Anything without a recognised role falls back to engine core.
    #[test]
    fn provenance_defaults_to_engine_core_for_plain_source() {
        assert_classified(&[
            ("crates/foo-core/src/lib.rs", SymbolProvenance::EngineCore),
            ("src/service.py", SymbolProvenance::EngineCore),
        ]);
    }
}
343
/// Intermediate result of analyzing a single file.
/// Decouples parse phase from DB write phase, enabling:
/// - Parallel parse (rayon) → sequential DB commit
/// - Failure tracking without losing previously indexed data
/// - Future: async pipeline stages
pub(crate) struct AnalyzedFile {
    /// Path of the analyzed file, relative to some root.
    /// NOTE(review): which root is not visible here — confirm at the producer.
    pub relative_path: String,
    /// File modification time.
    /// NOTE(review): unit/epoch is not visible here — confirm at the producer.
    pub mtime: i64,
    /// Hash of the file content (algorithm is chosen by the producer).
    pub content_hash: String,
    /// File size in bytes.
    pub size_bytes: i64,
    /// Language/extension tag for the file.
    pub language_ext: String,
    /// Symbols parsed from the file.
    pub symbols: Vec<ParsedSymbol>,
    /// Import records extracted from the file, ready for DB insertion.
    pub imports: Vec<crate::db::NewImport>,
    /// Call records extracted from the file, ready for DB insertion.
    pub calls: Vec<crate::db::NewCall>,
}
359
360impl std::ops::Deref for ReadDb<'_> {
361    type Target = IndexDb;
362    fn deref(&self) -> &IndexDb {
363        match self {
364            ReadDb::Owned(db) => db,
365            ReadDb::Writer(guard) => guard,
366        }
367    }
368}