codelens_engine/ir.rs
//! Canonical semantic IR types for downstream consumers.
//!
//! This module provides a unified set of types that represent the semantic
//! structure of a codebase — relationships between symbols, call graph edges,
//! impact analysis nodes, and structured edit plans.
//!
//! # Re-exports
//!
//! Core types from other engine modules are re-exported here, alongside the IR
//! types defined in this module, so that consumers can import everything from
//! a single location:
//!
//! ```rust
//! use codelens_engine::ir::{
//!     SymbolInfo, Relation, ImpactNode, EditPlan,
//!     SearchCandidate, IntelligenceSource, CodeDiagnostic,
//! };
//! ```
18
19use serde::Serialize;
20
21// Re-exports of existing types from other engine modules.
22pub use crate::circular::CircularDependency;
23pub use crate::git::ChangedFile;
24pub use crate::lsp::types::LspDiagnostic;
25pub use crate::rename::RenameEdit;
26pub use crate::search::SearchResult;
27pub use crate::symbols::{RankedContextEntry, SymbolInfo, SymbolKind};
28
29// ---------------------------------------------------------------------------
30// Relation graph types
31// ---------------------------------------------------------------------------
32
33/// A directed relationship between two symbols or files.
34#[derive(Debug, Clone, Serialize)]
35pub struct Relation {
36 /// Source symbol ID or file path.
37 pub source: String,
38 /// Target symbol ID or file path.
39 pub target: String,
40 pub kind: RelationKind,
41 /// File where the relation was observed, if applicable.
42 pub file_path: Option<String>,
43 /// Line number where the relation was observed, if applicable.
44 pub line: Option<usize>,
45}
46
47/// The kind of directed relationship between two symbols or files.
48#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
49pub enum RelationKind {
50 /// Function calls function.
51 Calls,
52 /// Reverse of `Calls`.
53 CalledBy,
54 /// File imports file.
55 Imports,
56 /// Reverse of `Imports`.
57 ImportedBy,
58 /// Class extends class.
59 Inherits,
60 /// Class implements interface.
61 Implements,
62 /// Symbol references symbol.
63 References,
64 /// File or module contains symbol.
65 Contains,
66}
67
68// ---------------------------------------------------------------------------
69// Call graph edge
70// ---------------------------------------------------------------------------
71
72/// A call graph edge with optional metadata.
73///
74/// Note: the engine's lower-level [`crate::call_graph::CallEdge`] carries
75/// confidence and resolution strategy fields. This IR type is the
76/// schema-facing, minimal form used in output payloads.
77#[derive(Debug, Clone, Serialize)]
78pub struct IrCallEdge {
79 /// Caller symbol name or ID.
80 pub caller: String,
81 /// Callee symbol name or ID.
82 pub callee: String,
83 pub caller_file: String,
84 pub callee_file: Option<String>,
85 pub line: usize,
86}
87
88// ---------------------------------------------------------------------------
89// Impact analysis graph
90// ---------------------------------------------------------------------------
91
92/// A node in an impact analysis graph.
93#[derive(Debug, Clone, Serialize)]
94pub struct ImpactNode {
95 pub file_path: String,
96 /// Symbol name within the file, if the node represents a symbol.
97 pub symbol: Option<String>,
98 /// Distance from the change origin (0 = directly changed).
99 pub depth: usize,
100 pub impact_kind: ImpactKind,
101 /// Count of symbols affected within this file.
102 pub affected_symbols: usize,
103}
104
105/// How a file or symbol is affected by a change.
106#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
107pub enum ImpactKind {
108 /// Directly changed.
109 Direct,
110 /// Calls something that changed.
111 Caller,
112 /// Imports something that changed.
113 Importer,
114 /// Inherits or implements something that changed.
115 TypeChild,
116 /// Indirectly affected (transitive dependency).
117 Transitive,
118}
119
120// ---------------------------------------------------------------------------
121// Structured edit plan
122// ---------------------------------------------------------------------------
123
124/// A structured edit plan for multi-file changes.
125#[derive(Debug, Clone, Serialize)]
126pub struct EditPlan {
127 pub description: String,
128 pub edits: Vec<EditAction>,
129}
130
131/// A single edit action within an [`EditPlan`].
132#[derive(Debug, Clone, Serialize)]
133pub struct EditAction {
134 pub file_path: String,
135 pub kind: EditActionKind,
136 /// Target line for `Insert` and `Replace` actions.
137 pub line: Option<usize>,
138 /// Original text to replace (used for `Replace` and `Delete`).
139 pub old_text: Option<String>,
140 /// Replacement or inserted text.
141 pub new_text: String,
142}
143
144/// The kind of edit performed by an [`EditAction`].
145#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
146pub enum EditActionKind {
147 Insert,
148 Replace,
149 Delete,
150 /// Create a new file.
151 Create,
152}
153
154// ---------------------------------------------------------------------------
155// Retrieval pipeline types
156// ---------------------------------------------------------------------------
157
158/// Describes a stage in the retrieval pipeline.
159///
160/// The full pipeline is: `Lexical → SymbolScore → DenseRetrieval → Rerank → GraphExpand`
161///
162/// Each stage can be enabled/disabled and contributes a weighted score.
163#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
164pub enum RetrievalStage {
165 /// FTS5 / BM25 corpus search — file-level pre-filtering.
166 Lexical,
167 /// Symbol name/signature scoring — AST-aware matching.
168 SymbolScore,
169 /// Embedding-based dense retrieval — semantic similarity.
170 DenseRetrieval,
171 /// Multi-signal blending — text + pagerank + recency + semantic.
172 Rerank,
173 /// Graph expansion — callers, importers, type hierarchy of top results.
174 GraphExpand,
175}
176
177/// Configuration for a retrieval pipeline run.
178#[derive(Debug, Clone, Serialize)]
179pub struct RetrievalConfig {
180 /// Which stages are enabled.
181 pub stages: Vec<RetrievalStage>,
182 /// Maximum results to return.
183 pub max_results: usize,
184 /// Token budget for response.
185 pub token_budget: usize,
186 /// Whether to include symbol bodies.
187 pub include_body: bool,
188 /// Weight overrides per stage (default: equal weighting).
189 pub weights: RetrievalWeights,
190}
191
192/// Weights for each retrieval signal in the rerank stage.
193#[derive(Debug, Clone, Serialize)]
194pub struct RetrievalWeights {
195 pub text: f64,
196 pub pagerank: f64,
197 pub recency: f64,
198 pub semantic: f64,
199}
200
201impl Default for RetrievalWeights {
202 fn default() -> Self {
203 Self {
204 text: 0.40,
205 pagerank: 0.20,
206 recency: 0.10,
207 semantic: 0.30,
208 }
209 }
210}
211
212// ---------------------------------------------------------------------------
213// Intelligence source (fast / precise path)
214// ---------------------------------------------------------------------------
215
216/// The backend that produced a result.
217///
218/// Consumers use this to judge confidence: `TreeSitter` results are fast but
219/// approximate; `Lsp` / `Scip` results are precise but require optional backends.
220/// `Semantic` results come from the embedding model.
221#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
222pub enum IntelligenceSource {
223 /// tree-sitter AST parse — always available, fast path.
224 TreeSitter,
225 /// LSP backend (opt-in) — precise type-aware results.
226 Lsp,
227 /// SCIP index import — precise, offline.
228 Scip,
229 /// Embedding-based semantic search.
230 Semantic,
231 /// Hybrid: multiple sources combined.
232 Hybrid,
233}
234
235// ---------------------------------------------------------------------------
236// Unified search candidate
237// ---------------------------------------------------------------------------
238
239/// A search result from any retrieval path. This is the substrate type that
240/// downstream consumers (MCP response builders, workflow tools) should target.
241///
242/// Existing types (`SearchResult`, `ScoredChunk`, `RankedContextEntry`) are
243/// gradually converging toward this shape. New code should prefer
244/// `SearchCandidate` and convert from legacy types via `From` impls.
245#[derive(Debug, Clone, Serialize)]
246pub struct SearchCandidate {
247 pub name: String,
248 pub kind: String,
249 pub file_path: String,
250 pub line: usize,
251 /// Inclusive end line for the underlying definition occurrence, when the
252 /// backend has it. Populated by the SCIP backend from the occurrence
253 /// range so `tools/symbols/handlers.rs` can slice the body exactly
254 /// instead of falling back to the 50-line heuristic. Other backends
255 /// (tree-sitter, embedding) leave this `None`.
256 #[serde(skip_serializing_if = "Option::is_none")]
257 pub end_line: Option<usize>,
258 pub signature: String,
259 #[serde(skip_serializing_if = "Option::is_none")]
260 pub name_path: Option<String>,
261 #[serde(skip_serializing_if = "Option::is_none")]
262 pub body: Option<String>,
263 pub score: f64,
264 pub source: IntelligenceSource,
265}
266
267impl From<crate::search::SearchResult> for SearchCandidate {
268 fn from(r: crate::search::SearchResult) -> Self {
269 Self {
270 name: r.name,
271 kind: r.kind,
272 file_path: r.file,
273 line: r.line,
274 end_line: None,
275 signature: r.signature,
276 name_path: Some(r.name_path),
277 body: None,
278 score: r.score,
279 source: IntelligenceSource::TreeSitter,
280 }
281 }
282}
283
284impl From<crate::embedding_store::ScoredChunk> for SearchCandidate {
285 fn from(c: crate::embedding_store::ScoredChunk) -> Self {
286 Self {
287 name: c.symbol_name,
288 kind: c.kind,
289 file_path: c.file_path,
290 line: c.line,
291 end_line: None,
292 signature: c.signature,
293 name_path: Some(c.name_path),
294 body: None,
295 score: c.score,
296 source: IntelligenceSource::Semantic,
297 }
298 }
299}
300
301// ---------------------------------------------------------------------------
302// Diagnostic (unified)
303// ---------------------------------------------------------------------------
304
305/// A code diagnostic from any analysis backend.
306#[derive(Debug, Clone, Serialize)]
307pub struct CodeDiagnostic {
308 pub file_path: String,
309 pub line: usize,
310 pub column: usize,
311 pub severity: DiagnosticSeverity,
312 pub message: String,
313 pub source: IntelligenceSource,
314 #[serde(skip_serializing_if = "Option::is_none")]
315 pub code: Option<String>,
316}
317
318#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
319pub enum DiagnosticSeverity {
320 Error,
321 Warning,
322 Info,
323 Hint,
324}
325
326// ---------------------------------------------------------------------------
327// Precise backend trait (fast/precise path separation)
328// ---------------------------------------------------------------------------
329
330/// Trait for optional precise code intelligence backends.
331///
332/// The engine's default fast path uses tree-sitter for all operations.
333/// When a precise backend is available, it supplements or replaces
334/// tree-sitter results with type-aware, cross-file intelligence.
335///
336/// Current implementor: [`crate::ScipBackend`]. LSP-powered tools live in the
337/// MCP crate and do not implement this engine trait yet.
338///
339/// # Usage
340///
341/// ```ignore
342/// if let Some(precise) = engine.precise_backend() {
343/// let defs = precise.find_definitions("MyStruct", "src/lib.rs", 42)?;
344/// // defs carry IntelligenceSource::Scip or ::Lsp
345/// } else {
346/// // fall back to tree-sitter search
347/// }
348/// ```
349pub trait PreciseBackend: Send + Sync {
350 /// Find definitions of a symbol at the given location.
351 fn find_definitions(
352 &self,
353 symbol: &str,
354 file_path: &str,
355 line: usize,
356 ) -> anyhow::Result<Vec<SearchCandidate>>;
357
358 /// Find all references to a symbol at the given location.
359 fn find_references(
360 &self,
361 symbol: &str,
362 file_path: &str,
363 line: usize,
364 ) -> anyhow::Result<Vec<SearchCandidate>>;
365
366 /// Get hover documentation for a symbol.
367 fn hover(&self, file_path: &str, line: usize, column: usize) -> anyhow::Result<Option<String>>;
368
369 /// Get diagnostics for a file.
370 fn diagnostics(&self, file_path: &str) -> anyhow::Result<Vec<CodeDiagnostic>>;
371
372 /// Which intelligence source this backend provides.
373 fn source(&self) -> IntelligenceSource;
374
375 /// Whether this backend has an index for the given file.
376 fn has_index_for(&self, file_path: &str) -> bool;
377}
378
379// ---------------------------------------------------------------------------
380// Retrieval config defaults
381// ---------------------------------------------------------------------------
382
383impl Default for RetrievalConfig {
384 fn default() -> Self {
385 Self {
386 stages: vec![
387 RetrievalStage::Lexical,
388 RetrievalStage::SymbolScore,
389 RetrievalStage::DenseRetrieval,
390 RetrievalStage::Rerank,
391 ],
392 max_results: 20,
393 token_budget: 4000,
394 include_body: true,
395 weights: RetrievalWeights::default(),
396 }
397 }
398}