codelens_engine/ir.rs
1//! Canonical semantic IR types for downstream consumers.
2//!
3//! This module provides a unified set of types that represent the semantic
4//! structure of a codebase — relationships between symbols, call graph edges,
5//! impact analysis nodes, and structured edit plans.
6//!
7//! # Re-exports
8//!
9//! Core types from other engine modules are re-exported here so that consumers
10//! can import everything from a single location:
11//!
12//! ```rust
13//! use codelens_engine::ir::{
14//! SymbolInfo, Relation, ImpactNode, EditPlan,
15//! SearchCandidate, IntelligenceSource, CodeDiagnostic,
16//! };
17//! ```
18
19use serde::Serialize;
20
21// Re-exports of existing types from other engine modules.
22pub use crate::circular::CircularDependency;
23pub use crate::git::ChangedFile;
24pub use crate::lsp::types::LspDiagnostic;
25pub use crate::rename::RenameEdit;
26pub use crate::search::SearchResult;
27pub use crate::symbols::{RankedContextEntry, SymbolInfo, SymbolKind};
28
29// ---------------------------------------------------------------------------
30// Relation graph types
31// ---------------------------------------------------------------------------
32
/// A directed relationship between two symbols or files.
///
/// The edge runs from `source` to `target`; `kind` states what the edge
/// means. Both endpoints are plain strings that hold either a symbol ID or a
/// file path, so this type does not itself distinguish the two.
#[derive(Debug, Clone, Serialize)]
pub struct Relation {
    /// Source symbol ID or file path.
    pub source: String,
    /// Target symbol ID or file path.
    pub target: String,
    /// What kind of relationship links `source` to `target`.
    pub kind: RelationKind,
    /// File where the relation was observed, if applicable.
    pub file_path: Option<String>,
    /// Line number where the relation was observed, if applicable.
    // NOTE(review): whether this is 0- or 1-based is not established in this
    // module — confirm against the producers.
    pub line: Option<usize>,
}
46
/// The kind of directed relationship between two symbols or files.
///
/// `Calls`/`CalledBy` and `Imports`/`ImportedBy` come as forward/reverse
/// pairs; the remaining variants have no reverse counterpart in this enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum RelationKind {
    /// Function calls function.
    Calls,
    /// Reverse of `Calls`.
    CalledBy,
    /// File imports file.
    Imports,
    /// Reverse of `Imports`.
    ImportedBy,
    /// Class extends class.
    Inherits,
    /// Class implements interface.
    Implements,
    /// Symbol references symbol.
    References,
    /// File or module contains symbol.
    Contains,
}
67
68// ---------------------------------------------------------------------------
69// Call graph edge
70// ---------------------------------------------------------------------------
71
/// A call graph edge with optional metadata.
///
/// Note: the engine's lower-level [`crate::call_graph::CallEdge`] carries
/// confidence and resolution strategy fields. This IR type is the
/// schema-facing, minimal form used in output payloads.
#[derive(Debug, Clone, Serialize)]
pub struct IrCallEdge {
    /// Caller symbol name or ID.
    pub caller: String,
    /// Callee symbol name or ID.
    pub callee: String,
    /// File associated with the caller. Always present.
    pub caller_file: String,
    /// File associated with the callee, when known.
    // NOTE(review): presumably `None` when the callee could not be resolved
    // to a file — confirm against the code that builds these edges.
    pub callee_file: Option<String>,
    /// Line associated with the edge.
    // NOTE(review): presumably the call-site line inside `caller_file`;
    // 0- vs 1-based indexing is not established here — confirm.
    pub line: usize,
}
87
88// ---------------------------------------------------------------------------
89// Impact analysis graph
90// ---------------------------------------------------------------------------
91
/// A node in an impact analysis graph.
///
/// A node always names a file and may additionally name a symbol inside it.
#[derive(Debug, Clone, Serialize)]
pub struct ImpactNode {
    /// Path of the affected file.
    pub file_path: String,
    /// Symbol name within the file, if the node represents a symbol.
    pub symbol: Option<String>,
    /// Distance from the change origin (0 = directly changed).
    pub depth: usize,
    /// How this node is affected by the change.
    pub impact_kind: ImpactKind,
    /// Count of symbols affected within this file.
    pub affected_symbols: usize,
}
104
/// How a file or symbol is affected by a change.
///
/// Used as the `impact_kind` of an [`ImpactNode`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum ImpactKind {
    /// Directly changed.
    Direct,
    /// Calls something that changed.
    Caller,
    /// Imports something that changed.
    Importer,
    /// Inherits or implements something that changed.
    TypeChild,
    /// Indirectly affected (transitive dependency).
    Transitive,
}
119
120// ---------------------------------------------------------------------------
121// Structured edit plan
122// ---------------------------------------------------------------------------
123
/// A structured edit plan for multi-file changes.
///
/// Each [`EditAction`] names its own file, so one plan can span several files.
#[derive(Debug, Clone, Serialize)]
pub struct EditPlan {
    /// Textual description of the plan.
    pub description: String,
    /// The edit actions that make up the plan.
    // NOTE(review): whether consumers must apply these in vector order is not
    // stated in this module — confirm before reordering.
    pub edits: Vec<EditAction>,
}
130
/// A single edit action within an [`EditPlan`].
///
/// Which of the optional fields are meaningful depends on `kind`; see the
/// per-field notes below.
#[derive(Debug, Clone, Serialize)]
pub struct EditAction {
    /// File the action applies to.
    pub file_path: String,
    /// What kind of edit this action performs.
    pub kind: EditActionKind,
    /// Target line for `Insert` and `Replace` actions.
    pub line: Option<usize>,
    /// Original text to replace (used for `Replace` and `Delete`).
    pub old_text: Option<String>,
    /// Replacement or inserted text.
    // NOTE(review): this field is not optional, so `Delete` actions must still
    // supply a value — presumably an empty string; confirm with producers.
    pub new_text: String,
}
143
/// The kind of edit performed by an [`EditAction`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum EditActionKind {
    /// Insert text; `EditAction::line` documents itself as the target line
    /// for this kind.
    Insert,
    /// Replace text; `EditAction::line` and `EditAction::old_text` both
    /// document themselves as used by this kind.
    Replace,
    /// Delete text; `EditAction::old_text` documents itself as used by this
    /// kind.
    Delete,
    /// Create a new file.
    Create,
}
153
154// ---------------------------------------------------------------------------
155// Retrieval pipeline types
156// ---------------------------------------------------------------------------
157
/// Describes a stage in the retrieval pipeline.
///
/// The full pipeline is: `Lexical → SymbolScore → DenseRetrieval → Rerank → GraphExpand`
///
/// Each stage can be enabled/disabled and contributes a weighted score.
/// The enabled set for a run is carried in `RetrievalConfig::stages`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum RetrievalStage {
    /// FTS5 / BM25 corpus search — file-level pre-filtering.
    Lexical,
    /// Symbol name/signature scoring — AST-aware matching.
    SymbolScore,
    /// Embedding-based dense retrieval — semantic similarity.
    DenseRetrieval,
    /// Multi-signal blending — text + pagerank + recency + semantic.
    Rerank,
    /// Graph expansion — callers, importers, type hierarchy of top results.
    GraphExpand,
}
176
/// Configuration for a retrieval pipeline run.
///
/// A `Default` impl for this type is provided elsewhere in this module.
#[derive(Debug, Clone, Serialize)]
pub struct RetrievalConfig {
    /// Which stages are enabled.
    pub stages: Vec<RetrievalStage>,
    /// Maximum results to return.
    pub max_results: usize,
    /// Token budget for response.
    pub token_budget: usize,
    /// Whether to include symbol bodies.
    pub include_body: bool,
    /// Weights for the individual rerank signals (text, pagerank, recency,
    /// semantic). The defaults come from `RetrievalWeights::default()` and
    /// are not equal across signals.
    pub weights: RetrievalWeights,
}
191
/// Weights for each retrieval signal in the rerank stage.
///
/// The four fields match the signals named on `RetrievalStage::Rerank`.
/// Nothing in this type enforces a range or that the weights sum to one.
#[derive(Debug, Clone, Serialize)]
pub struct RetrievalWeights {
    /// Weight of the text signal.
    pub text: f64,
    /// Weight of the pagerank signal.
    pub pagerank: f64,
    /// Weight of the recency signal.
    pub recency: f64,
    /// Weight of the semantic signal.
    pub semantic: f64,
}
200
201impl Default for RetrievalWeights {
202 fn default() -> Self {
203 Self {
204 text: 0.40,
205 pagerank: 0.20,
206 recency: 0.10,
207 semantic: 0.30,
208 }
209 }
210}
211
212// ---------------------------------------------------------------------------
213// Intelligence source (fast / precise path)
214// ---------------------------------------------------------------------------
215
/// The backend that produced a result.
///
/// Consumers use this to judge confidence: `TreeSitter` results are fast but
/// approximate; `Lsp` / `Scip` results are precise but require optional backends.
/// `Semantic` results come from the embedding model.
///
/// Carried as the `source` field of `SearchCandidate` and `CodeDiagnostic`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum IntelligenceSource {
    /// tree-sitter AST parse — always available, fast path.
    TreeSitter,
    /// LSP backend (opt-in) — precise type-aware results.
    Lsp,
    /// SCIP index import — precise, offline.
    Scip,
    /// Embedding-based semantic search.
    Semantic,
    /// Hybrid: multiple sources combined.
    Hybrid,
}
234
235// ---------------------------------------------------------------------------
236// Unified search candidate
237// ---------------------------------------------------------------------------
238
/// A search result from any retrieval path. This is the substrate type that
/// downstream consumers (MCP response builders, workflow tools) should target.
///
/// Existing types (`SearchResult`, `ScoredChunk`, `RankedContextEntry`) are
/// gradually converging toward this shape. New code should prefer
/// `SearchCandidate` and convert from legacy types via `From` impls.
///
/// When serialized, `name_path` and `body` are left out entirely if they are
/// `None`.
#[derive(Debug, Clone, Serialize)]
pub struct SearchCandidate {
    /// Name of the matched symbol.
    pub name: String,
    /// Kind label, stored as a plain string rather than a typed enum.
    // NOTE(review): the set of values is defined by whoever produces the
    // candidate — confirm before matching on specific strings.
    pub kind: String,
    /// Path of the file containing the match.
    pub file_path: String,
    /// Line of the match.
    // NOTE(review): 0- vs 1-based indexing is not established here — confirm.
    pub line: usize,
    /// Signature text of the matched symbol.
    pub signature: String,
    /// Name path of the symbol, when available; skipped in output if `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name_path: Option<String>,
    /// Symbol body, when included; skipped in output if `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub body: Option<String>,
    /// Score assigned by the producing retrieval path.
    // NOTE(review): no normalisation happens in this module, so scores from
    // different sources may not be on the same scale — confirm.
    pub score: f64,
    /// Which backend produced this candidate.
    pub source: IntelligenceSource,
}
259
260impl From<crate::search::SearchResult> for SearchCandidate {
261 fn from(r: crate::search::SearchResult) -> Self {
262 Self {
263 name: r.name,
264 kind: r.kind,
265 file_path: r.file,
266 line: r.line,
267 signature: r.signature,
268 name_path: Some(r.name_path),
269 body: None,
270 score: r.score,
271 source: IntelligenceSource::TreeSitter,
272 }
273 }
274}
275
276impl From<crate::embedding_store::ScoredChunk> for SearchCandidate {
277 fn from(c: crate::embedding_store::ScoredChunk) -> Self {
278 Self {
279 name: c.symbol_name,
280 kind: c.kind,
281 file_path: c.file_path,
282 line: c.line,
283 signature: c.signature,
284 name_path: Some(c.name_path),
285 body: None,
286 score: c.score,
287 source: IntelligenceSource::Semantic,
288 }
289 }
290}
291
292// ---------------------------------------------------------------------------
293// Diagnostic (unified)
294// ---------------------------------------------------------------------------
295
/// A code diagnostic from any analysis backend.
///
/// When serialized, `code` is left out entirely if it is `None`.
#[derive(Debug, Clone, Serialize)]
pub struct CodeDiagnostic {
    /// Path of the file the diagnostic applies to.
    pub file_path: String,
    /// Line of the diagnostic.
    // NOTE(review): 0- vs 1-based indexing is not established here — confirm.
    pub line: usize,
    /// Column of the diagnostic.
    // NOTE(review): base and unit (bytes vs characters) are not established
    // here — confirm against the backend that fills this in.
    pub column: usize,
    /// How severe the diagnostic is.
    pub severity: DiagnosticSeverity,
    /// Diagnostic message text.
    pub message: String,
    /// Which backend produced the diagnostic.
    pub source: IntelligenceSource,
    /// Optional diagnostic code; skipped in output if `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub code: Option<String>,
}
308
/// Severity level attached to a [`CodeDiagnostic`].
///
/// No ordering traits are derived, so variants cannot be compared with `<`
/// or `>`; the declaration order below carries no meaning to the compiler.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum DiagnosticSeverity {
    /// Error-level diagnostic.
    Error,
    /// Warning-level diagnostic.
    Warning,
    /// Informational diagnostic.
    Info,
    /// Hint-level diagnostic.
    Hint,
}
316
317// ---------------------------------------------------------------------------
318// Precise backend trait (fast/precise path separation)
319// ---------------------------------------------------------------------------
320
/// Trait for optional precise code intelligence backends (LSP, SCIP).
///
/// The engine's default fast path uses tree-sitter for all operations.
/// When a precise backend is available, it supplements or replaces
/// tree-sitter results with type-aware, cross-file intelligence.
///
/// Implementors: `ScipIndex` (planned), `LspClient` (planned).
///
/// Implementors must be `Send + Sync`. Every query method returns an
/// `anyhow::Result`; the conditions under which a query fails are left to
/// the implementation.
///
/// # Usage
///
/// ```ignore
/// if let Some(precise) = engine.precise_backend() {
///     let defs = precise.find_definitions("MyStruct", "src/lib.rs", 42)?;
///     // defs carry IntelligenceSource::Scip or ::Lsp
/// } else {
///     // fall back to tree-sitter search
/// }
/// ```
pub trait PreciseBackend: Send + Sync {
    /// Find definitions of a symbol at the given location.
    ///
    /// `symbol` is the symbol text, and `file_path` / `line` give the
    /// location the query is made from.
    fn find_definitions(
        &self,
        symbol: &str,
        file_path: &str,
        line: usize,
    ) -> anyhow::Result<Vec<SearchCandidate>>;

    /// Find all references to a symbol at the given location.
    ///
    /// Takes the same arguments as `find_definitions`.
    fn find_references(
        &self,
        symbol: &str,
        file_path: &str,
        line: usize,
    ) -> anyhow::Result<Vec<SearchCandidate>>;

    /// Get hover documentation for a symbol.
    ///
    /// The result is `Ok(None)`-capable, so an implementation can report
    /// "nothing to show" without raising an error.
    fn hover(&self, file_path: &str, line: usize, column: usize) -> anyhow::Result<Option<String>>;

    /// Get diagnostics for a file.
    fn diagnostics(&self, file_path: &str) -> anyhow::Result<Vec<CodeDiagnostic>>;

    /// Which intelligence source this backend provides.
    fn source(&self) -> IntelligenceSource;

    /// Whether this backend has an index for the given file.
    // NOTE(review): presumably callers check this before querying and fall
    // back to tree-sitter on `false` — confirm at the call sites.
    fn has_index_for(&self, file_path: &str) -> bool;
}
368
369// ---------------------------------------------------------------------------
370// Retrieval config defaults
371// ---------------------------------------------------------------------------
372
373impl Default for RetrievalConfig {
374 fn default() -> Self {
375 Self {
376 stages: vec![
377 RetrievalStage::Lexical,
378 RetrievalStage::SymbolScore,
379 RetrievalStage::DenseRetrieval,
380 RetrievalStage::Rerank,
381 ],
382 max_results: 20,
383 token_budget: 4000,
384 include_body: true,
385 weights: RetrievalWeights::default(),
386 }
387 }
388}