// Source: codelens_engine/ir.rs

//! Canonical semantic IR types for downstream consumers.
//!
//! This module provides a unified set of types that represent the semantic
//! structure of a codebase: relationships between symbols, call graph edges,
//! impact analysis nodes, and structured edit plans. It also defines the
//! retrieval pipeline configuration, the unified search candidate and
//! diagnostic types, and the trait for optional precise backends.
//!
//! # Re-exports
//!
//! Core types from other engine modules are re-exported here, alongside the
//! IR types defined in this module, so that consumers can import everything
//! from a single location:
//!
//! ```rust
//! use codelens_engine::ir::{
//!     SymbolInfo, Relation, ImpactNode, EditPlan,
//!     SearchCandidate, IntelligenceSource, CodeDiagnostic,
//! };
//! ```
18
19use serde::Serialize;
20
21// Re-exports of existing types from other engine modules.
22pub use crate::circular::CircularDependency;
23pub use crate::git::ChangedFile;
24pub use crate::lsp::types::LspDiagnostic;
25pub use crate::rename::RenameEdit;
26pub use crate::search::SearchResult;
27pub use crate::symbols::{RankedContextEntry, SymbolInfo, SymbolKind};
28
29// ---------------------------------------------------------------------------
30// Relation graph types
31// ---------------------------------------------------------------------------
32
/// A directed relationship between two symbols or files.
///
/// The edge runs from `source` to `target`, and `kind` names the relationship.
/// `file_path` and `line` optionally record where the relation was observed.
#[derive(Debug, Clone, Serialize)]
pub struct Relation {
    /// Source symbol ID or file path.
    pub source: String,
    /// Target symbol ID or file path.
    pub target: String,
    /// Which relationship `source` has to `target` (see [`RelationKind`]).
    pub kind: RelationKind,
    /// File where the relation was observed, if applicable.
    pub file_path: Option<String>,
    /// Line number where the relation was observed, if applicable.
    // NOTE(review): this module does not state whether the line is 0- or
    // 1-based — confirm against the producers.
    pub line: Option<usize>,
}
46
/// The kind of directed relationship between two symbols or files.
///
/// Two variants have an explicit reverse counterpart in this enum
/// (`Calls`/`CalledBy` and `Imports`/`ImportedBy`); the remaining variants
/// are one-directional only.
///
/// No serde rename attribute is applied here, so the serialized form is
/// whatever the `Serialize` derive produces for the variant identifiers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum RelationKind {
    /// Function calls function.
    Calls,
    /// Reverse of `Calls`.
    CalledBy,
    /// File imports file.
    Imports,
    /// Reverse of `Imports`.
    ImportedBy,
    /// Class extends class.
    Inherits,
    /// Class implements interface.
    Implements,
    /// Symbol references symbol.
    References,
    /// File or module contains symbol.
    Contains,
}
67
68// ---------------------------------------------------------------------------
69// Call graph edge
70// ---------------------------------------------------------------------------
71
/// A call graph edge with optional metadata.
///
/// Note: the engine's lower-level [`crate::call_graph::CallEdge`] carries
/// confidence and resolution strategy fields.  This IR type is the
/// schema-facing, minimal form used in output payloads.
///
/// The only optional field is `callee_file`; every other field is required.
#[derive(Debug, Clone, Serialize)]
pub struct IrCallEdge {
    /// Caller symbol name or ID.
    pub caller: String,
    /// Callee symbol name or ID.
    pub callee: String,
    /// File associated with `caller`.
    pub caller_file: String,
    /// File associated with `callee`, when one is known.
    // NOTE(review): presumably `None` when the callee could not be resolved
    // to a file — confirm against the code that builds these edges.
    pub callee_file: Option<String>,
    /// Line number attached to the edge.
    // NOTE(review): presumably the call-site line inside `caller_file`; the
    // line base (0 or 1) is not stated in this module — confirm.
    pub line: usize,
}
87
88// ---------------------------------------------------------------------------
89// Impact analysis graph
90// ---------------------------------------------------------------------------
91
/// A node in an impact analysis graph.
///
/// A node always names a file. It additionally names a symbol when `symbol`
/// is `Some`.
#[derive(Debug, Clone, Serialize)]
pub struct ImpactNode {
    /// Path of the file this node refers to.
    pub file_path: String,
    /// Symbol name within the file, if the node represents a symbol.
    pub symbol: Option<String>,
    /// Distance from the change origin (0 = directly changed).
    pub depth: usize,
    /// How this node is affected by the change (see [`ImpactKind`]).
    pub impact_kind: ImpactKind,
    /// Count of symbols affected within this file.
    pub affected_symbols: usize,
}
104
/// How a file or symbol is affected by a change.
///
/// Used as the `impact_kind` of an [`ImpactNode`]. The variants are plain
/// unit variants, so the type is `Copy` and comparable with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum ImpactKind {
    /// Directly changed.
    Direct,
    /// Calls something that changed.
    Caller,
    /// Imports something that changed.
    Importer,
    /// Inherits or implements something that changed.
    TypeChild,
    /// Indirectly affected (transitive dependency).
    Transitive,
}
119
120// ---------------------------------------------------------------------------
121// Structured edit plan
122// ---------------------------------------------------------------------------
123
/// A structured edit plan for multi-file changes.
///
/// A plan is a description plus a list of [`EditAction`]s. Each action names
/// its own file, so one plan can touch several files.
#[derive(Debug, Clone, Serialize)]
pub struct EditPlan {
    /// Free-form description of the plan.
    pub description: String,
    /// The edit actions that make up the plan.
    // NOTE(review): whether consumers must apply these in vector order is
    // not specified in this module — confirm.
    pub edits: Vec<EditAction>,
}
130
/// A single edit action within an [`EditPlan`].
///
/// Which of the optional fields are meaningful depends on `kind`; see the
/// per-field notes below.
#[derive(Debug, Clone, Serialize)]
pub struct EditAction {
    /// File the action applies to.
    pub file_path: String,
    /// What sort of edit this is (see [`EditActionKind`]).
    pub kind: EditActionKind,
    /// Target line for `Insert` and `Replace` actions.
    pub line: Option<usize>,
    /// Original text to replace (used for `Replace` and `Delete`).
    pub old_text: Option<String>,
    /// Replacement or inserted text.
    // NOTE(review): this field is a plain `String`, so it is present even
    // for `Delete` actions; what value those carry (presumably empty) is not
    // specified here — confirm.
    pub new_text: String,
}
143
/// The kind of edit performed by an [`EditAction`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum EditActionKind {
    /// Insert text; `EditAction::line` is documented as the target line.
    Insert,
    /// Replace text; `EditAction::line` and `EditAction::old_text` are both
    /// documented as applying to this kind.
    Replace,
    /// Delete text; `EditAction::old_text` is documented as applying to this
    /// kind.
    Delete,
    /// Create a new file.
    Create,
}
153
154// ---------------------------------------------------------------------------
155// Retrieval pipeline types
156// ---------------------------------------------------------------------------
157
/// Describes a stage in the retrieval pipeline.
///
/// The full pipeline is: `Lexical → SymbolScore → DenseRetrieval → Rerank → GraphExpand`
///
/// Each stage can be enabled/disabled and contributes a weighted score.
/// Enabled stages are listed in `RetrievalConfig::stages`. Note that
/// `RetrievalConfig::default()` enables the first four stages only and leaves
/// `GraphExpand` out.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum RetrievalStage {
    /// FTS5 / BM25 corpus search — file-level pre-filtering.
    Lexical,
    /// Symbol name/signature scoring — AST-aware matching.
    SymbolScore,
    /// Embedding-based dense retrieval — semantic similarity.
    DenseRetrieval,
    /// Multi-signal blending — text + pagerank + recency + semantic.
    Rerank,
    /// Graph expansion — callers, importers, type hierarchy of top results.
    GraphExpand,
}
176
/// Configuration for a retrieval pipeline run.
///
/// `RetrievalConfig::default()` enables `Lexical`, `SymbolScore`,
/// `DenseRetrieval` and `Rerank`, with `max_results = 20`,
/// `token_budget = 4000`, `include_body = true` and the default
/// [`RetrievalWeights`].
#[derive(Debug, Clone, Serialize)]
pub struct RetrievalConfig {
    /// Which stages are enabled.
    pub stages: Vec<RetrievalStage>,
    /// Maximum results to return.
    pub max_results: usize,
    /// Token budget for response.
    pub token_budget: usize,
    /// Whether to include symbol bodies.
    pub include_body: bool,
    /// Per-signal weights for the rerank stage.
    ///
    /// The default is *not* an equal weighting: `RetrievalWeights::default()`
    /// is text 0.40, pagerank 0.20, recency 0.10, semantic 0.30.
    pub weights: RetrievalWeights,
}
191
/// Weights for each retrieval signal in the rerank stage.
///
/// The four signals match the ones named on `RetrievalStage::Rerank`
/// (text + pagerank + recency + semantic). The `Default` values are
/// 0.40 / 0.20 / 0.10 / 0.30, which sum to 1.0.
// NOTE(review): nothing in this module validates or normalises the weights;
// whether consumers require them to sum to 1.0 should be confirmed.
#[derive(Debug, Clone, Serialize)]
pub struct RetrievalWeights {
    /// Weight of the text signal (default 0.40).
    pub text: f64,
    /// Weight of the pagerank signal (default 0.20).
    pub pagerank: f64,
    /// Weight of the recency signal (default 0.10).
    pub recency: f64,
    /// Weight of the semantic signal (default 0.30).
    pub semantic: f64,
}
200
201impl Default for RetrievalWeights {
202    fn default() -> Self {
203        Self {
204            text: 0.40,
205            pagerank: 0.20,
206            recency: 0.10,
207            semantic: 0.30,
208        }
209    }
210}
211
212// ---------------------------------------------------------------------------
213// Intelligence source (fast / precise path)
214// ---------------------------------------------------------------------------
215
/// The backend that produced a result.
///
/// Consumers use this to judge confidence: `TreeSitter` results are fast but
/// approximate; `Lsp` / `Scip` results are precise but require optional backends.
/// `Semantic` results come from the embedding model.
///
/// Within this module the value is carried by `SearchCandidate::source` and
/// `CodeDiagnostic::source`, and reported by `PreciseBackend::source`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum IntelligenceSource {
    /// tree-sitter AST parse — always available, fast path.
    TreeSitter,
    /// LSP backend (opt-in) — precise type-aware results.
    Lsp,
    /// SCIP index import — precise, offline.
    Scip,
    /// Embedding-based semantic search.
    Semantic,
    /// Hybrid: multiple sources combined.
    Hybrid,
}
234
235// ---------------------------------------------------------------------------
236// Unified search candidate
237// ---------------------------------------------------------------------------
238
/// A search result from any retrieval path. This is the substrate type that
/// downstream consumers (MCP response builders, workflow tools) should target.
///
/// Existing types (`SearchResult`, `ScoredChunk`, `RankedContextEntry`) are
/// gradually converging toward this shape. New code should prefer
/// `SearchCandidate` and convert from legacy types via `From` impls.
///
/// The three `Option` fields (`end_line`, `name_path`, `body`) are skipped
/// entirely during serialization when they are `None`.
#[derive(Debug, Clone, Serialize)]
pub struct SearchCandidate {
    /// Symbol name.
    pub name: String,
    /// Symbol kind as a free-form string (this is a `String`, not the
    /// `SymbolKind` enum).
    pub kind: String,
    /// Path of the file the candidate was found in.
    pub file_path: String,
    /// Line of the candidate.
    // NOTE(review): presumably the start line of the definition, given that
    // `end_line` is its inclusive end; the line base is not stated — confirm.
    pub line: usize,
    /// Inclusive end line for the underlying definition occurrence, when the
    /// backend has it. Populated by the SCIP backend from the occurrence
    /// range so `tools/symbols/handlers.rs` can slice the body exactly
    /// instead of falling back to the 50-line heuristic. Other backends
    /// (tree-sitter, embedding) leave this `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub end_line: Option<usize>,
    /// Signature text of the symbol.
    pub signature: String,
    /// Name path of the symbol, when the producer supplies one. Both `From`
    /// conversions in this module always set it to `Some`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name_path: Option<String>,
    /// Symbol body text, when included. Both `From` conversions in this
    /// module leave it `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub body: Option<String>,
    /// Score assigned by the producing retrieval path.
    // NOTE(review): the `From` impls copy the upstream score unchanged, so
    // scores from different sources may not share a scale — confirm before
    // comparing across `source` values.
    pub score: f64,
    /// Which backend produced this candidate.
    pub source: IntelligenceSource,
}
266
267impl From<crate::search::SearchResult> for SearchCandidate {
268    fn from(r: crate::search::SearchResult) -> Self {
269        Self {
270            name: r.name,
271            kind: r.kind,
272            file_path: r.file,
273            line: r.line,
274            end_line: None,
275            signature: r.signature,
276            name_path: Some(r.name_path),
277            body: None,
278            score: r.score,
279            source: IntelligenceSource::TreeSitter,
280        }
281    }
282}
283
284impl From<crate::embedding_store::ScoredChunk> for SearchCandidate {
285    fn from(c: crate::embedding_store::ScoredChunk) -> Self {
286        Self {
287            name: c.symbol_name,
288            kind: c.kind,
289            file_path: c.file_path,
290            line: c.line,
291            end_line: None,
292            signature: c.signature,
293            name_path: Some(c.name_path),
294            body: None,
295            score: c.score,
296            source: IntelligenceSource::Semantic,
297        }
298    }
299}
300
301// ---------------------------------------------------------------------------
302// Diagnostic (unified)
303// ---------------------------------------------------------------------------
304
/// A code diagnostic from any analysis backend.
///
/// `source` records which backend produced the diagnostic. `code` is
/// optional and is omitted from serialized output when `None`.
#[derive(Debug, Clone, Serialize)]
pub struct CodeDiagnostic {
    /// Path of the file the diagnostic refers to.
    pub file_path: String,
    /// Line of the diagnostic.
    // NOTE(review): line/column base (0 or 1) is not stated in this module —
    // confirm against the backends that fill these in.
    pub line: usize,
    /// Column of the diagnostic.
    pub column: usize,
    /// Severity level of the diagnostic.
    pub severity: DiagnosticSeverity,
    /// Diagnostic message text.
    pub message: String,
    /// Which backend produced this diagnostic.
    pub source: IntelligenceSource,
    /// Diagnostic code, when the backend provides one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub code: Option<String>,
}
317
/// Severity level attached to a [`CodeDiagnostic`].
///
/// Variants are declared in the order `Error`, `Warning`, `Info`, `Hint`.
/// `PartialOrd`/`Ord` are not derived, so severities can be tested for
/// equality but not compared with `<` or `>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum DiagnosticSeverity {
    /// An error-level diagnostic.
    Error,
    /// A warning-level diagnostic.
    Warning,
    /// An informational diagnostic.
    Info,
    /// A hint-level diagnostic.
    Hint,
}
325
326// ---------------------------------------------------------------------------
327// Precise backend trait (fast/precise path separation)
328// ---------------------------------------------------------------------------
329
/// Trait for optional precise code intelligence backends.
///
/// The engine's default fast path uses tree-sitter for all operations.
/// When a precise backend is available, it supplements or replaces
/// tree-sitter results with type-aware, cross-file intelligence.
///
/// Current implementor: [`crate::ScipBackend`]. LSP-powered tools live in the
/// MCP crate and do not implement this engine trait yet.
///
/// Implementors must be `Send + Sync`. Every query method returns
/// `anyhow::Result`; the conditions under which an implementor returns an
/// error are implementor-defined and not specified by this trait.
///
/// # Usage
///
/// ```ignore
/// if let Some(precise) = engine.precise_backend() {
///     let defs = precise.find_definitions("MyStruct", "src/lib.rs", 42)?;
///     // defs carry IntelligenceSource::Scip or ::Lsp
/// } else {
///     // fall back to tree-sitter search
/// }
/// ```
pub trait PreciseBackend: Send + Sync {
    /// Find definitions of a symbol at the given location.
    ///
    /// `symbol` is the symbol to look up; `file_path` and `line` give the
    /// location of interest. Matches are returned as [`SearchCandidate`]s.
    // NOTE(review): the line base (0 or 1) is not stated by the trait —
    // confirm against the implementor.
    fn find_definitions(
        &self,
        symbol: &str,
        file_path: &str,
        line: usize,
    ) -> anyhow::Result<Vec<SearchCandidate>>;

    /// Find all references to a symbol at the given location.
    ///
    /// Takes the same arguments as `find_definitions` and likewise returns
    /// [`SearchCandidate`]s.
    fn find_references(
        &self,
        symbol: &str,
        file_path: &str,
        line: usize,
    ) -> anyhow::Result<Vec<SearchCandidate>>;

    /// Get hover documentation for a symbol.
    ///
    /// The position is given as `file_path`, `line` and `column`.
    // NOTE(review): presumably `Ok(None)` means "no documentation at this
    // position" rather than a failure — confirm against the implementor.
    fn hover(&self, file_path: &str, line: usize, column: usize) -> anyhow::Result<Option<String>>;

    /// Get diagnostics for a file.
    fn diagnostics(&self, file_path: &str) -> anyhow::Result<Vec<CodeDiagnostic>>;

    /// Which intelligence source this backend provides.
    fn source(&self) -> IntelligenceSource;

    /// Whether this backend has an index for the given file.
    fn has_index_for(&self, file_path: &str) -> bool;
}
378
379// ---------------------------------------------------------------------------
380// Retrieval config defaults
381// ---------------------------------------------------------------------------
382
383impl Default for RetrievalConfig {
384    fn default() -> Self {
385        Self {
386            stages: vec![
387                RetrievalStage::Lexical,
388                RetrievalStage::SymbolScore,
389                RetrievalStage::DenseRetrieval,
390                RetrievalStage::Rerank,
391            ],
392            max_results: 20,
393            token_budget: 4000,
394            include_body: true,
395            weights: RetrievalWeights::default(),
396        }
397    }
398}