Skip to main content

codelens_engine/
ir.rs

//! Canonical semantic IR types for downstream consumers.
//!
//! This module provides a unified set of types that represent the semantic
//! structure of a codebase: relationships between symbols, call graph edges,
//! impact analysis nodes, structured edit plans, retrieval pipeline
//! configuration, unified search candidates and diagnostics, and the
//! `PreciseBackend` trait for optional precise backends.
//!
//! # Re-exports
//!
//! Core types from other engine modules are re-exported here so that consumers
//! can import everything from a single location:
//!
//! ```rust
//! use codelens_engine::ir::{
//!     SymbolInfo, Relation, ImpactNode, EditPlan,
//!     SearchCandidate, IntelligenceSource, CodeDiagnostic,
//! };
//! ```
18
19use serde::Serialize;
20
21// Re-exports of existing types from other engine modules.
22pub use crate::circular::CircularDependency;
23pub use crate::git::ChangedFile;
24pub use crate::lsp::types::LspDiagnostic;
25pub use crate::rename::RenameEdit;
26pub use crate::search::SearchResult;
27pub use crate::symbols::{RankedContextEntry, SymbolInfo, SymbolKind};
28
29// ---------------------------------------------------------------------------
30// Relation graph types
31// ---------------------------------------------------------------------------
32
33/// A directed relationship between two symbols or files.
34#[derive(Debug, Clone, Serialize)]
35pub struct Relation {
36    /// Source symbol ID or file path.
37    pub source: String,
38    /// Target symbol ID or file path.
39    pub target: String,
40    pub kind: RelationKind,
41    /// File where the relation was observed, if applicable.
42    pub file_path: Option<String>,
43    /// Line number where the relation was observed, if applicable.
44    pub line: Option<usize>,
45}
46
47/// The kind of directed relationship between two symbols or files.
48#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
49pub enum RelationKind {
50    /// Function calls function.
51    Calls,
52    /// Reverse of `Calls`.
53    CalledBy,
54    /// File imports file.
55    Imports,
56    /// Reverse of `Imports`.
57    ImportedBy,
58    /// Class extends class.
59    Inherits,
60    /// Class implements interface.
61    Implements,
62    /// Symbol references symbol.
63    References,
64    /// File or module contains symbol.
65    Contains,
66}
67
68// ---------------------------------------------------------------------------
69// Call graph edge
70// ---------------------------------------------------------------------------
71
72/// A call graph edge with optional metadata.
73///
74/// Note: the engine's lower-level [`crate::call_graph::CallEdge`] carries
75/// confidence and resolution strategy fields.  This IR type is the
76/// schema-facing, minimal form used in output payloads.
77#[derive(Debug, Clone, Serialize)]
78pub struct IrCallEdge {
79    /// Caller symbol name or ID.
80    pub caller: String,
81    /// Callee symbol name or ID.
82    pub callee: String,
83    pub caller_file: String,
84    pub callee_file: Option<String>,
85    pub line: usize,
86}
87
88// ---------------------------------------------------------------------------
89// Impact analysis graph
90// ---------------------------------------------------------------------------
91
92/// A node in an impact analysis graph.
93#[derive(Debug, Clone, Serialize)]
94pub struct ImpactNode {
95    pub file_path: String,
96    /// Symbol name within the file, if the node represents a symbol.
97    pub symbol: Option<String>,
98    /// Distance from the change origin (0 = directly changed).
99    pub depth: usize,
100    pub impact_kind: ImpactKind,
101    /// Count of symbols affected within this file.
102    pub affected_symbols: usize,
103}
104
105/// How a file or symbol is affected by a change.
106#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
107pub enum ImpactKind {
108    /// Directly changed.
109    Direct,
110    /// Calls something that changed.
111    Caller,
112    /// Imports something that changed.
113    Importer,
114    /// Inherits or implements something that changed.
115    TypeChild,
116    /// Indirectly affected (transitive dependency).
117    Transitive,
118}
119
120// ---------------------------------------------------------------------------
121// Structured edit plan
122// ---------------------------------------------------------------------------
123
124/// A structured edit plan for multi-file changes.
125#[derive(Debug, Clone, Serialize)]
126pub struct EditPlan {
127    pub description: String,
128    pub edits: Vec<EditAction>,
129}
130
131/// A single edit action within an [`EditPlan`].
132#[derive(Debug, Clone, Serialize)]
133pub struct EditAction {
134    pub file_path: String,
135    pub kind: EditActionKind,
136    /// Target line for `Insert` and `Replace` actions.
137    pub line: Option<usize>,
138    /// Original text to replace (used for `Replace` and `Delete`).
139    pub old_text: Option<String>,
140    /// Replacement or inserted text.
141    pub new_text: String,
142}
143
144/// The kind of edit performed by an [`EditAction`].
145#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
146pub enum EditActionKind {
147    Insert,
148    Replace,
149    Delete,
150    /// Create a new file.
151    Create,
152}
153
154// ---------------------------------------------------------------------------
155// Retrieval pipeline types
156// ---------------------------------------------------------------------------
157
158/// Describes a stage in the retrieval pipeline.
159///
160/// The full pipeline is: `Lexical → SymbolScore → DenseRetrieval → Rerank → GraphExpand`
161///
162/// Each stage can be enabled/disabled and contributes a weighted score.
163#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
164pub enum RetrievalStage {
165    /// FTS5 / BM25 corpus search — file-level pre-filtering.
166    Lexical,
167    /// Symbol name/signature scoring — AST-aware matching.
168    SymbolScore,
169    /// Embedding-based dense retrieval — semantic similarity.
170    DenseRetrieval,
171    /// Multi-signal blending — text + pagerank + recency + semantic.
172    Rerank,
173    /// Graph expansion — callers, importers, type hierarchy of top results.
174    GraphExpand,
175}
176
177/// Configuration for a retrieval pipeline run.
178#[derive(Debug, Clone, Serialize)]
179pub struct RetrievalConfig {
180    /// Which stages are enabled.
181    pub stages: Vec<RetrievalStage>,
182    /// Maximum results to return.
183    pub max_results: usize,
184    /// Token budget for response.
185    pub token_budget: usize,
186    /// Whether to include symbol bodies.
187    pub include_body: bool,
188    /// Weight overrides per stage (default: equal weighting).
189    pub weights: RetrievalWeights,
190}
191
192/// Weights for each retrieval signal in the rerank stage.
193#[derive(Debug, Clone, Serialize)]
194pub struct RetrievalWeights {
195    pub text: f64,
196    pub pagerank: f64,
197    pub recency: f64,
198    pub semantic: f64,
199}
200
201impl Default for RetrievalWeights {
202    fn default() -> Self {
203        Self {
204            text: 0.40,
205            pagerank: 0.20,
206            recency: 0.10,
207            semantic: 0.30,
208        }
209    }
210}
211
212// ---------------------------------------------------------------------------
213// Intelligence source (fast / precise path)
214// ---------------------------------------------------------------------------
215
216/// The backend that produced a result.
217///
218/// Consumers use this to judge confidence: `TreeSitter` results are fast but
219/// approximate; `Lsp` / `Scip` results are precise but require optional backends.
220/// `Semantic` results come from the embedding model.
221#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
222pub enum IntelligenceSource {
223    /// tree-sitter AST parse — always available, fast path.
224    TreeSitter,
225    /// LSP backend (opt-in) — precise type-aware results.
226    Lsp,
227    /// SCIP index import — precise, offline.
228    Scip,
229    /// Embedding-based semantic search.
230    Semantic,
231    /// Hybrid: multiple sources combined.
232    Hybrid,
233}
234
235// ---------------------------------------------------------------------------
236// Unified search candidate
237// ---------------------------------------------------------------------------
238
239/// A search result from any retrieval path. This is the substrate type that
240/// downstream consumers (MCP response builders, workflow tools) should target.
241///
242/// Existing types (`SearchResult`, `ScoredChunk`, `RankedContextEntry`) are
243/// gradually converging toward this shape. New code should prefer
244/// `SearchCandidate` and convert from legacy types via `From` impls.
245#[derive(Debug, Clone, Serialize)]
246pub struct SearchCandidate {
247    pub name: String,
248    pub kind: String,
249    pub file_path: String,
250    pub line: usize,
251    pub signature: String,
252    #[serde(skip_serializing_if = "Option::is_none")]
253    pub name_path: Option<String>,
254    #[serde(skip_serializing_if = "Option::is_none")]
255    pub body: Option<String>,
256    pub score: f64,
257    pub source: IntelligenceSource,
258}
259
260impl From<crate::search::SearchResult> for SearchCandidate {
261    fn from(r: crate::search::SearchResult) -> Self {
262        Self {
263            name: r.name,
264            kind: r.kind,
265            file_path: r.file,
266            line: r.line,
267            signature: r.signature,
268            name_path: Some(r.name_path),
269            body: None,
270            score: r.score,
271            source: IntelligenceSource::TreeSitter,
272        }
273    }
274}
275
276impl From<crate::embedding_store::ScoredChunk> for SearchCandidate {
277    fn from(c: crate::embedding_store::ScoredChunk) -> Self {
278        Self {
279            name: c.symbol_name,
280            kind: c.kind,
281            file_path: c.file_path,
282            line: c.line,
283            signature: c.signature,
284            name_path: Some(c.name_path),
285            body: None,
286            score: c.score,
287            source: IntelligenceSource::Semantic,
288        }
289    }
290}
291
292// ---------------------------------------------------------------------------
293// Diagnostic (unified)
294// ---------------------------------------------------------------------------
295
296/// A code diagnostic from any analysis backend.
297#[derive(Debug, Clone, Serialize)]
298pub struct CodeDiagnostic {
299    pub file_path: String,
300    pub line: usize,
301    pub column: usize,
302    pub severity: DiagnosticSeverity,
303    pub message: String,
304    pub source: IntelligenceSource,
305    #[serde(skip_serializing_if = "Option::is_none")]
306    pub code: Option<String>,
307}
308
309#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
310pub enum DiagnosticSeverity {
311    Error,
312    Warning,
313    Info,
314    Hint,
315}
316
317// ---------------------------------------------------------------------------
318// Precise backend trait (fast/precise path separation)
319// ---------------------------------------------------------------------------
320
321/// Trait for optional precise code intelligence backends (LSP, SCIP).
322///
323/// The engine's default fast path uses tree-sitter for all operations.
324/// When a precise backend is available, it supplements or replaces
325/// tree-sitter results with type-aware, cross-file intelligence.
326///
327/// Implementors: `ScipIndex` (planned), `LspClient` (planned).
328///
329/// # Usage
330///
331/// ```ignore
332/// if let Some(precise) = engine.precise_backend() {
333///     let defs = precise.find_definitions("MyStruct", "src/lib.rs", 42)?;
334///     // defs carry IntelligenceSource::Scip or ::Lsp
335/// } else {
336///     // fall back to tree-sitter search
337/// }
338/// ```
339pub trait PreciseBackend: Send + Sync {
340    /// Find definitions of a symbol at the given location.
341    fn find_definitions(
342        &self,
343        symbol: &str,
344        file_path: &str,
345        line: usize,
346    ) -> anyhow::Result<Vec<SearchCandidate>>;
347
348    /// Find all references to a symbol at the given location.
349    fn find_references(
350        &self,
351        symbol: &str,
352        file_path: &str,
353        line: usize,
354    ) -> anyhow::Result<Vec<SearchCandidate>>;
355
356    /// Get hover documentation for a symbol.
357    fn hover(&self, file_path: &str, line: usize, column: usize) -> anyhow::Result<Option<String>>;
358
359    /// Get diagnostics for a file.
360    fn diagnostics(&self, file_path: &str) -> anyhow::Result<Vec<CodeDiagnostic>>;
361
362    /// Which intelligence source this backend provides.
363    fn source(&self) -> IntelligenceSource;
364
365    /// Whether this backend has an index for the given file.
366    fn has_index_for(&self, file_path: &str) -> bool;
367}
368
369// ---------------------------------------------------------------------------
370// Retrieval config defaults
371// ---------------------------------------------------------------------------
372
373impl Default for RetrievalConfig {
374    fn default() -> Self {
375        Self {
376            stages: vec![
377                RetrievalStage::Lexical,
378                RetrievalStage::SymbolScore,
379                RetrievalStage::DenseRetrieval,
380                RetrievalStage::Rerank,
381            ],
382            max_results: 20,
383            token_budget: 4000,
384            include_body: true,
385            weights: RetrievalWeights::default(),
386        }
387    }
388}