perl_symbol_table/lib.rs
1//! Symbol table and scope management for Perl LSP.
2//!
3//! This crate provides the core data structures for tracking Perl symbols,
4//! references, and scopes for IDE features like go-to-definition,
5//! find-all-references, and semantic highlighting.
6//!
7//! # Core Types
8//!
9//! - [`Symbol`] - A symbol definition with metadata
10//! - [`SymbolReference`] - A reference to a symbol
11//! - [`SymbolTable`] - Central registry of symbols and references
12//! - [`Scope`] - A lexical scope boundary
13//! - [`ScopeKind`] - Classification of scope types
14//!
15//! # Usage
16//!
17//! ```
18//! use perl_symbol_table::{Symbol, SymbolTable, Scope, ScopeKind, ScopeId};
19//! use perl_symbol_types::SymbolKind;
20//! use perl_position_tracking::SourceLocation;
21//!
22//! // Create a symbol table
23//! let mut table = SymbolTable::new();
24//!
25//! // Add a symbol
26//! let symbol = Symbol {
27//! name: "foo".to_string(),
28//! qualified_name: "main::foo".to_string(),
29//! kind: SymbolKind::Subroutine,
30//! location: SourceLocation { start: 0, end: 10 },
31//! scope_id: 0,
32//! declaration: None,
33//! documentation: Some("A function".to_string()),
34//! attributes: vec![],
35//! };
36//!
37//! table.add_symbol(symbol);
38//! ```
39
40use perl_position_tracking::SourceLocation;
41use std::collections::{HashMap, HashSet};
42
43// Re-export symbol types for convenience
44pub use perl_symbol_types::{SymbolKind, VarKind};
45
/// Identifier of a lexical scope inside a [`SymbolTable`].
///
/// [`SymbolTable::new`] registers the global scope under ID `0`;
/// [`SymbolTable::push_scope`] assigns every further ID from an increasing
/// counter.
pub type ScopeId = usize;
48
49/// A symbol definition in Perl code with comprehensive metadata.
50///
51/// Represents a symbol definition with full context including scope,
52/// package qualification, and documentation for LSP features like
53/// go-to-definition, hover, and workspace symbols.
54///
55/// # Performance Characteristics
56/// - Memory: ~128 bytes per symbol (optimized for large codebases)
57/// - Lookup time: O(1) via hash table indexing
58/// - Scope resolution: O(log n) with scope hierarchy
59///
60/// # Perl Language Semantics
61/// - Package qualification: `Package::symbol` vs bare `symbol`
62/// - Scope rules: Lexical (`my`), package (`our`), dynamic (`local`), persistent (`state`)
63/// - Symbol types: Variables (`$`, `@`, `%`), subroutines, packages, constants
64/// - Attribute parsing: `:shared`, `:method`, `:lvalue` and custom attributes
65#[derive(Debug, Clone)]
66#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
67pub struct Symbol {
68 /// Symbol name (without sigil for variables)
69 pub name: String,
70 /// Fully qualified name with package prefix
71 pub qualified_name: String,
72 /// Classification of symbol type
73 pub kind: SymbolKind,
74 /// Source location of symbol definition
75 pub location: SourceLocation,
76 /// Lexical scope identifier for visibility rules
77 pub scope_id: ScopeId,
78 /// Variable declaration type (my, our, local, state)
79 pub declaration: Option<String>,
80 /// Extracted POD or comment documentation
81 pub documentation: Option<String>,
82 /// Perl attributes applied to the symbol
83 pub attributes: Vec<String>,
84}
85
86/// A reference to a symbol with usage context for LSP analysis.
87///
88/// Tracks symbol usage sites for features like find-all-references,
89/// rename refactoring, and unused symbol detection with precise
90/// scope and context information.
91///
92/// # Performance Characteristics
93/// - Memory: ~64 bytes per reference
94/// - Collection: O(n) during AST traversal
95/// - Query time: O(log n) with spatial indexing
96///
97/// # LSP Integration
98/// Essential for:
99/// - Find references: Locate all usage sites
100/// - Rename refactoring: Update all references atomically
101/// - Unused detection: Identify unreferenced symbols
102/// - Call hierarchy: Build caller/callee relationships
103#[derive(Debug, Clone)]
104#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
105pub struct SymbolReference {
106 /// Symbol name (without sigil for variables)
107 pub name: String,
108 /// Symbol type inferred from usage context
109 pub kind: SymbolKind,
110 /// Source location of the reference
111 pub location: SourceLocation,
112 /// Lexical scope where reference occurs
113 pub scope_id: ScopeId,
114 /// Whether this is a write reference (assignment)
115 pub is_write: bool,
116}
117
118/// A lexical scope in Perl code with hierarchical symbol visibility.
119///
120/// Represents a lexical scope boundary (subroutine, block, package) with
121/// symbol visibility rules according to Perl's lexical scoping semantics.
122///
123/// # Performance Characteristics
124/// - Scope lookup: O(log n) with parent chain traversal
125/// - Symbol resolution: O(1) per scope level
126/// - Memory: ~64 bytes per scope + symbol set
127///
128/// # Perl Scoping Rules
129/// - Global scope: File-level and package symbols
130/// - Package scope: Package-qualified symbols
131/// - Subroutine scope: Local variables and parameters
132/// - Block scope: Lexical variables in control structures
133/// - Lexical precedence: Inner scopes shadow outer scopes
134#[derive(Debug, Clone)]
135#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
136pub struct Scope {
137 /// Unique scope identifier for reference tracking
138 pub id: ScopeId,
139 /// Parent scope for hierarchical lookup (None for global)
140 pub parent: Option<ScopeId>,
141 /// Classification of scope type
142 pub kind: ScopeKind,
143 /// Source location where scope begins
144 pub location: SourceLocation,
145 /// Set of symbol names defined in this scope
146 pub symbols: HashSet<String>,
147}
148
/// The kind of construct that opens a lexical scope in Perl.
///
/// [`SymbolTable::new`] creates scope `0` as [`ScopeKind::Global`]; every
/// other scope receives the kind passed to [`SymbolTable::push_scope`].
/// [`SymbolTable::find_symbol`] treats [`ScopeKind::Package`] specially: it
/// does not collect `our` declarations while visiting a package scope.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ScopeKind {
    /// File-level (global) scope.
    Global,
    /// Scope of a package.
    Package,
    /// Body of a subroutine.
    Subroutine,
    /// Block of a control structure (`if`, `while`, `for`, etc.).
    Block,
    /// Scope of an `eval`.
    Eval,
}
174
175/// Comprehensive symbol table for Perl code analysis and LSP features.
176///
177/// Central data structure containing all symbols, references, and scopes
178/// with efficient indexing for LSP operations like go-to-definition,
179/// find-references, and workspace symbols.
180///
181/// # Performance Characteristics
182/// - Symbol lookup: O(1) average, O(n) worst case for overloaded names
183/// - Reference queries: O(log n) with spatial indexing
184/// - Memory usage: ~500KB per 10K lines of Perl code
185/// - Construction time: O(n) single-pass AST traversal
186///
187/// # LSP Integration
188/// Core data structure for:
189/// - Symbol resolution: Package-qualified and bare name lookup
190/// - Reference tracking: All usage sites with context
191/// - Scope analysis: Lexical visibility and shadowing
192/// - Completion: Context-aware symbol suggestions
193/// - Workspace indexing: Cross-file symbol registry
194///
195/// # Perl Language Support
196/// - Package qualification: `Package::symbol` resolution
197/// - Lexical scoping: `my`, `our`, `local`, `state` variable semantics
198/// - Symbol overloading: Multiple definitions with scope precedence
199/// - Context sensitivity: Scalar/array/hash context resolution
200#[derive(Debug, Default)]
201pub struct SymbolTable {
202 /// Symbols indexed by name with multiple definitions support
203 pub symbols: HashMap<String, Vec<Symbol>>,
204 /// References indexed by name for find-all-references
205 pub references: HashMap<String, Vec<SymbolReference>>,
206 /// Scopes indexed by ID for hierarchical lookup
207 pub scopes: HashMap<ScopeId, Scope>,
208 /// Scope stack maintained during AST traversal
209 scope_stack: Vec<ScopeId>,
210 /// Monotonic scope ID generator
211 next_scope_id: ScopeId,
212 /// Current package context for symbol qualification
213 current_package: String,
214}
215
216impl SymbolTable {
217 /// Create a new symbol table with global scope initialized.
218 pub fn new() -> Self {
219 let mut table = SymbolTable {
220 symbols: HashMap::new(),
221 references: HashMap::new(),
222 scopes: HashMap::new(),
223 scope_stack: vec![0],
224 next_scope_id: 1,
225 current_package: "main".to_string(),
226 };
227
228 // Create global scope
229 table.scopes.insert(
230 0,
231 Scope {
232 id: 0,
233 parent: None,
234 kind: ScopeKind::Global,
235 location: SourceLocation { start: 0, end: 0 },
236 symbols: HashSet::new(),
237 },
238 );
239
240 table
241 }
242
243 /// Get the current scope ID.
244 pub fn current_scope(&self) -> ScopeId {
245 *self.scope_stack.last().unwrap_or(&0)
246 }
247
248 /// Get the current package name.
249 pub fn current_package(&self) -> &str {
250 &self.current_package
251 }
252
253 /// Set the current package name.
254 pub fn set_current_package(&mut self, package: String) {
255 self.current_package = package;
256 }
257
258 /// Push a new scope onto the stack.
259 pub fn push_scope(&mut self, kind: ScopeKind, location: SourceLocation) -> ScopeId {
260 let parent = self.current_scope();
261 let scope_id = self.next_scope_id;
262 self.next_scope_id += 1;
263
264 let scope =
265 Scope { id: scope_id, parent: Some(parent), kind, location, symbols: HashSet::new() };
266
267 self.scopes.insert(scope_id, scope);
268 self.scope_stack.push(scope_id);
269 scope_id
270 }
271
272 /// Pop the current scope from the stack.
273 pub fn pop_scope(&mut self) {
274 self.scope_stack.pop();
275 }
276
277 /// Add a symbol definition to the table.
278 pub fn add_symbol(&mut self, symbol: Symbol) {
279 let name = symbol.name.clone();
280 if let Some(scope) = self.scopes.get_mut(&symbol.scope_id) {
281 scope.symbols.insert(name.clone());
282 }
283 self.symbols.entry(name).or_default().push(symbol);
284 }
285
286 /// Add a symbol reference to the table.
287 pub fn add_reference(&mut self, reference: SymbolReference) {
288 let name = reference.name.clone();
289 self.references.entry(name).or_default().push(reference);
290 }
291
292 /// Find symbol definitions visible from a given scope.
293 pub fn find_symbol(&self, name: &str, from_scope: ScopeId, kind: SymbolKind) -> Vec<&Symbol> {
294 let mut results = Vec::new();
295 let mut current_scope_id = Some(from_scope);
296
297 // Walk up the scope chain
298 while let Some(scope_id) = current_scope_id {
299 if let Some(scope) = self.scopes.get(&scope_id) {
300 // Check if symbol is defined in this scope
301 if scope.symbols.contains(name)
302 && let Some(symbols) = self.symbols.get(name)
303 {
304 for symbol in symbols {
305 if symbol.scope_id == scope_id && symbol.kind == kind {
306 results.push(symbol);
307 }
308 }
309 }
310
311 // For 'our' variables, also check package scope
312 if scope.kind != ScopeKind::Package
313 && let Some(symbols) = self.symbols.get(name)
314 {
315 for symbol in symbols {
316 if symbol.declaration.as_deref() == Some("our") && symbol.kind == kind {
317 results.push(symbol);
318 }
319 }
320 }
321
322 current_scope_id = scope.parent;
323 } else {
324 break;
325 }
326 }
327
328 results
329 }
330
331 /// Get all references to a symbol.
332 pub fn find_references(&self, symbol: &Symbol) -> Vec<&SymbolReference> {
333 self.references
334 .get(&symbol.name)
335 .map(|refs| refs.iter().filter(|r| r.kind == symbol.kind).collect())
336 .unwrap_or_default()
337 }
338
339 /// Get all symbols in the table.
340 pub fn all_symbols(&self) -> impl Iterator<Item = &Symbol> {
341 self.symbols.values().flatten()
342 }
343
344 /// Get all references in the table.
345 pub fn all_references(&self) -> impl Iterator<Item = &SymbolReference> {
346 self.references.values().flatten()
347 }
348
349 /// Get a scope by ID.
350 pub fn get_scope(&self, id: ScopeId) -> Option<&Scope> {
351 self.scopes.get(&id)
352 }
353}
354
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a definition in the global scope of package `main`, spanning
    /// the source range `0..end`.
    fn global_symbol(
        name: &str,
        kind: SymbolKind,
        end: usize,
        declaration: Option<&str>,
    ) -> Symbol {
        Symbol {
            name: name.to_string(),
            qualified_name: format!("main::{name}"),
            kind,
            location: SourceLocation { start: 0, end },
            scope_id: 0,
            declaration: declaration.map(str::to_string),
            documentation: None,
            attributes: Vec::new(),
        }
    }

    #[test]
    fn test_symbol_table_creation() {
        // A fresh table starts in the global scope of package `main`.
        let table = SymbolTable::new();
        assert_eq!(table.current_scope(), 0);
        assert_eq!(table.current_package(), "main");
        assert!(table.scopes.contains_key(&0));
    }

    #[test]
    fn test_add_symbol() {
        let mut table = SymbolTable::new();
        table.add_symbol(global_symbol("foo", SymbolKind::Subroutine, 10, None));

        // The definition is filed under its bare name, exactly once.
        let definitions = table.symbols.get("foo");
        assert!(definitions.is_some());
        assert_eq!(definitions.map(|defs| defs.len()), Some(1));
    }

    #[test]
    fn test_scope_management() {
        let mut table = SymbolTable::new();

        // Entering a subroutine makes its scope the current one.
        let sub_span = SourceLocation { start: 10, end: 100 };
        let sub_scope = table.push_scope(ScopeKind::Subroutine, sub_span);
        assert_eq!(table.current_scope(), sub_scope);

        // A nested block takes over as the current scope.
        let block_span = SourceLocation { start: 20, end: 80 };
        let block_scope = table.push_scope(ScopeKind::Block, block_span);
        assert_eq!(table.current_scope(), block_scope);

        // Leaving the scopes restores the subroutine scope first, then the
        // global scope.
        for expected in [sub_scope, 0] {
            table.pop_scope();
            assert_eq!(table.current_scope(), expected);
        }
    }

    #[test]
    fn test_find_symbol() {
        let mut table = SymbolTable::new();

        // A lexical scalar defined at file level...
        table.add_symbol(global_symbol("x", SymbolKind::scalar(), 5, Some("my")));

        // ...is found exactly once when looking it up from the global scope.
        let found = table.find_symbol("x", 0, SymbolKind::scalar());
        assert_eq!(found.len(), 1);
    }
}