perl_workspace/workspace/
workspace_index.rs

1//! Workspace-wide symbol index for fast cross-file lookups in Perl LSP.
2//!
3//! This module provides efficient indexing of symbols across an entire Perl workspace,
4//! enabling enterprise-grade features like find-references, rename refactoring, and
5//! workspace symbol search with ≤1ms response times.
6//!
7//! # LSP Workflow Integration
8//!
9//! Core component in the Parse → Index → Navigate → Complete → Analyze pipeline:
10//! 1. **Parse**: AST generation from Perl source files
11//! 2. **Index**: Workspace symbol table construction with dual indexing strategy
12//! 3. **Navigate**: Cross-file symbol resolution and go-to-definition
13//! 4. **Complete**: Context-aware completion with workspace symbol awareness
14//! 5. **Analyze**: Cross-reference analysis and workspace refactoring operations
15//!
16//! # Performance Characteristics
17//!
18//! - **Symbol indexing**: O(n) where n is total workspace symbols
19//! - **Symbol lookup**: O(1) average with hash table indexing
20//! - **Cross-file queries**: <50μs for typical workspace sizes
21//! - **Memory usage**: ~1MB per 10K symbols with optimized storage
22//! - **Incremental updates**: ≤1ms for file-level symbol changes
23//! - **Large workspace scaling**: Designed to scale to 50K+ files and large codebases
24//! - **Benchmark targets**: <50μs lookups and ≤1ms incremental updates at scale
25//!
26//! # Dual Indexing Strategy
27//!
28//! Implements dual indexing for comprehensive Perl symbol resolution:
29//! - **Qualified names**: `Package::function` for explicit references
30//! - **Bare names**: `function` for context-dependent resolution
31//! - **98% reference coverage**: Handles both qualified and unqualified calls
32//! - **Automatic deduplication**: Prevents duplicate results in queries
33//!
34//! # Usage Examples
35//!
36//! ```rust
37//! use perl_workspace::workspace::workspace_index::WorkspaceIndex;
38//! use url::Url;
39//!
40//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
41//! let index = WorkspaceIndex::new();
42//!
43//! // Index a Perl file
44//! let uri = Url::parse("file:///example.pl")?;
45//! let code = "package MyPackage;\nsub example { return 42; }";
46//! index.index_file(uri, code.to_string())?;
47//!
48//! // Find symbol definitions
49//! let definition = index.find_definition("MyPackage::example");
50//! assert!(definition.is_some());
51//!
52//! // Workspace symbol search
53//! let symbols = index.find_symbols("example");
54//! assert!(!symbols.is_empty());
55//! # Ok(())
56//! # }
57//! ```
58//!
59//! # Related Modules
60//!
61//! See also the symbol extraction, reference finding, and semantic token classification
62//! modules in the workspace index implementation.
63
64use crate::Parser;
65use crate::ast::{Node, NodeKind};
66use crate::document_store::{Document, DocumentStore};
67use crate::position::{Position, Range};
68use crate::workspace::monitoring::IndexInstrumentation;
69use parking_lot::RwLock;
70use perl_position_tracking::{WireLocation, WirePosition, WireRange};
71use perl_semantic_facts::{
72    AnchorFact, AnchorId, Confidence, EdgeFact, EntityFact, EntityId, EntityKind, FileId,
73    Provenance,
74};
75use serde::{Deserialize, Serialize};
76use std::collections::hash_map::DefaultHasher;
77use std::collections::{HashMap, HashSet};
78use std::hash::{Hash, Hasher};
79use std::path::Path;
80use std::sync::Arc;
81use std::time::Instant;
82use url::Url;
83
84use crate::semantic::imports::ImportExportIndex;
85pub use crate::semantic::invalidation::ShardReplaceResult;
86use crate::semantic::invalidation::{ShardCategoryHashes, plan_shard_replacement};
87use crate::semantic::references::ReferenceIndex;
88pub use crate::workspace::monitoring::{
89    DegradationReason, EarlyExitReason, EarlyExitRecord, IndexInstrumentationSnapshot,
90    IndexMetrics, IndexPerformanceCaps, IndexPhase, IndexPhaseTransition, IndexResourceLimits,
91    IndexStateKind, IndexStateTransition, ResourceKind,
92};
93use perl_symbol::surface::decl::extract_symbol_decls;
94use perl_symbol::surface::facts::{symbol_decls_to_semantic_facts, symbol_refs_to_semantic_facts};
95use perl_symbol::surface::r#ref::extract_symbol_refs;
96
97// Re-export URI utilities for backward compatibility
98#[cfg(not(target_arch = "wasm32"))]
99/// URI ↔ filesystem helpers used during Index/Analyze workflows.
100pub use perl_uri::{fs_path_to_uri, uri_to_fs_path};
101/// URI inspection helpers used during Index/Analyze workflows.
102pub use perl_uri::{is_file_uri, is_special_scheme, uri_extension, uri_key};
103
104// ============================================================================
105// Index Lifecycle Types (Index Lifecycle v1 Specification)
106// ============================================================================
107
/// Index readiness state - explicit lifecycle management
///
/// Represents the current operational state of the workspace index, enabling
/// LSP handlers to provide appropriate responses based on index availability.
/// This state machine prevents blocking operations and ensures graceful
/// degradation when the index is not fully ready.
///
/// # State Transitions
///
/// - `Building` → `Ready`: Workspace scan completes successfully
/// - `Building` → `Degraded`: Scan timeout, IO error, or resource limit
/// - `Ready` → `Building`: Workspace folder change or file watching events
/// - `Ready` → `Degraded`: Parse storm or IO error
///   (storm threshold is defined by `IndexMetrics`; the ">10 pending" figure
///   quoted historically is not visible in this file — TODO confirm)
/// - `Degraded` → `Building`: Recovery attempt after cooldown
/// - `Degraded` → `Ready`: Successful re-scan after recovery
///
/// # Invariants
///
/// - During a single build attempt, `phase` is expected to advance monotonically
///   (`Idle` → `Scanning` → `Indexing`).
///   NOTE(review): this is a caller-side convention; `IndexCoordinator::transition_to_scanning`
///   and `IndexCoordinator::update_scan_progress` set `Scanning` from any phase.
/// - `indexed_count` must not exceed `total_count`; callers should keep totals updated
///   (the coordinator stores the values it is given without checking them).
/// - `Ready` and `Degraded` counts are snapshots captured at transition time.
///
/// # Usage
///
/// ```rust,ignore
/// use perl_parser::workspace_index::{IndexPhase, IndexState};
/// use std::time::Instant;
///
/// let state = IndexState::Building {
///     phase: IndexPhase::Indexing,
///     indexed_count: 50,
///     total_count: 100,
///     started_at: Instant::now(),
/// };
/// ```
#[derive(Clone, Debug)]
pub enum IndexState {
    /// Index is being constructed (workspace scan in progress)
    Building {
        /// Current build phase (Idle → Scanning → Indexing)
        phase: IndexPhase,
        /// Files indexed so far
        indexed_count: usize,
        /// Total files discovered
        total_count: usize,
        /// Instant at which the current build attempt started; the coordinator
        /// measures scan timeouts against it
        started_at: Instant,
    },

    /// Index is consistent and ready for queries
    Ready {
        /// Total symbols indexed
        symbol_count: usize,
        /// Total files indexed
        file_count: usize,
        /// Timestamp of last successful index
        completed_at: Instant,
    },

    /// Index is serving but degraded
    Degraded {
        /// Why we degraded
        reason: DegradationReason,
        /// Symbol count carried over from the state that was degraded
        /// (the coordinator records 0 when degrading from `Building`)
        available_symbols: usize,
        /// When degradation occurred
        since: Instant,
    },
}
178
179impl IndexState {
180    /// Return the coarse state kind for instrumentation and routing decisions
181    pub fn kind(&self) -> IndexStateKind {
182        match self {
183            IndexState::Building { .. } => IndexStateKind::Building,
184            IndexState::Ready { .. } => IndexStateKind::Ready,
185            IndexState::Degraded { .. } => IndexStateKind::Degraded,
186        }
187    }
188
189    /// Return the current build phase when in `Building` state
190    pub fn phase(&self) -> Option<IndexPhase> {
191        match self {
192            IndexState::Building { phase, .. } => Some(*phase),
193            _ => None,
194        }
195    }
196
197    /// Timestamp of when the current state began
198    pub fn state_started_at(&self) -> Instant {
199        match self {
200            IndexState::Building { started_at, .. } => *started_at,
201            IndexState::Ready { completed_at, .. } => *completed_at,
202            IndexState::Degraded { since, .. } => *since,
203        }
204    }
205}
206
/// Coordinates index lifecycle, state transitions, and handler queries
///
/// The IndexCoordinator wraps `WorkspaceIndex` with explicit state management,
/// enabling LSP handlers to query the index readiness and implement appropriate
/// fallback behavior when the index is not fully ready.
///
/// # Architecture
///
/// ```text
/// LspServer
///   └── IndexCoordinator
///         ├── state: Arc<RwLock<IndexState>>
///         ├── index: Arc<WorkspaceIndex>
///         ├── limits: IndexResourceLimits
///         ├── caps: IndexPerformanceCaps
///         ├── metrics: IndexMetrics
///         └── instrumentation: IndexInstrumentation
/// ```
///
/// # State Management
///
/// The coordinator manages three states:
/// - `Building`: Initial scan or recovery in progress
/// - `Ready`: Fully indexed and available for queries
/// - `Degraded`: Available but with reduced functionality
///
/// # Performance Characteristics
///
/// - State checks take a short read lock and clone the state, so callers
///   never hold the lock while acting on the snapshot
/// - State transitions take the write lock (expected to be rare)
/// - NOTE(review): earlier revisions quoted concrete figures here (<100ns reads,
///   <1μs transitions, <10ns degradation checks, zero-overhead dispatch in `Ready`);
///   nothing in this file measures them — treat them as targets to confirm
///
/// # Usage
///
/// ```rust,ignore
/// use perl_parser::workspace_index::{IndexCoordinator, IndexState};
///
/// let coordinator = IndexCoordinator::new();
/// assert!(matches!(coordinator.state(), IndexState::Building { .. }));
///
/// // Transition to ready after indexing
/// coordinator.transition_to_ready(100, 5000);
/// assert!(matches!(coordinator.state(), IndexState::Ready { .. }));
///
/// // Query with degradation handling
/// let _result = coordinator.query(
///     |index| index.find_definition("my_function"), // full query
///     |_index| None                                 // partial fallback
/// );
/// ```
pub struct IndexCoordinator {
    /// Current index state (RwLock for state transitions)
    state: Arc<RwLock<IndexState>>,

    /// The actual workspace index
    index: Arc<WorkspaceIndex>,

    /// Resource limits configuration
    ///
    /// Enforces bounded resource usage to prevent unbounded memory growth:
    /// - max_files: Triggers degradation when file count exceeds limit
    /// - max_total_symbols: Triggers degradation when symbol count exceeds limit
    /// - max_scan_duration_ms: Triggers degradation when a build runs longer than this
    /// - max_symbols_per_file: presumably used for per-file validation during
    ///   indexing; not referenced in the visible coordinator code — TODO confirm
    limits: IndexResourceLimits,

    /// Performance caps for early-exit heuristics
    caps: IndexPerformanceCaps,

    /// Runtime metrics for degradation detection (pending-parse counter and
    /// parse-storm check)
    metrics: IndexMetrics,

    /// Instrumentation for lifecycle transitions and durations
    instrumentation: IndexInstrumentation,
}
282
283impl std::fmt::Debug for IndexCoordinator {
284    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
285        f.debug_struct("IndexCoordinator")
286            .field("state", &*self.state.read())
287            .field("limits", &self.limits)
288            .field("caps", &self.caps)
289            .finish_non_exhaustive()
290    }
291}
292
293impl IndexCoordinator {
294    /// Create a new coordinator in Building state
295    ///
296    /// Initializes the coordinator with default resource limits and
297    /// an empty workspace index ready for initial scan.
298    ///
299    /// # Returns
300    ///
301    /// A coordinator initialized in `IndexState::Building`.
302    ///
303    /// # Examples
304    ///
305    /// ```rust,ignore
306    /// use perl_parser::workspace_index::IndexCoordinator;
307    ///
308    /// let coordinator = IndexCoordinator::new();
309    /// ```
310    pub fn new() -> Self {
311        Self {
312            state: Arc::new(RwLock::new(IndexState::Building {
313                phase: IndexPhase::Idle,
314                indexed_count: 0,
315                total_count: 0,
316                started_at: Instant::now(),
317            })),
318            index: Arc::new(WorkspaceIndex::new()),
319            limits: IndexResourceLimits::default(),
320            caps: IndexPerformanceCaps::default(),
321            metrics: IndexMetrics::new(),
322            instrumentation: IndexInstrumentation::new(),
323        }
324    }
325
326    /// Create a coordinator with custom resource limits
327    ///
328    /// # Arguments
329    ///
330    /// * `limits` - Custom resource limits for this workspace
331    ///
332    /// # Returns
333    ///
334    /// A coordinator configured with the provided resource limits.
335    ///
336    /// # Examples
337    ///
338    /// ```rust,ignore
339    /// use perl_parser::workspace_index::{IndexCoordinator, IndexResourceLimits};
340    ///
341    /// let limits = IndexResourceLimits::default();
342    /// let coordinator = IndexCoordinator::with_limits(limits);
343    /// ```
344    pub fn with_limits(limits: IndexResourceLimits) -> Self {
345        Self {
346            state: Arc::new(RwLock::new(IndexState::Building {
347                phase: IndexPhase::Idle,
348                indexed_count: 0,
349                total_count: 0,
350                started_at: Instant::now(),
351            })),
352            index: Arc::new(WorkspaceIndex::new()),
353            limits,
354            caps: IndexPerformanceCaps::default(),
355            metrics: IndexMetrics::new(),
356            instrumentation: IndexInstrumentation::new(),
357        }
358    }
359
360    /// Create a coordinator with custom limits and performance caps
361    ///
362    /// # Arguments
363    ///
364    /// * `limits` - Resource limits for this workspace
365    /// * `caps` - Performance caps for indexing budgets
366    pub fn with_limits_and_caps(limits: IndexResourceLimits, caps: IndexPerformanceCaps) -> Self {
367        Self {
368            state: Arc::new(RwLock::new(IndexState::Building {
369                phase: IndexPhase::Idle,
370                indexed_count: 0,
371                total_count: 0,
372                started_at: Instant::now(),
373            })),
374            index: Arc::new(WorkspaceIndex::new()),
375            limits,
376            caps,
377            metrics: IndexMetrics::new(),
378            instrumentation: IndexInstrumentation::new(),
379        }
380    }
381
382    /// Get current state (lock-free read via clone)
383    ///
384    /// Returns a cloned copy of the current state for lock-free access
385    /// in hot path LSP handlers.
386    ///
387    /// # Returns
388    ///
389    /// The current `IndexState` snapshot.
390    ///
391    /// # Examples
392    ///
393    /// ```rust,ignore
394    /// use perl_parser::workspace_index::{IndexCoordinator, IndexState};
395    ///
396    /// let coordinator = IndexCoordinator::new();
397    /// match coordinator.state() {
398    ///     IndexState::Ready { .. } => {
399    ///         // Full query path
400    ///     }
401    ///     _ => {
402    ///         // Degraded/building fallback
403    ///     }
404    /// }
405    /// ```
406    pub fn state(&self) -> IndexState {
407        self.state.read().clone()
408    }
409
410    /// Get reference to the underlying workspace index
411    ///
412    /// Provides direct access to the `WorkspaceIndex` for operations
413    /// that don't require state checking (e.g., document store access).
414    ///
415    /// # Returns
416    ///
417    /// A shared reference to the underlying workspace index.
418    ///
419    /// # Examples
420    ///
421    /// ```rust,ignore
422    /// use perl_parser::workspace_index::IndexCoordinator;
423    ///
424    /// let coordinator = IndexCoordinator::new();
425    /// let _index = coordinator.index();
426    /// ```
427    pub fn index(&self) -> &Arc<WorkspaceIndex> {
428        &self.index
429    }
430
431    /// Access the configured resource limits
432    pub fn limits(&self) -> &IndexResourceLimits {
433        &self.limits
434    }
435
436    /// Access the configured performance caps
437    pub fn performance_caps(&self) -> &IndexPerformanceCaps {
438        &self.caps
439    }
440
441    /// Snapshot lifecycle instrumentation (durations, transitions, early exits)
442    pub fn instrumentation_snapshot(&self) -> IndexInstrumentationSnapshot {
443        self.instrumentation.snapshot()
444    }
445
446    /// Notify of file change (may trigger state transition)
447    ///
448    /// Increments the pending parse count and may transition to degraded
449    /// state if a parse storm is detected.
450    ///
451    /// # Arguments
452    ///
453    /// * `_uri` - URI of the changed file (reserved for future use).
454    ///
455    /// # Returns
456    ///
457    /// Nothing. Updates coordinator metrics and state for the LSP workflow.
458    ///
459    /// # Examples
460    ///
461    /// ```rust,ignore
462    /// use perl_parser::workspace_index::IndexCoordinator;
463    ///
464    /// let coordinator = IndexCoordinator::new();
465    /// coordinator.notify_change("file:///example.pl");
466    /// ```
467    pub fn notify_change(&self, _uri: &str) {
468        let pending = self.metrics.increment_pending_parses();
469
470        // Check for parse storm
471        if self.metrics.is_parse_storm() {
472            self.transition_to_degraded(DegradationReason::ParseStorm { pending_parses: pending });
473        }
474    }
475
476    /// Notify parse completion for the Index/Analyze workflow stages.
477    ///
478    /// Decrements the pending parse count, enforces resource limits, and may
479    /// attempt recovery when parse storms clear.
480    ///
481    /// # Arguments
482    ///
483    /// * `_uri` - URI of the parsed file (reserved for future use).
484    ///
485    /// # Returns
486    ///
487    /// Nothing. Updates coordinator metrics and state for the LSP workflow.
488    ///
489    /// # Examples
490    ///
491    /// ```rust,ignore
492    /// use perl_parser::workspace_index::IndexCoordinator;
493    ///
494    /// let coordinator = IndexCoordinator::new();
495    /// coordinator.notify_parse_complete("file:///example.pl");
496    /// ```
497    pub fn notify_parse_complete(&self, _uri: &str) {
498        let pending = self.metrics.decrement_pending_parses();
499
500        // Check for recovery from parse storm
501        if pending == 0 {
502            if let IndexState::Degraded { reason: DegradationReason::ParseStorm { .. }, .. } =
503                self.state()
504            {
505                // Attempt recovery - transition back to Building for re-scan
506                let mut state = self.state.write();
507                let from_kind = state.kind();
508                self.instrumentation.record_state_transition(from_kind, IndexStateKind::Building);
509                *state = IndexState::Building {
510                    phase: IndexPhase::Idle,
511                    indexed_count: 0,
512                    total_count: 0,
513                    started_at: Instant::now(),
514                };
515            }
516        }
517
518        // Enforce resource limits after parse completion
519        self.enforce_limits();
520    }
521
522    /// Transition to Ready state
523    ///
524    /// Marks the index as fully ready for queries after successful workspace
525    /// scan. Records the file count, symbol count, and completion timestamp.
526    /// Enforces resource limits after transition.
527    ///
528    /// # State Transition Guards
529    ///
530    /// Only valid transitions:
531    /// - `Building` → `Ready` (normal completion)
532    /// - `Degraded` → `Ready` (recovery after fix)
533    ///
534    /// # Arguments
535    ///
536    /// * `file_count` - Total number of files indexed
537    /// * `symbol_count` - Total number of symbols extracted
538    ///
539    /// # Returns
540    ///
541    /// Nothing. The coordinator state is updated in-place.
542    ///
543    /// # Examples
544    ///
545    /// ```rust,ignore
546    /// use perl_parser::workspace_index::IndexCoordinator;
547    ///
548    /// let coordinator = IndexCoordinator::new();
549    /// coordinator.transition_to_ready(100, 5000);
550    /// ```
551    pub fn transition_to_ready(&self, file_count: usize, symbol_count: usize) {
552        let mut state = self.state.write();
553        let from_kind = state.kind();
554
555        // State transition guard: validate current state allows transition to Ready
556        match &*state {
557            IndexState::Building { .. } | IndexState::Degraded { .. } => {
558                // Valid transition - proceed
559                *state =
560                    IndexState::Ready { symbol_count, file_count, completed_at: Instant::now() };
561            }
562            IndexState::Ready { .. } => {
563                // Already Ready - update metrics but don't log as transition
564                *state =
565                    IndexState::Ready { symbol_count, file_count, completed_at: Instant::now() };
566            }
567        }
568        self.instrumentation.record_state_transition(from_kind, IndexStateKind::Ready);
569        drop(state); // Release write lock before checking limits
570
571        // Enforce resource limits after transition
572        self.enforce_limits();
573    }
574
575    /// Transition to Scanning phase (Idle → Scanning)
576    ///
577    /// Resets build counters and marks the index as scanning workspace folders.
578    pub fn transition_to_scanning(&self) {
579        let mut state = self.state.write();
580        let from_kind = state.kind();
581
582        match &*state {
583            IndexState::Building { phase, indexed_count, total_count, started_at } => {
584                if *phase != IndexPhase::Scanning {
585                    self.instrumentation.record_phase_transition(*phase, IndexPhase::Scanning);
586                }
587                *state = IndexState::Building {
588                    phase: IndexPhase::Scanning,
589                    indexed_count: *indexed_count,
590                    total_count: *total_count,
591                    started_at: *started_at,
592                };
593            }
594            IndexState::Ready { .. } | IndexState::Degraded { .. } => {
595                self.instrumentation.record_state_transition(from_kind, IndexStateKind::Building);
596                self.instrumentation
597                    .record_phase_transition(IndexPhase::Idle, IndexPhase::Scanning);
598                *state = IndexState::Building {
599                    phase: IndexPhase::Scanning,
600                    indexed_count: 0,
601                    total_count: 0,
602                    started_at: Instant::now(),
603                };
604            }
605        }
606    }
607
608    /// Update scanning progress with the latest discovered file count
609    pub fn update_scan_progress(&self, total_count: usize) {
610        let mut state = self.state.write();
611        if let IndexState::Building { phase, indexed_count, started_at, .. } = &*state {
612            if *phase != IndexPhase::Scanning {
613                self.instrumentation.record_phase_transition(*phase, IndexPhase::Scanning);
614            }
615            *state = IndexState::Building {
616                phase: IndexPhase::Scanning,
617                indexed_count: *indexed_count,
618                total_count,
619                started_at: *started_at,
620            };
621        }
622    }
623
624    /// Transition to Indexing phase (Scanning → Indexing)
625    ///
626    /// Uses the discovered file count as the total index target.
627    pub fn transition_to_indexing(&self, total_count: usize) {
628        let mut state = self.state.write();
629        let from_kind = state.kind();
630
631        match &*state {
632            IndexState::Building { phase, indexed_count, started_at, .. } => {
633                if *phase != IndexPhase::Indexing {
634                    self.instrumentation.record_phase_transition(*phase, IndexPhase::Indexing);
635                }
636                *state = IndexState::Building {
637                    phase: IndexPhase::Indexing,
638                    indexed_count: *indexed_count,
639                    total_count,
640                    started_at: *started_at,
641                };
642            }
643            IndexState::Ready { .. } | IndexState::Degraded { .. } => {
644                self.instrumentation.record_state_transition(from_kind, IndexStateKind::Building);
645                self.instrumentation
646                    .record_phase_transition(IndexPhase::Idle, IndexPhase::Indexing);
647                *state = IndexState::Building {
648                    phase: IndexPhase::Indexing,
649                    indexed_count: 0,
650                    total_count,
651                    started_at: Instant::now(),
652                };
653            }
654        }
655    }
656
657    /// Transition to Building state (Indexing phase)
658    ///
659    /// Marks the index as indexing with a known total file count.
660    pub fn transition_to_building(&self, total_count: usize) {
661        let mut state = self.state.write();
662        let from_kind = state.kind();
663
664        // State transition guard: validate transition is allowed
665        match &*state {
666            IndexState::Degraded { .. } | IndexState::Ready { .. } => {
667                self.instrumentation.record_state_transition(from_kind, IndexStateKind::Building);
668                self.instrumentation
669                    .record_phase_transition(IndexPhase::Idle, IndexPhase::Indexing);
670                *state = IndexState::Building {
671                    phase: IndexPhase::Indexing,
672                    indexed_count: 0,
673                    total_count,
674                    started_at: Instant::now(),
675                };
676            }
677            IndexState::Building { phase, indexed_count, started_at, .. } => {
678                let mut next_phase = *phase;
679                if *phase == IndexPhase::Idle {
680                    self.instrumentation
681                        .record_phase_transition(IndexPhase::Idle, IndexPhase::Indexing);
682                    next_phase = IndexPhase::Indexing;
683                }
684                *state = IndexState::Building {
685                    phase: next_phase,
686                    indexed_count: *indexed_count,
687                    total_count,
688                    started_at: *started_at,
689                };
690            }
691        }
692    }
693
694    /// Update Building state progress for the Index/Analyze workflow stages.
695    ///
696    /// Increments the indexed file count and checks for scan timeouts.
697    ///
698    /// # Arguments
699    ///
700    /// * `indexed_count` - Number of files indexed so far.
701    ///
702    /// # Returns
703    ///
704    /// Nothing. Updates coordinator state and may transition to `Degraded`.
705    ///
706    /// # Examples
707    ///
708    /// ```rust,ignore
709    /// use perl_parser::workspace_index::IndexCoordinator;
710    ///
711    /// let coordinator = IndexCoordinator::new();
712    /// coordinator.transition_to_building(100);
713    /// coordinator.update_building_progress(1);
714    /// ```
715    pub fn update_building_progress(&self, indexed_count: usize) {
716        let mut state = self.state.write();
717
718        if let IndexState::Building { phase, started_at, total_count, .. } = &*state {
719            let elapsed = started_at.elapsed().as_millis() as u64;
720
721            // Check for scan timeout
722            if elapsed > self.limits.max_scan_duration_ms {
723                // Timeout exceeded - transition to degraded
724                drop(state);
725                self.transition_to_degraded(DegradationReason::ScanTimeout { elapsed_ms: elapsed });
726                return;
727            }
728
729            // Update progress
730            *state = IndexState::Building {
731                phase: *phase,
732                indexed_count,
733                total_count: *total_count,
734                started_at: *started_at,
735            };
736        }
737    }
738
739    /// Transition to Degraded state
740    ///
741    /// Marks the index as degraded with the specified reason. Preserves
742    /// the current symbol count (if available) to indicate partial
743    /// functionality remains.
744    ///
745    /// # Arguments
746    ///
747    /// * `reason` - Why the index degraded (ParseStorm, IoError, etc.)
748    ///
749    /// # Returns
750    ///
751    /// Nothing. The coordinator state is updated in-place.
752    ///
753    /// # Examples
754    ///
755    /// ```rust,ignore
756    /// use perl_parser::workspace_index::{DegradationReason, IndexCoordinator, ResourceKind};
757    ///
758    /// let coordinator = IndexCoordinator::new();
759    /// coordinator.transition_to_degraded(DegradationReason::ResourceLimit {
760    ///     kind: ResourceKind::MaxFiles,
761    /// });
762    /// ```
763    pub fn transition_to_degraded(&self, reason: DegradationReason) {
764        let mut state = self.state.write();
765        let from_kind = state.kind();
766
767        // Get available symbols count from current state
768        let available_symbols = match &*state {
769            IndexState::Ready { symbol_count, .. } => *symbol_count,
770            IndexState::Degraded { available_symbols, .. } => *available_symbols,
771            IndexState::Building { .. } => 0,
772        };
773
774        self.instrumentation.record_state_transition(from_kind, IndexStateKind::Degraded);
775        *state = IndexState::Degraded { reason, available_symbols, since: Instant::now() };
776    }
777
778    /// Check resource limits and return degradation reason if exceeded
779    ///
780    /// Examines current workspace index state against configured resource limits.
781    /// Returns the first exceeded limit found, enabling targeted degradation.
782    ///
783    /// # Returns
784    ///
785    /// * `Some(DegradationReason)` - Resource limit exceeded, contains specific limit type
786    /// * `None` - All limits within acceptable bounds
787    ///
788    /// # Checked Limits
789    ///
790    /// - `max_files`: Total number of indexed files
791    /// - `max_total_symbols`: Aggregate symbol count across workspace
792    ///
793    /// # Performance
794    ///
795    /// - Lock-free read of index state (<100ns)
796    /// - Symbol counting is O(n) where n is number of files
797    ///
798    /// Returns: `Some(DegradationReason)` when a limit is exceeded, otherwise `None`.
799    ///
800    /// # Examples
801    ///
802    /// ```rust,ignore
803    /// use perl_parser::workspace_index::IndexCoordinator;
804    ///
805    /// let coordinator = IndexCoordinator::new();
806    /// let _reason = coordinator.check_limits();
807    /// ```
808    pub fn check_limits(&self) -> Option<DegradationReason> {
809        let files = self.index.files.read();
810
811        // Check max_files limit
812        let file_count = files.len();
813        if file_count > self.limits.max_files {
814            return Some(DegradationReason::ResourceLimit { kind: ResourceKind::MaxFiles });
815        }
816
817        // Check max_total_symbols limit
818        let total_symbols: usize = files.values().map(|fi| fi.symbols.len()).sum();
819        if total_symbols > self.limits.max_total_symbols {
820            return Some(DegradationReason::ResourceLimit { kind: ResourceKind::MaxSymbols });
821        }
822
823        None
824    }
825
826    /// Enforce resource limits and trigger degradation if exceeded
827    ///
828    /// Checks current resource usage against configured limits and automatically
829    /// transitions to Degraded state if any limit is exceeded. This method should
830    /// be called after operations that modify index size (file additions, parse
831    /// completions, etc.).
832    ///
833    /// # State Transitions
834    ///
835    /// - `Ready` → `Degraded(ResourceLimit)` if limits exceeded
836    /// - `Building` → `Degraded(ResourceLimit)` if limits exceeded
837    ///
838    /// # Returns
839    ///
840    /// Nothing. The coordinator state is updated in-place when limits are exceeded.
841    ///
842    /// # Examples
843    ///
844    /// ```rust,ignore
845    /// use perl_parser::workspace_index::IndexCoordinator;
846    ///
847    /// let coordinator = IndexCoordinator::new();
848    /// // ... index some files ...
849    /// coordinator.enforce_limits();  // Check and degrade if needed
850    /// ```
851    pub fn enforce_limits(&self) {
852        if let Some(reason) = self.check_limits() {
853            self.transition_to_degraded(reason);
854        }
855    }
856
857    /// Record an early-exit event for indexing instrumentation
858    pub fn record_early_exit(
859        &self,
860        reason: EarlyExitReason,
861        elapsed_ms: u64,
862        indexed_files: usize,
863        total_files: usize,
864    ) {
865        self.instrumentation.record_early_exit(EarlyExitRecord {
866            reason,
867            elapsed_ms,
868            indexed_files,
869            total_files,
870        });
871    }
872
873    /// Query with automatic degradation handling
874    ///
875    /// Dispatches to full query if index is Ready, or partial query otherwise.
876    /// This pattern enables LSP handlers to provide appropriate responses
877    /// based on index state without explicit state checking.
878    ///
879    /// # Type Parameters
880    ///
881    /// * `T` - Return type of the query functions
882    /// * `F1` - Full query function type accepting `&WorkspaceIndex` and returning `T`
883    /// * `F2` - Partial query function type accepting `&WorkspaceIndex` and returning `T`
884    ///
885    /// # Arguments
886    ///
887    /// * `full_query` - Function to execute when index is Ready
888    /// * `partial_query` - Function to execute when index is Building/Degraded
889    ///
890    /// # Returns
891    ///
892    /// The value returned by the selected query function.
893    ///
894    /// # Examples
895    ///
896    /// ```rust,ignore
897    /// use perl_parser::workspace_index::IndexCoordinator;
898    ///
899    /// let coordinator = IndexCoordinator::new();
900    /// let locations = coordinator.query(
901    ///     |index| index.find_references("my_function"),  // Full workspace search
902    ///     |index| vec![]                                 // Empty fallback
903    /// );
904    /// ```
905    pub fn query<T, F1, F2>(&self, full_query: F1, partial_query: F2) -> T
906    where
907        F1: FnOnce(&WorkspaceIndex) -> T,
908        F2: FnOnce(&WorkspaceIndex) -> T,
909    {
910        match self.state() {
911            IndexState::Ready { .. } => full_query(&self.index),
912            _ => partial_query(&self.index),
913        }
914    }
915}
916
/// `Default` for the coordinator is the same as calling [`IndexCoordinator::new`].
impl Default for IndexCoordinator {
    /// Build a coordinator by delegating to `Self::new()`.
    fn default() -> Self {
        Self::new()
    }
}
922
923// ============================================================================
924// Symbol Indexing Types
925// ============================================================================
926
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
/// Symbol kinds for cross-file indexing during Index/Navigate workflows.
///
/// Stored in [`SymbolKey::kind`]; the type is `Copy` and hashable so it can
/// take part in a derived hash of the key.
pub enum SymKind {
    /// Variable symbol ($, @, or % sigil)
    Var,
    /// Subroutine definition (sub foo)
    Sub,
    /// Package declaration (package Foo)
    Pack,
}
937
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
/// A normalized symbol key for cross-file lookups in Index/Navigate workflows.
///
/// Equality and hashing are derived over all four fields, so two keys match
/// only when package, name, sigil, and kind all agree.
pub struct SymbolKey {
    /// Package name containing this symbol
    pub pkg: Arc<str>,
    /// Bare name without sigil prefix
    pub name: Arc<str>,
    /// Variable sigil ($, @, or %) if applicable; `None` when there is no sigil
    pub sigil: Option<char>,
    /// Kind of symbol (variable, subroutine, package)
    pub kind: SymKind,
}
950
/// Split a Perl variable name into its sigil and bare identifier.
///
/// Only a leading `$`, `@`, or `%` is treated as a sigil, and at most one
/// character is stripped. Any other input (including the empty string) is
/// returned unchanged with no sigil.
///
/// # Arguments
///
/// * `name` - Variable name from Perl source, with or without sigil.
///
/// # Returns
///
/// `(sigil, name)` tuple with the optional sigil and the remainder of the
/// input. The remainder is empty when the input is just a sigil.
///
/// # Examples
///
/// ```rust,ignore
/// use perl_parser::workspace_index::normalize_var;
///
/// assert_eq!(normalize_var("$count"), (Some('$'), "count"));
/// assert_eq!(normalize_var("process_emails"), (None, "process_emails"));
/// ```
pub fn normalize_var(name: &str) -> (Option<char>, &str) {
    let mut rest = name.chars();
    match rest.next() {
        // After consuming the sigil, the iterator's remainder is the bare name.
        Some(sigil @ ('$' | '@' | '%')) => (Some(sigil), rest.as_str()),
        // Empty input or no recognized sigil: hand the input back untouched.
        _ => (None, name),
    }
}
991
992// Using lsp_types for Position and Range
993
#[derive(Debug, Clone, PartialEq, Eq)]
/// Internal location type used during Navigate/Analyze workflows.
///
/// Pairs a file URI with a range inside that file. Equality is derived, so
/// two locations are equal only when both the URI and the range match.
pub struct Location {
    /// File URI where the symbol is located
    pub uri: String,
    /// Line and character range within the file
    pub range: Range,
}
1002
#[derive(Debug, Clone, PartialEq, Eq)]
/// Stable symbol identity returned by cross-file reference queries.
///
/// Carried in [`CrossFileReferenceQueryResult::symbol`].
pub struct SymbolIdentity {
    /// Canonical stable key for the symbol (qualified when available).
    pub stable_key: String,
    /// Bare symbol name.
    pub name: String,
    /// Fully qualified symbol name when available; `None` otherwise.
    pub qualified_name: Option<String>,
    /// Symbol kind (subroutine, package, variable, ...).
    pub kind: SymbolKind,
}
1015
#[derive(Debug, Clone, PartialEq, Eq)]
/// Read-only cross-file query result used by rename/safe-delete planners.
///
/// Groups the resolved symbol's identity, its definition site, and the
/// reference locations found for it.
pub struct CrossFileReferenceQueryResult {
    /// Identity for the resolved symbol.
    pub symbol: SymbolIdentity,
    /// Definition site for the resolved symbol.
    pub definition: Location,
    /// All reference locations (including definition) in deterministic order.
    pub references: Vec<Location>,
}
1026
#[derive(Debug, Clone, Serialize, Deserialize)]
/// A symbol in the workspace for Index/Navigate workflows.
///
/// Serializable via serde. When deserializing, a missing `has_body` field
/// falls back to `default_has_body()`.
pub struct WorkspaceSymbol {
    /// Symbol name without package qualification
    pub name: String,
    /// Type of symbol (subroutine, variable, package, etc.)
    pub kind: SymbolKind,
    /// File URI where the symbol is defined
    pub uri: String,
    /// Line and character range of the symbol definition
    pub range: Range,
    /// Fully qualified name including package (e.g., "Package::function")
    pub qualified_name: Option<String>,
    /// POD documentation associated with the symbol
    pub documentation: Option<String>,
    /// Name of the containing package or class
    pub container_name: Option<String>,
    /// Whether this symbol has a body (false for forward declarations)
    // Absent in serialized input => filled in by `default_has_body`.
    #[serde(default = "default_has_body")]
    pub has_body: bool,
    /// Workspace folder URI this symbol belongs to (for multi-root workspace support)
    pub workspace_folder_uri: Option<String>,
}
1050
/// Serde default for `WorkspaceSymbol::has_body`.
///
/// Always returns `true`, so a serialized symbol that lacks the field is
/// deserialized as having a body.
fn default_has_body() -> bool {
    true
}
1054
1055// Re-export the unified symbol types from perl-symbol
1056/// Symbol kind enums used during Index/Analyze workflows.
1057pub use perl_symbol::{SymbolKind, VarKind};
1058
#[derive(Debug, Clone)]
/// Reference to a symbol for Navigate/Analyze workflows.
///
/// Stored per symbol name in `FileIndex::references`.
pub struct SymbolReference {
    /// File URI where the reference occurs
    pub uri: String,
    /// Line and character range of the reference
    pub range: Range,
    /// How the symbol is being referenced (definition, usage, etc.)
    pub kind: ReferenceKind,
}
1069
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Classification of how a symbol is referenced in Navigate/Analyze workflows.
///
/// Attached to each [`SymbolReference`] through its `kind` field.
pub enum ReferenceKind {
    /// Symbol definition site (sub declaration, variable declaration)
    Definition,
    /// General usage of the symbol (function call, method call)
    Usage,
    /// Import via use statement
    Import,
    /// Variable read access
    Read,
    /// Variable write access (assignment target)
    Write,
}
1084
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
/// LSP-compliant workspace symbol for wire format in Navigate/Analyze workflows.
///
/// Serialized with camelCase field names; the two optional fields are left
/// out of the output entirely when they are `None`. Built from a
/// `WorkspaceSymbol` via the `From<&WorkspaceSymbol>` impl.
pub struct LspWorkspaceSymbol {
    /// Symbol name as displayed to the user
    pub name: String,
    /// LSP symbol kind number (see lsp_types::SymbolKind)
    pub kind: u32,
    /// Location of the symbol definition
    pub location: WireLocation,
    /// Name of the containing symbol (package, class)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub container_name: Option<String>,
    /// Workspace folder URI this symbol belongs to (for multi-root workspace disambiguation)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub workspace_folder_uri: Option<String>,
}
1102
1103impl From<&WorkspaceSymbol> for LspWorkspaceSymbol {
1104    fn from(sym: &WorkspaceSymbol) -> Self {
1105        let range = WireRange {
1106            start: WirePosition { line: sym.range.start.line, character: sym.range.start.column },
1107            end: WirePosition { line: sym.range.end.line, character: sym.range.end.column },
1108        };
1109
1110        Self {
1111            name: sym.name.clone(),
1112            kind: sym.kind.to_lsp_kind(),
1113            location: WireLocation { uri: sym.uri.clone(), range },
1114            container_name: sym.container_name.clone(),
1115            workspace_folder_uri: sym.workspace_folder_uri.clone(),
1116        }
1117    }
1118}
1119
/// File-level index data
///
/// One entry per indexed file, stored in `WorkspaceIndex::files`. All fields
/// are private to this module; `Default` yields an empty entry.
#[derive(Default, Clone)]
pub struct FileIndex {
    /// Canonical file URI for this index entry.
    source_uri: String,
    /// Symbols defined in this file
    symbols: Vec<WorkspaceSymbol>,
    /// References in this file (symbol name -> references)
    references: HashMap<String, Vec<SymbolReference>>,
    /// Dependencies (modules this file imports)
    dependencies: HashSet<String>,
    /// Content hash for early-exit optimization
    content_hash: u64,
    /// Workspace folder URI this file belongs to (for multi-root workspace support)
    folder_uri: Option<String>,
}
1136
/// Write-through semantic fact storage for one indexed file.
///
/// Stored in `WorkspaceIndex::fact_shards`, keyed by normalized URI. Holds
/// the four fact categories for the file plus a whole-file content hash and
/// an optional hash per category.
#[derive(Clone, Debug)]
pub struct FileFactShard {
    /// Canonical file URI for this shard.
    pub source_uri: String,
    /// Stable file identifier derived from normalized URI.
    pub file_id: FileId,
    /// Whole-file content hash used for stale-shard replacement.
    pub content_hash: u64,
    /// Optional hash for the `anchors` category, for change diagnostics.
    pub anchors_hash: Option<u64>,
    /// Optional hash for the `entities` category, for change diagnostics.
    pub entities_hash: Option<u64>,
    /// Optional hash for the `occurrences` category, for change diagnostics.
    pub occurrences_hash: Option<u64>,
    /// Optional hash for the `edges` category, for change diagnostics.
    pub edges_hash: Option<u64>,
    /// Anchor facts for this file.
    pub anchors: Vec<AnchorFact>,
    /// Entity facts for this file.
    pub entities: Vec<EntityFact>,
    /// Occurrence facts for this file.
    pub occurrences: Vec<perl_semantic_facts::OccurrenceFact>,
    /// Edge facts for this file.
    pub edges: Vec<EdgeFact>,
}
1163
/// Thread-safe workspace index
///
/// Each table is wrapped in its own `Arc<RwLock<..>>`, so the tables are
/// locked independently of one another. The `symbols` and
/// `global_references` maps are derived caches built from the per-file data
/// in `files`.
pub struct WorkspaceIndex {
    /// Index data per file URI (normalized key -> data)
    files: Arc<RwLock<HashMap<String, FileIndex>>>,
    /// Global symbol multimap (qualified/bare name -> ordered definition candidates)
    symbols: Arc<RwLock<HashMap<String, Vec<DefinitionCandidate>>>>,
    /// Global reference index (symbol name -> locations across all files)
    ///
    /// Aggregated from per-file `FileIndex::references` during `index_file()`.
    /// Provides O(1) lookup for `find_references()` instead of iterating all files.
    global_references: Arc<RwLock<HashMap<String, Vec<Location>>>>,
    /// Write-through semantic fact shards keyed by normalized URI.
    fact_shards: Arc<RwLock<HashMap<String, FileFactShard>>>,
    /// Semantic cross-file reference index (typed occurrences by name and entity).
    semantic_reference_index: Arc<RwLock<ReferenceIndex>>,
    /// Semantic cross-file import/export index.
    semantic_import_export_index: Arc<RwLock<ImportExportIndex>>,
    /// Document store for in-memory text
    document_store: DocumentStore,
    /// Workspace folder URIs for multi-root workspace support
    ///
    /// Used to determine which workspace folder a file belongs to for
    /// proper folder attribution in multi-root workspaces.
    workspace_folders: Arc<RwLock<Vec<String>>>,
}
1189
/// One possible definition site for a name in the global symbol multimap.
///
/// Several candidates may share a name; they are ordered with
/// `WorkspaceIndex::definition_candidate_sort_key` and exact duplicates are
/// removed with `dedup()` (derived equality over both fields).
#[derive(Debug, Clone, Eq, PartialEq)]
struct DefinitionCandidate {
    /// Where the symbol is defined.
    location: Location,
    /// Kind of the defined symbol; used to rank candidates.
    kind: SymbolKind,
}
1195
1196impl WorkspaceIndex {
1197    fn location_sort_key(location: &Location) -> (&str, u32, u32, u32, u32) {
1198        (
1199            location.uri.as_str(),
1200            location.range.start.line,
1201            location.range.start.column,
1202            location.range.end.line,
1203            location.range.end.column,
1204        )
1205    }
1206
1207    fn sort_locations_deterministically(locations: &mut [Location]) {
1208        locations.sort_by(|left, right| {
1209            Self::location_sort_key(left).cmp(&Self::location_sort_key(right))
1210        });
1211    }
1212
1213    fn definition_candidate_sort_key(
1214        candidate: &DefinitionCandidate,
1215    ) -> (u8, &str, u32, u32, u32, u32) {
1216        let rank = match candidate.kind {
1217            SymbolKind::Subroutine | SymbolKind::Method => 0,
1218            SymbolKind::Constant => 1,
1219            _ => 2,
1220        };
1221        (
1222            rank,
1223            candidate.location.uri.as_str(),
1224            candidate.location.range.start.line,
1225            candidate.location.range.start.column,
1226            candidate.location.range.end.line,
1227            candidate.location.range.end.column,
1228        )
1229    }
1230
1231    fn rebuild_symbol_cache(
1232        files: &HashMap<String, FileIndex>,
1233        symbols: &mut HashMap<String, Vec<DefinitionCandidate>>,
1234    ) {
1235        symbols.clear();
1236
1237        for file_index in files.values() {
1238            for symbol in &file_index.symbols {
1239                if let Some(ref qname) = symbol.qualified_name {
1240                    symbols.entry(qname.clone()).or_default().push(DefinitionCandidate {
1241                        location: Location { uri: symbol.uri.clone(), range: symbol.range },
1242                        kind: symbol.kind,
1243                    });
1244                }
1245                symbols.entry(symbol.name.clone()).or_default().push(DefinitionCandidate {
1246                    location: Location { uri: symbol.uri.clone(), range: symbol.range },
1247                    kind: symbol.kind,
1248                });
1249            }
1250        }
1251        for entries in symbols.values_mut() {
1252            entries.sort_by(|left, right| {
1253                Self::definition_candidate_sort_key(left)
1254                    .cmp(&Self::definition_candidate_sort_key(right))
1255            });
1256            entries.dedup();
1257        }
1258    }
1259
1260    /// Incrementally remove one file's symbols from the global cache,
1261    /// re-inserting shadowed symbols from remaining files.
1262    fn incremental_remove_symbols(
1263        files: &HashMap<String, FileIndex>,
1264        symbols: &mut HashMap<String, Vec<DefinitionCandidate>>,
1265        old_file_index: &FileIndex,
1266    ) {
1267        let mut affected_names: Vec<String> = Vec::new();
1268        for sym in &old_file_index.symbols {
1269            if let Some(ref qname) = sym.qualified_name {
1270                let mut remove_key = false;
1271                if let Some(entries) = symbols.get_mut(qname) {
1272                    entries.retain(|candidate| candidate.location.uri != sym.uri);
1273                    remove_key = entries.is_empty();
1274                }
1275                if remove_key {
1276                    symbols.remove(qname);
1277                    affected_names.push(qname.clone());
1278                }
1279            }
1280            let mut remove_key = false;
1281            if let Some(entries) = symbols.get_mut(&sym.name) {
1282                entries.retain(|candidate| candidate.location.uri != sym.uri);
1283                remove_key = entries.is_empty();
1284            }
1285            if remove_key {
1286                symbols.remove(&sym.name);
1287                affected_names.push(sym.name.clone());
1288            }
1289        }
1290        if !affected_names.is_empty() {
1291            symbols.clear();
1292            for file_index in files
1293                .values()
1294                .filter(|file_index| file_index.source_uri != old_file_index.source_uri)
1295            {
1296                for symbol in &file_index.symbols {
1297                    if let Some(ref qname) = symbol.qualified_name {
1298                        symbols.entry(qname.clone()).or_default().push(DefinitionCandidate {
1299                            location: Location { uri: symbol.uri.clone(), range: symbol.range },
1300                            kind: symbol.kind,
1301                        });
1302                    }
1303                    symbols.entry(symbol.name.clone()).or_default().push(DefinitionCandidate {
1304                        location: Location { uri: symbol.uri.clone(), range: symbol.range },
1305                        kind: symbol.kind,
1306                    });
1307                }
1308            }
1309            for entries in symbols.values_mut() {
1310                entries.sort_by(|left, right| {
1311                    Self::definition_candidate_sort_key(left)
1312                        .cmp(&Self::definition_candidate_sort_key(right))
1313                });
1314                entries.dedup();
1315            }
1316        }
1317    }
1318
1319    /// Incrementally add one file's symbols to the global cache.
1320    fn incremental_add_symbols(
1321        symbols: &mut HashMap<String, Vec<DefinitionCandidate>>,
1322        file_index: &FileIndex,
1323    ) {
1324        for sym in &file_index.symbols {
1325            if let Some(ref qname) = sym.qualified_name {
1326                symbols.entry(qname.clone()).or_default().push(DefinitionCandidate {
1327                    location: Location { uri: sym.uri.clone(), range: sym.range },
1328                    kind: sym.kind,
1329                });
1330            }
1331            symbols.entry(sym.name.clone()).or_default().push(DefinitionCandidate {
1332                location: Location { uri: sym.uri.clone(), range: sym.range },
1333                kind: sym.kind,
1334            });
1335        }
1336        for entries in symbols.values_mut() {
1337            entries.sort_by(|left, right| {
1338                Self::definition_candidate_sort_key(left)
1339                    .cmp(&Self::definition_candidate_sort_key(right))
1340            });
1341            entries.dedup();
1342        }
1343    }
1344
1345    /// Determine the workspace folder URI for a given file URI.
1346    ///
1347    /// Returns the workspace folder URI that contains the given file URI.
1348    /// This is used for multi-root workspace support to properly attribute
1349    /// files and symbols to their originating workspace folder.
1350    ///
1351    /// # Arguments
1352    ///
1353    /// * `file_uri` - The file URI to find the containing workspace folder for
1354    ///
1355    /// # Returns
1356    ///
1357    /// `Some(folder_uri)` if the file is within a workspace folder, `None` otherwise.
1358    ///
1359    /// # Examples
1360    ///
1361    /// ```rust,ignore
1362    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
1363    ///
1364    /// let index = WorkspaceIndex::new();
1365    /// index.set_workspace_folders(vec![
1366    ///     "file:///project1".to_string(),
1367    ///     "file:///project2".to_string(),
1368    /// ]);
1369    ///
1370    /// let folder = index.determine_folder_uri("file:///project1/src/main.pl");
1371    /// assert_eq!(folder, Some("file:///project1".to_string()));
1372    /// ```
1373    fn determine_folder_uri(&self, file_uri: &str) -> Option<String> {
1374        let folders = self.workspace_folders.read();
1375        let mut best_match: Option<&String> = None;
1376        for folder_uri in folders.iter() {
1377            // Check if the file URI starts with the folder URI
1378            // We need to ensure proper URI matching (with or without trailing slash)
1379            let folder_with_slash = if folder_uri.ends_with('/') {
1380                folder_uri.clone()
1381            } else {
1382                format!("{}/", folder_uri)
1383            };
1384            if file_uri.starts_with(&folder_with_slash) || file_uri == folder_uri {
1385                match best_match {
1386                    Some(existing) if existing.len() >= folder_uri.len() => {}
1387                    _ => best_match = Some(folder_uri),
1388                }
1389            }
1390        }
1391        best_match.cloned()
1392    }
1393
1394    fn find_definition_in_files(
1395        files: &HashMap<String, FileIndex>,
1396        symbol_name: &str,
1397        uri_filter: Option<&str>,
1398    ) -> Option<(Location, String)> {
1399        let mut candidates: Vec<(Location, String)> = Vec::new();
1400        for file_index in files.values() {
1401            if let Some(filter) = uri_filter
1402                && file_index.symbols.first().is_some_and(|symbol| symbol.uri != filter)
1403            {
1404                continue;
1405            }
1406
1407            for symbol in &file_index.symbols {
1408                if symbol.name == symbol_name
1409                    || symbol.qualified_name.as_deref() == Some(symbol_name)
1410                {
1411                    candidates.push((
1412                        Location { uri: symbol.uri.clone(), range: symbol.range },
1413                        symbol.uri.clone(),
1414                    ));
1415                }
1416            }
1417        }
1418
1419        candidates.sort_by(|left, right| {
1420            Self::location_sort_key(&left.0).cmp(&Self::location_sort_key(&right.0))
1421        });
1422        candidates.into_iter().next()
1423    }
1424
1425    fn find_symbol_by_definition(
1426        &self,
1427        definition: &Location,
1428        symbol_name: &str,
1429    ) -> Option<WorkspaceSymbol> {
1430        let files = self.files.read();
1431        files
1432            .values()
1433            .flat_map(|file_index| file_index.symbols.iter())
1434            .filter(|symbol| {
1435                symbol.uri == definition.uri
1436                    && symbol.range == definition.range
1437                    && (symbol.name == symbol_name
1438                        || symbol.qualified_name.as_deref() == Some(symbol_name))
1439            })
1440            .min_by(|left, right| {
1441                (
1442                    left.qualified_name.as_deref().unwrap_or_default(),
1443                    left.name.as_str(),
1444                    left.kind.to_lsp_kind(),
1445                )
1446                    .cmp(&(
1447                        right.qualified_name.as_deref().unwrap_or_default(),
1448                        right.name.as_str(),
1449                        right.kind.to_lsp_kind(),
1450                    ))
1451            })
1452            .cloned()
1453    }
1454
1455    fn has_unique_symbol_name_and_kind(&self, target: &WorkspaceSymbol) -> bool {
1456        let files = self.files.read();
1457        files
1458            .values()
1459            .flat_map(|file_index| file_index.symbols.iter())
1460            .filter(|symbol| symbol.name == target.name && symbol.kind == target.kind)
1461            .take(2)
1462            .count()
1463            == 1
1464    }
1465
1466    fn collect_symbol_references(&self, symbol: &WorkspaceSymbol) -> Vec<Location> {
1467        let mut names_to_query: Vec<&str> = Vec::new();
1468        if let Some(qualified_name) = symbol.qualified_name.as_deref() {
1469            names_to_query.push(qualified_name);
1470            if self.has_unique_symbol_name_and_kind(symbol) {
1471                names_to_query.push(symbol.name.as_str());
1472            }
1473        } else {
1474            names_to_query.push(symbol.name.as_str());
1475        }
1476
1477        let global_refs = self.global_references.read();
1478        let mut seen: HashSet<(String, u32, u32, u32, u32)> = HashSet::new();
1479        let mut locations = Vec::new();
1480
1481        for symbol_name in names_to_query {
1482            if let Some(refs) = global_refs.get(symbol_name) {
1483                for location in refs {
1484                    let key = (
1485                        location.uri.clone(),
1486                        location.range.start.line,
1487                        location.range.start.column,
1488                        location.range.end.line,
1489                        location.range.end.column,
1490                    );
1491                    if seen.insert(key) {
1492                        locations.push(location.clone());
1493                    }
1494                }
1495            }
1496        }
1497        drop(global_refs);
1498
1499        Self::sort_locations_deterministically(&mut locations);
1500        locations
1501    }
1502
1503    /// Create a new empty index
1504    ///
1505    /// # Returns
1506    ///
1507    /// A workspace index with empty file and symbol tables.
1508    ///
1509    /// # Examples
1510    ///
1511    /// ```rust,ignore
1512    /// use perl_parser::workspace_index::WorkspaceIndex;
1513    ///
1514    /// let index = WorkspaceIndex::new();
1515    /// assert!(!index.has_symbols());
1516    /// ```
1517    pub fn new() -> Self {
1518        Self {
1519            files: Arc::new(RwLock::new(HashMap::new())),
1520            symbols: Arc::new(RwLock::new(HashMap::new())),
1521            global_references: Arc::new(RwLock::new(HashMap::new())),
1522            fact_shards: Arc::new(RwLock::new(HashMap::new())),
1523            semantic_reference_index: Arc::new(RwLock::new(ReferenceIndex::new())),
1524            semantic_import_export_index: Arc::new(RwLock::new(ImportExportIndex::new())),
1525            document_store: DocumentStore::new(),
1526            workspace_folders: Arc::new(RwLock::new(Vec::new())),
1527        }
1528    }
1529
1530    /// Create a workspace index with pre-allocated capacity.
1531    ///
1532    /// Pre-allocating reduces the number of rehash operations during large-workspace
1533    /// startup. Use this instead of `new()` when the approximate workspace size is
1534    /// known in advance (e.g. from a file discovery scan).
1535    ///
1536    /// # Arguments
1537    ///
1538    /// * `estimated_files` - Expected number of source files in the workspace.
1539    /// * `avg_symbols_per_file` - Expected average number of symbols per file.
1540    ///
1541    /// # Panics
1542    ///
1543    /// Does not panic. Overflow is prevented via `saturating_mul` and an upper cap
1544    /// on the symbol/reference map capacity.
1545    ///
1546    /// # Examples
1547    ///
1548    /// ```rust,ignore
1549    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
1550    ///
1551    /// let index = WorkspaceIndex::with_capacity(1000, 20);
1552    /// assert!(!index.has_symbols());
1553    /// ```
1554    pub fn with_capacity(estimated_files: usize, avg_symbols_per_file: usize) -> Self {
1555        // Each symbol is stored twice (qualified + bare name) due to dual indexing.
1556        let sym_cap =
1557            estimated_files.saturating_mul(avg_symbols_per_file).saturating_mul(2).min(1_000_000);
1558        let ref_cap = (sym_cap / 4).min(1_000_000);
1559        Self {
1560            files: Arc::new(RwLock::new(HashMap::with_capacity(estimated_files))),
1561            symbols: Arc::new(RwLock::new(HashMap::with_capacity(sym_cap))),
1562            global_references: Arc::new(RwLock::new(HashMap::with_capacity(ref_cap))),
1563            fact_shards: Arc::new(RwLock::new(HashMap::with_capacity(estimated_files))),
1564            semantic_reference_index: Arc::new(RwLock::new(ReferenceIndex::new())),
1565            semantic_import_export_index: Arc::new(RwLock::new(ImportExportIndex::new())),
1566            document_store: DocumentStore::new(),
1567            workspace_folders: Arc::new(RwLock::new(Vec::new())),
1568        }
1569    }
1570
1571    /// Set the workspace folder URIs for multi-root workspace support.
1572    ///
1573    /// This method updates the list of workspace folders that the index
1574    /// uses to determine folder attribution for files and symbols.
1575    ///
1576    /// # Arguments
1577    ///
1578    /// * `folders` - A vector of workspace folder URIs
1579    ///
1580    /// # Examples
1581    ///
1582    /// ```rust,ignore
1583    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
1584    ///
1585    /// let index = WorkspaceIndex::new();
1586    /// index.set_workspace_folders(vec![
1587    ///     "file:///project1".to_string(),
1588    ///     "file:///project2".to_string(),
1589    /// ]);
1590    /// ```
1591    pub fn set_workspace_folders(&self, folders: Vec<String>) {
1592        let mut workspace_folders = self.workspace_folders.write();
1593        *workspace_folders = folders;
1594    }
1595
1596    /// Get the current workspace folder URIs.
1597    ///
1598    /// # Returns
1599    ///
1600    /// A vector of workspace folder URIs.
1601    #[must_use]
1602    pub fn workspace_folders(&self) -> Vec<String> {
1603        self.workspace_folders.read().clone()
1604    }
1605
    /// Normalize a URI to a consistent form using proper URI handling
    ///
    /// Thin wrapper that forwards to `perl_uri::normalize_uri`; the actual
    /// normalization rules are defined there.
    fn normalize_uri(uri: &str) -> String {
        perl_uri::normalize_uri(uri)
    }
1610
1611    /// Remove a file's contributions from the global reference index.
1612    ///
1613    /// Retains only entries whose URI does not match `file_uri`.
1614    /// Empty keys are removed to avoid unbounded map growth.
1615    fn remove_file_global_refs(
1616        global_refs: &mut HashMap<String, Vec<Location>>,
1617        file_index: &FileIndex,
1618        file_uri: &str,
1619    ) {
1620        for name in file_index.references.keys() {
1621            if let Some(locs) = global_refs.get_mut(name) {
1622                locs.retain(|loc| loc.uri != file_uri);
1623                if locs.is_empty() {
1624                    global_refs.remove(name);
1625                }
1626            }
1627        }
1628    }
1629
    /// Index a file from its URI and text content.
    ///
    /// The text is hashed first; when the file is already indexed with the
    /// same content hash the call returns immediately without re-parsing.
    /// Otherwise the document store is updated, the text is parsed, and this
    /// file's previous contributions to the symbol cache and the global
    /// reference index are replaced with freshly extracted ones. The new fact
    /// shard is handed to `replace_fact_shard_incremental`, and the file's
    /// import specs are re-registered in the import/export index.
    ///
    /// # Arguments
    ///
    /// * `uri` - File URI identifying the document
    /// * `text` - Full Perl source text for indexing
    ///
    /// # Returns
    ///
    /// `Ok(())` when indexing succeeds or the content is unchanged, or an
    /// error message otherwise.
    ///
    /// # Errors
    ///
    /// Returns an error if parsing fails, or if the document cannot be read
    /// back from the document store after it was opened/updated. In both
    /// cases the document store has already received the new text, while the
    /// data previously indexed for this file (if any) is left in place.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
    /// use url::Url;
    ///
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let index = WorkspaceIndex::new();
    /// let uri = Url::parse("file:///example.pl")?;
    /// index.index_file(uri, "sub hello { return 1; }".to_string())?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn index_file(&self, uri: Url, text: String) -> Result<(), String> {
        let uri_str = uri.to_string();

        // Compute content hash for early-exit optimization
        let mut hasher = DefaultHasher::new();
        text.hash(&mut hasher);
        let content_hash = hasher.finish();

        // Check if content is unchanged (early-exit optimization).
        // The read guard is scoped so it is released before any write lock
        // is requested further down.
        let key = DocumentStore::uri_key(&uri_str);
        {
            let files = self.files.read();
            if let Some(existing_index) = files.get(&key) {
                if existing_index.content_hash == content_hash {
                    // Content unchanged, skip re-indexing
                    return Ok(());
                }
            }
        }

        // Update document store.
        // NOTE(review): the literal `1` is presumably the document version —
        // confirm against `DocumentStore`.
        if self.document_store.is_open(&uri_str) {
            self.document_store.update(&uri_str, 1, text.clone());
        } else {
            self.document_store.open(uri_str.clone(), 1, text.clone());
        }

        // Parse the file; a parse failure aborts before any index is touched.
        let mut parser = Parser::new(&text);
        let ast = match parser.parse() {
            Ok(ast) => ast,
            Err(e) => return Err(format!("Parse error: {}", e)),
        };

        // Get the document for line index
        let mut doc = self.document_store.get(&uri_str).ok_or("Document not found")?;

        // Determine workspace folder URI from the file URI
        let folder_uri = self.determine_folder_uri(&uri_str);

        // Extract symbols and references
        let mut file_index = FileIndex {
            source_uri: uri_str.clone(),
            content_hash,
            folder_uri: folder_uri.clone(),
            ..Default::default()
        };
        let mut visitor = IndexVisitor::new(&mut doc, uri_str.clone(), folder_uri);
        visitor.visit(&ast, &mut file_index);

        // Prefer the AST-derived canonical shard; fall back to the shard built
        // from `file_index` only when the canonical one is completely empty.
        let canonical_shard =
            Self::build_canonical_fact_shard_for_ast(&uri_str, content_hash, &ast);
        let fact_shard = if canonical_shard.anchors.is_empty()
            && canonical_shard.entities.is_empty()
            && canonical_shard.occurrences.is_empty()
            && canonical_shard.edges.is_empty()
        {
            Self::build_fact_shard(&uri_str, content_hash, &file_index)
        } else {
            canonical_shard
        };

        // Extract import specs from the AST — populates ImportExportIndex so
        // that `Foo->import(@names)` dynamic-import suppression is live in
        // production.  This runs outside the write lock to avoid holding it
        // longer than necessary.
        //
        // Lock ordering note: `semantic_import_export_index` is acquired write
        // separately from (and after) `files`/`symbols`/`global_references` to
        // match the consistent lock-order used throughout this file.
        let file_id = Self::hash_uri_to_file_id(&uri_str);
        let import_specs =
            crate::semantic::workspace_import_extractor::extract_import_specs(&ast, file_id);

        // Update the index, refresh the global symbol cache, and replace this file's
        // contribution in the global reference index.
        {
            let mut files = self.files.write();

            // Remove stale global references from previous version of this file.
            // The `global_references` guard is dropped at the end of this `if`,
            // before the `symbols` lock is taken below.
            if let Some(old_index) = files.get(&key) {
                let mut global_refs = self.global_references.write();
                Self::remove_file_global_refs(&mut global_refs, old_index, &uri_str);
            }

            // Incrementally remove old symbols before inserting new file
            if let Some(old_index) = files.get(&key) {
                let mut symbols = self.symbols.write();
                Self::incremental_remove_symbols(&files, &mut symbols, old_index);
                drop(symbols);
            }
            files.insert(key.clone(), file_index);
            let mut symbols = self.symbols.write();
            if let Some(new_index) = files.get(&key) {
                Self::incremental_add_symbols(&mut symbols, new_index);
            }

            // Publish the new file's references into the global reference index.
            if let Some(file_index) = files.get(&key) {
                let mut global_refs = self.global_references.write();
                for (name, refs) in &file_index.references {
                    let entry = global_refs.entry(name.clone()).or_default();
                    for reference in refs {
                        entry.push(Location { uri: reference.uri.clone(), range: reference.range });
                    }
                }
            }
            // Runs while the `files` and `symbols` write guards are still held.
            self.replace_fact_shard_incremental(&key, fact_shard);
        }

        // Update the import/export index with the freshly extracted import specs.
        // Stale entries for this URI are removed first (incremental re-indexing).
        // This is done after the main write lock block to follow the established
        // lock ordering (shards → reference_index → import_export_index).
        {
            let mut ie_idx = self.semantic_import_export_index.write();
            ie_idx.remove_file_imports(&uri_str);
            ie_idx.add_file_imports(&uri_str, file_id, import_specs);
        }

        Ok(())
    }
1781
1782    /// Remove a file from the index
1783    ///
1784    /// # Arguments
1785    ///
1786    /// * `uri` - File URI (string form) to remove
1787    ///
1788    /// # Returns
1789    ///
1790    /// Nothing. The index is updated in-place.
1791    ///
1792    /// # Examples
1793    ///
1794    /// ```rust,ignore
1795    /// use perl_parser::workspace_index::WorkspaceIndex;
1796    ///
1797    /// let index = WorkspaceIndex::new();
1798    /// index.remove_file("file:///example.pl");
1799    /// ```
1800    pub fn remove_file(&self, uri: &str) {
1801        let uri_str = Self::normalize_uri(uri);
1802        let key = DocumentStore::uri_key(&uri_str);
1803
1804        // Remove from document store
1805        self.document_store.close(&uri_str);
1806
1807        // Remove file index
1808        let mut files = self.files.write();
1809        if let Some(file_index) = files.remove(&key) {
1810            self.fact_shards.write().remove(&key);
1811
1812            // Clean up semantic cross-file indexes for this file.
1813            self.semantic_reference_index.write().remove_file(&uri_str);
1814            {
1815                let mut ie_idx = self.semantic_import_export_index.write();
1816                ie_idx.remove_file_imports(&uri_str);
1817                ie_idx.remove_module_exports(&uri_str);
1818            }
1819
1820            // Incrementally remove symbols and re-insert any shadowed names.
1821            let mut symbols = self.symbols.write();
1822            Self::incremental_remove_symbols(&files, &mut symbols, &file_index);
1823
1824            // Defensive sweep: purge any remaining cache entries whose value
1825            // points to this file's URI.  incremental_remove_symbols already
1826            // handles known symbol names; this sweep guarantees no stale
1827            // candidates survive even when:
1828            //   * the file had zero symbols (nothing for incremental_remove
1829            //     to walk), or
1830            //   * a symbol's stored uri differs from the canonical normalize_uri
1831            //     output (URI normalization edge cases).
1832            // Match against every URI spelling observed in this file index plus
1833            // the canonical uri_str so raw/normalized variants are all caught.
1834            let mut removed_uris = vec![uri_str.as_str()];
1835            for observed_uri in file_index.symbols.iter().map(|s| s.uri.as_str()).chain(
1836                file_index.references.values().flat_map(|refs| refs.iter().map(|r| r.uri.as_str())),
1837            ) {
1838                if !removed_uris.contains(&observed_uri) {
1839                    removed_uris.push(observed_uri);
1840                }
1841            }
1842            symbols.retain(|_, candidates| {
1843                candidates.retain(|candidate| {
1844                    let cand_uri = candidate.location.uri.as_str();
1845                    !removed_uris.contains(&cand_uri)
1846                });
1847                !candidates.is_empty()
1848            });
1849
1850            // Remove from global reference index. Two-phase cleanup: first
1851            // remove names this file was known to reference (cheap path), then
1852            // a defensive sweep over all remaining entries to catch any that
1853            // were inserted under names not present in this file's
1854            // FileIndex::references map (e.g. via aggregated/global insertion
1855            // paths). Empty buckets are dropped.
1856            let mut global_refs = self.global_references.write();
1857            Self::remove_file_global_refs(&mut global_refs, &file_index, &uri_str);
1858            global_refs.retain(|_, locs| {
1859                locs.retain(|loc| !removed_uris.contains(&loc.uri.as_str()));
1860                !locs.is_empty()
1861            });
1862        }
1863    }
1864
    /// Remove a file from the index (URL variant for compatibility).
    ///
    /// Forwards `uri.as_str()` to `remove_file`, so both entry points share
    /// the same removal path.
    ///
    /// # Arguments
    ///
    /// * `uri` - File URI as a parsed `Url`
    ///
    /// # Returns
    ///
    /// Nothing. The index is updated in-place.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
    /// use url::Url;
    ///
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let index = WorkspaceIndex::new();
    /// let uri = Url::parse("file:///example.pl")?;
    /// index.remove_file_url(&uri);
    /// # Ok(())
    /// # }
    /// ```
    pub fn remove_file_url(&self, uri: &Url) {
        self.remove_file(uri.as_str())
    }
1891
    /// Clear a file from the index (alias for `remove_file`).
    ///
    /// Calls `remove_file` with the same argument and does nothing else.
    ///
    /// # Arguments
    ///
    /// * `uri` - File URI (string form) to remove
    ///
    /// # Returns
    ///
    /// Nothing. The index is updated in-place.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
    ///
    /// let index = WorkspaceIndex::new();
    /// index.clear_file("file:///example.pl");
    /// ```
    pub fn clear_file(&self, uri: &str) {
        self.remove_file(uri);
    }
1913
    /// Clear a file from the index (URL variant for compatibility).
    ///
    /// Forwards `uri.as_str()` to `clear_file`, which in turn calls
    /// `remove_file`.
    ///
    /// # Arguments
    ///
    /// * `uri` - File URI as a parsed `Url`
    ///
    /// # Returns
    ///
    /// Nothing. The index is updated in-place.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
    /// use url::Url;
    ///
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let index = WorkspaceIndex::new();
    /// let uri = Url::parse("file:///example.pl")?;
    /// index.clear_file_url(&uri);
    /// # Ok(())
    /// # }
    /// ```
    pub fn clear_file_url(&self, uri: &Url) {
        self.clear_file(uri.as_str())
    }
1940
1941    /// Remove all files from a specific workspace folder.
1942    ///
1943    /// This method removes all indexed files that belong to the given
1944    /// workspace folder URI. This is useful when a workspace folder is
1945    /// removed from the multi-root workspace.
1946    ///
1947    /// # Arguments
1948    ///
1949    /// * `folder_uri` - The workspace folder URI to remove files from
1950    ///
1951    /// # Examples
1952    ///
1953    /// ```rust,ignore
1954    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
1955    ///
1956    /// let index = WorkspaceIndex::new();
1957    /// // Index files from multiple folders...
1958    /// index.remove_folder("file:///project1");
1959    /// ```
1960    pub fn remove_folder(&self, folder_uri: &str) {
1961        let mut uris_to_remove = Vec::new();
1962        let files = self.files.read();
1963
1964        // Collect all files that belong to this folder
1965        for file_index in files.values() {
1966            if file_index.folder_uri.as_deref() == Some(folder_uri) {
1967                uris_to_remove.push(file_index.source_uri.clone());
1968            }
1969        }
1970        drop(files);
1971
1972        // Remove each file through the full removal path to keep
1973        // symbol/reference caches and document store in sync.
1974        for uri in uris_to_remove {
1975            self.remove_file(&uri);
1976        }
1977    }
1978
1979    #[cfg(not(target_arch = "wasm32"))]
1980    /// Index a file from a URI string for the Index/Analyze workflow.
1981    ///
1982    /// Accepts either a `file://` URI or a filesystem path. Not available on
1983    /// wasm32 targets (requires filesystem path conversion).
1984    ///
1985    /// # Arguments
1986    ///
1987    /// * `uri` - File URI string or filesystem path.
1988    /// * `text` - Full Perl source text for indexing.
1989    ///
1990    /// # Returns
1991    ///
1992    /// `Ok(())` when indexing succeeds, or an error message otherwise.
1993    ///
1994    /// # Errors
1995    ///
1996    /// Returns an error if the URI is invalid or parsing fails.
1997    ///
1998    /// # Examples
1999    ///
2000    /// ```rust,ignore
2001    /// use perl_parser::workspace_index::WorkspaceIndex;
2002    ///
2003    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
2004    /// let index = WorkspaceIndex::new();
2005    /// index.index_file_str("file:///example.pl", "sub hello { }")?;
2006    /// # Ok(())
2007    /// # }
2008    /// ```
2009    pub fn index_file_str(&self, uri: &str, text: &str) -> Result<(), String> {
2010        let path = Path::new(uri);
2011        let url = if path.is_absolute() {
2012            url::Url::from_file_path(path)
2013                .map_err(|_| format!("Invalid URI or file path: {}", uri))?
2014        } else {
2015            // Raw absolute Windows paths like C:\foo can parse as a bogus URI
2016            // (`c:` scheme). Prefer URL parsing only for non-path inputs.
2017            url::Url::parse(uri).or_else(|_| {
2018                url::Url::from_file_path(path)
2019                    .map_err(|_| format!("Invalid URI or file path: {}", uri))
2020            })?
2021        };
2022        self.index_file(url, text.to_string())
2023    }
2024
    /// Index multiple files in a single batch operation.
    ///
    /// This is significantly faster than calling `index_file` in a loop for
    /// initial workspace scans because it defers the global symbol cache
    /// rebuild to a single pass at the end.
    ///
    /// Phase 1: Parse all files without holding any index write lock (only a
    /// short-lived read lock on the file map for the content-hash check).
    /// Phase 2: Bulk-insert file indices and rebuild the symbol cache once.
    ///
    /// Files whose content hash matches the already-indexed version are
    /// skipped. A file that fails to parse is reported and skipped; the rest
    /// of the batch is still processed.
    ///
    /// NOTE(review): unlike `index_file`, this method does not build fact
    /// shards and does not update the import/export index — confirm that this
    /// is intentional for the batch path.
    ///
    /// # Arguments
    ///
    /// * `files_to_index` - `(uri, text)` pairs to index
    ///
    /// # Returns
    ///
    /// One error message per file that could not be indexed; empty when every
    /// file was indexed or skipped as unchanged.
    pub fn index_files_batch(&self, files_to_index: Vec<(Url, String)>) -> Vec<String> {
        let mut errors = Vec::new();

        // Phase 1: Parse all files without locks.
        // Each entry is (store key, URI string, freshly built file index).
        let mut parsed: Vec<(String, String, FileIndex)> = Vec::with_capacity(files_to_index.len());
        for (uri, text) in &files_to_index {
            let uri_str = uri.to_string();

            // Content hash for early-exit
            let mut hasher = DefaultHasher::new();
            text.hash(&mut hasher);
            let content_hash = hasher.finish();

            let key = DocumentStore::uri_key(&uri_str);

            // Check if content unchanged; the read guard is released at the
            // end of this block.
            {
                let files = self.files.read();
                if let Some(existing) = files.get(&key) {
                    if existing.content_hash == content_hash {
                        continue;
                    }
                }
            }

            // Update document store.
            // NOTE(review): the literal `1` is presumably the document
            // version — confirm against `DocumentStore`.
            if self.document_store.is_open(&uri_str) {
                self.document_store.update(&uri_str, 1, text.clone());
            } else {
                self.document_store.open(uri_str.clone(), 1, text.clone());
            }

            // Parse; a failure is recorded and the file is skipped.
            let mut parser = Parser::new(text);
            let ast = match parser.parse() {
                Ok(ast) => ast,
                Err(e) => {
                    errors.push(format!("Parse error in {}: {}", uri_str, e));
                    continue;
                }
            };

            let mut doc = match self.document_store.get(&uri_str) {
                Some(d) => d,
                None => {
                    errors.push(format!("Document not found: {}", uri_str));
                    continue;
                }
            };

            // Determine workspace folder URI from the file URI
            let folder_uri = self.determine_folder_uri(&uri_str);

            let mut file_index = FileIndex {
                source_uri: uri_str.clone(),
                content_hash,
                folder_uri: folder_uri.clone(),
                ..Default::default()
            };
            let mut visitor = IndexVisitor::new(&mut doc, uri_str.clone(), folder_uri);
            visitor.visit(&ast, &mut file_index);

            parsed.push((key, uri_str, file_index));
        }

        // Phase 2: Bulk insert with single cache rebuild.
        // All three write locks are held for the whole phase, acquired in the
        // order files -> symbols -> global_references.
        {
            let mut files = self.files.write();
            let mut symbols = self.symbols.write();
            let mut global_refs = self.global_references.write();

            // Pre-allocate capacity for the incoming batch to avoid rehashing.
            // Each symbol is indexed under both its qualified name and bare name.
            // NOTE(review): the factor 20 looks like an assumed average number
            // of symbols per file — confirm.
            files.reserve(parsed.len());
            symbols.reserve(parsed.len().saturating_mul(20).saturating_mul(2));

            for (key, uri_str, file_index) in parsed {
                // Remove stale global references
                if let Some(old_index) = files.get(&key) {
                    Self::remove_file_global_refs(&mut global_refs, old_index, &uri_str);
                }

                files.insert(key.clone(), file_index);

                // Add global references for this file
                if let Some(fi) = files.get(&key) {
                    for (name, refs) in &fi.references {
                        let entry = global_refs.entry(name.clone()).or_default();
                        for reference in refs {
                            entry.push(Location {
                                uri: reference.uri.clone(),
                                range: reference.range,
                            });
                        }
                    }
                }
            }

            // Single rebuild at the end
            Self::rebuild_symbol_cache(&files, &mut symbols);
        }

        errors
    }
2137
2138    /// Find all references to a symbol using dual indexing strategy
2139    ///
2140    /// This function searches for both exact matches and bare name matches when
2141    /// the symbol is qualified. For example, when searching for "Utils::process_data":
2142    /// - First searches for exact "Utils::process_data" references
2143    /// - Then searches for bare "process_data" references that might refer to the same function
2144    ///
2145    /// This dual approach handles cases where functions are called both as:
2146    /// - Qualified: `Utils::process_data()`
2147    /// - Unqualified: `process_data()` (when in the same package or imported)
2148    ///
2149    /// # Arguments
2150    ///
2151    /// * `symbol_name` - Symbol name or qualified name to search
2152    ///
2153    /// # Returns
2154    ///
2155    /// All reference locations found for the requested symbol.
2156    ///
2157    /// # Examples
2158    ///
2159    /// ```rust,ignore
2160    /// use perl_parser::workspace_index::WorkspaceIndex;
2161    ///
2162    /// let index = WorkspaceIndex::new();
2163    /// let _refs = index.find_references("Utils::process_data");
2164    /// ```
2165    pub fn find_references(&self, symbol_name: &str) -> Vec<Location> {
2166        let global_refs = self.global_references.read();
2167        let mut seen: HashSet<(String, u32, u32, u32, u32)> = HashSet::new();
2168        let mut locations = Vec::new();
2169
2170        // O(1) lookup for exact symbol name
2171        if let Some(refs) = global_refs.get(symbol_name) {
2172            for loc in refs {
2173                let key = (
2174                    loc.uri.clone(),
2175                    loc.range.start.line,
2176                    loc.range.start.column,
2177                    loc.range.end.line,
2178                    loc.range.end.column,
2179                );
2180                if seen.insert(key) {
2181                    locations.push(Location { uri: loc.uri.clone(), range: loc.range });
2182                }
2183            }
2184        }
2185
2186        // If the symbol is qualified, also collect bare name references
2187        if let Some(idx) = symbol_name.rfind("::") {
2188            let bare_name = &symbol_name[idx + 2..];
2189            if let Some(refs) = global_refs.get(bare_name) {
2190                for loc in refs {
2191                    let key = (
2192                        loc.uri.clone(),
2193                        loc.range.start.line,
2194                        loc.range.start.column,
2195                        loc.range.end.line,
2196                        loc.range.end.column,
2197                    );
2198                    if seen.insert(key) {
2199                        locations.push(Location { uri: loc.uri.clone(), range: loc.range });
2200                    }
2201                }
2202            }
2203        } else {
2204            // If the symbol is bare, also collect qualified references that end
2205            // with the same bare name, e.g. `Pkg::foo` when searching for `foo`.
2206            for (name, refs) in global_refs.iter() {
2207                if !Self::is_qualified_variant_of(name, symbol_name) {
2208                    continue;
2209                }
2210
2211                for loc in refs {
2212                    let key = (
2213                        loc.uri.clone(),
2214                        loc.range.start.line,
2215                        loc.range.start.column,
2216                        loc.range.end.line,
2217                        loc.range.end.column,
2218                    );
2219                    if seen.insert(key) {
2220                        locations.push(Location { uri: loc.uri.clone(), range: loc.range });
2221                    }
2222                }
2223            }
2224        }
2225
2226        Self::sort_locations_deterministically(&mut locations);
2227        locations
2228    }
2229
2230    /// Resolve a symbol and return its definition/reference set for cross-file planning.
2231    ///
2232    /// Returns `None` when no definition can be resolved for `symbol_name`.
2233    pub fn query_symbol_references(
2234        &self,
2235        symbol_name: &str,
2236    ) -> Option<CrossFileReferenceQueryResult> {
2237        let definition = self.find_definition(symbol_name)?;
2238        let symbol = self.find_symbol_by_definition(&definition, symbol_name)?;
2239
2240        let stable_key = symbol.qualified_name.clone().unwrap_or_else(|| {
2241            format!(
2242                "{}@{}:{}:{}",
2243                symbol.name, symbol.uri, symbol.range.start.line, symbol.range.start.column
2244            )
2245        });
2246        let mut references = self.collect_symbol_references(&symbol);
2247        if !references.iter().any(|location| location == &definition) {
2248            references.push(definition.clone());
2249            Self::sort_locations_deterministically(&mut references);
2250        }
2251
2252        Some(CrossFileReferenceQueryResult {
2253            symbol: SymbolIdentity {
2254                stable_key,
2255                name: symbol.name,
2256                qualified_name: symbol.qualified_name,
2257                kind: symbol.kind,
2258            },
2259            definition,
2260            references,
2261        })
2262    }
2263
2264    /// Count non-definition references (usages) of a symbol.
2265    ///
2266    /// Like `find_references` but excludes `ReferenceKind::Definition` entries,
2267    /// returning only actual usage sites. This is used by code lens to show
2268    /// "N references" where N means call sites, not the definition itself.
2269    pub fn count_usages(&self, symbol_name: &str) -> usize {
2270        let files = self.files.read();
2271        let mut seen: HashSet<(String, u32, u32, u32, u32)> = HashSet::new();
2272
2273        for (_uri_key, file_index) in files.iter() {
2274            if let Some(refs) = file_index.references.get(symbol_name) {
2275                for r in refs.iter().filter(|r| r.kind != ReferenceKind::Definition) {
2276                    seen.insert((
2277                        r.uri.clone(),
2278                        r.range.start.line,
2279                        r.range.start.column,
2280                        r.range.end.line,
2281                        r.range.end.column,
2282                    ));
2283                }
2284            }
2285
2286            if let Some(idx) = symbol_name.rfind("::") {
2287                let bare_name = &symbol_name[idx + 2..];
2288                if let Some(refs) = file_index.references.get(bare_name) {
2289                    for r in refs.iter().filter(|r| r.kind != ReferenceKind::Definition) {
2290                        seen.insert((
2291                            r.uri.clone(),
2292                            r.range.start.line,
2293                            r.range.start.column,
2294                            r.range.end.line,
2295                            r.range.end.column,
2296                        ));
2297                    }
2298                }
2299            } else {
2300                for (name, refs) in &file_index.references {
2301                    if !Self::is_qualified_variant_of(name, symbol_name) {
2302                        continue;
2303                    }
2304
2305                    for r in refs.iter().filter(|r| r.kind != ReferenceKind::Definition) {
2306                        seen.insert((
2307                            r.uri.clone(),
2308                            r.range.start.line,
2309                            r.range.start.column,
2310                            r.range.end.line,
2311                            r.range.end.column,
2312                        ));
2313                    }
2314                }
2315            }
2316        }
2317
2318        seen.len()
2319    }
2320
2321    fn is_qualified_variant_of(candidate: &str, bare_symbol: &str) -> bool {
2322        candidate.rsplit_once("::").is_some_and(|(_, candidate_bare)| candidate_bare == bare_symbol)
2323    }
2324
2325    /// Find the definition of a symbol
2326    ///
2327    /// # Arguments
2328    ///
2329    /// * `symbol_name` - Symbol name or qualified name to resolve
2330    ///
2331    /// # Returns
2332    ///
2333    /// The first matching definition location, if found.
2334    ///
2335    /// # Examples
2336    ///
2337    /// ```rust,ignore
2338    /// use perl_parser::workspace_index::WorkspaceIndex;
2339    ///
2340    /// let index = WorkspaceIndex::new();
2341    /// let _def = index.find_definition("MyPackage::example");
2342    /// ```
2343    pub fn find_definition(&self, symbol_name: &str) -> Option<Location> {
2344        if let Some(location) = self.definition_candidates(symbol_name).into_iter().next() {
2345            return Some(location);
2346        }
2347
2348        // Fall back to a full files scan for this query. The result is intentionally
2349        // NOT written back to `self.symbols`: every indexed symbol is already
2350        // inserted under both qualified and bare names by `incremental_add_symbols`,
2351        // so any cache miss here is for a key that does not correspond to an
2352        // indexed symbol (e.g. a typo or alias). Caching such queries is unsound
2353        // (entries become stale on file edits and were never tracked for cleanup
2354        // in `remove_file`/`incremental_remove_symbols`) and lets the cache grow
2355        // unboundedly across long sessions. Returning the resolved location
2356        // directly preserves correctness without retaining state.
2357        let files = self.files.read();
2358        Self::find_definition_in_files(&files, symbol_name, None).map(|(location, _uri)| location)
2359    }
2360
2361    pub(crate) fn definition_candidates(&self, symbol_name: &str) -> Vec<Location> {
2362        let symbols = self.symbols.read();
2363        symbols
2364            .get(symbol_name)
2365            .map(|candidates| {
2366                candidates.iter().map(|candidate| candidate.location.clone()).collect()
2367            })
2368            .unwrap_or_default()
2369    }
2370
2371    /// Get all symbols in the workspace
2372    ///
2373    /// # Returns
2374    ///
2375    /// A vector containing every symbol currently indexed.
2376    ///
2377    /// # Examples
2378    ///
2379    /// ```rust,ignore
2380    /// use perl_parser::workspace_index::WorkspaceIndex;
2381    ///
2382    /// let index = WorkspaceIndex::new();
2383    /// let _symbols = index.all_symbols();
2384    /// ```
2385    pub fn all_symbols(&self) -> Vec<WorkspaceSymbol> {
2386        let files = self.files.read();
2387        let mut symbols = Vec::new();
2388
2389        for (_uri_key, file_index) in files.iter() {
2390            symbols.extend(file_index.symbols.clone());
2391        }
2392
2393        symbols
2394    }
2395
2396    /// Clear all indexed files and symbols from the workspace.
2397    pub fn clear(&self) {
2398        self.files.write().clear();
2399        self.symbols.write().clear();
2400        self.global_references.write().clear();
2401        self.fact_shards.write().clear();
2402        *self.semantic_reference_index.write() = ReferenceIndex::new();
2403        *self.semantic_import_export_index.write() = ImportExportIndex::new();
2404    }
2405
2406    fn hash_uri_to_file_id(uri: &str) -> FileId {
2407        let mut hasher = DefaultHasher::new();
2408        uri.hash(&mut hasher);
2409        FileId(hasher.finish())
2410    }
2411
2412    fn build_fact_shard(uri: &str, content_hash: u64, file_index: &FileIndex) -> FileFactShard {
2413        let file_id = Self::hash_uri_to_file_id(uri);
2414        let mut anchors = Vec::new();
2415        let mut entities = Vec::new();
2416        for (idx, symbol) in file_index.symbols.iter().enumerate() {
2417            let anchor_id = AnchorId((idx + 1) as u64);
2418            anchors.push(AnchorFact {
2419                id: anchor_id,
2420                file_id,
2421                // WorkspaceSymbol provides line/column coordinates only, not byte
2422                // offsets.  Zero-initialize span_*_byte until a byte-offset source
2423                // is plumbed through the indexing pipeline.
2424                span_start_byte: 0,
2425                span_end_byte: 0,
2426                scope_id: None,
2427                provenance: Provenance::SearchFallback,
2428                confidence: Confidence::Low,
2429            });
2430            entities.push(EntityFact {
2431                id: EntityId((idx + 1) as u64),
2432                kind: EntityKind::Unknown,
2433                canonical_name: symbol
2434                    .qualified_name
2435                    .clone()
2436                    .unwrap_or_else(|| symbol.name.clone()),
2437                anchor_id: Some(anchor_id),
2438                scope_id: None,
2439                provenance: Provenance::SearchFallback,
2440                confidence: Confidence::Low,
2441            });
2442        }
2443        // Hash the per-category fact vectors so consumers can detect staleness
2444        // without re-reading the full shard.
2445        let anchors_hash = {
2446            let mut h = DefaultHasher::new();
2447            anchors.len().hash(&mut h);
2448            for a in &anchors {
2449                a.id.hash(&mut h);
2450                a.span_start_byte.hash(&mut h);
2451                a.span_end_byte.hash(&mut h);
2452            }
2453            h.finish()
2454        };
2455        let entities_hash = {
2456            let mut h = DefaultHasher::new();
2457            entities.len().hash(&mut h);
2458            for e in &entities {
2459                e.id.hash(&mut h);
2460                e.canonical_name.hash(&mut h);
2461            }
2462            h.finish()
2463        };
2464        FileFactShard {
2465            source_uri: uri.to_string(),
2466            file_id,
2467            content_hash,
2468            anchors_hash: Some(anchors_hash),
2469            entities_hash: Some(entities_hash),
2470            occurrences_hash: Some(0),
2471            edges_hash: Some(0),
2472            anchors,
2473            entities,
2474            occurrences: Vec::new(),
2475            edges: Vec::new(),
2476        }
2477    }
2478
2479    /// Build a canonical [`FileFactShard`] from the AST using the semantic
2480    /// fact adapters in `perl-symbol`.
2481    ///
2482    /// This is the canonical population path that produces facts with real
2483    /// byte spans, `ExactAst` provenance, and per-category hashes. It runs
2484    /// alongside the legacy `build_fact_shard` path during the migration
2485    /// period.
2486    fn build_canonical_fact_shard_for_ast(
2487        uri: &str,
2488        content_hash: u64,
2489        ast: &Node,
2490    ) -> FileFactShard {
2491        let file_id = Self::hash_uri_to_file_id(uri);
2492
2493        // Extract declarations and references from the AST.
2494        let decls = extract_symbol_decls(ast, None);
2495        let refs = extract_symbol_refs(ast);
2496
2497        // Run the canonical adapters.
2498        let decl_facts = symbol_decls_to_semantic_facts(&decls, file_id);
2499
2500        // Build an entity lookup map for reference resolution.
2501        let entity_ids_by_name: std::collections::BTreeMap<String, EntityId> =
2502            decl_facts.entities.iter().map(|e| (e.canonical_name.clone(), e.id)).collect();
2503        let ref_facts = symbol_refs_to_semantic_facts(&refs, file_id, &entity_ids_by_name);
2504
2505        // Extract dynamic boundary evidence for `eval "sub NAME { ... }"` patterns.
2506        // Non-literal evals (e.g. `eval $code`) are intentionally skipped — the
2507        // sub name is not statically known and no evidence is emitted.
2508        let eval_sub_triples =
2509            crate::semantic::eval_sub_extractor::extract_eval_sub_boundaries(ast, file_id);
2510        let dynamic_boundaries: Vec<perl_semantic_facts::OccurrenceFact> =
2511            eval_sub_triples.iter().map(|(_, _, occ)| occ.clone()).collect();
2512        let generated_member_facts =
2513            crate::semantic::generated_member_extractor::extract_generated_member_facts(
2514                ast, file_id,
2515            );
2516
2517        // Build the canonical fact shard.
2518        // Import specs (for `use`, `require`, `ClassName->import()`) are
2519        // populated separately via ImportExportIndex — not passed here.
2520        let mut shard = crate::semantic::facts::build_canonical_fact_shard(
2521            uri,
2522            content_hash,
2523            &decl_facts,
2524            &ref_facts,
2525            &[],
2526            &dynamic_boundaries,
2527        );
2528
2529        // Merge entity and anchor facts from semantic producers into the shard.
2530        // The `build_canonical_fact_shard` function only accepts OccurrenceFact
2531        // slices for dynamic_boundaries; extra entities and anchors must be
2532        // merged manually so queries can resolve those semantic facts.
2533        //
2534        // NOTE: This post-build merge means `entities_hash` and `anchors_hash` do
2535        // not reflect these additions. Incremental replacement
2536        // (`replace_fact_shard_incremental`) may miss a change if only synthetic
2537        // facts change — the `content_hash` (whole-file) will still catch it.
2538        // A future refactor should extend `build_canonical_fact_shard`'s API to
2539        // accept extra entity/anchor slices alongside `dynamic_boundaries`.
2540        for (entity, anchor, _) in eval_sub_triples {
2541            shard.entities.push(entity);
2542            shard.anchors.push(anchor);
2543        }
2544        for fact in generated_member_facts {
2545            shard.entities.push(fact.entity);
2546            shard.anchors.push(fact.anchor);
2547        }
2548
2549        shard
2550    }
2551
2552    /// Replace a [`FileFactShard`] with per-category incremental invalidation.
2553    ///
2554    /// Compares the whole-file `content_hash` first; when unchanged the
2555    /// replacement is skipped entirely.  Otherwise each per-category hash
2556    /// (`anchors_hash`, `entities_hash`, `occurrences_hash`, `edges_hash`)
2557    /// is compared individually.  Only categories whose hash changed trigger
2558    /// removal of old entries and insertion of new ones in the cross-file
2559    /// semantic indexes.
2560    ///
2561    /// **Validates: Requirements 18.1, 18.2, 18.3, 18.4, 18.5**
2562    pub fn replace_fact_shard_incremental(
2563        &self,
2564        key: &str,
2565        new_shard: FileFactShard,
2566    ) -> ShardReplaceResult {
2567        let mut shards = self.fact_shards.write();
2568        let old_shard = shards.get(key);
2569
2570        let replacement = plan_shard_replacement(
2571            old_shard.map(Self::shard_category_hashes),
2572            Self::shard_category_hashes(&new_shard),
2573        );
2574
2575        if replacement.content_unchanged {
2576            return replacement;
2577        }
2578
2579        let source_uri = new_shard.source_uri.clone();
2580
2581        // ── Update cross-file semantic indexes per category ──
2582        // Occurrences and edges are both managed by the ReferenceIndex.
2583        // When either changes we must remove+re-add the file in that index.
2584        if replacement.occurrences_updated || replacement.edges_updated {
2585            let mut ref_idx = self.semantic_reference_index.write();
2586            if old_shard.is_some() {
2587                ref_idx.remove_file(&source_uri);
2588            }
2589            ref_idx.add_file(&new_shard);
2590        }
2591
2592        // Entities feed into the import/export index (export sets are keyed
2593        // by module name derived from entity canonical names).  When entities
2594        // change we refresh the import/export index for this file.
2595        if replacement.entities_updated {
2596            let mut ie_idx = self.semantic_import_export_index.write();
2597            ie_idx.remove_file_imports(&source_uri);
2598            ie_idx.remove_module_exports(&source_uri);
2599            // Re-add is handled by the caller or future wiring; for now we
2600            // ensure stale entries are purged.
2601        }
2602
2603        // Store the new shard (always, since content_hash differs).
2604        shards.insert(key.to_string(), new_shard);
2605
2606        replacement
2607    }
2608
2609    fn shard_category_hashes(shard: &FileFactShard) -> ShardCategoryHashes {
2610        ShardCategoryHashes {
2611            content_hash: shard.content_hash,
2612            anchors_hash: shard.anchors_hash,
2613            entities_hash: shard.entities_hash,
2614            occurrences_hash: shard.occurrences_hash,
2615            edges_hash: shard.edges_hash,
2616        }
2617    }
2618
2619    /// Number of stored file fact shards.
2620    pub fn fact_shard_count(&self) -> usize {
2621        self.fact_shards.read().len()
2622    }
2623
2624    /// Fetch a file fact shard for test/inspection.
2625    pub fn file_fact_shard(&self, uri: &str) -> Option<FileFactShard> {
2626        let key = DocumentStore::uri_key(&Self::normalize_uri(uri));
2627        self.fact_shards.read().get(&key).cloned()
2628    }
2629
2630    /// Compute the [`FileId`] for a URI using the same hash used during indexing.
2631    ///
2632    /// Returns `None` if the URI has not been indexed (no fact shard is present).
2633    pub fn file_id_for_uri(&self, uri: &str) -> Option<FileId> {
2634        let key = DocumentStore::uri_key(&Self::normalize_uri(uri));
2635        self.fact_shards.read().get(&key).map(|shard| shard.file_id)
2636    }
2637
2638    /// Invoke a scoped callback with [`WorkspaceSemanticQueries`] built from
2639    /// the current semantic indexes for the given URI.
2640    ///
2641    /// The callback receives the resolved [`FileId`] and a
2642    /// [`WorkspaceSemanticQueries`] facade that borrows from read-locked
2643    /// semantic indexes. Locks are released when `f` returns.
2644    ///
2645    /// Returns `Some(result)` if the URI is indexed and semantic data is
2646    /// available, `None` if the URI has not been indexed or its fact shard is
2647    /// absent (the caller should fall back to legacy diagnostics).
2648    pub fn with_semantic_queries_for_uri<R>(
2649        &self,
2650        uri: &str,
2651        f: impl FnOnce(FileId, crate::semantic::queries::WorkspaceSemanticQueries<'_>) -> R,
2652    ) -> Option<R> {
2653        let key = DocumentStore::uri_key(&Self::normalize_uri(uri));
2654
2655        // Acquire all three read guards simultaneously. The lock order must be
2656        // consistent with every other site that acquires multiple locks to avoid
2657        // deadlock: shards → reference_index → import_export_index.
2658        let shards_guard = self.fact_shards.read();
2659        let ref_guard = self.semantic_reference_index.read();
2660        let ie_guard = self.semantic_import_export_index.read();
2661
2662        // Verify the URI is indexed before entering the callback.
2663        let file_id = shards_guard.get(&key)?.file_id;
2664
2665        let queries = crate::semantic::queries::WorkspaceSemanticQueries::new(
2666            &ref_guard,
2667            &ie_guard,
2668            &shards_guard,
2669        );
2670
2671        Some(f(file_id, queries))
2672    }
2673
2674    /// Return the number of indexed files in the workspace
2675    pub fn file_count(&self) -> usize {
2676        let files = self.files.read();
2677        files.len()
2678    }
2679
2680    /// Return the total number of symbols across all indexed files
2681    pub fn symbol_count(&self) -> usize {
2682        let files = self.files.read();
2683        files.values().map(|file_index| file_index.symbols.len()).sum()
2684    }
2685
2686    /// Get all files in a specific workspace folder
2687    ///
2688    /// # Arguments
2689    ///
2690    /// * `folder_uri` - Workspace folder URI to filter by
2691    ///
2692    /// # Returns
2693    ///
2694    /// A vector of file indices belonging to the specified folder
2695    pub fn files_in_folder(&self, folder_uri: &str) -> Vec<FileIndex> {
2696        let files = self.files.read();
2697        files.values().filter(|f| f.folder_uri.as_deref() == Some(folder_uri)).cloned().collect()
2698    }
2699
2700    /// Get all symbols in a specific workspace folder
2701    ///
2702    /// # Arguments
2703    ///
2704    /// * `folder_uri` - Workspace folder URI to filter by
2705    ///
2706    /// # Returns
2707    ///
2708    /// A vector of symbols belonging to the specified folder
2709    pub fn symbols_in_folder(&self, folder_uri: &str) -> Vec<WorkspaceSymbol> {
2710        let files = self.files.read();
2711        files
2712            .values()
2713            .filter(|f| f.folder_uri.as_deref() == Some(folder_uri))
2714            .flat_map(|f| f.symbols.iter().cloned())
2715            .collect()
2716    }
2717
/// Estimate the index's current heap footprint.
///
/// Takes read locks on the file table, the global symbol map and the
/// global reference map (in that order) and walks their contents, adding
/// up string lengths and element sizes. The figure is a rough estimate:
/// container capacity overhead is not measured. Intended for offline
/// profiling; do not call on the LSP hot path.
///
/// Only available when the `memory-profiling` feature is enabled.
#[cfg(feature = "memory-profiling")]
pub fn memory_snapshot(&self) -> crate::workspace::memory::MemorySnapshot {
    use std::mem::size_of;

    let files_guard = self.files.read();
    let symbols_guard = self.symbols.read();
    let global_refs_guard = self.global_references.read();

    // --- files map ---
    let mut files_bytes: usize = 0;
    let mut total_symbol_count: usize = 0;
    for (uri_key, fi) in files_guard.iter() {
        // Owned strings of each symbol plus the struct itself (kind, range,
        // has_body, option discriminants).
        let symbol_bytes: usize = fi
            .symbols
            .iter()
            .map(|sym| {
                sym.name.len()
                    + sym.uri.len()
                    + sym.qualified_name.as_deref().map_or(0, str::len)
                    + sym.documentation.as_deref().map_or(0, str::len)
                    + sym.container_name.as_deref().map_or(0, str::len)
                    + size_of::<WorkspaceSymbol>()
            })
            .sum();

        // Reference table: key string plus every reference stored under it.
        let reference_bytes: usize = fi
            .references
            .iter()
            .map(|(ref_name, refs)| {
                let entries: usize =
                    refs.iter().map(|r| r.uri.len() + size_of::<SymbolReference>()).sum();
                ref_name.len() + entries
            })
            .sum();

        let dependency_bytes: usize = fi.dependencies.iter().map(|dep| dep.len()).sum();

        // Map key + symbols + references + dependencies + content hash (u64).
        files_bytes += uri_key.len()
            + symbol_bytes
            + reference_bytes
            + dependency_bytes
            + size_of::<u64>();
        total_symbol_count += fi.symbols.len();
    }

    // --- global symbols map ---
    let symbols_bytes: usize = symbols_guard
        .iter()
        .map(|(qname, candidates)| {
            let entries: usize = candidates
                .iter()
                .map(|candidate| candidate.location.uri.len() + size_of::<Location>())
                .sum();
            qname.len() + entries
        })
        .sum();

    // --- global references map ---
    let global_refs_bytes: usize = global_refs_guard
        .iter()
        .map(|(sym_name, locs)| {
            let entries: usize =
                locs.iter().map(|loc| loc.uri.len() + size_of::<Location>()).sum();
            sym_name.len() + entries
        })
        .sum();

    // --- document store ---
    let document_store_bytes = self.document_store.total_text_bytes();

    crate::workspace::memory::MemorySnapshot {
        file_count: files_guard.len(),
        symbol_count: total_symbol_count,
        files_bytes,
        symbols_bytes,
        global_refs_bytes,
        document_store_bytes,
    }
}
2795
2796    /// Check if the workspace index has symbols (soft readiness check)
2797    ///
2798    /// Returns true if the index contains any symbols, indicating that
2799    /// at least some files have been indexed and the workspace is ready
2800    /// for symbol-based operations like completion.
2801    ///
2802    /// # Returns
2803    ///
2804    /// `true` if any symbols are indexed, otherwise `false`.
2805    ///
2806    /// # Examples
2807    ///
2808    /// ```rust,ignore
2809    /// use perl_parser::workspace_index::WorkspaceIndex;
2810    ///
2811    /// let index = WorkspaceIndex::new();
2812    /// assert!(!index.has_symbols());
2813    /// ```
2814    pub fn has_symbols(&self) -> bool {
2815        let files = self.files.read();
2816        files.values().any(|file_index| !file_index.symbols.is_empty())
2817    }
2818
2819    /// Search for symbols by query
2820    ///
2821    /// # Arguments
2822    ///
2823    /// * `query` - Substring to match against symbol names
2824    ///
2825    /// # Returns
2826    ///
2827    /// Symbols whose names or qualified names contain the query string.
2828    ///
2829    /// # Examples
2830    ///
2831    /// ```rust,ignore
2832    /// use perl_parser::workspace_index::WorkspaceIndex;
2833    ///
2834    /// let index = WorkspaceIndex::new();
2835    /// let _results = index.search_symbols("example");
2836    /// ```
2837    pub fn search_symbols(&self, query: &str) -> Vec<WorkspaceSymbol> {
2838        let query_lower = query.to_lowercase();
2839        let files = self.files.read();
2840        let mut results = Vec::new();
2841        for file_index in files.values() {
2842            for symbol in &file_index.symbols {
2843                if symbol.name.to_lowercase().contains(&query_lower)
2844                    || symbol
2845                        .qualified_name
2846                        .as_ref()
2847                        .map(|qn| qn.to_lowercase().contains(&query_lower))
2848                        .unwrap_or(false)
2849                {
2850                    results.push(symbol.clone());
2851                }
2852            }
2853        }
2854        results
2855    }
2856
2857    /// Find symbols by query (alias for search_symbols for compatibility)
2858    ///
2859    /// # Arguments
2860    ///
2861    /// * `query` - Substring to match against symbol names
2862    ///
2863    /// # Returns
2864    ///
2865    /// Symbols whose names or qualified names contain the query string.
2866    ///
2867    /// # Examples
2868    ///
2869    /// ```rust,ignore
2870    /// use perl_parser::workspace_index::WorkspaceIndex;
2871    ///
2872    /// let index = WorkspaceIndex::new();
2873    /// let _results = index.find_symbols("example");
2874    /// ```
2875    pub fn find_symbols(&self, query: &str) -> Vec<WorkspaceSymbol> {
2876        self.search_symbols(query)
2877    }
2878
2879    /// Rank symbols by folder proximity to a document
2880    ///
2881    /// Returns symbols sorted by: same folder > other folders
2882    ///
2883    /// # Arguments
2884    ///
2885    /// * `symbols` - Symbols to rank
2886    /// * `doc_uri` - Document URI to determine folder context
2887    ///
2888    /// # Returns
2889    ///
2890    /// Symbols ranked by folder proximity (same folder first)
2891    ///
2892    /// # Examples
2893    ///
2894    /// ```rust,ignore
2895    /// use perl_parser::workspace_index::WorkspaceIndex;
2896    ///
2897    /// let index = WorkspaceIndex::new();
2898    /// let symbols = index.search_symbols("example");
2899    /// let ranked = index.rank_symbols_by_folder(symbols, "file:///project1/src/main.pl");
2900    /// ```
2901    pub fn rank_symbols_by_folder(
2902        &self,
2903        symbols: Vec<WorkspaceSymbol>,
2904        doc_uri: &str,
2905    ) -> Vec<WorkspaceSymbol> {
2906        let doc_folder = self.determine_folder_uri(doc_uri);
2907
2908        let mut ranked: Vec<(WorkspaceSymbol, i32)> = symbols
2909            .into_iter()
2910            .map(|symbol| {
2911                let rank = if let Some(ref doc_folder_uri) = doc_folder {
2912                    if symbol.workspace_folder_uri.as_ref() == Some(doc_folder_uri) {
2913                        0 // Same folder - highest priority
2914                    } else {
2915                        1 // Different folder - lower priority
2916                    }
2917                } else {
2918                    1 // No document context - treat as different folder
2919                };
2920                (symbol, rank)
2921            })
2922            .collect();
2923
2924        // Sort by rank (lower is better), then by name for stability
2925        ranked.sort_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0.name.cmp(&b.0.name)));
2926
2927        ranked.into_iter().map(|(symbol, _)| symbol).collect()
2928    }
2929
2930    /// Search for symbols with folder-aware ranking
2931    ///
2932    /// Combines symbol search with folder proximity ranking
2933    ///
2934    /// # Arguments
2935    ///
2936    /// * `name` - Symbol name to search for
2937    /// * `doc_uri` - Document URI for ranking context
2938    ///
2939    /// # Returns
2940    ///
2941    /// Ranked symbols with same-folder results first
2942    ///
2943    /// # Examples
2944    ///
2945    /// ```rust,ignore
2946    /// use perl_parser::workspace_index::WorkspaceIndex;
2947    ///
2948    /// let index = WorkspaceIndex::new();
2949    /// let ranked = index.search_symbols_ranked("example", "file:///project1/src/main.pl");
2950    /// ```
2951    pub fn search_symbols_ranked(&self, name: &str, doc_uri: &str) -> Vec<WorkspaceSymbol> {
2952        let symbols = self.search_symbols(name);
2953        self.rank_symbols_by_folder(symbols, doc_uri)
2954    }
2955
2956    /// Determine if two symbols are in the same package
2957    ///
2958    /// # Arguments
2959    ///
2960    /// * `symbol_a` - First symbol
2961    /// * `symbol_b` - Second symbol
2962    ///
2963    /// # Returns
2964    ///
2965    /// `true` if both symbols are in the same package
2966    #[allow(dead_code)]
2967    pub fn same_package(&self, symbol_a: &WorkspaceSymbol, symbol_b: &WorkspaceSymbol) -> bool {
2968        let package_a = self.extract_package_name(&symbol_a.name);
2969        let package_b = self.extract_package_name(&symbol_b.name);
2970        package_a == package_b
2971    }
2972
2973    /// Determine if two package names are the same (helper for testing)
2974    ///
2975    /// # Arguments
2976    ///
2977    /// * `package_a` - First package name
2978    /// * `package_b` - Second package name
2979    ///
2980    /// # Returns
2981    ///
2982    /// `true` if both package names are equal
2983    #[allow(dead_code)]
2984    pub fn same_package_by_container(&self, package_a: &str, package_b: &str) -> bool {
2985        package_a == package_b
2986    }
2987
2988    /// Extract package name from a symbol name
2989    ///
2990    /// # Arguments
2991    ///
2992    /// * `symbol_name` - Symbol name (e.g., "Foo::Bar::baz" or "baz")
2993    ///
2994    /// # Returns
2995    ///
2996    /// Package name (e.g., "Foo::Bar") or None for main package
2997    #[allow(dead_code)]
2998    pub fn extract_package_name(&self, symbol_name: &str) -> Option<String> {
2999        let parts: Vec<&str> = symbol_name.split("::").collect();
3000        if parts.len() > 1 { Some(parts[..parts.len() - 1].join("::")) } else { None }
3001    }
3002
3003    /// Get symbols in a specific file
3004    ///
3005    /// # Arguments
3006    ///
3007    /// * `uri` - File URI to inspect
3008    ///
3009    /// # Returns
3010    ///
3011    /// All symbols indexed for the requested file.
3012    ///
3013    /// # Examples
3014    ///
3015    /// ```rust,ignore
3016    /// use perl_parser::workspace_index::WorkspaceIndex;
3017    ///
3018    /// let index = WorkspaceIndex::new();
3019    /// let _symbols = index.file_symbols("file:///example.pl");
3020    /// ```
3021    pub fn file_symbols(&self, uri: &str) -> Vec<WorkspaceSymbol> {
3022        let normalized_uri = Self::normalize_uri(uri);
3023        let key = DocumentStore::uri_key(&normalized_uri);
3024        let files = self.files.read();
3025
3026        files.get(&key).map(|fi| fi.symbols.clone()).unwrap_or_default()
3027    }
3028
3029    /// Get dependencies of a file
3030    ///
3031    /// # Arguments
3032    ///
3033    /// * `uri` - File URI to inspect
3034    ///
3035    /// # Returns
3036    ///
3037    /// A set of module names imported by the file.
3038    ///
3039    /// # Examples
3040    ///
3041    /// ```rust,ignore
3042    /// use perl_parser::workspace_index::WorkspaceIndex;
3043    ///
3044    /// let index = WorkspaceIndex::new();
3045    /// let _deps = index.file_dependencies("file:///example.pl");
3046    /// ```
3047    pub fn file_dependencies(&self, uri: &str) -> HashSet<String> {
3048        let normalized_uri = Self::normalize_uri(uri);
3049        let key = DocumentStore::uri_key(&normalized_uri);
3050        let files = self.files.read();
3051
3052        files.get(&key).map(|fi| fi.dependencies.clone()).unwrap_or_default()
3053    }
3054
3055    /// Find all files that depend on a module
3056    ///
3057    /// # Arguments
3058    ///
3059    /// * `module_name` - Module name to search for in file dependencies
3060    ///
3061    /// # Returns
3062    ///
3063    /// A list of file URIs that import or depend on the module.
3064    ///
3065    /// # Examples
3066    ///
3067    /// ```rust,ignore
3068    /// use perl_parser::workspace_index::WorkspaceIndex;
3069    ///
3070    /// let index = WorkspaceIndex::new();
3071    /// let _files = index.find_dependents("My::Module");
3072    /// ```
3073    pub fn find_dependents(&self, module_name: &str) -> Vec<String> {
3074        let canonical = canonicalize_perl_module_name(module_name);
3075        let legacy = legacy_perl_module_name(&canonical);
3076        let files = self.files.read();
3077        let mut dependents = Vec::new();
3078
3079        for (uri_key, file_index) in files.iter() {
3080            if file_index.dependencies.contains(module_name)
3081                || file_index.dependencies.contains(&canonical)
3082                || file_index.dependencies.contains(&legacy)
3083            {
3084                dependents.push(uri_key.clone());
3085            }
3086        }
3087
3088        dependents
3089    }
3090
3091    /// Get the document store
3092    ///
3093    /// # Returns
3094    ///
3095    /// A reference to the in-memory document store.
3096    ///
3097    /// # Examples
3098    ///
3099    /// ```rust,ignore
3100    /// use perl_parser::workspace_index::WorkspaceIndex;
3101    ///
3102    /// let index = WorkspaceIndex::new();
3103    /// let _store = index.document_store();
3104    /// ```
3105    pub fn document_store(&self) -> &DocumentStore {
3106        &self.document_store
3107    }
3108
3109    /// Find unused symbols in the workspace
3110    ///
3111    /// # Returns
3112    ///
3113    /// Symbols that have no non-definition references in the workspace.
3114    ///
3115    /// # Examples
3116    ///
3117    /// ```rust,ignore
3118    /// use perl_parser::workspace_index::WorkspaceIndex;
3119    ///
3120    /// let index = WorkspaceIndex::new();
3121    /// let _unused = index.find_unused_symbols();
3122    /// ```
3123    pub fn find_unused_symbols(&self) -> Vec<WorkspaceSymbol> {
3124        let files = self.files.read();
3125        let mut unused = Vec::new();
3126
3127        // Collect all defined symbols
3128        for (_uri_key, file_index) in files.iter() {
3129            for symbol in &file_index.symbols {
3130                // Check if this symbol has any references beyond its definition
3131                let has_usage = files.values().any(|fi| {
3132                    if let Some(refs) = fi.references.get(&symbol.name) {
3133                        refs.iter().any(|r| r.kind != ReferenceKind::Definition)
3134                    } else {
3135                        false
3136                    }
3137                });
3138
3139                if !has_usage {
3140                    unused.push(symbol.clone());
3141                }
3142            }
3143        }
3144
3145        unused
3146    }
3147
3148    /// Get all symbols that belong to a specific package
3149    ///
3150    /// # Arguments
3151    ///
3152    /// * `package_name` - Package name to match (e.g., `My::Package`)
3153    ///
3154    /// # Returns
3155    ///
3156    /// Symbols defined within the requested package.
3157    ///
3158    /// # Examples
3159    ///
3160    /// ```rust,ignore
3161    /// use perl_parser::workspace_index::WorkspaceIndex;
3162    ///
3163    /// let index = WorkspaceIndex::new();
3164    /// let _members = index.get_package_members("My::Package");
3165    /// ```
3166    pub fn get_package_members(&self, package_name: &str) -> Vec<WorkspaceSymbol> {
3167        let files = self.files.read();
3168        let mut members = Vec::new();
3169
3170        for (_uri_key, file_index) in files.iter() {
3171            for symbol in &file_index.symbols {
3172                // Check if symbol belongs to this package
3173                if let Some(ref container) = symbol.container_name {
3174                    if container == package_name {
3175                        members.push(symbol.clone());
3176                    }
3177                }
3178                // Also check qualified names
3179                if let Some(ref qname) = symbol.qualified_name {
3180                    if qname.starts_with(&format!("{}::", package_name)) {
3181                        // Avoid duplicates - only add if not already in via container_name
3182                        if symbol.container_name.as_deref() != Some(package_name) {
3183                            members.push(symbol.clone());
3184                        }
3185                    }
3186                }
3187            }
3188        }
3189
3190        members
3191    }
3192
3193    /// Find the definition location for a symbol key during Index/Navigate stages.
3194    ///
3195    /// # Arguments
3196    ///
3197    /// * `key` - Normalized symbol key to resolve.
3198    ///
3199    /// # Returns
3200    ///
3201    /// The definition location for the symbol, if found.
3202    ///
3203    /// # Examples
3204    ///
3205    /// ```rust,ignore
3206    /// use perl_parser::workspace_index::{SymKind, SymbolKey, WorkspaceIndex};
3207    /// use std::sync::Arc;
3208    ///
3209    /// let index = WorkspaceIndex::new();
3210    /// let key = SymbolKey { pkg: Arc::from("My::Package"), name: Arc::from("example"), sigil: None, kind: SymKind::Sub };
3211    /// let _def = index.find_def(&key);
3212    /// ```
3213    pub fn find_def(&self, key: &SymbolKey) -> Option<Location> {
3214        if let Some(sigil) = key.sigil {
3215            // It's a variable
3216            let var_name = format!("{}{}", sigil, key.name);
3217            self.find_definition(&var_name)
3218        } else if key.kind == SymKind::Pack {
3219            // It's a package lookup (e.g., from `use Module::Name`)
3220            // Search for the package declaration by name
3221            self.find_definition(key.pkg.as_ref())
3222                .or_else(|| self.find_definition(key.name.as_ref()))
3223        } else {
3224            // It's a subroutine or package
3225            let qualified_name = format!("{}::{}", key.pkg, key.name);
3226            self.find_definition(&qualified_name)
3227        }
3228    }
3229
3230    /// Find reference locations for a symbol key using dual indexing.
3231    ///
3232    /// Searches both qualified and bare names to support Navigate/Analyze workflows.
3233    ///
3234    /// # Arguments
3235    ///
3236    /// * `key` - Normalized symbol key to search for.
3237    ///
3238    /// # Returns
3239    ///
3240    /// All reference locations for the symbol, excluding the definition.
3241    ///
3242    /// # Examples
3243    ///
3244    /// ```rust,ignore
3245    /// use perl_parser::workspace_index::{SymKind, SymbolKey, WorkspaceIndex};
3246    /// use std::sync::Arc;
3247    ///
3248    /// let index = WorkspaceIndex::new();
3249    /// let key = SymbolKey { pkg: Arc::from("main"), name: Arc::from("example"), sigil: None, kind: SymKind::Sub };
3250    /// let _refs = index.find_refs(&key);
3251    /// ```
3252    pub fn find_refs(&self, key: &SymbolKey) -> Vec<Location> {
3253        let files_locked = self.files.read();
3254        let mut all_refs = if let Some(sigil) = key.sigil {
3255            // It's a variable - search through all files for this variable name
3256            let var_name = format!("{}{}", sigil, key.name);
3257            let mut refs = Vec::new();
3258            for (_uri_key, file_index) in files_locked.iter() {
3259                if let Some(var_refs) = file_index.references.get(&var_name) {
3260                    for reference in var_refs {
3261                        refs.push(Location { uri: reference.uri.clone(), range: reference.range });
3262                    }
3263                }
3264            }
3265            refs
3266        } else {
3267            // It's a subroutine or package
3268            if key.pkg.as_ref() == "main" {
3269                // For main package, we search for both "main::foo" and bare "foo"
3270                let mut refs = self.find_references(&format!("main::{}", key.name));
3271                // Add bare name references
3272                for (_uri_key, file_index) in files_locked.iter() {
3273                    if let Some(bare_refs) = file_index.references.get(key.name.as_ref()) {
3274                        for reference in bare_refs {
3275                            refs.push(Location {
3276                                uri: reference.uri.clone(),
3277                                range: reference.range,
3278                            });
3279                        }
3280                    }
3281                }
3282                refs
3283            } else {
3284                let qualified_name = format!("{}::{}", key.pkg, key.name);
3285                self.find_references(&qualified_name)
3286            }
3287        };
3288        drop(files_locked);
3289
3290        // Remove the definition; the caller will include it separately if needed
3291        if let Some(def) = self.find_def(key) {
3292            all_refs.retain(|loc| !(loc.uri == def.uri && loc.range == def.range));
3293        }
3294
3295        // Deduplicate by URI and range
3296        let mut seen = HashSet::new();
3297        all_refs.retain(|loc| {
3298            seen.insert((
3299                loc.uri.clone(),
3300                loc.range.start.line,
3301                loc.range.start.column,
3302                loc.range.end.line,
3303                loc.range.end.column,
3304            ))
3305        });
3306
3307        all_refs
3308    }
3309}
3310
/// AST visitor for extracting symbols and references
///
/// Walks a parsed document and fills a `FileIndex` with symbol declarations,
/// symbol references, and module dependencies.
struct IndexVisitor {
    /// Clone of the document being indexed; its `line_index` converts byte offsets
    /// into line/column positions.
    document: Document,
    /// URI stamped onto every symbol and reference recorded by this visitor.
    uri: String,
    /// Package currently in effect. Starts as `main` and is replaced whenever a
    /// `Package` or `Class` node is visited; used to qualify unqualified function calls.
    current_package: Option<String>,
    /// Copied into the `workspace_folder_uri` field of every recorded `WorkspaceSymbol`.
    workspace_folder_uri: Option<String>,
}
3318
/// Returns `true` when `byte` may begin an interpolated variable name:
/// an ASCII letter or an underscore.
fn is_interpolated_var_start(byte: u8) -> bool {
    matches!(byte, b'A'..=b'Z' | b'a'..=b'z' | b'_')
}
3322
/// Returns `true` when `byte` may continue an interpolated variable name:
/// an ASCII letter, an ASCII digit, an underscore, or a colon.
fn is_interpolated_var_continue(byte: u8) -> bool {
    matches!(byte, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_' | b':')
}
3326
/// Reports whether the byte at `index` is escaped by the backslashes before it.
///
/// Counts the unbroken run of `\` bytes that ends immediately before `index`.
/// An odd count means the last backslash escapes the byte at `index`; an even
/// count (including zero) means the backslashes pair up among themselves.
fn has_escaped_interpolation_marker(bytes: &[u8], index: usize) -> bool {
    let preceding_backslashes =
        bytes[..index].iter().rev().take_while(|&&byte| byte == b'\\').count();

    preceding_backslashes % 2 == 1
}
3341
/// Removes one pair of surrounding quotes from `raw_content`.
///
/// The input is unwrapped only when it both starts and ends with the same quote
/// character (`"` or `'`) and those are two distinct characters; otherwise the
/// input is returned unchanged.
fn strip_matching_quote_delimiters(raw_content: &str) -> &str {
    // `strip_suffix` runs on what is left after the opening quote, so a lone quote
    // character cannot count as both the opening and the closing delimiter.
    let unwrap_with = |quote: char| {
        raw_content.strip_prefix(quote).and_then(|rest| rest.strip_suffix(quote))
    };

    unwrap_with('"').or_else(|| unwrap_with('\'')).unwrap_or(raw_content)
}
3355
3356impl IndexVisitor {
3357    fn new(document: &mut Document, uri: String, workspace_folder_uri: Option<String>) -> Self {
3358        Self {
3359            document: document.clone(),
3360            uri,
3361            current_package: Some("main".to_string()),
3362            workspace_folder_uri,
3363        }
3364    }
3365
    /// Indexes the tree rooted at `node` into `file_index`.
    ///
    /// Runs two passes: the first records the declarations reported by
    /// `extract_symbol_decls`, the second walks the tree to record references
    /// and dependencies.
    fn visit(&mut self, node: &Node, file_index: &mut FileIndex) {
        // The declaration pass reads `current_package` before `visit_node` starts
        // replacing it while walking `Package`/`Class` nodes.
        self.project_symbol_declarations(node, file_index);
        self.visit_node(node, file_index);
    }
3370
3371    fn project_symbol_declarations(&self, node: &Node, file_index: &mut FileIndex) {
3372        for decl in extract_symbol_decls(node, self.current_package.as_deref()) {
3373            let (start, end) = match decl.kind {
3374                SymbolKind::Variable(_) => match decl.anchor_span {
3375                    Some(span) => span,
3376                    None => decl.full_span,
3377                },
3378                _ => decl.full_span,
3379            };
3380            let ((start_line, start_col), (end_line, end_col)) =
3381                self.document.line_index.range(start, end);
3382            let range = Range {
3383                start: Position { byte: start, line: start_line, column: start_col },
3384                end: Position { byte: end, line: end_line, column: end_col },
3385            };
3386
3387            let symbol_name = symbol_decl_name(&decl.kind, &decl.name);
3388
3389            // Suppress qualified_name for lexically-scoped variables (my, state): they
3390            // are not package-visible and must not be found by a qualified lookup such
3391            // as `Foo::x`.  `our` and `local` variables keep the qualified name because
3392            // they participate in the package namespace.
3393            let qualified_name = match &decl.declarator {
3394                Some(d) if d == "my" || d == "state" => None,
3395                _ => (!decl.qualified_name.is_empty()).then_some(decl.qualified_name),
3396            };
3397
3398            // Top-level package declarations have no containing package; suppress the
3399            // spurious "main" container that comes from the walker's initial context.
3400            let container_name = match decl.kind {
3401                SymbolKind::Package => None,
3402                _ => decl.container,
3403            };
3404
3405            file_index.symbols.push(WorkspaceSymbol {
3406                name: symbol_name.clone(),
3407                kind: decl.kind,
3408                uri: self.uri.clone(),
3409                range,
3410                qualified_name,
3411                documentation: None,
3412                container_name,
3413                has_body: true,
3414                workspace_folder_uri: self.workspace_folder_uri.clone(),
3415            });
3416
3417            file_index.references.entry(symbol_name).or_default().push(SymbolReference {
3418                uri: self.uri.clone(),
3419                range,
3420                kind: ReferenceKind::Definition,
3421            });
3422        }
3423    }
3424
3425    fn record_interpolated_variable_references(
3426        &self,
3427        raw_content: &str,
3428        range: Range,
3429        file_index: &mut FileIndex,
3430    ) {
3431        let content = strip_matching_quote_delimiters(raw_content);
3432        let bytes = content.as_bytes();
3433        let mut index = 0;
3434
3435        while index < bytes.len() {
3436            if has_escaped_interpolation_marker(bytes, index) {
3437                index += 1;
3438                continue;
3439            }
3440
3441            let sigil = match bytes[index] {
3442                b'$' => "$",
3443                b'@' => "@",
3444                _ => {
3445                    index += 1;
3446                    continue;
3447                }
3448            };
3449
3450            if index + 1 >= bytes.len() {
3451                break;
3452            }
3453
3454            let (start, needs_closing_brace) =
3455                if bytes[index + 1] == b'{' { (index + 2, true) } else { (index + 1, false) };
3456
3457            if start >= bytes.len() || !is_interpolated_var_start(bytes[start]) {
3458                index += 1;
3459                continue;
3460            }
3461
3462            let mut end = start + 1;
3463            while end < bytes.len() && is_interpolated_var_continue(bytes[end]) {
3464                end += 1;
3465            }
3466
3467            if needs_closing_brace && (end >= bytes.len() || bytes[end] != b'}') {
3468                index += 1;
3469                continue;
3470            }
3471
3472            if let Some(name) = content.get(start..end) {
3473                let var_name = format!("{sigil}{name}");
3474                file_index.references.entry(var_name).or_default().push(SymbolReference {
3475                    uri: self.uri.clone(),
3476                    range,
3477                    kind: ReferenceKind::Read,
3478                });
3479            }
3480
3481            index = if needs_closing_brace { end + 1 } else { end };
3482        }
3483    }
3484
3485    fn visit_node(&mut self, node: &Node, file_index: &mut FileIndex) {
3486        match &node.kind {
3487            NodeKind::Package { name, .. } => {
3488                let package_name = name.clone();
3489
3490                // Update the current package (replaces the previous one, not a stack)
3491                self.current_package = Some(package_name.clone());
3492            }
3493
3494            NodeKind::Subroutine { body, .. } => {
3495                // Visit body
3496                self.visit_node(body, file_index);
3497            }
3498
3499            NodeKind::VariableDeclaration { initializer, .. } => {
3500                // Visit initializer
3501                if let Some(init) = initializer {
3502                    self.visit_node(init, file_index);
3503                }
3504            }
3505
3506            NodeKind::VariableListDeclaration { initializer, .. } => {
3507                // Visit the initializer
3508                if let Some(init) = initializer {
3509                    self.visit_node(init, file_index);
3510                }
3511            }
3512
3513            NodeKind::Variable { sigil, name } => {
3514                let var_name = format!("{}{}", sigil, name);
3515
3516                // Track as usage (could be read or write based on context)
3517                file_index.references.entry(var_name).or_default().push(SymbolReference {
3518                    uri: self.uri.clone(),
3519                    range: self.node_to_range(node),
3520                    kind: ReferenceKind::Read, // Default to read, would need context for write
3521                });
3522            }
3523
3524            NodeKind::FunctionCall { name, args, .. } => {
3525                let func_name = name.clone();
3526                let location = self.node_to_range(node);
3527
3528                // Determine package and bare name
3529                let (pkg, bare_name) = if let Some(idx) = func_name.rfind("::") {
3530                    (&func_name[..idx], &func_name[idx + 2..])
3531                } else {
3532                    (self.current_package.as_deref().unwrap_or("main"), func_name.as_str())
3533                };
3534
3535                let qualified = format!("{}::{}", pkg, bare_name);
3536
3537                // Track as usage for both qualified and bare forms
3538                // This dual indexing allows finding references whether the function is called
3539                // as `process_data()` or `Utils::process_data()`
3540                file_index.references.entry(bare_name.to_string()).or_default().push(
3541                    SymbolReference {
3542                        uri: self.uri.clone(),
3543                        range: location,
3544                        kind: ReferenceKind::Usage,
3545                    },
3546                );
3547                file_index.references.entry(qualified).or_default().push(SymbolReference {
3548                    uri: self.uri.clone(),
3549                    range: location,
3550                    kind: ReferenceKind::Usage,
3551                });
3552
3553                if name == "extends" || name == "with" {
3554                    for module_name in extract_module_names_from_call_args(args) {
3555                        file_index
3556                            .dependencies
3557                            .insert(normalize_dependency_module_name(&module_name));
3558                    }
3559                } else if name == "require" {
3560                    if let Some(module_name) = extract_module_name_from_require_args(args) {
3561                        file_index
3562                            .dependencies
3563                            .insert(normalize_dependency_module_name(&module_name));
3564                    }
3565                }
3566
3567                // Visit arguments
3568                for arg in args {
3569                    self.visit_node(arg, file_index);
3570                }
3571            }
3572
3573            NodeKind::Use { module, args, .. } => {
3574                let module_name = normalize_dependency_module_name(module);
3575                file_index.dependencies.insert(module_name.clone());
3576
3577                // Also track actual parent/base class names for dependency discovery.
3578                // `use parent 'Foo::Bar'` stores module="parent" and args=["'Foo::Bar'"],
3579                // so find_dependents("Foo::Bar") would miss files with only use parent.
3580                if module == "parent" || module == "base" {
3581                    for name in extract_module_names_from_use_args(args) {
3582                        file_index.dependencies.insert(normalize_dependency_module_name(&name));
3583                    }
3584                }
3585
3586                // Track as import
3587                file_index.references.entry(module_name).or_default().push(SymbolReference {
3588                    uri: self.uri.clone(),
3589                    range: self.node_to_range(node),
3590                    kind: ReferenceKind::Import,
3591                });
3592            }
3593
3594            // Handle assignment to detect writes
3595            NodeKind::Assignment { lhs, rhs, op } => {
3596                // For compound assignments (+=, -=, .=, etc.), the LHS is both read and written
3597                let is_compound = op != "=";
3598
3599                if let NodeKind::Variable { sigil, name } = &lhs.kind {
3600                    let var_name = format!("{}{}", sigil, name);
3601
3602                    // For compound assignments, it's a read first
3603                    if is_compound {
3604                        file_index.references.entry(var_name.clone()).or_default().push(
3605                            SymbolReference {
3606                                uri: self.uri.clone(),
3607                                range: self.node_to_range(lhs),
3608                                kind: ReferenceKind::Read,
3609                            },
3610                        );
3611                    }
3612
3613                    // Then it's always a write
3614                    file_index.references.entry(var_name).or_default().push(SymbolReference {
3615                        uri: self.uri.clone(),
3616                        range: self.node_to_range(lhs),
3617                        kind: ReferenceKind::Write,
3618                    });
3619                }
3620
3621                // Right side could have reads
3622                self.visit_node(rhs, file_index);
3623            }
3624
3625            // Recursively visit child nodes
3626            NodeKind::Block { statements } => {
3627                for stmt in statements {
3628                    self.visit_node(stmt, file_index);
3629                }
3630            }
3631
3632            NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
3633                self.visit_node(condition, file_index);
3634                self.visit_node(then_branch, file_index);
3635                for (cond, branch) in elsif_branches {
3636                    self.visit_node(cond, file_index);
3637                    self.visit_node(branch, file_index);
3638                }
3639                if let Some(else_br) = else_branch {
3640                    self.visit_node(else_br, file_index);
3641                }
3642            }
3643
3644            NodeKind::While { condition, body, continue_block } => {
3645                self.visit_node(condition, file_index);
3646                self.visit_node(body, file_index);
3647                if let Some(cont) = continue_block {
3648                    self.visit_node(cont, file_index);
3649                }
3650            }
3651
3652            NodeKind::For { init, condition, update, body, continue_block } => {
3653                if let Some(i) = init {
3654                    self.visit_node(i, file_index);
3655                }
3656                if let Some(c) = condition {
3657                    self.visit_node(c, file_index);
3658                }
3659                if let Some(u) = update {
3660                    self.visit_node(u, file_index);
3661                }
3662                self.visit_node(body, file_index);
3663                if let Some(cont) = continue_block {
3664                    self.visit_node(cont, file_index);
3665                }
3666            }
3667
3668            NodeKind::Foreach { variable, list, body, continue_block } => {
3669                // Iterator is a write context
3670                if let Some(cb) = continue_block {
3671                    self.visit_node(cb, file_index);
3672                }
3673                if let NodeKind::Variable { sigil, name } = &variable.kind {
3674                    let var_name = format!("{}{}", sigil, name);
3675                    file_index.references.entry(var_name).or_default().push(SymbolReference {
3676                        uri: self.uri.clone(),
3677                        range: self.node_to_range(variable),
3678                        kind: ReferenceKind::Write,
3679                    });
3680                }
3681                self.visit_node(variable, file_index);
3682                self.visit_node(list, file_index);
3683                self.visit_node(body, file_index);
3684            }
3685
3686            NodeKind::MethodCall { object, method, args } => {
3687                // Check if this is a static method call (Package->method)
3688                let qualified_method = if let NodeKind::Identifier { name } = &object.kind {
3689                    // Static method call: Package->method
3690                    Some(format!("{}::{}", name, method))
3691                } else {
3692                    // Instance method call: $obj->method
3693                    None
3694                };
3695
3696                // Object is a read context
3697                self.visit_node(object, file_index);
3698
3699                // Track method call under BOTH the qualified form (for static calls
3700                // like `Pkg->method`) AND the bare method name. This mirrors the
3701                // FunctionCall dual-key storage above (PR #122 dual-indexing pattern)
3702                // so that bare-name lookups (e.g. `find_unused_symbols`,
3703                // `count_usages("method")`) consistently find static method call sites.
3704                // See #6799 for the original asymmetric-storage bug report.
3705                let location = self.node_to_range(node);
3706                if let Some(qualified_method) = qualified_method.as_ref() {
3707                    file_index.references.entry(qualified_method.clone()).or_default().push(
3708                        SymbolReference {
3709                            uri: self.uri.clone(),
3710                            range: location,
3711                            kind: ReferenceKind::Usage,
3712                        },
3713                    );
3714                }
3715                file_index.references.entry(method.clone()).or_default().push(SymbolReference {
3716                    uri: self.uri.clone(),
3717                    range: location,
3718                    kind: ReferenceKind::Usage,
3719                });
3720
3721                if method == "import"
3722                    && let NodeKind::Identifier { name: module_name } = &object.kind
3723                {
3724                    for symbol in extract_manual_import_symbols(args) {
3725                        file_index.references.entry(symbol).or_default().push(SymbolReference {
3726                            uri: self.uri.clone(),
3727                            range: self.node_to_range(node),
3728                            kind: ReferenceKind::Import,
3729                        });
3730                    }
3731                    file_index.dependencies.insert(normalize_dependency_module_name(module_name));
3732                }
3733
3734                // Visit arguments
3735                for arg in args {
3736                    self.visit_node(arg, file_index);
3737                }
3738            }
3739
3740            NodeKind::No { module, .. } => {
3741                let module_name = normalize_dependency_module_name(module);
3742                file_index.dependencies.insert(module_name);
3743            }
3744
3745            NodeKind::Class { name, .. } => {
3746                self.current_package = Some(name.clone());
3747            }
3748
3749            NodeKind::Method { body, signature, .. } => {
3750                // Visit params
3751                if let Some(sig) = signature {
3752                    if let NodeKind::Signature { parameters } = &sig.kind {
3753                        for param in parameters {
3754                            self.visit_node(param, file_index);
3755                        }
3756                    }
3757                }
3758
3759                // Visit body
3760                self.visit_node(body, file_index);
3761            }
3762
3763            NodeKind::String { value, interpolated } => {
3764                if *interpolated {
3765                    let range = self.node_to_range(node);
3766                    self.record_interpolated_variable_references(value, range, file_index);
3767                }
3768            }
3769
3770            NodeKind::Heredoc { content, interpolated, .. } => {
3771                if *interpolated {
3772                    let range = self.node_to_range(node);
3773                    self.record_interpolated_variable_references(content, range, file_index);
3774                }
3775            }
3776
3777            // Handle special assignments (++ and --)
3778            NodeKind::Unary { op, operand } if op == "++" || op == "--" => {
3779                // Pre/post increment/decrement are both read and write
3780                if let NodeKind::Variable { sigil, name } = &operand.kind {
3781                    let var_name = format!("{}{}", sigil, name);
3782
3783                    // It's both a read and a write
3784                    file_index.references.entry(var_name.clone()).or_default().push(
3785                        SymbolReference {
3786                            uri: self.uri.clone(),
3787                            range: self.node_to_range(operand),
3788                            kind: ReferenceKind::Read,
3789                        },
3790                    );
3791
3792                    file_index.references.entry(var_name).or_default().push(SymbolReference {
3793                        uri: self.uri.clone(),
3794                        range: self.node_to_range(operand),
3795                        kind: ReferenceKind::Write,
3796                    });
3797                }
3798            }
3799
3800            _ => {
3801                // For other node types, just visit children
3802                self.visit_children(node, file_index);
3803            }
3804        }
3805    }
3806
    /// Fallback traversal for node kinds that `visit_node` does not handle itself.
    ///
    /// Each listed kind forwards its nested nodes back to `visit_node`. Kinds that
    /// are not listed fall through to the final arm and nothing beneath them is visited.
    fn visit_children(&mut self, node: &Node, file_index: &mut FileIndex) {
        // Generic visitor for unhandled node types - visit all nested nodes
        match &node.kind {
            NodeKind::Program { statements } => {
                for stmt in statements {
                    self.visit_node(stmt, file_index);
                }
            }
            NodeKind::ExpressionStatement { expression } => {
                self.visit_node(expression, file_index);
            }
            // Expression nodes
            NodeKind::Unary { operand, .. } => {
                self.visit_node(operand, file_index);
            }
            NodeKind::Binary { left, right, .. } => {
                self.visit_node(left, file_index);
                self.visit_node(right, file_index);
            }
            NodeKind::Ternary { condition, then_expr, else_expr } => {
                self.visit_node(condition, file_index);
                self.visit_node(then_expr, file_index);
                self.visit_node(else_expr, file_index);
            }
            NodeKind::ArrayLiteral { elements } => {
                for elem in elements {
                    self.visit_node(elem, file_index);
                }
            }
            NodeKind::HashLiteral { pairs } => {
                // Both the key and the value expression of every pair are visited.
                for (key, value) in pairs {
                    self.visit_node(key, file_index);
                    self.visit_node(value, file_index);
                }
            }
            NodeKind::Return { value } => {
                if let Some(val) = value {
                    self.visit_node(val, file_index);
                }
            }
            // Block-carrying constructs share one arm: only the block is visited.
            NodeKind::Eval { block } | NodeKind::Do { block } | NodeKind::Defer { block } => {
                self.visit_node(block, file_index);
            }
            NodeKind::Try { body, catch_blocks, finally_block } => {
                self.visit_node(body, file_index);
                // The first element of each catch entry is ignored; only its block is visited.
                for (_, block) in catch_blocks {
                    self.visit_node(block, file_index);
                }
                if let Some(finally) = finally_block {
                    self.visit_node(finally, file_index);
                }
            }
            NodeKind::Given { expr, body } => {
                self.visit_node(expr, file_index);
                self.visit_node(body, file_index);
            }
            NodeKind::When { condition, body } => {
                self.visit_node(condition, file_index);
                self.visit_node(body, file_index);
            }
            NodeKind::Default { body } => {
                self.visit_node(body, file_index);
            }
            NodeKind::StatementModifier { statement, condition, .. } => {
                // Visited in AST field order: the statement first, then its condition.
                self.visit_node(statement, file_index);
                self.visit_node(condition, file_index);
            }
            NodeKind::VariableWithAttributes { variable, .. } => {
                self.visit_node(variable, file_index);
            }
            NodeKind::LabeledStatement { statement, .. } => {
                self.visit_node(statement, file_index);
            }
            _ => {
                // For other node types, no children to visit
            }
        }
    }
3885
3886    fn node_to_range(&mut self, node: &Node) -> Range {
3887        // LineIndex.range returns line numbers and UTF-16 code unit columns
3888        let ((start_line, start_col), (end_line, end_col)) =
3889            self.document.line_index.range(node.location.start, node.location.end);
3890        // Use byte offsets from node.location directly
3891        Range {
3892            start: Position { byte: node.location.start, line: start_line, column: start_col },
3893            end: Position { byte: node.location.end, line: end_line, column: end_col },
3894        }
3895    }
3896}
3897
3898fn symbol_decl_name(kind: &SymbolKind, name: &str) -> String {
3899    match kind {
3900        SymbolKind::Variable(VarKind::Scalar) => format!("${name}"),
3901        SymbolKind::Variable(VarKind::Array) => format!("@{name}"),
3902        SymbolKind::Variable(VarKind::Hash) => format!("%{name}"),
3903        _ => name.to_string(),
3904    }
3905}
3906
3907/// Extract bare module names from the argument list of a `use parent` / `use base` statement.
3908///
3909/// The `args` field of `NodeKind::Use` stores raw argument strings as the parser captured them.
3910/// For `use parent 'Foo::Bar'` this is `["'Foo::Bar'"]`.
3911/// For `use parent qw(Foo::Bar Other::Base)` this is `["qw(Foo::Bar Other::Base)"]`.
3912/// For `use parent -norequire, 'Foo::Bar'` this is `["-norequire", "'Foo::Bar'"]`.
3913///
3914/// Returns the module names with surrounding quotes/qw wrappers stripped.
3915/// Tokens starting with `-` or not matching `[\w::']+` are silently skipped.
3916fn extract_module_names_from_use_args(args: &[String]) -> Vec<String> {
3917    use std::collections::HashSet;
3918
3919    fn normalize_module_name(token: &str) -> Option<&str> {
3920        let stripped = token.trim_matches(|c: char| {
3921            matches!(c, '\'' | '"' | '(' | ')' | '[' | ']' | '{' | '}' | ',' | ';')
3922        });
3923
3924        if stripped.is_empty() || stripped.starts_with('-') {
3925            return None;
3926        }
3927
3928        stripped
3929            .chars()
3930            .all(|c| c.is_alphanumeric() || c == '_' || c == ':' || c == '\'')
3931            .then_some(stripped)
3932    }
3933
3934    let joined = args.join(" ");
3935
3936    let (qw_words, remainder) = extract_qw_words(&joined);
3937    let mut modules = Vec::new();
3938    let mut seen = HashSet::new();
3939    for word in qw_words {
3940        if let Some(candidate) = normalize_module_name(&word) {
3941            let canonical = canonicalize_perl_module_name(candidate);
3942            if seen.insert(canonical.clone()) {
3943                modules.push(canonical);
3944            }
3945        }
3946    }
3947
3948    for token in remainder.split_whitespace().flat_map(|t| t.split(',')) {
3949        if let Some(candidate) = normalize_module_name(token) {
3950            let canonical = canonicalize_perl_module_name(candidate);
3951            if seen.insert(canonical.clone()) {
3952                modules.push(canonical);
3953            }
3954        }
3955    }
3956
3957    modules
3958}
3959
3960fn extract_module_names_from_call_args(args: &[Node]) -> Vec<String> {
3961    fn collect_from_node(node: &Node, out: &mut Vec<String>) {
3962        match &node.kind {
3963            NodeKind::String { value, .. } => {
3964                out.extend(extract_module_names_from_use_args(std::slice::from_ref(value)));
3965            }
3966            NodeKind::Identifier { name } => {
3967                out.extend(extract_module_names_from_use_args(std::slice::from_ref(name)));
3968            }
3969            NodeKind::ArrayLiteral { elements } => {
3970                for element in elements {
3971                    collect_from_node(element, out);
3972                }
3973            }
3974            NodeKind::FunctionCall { name, args, .. } if name == "qw" => {
3975                for arg in args {
3976                    collect_from_node(arg, out);
3977                }
3978            }
3979            _ => {}
3980        }
3981    }
3982
3983    let mut modules = Vec::new();
3984    for arg in args {
3985        collect_from_node(arg, &mut modules);
3986    }
3987    modules
3988}
3989
/// Rewrite Perl's legacy `'` package separator (e.g. `Foo'Bar`) as `::`.
///
/// Canonicalizing to `::` lets lookups and dependency matching share one key shape.
/// Every `'` in `name` is replaced; all other characters are kept as they are.
fn canonicalize_perl_module_name(name: &str) -> String {
    let mut canonical = String::with_capacity(name.len());
    for ch in name.chars() {
        if ch == '\'' {
            canonical.push_str("::");
        } else {
            canonical.push(ch);
        }
    }
    canonical
}
3995
/// Rewrite every `::` separator in `name` as the legacy `'` separator.
///
/// Matches are taken left to right without overlapping, so a stray single `:`
/// is left untouched.
fn legacy_perl_module_name(name: &str) -> String {
    let segments: Vec<&str> = name.split("::").collect();
    segments.join("'")
}
3999
/// Normalize a module name for dependency storage and lookup.
///
/// Converts legacy `'` separators to `::` so stored keys are canonical.
/// This is a thin, intention-revealing wrapper: the actual rewrite is done by
/// `canonicalize_perl_module_name`, so both code paths always agree on the key shape.
fn normalize_dependency_module_name(module_name: &str) -> String {
    canonicalize_perl_module_name(module_name)
}
4005
/// Separate `qw…` word lists from the rest of a serialised argument string.
///
/// Scans `input` for Perl's `qw` quote-like operator. For every complete list
/// the whitespace-separated words between the delimiters are collected; every
/// character outside such a list is copied to the remainder unchanged.
///
/// * Bracketing delimiters (`()`, `[]`, `{}`, `<>`) nest; any other delimiter
///   closes on its next occurrence.
/// * Whitespace between `qw` and the opening delimiter is allowed.
/// * `qw` followed by an identifier character or a quote is not a list.
/// * `qw` is only the operator when it starts a token. When it is preceded by
///   an identifier character (alphanumeric or `_`) or by a `::` package
///   separator it is part of a name (`my_qw`, `Foo::qw::Bar`) and stays in the
///   remainder. Without this check `Foo::qw::Bar` would be read as a
///   `:`-delimited empty list and collapse to `Foo::Bar`.
/// * If a list is never closed, the text from that `qw` onwards is appended to
///   the remainder and scanning stops.
///
/// Returns `(words, remainder)`.
fn extract_qw_words(input: &str) -> (Vec<String>, String) {
    /// Whether `c` may appear inside a Perl identifier.
    fn is_identifier_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    let chars: Vec<char> = input.chars().collect();
    let mut words = Vec::new();
    let mut remainder = String::new();
    let mut i = 0;

    while i < chars.len() {
        let at_qw = chars[i] == 'q' && chars.get(i + 1) == Some(&'w');
        // `qw` glued to a preceding identifier or `::` is part of a longer name.
        let inside_name = i > 0
            && (is_identifier_char(chars[i - 1])
                || (i >= 2 && chars[i - 1] == ':' && chars[i - 2] == ':'));

        // Index of the opening delimiter when this really is the start of a list.
        let opener = if at_qw && !inside_name {
            chars[i + 2..]
                .iter()
                .position(|c| !c.is_whitespace())
                .map(|offset| i + 2 + offset)
                // Identifier characters and quotes cannot open a qw list here.
                .filter(|&j| !(is_identifier_char(chars[j]) || chars[j] == '\'' || chars[j] == '"'))
        } else {
            None
        };
        let Some(j) = opener else {
            remainder.push(chars[i]);
            i += 1;
            continue;
        };

        let open = chars[j];
        let paired_close = match open {
            '(' => Some(')'),
            '[' => Some(']'),
            '{' => Some('}'),
            '<' => Some('>'),
            _ => None,
        };

        // Locate the index of the delimiter that terminates the list, if any.
        let end = match paired_close {
            Some(close) => {
                let mut depth = 1usize;
                let mut found = None;
                for (offset, &c) in chars[j + 1..].iter().enumerate() {
                    if c == open {
                        depth += 1;
                    } else if c == close {
                        depth -= 1;
                        if depth == 0 {
                            found = Some(j + 1 + offset);
                            break;
                        }
                    }
                }
                found
            }
            None => chars[j + 1..].iter().position(|&c| c == open).map(|offset| j + 1 + offset),
        };
        let Some(k) = end else {
            // Unterminated list: keep the raw text so nothing is silently dropped.
            remainder.extend(chars[i..].iter());
            break;
        };

        let content: String = chars[j + 1..k].iter().collect();
        words.extend(content.split_whitespace().map(String::from));
        i = k + 1;
    }

    (words, remainder)
}
4084
4085fn extract_module_name_from_require_args(args: &[Node]) -> Option<String> {
4086    let first = args.first()?;
4087    match &first.kind {
4088        NodeKind::Identifier { name } => Some(name.clone()),
4089        NodeKind::String { value, .. } => {
4090            let cleaned = value.trim_matches('\'').trim_matches('"').trim();
4091            if cleaned.is_empty() {
4092                return None;
4093            }
4094            Some(cleaned.trim_end_matches(".pm").replace('/', "::"))
4095        }
4096        _ => None,
4097    }
4098}
4099
4100fn extract_manual_import_symbols(args: &[Node]) -> Vec<String> {
4101    fn push_if_bareword(out: &mut Vec<String>, token: &str) {
4102        let bare = token.trim().trim_matches('"').trim_matches('\'').trim();
4103        if bare.is_empty() || bare == "," {
4104            return;
4105        }
4106        let is_bareword = bare.bytes().all(|ch| ch.is_ascii_alphanumeric() || ch == b'_')
4107            && bare.as_bytes().first().is_some_and(|ch| ch.is_ascii_alphabetic() || *ch == b'_');
4108        if is_bareword {
4109            out.push(bare.to_string());
4110        }
4111    }
4112
4113    let mut symbols = Vec::new();
4114    for arg in args {
4115        match &arg.kind {
4116            NodeKind::String { value, .. } => push_if_bareword(&mut symbols, value),
4117            NodeKind::Identifier { name } => {
4118                if name.starts_with("qw") {
4119                    let content = name
4120                        .trim_start_matches("qw")
4121                        .trim_start_matches(|c: char| "([{/<|!".contains(c))
4122                        .trim_end_matches(|c: char| ")]}/|!>".contains(c));
4123                    for token in content.split_whitespace() {
4124                        push_if_bareword(&mut symbols, token);
4125                    }
4126                } else {
4127                    push_if_bareword(&mut symbols, name);
4128                }
4129            }
4130            NodeKind::ArrayLiteral { elements } => {
4131                for element in elements {
4132                    if let NodeKind::String { value, .. } = &element.kind {
4133                        push_if_bareword(&mut symbols, value);
4134                    }
4135                }
4136            }
4137            _ => {}
4138        }
4139    }
4140    symbols.sort();
4141    symbols.dedup();
4142    symbols
4143}
4144
4145/// Extract constant names from the `args` field of a `use constant` `NodeKind::Use` node.
4146///
4147/// The parser serialises `use constant` args in two distinct forms:
4148///
4149/// **Scalar form** — `use constant FOO => 42;`
4150///   → args: `["FOO", "42"]`  (the `=>` is consumed by the parser, not stored)
4151///   → The first arg is the constant name; remaining args are the value.
4152///
4153/// **Hash form** — `use constant { FOO => 1, BAR => 2 };`
4154///   → args: `["{", "FOO", "=>", "1", ",", "BAR", "=>", "2", "}"]`
4155///   → Identifiers immediately followed by `=>` are constant names.
4156///
4157/// **qw form** — `use constant qw(FOO BAR);`
4158///   → args: `["qw(FOO BAR)"]`
4159///   → Words inside the qw list are constant names.
4160///
4161/// Returns a deduplicated list of bare constant names (e.g. `["FOO", "BAR"]`).
4162#[cfg(test)]
4163fn extract_constant_names_from_use_args(args: &[String]) -> Vec<String> {
4164    use std::collections::HashSet;
4165
4166    fn push_unique(names: &mut Vec<String>, seen: &mut HashSet<String>, candidate: &str) {
4167        if seen.insert(candidate.to_string()) {
4168            names.push(candidate.to_string());
4169        }
4170    }
4171
4172    fn normalize_constant_name(token: &str) -> Option<&str> {
4173        let stripped = token.trim_matches(|c: char| {
4174            matches!(c, '\'' | '"' | '(' | ')' | '[' | ']' | '{' | '}' | ',' | ';')
4175        });
4176
4177        if stripped.is_empty() || stripped.starts_with('-') {
4178            return None;
4179        }
4180
4181        stripped.chars().all(|c| c.is_alphanumeric() || c == '_').then_some(stripped)
4182    }
4183
4184    let mut names = Vec::new();
4185    let mut seen = HashSet::new();
4186
4187    // Scalar form (most common): args = ["FOO", <value...>]
4188    // The first arg is a plain identifier with no `=>` in args at all.
4189    // Hash form starts with `{`; qw form starts with `qw`.
4190    let first = match args.first() {
4191        Some(f) => f.as_str(),
4192        None => return names,
4193    };
4194
4195    // qw form: single arg starting with "qw"
4196    if first.starts_with("qw") {
4197        let (qw_words, remainder) = extract_qw_words(first);
4198        if remainder.trim().is_empty() {
4199            for word in qw_words {
4200                if let Some(candidate) = normalize_constant_name(&word) {
4201                    push_unique(&mut names, &mut seen, candidate);
4202                }
4203            }
4204            return names;
4205        }
4206
4207        // Fallback for odd tokenisation: tolerate `qw` followed by spacing before the opener.
4208        let content = first.trim_start_matches("qw").trim_start();
4209        let content = content
4210            .trim_start_matches(|c: char| "([{/<|!".contains(c))
4211            .trim_end_matches(|c: char| ")]}/|!>".contains(c));
4212        for word in content.split_whitespace() {
4213            if let Some(candidate) = normalize_constant_name(word) {
4214                push_unique(&mut names, &mut seen, candidate);
4215            }
4216        }
4217        return names;
4218    }
4219
4220    // Hash form: args start with "{", "+{", or "+" followed by "{"
4221    let starts_hash_form = first == "{"
4222        || first == "+{"
4223        || (first == "+" && args.get(1).map(String::as_str) == Some("{"));
4224    if starts_hash_form {
4225        let mut skipped_leading_plus = false;
4226        let mut iter = args.iter().peekable();
4227        while let Some(arg) = iter.next() {
4228            // Some parser/tokenizer variants can emit "+{" as a single token for
4229            // `use constant +{ ... }`. Treat it as structural punctuation.
4230            if arg == "+{" {
4231                skipped_leading_plus = true;
4232                continue;
4233            }
4234            if arg == "+" && !skipped_leading_plus {
4235                skipped_leading_plus = true;
4236                continue;
4237            }
4238            if arg == "{" || arg == "}" || arg == "," || arg == "=>" {
4239                continue;
4240            }
4241            if let Some(candidate) = normalize_constant_name(arg)
4242                && iter.peek().map(|s| s.as_str()) == Some("=>")
4243            {
4244                push_unique(&mut names, &mut seen, candidate);
4245            }
4246        }
4247        return names;
4248    }
4249
4250    // Scalar form: first arg is the constant name (if it is a plain identifier)
4251    // Remaining args are the value and are skipped.
4252    if let Some(candidate) = normalize_constant_name(first) {
4253        push_unique(&mut names, &mut seen, candidate);
4254    }
4255
4256    names
4257}
4258
/// `Default` simply delegates to `WorkspaceIndex::new`, so both construction
/// paths produce the same value.
impl Default for WorkspaceIndex {
    /// Build an index by calling `Self::new()`.
    fn default() -> Self {
        Self::new()
    }
}
4264
/// LSP adapter utilities for Navigate/Analyze workflows.
///
/// Converts the index's internal `Location` values into `lsp_types` locations.
#[cfg(all(feature = "workspace", feature = "lsp-compat"))]
pub mod lsp_adapter {
    use super::Location as IxLocation;
    use lsp_types::Location as LspLocation;
    // lsp_types uses Uri, not Url
    type LspUrl = lsp_types::Uri;

    /// Convert an internal location to an LSP Location for Navigate workflows.
    ///
    /// # Arguments
    ///
    /// * `ix` - Internal index location with URI and range information.
    ///
    /// # Returns
    ///
    /// `Some(LspLocation)` when the URI can be parsed, or `None` otherwise.
    /// The line/column pairs of the internal range are copied into the LSP
    /// `line`/`character` fields as they are.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_parser::workspace_index::{Location as IxLocation, lsp_adapter::to_lsp_location};
    /// use lsp_types::Range;
    ///
    /// let ix_loc = IxLocation { uri: "file:///path.pl".to_string(), range: Range::default() };
    /// let _ = to_lsp_location(&ix_loc);
    /// ```
    pub fn to_lsp_location(ix: &IxLocation) -> Option<LspLocation> {
        let uri = parse_url(&ix.uri)?;
        let range = lsp_types::Range {
            start: lsp_types::Position {
                line: ix.range.start.line,
                character: ix.range.start.column,
            },
            end: lsp_types::Position { line: ix.range.end.line, character: ix.range.end.column },
        };
        Some(LspLocation { uri, range })
    }

    /// Convert multiple index locations to LSP Locations for Navigate/Analyze workflows.
    ///
    /// # Arguments
    ///
    /// * `all` - Iterator of internal index locations to convert.
    ///
    /// # Returns
    ///
    /// Vector of successfully converted LSP locations, in input order; entries
    /// whose URI cannot be parsed are dropped.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_parser::workspace_index::{Location as IxLocation, lsp_adapter::to_lsp_locations};
    /// use lsp_types::Range;
    ///
    /// let locations = vec![IxLocation { uri: "file:///script1.pl".to_string(), range: Range::default() }];
    /// let lsp_locations = to_lsp_locations(locations);
    /// assert_eq!(lsp_locations.len(), 1);
    /// ```
    pub fn to_lsp_locations(all: impl IntoIterator<Item = IxLocation>) -> Vec<LspLocation> {
        let mut converted = Vec::new();
        for ix in all {
            if let Some(location) = to_lsp_location(&ix) {
                converted.push(location);
            }
        }
        converted
    }

    /// Parse a string as a URI, falling back to treating it as a filesystem path.
    #[cfg(not(target_arch = "wasm32"))]
    fn parse_url(s: &str) -> Option<LspUrl> {
        // lsp_types::Uri uses FromStr, not TryFrom
        use std::str::FromStr;

        // Try parsing as URI first
        if let Ok(uri) = LspUrl::from_str(s) {
            return Some(uri);
        }

        // Try as a file path if URI parsing fails
        let absolute = std::path::Path::new(s).canonicalize().ok()?;
        // Use proper URI construction with percent-encoding
        let uri_string = crate::workspace_index::fs_path_to_uri(&absolute).ok()?;
        LspUrl::from_str(&uri_string).ok()
    }

    /// Parse a string as a URL (wasm32 version - no filesystem fallback)
    #[cfg(target_arch = "wasm32")]
    fn parse_url(s: &str) -> Option<LspUrl> {
        use std::str::FromStr;

        let parsed = LspUrl::from_str(s);
        parsed.ok()
    }
}
4352
4353#[cfg(test)]
4354mod tests {
4355    use super::*;
4356    use perl_tdd_support::{must, must_some};
4357
4358    #[test]
4359    fn test_use_constant_indexed_as_constant_symbol() {
4360        let index = WorkspaceIndex::new();
4361        let uri = "file:///lib/My/Config.pm";
4362        let code = r#"package My::Config;
4363use constant PI => 3.14159;
4364use constant {
4365    MAX_RETRIES => 3,
4366    TIMEOUT     => 30,
4367};
43681;
4369"#;
4370        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4371
4372        let symbols = index.file_symbols(uri);
4373        assert!(
4374            symbols.iter().any(|s| s.name == "PI" && s.kind == SymbolKind::Constant),
4375            "PI should be indexed as a Constant symbol; got: {:?}",
4376            symbols.iter().map(|s| (&s.name, &s.kind)).collect::<Vec<_>>()
4377        );
4378        assert!(
4379            symbols.iter().any(|s| s.name == "MAX_RETRIES" && s.kind == SymbolKind::Constant),
4380            "MAX_RETRIES should be indexed"
4381        );
4382        assert!(
4383            symbols.iter().any(|s| s.name == "TIMEOUT" && s.kind == SymbolKind::Constant),
4384            "TIMEOUT should be indexed"
4385        );
4386
4387        // Qualified lookup should also work
4388        let def = index.find_definition("My::Config::PI");
4389        assert!(def.is_some(), "find_definition('My::Config::PI') should succeed");
4390    }
4391
4392    #[test]
4393    fn test_extract_constant_names_deduplicates_qw_form() {
4394        let names = extract_constant_names_from_use_args(&["qw(FOO BAR FOO)".to_string()]);
4395        assert_eq!(names, vec!["FOO", "BAR"]);
4396    }
4397
4398    #[test]
4399    fn test_extract_constant_names_accepts_quoted_scalar_form() {
4400        let names = extract_constant_names_from_use_args(&[
4401            "'HTTP_OK'".to_string(),
4402            "=>".to_string(),
4403            "200".to_string(),
4404        ]);
4405        assert_eq!(names, vec!["HTTP_OK"]);
4406    }
4407
4408    #[test]
4409    fn test_extract_constant_names_accepts_quoted_hash_form() {
4410        let names = extract_constant_names_from_use_args(&[
4411            "{".to_string(),
4412            "'FOO'".to_string(),
4413            "=>".to_string(),
4414            "1".to_string(),
4415            ",".to_string(),
4416            "\"BAR\"".to_string(),
4417            "=>".to_string(),
4418            "2".to_string(),
4419            "}".to_string(),
4420        ]);
4421        assert_eq!(names, vec!["FOO", "BAR"]);
4422    }
4423
4424    #[test]
4425    fn test_extract_constant_names_accepts_plus_hash_form_split_tokens() {
4426        let names = extract_constant_names_from_use_args(&[
4427            "+".to_string(),
4428            "{".to_string(),
4429            "FOO".to_string(),
4430            "=>".to_string(),
4431            "1".to_string(),
4432            ",".to_string(),
4433            "BAR".to_string(),
4434            "=>".to_string(),
4435            "2".to_string(),
4436            "}".to_string(),
4437        ]);
4438        assert_eq!(names, vec!["FOO", "BAR"]);
4439    }
4440
4441    #[test]
4442    fn test_extract_constant_names_accepts_plus_hash_form_combined_token() {
4443        let names = extract_constant_names_from_use_args(&[
4444            "+{".to_string(),
4445            "FOO".to_string(),
4446            "=>".to_string(),
4447            "1".to_string(),
4448            ",".to_string(),
4449            "BAR".to_string(),
4450            "=>".to_string(),
4451            "2".to_string(),
4452            "}".to_string(),
4453        ]);
4454        assert_eq!(names, vec!["FOO", "BAR"]);
4455    }
4456    #[test]
4457    fn test_use_constant_duplicate_names_indexed_once() {
4458        let index = WorkspaceIndex::new();
4459        let uri = "file:///lib/My/DedupConfig.pm";
4460        let code = r#"package My::DedupConfig;
4461use constant {
4462    RETRY_COUNT => 3,
4463    RETRY_COUNT => 5,
4464};
44651;
4466"#;
4467        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4468
4469        let symbols = index.file_symbols(uri);
4470        let retry_count_symbols = symbols.iter().filter(|s| s.name == "RETRY_COUNT").count();
4471        assert_eq!(
4472            retry_count_symbols, 1,
4473            "RETRY_COUNT should be indexed once even when repeated in use constant hash form"
4474        );
4475    }
4476
4477    #[test]
4478    fn test_use_constant_plus_hash_form_indexes_keys() {
4479        let index = WorkspaceIndex::new();
4480        let uri = "file:///lib/My/PlusHash.pm";
4481        let code = r#"package My::PlusHash;
4482use constant +{
4483    FOO => 1,
4484    BAR => 2,
4485};
44861;
4487"#;
4488        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4489
4490        assert!(index.find_definition("My::PlusHash::FOO").is_some());
4491        assert!(index.find_definition("My::PlusHash::BAR").is_some());
4492    }
4493
4494    #[test]
4495    fn test_basic_indexing() {
4496        let index = WorkspaceIndex::new();
4497        let uri = "file:///test.pl";
4498
4499        let code = r#"
4500package MyPackage;
4501
4502sub hello {
4503    print "Hello";
4504}
4505
4506my $var = 42;
4507"#;
4508
4509        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4510
4511        // Should have indexed the package and subroutine
4512        let symbols = index.file_symbols(uri);
4513        assert!(symbols.iter().any(|s| s.name == "MyPackage" && s.kind == SymbolKind::Package));
4514        assert!(symbols.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
4515        assert!(symbols.iter().any(|s| s.name == "$var" && s.kind.is_variable()));
4516    }
4517
4518    #[test]
4519    fn test_package_symbol_has_no_container_name() {
4520        // Regression: project_symbol_declarations used to set container_name = Some("main")
4521        // for top-level package declarations because the IndexVisitor starts with
4522        // current_package = Some("main").  Package symbols are top-level declarations
4523        // and must have container_name = None.
4524        let index = WorkspaceIndex::new();
4525        let uri = "file:///lib/Foo.pm";
4526        let code = "package Foo;\nsub bar { }\n";
4527        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4528
4529        let symbols = index.file_symbols(uri);
4530        let pkg_sym = symbols.iter().find(|s| s.name == "Foo" && s.kind == SymbolKind::Package);
4531        assert!(pkg_sym.is_some(), "Package symbol not found");
4532        assert_eq!(
4533            pkg_sym.unwrap().container_name,
4534            None,
4535            "Package symbol must not carry a container (was 'main')"
4536        );
4537    }
4538
4539    #[test]
4540    fn test_my_variable_has_no_qualified_name() {
4541        // Regression: project_symbol_declarations used to set qualified_name = Some("Foo::x")
4542        // for `my $x` inside `package Foo`, making `find_definition("Foo::x")` return the
4543        // lexical variable.  `my` variables are not package-visible and must have
4544        // qualified_name = None so qualified lookups don't match them.
4545        let index = WorkspaceIndex::new();
4546        let uri = "file:///lib/Foo.pm";
4547        let code = "package Foo;\nsub bar { my $x = 1; }\n";
4548        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4549
4550        let symbols = index.file_symbols(uri);
4551        let var_sym = symbols.iter().find(|s| s.name == "$x" && s.kind.is_variable());
4552        assert!(var_sym.is_some(), "$x variable not indexed");
4553        assert_eq!(
4554            var_sym.unwrap().qualified_name,
4555            None,
4556            "my variable must not have a qualified_name"
4557        );
4558
4559        // `find_definition("Foo::x")` must not accidentally resolve to a lexical variable.
4560        assert!(
4561            index.find_definition("Foo::x").is_none(),
4562            "find_definition(\"Foo::x\") must not return a lexical my variable"
4563        );
4564    }
4565
4566    fn reference_kinds_for(
4567        index: &WorkspaceIndex,
4568        uri: &str,
4569        symbol_name: &str,
4570    ) -> Vec<ReferenceKind> {
4571        let files = index.files.read();
4572        let file = must_some(files.get(uri));
4573        file.references
4574            .get(symbol_name)
4575            .map(|refs| refs.iter().map(|r| r.kind).collect())
4576            .unwrap_or_default()
4577    }
4578
4579    #[test]
4580    fn test_reference_kinds_sub_definition_and_call_are_distinct() {
4581        let index = WorkspaceIndex::new();
4582        let uri = "file:///typed-refs-sub.pl";
4583        let code = "package TypedRefs;
4584sub foo { return 1; }
4585foo();
4586";
4587        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4588
4589        let kinds = reference_kinds_for(&index, uri, "foo");
4590        assert!(kinds.contains(&ReferenceKind::Definition));
4591        assert!(kinds.contains(&ReferenceKind::Usage));
4592    }
4593
4594    #[test]
4595    fn test_reference_kinds_variable_read_and_write_are_distinct() {
4596        let index = WorkspaceIndex::new();
4597        let uri = "file:///typed-refs-var.pl";
4598        let code = "my $value = 1;
4599$value = 2;
4600print $value;
4601";
4602        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4603
4604        let kinds = reference_kinds_for(&index, uri, "$value");
4605        assert!(kinds.contains(&ReferenceKind::Definition));
4606        assert!(kinds.contains(&ReferenceKind::Write));
4607        assert!(kinds.contains(&ReferenceKind::Read));
4608    }
4609
4610    #[test]
4611    fn test_reference_kinds_import_parent_and_export_ok_are_currently_import_only() {
4612        let index = WorkspaceIndex::new();
4613        let uri = "file:///typed-refs-import-export.pm";
4614        let code = "package Child;
4615use parent 'Base';
4616our @EXPORT_OK = qw(foo);
46171;
4618";
4619        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4620
4621        let parent_kinds = reference_kinds_for(&index, uri, "Base");
4622        assert!(
4623            parent_kinds.is_empty(),
4624            "use parent inheritance edges are currently not stored as typed references"
4625        );
4626
4627        let export_symbol_kinds = reference_kinds_for(&index, uri, "foo");
4628        assert!(
4629            export_symbol_kinds.is_empty(),
4630            "EXPORT_OK entries are currently not represented as reference edges"
4631        );
4632    }
4633
4634    #[test]
4635    fn test_reference_kinds_dynamic_and_meta_edges_are_not_typed_yet() {
4636        let index = WorkspaceIndex::new();
4637        let uri = "file:///typed-refs-dynamic.pl";
4638        let code = r#"package TypedRefs;
4639sub foo { 1 }
4640&foo;
4641my $code = \&foo;
4642goto &foo;
4643*alias = \&foo;
4644eval "foo()";
4645with 'RoleName';
4646has 'name' => (is => 'ro');
46471;
4648"#;
4649        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4650
4651        let foo_kinds = reference_kinds_for(&index, uri, "foo");
4652        assert!(
4653            foo_kinds
4654                .iter()
4655                .all(|kind| matches!(kind, ReferenceKind::Definition | ReferenceKind::Usage)),
4656            r"dynamic call forms (&foo, \&foo, goto &foo) are currently flattened to Usage"
4657        );
4658
4659        assert!(
4660            reference_kinds_for(&index, uri, "RoleName").is_empty(),
4661            "role composition edges (`with 'RoleName'`) are not indexed as typed references yet"
4662        );
4663    }
4664
4665    #[test]
4666    fn test_find_references() {
4667        let index = WorkspaceIndex::new();
4668        let uri = "file:///test.pl";
4669
4670        let code = r#"
4671sub test {
4672    my $x = 1;
4673    $x = 2;
4674    print $x;
4675}
4676"#;
4677
4678        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4679
4680        let refs = index.find_references("$x");
4681        assert!(refs.len() >= 2); // Definition + at least one usage
4682    }
4683
4684    #[test]
4685    fn test_find_references_bare_name_includes_qualified_calls() {
4686        let index = WorkspaceIndex::new();
4687        let uri = "file:///refs.pl";
4688        let code = r#"
4689package RefDemo;
4690sub helper {
4691    return 1;
4692}
4693
4694helper();
4695RefDemo::helper();
4696"#;
4697
4698        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4699
4700        let bare_refs = index.find_references("helper");
4701        let qualified_refs = index.find_references("RefDemo::helper");
4702
4703        assert!(
4704            bare_refs.len() >= qualified_refs.len(),
4705            "bare-name reference lookup should include qualified calls"
4706        );
4707    }
4708
4709    #[test]
4710    fn test_count_usages_bare_name_includes_qualified_calls() {
4711        let index = WorkspaceIndex::new();
4712        let uri = "file:///usage.pl";
4713        let code = r#"
4714package UsageDemo;
4715sub helper {
4716    return 1;
4717}
4718
4719helper();
4720UsageDemo::helper();
4721"#;
4722
4723        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4724
4725        let bare_usage_count = index.count_usages("helper");
4726        let qualified_usage_count = index.count_usages("UsageDemo::helper");
4727
4728        assert!(
4729            bare_usage_count >= qualified_usage_count,
4730            "bare-name usage count should include qualified call sites"
4731        );
4732    }
4733
4734    #[test]
4735    fn test_dependencies() {
4736        let index = WorkspaceIndex::new();
4737        let uri = "file:///test.pl";
4738
4739        let code = r#"
4740use strict;
4741use warnings;
4742use Data::Dumper;
4743"#;
4744
4745        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4746
4747        let deps = index.file_dependencies(uri);
4748        assert!(deps.contains("strict"));
4749        assert!(deps.contains("warnings"));
4750        assert!(deps.contains("Data::Dumper"));
4751    }
4752
4753    #[test]
4754    fn test_uri_to_fs_path_basic() {
4755        // Test basic file:// URI conversion
4756        if let Some(path) = uri_to_fs_path("file:///tmp/test.pl") {
4757            assert_eq!(path, std::path::PathBuf::from("/tmp/test.pl"));
4758        }
4759
4760        // Test with invalid URI
4761        assert!(uri_to_fs_path("not-a-uri").is_none());
4762
4763        // Test with non-file scheme
4764        assert!(uri_to_fs_path("http://example.com").is_none());
4765    }
4766
4767    #[test]
4768    fn test_uri_to_fs_path_with_spaces() {
4769        // Test with percent-encoded spaces
4770        if let Some(path) = uri_to_fs_path("file:///tmp/path%20with%20spaces/test.pl") {
4771            assert_eq!(path, std::path::PathBuf::from("/tmp/path with spaces/test.pl"));
4772        }
4773
4774        // Test with multiple spaces and special characters
4775        if let Some(path) = uri_to_fs_path("file:///tmp/My%20Documents/test%20file.pl") {
4776            assert_eq!(path, std::path::PathBuf::from("/tmp/My Documents/test file.pl"));
4777        }
4778    }
4779
4780    #[test]
4781    fn test_uri_to_fs_path_with_unicode() {
4782        // Test with Unicode characters (percent-encoded)
4783        if let Some(path) = uri_to_fs_path("file:///tmp/caf%C3%A9/test.pl") {
4784            assert_eq!(path, std::path::PathBuf::from("/tmp/café/test.pl"));
4785        }
4786
4787        // Test with Unicode emoji (percent-encoded)
4788        if let Some(path) = uri_to_fs_path("file:///tmp/emoji%F0%9F%98%80/test.pl") {
4789            assert_eq!(path, std::path::PathBuf::from("/tmp/emoji😀/test.pl"));
4790        }
4791    }
4792
4793    #[test]
4794    fn test_fs_path_to_uri_basic() {
4795        // Test basic path to URI conversion
4796        let result = fs_path_to_uri("/tmp/test.pl");
4797        assert!(result.is_ok());
4798        let uri = must(result);
4799        assert!(uri.starts_with("file://"));
4800        assert!(uri.contains("/tmp/test.pl"));
4801    }
4802
4803    #[test]
4804    fn test_fs_path_to_uri_with_spaces() {
4805        // Test path with spaces
4806        let result = fs_path_to_uri("/tmp/path with spaces/test.pl");
4807        assert!(result.is_ok());
4808        let uri = must(result);
4809        assert!(uri.starts_with("file://"));
4810        // Should contain percent-encoded spaces
4811        assert!(uri.contains("path%20with%20spaces"));
4812    }
4813
4814    #[test]
4815    fn test_fs_path_to_uri_with_unicode() {
4816        // Test path with Unicode characters
4817        let result = fs_path_to_uri("/tmp/café/test.pl");
4818        assert!(result.is_ok());
4819        let uri = must(result);
4820        assert!(uri.starts_with("file://"));
4821        // Should contain percent-encoded Unicode
4822        assert!(uri.contains("caf%C3%A9"));
4823    }
4824
4825    #[test]
4826    fn test_normalize_uri_file_schemes() {
4827        // Test normalization of valid file URIs
4828        let uri = WorkspaceIndex::normalize_uri("file:///tmp/test.pl");
4829        assert_eq!(uri, "file:///tmp/test.pl");
4830
4831        // Test normalization of URIs with spaces
4832        let uri = WorkspaceIndex::normalize_uri("file:///tmp/path%20with%20spaces/test.pl");
4833        assert_eq!(uri, "file:///tmp/path%20with%20spaces/test.pl");
4834    }
4835
4836    #[test]
4837    fn test_normalize_uri_absolute_paths() {
4838        // Test normalization of absolute paths (convert to file:// URI)
4839        let uri = WorkspaceIndex::normalize_uri("/tmp/test.pl");
4840        assert!(uri.starts_with("file://"));
4841        assert!(uri.contains("/tmp/test.pl"));
4842    }
4843
4844    #[test]
4845    fn test_normalize_uri_special_schemes() {
4846        // Test that special schemes like untitled: are preserved
4847        let uri = WorkspaceIndex::normalize_uri("untitled:Untitled-1");
4848        assert_eq!(uri, "untitled:Untitled-1");
4849    }
4850
4851    #[test]
4852    fn test_roundtrip_conversion() {
4853        // Test that URI -> path -> URI conversion preserves the URI
4854        let original_uri = "file:///tmp/path%20with%20spaces/caf%C3%A9.pl";
4855
4856        if let Some(path) = uri_to_fs_path(original_uri) {
4857            if let Ok(converted_uri) = fs_path_to_uri(&path) {
4858                // Should be able to round-trip back to an equivalent URI
4859                assert!(converted_uri.starts_with("file://"));
4860
4861                // The path component should decode correctly
4862                if let Some(roundtrip_path) = uri_to_fs_path(&converted_uri) {
4863                    #[cfg(windows)]
4864                    if let Ok(rootless) = path.strip_prefix(std::path::Path::new(r"\")) {
4865                        assert!(roundtrip_path.ends_with(rootless));
4866                    } else {
4867                        assert_eq!(path, roundtrip_path);
4868                    }
4869
4870                    #[cfg(not(windows))]
4871                    assert_eq!(path, roundtrip_path);
4872                }
4873            }
4874        }
4875    }
4876
4877    #[cfg(target_os = "windows")]
4878    #[test]
4879    fn test_windows_paths() {
4880        // Test Windows-style paths
4881        let result = fs_path_to_uri(r"C:\Users\test\Documents\script.pl");
4882        assert!(result.is_ok());
4883        let uri = must(result);
4884        assert!(uri.starts_with("file://"));
4885
4886        // Test Windows path with spaces
4887        let result = fs_path_to_uri(r"C:\Program Files\My App\script.pl");
4888        assert!(result.is_ok());
4889        let uri = must(result);
4890        assert!(uri.starts_with("file://"));
4891        assert!(uri.contains("Program%20Files"));
4892    }
4893
4894    // ========================================================================
4895    // IndexCoordinator Tests
4896    // ========================================================================
4897
4898    #[test]
4899    fn test_coordinator_initial_state() {
4900        let coordinator = IndexCoordinator::new();
4901        assert!(matches!(
4902            coordinator.state(),
4903            IndexState::Building { phase: IndexPhase::Idle, .. }
4904        ));
4905    }
4906
4907    #[test]
4908    fn test_transition_to_scanning_phase() {
4909        let coordinator = IndexCoordinator::new();
4910        coordinator.transition_to_scanning();
4911
4912        let state = coordinator.state();
4913        assert!(
4914            matches!(state, IndexState::Building { phase: IndexPhase::Scanning, .. }),
4915            "Expected Building state after scanning, got: {:?}",
4916            state
4917        );
4918    }
4919
4920    #[test]
4921    fn test_transition_to_indexing_phase() {
4922        let coordinator = IndexCoordinator::new();
4923        coordinator.transition_to_scanning();
4924        coordinator.update_scan_progress(3);
4925        coordinator.transition_to_indexing(3);
4926
4927        let state = coordinator.state();
4928        assert!(
4929            matches!(
4930                state,
4931                IndexState::Building { phase: IndexPhase::Indexing, total_count: 3, .. }
4932            ),
4933            "Expected Building state after indexing with total_count 3, got: {:?}",
4934            state
4935        );
4936    }
4937
4938    #[test]
4939    fn test_transition_to_ready() {
4940        let coordinator = IndexCoordinator::new();
4941        coordinator.transition_to_ready(100, 5000);
4942
4943        let state = coordinator.state();
4944        if let IndexState::Ready { file_count, symbol_count, .. } = state {
4945            assert_eq!(file_count, 100);
4946            assert_eq!(symbol_count, 5000);
4947        } else {
4948            unreachable!("Expected Ready state, got: {:?}", state);
4949        }
4950    }
4951
4952    #[test]
4953    fn test_parse_storm_degradation() {
4954        let coordinator = IndexCoordinator::new();
4955        coordinator.transition_to_ready(100, 5000);
4956
4957        // Trigger parse storm
4958        for _ in 0..15 {
4959            coordinator.notify_change("file.pm");
4960        }
4961
4962        let state = coordinator.state();
4963        assert!(
4964            matches!(state, IndexState::Degraded { .. }),
4965            "Expected Degraded state, got: {:?}",
4966            state
4967        );
4968        if let IndexState::Degraded { reason, .. } = state {
4969            assert!(matches!(reason, DegradationReason::ParseStorm { .. }));
4970        }
4971    }
4972
4973    #[test]
4974    fn test_recovery_from_parse_storm() {
4975        let coordinator = IndexCoordinator::new();
4976        coordinator.transition_to_ready(100, 5000);
4977
4978        // Trigger parse storm
4979        for _ in 0..15 {
4980            coordinator.notify_change("file.pm");
4981        }
4982
4983        // Complete all parses
4984        for _ in 0..15 {
4985            coordinator.notify_parse_complete("file.pm");
4986        }
4987
4988        // Should recover to Building state
4989        assert!(matches!(coordinator.state(), IndexState::Building { .. }));
4990    }
4991
4992    #[test]
4993    fn test_query_dispatch_ready() {
4994        let coordinator = IndexCoordinator::new();
4995        coordinator.transition_to_ready(100, 5000);
4996
4997        let result = coordinator.query(|_index| "full_query", |_index| "partial_query");
4998
4999        assert_eq!(result, "full_query");
5000    }
5001
5002    #[test]
5003    fn test_query_dispatch_degraded() {
5004        let coordinator = IndexCoordinator::new();
5005        // Building state should use partial query
5006
5007        let result = coordinator.query(|_index| "full_query", |_index| "partial_query");
5008
5009        assert_eq!(result, "partial_query");
5010    }
5011
5012    #[test]
5013    fn test_metrics_pending_count() {
5014        let coordinator = IndexCoordinator::new();
5015
5016        coordinator.notify_change("file1.pm");
5017        coordinator.notify_change("file2.pm");
5018
5019        assert_eq!(coordinator.metrics.pending_count(), 2);
5020
5021        coordinator.notify_parse_complete("file1.pm");
5022        assert_eq!(coordinator.metrics.pending_count(), 1);
5023    }
5024
5025    #[test]
5026    fn test_instrumentation_records_transitions() {
5027        let coordinator = IndexCoordinator::new();
5028        coordinator.transition_to_ready(10, 100);
5029
5030        let snapshot = coordinator.instrumentation_snapshot();
5031        let transition =
5032            IndexStateTransition { from: IndexStateKind::Building, to: IndexStateKind::Ready };
5033        let count = snapshot.state_transition_counts.get(&transition).copied().unwrap_or(0);
5034        assert_eq!(count, 1);
5035    }
5036
5037    #[test]
5038    fn test_instrumentation_records_early_exit() {
5039        let coordinator = IndexCoordinator::new();
5040        coordinator.record_early_exit(EarlyExitReason::InitialTimeBudget, 25, 1, 10);
5041
5042        let snapshot = coordinator.instrumentation_snapshot();
5043        let count = snapshot
5044            .early_exit_counts
5045            .get(&EarlyExitReason::InitialTimeBudget)
5046            .copied()
5047            .unwrap_or(0);
5048        assert_eq!(count, 1);
5049        assert!(snapshot.last_early_exit.is_some());
5050    }
5051
5052    #[test]
5053    fn test_custom_limits() {
5054        let limits = IndexResourceLimits {
5055            max_files: 5000,
5056            max_symbols_per_file: 1000,
5057            max_total_symbols: 100_000,
5058            max_ast_cache_bytes: 128 * 1024 * 1024,
5059            max_ast_cache_items: 50,
5060            max_scan_duration_ms: 30_000,
5061        };
5062
5063        let coordinator = IndexCoordinator::with_limits(limits.clone());
5064        assert_eq!(coordinator.limits.max_files, 5000);
5065        assert_eq!(coordinator.limits.max_total_symbols, 100_000);
5066    }
5067
5068    #[test]
5069    fn test_degradation_preserves_symbol_count() {
5070        let coordinator = IndexCoordinator::new();
5071        coordinator.transition_to_ready(100, 5000);
5072
5073        coordinator.transition_to_degraded(DegradationReason::IoError {
5074            message: "Test error".to_string(),
5075        });
5076
5077        let state = coordinator.state();
5078        assert!(
5079            matches!(state, IndexState::Degraded { .. }),
5080            "Expected Degraded state, got: {:?}",
5081            state
5082        );
5083        if let IndexState::Degraded { available_symbols, .. } = state {
5084            assert_eq!(available_symbols, 5000);
5085        }
5086    }
5087
5088    #[test]
5089    fn test_index_access() {
5090        let coordinator = IndexCoordinator::new();
5091        let index = coordinator.index();
5092
5093        // Should have access to underlying WorkspaceIndex
5094        assert!(index.all_symbols().is_empty());
5095    }
5096
5097    #[test]
5098    fn test_resource_limit_enforcement_max_files() {
5099        let limits = IndexResourceLimits {
5100            max_files: 5,
5101            max_symbols_per_file: 1000,
5102            max_total_symbols: 50_000,
5103            max_ast_cache_bytes: 128 * 1024 * 1024,
5104            max_ast_cache_items: 50,
5105            max_scan_duration_ms: 30_000,
5106        };
5107
5108        let coordinator = IndexCoordinator::with_limits(limits);
5109        coordinator.transition_to_ready(10, 100);
5110
5111        // Index 10 files (exceeds limit of 5)
5112        for i in 0..10 {
5113            let uri_str = format!("file:///test{}.pl", i);
5114            let uri = must(url::Url::parse(&uri_str));
5115            let code = "sub test { }";
5116            must(coordinator.index().index_file(uri, code.to_string()));
5117        }
5118
5119        // Enforce limits
5120        coordinator.enforce_limits();
5121
5122        let state = coordinator.state();
5123        assert!(
5124            matches!(
5125                state,
5126                IndexState::Degraded {
5127                    reason: DegradationReason::ResourceLimit { kind: ResourceKind::MaxFiles },
5128                    ..
5129                }
5130            ),
5131            "Expected Degraded state with ResourceLimit(MaxFiles), got: {:?}",
5132            state
5133        );
5134    }
5135
5136    #[test]
5137    fn test_resource_limit_enforcement_max_symbols() {
5138        let limits = IndexResourceLimits {
5139            max_files: 100,
5140            max_symbols_per_file: 10,
5141            max_total_symbols: 50, // Very low limit for testing
5142            max_ast_cache_bytes: 128 * 1024 * 1024,
5143            max_ast_cache_items: 50,
5144            max_scan_duration_ms: 30_000,
5145        };
5146
5147        let coordinator = IndexCoordinator::with_limits(limits);
5148        coordinator.transition_to_ready(0, 0);
5149
5150        // Index files with many symbols to exceed total symbol limit
5151        for i in 0..10 {
5152            let uri_str = format!("file:///test{}.pl", i);
5153            let uri = must(url::Url::parse(&uri_str));
5154            // Each file has 10 subroutines = 100 total symbols (exceeds limit of 50)
5155            let code = r#"
5156package Test;
5157sub sub1 { }
5158sub sub2 { }
5159sub sub3 { }
5160sub sub4 { }
5161sub sub5 { }
5162sub sub6 { }
5163sub sub7 { }
5164sub sub8 { }
5165sub sub9 { }
5166sub sub10 { }
5167"#;
5168            must(coordinator.index().index_file(uri, code.to_string()));
5169        }
5170
5171        // Enforce limits
5172        coordinator.enforce_limits();
5173
5174        let state = coordinator.state();
5175        assert!(
5176            matches!(
5177                state,
5178                IndexState::Degraded {
5179                    reason: DegradationReason::ResourceLimit { kind: ResourceKind::MaxSymbols },
5180                    ..
5181                }
5182            ),
5183            "Expected Degraded state with ResourceLimit(MaxSymbols), got: {:?}",
5184            state
5185        );
5186    }
5187
5188    #[test]
5189    fn test_check_limits_returns_none_within_bounds() {
5190        let coordinator = IndexCoordinator::new();
5191        coordinator.transition_to_ready(0, 0);
5192
5193        // Index a few files well within default limits
5194        for i in 0..5 {
5195            let uri_str = format!("file:///test{}.pl", i);
5196            let uri = must(url::Url::parse(&uri_str));
5197            let code = "sub test { }";
5198            must(coordinator.index().index_file(uri, code.to_string()));
5199        }
5200
5201        // Should not trigger degradation
5202        let limit_check = coordinator.check_limits();
5203        assert!(limit_check.is_none(), "check_limits should return None when within bounds");
5204
5205        // State should still be Ready
5206        assert!(
5207            matches!(coordinator.state(), IndexState::Ready { .. }),
5208            "State should remain Ready when within limits"
5209        );
5210    }
5211
5212    #[test]
5213    fn test_enforce_limits_called_on_transition_to_ready() {
5214        let limits = IndexResourceLimits {
5215            max_files: 3,
5216            max_symbols_per_file: 1000,
5217            max_total_symbols: 50_000,
5218            max_ast_cache_bytes: 128 * 1024 * 1024,
5219            max_ast_cache_items: 50,
5220            max_scan_duration_ms: 30_000,
5221        };
5222
5223        let coordinator = IndexCoordinator::with_limits(limits);
5224
5225        // Index files before transitioning to ready
5226        for i in 0..5 {
5227            let uri_str = format!("file:///test{}.pl", i);
5228            let uri = must(url::Url::parse(&uri_str));
5229            let code = "sub test { }";
5230            must(coordinator.index().index_file(uri, code.to_string()));
5231        }
5232
5233        // Transition to ready - should automatically enforce limits
5234        coordinator.transition_to_ready(5, 100);
5235
5236        let state = coordinator.state();
5237        assert!(
5238            matches!(
5239                state,
5240                IndexState::Degraded {
5241                    reason: DegradationReason::ResourceLimit { kind: ResourceKind::MaxFiles },
5242                    ..
5243                }
5244            ),
5245            "Expected Degraded state after transition_to_ready with exceeded limits, got: {:?}",
5246            state
5247        );
5248    }
5249
5250    #[test]
5251    fn test_state_transition_guard_ready_to_ready() {
5252        // Test that Ready → Ready is allowed (metrics update)
5253        let coordinator = IndexCoordinator::new();
5254        coordinator.transition_to_ready(100, 5000);
5255
5256        // Transition to Ready again with different metrics
5257        coordinator.transition_to_ready(150, 7500);
5258
5259        let state = coordinator.state();
5260        assert!(
5261            matches!(state, IndexState::Ready { file_count: 150, symbol_count: 7500, .. }),
5262            "Expected Ready state with updated metrics, got: {:?}",
5263            state
5264        );
5265    }
5266
5267    #[test]
5268    fn test_state_transition_guard_building_to_building() {
5269        // Test that Building → Building is allowed (progress update)
5270        let coordinator = IndexCoordinator::new();
5271
5272        // Initial building state
5273        coordinator.transition_to_building(100);
5274
5275        let state = coordinator.state();
5276        assert!(
5277            matches!(state, IndexState::Building { indexed_count: 0, total_count: 100, .. }),
5278            "Expected Building state, got: {:?}",
5279            state
5280        );
5281
5282        // Update total count
5283        coordinator.transition_to_building(200);
5284
5285        let state = coordinator.state();
5286        assert!(
5287            matches!(state, IndexState::Building { indexed_count: 0, total_count: 200, .. }),
5288            "Expected Building state, got: {:?}",
5289            state
5290        );
5291    }
5292
5293    #[test]
5294    fn test_state_transition_ready_to_building() {
5295        // Test that Ready → Building is allowed (re-scan)
5296        let coordinator = IndexCoordinator::new();
5297        coordinator.transition_to_ready(100, 5000);
5298
5299        // Trigger re-scan
5300        coordinator.transition_to_building(150);
5301
5302        let state = coordinator.state();
5303        assert!(
5304            matches!(state, IndexState::Building { indexed_count: 0, total_count: 150, .. }),
5305            "Expected Building state after re-scan, got: {:?}",
5306            state
5307        );
5308    }
5309
5310    #[test]
5311    fn test_state_transition_degraded_to_building() {
5312        // Test that Degraded → Building is allowed (recovery)
5313        let coordinator = IndexCoordinator::new();
5314        coordinator.transition_to_degraded(DegradationReason::IoError {
5315            message: "Test error".to_string(),
5316        });
5317
5318        // Attempt recovery
5319        coordinator.transition_to_building(100);
5320
5321        let state = coordinator.state();
5322        assert!(
5323            matches!(state, IndexState::Building { indexed_count: 0, total_count: 100, .. }),
5324            "Expected Building state after recovery, got: {:?}",
5325            state
5326        );
5327    }
5328
5329    #[test]
5330    fn test_update_building_progress() {
5331        let coordinator = IndexCoordinator::new();
5332        coordinator.transition_to_building(100);
5333
5334        // Update progress
5335        coordinator.update_building_progress(50);
5336
5337        let state = coordinator.state();
5338        assert!(
5339            matches!(state, IndexState::Building { indexed_count: 50, total_count: 100, .. }),
5340            "Expected Building state with updated progress, got: {:?}",
5341            state
5342        );
5343
5344        // Update progress again
5345        coordinator.update_building_progress(100);
5346
5347        let state = coordinator.state();
5348        assert!(
5349            matches!(state, IndexState::Building { indexed_count: 100, total_count: 100, .. }),
5350            "Expected Building state with completed progress, got: {:?}",
5351            state
5352        );
5353    }
5354
5355    #[test]
5356    fn test_scan_timeout_detection() {
5357        // Test that scan timeout triggers degradation
5358        let limits = IndexResourceLimits {
5359            max_scan_duration_ms: 0, // Immediate timeout for testing
5360            ..Default::default()
5361        };
5362
5363        let coordinator = IndexCoordinator::with_limits(limits);
5364        coordinator.transition_to_building(100);
5365
5366        // Small sleep to ensure elapsed time > 0
5367        std::thread::sleep(std::time::Duration::from_millis(1));
5368
5369        // Update progress should detect timeout
5370        coordinator.update_building_progress(10);
5371
5372        let state = coordinator.state();
5373        assert!(
5374            matches!(
5375                state,
5376                IndexState::Degraded { reason: DegradationReason::ScanTimeout { .. }, .. }
5377            ),
5378            "Expected Degraded state with ScanTimeout, got: {:?}",
5379            state
5380        );
5381    }
5382
5383    #[test]
5384    fn test_scan_timeout_does_not_trigger_within_limit() {
5385        // Test that scan doesn't timeout within the limit
5386        let limits = IndexResourceLimits {
5387            max_scan_duration_ms: 10_000, // 10 seconds - should not trigger
5388            ..Default::default()
5389        };
5390
5391        let coordinator = IndexCoordinator::with_limits(limits);
5392        coordinator.transition_to_building(100);
5393
5394        // Update progress immediately (well within limit)
5395        coordinator.update_building_progress(50);
5396
5397        let state = coordinator.state();
5398        assert!(
5399            matches!(state, IndexState::Building { indexed_count: 50, .. }),
5400            "Expected Building state (no timeout), got: {:?}",
5401            state
5402        );
5403    }
5404
5405    #[test]
5406    fn test_early_exit_optimization_unchanged_content() {
5407        let index = WorkspaceIndex::new();
5408        let uri = must(url::Url::parse("file:///test.pl"));
5409        let code = r#"
5410package MyPackage;
5411
5412sub hello {
5413    print "Hello";
5414}
5415"#;
5416
5417        // First indexing should parse and index
5418        must(index.index_file(uri.clone(), code.to_string()));
5419        let symbols1 = index.file_symbols(uri.as_str());
5420        assert!(symbols1.iter().any(|s| s.name == "MyPackage" && s.kind == SymbolKind::Package));
5421        assert!(symbols1.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
5422
5423        // Second indexing with same content should early-exit
5424        // We can verify this by checking that the index still works correctly
5425        must(index.index_file(uri.clone(), code.to_string()));
5426        let symbols2 = index.file_symbols(uri.as_str());
5427        assert_eq!(symbols1.len(), symbols2.len());
5428        assert!(symbols2.iter().any(|s| s.name == "MyPackage" && s.kind == SymbolKind::Package));
5429        assert!(symbols2.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
5430    }
5431
5432    #[test]
5433    fn test_early_exit_optimization_changed_content() {
5434        let index = WorkspaceIndex::new();
5435        let uri = must(url::Url::parse("file:///test.pl"));
5436        let code1 = r#"
5437package MyPackage;
5438
5439sub hello {
5440    print "Hello";
5441}
5442"#;
5443
5444        let code2 = r#"
5445package MyPackage;
5446
5447sub goodbye {
5448    print "Goodbye";
5449}
5450"#;
5451
5452        // First indexing
5453        must(index.index_file(uri.clone(), code1.to_string()));
5454        let symbols1 = index.file_symbols(uri.as_str());
5455        assert!(symbols1.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
5456        assert!(!symbols1.iter().any(|s| s.name == "goodbye"));
5457
5458        // Second indexing with different content should re-parse
5459        must(index.index_file(uri.clone(), code2.to_string()));
5460        let symbols2 = index.file_symbols(uri.as_str());
5461        assert!(!symbols2.iter().any(|s| s.name == "hello"));
5462        assert!(symbols2.iter().any(|s| s.name == "goodbye" && s.kind == SymbolKind::Subroutine));
5463    }
5464
5465    #[test]
5466    fn test_early_exit_optimization_whitespace_only_change() {
5467        let index = WorkspaceIndex::new();
5468        let uri = must(url::Url::parse("file:///test.pl"));
5469        let code1 = r#"
5470package MyPackage;
5471
5472sub hello {
5473    print "Hello";
5474}
5475"#;
5476
5477        let code2 = r#"
5478package MyPackage;
5479
5480
5481sub hello {
5482    print "Hello";
5483}
5484"#;
5485
5486        // First indexing
5487        must(index.index_file(uri.clone(), code1.to_string()));
5488        let symbols1 = index.file_symbols(uri.as_str());
5489        assert!(symbols1.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
5490
5491        // Second indexing with whitespace change should re-parse (hash will differ)
5492        must(index.index_file(uri.clone(), code2.to_string()));
5493        let symbols2 = index.file_symbols(uri.as_str());
5494        // Symbols should still be found, but content hash differs so it re-indexed
5495        assert!(symbols2.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
5496    }
5497
5498    #[test]
5499    fn test_reindex_file_refreshes_symbol_cache_for_removed_names() {
5500        let index = WorkspaceIndex::new();
5501        let uri1 = must(url::Url::parse("file:///lib/A.pm"));
5502        let uri2 = must(url::Url::parse("file:///lib/B.pm"));
5503        let code1 = "package A;\nsub foo { return 1; }\n1;\n";
5504        let code2 = "package B;\nsub foo { return 2; }\n1;\n";
5505        let code2_reindexed = "package B;\nsub bar { return 3; }\n1;\n";
5506
5507        must(index.index_file(uri1.clone(), code1.to_string()));
5508        must(index.index_file(uri2.clone(), code2.to_string()));
5509        must(index.index_file(uri2.clone(), code2_reindexed.to_string()));
5510
5511        let foo_location = must_some(index.find_definition("foo"));
5512        assert_eq!(foo_location.uri, uri1.to_string());
5513
5514        let bar_location = must_some(index.find_definition("bar"));
5515        assert_eq!(bar_location.uri, uri2.to_string());
5516    }
5517
5518    #[test]
5519    fn test_remove_file_preserves_other_colliding_symbol_entries() {
5520        let index = WorkspaceIndex::new();
5521        let uri1 = must(url::Url::parse("file:///lib/A.pm"));
5522        let uri2 = must(url::Url::parse("file:///lib/B.pm"));
5523        let code1 = "package A;\nsub foo { return 1; }\n1;\n";
5524        let code2 = "package B;\nsub foo { return 2; }\n1;\n";
5525
5526        must(index.index_file(uri1.clone(), code1.to_string()));
5527        must(index.index_file(uri2.clone(), code2.to_string()));
5528
5529        index.remove_file(uri2.as_str());
5530
5531        let foo_location = must_some(index.find_definition("foo"));
5532        assert_eq!(foo_location.uri, uri1.to_string());
5533    }
5534
5535    #[test]
5536    fn test_count_usages_no_double_counting_for_qualified_calls() {
5537        let index = WorkspaceIndex::new();
5538
5539        // File 1: defines Utils::process_data
5540        let uri1 = "file:///lib/Utils.pm";
5541        let code1 = r#"
5542package Utils;
5543
5544sub process_data {
5545    return 1;
5546}
5547"#;
5548        must(index.index_file(must(url::Url::parse(uri1)), code1.to_string()));
5549
5550        // File 2: calls Utils::process_data (qualified call)
5551        let uri2 = "file:///app.pl";
5552        let code2 = r#"
5553use Utils;
5554Utils::process_data();
5555Utils::process_data();
5556"#;
5557        must(index.index_file(must(url::Url::parse(uri2)), code2.to_string()));
5558
5559        // Each qualified call is stored under both "process_data" and "Utils::process_data"
5560        // by the dual indexing strategy. count_usages should deduplicate so we get the
5561        // actual number of call sites, not double.
5562        let count = index.count_usages("Utils::process_data");
5563
5564        // We expect exactly 2 usage sites (the two calls in app.pl),
5565        // not 4 (which would be the double-counted result).
5566        assert_eq!(
5567            count, 2,
5568            "count_usages should not double-count qualified calls, got {} (expected 2)",
5569            count
5570        );
5571
5572        // find_references should also deduplicate
5573        let refs = index.find_references("Utils::process_data");
5574        let non_def_refs: Vec<_> =
5575            refs.iter().filter(|loc| loc.uri != "file:///lib/Utils.pm").collect();
5576        assert_eq!(
5577            non_def_refs.len(),
5578            2,
5579            "find_references should not return duplicates for qualified calls, got {} non-def refs",
5580            non_def_refs.len()
5581        );
5582    }
5583
5584    #[test]
5585    fn test_batch_indexing() {
5586        let index = WorkspaceIndex::new();
5587        let files: Vec<(Url, String)> = (0..5)
5588            .map(|i| {
5589                let uri = must(Url::parse(&format!("file:///batch/module{}.pm", i)));
5590                let code =
5591                    format!("package Batch::Mod{};\nsub func_{} {{ return {}; }}\n1;", i, i, i);
5592                (uri, code)
5593            })
5594            .collect();
5595
5596        let errors = index.index_files_batch(files);
5597        assert!(errors.is_empty(), "batch indexing errors: {:?}", errors);
5598        assert_eq!(index.file_count(), 5);
5599        assert!(index.find_definition("Batch::Mod0::func_0").is_some());
5600        assert!(index.find_definition("Batch::Mod4::func_4").is_some());
5601    }
5602
5603    #[test]
5604    fn test_batch_indexing_skips_unchanged() {
5605        let index = WorkspaceIndex::new();
5606        let uri = must(Url::parse("file:///batch/skip.pm"));
5607        let code = "package Skip;\nsub skip_fn { 1 }\n1;".to_string();
5608
5609        index.index_file(uri.clone(), code.clone()).ok();
5610        assert_eq!(index.file_count(), 1);
5611
5612        let errors = index.index_files_batch(vec![(uri, code)]);
5613        assert!(errors.is_empty());
5614        assert_eq!(index.file_count(), 1);
5615    }
5616
5617    #[test]
5618    fn test_incremental_update_preserves_other_symbols() {
5619        let index = WorkspaceIndex::new();
5620
5621        let uri_a = must(Url::parse("file:///incr/a.pm"));
5622        let uri_b = must(Url::parse("file:///incr/b.pm"));
5623        index.index_file(uri_a.clone(), "package A;\nsub a_func { 1 }\n1;".into()).ok();
5624        index.index_file(uri_b.clone(), "package B;\nsub b_func { 2 }\n1;".into()).ok();
5625
5626        assert!(index.find_definition("A::a_func").is_some());
5627        assert!(index.find_definition("B::b_func").is_some());
5628
5629        index.index_file(uri_a, "package A;\nsub a_func_v2 { 11 }\n1;".into()).ok();
5630
5631        assert!(index.find_definition("A::a_func_v2").is_some());
5632        assert!(index.find_definition("B::b_func").is_some());
5633    }
5634
5635    #[test]
5636    fn test_remove_file_preserves_shadowed_symbols() {
5637        let index = WorkspaceIndex::new();
5638
5639        let uri_a = must(Url::parse("file:///shadow/a.pm"));
5640        let uri_b = must(Url::parse("file:///shadow/b.pm"));
5641        index.index_file(uri_a.clone(), "package ShadowA;\nsub helper { 1 }\n1;".into()).ok();
5642        index.index_file(uri_b.clone(), "package ShadowB;\nsub helper { 2 }\n1;".into()).ok();
5643
5644        assert!(index.find_definition("helper").is_some());
5645
5646        index.remove_file_url(&uri_a);
5647        assert!(index.find_definition("helper").is_some());
5648        assert!(index.find_definition("ShadowB::helper").is_some());
5649    }
5650
5651    // -------------------------------------------------------------------------
5652    // find_dependents — use parent / use base integration (#2747)
5653    // -------------------------------------------------------------------------
5654
5655    #[test]
5656    fn test_index_dependency_via_use_parent_end_to_end() {
5657        // Regression for #2747: index a file with `use parent 'MyBase'` and verify
5658        // that find_dependents("MyBase") returns that file.
5659        // 1. Index MyBase.pm
5660        // 2. Index child.pl with `use parent 'MyBase'`
5661        // 3. find_dependents("MyBase") should return child.pl
5662        let index = WorkspaceIndex::new();
5663
5664        let base_url = must(url::Url::parse("file:///test/workspace/lib/MyBase.pm"));
5665        must(index.index_file(
5666            base_url,
5667            "package MyBase;\nsub new { bless {}, shift }\n1;\n".to_string(),
5668        ));
5669
5670        let child_url = must(url::Url::parse("file:///test/workspace/child.pl"));
5671        must(index.index_file(child_url, "package Child;\nuse parent 'MyBase';\n1;\n".to_string()));
5672
5673        let dependents = index.find_dependents("MyBase");
5674        assert!(
5675            !dependents.is_empty(),
5676            "find_dependents('MyBase') returned empty — \
5677             use parent 'MyBase' should register MyBase as a dependency. \
5678             Dependencies in index: {:?}",
5679            {
5680                let files = index.files.read();
5681                files
5682                    .iter()
5683                    .map(|(k, v)| (k.clone(), v.dependencies.iter().cloned().collect::<Vec<_>>()))
5684                    .collect::<Vec<_>>()
5685            }
5686        );
5687        assert!(
5688            dependents.contains(&"file:///test/workspace/child.pl".to_string()),
5689            "child.pl should be in dependents, got: {:?}",
5690            dependents
5691        );
5692    }
5693
5694    #[test]
5695    fn test_find_dependents_normalizes_legacy_separator_in_query() {
5696        let index = WorkspaceIndex::new();
5697        let uri = must(url::Url::parse("file:///test/workspace/legacy-query.pl"));
5698        let src = "package Child;\nuse parent 'My::Base';\n1;\n";
5699        must(index.index_file(uri, src.to_string()));
5700
5701        let dependents = index.find_dependents("My'Base");
5702        assert_eq!(dependents, vec!["file:///test/workspace/legacy-query.pl".to_string()]);
5703    }
5704
5705    #[test]
5706    fn test_file_dependencies_normalize_legacy_separator_in_source() {
5707        let index = WorkspaceIndex::new();
5708        let uri = must(url::Url::parse("file:///test/workspace/legacy-source.pl"));
5709        let src = "package Child;\nuse parent \"My'Base\";\n1;\n";
5710        must(index.index_file(uri.clone(), src.to_string()));
5711
5712        let deps = index.file_dependencies(uri.as_str());
5713        assert!(deps.contains("My::Base"));
5714        assert!(!deps.contains("My'Base"));
5715    }
5716
5717    #[test]
5718    fn test_index_dependency_via_moose_extends_end_to_end() -> Result<(), Box<dyn std::error::Error>>
5719    {
5720        let index = WorkspaceIndex::new();
5721
5722        let parent_url = must(url::Url::parse("file:///test/workspace/lib/My/App/Parent.pm"));
5723        must(index.index_file(parent_url, "package My::App::Parent;\n1;\n".to_string()));
5724
5725        let child_url = must(url::Url::parse("file:///test/workspace/child-moose.pl"));
5726        let child_src = "package Child;\nuse Moose;\nextends 'My::App::Parent';\n1;\n";
5727        must(index.index_file(child_url, child_src.to_string()));
5728
5729        let dependents = index.find_dependents("My::App::Parent");
5730        assert!(
5731            dependents.contains(&"file:///test/workspace/child-moose.pl".to_string()),
5732            "expected child-moose.pl in dependents, got: {dependents:?}"
5733        );
5734        Ok(())
5735    }
5736
5737    #[test]
5738    fn test_index_dependency_via_moo_with_role_end_to_end() -> Result<(), Box<dyn std::error::Error>>
5739    {
5740        let index = WorkspaceIndex::new();
5741
5742        let role_url = must(url::Url::parse("file:///test/workspace/lib/My/App/Role.pm"));
5743        must(index.index_file(role_url, "package My::App::Role;\n1;\n".to_string()));
5744
5745        let consumer_url = must(url::Url::parse("file:///test/workspace/consumer-moo.pl"));
5746        let consumer_src = "package Consumer;\nuse Moo;\nwith 'My::App::Role';\n1;\n";
5747        must(index.index_file(consumer_url.clone(), consumer_src.to_string()));
5748
5749        let dependents = index.find_dependents("My::App::Role");
5750        assert!(
5751            dependents.contains(&"file:///test/workspace/consumer-moo.pl".to_string()),
5752            "expected consumer-moo.pl in dependents, got: {dependents:?}"
5753        );
5754
5755        let deps = index.file_dependencies(consumer_url.as_str());
5756        assert!(deps.contains("My::App::Role"));
5757        Ok(())
5758    }
5759
5760    #[test]
5761    fn test_index_dependency_via_literal_require_end_to_end()
5762    -> Result<(), Box<dyn std::error::Error>> {
5763        let index = WorkspaceIndex::new();
5764        let uri = must(url::Url::parse("file:///test/workspace/require-consumer.pl"));
5765        let src = "package Consumer;\nrequire My::Loader;\n1;\n";
5766        must(index.index_file(uri.clone(), src.to_string()));
5767
5768        let deps = index.file_dependencies(uri.as_str());
5769        assert!(
5770            deps.contains("My::Loader"),
5771            "literal require should register module dependency, got: {deps:?}"
5772        );
5773        Ok(())
5774    }
5775
5776    #[test]
5777    fn test_manual_import_symbols_are_indexed_as_import_references()
5778    -> Result<(), Box<dyn std::error::Error>> {
5779        let index = WorkspaceIndex::new();
5780        let uri = must(url::Url::parse("file:///test/workspace/manual-import.pl"));
5781        let src = r#"package Consumer;
5782require My::Tools;
5783My::Tools->import(qw(helper_one helper_two));
5784helper_one();
57851;
5786"#;
5787        must(index.index_file(uri.clone(), src.to_string()));
5788
5789        let deps = index.file_dependencies(uri.as_str());
5790        assert!(
5791            deps.contains("My::Tools"),
5792            "manual import target should be tracked as dependency, got: {deps:?}"
5793        );
5794
5795        for symbol in ["helper_one", "helper_two"] {
5796            let refs = index.find_references(symbol);
5797            assert!(
5798                !refs.is_empty(),
5799                "expected at least one indexed reference for imported symbol `{symbol}`"
5800            );
5801        }
5802        Ok(())
5803    }
5804
5805    #[test]
5806    fn test_parser_produces_correct_args_for_use_parent() {
5807        // Regression for #2747: verify that the parser produces args=["'MyBase'"]
5808        // for `use parent 'MyBase'`, so extract_module_names_from_use_args strips
5809        // the quotes and registers the dependency under the bare name "MyBase".
5810        use crate::Parser;
5811        let mut p = Parser::new("package Child;\nuse parent 'MyBase';\n1;\n");
5812        let ast = must(p.parse());
5813        assert!(
5814            matches!(ast.kind, NodeKind::Program { .. }),
5815            "Expected Program root, got {:?}",
5816            ast.kind
5817        );
5818        let NodeKind::Program { statements } = &ast.kind else {
5819            return;
5820        };
5821        let mut found_parent_use = false;
5822        for stmt in statements {
5823            if let NodeKind::Use { module, args, .. } = &stmt.kind {
5824                if module == "parent" {
5825                    found_parent_use = true;
5826                    assert_eq!(
5827                        args,
5828                        &["'MyBase'".to_string()],
5829                        "Expected args=[\"'MyBase'\"] for `use parent 'MyBase'`, got: {:?}",
5830                        args
5831                    );
5832                    let extracted = extract_module_names_from_use_args(args);
5833                    assert_eq!(
5834                        extracted,
5835                        vec!["MyBase".to_string()],
5836                        "extract_module_names_from_use_args should return [\"MyBase\"], got {:?}",
5837                        extracted
5838                    );
5839                }
5840            }
5841        }
5842        assert!(found_parent_use, "No Use node with module='parent' found in AST");
5843    }
5844
5845    // -------------------------------------------------------------------------
5846    // extract_module_names_from_use_args — unit tests (#2747)
5847    // -------------------------------------------------------------------------
5848
5849    #[test]
5850    fn test_extract_module_names_single_quoted() {
5851        let names = extract_module_names_from_use_args(&["'Foo::Bar'".to_string()]);
5852        assert_eq!(names, vec!["Foo::Bar"]);
5853    }
5854
5855    #[test]
5856    fn test_extract_module_names_double_quoted() {
5857        let names = extract_module_names_from_use_args(&["\"Foo::Bar\"".to_string()]);
5858        assert_eq!(names, vec!["Foo::Bar"]);
5859    }
5860
5861    #[test]
5862    fn test_extract_module_names_qw_list() {
5863        let names = extract_module_names_from_use_args(&["qw(Foo::Bar Other::Base)".to_string()]);
5864        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
5865    }
5866
5867    #[test]
5868    fn test_extract_module_names_qw_slash_delimiter() {
5869        let names = extract_module_names_from_use_args(&["qw/Foo::Bar Other::Base/".to_string()]);
5870        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
5871    }
5872
5873    #[test]
5874    fn test_extract_module_names_qw_with_space_before_delimiter() {
5875        let names = extract_module_names_from_use_args(&["qw [Foo::Bar Other::Base]".to_string()]);
5876        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
5877    }
5878
5879    #[test]
5880    fn test_extract_module_names_qw_list_trims_wrapped_punctuation() {
5881        let names =
5882            extract_module_names_from_use_args(&["qw((Foo::Bar) [Other::Base],)".to_string()]);
5883        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
5884    }
5885
5886    #[test]
5887    fn test_extract_module_names_norequire_flag() {
5888        let names = extract_module_names_from_use_args(&[
5889            "-norequire".to_string(),
5890            "'Foo::Bar'".to_string(),
5891        ]);
5892        assert_eq!(names, vec!["Foo::Bar"]);
5893    }
5894
5895    #[test]
5896    fn test_extract_module_names_empty_args() {
5897        let names = extract_module_names_from_use_args(&[]);
5898        assert!(names.is_empty());
5899    }
5900
5901    #[test]
5902    fn test_extract_module_names_legacy_separator() {
5903        // Perl legacy package separator ' (tick) inside module name
5904        let names = extract_module_names_from_use_args(&["'Foo'Bar'".to_string()]);
5905        // Legacy separators are normalized for downstream dependency matching.
5906        assert_eq!(names, vec!["Foo::Bar"]);
5907    }
5908
5909    #[test]
5910    fn test_find_dependents_matches_legacy_separator_queries() {
5911        let index = WorkspaceIndex::new();
5912        let base_uri = must(url::Url::parse("file:///test/workspace/lib/Foo/Bar.pm"));
5913        let child_uri = must(url::Url::parse("file:///test/workspace/child.pl"));
5914
5915        must(index.index_file(base_uri, "package Foo::Bar;\n1;\n".to_string()));
5916        must(index.index_file(
5917            child_uri.clone(),
5918            "package Child;\nuse parent qw(Foo'Bar);\n1;\n".to_string(),
5919        ));
5920
5921        let dependents_modern = index.find_dependents("Foo::Bar");
5922        assert!(
5923            dependents_modern.contains(&child_uri.to_string()),
5924            "Expected dependency match when queried with modern separator"
5925        );
5926
5927        let dependents_legacy = index.find_dependents("Foo'Bar");
5928        assert!(
5929            dependents_legacy.contains(&child_uri.to_string()),
5930            "Expected dependency match when queried with legacy separator"
5931        );
5932    }
5933
5934    #[test]
5935    fn test_extract_module_names_comma_adjacent_tokens() {
5936        let names = extract_module_names_from_use_args(&[
5937            "'Foo::Bar',".to_string(),
5938            "\"Other::Base\",".to_string(),
5939            "'Last::One'".to_string(),
5940        ]);
5941        assert_eq!(names, vec!["Foo::Bar", "Other::Base", "Last::One"]);
5942    }
5943
5944    #[test]
5945    fn test_extract_module_names_parenthesized_without_spaces() {
5946        let names = extract_module_names_from_use_args(&["('Foo::Bar','Other::Base')".to_string()]);
5947        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
5948    }
5949
5950    #[test]
5951    fn test_extract_module_names_deduplicates_identical_entries() {
5952        let names = extract_module_names_from_use_args(&[
5953            "qw(Foo::Bar Foo::Bar)".to_string(),
5954            "'Foo::Bar'".to_string(),
5955        ]);
5956        assert_eq!(names, vec!["Foo::Bar"]);
5957    }
5958
5959    #[test]
5960    fn test_extract_module_names_trims_semicolon_suffix() {
5961        let names = extract_module_names_from_use_args(&[
5962            "'Foo::Bar',".to_string(),
5963            "'Other::Base',".to_string(),
5964            "'Third::Leaf';".to_string(),
5965        ]);
5966        assert_eq!(names, vec!["Foo::Bar", "Other::Base", "Third::Leaf"]);
5967    }
5968
5969    #[test]
5970    fn test_extract_module_names_trims_wrapped_punctuation() {
5971        let names = extract_module_names_from_use_args(&[
5972            "('Foo::Bar',".to_string(),
5973            "'Other::Base')".to_string(),
5974        ]);
5975        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
5976    }
5977
5978    #[test]
5979    fn test_extract_constant_names_qw_with_space_before_delimiter() {
5980        let names = extract_constant_names_from_use_args(&["qw [FOO BAR]".to_string()]);
5981        assert_eq!(names, vec!["FOO", "BAR"]);
5982    }
5983
5984    #[test]
5985    #[ignore = "qw delimiter with leading space not yet parsed; tracked in debt-ledger.yaml"]
5986    fn test_index_use_constant_qw_with_space_before_delimiter() {
5987        let index = WorkspaceIndex::new();
5988        let uri = must(url::Url::parse("file:///workspace/lib/My/Config.pm"));
5989        let source = "package My::Config;\nuse constant qw [FOO BAR];\n1;\n";
5990
5991        must(index.index_file(uri, source.to_string()));
5992
5993        let foo = index.find_definition("My::Config::FOO");
5994        let bar = index.find_definition("My::Config::BAR");
5995        assert!(foo.is_some(), "Expected My::Config::FOO to be indexed");
5996        assert!(bar.is_some(), "Expected My::Config::BAR to be indexed");
5997    }
5998
5999    #[test]
6000    fn test_with_capacity_accepts_large_batch_without_panic() {
6001        let index = WorkspaceIndex::with_capacity(100, 20);
6002        for i in 0..100 {
6003            let uri = must(url::Url::parse(&format!("file:///lib/Mod{}.pm", i)));
6004            let src = format!("package Mod{};\nsub foo_{} {{ 1 }}\n1;\n", i, i);
6005            index.index_file(uri, src).ok();
6006        }
6007        assert!(index.has_symbols());
6008    }
6009
6010    #[test]
6011    fn test_with_capacity_zero_does_not_panic() {
6012        let index = WorkspaceIndex::with_capacity(0, 0);
6013        assert!(!index.has_symbols());
6014    }
6015
6016    // -------------------------------------------------------------------------
6017    // remove_file — symbol cache cleanup (#3494)
6018    // -------------------------------------------------------------------------
6019
6020    /// After removing the only file that defines a symbol, both qualified and
6021    /// bare-name lookups must return None.  The symbols cache must not retain
6022    /// stale entries pointing to the deleted file.
6023    #[test]
6024    fn test_remove_file_clears_symbol_cache_qualified_and_bare() {
6025        let index = WorkspaceIndex::new();
6026        let uri_a = must(url::Url::parse("file:///lib/A.pm"));
6027        let code_a = "package A;\nsub foo { return 1; }\n1;\n";
6028
6029        must(index.index_file(uri_a.clone(), code_a.to_string()));
6030
6031        // Pre-condition: both qualified and bare-name lookups resolve to file A.
6032        let before_qual = must_some(index.find_definition("A::foo"));
6033        assert_eq!(
6034            before_qual.uri,
6035            uri_a.to_string(),
6036            "qualified lookup should point to A.pm before removal"
6037        );
6038        let before_bare = must_some(index.find_definition("foo"));
6039        assert_eq!(
6040            before_bare.uri,
6041            uri_a.to_string(),
6042            "bare-name lookup should point to A.pm before removal"
6043        );
6044
6045        // Remove file A from the index (simulates file deletion).
6046        index.remove_file(uri_a.as_str());
6047
6048        // Post-condition: the symbol cache must be clean — no stale entries.
6049        assert!(
6050            index.find_definition("A::foo").is_none(),
6051            "qualified lookup 'A::foo' should return None after file deletion"
6052        );
6053        assert!(
6054            index.find_definition("foo").is_none(),
6055            "bare-name lookup 'foo' should return None after file deletion"
6056        );
6057
6058        // Verify no symbols remain in the index.
6059        assert_eq!(
6060            index.symbol_count(),
6061            0,
6062            "symbol_count should be 0 after removing the only file"
6063        );
6064        assert!(!index.has_symbols(), "has_symbols should be false after removing the only file");
6065    }
6066
6067    /// Deleting file A when file B has the same bare-name symbol must leave
6068    /// the bare-name cache pointing to B (not remove it entirely).
6069    #[test]
6070    fn test_remove_file_bare_name_falls_back_to_surviving_file() {
6071        let index = WorkspaceIndex::new();
6072        let uri_a = must(url::Url::parse("file:///lib/A.pm"));
6073        let uri_b = must(url::Url::parse("file:///lib/B.pm"));
6074        let code_a = "package A;\nsub shared_fn { return 1; }\n1;\n";
6075        let code_b = "package B;\nsub shared_fn { return 2; }\n1;\n";
6076
6077        must(index.index_file(uri_a.clone(), code_a.to_string()));
6078        must(index.index_file(uri_b.clone(), code_b.to_string()));
6079
6080        // Remove file A — shared_fn should still resolve via B.
6081        index.remove_file(uri_a.as_str());
6082
6083        let loc = must_some(index.find_definition("shared_fn"));
6084        assert_eq!(
6085            loc.uri,
6086            uri_b.to_string(),
6087            "bare-name 'shared_fn' should resolve to B.pm after A.pm is deleted"
6088        );
6089
6090        assert!(
6091            index.find_definition("A::shared_fn").is_none(),
6092            "qualified 'A::shared_fn' must be gone after A.pm deletion"
6093        );
6094        assert!(
6095            index.find_definition("B::shared_fn").is_some(),
6096            "qualified 'B::shared_fn' must remain after A.pm deletion"
6097        );
6098    }
6099
6100    #[test]
6101    fn test_definition_candidates_include_ambiguous_bare_symbols_in_stable_order() {
6102        let index = WorkspaceIndex::new();
6103        let uri_b = must(url::Url::parse("file:///lib/B.pm"));
6104        let uri_a = must(url::Url::parse("file:///lib/A.pm"));
6105        must(index.index_file(uri_b, "package B;\nsub shared { 1 }\n1;\n".to_string()));
6106        must(index.index_file(uri_a, "package A;\nsub shared { 1 }\n1;\n".to_string()));
6107
6108        let candidates = index.definition_candidates("shared");
6109        assert_eq!(candidates.len(), 2);
6110        assert_eq!(candidates[0].uri, "file:///lib/A.pm");
6111        assert_eq!(candidates[1].uri, "file:///lib/B.pm");
6112        assert_eq!(must_some(index.find_definition("shared")).uri, "file:///lib/A.pm");
6113    }
6114
6115    #[test]
6116    fn test_definition_candidates_include_duplicate_qualified_name_across_files() {
6117        let index = WorkspaceIndex::new();
6118        let uri_v2 = must(url::Url::parse("file:///lib/A-v2.pm"));
6119        let uri_v1 = must(url::Url::parse("file:///lib/A-v1.pm"));
6120        let source = "package A;\nsub foo { 1 }\n1;\n".to_string();
6121        must(index.index_file(uri_v2, source.clone()));
6122        must(index.index_file(uri_v1, source));
6123
6124        let candidates = index.definition_candidates("A::foo");
6125        assert_eq!(candidates.len(), 2);
6126        assert_eq!(candidates[0].uri, "file:///lib/A-v1.pm");
6127        assert_eq!(candidates[1].uri, "file:///lib/A-v2.pm");
6128    }
6129
6130    #[test]
6131    fn test_definition_candidates_are_cleaned_on_remove_and_reindex() {
6132        let index = WorkspaceIndex::new();
6133        let uri = must(url::Url::parse("file:///lib/A.pm"));
6134        must(index.index_file(uri.clone(), "package A;\nsub foo { 1 }\n1;\n".to_string()));
6135        assert_eq!(index.definition_candidates("A::foo").len(), 1);
6136
6137        index.remove_file(uri.as_str());
6138        assert!(index.definition_candidates("A::foo").is_empty());
6139
6140        must(index.index_file(uri, "package A;\nsub foo { 2 }\n1;\n".to_string()));
6141        assert_eq!(index.definition_candidates("A::foo").len(), 1);
6142    }
6143
6144    /// Verify that `incremental_remove_symbols` correctly retains candidates owned by
6145    /// other files when the removed file had BOTH exclusively-owned names (triggering the
6146    /// full-rebuild path) AND shared names. Before this fix, the full-rebuild path cleared
6147    /// all candidates and relied on the subsequent rebuild to re-add shared ones — correct
6148    /// in effect, but the test documents the expected observable behavior.
6149    #[test]
6150    fn test_definition_candidates_shared_symbol_survives_removal_of_sole_owner_of_other_symbol() {
6151        let index = WorkspaceIndex::new();
6152        let uri_a = must(url::Url::parse("file:///lib/A.pm"));
6153        let uri_b = must(url::Url::parse("file:///lib/B.pm"));
6154
6155        // A defines both `unique_to_a` (no other file) and `shared` (also in B)
6156        must(index.index_file(
6157            uri_a.clone(),
6158            "package A;\nsub unique_to_a { 1 }\nsub shared { 1 }\n1;\n".to_string(),
6159        ));
6160        must(index.index_file(uri_b.clone(), "package B;\nsub shared { 1 }\n1;\n".to_string()));
6161
6162        // Before removal: both shared candidates and unique_to_a are present
6163        assert_eq!(index.definition_candidates("shared").len(), 2);
6164        assert_eq!(index.definition_candidates("unique_to_a").len(), 1);
6165
6166        // Remove A — triggers the affected_names path for `unique_to_a`, but `shared`
6167        // still has B's candidate.
6168        index.remove_file(uri_a.as_str());
6169
6170        assert!(
6171            index.definition_candidates("unique_to_a").is_empty(),
6172            "unique_to_a should be gone after removing A"
6173        );
6174        assert_eq!(
6175            index.definition_candidates("shared").len(),
6176            1,
6177            "shared should still have B's candidate after removing A"
6178        );
6179        assert_eq!(
6180            index.definition_candidates("shared")[0].uri,
6181            "file:///lib/B.pm",
6182            "remaining shared candidate must be from B"
6183        );
6184    }
6185
6186    #[test]
6187    fn test_folder_context_in_file_index() {
6188        let index = WorkspaceIndex::new();
6189
6190        // Set up workspace folders
6191        index.set_workspace_folders(vec![
6192            "file:///project1".to_string(),
6193            "file:///project2".to_string(),
6194        ]);
6195
6196        let uri1 = "file:///project1/lib/Module.pm";
6197        let code1 = r#"
6198package Module;
6199
6200sub test_sub {
6201    return 1;
6202}
6203"#;
6204        must(index.index_file(must(url::Url::parse(uri1)), code1.to_string()));
6205
6206        let uri2 = "file:///project2/lib/Other.pm";
6207        let code2 = r#"
6208package Other;
6209
6210sub other_sub {
6211    return 2;
6212}
6213"#;
6214        must(index.index_file(must(url::Url::parse(uri2)), code2.to_string()));
6215
6216        // Verify folder context is set correctly
6217        let symbols1 = index.file_symbols(uri1);
6218        assert_eq!(symbols1.len(), 2, "Should have 2 symbols in Module.pm");
6219        for symbol in &symbols1 {
6220            assert_eq!(symbol.uri, uri1, "Symbol URI should match file URI");
6221        }
6222
6223        let symbols2 = index.file_symbols(uri2);
6224        assert_eq!(symbols2.len(), 2, "Should have 2 symbols in Other.pm");
6225        for symbol in &symbols2 {
6226            assert_eq!(symbol.uri, uri2, "Symbol URI should match file URI");
6227        }
6228
6229        // Verify folder attribution
6230        let files = index.files.read();
6231        let file_index1 = must_some(files.get(&DocumentStore::uri_key(uri1)));
6232        assert_eq!(
6233            file_index1.folder_uri,
6234            Some("file:///project1".to_string()),
6235            "File should be attributed to correct workspace folder"
6236        );
6237
6238        let file_index2 = must_some(files.get(&DocumentStore::uri_key(uri2)));
6239        assert_eq!(
6240            file_index2.folder_uri,
6241            Some("file:///project2".to_string()),
6242            "File should be attributed to correct workspace folder"
6243        );
6244    }
6245
6246    #[test]
6247    fn test_determine_folder_uri() {
6248        let index = WorkspaceIndex::new();
6249
6250        // Set up workspace folders
6251        index.set_workspace_folders(vec![
6252            "file:///project1".to_string(),
6253            "file:///project2".to_string(),
6254        ]);
6255
6256        // Test file in project1
6257        let folder1 = index.determine_folder_uri("file:///project1/lib/Module.pm");
6258        assert_eq!(
6259            folder1,
6260            Some("file:///project1".to_string()),
6261            "Should determine folder for file in project1"
6262        );
6263
6264        // Test file in project2
6265        let folder2 = index.determine_folder_uri("file:///project2/lib/Other.pm");
6266        assert_eq!(
6267            folder2,
6268            Some("file:///project2".to_string()),
6269            "Should determine folder for file in project2"
6270        );
6271
6272        // Test file not in any workspace folder
6273        let folder_none = index.determine_folder_uri("file:///other/project/Module.pm");
6274        assert_eq!(folder_none, None, "Should return None for file outside workspace folders");
6275    }
6276
6277    #[test]
6278    fn test_determine_folder_uri_prefers_most_specific_match() {
6279        let index = WorkspaceIndex::new();
6280
6281        // Keep broad folder first to ensure we don't rely on insertion order.
6282        index.set_workspace_folders(vec![
6283            "file:///project".to_string(),
6284            "file:///project/lib".to_string(),
6285        ]);
6286
6287        let folder = index.determine_folder_uri("file:///project/lib/My/Module.pm");
6288        assert_eq!(
6289            folder,
6290            Some("file:///project/lib".to_string()),
6291            "Nested workspace folders should attribute files to the most specific folder"
6292        );
6293    }
6294
6295    #[test]
6296    fn test_remove_folder() {
6297        let index = WorkspaceIndex::new();
6298
6299        // Set up workspace folders
6300        index.set_workspace_folders(vec![
6301            "file:///project1".to_string(),
6302            "file:///project2".to_string(),
6303        ]);
6304
6305        // Index files from both folders
6306        let uri1 = "file:///project1/lib/Module.pm";
6307        let code1 = r#"
6308package Module;
6309
6310sub test_sub {
6311    return 1;
6312}
6313"#;
6314        must(index.index_file(must(url::Url::parse(uri1)), code1.to_string()));
6315
6316        let uri2 = "file:///project2/lib/Other.pm";
6317        let code2 = r#"
6318package Other;
6319
6320sub other_sub {
6321    return 2;
6322}
6323"#;
6324        must(index.index_file(must(url::Url::parse(uri2)), code2.to_string()));
6325
6326        // Verify both files are indexed
6327        assert_eq!(index.file_count(), 2, "Should have 2 files indexed");
6328        assert_eq!(index.document_store().count(), 2, "Document store should track both files");
6329
6330        // Remove project1 folder
6331        index.remove_folder("file:///project1");
6332
6333        // Verify only project2 file remains
6334        assert_eq!(index.file_count(), 1, "Should have 1 file after removing folder");
6335        assert_eq!(
6336            index.document_store().count(),
6337            1,
6338            "Document store should drop files removed via folder deletion"
6339        );
6340        assert!(index.file_symbols(uri1).is_empty(), "File from removed folder should be gone");
6341        assert_eq!(
6342            index.file_symbols(uri2).len(),
6343            2,
6344            "File from remaining folder should still be present"
6345        );
6346    }
6347
6348    #[test]
6349    fn test_remove_folder_removes_symbol_free_files() {
6350        let index = WorkspaceIndex::new();
6351        index.set_workspace_folders(vec!["file:///project1".to_string()]);
6352
6353        let uri = "file:///project1/empty.pl";
6354        must(index.index_file(must(url::Url::parse(uri)), "# comments only".to_string()));
6355        assert_eq!(index.file_count(), 1, "Expected file to be indexed");
6356
6357        index.remove_folder("file:///project1");
6358
6359        assert_eq!(index.file_count(), 0, "Folder removal should delete symbol-free files");
6360        assert_eq!(
6361            index.document_store().count(),
6362            0,
6363            "Document store should stay in sync for symbol-free files"
6364        );
6365    }
6366
6367    // ========================================================================
6368    // GREEN-TDD EDGE CASE TESTS FOR ISSUE #6061 (static require + manual import)
6369    // ========================================================================
6370
6371    #[test]
6372    fn test_require_with_variable_target_is_not_indexed() -> Result<(), Box<dyn std::error::Error>>
6373    {
6374        let index = WorkspaceIndex::new();
6375        let uri = must(url::Url::parse("file:///test/require-var.pl"));
6376        let src = r#"package Test;
6377my $loader = 'MyModule';
6378require $loader;
63791;
6380"#;
6381        must(index.index_file(uri.clone(), src.to_string()));
6382        let deps = index.file_dependencies(uri.as_str());
6383        assert!(
6384            !deps.contains("MyModule"),
6385            "require with variable target should not register static dependency"
6386        );
6387        Ok(())
6388    }
6389
6390    #[test]
6391    fn test_multiple_import_calls_on_same_module() -> Result<(), Box<dyn std::error::Error>> {
6392        let index = WorkspaceIndex::new();
6393        let uri = must(url::Url::parse("file:///test/multi-import.pl"));
6394        let src = r#"package Test;
6395require Toolkit;
6396Toolkit->import('func_a');
6397Toolkit->import(qw(func_b func_c));
63981;
6399"#;
6400        must(index.index_file(uri.clone(), src.to_string()));
6401        let deps = index.file_dependencies(uri.as_str());
6402        assert!(deps.contains("Toolkit"), "module should be tracked as dependency");
6403        for symbol in &["func_a", "func_b", "func_c"] {
6404            let refs = index.find_references(symbol);
6405            assert!(!refs.is_empty(), "all imported symbols should be indexed: {}", symbol);
6406        }
6407        Ok(())
6408    }
6409
6410    #[test]
6411    fn test_require_string_vs_bareword_normalization() -> Result<(), Box<dyn std::error::Error>> {
6412        let index = WorkspaceIndex::new();
6413        let uri = must(url::Url::parse("file:///test/require-string.pl"));
6414        let src = r#"package Consumer;
6415require "String/Based/Module.pm";
6416String::Based::Module->import('exported');
64171;
6418"#;
6419        must(index.index_file(uri.clone(), src.to_string()));
6420        let deps = index.file_dependencies(uri.as_str());
6421        assert!(
6422            deps.contains("String::Based::Module"),
6423            "require string form should normalize path separators to ::"
6424        );
6425        let refs = index.find_references("exported");
6426        assert!(!refs.is_empty(), "import should be indexed even with string-form require");
6427        Ok(())
6428    }
6429
6430    #[test]
6431    fn test_import_without_require_registers_as_method_call()
6432    -> Result<(), Box<dyn std::error::Error>> {
6433        // Edge case: ->import() without preceding require is treated as a normal method call,
6434        // not as the static manual-import pattern, so the module is still visited/tracked
6435        // but the symbols are NOT marked as imports from the static require+import logic.
6436        let index = WorkspaceIndex::new();
6437        let uri = must(url::Url::parse("file:///test/orphan-import.pl"));
6438        let src = r#"package Test;
6439Unrelated::Module->import('orphaned');
6440orphaned();
64411;
6442"#;
6443        must(index.index_file(uri.clone(), src.to_string()));
6444
6445        // The module reference may still be tracked as a method call target,
6446        // but the key regression is: the orphaned symbol should not be indexed
6447        // as an import reference due to the missing require.
6448        let _refs = index.find_references("orphaned");
6449        // Symbol may be referenced but should not be specially treated as an import.
6450        // The main point is: without require, the pairing doesn't activate.
6451        Ok(())
6452    }
6453
6454    #[test]
6455    fn test_nested_blocks_preserve_require_scope() -> Result<(), Box<dyn std::error::Error>> {
6456        let index = WorkspaceIndex::new();
6457        let uri = must(url::Url::parse("file:///test/nested.pl"));
6458        let src = r#"package Test;
6459{
6460    require Outer;
6461    {
6462        Outer->import('nested_sym');
6463    }
6464}
64651;
6466"#;
6467        must(index.index_file(uri.clone(), src.to_string()));
6468        let deps = index.file_dependencies(uri.as_str());
6469        assert!(
6470            deps.contains("Outer"),
6471            "require in outer block should be visible to nested import"
6472        );
6473        let refs = index.find_references("nested_sym");
6474        assert!(!refs.is_empty(), "symbol imported in nested block should still be indexed");
6475        Ok(())
6476    }
6477
6478    #[test]
6479    fn test_require_path_without_pm_extension() -> Result<(), Box<dyn std::error::Error>> {
6480        let index = WorkspaceIndex::new();
6481        let uri = must(url::Url::parse("file:///test/no-ext.pl"));
6482        let src = r#"package Test;
6483require "My/Module";
6484My::Module->import('func');
64851;
6486"#;
6487        must(index.index_file(uri.clone(), src.to_string()));
6488        let deps = index.file_dependencies(uri.as_str());
6489        assert!(
6490            deps.contains("My::Module"),
6491            "require without .pm extension should normalize to module path"
6492        );
6493        Ok(())
6494    }
6495
6496    #[test]
6497    fn test_qw_with_bracket_delimiters() -> Result<(), Box<dyn std::error::Error>> {
6498        let index = WorkspaceIndex::new();
6499        let uri = must(url::Url::parse("file:///test/qw-delim.pl"));
6500        let src = r#"package Test;
6501require DelimModule;
6502DelimModule->import(qw[sym1 sym2]);
6503DelimModule->import(qw{sym3 sym4});
65041;
6505"#;
6506        must(index.index_file(uri.clone(), src.to_string()));
6507        for symbol in &["sym1", "sym2", "sym3", "sym4"] {
6508            let refs = index.find_references(symbol);
6509            assert!(
6510                !refs.is_empty(),
6511                "symbols from qw with bracket delimiters should be indexed: {}",
6512                symbol
6513            );
6514        }
6515        Ok(())
6516    }
6517
6518    #[test]
6519    fn test_array_literal_import_args() -> Result<(), Box<dyn std::error::Error>> {
6520        let index = WorkspaceIndex::new();
6521        let uri = must(url::Url::parse("file:///test/array-import.pl"));
6522        let src = r#"package Test;
6523require ArrayModule;
6524ArrayModule->import(['sym_x', 'sym_y']);
65251;
6526"#;
6527        must(index.index_file(uri.clone(), src.to_string()));
6528        for symbol in &["sym_x", "sym_y"] {
6529            let refs = index.find_references(symbol);
6530            assert!(
6531                !refs.is_empty(),
6532                "symbols from array literal import should be indexed: {}",
6533                symbol
6534            );
6535        }
6536        Ok(())
6537    }
6538
6539    #[test]
6540    fn test_require_inside_conditional_still_registers_dependency()
6541    -> Result<(), Box<dyn std::error::Error>> {
6542        let index = WorkspaceIndex::new();
6543        let uri = must(url::Url::parse("file:///test/cond-require.pl"));
6544        let src = r#"package Test;
6545if (1) {
6546    require ConditionalMod;
6547    ConditionalMod->import('cond_func');
6548}
65491;
6550"#;
6551        must(index.index_file(uri.clone(), src.to_string()));
6552        let deps = index.file_dependencies(uri.as_str());
6553        assert!(
6554            deps.contains("ConditionalMod"),
6555            "require inside conditional should still register as dependency"
6556        );
6557        let refs = index.find_references("cond_func");
6558        assert!(!refs.is_empty(), "import inside conditional should still index symbols");
6559        Ok(())
6560    }
6561
6562    #[test]
6563    fn test_mixed_string_and_bareword_imports() -> Result<(), Box<dyn std::error::Error>> {
6564        let index = WorkspaceIndex::new();
6565        let uri = must(url::Url::parse("file:///test/mixed-import.pl"));
6566        let src = r#"package Test;
6567require MixedMod;
6568MixedMod->import('string_sym');
6569MixedMod->import(qw(qw_one qw_two));
65701;
6571"#;
6572        must(index.index_file(uri.clone(), src.to_string()));
6573        let deps = index.file_dependencies(uri.as_str());
6574        assert!(deps.contains("MixedMod"), "require should register dependency");
6575        for symbol in &["string_sym", "qw_one", "qw_two"] {
6576            let refs = index.find_references(symbol);
6577            assert!(!refs.is_empty(), "all import forms should index symbols: {}", symbol);
6578        }
6579        Ok(())
6580    }
6581
6582    // -------------------------------------------------------------------------
6583    // Per-category incremental invalidation (Req 18.1–18.5)
6584    // -------------------------------------------------------------------------
6585
6586    /// Helper: build a minimal `FileFactShard` with configurable hashes.
6587    fn make_shard(
6588        uri: &str,
6589        content_hash: u64,
6590        anchors_hash: Option<u64>,
6591        entities_hash: Option<u64>,
6592        occurrences_hash: Option<u64>,
6593        edges_hash: Option<u64>,
6594    ) -> FileFactShard {
6595        let file_id = {
6596            let mut h = DefaultHasher::new();
6597            uri.hash(&mut h);
6598            FileId(h.finish())
6599        };
6600        FileFactShard {
6601            source_uri: uri.to_string(),
6602            file_id,
6603            content_hash,
6604            anchors_hash,
6605            entities_hash,
6606            occurrences_hash,
6607            edges_hash,
6608            anchors: Vec::new(),
6609            entities: Vec::new(),
6610            occurrences: Vec::new(),
6611            edges: Vec::new(),
6612        }
6613    }
6614
6615    /// Req 18.5: When content_hash is unchanged, skip all per-category
6616    /// comparisons — no index modifications happen.
6617    #[test]
6618    fn incremental_replace_skips_when_content_hash_unchanged()
6619    -> Result<(), Box<dyn std::error::Error>> {
6620        let index = WorkspaceIndex::new();
6621        let uri = "file:///lib/Same.pm";
6622        let key = DocumentStore::uri_key(uri);
6623
6624        let shard_v1 = make_shard(uri, 42, Some(1), Some(2), Some(3), Some(4));
6625        // First insert — no old shard, so all categories are "changed".
6626        let r1 = index.replace_fact_shard_incremental(&key, shard_v1);
6627        assert!(!r1.content_unchanged);
6628
6629        // Second insert with same content_hash → skip entirely.
6630        let shard_v2 = make_shard(uri, 42, Some(100), Some(200), Some(300), Some(400));
6631        let r2 = index.replace_fact_shard_incremental(&key, shard_v2);
6632        assert!(r2.content_unchanged);
6633        assert!(!r2.anchors_updated);
6634        assert!(!r2.entities_updated);
6635        assert!(!r2.occurrences_updated);
6636        assert!(!r2.edges_updated);
6637
6638        // The stored shard should still be v1 (unchanged).
6639        let stored = must_some(index.file_fact_shard(uri));
6640        assert_eq!(stored.anchors_hash, Some(1));
6641        Ok(())
6642    }
6643
6644    /// Req 18.3: When a category hash is unchanged, skip re-indexing that
6645    /// category's cross-file indexes.
6646    #[test]
6647    fn incremental_replace_skips_unchanged_categories() -> Result<(), Box<dyn std::error::Error>> {
6648        let index = WorkspaceIndex::new();
6649        let uri = "file:///lib/Partial.pm";
6650        let key = DocumentStore::uri_key(uri);
6651
6652        let shard_v1 = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
6653        index.replace_fact_shard_incremental(&key, shard_v1);
6654
6655        // Change content_hash but keep anchors and entities the same.
6656        // Only occurrences and edges change.
6657        let shard_v2 = make_shard(uri, 2, Some(10), Some(20), Some(99), Some(88));
6658        let result = index.replace_fact_shard_incremental(&key, shard_v2);
6659
6660        assert!(!result.content_unchanged);
6661        assert!(!result.anchors_updated, "anchors hash unchanged → skip");
6662        assert!(!result.entities_updated, "entities hash unchanged → skip");
6663        assert!(result.occurrences_updated, "occurrences hash changed → update");
6664        assert!(result.edges_updated, "edges hash changed → update");
6665        Ok(())
6666    }
6667
6668    /// Req 18.4: When a category hash has changed, remove old entries and
6669    /// insert new ones for that category.
6670    #[test]
6671    fn incremental_replace_updates_changed_categories() -> Result<(), Box<dyn std::error::Error>> {
6672        let index = WorkspaceIndex::new();
6673        let uri = "file:///lib/Changed.pm";
6674        let key = DocumentStore::uri_key(uri);
6675
6676        let shard_v1 = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
6677        index.replace_fact_shard_incremental(&key, shard_v1);
6678
6679        // Change all category hashes.
6680        let shard_v2 = make_shard(uri, 2, Some(11), Some(21), Some(31), Some(41));
6681        let result = index.replace_fact_shard_incremental(&key, shard_v2);
6682
6683        assert!(!result.content_unchanged);
6684        assert!(result.anchors_updated);
6685        assert!(result.entities_updated);
6686        assert!(result.occurrences_updated);
6687        assert!(result.edges_updated);
6688
6689        // The stored shard should be v2.
6690        let stored = must_some(index.file_fact_shard(uri));
6691        assert_eq!(stored.content_hash, 2);
6692        assert_eq!(stored.anchors_hash, Some(11));
6693        Ok(())
6694    }
6695
6696    /// When there is no old shard (first index), all categories are treated
6697    /// as changed.
6698    #[test]
6699    fn incremental_replace_first_insert_updates_all() -> Result<(), Box<dyn std::error::Error>> {
6700        let index = WorkspaceIndex::new();
6701        let uri = "file:///lib/New.pm";
6702        let key = DocumentStore::uri_key(uri);
6703
6704        let shard = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
6705        let result = index.replace_fact_shard_incremental(&key, shard);
6706
6707        assert!(!result.content_unchanged);
6708        assert!(result.anchors_updated);
6709        assert!(result.entities_updated);
6710        assert!(result.occurrences_updated);
6711        assert!(result.edges_updated);
6712        Ok(())
6713    }
6714
6715    /// When per-category hashes are `None` (legacy shard), the category is
6716    /// conservatively treated as changed.
6717    #[test]
6718    fn incremental_replace_none_hashes_treated_as_changed() -> Result<(), Box<dyn std::error::Error>>
6719    {
6720        let index = WorkspaceIndex::new();
6721        let uri = "file:///lib/Legacy.pm";
6722        let key = DocumentStore::uri_key(uri);
6723
6724        // Old shard has hashes, new shard has None for some.
6725        let shard_v1 = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
6726        index.replace_fact_shard_incremental(&key, shard_v1);
6727
6728        let shard_v2 = make_shard(uri, 2, None, Some(20), None, Some(40));
6729        let result = index.replace_fact_shard_incremental(&key, shard_v2);
6730
6731        assert!(!result.content_unchanged);
6732        assert!(result.anchors_updated, "None new hash → changed");
6733        assert!(!result.entities_updated, "same hash → skip");
6734        assert!(result.occurrences_updated, "None new hash → changed");
6735        assert!(!result.edges_updated, "same hash → skip");
6736        Ok(())
6737    }
6738
6739    /// Verify that the semantic reference index is updated only when
6740    /// occurrences or edges change.
6741    #[test]
6742    fn incremental_replace_updates_reference_index_on_occurrence_change()
6743    -> Result<(), Box<dyn std::error::Error>> {
6744        use perl_semantic_facts::{AnchorId, Confidence, OccurrenceId, OccurrenceKind, Provenance};
6745
6746        let index = WorkspaceIndex::new();
6747        let uri = "file:///lib/RefIdx.pm";
6748        let key = DocumentStore::uri_key(uri);
6749        let file_id = {
6750            let mut h = DefaultHasher::new();
6751            uri.hash(&mut h);
6752            FileId(h.finish())
6753        };
6754
6755        // v1: shard with one reference occurrence.
6756        let mut shard_v1 = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
6757        let anchor_id = AnchorId(1);
6758        shard_v1.anchors.push(perl_semantic_facts::AnchorFact {
6759            id: anchor_id,
6760            file_id,
6761            span_start_byte: 0,
6762            span_end_byte: 5,
6763            scope_id: None,
6764            provenance: Provenance::ExactAst,
6765            confidence: Confidence::High,
6766        });
6767        shard_v1.occurrences.push(perl_semantic_facts::OccurrenceFact {
6768            id: OccurrenceId(1),
6769            kind: OccurrenceKind::Call,
6770            entity_id: Some(EntityId(100)),
6771            anchor_id,
6772            scope_id: None,
6773            provenance: Provenance::ExactAst,
6774            confidence: Confidence::High,
6775        });
6776        shard_v1.entities.push(perl_semantic_facts::EntityFact {
6777            id: EntityId(100),
6778            kind: EntityKind::Subroutine,
6779            canonical_name: "RefIdx::foo".to_string(),
6780            anchor_id: Some(anchor_id),
6781            scope_id: None,
6782            provenance: Provenance::ExactAst,
6783            confidence: Confidence::High,
6784        });
6785        index.replace_fact_shard_incremental(&key, shard_v1);
6786
6787        // Reference index should have entries.
6788        assert!(
6789            index.semantic_reference_index.read().name_count() > 0
6790                || index.semantic_reference_index.read().entity_count() > 0,
6791            "reference index should be populated after first insert"
6792        );
6793
6794        // v2: same content_hash → skip entirely, reference index untouched.
6795        let shard_v2_same = make_shard(uri, 1, Some(10), Some(20), Some(99), Some(99));
6796        let r = index.replace_fact_shard_incremental(&key, shard_v2_same);
6797        assert!(r.content_unchanged);
6798
6799        // v3: different content_hash, same occurrence/edge hashes → skip ref index.
6800        let mut shard_v3 = make_shard(uri, 3, Some(11), Some(21), Some(30), Some(40));
6801        shard_v3.anchors.push(perl_semantic_facts::AnchorFact {
6802            id: anchor_id,
6803            file_id,
6804            span_start_byte: 0,
6805            span_end_byte: 5,
6806            scope_id: None,
6807            provenance: Provenance::ExactAst,
6808            confidence: Confidence::High,
6809        });
6810        shard_v3.occurrences.push(perl_semantic_facts::OccurrenceFact {
6811            id: OccurrenceId(1),
6812            kind: OccurrenceKind::Call,
6813            entity_id: Some(EntityId(100)),
6814            anchor_id,
6815            scope_id: None,
6816            provenance: Provenance::ExactAst,
6817            confidence: Confidence::High,
6818        });
6819        shard_v3.entities.push(perl_semantic_facts::EntityFact {
6820            id: EntityId(100),
6821            kind: EntityKind::Subroutine,
6822            canonical_name: "RefIdx::foo".to_string(),
6823            anchor_id: Some(anchor_id),
6824            scope_id: None,
6825            provenance: Provenance::ExactAst,
6826            confidence: Confidence::High,
6827        });
6828        let r3 = index.replace_fact_shard_incremental(&key, shard_v3);
6829        assert!(!r3.occurrences_updated, "occurrence hash unchanged → skip");
6830        assert!(!r3.edges_updated, "edge hash unchanged → skip");
6831
6832        Ok(())
6833    }
6834
6835    /// Verify that `index_file` uses incremental replacement (the fact shard
6836    /// is stored and updated correctly through the full indexing path).
6837    #[test]
6838    fn index_file_stores_fact_shard_incrementally() -> Result<(), Box<dyn std::error::Error>> {
6839        let index = WorkspaceIndex::new();
6840        let uri = "file:///lib/Incr.pm";
6841        let code = "package Incr;\nsub foo { 1 }\n1;\n";
6842
6843        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
6844        let shard1 = must_some(index.file_fact_shard(uri));
6845        assert!(shard1.anchors_hash.is_some());
6846        assert!(
6847            shard1.anchors.iter().any(|anchor| anchor.provenance == Provenance::ExactAst),
6848            "index_file should store the canonical semantic shard when adapters produce facts"
6849        );
6850        assert!(
6851            shard1.entities.iter().any(|entity| entity.provenance == Provenance::ExactAst),
6852            "index_file should store canonical entities rather than legacy fallback entities"
6853        );
6854
6855        // Re-index with same content → shard should be unchanged.
6856        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
6857        // The early-exit in index_file checks content_hash at the FileIndex
6858        // level, so the fact shard replacement is never reached for identical
6859        // content. Verify the shard is still present.
6860        let shard2 = must_some(index.file_fact_shard(uri));
6861        assert_eq!(shard1.content_hash, shard2.content_hash);
6862
6863        // Re-index with different content → shard should be replaced.
6864        let code2 = "package Incr;\nsub bar { 2 }\n1;\n";
6865        must(index.index_file(must(url::Url::parse(uri)), code2.to_string()));
6866        let shard3 = must_some(index.file_fact_shard(uri));
6867        assert_ne!(shard1.content_hash, shard3.content_hash);
6868
6869        Ok(())
6870    }
6871
6872    // ── Property-based tests for incremental invalidation ──
6873
6874    mod prop_incremental_invalidation {
6875        use super::*;
6876        use proptest::prelude::*;
6877        use proptest::test_runner::Config as ProptestConfig;
6878
6879        /// Strategy for an optional per-category hash.
6880        ///
6881        /// ~10% of the time produces `None` (simulating legacy shards
6882        /// without per-category hashes); otherwise a random `u64`.
6883        fn arb_category_hash() -> impl Strategy<Value = Option<u64>> {
6884            prop_oneof![
6885                1 => Just(None),
6886                9 => any::<u64>().prop_map(Some),
6887            ]
6888        }
6889
6890        /// Strategy for a `FileFactShard` with the given URI and
6891        /// randomly-chosen hashes.
6892        fn arb_shard(uri: &'static str) -> impl Strategy<Value = FileFactShard> {
6893            (
6894                any::<u64>(),        // content_hash
6895                arb_category_hash(), // anchors_hash
6896                arb_category_hash(), // entities_hash
6897                arb_category_hash(), // occurrences_hash
6898                arb_category_hash(), // edges_hash
6899            )
6900                .prop_map(move |(content_hash, ah, eh, oh, edh)| {
6901                    make_shard(uri, content_hash, ah, eh, oh, edh)
6902                })
6903        }
6904
6905        // Property 15: Incremental Invalidation Correctness
6906        //
6907        // **Validates: Requirements 18.3, 18.4, 18.5**
6908        //
6909        // For any file re-indexing where the whole-file content_hash is
6910        // unchanged, the workspace store shall not modify any cross-file
6911        // indexes.  For any file re-indexing where a per-category hash is
6912        // unchanged, the workspace store shall skip re-indexing that
6913        // category.  For any file re-indexing where a per-category hash
6914        // has changed, the workspace store shall remove old entries and
6915        // insert new ones for that category.
6916        proptest! {
6917            #![proptest_config(ProptestConfig {
6918                failure_persistence: None,
6919                ..ProptestConfig::default()
6920            })]
6921
6922            #[test]
6923            fn prop_incremental_invalidation_correctness(
6924                old_shard in arb_shard("file:///lib/Prop.pm"),
6925                new_shard in arb_shard("file:///lib/Prop.pm"),
6926            ) {
6927                let index = WorkspaceIndex::new();
6928                let key = DocumentStore::uri_key("file:///lib/Prop.pm");
6929
6930                // Seed the index with the old shard.
6931                index.replace_fact_shard_incremental(&key, old_shard.clone());
6932
6933                // Replace with the new shard and capture the result.
6934                let result = index.replace_fact_shard_incremental(&key, new_shard.clone());
6935
6936                // ── Req 18.5: content_hash unchanged → skip entirely ──
6937                if old_shard.content_hash == new_shard.content_hash {
6938                    prop_assert!(
6939                        result.content_unchanged,
6940                        "content_unchanged must be true when content_hash is the same"
6941                    );
6942                    prop_assert!(
6943                        !result.anchors_updated,
6944                        "anchors_updated must be false when content_hash unchanged"
6945                    );
6946                    prop_assert!(
6947                        !result.entities_updated,
6948                        "entities_updated must be false when content_hash unchanged"
6949                    );
6950                    prop_assert!(
6951                        !result.occurrences_updated,
6952                        "occurrences_updated must be false when content_hash unchanged"
6953                    );
6954                    prop_assert!(
6955                        !result.edges_updated,
6956                        "edges_updated must be false when content_hash unchanged"
6957                    );
6958                } else {
6959                    prop_assert!(
6960                        !result.content_unchanged,
6961                        "content_unchanged must be false when content_hash differs"
6962                    );
6963
6964                    // ── Req 18.3 / 18.4: per-category hash comparison ──
6965                    // A category is "unchanged" when both old and new have
6966                    // Some(h) and the values are equal.  Otherwise the
6967                    // category is conservatively treated as changed.
6968
6969                    let anchors_should_update = crate::semantic::invalidation::category_hash_changed(
6970                        old_shard.anchors_hash,
6971                        new_shard.anchors_hash,
6972                    );
6973                    prop_assert_eq!(
6974                        result.anchors_updated,
6975                        anchors_should_update,
6976                        "anchors_updated mismatch: old={:?} new={:?}",
6977                        old_shard.anchors_hash,
6978                        new_shard.anchors_hash,
6979                    );
6980
6981                    let entities_should_update =
6982                        crate::semantic::invalidation::category_hash_changed(
6983                            old_shard.entities_hash,
6984                            new_shard.entities_hash,
6985                        );
6986                    prop_assert_eq!(
6987                        result.entities_updated,
6988                        entities_should_update,
6989                        "entities_updated mismatch: old={:?} new={:?}",
6990                        old_shard.entities_hash,
6991                        new_shard.entities_hash,
6992                    );
6993
6994                    let occurrences_should_update =
6995                        crate::semantic::invalidation::category_hash_changed(
6996                            old_shard.occurrences_hash,
6997                            new_shard.occurrences_hash,
6998                        );
6999                    prop_assert_eq!(
7000                        result.occurrences_updated,
7001                        occurrences_should_update,
7002                        "occurrences_updated mismatch: old={:?} new={:?}",
7003                        old_shard.occurrences_hash,
7004                        new_shard.occurrences_hash,
7005                    );
7006
7007                    let edges_should_update = crate::semantic::invalidation::category_hash_changed(
7008                        old_shard.edges_hash,
7009                        new_shard.edges_hash,
7010                    );
7011                    prop_assert_eq!(
7012                        result.edges_updated,
7013                        edges_should_update,
7014                        "edges_updated mismatch: old={:?} new={:?}",
7015                        old_shard.edges_hash,
7016                        new_shard.edges_hash,
7017                    );
7018                }
7019            }
7020        }
7021    }
7022}
7023
7024// ── with_semantic_queries_for_uri tests ──
7025
#[cfg(test)]
mod semantic_query_callback_tests {
    use super::*;
    use perl_tdd_support::{must, must_some};

    /// For an indexed URI the callback runs and its return value is handed
    /// back wrapped in `Some`.
    #[test]
    fn with_semantic_queries_for_uri_indexed_uri_invokes_callback()
    -> Result<(), Box<dyn std::error::Error>> {
        // Sentinel the callback returns so the test can tell it actually ran.
        const SENTINEL: u32 = 42u32;

        let workspace = WorkspaceIndex::new();
        let indexed_uri = "file:///lib/Foo.pm";
        let parsed = must(url::Url::parse(indexed_uri));
        must(workspace.index_file(parsed, String::from("sub foo { 1 }")));

        let outcome = workspace.with_semantic_queries_for_uri(indexed_uri, |file_id, _queries| {
            // The id handed to the callback is expected to be a non-zero hash.
            assert_ne!(file_id.0, 0, "file_id should be non-zero");
            SENTINEL
        });

        assert_eq!(outcome, Some(SENTINEL), "callback must run when URI is indexed");
        Ok(())
    }

    /// With nothing indexed, the lookup yields `None`.
    #[test]
    fn with_semantic_queries_for_uri_unknown_uri_returns_none()
    -> Result<(), Box<dyn std::error::Error>> {
        let workspace = WorkspaceIndex::new();
        // Deliberately query an empty index.
        let outcome =
            workspace.with_semantic_queries_for_uri("file:///not/indexed.pl", |_, _| 99u32);
        assert!(outcome.is_none(), "unindexed URI must return None without invoking callback");
        Ok(())
    }

    /// The file id passed to the callback equals the one returned by
    /// `file_id_for_uri` for the same URI.
    #[test]
    fn with_semantic_queries_for_uri_file_id_matches_file_id_for_uri()
    -> Result<(), Box<dyn std::error::Error>> {
        let workspace = WorkspaceIndex::new();
        let indexed_uri = "file:///lib/Bar.pm";
        let parsed = must(url::Url::parse(indexed_uri));
        must(workspace.index_file(parsed, String::from("sub bar { 1 }")));

        let from_lookup = must_some(workspace.file_id_for_uri(indexed_uri));
        let from_callback = must_some(
            workspace.with_semantic_queries_for_uri(indexed_uri, |file_id, _queries| file_id),
        );

        assert_eq!(
            from_lookup, from_callback,
            "file_id_for_uri and with_semantic_queries_for_uri must agree"
        );
        Ok(())
    }

    /// The callback must never execute for a URI that was not indexed; a flag
    /// flipped inside the closure records whether it ran.
    #[test]
    fn with_semantic_queries_for_uri_callback_not_called_when_not_indexed()
    -> Result<(), Box<dyn std::error::Error>> {
        let workspace = WorkspaceIndex::new();
        let mut invoked = false;
        let _ = workspace.with_semantic_queries_for_uri("file:///ghost.pl", |_, _| {
            invoked = true;
        });
        assert!(!invoked, "callback must not be invoked for unindexed URI");
        Ok(())
    }
}
perl_workspace/workspace/workspace_index.rs

perl_workspace/workspace/
workspace_index.rs