Skip to main content

perl_workspace/workspace/
workspace_index.rs

1//! Workspace-wide symbol index for fast cross-file lookups in Perl LSP.
2//!
3//! This module provides efficient indexing of symbols across an entire Perl workspace,
4//! enabling enterprise-grade features like find-references, rename refactoring, and
5//! workspace symbol search with ≤1ms response times.
6//!
7//! # LSP Workflow Integration
8//!
9//! Core component in the Parse → Index → Navigate → Complete → Analyze pipeline:
10//! 1. **Parse**: AST generation from Perl source files
11//! 2. **Index**: Workspace symbol table construction with dual indexing strategy
12//! 3. **Navigate**: Cross-file symbol resolution and go-to-definition
13//! 4. **Complete**: Context-aware completion with workspace symbol awareness
14//! 5. **Analyze**: Cross-reference analysis and workspace refactoring operations
15//!
16//! # Performance Characteristics
17//!
18//! - **Symbol indexing**: O(n) where n is total workspace symbols
19//! - **Symbol lookup**: O(1) average with hash table indexing
20//! - **Cross-file queries**: <50μs for typical workspace sizes
21//! - **Memory usage**: ~1MB per 10K symbols with optimized storage
22//! - **Incremental updates**: ≤1ms for file-level symbol changes
23//! - **Large workspace scaling**: Designed to scale to 50K+ files and large codebases
24//! - **Benchmark targets**: <50μs lookups and ≤1ms incremental updates at scale
25//!
26//! # Dual Indexing Strategy
27//!
28//! Implements dual indexing for comprehensive Perl symbol resolution:
29//! - **Qualified names**: `Package::function` for explicit references
30//! - **Bare names**: `function` for context-dependent resolution
31//! - **98% reference coverage**: Handles both qualified and unqualified calls
32//! - **Automatic deduplication**: Prevents duplicate results in queries
33//!
34//! # Usage Examples
35//!
36//! ```rust
37//! use perl_workspace::workspace::workspace_index::WorkspaceIndex;
38//! use url::Url;
39//!
40//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
41//! let index = WorkspaceIndex::new();
42//!
43//! // Index a Perl file
44//! let uri = Url::parse("file:///example.pl")?;
45//! let code = "package MyPackage;\nsub example { return 42; }";
46//! index.index_file(uri, code.to_string())?;
47//!
48//! // Find symbol definitions
49//! let definition = index.find_definition("MyPackage::example");
50//! assert!(definition.is_some());
51//!
52//! // Workspace symbol search
53//! let symbols = index.find_symbols("example");
54//! assert!(!symbols.is_empty());
55//! # Ok(())
56//! # }
57//! ```
58//!
59//! # Related Modules
60//!
61//! See also the symbol extraction, reference finding, and semantic token classification
62//! modules in the workspace index implementation.
63
64use crate::Parser;
65use crate::ast::{Node, NodeKind};
66use crate::document_store::{Document, DocumentStore};
67use crate::position::{Position, Range};
68use crate::workspace::monitoring::IndexInstrumentation;
69use parking_lot::RwLock;
70use perl_position_tracking::{WireLocation, WirePosition, WireRange};
71use perl_semantic_facts::{
72    AnchorFact, AnchorId, Confidence, EdgeFact, EntityFact, EntityId, EntityKind, FileId,
73    Provenance,
74};
75use serde::{Deserialize, Serialize};
76use std::collections::hash_map::DefaultHasher;
77use std::collections::{HashMap, HashSet};
78use std::hash::{Hash, Hasher};
79use std::path::Path;
80use std::sync::Arc;
81use std::time::Instant;
82use url::Url;
83
84use crate::semantic::imports::ImportExportIndex;
85pub use crate::semantic::invalidation::ShardReplaceResult;
86use crate::semantic::invalidation::{ShardCategoryHashes, plan_shard_replacement};
87use crate::semantic::references::ReferenceIndex;
88pub use crate::workspace::monitoring::{
89    DegradationReason, EarlyExitReason, EarlyExitRecord, IndexInstrumentationSnapshot,
90    IndexMetrics, IndexPerformanceCaps, IndexPhase, IndexPhaseTransition, IndexResourceLimits,
91    IndexStateKind, IndexStateTransition, ResourceKind,
92};
93use perl_symbol::surface::decl::extract_symbol_decls;
94use perl_symbol::surface::facts::{symbol_decls_to_semantic_facts, symbol_refs_to_semantic_facts};
95use perl_symbol::surface::r#ref::extract_symbol_refs;
96
97// Re-export URI utilities for backward compatibility
98#[cfg(not(target_arch = "wasm32"))]
99/// URI ↔ filesystem helpers used during Index/Analyze workflows.
100pub use perl_uri::{fs_path_to_uri, uri_to_fs_path};
101/// URI inspection helpers used during Index/Analyze workflows.
102pub use perl_uri::{is_file_uri, is_special_scheme, uri_extension, uri_key};
103
104// ============================================================================
105// Index Lifecycle Types (Index Lifecycle v1 Specification)
106// ============================================================================
107
/// Index readiness state - explicit lifecycle management
///
/// Describes the operational state of the workspace index so that callers
/// (typically LSP handlers) can pick a full query path or a fallback instead of
/// blocking while the index is not fully available.
///
/// # State Transitions
///
/// - `Building` → `Ready`: workspace scan completes successfully
/// - `Building` → `Degraded`: scan timeout, IO error, or resource limit
/// - `Ready` → `Building`: a new scan/index pass is started
/// - `Ready` → `Degraded`: parse storm, resource limit, or IO error
/// - `Degraded` → `Building`: recovery attempt
/// - `Degraded` → `Ready`: successful re-scan after recovery
///
/// NOTE(review): the exact parse-storm threshold lives in `IndexMetrics` and is
/// not visible from this type — confirm before quoting a number here.
///
/// # Invariants
///
/// - During a single build attempt, `phase` is expected to advance
///   `Idle` → `Scanning` → `Indexing`.
/// - `indexed_count` should not exceed `total_count`; this is a caller contract
///   and is not enforced by the enum itself.
/// - The counts stored in `Ready` and `Degraded` are snapshots captured when the
///   state was entered; they are not updated afterwards.
///
/// # Usage
///
/// ```rust,ignore
/// use perl_parser::workspace_index::{IndexPhase, IndexState};
/// use std::time::Instant;
///
/// let state = IndexState::Building {
///     phase: IndexPhase::Indexing,
///     indexed_count: 50,
///     total_count: 100,
///     started_at: Instant::now(),
/// };
/// ```
#[derive(Clone, Debug)]
pub enum IndexState {
    /// Index is being constructed (workspace scan or indexing in progress)
    Building {
        /// Current build phase (Idle → Scanning → Indexing)
        phase: IndexPhase,
        /// Number of files indexed so far in this build attempt
        indexed_count: usize,
        /// Number of files discovered for this build attempt
        total_count: usize,
        /// When this build attempt started; used for scan-timeout checks
        started_at: Instant,
    },

    /// Index is consistent and ready for queries
    Ready {
        /// Total symbols indexed, as reported at transition time
        symbol_count: usize,
        /// Total files indexed, as reported at transition time
        file_count: usize,
        /// Timestamp of the last transition into (or refresh of) `Ready`
        completed_at: Instant,
    },

    /// Index is still serving queries, but with reduced guarantees
    Degraded {
        /// Why the index degraded
        reason: DegradationReason,
        /// Symbol count carried over from the previous state (0 when the
        /// previous state was `Building`)
        available_symbols: usize,
        /// When degradation occurred
        since: Instant,
    },
}
178
179impl IndexState {
180    /// Return the coarse state kind for instrumentation and routing decisions
181    pub fn kind(&self) -> IndexStateKind {
182        match self {
183            IndexState::Building { .. } => IndexStateKind::Building,
184            IndexState::Ready { .. } => IndexStateKind::Ready,
185            IndexState::Degraded { .. } => IndexStateKind::Degraded,
186        }
187    }
188
189    /// Return the current build phase when in `Building` state
190    pub fn phase(&self) -> Option<IndexPhase> {
191        match self {
192            IndexState::Building { phase, .. } => Some(*phase),
193            _ => None,
194        }
195    }
196
197    /// Timestamp of when the current state began
198    pub fn state_started_at(&self) -> Instant {
199        match self {
200            IndexState::Building { started_at, .. } => *started_at,
201            IndexState::Ready { completed_at, .. } => *completed_at,
202            IndexState::Degraded { since, .. } => *since,
203        }
204    }
205}
206
/// Coordinates index lifecycle, state transitions, and handler queries
///
/// `IndexCoordinator` pairs a `WorkspaceIndex` with an explicit `IndexState`,
/// so LSP handlers can ask whether the index is ready and fall back gracefully
/// when it is still building or has degraded.
///
/// # Architecture
///
/// ```text
/// LspServer
///   └── IndexCoordinator
///         ├── state: Arc<RwLock<IndexState>>
///         ├── index: Arc<WorkspaceIndex>
///         ├── limits: IndexResourceLimits
///         ├── caps: IndexPerformanceCaps
///         ├── metrics: IndexMetrics
///         └── instrumentation: IndexInstrumentation
/// ```
///
/// # State Management
///
/// The coordinator manages three states:
/// - `Building`: Initial scan or recovery in progress
/// - `Ready`: Fully indexed and available for queries
/// - `Degraded`: Available but with reduced functionality
///
/// # Locking
///
/// - Reading the state takes a short read lock and clones the value, so callers
///   never hold the lock while acting on the snapshot.
/// - State transitions take the write lock for the duration of the update.
///
/// # Usage
///
/// ```rust,ignore
/// use perl_parser::workspace_index::{IndexCoordinator, IndexState};
///
/// let coordinator = IndexCoordinator::new();
/// assert!(matches!(coordinator.state(), IndexState::Building { .. }));
///
/// // Transition to ready after indexing
/// coordinator.transition_to_ready(100, 5000);
/// assert!(matches!(coordinator.state(), IndexState::Ready { .. }));
///
/// // Query with degradation handling
/// let _result = coordinator.query(
///     |index| index.find_definition("my_function"), // full query
///     |_index| None                                 // partial fallback
/// );
/// ```
pub struct IndexCoordinator {
    /// Current index state, guarded by an `RwLock` so transitions are serialized
    state: Arc<RwLock<IndexState>>,

    /// The actual workspace index
    index: Arc<WorkspaceIndex>,

    /// Resource limits configuration
    ///
    /// Bounds resource usage so the index cannot grow without limit:
    /// - `max_files`: degradation is triggered when the file count exceeds it
    /// - `max_total_symbols`: degradation is triggered when the aggregate
    ///   symbol count exceeds it
    /// - `max_scan_duration_ms`: a build running longer than this degrades
    ///   with a scan timeout
    /// - `max_symbols_per_file`: presumably used for per-file validation during
    ///   indexing — not referenced in the code visible here, TODO confirm
    limits: IndexResourceLimits,

    /// Performance caps for early-exit heuristics
    caps: IndexPerformanceCaps,

    /// Runtime metrics (pending parse counter, parse-storm detection)
    metrics: IndexMetrics,

    /// Instrumentation for lifecycle transitions, phases, and early exits
    instrumentation: IndexInstrumentation,
}
282
283impl std::fmt::Debug for IndexCoordinator {
284    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
285        f.debug_struct("IndexCoordinator")
286            .field("state", &*self.state.read())
287            .field("limits", &self.limits)
288            .field("caps", &self.caps)
289            .finish_non_exhaustive()
290    }
291}
292
293impl IndexCoordinator {
294    /// Create a new coordinator in Building state
295    ///
296    /// Initializes the coordinator with default resource limits and
297    /// an empty workspace index ready for initial scan.
298    ///
299    /// # Returns
300    ///
301    /// A coordinator initialized in `IndexState::Building`.
302    ///
303    /// # Examples
304    ///
305    /// ```rust,ignore
306    /// use perl_parser::workspace_index::IndexCoordinator;
307    ///
308    /// let coordinator = IndexCoordinator::new();
309    /// ```
310    pub fn new() -> Self {
311        Self {
312            state: Arc::new(RwLock::new(IndexState::Building {
313                phase: IndexPhase::Idle,
314                indexed_count: 0,
315                total_count: 0,
316                started_at: Instant::now(),
317            })),
318            index: Arc::new(WorkspaceIndex::new()),
319            limits: IndexResourceLimits::default(),
320            caps: IndexPerformanceCaps::default(),
321            metrics: IndexMetrics::new(),
322            instrumentation: IndexInstrumentation::new(),
323        }
324    }
325
326    /// Create a coordinator with custom resource limits
327    ///
328    /// # Arguments
329    ///
330    /// * `limits` - Custom resource limits for this workspace
331    ///
332    /// # Returns
333    ///
334    /// A coordinator configured with the provided resource limits.
335    ///
336    /// # Examples
337    ///
338    /// ```rust,ignore
339    /// use perl_parser::workspace_index::{IndexCoordinator, IndexResourceLimits};
340    ///
341    /// let limits = IndexResourceLimits::default();
342    /// let coordinator = IndexCoordinator::with_limits(limits);
343    /// ```
344    pub fn with_limits(limits: IndexResourceLimits) -> Self {
345        Self {
346            state: Arc::new(RwLock::new(IndexState::Building {
347                phase: IndexPhase::Idle,
348                indexed_count: 0,
349                total_count: 0,
350                started_at: Instant::now(),
351            })),
352            index: Arc::new(WorkspaceIndex::new()),
353            limits,
354            caps: IndexPerformanceCaps::default(),
355            metrics: IndexMetrics::new(),
356            instrumentation: IndexInstrumentation::new(),
357        }
358    }
359
360    /// Create a coordinator with custom limits and performance caps
361    ///
362    /// # Arguments
363    ///
364    /// * `limits` - Resource limits for this workspace
365    /// * `caps` - Performance caps for indexing budgets
366    pub fn with_limits_and_caps(limits: IndexResourceLimits, caps: IndexPerformanceCaps) -> Self {
367        Self {
368            state: Arc::new(RwLock::new(IndexState::Building {
369                phase: IndexPhase::Idle,
370                indexed_count: 0,
371                total_count: 0,
372                started_at: Instant::now(),
373            })),
374            index: Arc::new(WorkspaceIndex::new()),
375            limits,
376            caps,
377            metrics: IndexMetrics::new(),
378            instrumentation: IndexInstrumentation::new(),
379        }
380    }
381
382    /// Get current state (lock-free read via clone)
383    ///
384    /// Returns a cloned copy of the current state for lock-free access
385    /// in hot path LSP handlers.
386    ///
387    /// # Returns
388    ///
389    /// The current `IndexState` snapshot.
390    ///
391    /// # Examples
392    ///
393    /// ```rust,ignore
394    /// use perl_parser::workspace_index::{IndexCoordinator, IndexState};
395    ///
396    /// let coordinator = IndexCoordinator::new();
397    /// match coordinator.state() {
398    ///     IndexState::Ready { .. } => {
399    ///         // Full query path
400    ///     }
401    ///     _ => {
402    ///         // Degraded/building fallback
403    ///     }
404    /// }
405    /// ```
406    pub fn state(&self) -> IndexState {
407        self.state.read().clone()
408    }
409
410    /// Get reference to the underlying workspace index
411    ///
412    /// Provides direct access to the `WorkspaceIndex` for operations
413    /// that don't require state checking (e.g., document store access).
414    ///
415    /// # Returns
416    ///
417    /// A shared reference to the underlying workspace index.
418    ///
419    /// # Examples
420    ///
421    /// ```rust,ignore
422    /// use perl_parser::workspace_index::IndexCoordinator;
423    ///
424    /// let coordinator = IndexCoordinator::new();
425    /// let _index = coordinator.index();
426    /// ```
427    pub fn index(&self) -> &Arc<WorkspaceIndex> {
428        &self.index
429    }
430
431    /// Access the configured resource limits
432    pub fn limits(&self) -> &IndexResourceLimits {
433        &self.limits
434    }
435
436    /// Access the configured performance caps
437    pub fn performance_caps(&self) -> &IndexPerformanceCaps {
438        &self.caps
439    }
440
441    /// Snapshot lifecycle instrumentation (durations, transitions, early exits)
442    pub fn instrumentation_snapshot(&self) -> IndexInstrumentationSnapshot {
443        self.instrumentation.snapshot()
444    }
445
446    /// Notify of file change (may trigger state transition)
447    ///
448    /// Increments the pending parse count and may transition to degraded
449    /// state if a parse storm is detected.
450    ///
451    /// # Arguments
452    ///
453    /// * `_uri` - URI of the changed file (reserved for future use).
454    ///
455    /// # Returns
456    ///
457    /// Nothing. Updates coordinator metrics and state for the LSP workflow.
458    ///
459    /// # Examples
460    ///
461    /// ```rust,ignore
462    /// use perl_parser::workspace_index::IndexCoordinator;
463    ///
464    /// let coordinator = IndexCoordinator::new();
465    /// coordinator.notify_change("file:///example.pl");
466    /// ```
467    pub fn notify_change(&self, _uri: &str) {
468        let pending = self.metrics.increment_pending_parses();
469
470        // Check for parse storm
471        if self.metrics.is_parse_storm() {
472            self.transition_to_degraded(DegradationReason::ParseStorm { pending_parses: pending });
473        }
474    }
475
476    /// Notify parse completion for the Index/Analyze workflow stages.
477    ///
478    /// Decrements the pending parse count, enforces resource limits, and may
479    /// attempt recovery when parse storms clear.
480    ///
481    /// # Arguments
482    ///
483    /// * `_uri` - URI of the parsed file (reserved for future use).
484    ///
485    /// # Returns
486    ///
487    /// Nothing. Updates coordinator metrics and state for the LSP workflow.
488    ///
489    /// # Examples
490    ///
491    /// ```rust,ignore
492    /// use perl_parser::workspace_index::IndexCoordinator;
493    ///
494    /// let coordinator = IndexCoordinator::new();
495    /// coordinator.notify_parse_complete("file:///example.pl");
496    /// ```
497    pub fn notify_parse_complete(&self, _uri: &str) {
498        let pending = self.metrics.decrement_pending_parses();
499
500        // Check for recovery from parse storm
501        if pending == 0 {
502            if let IndexState::Degraded { reason: DegradationReason::ParseStorm { .. }, .. } =
503                self.state()
504            {
505                // Attempt recovery - transition back to Building for re-scan
506                let mut state = self.state.write();
507                let from_kind = state.kind();
508                self.instrumentation.record_state_transition(from_kind, IndexStateKind::Building);
509                *state = IndexState::Building {
510                    phase: IndexPhase::Idle,
511                    indexed_count: 0,
512                    total_count: 0,
513                    started_at: Instant::now(),
514                };
515            }
516        }
517
518        // Enforce resource limits after parse completion
519        self.enforce_limits();
520    }
521
522    /// Transition to Ready state
523    ///
524    /// Marks the index as fully ready for queries after successful workspace
525    /// scan. Records the file count, symbol count, and completion timestamp.
526    /// Enforces resource limits after transition.
527    ///
528    /// # State Transition Guards
529    ///
530    /// Only valid transitions:
531    /// - `Building` → `Ready` (normal completion)
532    /// - `Degraded` → `Ready` (recovery after fix)
533    ///
534    /// # Arguments
535    ///
536    /// * `file_count` - Total number of files indexed
537    /// * `symbol_count` - Total number of symbols extracted
538    ///
539    /// # Returns
540    ///
541    /// Nothing. The coordinator state is updated in-place.
542    ///
543    /// # Examples
544    ///
545    /// ```rust,ignore
546    /// use perl_parser::workspace_index::IndexCoordinator;
547    ///
548    /// let coordinator = IndexCoordinator::new();
549    /// coordinator.transition_to_ready(100, 5000);
550    /// ```
551    pub fn transition_to_ready(&self, file_count: usize, symbol_count: usize) {
552        let mut state = self.state.write();
553        let from_kind = state.kind();
554
555        // State transition guard: validate current state allows transition to Ready
556        match &*state {
557            IndexState::Building { .. } | IndexState::Degraded { .. } => {
558                // Valid transition - proceed
559                *state =
560                    IndexState::Ready { symbol_count, file_count, completed_at: Instant::now() };
561            }
562            IndexState::Ready { .. } => {
563                // Already Ready - update metrics but don't log as transition
564                *state =
565                    IndexState::Ready { symbol_count, file_count, completed_at: Instant::now() };
566            }
567        }
568        self.instrumentation.record_state_transition(from_kind, IndexStateKind::Ready);
569        drop(state); // Release write lock before checking limits
570
571        // Enforce resource limits after transition
572        self.enforce_limits();
573    }
574
575    /// Transition to Scanning phase (Idle → Scanning)
576    ///
577    /// Resets build counters and marks the index as scanning workspace folders.
578    pub fn transition_to_scanning(&self) {
579        let mut state = self.state.write();
580        let from_kind = state.kind();
581
582        match &*state {
583            IndexState::Building { phase, indexed_count, total_count, started_at } => {
584                if *phase != IndexPhase::Scanning {
585                    self.instrumentation.record_phase_transition(*phase, IndexPhase::Scanning);
586                }
587                *state = IndexState::Building {
588                    phase: IndexPhase::Scanning,
589                    indexed_count: *indexed_count,
590                    total_count: *total_count,
591                    started_at: *started_at,
592                };
593            }
594            IndexState::Ready { .. } | IndexState::Degraded { .. } => {
595                self.instrumentation.record_state_transition(from_kind, IndexStateKind::Building);
596                self.instrumentation
597                    .record_phase_transition(IndexPhase::Idle, IndexPhase::Scanning);
598                *state = IndexState::Building {
599                    phase: IndexPhase::Scanning,
600                    indexed_count: 0,
601                    total_count: 0,
602                    started_at: Instant::now(),
603                };
604            }
605        }
606    }
607
608    /// Update scanning progress with the latest discovered file count
609    pub fn update_scan_progress(&self, total_count: usize) {
610        let mut state = self.state.write();
611        if let IndexState::Building { phase, indexed_count, started_at, .. } = &*state {
612            if *phase != IndexPhase::Scanning {
613                self.instrumentation.record_phase_transition(*phase, IndexPhase::Scanning);
614            }
615            *state = IndexState::Building {
616                phase: IndexPhase::Scanning,
617                indexed_count: *indexed_count,
618                total_count,
619                started_at: *started_at,
620            };
621        }
622    }
623
624    /// Transition to Indexing phase (Scanning → Indexing)
625    ///
626    /// Uses the discovered file count as the total index target.
627    pub fn transition_to_indexing(&self, total_count: usize) {
628        let mut state = self.state.write();
629        let from_kind = state.kind();
630
631        match &*state {
632            IndexState::Building { phase, indexed_count, started_at, .. } => {
633                if *phase != IndexPhase::Indexing {
634                    self.instrumentation.record_phase_transition(*phase, IndexPhase::Indexing);
635                }
636                *state = IndexState::Building {
637                    phase: IndexPhase::Indexing,
638                    indexed_count: *indexed_count,
639                    total_count,
640                    started_at: *started_at,
641                };
642            }
643            IndexState::Ready { .. } | IndexState::Degraded { .. } => {
644                self.instrumentation.record_state_transition(from_kind, IndexStateKind::Building);
645                self.instrumentation
646                    .record_phase_transition(IndexPhase::Idle, IndexPhase::Indexing);
647                *state = IndexState::Building {
648                    phase: IndexPhase::Indexing,
649                    indexed_count: 0,
650                    total_count,
651                    started_at: Instant::now(),
652                };
653            }
654        }
655    }
656
657    /// Transition to Building state (Indexing phase)
658    ///
659    /// Marks the index as indexing with a known total file count.
660    pub fn transition_to_building(&self, total_count: usize) {
661        let mut state = self.state.write();
662        let from_kind = state.kind();
663
664        // State transition guard: validate transition is allowed
665        match &*state {
666            IndexState::Degraded { .. } | IndexState::Ready { .. } => {
667                self.instrumentation.record_state_transition(from_kind, IndexStateKind::Building);
668                self.instrumentation
669                    .record_phase_transition(IndexPhase::Idle, IndexPhase::Indexing);
670                *state = IndexState::Building {
671                    phase: IndexPhase::Indexing,
672                    indexed_count: 0,
673                    total_count,
674                    started_at: Instant::now(),
675                };
676            }
677            IndexState::Building { phase, indexed_count, started_at, .. } => {
678                let mut next_phase = *phase;
679                if *phase == IndexPhase::Idle {
680                    self.instrumentation
681                        .record_phase_transition(IndexPhase::Idle, IndexPhase::Indexing);
682                    next_phase = IndexPhase::Indexing;
683                }
684                *state = IndexState::Building {
685                    phase: next_phase,
686                    indexed_count: *indexed_count,
687                    total_count,
688                    started_at: *started_at,
689                };
690            }
691        }
692    }
693
694    /// Update Building state progress for the Index/Analyze workflow stages.
695    ///
696    /// Increments the indexed file count and checks for scan timeouts.
697    ///
698    /// # Arguments
699    ///
700    /// * `indexed_count` - Number of files indexed so far.
701    ///
702    /// # Returns
703    ///
704    /// Nothing. Updates coordinator state and may transition to `Degraded`.
705    ///
706    /// # Examples
707    ///
708    /// ```rust,ignore
709    /// use perl_parser::workspace_index::IndexCoordinator;
710    ///
711    /// let coordinator = IndexCoordinator::new();
712    /// coordinator.transition_to_building(100);
713    /// coordinator.update_building_progress(1);
714    /// ```
715    pub fn update_building_progress(&self, indexed_count: usize) {
716        let mut state = self.state.write();
717
718        if let IndexState::Building { phase, started_at, total_count, .. } = &*state {
719            let elapsed = started_at.elapsed().as_millis() as u64;
720
721            // Check for scan timeout
722            if elapsed > self.limits.max_scan_duration_ms {
723                // Timeout exceeded - transition to degraded
724                drop(state);
725                self.transition_to_degraded(DegradationReason::ScanTimeout { elapsed_ms: elapsed });
726                return;
727            }
728
729            // Update progress
730            *state = IndexState::Building {
731                phase: *phase,
732                indexed_count,
733                total_count: *total_count,
734                started_at: *started_at,
735            };
736        }
737    }
738
739    /// Transition to Degraded state
740    ///
741    /// Marks the index as degraded with the specified reason. Preserves
742    /// the current symbol count (if available) to indicate partial
743    /// functionality remains.
744    ///
745    /// # Arguments
746    ///
747    /// * `reason` - Why the index degraded (ParseStorm, IoError, etc.)
748    ///
749    /// # Returns
750    ///
751    /// Nothing. The coordinator state is updated in-place.
752    ///
753    /// # Examples
754    ///
755    /// ```rust,ignore
756    /// use perl_parser::workspace_index::{DegradationReason, IndexCoordinator, ResourceKind};
757    ///
758    /// let coordinator = IndexCoordinator::new();
759    /// coordinator.transition_to_degraded(DegradationReason::ResourceLimit {
760    ///     kind: ResourceKind::MaxFiles,
761    /// });
762    /// ```
763    pub fn transition_to_degraded(&self, reason: DegradationReason) {
764        let mut state = self.state.write();
765        let from_kind = state.kind();
766
767        // Get available symbols count from current state
768        let available_symbols = match &*state {
769            IndexState::Ready { symbol_count, .. } => *symbol_count,
770            IndexState::Degraded { available_symbols, .. } => *available_symbols,
771            IndexState::Building { .. } => 0,
772        };
773
774        self.instrumentation.record_state_transition(from_kind, IndexStateKind::Degraded);
775        *state = IndexState::Degraded { reason, available_symbols, since: Instant::now() };
776    }
777
778    /// Check resource limits and return degradation reason if exceeded
779    ///
780    /// Examines current workspace index state against configured resource limits.
781    /// Returns the first exceeded limit found, enabling targeted degradation.
782    ///
783    /// # Returns
784    ///
785    /// * `Some(DegradationReason)` - Resource limit exceeded, contains specific limit type
786    /// * `None` - All limits within acceptable bounds
787    ///
788    /// # Checked Limits
789    ///
790    /// - `max_files`: Total number of indexed files
791    /// - `max_total_symbols`: Aggregate symbol count across workspace
792    ///
793    /// # Performance
794    ///
795    /// - Lock-free read of index state (<100ns)
796    /// - Symbol counting is O(n) where n is number of files
797    ///
798    /// Returns: `Some(DegradationReason)` when a limit is exceeded, otherwise `None`.
799    ///
800    /// # Examples
801    ///
802    /// ```rust,ignore
803    /// use perl_parser::workspace_index::IndexCoordinator;
804    ///
805    /// let coordinator = IndexCoordinator::new();
806    /// let _reason = coordinator.check_limits();
807    /// ```
808    pub fn check_limits(&self) -> Option<DegradationReason> {
809        let files = self.index.files.read();
810
811        // Check max_files limit
812        let file_count = files.len();
813        if file_count > self.limits.max_files {
814            return Some(DegradationReason::ResourceLimit { kind: ResourceKind::MaxFiles });
815        }
816
817        // Check max_total_symbols limit
818        let total_symbols: usize = files.values().map(|fi| fi.symbols.len()).sum();
819        if total_symbols > self.limits.max_total_symbols {
820            return Some(DegradationReason::ResourceLimit { kind: ResourceKind::MaxSymbols });
821        }
822
823        None
824    }
825
826    /// Enforce resource limits and trigger degradation if exceeded
827    ///
828    /// Checks current resource usage against configured limits and automatically
829    /// transitions to Degraded state if any limit is exceeded. This method should
830    /// be called after operations that modify index size (file additions, parse
831    /// completions, etc.).
832    ///
833    /// # State Transitions
834    ///
835    /// - `Ready` → `Degraded(ResourceLimit)` if limits exceeded
836    /// - `Building` → `Degraded(ResourceLimit)` if limits exceeded
837    ///
838    /// # Returns
839    ///
840    /// Nothing. The coordinator state is updated in-place when limits are exceeded.
841    ///
842    /// # Examples
843    ///
844    /// ```rust,ignore
845    /// use perl_parser::workspace_index::IndexCoordinator;
846    ///
847    /// let coordinator = IndexCoordinator::new();
848    /// // ... index some files ...
849    /// coordinator.enforce_limits();  // Check and degrade if needed
850    /// ```
851    pub fn enforce_limits(&self) {
852        if let Some(reason) = self.check_limits() {
853            self.transition_to_degraded(reason);
854        }
855    }
856
857    /// Record an early-exit event for indexing instrumentation
858    pub fn record_early_exit(
859        &self,
860        reason: EarlyExitReason,
861        elapsed_ms: u64,
862        indexed_files: usize,
863        total_files: usize,
864    ) {
865        self.instrumentation.record_early_exit(EarlyExitRecord {
866            reason,
867            elapsed_ms,
868            indexed_files,
869            total_files,
870        });
871    }
872
873    /// Query with automatic degradation handling
874    ///
875    /// Dispatches to full query if index is Ready, or partial query otherwise.
876    /// This pattern enables LSP handlers to provide appropriate responses
877    /// based on index state without explicit state checking.
878    ///
879    /// # Type Parameters
880    ///
881    /// * `T` - Return type of the query functions
882    /// * `F1` - Full query function type accepting `&WorkspaceIndex` and returning `T`
883    /// * `F2` - Partial query function type accepting `&WorkspaceIndex` and returning `T`
884    ///
885    /// # Arguments
886    ///
887    /// * `full_query` - Function to execute when index is Ready
888    /// * `partial_query` - Function to execute when index is Building/Degraded
889    ///
890    /// # Returns
891    ///
892    /// The value returned by the selected query function.
893    ///
894    /// # Examples
895    ///
896    /// ```rust,ignore
897    /// use perl_parser::workspace_index::IndexCoordinator;
898    ///
899    /// let coordinator = IndexCoordinator::new();
900    /// let locations = coordinator.query(
901    ///     |index| index.find_references("my_function"),  // Full workspace search
902    ///     |index| vec![]                                 // Empty fallback
903    /// );
904    /// ```
905    pub fn query<T, F1, F2>(&self, full_query: F1, partial_query: F2) -> T
906    where
907        F1: FnOnce(&WorkspaceIndex) -> T,
908        F2: FnOnce(&WorkspaceIndex) -> T,
909    {
910        match self.state() {
911            IndexState::Ready { .. } => full_query(&self.index),
912            _ => partial_query(&self.index),
913        }
914    }
915}
916
917impl Default for IndexCoordinator {
918    fn default() -> Self {
919        Self::new()
920    }
921}
922
923// ============================================================================
924// Symbol Indexing Types
925// ============================================================================
926
/// Symbol kinds for cross-file indexing during Index/Navigate workflows.
///
/// Used as the `kind` component of [`SymbolKey`].
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum SymKind {
    /// Variable symbol ($, @, or % sigil)
    Var,
    /// Subroutine definition (sub foo)
    Sub,
    /// Package declaration (package Foo)
    Pack,
}
937
/// A normalized symbol key for cross-file lookups in Index/Navigate workflows.
///
/// Derives `Eq` and `Hash`, so it can be used directly as a hash-map key.
/// All four fields take part in equality and hashing.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub struct SymbolKey {
    /// Package name containing this symbol
    pub pkg: Arc<str>,
    /// Bare name without sigil prefix
    pub name: Arc<str>,
    /// Variable sigil ($, @, or %) if applicable
    pub sigil: Option<char>,
    /// Kind of symbol (variable, subroutine, package)
    pub kind: SymKind,
}
950
/// Split a Perl variable name into its sigil and bare identifier.
///
/// Only the three variable sigils `$`, `@` and `%` are recognised, and only
/// in the first position. Anything else is returned unchanged with no sigil.
///
/// # Arguments
///
/// * `name` - Variable name from Perl source, with or without sigil.
///
/// # Returns
///
/// `(sigil, name)` where `sigil` is the leading sigil if one was present and
/// `name` is the remainder of the input. The remainder is empty when the input
/// is a lone sigil; empty input yields `(None, "")`.
///
/// # Examples
///
/// ```rust,ignore
/// use perl_parser::workspace_index::normalize_var;
///
/// assert_eq!(normalize_var("$count"), (Some('$'), "count"));
/// assert_eq!(normalize_var("process_emails"), (None, "process_emails"));
/// ```
pub fn normalize_var(name: &str) -> (Option<char>, &str) {
    let mut rest = name.chars();
    match rest.next() {
        // After consuming the sigil, the iterator's remaining slice is the bare name.
        Some(sigil @ ('$' | '@' | '%')) => (Some(sigil), rest.as_str()),
        _ => (None, name),
    }
}
991
992// Using lsp_types for Position and Range
993
/// Internal location type used during Navigate/Analyze workflows.
///
/// Pairs a file URI with a range inside that file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Location {
    /// File URI where the symbol is located
    pub uri: String,
    /// Line and character range within the file
    pub range: Range,
}
1002
/// Stable symbol identity returned by cross-file reference queries.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SymbolIdentity {
    /// Canonical stable key for the symbol (qualified when available).
    pub stable_key: String,
    /// Bare symbol name.
    pub name: String,
    /// Fully qualified symbol name when available, `None` otherwise.
    pub qualified_name: Option<String>,
    /// Symbol kind (subroutine, package, variable, ...).
    pub kind: SymbolKind,
}
1015
/// Read-only cross-file query result used by rename/safe-delete planners.
///
/// Bundles the resolved symbol's identity with its definition site and its
/// reference locations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CrossFileReferenceQueryResult {
    /// Identity for the resolved symbol.
    pub symbol: SymbolIdentity,
    /// Definition site for the resolved symbol.
    pub definition: Location,
    /// All reference locations (including definition) in deterministic order.
    pub references: Vec<Location>,
}
1026
/// A symbol in the workspace for Index/Navigate workflows.
///
/// Serializable via serde. Field names are used as-is on the wire (no rename
/// attribute is applied to this type).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkspaceSymbol {
    /// Symbol name without package qualification
    pub name: String,
    /// Type of symbol (subroutine, variable, package, etc.)
    pub kind: SymbolKind,
    /// File URI where the symbol is defined
    pub uri: String,
    /// Line and character range of the symbol definition
    pub range: Range,
    /// Fully qualified name including package (e.g., "Package::function")
    pub qualified_name: Option<String>,
    /// POD documentation associated with the symbol
    pub documentation: Option<String>,
    /// Name of the containing package or class
    pub container_name: Option<String>,
    /// Whether this symbol has a body (false for forward declarations).
    ///
    /// When the field is absent from serialized input it deserializes as
    /// `true`, via `default_has_body`.
    #[serde(default = "default_has_body")]
    pub has_body: bool,
    /// Workspace folder URI this symbol belongs to (for multi-root workspace support)
    pub workspace_folder_uri: Option<String>,
}
1050
/// Serde default for `WorkspaceSymbol::has_body`: a symbol whose serialized
/// form omits the field is treated as having a body.
fn default_has_body() -> bool {
    true
}
1054
1055// Re-export the unified symbol types from perl-symbol
1056/// Symbol kind enums used during Index/Analyze workflows.
1057pub use perl_symbol::{SymbolKind, VarKind};
1058
/// Reference to a symbol for Navigate/Analyze workflows.
///
/// Stored per file in `FileIndex::references`, grouped by symbol name.
#[derive(Debug, Clone)]
pub struct SymbolReference {
    /// File URI where the reference occurs
    pub uri: String,
    /// Line and character range of the reference
    pub range: Range,
    /// How the symbol is being referenced (definition, usage, etc.)
    pub kind: ReferenceKind,
}
1069
/// Classification of how a symbol is referenced in Navigate/Analyze workflows.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReferenceKind {
    /// Symbol definition site (sub declaration, variable declaration)
    Definition,
    /// General usage of the symbol (function call, method call)
    Usage,
    /// Import via use statement
    Import,
    /// Variable read access
    Read,
    /// Variable write access (assignment target)
    Write,
}
1084
/// LSP-compliant workspace symbol for wire format in Navigate/Analyze workflows.
///
/// Serialize-only. Field names are emitted in camelCase (`containerName`,
/// `workspaceFolderUri`), and the two optional fields are left out of the
/// output entirely when they are `None`.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct LspWorkspaceSymbol {
    /// Symbol name as displayed to the user
    pub name: String,
    /// LSP symbol kind number (see lsp_types::SymbolKind)
    pub kind: u32,
    /// Location of the symbol definition
    pub location: WireLocation,
    /// Name of the containing symbol (package, class)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub container_name: Option<String>,
    /// Workspace folder URI this symbol belongs to (for multi-root workspace disambiguation)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub workspace_folder_uri: Option<String>,
}
1102
1103impl From<&WorkspaceSymbol> for LspWorkspaceSymbol {
1104    fn from(sym: &WorkspaceSymbol) -> Self {
1105        let range = WireRange {
1106            start: WirePosition { line: sym.range.start.line, character: sym.range.start.column },
1107            end: WirePosition { line: sym.range.end.line, character: sym.range.end.column },
1108        };
1109
1110        Self {
1111            name: sym.name.clone(),
1112            kind: sym.kind.to_lsp_kind(),
1113            location: WireLocation { uri: sym.uri.clone(), range },
1114            container_name: sym.container_name.clone(),
1115            workspace_folder_uri: sym.workspace_folder_uri.clone(),
1116        }
1117    }
1118}
1119
/// File-level index data
///
/// One entry per indexed file, stored in `WorkspaceIndex::files`. All fields
/// are private to this module.
#[derive(Default, Clone)]
pub struct FileIndex {
    /// Canonical file URI for this index entry.
    source_uri: String,
    /// Symbols defined in this file
    symbols: Vec<WorkspaceSymbol>,
    /// References in this file (symbol name -> references)
    references: HashMap<String, Vec<SymbolReference>>,
    /// Dependencies (modules this file imports)
    dependencies: HashSet<String>,
    /// Content hash for early-exit optimization
    content_hash: u64,
    /// Workspace folder URI this file belongs to (for multi-root workspace support)
    folder_uri: Option<String>,
}
1136
/// Write-through semantic fact storage for one indexed file.
///
/// Holds the four fact categories (anchors, entities, occurrences, edges)
/// together with a whole-file hash and one optional hash per category.
#[derive(Clone, Debug)]
pub struct FileFactShard {
    /// Canonical file URI for this shard.
    pub source_uri: String,
    /// Stable file identifier derived from normalized URI.
    pub file_id: FileId,
    /// Whole-file content hash used for stale-shard replacement.
    pub content_hash: u64,
    /// Optional hash for the anchor category, for change diagnostics.
    pub anchors_hash: Option<u64>,
    /// Optional hash for the entity category, for change diagnostics.
    pub entities_hash: Option<u64>,
    /// Optional hash for the occurrence category, for change diagnostics.
    pub occurrences_hash: Option<u64>,
    /// Optional hash for the edge category, for change diagnostics.
    pub edges_hash: Option<u64>,
    /// Anchor facts for this file.
    pub anchors: Vec<AnchorFact>,
    /// Entity facts for this file.
    pub entities: Vec<EntityFact>,
    /// Occurrence facts for this file.
    pub occurrences: Vec<perl_semantic_facts::OccurrenceFact>,
    /// Edge facts for this file.
    pub edges: Vec<EdgeFact>,
}
1163
/// Thread-safe workspace index
///
/// Each table lives behind its own `Arc<RwLock<..>>`, so methods take `&self`
/// and lock only the tables they touch.
pub struct WorkspaceIndex {
    /// Index data per file URI (normalized key -> data)
    files: Arc<RwLock<HashMap<String, FileIndex>>>,
    /// Global symbol multimap (qualified/bare name -> ordered definition candidates)
    ///
    /// A symbol is entered under its bare name and, when it has one, under its
    /// qualified name as well.
    symbols: Arc<RwLock<HashMap<String, Vec<DefinitionCandidate>>>>,
    /// Global reference index (symbol name -> locations across all files)
    ///
    /// Aggregated from per-file `FileIndex::references` during `index_file()`.
    /// Provides O(1) lookup for `find_references()` instead of iterating all files.
    global_references: Arc<RwLock<HashMap<String, Vec<Location>>>>,
    /// Write-through semantic fact shards keyed by normalized URI.
    fact_shards: Arc<RwLock<HashMap<String, FileFactShard>>>,
    /// Semantic cross-file reference index (typed occurrences by name and entity).
    semantic_reference_index: Arc<RwLock<ReferenceIndex>>,
    /// Semantic cross-file import/export index.
    semantic_import_export_index: Arc<RwLock<ImportExportIndex>>,
    /// Document store for in-memory text
    document_store: DocumentStore,
    /// Workspace folder URIs for multi-root workspace support
    ///
    /// Used to determine which workspace folder a file belongs to for
    /// proper folder attribution in multi-root workspaces.
    workspace_folders: Arc<RwLock<Vec<String>>>,
}
1189
/// One definition site stored in the global symbol multimap.
///
/// Several candidates can share a name; `definition_candidate_sort_key`
/// orders them by kind first (subroutines/methods, then constants, then
/// everything else) and by location second.
#[derive(Debug, Clone, Eq, PartialEq)]
struct DefinitionCandidate {
    /// Where the definition lives.
    location: Location,
    /// Kind of the defined symbol; drives the ranking described above.
    kind: SymbolKind,
}
1195
1196impl WorkspaceIndex {
1197    fn location_sort_key(location: &Location) -> (&str, u32, u32, u32, u32) {
1198        (
1199            location.uri.as_str(),
1200            location.range.start.line,
1201            location.range.start.column,
1202            location.range.end.line,
1203            location.range.end.column,
1204        )
1205    }
1206
1207    fn sort_locations_deterministically(locations: &mut [Location]) {
1208        locations.sort_by(|left, right| {
1209            Self::location_sort_key(left).cmp(&Self::location_sort_key(right))
1210        });
1211    }
1212
1213    fn definition_candidate_sort_key(
1214        candidate: &DefinitionCandidate,
1215    ) -> (u8, &str, u32, u32, u32, u32) {
1216        let rank = match candidate.kind {
1217            SymbolKind::Subroutine | SymbolKind::Method => 0,
1218            SymbolKind::Constant => 1,
1219            _ => 2,
1220        };
1221        (
1222            rank,
1223            candidate.location.uri.as_str(),
1224            candidate.location.range.start.line,
1225            candidate.location.range.start.column,
1226            candidate.location.range.end.line,
1227            candidate.location.range.end.column,
1228        )
1229    }
1230
1231    fn rebuild_symbol_cache(
1232        files: &HashMap<String, FileIndex>,
1233        symbols: &mut HashMap<String, Vec<DefinitionCandidate>>,
1234    ) {
1235        symbols.clear();
1236
1237        for file_index in files.values() {
1238            for symbol in &file_index.symbols {
1239                if let Some(ref qname) = symbol.qualified_name {
1240                    symbols.entry(qname.clone()).or_default().push(DefinitionCandidate {
1241                        location: Location { uri: symbol.uri.clone(), range: symbol.range },
1242                        kind: symbol.kind,
1243                    });
1244                }
1245                symbols.entry(symbol.name.clone()).or_default().push(DefinitionCandidate {
1246                    location: Location { uri: symbol.uri.clone(), range: symbol.range },
1247                    kind: symbol.kind,
1248                });
1249            }
1250        }
1251        for entries in symbols.values_mut() {
1252            entries.sort_by(|left, right| {
1253                Self::definition_candidate_sort_key(left)
1254                    .cmp(&Self::definition_candidate_sort_key(right))
1255            });
1256            entries.dedup();
1257        }
1258    }
1259
1260    /// Incrementally remove one file's symbols from the global cache,
1261    /// re-inserting shadowed symbols from remaining files.
1262    fn incremental_remove_symbols(
1263        files: &HashMap<String, FileIndex>,
1264        symbols: &mut HashMap<String, Vec<DefinitionCandidate>>,
1265        old_file_index: &FileIndex,
1266    ) {
1267        let mut affected_names: Vec<String> = Vec::new();
1268        for sym in &old_file_index.symbols {
1269            if let Some(ref qname) = sym.qualified_name {
1270                let mut remove_key = false;
1271                if let Some(entries) = symbols.get_mut(qname) {
1272                    entries.retain(|candidate| candidate.location.uri != sym.uri);
1273                    remove_key = entries.is_empty();
1274                }
1275                if remove_key {
1276                    symbols.remove(qname);
1277                    affected_names.push(qname.clone());
1278                }
1279            }
1280            let mut remove_key = false;
1281            if let Some(entries) = symbols.get_mut(&sym.name) {
1282                entries.retain(|candidate| candidate.location.uri != sym.uri);
1283                remove_key = entries.is_empty();
1284            }
1285            if remove_key {
1286                symbols.remove(&sym.name);
1287                affected_names.push(sym.name.clone());
1288            }
1289        }
1290        if !affected_names.is_empty() {
1291            symbols.clear();
1292            for file_index in files
1293                .values()
1294                .filter(|file_index| file_index.source_uri != old_file_index.source_uri)
1295            {
1296                for symbol in &file_index.symbols {
1297                    if let Some(ref qname) = symbol.qualified_name {
1298                        symbols.entry(qname.clone()).or_default().push(DefinitionCandidate {
1299                            location: Location { uri: symbol.uri.clone(), range: symbol.range },
1300                            kind: symbol.kind,
1301                        });
1302                    }
1303                    symbols.entry(symbol.name.clone()).or_default().push(DefinitionCandidate {
1304                        location: Location { uri: symbol.uri.clone(), range: symbol.range },
1305                        kind: symbol.kind,
1306                    });
1307                }
1308            }
1309            for entries in symbols.values_mut() {
1310                entries.sort_by(|left, right| {
1311                    Self::definition_candidate_sort_key(left)
1312                        .cmp(&Self::definition_candidate_sort_key(right))
1313                });
1314                entries.dedup();
1315            }
1316        }
1317    }
1318
1319    /// Incrementally add one file's symbols to the global cache.
1320    fn incremental_add_symbols(
1321        symbols: &mut HashMap<String, Vec<DefinitionCandidate>>,
1322        file_index: &FileIndex,
1323    ) {
1324        for sym in &file_index.symbols {
1325            if let Some(ref qname) = sym.qualified_name {
1326                symbols.entry(qname.clone()).or_default().push(DefinitionCandidate {
1327                    location: Location { uri: sym.uri.clone(), range: sym.range },
1328                    kind: sym.kind,
1329                });
1330            }
1331            symbols.entry(sym.name.clone()).or_default().push(DefinitionCandidate {
1332                location: Location { uri: sym.uri.clone(), range: sym.range },
1333                kind: sym.kind,
1334            });
1335        }
1336        for entries in symbols.values_mut() {
1337            entries.sort_by(|left, right| {
1338                Self::definition_candidate_sort_key(left)
1339                    .cmp(&Self::definition_candidate_sort_key(right))
1340            });
1341            entries.dedup();
1342        }
1343    }
1344
1345    /// Determine the workspace folder URI for a given file URI.
1346    ///
1347    /// Returns the workspace folder URI that contains the given file URI.
1348    /// This is used for multi-root workspace support to properly attribute
1349    /// files and symbols to their originating workspace folder.
1350    ///
1351    /// # Arguments
1352    ///
1353    /// * `file_uri` - The file URI to find the containing workspace folder for
1354    ///
1355    /// # Returns
1356    ///
1357    /// `Some(folder_uri)` if the file is within a workspace folder, `None` otherwise.
1358    ///
1359    /// # Examples
1360    ///
1361    /// ```rust,ignore
1362    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
1363    ///
1364    /// let index = WorkspaceIndex::new();
1365    /// index.set_workspace_folders(vec![
1366    ///     "file:///project1".to_string(),
1367    ///     "file:///project2".to_string(),
1368    /// ]);
1369    ///
1370    /// let folder = index.determine_folder_uri("file:///project1/src/main.pl");
1371    /// assert_eq!(folder, Some("file:///project1".to_string()));
1372    /// ```
1373    fn determine_folder_uri(&self, file_uri: &str) -> Option<String> {
1374        let folders = self.workspace_folders.read();
1375        let mut best_match: Option<&String> = None;
1376        for folder_uri in folders.iter() {
1377            // Check if the file URI starts with the folder URI
1378            // We need to ensure proper URI matching (with or without trailing slash)
1379            let folder_with_slash = if folder_uri.ends_with('/') {
1380                folder_uri.clone()
1381            } else {
1382                format!("{}/", folder_uri)
1383            };
1384            if file_uri.starts_with(&folder_with_slash) || file_uri == folder_uri {
1385                match best_match {
1386                    Some(existing) if existing.len() >= folder_uri.len() => {}
1387                    _ => best_match = Some(folder_uri),
1388                }
1389            }
1390        }
1391        best_match.cloned()
1392    }
1393
1394    fn find_definition_in_files(
1395        files: &HashMap<String, FileIndex>,
1396        symbol_name: &str,
1397        uri_filter: Option<&str>,
1398    ) -> Option<(Location, String)> {
1399        let mut candidates: Vec<(Location, String)> = Vec::new();
1400        for file_index in files.values() {
1401            if let Some(filter) = uri_filter
1402                && file_index.symbols.first().is_some_and(|symbol| symbol.uri != filter)
1403            {
1404                continue;
1405            }
1406
1407            for symbol in &file_index.symbols {
1408                if symbol.name == symbol_name
1409                    || symbol.qualified_name.as_deref() == Some(symbol_name)
1410                {
1411                    candidates.push((
1412                        Location { uri: symbol.uri.clone(), range: symbol.range },
1413                        symbol.uri.clone(),
1414                    ));
1415                }
1416            }
1417        }
1418
1419        candidates.sort_by(|left, right| {
1420            Self::location_sort_key(&left.0).cmp(&Self::location_sort_key(&right.0))
1421        });
1422        candidates.into_iter().next()
1423    }
1424
1425    fn find_symbol_by_definition(
1426        &self,
1427        definition: &Location,
1428        symbol_name: &str,
1429    ) -> Option<WorkspaceSymbol> {
1430        let files = self.files.read();
1431        files
1432            .values()
1433            .flat_map(|file_index| file_index.symbols.iter())
1434            .filter(|symbol| {
1435                symbol.uri == definition.uri
1436                    && symbol.range == definition.range
1437                    && (symbol.name == symbol_name
1438                        || symbol.qualified_name.as_deref() == Some(symbol_name))
1439            })
1440            .min_by(|left, right| {
1441                (
1442                    left.qualified_name.as_deref().unwrap_or_default(),
1443                    left.name.as_str(),
1444                    left.kind.to_lsp_kind(),
1445                )
1446                    .cmp(&(
1447                        right.qualified_name.as_deref().unwrap_or_default(),
1448                        right.name.as_str(),
1449                        right.kind.to_lsp_kind(),
1450                    ))
1451            })
1452            .cloned()
1453    }
1454
1455    fn has_unique_symbol_name_and_kind(&self, target: &WorkspaceSymbol) -> bool {
1456        let files = self.files.read();
1457        files
1458            .values()
1459            .flat_map(|file_index| file_index.symbols.iter())
1460            .filter(|symbol| symbol.name == target.name && symbol.kind == target.kind)
1461            .take(2)
1462            .count()
1463            == 1
1464    }
1465
1466    fn collect_symbol_references(&self, symbol: &WorkspaceSymbol) -> Vec<Location> {
1467        let mut names_to_query: Vec<&str> = Vec::new();
1468        if let Some(qualified_name) = symbol.qualified_name.as_deref() {
1469            names_to_query.push(qualified_name);
1470            if self.has_unique_symbol_name_and_kind(symbol) {
1471                names_to_query.push(symbol.name.as_str());
1472            }
1473        } else {
1474            names_to_query.push(symbol.name.as_str());
1475        }
1476
1477        let global_refs = self.global_references.read();
1478        let mut seen: HashSet<(String, u32, u32, u32, u32)> = HashSet::new();
1479        let mut locations = Vec::new();
1480
1481        for symbol_name in names_to_query {
1482            if let Some(refs) = global_refs.get(symbol_name) {
1483                for location in refs {
1484                    let key = (
1485                        location.uri.clone(),
1486                        location.range.start.line,
1487                        location.range.start.column,
1488                        location.range.end.line,
1489                        location.range.end.column,
1490                    );
1491                    if seen.insert(key) {
1492                        locations.push(location.clone());
1493                    }
1494                }
1495            }
1496        }
1497        drop(global_refs);
1498
1499        Self::sort_locations_deterministically(&mut locations);
1500        locations
1501    }
1502
1503    /// Create a new empty index
1504    ///
1505    /// # Returns
1506    ///
1507    /// A workspace index with empty file and symbol tables.
1508    ///
1509    /// # Examples
1510    ///
1511    /// ```rust,ignore
1512    /// use perl_parser::workspace_index::WorkspaceIndex;
1513    ///
1514    /// let index = WorkspaceIndex::new();
1515    /// assert!(!index.has_symbols());
1516    /// ```
1517    pub fn new() -> Self {
1518        Self {
1519            files: Arc::new(RwLock::new(HashMap::new())),
1520            symbols: Arc::new(RwLock::new(HashMap::new())),
1521            global_references: Arc::new(RwLock::new(HashMap::new())),
1522            fact_shards: Arc::new(RwLock::new(HashMap::new())),
1523            semantic_reference_index: Arc::new(RwLock::new(ReferenceIndex::new())),
1524            semantic_import_export_index: Arc::new(RwLock::new(ImportExportIndex::new())),
1525            document_store: DocumentStore::new(),
1526            workspace_folders: Arc::new(RwLock::new(Vec::new())),
1527        }
1528    }
1529
1530    /// Create a workspace index with pre-allocated capacity.
1531    ///
1532    /// Pre-allocating reduces the number of rehash operations during large-workspace
1533    /// startup. Use this instead of `new()` when the approximate workspace size is
1534    /// known in advance (e.g. from a file discovery scan).
1535    ///
1536    /// # Arguments
1537    ///
1538    /// * `estimated_files` - Expected number of source files in the workspace.
1539    /// * `avg_symbols_per_file` - Expected average number of symbols per file.
1540    ///
1541    /// # Panics
1542    ///
1543    /// Does not panic. Overflow is prevented via `saturating_mul` and an upper cap
1544    /// on the symbol/reference map capacity.
1545    ///
1546    /// # Examples
1547    ///
1548    /// ```rust,ignore
1549    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
1550    ///
1551    /// let index = WorkspaceIndex::with_capacity(1000, 20);
1552    /// assert!(!index.has_symbols());
1553    /// ```
1554    pub fn with_capacity(estimated_files: usize, avg_symbols_per_file: usize) -> Self {
1555        // Each symbol is stored twice (qualified + bare name) due to dual indexing.
1556        let sym_cap =
1557            estimated_files.saturating_mul(avg_symbols_per_file).saturating_mul(2).min(1_000_000);
1558        let ref_cap = (sym_cap / 4).min(1_000_000);
1559        Self {
1560            files: Arc::new(RwLock::new(HashMap::with_capacity(estimated_files))),
1561            symbols: Arc::new(RwLock::new(HashMap::with_capacity(sym_cap))),
1562            global_references: Arc::new(RwLock::new(HashMap::with_capacity(ref_cap))),
1563            fact_shards: Arc::new(RwLock::new(HashMap::with_capacity(estimated_files))),
1564            semantic_reference_index: Arc::new(RwLock::new(ReferenceIndex::new())),
1565            semantic_import_export_index: Arc::new(RwLock::new(ImportExportIndex::new())),
1566            document_store: DocumentStore::new(),
1567            workspace_folders: Arc::new(RwLock::new(Vec::new())),
1568        }
1569    }
1570
1571    /// Set the workspace folder URIs for multi-root workspace support.
1572    ///
1573    /// This method updates the list of workspace folders that the index
1574    /// uses to determine folder attribution for files and symbols.
1575    ///
1576    /// # Arguments
1577    ///
1578    /// * `folders` - A vector of workspace folder URIs
1579    ///
1580    /// # Examples
1581    ///
1582    /// ```rust,ignore
1583    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
1584    ///
1585    /// let index = WorkspaceIndex::new();
1586    /// index.set_workspace_folders(vec![
1587    ///     "file:///project1".to_string(),
1588    ///     "file:///project2".to_string(),
1589    /// ]);
1590    /// ```
1591    pub fn set_workspace_folders(&self, folders: Vec<String>) {
1592        let mut workspace_folders = self.workspace_folders.write();
1593        *workspace_folders = folders;
1594    }
1595
1596    /// Get the current workspace folder URIs.
1597    ///
1598    /// # Returns
1599    ///
1600    /// A vector of workspace folder URIs.
1601    #[must_use]
1602    pub fn workspace_folders(&self) -> Vec<String> {
1603        self.workspace_folders.read().clone()
1604    }
1605
    /// Normalize a URI to a consistent form using proper URI handling
    ///
    /// Thin wrapper that delegates to `perl_uri::normalize_uri`; the exact
    /// normalization rules are defined there.
    fn normalize_uri(uri: &str) -> String {
        perl_uri::normalize_uri(uri)
    }
1610
1611    /// Remove a file's contributions from the global reference index.
1612    ///
1613    /// Retains only entries whose URI does not match `file_uri`.
1614    /// Empty keys are removed to avoid unbounded map growth.
1615    fn remove_file_global_refs(
1616        global_refs: &mut HashMap<String, Vec<Location>>,
1617        file_index: &FileIndex,
1618        file_uri: &str,
1619    ) {
1620        for name in file_index.references.keys() {
1621            if let Some(locs) = global_refs.get_mut(name) {
1622                locs.retain(|loc| loc.uri != file_uri);
1623                if locs.is_empty() {
1624                    global_refs.remove(name);
1625                }
1626            }
1627        }
1628    }
1629
1630    /// Index a file from its URI and text content
1631    ///
1632    /// # Arguments
1633    ///
1634    /// * `uri` - File URI identifying the document
1635    /// * `text` - Full Perl source text for indexing
1636    ///
1637    /// # Returns
1638    ///
1639    /// `Ok(())` when indexing succeeds, or an error message otherwise.
1640    ///
1641    /// # Errors
1642    ///
1643    /// Returns an error if parsing fails or the document store cannot be updated.
1644    ///
1645    /// # Examples
1646    ///
1647    /// ```rust,ignore
1648    /// use perl_parser::workspace_index::WorkspaceIndex;
1649    /// use url::Url;
1650    ///
1651    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1652    /// let index = WorkspaceIndex::new();
1653    /// let uri = Url::parse("file:///example.pl")?;
1654    /// index.index_file(uri, "sub hello { return 1; }".to_string())?;
1655    /// # Ok(())
1656    /// # }
1657    /// ```
1658    ///
1659    /// Returns: `Ok(())` when indexing succeeds, otherwise an error string.
1660    pub fn index_file(&self, uri: Url, text: String) -> Result<(), String> {
1661        let uri_str = uri.to_string();
1662
1663        // Compute content hash for early-exit optimization
1664        let mut hasher = DefaultHasher::new();
1665        text.hash(&mut hasher);
1666        let content_hash = hasher.finish();
1667
1668        // Check if content is unchanged (early-exit optimization)
1669        let key = DocumentStore::uri_key(&uri_str);
1670        {
1671            let files = self.files.read();
1672            if let Some(existing_index) = files.get(&key) {
1673                if existing_index.content_hash == content_hash {
1674                    // Content unchanged, skip re-indexing
1675                    return Ok(());
1676                }
1677            }
1678        }
1679
1680        // Update document store
1681        if self.document_store.is_open(&uri_str) {
1682            self.document_store.update(&uri_str, 1, text.clone());
1683        } else {
1684            self.document_store.open(uri_str.clone(), 1, text.clone());
1685        }
1686
1687        // Parse the file
1688        let mut parser = Parser::new(&text);
1689        let ast = match parser.parse() {
1690            Ok(ast) => ast,
1691            Err(e) => return Err(format!("Parse error: {}", e)),
1692        };
1693
1694        // Get the document for line index
1695        let mut doc = self.document_store.get(&uri_str).ok_or("Document not found")?;
1696
1697        // Determine workspace folder URI from the file URI
1698        let folder_uri = self.determine_folder_uri(&uri_str);
1699
1700        // Extract symbols and references
1701        let mut file_index = FileIndex {
1702            source_uri: uri_str.clone(),
1703            content_hash,
1704            folder_uri: folder_uri.clone(),
1705            ..Default::default()
1706        };
1707        let mut visitor = IndexVisitor::new(&mut doc, uri_str.clone(), folder_uri);
1708        visitor.visit(&ast, &mut file_index);
1709
1710        let canonical_shard =
1711            Self::build_canonical_fact_shard_for_ast(&uri_str, content_hash, &ast);
1712        let fact_shard = if canonical_shard.anchors.is_empty()
1713            && canonical_shard.entities.is_empty()
1714            && canonical_shard.occurrences.is_empty()
1715            && canonical_shard.edges.is_empty()
1716        {
1717            Self::build_fact_shard(&uri_str, content_hash, &file_index)
1718        } else {
1719            canonical_shard
1720        };
1721
1722        // Update the index, refresh the global symbol cache, and replace this file's
1723        // contribution in the global reference index.
1724        {
1725            let mut files = self.files.write();
1726
1727            // Remove stale global references from previous version of this file
1728            if let Some(old_index) = files.get(&key) {
1729                let mut global_refs = self.global_references.write();
1730                Self::remove_file_global_refs(&mut global_refs, old_index, &uri_str);
1731            }
1732
1733            // Incrementally remove old symbols before inserting new file
1734            if let Some(old_index) = files.get(&key) {
1735                let mut symbols = self.symbols.write();
1736                Self::incremental_remove_symbols(&files, &mut symbols, old_index);
1737                drop(symbols);
1738            }
1739            files.insert(key.clone(), file_index);
1740            let mut symbols = self.symbols.write();
1741            if let Some(new_index) = files.get(&key) {
1742                Self::incremental_add_symbols(&mut symbols, new_index);
1743            }
1744
1745            if let Some(file_index) = files.get(&key) {
1746                let mut global_refs = self.global_references.write();
1747                for (name, refs) in &file_index.references {
1748                    let entry = global_refs.entry(name.clone()).or_default();
1749                    for reference in refs {
1750                        entry.push(Location { uri: reference.uri.clone(), range: reference.range });
1751                    }
1752                }
1753            }
1754            self.replace_fact_shard_incremental(&key, fact_shard);
1755        }
1756
1757        Ok(())
1758    }
1759
1760    /// Remove a file from the index
1761    ///
1762    /// # Arguments
1763    ///
1764    /// * `uri` - File URI (string form) to remove
1765    ///
1766    /// # Returns
1767    ///
1768    /// Nothing. The index is updated in-place.
1769    ///
1770    /// # Examples
1771    ///
1772    /// ```rust,ignore
1773    /// use perl_parser::workspace_index::WorkspaceIndex;
1774    ///
1775    /// let index = WorkspaceIndex::new();
1776    /// index.remove_file("file:///example.pl");
1777    /// ```
1778    pub fn remove_file(&self, uri: &str) {
1779        let uri_str = Self::normalize_uri(uri);
1780        let key = DocumentStore::uri_key(&uri_str);
1781
1782        // Remove from document store
1783        self.document_store.close(&uri_str);
1784
1785        // Remove file index
1786        let mut files = self.files.write();
1787        if let Some(file_index) = files.remove(&key) {
1788            self.fact_shards.write().remove(&key);
1789
1790            // Clean up semantic cross-file indexes for this file.
1791            self.semantic_reference_index.write().remove_file(&uri_str);
1792            {
1793                let mut ie_idx = self.semantic_import_export_index.write();
1794                ie_idx.remove_file_imports(&uri_str);
1795                ie_idx.remove_module_exports(&uri_str);
1796            }
1797
1798            // Incrementally remove symbols and re-insert any shadowed names.
1799            let mut symbols = self.symbols.write();
1800            Self::incremental_remove_symbols(&files, &mut symbols, &file_index);
1801
1802            // Defensive sweep: purge any remaining cache entries whose value
1803            // points to this file's URI.  incremental_remove_symbols already
1804            // handles known symbol names; this sweep catches any entries that
1805            // were inserted via the find_definition fallback path using a key
1806            // that differs from both sym.name and sym.qualified_name.
1807            // Use the URI stored in the file_index itself (not the caller-supplied
1808            // uri_str) so the comparison is always against the exact string that
1809            // was stored during indexing.
1810            if let Some(indexed_uri) = file_index.symbols.first().map(|s| s.uri.as_str()) {
1811                symbols.retain(|_, candidates| {
1812                    candidates.retain(|candidate| candidate.location.uri.as_str() != indexed_uri);
1813                    !candidates.is_empty()
1814                });
1815            }
1816
1817            // Remove from global reference index
1818            let mut global_refs = self.global_references.write();
1819            Self::remove_file_global_refs(&mut global_refs, &file_index, &uri_str);
1820        }
1821    }
1822
    /// Remove a file from the index (URL variant for compatibility)
    ///
    /// Convenience wrapper that forwards `uri.as_str()` to `remove_file`.
    ///
    /// # Arguments
    ///
    /// * `uri` - File URI as a parsed `Url`
    ///
    /// # Returns
    ///
    /// Nothing. The index is updated in-place.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_parser::workspace_index::WorkspaceIndex;
    /// use url::Url;
    ///
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let index = WorkspaceIndex::new();
    /// let uri = Url::parse("file:///example.pl")?;
    /// index.remove_file_url(&uri);
    /// # Ok(())
    /// # }
    /// ```
    pub fn remove_file_url(&self, uri: &Url) {
        self.remove_file(uri.as_str())
    }
1849
    /// Clear a file from the index (alias for `remove_file`)
    ///
    /// Calls `remove_file` with the same argument and does nothing else.
    ///
    /// # Arguments
    ///
    /// * `uri` - File URI (string form) to remove
    ///
    /// # Returns
    ///
    /// Nothing. The index is updated in-place.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_parser::workspace_index::WorkspaceIndex;
    ///
    /// let index = WorkspaceIndex::new();
    /// index.clear_file("file:///example.pl");
    /// ```
    pub fn clear_file(&self, uri: &str) {
        self.remove_file(uri);
    }
1871
    /// Clear a file from the index (URL variant for compatibility)
    ///
    /// Convenience wrapper that forwards `uri.as_str()` to `clear_file`.
    ///
    /// # Arguments
    ///
    /// * `uri` - File URI as a parsed `Url`
    ///
    /// # Returns
    ///
    /// Nothing. The index is updated in-place.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_parser::workspace_index::WorkspaceIndex;
    /// use url::Url;
    ///
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let index = WorkspaceIndex::new();
    /// let uri = Url::parse("file:///example.pl")?;
    /// index.clear_file_url(&uri);
    /// # Ok(())
    /// # }
    /// ```
    pub fn clear_file_url(&self, uri: &Url) {
        self.clear_file(uri.as_str())
    }
1898
1899    /// Remove all files from a specific workspace folder.
1900    ///
1901    /// This method removes all indexed files that belong to the given
1902    /// workspace folder URI. This is useful when a workspace folder is
1903    /// removed from the multi-root workspace.
1904    ///
1905    /// # Arguments
1906    ///
1907    /// * `folder_uri` - The workspace folder URI to remove files from
1908    ///
1909    /// # Examples
1910    ///
1911    /// ```rust,ignore
1912    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
1913    ///
1914    /// let index = WorkspaceIndex::new();
1915    /// // Index files from multiple folders...
1916    /// index.remove_folder("file:///project1");
1917    /// ```
1918    pub fn remove_folder(&self, folder_uri: &str) {
1919        let mut uris_to_remove = Vec::new();
1920        let files = self.files.read();
1921
1922        // Collect all files that belong to this folder
1923        for file_index in files.values() {
1924            if file_index.folder_uri.as_deref() == Some(folder_uri) {
1925                uris_to_remove.push(file_index.source_uri.clone());
1926            }
1927        }
1928        drop(files);
1929
1930        // Remove each file through the full removal path to keep
1931        // symbol/reference caches and document store in sync.
1932        for uri in uris_to_remove {
1933            self.remove_file(&uri);
1934        }
1935    }
1936
1937    #[cfg(not(target_arch = "wasm32"))]
1938    /// Index a file from a URI string for the Index/Analyze workflow.
1939    ///
1940    /// Accepts either a `file://` URI or a filesystem path. Not available on
1941    /// wasm32 targets (requires filesystem path conversion).
1942    ///
1943    /// # Arguments
1944    ///
1945    /// * `uri` - File URI string or filesystem path.
1946    /// * `text` - Full Perl source text for indexing.
1947    ///
1948    /// # Returns
1949    ///
1950    /// `Ok(())` when indexing succeeds, or an error message otherwise.
1951    ///
1952    /// # Errors
1953    ///
1954    /// Returns an error if the URI is invalid or parsing fails.
1955    ///
1956    /// # Examples
1957    ///
1958    /// ```rust,ignore
1959    /// use perl_parser::workspace_index::WorkspaceIndex;
1960    ///
1961    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1962    /// let index = WorkspaceIndex::new();
1963    /// index.index_file_str("file:///example.pl", "sub hello { }")?;
1964    /// # Ok(())
1965    /// # }
1966    /// ```
1967    pub fn index_file_str(&self, uri: &str, text: &str) -> Result<(), String> {
1968        let path = Path::new(uri);
1969        let url = if path.is_absolute() {
1970            url::Url::from_file_path(path)
1971                .map_err(|_| format!("Invalid URI or file path: {}", uri))?
1972        } else {
1973            // Raw absolute Windows paths like C:\foo can parse as a bogus URI
1974            // (`c:` scheme). Prefer URL parsing only for non-path inputs.
1975            url::Url::parse(uri).or_else(|_| {
1976                url::Url::from_file_path(path)
1977                    .map_err(|_| format!("Invalid URI or file path: {}", uri))
1978            })?
1979        };
1980        self.index_file(url, text.to_string())
1981    }
1982
1983    /// Index multiple files in a single batch operation.
1984    ///
1985    /// This is significantly faster than calling `index_file` in a loop for
1986    /// initial workspace scans because it defers the global symbol cache
1987    /// rebuild to a single pass at the end.
1988    ///
1989    /// Phase 1: Parse all files without holding locks.
1990    /// Phase 2: Bulk-insert file indices and rebuild the symbol cache once.
1991    pub fn index_files_batch(&self, files_to_index: Vec<(Url, String)>) -> Vec<String> {
1992        let mut errors = Vec::new();
1993
1994        // Phase 1: Parse all files without locks
1995        let mut parsed: Vec<(String, String, FileIndex)> = Vec::with_capacity(files_to_index.len());
1996        for (uri, text) in &files_to_index {
1997            let uri_str = uri.to_string();
1998
1999            // Content hash for early-exit
2000            let mut hasher = DefaultHasher::new();
2001            text.hash(&mut hasher);
2002            let content_hash = hasher.finish();
2003
2004            let key = DocumentStore::uri_key(&uri_str);
2005
2006            // Check if content unchanged
2007            {
2008                let files = self.files.read();
2009                if let Some(existing) = files.get(&key) {
2010                    if existing.content_hash == content_hash {
2011                        continue;
2012                    }
2013                }
2014            }
2015
2016            // Update document store
2017            if self.document_store.is_open(&uri_str) {
2018                self.document_store.update(&uri_str, 1, text.clone());
2019            } else {
2020                self.document_store.open(uri_str.clone(), 1, text.clone());
2021            }
2022
2023            // Parse
2024            let mut parser = Parser::new(text);
2025            let ast = match parser.parse() {
2026                Ok(ast) => ast,
2027                Err(e) => {
2028                    errors.push(format!("Parse error in {}: {}", uri_str, e));
2029                    continue;
2030                }
2031            };
2032
2033            let mut doc = match self.document_store.get(&uri_str) {
2034                Some(d) => d,
2035                None => {
2036                    errors.push(format!("Document not found: {}", uri_str));
2037                    continue;
2038                }
2039            };
2040
2041            // Determine workspace folder URI from the file URI
2042            let folder_uri = self.determine_folder_uri(&uri_str);
2043
2044            let mut file_index = FileIndex {
2045                source_uri: uri_str.clone(),
2046                content_hash,
2047                folder_uri: folder_uri.clone(),
2048                ..Default::default()
2049            };
2050            let mut visitor = IndexVisitor::new(&mut doc, uri_str.clone(), folder_uri);
2051            visitor.visit(&ast, &mut file_index);
2052
2053            parsed.push((key, uri_str, file_index));
2054        }
2055
2056        // Phase 2: Bulk insert with single cache rebuild
2057        {
2058            let mut files = self.files.write();
2059            let mut symbols = self.symbols.write();
2060            let mut global_refs = self.global_references.write();
2061
2062            // Pre-allocate capacity for the incoming batch to avoid rehashing.
2063            // Each symbol is indexed under both its qualified name and bare name.
2064            files.reserve(parsed.len());
2065            symbols.reserve(parsed.len().saturating_mul(20).saturating_mul(2));
2066
2067            for (key, uri_str, file_index) in parsed {
2068                // Remove stale global references
2069                if let Some(old_index) = files.get(&key) {
2070                    Self::remove_file_global_refs(&mut global_refs, old_index, &uri_str);
2071                }
2072
2073                files.insert(key.clone(), file_index);
2074
2075                // Add global references for this file
2076                if let Some(fi) = files.get(&key) {
2077                    for (name, refs) in &fi.references {
2078                        let entry = global_refs.entry(name.clone()).or_default();
2079                        for reference in refs {
2080                            entry.push(Location {
2081                                uri: reference.uri.clone(),
2082                                range: reference.range,
2083                            });
2084                        }
2085                    }
2086                }
2087            }
2088
2089            // Single rebuild at the end
2090            Self::rebuild_symbol_cache(&files, &mut symbols);
2091        }
2092
2093        errors
2094    }
2095
2096    /// Find all references to a symbol using dual indexing strategy
2097    ///
2098    /// This function searches for both exact matches and bare name matches when
2099    /// the symbol is qualified. For example, when searching for "Utils::process_data":
2100    /// - First searches for exact "Utils::process_data" references
2101    /// - Then searches for bare "process_data" references that might refer to the same function
2102    ///
2103    /// This dual approach handles cases where functions are called both as:
2104    /// - Qualified: `Utils::process_data()`
2105    /// - Unqualified: `process_data()` (when in the same package or imported)
2106    ///
2107    /// # Arguments
2108    ///
2109    /// * `symbol_name` - Symbol name or qualified name to search
2110    ///
2111    /// # Returns
2112    ///
2113    /// All reference locations found for the requested symbol.
2114    ///
2115    /// # Examples
2116    ///
2117    /// ```rust,ignore
2118    /// use perl_parser::workspace_index::WorkspaceIndex;
2119    ///
2120    /// let index = WorkspaceIndex::new();
2121    /// let _refs = index.find_references("Utils::process_data");
2122    /// ```
2123    pub fn find_references(&self, symbol_name: &str) -> Vec<Location> {
2124        let global_refs = self.global_references.read();
2125        let mut seen: HashSet<(String, u32, u32, u32, u32)> = HashSet::new();
2126        let mut locations = Vec::new();
2127
2128        // O(1) lookup for exact symbol name
2129        if let Some(refs) = global_refs.get(symbol_name) {
2130            for loc in refs {
2131                let key = (
2132                    loc.uri.clone(),
2133                    loc.range.start.line,
2134                    loc.range.start.column,
2135                    loc.range.end.line,
2136                    loc.range.end.column,
2137                );
2138                if seen.insert(key) {
2139                    locations.push(Location { uri: loc.uri.clone(), range: loc.range });
2140                }
2141            }
2142        }
2143
2144        // If the symbol is qualified, also collect bare name references
2145        if let Some(idx) = symbol_name.rfind("::") {
2146            let bare_name = &symbol_name[idx + 2..];
2147            if let Some(refs) = global_refs.get(bare_name) {
2148                for loc in refs {
2149                    let key = (
2150                        loc.uri.clone(),
2151                        loc.range.start.line,
2152                        loc.range.start.column,
2153                        loc.range.end.line,
2154                        loc.range.end.column,
2155                    );
2156                    if seen.insert(key) {
2157                        locations.push(Location { uri: loc.uri.clone(), range: loc.range });
2158                    }
2159                }
2160            }
2161        } else {
2162            // If the symbol is bare, also collect qualified references that end
2163            // with the same bare name, e.g. `Pkg::foo` when searching for `foo`.
2164            for (name, refs) in global_refs.iter() {
2165                if !Self::is_qualified_variant_of(name, symbol_name) {
2166                    continue;
2167                }
2168
2169                for loc in refs {
2170                    let key = (
2171                        loc.uri.clone(),
2172                        loc.range.start.line,
2173                        loc.range.start.column,
2174                        loc.range.end.line,
2175                        loc.range.end.column,
2176                    );
2177                    if seen.insert(key) {
2178                        locations.push(Location { uri: loc.uri.clone(), range: loc.range });
2179                    }
2180                }
2181            }
2182        }
2183
2184        Self::sort_locations_deterministically(&mut locations);
2185        locations
2186    }
2187
2188    /// Resolve a symbol and return its definition/reference set for cross-file planning.
2189    ///
2190    /// Returns `None` when no definition can be resolved for `symbol_name`.
2191    pub fn query_symbol_references(
2192        &self,
2193        symbol_name: &str,
2194    ) -> Option<CrossFileReferenceQueryResult> {
2195        let definition = self.find_definition(symbol_name)?;
2196        let symbol = self.find_symbol_by_definition(&definition, symbol_name)?;
2197
2198        let stable_key = symbol.qualified_name.clone().unwrap_or_else(|| {
2199            format!(
2200                "{}@{}:{}:{}",
2201                symbol.name, symbol.uri, symbol.range.start.line, symbol.range.start.column
2202            )
2203        });
2204        let mut references = self.collect_symbol_references(&symbol);
2205        if !references.iter().any(|location| location == &definition) {
2206            references.push(definition.clone());
2207            Self::sort_locations_deterministically(&mut references);
2208        }
2209
2210        Some(CrossFileReferenceQueryResult {
2211            symbol: SymbolIdentity {
2212                stable_key,
2213                name: symbol.name,
2214                qualified_name: symbol.qualified_name,
2215                kind: symbol.kind,
2216            },
2217            definition,
2218            references,
2219        })
2220    }
2221
2222    /// Count non-definition references (usages) of a symbol.
2223    ///
2224    /// Like `find_references` but excludes `ReferenceKind::Definition` entries,
2225    /// returning only actual usage sites. This is used by code lens to show
2226    /// "N references" where N means call sites, not the definition itself.
2227    pub fn count_usages(&self, symbol_name: &str) -> usize {
2228        let files = self.files.read();
2229        let mut seen: HashSet<(String, u32, u32, u32, u32)> = HashSet::new();
2230
2231        for (_uri_key, file_index) in files.iter() {
2232            if let Some(refs) = file_index.references.get(symbol_name) {
2233                for r in refs.iter().filter(|r| r.kind != ReferenceKind::Definition) {
2234                    seen.insert((
2235                        r.uri.clone(),
2236                        r.range.start.line,
2237                        r.range.start.column,
2238                        r.range.end.line,
2239                        r.range.end.column,
2240                    ));
2241                }
2242            }
2243
2244            if let Some(idx) = symbol_name.rfind("::") {
2245                let bare_name = &symbol_name[idx + 2..];
2246                if let Some(refs) = file_index.references.get(bare_name) {
2247                    for r in refs.iter().filter(|r| r.kind != ReferenceKind::Definition) {
2248                        seen.insert((
2249                            r.uri.clone(),
2250                            r.range.start.line,
2251                            r.range.start.column,
2252                            r.range.end.line,
2253                            r.range.end.column,
2254                        ));
2255                    }
2256                }
2257            } else {
2258                for (name, refs) in &file_index.references {
2259                    if !Self::is_qualified_variant_of(name, symbol_name) {
2260                        continue;
2261                    }
2262
2263                    for r in refs.iter().filter(|r| r.kind != ReferenceKind::Definition) {
2264                        seen.insert((
2265                            r.uri.clone(),
2266                            r.range.start.line,
2267                            r.range.start.column,
2268                            r.range.end.line,
2269                            r.range.end.column,
2270                        ));
2271                    }
2272                }
2273            }
2274        }
2275
2276        seen.len()
2277    }
2278
2279    fn is_qualified_variant_of(candidate: &str, bare_symbol: &str) -> bool {
2280        candidate.rsplit_once("::").is_some_and(|(_, candidate_bare)| candidate_bare == bare_symbol)
2281    }
2282
2283    /// Find the definition of a symbol
2284    ///
2285    /// # Arguments
2286    ///
2287    /// * `symbol_name` - Symbol name or qualified name to resolve
2288    ///
2289    /// # Returns
2290    ///
2291    /// The first matching definition location, if found.
2292    ///
2293    /// # Examples
2294    ///
2295    /// ```rust,ignore
2296    /// use perl_parser::workspace_index::WorkspaceIndex;
2297    ///
2298    /// let index = WorkspaceIndex::new();
2299    /// let _def = index.find_definition("MyPackage::example");
2300    /// ```
2301    pub fn find_definition(&self, symbol_name: &str) -> Option<Location> {
2302        if let Some(location) = self.definition_candidates(symbol_name).into_iter().next() {
2303            return Some(location);
2304        }
2305
2306        let files = self.files.read();
2307        let resolved = Self::find_definition_in_files(&files, symbol_name, None);
2308        drop(files);
2309
2310        if let Some((location, _uri)) = resolved {
2311            let mut symbols = self.symbols.write();
2312            symbols.entry(symbol_name.to_string()).or_default().push(DefinitionCandidate {
2313                location: location.clone(),
2314                kind: SymbolKind::Subroutine,
2315            });
2316            if let Some(candidates) = symbols.get_mut(symbol_name) {
2317                candidates.sort_by(|left, right| {
2318                    Self::definition_candidate_sort_key(left)
2319                        .cmp(&Self::definition_candidate_sort_key(right))
2320                });
2321                candidates.dedup();
2322            }
2323            return Some(location);
2324        }
2325
2326        None
2327    }
2328
2329    pub(crate) fn definition_candidates(&self, symbol_name: &str) -> Vec<Location> {
2330        let symbols = self.symbols.read();
2331        symbols
2332            .get(symbol_name)
2333            .map(|candidates| {
2334                candidates.iter().map(|candidate| candidate.location.clone()).collect()
2335            })
2336            .unwrap_or_default()
2337    }
2338
2339    /// Get all symbols in the workspace
2340    ///
2341    /// # Returns
2342    ///
2343    /// A vector containing every symbol currently indexed.
2344    ///
2345    /// # Examples
2346    ///
2347    /// ```rust,ignore
2348    /// use perl_parser::workspace_index::WorkspaceIndex;
2349    ///
2350    /// let index = WorkspaceIndex::new();
2351    /// let _symbols = index.all_symbols();
2352    /// ```
2353    pub fn all_symbols(&self) -> Vec<WorkspaceSymbol> {
2354        let files = self.files.read();
2355        let mut symbols = Vec::new();
2356
2357        for (_uri_key, file_index) in files.iter() {
2358            symbols.extend(file_index.symbols.clone());
2359        }
2360
2361        symbols
2362    }
2363
    /// Clear all indexed files and symbols from the workspace.
    ///
    /// Empties the per-file index, the symbol cache, the global reference
    /// index and the fact shards, then replaces both semantic cross-file
    /// indexes with fresh instances. Each write lock is acquired and released
    /// by its own statement, so the locks are not all held at once.
    ///
    /// NOTE(review): `document_store` and `workspace_folders` are not reset
    /// here — confirm that is intended.
    pub fn clear(&self) {
        self.files.write().clear();
        self.symbols.write().clear();
        self.global_references.write().clear();
        self.fact_shards.write().clear();
        *self.semantic_reference_index.write() = ReferenceIndex::new();
        *self.semantic_import_export_index.write() = ImportExportIndex::new();
    }
2373
2374    fn hash_uri_to_file_id(uri: &str) -> FileId {
2375        let mut hasher = DefaultHasher::new();
2376        uri.hash(&mut hasher);
2377        FileId(hasher.finish())
2378    }
2379
2380    fn build_fact_shard(uri: &str, content_hash: u64, file_index: &FileIndex) -> FileFactShard {
2381        let file_id = Self::hash_uri_to_file_id(uri);
2382        let mut anchors = Vec::new();
2383        let mut entities = Vec::new();
2384        for (idx, symbol) in file_index.symbols.iter().enumerate() {
2385            let anchor_id = AnchorId((idx + 1) as u64);
2386            anchors.push(AnchorFact {
2387                id: anchor_id,
2388                file_id,
2389                // WorkspaceSymbol provides line/column coordinates only, not byte
2390                // offsets.  Zero-initialize span_*_byte until a byte-offset source
2391                // is plumbed through the indexing pipeline.
2392                span_start_byte: 0,
2393                span_end_byte: 0,
2394                scope_id: None,
2395                provenance: Provenance::SearchFallback,
2396                confidence: Confidence::Low,
2397            });
2398            entities.push(EntityFact {
2399                id: EntityId((idx + 1) as u64),
2400                kind: EntityKind::Unknown,
2401                canonical_name: symbol
2402                    .qualified_name
2403                    .clone()
2404                    .unwrap_or_else(|| symbol.name.clone()),
2405                anchor_id: Some(anchor_id),
2406                scope_id: None,
2407                provenance: Provenance::SearchFallback,
2408                confidence: Confidence::Low,
2409            });
2410        }
2411        // Hash the per-category fact vectors so consumers can detect staleness
2412        // without re-reading the full shard.
2413        let anchors_hash = {
2414            let mut h = DefaultHasher::new();
2415            anchors.len().hash(&mut h);
2416            for a in &anchors {
2417                a.id.hash(&mut h);
2418                a.span_start_byte.hash(&mut h);
2419                a.span_end_byte.hash(&mut h);
2420            }
2421            h.finish()
2422        };
2423        let entities_hash = {
2424            let mut h = DefaultHasher::new();
2425            entities.len().hash(&mut h);
2426            for e in &entities {
2427                e.id.hash(&mut h);
2428                e.canonical_name.hash(&mut h);
2429            }
2430            h.finish()
2431        };
2432        FileFactShard {
2433            source_uri: uri.to_string(),
2434            file_id,
2435            content_hash,
2436            anchors_hash: Some(anchors_hash),
2437            entities_hash: Some(entities_hash),
2438            occurrences_hash: Some(0),
2439            edges_hash: Some(0),
2440            anchors,
2441            entities,
2442            occurrences: Vec::new(),
2443            edges: Vec::new(),
2444        }
2445    }
2446
2447    /// Build a canonical [`FileFactShard`] from the AST using the semantic
2448    /// fact adapters in `perl-symbol`.
2449    ///
2450    /// This is the canonical population path that produces facts with real
2451    /// byte spans, `ExactAst` provenance, and per-category hashes. It runs
2452    /// alongside the legacy `build_fact_shard` path during the migration
2453    /// period.
2454    fn build_canonical_fact_shard_for_ast(
2455        uri: &str,
2456        content_hash: u64,
2457        ast: &Node,
2458    ) -> FileFactShard {
2459        let file_id = Self::hash_uri_to_file_id(uri);
2460
2461        // Extract declarations and references from the AST.
2462        let decls = extract_symbol_decls(ast, None);
2463        let refs = extract_symbol_refs(ast);
2464
2465        // Run the canonical adapters.
2466        let decl_facts = symbol_decls_to_semantic_facts(&decls, file_id);
2467
2468        // Build an entity lookup map for reference resolution.
2469        let entity_ids_by_name: std::collections::BTreeMap<String, EntityId> =
2470            decl_facts.entities.iter().map(|e| (e.canonical_name.clone(), e.id)).collect();
2471        let ref_facts = symbol_refs_to_semantic_facts(&refs, file_id, &entity_ids_by_name);
2472
2473        // No imports or dynamic boundaries available at this layer yet —
2474        // those will be supplied by perl-semantic-analyzer in later phases.
2475        crate::semantic::facts::build_canonical_fact_shard(
2476            uri,
2477            content_hash,
2478            &decl_facts,
2479            &ref_facts,
2480            &[],
2481            &[],
2482        )
2483    }
2484
2485    /// Replace a [`FileFactShard`] with per-category incremental invalidation.
2486    ///
2487    /// Compares the whole-file `content_hash` first; when unchanged the
2488    /// replacement is skipped entirely.  Otherwise each per-category hash
2489    /// (`anchors_hash`, `entities_hash`, `occurrences_hash`, `edges_hash`)
2490    /// is compared individually.  Only categories whose hash changed trigger
2491    /// removal of old entries and insertion of new ones in the cross-file
2492    /// semantic indexes.
2493    ///
2494    /// **Validates: Requirements 18.1, 18.2, 18.3, 18.4, 18.5**
2495    pub fn replace_fact_shard_incremental(
2496        &self,
2497        key: &str,
2498        new_shard: FileFactShard,
2499    ) -> ShardReplaceResult {
2500        let mut shards = self.fact_shards.write();
2501        let old_shard = shards.get(key);
2502
2503        let replacement = plan_shard_replacement(
2504            old_shard.map(Self::shard_category_hashes),
2505            Self::shard_category_hashes(&new_shard),
2506        );
2507
2508        if replacement.content_unchanged {
2509            return replacement;
2510        }
2511
2512        let source_uri = new_shard.source_uri.clone();
2513
2514        // ── Update cross-file semantic indexes per category ──
2515        // Occurrences and edges are both managed by the ReferenceIndex.
2516        // When either changes we must remove+re-add the file in that index.
2517        if replacement.occurrences_updated || replacement.edges_updated {
2518            let mut ref_idx = self.semantic_reference_index.write();
2519            if old_shard.is_some() {
2520                ref_idx.remove_file(&source_uri);
2521            }
2522            ref_idx.add_file(&new_shard);
2523        }
2524
2525        // Entities feed into the import/export index (export sets are keyed
2526        // by module name derived from entity canonical names).  When entities
2527        // change we refresh the import/export index for this file.
2528        if replacement.entities_updated {
2529            let mut ie_idx = self.semantic_import_export_index.write();
2530            ie_idx.remove_file_imports(&source_uri);
2531            ie_idx.remove_module_exports(&source_uri);
2532            // Re-add is handled by the caller or future wiring; for now we
2533            // ensure stale entries are purged.
2534        }
2535
2536        // Store the new shard (always, since content_hash differs).
2537        shards.insert(key.to_string(), new_shard);
2538
2539        replacement
2540    }
2541
2542    fn shard_category_hashes(shard: &FileFactShard) -> ShardCategoryHashes {
2543        ShardCategoryHashes {
2544            content_hash: shard.content_hash,
2545            anchors_hash: shard.anchors_hash,
2546            entities_hash: shard.entities_hash,
2547            occurrences_hash: shard.occurrences_hash,
2548            edges_hash: shard.edges_hash,
2549        }
2550    }
2551
2552    /// Number of stored file fact shards.
2553    pub fn fact_shard_count(&self) -> usize {
2554        self.fact_shards.read().len()
2555    }
2556
2557    /// Fetch a file fact shard for test/inspection.
2558    pub fn file_fact_shard(&self, uri: &str) -> Option<FileFactShard> {
2559        let key = DocumentStore::uri_key(&Self::normalize_uri(uri));
2560        self.fact_shards.read().get(&key).cloned()
2561    }
2562
2563    /// Return the number of indexed files in the workspace
2564    pub fn file_count(&self) -> usize {
2565        let files = self.files.read();
2566        files.len()
2567    }
2568
2569    /// Return the total number of symbols across all indexed files
2570    pub fn symbol_count(&self) -> usize {
2571        let files = self.files.read();
2572        files.values().map(|file_index| file_index.symbols.len()).sum()
2573    }
2574
2575    /// Get all files in a specific workspace folder
2576    ///
2577    /// # Arguments
2578    ///
2579    /// * `folder_uri` - Workspace folder URI to filter by
2580    ///
2581    /// # Returns
2582    ///
2583    /// A vector of file indices belonging to the specified folder
2584    pub fn files_in_folder(&self, folder_uri: &str) -> Vec<FileIndex> {
2585        let files = self.files.read();
2586        files.values().filter(|f| f.folder_uri.as_deref() == Some(folder_uri)).cloned().collect()
2587    }
2588
2589    /// Get all symbols in a specific workspace folder
2590    ///
2591    /// # Arguments
2592    ///
2593    /// * `folder_uri` - Workspace folder URI to filter by
2594    ///
2595    /// # Returns
2596    ///
2597    /// A vector of symbols belonging to the specified folder
2598    pub fn symbols_in_folder(&self, folder_uri: &str) -> Vec<WorkspaceSymbol> {
2599        let files = self.files.read();
2600        files
2601            .values()
2602            .filter(|f| f.folder_uri.as_deref() == Some(folder_uri))
2603            .flat_map(|f| f.symbols.iter().cloned())
2604            .collect()
2605    }
2606
2607    /// Capture a point-in-time memory estimate of the index.
2608    ///
2609    /// Acquires read locks on all index components and walks their contents
2610    /// to estimate heap usage. Intended for offline profiling; do not call
2611    /// on the LSP hot path.
2612    ///
2613    /// Only available when the `memory-profiling` feature is enabled.
2614    #[cfg(feature = "memory-profiling")]
2615    pub fn memory_snapshot(&self) -> crate::workspace::memory::MemorySnapshot {
2616        use std::mem::size_of;
2617
2618        let files_guard = self.files.read();
2619        let symbols_guard = self.symbols.read();
2620        let global_refs_guard = self.global_references.read();
2621
2622        // --- files map ---
2623        let mut files_bytes: usize = 0;
2624        let mut total_symbol_count: usize = 0;
2625        for (uri_key, fi) in files_guard.iter() {
2626            // key string
2627            files_bytes += uri_key.len();
2628            // per-symbol entries
2629            for sym in &fi.symbols {
2630                files_bytes += sym.name.len()
2631                    + sym.uri.len()
2632                    + sym.qualified_name.as_deref().map_or(0, str::len)
2633                    + sym.documentation.as_deref().map_or(0, str::len)
2634                    + sym.container_name.as_deref().map_or(0, str::len)
2635                    // stack portion: kind + range + has_body + option discriminants
2636                    + size_of::<WorkspaceSymbol>();
2637            }
2638            total_symbol_count += fi.symbols.len();
2639            // per-reference entries
2640            for (ref_name, refs) in &fi.references {
2641                files_bytes += ref_name.len();
2642                for r in refs {
2643                    files_bytes += r.uri.len() + size_of::<SymbolReference>();
2644                }
2645            }
2646            // dependencies
2647            for dep in &fi.dependencies {
2648                files_bytes += dep.len();
2649            }
2650            // content hash (u64) + vec/hashset capacity overhead (rough)
2651            files_bytes += size_of::<u64>();
2652        }
2653
2654        // --- global symbols map ---
2655        let mut symbols_bytes: usize = 0;
2656        for (qname, candidates) in symbols_guard.iter() {
2657            symbols_bytes += qname.len();
2658            for candidate in candidates {
2659                symbols_bytes += candidate.location.uri.len() + size_of::<Location>();
2660            }
2661        }
2662
2663        // --- global references map ---
2664        let mut global_refs_bytes: usize = 0;
2665        for (sym_name, locs) in global_refs_guard.iter() {
2666            global_refs_bytes += sym_name.len();
2667            for loc in locs {
2668                global_refs_bytes += loc.uri.len() + size_of::<Location>();
2669            }
2670        }
2671
2672        // --- document store ---
2673        let document_store_bytes = self.document_store.total_text_bytes();
2674
2675        crate::workspace::memory::MemorySnapshot {
2676            file_count: files_guard.len(),
2677            symbol_count: total_symbol_count,
2678            files_bytes,
2679            symbols_bytes,
2680            global_refs_bytes,
2681            document_store_bytes,
2682        }
2683    }
2684
2685    /// Check if the workspace index has symbols (soft readiness check)
2686    ///
2687    /// Returns true if the index contains any symbols, indicating that
2688    /// at least some files have been indexed and the workspace is ready
2689    /// for symbol-based operations like completion.
2690    ///
2691    /// # Returns
2692    ///
2693    /// `true` if any symbols are indexed, otherwise `false`.
2694    ///
2695    /// # Examples
2696    ///
2697    /// ```rust,ignore
2698    /// use perl_parser::workspace_index::WorkspaceIndex;
2699    ///
2700    /// let index = WorkspaceIndex::new();
2701    /// assert!(!index.has_symbols());
2702    /// ```
2703    pub fn has_symbols(&self) -> bool {
2704        let files = self.files.read();
2705        files.values().any(|file_index| !file_index.symbols.is_empty())
2706    }
2707
2708    /// Search for symbols by query
2709    ///
2710    /// # Arguments
2711    ///
2712    /// * `query` - Substring to match against symbol names
2713    ///
2714    /// # Returns
2715    ///
2716    /// Symbols whose names or qualified names contain the query string.
2717    ///
2718    /// # Examples
2719    ///
2720    /// ```rust,ignore
2721    /// use perl_parser::workspace_index::WorkspaceIndex;
2722    ///
2723    /// let index = WorkspaceIndex::new();
2724    /// let _results = index.search_symbols("example");
2725    /// ```
2726    pub fn search_symbols(&self, query: &str) -> Vec<WorkspaceSymbol> {
2727        let query_lower = query.to_lowercase();
2728        let files = self.files.read();
2729        let mut results = Vec::new();
2730        for file_index in files.values() {
2731            for symbol in &file_index.symbols {
2732                if symbol.name.to_lowercase().contains(&query_lower)
2733                    || symbol
2734                        .qualified_name
2735                        .as_ref()
2736                        .map(|qn| qn.to_lowercase().contains(&query_lower))
2737                        .unwrap_or(false)
2738                {
2739                    results.push(symbol.clone());
2740                }
2741            }
2742        }
2743        results
2744    }
2745
2746    /// Find symbols by query (alias for search_symbols for compatibility)
2747    ///
2748    /// # Arguments
2749    ///
2750    /// * `query` - Substring to match against symbol names
2751    ///
2752    /// # Returns
2753    ///
2754    /// Symbols whose names or qualified names contain the query string.
2755    ///
2756    /// # Examples
2757    ///
2758    /// ```rust,ignore
2759    /// use perl_parser::workspace_index::WorkspaceIndex;
2760    ///
2761    /// let index = WorkspaceIndex::new();
2762    /// let _results = index.find_symbols("example");
2763    /// ```
2764    pub fn find_symbols(&self, query: &str) -> Vec<WorkspaceSymbol> {
2765        self.search_symbols(query)
2766    }
2767
2768    /// Rank symbols by folder proximity to a document
2769    ///
2770    /// Returns symbols sorted by: same folder > other folders
2771    ///
2772    /// # Arguments
2773    ///
2774    /// * `symbols` - Symbols to rank
2775    /// * `doc_uri` - Document URI to determine folder context
2776    ///
2777    /// # Returns
2778    ///
2779    /// Symbols ranked by folder proximity (same folder first)
2780    ///
2781    /// # Examples
2782    ///
2783    /// ```rust,ignore
2784    /// use perl_parser::workspace_index::WorkspaceIndex;
2785    ///
2786    /// let index = WorkspaceIndex::new();
2787    /// let symbols = index.search_symbols("example");
2788    /// let ranked = index.rank_symbols_by_folder(symbols, "file:///project1/src/main.pl");
2789    /// ```
2790    pub fn rank_symbols_by_folder(
2791        &self,
2792        symbols: Vec<WorkspaceSymbol>,
2793        doc_uri: &str,
2794    ) -> Vec<WorkspaceSymbol> {
2795        let doc_folder = self.determine_folder_uri(doc_uri);
2796
2797        let mut ranked: Vec<(WorkspaceSymbol, i32)> = symbols
2798            .into_iter()
2799            .map(|symbol| {
2800                let rank = if let Some(ref doc_folder_uri) = doc_folder {
2801                    if symbol.workspace_folder_uri.as_ref() == Some(doc_folder_uri) {
2802                        0 // Same folder - highest priority
2803                    } else {
2804                        1 // Different folder - lower priority
2805                    }
2806                } else {
2807                    1 // No document context - treat as different folder
2808                };
2809                (symbol, rank)
2810            })
2811            .collect();
2812
2813        // Sort by rank (lower is better), then by name for stability
2814        ranked.sort_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0.name.cmp(&b.0.name)));
2815
2816        ranked.into_iter().map(|(symbol, _)| symbol).collect()
2817    }
2818
2819    /// Search for symbols with folder-aware ranking
2820    ///
2821    /// Combines symbol search with folder proximity ranking
2822    ///
2823    /// # Arguments
2824    ///
2825    /// * `name` - Symbol name to search for
2826    /// * `doc_uri` - Document URI for ranking context
2827    ///
2828    /// # Returns
2829    ///
2830    /// Ranked symbols with same-folder results first
2831    ///
2832    /// # Examples
2833    ///
2834    /// ```rust,ignore
2835    /// use perl_parser::workspace_index::WorkspaceIndex;
2836    ///
2837    /// let index = WorkspaceIndex::new();
2838    /// let ranked = index.search_symbols_ranked("example", "file:///project1/src/main.pl");
2839    /// ```
2840    pub fn search_symbols_ranked(&self, name: &str, doc_uri: &str) -> Vec<WorkspaceSymbol> {
2841        let symbols = self.search_symbols(name);
2842        self.rank_symbols_by_folder(symbols, doc_uri)
2843    }
2844
2845    /// Determine if two symbols are in the same package
2846    ///
2847    /// # Arguments
2848    ///
2849    /// * `symbol_a` - First symbol
2850    /// * `symbol_b` - Second symbol
2851    ///
2852    /// # Returns
2853    ///
2854    /// `true` if both symbols are in the same package
2855    #[allow(dead_code)]
2856    pub fn same_package(&self, symbol_a: &WorkspaceSymbol, symbol_b: &WorkspaceSymbol) -> bool {
2857        let package_a = self.extract_package_name(&symbol_a.name);
2858        let package_b = self.extract_package_name(&symbol_b.name);
2859        package_a == package_b
2860    }
2861
2862    /// Determine if two package names are the same (helper for testing)
2863    ///
2864    /// # Arguments
2865    ///
2866    /// * `package_a` - First package name
2867    /// * `package_b` - Second package name
2868    ///
2869    /// # Returns
2870    ///
2871    /// `true` if both package names are equal
2872    #[allow(dead_code)]
2873    pub fn same_package_by_container(&self, package_a: &str, package_b: &str) -> bool {
2874        package_a == package_b
2875    }
2876
2877    /// Extract package name from a symbol name
2878    ///
2879    /// # Arguments
2880    ///
2881    /// * `symbol_name` - Symbol name (e.g., "Foo::Bar::baz" or "baz")
2882    ///
2883    /// # Returns
2884    ///
2885    /// Package name (e.g., "Foo::Bar") or None for main package
2886    #[allow(dead_code)]
2887    pub fn extract_package_name(&self, symbol_name: &str) -> Option<String> {
2888        let parts: Vec<&str> = symbol_name.split("::").collect();
2889        if parts.len() > 1 { Some(parts[..parts.len() - 1].join("::")) } else { None }
2890    }
2891
2892    /// Get symbols in a specific file
2893    ///
2894    /// # Arguments
2895    ///
2896    /// * `uri` - File URI to inspect
2897    ///
2898    /// # Returns
2899    ///
2900    /// All symbols indexed for the requested file.
2901    ///
2902    /// # Examples
2903    ///
2904    /// ```rust,ignore
2905    /// use perl_parser::workspace_index::WorkspaceIndex;
2906    ///
2907    /// let index = WorkspaceIndex::new();
2908    /// let _symbols = index.file_symbols("file:///example.pl");
2909    /// ```
2910    pub fn file_symbols(&self, uri: &str) -> Vec<WorkspaceSymbol> {
2911        let normalized_uri = Self::normalize_uri(uri);
2912        let key = DocumentStore::uri_key(&normalized_uri);
2913        let files = self.files.read();
2914
2915        files.get(&key).map(|fi| fi.symbols.clone()).unwrap_or_default()
2916    }
2917
2918    /// Get dependencies of a file
2919    ///
2920    /// # Arguments
2921    ///
2922    /// * `uri` - File URI to inspect
2923    ///
2924    /// # Returns
2925    ///
2926    /// A set of module names imported by the file.
2927    ///
2928    /// # Examples
2929    ///
2930    /// ```rust,ignore
2931    /// use perl_parser::workspace_index::WorkspaceIndex;
2932    ///
2933    /// let index = WorkspaceIndex::new();
2934    /// let _deps = index.file_dependencies("file:///example.pl");
2935    /// ```
2936    pub fn file_dependencies(&self, uri: &str) -> HashSet<String> {
2937        let normalized_uri = Self::normalize_uri(uri);
2938        let key = DocumentStore::uri_key(&normalized_uri);
2939        let files = self.files.read();
2940
2941        files.get(&key).map(|fi| fi.dependencies.clone()).unwrap_or_default()
2942    }
2943
2944    /// Find all files that depend on a module
2945    ///
2946    /// # Arguments
2947    ///
2948    /// * `module_name` - Module name to search for in file dependencies
2949    ///
2950    /// # Returns
2951    ///
2952    /// A list of file URIs that import or depend on the module.
2953    ///
2954    /// # Examples
2955    ///
2956    /// ```rust,ignore
2957    /// use perl_parser::workspace_index::WorkspaceIndex;
2958    ///
2959    /// let index = WorkspaceIndex::new();
2960    /// let _files = index.find_dependents("My::Module");
2961    /// ```
2962    pub fn find_dependents(&self, module_name: &str) -> Vec<String> {
2963        let canonical = canonicalize_perl_module_name(module_name);
2964        let legacy = legacy_perl_module_name(&canonical);
2965        let files = self.files.read();
2966        let mut dependents = Vec::new();
2967
2968        for (uri_key, file_index) in files.iter() {
2969            if file_index.dependencies.contains(module_name)
2970                || file_index.dependencies.contains(&canonical)
2971                || file_index.dependencies.contains(&legacy)
2972            {
2973                dependents.push(uri_key.clone());
2974            }
2975        }
2976
2977        dependents
2978    }
2979
2980    /// Get the document store
2981    ///
2982    /// # Returns
2983    ///
2984    /// A reference to the in-memory document store.
2985    ///
2986    /// # Examples
2987    ///
2988    /// ```rust,ignore
2989    /// use perl_parser::workspace_index::WorkspaceIndex;
2990    ///
2991    /// let index = WorkspaceIndex::new();
2992    /// let _store = index.document_store();
2993    /// ```
2994    pub fn document_store(&self) -> &DocumentStore {
2995        &self.document_store
2996    }
2997
2998    /// Find unused symbols in the workspace
2999    ///
3000    /// # Returns
3001    ///
3002    /// Symbols that have no non-definition references in the workspace.
3003    ///
3004    /// # Examples
3005    ///
3006    /// ```rust,ignore
3007    /// use perl_parser::workspace_index::WorkspaceIndex;
3008    ///
3009    /// let index = WorkspaceIndex::new();
3010    /// let _unused = index.find_unused_symbols();
3011    /// ```
3012    pub fn find_unused_symbols(&self) -> Vec<WorkspaceSymbol> {
3013        let files = self.files.read();
3014        let mut unused = Vec::new();
3015
3016        // Collect all defined symbols
3017        for (_uri_key, file_index) in files.iter() {
3018            for symbol in &file_index.symbols {
3019                // Check if this symbol has any references beyond its definition
3020                let has_usage = files.values().any(|fi| {
3021                    if let Some(refs) = fi.references.get(&symbol.name) {
3022                        refs.iter().any(|r| r.kind != ReferenceKind::Definition)
3023                    } else {
3024                        false
3025                    }
3026                });
3027
3028                if !has_usage {
3029                    unused.push(symbol.clone());
3030                }
3031            }
3032        }
3033
3034        unused
3035    }
3036
3037    /// Get all symbols that belong to a specific package
3038    ///
3039    /// # Arguments
3040    ///
3041    /// * `package_name` - Package name to match (e.g., `My::Package`)
3042    ///
3043    /// # Returns
3044    ///
3045    /// Symbols defined within the requested package.
3046    ///
3047    /// # Examples
3048    ///
3049    /// ```rust,ignore
3050    /// use perl_parser::workspace_index::WorkspaceIndex;
3051    ///
3052    /// let index = WorkspaceIndex::new();
3053    /// let _members = index.get_package_members("My::Package");
3054    /// ```
3055    pub fn get_package_members(&self, package_name: &str) -> Vec<WorkspaceSymbol> {
3056        let files = self.files.read();
3057        let mut members = Vec::new();
3058
3059        for (_uri_key, file_index) in files.iter() {
3060            for symbol in &file_index.symbols {
3061                // Check if symbol belongs to this package
3062                if let Some(ref container) = symbol.container_name {
3063                    if container == package_name {
3064                        members.push(symbol.clone());
3065                    }
3066                }
3067                // Also check qualified names
3068                if let Some(ref qname) = symbol.qualified_name {
3069                    if qname.starts_with(&format!("{}::", package_name)) {
3070                        // Avoid duplicates - only add if not already in via container_name
3071                        if symbol.container_name.as_deref() != Some(package_name) {
3072                            members.push(symbol.clone());
3073                        }
3074                    }
3075                }
3076            }
3077        }
3078
3079        members
3080    }
3081
3082    /// Find the definition location for a symbol key during Index/Navigate stages.
3083    ///
3084    /// # Arguments
3085    ///
3086    /// * `key` - Normalized symbol key to resolve.
3087    ///
3088    /// # Returns
3089    ///
3090    /// The definition location for the symbol, if found.
3091    ///
3092    /// # Examples
3093    ///
3094    /// ```rust,ignore
3095    /// use perl_parser::workspace_index::{SymKind, SymbolKey, WorkspaceIndex};
3096    /// use std::sync::Arc;
3097    ///
3098    /// let index = WorkspaceIndex::new();
3099    /// let key = SymbolKey { pkg: Arc::from("My::Package"), name: Arc::from("example"), sigil: None, kind: SymKind::Sub };
3100    /// let _def = index.find_def(&key);
3101    /// ```
3102    pub fn find_def(&self, key: &SymbolKey) -> Option<Location> {
3103        if let Some(sigil) = key.sigil {
3104            // It's a variable
3105            let var_name = format!("{}{}", sigil, key.name);
3106            self.find_definition(&var_name)
3107        } else if key.kind == SymKind::Pack {
3108            // It's a package lookup (e.g., from `use Module::Name`)
3109            // Search for the package declaration by name
3110            self.find_definition(key.pkg.as_ref())
3111                .or_else(|| self.find_definition(key.name.as_ref()))
3112        } else {
3113            // It's a subroutine or package
3114            let qualified_name = format!("{}::{}", key.pkg, key.name);
3115            self.find_definition(&qualified_name)
3116        }
3117    }
3118
3119    /// Find reference locations for a symbol key using dual indexing.
3120    ///
3121    /// Searches both qualified and bare names to support Navigate/Analyze workflows.
3122    ///
3123    /// # Arguments
3124    ///
3125    /// * `key` - Normalized symbol key to search for.
3126    ///
3127    /// # Returns
3128    ///
3129    /// All reference locations for the symbol, excluding the definition.
3130    ///
3131    /// # Examples
3132    ///
3133    /// ```rust,ignore
3134    /// use perl_parser::workspace_index::{SymKind, SymbolKey, WorkspaceIndex};
3135    /// use std::sync::Arc;
3136    ///
3137    /// let index = WorkspaceIndex::new();
3138    /// let key = SymbolKey { pkg: Arc::from("main"), name: Arc::from("example"), sigil: None, kind: SymKind::Sub };
3139    /// let _refs = index.find_refs(&key);
3140    /// ```
3141    pub fn find_refs(&self, key: &SymbolKey) -> Vec<Location> {
3142        let files_locked = self.files.read();
3143        let mut all_refs = if let Some(sigil) = key.sigil {
3144            // It's a variable - search through all files for this variable name
3145            let var_name = format!("{}{}", sigil, key.name);
3146            let mut refs = Vec::new();
3147            for (_uri_key, file_index) in files_locked.iter() {
3148                if let Some(var_refs) = file_index.references.get(&var_name) {
3149                    for reference in var_refs {
3150                        refs.push(Location { uri: reference.uri.clone(), range: reference.range });
3151                    }
3152                }
3153            }
3154            refs
3155        } else {
3156            // It's a subroutine or package
3157            if key.pkg.as_ref() == "main" {
3158                // For main package, we search for both "main::foo" and bare "foo"
3159                let mut refs = self.find_references(&format!("main::{}", key.name));
3160                // Add bare name references
3161                for (_uri_key, file_index) in files_locked.iter() {
3162                    if let Some(bare_refs) = file_index.references.get(key.name.as_ref()) {
3163                        for reference in bare_refs {
3164                            refs.push(Location {
3165                                uri: reference.uri.clone(),
3166                                range: reference.range,
3167                            });
3168                        }
3169                    }
3170                }
3171                refs
3172            } else {
3173                let qualified_name = format!("{}::{}", key.pkg, key.name);
3174                self.find_references(&qualified_name)
3175            }
3176        };
3177        drop(files_locked);
3178
3179        // Remove the definition; the caller will include it separately if needed
3180        if let Some(def) = self.find_def(key) {
3181            all_refs.retain(|loc| !(loc.uri == def.uri && loc.range == def.range));
3182        }
3183
3184        // Deduplicate by URI and range
3185        let mut seen = HashSet::new();
3186        all_refs.retain(|loc| {
3187            seen.insert((
3188                loc.uri.clone(),
3189                loc.range.start.line,
3190                loc.range.start.column,
3191                loc.range.end.line,
3192                loc.range.end.column,
3193            ))
3194        });
3195
3196        all_refs
3197    }
3198}
3199
/// AST visitor for extracting symbols and references
///
/// Holds the per-file state used while one parsed document is walked and its
/// findings are written into a `FileIndex`.
struct IndexVisitor {
    /// The visitor's own copy of the document (cloned in `new`); its
    /// `line_index` converts byte spans into line/column positions.
    document: Document,
    /// URI of the file being indexed; copied onto each emitted symbol.
    uri: String,
    /// Package context handed to declaration extraction. Starts as
    /// `Some("main")`.
    current_package: Option<String>,
    /// Workspace folder the file belongs to, if one was supplied to `new`.
    workspace_folder_uri: Option<String>,
}
3207
/// Whether `byte` may start an interpolated variable name: an ASCII letter
/// or an underscore.
fn is_interpolated_var_start(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
3211
/// Whether `byte` may continue an interpolated variable name: an ASCII letter
/// or digit, an underscore, or a colon (as found in `::` package separators).
fn is_interpolated_var_continue(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b':')
}
3215
/// Whether the byte at `index` is escaped by the backslashes right before it.
///
/// Counts the unbroken run of `\` bytes that ends immediately before `index`.
/// An odd count means the last backslash escapes the byte at `index`; an even
/// count (including zero) means the backslashes pair up with each other.
///
/// # Panics
///
/// Panics if `index` is greater than `bytes.len()`.
fn has_escaped_interpolation_marker(bytes: &[u8], index: usize) -> bool {
    let preceding_backslashes =
        bytes[..index].iter().rev().take_while(|&&byte| byte == b'\\').count();
    preceding_backslashes % 2 == 1
}
3230
/// Remove one pair of matching surrounding quotes from `raw_content`.
///
/// When the text both starts and ends with `"`, or both starts and ends with
/// `'`, using two distinct quote characters, the inner slice is returned.
/// Anything else, including a lone quote character or mismatched quotes,
/// is returned unchanged.
fn strip_matching_quote_delimiters(raw_content: &str) -> &str {
    for &quote in &['"', '\''] {
        // `strip_suffix` runs on what is left after the prefix, so a single
        // quote character cannot serve as both opening and closing delimiter.
        let inner = raw_content.strip_prefix(quote).and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = inner {
            return inner;
        }
    }
    raw_content
}
3244
3245impl IndexVisitor {
3246    fn new(document: &mut Document, uri: String, workspace_folder_uri: Option<String>) -> Self {
3247        Self {
3248            document: document.clone(),
3249            uri,
3250            current_package: Some("main".to_string()),
3251            workspace_folder_uri,
3252        }
3253    }
3254
    /// Index one AST rooted at `node`, writing the results into `file_index`.
    ///
    /// Runs two passes in a fixed order: symbol declarations are projected
    /// into `file_index` first, then `visit_node` walks the same tree.
    fn visit(&mut self, node: &Node, file_index: &mut FileIndex) {
        // Pass 1: declarations reported by `extract_symbol_decls` become
        // `WorkspaceSymbol` entries.
        self.project_symbol_declarations(node, file_index);
        // Pass 2. NOTE(review): `visit_node` is defined outside this view;
        // presumably it records references and other per-node facts; confirm.
        self.visit_node(node, file_index);
    }
3259
3260    fn project_symbol_declarations(&self, node: &Node, file_index: &mut FileIndex) {
3261        for decl in extract_symbol_decls(node, self.current_package.as_deref()) {
3262            let (start, end) = match decl.kind {
3263                SymbolKind::Variable(_) => match decl.anchor_span {
3264                    Some(span) => span,
3265                    None => decl.full_span,
3266                },
3267                _ => decl.full_span,
3268            };
3269            let ((start_line, start_col), (end_line, end_col)) =
3270                self.document.line_index.range(start, end);
3271            let range = Range {
3272                start: Position { byte: start, line: start_line, column: start_col },
3273                end: Position { byte: end, line: end_line, column: end_col },
3274            };
3275
3276            let symbol_name = symbol_decl_name(&decl.kind, &decl.name);
3277
3278            // Suppress qualified_name for lexically-scoped variables (my, state): they
3279            // are not package-visible and must not be found by a qualified lookup such
3280            // as `Foo::x`.  `our` and `local` variables keep the qualified name because
3281            // they participate in the package namespace.
3282            let qualified_name = match &decl.declarator {
3283                Some(d) if d == "my" || d == "state" => None,
3284                _ => (!decl.qualified_name.is_empty()).then_some(decl.qualified_name),
3285            };
3286
3287            // Top-level package declarations have no containing package; suppress the
3288            // spurious "main" container that comes from the walker's initial context.
3289            let container_name = match decl.kind {
3290                SymbolKind::Package => None,
3291                _ => decl.container,
3292            };
3293
3294            file_index.symbols.push(WorkspaceSymbol {
3295                name: symbol_name.clone(),
3296                kind: decl.kind,
3297                uri: self.uri.clone(),
3298                range,
3299                qualified_name,
3300                documentation: None,
3301                container_name,
3302                has_body: true,
3303                workspace_folder_uri: self.workspace_folder_uri.clone(),
3304            });
3305
3306            file_index.references.entry(symbol_name).or_default().push(SymbolReference {
3307                uri: self.uri.clone(),
3308                range,
3309                kind: ReferenceKind::Definition,
3310            });
3311        }
3312    }
3313
3314    fn record_interpolated_variable_references(
3315        &self,
3316        raw_content: &str,
3317        range: Range,
3318        file_index: &mut FileIndex,
3319    ) {
3320        let content = strip_matching_quote_delimiters(raw_content);
3321        let bytes = content.as_bytes();
3322        let mut index = 0;
3323
3324        while index < bytes.len() {
3325            if has_escaped_interpolation_marker(bytes, index) {
3326                index += 1;
3327                continue;
3328            }
3329
3330            let sigil = match bytes[index] {
3331                b'$' => "$",
3332                b'@' => "@",
3333                _ => {
3334                    index += 1;
3335                    continue;
3336                }
3337            };
3338
3339            if index + 1 >= bytes.len() {
3340                break;
3341            }
3342
3343            let (start, needs_closing_brace) =
3344                if bytes[index + 1] == b'{' { (index + 2, true) } else { (index + 1, false) };
3345
3346            if start >= bytes.len() || !is_interpolated_var_start(bytes[start]) {
3347                index += 1;
3348                continue;
3349            }
3350
3351            let mut end = start + 1;
3352            while end < bytes.len() && is_interpolated_var_continue(bytes[end]) {
3353                end += 1;
3354            }
3355
3356            if needs_closing_brace && (end >= bytes.len() || bytes[end] != b'}') {
3357                index += 1;
3358                continue;
3359            }
3360
3361            if let Some(name) = content.get(start..end) {
3362                let var_name = format!("{sigil}{name}");
3363                file_index.references.entry(var_name).or_default().push(SymbolReference {
3364                    uri: self.uri.clone(),
3365                    range,
3366                    kind: ReferenceKind::Read,
3367                });
3368            }
3369
3370            index = if needs_closing_brace { end + 1 } else { end };
3371        }
3372    }
3373
3374    fn visit_node(&mut self, node: &Node, file_index: &mut FileIndex) {
3375        match &node.kind {
3376            NodeKind::Package { name, .. } => {
3377                let package_name = name.clone();
3378
3379                // Update the current package (replaces the previous one, not a stack)
3380                self.current_package = Some(package_name.clone());
3381            }
3382
3383            NodeKind::Subroutine { body, .. } => {
3384                // Visit body
3385                self.visit_node(body, file_index);
3386            }
3387
3388            NodeKind::VariableDeclaration { initializer, .. } => {
3389                // Visit initializer
3390                if let Some(init) = initializer {
3391                    self.visit_node(init, file_index);
3392                }
3393            }
3394
3395            NodeKind::VariableListDeclaration { initializer, .. } => {
3396                // Visit the initializer
3397                if let Some(init) = initializer {
3398                    self.visit_node(init, file_index);
3399                }
3400            }
3401
3402            NodeKind::Variable { sigil, name } => {
3403                let var_name = format!("{}{}", sigil, name);
3404
3405                // Track as usage (could be read or write based on context)
3406                file_index.references.entry(var_name).or_default().push(SymbolReference {
3407                    uri: self.uri.clone(),
3408                    range: self.node_to_range(node),
3409                    kind: ReferenceKind::Read, // Default to read, would need context for write
3410                });
3411            }
3412
3413            NodeKind::FunctionCall { name, args, .. } => {
3414                let func_name = name.clone();
3415                let location = self.node_to_range(node);
3416
3417                // Determine package and bare name
3418                let (pkg, bare_name) = if let Some(idx) = func_name.rfind("::") {
3419                    (&func_name[..idx], &func_name[idx + 2..])
3420                } else {
3421                    (self.current_package.as_deref().unwrap_or("main"), func_name.as_str())
3422                };
3423
3424                let qualified = format!("{}::{}", pkg, bare_name);
3425
3426                // Track as usage for both qualified and bare forms
3427                // This dual indexing allows finding references whether the function is called
3428                // as `process_data()` or `Utils::process_data()`
3429                file_index.references.entry(bare_name.to_string()).or_default().push(
3430                    SymbolReference {
3431                        uri: self.uri.clone(),
3432                        range: location,
3433                        kind: ReferenceKind::Usage,
3434                    },
3435                );
3436                file_index.references.entry(qualified).or_default().push(SymbolReference {
3437                    uri: self.uri.clone(),
3438                    range: location,
3439                    kind: ReferenceKind::Usage,
3440                });
3441
3442                if name == "extends" || name == "with" {
3443                    for module_name in extract_module_names_from_call_args(args) {
3444                        file_index
3445                            .dependencies
3446                            .insert(normalize_dependency_module_name(&module_name));
3447                    }
3448                } else if name == "require" {
3449                    if let Some(module_name) = extract_module_name_from_require_args(args) {
3450                        file_index
3451                            .dependencies
3452                            .insert(normalize_dependency_module_name(&module_name));
3453                    }
3454                }
3455
3456                // Visit arguments
3457                for arg in args {
3458                    self.visit_node(arg, file_index);
3459                }
3460            }
3461
3462            NodeKind::Use { module, args, .. } => {
3463                let module_name = normalize_dependency_module_name(module);
3464                file_index.dependencies.insert(module_name.clone());
3465
3466                // Also track actual parent/base class names for dependency discovery.
3467                // `use parent 'Foo::Bar'` stores module="parent" and args=["'Foo::Bar'"],
3468                // so find_dependents("Foo::Bar") would miss files with only use parent.
3469                if module == "parent" || module == "base" {
3470                    for name in extract_module_names_from_use_args(args) {
3471                        file_index.dependencies.insert(normalize_dependency_module_name(&name));
3472                    }
3473                }
3474
3475                // Track as import
3476                file_index.references.entry(module_name).or_default().push(SymbolReference {
3477                    uri: self.uri.clone(),
3478                    range: self.node_to_range(node),
3479                    kind: ReferenceKind::Import,
3480                });
3481            }
3482
3483            // Handle assignment to detect writes
3484            NodeKind::Assignment { lhs, rhs, op } => {
3485                // For compound assignments (+=, -=, .=, etc.), the LHS is both read and written
3486                let is_compound = op != "=";
3487
3488                if let NodeKind::Variable { sigil, name } = &lhs.kind {
3489                    let var_name = format!("{}{}", sigil, name);
3490
3491                    // For compound assignments, it's a read first
3492                    if is_compound {
3493                        file_index.references.entry(var_name.clone()).or_default().push(
3494                            SymbolReference {
3495                                uri: self.uri.clone(),
3496                                range: self.node_to_range(lhs),
3497                                kind: ReferenceKind::Read,
3498                            },
3499                        );
3500                    }
3501
3502                    // Then it's always a write
3503                    file_index.references.entry(var_name).or_default().push(SymbolReference {
3504                        uri: self.uri.clone(),
3505                        range: self.node_to_range(lhs),
3506                        kind: ReferenceKind::Write,
3507                    });
3508                }
3509
3510                // Right side could have reads
3511                self.visit_node(rhs, file_index);
3512            }
3513
3514            // Recursively visit child nodes
3515            NodeKind::Block { statements } => {
3516                for stmt in statements {
3517                    self.visit_node(stmt, file_index);
3518                }
3519            }
3520
3521            NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
3522                self.visit_node(condition, file_index);
3523                self.visit_node(then_branch, file_index);
3524                for (cond, branch) in elsif_branches {
3525                    self.visit_node(cond, file_index);
3526                    self.visit_node(branch, file_index);
3527                }
3528                if let Some(else_br) = else_branch {
3529                    self.visit_node(else_br, file_index);
3530                }
3531            }
3532
3533            NodeKind::While { condition, body, continue_block } => {
3534                self.visit_node(condition, file_index);
3535                self.visit_node(body, file_index);
3536                if let Some(cont) = continue_block {
3537                    self.visit_node(cont, file_index);
3538                }
3539            }
3540
3541            NodeKind::For { init, condition, update, body, continue_block } => {
3542                if let Some(i) = init {
3543                    self.visit_node(i, file_index);
3544                }
3545                if let Some(c) = condition {
3546                    self.visit_node(c, file_index);
3547                }
3548                if let Some(u) = update {
3549                    self.visit_node(u, file_index);
3550                }
3551                self.visit_node(body, file_index);
3552                if let Some(cont) = continue_block {
3553                    self.visit_node(cont, file_index);
3554                }
3555            }
3556
3557            NodeKind::Foreach { variable, list, body, continue_block } => {
3558                // Iterator is a write context
3559                if let Some(cb) = continue_block {
3560                    self.visit_node(cb, file_index);
3561                }
3562                if let NodeKind::Variable { sigil, name } = &variable.kind {
3563                    let var_name = format!("{}{}", sigil, name);
3564                    file_index.references.entry(var_name).or_default().push(SymbolReference {
3565                        uri: self.uri.clone(),
3566                        range: self.node_to_range(variable),
3567                        kind: ReferenceKind::Write,
3568                    });
3569                }
3570                self.visit_node(variable, file_index);
3571                self.visit_node(list, file_index);
3572                self.visit_node(body, file_index);
3573            }
3574
3575            NodeKind::MethodCall { object, method, args } => {
3576                // Check if this is a static method call (Package->method)
3577                let qualified_method = if let NodeKind::Identifier { name } = &object.kind {
3578                    // Static method call: Package->method
3579                    Some(format!("{}::{}", name, method))
3580                } else {
3581                    // Instance method call: $obj->method
3582                    None
3583                };
3584
3585                // Object is a read context
3586                self.visit_node(object, file_index);
3587
3588                // Track method call under BOTH the qualified form (for static calls
3589                // like `Pkg->method`) AND the bare method name. This mirrors the
3590                // FunctionCall dual-key storage above (PR #122 dual-indexing pattern)
3591                // so that bare-name lookups (e.g. `find_unused_symbols`,
3592                // `count_usages("method")`) consistently find static method call sites.
3593                // See #6799 for the original asymmetric-storage bug report.
3594                let location = self.node_to_range(node);
3595                if let Some(qualified_method) = qualified_method.as_ref() {
3596                    file_index.references.entry(qualified_method.clone()).or_default().push(
3597                        SymbolReference {
3598                            uri: self.uri.clone(),
3599                            range: location,
3600                            kind: ReferenceKind::Usage,
3601                        },
3602                    );
3603                }
3604                file_index.references.entry(method.clone()).or_default().push(SymbolReference {
3605                    uri: self.uri.clone(),
3606                    range: location,
3607                    kind: ReferenceKind::Usage,
3608                });
3609
3610                if method == "import"
3611                    && let NodeKind::Identifier { name: module_name } = &object.kind
3612                {
3613                    for symbol in extract_manual_import_symbols(args) {
3614                        file_index.references.entry(symbol).or_default().push(SymbolReference {
3615                            uri: self.uri.clone(),
3616                            range: self.node_to_range(node),
3617                            kind: ReferenceKind::Import,
3618                        });
3619                    }
3620                    file_index.dependencies.insert(normalize_dependency_module_name(module_name));
3621                }
3622
3623                // Visit arguments
3624                for arg in args {
3625                    self.visit_node(arg, file_index);
3626                }
3627            }
3628
3629            NodeKind::No { module, .. } => {
3630                let module_name = normalize_dependency_module_name(module);
3631                file_index.dependencies.insert(module_name);
3632            }
3633
3634            NodeKind::Class { name, .. } => {
3635                self.current_package = Some(name.clone());
3636            }
3637
3638            NodeKind::Method { body, signature, .. } => {
3639                // Visit params
3640                if let Some(sig) = signature {
3641                    if let NodeKind::Signature { parameters } = &sig.kind {
3642                        for param in parameters {
3643                            self.visit_node(param, file_index);
3644                        }
3645                    }
3646                }
3647
3648                // Visit body
3649                self.visit_node(body, file_index);
3650            }
3651
3652            NodeKind::String { value, interpolated } => {
3653                if *interpolated {
3654                    let range = self.node_to_range(node);
3655                    self.record_interpolated_variable_references(value, range, file_index);
3656                }
3657            }
3658
3659            NodeKind::Heredoc { content, interpolated, .. } => {
3660                if *interpolated {
3661                    let range = self.node_to_range(node);
3662                    self.record_interpolated_variable_references(content, range, file_index);
3663                }
3664            }
3665
3666            // Handle special assignments (++ and --)
3667            NodeKind::Unary { op, operand } if op == "++" || op == "--" => {
3668                // Pre/post increment/decrement are both read and write
3669                if let NodeKind::Variable { sigil, name } = &operand.kind {
3670                    let var_name = format!("{}{}", sigil, name);
3671
3672                    // It's both a read and a write
3673                    file_index.references.entry(var_name.clone()).or_default().push(
3674                        SymbolReference {
3675                            uri: self.uri.clone(),
3676                            range: self.node_to_range(operand),
3677                            kind: ReferenceKind::Read,
3678                        },
3679                    );
3680
3681                    file_index.references.entry(var_name).or_default().push(SymbolReference {
3682                        uri: self.uri.clone(),
3683                        range: self.node_to_range(operand),
3684                        kind: ReferenceKind::Write,
3685                    });
3686                }
3687            }
3688
3689            _ => {
3690                // For other node types, just visit children
3691                self.visit_children(node, file_index);
3692            }
3693        }
3694    }
3695
3696    fn visit_children(&mut self, node: &Node, file_index: &mut FileIndex) {
3697        // Generic visitor for unhandled node types - visit all nested nodes
3698        match &node.kind {
3699            NodeKind::Program { statements } => {
3700                for stmt in statements {
3701                    self.visit_node(stmt, file_index);
3702                }
3703            }
3704            NodeKind::ExpressionStatement { expression } => {
3705                self.visit_node(expression, file_index);
3706            }
3707            // Expression nodes
3708            NodeKind::Unary { operand, .. } => {
3709                self.visit_node(operand, file_index);
3710            }
3711            NodeKind::Binary { left, right, .. } => {
3712                self.visit_node(left, file_index);
3713                self.visit_node(right, file_index);
3714            }
3715            NodeKind::Ternary { condition, then_expr, else_expr } => {
3716                self.visit_node(condition, file_index);
3717                self.visit_node(then_expr, file_index);
3718                self.visit_node(else_expr, file_index);
3719            }
3720            NodeKind::ArrayLiteral { elements } => {
3721                for elem in elements {
3722                    self.visit_node(elem, file_index);
3723                }
3724            }
3725            NodeKind::HashLiteral { pairs } => {
3726                for (key, value) in pairs {
3727                    self.visit_node(key, file_index);
3728                    self.visit_node(value, file_index);
3729                }
3730            }
3731            NodeKind::Return { value } => {
3732                if let Some(val) = value {
3733                    self.visit_node(val, file_index);
3734                }
3735            }
3736            NodeKind::Eval { block } | NodeKind::Do { block } | NodeKind::Defer { block } => {
3737                self.visit_node(block, file_index);
3738            }
3739            NodeKind::Try { body, catch_blocks, finally_block } => {
3740                self.visit_node(body, file_index);
3741                for (_, block) in catch_blocks {
3742                    self.visit_node(block, file_index);
3743                }
3744                if let Some(finally) = finally_block {
3745                    self.visit_node(finally, file_index);
3746                }
3747            }
3748            NodeKind::Given { expr, body } => {
3749                self.visit_node(expr, file_index);
3750                self.visit_node(body, file_index);
3751            }
3752            NodeKind::When { condition, body } => {
3753                self.visit_node(condition, file_index);
3754                self.visit_node(body, file_index);
3755            }
3756            NodeKind::Default { body } => {
3757                self.visit_node(body, file_index);
3758            }
3759            NodeKind::StatementModifier { statement, condition, .. } => {
3760                self.visit_node(statement, file_index);
3761                self.visit_node(condition, file_index);
3762            }
3763            NodeKind::VariableWithAttributes { variable, .. } => {
3764                self.visit_node(variable, file_index);
3765            }
3766            NodeKind::LabeledStatement { statement, .. } => {
3767                self.visit_node(statement, file_index);
3768            }
3769            _ => {
3770                // For other node types, no children to visit
3771            }
3772        }
3773    }
3774
3775    fn node_to_range(&mut self, node: &Node) -> Range {
3776        // LineIndex.range returns line numbers and UTF-16 code unit columns
3777        let ((start_line, start_col), (end_line, end_col)) =
3778            self.document.line_index.range(node.location.start, node.location.end);
3779        // Use byte offsets from node.location directly
3780        Range {
3781            start: Position { byte: node.location.start, line: start_line, column: start_col },
3782            end: Position { byte: node.location.end, line: end_line, column: end_col },
3783        }
3784    }
3785}
3786
3787fn symbol_decl_name(kind: &SymbolKind, name: &str) -> String {
3788    match kind {
3789        SymbolKind::Variable(VarKind::Scalar) => format!("${name}"),
3790        SymbolKind::Variable(VarKind::Array) => format!("@{name}"),
3791        SymbolKind::Variable(VarKind::Hash) => format!("%{name}"),
3792        _ => name.to_string(),
3793    }
3794}
3795
3796/// Extract bare module names from the argument list of a `use parent` / `use base` statement.
3797///
3798/// The `args` field of `NodeKind::Use` stores raw argument strings as the parser captured them.
3799/// For `use parent 'Foo::Bar'` this is `["'Foo::Bar'"]`.
3800/// For `use parent qw(Foo::Bar Other::Base)` this is `["qw(Foo::Bar Other::Base)"]`.
3801/// For `use parent -norequire, 'Foo::Bar'` this is `["-norequire", "'Foo::Bar'"]`.
3802///
3803/// Returns the module names with surrounding quotes/qw wrappers stripped.
3804/// Tokens starting with `-` or not matching `[\w::']+` are silently skipped.
3805fn extract_module_names_from_use_args(args: &[String]) -> Vec<String> {
3806    use std::collections::HashSet;
3807
3808    fn normalize_module_name(token: &str) -> Option<&str> {
3809        let stripped = token.trim_matches(|c: char| {
3810            matches!(c, '\'' | '"' | '(' | ')' | '[' | ']' | '{' | '}' | ',' | ';')
3811        });
3812
3813        if stripped.is_empty() || stripped.starts_with('-') {
3814            return None;
3815        }
3816
3817        stripped
3818            .chars()
3819            .all(|c| c.is_alphanumeric() || c == '_' || c == ':' || c == '\'')
3820            .then_some(stripped)
3821    }
3822
3823    let joined = args.join(" ");
3824
3825    let (qw_words, remainder) = extract_qw_words(&joined);
3826    let mut modules = Vec::new();
3827    let mut seen = HashSet::new();
3828    for word in qw_words {
3829        if let Some(candidate) = normalize_module_name(&word) {
3830            let canonical = canonicalize_perl_module_name(candidate);
3831            if seen.insert(canonical.clone()) {
3832                modules.push(canonical);
3833            }
3834        }
3835    }
3836
3837    for token in remainder.split_whitespace().flat_map(|t| t.split(',')) {
3838        if let Some(candidate) = normalize_module_name(token) {
3839            let canonical = canonicalize_perl_module_name(candidate);
3840            if seen.insert(canonical.clone()) {
3841                modules.push(canonical);
3842            }
3843        }
3844    }
3845
3846    modules
3847}
3848
3849fn extract_module_names_from_call_args(args: &[Node]) -> Vec<String> {
3850    fn collect_from_node(node: &Node, out: &mut Vec<String>) {
3851        match &node.kind {
3852            NodeKind::String { value, .. } => {
3853                out.extend(extract_module_names_from_use_args(std::slice::from_ref(value)));
3854            }
3855            NodeKind::Identifier { name } => {
3856                out.extend(extract_module_names_from_use_args(std::slice::from_ref(name)));
3857            }
3858            NodeKind::ArrayLiteral { elements } => {
3859                for element in elements {
3860                    collect_from_node(element, out);
3861                }
3862            }
3863            NodeKind::FunctionCall { name, args, .. } if name == "qw" => {
3864                for arg in args {
3865                    collect_from_node(arg, out);
3866                }
3867            }
3868            _ => {}
3869        }
3870    }
3871
3872    let mut modules = Vec::new();
3873    for arg in args {
3874        collect_from_node(arg, &mut modules);
3875    }
3876    modules
3877}
3878
/// Rewrites Perl's legacy `'` package separator (as in `Foo'Bar`) to `::`, so
/// that lookups and dependency matching share one key shape.
fn canonicalize_perl_module_name(name: &str) -> String {
    let segments: Vec<&str> = name.split('\'').collect();
    segments.join("::")
}
3884
/// Rewrites every `::` package separator in `name` to the legacy `'` form
/// (`Foo::Bar` becomes `Foo'Bar`).
fn legacy_perl_module_name(name: &str) -> String {
    let segments: Vec<&str> = name.split("::").collect();
    segments.join("'")
}
3888
/// Normalize a module name for dependency storage and lookup.
///
/// Converts legacy `'` separators to `::` so stored keys are canonical; the
/// conversion itself is delegated to `canonicalize_perl_module_name`.
fn normalize_dependency_module_name(module_name: &str) -> String {
    canonicalize_perl_module_name(module_name)
}
3894
/// Splits `input` into the words of every `qw` list it contains and the text
/// that is left over.
///
/// Returns `(words, remainder)`:
/// - `words` holds the whitespace-separated entries of each recognised
///   `qw<delim>...<delim>` list, in order of appearance.
/// - `remainder` is `input` with those lists removed.
///
/// `qw` is only recognised at the start of a token: when it is preceded by an
/// identifier character (alphanumeric or `_`), as in `my_qw(...)`, it is left
/// in the remainder. Whitespace may separate `qw` from its opening delimiter.
/// The bracket delimiters `()`, `[]`, `{}`, `<>` may nest; any other delimiter
/// closes at its next occurrence. An identifier character or a quote is not
/// accepted as a delimiter. If a list is never closed, the rest of the input
/// from that `qw` onward goes to the remainder.
fn extract_qw_words(input: &str) -> (Vec<String>, String) {
    /// True for characters that can be part of an identifier.
    fn is_word_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    let chars: Vec<char> = input.chars().collect();
    let mut words = Vec::new();
    let mut remainder = String::new();
    let mut i = 0;

    while i < chars.len() {
        let starts_qw = chars[i] == 'q'
            && chars.get(i + 1) == Some(&'w')
            && (i == 0 || !is_word_char(chars[i - 1]));
        if !starts_qw {
            remainder.push(chars[i]);
            i += 1;
            continue;
        }

        // Skip optional whitespace between `qw` and its opening delimiter.
        let mut j = i + 2;
        while j < chars.len() && chars[j].is_whitespace() {
            j += 1;
        }

        // No delimiter at all, or the text continues as an identifier/quote:
        // this is not a `qw` list, so keep the `q` and move on.
        let open = match chars.get(j) {
            Some(&c) if !(is_word_char(c) || c == '\'' || c == '"') => c,
            _ => {
                remainder.push(chars[i]);
                i += 1;
                continue;
            }
        };

        let paired_close = match open {
            '(' => Some(')'),
            '[' => Some(']'),
            '{' => Some('}'),
            '<' => Some('>'),
            _ => None,
        };

        // Locate the index of the closing delimiter, if there is one.
        let close_index = match paired_close {
            Some(close) => {
                // Bracket delimiters nest, so track the depth.
                let mut depth = 1usize;
                let mut k = j + 1;
                while k < chars.len() && depth > 0 {
                    if chars[k] == open {
                        depth += 1;
                    } else if chars[k] == close {
                        depth -= 1;
                    }
                    k += 1;
                }
                if depth == 0 { Some(k - 1) } else { None }
            }
            None => chars[j + 1..].iter().position(|&c| c == open).map(|offset| j + 1 + offset),
        };

        let k = match close_index {
            Some(k) => k,
            None => {
                // Unterminated list: hand everything from `qw` onward back verbatim.
                remainder.extend(chars[i..].iter());
                break;
            }
        };

        let content: String = chars[j + 1..k].iter().collect();
        words.extend(content.split_whitespace().map(str::to_string));
        i = k + 1;
    }

    (words, remainder)
}
3973
3974fn extract_module_name_from_require_args(args: &[Node]) -> Option<String> {
3975    let first = args.first()?;
3976    match &first.kind {
3977        NodeKind::Identifier { name } => Some(name.clone()),
3978        NodeKind::String { value, .. } => {
3979            let cleaned = value.trim_matches('\'').trim_matches('"').trim();
3980            if cleaned.is_empty() {
3981                return None;
3982            }
3983            Some(cleaned.trim_end_matches(".pm").replace('/', "::"))
3984        }
3985        _ => None,
3986    }
3987}
3988
3989fn extract_manual_import_symbols(args: &[Node]) -> Vec<String> {
3990    fn push_if_bareword(out: &mut Vec<String>, token: &str) {
3991        let bare = token.trim().trim_matches('"').trim_matches('\'').trim();
3992        if bare.is_empty() || bare == "," {
3993            return;
3994        }
3995        let is_bareword = bare.bytes().all(|ch| ch.is_ascii_alphanumeric() || ch == b'_')
3996            && bare.as_bytes().first().is_some_and(|ch| ch.is_ascii_alphabetic() || *ch == b'_');
3997        if is_bareword {
3998            out.push(bare.to_string());
3999        }
4000    }
4001
4002    let mut symbols = Vec::new();
4003    for arg in args {
4004        match &arg.kind {
4005            NodeKind::String { value, .. } => push_if_bareword(&mut symbols, value),
4006            NodeKind::Identifier { name } => {
4007                if name.starts_with("qw") {
4008                    let content = name
4009                        .trim_start_matches("qw")
4010                        .trim_start_matches(|c: char| "([{/<|!".contains(c))
4011                        .trim_end_matches(|c: char| ")]}/|!>".contains(c));
4012                    for token in content.split_whitespace() {
4013                        push_if_bareword(&mut symbols, token);
4014                    }
4015                } else {
4016                    push_if_bareword(&mut symbols, name);
4017                }
4018            }
4019            NodeKind::ArrayLiteral { elements } => {
4020                for element in elements {
4021                    if let NodeKind::String { value, .. } = &element.kind {
4022                        push_if_bareword(&mut symbols, value);
4023                    }
4024                }
4025            }
4026            _ => {}
4027        }
4028    }
4029    symbols.sort();
4030    symbols.dedup();
4031    symbols
4032}
4033
4034/// Extract constant names from the `args` field of a `use constant` `NodeKind::Use` node.
4035///
4036/// The parser serialises `use constant` args in two distinct forms:
4037///
4038/// **Scalar form** — `use constant FOO => 42;`
4039///   → args: `["FOO", "42"]`  (the `=>` is consumed by the parser, not stored)
4040///   → The first arg is the constant name; remaining args are the value.
4041///
4042/// **Hash form** — `use constant { FOO => 1, BAR => 2 };`
4043///   → args: `["{", "FOO", "=>", "1", ",", "BAR", "=>", "2", "}"]`
4044///   → Identifiers immediately followed by `=>` are constant names.
4045///
4046/// **qw form** — `use constant qw(FOO BAR);`
4047///   → args: `["qw(FOO BAR)"]`
4048///   → Words inside the qw list are constant names.
4049///
4050/// Returns a deduplicated list of bare constant names (e.g. `["FOO", "BAR"]`).
4051#[cfg(test)]
4052fn extract_constant_names_from_use_args(args: &[String]) -> Vec<String> {
4053    use std::collections::HashSet;
4054
4055    fn push_unique(names: &mut Vec<String>, seen: &mut HashSet<String>, candidate: &str) {
4056        if seen.insert(candidate.to_string()) {
4057            names.push(candidate.to_string());
4058        }
4059    }
4060
4061    fn normalize_constant_name(token: &str) -> Option<&str> {
4062        let stripped = token.trim_matches(|c: char| {
4063            matches!(c, '\'' | '"' | '(' | ')' | '[' | ']' | '{' | '}' | ',' | ';')
4064        });
4065
4066        if stripped.is_empty() || stripped.starts_with('-') {
4067            return None;
4068        }
4069
4070        stripped.chars().all(|c| c.is_alphanumeric() || c == '_').then_some(stripped)
4071    }
4072
4073    let mut names = Vec::new();
4074    let mut seen = HashSet::new();
4075
4076    // Scalar form (most common): args = ["FOO", <value...>]
4077    // The first arg is a plain identifier with no `=>` in args at all.
4078    // Hash form starts with `{`; qw form starts with `qw`.
4079    let first = match args.first() {
4080        Some(f) => f.as_str(),
4081        None => return names,
4082    };
4083
4084    // qw form: single arg starting with "qw"
4085    if first.starts_with("qw") {
4086        let (qw_words, remainder) = extract_qw_words(first);
4087        if remainder.trim().is_empty() {
4088            for word in qw_words {
4089                if let Some(candidate) = normalize_constant_name(&word) {
4090                    push_unique(&mut names, &mut seen, candidate);
4091                }
4092            }
4093            return names;
4094        }
4095
4096        // Fallback for odd tokenisation: tolerate `qw` followed by spacing before the opener.
4097        let content = first.trim_start_matches("qw").trim_start();
4098        let content = content
4099            .trim_start_matches(|c: char| "([{/<|!".contains(c))
4100            .trim_end_matches(|c: char| ")]}/|!>".contains(c));
4101        for word in content.split_whitespace() {
4102            if let Some(candidate) = normalize_constant_name(word) {
4103                push_unique(&mut names, &mut seen, candidate);
4104            }
4105        }
4106        return names;
4107    }
4108
4109    // Hash form: args start with "{", "+{", or "+" followed by "{"
4110    let starts_hash_form = first == "{"
4111        || first == "+{"
4112        || (first == "+" && args.get(1).map(String::as_str) == Some("{"));
4113    if starts_hash_form {
4114        let mut skipped_leading_plus = false;
4115        let mut iter = args.iter().peekable();
4116        while let Some(arg) = iter.next() {
4117            // Some parser/tokenizer variants can emit "+{" as a single token for
4118            // `use constant +{ ... }`. Treat it as structural punctuation.
4119            if arg == "+{" {
4120                skipped_leading_plus = true;
4121                continue;
4122            }
4123            if arg == "+" && !skipped_leading_plus {
4124                skipped_leading_plus = true;
4125                continue;
4126            }
4127            if arg == "{" || arg == "}" || arg == "," || arg == "=>" {
4128                continue;
4129            }
4130            if let Some(candidate) = normalize_constant_name(arg)
4131                && iter.peek().map(|s| s.as_str()) == Some("=>")
4132            {
4133                push_unique(&mut names, &mut seen, candidate);
4134            }
4135        }
4136        return names;
4137    }
4138
4139    // Scalar form: first arg is the constant name (if it is a plain identifier)
4140    // Remaining args are the value and are skipped.
4141    if let Some(candidate) = normalize_constant_name(first) {
4142        push_unique(&mut names, &mut seen, candidate);
4143    }
4144
4145    names
4146}
4147
4148impl Default for WorkspaceIndex {
4149    fn default() -> Self {
4150        Self::new()
4151    }
4152}
4153
/// LSP adapter utilities for Navigate/Analyze workflows.
///
/// Converts internal index `Location` values into `lsp_types` locations.
#[cfg(all(feature = "workspace", feature = "lsp-compat"))]
pub mod lsp_adapter {
    use super::Location as IxLocation;
    use lsp_types::Location as LspLocation;
    // lsp_types uses Uri, not Url
    type LspUrl = lsp_types::Uri;

    /// Convert an internal location to an LSP Location for Navigate workflows.
    ///
    /// # Arguments
    ///
    /// * `ix` - Internal index location with URI and range information.
    ///
    /// # Returns
    ///
    /// `Some(LspLocation)` when conversion succeeds, or `None` if URI parsing fails.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_parser::workspace_index::{Location as IxLocation, lsp_adapter::to_lsp_location};
    /// use lsp_types::Range;
    ///
    /// let ix_loc = IxLocation { uri: "file:///path.pl".to_string(), range: Range::default() };
    /// let _ = to_lsp_location(&ix_loc);
    /// ```
    pub fn to_lsp_location(ix: &IxLocation) -> Option<LspLocation> {
        // Bail out early when the stored URI cannot be turned into an LSP URI.
        let uri = parse_url(&ix.uri)?;
        let (first, last) = (&ix.range.start, &ix.range.end);
        let range = lsp_types::Range {
            start: lsp_types::Position { line: first.line, character: first.column },
            end: lsp_types::Position { line: last.line, character: last.column },
        };
        Some(LspLocation { uri, range })
    }

    /// Convert multiple index locations to LSP Locations for Navigate/Analyze workflows.
    ///
    /// # Arguments
    ///
    /// * `all` - Iterator of internal index locations to convert.
    ///
    /// # Returns
    ///
    /// Vector of successfully converted LSP locations, with invalid entries filtered out.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_parser::workspace_index::{Location as IxLocation, lsp_adapter::to_lsp_locations};
    /// use lsp_types::Range;
    ///
    /// let locations = vec![IxLocation { uri: "file:///script1.pl".to_string(), range: Range::default() }];
    /// let lsp_locations = to_lsp_locations(locations);
    /// assert_eq!(lsp_locations.len(), 1);
    /// ```
    pub fn to_lsp_locations(all: impl IntoIterator<Item = IxLocation>) -> Vec<LspLocation> {
        let mut converted = Vec::new();
        for ix in all {
            if let Some(location) = to_lsp_location(&ix) {
                converted.push(location);
            }
        }
        converted
    }

    /// Parse a string as an LSP URI, falling back to treating it as a filesystem path.
    #[cfg(not(target_arch = "wasm32"))]
    fn parse_url(s: &str) -> Option<LspUrl> {
        // lsp_types::Uri uses FromStr, not TryFrom
        use std::str::FromStr;

        // Preferred route: the input already is a URI.
        if let Ok(uri) = LspUrl::from_str(s) {
            return Some(uri);
        }

        // Otherwise interpret it as a path; it must canonicalize to be usable.
        let canonical = std::path::Path::new(s).canonicalize().ok()?;
        // Use proper URI construction with percent-encoding
        let uri_string = crate::workspace_index::fs_path_to_uri(&canonical).ok()?;
        LspUrl::from_str(&uri_string).ok()
    }

    /// Parse a string as a URL (wasm32 version - no filesystem fallback)
    #[cfg(target_arch = "wasm32")]
    fn parse_url(s: &str) -> Option<LspUrl> {
        use std::str::FromStr;

        let parsed = LspUrl::from_str(s);
        parsed.ok()
    }
}
4241
4242#[cfg(test)]
4243mod tests {
4244    use super::*;
4245    use perl_tdd_support::{must, must_some};
4246
4247    #[test]
4248    fn test_use_constant_indexed_as_constant_symbol() {
4249        let index = WorkspaceIndex::new();
4250        let uri = "file:///lib/My/Config.pm";
4251        let code = r#"package My::Config;
4252use constant PI => 3.14159;
4253use constant {
4254    MAX_RETRIES => 3,
4255    TIMEOUT     => 30,
4256};
42571;
4258"#;
4259        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4260
4261        let symbols = index.file_symbols(uri);
4262        assert!(
4263            symbols.iter().any(|s| s.name == "PI" && s.kind == SymbolKind::Constant),
4264            "PI should be indexed as a Constant symbol; got: {:?}",
4265            symbols.iter().map(|s| (&s.name, &s.kind)).collect::<Vec<_>>()
4266        );
4267        assert!(
4268            symbols.iter().any(|s| s.name == "MAX_RETRIES" && s.kind == SymbolKind::Constant),
4269            "MAX_RETRIES should be indexed"
4270        );
4271        assert!(
4272            symbols.iter().any(|s| s.name == "TIMEOUT" && s.kind == SymbolKind::Constant),
4273            "TIMEOUT should be indexed"
4274        );
4275
4276        // Qualified lookup should also work
4277        let def = index.find_definition("My::Config::PI");
4278        assert!(def.is_some(), "find_definition('My::Config::PI') should succeed");
4279    }
4280
4281    #[test]
4282    fn test_extract_constant_names_deduplicates_qw_form() {
4283        let names = extract_constant_names_from_use_args(&["qw(FOO BAR FOO)".to_string()]);
4284        assert_eq!(names, vec!["FOO", "BAR"]);
4285    }
4286
4287    #[test]
4288    fn test_extract_constant_names_accepts_quoted_scalar_form() {
4289        let names = extract_constant_names_from_use_args(&[
4290            "'HTTP_OK'".to_string(),
4291            "=>".to_string(),
4292            "200".to_string(),
4293        ]);
4294        assert_eq!(names, vec!["HTTP_OK"]);
4295    }
4296
4297    #[test]
4298    fn test_extract_constant_names_accepts_quoted_hash_form() {
4299        let names = extract_constant_names_from_use_args(&[
4300            "{".to_string(),
4301            "'FOO'".to_string(),
4302            "=>".to_string(),
4303            "1".to_string(),
4304            ",".to_string(),
4305            "\"BAR\"".to_string(),
4306            "=>".to_string(),
4307            "2".to_string(),
4308            "}".to_string(),
4309        ]);
4310        assert_eq!(names, vec!["FOO", "BAR"]);
4311    }
4312
4313    #[test]
4314    fn test_extract_constant_names_accepts_plus_hash_form_split_tokens() {
4315        let names = extract_constant_names_from_use_args(&[
4316            "+".to_string(),
4317            "{".to_string(),
4318            "FOO".to_string(),
4319            "=>".to_string(),
4320            "1".to_string(),
4321            ",".to_string(),
4322            "BAR".to_string(),
4323            "=>".to_string(),
4324            "2".to_string(),
4325            "}".to_string(),
4326        ]);
4327        assert_eq!(names, vec!["FOO", "BAR"]);
4328    }
4329
4330    #[test]
4331    fn test_extract_constant_names_accepts_plus_hash_form_combined_token() {
4332        let names = extract_constant_names_from_use_args(&[
4333            "+{".to_string(),
4334            "FOO".to_string(),
4335            "=>".to_string(),
4336            "1".to_string(),
4337            ",".to_string(),
4338            "BAR".to_string(),
4339            "=>".to_string(),
4340            "2".to_string(),
4341            "}".to_string(),
4342        ]);
4343        assert_eq!(names, vec!["FOO", "BAR"]);
4344    }
4345    #[test]
4346    fn test_use_constant_duplicate_names_indexed_once() {
4347        let index = WorkspaceIndex::new();
4348        let uri = "file:///lib/My/DedupConfig.pm";
4349        let code = r#"package My::DedupConfig;
4350use constant {
4351    RETRY_COUNT => 3,
4352    RETRY_COUNT => 5,
4353};
43541;
4355"#;
4356        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4357
4358        let symbols = index.file_symbols(uri);
4359        let retry_count_symbols = symbols.iter().filter(|s| s.name == "RETRY_COUNT").count();
4360        assert_eq!(
4361            retry_count_symbols, 1,
4362            "RETRY_COUNT should be indexed once even when repeated in use constant hash form"
4363        );
4364    }
4365
4366    #[test]
4367    fn test_use_constant_plus_hash_form_indexes_keys() {
4368        let index = WorkspaceIndex::new();
4369        let uri = "file:///lib/My/PlusHash.pm";
4370        let code = r#"package My::PlusHash;
4371use constant +{
4372    FOO => 1,
4373    BAR => 2,
4374};
43751;
4376"#;
4377        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4378
4379        assert!(index.find_definition("My::PlusHash::FOO").is_some());
4380        assert!(index.find_definition("My::PlusHash::BAR").is_some());
4381    }
4382
4383    #[test]
4384    fn test_basic_indexing() {
4385        let index = WorkspaceIndex::new();
4386        let uri = "file:///test.pl";
4387
4388        let code = r#"
4389package MyPackage;
4390
4391sub hello {
4392    print "Hello";
4393}
4394
4395my $var = 42;
4396"#;
4397
4398        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4399
4400        // Should have indexed the package and subroutine
4401        let symbols = index.file_symbols(uri);
4402        assert!(symbols.iter().any(|s| s.name == "MyPackage" && s.kind == SymbolKind::Package));
4403        assert!(symbols.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
4404        assert!(symbols.iter().any(|s| s.name == "$var" && s.kind.is_variable()));
4405    }
4406
4407    #[test]
4408    fn test_package_symbol_has_no_container_name() {
4409        // Regression: project_symbol_declarations used to set container_name = Some("main")
4410        // for top-level package declarations because the IndexVisitor starts with
4411        // current_package = Some("main").  Package symbols are top-level declarations
4412        // and must have container_name = None.
4413        let index = WorkspaceIndex::new();
4414        let uri = "file:///lib/Foo.pm";
4415        let code = "package Foo;\nsub bar { }\n";
4416        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4417
4418        let symbols = index.file_symbols(uri);
4419        let pkg_sym = symbols.iter().find(|s| s.name == "Foo" && s.kind == SymbolKind::Package);
4420        assert!(pkg_sym.is_some(), "Package symbol not found");
4421        assert_eq!(
4422            pkg_sym.unwrap().container_name,
4423            None,
4424            "Package symbol must not carry a container (was 'main')"
4425        );
4426    }
4427
4428    #[test]
4429    fn test_my_variable_has_no_qualified_name() {
4430        // Regression: project_symbol_declarations used to set qualified_name = Some("Foo::x")
4431        // for `my $x` inside `package Foo`, making `find_definition("Foo::x")` return the
4432        // lexical variable.  `my` variables are not package-visible and must have
4433        // qualified_name = None so qualified lookups don't match them.
4434        let index = WorkspaceIndex::new();
4435        let uri = "file:///lib/Foo.pm";
4436        let code = "package Foo;\nsub bar { my $x = 1; }\n";
4437        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4438
4439        let symbols = index.file_symbols(uri);
4440        let var_sym = symbols.iter().find(|s| s.name == "$x" && s.kind.is_variable());
4441        assert!(var_sym.is_some(), "$x variable not indexed");
4442        assert_eq!(
4443            var_sym.unwrap().qualified_name,
4444            None,
4445            "my variable must not have a qualified_name"
4446        );
4447
4448        // `find_definition("Foo::x")` must not accidentally resolve to a lexical variable.
4449        assert!(
4450            index.find_definition("Foo::x").is_none(),
4451            "find_definition(\"Foo::x\") must not return a lexical my variable"
4452        );
4453    }
4454
4455    fn reference_kinds_for(
4456        index: &WorkspaceIndex,
4457        uri: &str,
4458        symbol_name: &str,
4459    ) -> Vec<ReferenceKind> {
4460        let files = index.files.read();
4461        let file = must_some(files.get(uri));
4462        file.references
4463            .get(symbol_name)
4464            .map(|refs| refs.iter().map(|r| r.kind).collect())
4465            .unwrap_or_default()
4466    }
4467
4468    #[test]
4469    fn test_reference_kinds_sub_definition_and_call_are_distinct() {
4470        let index = WorkspaceIndex::new();
4471        let uri = "file:///typed-refs-sub.pl";
4472        let code = "package TypedRefs;
4473sub foo { return 1; }
4474foo();
4475";
4476        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4477
4478        let kinds = reference_kinds_for(&index, uri, "foo");
4479        assert!(kinds.contains(&ReferenceKind::Definition));
4480        assert!(kinds.contains(&ReferenceKind::Usage));
4481    }
4482
4483    #[test]
4484    fn test_reference_kinds_variable_read_and_write_are_distinct() {
4485        let index = WorkspaceIndex::new();
4486        let uri = "file:///typed-refs-var.pl";
4487        let code = "my $value = 1;
4488$value = 2;
4489print $value;
4490";
4491        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4492
4493        let kinds = reference_kinds_for(&index, uri, "$value");
4494        assert!(kinds.contains(&ReferenceKind::Definition));
4495        assert!(kinds.contains(&ReferenceKind::Write));
4496        assert!(kinds.contains(&ReferenceKind::Read));
4497    }
4498
4499    #[test]
4500    fn test_reference_kinds_import_parent_and_export_ok_are_currently_import_only() {
4501        let index = WorkspaceIndex::new();
4502        let uri = "file:///typed-refs-import-export.pm";
4503        let code = "package Child;
4504use parent 'Base';
4505our @EXPORT_OK = qw(foo);
45061;
4507";
4508        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4509
4510        let parent_kinds = reference_kinds_for(&index, uri, "Base");
4511        assert!(
4512            parent_kinds.is_empty(),
4513            "use parent inheritance edges are currently not stored as typed references"
4514        );
4515
4516        let export_symbol_kinds = reference_kinds_for(&index, uri, "foo");
4517        assert!(
4518            export_symbol_kinds.is_empty(),
4519            "EXPORT_OK entries are currently not represented as reference edges"
4520        );
4521    }
4522
4523    #[test]
4524    fn test_reference_kinds_dynamic_and_meta_edges_are_not_typed_yet() {
4525        let index = WorkspaceIndex::new();
4526        let uri = "file:///typed-refs-dynamic.pl";
4527        let code = r#"package TypedRefs;
4528sub foo { 1 }
4529&foo;
4530my $code = \&foo;
4531goto &foo;
4532*alias = \&foo;
4533eval "foo()";
4534with 'RoleName';
4535has 'name' => (is => 'ro');
45361;
4537"#;
4538        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4539
4540        let foo_kinds = reference_kinds_for(&index, uri, "foo");
4541        assert!(
4542            foo_kinds
4543                .iter()
4544                .all(|kind| matches!(kind, ReferenceKind::Definition | ReferenceKind::Usage)),
4545            r"dynamic call forms (&foo, \&foo, goto &foo) are currently flattened to Usage"
4546        );
4547
4548        assert!(
4549            reference_kinds_for(&index, uri, "RoleName").is_empty(),
4550            "role composition edges (`with 'RoleName'`) are not indexed as typed references yet"
4551        );
4552    }
4553
4554    #[test]
4555    fn test_find_references() {
4556        let index = WorkspaceIndex::new();
4557        let uri = "file:///test.pl";
4558
4559        let code = r#"
4560sub test {
4561    my $x = 1;
4562    $x = 2;
4563    print $x;
4564}
4565"#;
4566
4567        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4568
4569        let refs = index.find_references("$x");
4570        assert!(refs.len() >= 2); // Definition + at least one usage
4571    }
4572
4573    #[test]
4574    fn test_find_references_bare_name_includes_qualified_calls() {
4575        let index = WorkspaceIndex::new();
4576        let uri = "file:///refs.pl";
4577        let code = r#"
4578package RefDemo;
4579sub helper {
4580    return 1;
4581}
4582
4583helper();
4584RefDemo::helper();
4585"#;
4586
4587        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4588
4589        let bare_refs = index.find_references("helper");
4590        let qualified_refs = index.find_references("RefDemo::helper");
4591
4592        assert!(
4593            bare_refs.len() >= qualified_refs.len(),
4594            "bare-name reference lookup should include qualified calls"
4595        );
4596    }
4597
4598    #[test]
4599    fn test_count_usages_bare_name_includes_qualified_calls() {
4600        let index = WorkspaceIndex::new();
4601        let uri = "file:///usage.pl";
4602        let code = r#"
4603package UsageDemo;
4604sub helper {
4605    return 1;
4606}
4607
4608helper();
4609UsageDemo::helper();
4610"#;
4611
4612        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4613
4614        let bare_usage_count = index.count_usages("helper");
4615        let qualified_usage_count = index.count_usages("UsageDemo::helper");
4616
4617        assert!(
4618            bare_usage_count >= qualified_usage_count,
4619            "bare-name usage count should include qualified call sites"
4620        );
4621    }
4622
4623    #[test]
4624    fn test_dependencies() {
4625        let index = WorkspaceIndex::new();
4626        let uri = "file:///test.pl";
4627
4628        let code = r#"
4629use strict;
4630use warnings;
4631use Data::Dumper;
4632"#;
4633
4634        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4635
4636        let deps = index.file_dependencies(uri);
4637        assert!(deps.contains("strict"));
4638        assert!(deps.contains("warnings"));
4639        assert!(deps.contains("Data::Dumper"));
4640    }
4641
4642    #[test]
4643    fn test_uri_to_fs_path_basic() {
4644        // Test basic file:// URI conversion
4645        if let Some(path) = uri_to_fs_path("file:///tmp/test.pl") {
4646            assert_eq!(path, std::path::PathBuf::from("/tmp/test.pl"));
4647        }
4648
4649        // Test with invalid URI
4650        assert!(uri_to_fs_path("not-a-uri").is_none());
4651
4652        // Test with non-file scheme
4653        assert!(uri_to_fs_path("http://example.com").is_none());
4654    }
4655
4656    #[test]
4657    fn test_uri_to_fs_path_with_spaces() {
4658        // Test with percent-encoded spaces
4659        if let Some(path) = uri_to_fs_path("file:///tmp/path%20with%20spaces/test.pl") {
4660            assert_eq!(path, std::path::PathBuf::from("/tmp/path with spaces/test.pl"));
4661        }
4662
4663        // Test with multiple spaces and special characters
4664        if let Some(path) = uri_to_fs_path("file:///tmp/My%20Documents/test%20file.pl") {
4665            assert_eq!(path, std::path::PathBuf::from("/tmp/My Documents/test file.pl"));
4666        }
4667    }
4668
4669    #[test]
4670    fn test_uri_to_fs_path_with_unicode() {
4671        // Test with Unicode characters (percent-encoded)
4672        if let Some(path) = uri_to_fs_path("file:///tmp/caf%C3%A9/test.pl") {
4673            assert_eq!(path, std::path::PathBuf::from("/tmp/café/test.pl"));
4674        }
4675
4676        // Test with Unicode emoji (percent-encoded)
4677        if let Some(path) = uri_to_fs_path("file:///tmp/emoji%F0%9F%98%80/test.pl") {
4678            assert_eq!(path, std::path::PathBuf::from("/tmp/emoji😀/test.pl"));
4679        }
4680    }
4681
4682    #[test]
4683    fn test_fs_path_to_uri_basic() {
4684        // Test basic path to URI conversion
4685        let result = fs_path_to_uri("/tmp/test.pl");
4686        assert!(result.is_ok());
4687        let uri = must(result);
4688        assert!(uri.starts_with("file://"));
4689        assert!(uri.contains("/tmp/test.pl"));
4690    }
4691
4692    #[test]
4693    fn test_fs_path_to_uri_with_spaces() {
4694        // Test path with spaces
4695        let result = fs_path_to_uri("/tmp/path with spaces/test.pl");
4696        assert!(result.is_ok());
4697        let uri = must(result);
4698        assert!(uri.starts_with("file://"));
4699        // Should contain percent-encoded spaces
4700        assert!(uri.contains("path%20with%20spaces"));
4701    }
4702
4703    #[test]
4704    fn test_fs_path_to_uri_with_unicode() {
4705        // Test path with Unicode characters
4706        let result = fs_path_to_uri("/tmp/café/test.pl");
4707        assert!(result.is_ok());
4708        let uri = must(result);
4709        assert!(uri.starts_with("file://"));
4710        // Should contain percent-encoded Unicode
4711        assert!(uri.contains("caf%C3%A9"));
4712    }
4713
4714    #[test]
4715    fn test_normalize_uri_file_schemes() {
4716        // Test normalization of valid file URIs
4717        let uri = WorkspaceIndex::normalize_uri("file:///tmp/test.pl");
4718        assert_eq!(uri, "file:///tmp/test.pl");
4719
4720        // Test normalization of URIs with spaces
4721        let uri = WorkspaceIndex::normalize_uri("file:///tmp/path%20with%20spaces/test.pl");
4722        assert_eq!(uri, "file:///tmp/path%20with%20spaces/test.pl");
4723    }
4724
4725    #[test]
4726    fn test_normalize_uri_absolute_paths() {
4727        // Test normalization of absolute paths (convert to file:// URI)
4728        let uri = WorkspaceIndex::normalize_uri("/tmp/test.pl");
4729        assert!(uri.starts_with("file://"));
4730        assert!(uri.contains("/tmp/test.pl"));
4731    }
4732
4733    #[test]
4734    fn test_normalize_uri_special_schemes() {
4735        // Test that special schemes like untitled: are preserved
4736        let uri = WorkspaceIndex::normalize_uri("untitled:Untitled-1");
4737        assert_eq!(uri, "untitled:Untitled-1");
4738    }
4739
4740    #[test]
4741    fn test_roundtrip_conversion() {
4742        // Test that URI -> path -> URI conversion preserves the URI
4743        let original_uri = "file:///tmp/path%20with%20spaces/caf%C3%A9.pl";
4744
4745        if let Some(path) = uri_to_fs_path(original_uri) {
4746            if let Ok(converted_uri) = fs_path_to_uri(&path) {
4747                // Should be able to round-trip back to an equivalent URI
4748                assert!(converted_uri.starts_with("file://"));
4749
4750                // The path component should decode correctly
4751                if let Some(roundtrip_path) = uri_to_fs_path(&converted_uri) {
4752                    #[cfg(windows)]
4753                    if let Ok(rootless) = path.strip_prefix(std::path::Path::new(r"\")) {
4754                        assert!(roundtrip_path.ends_with(rootless));
4755                    } else {
4756                        assert_eq!(path, roundtrip_path);
4757                    }
4758
4759                    #[cfg(not(windows))]
4760                    assert_eq!(path, roundtrip_path);
4761                }
4762            }
4763        }
4764    }
4765
4766    #[cfg(target_os = "windows")]
4767    #[test]
4768    fn test_windows_paths() {
4769        // Test Windows-style paths
4770        let result = fs_path_to_uri(r"C:\Users\test\Documents\script.pl");
4771        assert!(result.is_ok());
4772        let uri = must(result);
4773        assert!(uri.starts_with("file://"));
4774
4775        // Test Windows path with spaces
4776        let result = fs_path_to_uri(r"C:\Program Files\My App\script.pl");
4777        assert!(result.is_ok());
4778        let uri = must(result);
4779        assert!(uri.starts_with("file://"));
4780        assert!(uri.contains("Program%20Files"));
4781    }
4782
4783    // ========================================================================
4784    // IndexCoordinator Tests
4785    // ========================================================================
4786
4787    #[test]
4788    fn test_coordinator_initial_state() {
4789        let coordinator = IndexCoordinator::new();
4790        assert!(matches!(
4791            coordinator.state(),
4792            IndexState::Building { phase: IndexPhase::Idle, .. }
4793        ));
4794    }
4795
4796    #[test]
4797    fn test_transition_to_scanning_phase() {
4798        let coordinator = IndexCoordinator::new();
4799        coordinator.transition_to_scanning();
4800
4801        let state = coordinator.state();
4802        assert!(
4803            matches!(state, IndexState::Building { phase: IndexPhase::Scanning, .. }),
4804            "Expected Building state after scanning, got: {:?}",
4805            state
4806        );
4807    }
4808
4809    #[test]
4810    fn test_transition_to_indexing_phase() {
4811        let coordinator = IndexCoordinator::new();
4812        coordinator.transition_to_scanning();
4813        coordinator.update_scan_progress(3);
4814        coordinator.transition_to_indexing(3);
4815
4816        let state = coordinator.state();
4817        assert!(
4818            matches!(
4819                state,
4820                IndexState::Building { phase: IndexPhase::Indexing, total_count: 3, .. }
4821            ),
4822            "Expected Building state after indexing with total_count 3, got: {:?}",
4823            state
4824        );
4825    }
4826
4827    #[test]
4828    fn test_transition_to_ready() {
4829        let coordinator = IndexCoordinator::new();
4830        coordinator.transition_to_ready(100, 5000);
4831
4832        let state = coordinator.state();
4833        if let IndexState::Ready { file_count, symbol_count, .. } = state {
4834            assert_eq!(file_count, 100);
4835            assert_eq!(symbol_count, 5000);
4836        } else {
4837            unreachable!("Expected Ready state, got: {:?}", state);
4838        }
4839    }
4840
4841    #[test]
4842    fn test_parse_storm_degradation() {
4843        let coordinator = IndexCoordinator::new();
4844        coordinator.transition_to_ready(100, 5000);
4845
4846        // Trigger parse storm
4847        for _ in 0..15 {
4848            coordinator.notify_change("file.pm");
4849        }
4850
4851        let state = coordinator.state();
4852        assert!(
4853            matches!(state, IndexState::Degraded { .. }),
4854            "Expected Degraded state, got: {:?}",
4855            state
4856        );
4857        if let IndexState::Degraded { reason, .. } = state {
4858            assert!(matches!(reason, DegradationReason::ParseStorm { .. }));
4859        }
4860    }
4861
4862    #[test]
4863    fn test_recovery_from_parse_storm() {
4864        let coordinator = IndexCoordinator::new();
4865        coordinator.transition_to_ready(100, 5000);
4866
4867        // Trigger parse storm
4868        for _ in 0..15 {
4869            coordinator.notify_change("file.pm");
4870        }
4871
4872        // Complete all parses
4873        for _ in 0..15 {
4874            coordinator.notify_parse_complete("file.pm");
4875        }
4876
4877        // Should recover to Building state
4878        assert!(matches!(coordinator.state(), IndexState::Building { .. }));
4879    }
4880
4881    #[test]
4882    fn test_query_dispatch_ready() {
4883        let coordinator = IndexCoordinator::new();
4884        coordinator.transition_to_ready(100, 5000);
4885
4886        let result = coordinator.query(|_index| "full_query", |_index| "partial_query");
4887
4888        assert_eq!(result, "full_query");
4889    }
4890
4891    #[test]
4892    fn test_query_dispatch_degraded() {
4893        let coordinator = IndexCoordinator::new();
4894        // Building state should use partial query
4895
4896        let result = coordinator.query(|_index| "full_query", |_index| "partial_query");
4897
4898        assert_eq!(result, "partial_query");
4899    }
4900
4901    #[test]
4902    fn test_metrics_pending_count() {
4903        let coordinator = IndexCoordinator::new();
4904
4905        coordinator.notify_change("file1.pm");
4906        coordinator.notify_change("file2.pm");
4907
4908        assert_eq!(coordinator.metrics.pending_count(), 2);
4909
4910        coordinator.notify_parse_complete("file1.pm");
4911        assert_eq!(coordinator.metrics.pending_count(), 1);
4912    }
4913
4914    #[test]
4915    fn test_instrumentation_records_transitions() {
4916        let coordinator = IndexCoordinator::new();
4917        coordinator.transition_to_ready(10, 100);
4918
4919        let snapshot = coordinator.instrumentation_snapshot();
4920        let transition =
4921            IndexStateTransition { from: IndexStateKind::Building, to: IndexStateKind::Ready };
4922        let count = snapshot.state_transition_counts.get(&transition).copied().unwrap_or(0);
4923        assert_eq!(count, 1);
4924    }
4925
4926    #[test]
4927    fn test_instrumentation_records_early_exit() {
4928        let coordinator = IndexCoordinator::new();
4929        coordinator.record_early_exit(EarlyExitReason::InitialTimeBudget, 25, 1, 10);
4930
4931        let snapshot = coordinator.instrumentation_snapshot();
4932        let count = snapshot
4933            .early_exit_counts
4934            .get(&EarlyExitReason::InitialTimeBudget)
4935            .copied()
4936            .unwrap_or(0);
4937        assert_eq!(count, 1);
4938        assert!(snapshot.last_early_exit.is_some());
4939    }
4940
4941    #[test]
4942    fn test_custom_limits() {
4943        let limits = IndexResourceLimits {
4944            max_files: 5000,
4945            max_symbols_per_file: 1000,
4946            max_total_symbols: 100_000,
4947            max_ast_cache_bytes: 128 * 1024 * 1024,
4948            max_ast_cache_items: 50,
4949            max_scan_duration_ms: 30_000,
4950        };
4951
4952        let coordinator = IndexCoordinator::with_limits(limits.clone());
4953        assert_eq!(coordinator.limits.max_files, 5000);
4954        assert_eq!(coordinator.limits.max_total_symbols, 100_000);
4955    }
4956
4957    #[test]
4958    fn test_degradation_preserves_symbol_count() {
4959        let coordinator = IndexCoordinator::new();
4960        coordinator.transition_to_ready(100, 5000);
4961
4962        coordinator.transition_to_degraded(DegradationReason::IoError {
4963            message: "Test error".to_string(),
4964        });
4965
4966        let state = coordinator.state();
4967        assert!(
4968            matches!(state, IndexState::Degraded { .. }),
4969            "Expected Degraded state, got: {:?}",
4970            state
4971        );
4972        if let IndexState::Degraded { available_symbols, .. } = state {
4973            assert_eq!(available_symbols, 5000);
4974        }
4975    }
4976
4977    #[test]
4978    fn test_index_access() {
4979        let coordinator = IndexCoordinator::new();
4980        let index = coordinator.index();
4981
4982        // Should have access to underlying WorkspaceIndex
4983        assert!(index.all_symbols().is_empty());
4984    }
4985
4986    #[test]
4987    fn test_resource_limit_enforcement_max_files() {
4988        let limits = IndexResourceLimits {
4989            max_files: 5,
4990            max_symbols_per_file: 1000,
4991            max_total_symbols: 50_000,
4992            max_ast_cache_bytes: 128 * 1024 * 1024,
4993            max_ast_cache_items: 50,
4994            max_scan_duration_ms: 30_000,
4995        };
4996
4997        let coordinator = IndexCoordinator::with_limits(limits);
4998        coordinator.transition_to_ready(10, 100);
4999
5000        // Index 10 files (exceeds limit of 5)
5001        for i in 0..10 {
5002            let uri_str = format!("file:///test{}.pl", i);
5003            let uri = must(url::Url::parse(&uri_str));
5004            let code = "sub test { }";
5005            must(coordinator.index().index_file(uri, code.to_string()));
5006        }
5007
5008        // Enforce limits
5009        coordinator.enforce_limits();
5010
5011        let state = coordinator.state();
5012        assert!(
5013            matches!(
5014                state,
5015                IndexState::Degraded {
5016                    reason: DegradationReason::ResourceLimit { kind: ResourceKind::MaxFiles },
5017                    ..
5018                }
5019            ),
5020            "Expected Degraded state with ResourceLimit(MaxFiles), got: {:?}",
5021            state
5022        );
5023    }
5024
5025    #[test]
5026    fn test_resource_limit_enforcement_max_symbols() {
5027        let limits = IndexResourceLimits {
5028            max_files: 100,
5029            max_symbols_per_file: 10,
5030            max_total_symbols: 50, // Very low limit for testing
5031            max_ast_cache_bytes: 128 * 1024 * 1024,
5032            max_ast_cache_items: 50,
5033            max_scan_duration_ms: 30_000,
5034        };
5035
5036        let coordinator = IndexCoordinator::with_limits(limits);
5037        coordinator.transition_to_ready(0, 0);
5038
5039        // Index files with many symbols to exceed total symbol limit
5040        for i in 0..10 {
5041            let uri_str = format!("file:///test{}.pl", i);
5042            let uri = must(url::Url::parse(&uri_str));
5043            // Each file has 10 subroutines = 100 total symbols (exceeds limit of 50)
5044            let code = r#"
5045package Test;
5046sub sub1 { }
5047sub sub2 { }
5048sub sub3 { }
5049sub sub4 { }
5050sub sub5 { }
5051sub sub6 { }
5052sub sub7 { }
5053sub sub8 { }
5054sub sub9 { }
5055sub sub10 { }
5056"#;
5057            must(coordinator.index().index_file(uri, code.to_string()));
5058        }
5059
5060        // Enforce limits
5061        coordinator.enforce_limits();
5062
5063        let state = coordinator.state();
5064        assert!(
5065            matches!(
5066                state,
5067                IndexState::Degraded {
5068                    reason: DegradationReason::ResourceLimit { kind: ResourceKind::MaxSymbols },
5069                    ..
5070                }
5071            ),
5072            "Expected Degraded state with ResourceLimit(MaxSymbols), got: {:?}",
5073            state
5074        );
5075    }
5076
5077    #[test]
5078    fn test_check_limits_returns_none_within_bounds() {
5079        let coordinator = IndexCoordinator::new();
5080        coordinator.transition_to_ready(0, 0);
5081
5082        // Index a few files well within default limits
5083        for i in 0..5 {
5084            let uri_str = format!("file:///test{}.pl", i);
5085            let uri = must(url::Url::parse(&uri_str));
5086            let code = "sub test { }";
5087            must(coordinator.index().index_file(uri, code.to_string()));
5088        }
5089
5090        // Should not trigger degradation
5091        let limit_check = coordinator.check_limits();
5092        assert!(limit_check.is_none(), "check_limits should return None when within bounds");
5093
5094        // State should still be Ready
5095        assert!(
5096            matches!(coordinator.state(), IndexState::Ready { .. }),
5097            "State should remain Ready when within limits"
5098        );
5099    }
5100
5101    #[test]
5102    fn test_enforce_limits_called_on_transition_to_ready() {
5103        let limits = IndexResourceLimits {
5104            max_files: 3,
5105            max_symbols_per_file: 1000,
5106            max_total_symbols: 50_000,
5107            max_ast_cache_bytes: 128 * 1024 * 1024,
5108            max_ast_cache_items: 50,
5109            max_scan_duration_ms: 30_000,
5110        };
5111
5112        let coordinator = IndexCoordinator::with_limits(limits);
5113
5114        // Index files before transitioning to ready
5115        for i in 0..5 {
5116            let uri_str = format!("file:///test{}.pl", i);
5117            let uri = must(url::Url::parse(&uri_str));
5118            let code = "sub test { }";
5119            must(coordinator.index().index_file(uri, code.to_string()));
5120        }
5121
5122        // Transition to ready - should automatically enforce limits
5123        coordinator.transition_to_ready(5, 100);
5124
5125        let state = coordinator.state();
5126        assert!(
5127            matches!(
5128                state,
5129                IndexState::Degraded {
5130                    reason: DegradationReason::ResourceLimit { kind: ResourceKind::MaxFiles },
5131                    ..
5132                }
5133            ),
5134            "Expected Degraded state after transition_to_ready with exceeded limits, got: {:?}",
5135            state
5136        );
5137    }
5138
5139    #[test]
5140    fn test_state_transition_guard_ready_to_ready() {
5141        // Test that Ready → Ready is allowed (metrics update)
5142        let coordinator = IndexCoordinator::new();
5143        coordinator.transition_to_ready(100, 5000);
5144
5145        // Transition to Ready again with different metrics
5146        coordinator.transition_to_ready(150, 7500);
5147
5148        let state = coordinator.state();
5149        assert!(
5150            matches!(state, IndexState::Ready { file_count: 150, symbol_count: 7500, .. }),
5151            "Expected Ready state with updated metrics, got: {:?}",
5152            state
5153        );
5154    }
5155
5156    #[test]
5157    fn test_state_transition_guard_building_to_building() {
5158        // Test that Building → Building is allowed (progress update)
5159        let coordinator = IndexCoordinator::new();
5160
5161        // Initial building state
5162        coordinator.transition_to_building(100);
5163
5164        let state = coordinator.state();
5165        assert!(
5166            matches!(state, IndexState::Building { indexed_count: 0, total_count: 100, .. }),
5167            "Expected Building state, got: {:?}",
5168            state
5169        );
5170
5171        // Update total count
5172        coordinator.transition_to_building(200);
5173
5174        let state = coordinator.state();
5175        assert!(
5176            matches!(state, IndexState::Building { indexed_count: 0, total_count: 200, .. }),
5177            "Expected Building state, got: {:?}",
5178            state
5179        );
5180    }
5181
5182    #[test]
5183    fn test_state_transition_ready_to_building() {
5184        // Test that Ready → Building is allowed (re-scan)
5185        let coordinator = IndexCoordinator::new();
5186        coordinator.transition_to_ready(100, 5000);
5187
5188        // Trigger re-scan
5189        coordinator.transition_to_building(150);
5190
5191        let state = coordinator.state();
5192        assert!(
5193            matches!(state, IndexState::Building { indexed_count: 0, total_count: 150, .. }),
5194            "Expected Building state after re-scan, got: {:?}",
5195            state
5196        );
5197    }
5198
5199    #[test]
5200    fn test_state_transition_degraded_to_building() {
5201        // Test that Degraded → Building is allowed (recovery)
5202        let coordinator = IndexCoordinator::new();
5203        coordinator.transition_to_degraded(DegradationReason::IoError {
5204            message: "Test error".to_string(),
5205        });
5206
5207        // Attempt recovery
5208        coordinator.transition_to_building(100);
5209
5210        let state = coordinator.state();
5211        assert!(
5212            matches!(state, IndexState::Building { indexed_count: 0, total_count: 100, .. }),
5213            "Expected Building state after recovery, got: {:?}",
5214            state
5215        );
5216    }
5217
5218    #[test]
5219    fn test_update_building_progress() {
5220        let coordinator = IndexCoordinator::new();
5221        coordinator.transition_to_building(100);
5222
5223        // Update progress
5224        coordinator.update_building_progress(50);
5225
5226        let state = coordinator.state();
5227        assert!(
5228            matches!(state, IndexState::Building { indexed_count: 50, total_count: 100, .. }),
5229            "Expected Building state with updated progress, got: {:?}",
5230            state
5231        );
5232
5233        // Update progress again
5234        coordinator.update_building_progress(100);
5235
5236        let state = coordinator.state();
5237        assert!(
5238            matches!(state, IndexState::Building { indexed_count: 100, total_count: 100, .. }),
5239            "Expected Building state with completed progress, got: {:?}",
5240            state
5241        );
5242    }
5243
5244    #[test]
5245    fn test_scan_timeout_detection() {
5246        // Test that scan timeout triggers degradation
5247        let limits = IndexResourceLimits {
5248            max_scan_duration_ms: 0, // Immediate timeout for testing
5249            ..Default::default()
5250        };
5251
5252        let coordinator = IndexCoordinator::with_limits(limits);
5253        coordinator.transition_to_building(100);
5254
5255        // Small sleep to ensure elapsed time > 0
5256        std::thread::sleep(std::time::Duration::from_millis(1));
5257
5258        // Update progress should detect timeout
5259        coordinator.update_building_progress(10);
5260
5261        let state = coordinator.state();
5262        assert!(
5263            matches!(
5264                state,
5265                IndexState::Degraded { reason: DegradationReason::ScanTimeout { .. }, .. }
5266            ),
5267            "Expected Degraded state with ScanTimeout, got: {:?}",
5268            state
5269        );
5270    }
5271
5272    #[test]
5273    fn test_scan_timeout_does_not_trigger_within_limit() {
5274        // Test that scan doesn't timeout within the limit
5275        let limits = IndexResourceLimits {
5276            max_scan_duration_ms: 10_000, // 10 seconds - should not trigger
5277            ..Default::default()
5278        };
5279
5280        let coordinator = IndexCoordinator::with_limits(limits);
5281        coordinator.transition_to_building(100);
5282
5283        // Update progress immediately (well within limit)
5284        coordinator.update_building_progress(50);
5285
5286        let state = coordinator.state();
5287        assert!(
5288            matches!(state, IndexState::Building { indexed_count: 50, .. }),
5289            "Expected Building state (no timeout), got: {:?}",
5290            state
5291        );
5292    }
5293
5294    #[test]
5295    fn test_early_exit_optimization_unchanged_content() {
5296        let index = WorkspaceIndex::new();
5297        let uri = must(url::Url::parse("file:///test.pl"));
5298        let code = r#"
5299package MyPackage;
5300
5301sub hello {
5302    print "Hello";
5303}
5304"#;
5305
5306        // First indexing should parse and index
5307        must(index.index_file(uri.clone(), code.to_string()));
5308        let symbols1 = index.file_symbols(uri.as_str());
5309        assert!(symbols1.iter().any(|s| s.name == "MyPackage" && s.kind == SymbolKind::Package));
5310        assert!(symbols1.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
5311
5312        // Second indexing with same content should early-exit
5313        // We can verify this by checking that the index still works correctly
5314        must(index.index_file(uri.clone(), code.to_string()));
5315        let symbols2 = index.file_symbols(uri.as_str());
5316        assert_eq!(symbols1.len(), symbols2.len());
5317        assert!(symbols2.iter().any(|s| s.name == "MyPackage" && s.kind == SymbolKind::Package));
5318        assert!(symbols2.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
5319    }
5320
5321    #[test]
5322    fn test_early_exit_optimization_changed_content() {
5323        let index = WorkspaceIndex::new();
5324        let uri = must(url::Url::parse("file:///test.pl"));
5325        let code1 = r#"
5326package MyPackage;
5327
5328sub hello {
5329    print "Hello";
5330}
5331"#;
5332
5333        let code2 = r#"
5334package MyPackage;
5335
5336sub goodbye {
5337    print "Goodbye";
5338}
5339"#;
5340
5341        // First indexing
5342        must(index.index_file(uri.clone(), code1.to_string()));
5343        let symbols1 = index.file_symbols(uri.as_str());
5344        assert!(symbols1.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
5345        assert!(!symbols1.iter().any(|s| s.name == "goodbye"));
5346
5347        // Second indexing with different content should re-parse
5348        must(index.index_file(uri.clone(), code2.to_string()));
5349        let symbols2 = index.file_symbols(uri.as_str());
5350        assert!(!symbols2.iter().any(|s| s.name == "hello"));
5351        assert!(symbols2.iter().any(|s| s.name == "goodbye" && s.kind == SymbolKind::Subroutine));
5352    }
5353
5354    #[test]
5355    fn test_early_exit_optimization_whitespace_only_change() {
5356        let index = WorkspaceIndex::new();
5357        let uri = must(url::Url::parse("file:///test.pl"));
5358        let code1 = r#"
5359package MyPackage;
5360
5361sub hello {
5362    print "Hello";
5363}
5364"#;
5365
5366        let code2 = r#"
5367package MyPackage;
5368
5369
5370sub hello {
5371    print "Hello";
5372}
5373"#;
5374
5375        // First indexing
5376        must(index.index_file(uri.clone(), code1.to_string()));
5377        let symbols1 = index.file_symbols(uri.as_str());
5378        assert!(symbols1.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
5379
5380        // Second indexing with whitespace change should re-parse (hash will differ)
5381        must(index.index_file(uri.clone(), code2.to_string()));
5382        let symbols2 = index.file_symbols(uri.as_str());
5383        // Symbols should still be found, but content hash differs so it re-indexed
5384        assert!(symbols2.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
5385    }
5386
5387    #[test]
5388    fn test_reindex_file_refreshes_symbol_cache_for_removed_names() {
5389        let index = WorkspaceIndex::new();
5390        let uri1 = must(url::Url::parse("file:///lib/A.pm"));
5391        let uri2 = must(url::Url::parse("file:///lib/B.pm"));
5392        let code1 = "package A;\nsub foo { return 1; }\n1;\n";
5393        let code2 = "package B;\nsub foo { return 2; }\n1;\n";
5394        let code2_reindexed = "package B;\nsub bar { return 3; }\n1;\n";
5395
5396        must(index.index_file(uri1.clone(), code1.to_string()));
5397        must(index.index_file(uri2.clone(), code2.to_string()));
5398        must(index.index_file(uri2.clone(), code2_reindexed.to_string()));
5399
5400        let foo_location = must_some(index.find_definition("foo"));
5401        assert_eq!(foo_location.uri, uri1.to_string());
5402
5403        let bar_location = must_some(index.find_definition("bar"));
5404        assert_eq!(bar_location.uri, uri2.to_string());
5405    }
5406
5407    #[test]
5408    fn test_remove_file_preserves_other_colliding_symbol_entries() {
5409        let index = WorkspaceIndex::new();
5410        let uri1 = must(url::Url::parse("file:///lib/A.pm"));
5411        let uri2 = must(url::Url::parse("file:///lib/B.pm"));
5412        let code1 = "package A;\nsub foo { return 1; }\n1;\n";
5413        let code2 = "package B;\nsub foo { return 2; }\n1;\n";
5414
5415        must(index.index_file(uri1.clone(), code1.to_string()));
5416        must(index.index_file(uri2.clone(), code2.to_string()));
5417
5418        index.remove_file(uri2.as_str());
5419
5420        let foo_location = must_some(index.find_definition("foo"));
5421        assert_eq!(foo_location.uri, uri1.to_string());
5422    }
5423
5424    #[test]
5425    fn test_count_usages_no_double_counting_for_qualified_calls() {
5426        let index = WorkspaceIndex::new();
5427
5428        // File 1: defines Utils::process_data
5429        let uri1 = "file:///lib/Utils.pm";
5430        let code1 = r#"
5431package Utils;
5432
5433sub process_data {
5434    return 1;
5435}
5436"#;
5437        must(index.index_file(must(url::Url::parse(uri1)), code1.to_string()));
5438
5439        // File 2: calls Utils::process_data (qualified call)
5440        let uri2 = "file:///app.pl";
5441        let code2 = r#"
5442use Utils;
5443Utils::process_data();
5444Utils::process_data();
5445"#;
5446        must(index.index_file(must(url::Url::parse(uri2)), code2.to_string()));
5447
5448        // Each qualified call is stored under both "process_data" and "Utils::process_data"
5449        // by the dual indexing strategy. count_usages should deduplicate so we get the
5450        // actual number of call sites, not double.
5451        let count = index.count_usages("Utils::process_data");
5452
5453        // We expect exactly 2 usage sites (the two calls in app.pl),
5454        // not 4 (which would be the double-counted result).
5455        assert_eq!(
5456            count, 2,
5457            "count_usages should not double-count qualified calls, got {} (expected 2)",
5458            count
5459        );
5460
5461        // find_references should also deduplicate
5462        let refs = index.find_references("Utils::process_data");
5463        let non_def_refs: Vec<_> =
5464            refs.iter().filter(|loc| loc.uri != "file:///lib/Utils.pm").collect();
5465        assert_eq!(
5466            non_def_refs.len(),
5467            2,
5468            "find_references should not return duplicates for qualified calls, got {} non-def refs",
5469            non_def_refs.len()
5470        );
5471    }
5472
5473    #[test]
5474    fn test_batch_indexing() {
5475        let index = WorkspaceIndex::new();
5476        let files: Vec<(Url, String)> = (0..5)
5477            .map(|i| {
5478                let uri = must(Url::parse(&format!("file:///batch/module{}.pm", i)));
5479                let code =
5480                    format!("package Batch::Mod{};\nsub func_{} {{ return {}; }}\n1;", i, i, i);
5481                (uri, code)
5482            })
5483            .collect();
5484
5485        let errors = index.index_files_batch(files);
5486        assert!(errors.is_empty(), "batch indexing errors: {:?}", errors);
5487        assert_eq!(index.file_count(), 5);
5488        assert!(index.find_definition("Batch::Mod0::func_0").is_some());
5489        assert!(index.find_definition("Batch::Mod4::func_4").is_some());
5490    }
5491
5492    #[test]
5493    fn test_batch_indexing_skips_unchanged() {
5494        let index = WorkspaceIndex::new();
5495        let uri = must(Url::parse("file:///batch/skip.pm"));
5496        let code = "package Skip;\nsub skip_fn { 1 }\n1;".to_string();
5497
5498        index.index_file(uri.clone(), code.clone()).ok();
5499        assert_eq!(index.file_count(), 1);
5500
5501        let errors = index.index_files_batch(vec![(uri, code)]);
5502        assert!(errors.is_empty());
5503        assert_eq!(index.file_count(), 1);
5504    }
5505
5506    #[test]
5507    fn test_incremental_update_preserves_other_symbols() {
5508        let index = WorkspaceIndex::new();
5509
5510        let uri_a = must(Url::parse("file:///incr/a.pm"));
5511        let uri_b = must(Url::parse("file:///incr/b.pm"));
5512        index.index_file(uri_a.clone(), "package A;\nsub a_func { 1 }\n1;".into()).ok();
5513        index.index_file(uri_b.clone(), "package B;\nsub b_func { 2 }\n1;".into()).ok();
5514
5515        assert!(index.find_definition("A::a_func").is_some());
5516        assert!(index.find_definition("B::b_func").is_some());
5517
5518        index.index_file(uri_a, "package A;\nsub a_func_v2 { 11 }\n1;".into()).ok();
5519
5520        assert!(index.find_definition("A::a_func_v2").is_some());
5521        assert!(index.find_definition("B::b_func").is_some());
5522    }
5523
5524    #[test]
5525    fn test_remove_file_preserves_shadowed_symbols() {
5526        let index = WorkspaceIndex::new();
5527
5528        let uri_a = must(Url::parse("file:///shadow/a.pm"));
5529        let uri_b = must(Url::parse("file:///shadow/b.pm"));
5530        index.index_file(uri_a.clone(), "package ShadowA;\nsub helper { 1 }\n1;".into()).ok();
5531        index.index_file(uri_b.clone(), "package ShadowB;\nsub helper { 2 }\n1;".into()).ok();
5532
5533        assert!(index.find_definition("helper").is_some());
5534
5535        index.remove_file_url(&uri_a);
5536        assert!(index.find_definition("helper").is_some());
5537        assert!(index.find_definition("ShadowB::helper").is_some());
5538    }
5539
5540    // -------------------------------------------------------------------------
5541    // find_dependents — use parent / use base integration (#2747)
5542    // -------------------------------------------------------------------------
5543
5544    #[test]
5545    fn test_index_dependency_via_use_parent_end_to_end() {
5546        // Regression for #2747: index a file with `use parent 'MyBase'` and verify
5547        // that find_dependents("MyBase") returns that file.
5548        // 1. Index MyBase.pm
5549        // 2. Index child.pl with `use parent 'MyBase'`
5550        // 3. find_dependents("MyBase") should return child.pl
5551        let index = WorkspaceIndex::new();
5552
5553        let base_url = must(url::Url::parse("file:///test/workspace/lib/MyBase.pm"));
5554        must(index.index_file(
5555            base_url,
5556            "package MyBase;\nsub new { bless {}, shift }\n1;\n".to_string(),
5557        ));
5558
5559        let child_url = must(url::Url::parse("file:///test/workspace/child.pl"));
5560        must(index.index_file(child_url, "package Child;\nuse parent 'MyBase';\n1;\n".to_string()));
5561
5562        let dependents = index.find_dependents("MyBase");
5563        assert!(
5564            !dependents.is_empty(),
5565            "find_dependents('MyBase') returned empty — \
5566             use parent 'MyBase' should register MyBase as a dependency. \
5567             Dependencies in index: {:?}",
5568            {
5569                let files = index.files.read();
5570                files
5571                    .iter()
5572                    .map(|(k, v)| (k.clone(), v.dependencies.iter().cloned().collect::<Vec<_>>()))
5573                    .collect::<Vec<_>>()
5574            }
5575        );
5576        assert!(
5577            dependents.contains(&"file:///test/workspace/child.pl".to_string()),
5578            "child.pl should be in dependents, got: {:?}",
5579            dependents
5580        );
5581    }
5582
5583    #[test]
5584    fn test_find_dependents_normalizes_legacy_separator_in_query() {
5585        let index = WorkspaceIndex::new();
5586        let uri = must(url::Url::parse("file:///test/workspace/legacy-query.pl"));
5587        let src = "package Child;\nuse parent 'My::Base';\n1;\n";
5588        must(index.index_file(uri, src.to_string()));
5589
5590        let dependents = index.find_dependents("My'Base");
5591        assert_eq!(dependents, vec!["file:///test/workspace/legacy-query.pl".to_string()]);
5592    }
5593
5594    #[test]
5595    fn test_file_dependencies_normalize_legacy_separator_in_source() {
5596        let index = WorkspaceIndex::new();
5597        let uri = must(url::Url::parse("file:///test/workspace/legacy-source.pl"));
5598        let src = "package Child;\nuse parent \"My'Base\";\n1;\n";
5599        must(index.index_file(uri.clone(), src.to_string()));
5600
5601        let deps = index.file_dependencies(uri.as_str());
5602        assert!(deps.contains("My::Base"));
5603        assert!(!deps.contains("My'Base"));
5604    }
5605
5606    #[test]
5607    fn test_index_dependency_via_moose_extends_end_to_end() -> Result<(), Box<dyn std::error::Error>>
5608    {
5609        let index = WorkspaceIndex::new();
5610
5611        let parent_url = must(url::Url::parse("file:///test/workspace/lib/My/App/Parent.pm"));
5612        must(index.index_file(parent_url, "package My::App::Parent;\n1;\n".to_string()));
5613
5614        let child_url = must(url::Url::parse("file:///test/workspace/child-moose.pl"));
5615        let child_src = "package Child;\nuse Moose;\nextends 'My::App::Parent';\n1;\n";
5616        must(index.index_file(child_url, child_src.to_string()));
5617
5618        let dependents = index.find_dependents("My::App::Parent");
5619        assert!(
5620            dependents.contains(&"file:///test/workspace/child-moose.pl".to_string()),
5621            "expected child-moose.pl in dependents, got: {dependents:?}"
5622        );
5623        Ok(())
5624    }
5625
5626    #[test]
5627    fn test_index_dependency_via_moo_with_role_end_to_end() -> Result<(), Box<dyn std::error::Error>>
5628    {
5629        let index = WorkspaceIndex::new();
5630
5631        let role_url = must(url::Url::parse("file:///test/workspace/lib/My/App/Role.pm"));
5632        must(index.index_file(role_url, "package My::App::Role;\n1;\n".to_string()));
5633
5634        let consumer_url = must(url::Url::parse("file:///test/workspace/consumer-moo.pl"));
5635        let consumer_src = "package Consumer;\nuse Moo;\nwith 'My::App::Role';\n1;\n";
5636        must(index.index_file(consumer_url.clone(), consumer_src.to_string()));
5637
5638        let dependents = index.find_dependents("My::App::Role");
5639        assert!(
5640            dependents.contains(&"file:///test/workspace/consumer-moo.pl".to_string()),
5641            "expected consumer-moo.pl in dependents, got: {dependents:?}"
5642        );
5643
5644        let deps = index.file_dependencies(consumer_url.as_str());
5645        assert!(deps.contains("My::App::Role"));
5646        Ok(())
5647    }
5648
5649    #[test]
5650    fn test_index_dependency_via_literal_require_end_to_end()
5651    -> Result<(), Box<dyn std::error::Error>> {
5652        let index = WorkspaceIndex::new();
5653        let uri = must(url::Url::parse("file:///test/workspace/require-consumer.pl"));
5654        let src = "package Consumer;\nrequire My::Loader;\n1;\n";
5655        must(index.index_file(uri.clone(), src.to_string()));
5656
5657        let deps = index.file_dependencies(uri.as_str());
5658        assert!(
5659            deps.contains("My::Loader"),
5660            "literal require should register module dependency, got: {deps:?}"
5661        );
5662        Ok(())
5663    }
5664
5665    #[test]
5666    fn test_manual_import_symbols_are_indexed_as_import_references()
5667    -> Result<(), Box<dyn std::error::Error>> {
5668        let index = WorkspaceIndex::new();
5669        let uri = must(url::Url::parse("file:///test/workspace/manual-import.pl"));
5670        let src = r#"package Consumer;
5671require My::Tools;
5672My::Tools->import(qw(helper_one helper_two));
5673helper_one();
56741;
5675"#;
5676        must(index.index_file(uri.clone(), src.to_string()));
5677
5678        let deps = index.file_dependencies(uri.as_str());
5679        assert!(
5680            deps.contains("My::Tools"),
5681            "manual import target should be tracked as dependency, got: {deps:?}"
5682        );
5683
5684        for symbol in ["helper_one", "helper_two"] {
5685            let refs = index.find_references(symbol);
5686            assert!(
5687                !refs.is_empty(),
5688                "expected at least one indexed reference for imported symbol `{symbol}`"
5689            );
5690        }
5691        Ok(())
5692    }
5693
5694    #[test]
5695    fn test_parser_produces_correct_args_for_use_parent() {
5696        // Regression for #2747: verify that the parser produces args=["'MyBase'"]
5697        // for `use parent 'MyBase'`, so extract_module_names_from_use_args strips
5698        // the quotes and registers the dependency under the bare name "MyBase".
5699        use crate::Parser;
5700        let mut p = Parser::new("package Child;\nuse parent 'MyBase';\n1;\n");
5701        let ast = must(p.parse());
5702        assert!(
5703            matches!(ast.kind, NodeKind::Program { .. }),
5704            "Expected Program root, got {:?}",
5705            ast.kind
5706        );
5707        let NodeKind::Program { statements } = &ast.kind else {
5708            return;
5709        };
5710        let mut found_parent_use = false;
5711        for stmt in statements {
5712            if let NodeKind::Use { module, args, .. } = &stmt.kind {
5713                if module == "parent" {
5714                    found_parent_use = true;
5715                    assert_eq!(
5716                        args,
5717                        &["'MyBase'".to_string()],
5718                        "Expected args=[\"'MyBase'\"] for `use parent 'MyBase'`, got: {:?}",
5719                        args
5720                    );
5721                    let extracted = extract_module_names_from_use_args(args);
5722                    assert_eq!(
5723                        extracted,
5724                        vec!["MyBase".to_string()],
5725                        "extract_module_names_from_use_args should return [\"MyBase\"], got {:?}",
5726                        extracted
5727                    );
5728                }
5729            }
5730        }
5731        assert!(found_parent_use, "No Use node with module='parent' found in AST");
5732    }
5733
5734    // -------------------------------------------------------------------------
5735    // extract_module_names_from_use_args — unit tests (#2747)
5736    // -------------------------------------------------------------------------
5737
5738    #[test]
5739    fn test_extract_module_names_single_quoted() {
5740        let names = extract_module_names_from_use_args(&["'Foo::Bar'".to_string()]);
5741        assert_eq!(names, vec!["Foo::Bar"]);
5742    }
5743
5744    #[test]
5745    fn test_extract_module_names_double_quoted() {
5746        let names = extract_module_names_from_use_args(&["\"Foo::Bar\"".to_string()]);
5747        assert_eq!(names, vec!["Foo::Bar"]);
5748    }
5749
5750    #[test]
5751    fn test_extract_module_names_qw_list() {
5752        let names = extract_module_names_from_use_args(&["qw(Foo::Bar Other::Base)".to_string()]);
5753        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
5754    }
5755
5756    #[test]
5757    fn test_extract_module_names_qw_slash_delimiter() {
5758        let names = extract_module_names_from_use_args(&["qw/Foo::Bar Other::Base/".to_string()]);
5759        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
5760    }
5761
5762    #[test]
5763    fn test_extract_module_names_qw_with_space_before_delimiter() {
5764        let names = extract_module_names_from_use_args(&["qw [Foo::Bar Other::Base]".to_string()]);
5765        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
5766    }
5767
5768    #[test]
5769    fn test_extract_module_names_qw_list_trims_wrapped_punctuation() {
5770        let names =
5771            extract_module_names_from_use_args(&["qw((Foo::Bar) [Other::Base],)".to_string()]);
5772        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
5773    }
5774
5775    #[test]
5776    fn test_extract_module_names_norequire_flag() {
5777        let names = extract_module_names_from_use_args(&[
5778            "-norequire".to_string(),
5779            "'Foo::Bar'".to_string(),
5780        ]);
5781        assert_eq!(names, vec!["Foo::Bar"]);
5782    }
5783
5784    #[test]
5785    fn test_extract_module_names_empty_args() {
5786        let names = extract_module_names_from_use_args(&[]);
5787        assert!(names.is_empty());
5788    }
5789
5790    #[test]
5791    fn test_extract_module_names_legacy_separator() {
5792        // Perl legacy package separator ' (tick) inside module name
5793        let names = extract_module_names_from_use_args(&["'Foo'Bar'".to_string()]);
5794        // Legacy separators are normalized for downstream dependency matching.
5795        assert_eq!(names, vec!["Foo::Bar"]);
5796    }
5797
5798    #[test]
5799    fn test_find_dependents_matches_legacy_separator_queries() {
5800        let index = WorkspaceIndex::new();
5801        let base_uri = must(url::Url::parse("file:///test/workspace/lib/Foo/Bar.pm"));
5802        let child_uri = must(url::Url::parse("file:///test/workspace/child.pl"));
5803
5804        must(index.index_file(base_uri, "package Foo::Bar;\n1;\n".to_string()));
5805        must(index.index_file(
5806            child_uri.clone(),
5807            "package Child;\nuse parent qw(Foo'Bar);\n1;\n".to_string(),
5808        ));
5809
5810        let dependents_modern = index.find_dependents("Foo::Bar");
5811        assert!(
5812            dependents_modern.contains(&child_uri.to_string()),
5813            "Expected dependency match when queried with modern separator"
5814        );
5815
5816        let dependents_legacy = index.find_dependents("Foo'Bar");
5817        assert!(
5818            dependents_legacy.contains(&child_uri.to_string()),
5819            "Expected dependency match when queried with legacy separator"
5820        );
5821    }
5822
5823    #[test]
5824    fn test_extract_module_names_comma_adjacent_tokens() {
5825        let names = extract_module_names_from_use_args(&[
5826            "'Foo::Bar',".to_string(),
5827            "\"Other::Base\",".to_string(),
5828            "'Last::One'".to_string(),
5829        ]);
5830        assert_eq!(names, vec!["Foo::Bar", "Other::Base", "Last::One"]);
5831    }
5832
5833    #[test]
5834    fn test_extract_module_names_parenthesized_without_spaces() {
5835        let names = extract_module_names_from_use_args(&["('Foo::Bar','Other::Base')".to_string()]);
5836        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
5837    }
5838
5839    #[test]
5840    fn test_extract_module_names_deduplicates_identical_entries() {
5841        let names = extract_module_names_from_use_args(&[
5842            "qw(Foo::Bar Foo::Bar)".to_string(),
5843            "'Foo::Bar'".to_string(),
5844        ]);
5845        assert_eq!(names, vec!["Foo::Bar"]);
5846    }
5847
5848    #[test]
5849    fn test_extract_module_names_trims_semicolon_suffix() {
5850        let names = extract_module_names_from_use_args(&[
5851            "'Foo::Bar',".to_string(),
5852            "'Other::Base',".to_string(),
5853            "'Third::Leaf';".to_string(),
5854        ]);
5855        assert_eq!(names, vec!["Foo::Bar", "Other::Base", "Third::Leaf"]);
5856    }
5857
5858    #[test]
5859    fn test_extract_module_names_trims_wrapped_punctuation() {
5860        let names = extract_module_names_from_use_args(&[
5861            "('Foo::Bar',".to_string(),
5862            "'Other::Base')".to_string(),
5863        ]);
5864        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
5865    }
5866
5867    #[test]
5868    fn test_extract_constant_names_qw_with_space_before_delimiter() {
5869        let names = extract_constant_names_from_use_args(&["qw [FOO BAR]".to_string()]);
5870        assert_eq!(names, vec!["FOO", "BAR"]);
5871    }
5872
5873    #[test]
5874    #[ignore = "qw delimiter with leading space not yet parsed; tracked in debt-ledger.yaml"]
5875    fn test_index_use_constant_qw_with_space_before_delimiter() {
5876        let index = WorkspaceIndex::new();
5877        let uri = must(url::Url::parse("file:///workspace/lib/My/Config.pm"));
5878        let source = "package My::Config;\nuse constant qw [FOO BAR];\n1;\n";
5879
5880        must(index.index_file(uri, source.to_string()));
5881
5882        let foo = index.find_definition("My::Config::FOO");
5883        let bar = index.find_definition("My::Config::BAR");
5884        assert!(foo.is_some(), "Expected My::Config::FOO to be indexed");
5885        assert!(bar.is_some(), "Expected My::Config::BAR to be indexed");
5886    }
5887
5888    #[test]
5889    fn test_with_capacity_accepts_large_batch_without_panic() {
5890        let index = WorkspaceIndex::with_capacity(100, 20);
5891        for i in 0..100 {
5892            let uri = must(url::Url::parse(&format!("file:///lib/Mod{}.pm", i)));
5893            let src = format!("package Mod{};\nsub foo_{} {{ 1 }}\n1;\n", i, i);
5894            index.index_file(uri, src).ok();
5895        }
5896        assert!(index.has_symbols());
5897    }
5898
5899    #[test]
5900    fn test_with_capacity_zero_does_not_panic() {
5901        let index = WorkspaceIndex::with_capacity(0, 0);
5902        assert!(!index.has_symbols());
5903    }
5904
5905    // -------------------------------------------------------------------------
5906    // remove_file — symbol cache cleanup (#3494)
5907    // -------------------------------------------------------------------------
5908
5909    /// After removing the only file that defines a symbol, both qualified and
5910    /// bare-name lookups must return None.  The symbols cache must not retain
5911    /// stale entries pointing to the deleted file.
5912    #[test]
5913    fn test_remove_file_clears_symbol_cache_qualified_and_bare() {
5914        let index = WorkspaceIndex::new();
5915        let uri_a = must(url::Url::parse("file:///lib/A.pm"));
5916        let code_a = "package A;\nsub foo { return 1; }\n1;\n";
5917
5918        must(index.index_file(uri_a.clone(), code_a.to_string()));
5919
5920        // Pre-condition: both qualified and bare-name lookups resolve to file A.
5921        let before_qual = must_some(index.find_definition("A::foo"));
5922        assert_eq!(
5923            before_qual.uri,
5924            uri_a.to_string(),
5925            "qualified lookup should point to A.pm before removal"
5926        );
5927        let before_bare = must_some(index.find_definition("foo"));
5928        assert_eq!(
5929            before_bare.uri,
5930            uri_a.to_string(),
5931            "bare-name lookup should point to A.pm before removal"
5932        );
5933
5934        // Remove file A from the index (simulates file deletion).
5935        index.remove_file(uri_a.as_str());
5936
5937        // Post-condition: the symbol cache must be clean — no stale entries.
5938        assert!(
5939            index.find_definition("A::foo").is_none(),
5940            "qualified lookup 'A::foo' should return None after file deletion"
5941        );
5942        assert!(
5943            index.find_definition("foo").is_none(),
5944            "bare-name lookup 'foo' should return None after file deletion"
5945        );
5946
5947        // Verify no symbols remain in the index.
5948        assert_eq!(
5949            index.symbol_count(),
5950            0,
5951            "symbol_count should be 0 after removing the only file"
5952        );
5953        assert!(!index.has_symbols(), "has_symbols should be false after removing the only file");
5954    }
5955
5956    /// Deleting file A when file B has the same bare-name symbol must leave
5957    /// the bare-name cache pointing to B (not remove it entirely).
5958    #[test]
5959    fn test_remove_file_bare_name_falls_back_to_surviving_file() {
5960        let index = WorkspaceIndex::new();
5961        let uri_a = must(url::Url::parse("file:///lib/A.pm"));
5962        let uri_b = must(url::Url::parse("file:///lib/B.pm"));
5963        let code_a = "package A;\nsub shared_fn { return 1; }\n1;\n";
5964        let code_b = "package B;\nsub shared_fn { return 2; }\n1;\n";
5965
5966        must(index.index_file(uri_a.clone(), code_a.to_string()));
5967        must(index.index_file(uri_b.clone(), code_b.to_string()));
5968
5969        // Remove file A — shared_fn should still resolve via B.
5970        index.remove_file(uri_a.as_str());
5971
5972        let loc = must_some(index.find_definition("shared_fn"));
5973        assert_eq!(
5974            loc.uri,
5975            uri_b.to_string(),
5976            "bare-name 'shared_fn' should resolve to B.pm after A.pm is deleted"
5977        );
5978
5979        assert!(
5980            index.find_definition("A::shared_fn").is_none(),
5981            "qualified 'A::shared_fn' must be gone after A.pm deletion"
5982        );
5983        assert!(
5984            index.find_definition("B::shared_fn").is_some(),
5985            "qualified 'B::shared_fn' must remain after A.pm deletion"
5986        );
5987    }
5988
5989    #[test]
5990    fn test_definition_candidates_include_ambiguous_bare_symbols_in_stable_order() {
5991        let index = WorkspaceIndex::new();
5992        let uri_b = must(url::Url::parse("file:///lib/B.pm"));
5993        let uri_a = must(url::Url::parse("file:///lib/A.pm"));
5994        must(index.index_file(uri_b, "package B;\nsub shared { 1 }\n1;\n".to_string()));
5995        must(index.index_file(uri_a, "package A;\nsub shared { 1 }\n1;\n".to_string()));
5996
5997        let candidates = index.definition_candidates("shared");
5998        assert_eq!(candidates.len(), 2);
5999        assert_eq!(candidates[0].uri, "file:///lib/A.pm");
6000        assert_eq!(candidates[1].uri, "file:///lib/B.pm");
6001        assert_eq!(must_some(index.find_definition("shared")).uri, "file:///lib/A.pm");
6002    }
6003
6004    #[test]
6005    fn test_definition_candidates_include_duplicate_qualified_name_across_files() {
6006        let index = WorkspaceIndex::new();
6007        let uri_v2 = must(url::Url::parse("file:///lib/A-v2.pm"));
6008        let uri_v1 = must(url::Url::parse("file:///lib/A-v1.pm"));
6009        let source = "package A;\nsub foo { 1 }\n1;\n".to_string();
6010        must(index.index_file(uri_v2, source.clone()));
6011        must(index.index_file(uri_v1, source));
6012
6013        let candidates = index.definition_candidates("A::foo");
6014        assert_eq!(candidates.len(), 2);
6015        assert_eq!(candidates[0].uri, "file:///lib/A-v1.pm");
6016        assert_eq!(candidates[1].uri, "file:///lib/A-v2.pm");
6017    }
6018
6019    #[test]
6020    fn test_definition_candidates_are_cleaned_on_remove_and_reindex() {
6021        let index = WorkspaceIndex::new();
6022        let uri = must(url::Url::parse("file:///lib/A.pm"));
6023        must(index.index_file(uri.clone(), "package A;\nsub foo { 1 }\n1;\n".to_string()));
6024        assert_eq!(index.definition_candidates("A::foo").len(), 1);
6025
6026        index.remove_file(uri.as_str());
6027        assert!(index.definition_candidates("A::foo").is_empty());
6028
6029        must(index.index_file(uri, "package A;\nsub foo { 2 }\n1;\n".to_string()));
6030        assert_eq!(index.definition_candidates("A::foo").len(), 1);
6031    }
6032
6033    /// Verify that `incremental_remove_symbols` correctly retains candidates owned by
6034    /// other files when the removed file had BOTH exclusively-owned names (triggering the
6035    /// full-rebuild path) AND shared names. Before this fix, the full-rebuild path cleared
6036    /// all candidates and relied on the subsequent rebuild to re-add shared ones — correct
6037    /// in effect, but the test documents the expected observable behavior.
6038    #[test]
6039    fn test_definition_candidates_shared_symbol_survives_removal_of_sole_owner_of_other_symbol() {
6040        let index = WorkspaceIndex::new();
6041        let uri_a = must(url::Url::parse("file:///lib/A.pm"));
6042        let uri_b = must(url::Url::parse("file:///lib/B.pm"));
6043
6044        // A defines both `unique_to_a` (no other file) and `shared` (also in B)
6045        must(index.index_file(
6046            uri_a.clone(),
6047            "package A;\nsub unique_to_a { 1 }\nsub shared { 1 }\n1;\n".to_string(),
6048        ));
6049        must(index.index_file(uri_b.clone(), "package B;\nsub shared { 1 }\n1;\n".to_string()));
6050
6051        // Before removal: both shared candidates and unique_to_a are present
6052        assert_eq!(index.definition_candidates("shared").len(), 2);
6053        assert_eq!(index.definition_candidates("unique_to_a").len(), 1);
6054
6055        // Remove A — triggers the affected_names path for `unique_to_a`, but `shared`
6056        // still has B's candidate.
6057        index.remove_file(uri_a.as_str());
6058
6059        assert!(
6060            index.definition_candidates("unique_to_a").is_empty(),
6061            "unique_to_a should be gone after removing A"
6062        );
6063        assert_eq!(
6064            index.definition_candidates("shared").len(),
6065            1,
6066            "shared should still have B's candidate after removing A"
6067        );
6068        assert_eq!(
6069            index.definition_candidates("shared")[0].uri,
6070            "file:///lib/B.pm",
6071            "remaining shared candidate must be from B"
6072        );
6073    }
6074
6075    #[test]
6076    fn test_folder_context_in_file_index() {
6077        let index = WorkspaceIndex::new();
6078
6079        // Set up workspace folders
6080        index.set_workspace_folders(vec![
6081            "file:///project1".to_string(),
6082            "file:///project2".to_string(),
6083        ]);
6084
6085        let uri1 = "file:///project1/lib/Module.pm";
6086        let code1 = r#"
6087package Module;
6088
6089sub test_sub {
6090    return 1;
6091}
6092"#;
6093        must(index.index_file(must(url::Url::parse(uri1)), code1.to_string()));
6094
6095        let uri2 = "file:///project2/lib/Other.pm";
6096        let code2 = r#"
6097package Other;
6098
6099sub other_sub {
6100    return 2;
6101}
6102"#;
6103        must(index.index_file(must(url::Url::parse(uri2)), code2.to_string()));
6104
6105        // Verify folder context is set correctly
6106        let symbols1 = index.file_symbols(uri1);
6107        assert_eq!(symbols1.len(), 2, "Should have 2 symbols in Module.pm");
6108        for symbol in &symbols1 {
6109            assert_eq!(symbol.uri, uri1, "Symbol URI should match file URI");
6110        }
6111
6112        let symbols2 = index.file_symbols(uri2);
6113        assert_eq!(symbols2.len(), 2, "Should have 2 symbols in Other.pm");
6114        for symbol in &symbols2 {
6115            assert_eq!(symbol.uri, uri2, "Symbol URI should match file URI");
6116        }
6117
6118        // Verify folder attribution
6119        let files = index.files.read();
6120        let file_index1 = must_some(files.get(&DocumentStore::uri_key(uri1)));
6121        assert_eq!(
6122            file_index1.folder_uri,
6123            Some("file:///project1".to_string()),
6124            "File should be attributed to correct workspace folder"
6125        );
6126
6127        let file_index2 = must_some(files.get(&DocumentStore::uri_key(uri2)));
6128        assert_eq!(
6129            file_index2.folder_uri,
6130            Some("file:///project2".to_string()),
6131            "File should be attributed to correct workspace folder"
6132        );
6133    }
6134
6135    #[test]
6136    fn test_determine_folder_uri() {
6137        let index = WorkspaceIndex::new();
6138
6139        // Set up workspace folders
6140        index.set_workspace_folders(vec![
6141            "file:///project1".to_string(),
6142            "file:///project2".to_string(),
6143        ]);
6144
6145        // Test file in project1
6146        let folder1 = index.determine_folder_uri("file:///project1/lib/Module.pm");
6147        assert_eq!(
6148            folder1,
6149            Some("file:///project1".to_string()),
6150            "Should determine folder for file in project1"
6151        );
6152
6153        // Test file in project2
6154        let folder2 = index.determine_folder_uri("file:///project2/lib/Other.pm");
6155        assert_eq!(
6156            folder2,
6157            Some("file:///project2".to_string()),
6158            "Should determine folder for file in project2"
6159        );
6160
6161        // Test file not in any workspace folder
6162        let folder_none = index.determine_folder_uri("file:///other/project/Module.pm");
6163        assert_eq!(folder_none, None, "Should return None for file outside workspace folders");
6164    }
6165
6166    #[test]
6167    fn test_determine_folder_uri_prefers_most_specific_match() {
6168        let index = WorkspaceIndex::new();
6169
6170        // Keep broad folder first to ensure we don't rely on insertion order.
6171        index.set_workspace_folders(vec![
6172            "file:///project".to_string(),
6173            "file:///project/lib".to_string(),
6174        ]);
6175
6176        let folder = index.determine_folder_uri("file:///project/lib/My/Module.pm");
6177        assert_eq!(
6178            folder,
6179            Some("file:///project/lib".to_string()),
6180            "Nested workspace folders should attribute files to the most specific folder"
6181        );
6182    }
6183
6184    #[test]
6185    fn test_remove_folder() {
6186        let index = WorkspaceIndex::new();
6187
6188        // Set up workspace folders
6189        index.set_workspace_folders(vec![
6190            "file:///project1".to_string(),
6191            "file:///project2".to_string(),
6192        ]);
6193
6194        // Index files from both folders
6195        let uri1 = "file:///project1/lib/Module.pm";
6196        let code1 = r#"
6197package Module;
6198
6199sub test_sub {
6200    return 1;
6201}
6202"#;
6203        must(index.index_file(must(url::Url::parse(uri1)), code1.to_string()));
6204
6205        let uri2 = "file:///project2/lib/Other.pm";
6206        let code2 = r#"
6207package Other;
6208
6209sub other_sub {
6210    return 2;
6211}
6212"#;
6213        must(index.index_file(must(url::Url::parse(uri2)), code2.to_string()));
6214
6215        // Verify both files are indexed
6216        assert_eq!(index.file_count(), 2, "Should have 2 files indexed");
6217        assert_eq!(index.document_store().count(), 2, "Document store should track both files");
6218
6219        // Remove project1 folder
6220        index.remove_folder("file:///project1");
6221
6222        // Verify only project2 file remains
6223        assert_eq!(index.file_count(), 1, "Should have 1 file after removing folder");
6224        assert_eq!(
6225            index.document_store().count(),
6226            1,
6227            "Document store should drop files removed via folder deletion"
6228        );
6229        assert!(index.file_symbols(uri1).is_empty(), "File from removed folder should be gone");
6230        assert_eq!(
6231            index.file_symbols(uri2).len(),
6232            2,
6233            "File from remaining folder should still be present"
6234        );
6235    }
6236
6237    #[test]
6238    fn test_remove_folder_removes_symbol_free_files() {
6239        let index = WorkspaceIndex::new();
6240        index.set_workspace_folders(vec!["file:///project1".to_string()]);
6241
6242        let uri = "file:///project1/empty.pl";
6243        must(index.index_file(must(url::Url::parse(uri)), "# comments only".to_string()));
6244        assert_eq!(index.file_count(), 1, "Expected file to be indexed");
6245
6246        index.remove_folder("file:///project1");
6247
6248        assert_eq!(index.file_count(), 0, "Folder removal should delete symbol-free files");
6249        assert_eq!(
6250            index.document_store().count(),
6251            0,
6252            "Document store should stay in sync for symbol-free files"
6253        );
6254    }
6255
6256    // ========================================================================
6257    // GREEN-TDD EDGE CASE TESTS FOR ISSUE #6061 (static require + manual import)
6258    // ========================================================================
6259
6260    #[test]
6261    fn test_require_with_variable_target_is_not_indexed() -> Result<(), Box<dyn std::error::Error>>
6262    {
6263        let index = WorkspaceIndex::new();
6264        let uri = must(url::Url::parse("file:///test/require-var.pl"));
6265        let src = r#"package Test;
6266my $loader = 'MyModule';
6267require $loader;
62681;
6269"#;
6270        must(index.index_file(uri.clone(), src.to_string()));
6271        let deps = index.file_dependencies(uri.as_str());
6272        assert!(
6273            !deps.contains("MyModule"),
6274            "require with variable target should not register static dependency"
6275        );
6276        Ok(())
6277    }
6278
6279    #[test]
6280    fn test_multiple_import_calls_on_same_module() -> Result<(), Box<dyn std::error::Error>> {
6281        let index = WorkspaceIndex::new();
6282        let uri = must(url::Url::parse("file:///test/multi-import.pl"));
6283        let src = r#"package Test;
6284require Toolkit;
6285Toolkit->import('func_a');
6286Toolkit->import(qw(func_b func_c));
62871;
6288"#;
6289        must(index.index_file(uri.clone(), src.to_string()));
6290        let deps = index.file_dependencies(uri.as_str());
6291        assert!(deps.contains("Toolkit"), "module should be tracked as dependency");
6292        for symbol in &["func_a", "func_b", "func_c"] {
6293            let refs = index.find_references(symbol);
6294            assert!(!refs.is_empty(), "all imported symbols should be indexed: {}", symbol);
6295        }
6296        Ok(())
6297    }
6298
6299    #[test]
6300    fn test_require_string_vs_bareword_normalization() -> Result<(), Box<dyn std::error::Error>> {
6301        let index = WorkspaceIndex::new();
6302        let uri = must(url::Url::parse("file:///test/require-string.pl"));
6303        let src = r#"package Consumer;
6304require "String/Based/Module.pm";
6305String::Based::Module->import('exported');
63061;
6307"#;
6308        must(index.index_file(uri.clone(), src.to_string()));
6309        let deps = index.file_dependencies(uri.as_str());
6310        assert!(
6311            deps.contains("String::Based::Module"),
6312            "require string form should normalize path separators to ::"
6313        );
6314        let refs = index.find_references("exported");
6315        assert!(!refs.is_empty(), "import should be indexed even with string-form require");
6316        Ok(())
6317    }
6318
6319    #[test]
6320    fn test_import_without_require_registers_as_method_call()
6321    -> Result<(), Box<dyn std::error::Error>> {
6322        // Edge case: ->import() without preceding require is treated as a normal method call,
6323        // not as the static manual-import pattern, so the module is still visited/tracked
6324        // but the symbols are NOT marked as imports from the static require+import logic.
6325        let index = WorkspaceIndex::new();
6326        let uri = must(url::Url::parse("file:///test/orphan-import.pl"));
6327        let src = r#"package Test;
6328Unrelated::Module->import('orphaned');
6329orphaned();
63301;
6331"#;
6332        must(index.index_file(uri.clone(), src.to_string()));
6333
6334        // The module reference may still be tracked as a method call target,
6335        // but the key regression is: the orphaned symbol should not be indexed
6336        // as an import reference due to the missing require.
6337        let _refs = index.find_references("orphaned");
6338        // Symbol may be referenced but should not be specially treated as an import.
6339        // The main point is: without require, the pairing doesn't activate.
6340        Ok(())
6341    }
6342
6343    #[test]
6344    fn test_nested_blocks_preserve_require_scope() -> Result<(), Box<dyn std::error::Error>> {
6345        let index = WorkspaceIndex::new();
6346        let uri = must(url::Url::parse("file:///test/nested.pl"));
6347        let src = r#"package Test;
6348{
6349    require Outer;
6350    {
6351        Outer->import('nested_sym');
6352    }
6353}
63541;
6355"#;
6356        must(index.index_file(uri.clone(), src.to_string()));
6357        let deps = index.file_dependencies(uri.as_str());
6358        assert!(
6359            deps.contains("Outer"),
6360            "require in outer block should be visible to nested import"
6361        );
6362        let refs = index.find_references("nested_sym");
6363        assert!(!refs.is_empty(), "symbol imported in nested block should still be indexed");
6364        Ok(())
6365    }
6366
6367    #[test]
6368    fn test_require_path_without_pm_extension() -> Result<(), Box<dyn std::error::Error>> {
6369        let index = WorkspaceIndex::new();
6370        let uri = must(url::Url::parse("file:///test/no-ext.pl"));
6371        let src = r#"package Test;
6372require "My/Module";
6373My::Module->import('func');
63741;
6375"#;
6376        must(index.index_file(uri.clone(), src.to_string()));
6377        let deps = index.file_dependencies(uri.as_str());
6378        assert!(
6379            deps.contains("My::Module"),
6380            "require without .pm extension should normalize to module path"
6381        );
6382        Ok(())
6383    }
6384
6385    #[test]
6386    fn test_qw_with_bracket_delimiters() -> Result<(), Box<dyn std::error::Error>> {
6387        let index = WorkspaceIndex::new();
6388        let uri = must(url::Url::parse("file:///test/qw-delim.pl"));
6389        let src = r#"package Test;
6390require DelimModule;
6391DelimModule->import(qw[sym1 sym2]);
6392DelimModule->import(qw{sym3 sym4});
63931;
6394"#;
6395        must(index.index_file(uri.clone(), src.to_string()));
6396        for symbol in &["sym1", "sym2", "sym3", "sym4"] {
6397            let refs = index.find_references(symbol);
6398            assert!(
6399                !refs.is_empty(),
6400                "symbols from qw with bracket delimiters should be indexed: {}",
6401                symbol
6402            );
6403        }
6404        Ok(())
6405    }
6406
6407    #[test]
6408    fn test_array_literal_import_args() -> Result<(), Box<dyn std::error::Error>> {
6409        let index = WorkspaceIndex::new();
6410        let uri = must(url::Url::parse("file:///test/array-import.pl"));
6411        let src = r#"package Test;
6412require ArrayModule;
6413ArrayModule->import(['sym_x', 'sym_y']);
64141;
6415"#;
6416        must(index.index_file(uri.clone(), src.to_string()));
6417        for symbol in &["sym_x", "sym_y"] {
6418            let refs = index.find_references(symbol);
6419            assert!(
6420                !refs.is_empty(),
6421                "symbols from array literal import should be indexed: {}",
6422                symbol
6423            );
6424        }
6425        Ok(())
6426    }
6427
6428    #[test]
6429    fn test_require_inside_conditional_still_registers_dependency()
6430    -> Result<(), Box<dyn std::error::Error>> {
6431        let index = WorkspaceIndex::new();
6432        let uri = must(url::Url::parse("file:///test/cond-require.pl"));
6433        let src = r#"package Test;
6434if (1) {
6435    require ConditionalMod;
6436    ConditionalMod->import('cond_func');
6437}
64381;
6439"#;
6440        must(index.index_file(uri.clone(), src.to_string()));
6441        let deps = index.file_dependencies(uri.as_str());
6442        assert!(
6443            deps.contains("ConditionalMod"),
6444            "require inside conditional should still register as dependency"
6445        );
6446        let refs = index.find_references("cond_func");
6447        assert!(!refs.is_empty(), "import inside conditional should still index symbols");
6448        Ok(())
6449    }
6450
6451    #[test]
6452    fn test_mixed_string_and_bareword_imports() -> Result<(), Box<dyn std::error::Error>> {
6453        let index = WorkspaceIndex::new();
6454        let uri = must(url::Url::parse("file:///test/mixed-import.pl"));
6455        let src = r#"package Test;
6456require MixedMod;
6457MixedMod->import('string_sym');
6458MixedMod->import(qw(qw_one qw_two));
64591;
6460"#;
6461        must(index.index_file(uri.clone(), src.to_string()));
6462        let deps = index.file_dependencies(uri.as_str());
6463        assert!(deps.contains("MixedMod"), "require should register dependency");
6464        for symbol in &["string_sym", "qw_one", "qw_two"] {
6465            let refs = index.find_references(symbol);
6466            assert!(!refs.is_empty(), "all import forms should index symbols: {}", symbol);
6467        }
6468        Ok(())
6469    }
6470
6471    // -------------------------------------------------------------------------
6472    // Per-category incremental invalidation (Req 18.1–18.5)
6473    // -------------------------------------------------------------------------
6474
6475    /// Helper: build a minimal `FileFactShard` with configurable hashes.
6476    fn make_shard(
6477        uri: &str,
6478        content_hash: u64,
6479        anchors_hash: Option<u64>,
6480        entities_hash: Option<u64>,
6481        occurrences_hash: Option<u64>,
6482        edges_hash: Option<u64>,
6483    ) -> FileFactShard {
6484        let file_id = {
6485            let mut h = DefaultHasher::new();
6486            uri.hash(&mut h);
6487            FileId(h.finish())
6488        };
6489        FileFactShard {
6490            source_uri: uri.to_string(),
6491            file_id,
6492            content_hash,
6493            anchors_hash,
6494            entities_hash,
6495            occurrences_hash,
6496            edges_hash,
6497            anchors: Vec::new(),
6498            entities: Vec::new(),
6499            occurrences: Vec::new(),
6500            edges: Vec::new(),
6501        }
6502    }
6503
6504    /// Req 18.5: When content_hash is unchanged, skip all per-category
6505    /// comparisons — no index modifications happen.
6506    #[test]
6507    fn incremental_replace_skips_when_content_hash_unchanged()
6508    -> Result<(), Box<dyn std::error::Error>> {
6509        let index = WorkspaceIndex::new();
6510        let uri = "file:///lib/Same.pm";
6511        let key = DocumentStore::uri_key(uri);
6512
6513        let shard_v1 = make_shard(uri, 42, Some(1), Some(2), Some(3), Some(4));
6514        // First insert — no old shard, so all categories are "changed".
6515        let r1 = index.replace_fact_shard_incremental(&key, shard_v1);
6516        assert!(!r1.content_unchanged);
6517
6518        // Second insert with same content_hash → skip entirely.
6519        let shard_v2 = make_shard(uri, 42, Some(100), Some(200), Some(300), Some(400));
6520        let r2 = index.replace_fact_shard_incremental(&key, shard_v2);
6521        assert!(r2.content_unchanged);
6522        assert!(!r2.anchors_updated);
6523        assert!(!r2.entities_updated);
6524        assert!(!r2.occurrences_updated);
6525        assert!(!r2.edges_updated);
6526
6527        // The stored shard should still be v1 (unchanged).
6528        let stored = must_some(index.file_fact_shard(uri));
6529        assert_eq!(stored.anchors_hash, Some(1));
6530        Ok(())
6531    }
6532
6533    /// Req 18.3: When a category hash is unchanged, skip re-indexing that
6534    /// category's cross-file indexes.
6535    #[test]
6536    fn incremental_replace_skips_unchanged_categories() -> Result<(), Box<dyn std::error::Error>> {
6537        let index = WorkspaceIndex::new();
6538        let uri = "file:///lib/Partial.pm";
6539        let key = DocumentStore::uri_key(uri);
6540
6541        let shard_v1 = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
6542        index.replace_fact_shard_incremental(&key, shard_v1);
6543
6544        // Change content_hash but keep anchors and entities the same.
6545        // Only occurrences and edges change.
6546        let shard_v2 = make_shard(uri, 2, Some(10), Some(20), Some(99), Some(88));
6547        let result = index.replace_fact_shard_incremental(&key, shard_v2);
6548
6549        assert!(!result.content_unchanged);
6550        assert!(!result.anchors_updated, "anchors hash unchanged → skip");
6551        assert!(!result.entities_updated, "entities hash unchanged → skip");
6552        assert!(result.occurrences_updated, "occurrences hash changed → update");
6553        assert!(result.edges_updated, "edges hash changed → update");
6554        Ok(())
6555    }
6556
6557    /// Req 18.4: When a category hash has changed, remove old entries and
6558    /// insert new ones for that category.
6559    #[test]
6560    fn incremental_replace_updates_changed_categories() -> Result<(), Box<dyn std::error::Error>> {
6561        let index = WorkspaceIndex::new();
6562        let uri = "file:///lib/Changed.pm";
6563        let key = DocumentStore::uri_key(uri);
6564
6565        let shard_v1 = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
6566        index.replace_fact_shard_incremental(&key, shard_v1);
6567
6568        // Change all category hashes.
6569        let shard_v2 = make_shard(uri, 2, Some(11), Some(21), Some(31), Some(41));
6570        let result = index.replace_fact_shard_incremental(&key, shard_v2);
6571
6572        assert!(!result.content_unchanged);
6573        assert!(result.anchors_updated);
6574        assert!(result.entities_updated);
6575        assert!(result.occurrences_updated);
6576        assert!(result.edges_updated);
6577
6578        // The stored shard should be v2.
6579        let stored = must_some(index.file_fact_shard(uri));
6580        assert_eq!(stored.content_hash, 2);
6581        assert_eq!(stored.anchors_hash, Some(11));
6582        Ok(())
6583    }
6584
6585    /// When there is no old shard (first index), all categories are treated
6586    /// as changed.
6587    #[test]
6588    fn incremental_replace_first_insert_updates_all() -> Result<(), Box<dyn std::error::Error>> {
6589        let index = WorkspaceIndex::new();
6590        let uri = "file:///lib/New.pm";
6591        let key = DocumentStore::uri_key(uri);
6592
6593        let shard = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
6594        let result = index.replace_fact_shard_incremental(&key, shard);
6595
6596        assert!(!result.content_unchanged);
6597        assert!(result.anchors_updated);
6598        assert!(result.entities_updated);
6599        assert!(result.occurrences_updated);
6600        assert!(result.edges_updated);
6601        Ok(())
6602    }
6603
6604    /// When per-category hashes are `None` (legacy shard), the category is
6605    /// conservatively treated as changed.
6606    #[test]
6607    fn incremental_replace_none_hashes_treated_as_changed() -> Result<(), Box<dyn std::error::Error>>
6608    {
6609        let index = WorkspaceIndex::new();
6610        let uri = "file:///lib/Legacy.pm";
6611        let key = DocumentStore::uri_key(uri);
6612
6613        // Old shard has hashes, new shard has None for some.
6614        let shard_v1 = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
6615        index.replace_fact_shard_incremental(&key, shard_v1);
6616
6617        let shard_v2 = make_shard(uri, 2, None, Some(20), None, Some(40));
6618        let result = index.replace_fact_shard_incremental(&key, shard_v2);
6619
6620        assert!(!result.content_unchanged);
6621        assert!(result.anchors_updated, "None new hash → changed");
6622        assert!(!result.entities_updated, "same hash → skip");
6623        assert!(result.occurrences_updated, "None new hash → changed");
6624        assert!(!result.edges_updated, "same hash → skip");
6625        Ok(())
6626    }
6627
6628    /// Verify that the semantic reference index is updated only when
6629    /// occurrences or edges change.
6630    #[test]
6631    fn incremental_replace_updates_reference_index_on_occurrence_change()
6632    -> Result<(), Box<dyn std::error::Error>> {
6633        use perl_semantic_facts::{AnchorId, Confidence, OccurrenceId, OccurrenceKind, Provenance};
6634
6635        let index = WorkspaceIndex::new();
6636        let uri = "file:///lib/RefIdx.pm";
6637        let key = DocumentStore::uri_key(uri);
6638        let file_id = {
6639            let mut h = DefaultHasher::new();
6640            uri.hash(&mut h);
6641            FileId(h.finish())
6642        };
6643
6644        // v1: shard with one reference occurrence.
6645        let mut shard_v1 = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
6646        let anchor_id = AnchorId(1);
6647        shard_v1.anchors.push(perl_semantic_facts::AnchorFact {
6648            id: anchor_id,
6649            file_id,
6650            span_start_byte: 0,
6651            span_end_byte: 5,
6652            scope_id: None,
6653            provenance: Provenance::ExactAst,
6654            confidence: Confidence::High,
6655        });
6656        shard_v1.occurrences.push(perl_semantic_facts::OccurrenceFact {
6657            id: OccurrenceId(1),
6658            kind: OccurrenceKind::Call,
6659            entity_id: Some(EntityId(100)),
6660            anchor_id,
6661            scope_id: None,
6662            provenance: Provenance::ExactAst,
6663            confidence: Confidence::High,
6664        });
6665        shard_v1.entities.push(perl_semantic_facts::EntityFact {
6666            id: EntityId(100),
6667            kind: EntityKind::Subroutine,
6668            canonical_name: "RefIdx::foo".to_string(),
6669            anchor_id: Some(anchor_id),
6670            scope_id: None,
6671            provenance: Provenance::ExactAst,
6672            confidence: Confidence::High,
6673        });
6674        index.replace_fact_shard_incremental(&key, shard_v1);
6675
6676        // Reference index should have entries.
6677        assert!(
6678            index.semantic_reference_index.read().name_count() > 0
6679                || index.semantic_reference_index.read().entity_count() > 0,
6680            "reference index should be populated after first insert"
6681        );
6682
6683        // v2: same content_hash → skip entirely, reference index untouched.
6684        let shard_v2_same = make_shard(uri, 1, Some(10), Some(20), Some(99), Some(99));
6685        let r = index.replace_fact_shard_incremental(&key, shard_v2_same);
6686        assert!(r.content_unchanged);
6687
6688        // v3: different content_hash, same occurrence/edge hashes → skip ref index.
6689        let mut shard_v3 = make_shard(uri, 3, Some(11), Some(21), Some(30), Some(40));
6690        shard_v3.anchors.push(perl_semantic_facts::AnchorFact {
6691            id: anchor_id,
6692            file_id,
6693            span_start_byte: 0,
6694            span_end_byte: 5,
6695            scope_id: None,
6696            provenance: Provenance::ExactAst,
6697            confidence: Confidence::High,
6698        });
6699        shard_v3.occurrences.push(perl_semantic_facts::OccurrenceFact {
6700            id: OccurrenceId(1),
6701            kind: OccurrenceKind::Call,
6702            entity_id: Some(EntityId(100)),
6703            anchor_id,
6704            scope_id: None,
6705            provenance: Provenance::ExactAst,
6706            confidence: Confidence::High,
6707        });
6708        shard_v3.entities.push(perl_semantic_facts::EntityFact {
6709            id: EntityId(100),
6710            kind: EntityKind::Subroutine,
6711            canonical_name: "RefIdx::foo".to_string(),
6712            anchor_id: Some(anchor_id),
6713            scope_id: None,
6714            provenance: Provenance::ExactAst,
6715            confidence: Confidence::High,
6716        });
6717        let r3 = index.replace_fact_shard_incremental(&key, shard_v3);
6718        assert!(!r3.occurrences_updated, "occurrence hash unchanged → skip");
6719        assert!(!r3.edges_updated, "edge hash unchanged → skip");
6720
6721        Ok(())
6722    }
6723
6724    /// Verify that `index_file` uses incremental replacement (the fact shard
6725    /// is stored and updated correctly through the full indexing path).
6726    #[test]
6727    fn index_file_stores_fact_shard_incrementally() -> Result<(), Box<dyn std::error::Error>> {
6728        let index = WorkspaceIndex::new();
6729        let uri = "file:///lib/Incr.pm";
6730        let code = "package Incr;\nsub foo { 1 }\n1;\n";
6731
6732        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
6733        let shard1 = must_some(index.file_fact_shard(uri));
6734        assert!(shard1.anchors_hash.is_some());
6735        assert!(
6736            shard1.anchors.iter().any(|anchor| anchor.provenance == Provenance::ExactAst),
6737            "index_file should store the canonical semantic shard when adapters produce facts"
6738        );
6739        assert!(
6740            shard1.entities.iter().any(|entity| entity.provenance == Provenance::ExactAst),
6741            "index_file should store canonical entities rather than legacy fallback entities"
6742        );
6743
6744        // Re-index with same content → shard should be unchanged.
6745        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
6746        // The early-exit in index_file checks content_hash at the FileIndex
6747        // level, so the fact shard replacement is never reached for identical
6748        // content. Verify the shard is still present.
6749        let shard2 = must_some(index.file_fact_shard(uri));
6750        assert_eq!(shard1.content_hash, shard2.content_hash);
6751
6752        // Re-index with different content → shard should be replaced.
6753        let code2 = "package Incr;\nsub bar { 2 }\n1;\n";
6754        must(index.index_file(must(url::Url::parse(uri)), code2.to_string()));
6755        let shard3 = must_some(index.file_fact_shard(uri));
6756        assert_ne!(shard1.content_hash, shard3.content_hash);
6757
6758        Ok(())
6759    }
6760
6761    // ── Property-based tests for incremental invalidation ──
6762
6763    mod prop_incremental_invalidation {
6764        use super::*;
6765        use proptest::prelude::*;
6766        use proptest::test_runner::Config as ProptestConfig;
6767
6768        /// Strategy for an optional per-category hash.
6769        ///
6770        /// ~10% of the time produces `None` (simulating legacy shards
6771        /// without per-category hashes); otherwise a random `u64`.
6772        fn arb_category_hash() -> impl Strategy<Value = Option<u64>> {
6773            prop_oneof![
6774                1 => Just(None),
6775                9 => any::<u64>().prop_map(Some),
6776            ]
6777        }
6778
6779        /// Strategy for a `FileFactShard` with the given URI and
6780        /// randomly-chosen hashes.
6781        fn arb_shard(uri: &'static str) -> impl Strategy<Value = FileFactShard> {
6782            (
6783                any::<u64>(),        // content_hash
6784                arb_category_hash(), // anchors_hash
6785                arb_category_hash(), // entities_hash
6786                arb_category_hash(), // occurrences_hash
6787                arb_category_hash(), // edges_hash
6788            )
6789                .prop_map(move |(content_hash, ah, eh, oh, edh)| {
6790                    make_shard(uri, content_hash, ah, eh, oh, edh)
6791                })
6792        }
6793
6794        // Property 15: Incremental Invalidation Correctness
6795        //
6796        // **Validates: Requirements 18.3, 18.4, 18.5**
6797        //
6798        // For any file re-indexing where the whole-file content_hash is
6799        // unchanged, the workspace store shall not modify any cross-file
6800        // indexes.  For any file re-indexing where a per-category hash is
6801        // unchanged, the workspace store shall skip re-indexing that
6802        // category.  For any file re-indexing where a per-category hash
6803        // has changed, the workspace store shall remove old entries and
6804        // insert new ones for that category.
6805        proptest! {
6806            #![proptest_config(ProptestConfig {
6807                failure_persistence: None,
6808                ..ProptestConfig::default()
6809            })]
6810
6811            #[test]
6812            fn prop_incremental_invalidation_correctness(
6813                old_shard in arb_shard("file:///lib/Prop.pm"),
6814                new_shard in arb_shard("file:///lib/Prop.pm"),
6815            ) {
6816                let index = WorkspaceIndex::new();
6817                let key = DocumentStore::uri_key("file:///lib/Prop.pm");
6818
6819                // Seed the index with the old shard.
6820                index.replace_fact_shard_incremental(&key, old_shard.clone());
6821
6822                // Replace with the new shard and capture the result.
6823                let result = index.replace_fact_shard_incremental(&key, new_shard.clone());
6824
6825                // ── Req 18.5: content_hash unchanged → skip entirely ──
6826                if old_shard.content_hash == new_shard.content_hash {
6827                    prop_assert!(
6828                        result.content_unchanged,
6829                        "content_unchanged must be true when content_hash is the same"
6830                    );
6831                    prop_assert!(
6832                        !result.anchors_updated,
6833                        "anchors_updated must be false when content_hash unchanged"
6834                    );
6835                    prop_assert!(
6836                        !result.entities_updated,
6837                        "entities_updated must be false when content_hash unchanged"
6838                    );
6839                    prop_assert!(
6840                        !result.occurrences_updated,
6841                        "occurrences_updated must be false when content_hash unchanged"
6842                    );
6843                    prop_assert!(
6844                        !result.edges_updated,
6845                        "edges_updated must be false when content_hash unchanged"
6846                    );
6847                } else {
6848                    prop_assert!(
6849                        !result.content_unchanged,
6850                        "content_unchanged must be false when content_hash differs"
6851                    );
6852
6853                    // ── Req 18.3 / 18.4: per-category hash comparison ──
6854                    // A category is "unchanged" when both old and new have
6855                    // Some(h) and the values are equal.  Otherwise the
6856                    // category is conservatively treated as changed.
6857
6858                    let anchors_should_update = crate::semantic::invalidation::category_hash_changed(
6859                        old_shard.anchors_hash,
6860                        new_shard.anchors_hash,
6861                    );
6862                    prop_assert_eq!(
6863                        result.anchors_updated,
6864                        anchors_should_update,
6865                        "anchors_updated mismatch: old={:?} new={:?}",
6866                        old_shard.anchors_hash,
6867                        new_shard.anchors_hash,
6868                    );
6869
6870                    let entities_should_update =
6871                        crate::semantic::invalidation::category_hash_changed(
6872                            old_shard.entities_hash,
6873                            new_shard.entities_hash,
6874                        );
6875                    prop_assert_eq!(
6876                        result.entities_updated,
6877                        entities_should_update,
6878                        "entities_updated mismatch: old={:?} new={:?}",
6879                        old_shard.entities_hash,
6880                        new_shard.entities_hash,
6881                    );
6882
6883                    let occurrences_should_update =
6884                        crate::semantic::invalidation::category_hash_changed(
6885                            old_shard.occurrences_hash,
6886                            new_shard.occurrences_hash,
6887                        );
6888                    prop_assert_eq!(
6889                        result.occurrences_updated,
6890                        occurrences_should_update,
6891                        "occurrences_updated mismatch: old={:?} new={:?}",
6892                        old_shard.occurrences_hash,
6893                        new_shard.occurrences_hash,
6894                    );
6895
6896                    let edges_should_update = crate::semantic::invalidation::category_hash_changed(
6897                        old_shard.edges_hash,
6898                        new_shard.edges_hash,
6899                    );
6900                    prop_assert_eq!(
6901                        result.edges_updated,
6902                        edges_should_update,
6903                        "edges_updated mismatch: old={:?} new={:?}",
6904                        old_shard.edges_hash,
6905                        new_shard.edges_hash,
6906                    );
6907                }
6908            }
6909        }
6910    }
6911}