perl_workspace/workspace/
workspace_index.rs

1//! Workspace-wide symbol index for fast cross-file lookups in Perl LSP.
2//!
3//! This module provides efficient indexing of symbols across an entire Perl workspace,
4//! enabling enterprise-grade features like find-references, rename refactoring, and
5//! workspace symbol search with ≤1ms response times.
6//!
7//! # LSP Workflow Integration
8//!
9//! Core component in the Parse → Index → Navigate → Complete → Analyze pipeline:
10//! 1. **Parse**: AST generation from Perl source files
11//! 2. **Index**: Workspace symbol table construction with dual indexing strategy
12//! 3. **Navigate**: Cross-file symbol resolution and go-to-definition
13//! 4. **Complete**: Context-aware completion with workspace symbol awareness
14//! 5. **Analyze**: Cross-reference analysis and workspace refactoring operations
15//!
16//! # Performance Characteristics
17//!
18//! - **Symbol indexing**: O(n) where n is total workspace symbols
19//! - **Symbol lookup**: O(1) average with hash table indexing
20//! - **Cross-file queries**: <50μs for typical workspace sizes
21//! - **Memory usage**: ~1MB per 10K symbols with optimized storage
22//! - **Incremental updates**: ≤1ms for file-level symbol changes
23//! - **Large workspace scaling**: Designed to scale to 50K+ files and large codebases
24//! - **Benchmark targets**: <50μs lookups and ≤1ms incremental updates at scale
25//!
26//! # Dual Indexing Strategy
27//!
28//! Implements dual indexing for comprehensive Perl symbol resolution:
29//! - **Qualified names**: `Package::function` for explicit references
30//! - **Bare names**: `function` for context-dependent resolution
31//! - **98% reference coverage**: Handles both qualified and unqualified calls
32//! - **Automatic deduplication**: Prevents duplicate results in queries
33//!
34//! # Usage Examples
35//!
36//! ```rust
37//! use perl_workspace::workspace::workspace_index::WorkspaceIndex;
38//! use url::Url;
39//!
40//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
41//! let index = WorkspaceIndex::new();
42//!
43//! // Index a Perl file
44//! let uri = Url::parse("file:///example.pl")?;
45//! let code = "package MyPackage;\nsub example { return 42; }";
46//! index.index_file(uri, code.to_string())?;
47//!
48//! // Find symbol definitions
49//! let definition = index.find_definition("MyPackage::example");
50//! assert!(definition.is_some());
51//!
52//! // Workspace symbol search
53//! let symbols = index.find_symbols("example");
54//! assert!(!symbols.is_empty());
55//! # Ok(())
56//! # }
57//! ```
58//!
59//! # Related Modules
60//!
61//! See also the symbol extraction, reference finding, and semantic token classification
62//! modules in the workspace index implementation.
63
64use crate::Parser;
65use crate::ast::{Node, NodeKind};
66use crate::document_store::{Document, DocumentStore};
67use crate::position::{Position, Range};
68use crate::workspace::monitoring::IndexInstrumentation;
69use parking_lot::RwLock;
70use perl_position_tracking::{WireLocation, WirePosition, WireRange};
71use perl_semantic_facts::{
72    AnchorFact, AnchorId, Confidence, EdgeFact, EntityFact, EntityId, EntityKind, FileId,
73    Provenance,
74};
75use serde::{Deserialize, Serialize};
76use std::collections::hash_map::DefaultHasher;
77use std::collections::{HashMap, HashSet};
78use std::hash::{Hash, Hasher};
79use std::path::Path;
80use std::sync::Arc;
81use std::time::Instant;
82use url::Url;
83
84use crate::semantic::imports::ImportExportIndex;
85pub use crate::semantic::invalidation::ShardReplaceResult;
86use crate::semantic::invalidation::{ShardCategoryHashes, plan_shard_replacement};
87use crate::semantic::references::ReferenceIndex;
88pub use crate::workspace::monitoring::{
89    DegradationReason, EarlyExitReason, EarlyExitRecord, IndexInstrumentationSnapshot,
90    IndexMetrics, IndexPerformanceCaps, IndexPhase, IndexPhaseTransition, IndexResourceLimits,
91    IndexStateKind, IndexStateTransition, ResourceKind,
92};
93use perl_symbol::surface::decl::extract_symbol_decls;
94use perl_symbol::surface::facts::{symbol_decls_to_semantic_facts, symbol_refs_to_semantic_facts};
95use perl_symbol::surface::r#ref::extract_symbol_refs;
96
97// Re-export URI utilities for backward compatibility
98#[cfg(not(target_arch = "wasm32"))]
99/// URI ↔ filesystem helpers used during Index/Analyze workflows.
100pub use perl_uri::{fs_path_to_uri, uri_to_fs_path};
101/// URI inspection helpers used during Index/Analyze workflows.
102pub use perl_uri::{is_file_uri, is_special_scheme, uri_extension, uri_key};
103
104// ============================================================================
105// Index Lifecycle Types (Index Lifecycle v1 Specification)
106// ============================================================================
107
108/// Index readiness state - explicit lifecycle management
109///
110/// Represents the current operational state of the workspace index, enabling
111/// LSP handlers to provide appropriate responses based on index availability.
112/// This state machine prevents blocking operations and ensures graceful
113/// degradation when the index is not fully ready.
114///
115/// # State Transitions
116///
117/// - `Building` → `Ready`: Workspace scan completes successfully
118/// - `Building` → `Degraded`: Scan timeout, IO error, or resource limit
119/// - `Ready` → `Building`: Workspace folder change or file watching events
120/// - `Ready` → `Degraded`: Parse storm (>10 pending) or IO error
121/// - `Degraded` → `Building`: Recovery attempt after cooldown
122/// - `Degraded` → `Ready`: Successful re-scan after recovery
123///
124/// # Invariants
125///
126/// - During a single build attempt, `phase` advances monotonically
127///   (`Idle` → `Scanning` → `Indexing`).
128/// - `indexed_count` must not exceed `total_count`; callers should keep totals updated.
129/// - `Ready` and `Degraded` counts are snapshots captured at transition time.
130///
131/// # Usage
132///
133/// ```rust,ignore
134/// use perl_parser::workspace_index::{IndexPhase, IndexState};
135/// use std::time::Instant;
136///
137/// let state = IndexState::Building {
138///     phase: IndexPhase::Indexing,
139///     indexed_count: 50,
140///     total_count: 100,
141///     started_at: Instant::now(),
142/// };
143/// ```
#[derive(Clone, Debug)]
pub enum IndexState {
    /// Index is being constructed (workspace scan in progress)
    Building {
        /// Current build phase (Idle → Scanning → Indexing)
        phase: IndexPhase,
        /// Number of files indexed so far in this build attempt
        indexed_count: usize,
        /// Total number of files discovered for this build attempt
        total_count: usize,
        /// Instant at which this build attempt started; elapsed time since
        /// this instant is what the scan-timeout check measures
        started_at: Instant,
    },

    /// Index is consistent and ready for queries
    Ready {
        /// Total symbols indexed, as reported when the index became ready
        symbol_count: usize,
        /// Total files indexed, as reported when the index became ready
        file_count: usize,
        /// Timestamp of last successful index
        completed_at: Instant,
    },

    /// Index is serving but degraded
    Degraded {
        /// Why we degraded
        reason: DegradationReason,
        /// Symbol count carried over from the state that was degraded
        /// (zero when degrading from `Building`)
        available_symbols: usize,
        /// When degradation occurred
        since: Instant,
    },
}
178
179impl IndexState {
180    /// Return the coarse state kind for instrumentation and routing decisions
181    pub fn kind(&self) -> IndexStateKind {
182        match self {
183            IndexState::Building { .. } => IndexStateKind::Building,
184            IndexState::Ready { .. } => IndexStateKind::Ready,
185            IndexState::Degraded { .. } => IndexStateKind::Degraded,
186        }
187    }
188
189    /// Return the current build phase when in `Building` state
190    pub fn phase(&self) -> Option<IndexPhase> {
191        match self {
192            IndexState::Building { phase, .. } => Some(*phase),
193            _ => None,
194        }
195    }
196
197    /// Timestamp of when the current state began
198    pub fn state_started_at(&self) -> Instant {
199        match self {
200            IndexState::Building { started_at, .. } => *started_at,
201            IndexState::Ready { completed_at, .. } => *completed_at,
202            IndexState::Degraded { since, .. } => *since,
203        }
204    }
205}
206
207/// Coordinates index lifecycle, state transitions, and handler queries
208///
209/// The IndexCoordinator wraps `WorkspaceIndex` with explicit state management,
210/// enabling LSP handlers to query the index readiness and implement appropriate
211/// fallback behavior when the index is not fully ready.
212///
213/// # Architecture
214///
215/// ```text
216/// LspServer
217///   └── IndexCoordinator
218///         ├── state: Arc<RwLock<IndexState>>
219///         ├── index: Arc<WorkspaceIndex>
220///         ├── limits: IndexResourceLimits
221///         ├── caps: IndexPerformanceCaps
222///         ├── metrics: IndexMetrics
223///         └── instrumentation: IndexInstrumentation
224/// ```
225///
226/// # State Management
227///
228/// The coordinator manages three states:
229/// - `Building`: Initial scan or recovery in progress
230/// - `Ready`: Fully indexed and available for queries
231/// - `Degraded`: Available but with reduced functionality
232///
233/// # Performance Characteristics
234///
/// - State checks clone the state under a short-lived read lock (<100ns target)
236/// - State transitions use write locks (rare, <1μs)
237/// - Query dispatch has zero overhead in Ready state
238/// - Degradation detection is atomic (<10ns per check)
239///
240/// # Usage
241///
242/// ```rust,ignore
243/// use perl_parser::workspace_index::{IndexCoordinator, IndexState};
244///
245/// let coordinator = IndexCoordinator::new();
246/// assert!(matches!(coordinator.state(), IndexState::Building { .. }));
247///
248/// // Transition to ready after indexing
249/// coordinator.transition_to_ready(100, 5000);
250/// assert!(matches!(coordinator.state(), IndexState::Ready { .. }));
251///
252/// // Query with degradation handling
253/// let _result = coordinator.query(
254///     |index| index.find_definition("my_function"), // full query
255///     |_index| None                                 // partial fallback
256/// );
257/// ```
pub struct IndexCoordinator {
    /// Current index state (RwLock for state transitions)
    ///
    /// Readers take a snapshot by cloning; every transition method replaces
    /// the whole value under the write lock.
    state: Arc<RwLock<IndexState>>,

    /// The actual workspace index
    index: Arc<WorkspaceIndex>,

    /// Resource limits configuration
    ///
    /// Enforces bounded resource usage to prevent unbounded memory growth:
    /// - max_files: Triggers degradation when file count exceeds limit
    /// - max_total_symbols: Triggers degradation when symbol count exceeds limit
    /// - max_symbols_per_file: Used for per-file validation during indexing
    /// - max_scan_duration_ms: Triggers degradation when a build runs longer than this
    limits: IndexResourceLimits,

    /// Performance caps for early-exit heuristics
    caps: IndexPerformanceCaps,

    /// Runtime metrics for degradation detection
    ///
    /// Tracks the pending-parse count used for parse-storm detection.
    metrics: IndexMetrics,

    /// Instrumentation for lifecycle transitions and durations
    ///
    /// Receives state transitions, phase transitions, and early-exit records.
    instrumentation: IndexInstrumentation,
}
282
283impl std::fmt::Debug for IndexCoordinator {
284    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
285        f.debug_struct("IndexCoordinator")
286            .field("state", &*self.state.read())
287            .field("limits", &self.limits)
288            .field("caps", &self.caps)
289            .finish_non_exhaustive()
290    }
291}
292
293impl IndexCoordinator {
294    /// Create a new coordinator in Building state
295    ///
296    /// Initializes the coordinator with default resource limits and
297    /// an empty workspace index ready for initial scan.
298    ///
299    /// # Returns
300    ///
301    /// A coordinator initialized in `IndexState::Building`.
302    ///
303    /// # Examples
304    ///
305    /// ```rust,ignore
306    /// use perl_parser::workspace_index::IndexCoordinator;
307    ///
308    /// let coordinator = IndexCoordinator::new();
309    /// ```
310    pub fn new() -> Self {
311        Self {
312            state: Arc::new(RwLock::new(IndexState::Building {
313                phase: IndexPhase::Idle,
314                indexed_count: 0,
315                total_count: 0,
316                started_at: Instant::now(),
317            })),
318            index: Arc::new(WorkspaceIndex::new()),
319            limits: IndexResourceLimits::default(),
320            caps: IndexPerformanceCaps::default(),
321            metrics: IndexMetrics::new(),
322            instrumentation: IndexInstrumentation::new(),
323        }
324    }
325
326    /// Create a coordinator with custom resource limits
327    ///
328    /// # Arguments
329    ///
330    /// * `limits` - Custom resource limits for this workspace
331    ///
332    /// # Returns
333    ///
334    /// A coordinator configured with the provided resource limits.
335    ///
336    /// # Examples
337    ///
338    /// ```rust,ignore
339    /// use perl_parser::workspace_index::{IndexCoordinator, IndexResourceLimits};
340    ///
341    /// let limits = IndexResourceLimits::default();
342    /// let coordinator = IndexCoordinator::with_limits(limits);
343    /// ```
344    pub fn with_limits(limits: IndexResourceLimits) -> Self {
345        Self {
346            state: Arc::new(RwLock::new(IndexState::Building {
347                phase: IndexPhase::Idle,
348                indexed_count: 0,
349                total_count: 0,
350                started_at: Instant::now(),
351            })),
352            index: Arc::new(WorkspaceIndex::new()),
353            limits,
354            caps: IndexPerformanceCaps::default(),
355            metrics: IndexMetrics::new(),
356            instrumentation: IndexInstrumentation::new(),
357        }
358    }
359
360    /// Create a coordinator with custom limits and performance caps
361    ///
362    /// # Arguments
363    ///
364    /// * `limits` - Resource limits for this workspace
365    /// * `caps` - Performance caps for indexing budgets
366    pub fn with_limits_and_caps(limits: IndexResourceLimits, caps: IndexPerformanceCaps) -> Self {
367        Self {
368            state: Arc::new(RwLock::new(IndexState::Building {
369                phase: IndexPhase::Idle,
370                indexed_count: 0,
371                total_count: 0,
372                started_at: Instant::now(),
373            })),
374            index: Arc::new(WorkspaceIndex::new()),
375            limits,
376            caps,
377            metrics: IndexMetrics::new(),
378            instrumentation: IndexInstrumentation::new(),
379        }
380    }
381
    /// Get a snapshot of the current state
    ///
    /// Briefly takes the read lock and returns a clone of the state, so
    /// hot-path LSP handlers never hold the lock while acting on it.
386    ///
387    /// # Returns
388    ///
389    /// The current `IndexState` snapshot.
390    ///
391    /// # Examples
392    ///
393    /// ```rust,ignore
394    /// use perl_parser::workspace_index::{IndexCoordinator, IndexState};
395    ///
396    /// let coordinator = IndexCoordinator::new();
397    /// match coordinator.state() {
398    ///     IndexState::Ready { .. } => {
399    ///         // Full query path
400    ///     }
401    ///     _ => {
402    ///         // Degraded/building fallback
403    ///     }
404    /// }
405    /// ```
406    pub fn state(&self) -> IndexState {
407        self.state.read().clone()
408    }
409
410    /// Get reference to the underlying workspace index
411    ///
412    /// Provides direct access to the `WorkspaceIndex` for operations
413    /// that don't require state checking (e.g., document store access).
414    ///
415    /// # Returns
416    ///
417    /// A shared reference to the underlying workspace index.
418    ///
419    /// # Examples
420    ///
421    /// ```rust,ignore
422    /// use perl_parser::workspace_index::IndexCoordinator;
423    ///
424    /// let coordinator = IndexCoordinator::new();
425    /// let _index = coordinator.index();
426    /// ```
    pub fn index(&self) -> &Arc<WorkspaceIndex> {
        // Plain borrow of the shared handle; the lifecycle state is not consulted.
        &self.index
    }
430
431    /// Access the configured resource limits
    pub fn limits(&self) -> &IndexResourceLimits {
        // Limits are fixed at construction; this only borrows them.
        &self.limits
    }
435
436    /// Access the configured performance caps
    pub fn performance_caps(&self) -> &IndexPerformanceCaps {
        // Caps are fixed at construction; this only borrows them.
        &self.caps
    }
440
441    /// Snapshot lifecycle instrumentation (durations, transitions, early exits)
    pub fn instrumentation_snapshot(&self) -> IndexInstrumentationSnapshot {
        // Delegates to the instrumentation recorder, which produces the snapshot value.
        self.instrumentation.snapshot()
    }
445
446    /// Notify of file change (may trigger state transition)
447    ///
448    /// Increments the pending parse count and may transition to degraded
449    /// state if a parse storm is detected.
450    ///
451    /// # Arguments
452    ///
453    /// * `_uri` - URI of the changed file (reserved for future use).
454    ///
455    /// # Returns
456    ///
457    /// Nothing. Updates coordinator metrics and state for the LSP workflow.
458    ///
459    /// # Examples
460    ///
461    /// ```rust,ignore
462    /// use perl_parser::workspace_index::IndexCoordinator;
463    ///
464    /// let coordinator = IndexCoordinator::new();
465    /// coordinator.notify_change("file:///example.pl");
466    /// ```
467    pub fn notify_change(&self, _uri: &str) {
468        let pending = self.metrics.increment_pending_parses();
469
470        // Check for parse storm
471        if self.metrics.is_parse_storm() {
472            self.transition_to_degraded(DegradationReason::ParseStorm { pending_parses: pending });
473        }
474    }
475
476    /// Notify parse completion for the Index/Analyze workflow stages.
477    ///
478    /// Decrements the pending parse count, enforces resource limits, and may
479    /// attempt recovery when parse storms clear.
480    ///
481    /// # Arguments
482    ///
483    /// * `_uri` - URI of the parsed file (reserved for future use).
484    ///
485    /// # Returns
486    ///
487    /// Nothing. Updates coordinator metrics and state for the LSP workflow.
488    ///
489    /// # Examples
490    ///
491    /// ```rust,ignore
492    /// use perl_parser::workspace_index::IndexCoordinator;
493    ///
494    /// let coordinator = IndexCoordinator::new();
495    /// coordinator.notify_parse_complete("file:///example.pl");
496    /// ```
497    pub fn notify_parse_complete(&self, _uri: &str) {
498        let pending = self.metrics.decrement_pending_parses();
499
500        // Check for recovery from parse storm
501        if pending == 0 {
502            if let IndexState::Degraded { reason: DegradationReason::ParseStorm { .. }, .. } =
503                self.state()
504            {
505                // Attempt recovery - transition back to Building for re-scan
506                let mut state = self.state.write();
507                let from_kind = state.kind();
508                self.instrumentation.record_state_transition(from_kind, IndexStateKind::Building);
509                *state = IndexState::Building {
510                    phase: IndexPhase::Idle,
511                    indexed_count: 0,
512                    total_count: 0,
513                    started_at: Instant::now(),
514                };
515            }
516        }
517
518        // Enforce resource limits after parse completion
519        self.enforce_limits();
520    }
521
522    /// Transition to Ready state
523    ///
524    /// Marks the index as fully ready for queries after successful workspace
525    /// scan. Records the file count, symbol count, and completion timestamp.
526    /// Enforces resource limits after transition.
527    ///
528    /// # State Transition Guards
529    ///
530    /// Only valid transitions:
531    /// - `Building` → `Ready` (normal completion)
532    /// - `Degraded` → `Ready` (recovery after fix)
533    ///
534    /// # Arguments
535    ///
536    /// * `file_count` - Total number of files indexed
537    /// * `symbol_count` - Total number of symbols extracted
538    ///
539    /// # Returns
540    ///
541    /// Nothing. The coordinator state is updated in-place.
542    ///
543    /// # Examples
544    ///
545    /// ```rust,ignore
546    /// use perl_parser::workspace_index::IndexCoordinator;
547    ///
548    /// let coordinator = IndexCoordinator::new();
549    /// coordinator.transition_to_ready(100, 5000);
550    /// ```
551    pub fn transition_to_ready(&self, file_count: usize, symbol_count: usize) {
552        let mut state = self.state.write();
553        let from_kind = state.kind();
554
555        // State transition guard: validate current state allows transition to Ready
556        match &*state {
557            IndexState::Building { .. } | IndexState::Degraded { .. } => {
558                // Valid transition - proceed
559                *state =
560                    IndexState::Ready { symbol_count, file_count, completed_at: Instant::now() };
561            }
562            IndexState::Ready { .. } => {
563                // Already Ready - update metrics but don't log as transition
564                *state =
565                    IndexState::Ready { symbol_count, file_count, completed_at: Instant::now() };
566            }
567        }
568        self.instrumentation.record_state_transition(from_kind, IndexStateKind::Ready);
569        drop(state); // Release write lock before checking limits
570
571        // Enforce resource limits after transition
572        self.enforce_limits();
573    }
574
575    /// Transition to Scanning phase (Idle → Scanning)
576    ///
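    /// Marks the index as scanning workspace folders. Counters and the start
    /// time are kept when a build is already in progress, and reset when the
    /// index leaves `Ready` or `Degraded`.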
578    pub fn transition_to_scanning(&self) {
579        let mut state = self.state.write();
580        let from_kind = state.kind();
581
582        match &*state {
583            IndexState::Building { phase, indexed_count, total_count, started_at } => {
584                if *phase != IndexPhase::Scanning {
585                    self.instrumentation.record_phase_transition(*phase, IndexPhase::Scanning);
586                }
587                *state = IndexState::Building {
588                    phase: IndexPhase::Scanning,
589                    indexed_count: *indexed_count,
590                    total_count: *total_count,
591                    started_at: *started_at,
592                };
593            }
594            IndexState::Ready { .. } | IndexState::Degraded { .. } => {
595                self.instrumentation.record_state_transition(from_kind, IndexStateKind::Building);
596                self.instrumentation
597                    .record_phase_transition(IndexPhase::Idle, IndexPhase::Scanning);
598                *state = IndexState::Building {
599                    phase: IndexPhase::Scanning,
600                    indexed_count: 0,
601                    total_count: 0,
602                    started_at: Instant::now(),
603                };
604            }
605        }
606    }
607
608    /// Update scanning progress with the latest discovered file count
609    pub fn update_scan_progress(&self, total_count: usize) {
610        let mut state = self.state.write();
611        if let IndexState::Building { phase, indexed_count, started_at, .. } = &*state {
612            if *phase != IndexPhase::Scanning {
613                self.instrumentation.record_phase_transition(*phase, IndexPhase::Scanning);
614            }
615            *state = IndexState::Building {
616                phase: IndexPhase::Scanning,
617                indexed_count: *indexed_count,
618                total_count,
619                started_at: *started_at,
620            };
621        }
622    }
623
624    /// Transition to Indexing phase (Scanning → Indexing)
625    ///
626    /// Uses the discovered file count as the total index target.
627    pub fn transition_to_indexing(&self, total_count: usize) {
628        let mut state = self.state.write();
629        let from_kind = state.kind();
630
631        match &*state {
632            IndexState::Building { phase, indexed_count, started_at, .. } => {
633                if *phase != IndexPhase::Indexing {
634                    self.instrumentation.record_phase_transition(*phase, IndexPhase::Indexing);
635                }
636                *state = IndexState::Building {
637                    phase: IndexPhase::Indexing,
638                    indexed_count: *indexed_count,
639                    total_count,
640                    started_at: *started_at,
641                };
642            }
643            IndexState::Ready { .. } | IndexState::Degraded { .. } => {
644                self.instrumentation.record_state_transition(from_kind, IndexStateKind::Building);
645                self.instrumentation
646                    .record_phase_transition(IndexPhase::Idle, IndexPhase::Indexing);
647                *state = IndexState::Building {
648                    phase: IndexPhase::Indexing,
649                    indexed_count: 0,
650                    total_count,
651                    started_at: Instant::now(),
652                };
653            }
654        }
655    }
656
657    /// Transition to Building state (Indexing phase)
658    ///
659    /// Marks the index as indexing with a known total file count.
660    pub fn transition_to_building(&self, total_count: usize) {
661        let mut state = self.state.write();
662        let from_kind = state.kind();
663
664        // State transition guard: validate transition is allowed
665        match &*state {
666            IndexState::Degraded { .. } | IndexState::Ready { .. } => {
667                self.instrumentation.record_state_transition(from_kind, IndexStateKind::Building);
668                self.instrumentation
669                    .record_phase_transition(IndexPhase::Idle, IndexPhase::Indexing);
670                *state = IndexState::Building {
671                    phase: IndexPhase::Indexing,
672                    indexed_count: 0,
673                    total_count,
674                    started_at: Instant::now(),
675                };
676            }
677            IndexState::Building { phase, indexed_count, started_at, .. } => {
678                let mut next_phase = *phase;
679                if *phase == IndexPhase::Idle {
680                    self.instrumentation
681                        .record_phase_transition(IndexPhase::Idle, IndexPhase::Indexing);
682                    next_phase = IndexPhase::Indexing;
683                }
684                *state = IndexState::Building {
685                    phase: next_phase,
686                    indexed_count: *indexed_count,
687                    total_count,
688                    started_at: *started_at,
689                };
690            }
691        }
692    }
693
694    /// Update Building state progress for the Index/Analyze workflow stages.
695    ///
    /// Sets the indexed file count to `indexed_count` and checks for scan timeouts.
697    ///
698    /// # Arguments
699    ///
700    /// * `indexed_count` - Number of files indexed so far.
701    ///
702    /// # Returns
703    ///
704    /// Nothing. Updates coordinator state and may transition to `Degraded`.
705    ///
706    /// # Examples
707    ///
708    /// ```rust,ignore
709    /// use perl_parser::workspace_index::IndexCoordinator;
710    ///
711    /// let coordinator = IndexCoordinator::new();
712    /// coordinator.transition_to_building(100);
713    /// coordinator.update_building_progress(1);
714    /// ```
715    pub fn update_building_progress(&self, indexed_count: usize) {
716        let mut state = self.state.write();
717
718        if let IndexState::Building { phase, started_at, total_count, .. } = &*state {
719            let elapsed = started_at.elapsed().as_millis() as u64;
720
721            // Check for scan timeout
722            if elapsed > self.limits.max_scan_duration_ms {
723                // Timeout exceeded - transition to degraded
724                drop(state);
725                self.transition_to_degraded(DegradationReason::ScanTimeout { elapsed_ms: elapsed });
726                return;
727            }
728
729            // Update progress
730            *state = IndexState::Building {
731                phase: *phase,
732                indexed_count,
733                total_count: *total_count,
734                started_at: *started_at,
735            };
736        }
737    }
738
739    /// Transition to Degraded state
740    ///
741    /// Marks the index as degraded with the specified reason. Preserves
742    /// the current symbol count (if available) to indicate partial
743    /// functionality remains.
744    ///
745    /// # Arguments
746    ///
747    /// * `reason` - Why the index degraded (ParseStorm, IoError, etc.)
748    ///
749    /// # Returns
750    ///
751    /// Nothing. The coordinator state is updated in-place.
752    ///
753    /// # Examples
754    ///
755    /// ```rust,ignore
756    /// use perl_parser::workspace_index::{DegradationReason, IndexCoordinator, ResourceKind};
757    ///
758    /// let coordinator = IndexCoordinator::new();
759    /// coordinator.transition_to_degraded(DegradationReason::ResourceLimit {
760    ///     kind: ResourceKind::MaxFiles,
761    /// });
762    /// ```
763    pub fn transition_to_degraded(&self, reason: DegradationReason) {
764        let mut state = self.state.write();
765        let from_kind = state.kind();
766
767        // Get available symbols count from current state
768        let available_symbols = match &*state {
769            IndexState::Ready { symbol_count, .. } => *symbol_count,
770            IndexState::Degraded { available_symbols, .. } => *available_symbols,
771            IndexState::Building { .. } => 0,
772        };
773
774        self.instrumentation.record_state_transition(from_kind, IndexStateKind::Degraded);
775        *state = IndexState::Degraded { reason, available_symbols, since: Instant::now() };
776    }
777
778    /// Check resource limits and return degradation reason if exceeded
779    ///
780    /// Examines current workspace index state against configured resource limits.
781    /// Returns the first exceeded limit found, enabling targeted degradation.
782    ///
783    /// # Returns
784    ///
785    /// * `Some(DegradationReason)` - Resource limit exceeded, contains specific limit type
786    /// * `None` - All limits within acceptable bounds
787    ///
788    /// # Checked Limits
789    ///
790    /// - `max_files`: Total number of indexed files
791    /// - `max_total_symbols`: Aggregate symbol count across workspace
792    ///
793    /// # Performance
794    ///
    /// - Holds a read lock on the indexed-file map for the duration of the check
796    /// - Symbol counting is O(n) where n is number of files
797    ///
798    /// Returns: `Some(DegradationReason)` when a limit is exceeded, otherwise `None`.
799    ///
800    /// # Examples
801    ///
802    /// ```rust,ignore
803    /// use perl_parser::workspace_index::IndexCoordinator;
804    ///
805    /// let coordinator = IndexCoordinator::new();
806    /// let _reason = coordinator.check_limits();
807    /// ```
808    pub fn check_limits(&self) -> Option<DegradationReason> {
809        let files = self.index.files.read();
810
811        // Check max_files limit
812        let file_count = files.len();
813        if file_count > self.limits.max_files {
814            return Some(DegradationReason::ResourceLimit { kind: ResourceKind::MaxFiles });
815        }
816
817        // Check max_total_symbols limit
818        let total_symbols: usize = files.values().map(|fi| fi.symbols.len()).sum();
819        if total_symbols > self.limits.max_total_symbols {
820            return Some(DegradationReason::ResourceLimit { kind: ResourceKind::MaxSymbols });
821        }
822
823        None
824    }
825
826    /// Enforce resource limits and trigger degradation if exceeded
827    ///
828    /// Checks current resource usage against configured limits and automatically
829    /// transitions to Degraded state if any limit is exceeded. This method should
830    /// be called after operations that modify index size (file additions, parse
831    /// completions, etc.).
832    ///
833    /// # State Transitions
834    ///
835    /// - `Ready` → `Degraded(ResourceLimit)` if limits exceeded
836    /// - `Building` → `Degraded(ResourceLimit)` if limits exceeded
837    ///
838    /// # Returns
839    ///
840    /// Nothing. The coordinator state is updated in-place when limits are exceeded.
841    ///
842    /// # Examples
843    ///
844    /// ```rust,ignore
845    /// use perl_parser::workspace_index::IndexCoordinator;
846    ///
847    /// let coordinator = IndexCoordinator::new();
848    /// // ... index some files ...
849    /// coordinator.enforce_limits();  // Check and degrade if needed
850    /// ```
851    pub fn enforce_limits(&self) {
852        if let Some(reason) = self.check_limits() {
853            self.transition_to_degraded(reason);
854        }
855    }
856
857    /// Record an early-exit event for indexing instrumentation
858    pub fn record_early_exit(
859        &self,
860        reason: EarlyExitReason,
861        elapsed_ms: u64,
862        indexed_files: usize,
863        total_files: usize,
864    ) {
865        self.instrumentation.record_early_exit(EarlyExitRecord {
866            reason,
867            elapsed_ms,
868            indexed_files,
869            total_files,
870        });
871    }
872
873    /// Query with automatic degradation handling
874    ///
875    /// Dispatches to full query if index is Ready, or partial query otherwise.
876    /// This pattern enables LSP handlers to provide appropriate responses
877    /// based on index state without explicit state checking.
878    ///
879    /// # Type Parameters
880    ///
881    /// * `T` - Return type of the query functions
882    /// * `F1` - Full query function type accepting `&WorkspaceIndex` and returning `T`
883    /// * `F2` - Partial query function type accepting `&WorkspaceIndex` and returning `T`
884    ///
885    /// # Arguments
886    ///
887    /// * `full_query` - Function to execute when index is Ready
888    /// * `partial_query` - Function to execute when index is Building/Degraded
889    ///
890    /// # Returns
891    ///
892    /// The value returned by the selected query function.
893    ///
894    /// # Examples
895    ///
896    /// ```rust,ignore
897    /// use perl_parser::workspace_index::IndexCoordinator;
898    ///
899    /// let coordinator = IndexCoordinator::new();
900    /// let locations = coordinator.query(
901    ///     |index| index.find_references("my_function"),  // Full workspace search
902    ///     |index| vec![]                                 // Empty fallback
903    /// );
904    /// ```
    pub fn query<T, F1, F2>(&self, full_query: F1, partial_query: F2) -> T
    where
        F1: FnOnce(&WorkspaceIndex) -> T,
        F2: FnOnce(&WorkspaceIndex) -> T,
    {
        // Exactly one of the two closures runs; both receive the same underlying index.
        match self.state() {
            IndexState::Ready { .. } => full_query(&self.index),
            // Every state other than `Ready` takes the partial path.
            _ => partial_query(&self.index),
        }
    }
915}
916
/// `Default` delegates to `IndexCoordinator::new`, so both construction paths
/// produce the same initial coordinator.
impl Default for IndexCoordinator {
    fn default() -> Self {
        Self::new()
    }
}
922
923// ============================================================================
924// Symbol Indexing Types
925// ============================================================================
926
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
/// Symbol kinds for cross-file indexing during Index/Navigate workflows.
///
/// Used as the `kind` component of `SymbolKey`; the derived `Eq`/`Hash` let it
/// take part in that key's equality and hashing.
pub enum SymKind {
    /// Variable symbol ($, @, or % sigil)
    Var,
    /// Subroutine definition (sub foo)
    Sub,
    /// Package declaration (package Foo)
    Pack,
}
937
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
/// A normalized symbol key for cross-file lookups in Index/Navigate workflows.
///
/// Equality and hashing are derived, so two keys match only when package,
/// name, sigil and kind are all equal.
pub struct SymbolKey {
    /// Package name containing this symbol
    pub pkg: Arc<str>,
    /// Bare name without sigil prefix
    pub name: Arc<str>,
    /// Variable sigil ($, @, or %) if applicable
    pub sigil: Option<char>,
    /// Kind of symbol (variable, subroutine, package)
    pub kind: SymKind,
}
950
951/// Normalize a Perl variable name for Index/Analyze workflows.
952///
953/// Extracts an optional sigil and bare name for consistent symbol indexing.
954///
955/// # Arguments
956///
957/// * `name` - Variable name from Perl source, with or without sigil.
958///
959/// # Returns
960///
961/// `(sigil, name)` tuple with the optional sigil and normalized identifier.
962///
963/// # Examples
964///
965/// ```rust,ignore
966/// use perl_parser::workspace_index::normalize_var;
967///
968/// assert_eq!(normalize_var("$count"), (Some('$'), "count"));
969/// assert_eq!(normalize_var("process_emails"), (None, "process_emails"));
970/// ```
pub fn normalize_var(name: &str) -> (Option<char>, &str) {
    let mut chars = name.chars();
    match chars.next() {
        // A leading `$`, `@` or `%` is split off; whatever follows (possibly
        // nothing) is the bare identifier.
        Some(sigil @ ('$' | '@' | '%')) => (Some(sigil), chars.as_str()),
        // Empty input or any other leading character: return the name untouched.
        _ => (None, name),
    }
}
991
992// Using lsp_types for Position and Range
993
#[derive(Debug, Clone, PartialEq, Eq)]
/// Internal location type used during Navigate/Analyze workflows.
///
/// Pairs a file URI with a range; two locations are equal only when both match.
pub struct Location {
    /// File URI where the symbol is located
    pub uri: String,
    /// Line and character range within the file
    pub range: Range,
}
1002
#[derive(Debug, Clone, PartialEq, Eq)]
/// Stable symbol identity returned by cross-file reference queries.
///
/// Carried as the `symbol` field of `CrossFileReferenceQueryResult`.
pub struct SymbolIdentity {
    /// Canonical stable key for the symbol (qualified when available).
    pub stable_key: String,
    /// Bare symbol name.
    pub name: String,
    /// Fully qualified symbol name when available.
    pub qualified_name: Option<String>,
    /// Symbol kind (subroutine, package, variable, ...).
    pub kind: SymbolKind,
}
1015
#[derive(Debug, Clone, PartialEq, Eq)]
/// Read-only cross-file query result used by rename/safe-delete planners.
///
/// Bundles the resolved symbol's identity, its definition site and its
/// reference locations into a single value.
pub struct CrossFileReferenceQueryResult {
    /// Identity for the resolved symbol.
    pub symbol: SymbolIdentity,
    /// Definition site for the resolved symbol.
    pub definition: Location,
    /// All reference locations (including definition) in deterministic order.
    pub references: Vec<Location>,
}
1026
#[derive(Debug, Clone, Serialize, Deserialize)]
/// A symbol in the workspace for Index/Navigate workflows.
///
/// Serializable via serde; `has_body` falls back to `true` when the field is
/// missing from deserialized data.
pub struct WorkspaceSymbol {
    /// Symbol name without package qualification
    pub name: String,
    /// Type of symbol (subroutine, variable, package, etc.)
    pub kind: SymbolKind,
    /// File URI where the symbol is defined
    pub uri: String,
    /// Line and character range of the symbol definition
    pub range: Range,
    /// Fully qualified name including package (e.g., "Package::function")
    pub qualified_name: Option<String>,
    /// POD documentation associated with the symbol
    pub documentation: Option<String>,
    /// Name of the containing package or class
    pub container_name: Option<String>,
    /// Whether this symbol has a body (false for forward declarations).
    /// Defaults to `true` on deserialization via `default_has_body`.
    #[serde(default = "default_has_body")]
    pub has_body: bool,
    /// Workspace folder URI this symbol belongs to (for multi-root workspace support)
    pub workspace_folder_uri: Option<String>,
}
1050
/// Serde default for `WorkspaceSymbol::has_body`: a missing field deserializes as `true`.
fn default_has_body() -> bool {
    true
}
1054
1055// Re-export the unified symbol types from perl-symbol
1056/// Symbol kind enums used during Index/Analyze workflows.
1057pub use perl_symbol::{SymbolKind, VarKind};
1058
#[derive(Debug, Clone)]
/// Reference to a symbol for Navigate/Analyze workflows.
///
/// Stored per file in `FileIndex::references`, grouped by symbol name.
pub struct SymbolReference {
    /// File URI where the reference occurs
    pub uri: String,
    /// Line and character range of the reference
    pub range: Range,
    /// How the symbol is being referenced (definition, usage, etc.)
    pub kind: ReferenceKind,
}
1069
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Classification of how a symbol is referenced in Navigate/Analyze workflows.
///
/// Recorded on every `SymbolReference` as its `kind`.
pub enum ReferenceKind {
    /// Symbol definition site (sub declaration, variable declaration)
    Definition,
    /// General usage of the symbol (function call, method call)
    Usage,
    /// Import via use statement
    Import,
    /// Variable read access
    Read,
    /// Variable write access (assignment target)
    Write,
}
1084
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
/// LSP-compliant workspace symbol for wire format in Navigate/Analyze workflows.
///
/// Field names are serialized in camelCase, and the two optional fields are
/// left out of the output entirely when they are `None`.
pub struct LspWorkspaceSymbol {
    /// Symbol name as displayed to the user
    pub name: String,
    /// LSP symbol kind number (see lsp_types::SymbolKind)
    pub kind: u32,
    /// Location of the symbol definition
    pub location: WireLocation,
    /// Name of the containing symbol (package, class); omitted when `None`
    #[serde(skip_serializing_if = "Option::is_none")]
    pub container_name: Option<String>,
    /// Workspace folder URI this symbol belongs to (for multi-root workspace disambiguation);
    /// omitted when `None`
    #[serde(skip_serializing_if = "Option::is_none")]
    pub workspace_folder_uri: Option<String>,
}
1102
1103impl From<&WorkspaceSymbol> for LspWorkspaceSymbol {
1104    fn from(sym: &WorkspaceSymbol) -> Self {
1105        let range = WireRange {
1106            start: WirePosition { line: sym.range.start.line, character: sym.range.start.column },
1107            end: WirePosition { line: sym.range.end.line, character: sym.range.end.column },
1108        };
1109
1110        Self {
1111            name: sym.name.clone(),
1112            kind: sym.kind.to_lsp_kind(),
1113            location: WireLocation { uri: sym.uri.clone(), range },
1114            container_name: sym.container_name.clone(),
1115            workspace_folder_uri: sym.workspace_folder_uri.clone(),
1116        }
1117    }
1118}
1119
/// File-level index data.
///
/// One entry per indexed file; instances are the values of `WorkspaceIndex::files`.
/// All fields are private to this module.
#[derive(Default, Clone)]
pub struct FileIndex {
    /// Canonical file URI for this index entry.
    source_uri: String,
    /// Symbols defined in this file
    symbols: Vec<WorkspaceSymbol>,
    /// References in this file (symbol name -> references)
    references: HashMap<String, Vec<SymbolReference>>,
    /// Dependencies (modules this file imports)
    dependencies: HashSet<String>,
    /// Content hash for early-exit optimization
    content_hash: u64,
    /// Workspace folder URI this file belongs to (for multi-root workspace support)
    folder_uri: Option<String>,
}
1136
/// Write-through semantic fact storage for one indexed file.
///
/// Holds the four fact categories (anchors, entities, occurrences, edges) for a
/// file together with a whole-file hash and optional per-category hashes.
#[derive(Clone, Debug)]
pub struct FileFactShard {
    /// Canonical file URI for this shard.
    pub source_uri: String,
    /// Stable file identifier derived from normalized URI.
    pub file_id: FileId,
    /// Whole-file content hash used for stale-shard replacement.
    pub content_hash: u64,
    /// Optional hash for the anchor category, for change diagnostics.
    pub anchors_hash: Option<u64>,
    /// Optional hash for the entity category, for change diagnostics.
    pub entities_hash: Option<u64>,
    /// Optional hash for the occurrence category, for change diagnostics.
    pub occurrences_hash: Option<u64>,
    /// Optional hash for the edge category, for change diagnostics.
    pub edges_hash: Option<u64>,
    /// Anchor facts for this file.
    pub anchors: Vec<AnchorFact>,
    /// Entity facts for this file.
    pub entities: Vec<EntityFact>,
    /// Occurrence facts for this file.
    pub occurrences: Vec<perl_semantic_facts::OccurrenceFact>,
    /// Edge facts for this file.
    pub edges: Vec<EdgeFact>,
}
1163
/// Thread-safe workspace index.
///
/// Each shared table lives behind its own `Arc<RwLock<..>>`, so the tables are
/// locked independently of one another.
pub struct WorkspaceIndex {
    /// Index data per file URI (normalized key -> data)
    files: Arc<RwLock<HashMap<String, FileIndex>>>,
    /// Global symbol multimap (qualified/bare name -> ordered definition candidates)
    symbols: Arc<RwLock<HashMap<String, Vec<DefinitionCandidate>>>>,
    /// Global reference index (symbol name -> locations across all files)
    ///
    /// Aggregated from per-file `FileIndex::references` during `index_file()`.
    /// Provides O(1) lookup for `find_references()` instead of iterating all files.
    global_references: Arc<RwLock<HashMap<String, Vec<Location>>>>,
    /// Write-through semantic fact shards keyed by normalized URI.
    fact_shards: Arc<RwLock<HashMap<String, FileFactShard>>>,
    /// Semantic cross-file reference index (typed occurrences by name and entity).
    semantic_reference_index: Arc<RwLock<ReferenceIndex>>,
    /// Semantic cross-file import/export index.
    semantic_import_export_index: Arc<RwLock<ImportExportIndex>>,
    /// Document store for in-memory text
    document_store: DocumentStore,
    /// Workspace folder URIs for multi-root workspace support
    ///
    /// Used to determine which workspace folder a file belongs to for
    /// proper folder attribution in multi-root workspaces.
    workspace_folders: Arc<RwLock<Vec<String>>>,
}
1189
/// One definition site stored in the global symbol multimap.
///
/// Several candidates may share a name; they are kept ordered by
/// `WorkspaceIndex::definition_candidate_sort_key`.
#[derive(Debug, Clone, Eq, PartialEq)]
struct DefinitionCandidate {
    /// Where the definition lives.
    location: Location,
    /// Kind of the defined symbol; feeds the rank used for ordering.
    kind: SymbolKind,
}
1195
1196impl WorkspaceIndex {
1197    fn location_sort_key(location: &Location) -> (&str, u32, u32, u32, u32) {
1198        (
1199            location.uri.as_str(),
1200            location.range.start.line,
1201            location.range.start.column,
1202            location.range.end.line,
1203            location.range.end.column,
1204        )
1205    }
1206
1207    fn sort_locations_deterministically(locations: &mut [Location]) {
1208        locations.sort_by(|left, right| {
1209            Self::location_sort_key(left).cmp(&Self::location_sort_key(right))
1210        });
1211    }
1212
1213    fn definition_candidate_sort_key(
1214        candidate: &DefinitionCandidate,
1215    ) -> (u8, &str, u32, u32, u32, u32) {
1216        let rank = match candidate.kind {
1217            SymbolKind::Subroutine | SymbolKind::Method => 0,
1218            SymbolKind::Constant => 1,
1219            _ => 2,
1220        };
1221        (
1222            rank,
1223            candidate.location.uri.as_str(),
1224            candidate.location.range.start.line,
1225            candidate.location.range.start.column,
1226            candidate.location.range.end.line,
1227            candidate.location.range.end.column,
1228        )
1229    }
1230
1231    fn rebuild_symbol_cache(
1232        files: &HashMap<String, FileIndex>,
1233        symbols: &mut HashMap<String, Vec<DefinitionCandidate>>,
1234    ) {
1235        symbols.clear();
1236
1237        for file_index in files.values() {
1238            for symbol in &file_index.symbols {
1239                if let Some(ref qname) = symbol.qualified_name {
1240                    symbols.entry(qname.clone()).or_default().push(DefinitionCandidate {
1241                        location: Location { uri: symbol.uri.clone(), range: symbol.range },
1242                        kind: symbol.kind,
1243                    });
1244                }
1245                symbols.entry(symbol.name.clone()).or_default().push(DefinitionCandidate {
1246                    location: Location { uri: symbol.uri.clone(), range: symbol.range },
1247                    kind: symbol.kind,
1248                });
1249            }
1250        }
1251        for entries in symbols.values_mut() {
1252            entries.sort_by(|left, right| {
1253                Self::definition_candidate_sort_key(left)
1254                    .cmp(&Self::definition_candidate_sort_key(right))
1255            });
1256            entries.dedup();
1257        }
1258    }
1259
1260    /// Incrementally remove one file's symbols from the global cache,
1261    /// re-inserting shadowed symbols from remaining files.
1262    fn incremental_remove_symbols(
1263        files: &HashMap<String, FileIndex>,
1264        symbols: &mut HashMap<String, Vec<DefinitionCandidate>>,
1265        old_file_index: &FileIndex,
1266    ) {
1267        let mut affected_names: Vec<String> = Vec::new();
1268        for sym in &old_file_index.symbols {
1269            if let Some(ref qname) = sym.qualified_name {
1270                let mut remove_key = false;
1271                if let Some(entries) = symbols.get_mut(qname) {
1272                    entries.retain(|candidate| candidate.location.uri != sym.uri);
1273                    remove_key = entries.is_empty();
1274                }
1275                if remove_key {
1276                    symbols.remove(qname);
1277                    affected_names.push(qname.clone());
1278                }
1279            }
1280            let mut remove_key = false;
1281            if let Some(entries) = symbols.get_mut(&sym.name) {
1282                entries.retain(|candidate| candidate.location.uri != sym.uri);
1283                remove_key = entries.is_empty();
1284            }
1285            if remove_key {
1286                symbols.remove(&sym.name);
1287                affected_names.push(sym.name.clone());
1288            }
1289        }
1290        if !affected_names.is_empty() {
1291            symbols.clear();
1292            for file_index in files
1293                .values()
1294                .filter(|file_index| file_index.source_uri != old_file_index.source_uri)
1295            {
1296                for symbol in &file_index.symbols {
1297                    if let Some(ref qname) = symbol.qualified_name {
1298                        symbols.entry(qname.clone()).or_default().push(DefinitionCandidate {
1299                            location: Location { uri: symbol.uri.clone(), range: symbol.range },
1300                            kind: symbol.kind,
1301                        });
1302                    }
1303                    symbols.entry(symbol.name.clone()).or_default().push(DefinitionCandidate {
1304                        location: Location { uri: symbol.uri.clone(), range: symbol.range },
1305                        kind: symbol.kind,
1306                    });
1307                }
1308            }
1309            for entries in symbols.values_mut() {
1310                entries.sort_by(|left, right| {
1311                    Self::definition_candidate_sort_key(left)
1312                        .cmp(&Self::definition_candidate_sort_key(right))
1313                });
1314                entries.dedup();
1315            }
1316        }
1317    }
1318
1319    /// Incrementally add one file's symbols to the global cache.
1320    fn incremental_add_symbols(
1321        symbols: &mut HashMap<String, Vec<DefinitionCandidate>>,
1322        file_index: &FileIndex,
1323    ) {
1324        for sym in &file_index.symbols {
1325            if let Some(ref qname) = sym.qualified_name {
1326                symbols.entry(qname.clone()).or_default().push(DefinitionCandidate {
1327                    location: Location { uri: sym.uri.clone(), range: sym.range },
1328                    kind: sym.kind,
1329                });
1330            }
1331            symbols.entry(sym.name.clone()).or_default().push(DefinitionCandidate {
1332                location: Location { uri: sym.uri.clone(), range: sym.range },
1333                kind: sym.kind,
1334            });
1335        }
1336        for entries in symbols.values_mut() {
1337            entries.sort_by(|left, right| {
1338                Self::definition_candidate_sort_key(left)
1339                    .cmp(&Self::definition_candidate_sort_key(right))
1340            });
1341            entries.dedup();
1342        }
1343    }
1344
1345    /// Determine the workspace folder URI for a given file URI.
1346    ///
1347    /// Returns the workspace folder URI that contains the given file URI.
1348    /// This is used for multi-root workspace support to properly attribute
1349    /// files and symbols to their originating workspace folder.
1350    ///
1351    /// # Arguments
1352    ///
1353    /// * `file_uri` - The file URI to find the containing workspace folder for
1354    ///
1355    /// # Returns
1356    ///
1357    /// `Some(folder_uri)` if the file is within a workspace folder, `None` otherwise.
1358    ///
1359    /// # Examples
1360    ///
1361    /// ```rust,ignore
1362    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
1363    ///
1364    /// let index = WorkspaceIndex::new();
1365    /// index.set_workspace_folders(vec![
1366    ///     "file:///project1".to_string(),
1367    ///     "file:///project2".to_string(),
1368    /// ]);
1369    ///
1370    /// let folder = index.determine_folder_uri("file:///project1/src/main.pl");
1371    /// assert_eq!(folder, Some("file:///project1".to_string()));
1372    /// ```
1373    fn determine_folder_uri(&self, file_uri: &str) -> Option<String> {
1374        let folders = self.workspace_folders.read();
1375        let mut best_match: Option<&String> = None;
1376        for folder_uri in folders.iter() {
1377            // Check if the file URI starts with the folder URI
1378            // We need to ensure proper URI matching (with or without trailing slash)
1379            let folder_with_slash = if folder_uri.ends_with('/') {
1380                folder_uri.clone()
1381            } else {
1382                format!("{}/", folder_uri)
1383            };
1384            if file_uri.starts_with(&folder_with_slash) || file_uri == folder_uri {
1385                match best_match {
1386                    Some(existing) if existing.len() >= folder_uri.len() => {}
1387                    _ => best_match = Some(folder_uri),
1388                }
1389            }
1390        }
1391        best_match.cloned()
1392    }
1393
1394    fn find_definition_in_files(
1395        files: &HashMap<String, FileIndex>,
1396        symbol_name: &str,
1397        uri_filter: Option<&str>,
1398    ) -> Option<(Location, String)> {
1399        let mut candidates: Vec<(Location, String)> = Vec::new();
1400        for file_index in files.values() {
1401            if let Some(filter) = uri_filter
1402                && file_index.symbols.first().is_some_and(|symbol| symbol.uri != filter)
1403            {
1404                continue;
1405            }
1406
1407            for symbol in &file_index.symbols {
1408                if symbol.name == symbol_name
1409                    || symbol.qualified_name.as_deref() == Some(symbol_name)
1410                {
1411                    candidates.push((
1412                        Location { uri: symbol.uri.clone(), range: symbol.range },
1413                        symbol.uri.clone(),
1414                    ));
1415                }
1416            }
1417        }
1418
1419        candidates.sort_by(|left, right| {
1420            Self::location_sort_key(&left.0).cmp(&Self::location_sort_key(&right.0))
1421        });
1422        candidates.into_iter().next()
1423    }
1424
1425    fn find_symbol_by_definition(
1426        &self,
1427        definition: &Location,
1428        symbol_name: &str,
1429    ) -> Option<WorkspaceSymbol> {
1430        let files = self.files.read();
1431        files
1432            .values()
1433            .flat_map(|file_index| file_index.symbols.iter())
1434            .filter(|symbol| {
1435                symbol.uri == definition.uri
1436                    && symbol.range == definition.range
1437                    && (symbol.name == symbol_name
1438                        || symbol.qualified_name.as_deref() == Some(symbol_name))
1439            })
1440            .min_by(|left, right| {
1441                (
1442                    left.qualified_name.as_deref().unwrap_or_default(),
1443                    left.name.as_str(),
1444                    left.kind.to_lsp_kind(),
1445                )
1446                    .cmp(&(
1447                        right.qualified_name.as_deref().unwrap_or_default(),
1448                        right.name.as_str(),
1449                        right.kind.to_lsp_kind(),
1450                    ))
1451            })
1452            .cloned()
1453    }
1454
1455    fn has_unique_symbol_name_and_kind(&self, target: &WorkspaceSymbol) -> bool {
1456        let files = self.files.read();
1457        files
1458            .values()
1459            .flat_map(|file_index| file_index.symbols.iter())
1460            .filter(|symbol| symbol.name == target.name && symbol.kind == target.kind)
1461            .take(2)
1462            .count()
1463            == 1
1464    }
1465
1466    fn collect_symbol_references(&self, symbol: &WorkspaceSymbol) -> Vec<Location> {
1467        let mut names_to_query: Vec<&str> = Vec::new();
1468        if let Some(qualified_name) = symbol.qualified_name.as_deref() {
1469            names_to_query.push(qualified_name);
1470            if self.has_unique_symbol_name_and_kind(symbol) {
1471                names_to_query.push(symbol.name.as_str());
1472            }
1473        } else {
1474            names_to_query.push(symbol.name.as_str());
1475        }
1476
1477        let global_refs = self.global_references.read();
1478        let mut seen: HashSet<(String, u32, u32, u32, u32)> = HashSet::new();
1479        let mut locations = Vec::new();
1480
1481        for symbol_name in names_to_query {
1482            if let Some(refs) = global_refs.get(symbol_name) {
1483                for location in refs {
1484                    let key = (
1485                        location.uri.clone(),
1486                        location.range.start.line,
1487                        location.range.start.column,
1488                        location.range.end.line,
1489                        location.range.end.column,
1490                    );
1491                    if seen.insert(key) {
1492                        locations.push(location.clone());
1493                    }
1494                }
1495            }
1496        }
1497        drop(global_refs);
1498
1499        Self::sort_locations_deterministically(&mut locations);
1500        locations
1501    }
1502
1503    /// Create a new empty index
1504    ///
1505    /// # Returns
1506    ///
1507    /// A workspace index with empty file and symbol tables.
1508    ///
1509    /// # Examples
1510    ///
1511    /// ```rust,ignore
1512    /// use perl_parser::workspace_index::WorkspaceIndex;
1513    ///
1514    /// let index = WorkspaceIndex::new();
1515    /// assert!(!index.has_symbols());
1516    /// ```
1517    pub fn new() -> Self {
1518        Self {
1519            files: Arc::new(RwLock::new(HashMap::new())),
1520            symbols: Arc::new(RwLock::new(HashMap::new())),
1521            global_references: Arc::new(RwLock::new(HashMap::new())),
1522            fact_shards: Arc::new(RwLock::new(HashMap::new())),
1523            semantic_reference_index: Arc::new(RwLock::new(ReferenceIndex::new())),
1524            semantic_import_export_index: Arc::new(RwLock::new(ImportExportIndex::new())),
1525            document_store: DocumentStore::new(),
1526            workspace_folders: Arc::new(RwLock::new(Vec::new())),
1527        }
1528    }
1529
1530    /// Create a workspace index with pre-allocated capacity.
1531    ///
1532    /// Pre-allocating reduces the number of rehash operations during large-workspace
1533    /// startup. Use this instead of `new()` when the approximate workspace size is
1534    /// known in advance (e.g. from a file discovery scan).
1535    ///
1536    /// # Arguments
1537    ///
1538    /// * `estimated_files` - Expected number of source files in the workspace.
1539    /// * `avg_symbols_per_file` - Expected average number of symbols per file.
1540    ///
1541    /// # Panics
1542    ///
1543    /// Does not panic. Overflow is prevented via `saturating_mul` and an upper cap
1544    /// on the symbol/reference map capacity.
1545    ///
1546    /// # Examples
1547    ///
1548    /// ```rust,ignore
1549    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
1550    ///
1551    /// let index = WorkspaceIndex::with_capacity(1000, 20);
1552    /// assert!(!index.has_symbols());
1553    /// ```
1554    pub fn with_capacity(estimated_files: usize, avg_symbols_per_file: usize) -> Self {
1555        // Each symbol is stored twice (qualified + bare name) due to dual indexing.
1556        let sym_cap =
1557            estimated_files.saturating_mul(avg_symbols_per_file).saturating_mul(2).min(1_000_000);
1558        let ref_cap = (sym_cap / 4).min(1_000_000);
1559        Self {
1560            files: Arc::new(RwLock::new(HashMap::with_capacity(estimated_files))),
1561            symbols: Arc::new(RwLock::new(HashMap::with_capacity(sym_cap))),
1562            global_references: Arc::new(RwLock::new(HashMap::with_capacity(ref_cap))),
1563            fact_shards: Arc::new(RwLock::new(HashMap::with_capacity(estimated_files))),
1564            semantic_reference_index: Arc::new(RwLock::new(ReferenceIndex::new())),
1565            semantic_import_export_index: Arc::new(RwLock::new(ImportExportIndex::new())),
1566            document_store: DocumentStore::new(),
1567            workspace_folders: Arc::new(RwLock::new(Vec::new())),
1568        }
1569    }
1570
1571    /// Set the workspace folder URIs for multi-root workspace support.
1572    ///
1573    /// This method updates the list of workspace folders that the index
1574    /// uses to determine folder attribution for files and symbols.
1575    ///
1576    /// # Arguments
1577    ///
1578    /// * `folders` - A vector of workspace folder URIs
1579    ///
1580    /// # Examples
1581    ///
1582    /// ```rust,ignore
1583    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
1584    ///
1585    /// let index = WorkspaceIndex::new();
1586    /// index.set_workspace_folders(vec![
1587    ///     "file:///project1".to_string(),
1588    ///     "file:///project2".to_string(),
1589    /// ]);
1590    /// ```
1591    pub fn set_workspace_folders(&self, folders: Vec<String>) {
1592        let mut workspace_folders = self.workspace_folders.write();
1593        *workspace_folders = folders;
1594    }
1595
1596    /// Get the current workspace folder URIs.
1597    ///
1598    /// # Returns
1599    ///
1600    /// A vector of workspace folder URIs.
1601    #[must_use]
1602    pub fn workspace_folders(&self) -> Vec<String> {
1603        self.workspace_folders.read().clone()
1604    }
1605
    /// Normalize a URI to a consistent form using proper URI handling.
    ///
    /// Thin wrapper that delegates to `perl_uri::normalize_uri`; no additional
    /// processing happens here.
    fn normalize_uri(uri: &str) -> String {
        perl_uri::normalize_uri(uri)
    }
1610
1611    /// Remove a file's contributions from the global reference index.
1612    ///
1613    /// Retains only entries whose URI does not match `file_uri`.
1614    /// Empty keys are removed to avoid unbounded map growth.
1615    fn remove_file_global_refs(
1616        global_refs: &mut HashMap<String, Vec<Location>>,
1617        file_index: &FileIndex,
1618        file_uri: &str,
1619    ) {
1620        for name in file_index.references.keys() {
1621            if let Some(locs) = global_refs.get_mut(name) {
1622                locs.retain(|loc| loc.uri != file_uri);
1623                if locs.is_empty() {
1624                    global_refs.remove(name);
1625                }
1626            }
1627        }
1628    }
1629
    /// Index (or re-index) a single file from its URI and full text content.
    ///
    /// The call is a no-op when the file is already indexed with an identical
    /// content hash. Otherwise the document store is refreshed, the text is
    /// parsed, and this file's entry in the file map, the global symbol cache,
    /// the global reference index, the fact shard, and the import/export index
    /// are replaced with data derived from the new text.
    ///
    /// # Arguments
    ///
    /// * `uri` - File URI identifying the document
    /// * `text` - Full Perl source text for indexing
    ///
    /// # Returns
    ///
    /// `Ok(())` when indexing succeeds (including the unchanged-content early
    /// exit), or an error message otherwise.
    ///
    /// # Errors
    ///
    /// Returns `Err("Parse error: ...")` if parsing fails, or
    /// `Err("Document not found")` if the document cannot be read back from the
    /// document store. In both cases the document store has already been
    /// updated with `text`, while the previously indexed data for the file is
    /// left untouched.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
    /// use url::Url;
    ///
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let index = WorkspaceIndex::new();
    /// let uri = Url::parse("file:///example.pl")?;
    /// index.index_file(uri, "sub hello { return 1; }".to_string())?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn index_file(&self, uri: Url, text: String) -> Result<(), String> {
        let uri_str = uri.to_string();

        // Compute content hash for early-exit optimization
        let mut hasher = DefaultHasher::new();
        text.hash(&mut hasher);
        let content_hash = hasher.finish();

        // Check if content is unchanged (early-exit optimization).
        // The read guard is scoped so it is released before any write lock
        // is requested further down.
        let key = DocumentStore::uri_key(&uri_str);
        {
            let files = self.files.read();
            if let Some(existing_index) = files.get(&key) {
                if existing_index.content_hash == content_hash {
                    // Content unchanged, skip re-indexing
                    return Ok(());
                }
            }
        }

        // Update document store. The version argument is always the literal 1
        // on this path. This happens before parsing, so the store holds the
        // new text even if the parse below fails.
        if self.document_store.is_open(&uri_str) {
            self.document_store.update(&uri_str, 1, text.clone());
        } else {
            self.document_store.open(uri_str.clone(), 1, text.clone());
        }

        // Parse the file
        let mut parser = Parser::new(&text);
        let ast = match parser.parse() {
            Ok(ast) => ast,
            Err(e) => return Err(format!("Parse error: {}", e)),
        };

        // Get the document for line index
        let mut doc = self.document_store.get(&uri_str).ok_or("Document not found")?;

        // Determine workspace folder URI from the file URI
        let folder_uri = self.determine_folder_uri(&uri_str);

        // Extract symbols and references
        let mut file_index = FileIndex {
            source_uri: uri_str.clone(),
            content_hash,
            folder_uri: folder_uri.clone(),
            ..Default::default()
        };
        let mut visitor = IndexVisitor::new(&mut doc, uri_str.clone(), folder_uri);
        visitor.visit(&ast, &mut file_index);

        // Prefer the shard built directly from the AST; fall back to the shard
        // derived from `file_index` only when the canonical one has no anchors,
        // entities, occurrences, or edges at all.
        let canonical_shard =
            Self::build_canonical_fact_shard_for_ast(&uri_str, content_hash, &ast);
        let fact_shard = if canonical_shard.anchors.is_empty()
            && canonical_shard.entities.is_empty()
            && canonical_shard.occurrences.is_empty()
            && canonical_shard.edges.is_empty()
        {
            Self::build_fact_shard(&uri_str, content_hash, &file_index)
        } else {
            canonical_shard
        };

        // Extract import specs from the AST — populates ImportExportIndex so
        // that `Foo->import(@names)` dynamic-import suppression is live in
        // production.  This runs outside the write lock to avoid holding it
        // longer than necessary.
        //
        // Lock ordering note: `semantic_import_export_index` is acquired write
        // separately from (and after) `files`/`symbols`/`global_references` to
        // match the consistent lock-order used throughout this file.
        let file_id = Self::hash_uri_to_file_id(&uri_str);
        let import_specs =
            crate::semantic::workspace_import_extractor::extract_import_specs(&ast, file_id);
        let use_lib_facts =
            crate::semantic::workspace_import_extractor::extract_use_lib_facts(&ast, file_id);

        // Update the index, refresh the global symbol cache, and replace this file's
        // contribution in the global reference index.
        {
            let mut files = self.files.write();

            // Remove stale global references from previous version of this file.
            // The `global_references` guard is dropped at the end of this `if`.
            if let Some(old_index) = files.get(&key) {
                let mut global_refs = self.global_references.write();
                Self::remove_file_global_refs(&mut global_refs, old_index, &uri_str);
            }

            // Incrementally remove old symbols before inserting new file
            if let Some(old_index) = files.get(&key) {
                let mut symbols = self.symbols.write();
                Self::incremental_remove_symbols(&files, &mut symbols, old_index);
                drop(symbols);
            }
            files.insert(key.clone(), file_index);
            // From here on the `symbols` write guard stays held until the end
            // of the enclosing block.
            let mut symbols = self.symbols.write();
            if let Some(new_index) = files.get(&key) {
                Self::incremental_add_symbols(&mut symbols, new_index);
            }

            // Publish the new file's references into the global reference index.
            if let Some(file_index) = files.get(&key) {
                let mut global_refs = self.global_references.write();
                for (name, refs) in &file_index.references {
                    let entry = global_refs.entry(name.clone()).or_default();
                    for reference in refs {
                        entry.push(Location { uri: reference.uri.clone(), range: reference.range });
                    }
                }
            }
            // Called while `files` and `symbols` are still write-locked; the
            // `global_references` guard above has already been released.
            self.replace_fact_shard_incremental(&key, fact_shard);
        }

        // Update the import/export index with the freshly extracted import specs
        // and use-lib facts.  Stale entries for this URI are removed first
        // (incremental re-indexing).  This is done after the main write lock
        // block to follow the established lock ordering
        // (shards → reference_index → import_export_index).
        {
            let mut ie_idx = self.semantic_import_export_index.write();
            ie_idx.remove_file_imports(&uri_str);
            ie_idx.add_file_imports(&uri_str, file_id, import_specs);
            ie_idx.remove_file_use_lib(&uri_str);
            ie_idx.add_file_use_lib(&uri_str, file_id, use_lib_facts);
        }

        Ok(())
    }
1786
    /// Remove a file and everything it contributed from the index.
    ///
    /// The URI is normalized first. The document is always closed in the
    /// document store; the remaining cleanup (fact shard, semantic indexes,
    /// symbol cache, global references) only runs when the file was actually
    /// present in the file map.
    ///
    /// # Arguments
    ///
    /// * `uri` - File URI (string form) to remove
    ///
    /// # Returns
    ///
    /// Nothing. The index is updated in-place.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
    ///
    /// let index = WorkspaceIndex::new();
    /// index.remove_file("file:///example.pl");
    /// ```
    pub fn remove_file(&self, uri: &str) {
        let uri_str = Self::normalize_uri(uri);
        let key = DocumentStore::uri_key(&uri_str);

        // Remove from document store
        self.document_store.close(&uri_str);

        // Remove file index. The `files` write guard is held for the rest of
        // the function, so all cleanup below happens under it.
        let mut files = self.files.write();
        if let Some(file_index) = files.remove(&key) {
            self.fact_shards.write().remove(&key);

            // Clean up semantic cross-file indexes for this file.
            self.semantic_reference_index.write().remove_file(&uri_str);
            {
                let mut ie_idx = self.semantic_import_export_index.write();
                ie_idx.remove_file_imports(&uri_str);
                ie_idx.remove_module_exports(&uri_str);
                ie_idx.remove_file_use_lib(&uri_str);
            }

            // Incrementally remove symbols and re-insert any shadowed names.
            let mut symbols = self.symbols.write();
            Self::incremental_remove_symbols(&files, &mut symbols, &file_index);

            // Defensive sweep: purge any remaining cache entries whose value
            // points to this file's URI.  incremental_remove_symbols already
            // handles known symbol names; this sweep guarantees no stale
            // candidates survive even when:
            //   * the file had zero symbols (nothing for incremental_remove
            //     to walk), or
            //   * a symbol's stored uri differs from the canonical normalize_uri
            //     output (URI normalization edge cases).
            // Match against every URI spelling observed in this file index plus
            // the canonical uri_str so raw/normalized variants are all caught.
            let mut removed_uris = vec![uri_str.as_str()];
            for observed_uri in file_index.symbols.iter().map(|s| s.uri.as_str()).chain(
                file_index.references.values().flat_map(|refs| refs.iter().map(|r| r.uri.as_str())),
            ) {
                // Keep the list free of duplicates; it is scanned linearly below.
                if !removed_uris.contains(&observed_uri) {
                    removed_uris.push(observed_uri);
                }
            }
            // Drop matching candidates, then drop names left with no candidates.
            symbols.retain(|_, candidates| {
                candidates.retain(|candidate| {
                    let cand_uri = candidate.location.uri.as_str();
                    !removed_uris.contains(&cand_uri)
                });
                !candidates.is_empty()
            });

            // Remove from global reference index. Two-phase cleanup: first
            // remove names this file was known to reference (cheap path), then
            // a defensive sweep over all remaining entries to catch any that
            // were inserted under names not present in this file's
            // FileIndex::references map (e.g. via aggregated/global insertion
            // paths). Empty buckets are dropped.
            let mut global_refs = self.global_references.write();
            Self::remove_file_global_refs(&mut global_refs, &file_index, &uri_str);
            global_refs.retain(|_, locs| {
                locs.retain(|loc| !removed_uris.contains(&loc.uri.as_str()));
                !locs.is_empty()
            });
        }
    }
1870
1871    /// Remove a file from the index (URL variant for compatibility)
1872    ///
1873    /// # Arguments
1874    ///
1875    /// * `uri` - File URI as a parsed `Url`
1876    ///
1877    /// # Returns
1878    ///
1879    /// Nothing. The index is updated in-place.
1880    ///
1881    /// # Examples
1882    ///
1883    /// ```rust,ignore
1884    /// use perl_parser::workspace_index::WorkspaceIndex;
1885    /// use url::Url;
1886    ///
1887    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1888    /// let index = WorkspaceIndex::new();
1889    /// let uri = Url::parse("file:///example.pl")?;
1890    /// index.remove_file_url(&uri);
1891    /// # Ok(())
1892    /// # }
1893    /// ```
1894    pub fn remove_file_url(&self, uri: &Url) {
1895        self.remove_file(uri.as_str())
1896    }
1897
    /// Clear a file from the index.
    ///
    /// Pure alias: forwards `uri` unchanged to `remove_file`, so the two
    /// methods behave identically.
    ///
    /// # Arguments
    ///
    /// * `uri` - File URI (string form) to remove
    ///
    /// # Returns
    ///
    /// Nothing. The index is updated in-place.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
    ///
    /// let index = WorkspaceIndex::new();
    /// index.clear_file("file:///example.pl");
    /// ```
    pub fn clear_file(&self, uri: &str) {
        self.remove_file(uri);
    }
1919
1920    /// Clear a file from the index (URL variant for compatibility)
1921    ///
1922    /// # Arguments
1923    ///
1924    /// * `uri` - File URI as a parsed `Url`
1925    ///
1926    /// # Returns
1927    ///
1928    /// Nothing. The index is updated in-place.
1929    ///
1930    /// # Examples
1931    ///
1932    /// ```rust,ignore
1933    /// use perl_parser::workspace_index::WorkspaceIndex;
1934    /// use url::Url;
1935    ///
1936    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1937    /// let index = WorkspaceIndex::new();
1938    /// let uri = Url::parse("file:///example.pl")?;
1939    /// index.clear_file_url(&uri);
1940    /// # Ok(())
1941    /// # }
1942    /// ```
1943    pub fn clear_file_url(&self, uri: &Url) {
1944        self.clear_file(uri.as_str())
1945    }
1946
1947    /// Remove all files from a specific workspace folder.
1948    ///
1949    /// This method removes all indexed files that belong to the given
1950    /// workspace folder URI. This is useful when a workspace folder is
1951    /// removed from the multi-root workspace.
1952    ///
1953    /// # Arguments
1954    ///
1955    /// * `folder_uri` - The workspace folder URI to remove files from
1956    ///
1957    /// # Examples
1958    ///
1959    /// ```rust,ignore
1960    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
1961    ///
1962    /// let index = WorkspaceIndex::new();
1963    /// // Index files from multiple folders...
1964    /// index.remove_folder("file:///project1");
1965    /// ```
1966    pub fn remove_folder(&self, folder_uri: &str) {
1967        let mut uris_to_remove = Vec::new();
1968        let files = self.files.read();
1969
1970        // Collect all files that belong to this folder
1971        for file_index in files.values() {
1972            if file_index.folder_uri.as_deref() == Some(folder_uri) {
1973                uris_to_remove.push(file_index.source_uri.clone());
1974            }
1975        }
1976        drop(files);
1977
1978        // Remove each file through the full removal path to keep
1979        // symbol/reference caches and document store in sync.
1980        for uri in uris_to_remove {
1981            self.remove_file(&uri);
1982        }
1983    }
1984
1985    #[cfg(not(target_arch = "wasm32"))]
1986    /// Index a file from a URI string for the Index/Analyze workflow.
1987    ///
1988    /// Accepts either a `file://` URI or a filesystem path. Not available on
1989    /// wasm32 targets (requires filesystem path conversion).
1990    ///
1991    /// # Arguments
1992    ///
1993    /// * `uri` - File URI string or filesystem path.
1994    /// * `text` - Full Perl source text for indexing.
1995    ///
1996    /// # Returns
1997    ///
1998    /// `Ok(())` when indexing succeeds, or an error message otherwise.
1999    ///
2000    /// # Errors
2001    ///
2002    /// Returns an error if the URI is invalid or parsing fails.
2003    ///
2004    /// # Examples
2005    ///
2006    /// ```rust,ignore
2007    /// use perl_parser::workspace_index::WorkspaceIndex;
2008    ///
2009    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
2010    /// let index = WorkspaceIndex::new();
2011    /// index.index_file_str("file:///example.pl", "sub hello { }")?;
2012    /// # Ok(())
2013    /// # }
2014    /// ```
2015    pub fn index_file_str(&self, uri: &str, text: &str) -> Result<(), String> {
2016        let path = Path::new(uri);
2017        let url = if path.is_absolute() {
2018            url::Url::from_file_path(path)
2019                .map_err(|_| format!("Invalid URI or file path: {}", uri))?
2020        } else {
2021            // Raw absolute Windows paths like C:\foo can parse as a bogus URI
2022            // (`c:` scheme). Prefer URL parsing only for non-path inputs.
2023            url::Url::parse(uri).or_else(|_| {
2024                url::Url::from_file_path(path)
2025                    .map_err(|_| format!("Invalid URI or file path: {}", uri))
2026            })?
2027        };
2028        self.index_file(url, text.to_string())
2029    }
2030
2031    /// Index multiple files in a single batch operation.
2032    ///
2033    /// This is significantly faster than calling `index_file` in a loop for
2034    /// initial workspace scans because it defers the global symbol cache
2035    /// rebuild to a single pass at the end.
2036    ///
2037    /// Phase 1: Parse all files without holding locks.
2038    /// Phase 2: Bulk-insert file indices and rebuild the symbol cache once.
2039    pub fn index_files_batch(&self, files_to_index: Vec<(Url, String)>) -> Vec<String> {
2040        let mut errors = Vec::new();
2041
2042        // Phase 1: Parse all files without locks
2043        let mut parsed: Vec<(String, String, FileIndex)> = Vec::with_capacity(files_to_index.len());
2044        for (uri, text) in &files_to_index {
2045            let uri_str = uri.to_string();
2046
2047            // Content hash for early-exit
2048            let mut hasher = DefaultHasher::new();
2049            text.hash(&mut hasher);
2050            let content_hash = hasher.finish();
2051
2052            let key = DocumentStore::uri_key(&uri_str);
2053
2054            // Check if content unchanged
2055            {
2056                let files = self.files.read();
2057                if let Some(existing) = files.get(&key) {
2058                    if existing.content_hash == content_hash {
2059                        continue;
2060                    }
2061                }
2062            }
2063
2064            // Update document store
2065            if self.document_store.is_open(&uri_str) {
2066                self.document_store.update(&uri_str, 1, text.clone());
2067            } else {
2068                self.document_store.open(uri_str.clone(), 1, text.clone());
2069            }
2070
2071            // Parse
2072            let mut parser = Parser::new(text);
2073            let ast = match parser.parse() {
2074                Ok(ast) => ast,
2075                Err(e) => {
2076                    errors.push(format!("Parse error in {}: {}", uri_str, e));
2077                    continue;
2078                }
2079            };
2080
2081            let mut doc = match self.document_store.get(&uri_str) {
2082                Some(d) => d,
2083                None => {
2084                    errors.push(format!("Document not found: {}", uri_str));
2085                    continue;
2086                }
2087            };
2088
2089            // Determine workspace folder URI from the file URI
2090            let folder_uri = self.determine_folder_uri(&uri_str);
2091
2092            let mut file_index = FileIndex {
2093                source_uri: uri_str.clone(),
2094                content_hash,
2095                folder_uri: folder_uri.clone(),
2096                ..Default::default()
2097            };
2098            let mut visitor = IndexVisitor::new(&mut doc, uri_str.clone(), folder_uri);
2099            visitor.visit(&ast, &mut file_index);
2100
2101            parsed.push((key, uri_str, file_index));
2102        }
2103
2104        // Phase 2: Bulk insert with single cache rebuild
2105        {
2106            let mut files = self.files.write();
2107            let mut symbols = self.symbols.write();
2108            let mut global_refs = self.global_references.write();
2109
2110            // Pre-allocate capacity for the incoming batch to avoid rehashing.
2111            // Each symbol is indexed under both its qualified name and bare name.
2112            files.reserve(parsed.len());
2113            symbols.reserve(parsed.len().saturating_mul(20).saturating_mul(2));
2114
2115            for (key, uri_str, file_index) in parsed {
2116                // Remove stale global references
2117                if let Some(old_index) = files.get(&key) {
2118                    Self::remove_file_global_refs(&mut global_refs, old_index, &uri_str);
2119                }
2120
2121                files.insert(key.clone(), file_index);
2122
2123                // Add global references for this file
2124                if let Some(fi) = files.get(&key) {
2125                    for (name, refs) in &fi.references {
2126                        let entry = global_refs.entry(name.clone()).or_default();
2127                        for reference in refs {
2128                            entry.push(Location {
2129                                uri: reference.uri.clone(),
2130                                range: reference.range,
2131                            });
2132                        }
2133                    }
2134                }
2135            }
2136
2137            // Single rebuild at the end
2138            Self::rebuild_symbol_cache(&files, &mut symbols);
2139        }
2140
2141        errors
2142    }
2143
2144    /// Find all references to a symbol using dual indexing strategy
2145    ///
2146    /// This function searches for both exact matches and bare name matches when
2147    /// the symbol is qualified. For example, when searching for "Utils::process_data":
2148    /// - First searches for exact "Utils::process_data" references
2149    /// - Then searches for bare "process_data" references that might refer to the same function
2150    ///
2151    /// This dual approach handles cases where functions are called both as:
2152    /// - Qualified: `Utils::process_data()`
2153    /// - Unqualified: `process_data()` (when in the same package or imported)
2154    ///
2155    /// # Arguments
2156    ///
2157    /// * `symbol_name` - Symbol name or qualified name to search
2158    ///
2159    /// # Returns
2160    ///
2161    /// All reference locations found for the requested symbol.
2162    ///
2163    /// # Examples
2164    ///
2165    /// ```rust,ignore
2166    /// use perl_parser::workspace_index::WorkspaceIndex;
2167    ///
2168    /// let index = WorkspaceIndex::new();
2169    /// let _refs = index.find_references("Utils::process_data");
2170    /// ```
2171    pub fn find_references(&self, symbol_name: &str) -> Vec<Location> {
2172        let global_refs = self.global_references.read();
2173        let mut seen: HashSet<(String, u32, u32, u32, u32)> = HashSet::new();
2174        let mut locations = Vec::new();
2175
2176        // O(1) lookup for exact symbol name
2177        if let Some(refs) = global_refs.get(symbol_name) {
2178            for loc in refs {
2179                let key = (
2180                    loc.uri.clone(),
2181                    loc.range.start.line,
2182                    loc.range.start.column,
2183                    loc.range.end.line,
2184                    loc.range.end.column,
2185                );
2186                if seen.insert(key) {
2187                    locations.push(Location { uri: loc.uri.clone(), range: loc.range });
2188                }
2189            }
2190        }
2191
2192        // If the symbol is qualified, also collect bare name references
2193        if let Some(idx) = symbol_name.rfind("::") {
2194            let bare_name = &symbol_name[idx + 2..];
2195            if let Some(refs) = global_refs.get(bare_name) {
2196                for loc in refs {
2197                    let key = (
2198                        loc.uri.clone(),
2199                        loc.range.start.line,
2200                        loc.range.start.column,
2201                        loc.range.end.line,
2202                        loc.range.end.column,
2203                    );
2204                    if seen.insert(key) {
2205                        locations.push(Location { uri: loc.uri.clone(), range: loc.range });
2206                    }
2207                }
2208            }
2209        } else {
2210            // If the symbol is bare, also collect qualified references that end
2211            // with the same bare name, e.g. `Pkg::foo` when searching for `foo`.
2212            for (name, refs) in global_refs.iter() {
2213                if !Self::is_qualified_variant_of(name, symbol_name) {
2214                    continue;
2215                }
2216
2217                for loc in refs {
2218                    let key = (
2219                        loc.uri.clone(),
2220                        loc.range.start.line,
2221                        loc.range.start.column,
2222                        loc.range.end.line,
2223                        loc.range.end.column,
2224                    );
2225                    if seen.insert(key) {
2226                        locations.push(Location { uri: loc.uri.clone(), range: loc.range });
2227                    }
2228                }
2229            }
2230        }
2231
2232        Self::sort_locations_deterministically(&mut locations);
2233        locations
2234    }
2235
2236    /// Resolve a symbol and return its definition/reference set for cross-file planning.
2237    ///
2238    /// Returns `None` when no definition can be resolved for `symbol_name`.
2239    pub fn query_symbol_references(
2240        &self,
2241        symbol_name: &str,
2242    ) -> Option<CrossFileReferenceQueryResult> {
2243        let definition = self.find_definition(symbol_name)?;
2244        let symbol = self.find_symbol_by_definition(&definition, symbol_name)?;
2245
2246        let stable_key = symbol.qualified_name.clone().unwrap_or_else(|| {
2247            format!(
2248                "{}@{}:{}:{}",
2249                symbol.name, symbol.uri, symbol.range.start.line, symbol.range.start.column
2250            )
2251        });
2252        let mut references = self.collect_symbol_references(&symbol);
2253        if !references.iter().any(|location| location == &definition) {
2254            references.push(definition.clone());
2255            Self::sort_locations_deterministically(&mut references);
2256        }
2257
2258        Some(CrossFileReferenceQueryResult {
2259            symbol: SymbolIdentity {
2260                stable_key,
2261                name: symbol.name,
2262                qualified_name: symbol.qualified_name,
2263                kind: symbol.kind,
2264            },
2265            definition,
2266            references,
2267        })
2268    }
2269
2270    /// Count non-definition references (usages) of a symbol.
2271    ///
2272    /// Like `find_references` but excludes `ReferenceKind::Definition` entries,
2273    /// returning only actual usage sites. This is used by code lens to show
2274    /// "N references" where N means call sites, not the definition itself.
2275    pub fn count_usages(&self, symbol_name: &str) -> usize {
2276        let files = self.files.read();
2277        let mut seen: HashSet<(String, u32, u32, u32, u32)> = HashSet::new();
2278
2279        for (_uri_key, file_index) in files.iter() {
2280            if let Some(refs) = file_index.references.get(symbol_name) {
2281                for r in refs.iter().filter(|r| r.kind != ReferenceKind::Definition) {
2282                    seen.insert((
2283                        r.uri.clone(),
2284                        r.range.start.line,
2285                        r.range.start.column,
2286                        r.range.end.line,
2287                        r.range.end.column,
2288                    ));
2289                }
2290            }
2291
2292            if let Some(idx) = symbol_name.rfind("::") {
2293                let bare_name = &symbol_name[idx + 2..];
2294                if let Some(refs) = file_index.references.get(bare_name) {
2295                    for r in refs.iter().filter(|r| r.kind != ReferenceKind::Definition) {
2296                        seen.insert((
2297                            r.uri.clone(),
2298                            r.range.start.line,
2299                            r.range.start.column,
2300                            r.range.end.line,
2301                            r.range.end.column,
2302                        ));
2303                    }
2304                }
2305            } else {
2306                for (name, refs) in &file_index.references {
2307                    if !Self::is_qualified_variant_of(name, symbol_name) {
2308                        continue;
2309                    }
2310
2311                    for r in refs.iter().filter(|r| r.kind != ReferenceKind::Definition) {
2312                        seen.insert((
2313                            r.uri.clone(),
2314                            r.range.start.line,
2315                            r.range.start.column,
2316                            r.range.end.line,
2317                            r.range.end.column,
2318                        ));
2319                    }
2320                }
2321            }
2322        }
2323
2324        seen.len()
2325    }
2326
2327    fn is_qualified_variant_of(candidate: &str, bare_symbol: &str) -> bool {
2328        candidate.rsplit_once("::").is_some_and(|(_, candidate_bare)| candidate_bare == bare_symbol)
2329    }
2330
2331    /// Find the definition of a symbol
2332    ///
2333    /// # Arguments
2334    ///
2335    /// * `symbol_name` - Symbol name or qualified name to resolve
2336    ///
2337    /// # Returns
2338    ///
2339    /// The first matching definition location, if found.
2340    ///
2341    /// # Examples
2342    ///
2343    /// ```rust,ignore
2344    /// use perl_parser::workspace_index::WorkspaceIndex;
2345    ///
2346    /// let index = WorkspaceIndex::new();
2347    /// let _def = index.find_definition("MyPackage::example");
2348    /// ```
2349    pub fn find_definition(&self, symbol_name: &str) -> Option<Location> {
2350        if let Some(location) = self.definition_candidates(symbol_name).into_iter().next() {
2351            return Some(location);
2352        }
2353
2354        // Fall back to a full files scan for this query. The result is intentionally
2355        // NOT written back to `self.symbols`: every indexed symbol is already
2356        // inserted under both qualified and bare names by `incremental_add_symbols`,
2357        // so any cache miss here is for a key that does not correspond to an
2358        // indexed symbol (e.g. a typo or alias). Caching such queries is unsound
2359        // (entries become stale on file edits and were never tracked for cleanup
2360        // in `remove_file`/`incremental_remove_symbols`) and lets the cache grow
2361        // unboundedly across long sessions. Returning the resolved location
2362        // directly preserves correctness without retaining state.
2363        let files = self.files.read();
2364        Self::find_definition_in_files(&files, symbol_name, None).map(|(location, _uri)| location)
2365    }
2366
2367    pub(crate) fn definition_candidates(&self, symbol_name: &str) -> Vec<Location> {
2368        let symbols = self.symbols.read();
2369        symbols
2370            .get(symbol_name)
2371            .map(|candidates| {
2372                candidates.iter().map(|candidate| candidate.location.clone()).collect()
2373            })
2374            .unwrap_or_default()
2375    }
2376
2377    /// Get all symbols in the workspace
2378    ///
2379    /// # Returns
2380    ///
2381    /// A vector containing every symbol currently indexed.
2382    ///
2383    /// # Examples
2384    ///
2385    /// ```rust,ignore
2386    /// use perl_parser::workspace_index::WorkspaceIndex;
2387    ///
2388    /// let index = WorkspaceIndex::new();
2389    /// let _symbols = index.all_symbols();
2390    /// ```
2391    pub fn all_symbols(&self) -> Vec<WorkspaceSymbol> {
2392        let files = self.files.read();
2393        let mut symbols = Vec::new();
2394
2395        for (_uri_key, file_index) in files.iter() {
2396            symbols.extend(file_index.symbols.clone());
2397        }
2398
2399        symbols
2400    }
2401
2402    /// Clear all indexed files and symbols from the workspace.
2403    pub fn clear(&self) {
2404        self.files.write().clear();
2405        self.symbols.write().clear();
2406        self.global_references.write().clear();
2407        self.fact_shards.write().clear();
2408        *self.semantic_reference_index.write() = ReferenceIndex::new();
2409        *self.semantic_import_export_index.write() = ImportExportIndex::new();
2410    }
2411
2412    fn hash_uri_to_file_id(uri: &str) -> FileId {
2413        let mut hasher = DefaultHasher::new();
2414        uri.hash(&mut hasher);
2415        FileId(hasher.finish())
2416    }
2417
2418    fn build_fact_shard(uri: &str, content_hash: u64, file_index: &FileIndex) -> FileFactShard {
2419        let file_id = Self::hash_uri_to_file_id(uri);
2420        let mut anchors = Vec::new();
2421        let mut entities = Vec::new();
2422        for (idx, symbol) in file_index.symbols.iter().enumerate() {
2423            let anchor_id = AnchorId((idx + 1) as u64);
2424            anchors.push(AnchorFact {
2425                id: anchor_id,
2426                file_id,
2427                // WorkspaceSymbol provides line/column coordinates only, not byte
2428                // offsets.  Zero-initialize span_*_byte until a byte-offset source
2429                // is plumbed through the indexing pipeline.
2430                span_start_byte: 0,
2431                span_end_byte: 0,
2432                scope_id: None,
2433                provenance: Provenance::SearchFallback,
2434                confidence: Confidence::Low,
2435            });
2436            entities.push(EntityFact {
2437                id: EntityId((idx + 1) as u64),
2438                kind: EntityKind::Unknown,
2439                canonical_name: symbol
2440                    .qualified_name
2441                    .clone()
2442                    .unwrap_or_else(|| symbol.name.clone()),
2443                anchor_id: Some(anchor_id),
2444                scope_id: None,
2445                provenance: Provenance::SearchFallback,
2446                confidence: Confidence::Low,
2447            });
2448        }
2449        // Hash the per-category fact vectors so consumers can detect staleness
2450        // without re-reading the full shard.
2451        let anchors_hash = {
2452            let mut h = DefaultHasher::new();
2453            anchors.len().hash(&mut h);
2454            for a in &anchors {
2455                a.id.hash(&mut h);
2456                a.span_start_byte.hash(&mut h);
2457                a.span_end_byte.hash(&mut h);
2458            }
2459            h.finish()
2460        };
2461        let entities_hash = {
2462            let mut h = DefaultHasher::new();
2463            entities.len().hash(&mut h);
2464            for e in &entities {
2465                e.id.hash(&mut h);
2466                e.canonical_name.hash(&mut h);
2467            }
2468            h.finish()
2469        };
2470        FileFactShard {
2471            source_uri: uri.to_string(),
2472            file_id,
2473            content_hash,
2474            anchors_hash: Some(anchors_hash),
2475            entities_hash: Some(entities_hash),
2476            occurrences_hash: Some(0),
2477            edges_hash: Some(0),
2478            anchors,
2479            entities,
2480            occurrences: Vec::new(),
2481            edges: Vec::new(),
2482        }
2483    }
2484
2485    /// Build a canonical [`FileFactShard`] from the AST using the semantic
2486    /// fact adapters in `perl-symbol`.
2487    ///
2488    /// This is the canonical population path that produces facts with real
2489    /// byte spans, `ExactAst` provenance, and per-category hashes. It runs
2490    /// alongside the legacy `build_fact_shard` path during the migration
2491    /// period.
2492    fn build_canonical_fact_shard_for_ast(
2493        uri: &str,
2494        content_hash: u64,
2495        ast: &Node,
2496    ) -> FileFactShard {
2497        let file_id = Self::hash_uri_to_file_id(uri);
2498
2499        // Extract declarations and references from the AST.
2500        let decls = extract_symbol_decls(ast, None);
2501        let refs = extract_symbol_refs(ast);
2502
2503        // Run the canonical adapters.
2504        let decl_facts = symbol_decls_to_semantic_facts(&decls, file_id);
2505
2506        // Build an entity lookup map for reference resolution.
2507        let entity_ids_by_name: std::collections::BTreeMap<String, EntityId> =
2508            decl_facts.entities.iter().map(|e| (e.canonical_name.clone(), e.id)).collect();
2509        let ref_facts = symbol_refs_to_semantic_facts(&refs, file_id, &entity_ids_by_name);
2510
2511        // Extract dynamic boundary evidence for `eval "sub NAME { ... }"` patterns.
2512        // Non-literal evals (e.g. `eval $code`) are intentionally skipped — the
2513        // sub name is not statically known and no evidence is emitted.
2514        let eval_sub_triples =
2515            crate::semantic::eval_sub_extractor::extract_eval_sub_boundaries(ast, file_id);
2516        let dynamic_boundaries: Vec<perl_semantic_facts::OccurrenceFact> =
2517            eval_sub_triples.iter().map(|(_, _, occ)| occ.clone()).collect();
2518        let generated_member_facts =
2519            crate::semantic::generated_member_extractor::extract_generated_member_facts(
2520                ast, file_id,
2521            );
2522
2523        // Build the canonical fact shard.
2524        // Import specs (for `use`, `require`, `ClassName->import()`) and
2525        // use-lib facts are populated separately via ImportExportIndex — not passed here.
2526        let mut shard = crate::semantic::facts::build_canonical_fact_shard(
2527            uri,
2528            content_hash,
2529            &decl_facts,
2530            &ref_facts,
2531            &[],
2532            &dynamic_boundaries,
2533        );
2534
2535        // Merge entity and anchor facts from semantic producers into the shard.
2536        // The `build_canonical_fact_shard` function only accepts OccurrenceFact
2537        // slices for dynamic_boundaries; extra entities and anchors must be
2538        // merged manually so queries can resolve those semantic facts.
2539        //
2540        // NOTE: This post-build merge means `entities_hash` and `anchors_hash` do
2541        // not reflect these additions. Incremental replacement
2542        // (`replace_fact_shard_incremental`) may miss a change if only synthetic
2543        // facts change — the `content_hash` (whole-file) will still catch it.
2544        // A future refactor should extend `build_canonical_fact_shard`'s API to
2545        // accept extra entity/anchor slices alongside `dynamic_boundaries`.
2546        for (entity, anchor, _) in eval_sub_triples {
2547            shard.entities.push(entity);
2548            shard.anchors.push(anchor);
2549        }
2550        for fact in generated_member_facts {
2551            shard.entities.push(fact.entity);
2552            shard.anchors.push(fact.anchor);
2553        }
2554
2555        shard
2556    }
2557
2558    /// Replace a [`FileFactShard`] with per-category incremental invalidation.
2559    ///
2560    /// Compares the whole-file `content_hash` first; when unchanged the
2561    /// replacement is skipped entirely.  Otherwise each per-category hash
2562    /// (`anchors_hash`, `entities_hash`, `occurrences_hash`, `edges_hash`)
2563    /// is compared individually.  Only categories whose hash changed trigger
2564    /// removal of old entries and insertion of new ones in the cross-file
2565    /// semantic indexes.
2566    ///
2567    /// **Validates: Requirements 18.1, 18.2, 18.3, 18.4, 18.5**
2568    pub fn replace_fact_shard_incremental(
2569        &self,
2570        key: &str,
2571        new_shard: FileFactShard,
2572    ) -> ShardReplaceResult {
2573        let mut shards = self.fact_shards.write();
2574        let old_shard = shards.get(key);
2575
2576        let replacement = plan_shard_replacement(
2577            old_shard.map(Self::shard_category_hashes),
2578            Self::shard_category_hashes(&new_shard),
2579        );
2580
2581        if replacement.content_unchanged {
2582            return replacement;
2583        }
2584
2585        let source_uri = new_shard.source_uri.clone();
2586
2587        // ── Update cross-file semantic indexes per category ──
2588        // Occurrences and edges are both managed by the ReferenceIndex.
2589        // When either changes we must remove+re-add the file in that index.
2590        if replacement.occurrences_updated || replacement.edges_updated {
2591            let mut ref_idx = self.semantic_reference_index.write();
2592            if old_shard.is_some() {
2593                ref_idx.remove_file(&source_uri);
2594            }
2595            ref_idx.add_file(&new_shard);
2596        }
2597
2598        // Entities feed into the import/export index (export sets are keyed
2599        // by module name derived from entity canonical names).  When entities
2600        // change we refresh the import/export index for this file.
2601        if replacement.entities_updated {
2602            let mut ie_idx = self.semantic_import_export_index.write();
2603            ie_idx.remove_file_imports(&source_uri);
2604            ie_idx.remove_module_exports(&source_uri);
2605            // Re-add is handled by the caller or future wiring; for now we
2606            // ensure stale entries are purged.
2607        }
2608
2609        // Store the new shard (always, since content_hash differs).
2610        shards.insert(key.to_string(), new_shard);
2611
2612        replacement
2613    }
2614
2615    fn shard_category_hashes(shard: &FileFactShard) -> ShardCategoryHashes {
2616        ShardCategoryHashes {
2617            content_hash: shard.content_hash,
2618            anchors_hash: shard.anchors_hash,
2619            entities_hash: shard.entities_hash,
2620            occurrences_hash: shard.occurrences_hash,
2621            edges_hash: shard.edges_hash,
2622        }
2623    }
2624
2625    /// Number of stored file fact shards.
2626    pub fn fact_shard_count(&self) -> usize {
2627        self.fact_shards.read().len()
2628    }
2629
2630    /// Fetch a file fact shard for test/inspection.
2631    pub fn file_fact_shard(&self, uri: &str) -> Option<FileFactShard> {
2632        let key = DocumentStore::uri_key(&Self::normalize_uri(uri));
2633        self.fact_shards.read().get(&key).cloned()
2634    }
2635
2636    /// Resolve a semantic anchor to a source-backed LSP-wire location.
2637    ///
2638    /// Returns `None` for missing anchors, zero-width fallback anchors, or
2639    /// anchors whose source text is unavailable from the document store. If
2640    /// more than one shard contains the same anchor ID, this fails closed
2641    /// instead of choosing an arbitrary hash-map iteration result.
2642    pub fn semantic_anchor_wire_location(&self, anchor_id: AnchorId) -> Option<WireLocation> {
2643        let shards = self.fact_shards.read();
2644        let mut location = None;
2645
2646        for shard in shards.values() {
2647            for anchor in shard.anchors.iter().filter(|anchor| anchor.id == anchor_id) {
2648                if anchor.span_end_byte <= anchor.span_start_byte {
2649                    return None;
2650                }
2651
2652                let doc = self.document_store.get(&shard.source_uri)?;
2653                let start = usize::try_from(anchor.span_start_byte).ok()?;
2654                let end = usize::try_from(anchor.span_end_byte).ok()?;
2655                let next_location = WireLocation::new(
2656                    shard.source_uri.clone(),
2657                    WireRange::from_byte_offsets(&doc.text, start, end),
2658                );
2659                if location.replace(next_location).is_some() {
2660                    return None;
2661                }
2662            }
2663        }
2664
2665        location
2666    }
2667
2668    /// Resolve a semantic anchor to a source-backed LSP-wire location in a
2669    /// specific indexed file.
2670    ///
2671    /// This is the edit-safe variant of [`Self::semantic_anchor_wire_location`]:
2672    /// callers that already have `(file_id, anchor_id)` from a semantic plan do
2673    /// not need the global duplicate-anchor fail-closed behavior.
2674    pub fn semantic_anchor_wire_location_for_file(
2675        &self,
2676        file_id: FileId,
2677        anchor_id: AnchorId,
2678    ) -> Option<WireLocation> {
2679        let shards = self.fact_shards.read();
2680        let shard = shards.values().find(|shard| shard.file_id == file_id)?;
2681        let anchor = shard
2682            .anchors
2683            .iter()
2684            .find(|anchor| anchor.id == anchor_id && anchor.file_id == file_id)?;
2685
2686        if anchor.span_end_byte <= anchor.span_start_byte {
2687            return None;
2688        }
2689
2690        let doc = self.document_store.get(&shard.source_uri)?;
2691        let start = usize::try_from(anchor.span_start_byte).ok()?;
2692        let end = usize::try_from(anchor.span_end_byte).ok()?;
2693        doc.text.get(start..end)?;
2694
2695        Some(WireLocation::new(
2696            shard.source_uri.clone(),
2697            WireRange::from_byte_offsets(&doc.text, start, end),
2698        ))
2699    }
2700
2701    /// Compute the [`FileId`] for a URI using the same hash used during indexing.
2702    ///
2703    /// Returns `None` if the URI has not been indexed (no fact shard is present).
2704    pub fn file_id_for_uri(&self, uri: &str) -> Option<FileId> {
2705        let key = DocumentStore::uri_key(&Self::normalize_uri(uri));
2706        self.fact_shards.read().get(&key).map(|shard| shard.file_id)
2707    }
2708
2709    /// Invoke a scoped callback with [`WorkspaceSemanticQueries`] built from
2710    /// the current semantic indexes for the given URI.
2711    ///
2712    /// The callback receives the resolved [`FileId`] and a
2713    /// [`WorkspaceSemanticQueries`] facade that borrows from read-locked
2714    /// semantic indexes. Locks are released when `f` returns.
2715    ///
2716    /// Returns `Some(result)` if the URI is indexed and semantic data is
2717    /// available, `None` if the URI has not been indexed or its fact shard is
2718    /// absent (the caller should fall back to legacy diagnostics).
2719    pub fn with_semantic_queries_for_uri<R>(
2720        &self,
2721        uri: &str,
2722        f: impl FnOnce(FileId, crate::semantic::queries::WorkspaceSemanticQueries<'_>) -> R,
2723    ) -> Option<R> {
2724        let key = DocumentStore::uri_key(&Self::normalize_uri(uri));
2725
2726        // Acquire all three read guards simultaneously. The lock order must be
2727        // consistent with every other site that acquires multiple locks to avoid
2728        // deadlock: shards → reference_index → import_export_index.
2729        let shards_guard = self.fact_shards.read();
2730        let ref_guard = self.semantic_reference_index.read();
2731        let ie_guard = self.semantic_import_export_index.read();
2732
2733        // Verify the URI is indexed before entering the callback.
2734        let file_id = shards_guard.get(&key)?.file_id;
2735
2736        let queries = crate::semantic::queries::WorkspaceSemanticQueries::new(
2737            &ref_guard,
2738            &ie_guard,
2739            &shards_guard,
2740        );
2741
2742        Some(f(file_id, queries))
2743    }
2744
2745    /// Return the number of indexed files in the workspace
2746    pub fn file_count(&self) -> usize {
2747        let files = self.files.read();
2748        files.len()
2749    }
2750
2751    /// Return the total number of symbols across all indexed files
2752    pub fn symbol_count(&self) -> usize {
2753        let files = self.files.read();
2754        files.values().map(|file_index| file_index.symbols.len()).sum()
2755    }
2756
2757    /// Get all files in a specific workspace folder
2758    ///
2759    /// # Arguments
2760    ///
2761    /// * `folder_uri` - Workspace folder URI to filter by
2762    ///
2763    /// # Returns
2764    ///
2765    /// A vector of file indices belonging to the specified folder
2766    pub fn files_in_folder(&self, folder_uri: &str) -> Vec<FileIndex> {
2767        let files = self.files.read();
2768        files.values().filter(|f| f.folder_uri.as_deref() == Some(folder_uri)).cloned().collect()
2769    }
2770
2771    /// Get all symbols in a specific workspace folder
2772    ///
2773    /// # Arguments
2774    ///
2775    /// * `folder_uri` - Workspace folder URI to filter by
2776    ///
2777    /// # Returns
2778    ///
2779    /// A vector of symbols belonging to the specified folder
2780    pub fn symbols_in_folder(&self, folder_uri: &str) -> Vec<WorkspaceSymbol> {
2781        let files = self.files.read();
2782        files
2783            .values()
2784            .filter(|f| f.folder_uri.as_deref() == Some(folder_uri))
2785            .flat_map(|f| f.symbols.iter().cloned())
2786            .collect()
2787    }
2788
2789    /// Capture a point-in-time memory estimate of the index.
2790    ///
2791    /// Acquires read locks on all index components and walks their contents
2792    /// to estimate heap usage. Intended for offline profiling; do not call
2793    /// on the LSP hot path.
2794    ///
2795    /// Only available when the `memory-profiling` feature is enabled.
2796    #[cfg(feature = "memory-profiling")]
2797    pub fn memory_snapshot(&self) -> crate::workspace::memory::MemorySnapshot {
2798        use std::mem::size_of;
2799
2800        let files_guard = self.files.read();
2801        let symbols_guard = self.symbols.read();
2802        let global_refs_guard = self.global_references.read();
2803
2804        // --- files map ---
2805        let mut files_bytes: usize = 0;
2806        let mut total_symbol_count: usize = 0;
2807        for (uri_key, fi) in files_guard.iter() {
2808            // key string
2809            files_bytes += uri_key.len();
2810            // per-symbol entries
2811            for sym in &fi.symbols {
2812                files_bytes += sym.name.len()
2813                    + sym.uri.len()
2814                    + sym.qualified_name.as_deref().map_or(0, str::len)
2815                    + sym.documentation.as_deref().map_or(0, str::len)
2816                    + sym.container_name.as_deref().map_or(0, str::len)
2817                    // stack portion: kind + range + has_body + option discriminants
2818                    + size_of::<WorkspaceSymbol>();
2819            }
2820            total_symbol_count += fi.symbols.len();
2821            // per-reference entries
2822            for (ref_name, refs) in &fi.references {
2823                files_bytes += ref_name.len();
2824                for r in refs {
2825                    files_bytes += r.uri.len() + size_of::<SymbolReference>();
2826                }
2827            }
2828            // dependencies
2829            for dep in &fi.dependencies {
2830                files_bytes += dep.len();
2831            }
2832            // content hash (u64) + vec/hashset capacity overhead (rough)
2833            files_bytes += size_of::<u64>();
2834        }
2835
2836        // --- global symbols map ---
2837        let mut symbols_bytes: usize = 0;
2838        for (qname, candidates) in symbols_guard.iter() {
2839            symbols_bytes += qname.len();
2840            for candidate in candidates {
2841                symbols_bytes += candidate.location.uri.len() + size_of::<Location>();
2842            }
2843        }
2844
2845        // --- global references map ---
2846        let mut global_refs_bytes: usize = 0;
2847        for (sym_name, locs) in global_refs_guard.iter() {
2848            global_refs_bytes += sym_name.len();
2849            for loc in locs {
2850                global_refs_bytes += loc.uri.len() + size_of::<Location>();
2851            }
2852        }
2853
2854        // --- document store ---
2855        let document_store_bytes = self.document_store.total_text_bytes();
2856
2857        crate::workspace::memory::MemorySnapshot {
2858            file_count: files_guard.len(),
2859            symbol_count: total_symbol_count,
2860            files_bytes,
2861            symbols_bytes,
2862            global_refs_bytes,
2863            document_store_bytes,
2864        }
2865    }
2866
2867    /// Check if the workspace index has symbols (soft readiness check)
2868    ///
2869    /// Returns true if the index contains any symbols, indicating that
2870    /// at least some files have been indexed and the workspace is ready
2871    /// for symbol-based operations like completion.
2872    ///
2873    /// # Returns
2874    ///
2875    /// `true` if any symbols are indexed, otherwise `false`.
2876    ///
2877    /// # Examples
2878    ///
2879    /// ```rust,ignore
2880    /// use perl_parser::workspace_index::WorkspaceIndex;
2881    ///
2882    /// let index = WorkspaceIndex::new();
2883    /// assert!(!index.has_symbols());
2884    /// ```
2885    pub fn has_symbols(&self) -> bool {
2886        let files = self.files.read();
2887        files.values().any(|file_index| !file_index.symbols.is_empty())
2888    }
2889
2890    /// Search for symbols by query
2891    ///
2892    /// # Arguments
2893    ///
2894    /// * `query` - Substring to match against symbol names
2895    ///
2896    /// # Returns
2897    ///
2898    /// Symbols whose names or qualified names contain the query string.
2899    ///
2900    /// # Examples
2901    ///
2902    /// ```rust,ignore
2903    /// use perl_parser::workspace_index::WorkspaceIndex;
2904    ///
2905    /// let index = WorkspaceIndex::new();
2906    /// let _results = index.search_symbols("example");
2907    /// ```
2908    pub fn search_symbols(&self, query: &str) -> Vec<WorkspaceSymbol> {
2909        self.search_source_symbols(query)
2910    }
2911
2912    /// Search only source-backed syntax symbols from the workspace index.
2913    ///
2914    /// Generated/framework members are excluded. Use this when a caller needs
2915    /// to preserve the historical source-backed live slice for trust receipts
2916    /// or fallback paths.
2917    pub fn search_source_symbols(&self, query: &str) -> Vec<WorkspaceSymbol> {
2918        let query = query.trim();
2919        let query_lower = query.to_lowercase();
2920        let files = self.files.read();
2921        let mut results = Vec::new();
2922        for file_index in files.values() {
2923            for symbol in &file_index.symbols {
2924                if symbol.name.to_lowercase().contains(&query_lower)
2925                    || symbol
2926                        .qualified_name
2927                        .as_ref()
2928                        .map(|qn| qn.to_lowercase().contains(&query_lower))
2929                        .unwrap_or(false)
2930                {
2931                    results.push(symbol.clone());
2932                }
2933            }
2934        }
2935        results
2936    }
2937
2938    /// Search labeled generated/framework members backed by semantic source anchors.
2939    ///
2940    /// This is a narrow workspace-symbol pilot: returned symbols are explicitly
2941    /// labeled as generated/framework members and point at the source declaration
2942    /// that produced the member, not at an exact generated method body.
2943    pub fn search_generated_workspace_symbols(&self, query: &str) -> Vec<WorkspaceSymbol> {
2944        let query = query.trim();
2945        if query.is_empty() {
2946            return Vec::new();
2947        }
2948
2949        let query_lower = query.to_lowercase();
2950        let source_backed_qualified_names = self.source_backed_qualified_names();
2951        let shards = self.fact_shards.read();
2952        let mut results = Vec::new();
2953
2954        for shard in shards.values() {
2955            for entity in &shard.entities {
2956                if entity.kind != EntityKind::GeneratedMember {
2957                    continue;
2958                }
2959                if !is_framework_generated_member_entity(entity) {
2960                    continue;
2961                }
2962                if source_backed_qualified_names.contains(&entity.canonical_name) {
2963                    continue;
2964                }
2965                let Some((container_name, bare_name)) =
2966                    split_qualified_symbol_name(&entity.canonical_name)
2967                else {
2968                    continue;
2969                };
2970                if !bare_name.to_lowercase().contains(&query_lower)
2971                    && !entity.canonical_name.to_lowercase().contains(&query_lower)
2972                {
2973                    continue;
2974                }
2975                let Some(anchor_id) = entity.anchor_id else {
2976                    continue;
2977                };
2978                let Some(range) = self.generated_member_anchor_range(shard, anchor_id) else {
2979                    continue;
2980                };
2981
2982                results.push(WorkspaceSymbol {
2983                    name: format!("{bare_name} [generated/framework]"),
2984                    kind: SymbolKind::Method,
2985                    uri: shard.source_uri.clone(),
2986                    range,
2987                    qualified_name: Some(entity.canonical_name.clone()),
2988                    documentation: Some(
2989                        "Generated/framework member; virtual symbol anchored to source declaration"
2990                            .to_string(),
2991                    ),
2992                    container_name: Some(format!("{container_name} [generated/framework]")),
2993                    has_body: false,
2994                    workspace_folder_uri: self.determine_folder_uri(&shard.source_uri),
2995                });
2996            }
2997        }
2998
2999        sort_workspace_symbols(&mut results);
3000        results
3001    }
3002
3003    fn source_backed_qualified_names(&self) -> HashSet<String> {
3004        let files = self.files.read();
3005        let mut qualified_names = HashSet::new();
3006        for file_index in files.values() {
3007            for symbol in &file_index.symbols {
3008                if let Some(name) = &symbol.qualified_name {
3009                    qualified_names.insert(name.clone());
3010                    continue;
3011                }
3012                if let Some(container) = &symbol.container_name {
3013                    qualified_names.insert(format!("{container}::{}", symbol.name));
3014                }
3015            }
3016        }
3017        qualified_names
3018    }
3019
3020    fn generated_member_anchor_range(
3021        &self,
3022        shard: &FileFactShard,
3023        anchor_id: AnchorId,
3024    ) -> Option<Range> {
3025        let anchor = shard
3026            .anchors
3027            .iter()
3028            .find(|anchor| anchor.id == anchor_id && anchor.file_id == shard.file_id)?;
3029        if anchor.provenance != Provenance::FrameworkSynthesis
3030            || anchor.confidence != Confidence::Medium
3031        {
3032            return None;
3033        }
3034        if anchor.span_end_byte <= anchor.span_start_byte {
3035            return None;
3036        }
3037
3038        let doc = self.document_store.get(&shard.source_uri)?;
3039        let start = usize::try_from(anchor.span_start_byte).ok()?;
3040        let end = usize::try_from(anchor.span_end_byte).ok()?;
3041        doc.text.get(start..end)?;
3042        let ((start_line, start_col), (end_line, end_col)) = doc.line_index.range(start, end);
3043        Some(Range {
3044            start: Position { byte: start, line: start_line, column: start_col },
3045            end: Position { byte: end, line: end_line, column: end_col },
3046        })
3047    }
3048
3049    /// Find symbols by query (alias for search_symbols for compatibility)
3050    ///
3051    /// # Arguments
3052    ///
3053    /// * `query` - Substring to match against symbol names
3054    ///
3055    /// # Returns
3056    ///
3057    /// Symbols whose names or qualified names contain the query string.
3058    ///
3059    /// # Examples
3060    ///
3061    /// ```rust,ignore
3062    /// use perl_parser::workspace_index::WorkspaceIndex;
3063    ///
3064    /// let index = WorkspaceIndex::new();
3065    /// let _results = index.find_symbols("example");
3066    /// ```
3067    pub fn find_symbols(&self, query: &str) -> Vec<WorkspaceSymbol> {
3068        self.search_symbols(query)
3069    }
3070
3071    /// Rank symbols by folder proximity to a document
3072    ///
3073    /// Returns symbols sorted by: same folder > other folders
3074    ///
3075    /// # Arguments
3076    ///
3077    /// * `symbols` - Symbols to rank
3078    /// * `doc_uri` - Document URI to determine folder context
3079    ///
3080    /// # Returns
3081    ///
3082    /// Symbols ranked by folder proximity (same folder first)
3083    ///
3084    /// # Examples
3085    ///
3086    /// ```rust,ignore
3087    /// use perl_parser::workspace_index::WorkspaceIndex;
3088    ///
3089    /// let index = WorkspaceIndex::new();
3090    /// let symbols = index.search_symbols("example");
3091    /// let ranked = index.rank_symbols_by_folder(symbols, "file:///project1/src/main.pl");
3092    /// ```
3093    pub fn rank_symbols_by_folder(
3094        &self,
3095        symbols: Vec<WorkspaceSymbol>,
3096        doc_uri: &str,
3097    ) -> Vec<WorkspaceSymbol> {
3098        let doc_folder = self.determine_folder_uri(doc_uri);
3099
3100        let mut ranked: Vec<(WorkspaceSymbol, i32)> = symbols
3101            .into_iter()
3102            .map(|symbol| {
3103                let rank = if let Some(ref doc_folder_uri) = doc_folder {
3104                    if symbol.workspace_folder_uri.as_ref() == Some(doc_folder_uri) {
3105                        0 // Same folder - highest priority
3106                    } else {
3107                        1 // Different folder - lower priority
3108                    }
3109                } else {
3110                    1 // No document context - treat as different folder
3111                };
3112                (symbol, rank)
3113            })
3114            .collect();
3115
3116        // Sort by rank (lower is better), then by name for stability
3117        ranked.sort_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0.name.cmp(&b.0.name)));
3118
3119        ranked.into_iter().map(|(symbol, _)| symbol).collect()
3120    }
3121
3122    /// Search for symbols with folder-aware ranking
3123    ///
3124    /// Combines symbol search with folder proximity ranking
3125    ///
3126    /// # Arguments
3127    ///
3128    /// * `name` - Symbol name to search for
3129    /// * `doc_uri` - Document URI for ranking context
3130    ///
3131    /// # Returns
3132    ///
3133    /// Ranked symbols with same-folder results first
3134    ///
3135    /// # Examples
3136    ///
3137    /// ```rust,ignore
3138    /// use perl_parser::workspace_index::WorkspaceIndex;
3139    ///
3140    /// let index = WorkspaceIndex::new();
3141    /// let ranked = index.search_symbols_ranked("example", "file:///project1/src/main.pl");
3142    /// ```
3143    pub fn search_symbols_ranked(&self, name: &str, doc_uri: &str) -> Vec<WorkspaceSymbol> {
3144        let symbols = self.search_symbols(name);
3145        self.rank_symbols_by_folder(symbols, doc_uri)
3146    }
3147
3148    /// Determine if two symbols are in the same package
3149    ///
3150    /// # Arguments
3151    ///
3152    /// * `symbol_a` - First symbol
3153    /// * `symbol_b` - Second symbol
3154    ///
3155    /// # Returns
3156    ///
3157    /// `true` if both symbols are in the same package
3158    #[allow(dead_code)]
3159    pub fn same_package(&self, symbol_a: &WorkspaceSymbol, symbol_b: &WorkspaceSymbol) -> bool {
3160        let package_a = self.extract_package_name(&symbol_a.name);
3161        let package_b = self.extract_package_name(&symbol_b.name);
3162        package_a == package_b
3163    }
3164
3165    /// Determine if two package names are the same (helper for testing)
3166    ///
3167    /// # Arguments
3168    ///
3169    /// * `package_a` - First package name
3170    /// * `package_b` - Second package name
3171    ///
3172    /// # Returns
3173    ///
3174    /// `true` if both package names are equal
3175    #[allow(dead_code)]
3176    pub fn same_package_by_container(&self, package_a: &str, package_b: &str) -> bool {
3177        package_a == package_b
3178    }
3179
3180    /// Extract package name from a symbol name
3181    ///
3182    /// # Arguments
3183    ///
3184    /// * `symbol_name` - Symbol name (e.g., "Foo::Bar::baz" or "baz")
3185    ///
3186    /// # Returns
3187    ///
3188    /// Package name (e.g., "Foo::Bar") or None for main package
3189    #[allow(dead_code)]
3190    pub fn extract_package_name(&self, symbol_name: &str) -> Option<String> {
3191        let parts: Vec<&str> = symbol_name.split("::").collect();
3192        if parts.len() > 1 { Some(parts[..parts.len() - 1].join("::")) } else { None }
3193    }
3194
3195    /// Get symbols in a specific file
3196    ///
3197    /// # Arguments
3198    ///
3199    /// * `uri` - File URI to inspect
3200    ///
3201    /// # Returns
3202    ///
3203    /// All symbols indexed for the requested file.
3204    ///
3205    /// # Examples
3206    ///
3207    /// ```rust,ignore
3208    /// use perl_parser::workspace_index::WorkspaceIndex;
3209    ///
3210    /// let index = WorkspaceIndex::new();
3211    /// let _symbols = index.file_symbols("file:///example.pl");
3212    /// ```
3213    pub fn file_symbols(&self, uri: &str) -> Vec<WorkspaceSymbol> {
3214        let normalized_uri = Self::normalize_uri(uri);
3215        let key = DocumentStore::uri_key(&normalized_uri);
3216        let files = self.files.read();
3217
3218        files.get(&key).map(|fi| fi.symbols.clone()).unwrap_or_default()
3219    }
3220
3221    /// Get dependencies of a file
3222    ///
3223    /// # Arguments
3224    ///
3225    /// * `uri` - File URI to inspect
3226    ///
3227    /// # Returns
3228    ///
3229    /// A set of module names imported by the file.
3230    ///
3231    /// # Examples
3232    ///
3233    /// ```rust,ignore
3234    /// use perl_parser::workspace_index::WorkspaceIndex;
3235    ///
3236    /// let index = WorkspaceIndex::new();
3237    /// let _deps = index.file_dependencies("file:///example.pl");
3238    /// ```
3239    pub fn file_dependencies(&self, uri: &str) -> HashSet<String> {
3240        let normalized_uri = Self::normalize_uri(uri);
3241        let key = DocumentStore::uri_key(&normalized_uri);
3242        let files = self.files.read();
3243
3244        files.get(&key).map(|fi| fi.dependencies.clone()).unwrap_or_default()
3245    }
3246
3247    /// Find all files that depend on a module
3248    ///
3249    /// # Arguments
3250    ///
3251    /// * `module_name` - Module name to search for in file dependencies
3252    ///
3253    /// # Returns
3254    ///
3255    /// A list of file URIs that import or depend on the module.
3256    ///
3257    /// # Examples
3258    ///
3259    /// ```rust,ignore
3260    /// use perl_parser::workspace_index::WorkspaceIndex;
3261    ///
3262    /// let index = WorkspaceIndex::new();
3263    /// let _files = index.find_dependents("My::Module");
3264    /// ```
3265    pub fn find_dependents(&self, module_name: &str) -> Vec<String> {
3266        let canonical = canonicalize_perl_module_name(module_name);
3267        let legacy = legacy_perl_module_name(&canonical);
3268        let files = self.files.read();
3269        let mut dependents = Vec::new();
3270
3271        for (uri_key, file_index) in files.iter() {
3272            if file_index.dependencies.contains(module_name)
3273                || file_index.dependencies.contains(&canonical)
3274                || file_index.dependencies.contains(&legacy)
3275            {
3276                dependents.push(uri_key.clone());
3277            }
3278        }
3279
3280        dependents
3281    }
3282
    /// Get the document store
    ///
    /// Plain accessor: borrows the `document_store` field of this index
    /// without copying it.
    ///
    /// # Returns
    ///
    /// A reference to the in-memory document store.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_parser::workspace_index::WorkspaceIndex;
    ///
    /// let index = WorkspaceIndex::new();
    /// let _store = index.document_store();
    /// ```
    pub fn document_store(&self) -> &DocumentStore {
        &self.document_store
    }
3300
3301    /// Find unused symbols in the workspace
3302    ///
3303    /// # Returns
3304    ///
3305    /// Symbols that have no non-definition references in the workspace.
3306    ///
3307    /// # Examples
3308    ///
3309    /// ```rust,ignore
3310    /// use perl_parser::workspace_index::WorkspaceIndex;
3311    ///
3312    /// let index = WorkspaceIndex::new();
3313    /// let _unused = index.find_unused_symbols();
3314    /// ```
3315    pub fn find_unused_symbols(&self) -> Vec<WorkspaceSymbol> {
3316        let files = self.files.read();
3317        let mut unused = Vec::new();
3318
3319        // Collect all defined symbols
3320        for (_uri_key, file_index) in files.iter() {
3321            for symbol in &file_index.symbols {
3322                // Check if this symbol has any references beyond its definition
3323                let has_usage = files.values().any(|fi| {
3324                    if let Some(refs) = fi.references.get(&symbol.name) {
3325                        refs.iter().any(|r| r.kind != ReferenceKind::Definition)
3326                    } else {
3327                        false
3328                    }
3329                });
3330
3331                if !has_usage {
3332                    unused.push(symbol.clone());
3333                }
3334            }
3335        }
3336
3337        unused
3338    }
3339
3340    /// Get all symbols that belong to a specific package
3341    ///
3342    /// # Arguments
3343    ///
3344    /// * `package_name` - Package name to match (e.g., `My::Package`)
3345    ///
3346    /// # Returns
3347    ///
3348    /// Symbols defined within the requested package.
3349    ///
3350    /// # Examples
3351    ///
3352    /// ```rust,ignore
3353    /// use perl_parser::workspace_index::WorkspaceIndex;
3354    ///
3355    /// let index = WorkspaceIndex::new();
3356    /// let _members = index.get_package_members("My::Package");
3357    /// ```
3358    pub fn get_package_members(&self, package_name: &str) -> Vec<WorkspaceSymbol> {
3359        let files = self.files.read();
3360        let mut members = Vec::new();
3361
3362        for (_uri_key, file_index) in files.iter() {
3363            for symbol in &file_index.symbols {
3364                // Check if symbol belongs to this package
3365                if let Some(ref container) = symbol.container_name {
3366                    if container == package_name {
3367                        members.push(symbol.clone());
3368                    }
3369                }
3370                // Also check qualified names
3371                if let Some(ref qname) = symbol.qualified_name {
3372                    if qname.starts_with(&format!("{}::", package_name)) {
3373                        // Avoid duplicates - only add if not already in via container_name
3374                        if symbol.container_name.as_deref() != Some(package_name) {
3375                            members.push(symbol.clone());
3376                        }
3377                    }
3378                }
3379            }
3380        }
3381
3382        members
3383    }
3384
3385    /// Names of all packages explicitly declared in a file.
3386    ///
3387    /// Returns the bare declared name for each `package` statement or block in
3388    /// the file (e.g. `"Foo"`, `"Bar"`, `"Foo::Nested"`).  A file with no
3389    /// explicit `package` declaration returns an empty vec; there is no implicit
3390    /// `"main"` symbol to surface.  A file containing `package main;` explicitly
3391    /// WILL appear in results.
3392    ///
3393    /// # Arguments
3394    ///
3395    /// * `uri` - File URI to inspect (normalized via `normalize_uri`)
3396    ///
3397    /// # Returns
3398    ///
3399    /// Declared package names in declaration order (AST walk order).
3400    pub fn file_packages(&self, uri: &str) -> Vec<String> {
3401        let normalized = Self::normalize_uri(uri);
3402        let key = DocumentStore::uri_key(&normalized);
3403        let files = self.files.read();
3404        let Some(file) = files.get(&key) else {
3405            return Vec::new();
3406        };
3407
3408        let mut packages = Vec::new();
3409        for symbol in &file.symbols {
3410            if symbol.kind == SymbolKind::Package {
3411                packages.push(symbol.name.clone());
3412            }
3413        }
3414        packages
3415    }
3416
3417    /// Symbols declared inside a specific package within a file.
3418    ///
3419    /// Returns all `WorkspaceSymbol` entries whose `container_name` equals
3420    /// `package_name` (bare name match, e.g. `"Bar"` or `"Foo::Nested"`).
3421    /// Package declaration symbols themselves are excluded (they carry
3422    /// `container_name = None`).
3423    ///
3424    /// # Arguments
3425    ///
3426    /// * `uri`          - File URI to inspect
3427    /// * `package_name` - Bare package name to filter by (e.g. `"Foo::Bar"`)
3428    ///
3429    /// # Returns
3430    ///
3431    /// Symbols belonging to the package, in declaration order.
3432    pub fn file_package_symbols(&self, uri: &str, package_name: &str) -> Vec<WorkspaceSymbol> {
3433        let normalized = Self::normalize_uri(uri);
3434        let key = DocumentStore::uri_key(&normalized);
3435        let files = self.files.read();
3436        let Some(file) = files.get(&key) else {
3437            return Vec::new();
3438        };
3439
3440        let mut symbols = Vec::new();
3441        for symbol in &file.symbols {
3442            if Self::symbol_belongs_to_package(symbol, package_name) {
3443                symbols.push(symbol.clone());
3444            }
3445        }
3446        symbols
3447    }
3448
3449    fn symbol_belongs_to_package(symbol: &WorkspaceSymbol, package_name: &str) -> bool {
3450        symbol.container_name.as_ref().is_some_and(|container| package_name.eq(container.as_str()))
3451    }
3452
3453    /// Find the definition location for a symbol key during Index/Navigate stages.
3454    ///
3455    /// # Arguments
3456    ///
3457    /// * `key` - Normalized symbol key to resolve.
3458    ///
3459    /// # Returns
3460    ///
3461    /// The definition location for the symbol, if found.
3462    ///
3463    /// # Examples
3464    ///
3465    /// ```rust,ignore
3466    /// use perl_parser::workspace_index::{SymKind, SymbolKey, WorkspaceIndex};
3467    /// use std::sync::Arc;
3468    ///
3469    /// let index = WorkspaceIndex::new();
3470    /// let key = SymbolKey { pkg: Arc::from("My::Package"), name: Arc::from("example"), sigil: None, kind: SymKind::Sub };
3471    /// let _def = index.find_def(&key);
3472    /// ```
3473    pub fn find_def(&self, key: &SymbolKey) -> Option<Location> {
3474        if let Some(sigil) = key.sigil {
3475            // It's a variable
3476            let var_name = format!("{}{}", sigil, key.name);
3477            self.find_definition(&var_name)
3478        } else if key.kind == SymKind::Pack {
3479            // It's a package lookup (e.g., from `use Module::Name`)
3480            // Search for the package declaration by name
3481            self.find_definition(key.pkg.as_ref())
3482                .or_else(|| self.find_definition(key.name.as_ref()))
3483        } else {
3484            // It's a subroutine or package
3485            let qualified_name = format!("{}::{}", key.pkg, key.name);
3486            self.find_definition(&qualified_name)
3487        }
3488    }
3489
3490    /// Find reference locations for a symbol key using dual indexing.
3491    ///
3492    /// Searches both qualified and bare names to support Navigate/Analyze workflows.
3493    ///
3494    /// # Arguments
3495    ///
3496    /// * `key` - Normalized symbol key to search for.
3497    ///
3498    /// # Returns
3499    ///
3500    /// All reference locations for the symbol, excluding the definition.
3501    ///
3502    /// # Examples
3503    ///
3504    /// ```rust,ignore
3505    /// use perl_parser::workspace_index::{SymKind, SymbolKey, WorkspaceIndex};
3506    /// use std::sync::Arc;
3507    ///
3508    /// let index = WorkspaceIndex::new();
3509    /// let key = SymbolKey { pkg: Arc::from("main"), name: Arc::from("example"), sigil: None, kind: SymKind::Sub };
3510    /// let _refs = index.find_refs(&key);
3511    /// ```
3512    pub fn find_refs(&self, key: &SymbolKey) -> Vec<Location> {
3513        let files_locked = self.files.read();
3514        let mut all_refs = if let Some(sigil) = key.sigil {
3515            // It's a variable - search through all files for this variable name
3516            let var_name = format!("{}{}", sigil, key.name);
3517            let mut refs = Vec::new();
3518            for (_uri_key, file_index) in files_locked.iter() {
3519                if let Some(var_refs) = file_index.references.get(&var_name) {
3520                    for reference in var_refs {
3521                        refs.push(Location { uri: reference.uri.clone(), range: reference.range });
3522                    }
3523                }
3524            }
3525            refs
3526        } else {
3527            // It's a subroutine or package
3528            if key.pkg.as_ref() == "main" {
3529                // For main package, we search for both "main::foo" and bare "foo"
3530                let mut refs = self.find_references(&format!("main::{}", key.name));
3531                // Add bare name references
3532                for (_uri_key, file_index) in files_locked.iter() {
3533                    if let Some(bare_refs) = file_index.references.get(key.name.as_ref()) {
3534                        for reference in bare_refs {
3535                            refs.push(Location {
3536                                uri: reference.uri.clone(),
3537                                range: reference.range,
3538                            });
3539                        }
3540                    }
3541                }
3542                refs
3543            } else {
3544                let qualified_name = format!("{}::{}", key.pkg, key.name);
3545                self.find_references(&qualified_name)
3546            }
3547        };
3548        drop(files_locked);
3549
3550        // Remove the definition; the caller will include it separately if needed
3551        if let Some(def) = self.find_def(key) {
3552            all_refs.retain(|loc| !(loc.uri == def.uri && loc.range == def.range));
3553        }
3554
3555        // Deduplicate by URI and range
3556        let mut seen = HashSet::new();
3557        all_refs.retain(|loc| {
3558            seen.insert((
3559                loc.uri.clone(),
3560                loc.range.start.line,
3561                loc.range.start.column,
3562                loc.range.end.line,
3563                loc.range.end.column,
3564            ))
3565        });
3566
3567        all_refs
3568    }
3569}
3570
/// AST visitor for extracting symbols and references
///
/// One visitor indexes one document: it fills a `FileIndex` with the symbols
/// declared in the AST and with the references found while walking it.
struct IndexVisitor {
    /// Copy of the document being indexed (cloned in `new`); its `line_index`
    /// is used to turn byte spans into line/column ranges.
    document: Document,
    /// URI stamped on every symbol and reference this visitor records.
    uri: String,
    /// Package in effect at the current point of the walk. Starts as `"main"`
    /// and is replaced (not stacked) whenever a `package` node is visited.
    current_package: Option<String>,
    /// Workspace folder URI copied onto every recorded symbol, if any.
    workspace_folder_uri: Option<String>,
}
3578
/// `true` if `byte` can open an interpolated variable name: an ASCII letter or `_`.
fn is_interpolated_var_start(byte: u8) -> bool {
    matches!(byte, b'A'..=b'Z' | b'a'..=b'z' | b'_')
}
3582
/// `true` if `byte` can continue an interpolated variable name: an ASCII
/// letter or digit, `_`, or `:`.
///
/// The colon is accepted so that `::` package separators stay inside a scanned
/// name; this predicate looks at one byte only and cannot tell a lone colon
/// from a separator.
fn is_interpolated_var_continue(byte: u8) -> bool {
    matches!(byte, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_' | b':')
}
3586
/// `true` if the byte at `index` is escaped, i.e. it is directly preceded by
/// an odd number of consecutive backslashes.
///
/// An even run (including none) means the backslashes escape each other and
/// the byte at `index` is live. `index` may equal `bytes.len()`.
fn has_escaped_interpolation_marker(bytes: &[u8], index: usize) -> bool {
    let preceding_backslashes =
        bytes[..index].iter().rev().take_while(|&&byte| byte == b'\\').count();

    preceding_backslashes % 2 == 1
}
3601
/// Remove one pair of matching outer quotes (`"…"` or `'…'`) from `raw_content`.
///
/// The input is returned unchanged when it is shorter than two bytes, is not
/// quoted, or starts and ends with different quote characters.
fn strip_matching_quote_delimiters(raw_content: &str) -> &str {
    let unwrap_with = |quote: char| {
        raw_content.strip_prefix(quote).and_then(|rest| rest.strip_suffix(quote))
    };

    unwrap_with('"').or_else(|| unwrap_with('\'')).unwrap_or(raw_content)
}
3615
3616impl IndexVisitor {
3617    fn new(document: &mut Document, uri: String, workspace_folder_uri: Option<String>) -> Self {
3618        Self {
3619            document: document.clone(),
3620            uri,
3621            current_package: Some("main".to_string()),
3622            workspace_folder_uri,
3623        }
3624    }
3625
    /// Index the tree rooted at `node` into `file_index`.
    ///
    /// Runs in two steps: symbol declarations are projected first, then the
    /// tree is walked via `visit_node`.
    fn visit(&mut self, node: &Node, file_index: &mut FileIndex) {
        // Declarations are projected before the walk, while `current_package`
        // still holds the value it had on entry (`visit_node` replaces it on
        // `package` nodes).
        self.project_symbol_declarations(node, file_index);
        self.visit_node(node, file_index);
    }
3630
3631    fn project_symbol_declarations(&self, node: &Node, file_index: &mut FileIndex) {
3632        for decl in extract_symbol_decls(node, self.current_package.as_deref()) {
3633            let (start, end) = match decl.kind {
3634                SymbolKind::Variable(_) => match decl.anchor_span {
3635                    Some(span) => span,
3636                    None => decl.full_span,
3637                },
3638                _ => decl.full_span,
3639            };
3640            let ((start_line, start_col), (end_line, end_col)) =
3641                self.document.line_index.range(start, end);
3642            let range = Range {
3643                start: Position { byte: start, line: start_line, column: start_col },
3644                end: Position { byte: end, line: end_line, column: end_col },
3645            };
3646
3647            let symbol_name = symbol_decl_name(&decl.kind, &decl.name);
3648
3649            // Suppress qualified_name for lexically-scoped variables (my, state): they
3650            // are not package-visible and must not be found by a qualified lookup such
3651            // as `Foo::x`.  `our` and `local` variables keep the qualified name because
3652            // they participate in the package namespace.
3653            let qualified_name = match &decl.declarator {
3654                Some(d) if d == "my" || d == "state" => None,
3655                _ => (!decl.qualified_name.is_empty()).then_some(decl.qualified_name),
3656            };
3657
3658            // Top-level package declarations have no containing package; suppress the
3659            // spurious "main" container that comes from the walker's initial context.
3660            let container_name = match decl.kind {
3661                SymbolKind::Package => None,
3662                _ => decl.container,
3663            };
3664
3665            file_index.symbols.push(WorkspaceSymbol {
3666                name: symbol_name.clone(),
3667                kind: decl.kind,
3668                uri: self.uri.clone(),
3669                range,
3670                qualified_name,
3671                documentation: None,
3672                container_name,
3673                has_body: true,
3674                workspace_folder_uri: self.workspace_folder_uri.clone(),
3675            });
3676
3677            file_index.references.entry(symbol_name).or_default().push(SymbolReference {
3678                uri: self.uri.clone(),
3679                range,
3680                kind: ReferenceKind::Definition,
3681            });
3682        }
3683    }
3684
3685    fn record_interpolated_variable_references(
3686        &self,
3687        raw_content: &str,
3688        range: Range,
3689        file_index: &mut FileIndex,
3690    ) {
3691        let content = strip_matching_quote_delimiters(raw_content);
3692        let bytes = content.as_bytes();
3693        let mut index = 0;
3694
3695        while index < bytes.len() {
3696            if has_escaped_interpolation_marker(bytes, index) {
3697                index += 1;
3698                continue;
3699            }
3700
3701            let sigil = match bytes[index] {
3702                b'$' => "$",
3703                b'@' => "@",
3704                _ => {
3705                    index += 1;
3706                    continue;
3707                }
3708            };
3709
3710            if index + 1 >= bytes.len() {
3711                break;
3712            }
3713
3714            let (start, needs_closing_brace) =
3715                if bytes[index + 1] == b'{' { (index + 2, true) } else { (index + 1, false) };
3716
3717            if start >= bytes.len() || !is_interpolated_var_start(bytes[start]) {
3718                index += 1;
3719                continue;
3720            }
3721
3722            let mut end = start + 1;
3723            while end < bytes.len() && is_interpolated_var_continue(bytes[end]) {
3724                end += 1;
3725            }
3726
3727            if needs_closing_brace && (end >= bytes.len() || bytes[end] != b'}') {
3728                index += 1;
3729                continue;
3730            }
3731
3732            if let Some(name) = content.get(start..end) {
3733                let var_name = format!("{sigil}{name}");
3734                file_index.references.entry(var_name).or_default().push(SymbolReference {
3735                    uri: self.uri.clone(),
3736                    range,
3737                    kind: ReferenceKind::Read,
3738                });
3739            }
3740
3741            index = if needs_closing_brace { end + 1 } else { end };
3742        }
3743    }
3744
3745    fn visit_node(&mut self, node: &Node, file_index: &mut FileIndex) {
3746        match &node.kind {
3747            NodeKind::Package { name, .. } => {
3748                let package_name = name.clone();
3749
3750                // Update the current package (replaces the previous one, not a stack)
3751                self.current_package = Some(package_name.clone());
3752            }
3753
3754            NodeKind::Subroutine { body, .. } => {
3755                // Visit body
3756                self.visit_node(body, file_index);
3757            }
3758
3759            NodeKind::VariableDeclaration { initializer, .. } => {
3760                // Visit initializer
3761                if let Some(init) = initializer {
3762                    self.visit_node(init, file_index);
3763                }
3764            }
3765
3766            NodeKind::VariableListDeclaration { initializer, .. } => {
3767                // Visit the initializer
3768                if let Some(init) = initializer {
3769                    self.visit_node(init, file_index);
3770                }
3771            }
3772
3773            NodeKind::Variable { sigil, name } => {
3774                let var_name = format!("{}{}", sigil, name);
3775
3776                // Track as usage (could be read or write based on context)
3777                file_index.references.entry(var_name).or_default().push(SymbolReference {
3778                    uri: self.uri.clone(),
3779                    range: self.node_to_range(node),
3780                    kind: ReferenceKind::Read, // Default to read, would need context for write
3781                });
3782            }
3783
3784            NodeKind::FunctionCall { name, args, .. } => {
3785                let func_name = name.clone();
3786                let location = self.node_to_range(node);
3787
3788                // Determine package and bare name
3789                let (pkg, bare_name) = if let Some(idx) = func_name.rfind("::") {
3790                    (&func_name[..idx], &func_name[idx + 2..])
3791                } else {
3792                    (self.current_package.as_deref().unwrap_or("main"), func_name.as_str())
3793                };
3794
3795                let qualified = format!("{}::{}", pkg, bare_name);
3796
3797                // Track as usage for both qualified and bare forms
3798                // This dual indexing allows finding references whether the function is called
3799                // as `process_data()` or `Utils::process_data()`
3800                file_index.references.entry(bare_name.to_string()).or_default().push(
3801                    SymbolReference {
3802                        uri: self.uri.clone(),
3803                        range: location,
3804                        kind: ReferenceKind::Usage,
3805                    },
3806                );
3807                file_index.references.entry(qualified).or_default().push(SymbolReference {
3808                    uri: self.uri.clone(),
3809                    range: location,
3810                    kind: ReferenceKind::Usage,
3811                });
3812
3813                if name == "extends" || name == "with" {
3814                    for module_name in extract_module_names_from_call_args(args) {
3815                        file_index
3816                            .dependencies
3817                            .insert(normalize_dependency_module_name(&module_name));
3818                    }
3819                } else if name == "require" {
3820                    if let Some(module_name) = extract_module_name_from_require_args(args) {
3821                        file_index
3822                            .dependencies
3823                            .insert(normalize_dependency_module_name(&module_name));
3824                    }
3825                }
3826
3827                // Visit arguments
3828                for arg in args {
3829                    self.visit_node(arg, file_index);
3830                }
3831            }
3832
3833            NodeKind::Use { module, args, .. } => {
3834                let module_name = normalize_dependency_module_name(module);
3835                file_index.dependencies.insert(module_name.clone());
3836
3837                // Also track actual parent/base class names for dependency discovery.
3838                // `use parent 'Foo::Bar'` stores module="parent" and args=["'Foo::Bar'"],
3839                // so find_dependents("Foo::Bar") would miss files with only use parent.
3840                if module == "parent" || module == "base" {
3841                    for name in extract_module_names_from_use_args(args) {
3842                        file_index.dependencies.insert(normalize_dependency_module_name(&name));
3843                    }
3844                }
3845
3846                // Track as import
3847                file_index.references.entry(module_name).or_default().push(SymbolReference {
3848                    uri: self.uri.clone(),
3849                    range: self.node_to_range(node),
3850                    kind: ReferenceKind::Import,
3851                });
3852            }
3853
3854            // Handle assignment to detect writes
3855            NodeKind::Assignment { lhs, rhs, op } => {
3856                // For compound assignments (+=, -=, .=, etc.), the LHS is both read and written
3857                let is_compound = op != "=";
3858
3859                if let NodeKind::Variable { sigil, name } = &lhs.kind {
3860                    let var_name = format!("{}{}", sigil, name);
3861
3862                    // For compound assignments, it's a read first
3863                    if is_compound {
3864                        file_index.references.entry(var_name.clone()).or_default().push(
3865                            SymbolReference {
3866                                uri: self.uri.clone(),
3867                                range: self.node_to_range(lhs),
3868                                kind: ReferenceKind::Read,
3869                            },
3870                        );
3871                    }
3872
3873                    // Then it's always a write
3874                    file_index.references.entry(var_name).or_default().push(SymbolReference {
3875                        uri: self.uri.clone(),
3876                        range: self.node_to_range(lhs),
3877                        kind: ReferenceKind::Write,
3878                    });
3879                }
3880
3881                // Right side could have reads
3882                self.visit_node(rhs, file_index);
3883            }
3884
3885            // Recursively visit child nodes
3886            NodeKind::Block { statements } => {
3887                for stmt in statements {
3888                    self.visit_node(stmt, file_index);
3889                }
3890            }
3891
3892            NodeKind::If { condition, then_branch, elsif_branches, else_branch, .. } => {
3893                self.visit_node(condition, file_index);
3894                self.visit_node(then_branch, file_index);
3895                for (cond, branch) in elsif_branches {
3896                    self.visit_node(cond, file_index);
3897                    self.visit_node(branch, file_index);
3898                }
3899                if let Some(else_br) = else_branch {
3900                    self.visit_node(else_br, file_index);
3901                }
3902            }
3903
3904            NodeKind::While { condition, body, continue_block, .. } => {
3905                self.visit_node(condition, file_index);
3906                self.visit_node(body, file_index);
3907                if let Some(cont) = continue_block {
3908                    self.visit_node(cont, file_index);
3909                }
3910            }
3911
3912            NodeKind::For { init, condition, update, body, continue_block } => {
3913                if let Some(i) = init {
3914                    self.visit_node(i, file_index);
3915                }
3916                if let Some(c) = condition {
3917                    self.visit_node(c, file_index);
3918                }
3919                if let Some(u) = update {
3920                    self.visit_node(u, file_index);
3921                }
3922                self.visit_node(body, file_index);
3923                if let Some(cont) = continue_block {
3924                    self.visit_node(cont, file_index);
3925                }
3926            }
3927
3928            NodeKind::Foreach { variable, list, body, continue_block } => {
3929                // Iterator is a write context
3930                if let Some(cb) = continue_block {
3931                    self.visit_node(cb, file_index);
3932                }
3933                if let NodeKind::Variable { sigil, name } = &variable.kind {
3934                    let var_name = format!("{}{}", sigil, name);
3935                    file_index.references.entry(var_name).or_default().push(SymbolReference {
3936                        uri: self.uri.clone(),
3937                        range: self.node_to_range(variable),
3938                        kind: ReferenceKind::Write,
3939                    });
3940                }
3941                self.visit_node(variable, file_index);
3942                self.visit_node(list, file_index);
3943                self.visit_node(body, file_index);
3944            }
3945
3946            NodeKind::MethodCall { object, method, args } => {
3947                // Check if this is a static method call (Package->method)
3948                let qualified_method = if let NodeKind::Identifier { name } = &object.kind {
3949                    // Static method call: Package->method
3950                    Some(format!("{}::{}", name, method))
3951                } else {
3952                    // Instance method call: $obj->method
3953                    None
3954                };
3955
3956                // Object is a read context
3957                self.visit_node(object, file_index);
3958
3959                // Track method call under BOTH the qualified form (for static calls
3960                // like `Pkg->method`) AND the bare method name. This mirrors the
3961                // FunctionCall dual-key storage above (PR #122 dual-indexing pattern)
3962                // so that bare-name lookups (e.g. `find_unused_symbols`,
3963                // `count_usages("method")`) consistently find static method call sites.
3964                // See #6799 for the original asymmetric-storage bug report.
3965                let location = self.node_to_range(node);
3966                if let Some(qualified_method) = qualified_method.as_ref() {
3967                    file_index.references.entry(qualified_method.clone()).or_default().push(
3968                        SymbolReference {
3969                            uri: self.uri.clone(),
3970                            range: location,
3971                            kind: ReferenceKind::Usage,
3972                        },
3973                    );
3974                }
3975                file_index.references.entry(method.clone()).or_default().push(SymbolReference {
3976                    uri: self.uri.clone(),
3977                    range: location,
3978                    kind: ReferenceKind::Usage,
3979                });
3980
3981                if method == "import"
3982                    && let NodeKind::Identifier { name: module_name } = &object.kind
3983                {
3984                    for symbol in extract_manual_import_symbols(args) {
3985                        file_index.references.entry(symbol).or_default().push(SymbolReference {
3986                            uri: self.uri.clone(),
3987                            range: self.node_to_range(node),
3988                            kind: ReferenceKind::Import,
3989                        });
3990                    }
3991                    file_index.dependencies.insert(normalize_dependency_module_name(module_name));
3992                }
3993
3994                // Visit arguments
3995                for arg in args {
3996                    self.visit_node(arg, file_index);
3997                }
3998            }
3999
4000            NodeKind::No { module, .. } => {
4001                let module_name = normalize_dependency_module_name(module);
4002                file_index.dependencies.insert(module_name);
4003            }
4004
4005            NodeKind::Class { name, .. } => {
4006                self.current_package = Some(name.clone());
4007            }
4008
4009            NodeKind::Method { body, signature, .. } => {
4010                // Visit params
4011                if let Some(sig) = signature {
4012                    if let NodeKind::Signature { parameters } = &sig.kind {
4013                        for param in parameters {
4014                            self.visit_node(param, file_index);
4015                        }
4016                    }
4017                }
4018
4019                // Visit body
4020                self.visit_node(body, file_index);
4021            }
4022
4023            NodeKind::String { value, interpolated } => {
4024                if *interpolated {
4025                    let range = self.node_to_range(node);
4026                    self.record_interpolated_variable_references(value, range, file_index);
4027                }
4028            }
4029
4030            NodeKind::Heredoc { content, interpolated, .. } => {
4031                if *interpolated {
4032                    let range = self.node_to_range(node);
4033                    self.record_interpolated_variable_references(content, range, file_index);
4034                }
4035            }
4036
4037            // Handle special assignments (++ and --)
4038            NodeKind::Unary { op, operand } if op == "++" || op == "--" => {
4039                // Pre/post increment/decrement are both read and write
4040                if let NodeKind::Variable { sigil, name } = &operand.kind {
4041                    let var_name = format!("{}{}", sigil, name);
4042
4043                    // It's both a read and a write
4044                    file_index.references.entry(var_name.clone()).or_default().push(
4045                        SymbolReference {
4046                            uri: self.uri.clone(),
4047                            range: self.node_to_range(operand),
4048                            kind: ReferenceKind::Read,
4049                        },
4050                    );
4051
4052                    file_index.references.entry(var_name).or_default().push(SymbolReference {
4053                        uri: self.uri.clone(),
4054                        range: self.node_to_range(operand),
4055                        kind: ReferenceKind::Write,
4056                    });
4057                }
4058            }
4059
4060            _ => {
4061                // For other node types, just visit children
4062                self.visit_children(node, file_index);
4063            }
4064        }
4065    }
4066
4067    fn visit_children(&mut self, node: &Node, file_index: &mut FileIndex) {
4068        // Generic visitor for unhandled node types - visit all nested nodes
4069        match &node.kind {
4070            NodeKind::Program { statements } => {
4071                for stmt in statements {
4072                    self.visit_node(stmt, file_index);
4073                }
4074            }
4075            NodeKind::ExpressionStatement { expression } => {
4076                self.visit_node(expression, file_index);
4077            }
4078            // Expression nodes
4079            NodeKind::Unary { operand, .. } => {
4080                self.visit_node(operand, file_index);
4081            }
4082            NodeKind::Binary { left, right, .. } => {
4083                self.visit_node(left, file_index);
4084                self.visit_node(right, file_index);
4085            }
4086            NodeKind::Ternary { condition, then_expr, else_expr } => {
4087                self.visit_node(condition, file_index);
4088                self.visit_node(then_expr, file_index);
4089                self.visit_node(else_expr, file_index);
4090            }
4091            NodeKind::ArrayLiteral { elements } => {
4092                for elem in elements {
4093                    self.visit_node(elem, file_index);
4094                }
4095            }
4096            NodeKind::HashLiteral { pairs } => {
4097                for (key, value) in pairs {
4098                    self.visit_node(key, file_index);
4099                    self.visit_node(value, file_index);
4100                }
4101            }
4102            NodeKind::Return { value } => {
4103                if let Some(val) = value {
4104                    self.visit_node(val, file_index);
4105                }
4106            }
4107            NodeKind::Eval { block } | NodeKind::Do { block } | NodeKind::Defer { block } => {
4108                self.visit_node(block, file_index);
4109            }
4110            NodeKind::Try { body, catch_blocks, finally_block } => {
4111                self.visit_node(body, file_index);
4112                for (_, block) in catch_blocks {
4113                    self.visit_node(block, file_index);
4114                }
4115                if let Some(finally) = finally_block {
4116                    self.visit_node(finally, file_index);
4117                }
4118            }
4119            NodeKind::Given { expr, body } => {
4120                self.visit_node(expr, file_index);
4121                self.visit_node(body, file_index);
4122            }
4123            NodeKind::When { condition, body } => {
4124                self.visit_node(condition, file_index);
4125                self.visit_node(body, file_index);
4126            }
4127            NodeKind::Default { body } => {
4128                self.visit_node(body, file_index);
4129            }
4130            NodeKind::StatementModifier { statement, condition, .. } => {
4131                self.visit_node(statement, file_index);
4132                self.visit_node(condition, file_index);
4133            }
4134            NodeKind::VariableWithAttributes { variable, .. } => {
4135                self.visit_node(variable, file_index);
4136            }
4137            NodeKind::LabeledStatement { statement, .. } => {
4138                self.visit_node(statement, file_index);
4139            }
4140            _ => {
4141                // For other node types, no children to visit
4142            }
4143        }
4144    }
4145
4146    fn node_to_range(&mut self, node: &Node) -> Range {
4147        // LineIndex.range returns line numbers and UTF-16 code unit columns
4148        let ((start_line, start_col), (end_line, end_col)) =
4149            self.document.line_index.range(node.location.start, node.location.end);
4150        // Use byte offsets from node.location directly
4151        Range {
4152            start: Position { byte: node.location.start, line: start_line, column: start_col },
4153            end: Position { byte: node.location.end, line: end_line, column: end_col },
4154        }
4155    }
4156}
4157
4158fn symbol_decl_name(kind: &SymbolKind, name: &str) -> String {
4159    match kind {
4160        SymbolKind::Variable(VarKind::Scalar) => format!("${name}"),
4161        SymbolKind::Variable(VarKind::Array) => format!("@{name}"),
4162        SymbolKind::Variable(VarKind::Hash) => format!("%{name}"),
4163        _ => name.to_string(),
4164    }
4165}
4166
/// Split `Container::name` at its last `::` into `(container, bare_name)`.
///
/// Returns `None` when there is no `::` separator or when either side of the
/// last separator is empty.
fn split_qualified_symbol_name(canonical_name: &str) -> Option<(&str, &str)> {
    canonical_name
        .rsplit_once("::")
        .filter(|(container, bare_name)| !container.is_empty() && !bare_name.is_empty())
}
4174
4175fn is_framework_generated_member_entity(entity: &EntityFact) -> bool {
4176    entity.provenance == Provenance::FrameworkSynthesis && entity.confidence == Confidence::Medium
4177}
4178
4179fn sort_workspace_symbols(symbols: &mut [WorkspaceSymbol]) {
4180    symbols.sort_by(|left, right| {
4181        left.name
4182            .cmp(&right.name)
4183            .then_with(|| left.uri.cmp(&right.uri))
4184            .then_with(|| left.range.start.line.cmp(&right.range.start.line))
4185            .then_with(|| left.range.start.column.cmp(&right.range.start.column))
4186            .then_with(|| left.range.end.line.cmp(&right.range.end.line))
4187            .then_with(|| left.range.end.column.cmp(&right.range.end.column))
4188    });
4189}
4190
4191/// Extract bare module names from the argument list of a `use parent` / `use base` statement.
4192///
4193/// The `args` field of `NodeKind::Use` stores raw argument strings as the parser captured them.
4194/// For `use parent 'Foo::Bar'` this is `["'Foo::Bar'"]`.
4195/// For `use parent qw(Foo::Bar Other::Base)` this is `["qw(Foo::Bar Other::Base)"]`.
4196/// For `use parent -norequire, 'Foo::Bar'` this is `["-norequire", "'Foo::Bar'"]`.
4197///
4198/// Returns the module names with surrounding quotes/qw wrappers stripped.
4199/// Tokens starting with `-` or not matching `[\w::']+` are silently skipped.
4200fn extract_module_names_from_use_args(args: &[String]) -> Vec<String> {
4201    use std::collections::HashSet;
4202
4203    fn normalize_module_name(token: &str) -> Option<&str> {
4204        let stripped = token.trim_matches(|c: char| {
4205            matches!(c, '\'' | '"' | '(' | ')' | '[' | ']' | '{' | '}' | ',' | ';')
4206        });
4207
4208        if stripped.is_empty() || stripped.starts_with('-') {
4209            return None;
4210        }
4211
4212        stripped
4213            .chars()
4214            .all(|c| c.is_alphanumeric() || c == '_' || c == ':' || c == '\'')
4215            .then_some(stripped)
4216    }
4217
4218    let joined = args.join(" ");
4219
4220    let (qw_words, remainder) = extract_qw_words(&joined);
4221    let mut modules = Vec::new();
4222    let mut seen = HashSet::new();
4223    for word in qw_words {
4224        if let Some(candidate) = normalize_module_name(&word) {
4225            let canonical = canonicalize_perl_module_name(candidate);
4226            if seen.insert(canonical.clone()) {
4227                modules.push(canonical);
4228            }
4229        }
4230    }
4231
4232    for token in remainder.split_whitespace().flat_map(|t| t.split(',')) {
4233        if let Some(candidate) = normalize_module_name(token) {
4234            let canonical = canonicalize_perl_module_name(candidate);
4235            if seen.insert(canonical.clone()) {
4236                modules.push(canonical);
4237            }
4238        }
4239    }
4240
4241    modules
4242}
4243
4244fn extract_module_names_from_call_args(args: &[Node]) -> Vec<String> {
4245    fn collect_from_node(node: &Node, out: &mut Vec<String>) {
4246        match &node.kind {
4247            NodeKind::String { value, .. } => {
4248                out.extend(extract_module_names_from_use_args(std::slice::from_ref(value)));
4249            }
4250            NodeKind::Identifier { name } => {
4251                out.extend(extract_module_names_from_use_args(std::slice::from_ref(name)));
4252            }
4253            NodeKind::ArrayLiteral { elements } => {
4254                for element in elements {
4255                    collect_from_node(element, out);
4256                }
4257            }
4258            NodeKind::FunctionCall { name, args, .. } if name == "qw" => {
4259                for arg in args {
4260                    collect_from_node(arg, out);
4261                }
4262            }
4263            _ => {}
4264        }
4265    }
4266
4267    let mut modules = Vec::new();
4268    for arg in args {
4269        collect_from_node(arg, &mut modules);
4270    }
4271    modules
4272}
4273
/// Rewrite Perl's legacy `'` package separator (e.g. `Foo'Bar`) as `::`.
///
/// Canonicalizing to `::` lets lookups and dependency matching share one key shape.
fn canonicalize_perl_module_name(name: &str) -> String {
    let mut canonical = String::with_capacity(name.len());
    for ch in name.chars() {
        if ch == '\'' {
            canonical.push_str("::");
        } else {
            canonical.push(ch);
        }
    }
    canonical
}
4279
/// Rewrite every `::` separator in `name` as the legacy `'` separator
/// (e.g. `Foo::Bar` becomes `Foo'Bar`).
fn legacy_perl_module_name(name: &str) -> String {
    let segments: Vec<&str> = name.split("::").collect();
    segments.join("'")
}
4283
4284/// Normalize a module name for dependency storage and lookup.
4285/// Converts legacy `'` separators to `::` so stored keys are canonical.
4286fn normalize_dependency_module_name(module_name: &str) -> String {
4287    canonicalize_perl_module_name(module_name)
4288}
4289
/// Separate Perl `qw` quote-word lists from the rest of `input`.
///
/// Returns `(words, remainder)`:
/// - `words` holds the whitespace-separated words found inside every
///   well-formed `qw<delim>...<delim>` list, in order of appearance;
/// - `remainder` is `input` with those lists removed.
///
/// Bracketing delimiters (`()`, `[]`, `{}`, `<>`) may nest; any other
/// punctuation character closes on its next occurrence. Whitespace between
/// `qw` and the opening delimiter is allowed. If a list is never closed, the
/// text from that `qw` onward is copied to `remainder` verbatim and scanning
/// stops.
///
/// `qw` is only recognised at the start of a word: when the preceding
/// character is alphanumeric, `_` or `:`, it is part of a longer
/// (possibly package-qualified) name such as `my_qw::Base` or `Foo::qw::Bar`
/// and is left untouched.
fn extract_qw_words(input: &str) -> (Vec<String>, String) {
    let chars: Vec<char> = input.chars().collect();
    let mut i = 0;
    let mut words = Vec::new();
    let mut remainder = String::new();

    while i < chars.len() {
        // A `qw` that continues an identifier or qualified name is not the operator.
        let continues_name =
            i > 0 && (chars[i - 1].is_alphanumeric() || matches!(chars[i - 1], '_' | ':'));
        let starts_qw = chars[i] == 'q' && chars.get(i + 1) == Some(&'w') && !continues_name;
        if !starts_qw {
            remainder.push(chars[i]);
            i += 1;
            continue;
        }

        // Skip optional whitespace between `qw` and its opening delimiter.
        let mut j = i + 2;
        while j < chars.len() && chars[j].is_whitespace() {
            j += 1;
        }
        if j >= chars.len() {
            // `qw` at the very end of the input: plain text.
            remainder.push(chars[i]);
            i += 1;
            continue;
        }

        let open = chars[j];
        if open.is_alphanumeric() || open == '_' || open == '\'' || open == '"' {
            // Followed by a word character or a quote: this is an ordinary
            // word that merely starts with `qw` (e.g. `qwerty`).
            remainder.push(chars[i]);
            i += 1;
            continue;
        }
        let (close, is_paired_delimiter) = match open {
            '(' => (')', true),
            '[' => (']', true),
            '{' => ('}', true),
            '<' => ('>', true),
            _ => (open, false),
        };

        // Locate `k`, the index of the closing delimiter.
        let mut k = j + 1;
        if is_paired_delimiter {
            let mut depth = 1usize;
            while k < chars.len() && depth > 0 {
                if chars[k] == open {
                    depth += 1;
                } else if chars[k] == close {
                    depth -= 1;
                }
                k += 1;
            }
            if depth != 0 {
                // Unterminated list: keep the raw text and stop scanning.
                remainder.extend(chars[i..].iter());
                break;
            }
            // The loop stepped one past the closing delimiter.
            k -= 1;
        } else {
            while k < chars.len() && chars[k] != close {
                k += 1;
            }
            if k >= chars.len() {
                // Unterminated list: keep the raw text and stop scanning.
                remainder.extend(chars[i..].iter());
                break;
            }
        }

        let content: String = chars[j + 1..k].iter().collect();
        words.extend(content.split_whitespace().map(str::to_string));
        i = k + 1;
    }

    (words, remainder)
}
4368
4369fn extract_module_name_from_require_args(args: &[Node]) -> Option<String> {
4370    let first = args.first()?;
4371    match &first.kind {
4372        NodeKind::Identifier { name } => Some(name.clone()),
4373        NodeKind::String { value, .. } => {
4374            let cleaned = value.trim_matches('\'').trim_matches('"').trim();
4375            if cleaned.is_empty() {
4376                return None;
4377            }
4378            Some(cleaned.trim_end_matches(".pm").replace('/', "::"))
4379        }
4380        _ => None,
4381    }
4382}
4383
4384fn extract_manual_import_symbols(args: &[Node]) -> Vec<String> {
4385    fn push_if_bareword(out: &mut Vec<String>, token: &str) {
4386        let bare = token.trim().trim_matches('"').trim_matches('\'').trim();
4387        if bare.is_empty() || bare == "," {
4388            return;
4389        }
4390        let is_bareword = bare.bytes().all(|ch| ch.is_ascii_alphanumeric() || ch == b'_')
4391            && bare.as_bytes().first().is_some_and(|ch| ch.is_ascii_alphabetic() || *ch == b'_');
4392        if is_bareword {
4393            out.push(bare.to_string());
4394        }
4395    }
4396
4397    let mut symbols = Vec::new();
4398    for arg in args {
4399        match &arg.kind {
4400            NodeKind::String { value, .. } => push_if_bareword(&mut symbols, value),
4401            NodeKind::Identifier { name } => {
4402                if name.starts_with("qw") {
4403                    let content = name
4404                        .trim_start_matches("qw")
4405                        .trim_start_matches(|c: char| "([{/<|!".contains(c))
4406                        .trim_end_matches(|c: char| ")]}/|!>".contains(c));
4407                    for token in content.split_whitespace() {
4408                        push_if_bareword(&mut symbols, token);
4409                    }
4410                } else {
4411                    push_if_bareword(&mut symbols, name);
4412                }
4413            }
4414            NodeKind::ArrayLiteral { elements } => {
4415                for element in elements {
4416                    if let NodeKind::String { value, .. } = &element.kind {
4417                        push_if_bareword(&mut symbols, value);
4418                    }
4419                }
4420            }
4421            _ => {}
4422        }
4423    }
4424    symbols.sort();
4425    symbols.dedup();
4426    symbols
4427}
4428
4429/// Extract constant names from the `args` field of a `use constant` `NodeKind::Use` node.
4430///
4431/// The parser serialises `use constant` args in two distinct forms:
4432///
4433/// **Scalar form** — `use constant FOO => 42;`
4434///   → args: `["FOO", "42"]`  (the `=>` is consumed by the parser, not stored)
4435///   → The first arg is the constant name; remaining args are the value.
4436///
4437/// **Hash form** — `use constant { FOO => 1, BAR => 2 };`
4438///   → args: `["{", "FOO", "=>", "1", ",", "BAR", "=>", "2", "}"]`
4439///   → Identifiers immediately followed by `=>` are constant names.
4440///
4441/// **qw form** — `use constant qw(FOO BAR);`
4442///   → args: `["qw(FOO BAR)"]`
4443///   → Words inside the qw list are constant names.
4444///
4445/// Returns a deduplicated list of bare constant names (e.g. `["FOO", "BAR"]`).
4446#[cfg(test)]
4447fn extract_constant_names_from_use_args(args: &[String]) -> Vec<String> {
4448    use std::collections::HashSet;
4449
4450    fn push_unique(names: &mut Vec<String>, seen: &mut HashSet<String>, candidate: &str) {
4451        if seen.insert(candidate.to_string()) {
4452            names.push(candidate.to_string());
4453        }
4454    }
4455
4456    fn normalize_constant_name(token: &str) -> Option<&str> {
4457        let stripped = token.trim_matches(|c: char| {
4458            matches!(c, '\'' | '"' | '(' | ')' | '[' | ']' | '{' | '}' | ',' | ';')
4459        });
4460
4461        if stripped.is_empty() || stripped.starts_with('-') {
4462            return None;
4463        }
4464
4465        stripped.chars().all(|c| c.is_alphanumeric() || c == '_').then_some(stripped)
4466    }
4467
4468    let mut names = Vec::new();
4469    let mut seen = HashSet::new();
4470
4471    // Scalar form (most common): args = ["FOO", <value...>]
4472    // The first arg is a plain identifier with no `=>` in args at all.
4473    // Hash form starts with `{`; qw form starts with `qw`.
4474    let first = match args.first() {
4475        Some(f) => f.as_str(),
4476        None => return names,
4477    };
4478
4479    // qw form: single arg starting with "qw"
4480    if first.starts_with("qw") {
4481        let (qw_words, remainder) = extract_qw_words(first);
4482        if remainder.trim().is_empty() {
4483            for word in qw_words {
4484                if let Some(candidate) = normalize_constant_name(&word) {
4485                    push_unique(&mut names, &mut seen, candidate);
4486                }
4487            }
4488            return names;
4489        }
4490
4491        // Fallback for odd tokenisation: tolerate `qw` followed by spacing before the opener.
4492        let content = first.trim_start_matches("qw").trim_start();
4493        let content = content
4494            .trim_start_matches(|c: char| "([{/<|!".contains(c))
4495            .trim_end_matches(|c: char| ")]}/|!>".contains(c));
4496        for word in content.split_whitespace() {
4497            if let Some(candidate) = normalize_constant_name(word) {
4498                push_unique(&mut names, &mut seen, candidate);
4499            }
4500        }
4501        return names;
4502    }
4503
4504    // Hash form: args start with "{", "+{", or "+" followed by "{"
4505    let starts_hash_form = first == "{"
4506        || first == "+{"
4507        || (first == "+" && args.get(1).map(String::as_str) == Some("{"));
4508    if starts_hash_form {
4509        let mut skipped_leading_plus = false;
4510        let mut iter = args.iter().peekable();
4511        while let Some(arg) = iter.next() {
4512            // Some parser/tokenizer variants can emit "+{" as a single token for
4513            // `use constant +{ ... }`. Treat it as structural punctuation.
4514            if arg == "+{" {
4515                skipped_leading_plus = true;
4516                continue;
4517            }
4518            if arg == "+" && !skipped_leading_plus {
4519                skipped_leading_plus = true;
4520                continue;
4521            }
4522            if arg == "{" || arg == "}" || arg == "," || arg == "=>" {
4523                continue;
4524            }
4525            if let Some(candidate) = normalize_constant_name(arg)
4526                && iter.peek().map(|s| s.as_str()) == Some("=>")
4527            {
4528                push_unique(&mut names, &mut seen, candidate);
4529            }
4530        }
4531        return names;
4532    }
4533
4534    // Scalar form: first arg is the constant name (if it is a plain identifier)
4535    // Remaining args are the value and are skipped.
4536    if let Some(candidate) = normalize_constant_name(first) {
4537        push_unique(&mut names, &mut seen, candidate);
4538    }
4539
4540    names
4541}
4542
4543impl Default for WorkspaceIndex {
4544    fn default() -> Self {
4545        Self::new()
4546    }
4547}
4548
/// LSP adapter for converting internal Location types to LSP types.
///
/// LSP adapter utilities for Navigate/Analyze workflows.
#[cfg(all(feature = "workspace", feature = "lsp-compat"))]
pub mod lsp_adapter {
    use super::Location as IxLocation;
    use lsp_types::Location as LspLocation;
    // lsp_types uses Uri, not Url
    type LspUrl = lsp_types::Uri;

    /// Convert an internal location to an LSP Location for Navigate workflows.
    ///
    /// # Arguments
    ///
    /// * `ix` - Internal index location with URI and range information.
    ///
    /// # Returns
    ///
    /// `Some(LspLocation)` when conversion succeeds, or `None` if URI parsing fails.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_parser::workspace_index::{Location as IxLocation, lsp_adapter::to_lsp_location};
    /// use lsp_types::Range;
    ///
    /// let ix_loc = IxLocation { uri: "file:///path.pl".to_string(), range: Range::default() };
    /// let _ = to_lsp_location(&ix_loc);
    /// ```
    pub fn to_lsp_location(ix: &IxLocation) -> Option<LspLocation> {
        let uri = parse_url(&ix.uri)?;
        let (from, to) = (&ix.range.start, &ix.range.end);
        // The index's line/column pair maps onto LSP's line/character pair.
        let range = lsp_types::Range {
            start: lsp_types::Position { line: from.line, character: from.column },
            end: lsp_types::Position { line: to.line, character: to.column },
        };
        Some(LspLocation { uri, range })
    }

    /// Convert multiple index locations to LSP Locations for Navigate/Analyze workflows.
    ///
    /// # Arguments
    ///
    /// * `all` - Iterator of internal index locations to convert.
    ///
    /// # Returns
    ///
    /// Vector of successfully converted LSP locations, with invalid entries filtered out.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_parser::workspace_index::{Location as IxLocation, lsp_adapter::to_lsp_locations};
    /// use lsp_types::Range;
    ///
    /// let locations = vec![IxLocation { uri: "file:///script1.pl".to_string(), range: Range::default() }];
    /// let lsp_locations = to_lsp_locations(locations);
    /// assert_eq!(lsp_locations.len(), 1);
    /// ```
    pub fn to_lsp_locations(all: impl IntoIterator<Item = IxLocation>) -> Vec<LspLocation> {
        let mut converted = Vec::new();
        for ix in all {
            // Entries whose URI cannot be parsed are dropped.
            if let Some(location) = to_lsp_location(&ix) {
                converted.push(location);
            }
        }
        converted
    }

    /// Parse a string as an LSP URI, falling back to treating it as a filesystem path.
    #[cfg(not(target_arch = "wasm32"))]
    fn parse_url(s: &str) -> Option<LspUrl> {
        // lsp_types::Uri uses FromStr, not TryFrom
        use std::str::FromStr;

        // First choice: the string already is a URI.
        if let Ok(uri) = LspUrl::from_str(s) {
            return Some(uri);
        }

        // Otherwise treat it as a path: canonicalize it, build a properly
        // percent-encoded URI string, and parse that.
        let absolute = std::path::Path::new(s).canonicalize().ok()?;
        let uri_string = crate::workspace_index::fs_path_to_uri(&absolute).ok()?;
        LspUrl::from_str(&uri_string).ok()
    }

    /// Parse a string as a URL (wasm32 version - no filesystem fallback)
    #[cfg(target_arch = "wasm32")]
    fn parse_url(s: &str) -> Option<LspUrl> {
        use std::str::FromStr;
        LspUrl::from_str(s).ok()
    }
}
4636
4637#[cfg(test)]
4638mod tests {
4639    use super::*;
4640    use perl_tdd_support::{must, must_some};
4641
4642    #[test]
4643    fn test_use_constant_indexed_as_constant_symbol() {
4644        let index = WorkspaceIndex::new();
4645        let uri = "file:///lib/My/Config.pm";
4646        let code = r#"package My::Config;
4647use constant PI => 3.14159;
4648use constant {
4649    MAX_RETRIES => 3,
4650    TIMEOUT     => 30,
4651};
46521;
4653"#;
4654        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4655
4656        let symbols = index.file_symbols(uri);
4657        assert!(
4658            symbols.iter().any(|s| s.name == "PI" && s.kind == SymbolKind::Constant),
4659            "PI should be indexed as a Constant symbol; got: {:?}",
4660            symbols.iter().map(|s| (&s.name, &s.kind)).collect::<Vec<_>>()
4661        );
4662        assert!(
4663            symbols.iter().any(|s| s.name == "MAX_RETRIES" && s.kind == SymbolKind::Constant),
4664            "MAX_RETRIES should be indexed"
4665        );
4666        assert!(
4667            symbols.iter().any(|s| s.name == "TIMEOUT" && s.kind == SymbolKind::Constant),
4668            "TIMEOUT should be indexed"
4669        );
4670
4671        // Qualified lookup should also work
4672        let def = index.find_definition("My::Config::PI");
4673        assert!(def.is_some(), "find_definition('My::Config::PI') should succeed");
4674    }
4675
4676    #[test]
4677    fn test_extract_constant_names_deduplicates_qw_form() {
4678        let names = extract_constant_names_from_use_args(&["qw(FOO BAR FOO)".to_string()]);
4679        assert_eq!(names, vec!["FOO", "BAR"]);
4680    }
4681
4682    #[test]
4683    fn test_extract_constant_names_accepts_quoted_scalar_form() {
4684        let names = extract_constant_names_from_use_args(&[
4685            "'HTTP_OK'".to_string(),
4686            "=>".to_string(),
4687            "200".to_string(),
4688        ]);
4689        assert_eq!(names, vec!["HTTP_OK"]);
4690    }
4691
4692    #[test]
4693    fn search_symbols_returns_labeled_generated_framework_members()
4694    -> Result<(), Box<dyn std::error::Error>> {
4695        let index = WorkspaceIndex::new();
4696        let uri = "file:///lib/Generated/Pilot.pm";
4697        let code = r#"package Generated::Pilot;
4698use Moo;
4699has display_name => (is => 'rw');
47001;
4701"#;
4702        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4703
4704        let source_symbols = index.search_source_symbols("display_name");
4705        assert!(
4706            source_symbols.is_empty(),
4707            "generated framework members must not enter the exact source-symbol slice"
4708        );
4709        let trimmed_source_symbols = index.search_source_symbols("  display_name  ");
4710        assert!(
4711            trimmed_source_symbols.is_empty(),
4712            "trimmed generated framework member queries must not enter the exact source-symbol slice"
4713        );
4714
4715        let generated_symbols = index.search_generated_workspace_symbols("display_name");
4716        assert_eq!(generated_symbols.len(), 1);
4717        let trimmed_generated_symbols =
4718            index.search_generated_workspace_symbols("  display_name  ");
4719        assert_eq!(trimmed_generated_symbols.len(), 1);
4720        assert_eq!(trimmed_generated_symbols[0].name, "display_name [generated/framework]");
4721        assert!(index.search_generated_workspace_symbols("   ").is_empty());
4722        let symbol = &generated_symbols[0];
4723        assert_eq!(symbol.name, "display_name [generated/framework]");
4724        assert_eq!(symbol.kind, SymbolKind::Method);
4725        assert_eq!(symbol.qualified_name.as_deref(), Some("Generated::Pilot::display_name"));
4726        assert_eq!(
4727            symbol.container_name.as_deref(),
4728            Some("Generated::Pilot [generated/framework]")
4729        );
4730        assert!(!symbol.has_body);
4731        assert_eq!(symbol.uri, uri);
4732        assert!(
4733            symbol.range.end.byte > symbol.range.start.byte,
4734            "generated symbol must be anchored to the source framework declaration"
4735        );
4736
4737        let live_symbols = index.search_symbols("display_name");
4738        assert!(
4739            live_symbols.is_empty(),
4740            "general workspace index search must stay source-backed; generated pilot symbols are opt-in"
4741        );
4742
4743        {
4744            let mut shards = index.fact_shards.write();
4745            let shard = shards.values_mut().next().ok_or("missing generated-member shard")?;
4746            let entity = shard
4747                .entities
4748                .iter_mut()
4749                .find(|entity| entity.canonical_name == "Generated::Pilot::display_name")
4750                .ok_or("missing generated member entity")?;
4751            entity.provenance = Provenance::ExactAst;
4752        }
4753        let non_framework_symbols = index.search_generated_workspace_symbols("display_name");
4754        assert!(
4755            non_framework_symbols.is_empty(),
4756            "generated workspace-symbol pilot must require framework-synthesis provenance"
4757        );
4758        Ok(())
4759    }
4760
4761    #[test]
4762    fn search_symbols_returns_labeled_predicate_generated_members()
4763    -> Result<(), Box<dyn std::error::Error>> {
4764        let index = WorkspaceIndex::new();
4765        let uri = "file:///lib/Generated/PredicatePilot.pm";
4766        let code = r#"package Generated::PredicatePilot;
4767use Moo;
4768has status => (is => 'rw', predicate => 1);
47691;
4770"#;
4771        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4772
4773        let source_symbols = index.search_source_symbols("has_status");
4774        assert!(
4775            source_symbols.is_empty(),
4776            "predicate generated members must not enter the exact source-symbol slice"
4777        );
4778
4779        let generated_symbols = index.search_generated_workspace_symbols("has_status");
4780        assert_eq!(generated_symbols.len(), 1);
4781        let symbol = &generated_symbols[0];
4782        assert_eq!(symbol.name, "has_status [generated/framework]");
4783        assert_eq!(symbol.kind, SymbolKind::Method);
4784        assert_eq!(symbol.qualified_name.as_deref(), Some("Generated::PredicatePilot::has_status"));
4785        assert_eq!(
4786            symbol.container_name.as_deref(),
4787            Some("Generated::PredicatePilot [generated/framework]")
4788        );
4789        assert!(!symbol.has_body);
4790        assert_eq!(symbol.uri, uri);
4791        assert!(
4792            symbol.range.end.byte > symbol.range.start.byte,
4793            "predicate generated symbol must be anchored to the source framework declaration"
4794        );
4795
4796        let live_symbols = index.search_symbols("has_status");
4797        assert!(
4798            live_symbols.is_empty(),
4799            "general workspace index search must stay source-backed for predicate generated members"
4800        );
4801        Ok(())
4802    }
4803
4804    #[test]
4805    fn test_extract_constant_names_accepts_quoted_hash_form() {
4806        let names = extract_constant_names_from_use_args(&[
4807            "{".to_string(),
4808            "'FOO'".to_string(),
4809            "=>".to_string(),
4810            "1".to_string(),
4811            ",".to_string(),
4812            "\"BAR\"".to_string(),
4813            "=>".to_string(),
4814            "2".to_string(),
4815            "}".to_string(),
4816        ]);
4817        assert_eq!(names, vec!["FOO", "BAR"]);
4818    }
4819
4820    #[test]
4821    fn test_extract_constant_names_accepts_plus_hash_form_split_tokens() {
4822        let names = extract_constant_names_from_use_args(&[
4823            "+".to_string(),
4824            "{".to_string(),
4825            "FOO".to_string(),
4826            "=>".to_string(),
4827            "1".to_string(),
4828            ",".to_string(),
4829            "BAR".to_string(),
4830            "=>".to_string(),
4831            "2".to_string(),
4832            "}".to_string(),
4833        ]);
4834        assert_eq!(names, vec!["FOO", "BAR"]);
4835    }
4836
4837    #[test]
4838    fn test_extract_constant_names_accepts_plus_hash_form_combined_token() {
4839        let names = extract_constant_names_from_use_args(&[
4840            "+{".to_string(),
4841            "FOO".to_string(),
4842            "=>".to_string(),
4843            "1".to_string(),
4844            ",".to_string(),
4845            "BAR".to_string(),
4846            "=>".to_string(),
4847            "2".to_string(),
4848            "}".to_string(),
4849        ]);
4850        assert_eq!(names, vec!["FOO", "BAR"]);
4851    }
4852    #[test]
4853    fn test_use_constant_duplicate_names_indexed_once() {
4854        let index = WorkspaceIndex::new();
4855        let uri = "file:///lib/My/DedupConfig.pm";
4856        let code = r#"package My::DedupConfig;
4857use constant {
4858    RETRY_COUNT => 3,
4859    RETRY_COUNT => 5,
4860};
48611;
4862"#;
4863        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4864
4865        let symbols = index.file_symbols(uri);
4866        let retry_count_symbols = symbols.iter().filter(|s| s.name == "RETRY_COUNT").count();
4867        assert_eq!(
4868            retry_count_symbols, 1,
4869            "RETRY_COUNT should be indexed once even when repeated in use constant hash form"
4870        );
4871    }
4872
4873    #[test]
4874    fn test_use_constant_plus_hash_form_indexes_keys() {
4875        let index = WorkspaceIndex::new();
4876        let uri = "file:///lib/My/PlusHash.pm";
4877        let code = r#"package My::PlusHash;
4878use constant +{
4879    FOO => 1,
4880    BAR => 2,
4881};
48821;
4883"#;
4884        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4885
4886        assert!(index.find_definition("My::PlusHash::FOO").is_some());
4887        assert!(index.find_definition("My::PlusHash::BAR").is_some());
4888    }
4889
4890    #[test]
4891    fn test_basic_indexing() {
4892        let index = WorkspaceIndex::new();
4893        let uri = "file:///test.pl";
4894
4895        let code = r#"
4896package MyPackage;
4897
4898sub hello {
4899    print "Hello";
4900}
4901
4902my $var = 42;
4903"#;
4904
4905        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4906
4907        // Should have indexed the package and subroutine
4908        let symbols = index.file_symbols(uri);
4909        assert!(symbols.iter().any(|s| s.name == "MyPackage" && s.kind == SymbolKind::Package));
4910        assert!(symbols.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
4911        assert!(symbols.iter().any(|s| s.name == "$var" && s.kind.is_variable()));
4912    }
4913
4914    #[test]
4915    fn test_package_symbol_has_no_container_name() {
4916        // Regression: project_symbol_declarations used to set container_name = Some("main")
4917        // for top-level package declarations because the IndexVisitor starts with
4918        // current_package = Some("main").  Package symbols are top-level declarations
4919        // and must have container_name = None.
4920        let index = WorkspaceIndex::new();
4921        let uri = "file:///lib/Foo.pm";
4922        let code = "package Foo;\nsub bar { }\n";
4923        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4924
4925        let symbols = index.file_symbols(uri);
4926        let pkg_sym =
4927            must_some(symbols.iter().find(|s| s.name == "Foo" && s.kind == SymbolKind::Package));
4928        assert_eq!(
4929            pkg_sym.container_name, None,
4930            "Package symbol must not carry a container (was 'main')"
4931        );
4932    }
4933
4934    #[test]
4935    fn test_file_packages_returns_only_package_symbol_names() {
4936        let index = WorkspaceIndex::new();
4937        let uri = "file:///lib/OnlyPackages.pm";
4938        let code = "package Foo;\nsub hello { 1 }\npackage Bar { sub greet { 2 } }\n";
4939        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4940
4941        let mut package_names = index.file_packages(uri);
4942        package_names.sort();
4943        let mut expected_package_names: Vec<String> = index
4944            .file_symbols(uri)
4945            .into_iter()
4946            .filter(|s| s.kind == SymbolKind::Package)
4947            .map(|s| s.name)
4948            .collect();
4949        expected_package_names.sort();
4950
4951        assert_eq!(package_names, expected_package_names);
4952        assert_eq!(package_names, vec!["Bar", "Foo"]);
4953        assert!(!package_names.iter().any(|name| name == "hello"));
4954        assert!(!package_names.iter().any(|name| name == "greet"));
4955    }
4956
4957    #[test]
4958    fn test_file_package_symbols_returns_exact_container_match() {
4959        let index = WorkspaceIndex::new();
4960        let uri = "file:///lib/PackageMembers.pm";
4961        let code = "package Foo;\nsub hello { 1 }\npackage Bar;\nsub greet { 2 }\n";
4962        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4963
4964        let all_symbols = index.file_symbols(uri);
4965        let package_name = "Bar";
4966        let greet_symbol = must_some(all_symbols.iter().find(|s| s.name == "greet"));
4967        let bar_package = must_some(
4968            all_symbols.iter().find(|s| s.name == "Bar" && s.kind == SymbolKind::Package),
4969        );
4970        assert!(WorkspaceIndex::symbol_belongs_to_package(greet_symbol, package_name));
4971        assert!(!WorkspaceIndex::symbol_belongs_to_package(greet_symbol, "Foo"));
4972        assert!(!WorkspaceIndex::symbol_belongs_to_package(bar_package, package_name));
4973
4974        let mut expected_bar_names: Vec<String> = all_symbols
4975            .iter()
4976            .filter(|s| s.container_name.as_deref() == Some(package_name))
4977            .map(|s| s.name.clone())
4978            .collect();
4979        expected_bar_names.sort();
4980
4981        let mut bar_names: Vec<String> =
4982            index.file_package_symbols(uri, package_name).into_iter().map(|s| s.name).collect();
4983        bar_names.sort();
4984        assert_eq!(bar_names, expected_bar_names);
4985        assert_eq!(bar_names, vec!["greet"]);
4986
4987        let mut foo_names: Vec<String> =
4988            index.file_package_symbols(uri, "Foo").into_iter().map(|s| s.name).collect();
4989        foo_names.sort();
4990        assert_eq!(foo_names, vec!["hello"]);
4991        assert!(index.file_package_symbols(uri, "Missing").is_empty());
4992    }
4993
4994    #[test]
4995    fn test_my_variable_has_no_qualified_name() {
4996        // Regression: project_symbol_declarations used to set qualified_name = Some("Foo::x")
4997        // for `my $x` inside `package Foo`, making `find_definition("Foo::x")` return the
4998        // lexical variable.  `my` variables are not package-visible and must have
4999        // qualified_name = None so qualified lookups don't match them.
5000        let index = WorkspaceIndex::new();
5001        let uri = "file:///lib/Foo.pm";
5002        let code = "package Foo;\nsub bar { my $x = 1; }\n";
5003        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
5004
5005        let symbols = index.file_symbols(uri);
5006        let var_sym = must_some(symbols.iter().find(|s| s.name == "$x" && s.kind.is_variable()));
5007        assert_eq!(var_sym.qualified_name, None, "my variable must not have a qualified_name");
5008
5009        // `find_definition("Foo::x")` must not accidentally resolve to a lexical variable.
5010        assert!(
5011            index.find_definition("Foo::x").is_none(),
5012            "find_definition(\"Foo::x\") must not return a lexical my variable"
5013        );
5014    }
5015
5016    fn reference_kinds_for(
5017        index: &WorkspaceIndex,
5018        uri: &str,
5019        symbol_name: &str,
5020    ) -> Vec<ReferenceKind> {
5021        let files = index.files.read();
5022        let file = must_some(files.get(uri));
5023        file.references
5024            .get(symbol_name)
5025            .map(|refs| refs.iter().map(|r| r.kind).collect())
5026            .unwrap_or_default()
5027    }
5028
5029    #[test]
5030    fn test_reference_kinds_sub_definition_and_call_are_distinct() {
5031        let index = WorkspaceIndex::new();
5032        let uri = "file:///typed-refs-sub.pl";
5033        let code = "package TypedRefs;
5034sub foo { return 1; }
5035foo();
5036";
5037        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
5038
5039        let kinds = reference_kinds_for(&index, uri, "foo");
5040        assert!(kinds.contains(&ReferenceKind::Definition));
5041        assert!(kinds.contains(&ReferenceKind::Usage));
5042    }
5043
5044    #[test]
5045    fn test_reference_kinds_variable_read_and_write_are_distinct() {
5046        let index = WorkspaceIndex::new();
5047        let uri = "file:///typed-refs-var.pl";
5048        let code = "my $value = 1;
5049$value = 2;
5050print $value;
5051";
5052        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
5053
5054        let kinds = reference_kinds_for(&index, uri, "$value");
5055        assert!(kinds.contains(&ReferenceKind::Definition));
5056        assert!(kinds.contains(&ReferenceKind::Write));
5057        assert!(kinds.contains(&ReferenceKind::Read));
5058    }
5059
5060    #[test]
5061    fn test_reference_kinds_import_parent_and_export_ok_are_currently_import_only() {
5062        let index = WorkspaceIndex::new();
5063        let uri = "file:///typed-refs-import-export.pm";
5064        let code = "package Child;
5065use parent 'Base';
5066our @EXPORT_OK = qw(foo);
50671;
5068";
5069        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
5070
5071        let parent_kinds = reference_kinds_for(&index, uri, "Base");
5072        assert!(
5073            parent_kinds.is_empty(),
5074            "use parent inheritance edges are currently not stored as typed references"
5075        );
5076
5077        let export_symbol_kinds = reference_kinds_for(&index, uri, "foo");
5078        assert!(
5079            export_symbol_kinds.is_empty(),
5080            "EXPORT_OK entries are currently not represented as reference edges"
5081        );
5082    }
5083
5084    #[test]
5085    fn test_reference_kinds_dynamic_and_meta_edges_are_not_typed_yet() {
5086        let index = WorkspaceIndex::new();
5087        let uri = "file:///typed-refs-dynamic.pl";
5088        let code = r#"package TypedRefs;
5089sub foo { 1 }
5090&foo;
5091my $code = \&foo;
5092goto &foo;
5093*alias = \&foo;
5094eval "foo()";
5095with 'RoleName';
5096has 'name' => (is => 'ro');
50971;
5098"#;
5099        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
5100
5101        let foo_kinds = reference_kinds_for(&index, uri, "foo");
5102        assert!(
5103            foo_kinds
5104                .iter()
5105                .all(|kind| matches!(kind, ReferenceKind::Definition | ReferenceKind::Usage)),
5106            r"dynamic call forms (&foo, \&foo, goto &foo) are currently flattened to Usage"
5107        );
5108
5109        assert!(
5110            reference_kinds_for(&index, uri, "RoleName").is_empty(),
5111            "role composition edges (`with 'RoleName'`) are not indexed as typed references yet"
5112        );
5113    }
5114
5115    #[test]
5116    fn test_find_references() {
5117        let index = WorkspaceIndex::new();
5118        let uri = "file:///test.pl";
5119
5120        let code = r#"
5121sub test {
5122    my $x = 1;
5123    $x = 2;
5124    print $x;
5125}
5126"#;
5127
5128        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
5129
5130        let refs = index.find_references("$x");
5131        assert!(refs.len() >= 2); // Definition + at least one usage
5132    }
5133
5134    #[test]
5135    fn test_find_references_bare_name_includes_qualified_calls() {
5136        let index = WorkspaceIndex::new();
5137        let uri = "file:///refs.pl";
5138        let code = r#"
5139package RefDemo;
5140sub helper {
5141    return 1;
5142}
5143
5144helper();
5145RefDemo::helper();
5146"#;
5147
5148        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
5149
5150        let bare_refs = index.find_references("helper");
5151        let qualified_refs = index.find_references("RefDemo::helper");
5152
5153        assert!(
5154            bare_refs.len() >= qualified_refs.len(),
5155            "bare-name reference lookup should include qualified calls"
5156        );
5157    }
5158
5159    #[test]
5160    fn test_count_usages_bare_name_includes_qualified_calls() {
5161        let index = WorkspaceIndex::new();
5162        let uri = "file:///usage.pl";
5163        let code = r#"
5164package UsageDemo;
5165sub helper {
5166    return 1;
5167}
5168
5169helper();
5170UsageDemo::helper();
5171"#;
5172
5173        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
5174
5175        let bare_usage_count = index.count_usages("helper");
5176        let qualified_usage_count = index.count_usages("UsageDemo::helper");
5177
5178        assert!(
5179            bare_usage_count >= qualified_usage_count,
5180            "bare-name usage count should include qualified call sites"
5181        );
5182    }
5183
5184    #[test]
5185    fn test_dependencies() {
5186        let index = WorkspaceIndex::new();
5187        let uri = "file:///test.pl";
5188
5189        let code = r#"
5190use strict;
5191use warnings;
5192use Data::Dumper;
5193"#;
5194
5195        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
5196
5197        let deps = index.file_dependencies(uri);
5198        assert!(deps.contains("strict"));
5199        assert!(deps.contains("warnings"));
5200        assert!(deps.contains("Data::Dumper"));
5201    }
5202
5203    #[test]
5204    fn test_uri_to_fs_path_basic() {
5205        // Test basic file:// URI conversion
5206        if let Some(path) = uri_to_fs_path("file:///tmp/test.pl") {
5207            assert_eq!(path, std::path::PathBuf::from("/tmp/test.pl"));
5208        }
5209
5210        // Test with invalid URI
5211        assert!(uri_to_fs_path("not-a-uri").is_none());
5212
5213        // Test with non-file scheme
5214        assert!(uri_to_fs_path("http://example.com").is_none());
5215    }
5216
5217    #[test]
5218    fn test_uri_to_fs_path_with_spaces() {
5219        // Test with percent-encoded spaces
5220        if let Some(path) = uri_to_fs_path("file:///tmp/path%20with%20spaces/test.pl") {
5221            assert_eq!(path, std::path::PathBuf::from("/tmp/path with spaces/test.pl"));
5222        }
5223
5224        // Test with multiple spaces and special characters
5225        if let Some(path) = uri_to_fs_path("file:///tmp/My%20Documents/test%20file.pl") {
5226            assert_eq!(path, std::path::PathBuf::from("/tmp/My Documents/test file.pl"));
5227        }
5228    }
5229
5230    #[test]
5231    fn test_uri_to_fs_path_with_unicode() {
5232        // Test with Unicode characters (percent-encoded)
5233        if let Some(path) = uri_to_fs_path("file:///tmp/caf%C3%A9/test.pl") {
5234            assert_eq!(path, std::path::PathBuf::from("/tmp/café/test.pl"));
5235        }
5236
5237        // Test with Unicode emoji (percent-encoded)
5238        if let Some(path) = uri_to_fs_path("file:///tmp/emoji%F0%9F%98%80/test.pl") {
5239            assert_eq!(path, std::path::PathBuf::from("/tmp/emoji😀/test.pl"));
5240        }
5241    }
5242
5243    #[test]
5244    fn test_fs_path_to_uri_basic() {
5245        // Test basic path to URI conversion
5246        let result = fs_path_to_uri("/tmp/test.pl");
5247        assert!(result.is_ok());
5248        let uri = must(result);
5249        assert!(uri.starts_with("file://"));
5250        assert!(uri.contains("/tmp/test.pl"));
5251    }
5252
5253    #[test]
5254    fn test_fs_path_to_uri_with_spaces() {
5255        // Test path with spaces
5256        let result = fs_path_to_uri("/tmp/path with spaces/test.pl");
5257        assert!(result.is_ok());
5258        let uri = must(result);
5259        assert!(uri.starts_with("file://"));
5260        // Should contain percent-encoded spaces
5261        assert!(uri.contains("path%20with%20spaces"));
5262    }
5263
5264    #[test]
5265    fn test_fs_path_to_uri_with_unicode() {
5266        // Test path with Unicode characters
5267        let result = fs_path_to_uri("/tmp/café/test.pl");
5268        assert!(result.is_ok());
5269        let uri = must(result);
5270        assert!(uri.starts_with("file://"));
5271        // Should contain percent-encoded Unicode
5272        assert!(uri.contains("caf%C3%A9"));
5273    }
5274
5275    #[test]
5276    fn test_normalize_uri_file_schemes() {
5277        // Test normalization of valid file URIs
5278        let uri = WorkspaceIndex::normalize_uri("file:///tmp/test.pl");
5279        assert_eq!(uri, "file:///tmp/test.pl");
5280
5281        // Test normalization of URIs with spaces
5282        let uri = WorkspaceIndex::normalize_uri("file:///tmp/path%20with%20spaces/test.pl");
5283        assert_eq!(uri, "file:///tmp/path%20with%20spaces/test.pl");
5284    }
5285
5286    #[test]
5287    fn test_normalize_uri_absolute_paths() {
5288        // Test normalization of absolute paths (convert to file:// URI)
5289        let uri = WorkspaceIndex::normalize_uri("/tmp/test.pl");
5290        assert!(uri.starts_with("file://"));
5291        assert!(uri.contains("/tmp/test.pl"));
5292    }
5293
5294    #[test]
5295    fn test_normalize_uri_special_schemes() {
5296        // Test that special schemes like untitled: are preserved
5297        let uri = WorkspaceIndex::normalize_uri("untitled:Untitled-1");
5298        assert_eq!(uri, "untitled:Untitled-1");
5299    }
5300
5301    #[test]
5302    fn test_roundtrip_conversion() {
5303        // Test that URI -> path -> URI conversion preserves the URI
5304        let original_uri = "file:///tmp/path%20with%20spaces/caf%C3%A9.pl";
5305
5306        if let Some(path) = uri_to_fs_path(original_uri) {
5307            if let Ok(converted_uri) = fs_path_to_uri(&path) {
5308                // Should be able to round-trip back to an equivalent URI
5309                assert!(converted_uri.starts_with("file://"));
5310
5311                // The path component should decode correctly
5312                if let Some(roundtrip_path) = uri_to_fs_path(&converted_uri) {
5313                    #[cfg(windows)]
5314                    if let Ok(rootless) = path.strip_prefix(std::path::Path::new(r"\")) {
5315                        assert!(roundtrip_path.ends_with(rootless));
5316                    } else {
5317                        assert_eq!(path, roundtrip_path);
5318                    }
5319
5320                    #[cfg(not(windows))]
5321                    assert_eq!(path, roundtrip_path);
5322                }
5323            }
5324        }
5325    }
5326
5327    #[cfg(target_os = "windows")]
5328    #[test]
5329    fn test_windows_paths() {
5330        // Test Windows-style paths
5331        let result = fs_path_to_uri(r"C:\Users\test\Documents\script.pl");
5332        assert!(result.is_ok());
5333        let uri = must(result);
5334        assert!(uri.starts_with("file://"));
5335
5336        // Test Windows path with spaces
5337        let result = fs_path_to_uri(r"C:\Program Files\My App\script.pl");
5338        assert!(result.is_ok());
5339        let uri = must(result);
5340        assert!(uri.starts_with("file://"));
5341        assert!(uri.contains("Program%20Files"));
5342    }
5343
5344    // ========================================================================
5345    // IndexCoordinator Tests
5346    // ========================================================================
5347
5348    #[test]
5349    fn test_coordinator_initial_state() {
5350        let coordinator = IndexCoordinator::new();
5351        assert!(matches!(
5352            coordinator.state(),
5353            IndexState::Building { phase: IndexPhase::Idle, .. }
5354        ));
5355    }
5356
5357    #[test]
5358    fn test_transition_to_scanning_phase() {
5359        let coordinator = IndexCoordinator::new();
5360        coordinator.transition_to_scanning();
5361
5362        let state = coordinator.state();
5363        assert!(
5364            matches!(state, IndexState::Building { phase: IndexPhase::Scanning, .. }),
5365            "Expected Building state after scanning, got: {:?}",
5366            state
5367        );
5368    }
5369
5370    #[test]
5371    fn test_transition_to_indexing_phase() {
5372        let coordinator = IndexCoordinator::new();
5373        coordinator.transition_to_scanning();
5374        coordinator.update_scan_progress(3);
5375        coordinator.transition_to_indexing(3);
5376
5377        let state = coordinator.state();
5378        assert!(
5379            matches!(
5380                state,
5381                IndexState::Building { phase: IndexPhase::Indexing, total_count: 3, .. }
5382            ),
5383            "Expected Building state after indexing with total_count 3, got: {:?}",
5384            state
5385        );
5386    }
5387
5388    #[test]
5389    fn test_transition_to_ready() {
5390        let coordinator = IndexCoordinator::new();
5391        coordinator.transition_to_ready(100, 5000);
5392
5393        let state = coordinator.state();
5394        if let IndexState::Ready { file_count, symbol_count, .. } = state {
5395            assert_eq!(file_count, 100);
5396            assert_eq!(symbol_count, 5000);
5397        } else {
5398            unreachable!("Expected Ready state, got: {:?}", state);
5399        }
5400    }
5401
5402    #[test]
5403    fn test_parse_storm_degradation() {
5404        let coordinator = IndexCoordinator::new();
5405        coordinator.transition_to_ready(100, 5000);
5406
5407        // Trigger parse storm
5408        for _ in 0..15 {
5409            coordinator.notify_change("file.pm");
5410        }
5411
5412        let state = coordinator.state();
5413        assert!(
5414            matches!(state, IndexState::Degraded { .. }),
5415            "Expected Degraded state, got: {:?}",
5416            state
5417        );
5418        if let IndexState::Degraded { reason, .. } = state {
5419            assert!(matches!(reason, DegradationReason::ParseStorm { .. }));
5420        }
5421    }
5422
5423    #[test]
5424    fn test_recovery_from_parse_storm() {
5425        let coordinator = IndexCoordinator::new();
5426        coordinator.transition_to_ready(100, 5000);
5427
5428        // Trigger parse storm
5429        for _ in 0..15 {
5430            coordinator.notify_change("file.pm");
5431        }
5432
5433        // Complete all parses
5434        for _ in 0..15 {
5435            coordinator.notify_parse_complete("file.pm");
5436        }
5437
5438        // Should recover to Building state
5439        assert!(matches!(coordinator.state(), IndexState::Building { .. }));
5440    }
5441
5442    #[test]
5443    fn test_query_dispatch_ready() {
5444        let coordinator = IndexCoordinator::new();
5445        coordinator.transition_to_ready(100, 5000);
5446
5447        let result = coordinator.query(|_index| "full_query", |_index| "partial_query");
5448
5449        assert_eq!(result, "full_query");
5450    }
5451
5452    #[test]
5453    fn test_query_dispatch_degraded() {
5454        let coordinator = IndexCoordinator::new();
5455        // Building state should use partial query
5456
5457        let result = coordinator.query(|_index| "full_query", |_index| "partial_query");
5458
5459        assert_eq!(result, "partial_query");
5460    }
5461
5462    #[test]
5463    fn test_metrics_pending_count() {
5464        let coordinator = IndexCoordinator::new();
5465
5466        coordinator.notify_change("file1.pm");
5467        coordinator.notify_change("file2.pm");
5468
5469        assert_eq!(coordinator.metrics.pending_count(), 2);
5470
5471        coordinator.notify_parse_complete("file1.pm");
5472        assert_eq!(coordinator.metrics.pending_count(), 1);
5473    }
5474
5475    #[test]
5476    fn test_instrumentation_records_transitions() {
5477        let coordinator = IndexCoordinator::new();
5478        coordinator.transition_to_ready(10, 100);
5479
5480        let snapshot = coordinator.instrumentation_snapshot();
5481        let transition =
5482            IndexStateTransition { from: IndexStateKind::Building, to: IndexStateKind::Ready };
5483        let count = snapshot.state_transition_counts.get(&transition).copied().unwrap_or(0);
5484        assert_eq!(count, 1);
5485    }
5486
5487    #[test]
5488    fn test_instrumentation_records_early_exit() {
5489        let coordinator = IndexCoordinator::new();
5490        coordinator.record_early_exit(EarlyExitReason::InitialTimeBudget, 25, 1, 10);
5491
5492        let snapshot = coordinator.instrumentation_snapshot();
5493        let count = snapshot
5494            .early_exit_counts
5495            .get(&EarlyExitReason::InitialTimeBudget)
5496            .copied()
5497            .unwrap_or(0);
5498        assert_eq!(count, 1);
5499        assert!(snapshot.last_early_exit.is_some());
5500    }
5501
5502    #[test]
5503    fn test_custom_limits() {
5504        let limits = IndexResourceLimits {
5505            max_files: 5000,
5506            max_symbols_per_file: 1000,
5507            max_total_symbols: 100_000,
5508            max_ast_cache_bytes: 128 * 1024 * 1024,
5509            max_ast_cache_items: 50,
5510            max_scan_duration_ms: 30_000,
5511        };
5512
5513        let coordinator = IndexCoordinator::with_limits(limits.clone());
5514        assert_eq!(coordinator.limits.max_files, 5000);
5515        assert_eq!(coordinator.limits.max_total_symbols, 100_000);
5516    }
5517
5518    #[test]
5519    fn test_degradation_preserves_symbol_count() {
5520        let coordinator = IndexCoordinator::new();
5521        coordinator.transition_to_ready(100, 5000);
5522
5523        coordinator.transition_to_degraded(DegradationReason::IoError {
5524            message: "Test error".to_string(),
5525        });
5526
5527        let state = coordinator.state();
5528        assert!(
5529            matches!(state, IndexState::Degraded { .. }),
5530            "Expected Degraded state, got: {:?}",
5531            state
5532        );
5533        if let IndexState::Degraded { available_symbols, .. } = state {
5534            assert_eq!(available_symbols, 5000);
5535        }
5536    }
5537
5538    #[test]
5539    fn test_index_access() {
5540        let coordinator = IndexCoordinator::new();
5541        let index = coordinator.index();
5542
5543        // Should have access to underlying WorkspaceIndex
5544        assert!(index.all_symbols().is_empty());
5545    }
5546
/// Indexing more files than `max_files` permits and then enforcing limits must leave the
/// coordinator degraded with a `MaxFiles` resource-limit reason.
#[test]
fn test_resource_limit_enforcement_max_files() {
    let coord = IndexCoordinator::with_limits(IndexResourceLimits {
        max_files: 5,
        max_symbols_per_file: 1000,
        max_total_symbols: 50_000,
        max_ast_cache_bytes: 128 * 1024 * 1024,
        max_ast_cache_items: 50,
        max_scan_duration_ms: 30_000,
    });
    coord.transition_to_ready(10, 100);

    // Ten files against a cap of five.
    (0..10).for_each(|n| {
        let uri = must(url::Url::parse(&format!("file:///test{}.pl", n)));
        must(coord.index().index_file(uri, String::from("sub test { }")));
    });

    coord.enforce_limits();

    let state = coord.state();
    assert!(
        matches!(
            state,
            IndexState::Degraded {
                reason: DegradationReason::ResourceLimit { kind: ResourceKind::MaxFiles },
                ..
            }
        ),
        "Expected Degraded state with ResourceLimit(MaxFiles), got: {:?}",
        state
    );
}
5585
/// Exceeding `max_total_symbols` and then enforcing limits must leave the coordinator
/// degraded with a `MaxSymbols` resource-limit reason.
#[test]
fn test_resource_limit_enforcement_max_symbols() {
    let coord = IndexCoordinator::with_limits(IndexResourceLimits {
        max_files: 100,
        max_symbols_per_file: 10,
        max_total_symbols: 50, // deliberately tiny so the test can exceed it
        max_ast_cache_bytes: 128 * 1024 * 1024,
        max_ast_cache_items: 50,
        max_scan_duration_ms: 30_000,
    });
    coord.transition_to_ready(0, 0);

    // Ten subroutines per file; ten such files comfortably exceed the limit of 50.
    let code = r#"
package Test;
sub sub1 { }
sub sub2 { }
sub sub3 { }
sub sub4 { }
sub sub5 { }
sub sub6 { }
sub sub7 { }
sub sub8 { }
sub sub9 { }
sub sub10 { }
"#;

    (0..10).for_each(|n| {
        let uri = must(url::Url::parse(&format!("file:///test{}.pl", n)));
        must(coord.index().index_file(uri, String::from(code)));
    });

    coord.enforce_limits();

    let state = coord.state();
    assert!(
        matches!(
            state,
            IndexState::Degraded {
                reason: DegradationReason::ResourceLimit { kind: ResourceKind::MaxSymbols },
                ..
            }
        ),
        "Expected Degraded state with ResourceLimit(MaxSymbols), got: {:?}",
        state
    );
}
5637
/// With only a handful of files and default limits, `check_limits` reports nothing and
/// the coordinator stays `Ready`.
#[test]
fn test_check_limits_returns_none_within_bounds() {
    let coord = IndexCoordinator::new();
    coord.transition_to_ready(0, 0);

    // Five small files: far below the default limits.
    (0..5).for_each(|n| {
        let uri = must(url::Url::parse(&format!("file:///test{}.pl", n)));
        must(coord.index().index_file(uri, String::from("sub test { }")));
    });

    // No limit should be reported as exceeded.
    let exceeded = coord.check_limits();
    assert!(exceeded.is_none(), "check_limits should return None when within bounds");

    // And the state must not have moved away from Ready.
    let still_ready = matches!(coord.state(), IndexState::Ready { .. });
    assert!(still_ready, "State should remain Ready when within limits");
}
5661
/// `transition_to_ready` on an index that already exceeds `max_files` must end up in the
/// degraded state with a `MaxFiles` reason, without an explicit `enforce_limits` call.
#[test]
fn test_enforce_limits_called_on_transition_to_ready() {
    let coord = IndexCoordinator::with_limits(IndexResourceLimits {
        max_files: 3,
        max_symbols_per_file: 1000,
        max_total_symbols: 50_000,
        max_ast_cache_bytes: 128 * 1024 * 1024,
        max_ast_cache_items: 50,
        max_scan_duration_ms: 30_000,
    });

    // Populate the index first: five files against a cap of three.
    (0..5).for_each(|n| {
        let uri = must(url::Url::parse(&format!("file:///test{}.pl", n)));
        must(coord.index().index_file(uri, String::from("sub test { }")));
    });

    // The transition itself is expected to apply the limits.
    coord.transition_to_ready(5, 100);

    let state = coord.state();
    assert!(
        matches!(
            state,
            IndexState::Degraded {
                reason: DegradationReason::ResourceLimit { kind: ResourceKind::MaxFiles },
                ..
            }
        ),
        "Expected Degraded state after transition_to_ready with exceeded limits, got: {:?}",
        state
    );
}
5699
/// Ready → Ready is permitted and replaces the stored file/symbol metrics.
#[test]
fn test_state_transition_guard_ready_to_ready() {
    let coord = IndexCoordinator::new();

    // Enter Ready, then re-enter it with different metrics.
    for (files, symbols) in [(100, 5000), (150, 7500)] {
        coord.transition_to_ready(files, symbols);
    }

    let state = coord.state();
    assert!(
        matches!(state, IndexState::Ready { file_count: 150, symbol_count: 7500, .. }),
        "Expected Ready state with updated metrics, got: {:?}",
        state
    );
}
5716
/// Building → Building is permitted and updates the total count while progress stays at 0.
#[test]
fn test_state_transition_guard_building_to_building() {
    let coord = IndexCoordinator::new();

    // First entry into Building.
    coord.transition_to_building(100);
    let first = coord.state();
    assert!(
        matches!(first, IndexState::Building { indexed_count: 0, total_count: 100, .. }),
        "Expected Building state, got: {:?}",
        first
    );

    // Second entry with a revised total.
    coord.transition_to_building(200);
    let second = coord.state();
    assert!(
        matches!(second, IndexState::Building { indexed_count: 0, total_count: 200, .. }),
        "Expected Building state, got: {:?}",
        second
    );
}
5742
/// Ready → Building is permitted (a re-scan) and starts with zero indexed files.
#[test]
fn test_state_transition_ready_to_building() {
    let coord = IndexCoordinator::new();
    coord.transition_to_ready(100, 5000);

    // Kick off a re-scan over 150 files.
    coord.transition_to_building(150);

    let rescanning = coord.state();
    assert!(
        matches!(rescanning, IndexState::Building { indexed_count: 0, total_count: 150, .. }),
        "Expected Building state after re-scan, got: {:?}",
        rescanning
    );
}
5759
/// Degraded → Building is permitted (recovery) and starts with zero indexed files.
#[test]
fn test_state_transition_degraded_to_building() {
    let coord = IndexCoordinator::new();
    let failure = DegradationReason::IoError { message: "Test error".to_string() };
    coord.transition_to_degraded(failure);

    // Recovery attempt: start building again.
    coord.transition_to_building(100);

    let recovering = coord.state();
    assert!(
        matches!(recovering, IndexState::Building { indexed_count: 0, total_count: 100, .. }),
        "Expected Building state after recovery, got: {:?}",
        recovering
    );
}
5778
/// `update_building_progress` changes `indexed_count` and leaves `total_count` untouched.
#[test]
fn test_update_building_progress() {
    let coord = IndexCoordinator::new();
    coord.transition_to_building(100);

    // Halfway through.
    coord.update_building_progress(50);
    let halfway = coord.state();
    assert!(
        matches!(halfway, IndexState::Building { indexed_count: 50, total_count: 100, .. }),
        "Expected Building state with updated progress, got: {:?}",
        halfway
    );

    // All files processed.
    coord.update_building_progress(100);
    let finished = coord.state();
    assert!(
        matches!(finished, IndexState::Building { indexed_count: 100, total_count: 100, .. }),
        "Expected Building state with completed progress, got: {:?}",
        finished
    );
}
5804
/// With a zero-millisecond scan budget, a progress update after a short pause must
/// degrade the coordinator with a `ScanTimeout` reason.
#[test]
fn test_scan_timeout_detection() {
    // A budget of 0 ms means any measurable elapsed time is over the limit.
    let zero_budget = IndexResourceLimits { max_scan_duration_ms: 0, ..Default::default() };

    let coord = IndexCoordinator::with_limits(zero_budget);
    coord.transition_to_building(100);

    // Let a little time pass so elapsed time is above zero.
    let pause = std::time::Duration::from_millis(1);
    std::thread::sleep(pause);

    // The progress update is where the timeout is expected to be noticed.
    coord.update_building_progress(10);

    let state = coord.state();
    assert!(
        matches!(
            state,
            IndexState::Degraded { reason: DegradationReason::ScanTimeout { .. }, .. }
        ),
        "Expected Degraded state with ScanTimeout, got: {:?}",
        state
    );
}
5832
/// With a generous scan budget, an immediate progress update stays in `Building`.
#[test]
fn test_scan_timeout_does_not_trigger_within_limit() {
    // Ten seconds is far more than this test needs.
    let roomy_budget =
        IndexResourceLimits { max_scan_duration_ms: 10_000, ..Default::default() };

    let coord = IndexCoordinator::with_limits(roomy_budget);
    coord.transition_to_building(100);

    // Report progress right away, well inside the budget.
    coord.update_building_progress(50);

    let state = coord.state();
    assert!(
        matches!(state, IndexState::Building { indexed_count: 50, .. }),
        "Expected Building state (no timeout), got: {:?}",
        state
    );
}
5854
/// Indexing the same content twice for one URI yields the same symbols both times.
#[test]
fn test_early_exit_optimization_unchanged_content() {
    let index = WorkspaceIndex::new();
    let uri = must(url::Url::parse("file:///test.pl"));
    let code = r#"
package MyPackage;

sub hello {
    print "Hello";
}
"#;

    // Initial pass: the file is parsed and its symbols recorded.
    must(index.index_file(uri.clone(), String::from(code)));
    let before = index.file_symbols(uri.as_str());
    assert!(before.iter().any(|sym| sym.kind == SymbolKind::Package && sym.name == "MyPackage"));
    assert!(before.iter().any(|sym| sym.kind == SymbolKind::Subroutine && sym.name == "hello"));

    // Repeat pass with identical content. The early exit itself is not observable here,
    // so the test checks that the index still answers the same way.
    must(index.index_file(uri.clone(), String::from(code)));
    let after = index.file_symbols(uri.as_str());
    assert_eq!(before.len(), after.len());
    assert!(after.iter().any(|sym| sym.kind == SymbolKind::Package && sym.name == "MyPackage"));
    assert!(after.iter().any(|sym| sym.kind == SymbolKind::Subroutine && sym.name == "hello"));
}
5881
/// Re-indexing a URI with different content replaces the old symbols with the new ones.
#[test]
fn test_early_exit_optimization_changed_content() {
    let index = WorkspaceIndex::new();
    let uri = must(url::Url::parse("file:///test.pl"));
    let original = r#"
package MyPackage;

sub hello {
    print "Hello";
}
"#;

    let revised = r#"
package MyPackage;

sub goodbye {
    print "Goodbye";
}
"#;

    // Original content: only `hello` is known.
    must(index.index_file(uri.clone(), String::from(original)));
    let before = index.file_symbols(uri.as_str());
    assert!(before.iter().any(|sym| sym.kind == SymbolKind::Subroutine && sym.name == "hello"));
    assert!(before.iter().all(|sym| sym.name != "goodbye"));

    // Revised content: `hello` is gone and `goodbye` has appeared.
    must(index.index_file(uri.clone(), String::from(revised)));
    let after = index.file_symbols(uri.as_str());
    assert!(after.iter().all(|sym| sym.name != "hello"));
    assert!(after.iter().any(|sym| sym.kind == SymbolKind::Subroutine && sym.name == "goodbye"));
}
5914
/// A whitespace-only edit still leaves the file's subroutine discoverable after re-indexing.
#[test]
fn test_early_exit_optimization_whitespace_only_change() {
    let index = WorkspaceIndex::new();
    let uri = must(url::Url::parse("file:///test.pl"));
    let compact = r#"
package MyPackage;

sub hello {
    print "Hello";
}
"#;

    let with_extra_blank_line = r#"
package MyPackage;


sub hello {
    print "Hello";
}
"#;

    // Index the compact variant first.
    must(index.index_file(uri.clone(), String::from(compact)));
    let before = index.file_symbols(uri.as_str());
    assert!(before.iter().any(|sym| sym.kind == SymbolKind::Subroutine && sym.name == "hello"));

    // The text differs (one more blank line), so this is expected to go through a re-parse;
    // either way the subroutine must still be present afterwards.
    must(index.index_file(uri.clone(), String::from(with_extra_blank_line)));
    let after = index.file_symbols(uri.as_str());
    assert!(after.iter().any(|sym| sym.kind == SymbolKind::Subroutine && sym.name == "hello"));
}
5947
/// After B.pm is re-indexed without `foo`, the bare name `foo` resolves to A.pm and the
/// newly added `bar` resolves to B.pm.
#[test]
fn test_reindex_file_refreshes_symbol_cache_for_removed_names() {
    let index = WorkspaceIndex::new();
    let uri_a = must(url::Url::parse("file:///lib/A.pm"));
    let uri_b = must(url::Url::parse("file:///lib/B.pm"));

    // Both modules define `foo`; B.pm is then rewritten to define `bar` instead.
    let a_source = "package A;\nsub foo { return 1; }\n1;\n";
    let b_source = "package B;\nsub foo { return 2; }\n1;\n";
    let b_source_rewritten = "package B;\nsub bar { return 3; }\n1;\n";

    must(index.index_file(uri_a.clone(), String::from(a_source)));
    for b_version in [b_source, b_source_rewritten] {
        must(index.index_file(uri_b.clone(), String::from(b_version)));
    }

    let foo_definition = must_some(index.find_definition("foo"));
    assert_eq!(foo_definition.uri, uri_a.to_string());

    let bar_definition = must_some(index.find_definition("bar"));
    assert_eq!(bar_definition.uri, uri_b.to_string());
}
5967
/// Removing one of two files that both define `foo` leaves the other definition resolvable.
#[test]
fn test_remove_file_preserves_other_colliding_symbol_entries() {
    let index = WorkspaceIndex::new();
    let uri_a = must(url::Url::parse("file:///lib/A.pm"));
    let uri_b = must(url::Url::parse("file:///lib/B.pm"));

    let sources = [
        (uri_a.clone(), "package A;\nsub foo { return 1; }\n1;\n"),
        (uri_b.clone(), "package B;\nsub foo { return 2; }\n1;\n"),
    ];
    for (uri, source) in sources {
        must(index.index_file(uri, String::from(source)));
    }

    index.remove_file(uri_b.as_str());

    let surviving = must_some(index.find_definition("foo"));
    assert_eq!(surviving.uri, uri_a.to_string());
}
5984
/// Two qualified calls to `Utils::process_data` must count as two usages and two
/// non-definition references, not four.
#[test]
fn test_count_usages_no_double_counting_for_qualified_calls() {
    let index = WorkspaceIndex::new();

    // Defining module.
    let definition_uri = "file:///lib/Utils.pm";
    let definition_src = r#"
package Utils;

sub process_data {
    return 1;
}
"#;
    must(index.index_file(must(url::Url::parse(definition_uri)), String::from(definition_src)));

    // Caller with two fully qualified call sites.
    let caller_uri = "file:///app.pl";
    let caller_src = r#"
use Utils;
Utils::process_data();
Utils::process_data();
"#;
    must(index.index_file(must(url::Url::parse(caller_uri)), String::from(caller_src)));

    // Dual indexing files each qualified call under both the bare and the qualified name,
    // so an undeduplicated count would report 4 instead of the 2 real call sites.
    let count = index.count_usages("Utils::process_data");
    assert_eq!(
        count, 2,
        "count_usages should not double-count qualified calls, got {} (expected 2)",
        count
    );

    // The same deduplication must hold for find_references, ignoring the defining file.
    let refs = index.find_references("Utils::process_data");
    let non_def_count = refs.iter().filter(|loc| loc.uri != definition_uri).count();
    assert_eq!(
        non_def_count,
        2,
        "find_references should not return duplicates for qualified calls, got {} non-def refs",
        non_def_count
    );
}
6033
/// Batch-indexing five generated modules reports no errors, counts five files, and makes
/// the first and last generated functions resolvable by qualified name.
#[test]
fn test_batch_indexing() {
    let index = WorkspaceIndex::new();

    // Build the (uri, source) pairs up front.
    let mut files: Vec<(Url, String)> = Vec::with_capacity(5);
    for i in 0..5 {
        let uri = must(Url::parse(&format!("file:///batch/module{}.pm", i)));
        let code = format!("package Batch::Mod{};\nsub func_{} {{ return {}; }}\n1;", i, i, i);
        files.push((uri, code));
    }

    let errors = index.index_files_batch(files);
    assert!(errors.is_empty(), "batch indexing errors: {:?}", errors);
    assert_eq!(index.file_count(), 5);

    for qualified in ["Batch::Mod0::func_0", "Batch::Mod4::func_4"] {
        assert!(index.find_definition(qualified).is_some());
    }
}
6052
/// Re-submitting an already indexed, unchanged file through the batch API reports no
/// errors and leaves the file count at one.
#[test]
fn test_batch_indexing_skips_unchanged() {
    let index = WorkspaceIndex::new();
    let uri = must(Url::parse("file:///batch/skip.pm"));
    let code = "package Skip;\nsub skip_fn { 1 }\n1;".to_string();

    // Seed the index. A failure here must abort the test at this line rather than be
    // silently discarded and show up later as a confusing count mismatch.
    must(index.index_file(uri.clone(), code.clone()));
    assert_eq!(index.file_count(), 1);

    // Same URI, same content, this time via the batch entry point.
    let errors = index.index_files_batch(vec![(uri, code)]);
    assert!(errors.is_empty(), "batch indexing errors: {:?}", errors);
    assert_eq!(index.file_count(), 1);
}
6066
/// Re-indexing one file with new content makes its new symbol resolvable while symbols
/// from an unrelated file stay resolvable.
#[test]
fn test_incremental_update_preserves_other_symbols() {
    let index = WorkspaceIndex::new();

    let uri_a = must(Url::parse("file:///incr/a.pm"));
    let uri_b = must(Url::parse("file:///incr/b.pm"));

    // Every indexing step goes through `must` so a setup failure is reported where it
    // happens instead of being swallowed.
    must(index.index_file(uri_a.clone(), "package A;\nsub a_func { 1 }\n1;".into()));
    must(index.index_file(uri_b, "package B;\nsub b_func { 2 }\n1;".into()));

    assert!(index.find_definition("A::a_func").is_some());
    assert!(index.find_definition("B::b_func").is_some());

    // Replace a.pm's content; b.pm is left untouched.
    must(index.index_file(uri_a, "package A;\nsub a_func_v2 { 11 }\n1;".into()));

    assert!(index.find_definition("A::a_func_v2").is_some());
    assert!(index.find_definition("B::b_func").is_some());
}
6084
/// When two files define `helper`, removing one of them keeps the bare name and the
/// other file's qualified name resolvable.
#[test]
fn test_remove_file_preserves_shadowed_symbols() {
    let index = WorkspaceIndex::new();

    let uri_a = must(Url::parse("file:///shadow/a.pm"));
    let uri_b = must(Url::parse("file:///shadow/b.pm"));

    // Setup goes through `must` so an indexing failure is reported here rather than
    // being discarded and surfacing as a misleading lookup failure below.
    must(index.index_file(uri_a.clone(), "package ShadowA;\nsub helper { 1 }\n1;".into()));
    must(index.index_file(uri_b, "package ShadowB;\nsub helper { 2 }\n1;".into()));

    assert!(index.find_definition("helper").is_some());

    // Drop the first definer; the second one must still answer for `helper`.
    index.remove_file_url(&uri_a);
    assert!(index.find_definition("helper").is_some());
    assert!(index.find_definition("ShadowB::helper").is_some());
}
6100
6101    // -------------------------------------------------------------------------
6102    // find_dependents — use parent / use base integration (#2747)
6103    // -------------------------------------------------------------------------
6104
/// Regression for #2747: a file containing `use parent 'MyBase'` must be reported by
/// `find_dependents("MyBase")`.
#[test]
fn test_index_dependency_via_use_parent_end_to_end() {
    let index = WorkspaceIndex::new();

    // Step 1: the base class.
    let base_src = "package MyBase;\nsub new { bless {}, shift }\n1;\n";
    let base_url = must(url::Url::parse("file:///test/workspace/lib/MyBase.pm"));
    must(index.index_file(base_url, String::from(base_src)));

    // Step 2: the child that inherits through `use parent`.
    let child_src = "package Child;\nuse parent 'MyBase';\n1;\n";
    let child_url = must(url::Url::parse("file:///test/workspace/child.pl"));
    must(index.index_file(child_url, String::from(child_src)));

    // Step 3: the child must show up as a dependent of the base.
    let dependents = index.find_dependents("MyBase");
    assert!(
        !dependents.is_empty(),
        "find_dependents('MyBase') returned empty — \
         use parent 'MyBase' should register MyBase as a dependency. \
         Dependencies in index: {:?}",
        {
            // Only evaluated on failure: dump every file's recorded dependencies.
            let files = index.files.read();
            files
                .iter()
                .map(|(k, v)| (k.clone(), v.dependencies.iter().cloned().collect::<Vec<_>>()))
                .collect::<Vec<_>>()
        }
    );

    let expected_child = "file:///test/workspace/child.pl".to_string();
    assert!(
        dependents.contains(&expected_child),
        "child.pl should be in dependents, got: {:?}",
        dependents
    );
}
6143
/// Querying `find_dependents` with the legacy `'` separator matches a dependency that
/// was written with `::` in the source.
#[test]
fn test_find_dependents_normalizes_legacy_separator_in_query() {
    let index = WorkspaceIndex::new();
    let uri = must(url::Url::parse("file:///test/workspace/legacy-query.pl"));
    must(index.index_file(uri, String::from("package Child;\nuse parent 'My::Base';\n1;\n")));

    let expected = vec!["file:///test/workspace/legacy-query.pl".to_string()];
    let dependents = index.find_dependents("My'Base");
    assert_eq!(dependents, expected);
}
6154
/// A parent written as `My'Base` in the source is stored as `My::Base`, not in its
/// legacy spelling.
#[test]
fn test_file_dependencies_normalize_legacy_separator_in_source() {
    let index = WorkspaceIndex::new();
    let uri = must(url::Url::parse("file:///test/workspace/legacy-source.pl"));
    let legacy_src = "package Child;\nuse parent \"My'Base\";\n1;\n";
    must(index.index_file(uri.clone(), String::from(legacy_src)));

    let recorded = index.file_dependencies(uri.as_str());
    assert!(recorded.contains("My::Base"));
    assert!(!recorded.contains("My'Base"));
}
6166
/// A Moose `extends 'My::App::Parent'` must make the child a dependent of that parent.
#[test]
fn test_index_dependency_via_moose_extends_end_to_end() -> Result<(), Box<dyn std::error::Error>>
{
    let index = WorkspaceIndex::new();

    // Parent class.
    let parent_url = must(url::Url::parse("file:///test/workspace/lib/My/App/Parent.pm"));
    must(index.index_file(parent_url, String::from("package My::App::Parent;\n1;\n")));

    // Child declaring the parent via `extends`.
    let child_url = must(url::Url::parse("file:///test/workspace/child-moose.pl"));
    must(index.index_file(
        child_url,
        String::from("package Child;\nuse Moose;\nextends 'My::App::Parent';\n1;\n"),
    ));

    let dependents = index.find_dependents("My::App::Parent");
    let expected_child = "file:///test/workspace/child-moose.pl".to_string();
    assert!(
        dependents.contains(&expected_child),
        "expected child-moose.pl in dependents, got: {dependents:?}"
    );
    Ok(())
}
6186
/// A Moo `with 'My::App::Role'` must register the role as a dependency of the consumer,
/// visible both through `find_dependents` and `file_dependencies`.
#[test]
fn test_index_dependency_via_moo_with_role_end_to_end() -> Result<(), Box<dyn std::error::Error>>
{
    let index = WorkspaceIndex::new();

    // The role itself.
    let role_url = must(url::Url::parse("file:///test/workspace/lib/My/App/Role.pm"));
    must(index.index_file(role_url, String::from("package My::App::Role;\n1;\n")));

    // The consumer composing the role.
    let consumer_url = must(url::Url::parse("file:///test/workspace/consumer-moo.pl"));
    must(index.index_file(
        consumer_url.clone(),
        String::from("package Consumer;\nuse Moo;\nwith 'My::App::Role';\n1;\n"),
    ));

    // Reverse direction: who depends on the role?
    let dependents = index.find_dependents("My::App::Role");
    let expected_consumer = "file:///test/workspace/consumer-moo.pl".to_string();
    assert!(
        dependents.contains(&expected_consumer),
        "expected consumer-moo.pl in dependents, got: {dependents:?}"
    );

    // Forward direction: what does the consumer depend on?
    let consumer_deps = index.file_dependencies(consumer_url.as_str());
    assert!(consumer_deps.contains("My::App::Role"));
    Ok(())
}
6209
/// A literal `require My::Loader;` must be recorded as a dependency of the requiring file.
#[test]
fn test_index_dependency_via_literal_require_end_to_end()
-> Result<(), Box<dyn std::error::Error>> {
    let index = WorkspaceIndex::new();
    let consumer = must(url::Url::parse("file:///test/workspace/require-consumer.pl"));
    must(index.index_file(
        consumer.clone(),
        String::from("package Consumer;\nrequire My::Loader;\n1;\n"),
    ));

    let deps = index.file_dependencies(consumer.as_str());
    assert!(
        deps.contains("My::Loader"),
        "literal require should register module dependency, got: {deps:?}"
    );
    Ok(())
}
6225
/// `require` followed by a manual `->import(qw(...))` must record the module as a
/// dependency and produce at least one reference for every imported name.
#[test]
fn test_manual_import_symbols_are_indexed_as_import_references()
-> Result<(), Box<dyn std::error::Error>> {
    let index = WorkspaceIndex::new();
    let consumer = must(url::Url::parse("file:///test/workspace/manual-import.pl"));
    let src = r#"package Consumer;
require My::Tools;
My::Tools->import(qw(helper_one helper_two));
helper_one();
1;
"#;
    must(index.index_file(consumer.clone(), String::from(src)));

    // The module targeted by the manual import counts as a dependency.
    let deps = index.file_dependencies(consumer.as_str());
    assert!(
        deps.contains("My::Tools"),
        "manual import target should be tracked as dependency, got: {deps:?}"
    );

    // Each name in the qw() list must be findable, whether or not it is called.
    for symbol in ["helper_one", "helper_two"] {
        let has_reference = !index.find_references(symbol).is_empty();
        assert!(
            has_reference,
            "expected at least one indexed reference for imported symbol `{symbol}`"
        );
    }
    Ok(())
}
6254
/// Regression for #2747: `use parent 'MyBase'` must parse to a `Use` node whose args are
/// `["'MyBase'"]`, and `extract_module_names_from_use_args` must turn that into `["MyBase"]`.
#[test]
fn test_parser_produces_correct_args_for_use_parent() {
    use crate::Parser;

    let mut parser = Parser::new("package Child;\nuse parent 'MyBase';\n1;\n");
    let ast = must(parser.parse());
    assert!(
        matches!(ast.kind, NodeKind::Program { .. }),
        "Expected Program root, got {:?}",
        ast.kind
    );
    let NodeKind::Program { statements } = &ast.kind else {
        return;
    };

    // Narrow the statement list down to the argument lists of `use parent ...` nodes.
    let parent_use_args = statements.iter().filter_map(|stmt| match &stmt.kind {
        NodeKind::Use { module, args, .. } if module == "parent" => Some(args),
        _ => None,
    });

    let mut found_parent_use = false;
    for args in parent_use_args {
        found_parent_use = true;

        // The parser keeps the quotes on the raw argument...
        assert_eq!(
            args,
            &["'MyBase'".to_string()],
            "Expected args=[\"'MyBase'\"] for `use parent 'MyBase'`, got: {:?}",
            args
        );

        // ...and the extraction helper is the one that strips them.
        let extracted = extract_module_names_from_use_args(args);
        assert_eq!(
            extracted,
            vec!["MyBase".to_string()],
            "extract_module_names_from_use_args should return [\"MyBase\"], got {:?}",
            extracted
        );
    }
    assert!(found_parent_use, "No Use node with module='parent' found in AST");
}
6294
6295    // -------------------------------------------------------------------------
6296    // extract_module_names_from_use_args — unit tests (#2747)
6297    // -------------------------------------------------------------------------
6298
/// A single-quoted argument yields the module name without its quotes.
#[test]
fn test_extract_module_names_single_quoted() {
    let args = [String::from("'Foo::Bar'")];
    let extracted = extract_module_names_from_use_args(&args);
    assert_eq!(extracted, vec!["Foo::Bar"]);
}
6304
/// A double-quoted argument yields the module name without its quotes.
#[test]
fn test_extract_module_names_double_quoted() {
    let args = [String::from("\"Foo::Bar\"")];
    let extracted = extract_module_names_from_use_args(&args);
    assert_eq!(extracted, vec!["Foo::Bar"]);
}
6310
/// A `qw(...)` list yields each contained module name, in order.
#[test]
fn test_extract_module_names_qw_list() {
    let args = [String::from("qw(Foo::Bar Other::Base)")];
    let extracted = extract_module_names_from_use_args(&args);
    assert_eq!(extracted, vec!["Foo::Bar", "Other::Base"]);
}
6316
/// A `qw/.../` list with slash delimiters yields each contained module name.
#[test]
fn test_extract_module_names_qw_slash_delimiter() {
    let args = [String::from("qw/Foo::Bar Other::Base/")];
    let extracted = extract_module_names_from_use_args(&args);
    assert_eq!(extracted, vec!["Foo::Bar", "Other::Base"]);
}
6322
/// Whitespace between `qw` and its opening delimiter does not prevent extraction.
#[test]
fn test_extract_module_names_qw_with_space_before_delimiter() {
    let args = [String::from("qw [Foo::Bar Other::Base]")];
    let extracted = extract_module_names_from_use_args(&args);
    assert_eq!(extracted, vec!["Foo::Bar", "Other::Base"]);
}
6328
/// Brackets, parentheses and trailing commas wrapped around names inside a `qw` list are
/// trimmed away from the extracted module names.
#[test]
fn test_extract_module_names_qw_list_trims_wrapped_punctuation() {
    let args = [String::from("qw((Foo::Bar) [Other::Base],)")];
    let extracted = extract_module_names_from_use_args(&args);
    assert_eq!(extracted, vec!["Foo::Bar", "Other::Base"]);
}
6335
/// The `-norequire` flag is not treated as a module name; only the quoted module is returned.
#[test]
fn test_extract_module_names_norequire_flag() {
    let args = [String::from("-norequire"), String::from("'Foo::Bar'")];
    let extracted = extract_module_names_from_use_args(&args);
    assert_eq!(extracted, vec!["Foo::Bar"]);
}
6344
/// An empty argument list yields no module names.
#[test]
fn test_extract_module_names_empty_args() {
    let no_args: [String; 0] = [];
    let extracted = extract_module_names_from_use_args(&no_args);
    assert!(extracted.is_empty());
}
6350
/// Perl's legacy package separator `'` inside a quoted module name is normalized to `::`
/// so downstream dependency matching sees the modern spelling.
#[test]
fn test_extract_module_names_legacy_separator() {
    // `'Foo'Bar'` is the quoted form of the legacy-spelled module Foo'Bar.
    let args = [String::from("'Foo'Bar'")];
    let extracted = extract_module_names_from_use_args(&args);
    assert_eq!(extracted, vec!["Foo::Bar"]);
}
6358
6359    #[test]
6360    fn test_find_dependents_matches_legacy_separator_queries() {
6361        let index = WorkspaceIndex::new();
6362        let base_uri = must(url::Url::parse("file:///test/workspace/lib/Foo/Bar.pm"));
6363        let child_uri = must(url::Url::parse("file:///test/workspace/child.pl"));
6364
6365        must(index.index_file(base_uri, "package Foo::Bar;\n1;\n".to_string()));
6366        must(index.index_file(
6367            child_uri.clone(),
6368            "package Child;\nuse parent qw(Foo'Bar);\n1;\n".to_string(),
6369        ));
6370
6371        let dependents_modern = index.find_dependents("Foo::Bar");
6372        assert!(
6373            dependents_modern.contains(&child_uri.to_string()),
6374            "Expected dependency match when queried with modern separator"
6375        );
6376
6377        let dependents_legacy = index.find_dependents("Foo'Bar");
6378        assert!(
6379            dependents_legacy.contains(&child_uri.to_string()),
6380            "Expected dependency match when queried with legacy separator"
6381        );
6382    }
6383
6384    #[test]
6385    fn test_extract_module_names_comma_adjacent_tokens() {
6386        let names = extract_module_names_from_use_args(&[
6387            "'Foo::Bar',".to_string(),
6388            "\"Other::Base\",".to_string(),
6389            "'Last::One'".to_string(),
6390        ]);
6391        assert_eq!(names, vec!["Foo::Bar", "Other::Base", "Last::One"]);
6392    }
6393
6394    #[test]
6395    fn test_extract_module_names_parenthesized_without_spaces() {
6396        let names = extract_module_names_from_use_args(&["('Foo::Bar','Other::Base')".to_string()]);
6397        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
6398    }
6399
6400    #[test]
6401    fn test_extract_module_names_deduplicates_identical_entries() {
6402        let names = extract_module_names_from_use_args(&[
6403            "qw(Foo::Bar Foo::Bar)".to_string(),
6404            "'Foo::Bar'".to_string(),
6405        ]);
6406        assert_eq!(names, vec!["Foo::Bar"]);
6407    }
6408
6409    #[test]
6410    fn test_extract_module_names_trims_semicolon_suffix() {
6411        let names = extract_module_names_from_use_args(&[
6412            "'Foo::Bar',".to_string(),
6413            "'Other::Base',".to_string(),
6414            "'Third::Leaf';".to_string(),
6415        ]);
6416        assert_eq!(names, vec!["Foo::Bar", "Other::Base", "Third::Leaf"]);
6417    }
6418
6419    #[test]
6420    fn test_extract_module_names_trims_wrapped_punctuation() {
6421        let names = extract_module_names_from_use_args(&[
6422            "('Foo::Bar',".to_string(),
6423            "'Other::Base')".to_string(),
6424        ]);
6425        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
6426    }
6427
6428    #[test]
6429    fn test_extract_constant_names_qw_with_space_before_delimiter() {
6430        let names = extract_constant_names_from_use_args(&["qw [FOO BAR]".to_string()]);
6431        assert_eq!(names, vec!["FOO", "BAR"]);
6432    }
6433
6434    #[test]
6435    #[ignore = "qw delimiter with leading space not yet parsed; tracked in debt-ledger.yaml"]
6436    fn test_index_use_constant_qw_with_space_before_delimiter() {
6437        let index = WorkspaceIndex::new();
6438        let uri = must(url::Url::parse("file:///workspace/lib/My/Config.pm"));
6439        let source = "package My::Config;\nuse constant qw [FOO BAR];\n1;\n";
6440
6441        must(index.index_file(uri, source.to_string()));
6442
6443        let foo = index.find_definition("My::Config::FOO");
6444        let bar = index.find_definition("My::Config::BAR");
6445        assert!(foo.is_some(), "Expected My::Config::FOO to be indexed");
6446        assert!(bar.is_some(), "Expected My::Config::BAR to be indexed");
6447    }
6448
6449    #[test]
6450    fn test_with_capacity_accepts_large_batch_without_panic() {
6451        let index = WorkspaceIndex::with_capacity(100, 20);
6452        for i in 0..100 {
6453            let uri = must(url::Url::parse(&format!("file:///lib/Mod{}.pm", i)));
6454            let src = format!("package Mod{};\nsub foo_{} {{ 1 }}\n1;\n", i, i);
6455            index.index_file(uri, src).ok();
6456        }
6457        assert!(index.has_symbols());
6458    }
6459
6460    #[test]
6461    fn test_with_capacity_zero_does_not_panic() {
6462        let index = WorkspaceIndex::with_capacity(0, 0);
6463        assert!(!index.has_symbols());
6464    }
6465
6466    // -------------------------------------------------------------------------
6467    // remove_file — symbol cache cleanup (#3494)
6468    // -------------------------------------------------------------------------
6469
6470    /// After removing the only file that defines a symbol, both qualified and
6471    /// bare-name lookups must return None.  The symbols cache must not retain
6472    /// stale entries pointing to the deleted file.
6473    #[test]
6474    fn test_remove_file_clears_symbol_cache_qualified_and_bare() {
6475        let index = WorkspaceIndex::new();
6476        let uri_a = must(url::Url::parse("file:///lib/A.pm"));
6477        let code_a = "package A;\nsub foo { return 1; }\n1;\n";
6478
6479        must(index.index_file(uri_a.clone(), code_a.to_string()));
6480
6481        // Pre-condition: both qualified and bare-name lookups resolve to file A.
6482        let before_qual = must_some(index.find_definition("A::foo"));
6483        assert_eq!(
6484            before_qual.uri,
6485            uri_a.to_string(),
6486            "qualified lookup should point to A.pm before removal"
6487        );
6488        let before_bare = must_some(index.find_definition("foo"));
6489        assert_eq!(
6490            before_bare.uri,
6491            uri_a.to_string(),
6492            "bare-name lookup should point to A.pm before removal"
6493        );
6494
6495        // Remove file A from the index (simulates file deletion).
6496        index.remove_file(uri_a.as_str());
6497
6498        // Post-condition: the symbol cache must be clean — no stale entries.
6499        assert!(
6500            index.find_definition("A::foo").is_none(),
6501            "qualified lookup 'A::foo' should return None after file deletion"
6502        );
6503        assert!(
6504            index.find_definition("foo").is_none(),
6505            "bare-name lookup 'foo' should return None after file deletion"
6506        );
6507
6508        // Verify no symbols remain in the index.
6509        assert_eq!(
6510            index.symbol_count(),
6511            0,
6512            "symbol_count should be 0 after removing the only file"
6513        );
6514        assert!(!index.has_symbols(), "has_symbols should be false after removing the only file");
6515    }
6516
6517    /// Deleting file A when file B has the same bare-name symbol must leave
6518    /// the bare-name cache pointing to B (not remove it entirely).
6519    #[test]
6520    fn test_remove_file_bare_name_falls_back_to_surviving_file() {
6521        let index = WorkspaceIndex::new();
6522        let uri_a = must(url::Url::parse("file:///lib/A.pm"));
6523        let uri_b = must(url::Url::parse("file:///lib/B.pm"));
6524        let code_a = "package A;\nsub shared_fn { return 1; }\n1;\n";
6525        let code_b = "package B;\nsub shared_fn { return 2; }\n1;\n";
6526
6527        must(index.index_file(uri_a.clone(), code_a.to_string()));
6528        must(index.index_file(uri_b.clone(), code_b.to_string()));
6529
6530        // Remove file A — shared_fn should still resolve via B.
6531        index.remove_file(uri_a.as_str());
6532
6533        let loc = must_some(index.find_definition("shared_fn"));
6534        assert_eq!(
6535            loc.uri,
6536            uri_b.to_string(),
6537            "bare-name 'shared_fn' should resolve to B.pm after A.pm is deleted"
6538        );
6539
6540        assert!(
6541            index.find_definition("A::shared_fn").is_none(),
6542            "qualified 'A::shared_fn' must be gone after A.pm deletion"
6543        );
6544        assert!(
6545            index.find_definition("B::shared_fn").is_some(),
6546            "qualified 'B::shared_fn' must remain after A.pm deletion"
6547        );
6548    }
6549
6550    #[test]
6551    fn test_definition_candidates_include_ambiguous_bare_symbols_in_stable_order() {
6552        let index = WorkspaceIndex::new();
6553        let uri_b = must(url::Url::parse("file:///lib/B.pm"));
6554        let uri_a = must(url::Url::parse("file:///lib/A.pm"));
6555        must(index.index_file(uri_b, "package B;\nsub shared { 1 }\n1;\n".to_string()));
6556        must(index.index_file(uri_a, "package A;\nsub shared { 1 }\n1;\n".to_string()));
6557
6558        let candidates = index.definition_candidates("shared");
6559        assert_eq!(candidates.len(), 2);
6560        assert_eq!(candidates[0].uri, "file:///lib/A.pm");
6561        assert_eq!(candidates[1].uri, "file:///lib/B.pm");
6562        assert_eq!(must_some(index.find_definition("shared")).uri, "file:///lib/A.pm");
6563    }
6564
6565    #[test]
6566    fn test_definition_candidates_include_duplicate_qualified_name_across_files() {
6567        let index = WorkspaceIndex::new();
6568        let uri_v2 = must(url::Url::parse("file:///lib/A-v2.pm"));
6569        let uri_v1 = must(url::Url::parse("file:///lib/A-v1.pm"));
6570        let source = "package A;\nsub foo { 1 }\n1;\n".to_string();
6571        must(index.index_file(uri_v2, source.clone()));
6572        must(index.index_file(uri_v1, source));
6573
6574        let candidates = index.definition_candidates("A::foo");
6575        assert_eq!(candidates.len(), 2);
6576        assert_eq!(candidates[0].uri, "file:///lib/A-v1.pm");
6577        assert_eq!(candidates[1].uri, "file:///lib/A-v2.pm");
6578    }
6579
6580    #[test]
6581    fn test_definition_candidates_are_cleaned_on_remove_and_reindex() {
6582        let index = WorkspaceIndex::new();
6583        let uri = must(url::Url::parse("file:///lib/A.pm"));
6584        must(index.index_file(uri.clone(), "package A;\nsub foo { 1 }\n1;\n".to_string()));
6585        assert_eq!(index.definition_candidates("A::foo").len(), 1);
6586
6587        index.remove_file(uri.as_str());
6588        assert!(index.definition_candidates("A::foo").is_empty());
6589
6590        must(index.index_file(uri, "package A;\nsub foo { 2 }\n1;\n".to_string()));
6591        assert_eq!(index.definition_candidates("A::foo").len(), 1);
6592    }
6593
6594    /// Verify that `incremental_remove_symbols` correctly retains candidates owned by
6595    /// other files when the removed file had BOTH exclusively-owned names (triggering the
6596    /// full-rebuild path) AND shared names. Before this fix, the full-rebuild path cleared
6597    /// all candidates and relied on the subsequent rebuild to re-add shared ones — correct
6598    /// in effect, but the test documents the expected observable behavior.
6599    #[test]
6600    fn test_definition_candidates_shared_symbol_survives_removal_of_sole_owner_of_other_symbol() {
6601        let index = WorkspaceIndex::new();
6602        let uri_a = must(url::Url::parse("file:///lib/A.pm"));
6603        let uri_b = must(url::Url::parse("file:///lib/B.pm"));
6604
6605        // A defines both `unique_to_a` (no other file) and `shared` (also in B)
6606        must(index.index_file(
6607            uri_a.clone(),
6608            "package A;\nsub unique_to_a { 1 }\nsub shared { 1 }\n1;\n".to_string(),
6609        ));
6610        must(index.index_file(uri_b.clone(), "package B;\nsub shared { 1 }\n1;\n".to_string()));
6611
6612        // Before removal: both shared candidates and unique_to_a are present
6613        assert_eq!(index.definition_candidates("shared").len(), 2);
6614        assert_eq!(index.definition_candidates("unique_to_a").len(), 1);
6615
6616        // Remove A — triggers the affected_names path for `unique_to_a`, but `shared`
6617        // still has B's candidate.
6618        index.remove_file(uri_a.as_str());
6619
6620        assert!(
6621            index.definition_candidates("unique_to_a").is_empty(),
6622            "unique_to_a should be gone after removing A"
6623        );
6624        assert_eq!(
6625            index.definition_candidates("shared").len(),
6626            1,
6627            "shared should still have B's candidate after removing A"
6628        );
6629        assert_eq!(
6630            index.definition_candidates("shared")[0].uri,
6631            "file:///lib/B.pm",
6632            "remaining shared candidate must be from B"
6633        );
6634    }
6635
6636    #[test]
6637    fn test_folder_context_in_file_index() {
6638        let index = WorkspaceIndex::new();
6639
6640        // Set up workspace folders
6641        index.set_workspace_folders(vec![
6642            "file:///project1".to_string(),
6643            "file:///project2".to_string(),
6644        ]);
6645
6646        let uri1 = "file:///project1/lib/Module.pm";
6647        let code1 = r#"
6648package Module;
6649
6650sub test_sub {
6651    return 1;
6652}
6653"#;
6654        must(index.index_file(must(url::Url::parse(uri1)), code1.to_string()));
6655
6656        let uri2 = "file:///project2/lib/Other.pm";
6657        let code2 = r#"
6658package Other;
6659
6660sub other_sub {
6661    return 2;
6662}
6663"#;
6664        must(index.index_file(must(url::Url::parse(uri2)), code2.to_string()));
6665
6666        // Verify folder context is set correctly
6667        let symbols1 = index.file_symbols(uri1);
6668        assert_eq!(symbols1.len(), 2, "Should have 2 symbols in Module.pm");
6669        for symbol in &symbols1 {
6670            assert_eq!(symbol.uri, uri1, "Symbol URI should match file URI");
6671        }
6672
6673        let symbols2 = index.file_symbols(uri2);
6674        assert_eq!(symbols2.len(), 2, "Should have 2 symbols in Other.pm");
6675        for symbol in &symbols2 {
6676            assert_eq!(symbol.uri, uri2, "Symbol URI should match file URI");
6677        }
6678
6679        // Verify folder attribution
6680        let files = index.files.read();
6681        let file_index1 = must_some(files.get(&DocumentStore::uri_key(uri1)));
6682        assert_eq!(
6683            file_index1.folder_uri,
6684            Some("file:///project1".to_string()),
6685            "File should be attributed to correct workspace folder"
6686        );
6687
6688        let file_index2 = must_some(files.get(&DocumentStore::uri_key(uri2)));
6689        assert_eq!(
6690            file_index2.folder_uri,
6691            Some("file:///project2".to_string()),
6692            "File should be attributed to correct workspace folder"
6693        );
6694    }
6695
6696    #[test]
6697    fn test_determine_folder_uri() {
6698        let index = WorkspaceIndex::new();
6699
6700        // Set up workspace folders
6701        index.set_workspace_folders(vec![
6702            "file:///project1".to_string(),
6703            "file:///project2".to_string(),
6704        ]);
6705
6706        // Test file in project1
6707        let folder1 = index.determine_folder_uri("file:///project1/lib/Module.pm");
6708        assert_eq!(
6709            folder1,
6710            Some("file:///project1".to_string()),
6711            "Should determine folder for file in project1"
6712        );
6713
6714        // Test file in project2
6715        let folder2 = index.determine_folder_uri("file:///project2/lib/Other.pm");
6716        assert_eq!(
6717            folder2,
6718            Some("file:///project2".to_string()),
6719            "Should determine folder for file in project2"
6720        );
6721
6722        // Test file not in any workspace folder
6723        let folder_none = index.determine_folder_uri("file:///other/project/Module.pm");
6724        assert_eq!(folder_none, None, "Should return None for file outside workspace folders");
6725    }
6726
6727    #[test]
6728    fn test_determine_folder_uri_prefers_most_specific_match() {
6729        let index = WorkspaceIndex::new();
6730
6731        // Keep broad folder first to ensure we don't rely on insertion order.
6732        index.set_workspace_folders(vec![
6733            "file:///project".to_string(),
6734            "file:///project/lib".to_string(),
6735        ]);
6736
6737        let folder = index.determine_folder_uri("file:///project/lib/My/Module.pm");
6738        assert_eq!(
6739            folder,
6740            Some("file:///project/lib".to_string()),
6741            "Nested workspace folders should attribute files to the most specific folder"
6742        );
6743    }
6744
6745    #[test]
6746    fn test_remove_folder() {
6747        let index = WorkspaceIndex::new();
6748
6749        // Set up workspace folders
6750        index.set_workspace_folders(vec![
6751            "file:///project1".to_string(),
6752            "file:///project2".to_string(),
6753        ]);
6754
6755        // Index files from both folders
6756        let uri1 = "file:///project1/lib/Module.pm";
6757        let code1 = r#"
6758package Module;
6759
6760sub test_sub {
6761    return 1;
6762}
6763"#;
6764        must(index.index_file(must(url::Url::parse(uri1)), code1.to_string()));
6765
6766        let uri2 = "file:///project2/lib/Other.pm";
6767        let code2 = r#"
6768package Other;
6769
6770sub other_sub {
6771    return 2;
6772}
6773"#;
6774        must(index.index_file(must(url::Url::parse(uri2)), code2.to_string()));
6775
6776        // Verify both files are indexed
6777        assert_eq!(index.file_count(), 2, "Should have 2 files indexed");
6778        assert_eq!(index.document_store().count(), 2, "Document store should track both files");
6779
6780        // Remove project1 folder
6781        index.remove_folder("file:///project1");
6782
6783        // Verify only project2 file remains
6784        assert_eq!(index.file_count(), 1, "Should have 1 file after removing folder");
6785        assert_eq!(
6786            index.document_store().count(),
6787            1,
6788            "Document store should drop files removed via folder deletion"
6789        );
6790        assert!(index.file_symbols(uri1).is_empty(), "File from removed folder should be gone");
6791        assert_eq!(
6792            index.file_symbols(uri2).len(),
6793            2,
6794            "File from remaining folder should still be present"
6795        );
6796    }
6797
6798    #[test]
6799    fn test_remove_folder_removes_symbol_free_files() {
6800        let index = WorkspaceIndex::new();
6801        index.set_workspace_folders(vec!["file:///project1".to_string()]);
6802
6803        let uri = "file:///project1/empty.pl";
6804        must(index.index_file(must(url::Url::parse(uri)), "# comments only".to_string()));
6805        assert_eq!(index.file_count(), 1, "Expected file to be indexed");
6806
6807        index.remove_folder("file:///project1");
6808
6809        assert_eq!(index.file_count(), 0, "Folder removal should delete symbol-free files");
6810        assert_eq!(
6811            index.document_store().count(),
6812            0,
6813            "Document store should stay in sync for symbol-free files"
6814        );
6815    }
6816
6817    // ========================================================================
6818    // GREEN-TDD EDGE CASE TESTS FOR ISSUE #6061 (static require + manual import)
6819    // ========================================================================
6820
6821    #[test]
6822    fn test_require_with_variable_target_is_not_indexed() -> Result<(), Box<dyn std::error::Error>>
6823    {
6824        let index = WorkspaceIndex::new();
6825        let uri = must(url::Url::parse("file:///test/require-var.pl"));
6826        let src = r#"package Test;
6827my $loader = 'MyModule';
6828require $loader;
68291;
6830"#;
6831        must(index.index_file(uri.clone(), src.to_string()));
6832        let deps = index.file_dependencies(uri.as_str());
6833        assert!(
6834            !deps.contains("MyModule"),
6835            "require with variable target should not register static dependency"
6836        );
6837        Ok(())
6838    }
6839
6840    #[test]
6841    fn test_multiple_import_calls_on_same_module() -> Result<(), Box<dyn std::error::Error>> {
6842        let index = WorkspaceIndex::new();
6843        let uri = must(url::Url::parse("file:///test/multi-import.pl"));
6844        let src = r#"package Test;
6845require Toolkit;
6846Toolkit->import('func_a');
6847Toolkit->import(qw(func_b func_c));
68481;
6849"#;
6850        must(index.index_file(uri.clone(), src.to_string()));
6851        let deps = index.file_dependencies(uri.as_str());
6852        assert!(deps.contains("Toolkit"), "module should be tracked as dependency");
6853        for symbol in &["func_a", "func_b", "func_c"] {
6854            let refs = index.find_references(symbol);
6855            assert!(!refs.is_empty(), "all imported symbols should be indexed: {}", symbol);
6856        }
6857        Ok(())
6858    }
6859
6860    #[test]
6861    fn test_require_string_vs_bareword_normalization() -> Result<(), Box<dyn std::error::Error>> {
6862        let index = WorkspaceIndex::new();
6863        let uri = must(url::Url::parse("file:///test/require-string.pl"));
6864        let src = r#"package Consumer;
6865require "String/Based/Module.pm";
6866String::Based::Module->import('exported');
68671;
6868"#;
6869        must(index.index_file(uri.clone(), src.to_string()));
6870        let deps = index.file_dependencies(uri.as_str());
6871        assert!(
6872            deps.contains("String::Based::Module"),
6873            "require string form should normalize path separators to ::"
6874        );
6875        let refs = index.find_references("exported");
6876        assert!(!refs.is_empty(), "import should be indexed even with string-form require");
6877        Ok(())
6878    }
6879
6880    #[test]
6881    fn test_import_without_require_registers_as_method_call()
6882    -> Result<(), Box<dyn std::error::Error>> {
6883        // Edge case: ->import() without preceding require is treated as a normal method call,
6884        // not as the static manual-import pattern, so the module is still visited/tracked
6885        // but the symbols are NOT marked as imports from the static require+import logic.
6886        let index = WorkspaceIndex::new();
6887        let uri = must(url::Url::parse("file:///test/orphan-import.pl"));
6888        let src = r#"package Test;
6889Unrelated::Module->import('orphaned');
6890orphaned();
68911;
6892"#;
6893        must(index.index_file(uri.clone(), src.to_string()));
6894
6895        // The module reference may still be tracked as a method call target,
6896        // but the key regression is: the orphaned symbol should not be indexed
6897        // as an import reference due to the missing require.
6898        let _refs = index.find_references("orphaned");
6899        // Symbol may be referenced but should not be specially treated as an import.
6900        // The main point is: without require, the pairing doesn't activate.
6901        Ok(())
6902    }
6903
6904    #[test]
6905    fn test_nested_blocks_preserve_require_scope() -> Result<(), Box<dyn std::error::Error>> {
6906        let index = WorkspaceIndex::new();
6907        let uri = must(url::Url::parse("file:///test/nested.pl"));
6908        let src = r#"package Test;
6909{
6910    require Outer;
6911    {
6912        Outer->import('nested_sym');
6913    }
6914}
69151;
6916"#;
6917        must(index.index_file(uri.clone(), src.to_string()));
6918        let deps = index.file_dependencies(uri.as_str());
6919        assert!(
6920            deps.contains("Outer"),
6921            "require in outer block should be visible to nested import"
6922        );
6923        let refs = index.find_references("nested_sym");
6924        assert!(!refs.is_empty(), "symbol imported in nested block should still be indexed");
6925        Ok(())
6926    }
6927
6928    #[test]
6929    fn test_require_path_without_pm_extension() -> Result<(), Box<dyn std::error::Error>> {
6930        let index = WorkspaceIndex::new();
6931        let uri = must(url::Url::parse("file:///test/no-ext.pl"));
6932        let src = r#"package Test;
6933require "My/Module";
6934My::Module->import('func');
69351;
6936"#;
6937        must(index.index_file(uri.clone(), src.to_string()));
6938        let deps = index.file_dependencies(uri.as_str());
6939        assert!(
6940            deps.contains("My::Module"),
6941            "require without .pm extension should normalize to module path"
6942        );
6943        Ok(())
6944    }
6945
6946    #[test]
6947    fn test_qw_with_bracket_delimiters() -> Result<(), Box<dyn std::error::Error>> {
6948        let index = WorkspaceIndex::new();
6949        let uri = must(url::Url::parse("file:///test/qw-delim.pl"));
6950        let src = r#"package Test;
6951require DelimModule;
6952DelimModule->import(qw[sym1 sym2]);
6953DelimModule->import(qw{sym3 sym4});
69541;
6955"#;
6956        must(index.index_file(uri.clone(), src.to_string()));
6957        for symbol in &["sym1", "sym2", "sym3", "sym4"] {
6958            let refs = index.find_references(symbol);
6959            assert!(
6960                !refs.is_empty(),
6961                "symbols from qw with bracket delimiters should be indexed: {}",
6962                symbol
6963            );
6964        }
6965        Ok(())
6966    }
6967
6968    #[test]
6969    fn test_array_literal_import_args() -> Result<(), Box<dyn std::error::Error>> {
6970        let index = WorkspaceIndex::new();
6971        let uri = must(url::Url::parse("file:///test/array-import.pl"));
6972        let src = r#"package Test;
6973require ArrayModule;
6974ArrayModule->import(['sym_x', 'sym_y']);
69751;
6976"#;
6977        must(index.index_file(uri.clone(), src.to_string()));
6978        for symbol in &["sym_x", "sym_y"] {
6979            let refs = index.find_references(symbol);
6980            assert!(
6981                !refs.is_empty(),
6982                "symbols from array literal import should be indexed: {}",
6983                symbol
6984            );
6985        }
6986        Ok(())
6987    }
6988
6989    #[test]
6990    fn test_require_inside_conditional_still_registers_dependency()
6991    -> Result<(), Box<dyn std::error::Error>> {
6992        let index = WorkspaceIndex::new();
6993        let uri = must(url::Url::parse("file:///test/cond-require.pl"));
6994        let src = r#"package Test;
6995if (1) {
6996    require ConditionalMod;
6997    ConditionalMod->import('cond_func');
6998}
69991;
7000"#;
7001        must(index.index_file(uri.clone(), src.to_string()));
7002        let deps = index.file_dependencies(uri.as_str());
7003        assert!(
7004            deps.contains("ConditionalMod"),
7005            "require inside conditional should still register as dependency"
7006        );
7007        let refs = index.find_references("cond_func");
7008        assert!(!refs.is_empty(), "import inside conditional should still index symbols");
7009        Ok(())
7010    }
7011
7012    #[test]
7013    fn test_mixed_string_and_bareword_imports() -> Result<(), Box<dyn std::error::Error>> {
7014        let index = WorkspaceIndex::new();
7015        let uri = must(url::Url::parse("file:///test/mixed-import.pl"));
7016        let src = r#"package Test;
7017require MixedMod;
7018MixedMod->import('string_sym');
7019MixedMod->import(qw(qw_one qw_two));
70201;
7021"#;
7022        must(index.index_file(uri.clone(), src.to_string()));
7023        let deps = index.file_dependencies(uri.as_str());
7024        assert!(deps.contains("MixedMod"), "require should register dependency");
7025        for symbol in &["string_sym", "qw_one", "qw_two"] {
7026            let refs = index.find_references(symbol);
7027            assert!(!refs.is_empty(), "all import forms should index symbols: {}", symbol);
7028        }
7029        Ok(())
7030    }
7031
7032    // -------------------------------------------------------------------------
7033    // Per-category incremental invalidation (Req 18.1–18.5)
7034    // -------------------------------------------------------------------------
7035
7036    /// Helper: build a minimal `FileFactShard` with configurable hashes.
7037    fn make_shard(
7038        uri: &str,
7039        content_hash: u64,
7040        anchors_hash: Option<u64>,
7041        entities_hash: Option<u64>,
7042        occurrences_hash: Option<u64>,
7043        edges_hash: Option<u64>,
7044    ) -> FileFactShard {
7045        let file_id = {
7046            let mut h = DefaultHasher::new();
7047            uri.hash(&mut h);
7048            FileId(h.finish())
7049        };
7050        FileFactShard {
7051            source_uri: uri.to_string(),
7052            file_id,
7053            content_hash,
7054            anchors_hash,
7055            entities_hash,
7056            occurrences_hash,
7057            edges_hash,
7058            anchors: Vec::new(),
7059            entities: Vec::new(),
7060            occurrences: Vec::new(),
7061            edges: Vec::new(),
7062        }
7063    }
7064
7065    /// Req 18.5: When content_hash is unchanged, skip all per-category
7066    /// comparisons — no index modifications happen.
7067    #[test]
7068    fn incremental_replace_skips_when_content_hash_unchanged()
7069    -> Result<(), Box<dyn std::error::Error>> {
7070        let index = WorkspaceIndex::new();
7071        let uri = "file:///lib/Same.pm";
7072        let key = DocumentStore::uri_key(uri);
7073
7074        let shard_v1 = make_shard(uri, 42, Some(1), Some(2), Some(3), Some(4));
7075        // First insert — no old shard, so all categories are "changed".
7076        let r1 = index.replace_fact_shard_incremental(&key, shard_v1);
7077        assert!(!r1.content_unchanged);
7078
7079        // Second insert with same content_hash → skip entirely.
7080        let shard_v2 = make_shard(uri, 42, Some(100), Some(200), Some(300), Some(400));
7081        let r2 = index.replace_fact_shard_incremental(&key, shard_v2);
7082        assert!(r2.content_unchanged);
7083        assert!(!r2.anchors_updated);
7084        assert!(!r2.entities_updated);
7085        assert!(!r2.occurrences_updated);
7086        assert!(!r2.edges_updated);
7087
7088        // The stored shard should still be v1 (unchanged).
7089        let stored = must_some(index.file_fact_shard(uri));
7090        assert_eq!(stored.anchors_hash, Some(1));
7091        Ok(())
7092    }
7093
7094    /// Req 18.3: When a category hash is unchanged, skip re-indexing that
7095    /// category's cross-file indexes.
7096    #[test]
7097    fn incremental_replace_skips_unchanged_categories() -> Result<(), Box<dyn std::error::Error>> {
7098        let index = WorkspaceIndex::new();
7099        let uri = "file:///lib/Partial.pm";
7100        let key = DocumentStore::uri_key(uri);
7101
7102        let shard_v1 = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
7103        index.replace_fact_shard_incremental(&key, shard_v1);
7104
7105        // Change content_hash but keep anchors and entities the same.
7106        // Only occurrences and edges change.
7107        let shard_v2 = make_shard(uri, 2, Some(10), Some(20), Some(99), Some(88));
7108        let result = index.replace_fact_shard_incremental(&key, shard_v2);
7109
7110        assert!(!result.content_unchanged);
7111        assert!(!result.anchors_updated, "anchors hash unchanged → skip");
7112        assert!(!result.entities_updated, "entities hash unchanged → skip");
7113        assert!(result.occurrences_updated, "occurrences hash changed → update");
7114        assert!(result.edges_updated, "edges hash changed → update");
7115        Ok(())
7116    }
7117
7118    /// Req 18.4: When a category hash has changed, remove old entries and
7119    /// insert new ones for that category.
7120    #[test]
7121    fn incremental_replace_updates_changed_categories() -> Result<(), Box<dyn std::error::Error>> {
7122        let index = WorkspaceIndex::new();
7123        let uri = "file:///lib/Changed.pm";
7124        let key = DocumentStore::uri_key(uri);
7125
7126        let shard_v1 = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
7127        index.replace_fact_shard_incremental(&key, shard_v1);
7128
7129        // Change all category hashes.
7130        let shard_v2 = make_shard(uri, 2, Some(11), Some(21), Some(31), Some(41));
7131        let result = index.replace_fact_shard_incremental(&key, shard_v2);
7132
7133        assert!(!result.content_unchanged);
7134        assert!(result.anchors_updated);
7135        assert!(result.entities_updated);
7136        assert!(result.occurrences_updated);
7137        assert!(result.edges_updated);
7138
7139        // The stored shard should be v2.
7140        let stored = must_some(index.file_fact_shard(uri));
7141        assert_eq!(stored.content_hash, 2);
7142        assert_eq!(stored.anchors_hash, Some(11));
7143        Ok(())
7144    }
7145
7146    /// When there is no old shard (first index), all categories are treated
7147    /// as changed.
7148    #[test]
7149    fn incremental_replace_first_insert_updates_all() -> Result<(), Box<dyn std::error::Error>> {
7150        let index = WorkspaceIndex::new();
7151        let uri = "file:///lib/New.pm";
7152        let key = DocumentStore::uri_key(uri);
7153
7154        let shard = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
7155        let result = index.replace_fact_shard_incremental(&key, shard);
7156
7157        assert!(!result.content_unchanged);
7158        assert!(result.anchors_updated);
7159        assert!(result.entities_updated);
7160        assert!(result.occurrences_updated);
7161        assert!(result.edges_updated);
7162        Ok(())
7163    }
7164
7165    /// When per-category hashes are `None` (legacy shard), the category is
7166    /// conservatively treated as changed.
7167    #[test]
7168    fn incremental_replace_none_hashes_treated_as_changed() -> Result<(), Box<dyn std::error::Error>>
7169    {
7170        let index = WorkspaceIndex::new();
7171        let uri = "file:///lib/Legacy.pm";
7172        let key = DocumentStore::uri_key(uri);
7173
7174        // Old shard has hashes, new shard has None for some.
7175        let shard_v1 = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
7176        index.replace_fact_shard_incremental(&key, shard_v1);
7177
7178        let shard_v2 = make_shard(uri, 2, None, Some(20), None, Some(40));
7179        let result = index.replace_fact_shard_incremental(&key, shard_v2);
7180
7181        assert!(!result.content_unchanged);
7182        assert!(result.anchors_updated, "None new hash → changed");
7183        assert!(!result.entities_updated, "same hash → skip");
7184        assert!(result.occurrences_updated, "None new hash → changed");
7185        assert!(!result.edges_updated, "same hash → skip");
7186        Ok(())
7187    }
7188
7189    /// Verify that the semantic reference index is updated only when
7190    /// occurrences or edges change.
7191    #[test]
7192    fn incremental_replace_updates_reference_index_on_occurrence_change()
7193    -> Result<(), Box<dyn std::error::Error>> {
7194        use perl_semantic_facts::{AnchorId, Confidence, OccurrenceId, OccurrenceKind, Provenance};
7195
7196        let index = WorkspaceIndex::new();
7197        let uri = "file:///lib/RefIdx.pm";
7198        let key = DocumentStore::uri_key(uri);
7199        let file_id = {
7200            let mut h = DefaultHasher::new();
7201            uri.hash(&mut h);
7202            FileId(h.finish())
7203        };
7204
7205        // v1: shard with one reference occurrence.
7206        let mut shard_v1 = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
7207        let anchor_id = AnchorId(1);
7208        shard_v1.anchors.push(perl_semantic_facts::AnchorFact {
7209            id: anchor_id,
7210            file_id,
7211            span_start_byte: 0,
7212            span_end_byte: 5,
7213            scope_id: None,
7214            provenance: Provenance::ExactAst,
7215            confidence: Confidence::High,
7216        });
7217        shard_v1.occurrences.push(perl_semantic_facts::OccurrenceFact {
7218            id: OccurrenceId(1),
7219            kind: OccurrenceKind::Call,
7220            entity_id: Some(EntityId(100)),
7221            anchor_id,
7222            scope_id: None,
7223            provenance: Provenance::ExactAst,
7224            confidence: Confidence::High,
7225        });
7226        shard_v1.entities.push(perl_semantic_facts::EntityFact {
7227            id: EntityId(100),
7228            kind: EntityKind::Subroutine,
7229            canonical_name: "RefIdx::foo".to_string(),
7230            anchor_id: Some(anchor_id),
7231            scope_id: None,
7232            provenance: Provenance::ExactAst,
7233            confidence: Confidence::High,
7234        });
7235        index.replace_fact_shard_incremental(&key, shard_v1);
7236
7237        // Reference index should have entries.
7238        assert!(
7239            index.semantic_reference_index.read().name_count() > 0
7240                || index.semantic_reference_index.read().entity_count() > 0,
7241            "reference index should be populated after first insert"
7242        );
7243
7244        // v2: same content_hash → skip entirely, reference index untouched.
7245        let shard_v2_same = make_shard(uri, 1, Some(10), Some(20), Some(99), Some(99));
7246        let r = index.replace_fact_shard_incremental(&key, shard_v2_same);
7247        assert!(r.content_unchanged);
7248
7249        // v3: different content_hash, same occurrence/edge hashes → skip ref index.
7250        let mut shard_v3 = make_shard(uri, 3, Some(11), Some(21), Some(30), Some(40));
7251        shard_v3.anchors.push(perl_semantic_facts::AnchorFact {
7252            id: anchor_id,
7253            file_id,
7254            span_start_byte: 0,
7255            span_end_byte: 5,
7256            scope_id: None,
7257            provenance: Provenance::ExactAst,
7258            confidence: Confidence::High,
7259        });
7260        shard_v3.occurrences.push(perl_semantic_facts::OccurrenceFact {
7261            id: OccurrenceId(1),
7262            kind: OccurrenceKind::Call,
7263            entity_id: Some(EntityId(100)),
7264            anchor_id,
7265            scope_id: None,
7266            provenance: Provenance::ExactAst,
7267            confidence: Confidence::High,
7268        });
7269        shard_v3.entities.push(perl_semantic_facts::EntityFact {
7270            id: EntityId(100),
7271            kind: EntityKind::Subroutine,
7272            canonical_name: "RefIdx::foo".to_string(),
7273            anchor_id: Some(anchor_id),
7274            scope_id: None,
7275            provenance: Provenance::ExactAst,
7276            confidence: Confidence::High,
7277        });
7278        let r3 = index.replace_fact_shard_incremental(&key, shard_v3);
7279        assert!(!r3.occurrences_updated, "occurrence hash unchanged → skip");
7280        assert!(!r3.edges_updated, "edge hash unchanged → skip");
7281
7282        Ok(())
7283    }
7284
7285    /// Verify that `index_file` uses incremental replacement (the fact shard
7286    /// is stored and updated correctly through the full indexing path).
7287    #[test]
7288    fn index_file_stores_fact_shard_incrementally() -> Result<(), Box<dyn std::error::Error>> {
7289        let index = WorkspaceIndex::new();
7290        let uri = "file:///lib/Incr.pm";
7291        let code = "package Incr;\nsub foo { 1 }\n1;\n";
7292
7293        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
7294        let shard1 = must_some(index.file_fact_shard(uri));
7295        assert!(shard1.anchors_hash.is_some());
7296        assert!(
7297            shard1.anchors.iter().any(|anchor| anchor.provenance == Provenance::ExactAst),
7298            "index_file should store the canonical semantic shard when adapters produce facts"
7299        );
7300        assert!(
7301            shard1.entities.iter().any(|entity| entity.provenance == Provenance::ExactAst),
7302            "index_file should store canonical entities rather than legacy fallback entities"
7303        );
7304
7305        // Re-index with same content → shard should be unchanged.
7306        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
7307        // The early-exit in index_file checks content_hash at the FileIndex
7308        // level, so the fact shard replacement is never reached for identical
7309        // content. Verify the shard is still present.
7310        let shard2 = must_some(index.file_fact_shard(uri));
7311        assert_eq!(shard1.content_hash, shard2.content_hash);
7312
7313        // Re-index with different content → shard should be replaced.
7314        let code2 = "package Incr;\nsub bar { 2 }\n1;\n";
7315        must(index.index_file(must(url::Url::parse(uri)), code2.to_string()));
7316        let shard3 = must_some(index.file_fact_shard(uri));
7317        assert_ne!(shard1.content_hash, shard3.content_hash);
7318
7319        Ok(())
7320    }
7321
7322    #[test]
7323    fn semantic_anchor_wire_location_uses_lsp_utf16_columns()
7324    -> Result<(), Box<dyn std::error::Error>> {
7325        use crate::semantic::queries::SemanticQueries;
7326
7327        let index = WorkspaceIndex::new();
7328        let uri = "file:///lib/UnicodeAnchor.pm";
7329        let code = "package UnicodeAnchor; my $emoji = \"😀\"; sub target { 1 }\n1;\n";
7330
7331        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
7332
7333        let candidates = index
7334            .with_semantic_queries_for_uri(uri, |file_id, queries| {
7335                let ctx = crate::semantic::queries::QueryContext::new(file_id, None, Some(0));
7336                queries.definitions("UnicodeAnchor::target", &ctx)
7337            })
7338            .ok_or("missing semantic queries")?;
7339        let anchor_id = candidates
7340            .first()
7341            .map(|candidate| candidate.anchor_id)
7342            .ok_or("missing unicode definition candidate")?;
7343        let shard = index.file_fact_shard(uri).ok_or("missing fact shard")?;
7344        let anchor = shard
7345            .anchors
7346            .iter()
7347            .find(|anchor| anchor.id == anchor_id)
7348            .ok_or("missing unicode anchor")?;
7349        let start = usize::try_from(anchor.span_start_byte)?;
7350        let end = usize::try_from(anchor.span_end_byte)?;
7351        let expected = WireRange::from_byte_offsets(code, start, end);
7352
7353        let location =
7354            index.semantic_anchor_wire_location(anchor_id).ok_or("missing wire location")?;
7355
7356        assert_eq!(location.range, expected);
7357        let wire_column = usize::try_from(location.range.start.character)?;
7358        let scalar_column = code[..start].chars().count();
7359        assert!(
7360            wire_column > scalar_column,
7361            "fixture must prove the wire column counts UTF-16 units, not Unicode scalar values"
7362        );
7363
7364        Ok(())
7365    }
7366
7367    #[test]
7368    fn semantic_anchor_wire_location_fails_closed_for_duplicate_anchor_ids()
7369    -> Result<(), Box<dyn std::error::Error>> {
7370        use crate::semantic::queries::SemanticQueries;
7371
7372        let index = WorkspaceIndex::new();
7373        let code = "package DuplicateAnchor;\nsub target { 1 }\n1;\n";
7374
7375        must(
7376            index.index_file(must(url::Url::parse("file:///lib/DuplicateA.pm")), code.to_string()),
7377        );
7378        must(
7379            index.index_file(must(url::Url::parse("file:///lib/DuplicateB.pm")), code.to_string()),
7380        );
7381
7382        let candidates = index
7383            .with_semantic_queries_for_uri("file:///lib/DuplicateA.pm", |file_id, queries| {
7384                let ctx = crate::semantic::queries::QueryContext::new(file_id, None, Some(0));
7385                queries.definitions("DuplicateAnchor::target", &ctx)
7386            })
7387            .ok_or("missing semantic queries")?;
7388
7389        let anchor_id = candidates
7390            .first()
7391            .map(|candidate| candidate.anchor_id)
7392            .ok_or("missing duplicate definition candidate")?;
7393        assert!(
7394            candidates.iter().filter(|candidate| candidate.anchor_id == anchor_id).count() > 1,
7395            "fixture must produce duplicate anchor IDs to prove fail-closed behavior"
7396        );
7397        assert_eq!(
7398            index.semantic_anchor_wire_location(anchor_id),
7399            None,
7400            "duplicate source-backed anchors must not resolve to an arbitrary file"
7401        );
7402
7403        Ok(())
7404    }
7405
7406    #[test]
7407    fn semantic_anchor_wire_location_for_file_resolves_duplicate_anchor_ids_by_file()
7408    -> Result<(), Box<dyn std::error::Error>> {
7409        use crate::semantic::queries::SemanticQueries;
7410
7411        let index = WorkspaceIndex::new();
7412        let code = "package DuplicateAnchor;\nsub target { 1 }\n1;\n";
7413        let uri_a = "file:///lib/DuplicateA.pm";
7414        let uri_b = "file:///lib/DuplicateB.pm";
7415
7416        must(index.index_file(must(url::Url::parse(uri_a)), code.to_string()));
7417        must(index.index_file(must(url::Url::parse(uri_b)), code.to_string()));
7418
7419        let (file_id_a, anchor_id) = index
7420            .with_semantic_queries_for_uri(uri_a, |file_id, queries| {
7421                let ctx = crate::semantic::queries::QueryContext::new(file_id, None, Some(0));
7422                queries
7423                    .definitions("DuplicateAnchor::target", &ctx)
7424                    .first()
7425                    .map(|candidate| (file_id, candidate.anchor_id))
7426            })
7427            .flatten()
7428            .ok_or("missing duplicate definition candidate")?;
7429
7430        assert_eq!(
7431            index.semantic_anchor_wire_location(anchor_id),
7432            None,
7433            "global anchor lookup must still fail closed for duplicate anchor IDs"
7434        );
7435
7436        let location = index
7437            .semantic_anchor_wire_location_for_file(file_id_a, anchor_id)
7438            .ok_or("file-scoped anchor lookup should resolve duplicate anchor ID")?;
7439        assert_eq!(location.uri, uri_a);
7440
7441        Ok(())
7442    }
7443
7444    // ── Property-based tests for incremental invalidation ──
7445
7446    mod prop_incremental_invalidation {
7447        use super::*;
7448        use proptest::prelude::*;
7449        use proptest::test_runner::Config as ProptestConfig;
7450
7451        /// Strategy for an optional per-category hash.
7452        ///
7453        /// ~10% of the time produces `None` (simulating legacy shards
7454        /// without per-category hashes); otherwise a random `u64`.
7455        fn arb_category_hash() -> impl Strategy<Value = Option<u64>> {
7456            prop_oneof![
7457                1 => Just(None),
7458                9 => any::<u64>().prop_map(Some),
7459            ]
7460        }
7461
7462        /// Strategy for a `FileFactShard` with the given URI and
7463        /// randomly-chosen hashes.
7464        fn arb_shard(uri: &'static str) -> impl Strategy<Value = FileFactShard> {
7465            (
7466                any::<u64>(),        // content_hash
7467                arb_category_hash(), // anchors_hash
7468                arb_category_hash(), // entities_hash
7469                arb_category_hash(), // occurrences_hash
7470                arb_category_hash(), // edges_hash
7471            )
7472                .prop_map(move |(content_hash, ah, eh, oh, edh)| {
7473                    make_shard(uri, content_hash, ah, eh, oh, edh)
7474                })
7475        }
7476
7477        // Property 15: Incremental Invalidation Correctness
7478        //
7479        // **Validates: Requirements 18.3, 18.4, 18.5**
7480        //
7481        // For any file re-indexing where the whole-file content_hash is
7482        // unchanged, the workspace store shall not modify any cross-file
7483        // indexes.  For any file re-indexing where a per-category hash is
7484        // unchanged, the workspace store shall skip re-indexing that
7485        // category.  For any file re-indexing where a per-category hash
7486        // has changed, the workspace store shall remove old entries and
7487        // insert new ones for that category.
7488        proptest! {
7489            #![proptest_config(ProptestConfig {
7490                failure_persistence: None,
7491                ..ProptestConfig::default()
7492            })]
7493
7494            #[test]
7495            fn prop_incremental_invalidation_correctness(
7496                old_shard in arb_shard("file:///lib/Prop.pm"),
7497                new_shard in arb_shard("file:///lib/Prop.pm"),
7498            ) {
7499                let index = WorkspaceIndex::new();
7500                let key = DocumentStore::uri_key("file:///lib/Prop.pm");
7501
7502                // Seed the index with the old shard.
7503                index.replace_fact_shard_incremental(&key, old_shard.clone());
7504
7505                // Replace with the new shard and capture the result.
7506                let result = index.replace_fact_shard_incremental(&key, new_shard.clone());
7507
7508                // ── Req 18.5: content_hash unchanged → skip entirely ──
7509                if old_shard.content_hash == new_shard.content_hash {
7510                    prop_assert!(
7511                        result.content_unchanged,
7512                        "content_unchanged must be true when content_hash is the same"
7513                    );
7514                    prop_assert!(
7515                        !result.anchors_updated,
7516                        "anchors_updated must be false when content_hash unchanged"
7517                    );
7518                    prop_assert!(
7519                        !result.entities_updated,
7520                        "entities_updated must be false when content_hash unchanged"
7521                    );
7522                    prop_assert!(
7523                        !result.occurrences_updated,
7524                        "occurrences_updated must be false when content_hash unchanged"
7525                    );
7526                    prop_assert!(
7527                        !result.edges_updated,
7528                        "edges_updated must be false when content_hash unchanged"
7529                    );
7530                } else {
7531                    prop_assert!(
7532                        !result.content_unchanged,
7533                        "content_unchanged must be false when content_hash differs"
7534                    );
7535
7536                    // ── Req 18.3 / 18.4: per-category hash comparison ──
7537                    // A category is "unchanged" when both old and new have
7538                    // Some(h) and the values are equal.  Otherwise the
7539                    // category is conservatively treated as changed.
7540
7541                    let anchors_should_update = crate::semantic::invalidation::category_hash_changed(
7542                        old_shard.anchors_hash,
7543                        new_shard.anchors_hash,
7544                    );
7545                    prop_assert_eq!(
7546                        result.anchors_updated,
7547                        anchors_should_update,
7548                        "anchors_updated mismatch: old={:?} new={:?}",
7549                        old_shard.anchors_hash,
7550                        new_shard.anchors_hash,
7551                    );
7552
7553                    let entities_should_update =
7554                        crate::semantic::invalidation::category_hash_changed(
7555                            old_shard.entities_hash,
7556                            new_shard.entities_hash,
7557                        );
7558                    prop_assert_eq!(
7559                        result.entities_updated,
7560                        entities_should_update,
7561                        "entities_updated mismatch: old={:?} new={:?}",
7562                        old_shard.entities_hash,
7563                        new_shard.entities_hash,
7564                    );
7565
7566                    let occurrences_should_update =
7567                        crate::semantic::invalidation::category_hash_changed(
7568                            old_shard.occurrences_hash,
7569                            new_shard.occurrences_hash,
7570                        );
7571                    prop_assert_eq!(
7572                        result.occurrences_updated,
7573                        occurrences_should_update,
7574                        "occurrences_updated mismatch: old={:?} new={:?}",
7575                        old_shard.occurrences_hash,
7576                        new_shard.occurrences_hash,
7577                    );
7578
7579                    let edges_should_update = crate::semantic::invalidation::category_hash_changed(
7580                        old_shard.edges_hash,
7581                        new_shard.edges_hash,
7582                    );
7583                    prop_assert_eq!(
7584                        result.edges_updated,
7585                        edges_should_update,
7586                        "edges_updated mismatch: old={:?} new={:?}",
7587                        old_shard.edges_hash,
7588                        new_shard.edges_hash,
7589                    );
7590                }
7591            }
7592        }
7593    }
7594}
7595
7596// ── with_semantic_queries_for_uri tests ──
7597
/// Tests for `WorkspaceIndex::with_semantic_queries_for_uri`: the callback
/// runs only for indexed URIs and receives the same file id that
/// `file_id_for_uri` reports.
#[cfg(test)]
mod semantic_query_callback_tests {
    use super::*;
    use perl_tdd_support::{must, must_some};

    /// For an indexed URI the callback runs and its return value is handed
    /// back wrapped in `Some`.
    #[test]
    fn with_semantic_queries_for_uri_indexed_uri_invokes_callback()
    -> Result<(), Box<dyn std::error::Error>> {
        let uri = "file:///lib/Foo.pm";
        let index = WorkspaceIndex::new();
        must(index.index_file(must(url::Url::parse(uri)), "sub foo { 1 }".to_string()));

        let outcome = index.with_semantic_queries_for_uri(uri, |file_id, _queries| {
            // Verify the file_id is consistent with the URI (non-zero hash).
            assert_ne!(file_id.0, 0, "file_id should be non-zero");
            // Sentinel value that must come back out of the call.
            42u32
        });

        assert_eq!(outcome, Some(42u32), "callback must run when URI is indexed");
        Ok(())
    }

    /// For a URI that was never indexed the call yields `None`.
    #[test]
    fn with_semantic_queries_for_uri_unknown_uri_returns_none()
    -> Result<(), Box<dyn std::error::Error>> {
        // Deliberately left empty: nothing is indexed.
        let index = WorkspaceIndex::new();
        let outcome = index.with_semantic_queries_for_uri("file:///not/indexed.pl", |_, _| 99u32);
        assert!(outcome.is_none(), "unindexed URI must return None without invoking callback");
        Ok(())
    }

    /// The file id passed to the callback equals the one returned by
    /// `file_id_for_uri` for the same URI.
    #[test]
    fn with_semantic_queries_for_uri_file_id_matches_file_id_for_uri()
    -> Result<(), Box<dyn std::error::Error>> {
        let uri = "file:///lib/Bar.pm";
        let index = WorkspaceIndex::new();
        must(index.index_file(must(url::Url::parse(uri)), "sub bar { 1 }".to_string()));

        let id_from_callback =
            must_some(index.with_semantic_queries_for_uri(uri, |file_id, _q| file_id));
        let id_from_lookup = must_some(index.file_id_for_uri(uri));

        assert_eq!(
            id_from_lookup, id_from_callback,
            "file_id_for_uri and with_semantic_queries_for_uri must agree"
        );
        Ok(())
    }

    /// The callback's side effect must not be observable when the URI is
    /// not indexed.
    #[test]
    fn with_semantic_queries_for_uri_callback_not_called_when_not_indexed()
    -> Result<(), Box<dyn std::error::Error>> {
        let index = WorkspaceIndex::new();
        let mut invoked = false;
        // The return value is irrelevant here; only the flag matters.
        let _ = index.with_semantic_queries_for_uri("file:///ghost.pl", |_, _| {
            invoked = true;
        });
        assert!(!invoked, "callback must not be invoked for unindexed URI");
        Ok(())
    }
}
perl_workspace/workspace/workspace_index.rs

perl_workspace/workspace/
workspace_index.rs