perl_workspace/workspace/
workspace_index.rs

1//! Workspace-wide symbol index for fast cross-file lookups in Perl LSP.
2//!
3//! This module provides efficient indexing of symbols across an entire Perl workspace,
4//! enabling enterprise-grade features like find-references, rename refactoring, and
5//! workspace symbol search with ≤1ms response times.
6//!
7//! # LSP Workflow Integration
8//!
9//! Core component in the Parse → Index → Navigate → Complete → Analyze pipeline:
10//! 1. **Parse**: AST generation from Perl source files
11//! 2. **Index**: Workspace symbol table construction with dual indexing strategy
12//! 3. **Navigate**: Cross-file symbol resolution and go-to-definition
13//! 4. **Complete**: Context-aware completion with workspace symbol awareness
14//! 5. **Analyze**: Cross-reference analysis and workspace refactoring operations
15//!
16//! # Performance Characteristics
17//!
18//! - **Symbol indexing**: O(n) where n is total workspace symbols
19//! - **Symbol lookup**: O(1) average with hash table indexing
20//! - **Cross-file queries**: <50μs for typical workspace sizes
21//! - **Memory usage**: ~1MB per 10K symbols with optimized storage
22//! - **Incremental updates**: ≤1ms for file-level symbol changes
23//! - **Large workspace scaling**: Designed to scale to 50K+ files and large codebases
24//! - **Benchmark targets**: <50μs lookups and ≤1ms incremental updates at scale
25//!
26//! # Dual Indexing Strategy
27//!
28//! Implements dual indexing for comprehensive Perl symbol resolution:
29//! - **Qualified names**: `Package::function` for explicit references
30//! - **Bare names**: `function` for context-dependent resolution
31//! - **98% reference coverage**: Handles both qualified and unqualified calls
32//! - **Automatic deduplication**: Prevents duplicate results in queries
33//!
34//! # Usage Examples
35//!
36//! ```rust
37//! use perl_workspace::workspace::workspace_index::WorkspaceIndex;
38//! use url::Url;
39//!
40//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
41//! let index = WorkspaceIndex::new();
42//!
43//! // Index a Perl file
44//! let uri = Url::parse("file:///example.pl")?;
45//! let code = "package MyPackage;\nsub example { return 42; }";
46//! index.index_file(uri, code.to_string())?;
47//!
48//! // Find symbol definitions
49//! let definition = index.find_definition("MyPackage::example");
50//! assert!(definition.is_some());
51//!
52//! // Workspace symbol search
53//! let symbols = index.find_symbols("example");
54//! assert!(!symbols.is_empty());
55//! # Ok(())
56//! # }
57//! ```
58//!
59//! # Related Modules
60//!
61//! See also the symbol extraction, reference finding, and semantic token classification
62//! modules in the workspace index implementation.
63
64use crate::Parser;
65use crate::ast::{Node, NodeKind};
66use crate::document_store::{Document, DocumentStore};
67use crate::position::{Position, Range};
68use crate::workspace::monitoring::IndexInstrumentation;
69use parking_lot::RwLock;
70use perl_position_tracking::{WireLocation, WirePosition, WireRange};
71use perl_semantic_facts::{
72    AnchorFact, AnchorId, Confidence, EdgeFact, EntityFact, EntityId, EntityKind, FileId,
73    Provenance,
74};
75use serde::{Deserialize, Serialize};
76use std::collections::hash_map::DefaultHasher;
77use std::collections::{HashMap, HashSet};
78use std::hash::{Hash, Hasher};
79use std::path::Path;
80use std::sync::Arc;
81use std::time::Instant;
82use url::Url;
83
84use crate::semantic::imports::ImportExportIndex;
85pub use crate::semantic::invalidation::ShardReplaceResult;
86use crate::semantic::invalidation::{ShardCategoryHashes, plan_shard_replacement};
87use crate::semantic::references::ReferenceIndex;
88pub use crate::workspace::monitoring::{
89    DegradationReason, EarlyExitReason, EarlyExitRecord, IndexInstrumentationSnapshot,
90    IndexMetrics, IndexPerformanceCaps, IndexPhase, IndexPhaseTransition, IndexResourceLimits,
91    IndexStateKind, IndexStateTransition, ResourceKind,
92};
93use perl_symbol::surface::decl::extract_symbol_decls;
94use perl_symbol::surface::facts::{symbol_decls_to_semantic_facts, symbol_refs_to_semantic_facts};
95use perl_symbol::surface::r#ref::extract_symbol_refs;
96
97// Re-export URI utilities for backward compatibility
98#[cfg(not(target_arch = "wasm32"))]
99/// URI ↔ filesystem helpers used during Index/Analyze workflows.
100pub use perl_uri::{fs_path_to_uri, uri_to_fs_path};
101/// URI inspection helpers used during Index/Analyze workflows.
102pub use perl_uri::{is_file_uri, is_special_scheme, uri_extension, uri_key};
103
104// ============================================================================
105// Index Lifecycle Types (Index Lifecycle v1 Specification)
106// ============================================================================
107
108/// Index readiness state - explicit lifecycle management
109///
110/// Represents the current operational state of the workspace index, enabling
111/// LSP handlers to provide appropriate responses based on index availability.
112/// This state machine prevents blocking operations and ensures graceful
113/// degradation when the index is not fully ready.
114///
115/// # State Transitions
116///
117/// - `Building` → `Ready`: Workspace scan completes successfully
118/// - `Building` → `Degraded`: Scan timeout, IO error, or resource limit
119/// - `Ready` → `Building`: Workspace folder change or file watching events
120/// - `Ready` → `Degraded`: Parse storm (>10 pending) or IO error
121/// - `Degraded` → `Building`: Recovery attempt after cooldown
122/// - `Degraded` → `Ready`: Successful re-scan after recovery
123///
124/// # Invariants
125///
126/// - During a single build attempt, `phase` advances monotonically
127///   (`Idle` → `Scanning` → `Indexing`).
128/// - `indexed_count` must not exceed `total_count`; callers should keep totals updated.
129/// - `Ready` and `Degraded` counts are snapshots captured at transition time.
130///
131/// # Usage
132///
133/// ```rust,ignore
134/// use perl_parser::workspace_index::{IndexPhase, IndexState};
135/// use std::time::Instant;
136///
137/// let state = IndexState::Building {
138///     phase: IndexPhase::Indexing,
139///     indexed_count: 50,
140///     total_count: 100,
141///     started_at: Instant::now(),
142/// };
143/// ```
144#[derive(Clone, Debug)]
145pub enum IndexState {
146    /// Index is being constructed (workspace scan in progress)
147    Building {
148        /// Current build phase (Idle → Scanning → Indexing)
149        phase: IndexPhase,
150        /// Files indexed so far
151        indexed_count: usize,
152        /// Total files discovered
153        total_count: usize,
154        /// Started at
155        started_at: Instant,
156    },
157
158    /// Index is consistent and ready for queries
159    Ready {
160        /// Total symbols indexed
161        symbol_count: usize,
162        /// Total files indexed
163        file_count: usize,
164        /// Timestamp of last successful index
165        completed_at: Instant,
166    },
167
168    /// Index is serving but degraded
169    Degraded {
170        /// Why we degraded
171        reason: DegradationReason,
172        /// What's still available
173        available_symbols: usize,
174        /// When degradation occurred
175        since: Instant,
176    },
177}
178
179impl IndexState {
180    /// Return the coarse state kind for instrumentation and routing decisions
181    pub fn kind(&self) -> IndexStateKind {
182        match self {
183            IndexState::Building { .. } => IndexStateKind::Building,
184            IndexState::Ready { .. } => IndexStateKind::Ready,
185            IndexState::Degraded { .. } => IndexStateKind::Degraded,
186        }
187    }
188
189    /// Return the current build phase when in `Building` state
190    pub fn phase(&self) -> Option<IndexPhase> {
191        match self {
192            IndexState::Building { phase, .. } => Some(*phase),
193            _ => None,
194        }
195    }
196
197    /// Timestamp of when the current state began
198    pub fn state_started_at(&self) -> Instant {
199        match self {
200            IndexState::Building { started_at, .. } => *started_at,
201            IndexState::Ready { completed_at, .. } => *completed_at,
202            IndexState::Degraded { since, .. } => *since,
203        }
204    }
205}
206
207/// Coordinates index lifecycle, state transitions, and handler queries
208///
209/// The IndexCoordinator wraps `WorkspaceIndex` with explicit state management,
210/// enabling LSP handlers to query the index readiness and implement appropriate
211/// fallback behavior when the index is not fully ready.
212///
213/// # Architecture
214///
215/// ```text
216/// LspServer
217///   └── IndexCoordinator
218///         ├── state: Arc<RwLock<IndexState>>
219///         ├── index: Arc<WorkspaceIndex>
220///         ├── limits: IndexResourceLimits
221///         ├── caps: IndexPerformanceCaps
222///         ├── metrics: IndexMetrics
223///         └── instrumentation: IndexInstrumentation
224/// ```
225///
226/// # State Management
227///
228/// The coordinator manages three states:
229/// - `Building`: Initial scan or recovery in progress
230/// - `Ready`: Fully indexed and available for queries
231/// - `Degraded`: Available but with reduced functionality
232///
233/// # Performance Characteristics
234///
235/// - State checks are lock-free reads (cloned state, <100ns)
236/// - State transitions use write locks (rare, <1μs)
237/// - Query dispatch has zero overhead in Ready state
238/// - Degradation detection is atomic (<10ns per check)
239///
240/// # Usage
241///
242/// ```rust,ignore
243/// use perl_parser::workspace_index::{IndexCoordinator, IndexState};
244///
245/// let coordinator = IndexCoordinator::new();
246/// assert!(matches!(coordinator.state(), IndexState::Building { .. }));
247///
248/// // Transition to ready after indexing
249/// coordinator.transition_to_ready(100, 5000);
250/// assert!(matches!(coordinator.state(), IndexState::Ready { .. }));
251///
252/// // Query with degradation handling
253/// let _result = coordinator.query(
254///     |index| index.find_definition("my_function"), // full query
255///     |_index| None                                 // partial fallback
256/// );
257/// ```
258pub struct IndexCoordinator {
259    /// Current index state (RwLock for state transitions)
260    state: Arc<RwLock<IndexState>>,
261
262    /// The actual workspace index
263    index: Arc<WorkspaceIndex>,
264
265    /// Resource limits configuration
266    ///
267    /// Enforces bounded resource usage to prevent unbounded memory growth:
268    /// - max_files: Triggers degradation when file count exceeds limit
269    /// - max_total_symbols: Triggers degradation when symbol count exceeds limit
270    /// - max_symbols_per_file: Used for per-file validation during indexing
271    limits: IndexResourceLimits,
272
273    /// Performance caps for early-exit heuristics
274    caps: IndexPerformanceCaps,
275
276    /// Runtime metrics for degradation detection
277    metrics: IndexMetrics,
278
279    /// Instrumentation for lifecycle transitions and durations
280    instrumentation: IndexInstrumentation,
281}
282
283impl std::fmt::Debug for IndexCoordinator {
284    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
285        f.debug_struct("IndexCoordinator")
286            .field("state", &*self.state.read())
287            .field("limits", &self.limits)
288            .field("caps", &self.caps)
289            .finish_non_exhaustive()
290    }
291}
292
293impl IndexCoordinator {
294    /// Create a new coordinator in Building state
295    ///
296    /// Initializes the coordinator with default resource limits and
297    /// an empty workspace index ready for initial scan.
298    ///
299    /// # Returns
300    ///
301    /// A coordinator initialized in `IndexState::Building`.
302    ///
303    /// # Examples
304    ///
305    /// ```rust,ignore
306    /// use perl_parser::workspace_index::IndexCoordinator;
307    ///
308    /// let coordinator = IndexCoordinator::new();
309    /// ```
310    pub fn new() -> Self {
311        Self {
312            state: Arc::new(RwLock::new(IndexState::Building {
313                phase: IndexPhase::Idle,
314                indexed_count: 0,
315                total_count: 0,
316                started_at: Instant::now(),
317            })),
318            index: Arc::new(WorkspaceIndex::new()),
319            limits: IndexResourceLimits::default(),
320            caps: IndexPerformanceCaps::default(),
321            metrics: IndexMetrics::new(),
322            instrumentation: IndexInstrumentation::new(),
323        }
324    }
325
326    /// Create a coordinator with custom resource limits
327    ///
328    /// # Arguments
329    ///
330    /// * `limits` - Custom resource limits for this workspace
331    ///
332    /// # Returns
333    ///
334    /// A coordinator configured with the provided resource limits.
335    ///
336    /// # Examples
337    ///
338    /// ```rust,ignore
339    /// use perl_parser::workspace_index::{IndexCoordinator, IndexResourceLimits};
340    ///
341    /// let limits = IndexResourceLimits::default();
342    /// let coordinator = IndexCoordinator::with_limits(limits);
343    /// ```
344    pub fn with_limits(limits: IndexResourceLimits) -> Self {
345        Self {
346            state: Arc::new(RwLock::new(IndexState::Building {
347                phase: IndexPhase::Idle,
348                indexed_count: 0,
349                total_count: 0,
350                started_at: Instant::now(),
351            })),
352            index: Arc::new(WorkspaceIndex::new()),
353            limits,
354            caps: IndexPerformanceCaps::default(),
355            metrics: IndexMetrics::new(),
356            instrumentation: IndexInstrumentation::new(),
357        }
358    }
359
360    /// Create a coordinator with custom limits and performance caps
361    ///
362    /// # Arguments
363    ///
364    /// * `limits` - Resource limits for this workspace
365    /// * `caps` - Performance caps for indexing budgets
366    pub fn with_limits_and_caps(limits: IndexResourceLimits, caps: IndexPerformanceCaps) -> Self {
367        Self {
368            state: Arc::new(RwLock::new(IndexState::Building {
369                phase: IndexPhase::Idle,
370                indexed_count: 0,
371                total_count: 0,
372                started_at: Instant::now(),
373            })),
374            index: Arc::new(WorkspaceIndex::new()),
375            limits,
376            caps,
377            metrics: IndexMetrics::new(),
378            instrumentation: IndexInstrumentation::new(),
379        }
380    }
381
382    /// Get current state (lock-free read via clone)
383    ///
384    /// Returns a cloned copy of the current state for lock-free access
385    /// in hot path LSP handlers.
386    ///
387    /// # Returns
388    ///
389    /// The current `IndexState` snapshot.
390    ///
391    /// # Examples
392    ///
393    /// ```rust,ignore
394    /// use perl_parser::workspace_index::{IndexCoordinator, IndexState};
395    ///
396    /// let coordinator = IndexCoordinator::new();
397    /// match coordinator.state() {
398    ///     IndexState::Ready { .. } => {
399    ///         // Full query path
400    ///     }
401    ///     _ => {
402    ///         // Degraded/building fallback
403    ///     }
404    /// }
405    /// ```
406    pub fn state(&self) -> IndexState {
407        self.state.read().clone()
408    }
409
410    /// Get reference to the underlying workspace index
411    ///
412    /// Provides direct access to the `WorkspaceIndex` for operations
413    /// that don't require state checking (e.g., document store access).
414    ///
415    /// # Returns
416    ///
417    /// A shared reference to the underlying workspace index.
418    ///
419    /// # Examples
420    ///
421    /// ```rust,ignore
422    /// use perl_parser::workspace_index::IndexCoordinator;
423    ///
424    /// let coordinator = IndexCoordinator::new();
425    /// let _index = coordinator.index();
426    /// ```
427    pub fn index(&self) -> &Arc<WorkspaceIndex> {
428        &self.index
429    }
430
431    /// Access the configured resource limits
432    pub fn limits(&self) -> &IndexResourceLimits {
433        &self.limits
434    }
435
436    /// Access the configured performance caps
437    pub fn performance_caps(&self) -> &IndexPerformanceCaps {
438        &self.caps
439    }
440
441    /// Snapshot lifecycle instrumentation (durations, transitions, early exits)
442    pub fn instrumentation_snapshot(&self) -> IndexInstrumentationSnapshot {
443        self.instrumentation.snapshot()
444    }
445
446    /// Notify of file change (may trigger state transition)
447    ///
448    /// Increments the pending parse count and may transition to degraded
449    /// state if a parse storm is detected.
450    ///
451    /// # Arguments
452    ///
453    /// * `_uri` - URI of the changed file (reserved for future use).
454    ///
455    /// # Returns
456    ///
457    /// Nothing. Updates coordinator metrics and state for the LSP workflow.
458    ///
459    /// # Examples
460    ///
461    /// ```rust,ignore
462    /// use perl_parser::workspace_index::IndexCoordinator;
463    ///
464    /// let coordinator = IndexCoordinator::new();
465    /// coordinator.notify_change("file:///example.pl");
466    /// ```
467    pub fn notify_change(&self, _uri: &str) {
468        let pending = self.metrics.increment_pending_parses();
469
470        // Check for parse storm
471        if self.metrics.is_parse_storm() {
472            self.transition_to_degraded(DegradationReason::ParseStorm { pending_parses: pending });
473        }
474    }
475
476    /// Notify parse completion for the Index/Analyze workflow stages.
477    ///
478    /// Decrements the pending parse count, enforces resource limits, and may
479    /// attempt recovery when parse storms clear.
480    ///
481    /// # Arguments
482    ///
483    /// * `_uri` - URI of the parsed file (reserved for future use).
484    ///
485    /// # Returns
486    ///
487    /// Nothing. Updates coordinator metrics and state for the LSP workflow.
488    ///
489    /// # Examples
490    ///
491    /// ```rust,ignore
492    /// use perl_parser::workspace_index::IndexCoordinator;
493    ///
494    /// let coordinator = IndexCoordinator::new();
495    /// coordinator.notify_parse_complete("file:///example.pl");
496    /// ```
497    pub fn notify_parse_complete(&self, _uri: &str) {
498        let pending = self.metrics.decrement_pending_parses();
499
500        // Check for recovery from parse storm
501        if pending == 0 {
502            if let IndexState::Degraded { reason: DegradationReason::ParseStorm { .. }, .. } =
503                self.state()
504            {
505                // Attempt recovery - transition back to Building for re-scan
506                let mut state = self.state.write();
507                let from_kind = state.kind();
508                self.instrumentation.record_state_transition(from_kind, IndexStateKind::Building);
509                *state = IndexState::Building {
510                    phase: IndexPhase::Idle,
511                    indexed_count: 0,
512                    total_count: 0,
513                    started_at: Instant::now(),
514                };
515            }
516        }
517
518        // Enforce resource limits after parse completion
519        self.enforce_limits();
520    }
521
522    /// Transition to Ready state
523    ///
524    /// Marks the index as fully ready for queries after successful workspace
525    /// scan. Records the file count, symbol count, and completion timestamp.
526    /// Enforces resource limits after transition.
527    ///
528    /// # State Transition Guards
529    ///
530    /// Only valid transitions:
531    /// - `Building` → `Ready` (normal completion)
532    /// - `Degraded` → `Ready` (recovery after fix)
533    ///
534    /// # Arguments
535    ///
536    /// * `file_count` - Total number of files indexed
537    /// * `symbol_count` - Total number of symbols extracted
538    ///
539    /// # Returns
540    ///
541    /// Nothing. The coordinator state is updated in-place.
542    ///
543    /// # Examples
544    ///
545    /// ```rust,ignore
546    /// use perl_parser::workspace_index::IndexCoordinator;
547    ///
548    /// let coordinator = IndexCoordinator::new();
549    /// coordinator.transition_to_ready(100, 5000);
550    /// ```
551    pub fn transition_to_ready(&self, file_count: usize, symbol_count: usize) {
552        let mut state = self.state.write();
553        let from_kind = state.kind();
554
555        // State transition guard: validate current state allows transition to Ready
556        match &*state {
557            IndexState::Building { .. } | IndexState::Degraded { .. } => {
558                // Valid transition - proceed
559                *state =
560                    IndexState::Ready { symbol_count, file_count, completed_at: Instant::now() };
561            }
562            IndexState::Ready { .. } => {
563                // Already Ready - update metrics but don't log as transition
564                *state =
565                    IndexState::Ready { symbol_count, file_count, completed_at: Instant::now() };
566            }
567        }
568        self.instrumentation.record_state_transition(from_kind, IndexStateKind::Ready);
569        drop(state); // Release write lock before checking limits
570
571        // Enforce resource limits after transition
572        self.enforce_limits();
573    }
574
575    /// Transition to Scanning phase (Idle → Scanning)
576    ///
577    /// Resets build counters and marks the index as scanning workspace folders.
578    pub fn transition_to_scanning(&self) {
579        let mut state = self.state.write();
580        let from_kind = state.kind();
581
582        match &*state {
583            IndexState::Building { phase, indexed_count, total_count, started_at } => {
584                if *phase != IndexPhase::Scanning {
585                    self.instrumentation.record_phase_transition(*phase, IndexPhase::Scanning);
586                }
587                *state = IndexState::Building {
588                    phase: IndexPhase::Scanning,
589                    indexed_count: *indexed_count,
590                    total_count: *total_count,
591                    started_at: *started_at,
592                };
593            }
594            IndexState::Ready { .. } | IndexState::Degraded { .. } => {
595                self.instrumentation.record_state_transition(from_kind, IndexStateKind::Building);
596                self.instrumentation
597                    .record_phase_transition(IndexPhase::Idle, IndexPhase::Scanning);
598                *state = IndexState::Building {
599                    phase: IndexPhase::Scanning,
600                    indexed_count: 0,
601                    total_count: 0,
602                    started_at: Instant::now(),
603                };
604            }
605        }
606    }
607
608    /// Update scanning progress with the latest discovered file count
609    pub fn update_scan_progress(&self, total_count: usize) {
610        let mut state = self.state.write();
611        if let IndexState::Building { phase, indexed_count, started_at, .. } = &*state {
612            if *phase != IndexPhase::Scanning {
613                self.instrumentation.record_phase_transition(*phase, IndexPhase::Scanning);
614            }
615            *state = IndexState::Building {
616                phase: IndexPhase::Scanning,
617                indexed_count: *indexed_count,
618                total_count,
619                started_at: *started_at,
620            };
621        }
622    }
623
624    /// Transition to Indexing phase (Scanning → Indexing)
625    ///
626    /// Uses the discovered file count as the total index target.
627    pub fn transition_to_indexing(&self, total_count: usize) {
628        let mut state = self.state.write();
629        let from_kind = state.kind();
630
631        match &*state {
632            IndexState::Building { phase, indexed_count, started_at, .. } => {
633                if *phase != IndexPhase::Indexing {
634                    self.instrumentation.record_phase_transition(*phase, IndexPhase::Indexing);
635                }
636                *state = IndexState::Building {
637                    phase: IndexPhase::Indexing,
638                    indexed_count: *indexed_count,
639                    total_count,
640                    started_at: *started_at,
641                };
642            }
643            IndexState::Ready { .. } | IndexState::Degraded { .. } => {
644                self.instrumentation.record_state_transition(from_kind, IndexStateKind::Building);
645                self.instrumentation
646                    .record_phase_transition(IndexPhase::Idle, IndexPhase::Indexing);
647                *state = IndexState::Building {
648                    phase: IndexPhase::Indexing,
649                    indexed_count: 0,
650                    total_count,
651                    started_at: Instant::now(),
652                };
653            }
654        }
655    }
656
657    /// Transition to Building state (Indexing phase)
658    ///
659    /// Marks the index as indexing with a known total file count.
660    pub fn transition_to_building(&self, total_count: usize) {
661        let mut state = self.state.write();
662        let from_kind = state.kind();
663
664        // State transition guard: validate transition is allowed
665        match &*state {
666            IndexState::Degraded { .. } | IndexState::Ready { .. } => {
667                self.instrumentation.record_state_transition(from_kind, IndexStateKind::Building);
668                self.instrumentation
669                    .record_phase_transition(IndexPhase::Idle, IndexPhase::Indexing);
670                *state = IndexState::Building {
671                    phase: IndexPhase::Indexing,
672                    indexed_count: 0,
673                    total_count,
674                    started_at: Instant::now(),
675                };
676            }
677            IndexState::Building { phase, indexed_count, started_at, .. } => {
678                let mut next_phase = *phase;
679                if *phase == IndexPhase::Idle {
680                    self.instrumentation
681                        .record_phase_transition(IndexPhase::Idle, IndexPhase::Indexing);
682                    next_phase = IndexPhase::Indexing;
683                }
684                *state = IndexState::Building {
685                    phase: next_phase,
686                    indexed_count: *indexed_count,
687                    total_count,
688                    started_at: *started_at,
689                };
690            }
691        }
692    }
693
694    /// Update Building state progress for the Index/Analyze workflow stages.
695    ///
696    /// Increments the indexed file count and checks for scan timeouts.
697    ///
698    /// # Arguments
699    ///
700    /// * `indexed_count` - Number of files indexed so far.
701    ///
702    /// # Returns
703    ///
704    /// Nothing. Updates coordinator state and may transition to `Degraded`.
705    ///
706    /// # Examples
707    ///
708    /// ```rust,ignore
709    /// use perl_parser::workspace_index::IndexCoordinator;
710    ///
711    /// let coordinator = IndexCoordinator::new();
712    /// coordinator.transition_to_building(100);
713    /// coordinator.update_building_progress(1);
714    /// ```
715    pub fn update_building_progress(&self, indexed_count: usize) {
716        let mut state = self.state.write();
717
718        if let IndexState::Building { phase, started_at, total_count, .. } = &*state {
719            let elapsed = started_at.elapsed().as_millis() as u64;
720
721            // Check for scan timeout
722            if elapsed > self.limits.max_scan_duration_ms {
723                // Timeout exceeded - transition to degraded
724                drop(state);
725                self.transition_to_degraded(DegradationReason::ScanTimeout { elapsed_ms: elapsed });
726                return;
727            }
728
729            // Update progress
730            *state = IndexState::Building {
731                phase: *phase,
732                indexed_count,
733                total_count: *total_count,
734                started_at: *started_at,
735            };
736        }
737    }
738
739    /// Transition to Degraded state
740    ///
741    /// Marks the index as degraded with the specified reason. Preserves
742    /// the current symbol count (if available) to indicate partial
743    /// functionality remains.
744    ///
745    /// # Arguments
746    ///
747    /// * `reason` - Why the index degraded (ParseStorm, IoError, etc.)
748    ///
749    /// # Returns
750    ///
751    /// Nothing. The coordinator state is updated in-place.
752    ///
753    /// # Examples
754    ///
755    /// ```rust,ignore
756    /// use perl_parser::workspace_index::{DegradationReason, IndexCoordinator, ResourceKind};
757    ///
758    /// let coordinator = IndexCoordinator::new();
759    /// coordinator.transition_to_degraded(DegradationReason::ResourceLimit {
760    ///     kind: ResourceKind::MaxFiles,
761    /// });
762    /// ```
763    pub fn transition_to_degraded(&self, reason: DegradationReason) {
764        let mut state = self.state.write();
765        let from_kind = state.kind();
766
767        // Get available symbols count from current state
768        let available_symbols = match &*state {
769            IndexState::Ready { symbol_count, .. } => *symbol_count,
770            IndexState::Degraded { available_symbols, .. } => *available_symbols,
771            IndexState::Building { .. } => 0,
772        };
773
774        self.instrumentation.record_state_transition(from_kind, IndexStateKind::Degraded);
775        *state = IndexState::Degraded { reason, available_symbols, since: Instant::now() };
776    }
777
778    /// Check resource limits and return degradation reason if exceeded
779    ///
780    /// Examines current workspace index state against configured resource limits.
781    /// Returns the first exceeded limit found, enabling targeted degradation.
782    ///
783    /// # Returns
784    ///
785    /// * `Some(DegradationReason)` - Resource limit exceeded, contains specific limit type
786    /// * `None` - All limits within acceptable bounds
787    ///
788    /// # Checked Limits
789    ///
790    /// - `max_files`: Total number of indexed files
791    /// - `max_total_symbols`: Aggregate symbol count across workspace
792    ///
793    /// # Performance
794    ///
795    /// - Lock-free read of index state (<100ns)
796    /// - Symbol counting is O(n) where n is number of files
797    ///
798    /// Returns: `Some(DegradationReason)` when a limit is exceeded, otherwise `None`.
799    ///
800    /// # Examples
801    ///
802    /// ```rust,ignore
803    /// use perl_parser::workspace_index::IndexCoordinator;
804    ///
805    /// let coordinator = IndexCoordinator::new();
806    /// let _reason = coordinator.check_limits();
807    /// ```
808    pub fn check_limits(&self) -> Option<DegradationReason> {
809        let files = self.index.files.read();
810
811        // Check max_files limit
812        let file_count = files.len();
813        if file_count > self.limits.max_files {
814            return Some(DegradationReason::ResourceLimit { kind: ResourceKind::MaxFiles });
815        }
816
817        // Check max_total_symbols limit
818        let total_symbols: usize = files.values().map(|fi| fi.symbols.len()).sum();
819        if total_symbols > self.limits.max_total_symbols {
820            return Some(DegradationReason::ResourceLimit { kind: ResourceKind::MaxSymbols });
821        }
822
823        None
824    }
825
826    /// Enforce resource limits and trigger degradation if exceeded
827    ///
828    /// Checks current resource usage against configured limits and automatically
829    /// transitions to Degraded state if any limit is exceeded. This method should
830    /// be called after operations that modify index size (file additions, parse
831    /// completions, etc.).
832    ///
833    /// # State Transitions
834    ///
835    /// - `Ready` → `Degraded(ResourceLimit)` if limits exceeded
836    /// - `Building` → `Degraded(ResourceLimit)` if limits exceeded
837    ///
838    /// # Returns
839    ///
840    /// Nothing. The coordinator state is updated in-place when limits are exceeded.
841    ///
842    /// # Examples
843    ///
844    /// ```rust,ignore
845    /// use perl_parser::workspace_index::IndexCoordinator;
846    ///
847    /// let coordinator = IndexCoordinator::new();
848    /// // ... index some files ...
849    /// coordinator.enforce_limits();  // Check and degrade if needed
850    /// ```
851    pub fn enforce_limits(&self) {
852        if let Some(reason) = self.check_limits() {
853            self.transition_to_degraded(reason);
854        }
855    }
856
857    /// Record an early-exit event for indexing instrumentation
858    pub fn record_early_exit(
859        &self,
860        reason: EarlyExitReason,
861        elapsed_ms: u64,
862        indexed_files: usize,
863        total_files: usize,
864    ) {
865        self.instrumentation.record_early_exit(EarlyExitRecord {
866            reason,
867            elapsed_ms,
868            indexed_files,
869            total_files,
870        });
871    }
872
873    /// Query with automatic degradation handling
874    ///
875    /// Dispatches to full query if index is Ready, or partial query otherwise.
876    /// This pattern enables LSP handlers to provide appropriate responses
877    /// based on index state without explicit state checking.
878    ///
879    /// # Type Parameters
880    ///
881    /// * `T` - Return type of the query functions
882    /// * `F1` - Full query function type accepting `&WorkspaceIndex` and returning `T`
883    /// * `F2` - Partial query function type accepting `&WorkspaceIndex` and returning `T`
884    ///
885    /// # Arguments
886    ///
887    /// * `full_query` - Function to execute when index is Ready
888    /// * `partial_query` - Function to execute when index is Building/Degraded
889    ///
890    /// # Returns
891    ///
892    /// The value returned by the selected query function.
893    ///
894    /// # Examples
895    ///
896    /// ```rust,ignore
897    /// use perl_parser::workspace_index::IndexCoordinator;
898    ///
899    /// let coordinator = IndexCoordinator::new();
900    /// let locations = coordinator.query(
901    ///     |index| index.find_references("my_function"),  // Full workspace search
902    ///     |index| vec![]                                 // Empty fallback
903    /// );
904    /// ```
905    pub fn query<T, F1, F2>(&self, full_query: F1, partial_query: F2) -> T
906    where
907        F1: FnOnce(&WorkspaceIndex) -> T,
908        F2: FnOnce(&WorkspaceIndex) -> T,
909    {
910        match self.state() {
911            IndexState::Ready { .. } => full_query(&self.index),
912            _ => partial_query(&self.index),
913        }
914    }
915}
916
/// `IndexCoordinator::default()` is equivalent to `IndexCoordinator::new()`.
impl Default for IndexCoordinator {
    fn default() -> Self {
        // Delegate so both construction paths always agree.
        Self::new()
    }
}
922
923// ============================================================================
924// Symbol Indexing Types
925// ============================================================================
926
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
/// Coarse symbol category used as part of [`SymbolKey`] for cross-file indexing
/// during Index/Navigate workflows.
///
/// The type is `Copy` and hashable, so it can be embedded in map keys.
pub enum SymKind {
    /// Variable symbol (`$`, `@`, or `%` sigil)
    Var,
    /// Subroutine definition (`sub foo`)
    Sub,
    /// Package declaration (`package Foo`)
    Pack,
}
937
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
/// A normalized symbol key for cross-file lookups in Index/Navigate workflows.
///
/// Derives `Eq` and `Hash`, so it is usable as a hash-map or hash-set key.
/// The string components are `Arc<str>`, which makes cloning a key a
/// reference-count bump rather than a string copy.
pub struct SymbolKey {
    /// Package name containing this symbol
    pub pkg: Arc<str>,
    /// Bare name without sigil prefix
    pub name: Arc<str>,
    /// Variable sigil (`$`, `@`, or `%`) if applicable; `None` otherwise
    pub sigil: Option<char>,
    /// Kind of symbol (variable, subroutine, package)
    pub kind: SymKind,
}
950
/// Normalize a Perl variable name for Index/Analyze workflows.
///
/// Splits a leading `$`, `@`, or `%` sigil from the rest of the name so that
/// symbols can be indexed by their bare identifier.
///
/// # Arguments
///
/// * `name` - Variable name from Perl source, with or without sigil.
///
/// # Returns
///
/// `(sigil, name)` tuple:
///
/// * `(Some(sigil), rest)` when `name` starts with `$`, `@`, or `%`; `rest` is
///   everything after the sigil and may be empty (e.g. for a lone `"$"`).
/// * `(None, name)` otherwise, including for the empty string.
///
/// The returned string slice always borrows from `name`.
///
/// # Examples
///
/// ```rust,ignore
/// use perl_parser::workspace_index::normalize_var;
///
/// assert_eq!(normalize_var("$count"), (Some('$'), "count"));
/// assert_eq!(normalize_var("process_emails"), (None, "process_emails"));
/// assert_eq!(normalize_var("$"), (Some('$'), ""));
/// ```
pub fn normalize_var(name: &str) -> (Option<char>, &str) {
    let mut chars = name.chars();
    match chars.next() {
        // `Chars::as_str` is the remainder after the consumed sigil, so no manual
        // byte slicing or length special-casing is required.
        Some(sigil @ ('$' | '@' | '%')) => (Some(sigil), chars.as_str()),
        // Empty input or a non-sigil first character: hand the name back untouched.
        _ => (None, name),
    }
}
991
992// Using lsp_types for Position and Range
993
#[derive(Debug, Clone, PartialEq, Eq)]
/// Internal location type used during Navigate/Analyze workflows.
///
/// Pairs a file URI with a source range; two locations are equal when both
/// the URI string and the range match exactly.
pub struct Location {
    /// File URI where the symbol is located
    pub uri: String,
    /// Start/end positions (line and column) within the file
    pub range: Range,
}
1002
#[derive(Debug, Clone, PartialEq, Eq)]
/// Stable symbol identity returned by cross-file reference queries.
///
/// Carried inside [`CrossFileReferenceQueryResult`] to tell callers which
/// symbol a set of references belongs to.
pub struct SymbolIdentity {
    /// Canonical stable key for the symbol (qualified when available).
    pub stable_key: String,
    /// Bare symbol name.
    pub name: String,
    /// Fully qualified symbol name when available; `None` otherwise.
    pub qualified_name: Option<String>,
    /// Symbol kind (subroutine, package, variable, ...).
    pub kind: SymbolKind,
}
1015
#[derive(Debug, Clone, PartialEq, Eq)]
/// Read-only cross-file query result used by rename/safe-delete planners.
///
/// Bundles the resolved symbol, its definition site, and every known
/// reference location into one owned value.
pub struct CrossFileReferenceQueryResult {
    /// Identity for the resolved symbol.
    pub symbol: SymbolIdentity,
    /// Definition site for the resolved symbol.
    pub definition: Location,
    /// All reference locations (including definition) in deterministic order.
    pub references: Vec<Location>,
}
1026
#[derive(Debug, Clone, Serialize, Deserialize)]
/// A symbol in the workspace for Index/Navigate workflows.
///
/// Serializable via serde. Converted to the LSP wire shape through
/// `LspWorkspaceSymbol::from(&WorkspaceSymbol)`.
pub struct WorkspaceSymbol {
    /// Symbol name without package qualification
    pub name: String,
    /// Type of symbol (subroutine, variable, package, etc.)
    pub kind: SymbolKind,
    /// File URI where the symbol is defined
    pub uri: String,
    /// Line and column range of the symbol definition
    pub range: Range,
    /// Fully qualified name including package (e.g., "Package::function")
    pub qualified_name: Option<String>,
    /// POD documentation associated with the symbol
    pub documentation: Option<String>,
    /// Name of the containing package or class
    pub container_name: Option<String>,
    /// Whether this symbol has a body (false for forward declarations).
    ///
    /// When the field is missing from deserialized input it falls back to
    /// `default_has_body()`, i.e. `true`.
    #[serde(default = "default_has_body")]
    pub has_body: bool,
    /// Workspace folder URI this symbol belongs to (for multi-root workspace support)
    pub workspace_folder_uri: Option<String>,
}
1050
/// Serde default for `WorkspaceSymbol::has_body`: a symbol is assumed to have
/// a body when the field is absent from the serialized data.
fn default_has_body() -> bool {
    true
}
1054
1055// Re-export the unified symbol types from perl-symbol
1056/// Symbol kind enums used during Index/Analyze workflows.
1057pub use perl_symbol::{SymbolKind, VarKind};
1058
#[derive(Debug, Clone)]
/// Reference to a symbol for Navigate/Analyze workflows.
///
/// Stored per file in `FileIndex::references`, keyed by symbol name.
pub struct SymbolReference {
    /// File URI where the reference occurs
    pub uri: String,
    /// Line and column range of the reference
    pub range: Range,
    /// How the symbol is being referenced (definition, usage, etc.)
    pub kind: ReferenceKind,
}
1069
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Classification of how a symbol is referenced in Navigate/Analyze workflows.
///
/// Attached to each [`SymbolReference`]; the type is `Copy`.
pub enum ReferenceKind {
    /// Symbol definition site (sub declaration, variable declaration)
    Definition,
    /// General usage of the symbol (function call, method call)
    Usage,
    /// Import via `use` statement
    Import,
    /// Variable read access
    Read,
    /// Variable write access (assignment target)
    Write,
}
1084
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
/// LSP-compliant workspace symbol for wire format in Navigate/Analyze workflows.
///
/// Field names are serialized in camelCase (e.g. `containerName`), and the two
/// optional fields are omitted from the output entirely when they are `None`.
pub struct LspWorkspaceSymbol {
    /// Symbol name as displayed to the user
    pub name: String,
    /// LSP symbol kind number (see lsp_types::SymbolKind)
    pub kind: u32,
    /// Location of the symbol definition
    pub location: WireLocation,
    /// Name of the containing symbol (package, class); skipped when `None`
    #[serde(skip_serializing_if = "Option::is_none")]
    pub container_name: Option<String>,
    /// Workspace folder URI this symbol belongs to (for multi-root workspace
    /// disambiguation); skipped when `None`
    #[serde(skip_serializing_if = "Option::is_none")]
    pub workspace_folder_uri: Option<String>,
}
1102
1103impl From<&WorkspaceSymbol> for LspWorkspaceSymbol {
1104    fn from(sym: &WorkspaceSymbol) -> Self {
1105        let range = WireRange {
1106            start: WirePosition { line: sym.range.start.line, character: sym.range.start.column },
1107            end: WirePosition { line: sym.range.end.line, character: sym.range.end.column },
1108        };
1109
1110        Self {
1111            name: sym.name.clone(),
1112            kind: sym.kind.to_lsp_kind(),
1113            location: WireLocation { uri: sym.uri.clone(), range },
1114            container_name: sym.container_name.clone(),
1115            workspace_folder_uri: sym.workspace_folder_uri.clone(),
1116        }
1117    }
1118}
1119
/// File-level index data.
///
/// One entry per indexed file, stored in `WorkspaceIndex::files`. All fields
/// are private to this module; `Default` yields an empty entry.
#[derive(Default, Clone)]
pub struct FileIndex {
    /// Canonical file URI for this index entry.
    source_uri: String,
    /// Symbols defined in this file
    symbols: Vec<WorkspaceSymbol>,
    /// References in this file (symbol name -> references)
    references: HashMap<String, Vec<SymbolReference>>,
    /// Dependencies (modules this file imports)
    dependencies: HashSet<String>,
    /// Content hash for early-exit optimization (skip re-indexing unchanged text)
    content_hash: u64,
    /// Workspace folder URI this file belongs to (for multi-root workspace support)
    folder_uri: Option<String>,
}
1136
/// Write-through semantic fact storage for one indexed file.
///
/// Stored in `WorkspaceIndex::fact_shards`, keyed by normalized URI.
#[derive(Clone, Debug)]
pub struct FileFactShard {
    /// Canonical file URI for this shard.
    pub source_uri: String,
    /// Stable file identifier derived from normalized URI.
    pub file_id: FileId,
    /// Whole-file content hash used for stale-shard replacement.
    pub content_hash: u64,
    /// Optional hash for the anchor category, used for change diagnostics.
    pub anchors_hash: Option<u64>,
    /// Optional hash for the entity category, used for change diagnostics.
    pub entities_hash: Option<u64>,
    /// Optional hash for the occurrence category, used for change diagnostics.
    pub occurrences_hash: Option<u64>,
    /// Optional hash for the edge category, used for change diagnostics.
    pub edges_hash: Option<u64>,
    /// Anchor facts for this file.
    pub anchors: Vec<AnchorFact>,
    /// Entity facts for this file.
    pub entities: Vec<EntityFact>,
    /// Occurrence facts for this file.
    pub occurrences: Vec<perl_semantic_facts::OccurrenceFact>,
    /// Edge facts for this file.
    pub edges: Vec<EdgeFact>,
}
1163
/// Thread-safe workspace index.
///
/// Each table lives behind its own `Arc<RwLock<..>>`, so the tables are locked
/// independently of one another.
pub struct WorkspaceIndex {
    /// Index data per file URI (normalized key -> data)
    files: Arc<RwLock<HashMap<String, FileIndex>>>,
    /// Global symbol multimap (qualified/bare name -> ordered definition candidates)
    ///
    /// Each symbol is entered under its bare name and, when it has one, under
    /// its qualified name as well.
    symbols: Arc<RwLock<HashMap<String, Vec<DefinitionCandidate>>>>,
    /// Global reference index (symbol name -> locations across all files)
    ///
    /// Aggregated from per-file `FileIndex::references` during `index_file()`.
    /// Provides O(1) lookup for `find_references()` instead of iterating all files.
    global_references: Arc<RwLock<HashMap<String, Vec<Location>>>>,
    /// Write-through semantic fact shards keyed by normalized URI.
    fact_shards: Arc<RwLock<HashMap<String, FileFactShard>>>,
    /// Semantic cross-file reference index (typed occurrences by name and entity).
    semantic_reference_index: Arc<RwLock<ReferenceIndex>>,
    /// Semantic cross-file import/export index.
    semantic_import_export_index: Arc<RwLock<ImportExportIndex>>,
    /// Document store for in-memory text
    document_store: DocumentStore,
    /// Workspace folder URIs for multi-root workspace support
    ///
    /// Used to determine which workspace folder a file belongs to for
    /// proper folder attribution in multi-root workspaces.
    workspace_folders: Arc<RwLock<Vec<String>>>,
}
1189
/// One possible definition site for a name in the global symbol multimap.
///
/// `PartialEq`/`Eq` compare both fields, which is what lets `Vec::dedup`
/// collapse adjacent identical candidates after sorting.
#[derive(Debug, Clone, Eq, PartialEq)]
struct DefinitionCandidate {
    /// Where the candidate definition lives.
    location: Location,
    /// Kind of the defined symbol; used to rank candidates when sorting.
    kind: SymbolKind,
}
1195
1196impl WorkspaceIndex {
1197    fn location_sort_key(location: &Location) -> (&str, u32, u32, u32, u32) {
1198        (
1199            location.uri.as_str(),
1200            location.range.start.line,
1201            location.range.start.column,
1202            location.range.end.line,
1203            location.range.end.column,
1204        )
1205    }
1206
1207    fn sort_locations_deterministically(locations: &mut [Location]) {
1208        locations.sort_by(|left, right| {
1209            Self::location_sort_key(left).cmp(&Self::location_sort_key(right))
1210        });
1211    }
1212
1213    fn definition_candidate_sort_key(
1214        candidate: &DefinitionCandidate,
1215    ) -> (u8, &str, u32, u32, u32, u32) {
1216        let rank = match candidate.kind {
1217            SymbolKind::Subroutine | SymbolKind::Method => 0,
1218            SymbolKind::Constant => 1,
1219            _ => 2,
1220        };
1221        (
1222            rank,
1223            candidate.location.uri.as_str(),
1224            candidate.location.range.start.line,
1225            candidate.location.range.start.column,
1226            candidate.location.range.end.line,
1227            candidate.location.range.end.column,
1228        )
1229    }
1230
1231    fn rebuild_symbol_cache(
1232        files: &HashMap<String, FileIndex>,
1233        symbols: &mut HashMap<String, Vec<DefinitionCandidate>>,
1234    ) {
1235        symbols.clear();
1236
1237        for file_index in files.values() {
1238            for symbol in &file_index.symbols {
1239                if let Some(ref qname) = symbol.qualified_name {
1240                    symbols.entry(qname.clone()).or_default().push(DefinitionCandidate {
1241                        location: Location { uri: symbol.uri.clone(), range: symbol.range },
1242                        kind: symbol.kind,
1243                    });
1244                }
1245                symbols.entry(symbol.name.clone()).or_default().push(DefinitionCandidate {
1246                    location: Location { uri: symbol.uri.clone(), range: symbol.range },
1247                    kind: symbol.kind,
1248                });
1249            }
1250        }
1251        for entries in symbols.values_mut() {
1252            entries.sort_by(|left, right| {
1253                Self::definition_candidate_sort_key(left)
1254                    .cmp(&Self::definition_candidate_sort_key(right))
1255            });
1256            entries.dedup();
1257        }
1258    }
1259
1260    /// Incrementally remove one file's symbols from the global cache,
1261    /// re-inserting shadowed symbols from remaining files.
1262    fn incremental_remove_symbols(
1263        files: &HashMap<String, FileIndex>,
1264        symbols: &mut HashMap<String, Vec<DefinitionCandidate>>,
1265        old_file_index: &FileIndex,
1266    ) {
1267        let mut affected_names: Vec<String> = Vec::new();
1268        for sym in &old_file_index.symbols {
1269            if let Some(ref qname) = sym.qualified_name {
1270                let mut remove_key = false;
1271                if let Some(entries) = symbols.get_mut(qname) {
1272                    entries.retain(|candidate| candidate.location.uri != sym.uri);
1273                    remove_key = entries.is_empty();
1274                }
1275                if remove_key {
1276                    symbols.remove(qname);
1277                    affected_names.push(qname.clone());
1278                }
1279            }
1280            let mut remove_key = false;
1281            if let Some(entries) = symbols.get_mut(&sym.name) {
1282                entries.retain(|candidate| candidate.location.uri != sym.uri);
1283                remove_key = entries.is_empty();
1284            }
1285            if remove_key {
1286                symbols.remove(&sym.name);
1287                affected_names.push(sym.name.clone());
1288            }
1289        }
1290        if !affected_names.is_empty() {
1291            symbols.clear();
1292            for file_index in files
1293                .values()
1294                .filter(|file_index| file_index.source_uri != old_file_index.source_uri)
1295            {
1296                for symbol in &file_index.symbols {
1297                    if let Some(ref qname) = symbol.qualified_name {
1298                        symbols.entry(qname.clone()).or_default().push(DefinitionCandidate {
1299                            location: Location { uri: symbol.uri.clone(), range: symbol.range },
1300                            kind: symbol.kind,
1301                        });
1302                    }
1303                    symbols.entry(symbol.name.clone()).or_default().push(DefinitionCandidate {
1304                        location: Location { uri: symbol.uri.clone(), range: symbol.range },
1305                        kind: symbol.kind,
1306                    });
1307                }
1308            }
1309            for entries in symbols.values_mut() {
1310                entries.sort_by(|left, right| {
1311                    Self::definition_candidate_sort_key(left)
1312                        .cmp(&Self::definition_candidate_sort_key(right))
1313                });
1314                entries.dedup();
1315            }
1316        }
1317    }
1318
1319    /// Incrementally add one file's symbols to the global cache.
1320    fn incremental_add_symbols(
1321        symbols: &mut HashMap<String, Vec<DefinitionCandidate>>,
1322        file_index: &FileIndex,
1323    ) {
1324        for sym in &file_index.symbols {
1325            if let Some(ref qname) = sym.qualified_name {
1326                symbols.entry(qname.clone()).or_default().push(DefinitionCandidate {
1327                    location: Location { uri: sym.uri.clone(), range: sym.range },
1328                    kind: sym.kind,
1329                });
1330            }
1331            symbols.entry(sym.name.clone()).or_default().push(DefinitionCandidate {
1332                location: Location { uri: sym.uri.clone(), range: sym.range },
1333                kind: sym.kind,
1334            });
1335        }
1336        for entries in symbols.values_mut() {
1337            entries.sort_by(|left, right| {
1338                Self::definition_candidate_sort_key(left)
1339                    .cmp(&Self::definition_candidate_sort_key(right))
1340            });
1341            entries.dedup();
1342        }
1343    }
1344
1345    /// Determine the workspace folder URI for a given file URI.
1346    ///
1347    /// Returns the workspace folder URI that contains the given file URI.
1348    /// This is used for multi-root workspace support to properly attribute
1349    /// files and symbols to their originating workspace folder.
1350    ///
1351    /// # Arguments
1352    ///
1353    /// * `file_uri` - The file URI to find the containing workspace folder for
1354    ///
1355    /// # Returns
1356    ///
1357    /// `Some(folder_uri)` if the file is within a workspace folder, `None` otherwise.
1358    ///
1359    /// # Examples
1360    ///
1361    /// ```rust,ignore
1362    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
1363    ///
1364    /// let index = WorkspaceIndex::new();
1365    /// index.set_workspace_folders(vec![
1366    ///     "file:///project1".to_string(),
1367    ///     "file:///project2".to_string(),
1368    /// ]);
1369    ///
1370    /// let folder = index.determine_folder_uri("file:///project1/src/main.pl");
1371    /// assert_eq!(folder, Some("file:///project1".to_string()));
1372    /// ```
1373    fn determine_folder_uri(&self, file_uri: &str) -> Option<String> {
1374        let folders = self.workspace_folders.read();
1375        let mut best_match: Option<&String> = None;
1376        for folder_uri in folders.iter() {
1377            // Check if the file URI starts with the folder URI
1378            // We need to ensure proper URI matching (with or without trailing slash)
1379            let folder_with_slash = if folder_uri.ends_with('/') {
1380                folder_uri.clone()
1381            } else {
1382                format!("{}/", folder_uri)
1383            };
1384            if file_uri.starts_with(&folder_with_slash) || file_uri == folder_uri {
1385                match best_match {
1386                    Some(existing) if existing.len() >= folder_uri.len() => {}
1387                    _ => best_match = Some(folder_uri),
1388                }
1389            }
1390        }
1391        best_match.cloned()
1392    }
1393
1394    fn find_definition_in_files(
1395        files: &HashMap<String, FileIndex>,
1396        symbol_name: &str,
1397        uri_filter: Option<&str>,
1398    ) -> Option<(Location, String)> {
1399        let mut candidates: Vec<(Location, String)> = Vec::new();
1400        for file_index in files.values() {
1401            if let Some(filter) = uri_filter
1402                && file_index.symbols.first().is_some_and(|symbol| symbol.uri != filter)
1403            {
1404                continue;
1405            }
1406
1407            for symbol in &file_index.symbols {
1408                if symbol.name == symbol_name
1409                    || symbol.qualified_name.as_deref() == Some(symbol_name)
1410                {
1411                    candidates.push((
1412                        Location { uri: symbol.uri.clone(), range: symbol.range },
1413                        symbol.uri.clone(),
1414                    ));
1415                }
1416            }
1417        }
1418
1419        candidates.sort_by(|left, right| {
1420            Self::location_sort_key(&left.0).cmp(&Self::location_sort_key(&right.0))
1421        });
1422        candidates.into_iter().next()
1423    }
1424
1425    fn find_symbol_by_definition(
1426        &self,
1427        definition: &Location,
1428        symbol_name: &str,
1429    ) -> Option<WorkspaceSymbol> {
1430        let files = self.files.read();
1431        files
1432            .values()
1433            .flat_map(|file_index| file_index.symbols.iter())
1434            .filter(|symbol| {
1435                symbol.uri == definition.uri
1436                    && symbol.range == definition.range
1437                    && (symbol.name == symbol_name
1438                        || symbol.qualified_name.as_deref() == Some(symbol_name))
1439            })
1440            .min_by(|left, right| {
1441                (
1442                    left.qualified_name.as_deref().unwrap_or_default(),
1443                    left.name.as_str(),
1444                    left.kind.to_lsp_kind(),
1445                )
1446                    .cmp(&(
1447                        right.qualified_name.as_deref().unwrap_or_default(),
1448                        right.name.as_str(),
1449                        right.kind.to_lsp_kind(),
1450                    ))
1451            })
1452            .cloned()
1453    }
1454
1455    fn has_unique_symbol_name_and_kind(&self, target: &WorkspaceSymbol) -> bool {
1456        let files = self.files.read();
1457        files
1458            .values()
1459            .flat_map(|file_index| file_index.symbols.iter())
1460            .filter(|symbol| symbol.name == target.name && symbol.kind == target.kind)
1461            .take(2)
1462            .count()
1463            == 1
1464    }
1465
1466    fn collect_symbol_references(&self, symbol: &WorkspaceSymbol) -> Vec<Location> {
1467        let mut names_to_query: Vec<&str> = Vec::new();
1468        if let Some(qualified_name) = symbol.qualified_name.as_deref() {
1469            names_to_query.push(qualified_name);
1470            if self.has_unique_symbol_name_and_kind(symbol) {
1471                names_to_query.push(symbol.name.as_str());
1472            }
1473        } else {
1474            names_to_query.push(symbol.name.as_str());
1475        }
1476
1477        let global_refs = self.global_references.read();
1478        let mut seen: HashSet<(String, u32, u32, u32, u32)> = HashSet::new();
1479        let mut locations = Vec::new();
1480
1481        for symbol_name in names_to_query {
1482            if let Some(refs) = global_refs.get(symbol_name) {
1483                for location in refs {
1484                    let key = (
1485                        location.uri.clone(),
1486                        location.range.start.line,
1487                        location.range.start.column,
1488                        location.range.end.line,
1489                        location.range.end.column,
1490                    );
1491                    if seen.insert(key) {
1492                        locations.push(location.clone());
1493                    }
1494                }
1495            }
1496        }
1497        drop(global_refs);
1498
1499        Self::sort_locations_deterministically(&mut locations);
1500        locations
1501    }
1502
1503    /// Create a new empty index
1504    ///
1505    /// # Returns
1506    ///
1507    /// A workspace index with empty file and symbol tables.
1508    ///
1509    /// # Examples
1510    ///
1511    /// ```rust,ignore
1512    /// use perl_parser::workspace_index::WorkspaceIndex;
1513    ///
1514    /// let index = WorkspaceIndex::new();
1515    /// assert!(!index.has_symbols());
1516    /// ```
1517    pub fn new() -> Self {
1518        Self {
1519            files: Arc::new(RwLock::new(HashMap::new())),
1520            symbols: Arc::new(RwLock::new(HashMap::new())),
1521            global_references: Arc::new(RwLock::new(HashMap::new())),
1522            fact_shards: Arc::new(RwLock::new(HashMap::new())),
1523            semantic_reference_index: Arc::new(RwLock::new(ReferenceIndex::new())),
1524            semantic_import_export_index: Arc::new(RwLock::new(ImportExportIndex::new())),
1525            document_store: DocumentStore::new(),
1526            workspace_folders: Arc::new(RwLock::new(Vec::new())),
1527        }
1528    }
1529
1530    /// Create a workspace index with pre-allocated capacity.
1531    ///
1532    /// Pre-allocating reduces the number of rehash operations during large-workspace
1533    /// startup. Use this instead of `new()` when the approximate workspace size is
1534    /// known in advance (e.g. from a file discovery scan).
1535    ///
1536    /// # Arguments
1537    ///
1538    /// * `estimated_files` - Expected number of source files in the workspace.
1539    /// * `avg_symbols_per_file` - Expected average number of symbols per file.
1540    ///
1541    /// # Panics
1542    ///
1543    /// Does not panic. Overflow is prevented via `saturating_mul` and an upper cap
1544    /// on the symbol/reference map capacity.
1545    ///
1546    /// # Examples
1547    ///
1548    /// ```rust,ignore
1549    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
1550    ///
1551    /// let index = WorkspaceIndex::with_capacity(1000, 20);
1552    /// assert!(!index.has_symbols());
1553    /// ```
1554    pub fn with_capacity(estimated_files: usize, avg_symbols_per_file: usize) -> Self {
1555        // Each symbol is stored twice (qualified + bare name) due to dual indexing.
1556        let sym_cap =
1557            estimated_files.saturating_mul(avg_symbols_per_file).saturating_mul(2).min(1_000_000);
1558        let ref_cap = (sym_cap / 4).min(1_000_000);
1559        Self {
1560            files: Arc::new(RwLock::new(HashMap::with_capacity(estimated_files))),
1561            symbols: Arc::new(RwLock::new(HashMap::with_capacity(sym_cap))),
1562            global_references: Arc::new(RwLock::new(HashMap::with_capacity(ref_cap))),
1563            fact_shards: Arc::new(RwLock::new(HashMap::with_capacity(estimated_files))),
1564            semantic_reference_index: Arc::new(RwLock::new(ReferenceIndex::new())),
1565            semantic_import_export_index: Arc::new(RwLock::new(ImportExportIndex::new())),
1566            document_store: DocumentStore::new(),
1567            workspace_folders: Arc::new(RwLock::new(Vec::new())),
1568        }
1569    }
1570
1571    /// Set the workspace folder URIs for multi-root workspace support.
1572    ///
1573    /// This method updates the list of workspace folders that the index
1574    /// uses to determine folder attribution for files and symbols.
1575    ///
1576    /// # Arguments
1577    ///
1578    /// * `folders` - A vector of workspace folder URIs
1579    ///
1580    /// # Examples
1581    ///
1582    /// ```rust,ignore
1583    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
1584    ///
1585    /// let index = WorkspaceIndex::new();
1586    /// index.set_workspace_folders(vec![
1587    ///     "file:///project1".to_string(),
1588    ///     "file:///project2".to_string(),
1589    /// ]);
1590    /// ```
1591    pub fn set_workspace_folders(&self, folders: Vec<String>) {
1592        let mut workspace_folders = self.workspace_folders.write();
1593        *workspace_folders = folders;
1594    }
1595
1596    /// Get the current workspace folder URIs.
1597    ///
1598    /// # Returns
1599    ///
1600    /// A vector of workspace folder URIs.
1601    #[must_use]
1602    pub fn workspace_folders(&self) -> Vec<String> {
1603        self.workspace_folders.read().clone()
1604    }
1605
    /// Normalize a URI to a consistent form using proper URI handling.
    ///
    /// Thin wrapper that delegates to `perl_uri::normalize_uri`, so all
    /// normalization inside this type goes through one place.
    fn normalize_uri(uri: &str) -> String {
        perl_uri::normalize_uri(uri)
    }
1610
1611    /// Remove a file's contributions from the global reference index.
1612    ///
1613    /// Retains only entries whose URI does not match `file_uri`.
1614    /// Empty keys are removed to avoid unbounded map growth.
1615    fn remove_file_global_refs(
1616        global_refs: &mut HashMap<String, Vec<Location>>,
1617        file_index: &FileIndex,
1618        file_uri: &str,
1619    ) {
1620        for name in file_index.references.keys() {
1621            if let Some(locs) = global_refs.get_mut(name) {
1622                locs.retain(|loc| loc.uri != file_uri);
1623                if locs.is_empty() {
1624                    global_refs.remove(name);
1625                }
1626            }
1627        }
1628    }
1629
1630    /// Index a file from its URI and text content
1631    ///
1632    /// # Arguments
1633    ///
1634    /// * `uri` - File URI identifying the document
1635    /// * `text` - Full Perl source text for indexing
1636    ///
1637    /// # Returns
1638    ///
1639    /// `Ok(())` when indexing succeeds, or an error message otherwise.
1640    ///
1641    /// # Errors
1642    ///
1643    /// Returns an error if parsing fails or the document store cannot be updated.
1644    ///
1645    /// # Examples
1646    ///
1647    /// ```rust,ignore
1648    /// use perl_parser::workspace_index::WorkspaceIndex;
1649    /// use url::Url;
1650    ///
1651    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1652    /// let index = WorkspaceIndex::new();
1653    /// let uri = Url::parse("file:///example.pl")?;
1654    /// index.index_file(uri, "sub hello { return 1; }".to_string())?;
1655    /// # Ok(())
1656    /// # }
1657    /// ```
1658    ///
1659    /// Returns: `Ok(())` when indexing succeeds, otherwise an error string.
1660    pub fn index_file(&self, uri: Url, text: String) -> Result<(), String> {
1661        let uri_str = uri.to_string();
1662
1663        // Compute content hash for early-exit optimization
1664        let mut hasher = DefaultHasher::new();
1665        text.hash(&mut hasher);
1666        let content_hash = hasher.finish();
1667
1668        // Check if content is unchanged (early-exit optimization)
1669        let key = DocumentStore::uri_key(&uri_str);
1670        {
1671            let files = self.files.read();
1672            if let Some(existing_index) = files.get(&key) {
1673                if existing_index.content_hash == content_hash {
1674                    // Content unchanged, skip re-indexing
1675                    return Ok(());
1676                }
1677            }
1678        }
1679
1680        // Update document store
1681        if self.document_store.is_open(&uri_str) {
1682            self.document_store.update(&uri_str, 1, text.clone());
1683        } else {
1684            self.document_store.open(uri_str.clone(), 1, text.clone());
1685        }
1686
1687        // Parse the file
1688        let mut parser = Parser::new(&text);
1689        let ast = match parser.parse() {
1690            Ok(ast) => ast,
1691            Err(e) => return Err(format!("Parse error: {}", e)),
1692        };
1693
1694        // Get the document for line index
1695        let mut doc = self.document_store.get(&uri_str).ok_or("Document not found")?;
1696
1697        // Determine workspace folder URI from the file URI
1698        let folder_uri = self.determine_folder_uri(&uri_str);
1699
1700        // Extract symbols and references
1701        let mut file_index = FileIndex {
1702            source_uri: uri_str.clone(),
1703            content_hash,
1704            folder_uri: folder_uri.clone(),
1705            ..Default::default()
1706        };
1707        let mut visitor = IndexVisitor::new(&mut doc, uri_str.clone(), folder_uri);
1708        visitor.visit(&ast, &mut file_index);
1709
1710        let canonical_shard =
1711            Self::build_canonical_fact_shard_for_ast(&uri_str, content_hash, &ast);
1712        let fact_shard = if canonical_shard.anchors.is_empty()
1713            && canonical_shard.entities.is_empty()
1714            && canonical_shard.occurrences.is_empty()
1715            && canonical_shard.edges.is_empty()
1716        {
1717            Self::build_fact_shard(&uri_str, content_hash, &file_index)
1718        } else {
1719            canonical_shard
1720        };
1721
1722        // Extract import specs from the AST — populates ImportExportIndex so
1723        // that `Foo->import(@names)` dynamic-import suppression is live in
1724        // production.  This runs outside the write lock to avoid holding it
1725        // longer than necessary.
1726        //
1727        // Lock ordering note: `semantic_import_export_index` is acquired write
1728        // separately from (and after) `files`/`symbols`/`global_references` to
1729        // match the consistent lock-order used throughout this file.
1730        let file_id = Self::hash_uri_to_file_id(&uri_str);
1731        let import_specs =
1732            crate::semantic::workspace_import_extractor::extract_import_specs(&ast, file_id);
1733
1734        // Update the index, refresh the global symbol cache, and replace this file's
1735        // contribution in the global reference index.
1736        {
1737            let mut files = self.files.write();
1738
1739            // Remove stale global references from previous version of this file
1740            if let Some(old_index) = files.get(&key) {
1741                let mut global_refs = self.global_references.write();
1742                Self::remove_file_global_refs(&mut global_refs, old_index, &uri_str);
1743            }
1744
1745            // Incrementally remove old symbols before inserting new file
1746            if let Some(old_index) = files.get(&key) {
1747                let mut symbols = self.symbols.write();
1748                Self::incremental_remove_symbols(&files, &mut symbols, old_index);
1749                drop(symbols);
1750            }
1751            files.insert(key.clone(), file_index);
1752            let mut symbols = self.symbols.write();
1753            if let Some(new_index) = files.get(&key) {
1754                Self::incremental_add_symbols(&mut symbols, new_index);
1755            }
1756
1757            if let Some(file_index) = files.get(&key) {
1758                let mut global_refs = self.global_references.write();
1759                for (name, refs) in &file_index.references {
1760                    let entry = global_refs.entry(name.clone()).or_default();
1761                    for reference in refs {
1762                        entry.push(Location { uri: reference.uri.clone(), range: reference.range });
1763                    }
1764                }
1765            }
1766            self.replace_fact_shard_incremental(&key, fact_shard);
1767        }
1768
1769        // Update the import/export index with the freshly extracted import specs.
1770        // Stale entries for this URI are removed first (incremental re-indexing).
1771        // This is done after the main write lock block to follow the established
1772        // lock ordering (shards → reference_index → import_export_index).
1773        {
1774            let mut ie_idx = self.semantic_import_export_index.write();
1775            ie_idx.remove_file_imports(&uri_str);
1776            ie_idx.add_file_imports(&uri_str, file_id, import_specs);
1777        }
1778
1779        Ok(())
1780    }
1781
1782    /// Remove a file from the index
1783    ///
1784    /// # Arguments
1785    ///
1786    /// * `uri` - File URI (string form) to remove
1787    ///
1788    /// # Returns
1789    ///
1790    /// Nothing. The index is updated in-place.
1791    ///
1792    /// # Examples
1793    ///
1794    /// ```rust,ignore
1795    /// use perl_parser::workspace_index::WorkspaceIndex;
1796    ///
1797    /// let index = WorkspaceIndex::new();
1798    /// index.remove_file("file:///example.pl");
1799    /// ```
1800    pub fn remove_file(&self, uri: &str) {
1801        let uri_str = Self::normalize_uri(uri);
1802        let key = DocumentStore::uri_key(&uri_str);
1803
1804        // Remove from document store
1805        self.document_store.close(&uri_str);
1806
1807        // Remove file index
1808        let mut files = self.files.write();
1809        if let Some(file_index) = files.remove(&key) {
1810            self.fact_shards.write().remove(&key);
1811
1812            // Clean up semantic cross-file indexes for this file.
1813            self.semantic_reference_index.write().remove_file(&uri_str);
1814            {
1815                let mut ie_idx = self.semantic_import_export_index.write();
1816                ie_idx.remove_file_imports(&uri_str);
1817                ie_idx.remove_module_exports(&uri_str);
1818            }
1819
1820            // Incrementally remove symbols and re-insert any shadowed names.
1821            let mut symbols = self.symbols.write();
1822            Self::incremental_remove_symbols(&files, &mut symbols, &file_index);
1823
1824            // Defensive sweep: purge any remaining cache entries whose value
1825            // points to this file's URI.  incremental_remove_symbols already
1826            // handles known symbol names; this sweep guarantees no stale
1827            // candidates survive even when:
1828            //   * the file had zero symbols (nothing for incremental_remove
1829            //     to walk), or
1830            //   * a symbol's stored uri differs from the canonical normalize_uri
1831            //     output (URI normalization edge cases).
1832            // Match against every URI spelling observed in this file index plus
1833            // the canonical uri_str so raw/normalized variants are all caught.
1834            let mut removed_uris = vec![uri_str.as_str()];
1835            for observed_uri in file_index.symbols.iter().map(|s| s.uri.as_str()).chain(
1836                file_index.references.values().flat_map(|refs| refs.iter().map(|r| r.uri.as_str())),
1837            ) {
1838                if !removed_uris.contains(&observed_uri) {
1839                    removed_uris.push(observed_uri);
1840                }
1841            }
1842            symbols.retain(|_, candidates| {
1843                candidates.retain(|candidate| {
1844                    let cand_uri = candidate.location.uri.as_str();
1845                    !removed_uris.contains(&cand_uri)
1846                });
1847                !candidates.is_empty()
1848            });
1849
1850            // Remove from global reference index. Two-phase cleanup: first
1851            // remove names this file was known to reference (cheap path), then
1852            // a defensive sweep over all remaining entries to catch any that
1853            // were inserted under names not present in this file's
1854            // FileIndex::references map (e.g. via aggregated/global insertion
1855            // paths). Empty buckets are dropped.
1856            let mut global_refs = self.global_references.write();
1857            Self::remove_file_global_refs(&mut global_refs, &file_index, &uri_str);
1858            global_refs.retain(|_, locs| {
1859                locs.retain(|loc| !removed_uris.contains(&loc.uri.as_str()));
1860                !locs.is_empty()
1861            });
1862        }
1863    }
1864
    /// Remove a file from the index, taking the URI as a parsed `Url`.
    ///
    /// Convenience wrapper: forwards `uri.as_str()` to `remove_file`.
    ///
    /// # Arguments
    ///
    /// * `uri` - File URI as a parsed `Url`
    ///
    /// # Returns
    ///
    /// Nothing. The index is updated in-place.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
    /// use url::Url;
    ///
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let index = WorkspaceIndex::new();
    /// let uri = Url::parse("file:///example.pl")?;
    /// index.remove_file_url(&uri);
    /// # Ok(())
    /// # }
    /// ```
    pub fn remove_file_url(&self, uri: &Url) {
        self.remove_file(uri.as_str())
    }
1891
    /// Clear a file from the index.
    ///
    /// Alias for `remove_file`: the call is forwarded unchanged.
    ///
    /// # Arguments
    ///
    /// * `uri` - File URI (string form) to remove
    ///
    /// # Returns
    ///
    /// Nothing. The index is updated in-place.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
    ///
    /// let index = WorkspaceIndex::new();
    /// index.clear_file("file:///example.pl");
    /// ```
    pub fn clear_file(&self, uri: &str) {
        self.remove_file(uri);
    }
1913
    /// Clear a file from the index, taking the URI as a parsed `Url`.
    ///
    /// Convenience wrapper: forwards `uri.as_str()` to `clear_file`.
    ///
    /// # Arguments
    ///
    /// * `uri` - File URI as a parsed `Url`
    ///
    /// # Returns
    ///
    /// Nothing. The index is updated in-place.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
    /// use url::Url;
    ///
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let index = WorkspaceIndex::new();
    /// let uri = Url::parse("file:///example.pl")?;
    /// index.clear_file_url(&uri);
    /// # Ok(())
    /// # }
    /// ```
    pub fn clear_file_url(&self, uri: &Url) {
        self.clear_file(uri.as_str())
    }
1940
1941    /// Remove all files from a specific workspace folder.
1942    ///
1943    /// This method removes all indexed files that belong to the given
1944    /// workspace folder URI. This is useful when a workspace folder is
1945    /// removed from the multi-root workspace.
1946    ///
1947    /// # Arguments
1948    ///
1949    /// * `folder_uri` - The workspace folder URI to remove files from
1950    ///
1951    /// # Examples
1952    ///
1953    /// ```rust,ignore
1954    /// use perl_workspace::workspace::workspace_index::WorkspaceIndex;
1955    ///
1956    /// let index = WorkspaceIndex::new();
1957    /// // Index files from multiple folders...
1958    /// index.remove_folder("file:///project1");
1959    /// ```
1960    pub fn remove_folder(&self, folder_uri: &str) {
1961        let mut uris_to_remove = Vec::new();
1962        let files = self.files.read();
1963
1964        // Collect all files that belong to this folder
1965        for file_index in files.values() {
1966            if file_index.folder_uri.as_deref() == Some(folder_uri) {
1967                uris_to_remove.push(file_index.source_uri.clone());
1968            }
1969        }
1970        drop(files);
1971
1972        // Remove each file through the full removal path to keep
1973        // symbol/reference caches and document store in sync.
1974        for uri in uris_to_remove {
1975            self.remove_file(&uri);
1976        }
1977    }
1978
1979    #[cfg(not(target_arch = "wasm32"))]
1980    /// Index a file from a URI string for the Index/Analyze workflow.
1981    ///
1982    /// Accepts either a `file://` URI or a filesystem path. Not available on
1983    /// wasm32 targets (requires filesystem path conversion).
1984    ///
1985    /// # Arguments
1986    ///
1987    /// * `uri` - File URI string or filesystem path.
1988    /// * `text` - Full Perl source text for indexing.
1989    ///
1990    /// # Returns
1991    ///
1992    /// `Ok(())` when indexing succeeds, or an error message otherwise.
1993    ///
1994    /// # Errors
1995    ///
1996    /// Returns an error if the URI is invalid or parsing fails.
1997    ///
1998    /// # Examples
1999    ///
2000    /// ```rust,ignore
2001    /// use perl_parser::workspace_index::WorkspaceIndex;
2002    ///
2003    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
2004    /// let index = WorkspaceIndex::new();
2005    /// index.index_file_str("file:///example.pl", "sub hello { }")?;
2006    /// # Ok(())
2007    /// # }
2008    /// ```
2009    pub fn index_file_str(&self, uri: &str, text: &str) -> Result<(), String> {
2010        let path = Path::new(uri);
2011        let url = if path.is_absolute() {
2012            url::Url::from_file_path(path)
2013                .map_err(|_| format!("Invalid URI or file path: {}", uri))?
2014        } else {
2015            // Raw absolute Windows paths like C:\foo can parse as a bogus URI
2016            // (`c:` scheme). Prefer URL parsing only for non-path inputs.
2017            url::Url::parse(uri).or_else(|_| {
2018                url::Url::from_file_path(path)
2019                    .map_err(|_| format!("Invalid URI or file path: {}", uri))
2020            })?
2021        };
2022        self.index_file(url, text.to_string())
2023    }
2024
2025    /// Index multiple files in a single batch operation.
2026    ///
2027    /// This is significantly faster than calling `index_file` in a loop for
2028    /// initial workspace scans because it defers the global symbol cache
2029    /// rebuild to a single pass at the end.
2030    ///
2031    /// Phase 1: Parse all files without holding locks.
2032    /// Phase 2: Bulk-insert file indices and rebuild the symbol cache once.
2033    pub fn index_files_batch(&self, files_to_index: Vec<(Url, String)>) -> Vec<String> {
2034        let mut errors = Vec::new();
2035
2036        // Phase 1: Parse all files without locks
2037        let mut parsed: Vec<(String, String, FileIndex)> = Vec::with_capacity(files_to_index.len());
2038        for (uri, text) in &files_to_index {
2039            let uri_str = uri.to_string();
2040
2041            // Content hash for early-exit
2042            let mut hasher = DefaultHasher::new();
2043            text.hash(&mut hasher);
2044            let content_hash = hasher.finish();
2045
2046            let key = DocumentStore::uri_key(&uri_str);
2047
2048            // Check if content unchanged
2049            {
2050                let files = self.files.read();
2051                if let Some(existing) = files.get(&key) {
2052                    if existing.content_hash == content_hash {
2053                        continue;
2054                    }
2055                }
2056            }
2057
2058            // Update document store
2059            if self.document_store.is_open(&uri_str) {
2060                self.document_store.update(&uri_str, 1, text.clone());
2061            } else {
2062                self.document_store.open(uri_str.clone(), 1, text.clone());
2063            }
2064
2065            // Parse
2066            let mut parser = Parser::new(text);
2067            let ast = match parser.parse() {
2068                Ok(ast) => ast,
2069                Err(e) => {
2070                    errors.push(format!("Parse error in {}: {}", uri_str, e));
2071                    continue;
2072                }
2073            };
2074
2075            let mut doc = match self.document_store.get(&uri_str) {
2076                Some(d) => d,
2077                None => {
2078                    errors.push(format!("Document not found: {}", uri_str));
2079                    continue;
2080                }
2081            };
2082
2083            // Determine workspace folder URI from the file URI
2084            let folder_uri = self.determine_folder_uri(&uri_str);
2085
2086            let mut file_index = FileIndex {
2087                source_uri: uri_str.clone(),
2088                content_hash,
2089                folder_uri: folder_uri.clone(),
2090                ..Default::default()
2091            };
2092            let mut visitor = IndexVisitor::new(&mut doc, uri_str.clone(), folder_uri);
2093            visitor.visit(&ast, &mut file_index);
2094
2095            parsed.push((key, uri_str, file_index));
2096        }
2097
2098        // Phase 2: Bulk insert with single cache rebuild
2099        {
2100            let mut files = self.files.write();
2101            let mut symbols = self.symbols.write();
2102            let mut global_refs = self.global_references.write();
2103
2104            // Pre-allocate capacity for the incoming batch to avoid rehashing.
2105            // Each symbol is indexed under both its qualified name and bare name.
2106            files.reserve(parsed.len());
2107            symbols.reserve(parsed.len().saturating_mul(20).saturating_mul(2));
2108
2109            for (key, uri_str, file_index) in parsed {
2110                // Remove stale global references
2111                if let Some(old_index) = files.get(&key) {
2112                    Self::remove_file_global_refs(&mut global_refs, old_index, &uri_str);
2113                }
2114
2115                files.insert(key.clone(), file_index);
2116
2117                // Add global references for this file
2118                if let Some(fi) = files.get(&key) {
2119                    for (name, refs) in &fi.references {
2120                        let entry = global_refs.entry(name.clone()).or_default();
2121                        for reference in refs {
2122                            entry.push(Location {
2123                                uri: reference.uri.clone(),
2124                                range: reference.range,
2125                            });
2126                        }
2127                    }
2128                }
2129            }
2130
2131            // Single rebuild at the end
2132            Self::rebuild_symbol_cache(&files, &mut symbols);
2133        }
2134
2135        errors
2136    }
2137
2138    /// Find all references to a symbol using dual indexing strategy
2139    ///
2140    /// This function searches for both exact matches and bare name matches when
2141    /// the symbol is qualified. For example, when searching for "Utils::process_data":
2142    /// - First searches for exact "Utils::process_data" references
2143    /// - Then searches for bare "process_data" references that might refer to the same function
2144    ///
2145    /// This dual approach handles cases where functions are called both as:
2146    /// - Qualified: `Utils::process_data()`
2147    /// - Unqualified: `process_data()` (when in the same package or imported)
2148    ///
2149    /// # Arguments
2150    ///
2151    /// * `symbol_name` - Symbol name or qualified name to search
2152    ///
2153    /// # Returns
2154    ///
2155    /// All reference locations found for the requested symbol.
2156    ///
2157    /// # Examples
2158    ///
2159    /// ```rust,ignore
2160    /// use perl_parser::workspace_index::WorkspaceIndex;
2161    ///
2162    /// let index = WorkspaceIndex::new();
2163    /// let _refs = index.find_references("Utils::process_data");
2164    /// ```
2165    pub fn find_references(&self, symbol_name: &str) -> Vec<Location> {
2166        let global_refs = self.global_references.read();
2167        let mut seen: HashSet<(String, u32, u32, u32, u32)> = HashSet::new();
2168        let mut locations = Vec::new();
2169
2170        // O(1) lookup for exact symbol name
2171        if let Some(refs) = global_refs.get(symbol_name) {
2172            for loc in refs {
2173                let key = (
2174                    loc.uri.clone(),
2175                    loc.range.start.line,
2176                    loc.range.start.column,
2177                    loc.range.end.line,
2178                    loc.range.end.column,
2179                );
2180                if seen.insert(key) {
2181                    locations.push(Location { uri: loc.uri.clone(), range: loc.range });
2182                }
2183            }
2184        }
2185
2186        // If the symbol is qualified, also collect bare name references
2187        if let Some(idx) = symbol_name.rfind("::") {
2188            let bare_name = &symbol_name[idx + 2..];
2189            if let Some(refs) = global_refs.get(bare_name) {
2190                for loc in refs {
2191                    let key = (
2192                        loc.uri.clone(),
2193                        loc.range.start.line,
2194                        loc.range.start.column,
2195                        loc.range.end.line,
2196                        loc.range.end.column,
2197                    );
2198                    if seen.insert(key) {
2199                        locations.push(Location { uri: loc.uri.clone(), range: loc.range });
2200                    }
2201                }
2202            }
2203        } else {
2204            // If the symbol is bare, also collect qualified references that end
2205            // with the same bare name, e.g. `Pkg::foo` when searching for `foo`.
2206            for (name, refs) in global_refs.iter() {
2207                if !Self::is_qualified_variant_of(name, symbol_name) {
2208                    continue;
2209                }
2210
2211                for loc in refs {
2212                    let key = (
2213                        loc.uri.clone(),
2214                        loc.range.start.line,
2215                        loc.range.start.column,
2216                        loc.range.end.line,
2217                        loc.range.end.column,
2218                    );
2219                    if seen.insert(key) {
2220                        locations.push(Location { uri: loc.uri.clone(), range: loc.range });
2221                    }
2222                }
2223            }
2224        }
2225
2226        Self::sort_locations_deterministically(&mut locations);
2227        locations
2228    }
2229
2230    /// Resolve a symbol and return its definition/reference set for cross-file planning.
2231    ///
2232    /// Returns `None` when no definition can be resolved for `symbol_name`.
2233    pub fn query_symbol_references(
2234        &self,
2235        symbol_name: &str,
2236    ) -> Option<CrossFileReferenceQueryResult> {
2237        let definition = self.find_definition(symbol_name)?;
2238        let symbol = self.find_symbol_by_definition(&definition, symbol_name)?;
2239
2240        let stable_key = symbol.qualified_name.clone().unwrap_or_else(|| {
2241            format!(
2242                "{}@{}:{}:{}",
2243                symbol.name, symbol.uri, symbol.range.start.line, symbol.range.start.column
2244            )
2245        });
2246        let mut references = self.collect_symbol_references(&symbol);
2247        if !references.iter().any(|location| location == &definition) {
2248            references.push(definition.clone());
2249            Self::sort_locations_deterministically(&mut references);
2250        }
2251
2252        Some(CrossFileReferenceQueryResult {
2253            symbol: SymbolIdentity {
2254                stable_key,
2255                name: symbol.name,
2256                qualified_name: symbol.qualified_name,
2257                kind: symbol.kind,
2258            },
2259            definition,
2260            references,
2261        })
2262    }
2263
2264    /// Count non-definition references (usages) of a symbol.
2265    ///
2266    /// Like `find_references` but excludes `ReferenceKind::Definition` entries,
2267    /// returning only actual usage sites. This is used by code lens to show
2268    /// "N references" where N means call sites, not the definition itself.
2269    pub fn count_usages(&self, symbol_name: &str) -> usize {
2270        let files = self.files.read();
2271        let mut seen: HashSet<(String, u32, u32, u32, u32)> = HashSet::new();
2272
2273        for (_uri_key, file_index) in files.iter() {
2274            if let Some(refs) = file_index.references.get(symbol_name) {
2275                for r in refs.iter().filter(|r| r.kind != ReferenceKind::Definition) {
2276                    seen.insert((
2277                        r.uri.clone(),
2278                        r.range.start.line,
2279                        r.range.start.column,
2280                        r.range.end.line,
2281                        r.range.end.column,
2282                    ));
2283                }
2284            }
2285
2286            if let Some(idx) = symbol_name.rfind("::") {
2287                let bare_name = &symbol_name[idx + 2..];
2288                if let Some(refs) = file_index.references.get(bare_name) {
2289                    for r in refs.iter().filter(|r| r.kind != ReferenceKind::Definition) {
2290                        seen.insert((
2291                            r.uri.clone(),
2292                            r.range.start.line,
2293                            r.range.start.column,
2294                            r.range.end.line,
2295                            r.range.end.column,
2296                        ));
2297                    }
2298                }
2299            } else {
2300                for (name, refs) in &file_index.references {
2301                    if !Self::is_qualified_variant_of(name, symbol_name) {
2302                        continue;
2303                    }
2304
2305                    for r in refs.iter().filter(|r| r.kind != ReferenceKind::Definition) {
2306                        seen.insert((
2307                            r.uri.clone(),
2308                            r.range.start.line,
2309                            r.range.start.column,
2310                            r.range.end.line,
2311                            r.range.end.column,
2312                        ));
2313                    }
2314                }
2315            }
2316        }
2317
2318        seen.len()
2319    }
2320
2321    fn is_qualified_variant_of(candidate: &str, bare_symbol: &str) -> bool {
2322        candidate.rsplit_once("::").is_some_and(|(_, candidate_bare)| candidate_bare == bare_symbol)
2323    }
2324
2325    /// Find the definition of a symbol
2326    ///
2327    /// # Arguments
2328    ///
2329    /// * `symbol_name` - Symbol name or qualified name to resolve
2330    ///
2331    /// # Returns
2332    ///
2333    /// The first matching definition location, if found.
2334    ///
2335    /// # Examples
2336    ///
2337    /// ```rust,ignore
2338    /// use perl_parser::workspace_index::WorkspaceIndex;
2339    ///
2340    /// let index = WorkspaceIndex::new();
2341    /// let _def = index.find_definition("MyPackage::example");
2342    /// ```
2343    pub fn find_definition(&self, symbol_name: &str) -> Option<Location> {
2344        if let Some(location) = self.definition_candidates(symbol_name).into_iter().next() {
2345            return Some(location);
2346        }
2347
2348        // Fall back to a full files scan for this query. The result is intentionally
2349        // NOT written back to `self.symbols`: every indexed symbol is already
2350        // inserted under both qualified and bare names by `incremental_add_symbols`,
2351        // so any cache miss here is for a key that does not correspond to an
2352        // indexed symbol (e.g. a typo or alias). Caching such queries is unsound
2353        // (entries become stale on file edits and were never tracked for cleanup
2354        // in `remove_file`/`incremental_remove_symbols`) and lets the cache grow
2355        // unboundedly across long sessions. Returning the resolved location
2356        // directly preserves correctness without retaining state.
2357        let files = self.files.read();
2358        Self::find_definition_in_files(&files, symbol_name, None).map(|(location, _uri)| location)
2359    }
2360
2361    pub(crate) fn definition_candidates(&self, symbol_name: &str) -> Vec<Location> {
2362        let symbols = self.symbols.read();
2363        symbols
2364            .get(symbol_name)
2365            .map(|candidates| {
2366                candidates.iter().map(|candidate| candidate.location.clone()).collect()
2367            })
2368            .unwrap_or_default()
2369    }
2370
2371    /// Get all symbols in the workspace
2372    ///
2373    /// # Returns
2374    ///
2375    /// A vector containing every symbol currently indexed.
2376    ///
2377    /// # Examples
2378    ///
2379    /// ```rust,ignore
2380    /// use perl_parser::workspace_index::WorkspaceIndex;
2381    ///
2382    /// let index = WorkspaceIndex::new();
2383    /// let _symbols = index.all_symbols();
2384    /// ```
2385    pub fn all_symbols(&self) -> Vec<WorkspaceSymbol> {
2386        let files = self.files.read();
2387        let mut symbols = Vec::new();
2388
2389        for (_uri_key, file_index) in files.iter() {
2390            symbols.extend(file_index.symbols.clone());
2391        }
2392
2393        symbols
2394    }
2395
2396    /// Clear all indexed files and symbols from the workspace.
2397    pub fn clear(&self) {
2398        self.files.write().clear();
2399        self.symbols.write().clear();
2400        self.global_references.write().clear();
2401        self.fact_shards.write().clear();
2402        *self.semantic_reference_index.write() = ReferenceIndex::new();
2403        *self.semantic_import_export_index.write() = ImportExportIndex::new();
2404    }
2405
2406    fn hash_uri_to_file_id(uri: &str) -> FileId {
2407        let mut hasher = DefaultHasher::new();
2408        uri.hash(&mut hasher);
2409        FileId(hasher.finish())
2410    }
2411
2412    fn build_fact_shard(uri: &str, content_hash: u64, file_index: &FileIndex) -> FileFactShard {
2413        let file_id = Self::hash_uri_to_file_id(uri);
2414        let mut anchors = Vec::new();
2415        let mut entities = Vec::new();
2416        for (idx, symbol) in file_index.symbols.iter().enumerate() {
2417            let anchor_id = AnchorId((idx + 1) as u64);
2418            anchors.push(AnchorFact {
2419                id: anchor_id,
2420                file_id,
2421                // WorkspaceSymbol provides line/column coordinates only, not byte
2422                // offsets.  Zero-initialize span_*_byte until a byte-offset source
2423                // is plumbed through the indexing pipeline.
2424                span_start_byte: 0,
2425                span_end_byte: 0,
2426                scope_id: None,
2427                provenance: Provenance::SearchFallback,
2428                confidence: Confidence::Low,
2429            });
2430            entities.push(EntityFact {
2431                id: EntityId((idx + 1) as u64),
2432                kind: EntityKind::Unknown,
2433                canonical_name: symbol
2434                    .qualified_name
2435                    .clone()
2436                    .unwrap_or_else(|| symbol.name.clone()),
2437                anchor_id: Some(anchor_id),
2438                scope_id: None,
2439                provenance: Provenance::SearchFallback,
2440                confidence: Confidence::Low,
2441            });
2442        }
2443        // Hash the per-category fact vectors so consumers can detect staleness
2444        // without re-reading the full shard.
2445        let anchors_hash = {
2446            let mut h = DefaultHasher::new();
2447            anchors.len().hash(&mut h);
2448            for a in &anchors {
2449                a.id.hash(&mut h);
2450                a.span_start_byte.hash(&mut h);
2451                a.span_end_byte.hash(&mut h);
2452            }
2453            h.finish()
2454        };
2455        let entities_hash = {
2456            let mut h = DefaultHasher::new();
2457            entities.len().hash(&mut h);
2458            for e in &entities {
2459                e.id.hash(&mut h);
2460                e.canonical_name.hash(&mut h);
2461            }
2462            h.finish()
2463        };
2464        FileFactShard {
2465            source_uri: uri.to_string(),
2466            file_id,
2467            content_hash,
2468            anchors_hash: Some(anchors_hash),
2469            entities_hash: Some(entities_hash),
2470            occurrences_hash: Some(0),
2471            edges_hash: Some(0),
2472            anchors,
2473            entities,
2474            occurrences: Vec::new(),
2475            edges: Vec::new(),
2476        }
2477    }
2478
2479    /// Build a canonical [`FileFactShard`] from the AST using the semantic
2480    /// fact adapters in `perl-symbol`.
2481    ///
2482    /// This is the canonical population path that produces facts with real
2483    /// byte spans, `ExactAst` provenance, and per-category hashes. It runs
2484    /// alongside the legacy `build_fact_shard` path during the migration
2485    /// period.
2486    fn build_canonical_fact_shard_for_ast(
2487        uri: &str,
2488        content_hash: u64,
2489        ast: &Node,
2490    ) -> FileFactShard {
2491        let file_id = Self::hash_uri_to_file_id(uri);
2492
2493        // Extract declarations and references from the AST.
2494        let decls = extract_symbol_decls(ast, None);
2495        let refs = extract_symbol_refs(ast);
2496
2497        // Run the canonical adapters.
2498        let decl_facts = symbol_decls_to_semantic_facts(&decls, file_id);
2499
2500        // Build an entity lookup map for reference resolution.
2501        let entity_ids_by_name: std::collections::BTreeMap<String, EntityId> =
2502            decl_facts.entities.iter().map(|e| (e.canonical_name.clone(), e.id)).collect();
2503        let ref_facts = symbol_refs_to_semantic_facts(&refs, file_id, &entity_ids_by_name);
2504
2505        // Extract dynamic boundary evidence for `eval "sub NAME { ... }"` patterns.
2506        // Non-literal evals (e.g. `eval $code`) are intentionally skipped — the
2507        // sub name is not statically known and no evidence is emitted.
2508        let eval_sub_triples =
2509            crate::semantic::eval_sub_extractor::extract_eval_sub_boundaries(ast, file_id);
2510        let dynamic_boundaries: Vec<perl_semantic_facts::OccurrenceFact> =
2511            eval_sub_triples.iter().map(|(_, _, occ)| occ.clone()).collect();
2512        let generated_member_facts =
2513            crate::semantic::generated_member_extractor::extract_generated_member_facts(
2514                ast, file_id,
2515            );
2516
2517        // Build the canonical fact shard.
2518        // Import specs (for `use`, `require`, `ClassName->import()`) are
2519        // populated separately via ImportExportIndex — not passed here.
2520        let mut shard = crate::semantic::facts::build_canonical_fact_shard(
2521            uri,
2522            content_hash,
2523            &decl_facts,
2524            &ref_facts,
2525            &[],
2526            &dynamic_boundaries,
2527        );
2528
2529        // Merge entity and anchor facts from semantic producers into the shard.
2530        // The `build_canonical_fact_shard` function only accepts OccurrenceFact
2531        // slices for dynamic_boundaries; extra entities and anchors must be
2532        // merged manually so queries can resolve those semantic facts.
2533        //
2534        // NOTE: This post-build merge means `entities_hash` and `anchors_hash` do
2535        // not reflect these additions. Incremental replacement
2536        // (`replace_fact_shard_incremental`) may miss a change if only synthetic
2537        // facts change — the `content_hash` (whole-file) will still catch it.
2538        // A future refactor should extend `build_canonical_fact_shard`'s API to
2539        // accept extra entity/anchor slices alongside `dynamic_boundaries`.
2540        for (entity, anchor, _) in eval_sub_triples {
2541            shard.entities.push(entity);
2542            shard.anchors.push(anchor);
2543        }
2544        for fact in generated_member_facts {
2545            shard.entities.push(fact.entity);
2546            shard.anchors.push(fact.anchor);
2547        }
2548
2549        shard
2550    }
2551
2552    /// Replace a [`FileFactShard`] with per-category incremental invalidation.
2553    ///
2554    /// Compares the whole-file `content_hash` first; when unchanged the
2555    /// replacement is skipped entirely.  Otherwise each per-category hash
2556    /// (`anchors_hash`, `entities_hash`, `occurrences_hash`, `edges_hash`)
2557    /// is compared individually.  Only categories whose hash changed trigger
2558    /// removal of old entries and insertion of new ones in the cross-file
2559    /// semantic indexes.
2560    ///
2561    /// **Validates: Requirements 18.1, 18.2, 18.3, 18.4, 18.5**
2562    pub fn replace_fact_shard_incremental(
2563        &self,
2564        key: &str,
2565        new_shard: FileFactShard,
2566    ) -> ShardReplaceResult {
2567        let mut shards = self.fact_shards.write();
2568        let old_shard = shards.get(key);
2569
2570        let replacement = plan_shard_replacement(
2571            old_shard.map(Self::shard_category_hashes),
2572            Self::shard_category_hashes(&new_shard),
2573        );
2574
2575        if replacement.content_unchanged {
2576            return replacement;
2577        }
2578
2579        let source_uri = new_shard.source_uri.clone();
2580
2581        // ── Update cross-file semantic indexes per category ──
2582        // Occurrences and edges are both managed by the ReferenceIndex.
2583        // When either changes we must remove+re-add the file in that index.
2584        if replacement.occurrences_updated || replacement.edges_updated {
2585            let mut ref_idx = self.semantic_reference_index.write();
2586            if old_shard.is_some() {
2587                ref_idx.remove_file(&source_uri);
2588            }
2589            ref_idx.add_file(&new_shard);
2590        }
2591
2592        // Entities feed into the import/export index (export sets are keyed
2593        // by module name derived from entity canonical names).  When entities
2594        // change we refresh the import/export index for this file.
2595        if replacement.entities_updated {
2596            let mut ie_idx = self.semantic_import_export_index.write();
2597            ie_idx.remove_file_imports(&source_uri);
2598            ie_idx.remove_module_exports(&source_uri);
2599            // Re-add is handled by the caller or future wiring; for now we
2600            // ensure stale entries are purged.
2601        }
2602
2603        // Store the new shard (always, since content_hash differs).
2604        shards.insert(key.to_string(), new_shard);
2605
2606        replacement
2607    }
2608
2609    fn shard_category_hashes(shard: &FileFactShard) -> ShardCategoryHashes {
2610        ShardCategoryHashes {
2611            content_hash: shard.content_hash,
2612            anchors_hash: shard.anchors_hash,
2613            entities_hash: shard.entities_hash,
2614            occurrences_hash: shard.occurrences_hash,
2615            edges_hash: shard.edges_hash,
2616        }
2617    }
2618
2619    /// Number of stored file fact shards.
2620    pub fn fact_shard_count(&self) -> usize {
2621        self.fact_shards.read().len()
2622    }
2623
2624    /// Fetch a file fact shard for test/inspection.
2625    pub fn file_fact_shard(&self, uri: &str) -> Option<FileFactShard> {
2626        let key = DocumentStore::uri_key(&Self::normalize_uri(uri));
2627        self.fact_shards.read().get(&key).cloned()
2628    }
2629
2630    /// Resolve a semantic anchor to a source-backed LSP-wire location.
2631    ///
2632    /// Returns `None` for missing anchors, zero-width fallback anchors, or
2633    /// anchors whose source text is unavailable from the document store. If
2634    /// more than one shard contains the same anchor ID, this fails closed
2635    /// instead of choosing an arbitrary hash-map iteration result.
2636    pub fn semantic_anchor_wire_location(&self, anchor_id: AnchorId) -> Option<WireLocation> {
2637        let shards = self.fact_shards.read();
2638        let mut location = None;
2639
2640        for shard in shards.values() {
2641            for anchor in shard.anchors.iter().filter(|anchor| anchor.id == anchor_id) {
2642                if anchor.span_end_byte <= anchor.span_start_byte {
2643                    return None;
2644                }
2645
2646                let doc = self.document_store.get(&shard.source_uri)?;
2647                let start = usize::try_from(anchor.span_start_byte).ok()?;
2648                let end = usize::try_from(anchor.span_end_byte).ok()?;
2649                let next_location = WireLocation::new(
2650                    shard.source_uri.clone(),
2651                    WireRange::from_byte_offsets(&doc.text, start, end),
2652                );
2653                if location.replace(next_location).is_some() {
2654                    return None;
2655                }
2656            }
2657        }
2658
2659        location
2660    }
2661
2662    /// Resolve a semantic anchor to a source-backed LSP-wire location in a
2663    /// specific indexed file.
2664    ///
2665    /// This is the edit-safe variant of [`Self::semantic_anchor_wire_location`]:
2666    /// callers that already have `(file_id, anchor_id)` from a semantic plan do
2667    /// not need the global duplicate-anchor fail-closed behavior.
2668    pub fn semantic_anchor_wire_location_for_file(
2669        &self,
2670        file_id: FileId,
2671        anchor_id: AnchorId,
2672    ) -> Option<WireLocation> {
2673        let shards = self.fact_shards.read();
2674        let shard = shards.values().find(|shard| shard.file_id == file_id)?;
2675        let anchor = shard
2676            .anchors
2677            .iter()
2678            .find(|anchor| anchor.id == anchor_id && anchor.file_id == file_id)?;
2679
2680        if anchor.span_end_byte <= anchor.span_start_byte {
2681            return None;
2682        }
2683
2684        let doc = self.document_store.get(&shard.source_uri)?;
2685        let start = usize::try_from(anchor.span_start_byte).ok()?;
2686        let end = usize::try_from(anchor.span_end_byte).ok()?;
2687        doc.text.get(start..end)?;
2688
2689        Some(WireLocation::new(
2690            shard.source_uri.clone(),
2691            WireRange::from_byte_offsets(&doc.text, start, end),
2692        ))
2693    }
2694
2695    /// Compute the [`FileId`] for a URI using the same hash used during indexing.
2696    ///
2697    /// Returns `None` if the URI has not been indexed (no fact shard is present).
2698    pub fn file_id_for_uri(&self, uri: &str) -> Option<FileId> {
2699        let key = DocumentStore::uri_key(&Self::normalize_uri(uri));
2700        self.fact_shards.read().get(&key).map(|shard| shard.file_id)
2701    }
2702
2703    /// Invoke a scoped callback with [`WorkspaceSemanticQueries`] built from
2704    /// the current semantic indexes for the given URI.
2705    ///
2706    /// The callback receives the resolved [`FileId`] and a
2707    /// [`WorkspaceSemanticQueries`] facade that borrows from read-locked
2708    /// semantic indexes. Locks are released when `f` returns.
2709    ///
2710    /// Returns `Some(result)` if the URI is indexed and semantic data is
2711    /// available, `None` if the URI has not been indexed or its fact shard is
2712    /// absent (the caller should fall back to legacy diagnostics).
2713    pub fn with_semantic_queries_for_uri<R>(
2714        &self,
2715        uri: &str,
2716        f: impl FnOnce(FileId, crate::semantic::queries::WorkspaceSemanticQueries<'_>) -> R,
2717    ) -> Option<R> {
2718        let key = DocumentStore::uri_key(&Self::normalize_uri(uri));
2719
2720        // Acquire all three read guards simultaneously. The lock order must be
2721        // consistent with every other site that acquires multiple locks to avoid
2722        // deadlock: shards → reference_index → import_export_index.
2723        let shards_guard = self.fact_shards.read();
2724        let ref_guard = self.semantic_reference_index.read();
2725        let ie_guard = self.semantic_import_export_index.read();
2726
2727        // Verify the URI is indexed before entering the callback.
2728        let file_id = shards_guard.get(&key)?.file_id;
2729
2730        let queries = crate::semantic::queries::WorkspaceSemanticQueries::new(
2731            &ref_guard,
2732            &ie_guard,
2733            &shards_guard,
2734        );
2735
2736        Some(f(file_id, queries))
2737    }
2738
2739    /// Return the number of indexed files in the workspace
2740    pub fn file_count(&self) -> usize {
2741        let files = self.files.read();
2742        files.len()
2743    }
2744
2745    /// Return the total number of symbols across all indexed files
2746    pub fn symbol_count(&self) -> usize {
2747        let files = self.files.read();
2748        files.values().map(|file_index| file_index.symbols.len()).sum()
2749    }
2750
2751    /// Get all files in a specific workspace folder
2752    ///
2753    /// # Arguments
2754    ///
2755    /// * `folder_uri` - Workspace folder URI to filter by
2756    ///
2757    /// # Returns
2758    ///
2759    /// A vector of file indices belonging to the specified folder
2760    pub fn files_in_folder(&self, folder_uri: &str) -> Vec<FileIndex> {
2761        let files = self.files.read();
2762        files.values().filter(|f| f.folder_uri.as_deref() == Some(folder_uri)).cloned().collect()
2763    }
2764
2765    /// Get all symbols in a specific workspace folder
2766    ///
2767    /// # Arguments
2768    ///
2769    /// * `folder_uri` - Workspace folder URI to filter by
2770    ///
2771    /// # Returns
2772    ///
2773    /// A vector of symbols belonging to the specified folder
2774    pub fn symbols_in_folder(&self, folder_uri: &str) -> Vec<WorkspaceSymbol> {
2775        let files = self.files.read();
2776        files
2777            .values()
2778            .filter(|f| f.folder_uri.as_deref() == Some(folder_uri))
2779            .flat_map(|f| f.symbols.iter().cloned())
2780            .collect()
2781    }
2782
2783    /// Capture a point-in-time memory estimate of the index.
2784    ///
2785    /// Acquires read locks on all index components and walks their contents
2786    /// to estimate heap usage. Intended for offline profiling; do not call
2787    /// on the LSP hot path.
2788    ///
2789    /// Only available when the `memory-profiling` feature is enabled.
2790    #[cfg(feature = "memory-profiling")]
2791    pub fn memory_snapshot(&self) -> crate::workspace::memory::MemorySnapshot {
2792        use std::mem::size_of;
2793
2794        let files_guard = self.files.read();
2795        let symbols_guard = self.symbols.read();
2796        let global_refs_guard = self.global_references.read();
2797
2798        // --- files map ---
2799        let mut files_bytes: usize = 0;
2800        let mut total_symbol_count: usize = 0;
2801        for (uri_key, fi) in files_guard.iter() {
2802            // key string
2803            files_bytes += uri_key.len();
2804            // per-symbol entries
2805            for sym in &fi.symbols {
2806                files_bytes += sym.name.len()
2807                    + sym.uri.len()
2808                    + sym.qualified_name.as_deref().map_or(0, str::len)
2809                    + sym.documentation.as_deref().map_or(0, str::len)
2810                    + sym.container_name.as_deref().map_or(0, str::len)
2811                    // stack portion: kind + range + has_body + option discriminants
2812                    + size_of::<WorkspaceSymbol>();
2813            }
2814            total_symbol_count += fi.symbols.len();
2815            // per-reference entries
2816            for (ref_name, refs) in &fi.references {
2817                files_bytes += ref_name.len();
2818                for r in refs {
2819                    files_bytes += r.uri.len() + size_of::<SymbolReference>();
2820                }
2821            }
2822            // dependencies
2823            for dep in &fi.dependencies {
2824                files_bytes += dep.len();
2825            }
2826            // content hash (u64) + vec/hashset capacity overhead (rough)
2827            files_bytes += size_of::<u64>();
2828        }
2829
2830        // --- global symbols map ---
2831        let mut symbols_bytes: usize = 0;
2832        for (qname, candidates) in symbols_guard.iter() {
2833            symbols_bytes += qname.len();
2834            for candidate in candidates {
2835                symbols_bytes += candidate.location.uri.len() + size_of::<Location>();
2836            }
2837        }
2838
2839        // --- global references map ---
2840        let mut global_refs_bytes: usize = 0;
2841        for (sym_name, locs) in global_refs_guard.iter() {
2842            global_refs_bytes += sym_name.len();
2843            for loc in locs {
2844                global_refs_bytes += loc.uri.len() + size_of::<Location>();
2845            }
2846        }
2847
2848        // --- document store ---
2849        let document_store_bytes = self.document_store.total_text_bytes();
2850
2851        crate::workspace::memory::MemorySnapshot {
2852            file_count: files_guard.len(),
2853            symbol_count: total_symbol_count,
2854            files_bytes,
2855            symbols_bytes,
2856            global_refs_bytes,
2857            document_store_bytes,
2858        }
2859    }
2860
2861    /// Check if the workspace index has symbols (soft readiness check)
2862    ///
2863    /// Returns true if the index contains any symbols, indicating that
2864    /// at least some files have been indexed and the workspace is ready
2865    /// for symbol-based operations like completion.
2866    ///
2867    /// # Returns
2868    ///
2869    /// `true` if any symbols are indexed, otherwise `false`.
2870    ///
2871    /// # Examples
2872    ///
2873    /// ```rust,ignore
2874    /// use perl_parser::workspace_index::WorkspaceIndex;
2875    ///
2876    /// let index = WorkspaceIndex::new();
2877    /// assert!(!index.has_symbols());
2878    /// ```
2879    pub fn has_symbols(&self) -> bool {
2880        let files = self.files.read();
2881        files.values().any(|file_index| !file_index.symbols.is_empty())
2882    }
2883
2884    /// Search for symbols by query
2885    ///
2886    /// # Arguments
2887    ///
2888    /// * `query` - Substring to match against symbol names
2889    ///
2890    /// # Returns
2891    ///
2892    /// Symbols whose names or qualified names contain the query string.
2893    ///
2894    /// # Examples
2895    ///
2896    /// ```rust,ignore
2897    /// use perl_parser::workspace_index::WorkspaceIndex;
2898    ///
2899    /// let index = WorkspaceIndex::new();
2900    /// let _results = index.search_symbols("example");
2901    /// ```
2902    pub fn search_symbols(&self, query: &str) -> Vec<WorkspaceSymbol> {
2903        self.search_source_symbols(query)
2904    }
2905
2906    /// Search only source-backed syntax symbols from the workspace index.
2907    ///
2908    /// Generated/framework members are excluded. Use this when a caller needs
2909    /// to preserve the historical source-backed live slice for trust receipts
2910    /// or fallback paths.
2911    pub fn search_source_symbols(&self, query: &str) -> Vec<WorkspaceSymbol> {
2912        let query = query.trim();
2913        let query_lower = query.to_lowercase();
2914        let files = self.files.read();
2915        let mut results = Vec::new();
2916        for file_index in files.values() {
2917            for symbol in &file_index.symbols {
2918                if symbol.name.to_lowercase().contains(&query_lower)
2919                    || symbol
2920                        .qualified_name
2921                        .as_ref()
2922                        .map(|qn| qn.to_lowercase().contains(&query_lower))
2923                        .unwrap_or(false)
2924                {
2925                    results.push(symbol.clone());
2926                }
2927            }
2928        }
2929        results
2930    }
2931
2932    /// Search labeled generated/framework members backed by semantic source anchors.
2933    ///
2934    /// This is a narrow workspace-symbol pilot: returned symbols are explicitly
2935    /// labeled as generated/framework members and point at the source declaration
2936    /// that produced the member, not at an exact generated method body.
2937    pub fn search_generated_workspace_symbols(&self, query: &str) -> Vec<WorkspaceSymbol> {
2938        let query = query.trim();
2939        if query.is_empty() {
2940            return Vec::new();
2941        }
2942
2943        let query_lower = query.to_lowercase();
2944        let source_backed_qualified_names = self.source_backed_qualified_names();
2945        let shards = self.fact_shards.read();
2946        let mut results = Vec::new();
2947
2948        for shard in shards.values() {
2949            for entity in &shard.entities {
2950                if entity.kind != EntityKind::GeneratedMember {
2951                    continue;
2952                }
2953                if !is_framework_generated_member_entity(entity) {
2954                    continue;
2955                }
2956                if source_backed_qualified_names.contains(&entity.canonical_name) {
2957                    continue;
2958                }
2959                let Some((container_name, bare_name)) =
2960                    split_qualified_symbol_name(&entity.canonical_name)
2961                else {
2962                    continue;
2963                };
2964                if !bare_name.to_lowercase().contains(&query_lower)
2965                    && !entity.canonical_name.to_lowercase().contains(&query_lower)
2966                {
2967                    continue;
2968                }
2969                let Some(anchor_id) = entity.anchor_id else {
2970                    continue;
2971                };
2972                let Some(range) = self.generated_member_anchor_range(shard, anchor_id) else {
2973                    continue;
2974                };
2975
2976                results.push(WorkspaceSymbol {
2977                    name: format!("{bare_name} [generated/framework]"),
2978                    kind: SymbolKind::Method,
2979                    uri: shard.source_uri.clone(),
2980                    range,
2981                    qualified_name: Some(entity.canonical_name.clone()),
2982                    documentation: Some(
2983                        "Generated/framework member; virtual symbol anchored to source declaration"
2984                            .to_string(),
2985                    ),
2986                    container_name: Some(format!("{container_name} [generated/framework]")),
2987                    has_body: false,
2988                    workspace_folder_uri: self.determine_folder_uri(&shard.source_uri),
2989                });
2990            }
2991        }
2992
2993        sort_workspace_symbols(&mut results);
2994        results
2995    }
2996
2997    fn source_backed_qualified_names(&self) -> HashSet<String> {
2998        let files = self.files.read();
2999        let mut qualified_names = HashSet::new();
3000        for file_index in files.values() {
3001            for symbol in &file_index.symbols {
3002                if let Some(name) = &symbol.qualified_name {
3003                    qualified_names.insert(name.clone());
3004                    continue;
3005                }
3006                if let Some(container) = &symbol.container_name {
3007                    qualified_names.insert(format!("{container}::{}", symbol.name));
3008                }
3009            }
3010        }
3011        qualified_names
3012    }
3013
3014    fn generated_member_anchor_range(
3015        &self,
3016        shard: &FileFactShard,
3017        anchor_id: AnchorId,
3018    ) -> Option<Range> {
3019        let anchor = shard
3020            .anchors
3021            .iter()
3022            .find(|anchor| anchor.id == anchor_id && anchor.file_id == shard.file_id)?;
3023        if anchor.provenance != Provenance::FrameworkSynthesis
3024            || anchor.confidence != Confidence::Medium
3025        {
3026            return None;
3027        }
3028        if anchor.span_end_byte <= anchor.span_start_byte {
3029            return None;
3030        }
3031
3032        let doc = self.document_store.get(&shard.source_uri)?;
3033        let start = usize::try_from(anchor.span_start_byte).ok()?;
3034        let end = usize::try_from(anchor.span_end_byte).ok()?;
3035        doc.text.get(start..end)?;
3036        let ((start_line, start_col), (end_line, end_col)) = doc.line_index.range(start, end);
3037        Some(Range {
3038            start: Position { byte: start, line: start_line, column: start_col },
3039            end: Position { byte: end, line: end_line, column: end_col },
3040        })
3041    }
3042
3043    /// Find symbols by query (alias for search_symbols for compatibility)
3044    ///
3045    /// # Arguments
3046    ///
3047    /// * `query` - Substring to match against symbol names
3048    ///
3049    /// # Returns
3050    ///
3051    /// Symbols whose names or qualified names contain the query string.
3052    ///
3053    /// # Examples
3054    ///
3055    /// ```rust,ignore
3056    /// use perl_parser::workspace_index::WorkspaceIndex;
3057    ///
3058    /// let index = WorkspaceIndex::new();
3059    /// let _results = index.find_symbols("example");
3060    /// ```
3061    pub fn find_symbols(&self, query: &str) -> Vec<WorkspaceSymbol> {
3062        self.search_symbols(query)
3063    }
3064
3065    /// Rank symbols by folder proximity to a document
3066    ///
3067    /// Returns symbols sorted by: same folder > other folders
3068    ///
3069    /// # Arguments
3070    ///
3071    /// * `symbols` - Symbols to rank
3072    /// * `doc_uri` - Document URI to determine folder context
3073    ///
3074    /// # Returns
3075    ///
3076    /// Symbols ranked by folder proximity (same folder first)
3077    ///
3078    /// # Examples
3079    ///
3080    /// ```rust,ignore
3081    /// use perl_parser::workspace_index::WorkspaceIndex;
3082    ///
3083    /// let index = WorkspaceIndex::new();
3084    /// let symbols = index.search_symbols("example");
3085    /// let ranked = index.rank_symbols_by_folder(symbols, "file:///project1/src/main.pl");
3086    /// ```
3087    pub fn rank_symbols_by_folder(
3088        &self,
3089        symbols: Vec<WorkspaceSymbol>,
3090        doc_uri: &str,
3091    ) -> Vec<WorkspaceSymbol> {
3092        let doc_folder = self.determine_folder_uri(doc_uri);
3093
3094        let mut ranked: Vec<(WorkspaceSymbol, i32)> = symbols
3095            .into_iter()
3096            .map(|symbol| {
3097                let rank = if let Some(ref doc_folder_uri) = doc_folder {
3098                    if symbol.workspace_folder_uri.as_ref() == Some(doc_folder_uri) {
3099                        0 // Same folder - highest priority
3100                    } else {
3101                        1 // Different folder - lower priority
3102                    }
3103                } else {
3104                    1 // No document context - treat as different folder
3105                };
3106                (symbol, rank)
3107            })
3108            .collect();
3109
3110        // Sort by rank (lower is better), then by name for stability
3111        ranked.sort_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0.name.cmp(&b.0.name)));
3112
3113        ranked.into_iter().map(|(symbol, _)| symbol).collect()
3114    }
3115
3116    /// Search for symbols with folder-aware ranking
3117    ///
3118    /// Combines symbol search with folder proximity ranking
3119    ///
3120    /// # Arguments
3121    ///
3122    /// * `name` - Symbol name to search for
3123    /// * `doc_uri` - Document URI for ranking context
3124    ///
3125    /// # Returns
3126    ///
3127    /// Ranked symbols with same-folder results first
3128    ///
3129    /// # Examples
3130    ///
3131    /// ```rust,ignore
3132    /// use perl_parser::workspace_index::WorkspaceIndex;
3133    ///
3134    /// let index = WorkspaceIndex::new();
3135    /// let ranked = index.search_symbols_ranked("example", "file:///project1/src/main.pl");
3136    /// ```
3137    pub fn search_symbols_ranked(&self, name: &str, doc_uri: &str) -> Vec<WorkspaceSymbol> {
3138        let symbols = self.search_symbols(name);
3139        self.rank_symbols_by_folder(symbols, doc_uri)
3140    }
3141
3142    /// Determine if two symbols are in the same package
3143    ///
3144    /// # Arguments
3145    ///
3146    /// * `symbol_a` - First symbol
3147    /// * `symbol_b` - Second symbol
3148    ///
3149    /// # Returns
3150    ///
3151    /// `true` if both symbols are in the same package
3152    #[allow(dead_code)]
3153    pub fn same_package(&self, symbol_a: &WorkspaceSymbol, symbol_b: &WorkspaceSymbol) -> bool {
3154        let package_a = self.extract_package_name(&symbol_a.name);
3155        let package_b = self.extract_package_name(&symbol_b.name);
3156        package_a == package_b
3157    }
3158
    /// Determine if two package names are the same (helper for testing)
    ///
    /// The check is a plain, byte-exact string comparison: neither name is
    /// normalized first and no index state is consulted.
    ///
    /// # Arguments
    ///
    /// * `package_a` - First package name
    /// * `package_b` - Second package name
    ///
    /// # Returns
    ///
    /// `true` if both package names are equal
    #[allow(dead_code)]
    pub fn same_package_by_container(&self, package_a: &str, package_b: &str) -> bool {
        package_a == package_b
    }
3173
3174    /// Extract package name from a symbol name
3175    ///
3176    /// # Arguments
3177    ///
3178    /// * `symbol_name` - Symbol name (e.g., "Foo::Bar::baz" or "baz")
3179    ///
3180    /// # Returns
3181    ///
3182    /// Package name (e.g., "Foo::Bar") or None for main package
3183    #[allow(dead_code)]
3184    pub fn extract_package_name(&self, symbol_name: &str) -> Option<String> {
3185        let parts: Vec<&str> = symbol_name.split("::").collect();
3186        if parts.len() > 1 { Some(parts[..parts.len() - 1].join("::")) } else { None }
3187    }
3188
3189    /// Get symbols in a specific file
3190    ///
3191    /// # Arguments
3192    ///
3193    /// * `uri` - File URI to inspect
3194    ///
3195    /// # Returns
3196    ///
3197    /// All symbols indexed for the requested file.
3198    ///
3199    /// # Examples
3200    ///
3201    /// ```rust,ignore
3202    /// use perl_parser::workspace_index::WorkspaceIndex;
3203    ///
3204    /// let index = WorkspaceIndex::new();
3205    /// let _symbols = index.file_symbols("file:///example.pl");
3206    /// ```
3207    pub fn file_symbols(&self, uri: &str) -> Vec<WorkspaceSymbol> {
3208        let normalized_uri = Self::normalize_uri(uri);
3209        let key = DocumentStore::uri_key(&normalized_uri);
3210        let files = self.files.read();
3211
3212        files.get(&key).map(|fi| fi.symbols.clone()).unwrap_or_default()
3213    }
3214
3215    /// Get dependencies of a file
3216    ///
3217    /// # Arguments
3218    ///
3219    /// * `uri` - File URI to inspect
3220    ///
3221    /// # Returns
3222    ///
3223    /// A set of module names imported by the file.
3224    ///
3225    /// # Examples
3226    ///
3227    /// ```rust,ignore
3228    /// use perl_parser::workspace_index::WorkspaceIndex;
3229    ///
3230    /// let index = WorkspaceIndex::new();
3231    /// let _deps = index.file_dependencies("file:///example.pl");
3232    /// ```
3233    pub fn file_dependencies(&self, uri: &str) -> HashSet<String> {
3234        let normalized_uri = Self::normalize_uri(uri);
3235        let key = DocumentStore::uri_key(&normalized_uri);
3236        let files = self.files.read();
3237
3238        files.get(&key).map(|fi| fi.dependencies.clone()).unwrap_or_default()
3239    }
3240
3241    /// Find all files that depend on a module
3242    ///
3243    /// # Arguments
3244    ///
3245    /// * `module_name` - Module name to search for in file dependencies
3246    ///
3247    /// # Returns
3248    ///
3249    /// A list of file URIs that import or depend on the module.
3250    ///
3251    /// # Examples
3252    ///
3253    /// ```rust,ignore
3254    /// use perl_parser::workspace_index::WorkspaceIndex;
3255    ///
3256    /// let index = WorkspaceIndex::new();
3257    /// let _files = index.find_dependents("My::Module");
3258    /// ```
3259    pub fn find_dependents(&self, module_name: &str) -> Vec<String> {
3260        let canonical = canonicalize_perl_module_name(module_name);
3261        let legacy = legacy_perl_module_name(&canonical);
3262        let files = self.files.read();
3263        let mut dependents = Vec::new();
3264
3265        for (uri_key, file_index) in files.iter() {
3266            if file_index.dependencies.contains(module_name)
3267                || file_index.dependencies.contains(&canonical)
3268                || file_index.dependencies.contains(&legacy)
3269            {
3270                dependents.push(uri_key.clone());
3271            }
3272        }
3273
3274        dependents
3275    }
3276
    /// Get the document store
    ///
    /// The store is handed out by shared reference; nothing is cloned and no
    /// lock is taken by this accessor.
    ///
    /// # Returns
    ///
    /// A reference to the in-memory document store.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_parser::workspace_index::WorkspaceIndex;
    ///
    /// let index = WorkspaceIndex::new();
    /// let _store = index.document_store();
    /// ```
    pub fn document_store(&self) -> &DocumentStore {
        &self.document_store
    }
3294
3295    /// Find unused symbols in the workspace
3296    ///
3297    /// # Returns
3298    ///
3299    /// Symbols that have no non-definition references in the workspace.
3300    ///
3301    /// # Examples
3302    ///
3303    /// ```rust,ignore
3304    /// use perl_parser::workspace_index::WorkspaceIndex;
3305    ///
3306    /// let index = WorkspaceIndex::new();
3307    /// let _unused = index.find_unused_symbols();
3308    /// ```
3309    pub fn find_unused_symbols(&self) -> Vec<WorkspaceSymbol> {
3310        let files = self.files.read();
3311        let mut unused = Vec::new();
3312
3313        // Collect all defined symbols
3314        for (_uri_key, file_index) in files.iter() {
3315            for symbol in &file_index.symbols {
3316                // Check if this symbol has any references beyond its definition
3317                let has_usage = files.values().any(|fi| {
3318                    if let Some(refs) = fi.references.get(&symbol.name) {
3319                        refs.iter().any(|r| r.kind != ReferenceKind::Definition)
3320                    } else {
3321                        false
3322                    }
3323                });
3324
3325                if !has_usage {
3326                    unused.push(symbol.clone());
3327                }
3328            }
3329        }
3330
3331        unused
3332    }
3333
3334    /// Get all symbols that belong to a specific package
3335    ///
3336    /// # Arguments
3337    ///
3338    /// * `package_name` - Package name to match (e.g., `My::Package`)
3339    ///
3340    /// # Returns
3341    ///
3342    /// Symbols defined within the requested package.
3343    ///
3344    /// # Examples
3345    ///
3346    /// ```rust,ignore
3347    /// use perl_parser::workspace_index::WorkspaceIndex;
3348    ///
3349    /// let index = WorkspaceIndex::new();
3350    /// let _members = index.get_package_members("My::Package");
3351    /// ```
3352    pub fn get_package_members(&self, package_name: &str) -> Vec<WorkspaceSymbol> {
3353        let files = self.files.read();
3354        let mut members = Vec::new();
3355
3356        for (_uri_key, file_index) in files.iter() {
3357            for symbol in &file_index.symbols {
3358                // Check if symbol belongs to this package
3359                if let Some(ref container) = symbol.container_name {
3360                    if container == package_name {
3361                        members.push(symbol.clone());
3362                    }
3363                }
3364                // Also check qualified names
3365                if let Some(ref qname) = symbol.qualified_name {
3366                    if qname.starts_with(&format!("{}::", package_name)) {
3367                        // Avoid duplicates - only add if not already in via container_name
3368                        if symbol.container_name.as_deref() != Some(package_name) {
3369                            members.push(symbol.clone());
3370                        }
3371                    }
3372                }
3373            }
3374        }
3375
3376        members
3377    }
3378
3379    /// Find the definition location for a symbol key during Index/Navigate stages.
3380    ///
3381    /// # Arguments
3382    ///
3383    /// * `key` - Normalized symbol key to resolve.
3384    ///
3385    /// # Returns
3386    ///
3387    /// The definition location for the symbol, if found.
3388    ///
3389    /// # Examples
3390    ///
3391    /// ```rust,ignore
3392    /// use perl_parser::workspace_index::{SymKind, SymbolKey, WorkspaceIndex};
3393    /// use std::sync::Arc;
3394    ///
3395    /// let index = WorkspaceIndex::new();
3396    /// let key = SymbolKey { pkg: Arc::from("My::Package"), name: Arc::from("example"), sigil: None, kind: SymKind::Sub };
3397    /// let _def = index.find_def(&key);
3398    /// ```
3399    pub fn find_def(&self, key: &SymbolKey) -> Option<Location> {
3400        if let Some(sigil) = key.sigil {
3401            // It's a variable
3402            let var_name = format!("{}{}", sigil, key.name);
3403            self.find_definition(&var_name)
3404        } else if key.kind == SymKind::Pack {
3405            // It's a package lookup (e.g., from `use Module::Name`)
3406            // Search for the package declaration by name
3407            self.find_definition(key.pkg.as_ref())
3408                .or_else(|| self.find_definition(key.name.as_ref()))
3409        } else {
3410            // It's a subroutine or package
3411            let qualified_name = format!("{}::{}", key.pkg, key.name);
3412            self.find_definition(&qualified_name)
3413        }
3414    }
3415
3416    /// Find reference locations for a symbol key using dual indexing.
3417    ///
3418    /// Searches both qualified and bare names to support Navigate/Analyze workflows.
3419    ///
3420    /// # Arguments
3421    ///
3422    /// * `key` - Normalized symbol key to search for.
3423    ///
3424    /// # Returns
3425    ///
3426    /// All reference locations for the symbol, excluding the definition.
3427    ///
3428    /// # Examples
3429    ///
3430    /// ```rust,ignore
3431    /// use perl_parser::workspace_index::{SymKind, SymbolKey, WorkspaceIndex};
3432    /// use std::sync::Arc;
3433    ///
3434    /// let index = WorkspaceIndex::new();
3435    /// let key = SymbolKey { pkg: Arc::from("main"), name: Arc::from("example"), sigil: None, kind: SymKind::Sub };
3436    /// let _refs = index.find_refs(&key);
3437    /// ```
3438    pub fn find_refs(&self, key: &SymbolKey) -> Vec<Location> {
3439        let files_locked = self.files.read();
3440        let mut all_refs = if let Some(sigil) = key.sigil {
3441            // It's a variable - search through all files for this variable name
3442            let var_name = format!("{}{}", sigil, key.name);
3443            let mut refs = Vec::new();
3444            for (_uri_key, file_index) in files_locked.iter() {
3445                if let Some(var_refs) = file_index.references.get(&var_name) {
3446                    for reference in var_refs {
3447                        refs.push(Location { uri: reference.uri.clone(), range: reference.range });
3448                    }
3449                }
3450            }
3451            refs
3452        } else {
3453            // It's a subroutine or package
3454            if key.pkg.as_ref() == "main" {
3455                // For main package, we search for both "main::foo" and bare "foo"
3456                let mut refs = self.find_references(&format!("main::{}", key.name));
3457                // Add bare name references
3458                for (_uri_key, file_index) in files_locked.iter() {
3459                    if let Some(bare_refs) = file_index.references.get(key.name.as_ref()) {
3460                        for reference in bare_refs {
3461                            refs.push(Location {
3462                                uri: reference.uri.clone(),
3463                                range: reference.range,
3464                            });
3465                        }
3466                    }
3467                }
3468                refs
3469            } else {
3470                let qualified_name = format!("{}::{}", key.pkg, key.name);
3471                self.find_references(&qualified_name)
3472            }
3473        };
3474        drop(files_locked);
3475
3476        // Remove the definition; the caller will include it separately if needed
3477        if let Some(def) = self.find_def(key) {
3478            all_refs.retain(|loc| !(loc.uri == def.uri && loc.range == def.range));
3479        }
3480
3481        // Deduplicate by URI and range
3482        let mut seen = HashSet::new();
3483        all_refs.retain(|loc| {
3484            seen.insert((
3485                loc.uri.clone(),
3486                loc.range.start.line,
3487                loc.range.start.column,
3488                loc.range.end.line,
3489                loc.range.end.column,
3490            ))
3491        });
3492
3493        all_refs
3494    }
3495}
3496
/// AST visitor for extracting symbols and references
///
/// One visitor is created per document via `IndexVisitor::new`; it writes its
/// findings into the `FileIndex` passed to `visit`.
struct IndexVisitor {
    /// Private copy of the document being indexed (cloned in `new`); its
    /// `line_index` converts byte offsets into line/column positions.
    document: Document,
    /// URI stamped onto every symbol and reference this visitor records.
    uri: String,
    /// Package context used for unqualified names. Starts as `"main"` and is
    /// replaced (not stacked) whenever a package or class node is visited.
    current_package: Option<String>,
    /// Workspace folder copied onto each recorded `WorkspaceSymbol`.
    workspace_folder_uri: Option<String>,
}
3504
/// Returns `true` when `byte` may begin an interpolated variable name:
/// an ASCII letter or an underscore.
fn is_interpolated_var_start(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
3508
/// Returns `true` when `byte` may appear after the first character of an
/// interpolated variable name: an ASCII letter or digit, an underscore, or a
/// colon (presumably so package-qualified names such as `Foo::bar` scan as one
/// token).
fn is_interpolated_var_continue(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b':')
}
3512
/// Returns `true` when the byte at `index` is escaped, i.e. it is immediately
/// preceded by an odd number of consecutive backslashes.
///
/// An even run (including zero) means the backslashes escape each other and the
/// byte at `index` is left unescaped. `index` must not exceed `bytes.len()`.
fn has_escaped_interpolation_marker(bytes: &[u8], index: usize) -> bool {
    let preceding_backslashes =
        bytes[..index].iter().rev().take_while(|&&byte| byte == b'\\').count();

    preceding_backslashes % 2 == 1
}
3527
/// Removes one pair of surrounding quotes from `raw_content` when the first
/// and last characters are the same quote character (`"` or `'`).
///
/// Input that is too short to hold a pair, is unquoted, or has mismatched
/// delimiters is returned unchanged.
fn strip_matching_quote_delimiters(raw_content: &str) -> &str {
    for quote in ['"', '\''] {
        let inner = raw_content.strip_prefix(quote).and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = inner {
            return inner;
        }
    }

    raw_content
}
3541
3542impl IndexVisitor {
3543    fn new(document: &mut Document, uri: String, workspace_folder_uri: Option<String>) -> Self {
3544        Self {
3545            document: document.clone(),
3546            uri,
3547            current_package: Some("main".to_string()),
3548            workspace_folder_uri,
3549        }
3550    }
3551
    /// Index the tree rooted at `node` into `file_index`.
    ///
    /// Runs in two steps, in this order: symbol declarations are projected
    /// first (using the package context held at that moment), then the tree is
    /// walked by `visit_node`.
    fn visit(&mut self, node: &Node, file_index: &mut FileIndex) {
        self.project_symbol_declarations(node, file_index);
        self.visit_node(node, file_index);
    }
3556
3557    fn project_symbol_declarations(&self, node: &Node, file_index: &mut FileIndex) {
3558        for decl in extract_symbol_decls(node, self.current_package.as_deref()) {
3559            let (start, end) = match decl.kind {
3560                SymbolKind::Variable(_) => match decl.anchor_span {
3561                    Some(span) => span,
3562                    None => decl.full_span,
3563                },
3564                _ => decl.full_span,
3565            };
3566            let ((start_line, start_col), (end_line, end_col)) =
3567                self.document.line_index.range(start, end);
3568            let range = Range {
3569                start: Position { byte: start, line: start_line, column: start_col },
3570                end: Position { byte: end, line: end_line, column: end_col },
3571            };
3572
3573            let symbol_name = symbol_decl_name(&decl.kind, &decl.name);
3574
3575            // Suppress qualified_name for lexically-scoped variables (my, state): they
3576            // are not package-visible and must not be found by a qualified lookup such
3577            // as `Foo::x`.  `our` and `local` variables keep the qualified name because
3578            // they participate in the package namespace.
3579            let qualified_name = match &decl.declarator {
3580                Some(d) if d == "my" || d == "state" => None,
3581                _ => (!decl.qualified_name.is_empty()).then_some(decl.qualified_name),
3582            };
3583
3584            // Top-level package declarations have no containing package; suppress the
3585            // spurious "main" container that comes from the walker's initial context.
3586            let container_name = match decl.kind {
3587                SymbolKind::Package => None,
3588                _ => decl.container,
3589            };
3590
3591            file_index.symbols.push(WorkspaceSymbol {
3592                name: symbol_name.clone(),
3593                kind: decl.kind,
3594                uri: self.uri.clone(),
3595                range,
3596                qualified_name,
3597                documentation: None,
3598                container_name,
3599                has_body: true,
3600                workspace_folder_uri: self.workspace_folder_uri.clone(),
3601            });
3602
3603            file_index.references.entry(symbol_name).or_default().push(SymbolReference {
3604                uri: self.uri.clone(),
3605                range,
3606                kind: ReferenceKind::Definition,
3607            });
3608        }
3609    }
3610
3611    fn record_interpolated_variable_references(
3612        &self,
3613        raw_content: &str,
3614        range: Range,
3615        file_index: &mut FileIndex,
3616    ) {
3617        let content = strip_matching_quote_delimiters(raw_content);
3618        let bytes = content.as_bytes();
3619        let mut index = 0;
3620
3621        while index < bytes.len() {
3622            if has_escaped_interpolation_marker(bytes, index) {
3623                index += 1;
3624                continue;
3625            }
3626
3627            let sigil = match bytes[index] {
3628                b'$' => "$",
3629                b'@' => "@",
3630                _ => {
3631                    index += 1;
3632                    continue;
3633                }
3634            };
3635
3636            if index + 1 >= bytes.len() {
3637                break;
3638            }
3639
3640            let (start, needs_closing_brace) =
3641                if bytes[index + 1] == b'{' { (index + 2, true) } else { (index + 1, false) };
3642
3643            if start >= bytes.len() || !is_interpolated_var_start(bytes[start]) {
3644                index += 1;
3645                continue;
3646            }
3647
3648            let mut end = start + 1;
3649            while end < bytes.len() && is_interpolated_var_continue(bytes[end]) {
3650                end += 1;
3651            }
3652
3653            if needs_closing_brace && (end >= bytes.len() || bytes[end] != b'}') {
3654                index += 1;
3655                continue;
3656            }
3657
3658            if let Some(name) = content.get(start..end) {
3659                let var_name = format!("{sigil}{name}");
3660                file_index.references.entry(var_name).or_default().push(SymbolReference {
3661                    uri: self.uri.clone(),
3662                    range,
3663                    kind: ReferenceKind::Read,
3664                });
3665            }
3666
3667            index = if needs_closing_brace { end + 1 } else { end };
3668        }
3669    }
3670
3671    fn visit_node(&mut self, node: &Node, file_index: &mut FileIndex) {
3672        match &node.kind {
3673            NodeKind::Package { name, .. } => {
3674                let package_name = name.clone();
3675
3676                // Update the current package (replaces the previous one, not a stack)
3677                self.current_package = Some(package_name.clone());
3678            }
3679
3680            NodeKind::Subroutine { body, .. } => {
3681                // Visit body
3682                self.visit_node(body, file_index);
3683            }
3684
3685            NodeKind::VariableDeclaration { initializer, .. } => {
3686                // Visit initializer
3687                if let Some(init) = initializer {
3688                    self.visit_node(init, file_index);
3689                }
3690            }
3691
3692            NodeKind::VariableListDeclaration { initializer, .. } => {
3693                // Visit the initializer
3694                if let Some(init) = initializer {
3695                    self.visit_node(init, file_index);
3696                }
3697            }
3698
3699            NodeKind::Variable { sigil, name } => {
3700                let var_name = format!("{}{}", sigil, name);
3701
3702                // Track as usage (could be read or write based on context)
3703                file_index.references.entry(var_name).or_default().push(SymbolReference {
3704                    uri: self.uri.clone(),
3705                    range: self.node_to_range(node),
3706                    kind: ReferenceKind::Read, // Default to read, would need context for write
3707                });
3708            }
3709
3710            NodeKind::FunctionCall { name, args, .. } => {
3711                let func_name = name.clone();
3712                let location = self.node_to_range(node);
3713
3714                // Determine package and bare name
3715                let (pkg, bare_name) = if let Some(idx) = func_name.rfind("::") {
3716                    (&func_name[..idx], &func_name[idx + 2..])
3717                } else {
3718                    (self.current_package.as_deref().unwrap_or("main"), func_name.as_str())
3719                };
3720
3721                let qualified = format!("{}::{}", pkg, bare_name);
3722
3723                // Track as usage for both qualified and bare forms
3724                // This dual indexing allows finding references whether the function is called
3725                // as `process_data()` or `Utils::process_data()`
3726                file_index.references.entry(bare_name.to_string()).or_default().push(
3727                    SymbolReference {
3728                        uri: self.uri.clone(),
3729                        range: location,
3730                        kind: ReferenceKind::Usage,
3731                    },
3732                );
3733                file_index.references.entry(qualified).or_default().push(SymbolReference {
3734                    uri: self.uri.clone(),
3735                    range: location,
3736                    kind: ReferenceKind::Usage,
3737                });
3738
3739                if name == "extends" || name == "with" {
3740                    for module_name in extract_module_names_from_call_args(args) {
3741                        file_index
3742                            .dependencies
3743                            .insert(normalize_dependency_module_name(&module_name));
3744                    }
3745                } else if name == "require" {
3746                    if let Some(module_name) = extract_module_name_from_require_args(args) {
3747                        file_index
3748                            .dependencies
3749                            .insert(normalize_dependency_module_name(&module_name));
3750                    }
3751                }
3752
3753                // Visit arguments
3754                for arg in args {
3755                    self.visit_node(arg, file_index);
3756                }
3757            }
3758
3759            NodeKind::Use { module, args, .. } => {
3760                let module_name = normalize_dependency_module_name(module);
3761                file_index.dependencies.insert(module_name.clone());
3762
3763                // Also track actual parent/base class names for dependency discovery.
3764                // `use parent 'Foo::Bar'` stores module="parent" and args=["'Foo::Bar'"],
3765                // so find_dependents("Foo::Bar") would miss files with only use parent.
3766                if module == "parent" || module == "base" {
3767                    for name in extract_module_names_from_use_args(args) {
3768                        file_index.dependencies.insert(normalize_dependency_module_name(&name));
3769                    }
3770                }
3771
3772                // Track as import
3773                file_index.references.entry(module_name).or_default().push(SymbolReference {
3774                    uri: self.uri.clone(),
3775                    range: self.node_to_range(node),
3776                    kind: ReferenceKind::Import,
3777                });
3778            }
3779
3780            // Handle assignment to detect writes
3781            NodeKind::Assignment { lhs, rhs, op } => {
3782                // For compound assignments (+=, -=, .=, etc.), the LHS is both read and written
3783                let is_compound = op != "=";
3784
3785                if let NodeKind::Variable { sigil, name } = &lhs.kind {
3786                    let var_name = format!("{}{}", sigil, name);
3787
3788                    // For compound assignments, it's a read first
3789                    if is_compound {
3790                        file_index.references.entry(var_name.clone()).or_default().push(
3791                            SymbolReference {
3792                                uri: self.uri.clone(),
3793                                range: self.node_to_range(lhs),
3794                                kind: ReferenceKind::Read,
3795                            },
3796                        );
3797                    }
3798
3799                    // Then it's always a write
3800                    file_index.references.entry(var_name).or_default().push(SymbolReference {
3801                        uri: self.uri.clone(),
3802                        range: self.node_to_range(lhs),
3803                        kind: ReferenceKind::Write,
3804                    });
3805                }
3806
3807                // Right side could have reads
3808                self.visit_node(rhs, file_index);
3809            }
3810
3811            // Recursively visit child nodes
3812            NodeKind::Block { statements } => {
3813                for stmt in statements {
3814                    self.visit_node(stmt, file_index);
3815                }
3816            }
3817
3818            NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
3819                self.visit_node(condition, file_index);
3820                self.visit_node(then_branch, file_index);
3821                for (cond, branch) in elsif_branches {
3822                    self.visit_node(cond, file_index);
3823                    self.visit_node(branch, file_index);
3824                }
3825                if let Some(else_br) = else_branch {
3826                    self.visit_node(else_br, file_index);
3827                }
3828            }
3829
3830            NodeKind::While { condition, body, continue_block } => {
3831                self.visit_node(condition, file_index);
3832                self.visit_node(body, file_index);
3833                if let Some(cont) = continue_block {
3834                    self.visit_node(cont, file_index);
3835                }
3836            }
3837
3838            NodeKind::For { init, condition, update, body, continue_block } => {
3839                if let Some(i) = init {
3840                    self.visit_node(i, file_index);
3841                }
3842                if let Some(c) = condition {
3843                    self.visit_node(c, file_index);
3844                }
3845                if let Some(u) = update {
3846                    self.visit_node(u, file_index);
3847                }
3848                self.visit_node(body, file_index);
3849                if let Some(cont) = continue_block {
3850                    self.visit_node(cont, file_index);
3851                }
3852            }
3853
3854            NodeKind::Foreach { variable, list, body, continue_block } => {
3855                // Iterator is a write context
3856                if let Some(cb) = continue_block {
3857                    self.visit_node(cb, file_index);
3858                }
3859                if let NodeKind::Variable { sigil, name } = &variable.kind {
3860                    let var_name = format!("{}{}", sigil, name);
3861                    file_index.references.entry(var_name).or_default().push(SymbolReference {
3862                        uri: self.uri.clone(),
3863                        range: self.node_to_range(variable),
3864                        kind: ReferenceKind::Write,
3865                    });
3866                }
3867                self.visit_node(variable, file_index);
3868                self.visit_node(list, file_index);
3869                self.visit_node(body, file_index);
3870            }
3871
3872            NodeKind::MethodCall { object, method, args } => {
3873                // Check if this is a static method call (Package->method)
3874                let qualified_method = if let NodeKind::Identifier { name } = &object.kind {
3875                    // Static method call: Package->method
3876                    Some(format!("{}::{}", name, method))
3877                } else {
3878                    // Instance method call: $obj->method
3879                    None
3880                };
3881
3882                // Object is a read context
3883                self.visit_node(object, file_index);
3884
3885                // Track method call under BOTH the qualified form (for static calls
3886                // like `Pkg->method`) AND the bare method name. This mirrors the
3887                // FunctionCall dual-key storage above (PR #122 dual-indexing pattern)
3888                // so that bare-name lookups (e.g. `find_unused_symbols`,
3889                // `count_usages("method")`) consistently find static method call sites.
3890                // See #6799 for the original asymmetric-storage bug report.
3891                let location = self.node_to_range(node);
3892                if let Some(qualified_method) = qualified_method.as_ref() {
3893                    file_index.references.entry(qualified_method.clone()).or_default().push(
3894                        SymbolReference {
3895                            uri: self.uri.clone(),
3896                            range: location,
3897                            kind: ReferenceKind::Usage,
3898                        },
3899                    );
3900                }
3901                file_index.references.entry(method.clone()).or_default().push(SymbolReference {
3902                    uri: self.uri.clone(),
3903                    range: location,
3904                    kind: ReferenceKind::Usage,
3905                });
3906
3907                if method == "import"
3908                    && let NodeKind::Identifier { name: module_name } = &object.kind
3909                {
3910                    for symbol in extract_manual_import_symbols(args) {
3911                        file_index.references.entry(symbol).or_default().push(SymbolReference {
3912                            uri: self.uri.clone(),
3913                            range: self.node_to_range(node),
3914                            kind: ReferenceKind::Import,
3915                        });
3916                    }
3917                    file_index.dependencies.insert(normalize_dependency_module_name(module_name));
3918                }
3919
3920                // Visit arguments
3921                for arg in args {
3922                    self.visit_node(arg, file_index);
3923                }
3924            }
3925
3926            NodeKind::No { module, .. } => {
3927                let module_name = normalize_dependency_module_name(module);
3928                file_index.dependencies.insert(module_name);
3929            }
3930
3931            NodeKind::Class { name, .. } => {
3932                self.current_package = Some(name.clone());
3933            }
3934
3935            NodeKind::Method { body, signature, .. } => {
3936                // Visit params
3937                if let Some(sig) = signature {
3938                    if let NodeKind::Signature { parameters } = &sig.kind {
3939                        for param in parameters {
3940                            self.visit_node(param, file_index);
3941                        }
3942                    }
3943                }
3944
3945                // Visit body
3946                self.visit_node(body, file_index);
3947            }
3948
3949            NodeKind::String { value, interpolated } => {
3950                if *interpolated {
3951                    let range = self.node_to_range(node);
3952                    self.record_interpolated_variable_references(value, range, file_index);
3953                }
3954            }
3955
3956            NodeKind::Heredoc { content, interpolated, .. } => {
3957                if *interpolated {
3958                    let range = self.node_to_range(node);
3959                    self.record_interpolated_variable_references(content, range, file_index);
3960                }
3961            }
3962
3963            // Handle special assignments (++ and --)
3964            NodeKind::Unary { op, operand } if op == "++" || op == "--" => {
3965                // Pre/post increment/decrement are both read and write
3966                if let NodeKind::Variable { sigil, name } = &operand.kind {
3967                    let var_name = format!("{}{}", sigil, name);
3968
3969                    // It's both a read and a write
3970                    file_index.references.entry(var_name.clone()).or_default().push(
3971                        SymbolReference {
3972                            uri: self.uri.clone(),
3973                            range: self.node_to_range(operand),
3974                            kind: ReferenceKind::Read,
3975                        },
3976                    );
3977
3978                    file_index.references.entry(var_name).or_default().push(SymbolReference {
3979                        uri: self.uri.clone(),
3980                        range: self.node_to_range(operand),
3981                        kind: ReferenceKind::Write,
3982                    });
3983                }
3984            }
3985
3986            _ => {
3987                // For other node types, just visit children
3988                self.visit_children(node, file_index);
3989            }
3990        }
3991    }
3992
3993    fn visit_children(&mut self, node: &Node, file_index: &mut FileIndex) {
3994        // Generic visitor for unhandled node types - visit all nested nodes
3995        match &node.kind {
3996            NodeKind::Program { statements } => {
3997                for stmt in statements {
3998                    self.visit_node(stmt, file_index);
3999                }
4000            }
4001            NodeKind::ExpressionStatement { expression } => {
4002                self.visit_node(expression, file_index);
4003            }
4004            // Expression nodes
4005            NodeKind::Unary { operand, .. } => {
4006                self.visit_node(operand, file_index);
4007            }
4008            NodeKind::Binary { left, right, .. } => {
4009                self.visit_node(left, file_index);
4010                self.visit_node(right, file_index);
4011            }
4012            NodeKind::Ternary { condition, then_expr, else_expr } => {
4013                self.visit_node(condition, file_index);
4014                self.visit_node(then_expr, file_index);
4015                self.visit_node(else_expr, file_index);
4016            }
4017            NodeKind::ArrayLiteral { elements } => {
4018                for elem in elements {
4019                    self.visit_node(elem, file_index);
4020                }
4021            }
4022            NodeKind::HashLiteral { pairs } => {
4023                for (key, value) in pairs {
4024                    self.visit_node(key, file_index);
4025                    self.visit_node(value, file_index);
4026                }
4027            }
4028            NodeKind::Return { value } => {
4029                if let Some(val) = value {
4030                    self.visit_node(val, file_index);
4031                }
4032            }
4033            NodeKind::Eval { block } | NodeKind::Do { block } | NodeKind::Defer { block } => {
4034                self.visit_node(block, file_index);
4035            }
4036            NodeKind::Try { body, catch_blocks, finally_block } => {
4037                self.visit_node(body, file_index);
4038                for (_, block) in catch_blocks {
4039                    self.visit_node(block, file_index);
4040                }
4041                if let Some(finally) = finally_block {
4042                    self.visit_node(finally, file_index);
4043                }
4044            }
4045            NodeKind::Given { expr, body } => {
4046                self.visit_node(expr, file_index);
4047                self.visit_node(body, file_index);
4048            }
4049            NodeKind::When { condition, body } => {
4050                self.visit_node(condition, file_index);
4051                self.visit_node(body, file_index);
4052            }
4053            NodeKind::Default { body } => {
4054                self.visit_node(body, file_index);
4055            }
4056            NodeKind::StatementModifier { statement, condition, .. } => {
4057                self.visit_node(statement, file_index);
4058                self.visit_node(condition, file_index);
4059            }
4060            NodeKind::VariableWithAttributes { variable, .. } => {
4061                self.visit_node(variable, file_index);
4062            }
4063            NodeKind::LabeledStatement { statement, .. } => {
4064                self.visit_node(statement, file_index);
4065            }
4066            _ => {
4067                // For other node types, no children to visit
4068            }
4069        }
4070    }
4071
4072    fn node_to_range(&mut self, node: &Node) -> Range {
4073        // LineIndex.range returns line numbers and UTF-16 code unit columns
4074        let ((start_line, start_col), (end_line, end_col)) =
4075            self.document.line_index.range(node.location.start, node.location.end);
4076        // Use byte offsets from node.location directly
4077        Range {
4078            start: Position { byte: node.location.start, line: start_line, column: start_col },
4079            end: Position { byte: node.location.end, line: end_line, column: end_col },
4080        }
4081    }
4082}
4083
4084fn symbol_decl_name(kind: &SymbolKind, name: &str) -> String {
4085    match kind {
4086        SymbolKind::Variable(VarKind::Scalar) => format!("${name}"),
4087        SymbolKind::Variable(VarKind::Array) => format!("@{name}"),
4088        SymbolKind::Variable(VarKind::Hash) => format!("%{name}"),
4089        _ => name.to_string(),
4090    }
4091}
4092
/// Split `Container::name` at its last `::` into `(container, bare_name)`.
///
/// Returns `None` when there is no `::` separator or when either side of the
/// last separator is empty.
fn split_qualified_symbol_name(canonical_name: &str) -> Option<(&str, &str)> {
    canonical_name
        .rsplit_once("::")
        .filter(|(container, bare_name)| !container.is_empty() && !bare_name.is_empty())
}
4100
4101fn is_framework_generated_member_entity(entity: &EntityFact) -> bool {
4102    entity.provenance == Provenance::FrameworkSynthesis && entity.confidence == Confidence::Medium
4103}
4104
4105fn sort_workspace_symbols(symbols: &mut [WorkspaceSymbol]) {
4106    symbols.sort_by(|left, right| {
4107        left.name
4108            .cmp(&right.name)
4109            .then_with(|| left.uri.cmp(&right.uri))
4110            .then_with(|| left.range.start.line.cmp(&right.range.start.line))
4111            .then_with(|| left.range.start.column.cmp(&right.range.start.column))
4112            .then_with(|| left.range.end.line.cmp(&right.range.end.line))
4113            .then_with(|| left.range.end.column.cmp(&right.range.end.column))
4114    });
4115}
4116
4117/// Extract bare module names from the argument list of a `use parent` / `use base` statement.
4118///
4119/// The `args` field of `NodeKind::Use` stores raw argument strings as the parser captured them.
4120/// For `use parent 'Foo::Bar'` this is `["'Foo::Bar'"]`.
4121/// For `use parent qw(Foo::Bar Other::Base)` this is `["qw(Foo::Bar Other::Base)"]`.
4122/// For `use parent -norequire, 'Foo::Bar'` this is `["-norequire", "'Foo::Bar'"]`.
4123///
4124/// Returns the module names with surrounding quotes/qw wrappers stripped.
4125/// Tokens starting with `-` or not matching `[\w::']+` are silently skipped.
4126fn extract_module_names_from_use_args(args: &[String]) -> Vec<String> {
4127    use std::collections::HashSet;
4128
4129    fn normalize_module_name(token: &str) -> Option<&str> {
4130        let stripped = token.trim_matches(|c: char| {
4131            matches!(c, '\'' | '"' | '(' | ')' | '[' | ']' | '{' | '}' | ',' | ';')
4132        });
4133
4134        if stripped.is_empty() || stripped.starts_with('-') {
4135            return None;
4136        }
4137
4138        stripped
4139            .chars()
4140            .all(|c| c.is_alphanumeric() || c == '_' || c == ':' || c == '\'')
4141            .then_some(stripped)
4142    }
4143
4144    let joined = args.join(" ");
4145
4146    let (qw_words, remainder) = extract_qw_words(&joined);
4147    let mut modules = Vec::new();
4148    let mut seen = HashSet::new();
4149    for word in qw_words {
4150        if let Some(candidate) = normalize_module_name(&word) {
4151            let canonical = canonicalize_perl_module_name(candidate);
4152            if seen.insert(canonical.clone()) {
4153                modules.push(canonical);
4154            }
4155        }
4156    }
4157
4158    for token in remainder.split_whitespace().flat_map(|t| t.split(',')) {
4159        if let Some(candidate) = normalize_module_name(token) {
4160            let canonical = canonicalize_perl_module_name(candidate);
4161            if seen.insert(canonical.clone()) {
4162                modules.push(canonical);
4163            }
4164        }
4165    }
4166
4167    modules
4168}
4169
4170fn extract_module_names_from_call_args(args: &[Node]) -> Vec<String> {
4171    fn collect_from_node(node: &Node, out: &mut Vec<String>) {
4172        match &node.kind {
4173            NodeKind::String { value, .. } => {
4174                out.extend(extract_module_names_from_use_args(std::slice::from_ref(value)));
4175            }
4176            NodeKind::Identifier { name } => {
4177                out.extend(extract_module_names_from_use_args(std::slice::from_ref(name)));
4178            }
4179            NodeKind::ArrayLiteral { elements } => {
4180                for element in elements {
4181                    collect_from_node(element, out);
4182                }
4183            }
4184            NodeKind::FunctionCall { name, args, .. } if name == "qw" => {
4185                for arg in args {
4186                    collect_from_node(arg, out);
4187                }
4188            }
4189            _ => {}
4190        }
4191    }
4192
4193    let mut modules = Vec::new();
4194    for arg in args {
4195        collect_from_node(arg, &mut modules);
4196    }
4197    modules
4198}
4199
/// Rewrite Perl's legacy `'` package separator (as in `Foo'Bar`) to `::`.
///
/// Using a single separator keeps lookups and dependency matching on one key shape.
fn canonicalize_perl_module_name(name: &str) -> String {
    let segments: Vec<&str> = name.split('\'').collect();
    segments.join("::")
}
4205
/// Rewrite every `::` separator in `name` to the legacy `'` form.
fn legacy_perl_module_name(name: &str) -> String {
    let segments: Vec<&str> = name.split("::").collect();
    segments.join("'")
}
4209
/// Normalize a module name for dependency storage and lookup.
/// Converts legacy `'` separators to `::` so stored keys are canonical.
///
/// This is a thin alias over `canonicalize_perl_module_name`; it performs no
/// other transformation on `module_name`.
fn normalize_dependency_module_name(module_name: &str) -> String {
    canonicalize_perl_module_name(module_name)
}
4215
/// Split `qw` quote-word lists out of a raw Perl argument string.
///
/// Scans `input` for `qw`, optional whitespace, and an opening delimiter. Bracket
/// delimiters (`(`, `[`, `{`, `<`) are matched with nesting; any other delimiter is
/// closed by the next occurrence of the same character. Identifier characters and
/// quote characters (`'`, `"`) are not accepted as delimiters.
///
/// `qw` is only recognised at a word boundary: when it is glued to a preceding
/// identifier character (letter, digit, or `_`) it is part of a longer name such as
/// `my_qw` and is left untouched.
///
/// # Returns
///
/// `(words, remainder)` where `words` are the whitespace-separated entries of every
/// recognised list, in order, and `remainder` is `input` with those lists removed.
/// If a list is never closed, the text from that `qw` to the end of `input` is kept
/// in `remainder` verbatim and scanning stops.
fn extract_qw_words(input: &str) -> (Vec<String>, String) {
    /// Characters that may appear inside a Perl identifier.
    fn is_identifier_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    /// Index of the delimiter closing the list opened at `open_index`, if present.
    fn find_closing_delimiter(chars: &[char], open_index: usize) -> Option<usize> {
        let open = chars[open_index];
        let body_start = open_index + 1;
        let close = match open {
            '(' => ')',
            '[' => ']',
            '{' => '}',
            '<' => '>',
            // Non-bracket delimiters do not nest: the next identical char closes the list.
            _ => {
                return chars[body_start..]
                    .iter()
                    .position(|&c| c == open)
                    .map(|offset| body_start + offset);
            }
        };

        let mut depth = 1usize;
        for (offset, &c) in chars[body_start..].iter().enumerate() {
            if c == open {
                depth += 1;
            } else if c == close {
                depth -= 1;
                if depth == 0 {
                    return Some(body_start + offset);
                }
            }
        }
        None
    }

    let chars: Vec<char> = input.chars().collect();
    let mut words = Vec::new();
    let mut remainder = String::new();
    let mut i = 0;

    while i < chars.len() {
        let at_qw_keyword = chars[i] == 'q'
            && chars.get(i + 1) == Some(&'w')
            && (i == 0 || !is_identifier_char(chars[i - 1]));
        if !at_qw_keyword {
            remainder.push(chars[i]);
            i += 1;
            continue;
        }

        // Whitespace is allowed between `qw` and its opening delimiter.
        let mut open_index = i + 2;
        while open_index < chars.len() && chars[open_index].is_whitespace() {
            open_index += 1;
        }

        // No delimiter at all, or something that cannot be one: treat the `q` as
        // ordinary text and keep scanning from the next character.
        let is_delimiter = chars
            .get(open_index)
            .is_some_and(|&c| !(is_identifier_char(c) || c == '\'' || c == '"'));
        if !is_delimiter {
            remainder.push(chars[i]);
            i += 1;
            continue;
        }

        let close_index = match find_closing_delimiter(&chars, open_index) {
            Some(index) => index,
            None => {
                // Unterminated list: keep the rest of the input untouched.
                remainder.extend(chars[i..].iter());
                break;
            }
        };

        let content: String = chars[open_index + 1..close_index].iter().collect();
        words.extend(content.split_whitespace().map(str::to_owned));
        i = close_index + 1;
    }

    (words, remainder)
}
4294
4295fn extract_module_name_from_require_args(args: &[Node]) -> Option<String> {
4296    let first = args.first()?;
4297    match &first.kind {
4298        NodeKind::Identifier { name } => Some(name.clone()),
4299        NodeKind::String { value, .. } => {
4300            let cleaned = value.trim_matches('\'').trim_matches('"').trim();
4301            if cleaned.is_empty() {
4302                return None;
4303            }
4304            Some(cleaned.trim_end_matches(".pm").replace('/', "::"))
4305        }
4306        _ => None,
4307    }
4308}
4309
4310fn extract_manual_import_symbols(args: &[Node]) -> Vec<String> {
4311    fn push_if_bareword(out: &mut Vec<String>, token: &str) {
4312        let bare = token.trim().trim_matches('"').trim_matches('\'').trim();
4313        if bare.is_empty() || bare == "," {
4314            return;
4315        }
4316        let is_bareword = bare.bytes().all(|ch| ch.is_ascii_alphanumeric() || ch == b'_')
4317            && bare.as_bytes().first().is_some_and(|ch| ch.is_ascii_alphabetic() || *ch == b'_');
4318        if is_bareword {
4319            out.push(bare.to_string());
4320        }
4321    }
4322
4323    let mut symbols = Vec::new();
4324    for arg in args {
4325        match &arg.kind {
4326            NodeKind::String { value, .. } => push_if_bareword(&mut symbols, value),
4327            NodeKind::Identifier { name } => {
4328                if name.starts_with("qw") {
4329                    let content = name
4330                        .trim_start_matches("qw")
4331                        .trim_start_matches(|c: char| "([{/<|!".contains(c))
4332                        .trim_end_matches(|c: char| ")]}/|!>".contains(c));
4333                    for token in content.split_whitespace() {
4334                        push_if_bareword(&mut symbols, token);
4335                    }
4336                } else {
4337                    push_if_bareword(&mut symbols, name);
4338                }
4339            }
4340            NodeKind::ArrayLiteral { elements } => {
4341                for element in elements {
4342                    if let NodeKind::String { value, .. } = &element.kind {
4343                        push_if_bareword(&mut symbols, value);
4344                    }
4345                }
4346            }
4347            _ => {}
4348        }
4349    }
4350    symbols.sort();
4351    symbols.dedup();
4352    symbols
4353}
4354
/// Extract constant names from the `args` field of a `use constant` `NodeKind::Use` node.
///
/// The parser serialises `use constant` args in several distinct forms:
///
/// **Scalar form** — `use constant FOO => 42;`
///   → args: `["FOO", "42"]`  (the `=>` is consumed by the parser, not stored)
///   → The first arg is the constant name; remaining args are the value.
///
/// **Hash form** — `use constant { FOO => 1, BAR => 2 };`
///   → args: `["{", "FOO", "=>", "1", ",", "BAR", "=>", "2", "}"]`
///   → Tokens immediately followed by `=>` are constant names. A leading `+`
///     (either as its own token or fused as `+{`) is accepted.
///
/// **qw form** — `use constant qw(FOO BAR);`
///   → args: `["qw(FOO BAR)"]`
///   → Words inside the qw list are constant names.
///
/// A first arg that merely *begins* with the letters `qw` and continues as an
/// identifier (e.g. `qwerty`) is a scalar-form constant name, not a qw list.
///
/// Names may be wrapped in quotes; wrapping quotes/brackets are stripped.
///
/// Returns a deduplicated list of bare constant names (e.g. `["FOO", "BAR"]`),
/// in first-seen order.
#[cfg(test)]
fn extract_constant_names_from_use_args(args: &[String]) -> Vec<String> {
    use std::collections::HashSet;

    /// Append `candidate` unless it has already been recorded.
    fn push_unique(names: &mut Vec<String>, seen: &mut HashSet<String>, candidate: &str) {
        if seen.insert(candidate.to_string()) {
            names.push(candidate.to_string());
        }
    }

    /// Strip wrapping quotes/brackets/separators and accept only `[\w]+` tokens that
    /// do not start with `-`.
    fn normalize_constant_name(token: &str) -> Option<&str> {
        let stripped = token.trim_matches(|c: char| {
            matches!(c, '\'' | '"' | '(' | ')' | '[' | ']' | '{' | '}' | ',' | ';')
        });

        if stripped.is_empty() || stripped.starts_with('-') {
            return None;
        }

        stripped.chars().all(|c| c.is_alphanumeric() || c == '_').then_some(stripped)
    }

    /// Characters that may continue a Perl identifier.
    fn is_identifier_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    let mut names = Vec::new();
    let mut seen = HashSet::new();

    let Some(first) = args.first().map(String::as_str) else {
        return names;
    };

    // qw form: the first arg is `qw` followed by a delimiter (or nothing). When `qw`
    // runs straight into an identifier character the token is an ordinary name such
    // as `qwerty`, and is handled by the scalar form below.
    let is_qw_form =
        first.strip_prefix("qw").is_some_and(|rest| !rest.starts_with(is_identifier_char));
    if is_qw_form {
        let (qw_words, remainder) = extract_qw_words(first);
        if remainder.trim().is_empty() {
            for word in &qw_words {
                if let Some(candidate) = normalize_constant_name(word) {
                    push_unique(&mut names, &mut seen, candidate);
                }
            }
            return names;
        }

        // Fallback for odd tokenisation: tolerate `qw` followed by spacing before the opener.
        let content = first.trim_start_matches("qw").trim_start();
        let content = content
            .trim_start_matches(|c: char| "([{/<|!".contains(c))
            .trim_end_matches(|c: char| ")]}/|!>".contains(c));
        for word in content.split_whitespace() {
            if let Some(candidate) = normalize_constant_name(word) {
                push_unique(&mut names, &mut seen, candidate);
            }
        }
        return names;
    }

    // Hash form: args start with "{", "+{", or "+" followed by "{"
    let starts_hash_form = first == "{"
        || first == "+{"
        || (first == "+" && args.get(1).map(String::as_str) == Some("{"));
    if starts_hash_form {
        // A name is any token directly followed by `=>`. Structural tokens
        // (`{`, `}`, `,`, `=>`, `+`, `+{`) never normalise to a name, so they need
        // no explicit skipping.
        for pair in args.windows(2) {
            if pair[1] != "=>" {
                continue;
            }
            if let Some(candidate) = normalize_constant_name(&pair[0]) {
                push_unique(&mut names, &mut seen, candidate);
            }
        }
        return names;
    }

    // Scalar form: first arg is the constant name (if it is a plain identifier)
    // Remaining args are the value and are skipped.
    if let Some(candidate) = normalize_constant_name(first) {
        push_unique(&mut names, &mut seen, candidate);
    }

    names
}
4468
4469impl Default for WorkspaceIndex {
4470    fn default() -> Self {
4471        Self::new()
4472    }
4473}
4474
/// LSP adapter for converting internal Location types to LSP types.
///
/// LSP adapter utilities for Navigate/Analyze workflows.
#[cfg(all(feature = "workspace", feature = "lsp-compat"))]
pub mod lsp_adapter {
    use super::Location as IxLocation;
    use lsp_types::Location as LspLocation;
    // lsp_types uses Uri, not Url
    type LspUrl = lsp_types::Uri;

    /// Convert an internal location to an LSP Location for Navigate workflows.
    ///
    /// # Arguments
    ///
    /// * `ix` - Internal index location with URI and range information.
    ///
    /// # Returns
    ///
    /// `Some(LspLocation)` when conversion succeeds, or `None` if URI parsing fails.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_parser::workspace_index::{Location as IxLocation, lsp_adapter::to_lsp_location};
    /// use lsp_types::Range;
    ///
    /// let ix_loc = IxLocation { uri: "file:///path.pl".to_string(), range: Range::default() };
    /// let _ = to_lsp_location(&ix_loc);
    /// ```
    pub fn to_lsp_location(ix: &IxLocation) -> Option<LspLocation> {
        let uri = parse_url(&ix.uri)?;

        // Index line/column values are copied as-is into the LSP line/character fields.
        let (from, to) = (&ix.range.start, &ix.range.end);
        let range = lsp_types::Range {
            start: lsp_types::Position { line: from.line, character: from.column },
            end: lsp_types::Position { line: to.line, character: to.column },
        };

        Some(LspLocation { uri, range })
    }

    /// Convert multiple index locations to LSP Locations for Navigate/Analyze workflows.
    ///
    /// # Arguments
    ///
    /// * `all` - Iterator of internal index locations to convert.
    ///
    /// # Returns
    ///
    /// Vector of successfully converted LSP locations, with invalid entries filtered out.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use perl_parser::workspace_index::{Location as IxLocation, lsp_adapter::to_lsp_locations};
    /// use lsp_types::Range;
    ///
    /// let locations = vec![IxLocation { uri: "file:///script1.pl".to_string(), range: Range::default() }];
    /// let lsp_locations = to_lsp_locations(locations);
    /// assert_eq!(lsp_locations.len(), 1);
    /// ```
    pub fn to_lsp_locations(all: impl IntoIterator<Item = IxLocation>) -> Vec<LspLocation> {
        let mut converted = Vec::new();
        for ix in all {
            // Locations whose URI cannot be parsed are dropped.
            converted.extend(to_lsp_location(&ix));
        }
        converted
    }

    /// Parse a string as a URI, falling back to treating it as a filesystem path.
    #[cfg(not(target_arch = "wasm32"))]
    fn parse_url(s: &str) -> Option<LspUrl> {
        // lsp_types::Uri uses FromStr, not TryFrom
        use std::str::FromStr;

        // Try parsing as URI first
        if let Ok(uri) = LspUrl::from_str(s) {
            return Some(uri);
        }

        // Try as a file path if URI parsing fails
        let absolute = std::path::Path::new(s).canonicalize().ok()?;
        // Use proper URI construction with percent-encoding
        let uri_string = crate::workspace_index::fs_path_to_uri(&absolute).ok()?;
        LspUrl::from_str(&uri_string).ok()
    }

    /// Parse a string as a URL (wasm32 version - no filesystem fallback)
    #[cfg(target_arch = "wasm32")]
    fn parse_url(s: &str) -> Option<LspUrl> {
        use std::str::FromStr;
        LspUrl::from_str(s).ok()
    }
}
4562
4563#[cfg(test)]
4564mod tests {
4565    use super::*;
4566    use perl_tdd_support::{must, must_some};
4567
    /// Indexes a package declaring constants in scalar and hash form and checks that
    /// each name appears as a `SymbolKind::Constant` file symbol, and that the
    /// package-qualified name resolves through `find_definition`.
    #[test]
    fn test_use_constant_indexed_as_constant_symbol() {
        let index = WorkspaceIndex::new();
        let uri = "file:///lib/My/Config.pm";
        // One scalar-form constant (PI) and two hash-form constants.
        let code = r#"package My::Config;
use constant PI => 3.14159;
use constant {
    MAX_RETRIES => 3,
    TIMEOUT     => 30,
};
1;
"#;
        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));

        let symbols = index.file_symbols(uri);
        assert!(
            symbols.iter().any(|s| s.name == "PI" && s.kind == SymbolKind::Constant),
            "PI should be indexed as a Constant symbol; got: {:?}",
            symbols.iter().map(|s| (&s.name, &s.kind)).collect::<Vec<_>>()
        );
        assert!(
            symbols.iter().any(|s| s.name == "MAX_RETRIES" && s.kind == SymbolKind::Constant),
            "MAX_RETRIES should be indexed"
        );
        assert!(
            symbols.iter().any(|s| s.name == "TIMEOUT" && s.kind == SymbolKind::Constant),
            "TIMEOUT should be indexed"
        );

        // Qualified lookup should also work
        let def = index.find_definition("My::Config::PI");
        assert!(def.is_some(), "find_definition('My::Config::PI') should succeed");
    }
4601
    /// A name repeated inside a qw list is reported once, in first-seen order.
    #[test]
    fn test_extract_constant_names_deduplicates_qw_form() {
        let names = extract_constant_names_from_use_args(&["qw(FOO BAR FOO)".to_string()]);
        assert_eq!(names, vec!["FOO", "BAR"]);
    }
4607
    /// Scalar form with a quoted name: the quotes are stripped and only the first
    /// arg is treated as the constant name.
    #[test]
    fn test_extract_constant_names_accepts_quoted_scalar_form() {
        let names = extract_constant_names_from_use_args(&[
            "'HTTP_OK'".to_string(),
            "=>".to_string(),
            "200".to_string(),
        ]);
        assert_eq!(names, vec!["HTTP_OK"]);
    }
4617
    /// A Moo `has` attribute must surface only through the generated-symbol search,
    /// labelled `[generated/framework]`, and never through the source-symbol or
    /// general symbol searches. Also checks query trimming, blank queries, and that
    /// the generated search depends on the entity's provenance.
    #[test]
    fn search_symbols_returns_labeled_generated_framework_members()
    -> Result<(), Box<dyn std::error::Error>> {
        let index = WorkspaceIndex::new();
        let uri = "file:///lib/Generated/Pilot.pm";
        let code = r#"package Generated::Pilot;
use Moo;
has display_name => (is => 'rw');
1;
"#;
        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));

        // Source-symbol search must not report the generated accessor, with or
        // without surrounding whitespace in the query.
        let source_symbols = index.search_source_symbols("display_name");
        assert!(
            source_symbols.is_empty(),
            "generated framework members must not enter the exact source-symbol slice"
        );
        let trimmed_source_symbols = index.search_source_symbols("  display_name  ");
        assert!(
            trimmed_source_symbols.is_empty(),
            "trimmed generated framework member queries must not enter the exact source-symbol slice"
        );

        // Generated-symbol search finds exactly one labelled entry; padded queries
        // behave the same and a whitespace-only query yields nothing.
        let generated_symbols = index.search_generated_workspace_symbols("display_name");
        assert_eq!(generated_symbols.len(), 1);
        let trimmed_generated_symbols =
            index.search_generated_workspace_symbols("  display_name  ");
        assert_eq!(trimmed_generated_symbols.len(), 1);
        assert_eq!(trimmed_generated_symbols[0].name, "display_name [generated/framework]");
        assert!(index.search_generated_workspace_symbols("   ").is_empty());
        let symbol = &generated_symbols[0];
        assert_eq!(symbol.name, "display_name [generated/framework]");
        assert_eq!(symbol.kind, SymbolKind::Method);
        assert_eq!(symbol.qualified_name.as_deref(), Some("Generated::Pilot::display_name"));
        assert_eq!(
            symbol.container_name.as_deref(),
            Some("Generated::Pilot [generated/framework]")
        );
        assert!(!symbol.has_body);
        assert_eq!(symbol.uri, uri);
        assert!(
            symbol.range.end.byte > symbol.range.start.byte,
            "generated symbol must be anchored to the source framework declaration"
        );

        let live_symbols = index.search_symbols("display_name");
        assert!(
            live_symbols.is_empty(),
            "general workspace index search must stay source-backed; generated pilot symbols are opt-in"
        );

        // Rewrite the stored entity's provenance in place; the write guard is scoped
        // so it is released before the index is queried again.
        {
            let mut shards = index.fact_shards.write();
            let shard = shards.values_mut().next().ok_or("missing generated-member shard")?;
            let entity = shard
                .entities
                .iter_mut()
                .find(|entity| entity.canonical_name == "Generated::Pilot::display_name")
                .ok_or("missing generated member entity")?;
            entity.provenance = Provenance::ExactAst;
        }
        // With non-framework provenance the entity must no longer be reported.
        let non_framework_symbols = index.search_generated_workspace_symbols("display_name");
        assert!(
            non_framework_symbols.is_empty(),
            "generated workspace-symbol pilot must require framework-synthesis provenance"
        );
        Ok(())
    }
4686
    /// A Moo attribute declared with `predicate => 1` must expose its `has_status`
    /// predicate only through the generated-symbol search, labelled
    /// `[generated/framework]`, and not through the source or general searches.
    #[test]
    fn search_symbols_returns_labeled_predicate_generated_members()
    -> Result<(), Box<dyn std::error::Error>> {
        let index = WorkspaceIndex::new();
        let uri = "file:///lib/Generated/PredicatePilot.pm";
        let code = r#"package Generated::PredicatePilot;
use Moo;
has status => (is => 'rw', predicate => 1);
1;
"#;
        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));

        let source_symbols = index.search_source_symbols("has_status");
        assert!(
            source_symbols.is_empty(),
            "predicate generated members must not enter the exact source-symbol slice"
        );

        // Exactly one generated entry, carrying the generated/framework label on both
        // its name and its container.
        let generated_symbols = index.search_generated_workspace_symbols("has_status");
        assert_eq!(generated_symbols.len(), 1);
        let symbol = &generated_symbols[0];
        assert_eq!(symbol.name, "has_status [generated/framework]");
        assert_eq!(symbol.kind, SymbolKind::Method);
        assert_eq!(symbol.qualified_name.as_deref(), Some("Generated::PredicatePilot::has_status"));
        assert_eq!(
            symbol.container_name.as_deref(),
            Some("Generated::PredicatePilot [generated/framework]")
        );
        assert!(!symbol.has_body);
        assert_eq!(symbol.uri, uri);
        assert!(
            symbol.range.end.byte > symbol.range.start.byte,
            "predicate generated symbol must be anchored to the source framework declaration"
        );

        let live_symbols = index.search_symbols("has_status");
        assert!(
            live_symbols.is_empty(),
            "general workspace index search must stay source-backed for predicate generated members"
        );
        Ok(())
    }
4729
4730    #[test]
4731    fn test_extract_constant_names_accepts_quoted_hash_form() {
4732        let names = extract_constant_names_from_use_args(&[
4733            "{".to_string(),
4734            "'FOO'".to_string(),
4735            "=>".to_string(),
4736            "1".to_string(),
4737            ",".to_string(),
4738            "\"BAR\"".to_string(),
4739            "=>".to_string(),
4740            "2".to_string(),
4741            "}".to_string(),
4742        ]);
4743        assert_eq!(names, vec!["FOO", "BAR"]);
4744    }
4745
4746    #[test]
4747    fn test_extract_constant_names_accepts_plus_hash_form_split_tokens() {
4748        let names = extract_constant_names_from_use_args(&[
4749            "+".to_string(),
4750            "{".to_string(),
4751            "FOO".to_string(),
4752            "=>".to_string(),
4753            "1".to_string(),
4754            ",".to_string(),
4755            "BAR".to_string(),
4756            "=>".to_string(),
4757            "2".to_string(),
4758            "}".to_string(),
4759        ]);
4760        assert_eq!(names, vec!["FOO", "BAR"]);
4761    }
4762
4763    #[test]
4764    fn test_extract_constant_names_accepts_plus_hash_form_combined_token() {
4765        let names = extract_constant_names_from_use_args(&[
4766            "+{".to_string(),
4767            "FOO".to_string(),
4768            "=>".to_string(),
4769            "1".to_string(),
4770            ",".to_string(),
4771            "BAR".to_string(),
4772            "=>".to_string(),
4773            "2".to_string(),
4774            "}".to_string(),
4775        ]);
4776        assert_eq!(names, vec!["FOO", "BAR"]);
4777    }
4778    #[test]
4779    fn test_use_constant_duplicate_names_indexed_once() {
4780        let index = WorkspaceIndex::new();
4781        let uri = "file:///lib/My/DedupConfig.pm";
4782        let code = r#"package My::DedupConfig;
4783use constant {
4784    RETRY_COUNT => 3,
4785    RETRY_COUNT => 5,
4786};
47871;
4788"#;
4789        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4790
4791        let symbols = index.file_symbols(uri);
4792        let retry_count_symbols = symbols.iter().filter(|s| s.name == "RETRY_COUNT").count();
4793        assert_eq!(
4794            retry_count_symbols, 1,
4795            "RETRY_COUNT should be indexed once even when repeated in use constant hash form"
4796        );
4797    }
4798
4799    #[test]
4800    fn test_use_constant_plus_hash_form_indexes_keys() {
4801        let index = WorkspaceIndex::new();
4802        let uri = "file:///lib/My/PlusHash.pm";
4803        let code = r#"package My::PlusHash;
4804use constant +{
4805    FOO => 1,
4806    BAR => 2,
4807};
48081;
4809"#;
4810        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4811
4812        assert!(index.find_definition("My::PlusHash::FOO").is_some());
4813        assert!(index.find_definition("My::PlusHash::BAR").is_some());
4814    }
4815
4816    #[test]
4817    fn test_basic_indexing() {
4818        let index = WorkspaceIndex::new();
4819        let uri = "file:///test.pl";
4820
4821        let code = r#"
4822package MyPackage;
4823
4824sub hello {
4825    print "Hello";
4826}
4827
4828my $var = 42;
4829"#;
4830
4831        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4832
4833        // Should have indexed the package and subroutine
4834        let symbols = index.file_symbols(uri);
4835        assert!(symbols.iter().any(|s| s.name == "MyPackage" && s.kind == SymbolKind::Package));
4836        assert!(symbols.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
4837        assert!(symbols.iter().any(|s| s.name == "$var" && s.kind.is_variable()));
4838    }
4839
4840    #[test]
4841    fn test_package_symbol_has_no_container_name() {
4842        // Regression: project_symbol_declarations used to set container_name = Some("main")
4843        // for top-level package declarations because the IndexVisitor starts with
4844        // current_package = Some("main").  Package symbols are top-level declarations
4845        // and must have container_name = None.
4846        let index = WorkspaceIndex::new();
4847        let uri = "file:///lib/Foo.pm";
4848        let code = "package Foo;\nsub bar { }\n";
4849        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4850
4851        let symbols = index.file_symbols(uri);
4852        let pkg_sym =
4853            must_some(symbols.iter().find(|s| s.name == "Foo" && s.kind == SymbolKind::Package));
4854        assert_eq!(
4855            pkg_sym.container_name, None,
4856            "Package symbol must not carry a container (was 'main')"
4857        );
4858    }
4859
4860    #[test]
4861    fn test_my_variable_has_no_qualified_name() {
4862        // Regression: project_symbol_declarations used to set qualified_name = Some("Foo::x")
4863        // for `my $x` inside `package Foo`, making `find_definition("Foo::x")` return the
4864        // lexical variable.  `my` variables are not package-visible and must have
4865        // qualified_name = None so qualified lookups don't match them.
4866        let index = WorkspaceIndex::new();
4867        let uri = "file:///lib/Foo.pm";
4868        let code = "package Foo;\nsub bar { my $x = 1; }\n";
4869        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4870
4871        let symbols = index.file_symbols(uri);
4872        let var_sym = must_some(symbols.iter().find(|s| s.name == "$x" && s.kind.is_variable()));
4873        assert_eq!(var_sym.qualified_name, None, "my variable must not have a qualified_name");
4874
4875        // `find_definition("Foo::x")` must not accidentally resolve to a lexical variable.
4876        assert!(
4877            index.find_definition("Foo::x").is_none(),
4878            "find_definition(\"Foo::x\") must not return a lexical my variable"
4879        );
4880    }
4881
4882    fn reference_kinds_for(
4883        index: &WorkspaceIndex,
4884        uri: &str,
4885        symbol_name: &str,
4886    ) -> Vec<ReferenceKind> {
4887        let files = index.files.read();
4888        let file = must_some(files.get(uri));
4889        file.references
4890            .get(symbol_name)
4891            .map(|refs| refs.iter().map(|r| r.kind).collect())
4892            .unwrap_or_default()
4893    }
4894
4895    #[test]
4896    fn test_reference_kinds_sub_definition_and_call_are_distinct() {
4897        let index = WorkspaceIndex::new();
4898        let uri = "file:///typed-refs-sub.pl";
4899        let code = "package TypedRefs;
4900sub foo { return 1; }
4901foo();
4902";
4903        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4904
4905        let kinds = reference_kinds_for(&index, uri, "foo");
4906        assert!(kinds.contains(&ReferenceKind::Definition));
4907        assert!(kinds.contains(&ReferenceKind::Usage));
4908    }
4909
4910    #[test]
4911    fn test_reference_kinds_variable_read_and_write_are_distinct() {
4912        let index = WorkspaceIndex::new();
4913        let uri = "file:///typed-refs-var.pl";
4914        let code = "my $value = 1;
4915$value = 2;
4916print $value;
4917";
4918        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4919
4920        let kinds = reference_kinds_for(&index, uri, "$value");
4921        assert!(kinds.contains(&ReferenceKind::Definition));
4922        assert!(kinds.contains(&ReferenceKind::Write));
4923        assert!(kinds.contains(&ReferenceKind::Read));
4924    }
4925
4926    #[test]
4927    fn test_reference_kinds_import_parent_and_export_ok_are_currently_import_only() {
4928        let index = WorkspaceIndex::new();
4929        let uri = "file:///typed-refs-import-export.pm";
4930        let code = "package Child;
4931use parent 'Base';
4932our @EXPORT_OK = qw(foo);
49331;
4934";
4935        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4936
4937        let parent_kinds = reference_kinds_for(&index, uri, "Base");
4938        assert!(
4939            parent_kinds.is_empty(),
4940            "use parent inheritance edges are currently not stored as typed references"
4941        );
4942
4943        let export_symbol_kinds = reference_kinds_for(&index, uri, "foo");
4944        assert!(
4945            export_symbol_kinds.is_empty(),
4946            "EXPORT_OK entries are currently not represented as reference edges"
4947        );
4948    }
4949
4950    #[test]
4951    fn test_reference_kinds_dynamic_and_meta_edges_are_not_typed_yet() {
4952        let index = WorkspaceIndex::new();
4953        let uri = "file:///typed-refs-dynamic.pl";
4954        let code = r#"package TypedRefs;
4955sub foo { 1 }
4956&foo;
4957my $code = \&foo;
4958goto &foo;
4959*alias = \&foo;
4960eval "foo()";
4961with 'RoleName';
4962has 'name' => (is => 'ro');
49631;
4964"#;
4965        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4966
4967        let foo_kinds = reference_kinds_for(&index, uri, "foo");
4968        assert!(
4969            foo_kinds
4970                .iter()
4971                .all(|kind| matches!(kind, ReferenceKind::Definition | ReferenceKind::Usage)),
4972            r"dynamic call forms (&foo, \&foo, goto &foo) are currently flattened to Usage"
4973        );
4974
4975        assert!(
4976            reference_kinds_for(&index, uri, "RoleName").is_empty(),
4977            "role composition edges (`with 'RoleName'`) are not indexed as typed references yet"
4978        );
4979    }
4980
4981    #[test]
4982    fn test_find_references() {
4983        let index = WorkspaceIndex::new();
4984        let uri = "file:///test.pl";
4985
4986        let code = r#"
4987sub test {
4988    my $x = 1;
4989    $x = 2;
4990    print $x;
4991}
4992"#;
4993
4994        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
4995
4996        let refs = index.find_references("$x");
4997        assert!(refs.len() >= 2); // Definition + at least one usage
4998    }
4999
5000    #[test]
5001    fn test_find_references_bare_name_includes_qualified_calls() {
5002        let index = WorkspaceIndex::new();
5003        let uri = "file:///refs.pl";
5004        let code = r#"
5005package RefDemo;
5006sub helper {
5007    return 1;
5008}
5009
5010helper();
5011RefDemo::helper();
5012"#;
5013
5014        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
5015
5016        let bare_refs = index.find_references("helper");
5017        let qualified_refs = index.find_references("RefDemo::helper");
5018
5019        assert!(
5020            bare_refs.len() >= qualified_refs.len(),
5021            "bare-name reference lookup should include qualified calls"
5022        );
5023    }
5024
5025    #[test]
5026    fn test_count_usages_bare_name_includes_qualified_calls() {
5027        let index = WorkspaceIndex::new();
5028        let uri = "file:///usage.pl";
5029        let code = r#"
5030package UsageDemo;
5031sub helper {
5032    return 1;
5033}
5034
5035helper();
5036UsageDemo::helper();
5037"#;
5038
5039        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
5040
5041        let bare_usage_count = index.count_usages("helper");
5042        let qualified_usage_count = index.count_usages("UsageDemo::helper");
5043
5044        assert!(
5045            bare_usage_count >= qualified_usage_count,
5046            "bare-name usage count should include qualified call sites"
5047        );
5048    }
5049
5050    #[test]
5051    fn test_dependencies() {
5052        let index = WorkspaceIndex::new();
5053        let uri = "file:///test.pl";
5054
5055        let code = r#"
5056use strict;
5057use warnings;
5058use Data::Dumper;
5059"#;
5060
5061        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
5062
5063        let deps = index.file_dependencies(uri);
5064        assert!(deps.contains("strict"));
5065        assert!(deps.contains("warnings"));
5066        assert!(deps.contains("Data::Dumper"));
5067    }
5068
5069    #[test]
5070    fn test_uri_to_fs_path_basic() {
5071        // Test basic file:// URI conversion
5072        if let Some(path) = uri_to_fs_path("file:///tmp/test.pl") {
5073            assert_eq!(path, std::path::PathBuf::from("/tmp/test.pl"));
5074        }
5075
5076        // Test with invalid URI
5077        assert!(uri_to_fs_path("not-a-uri").is_none());
5078
5079        // Test with non-file scheme
5080        assert!(uri_to_fs_path("http://example.com").is_none());
5081    }
5082
5083    #[test]
5084    fn test_uri_to_fs_path_with_spaces() {
5085        // Test with percent-encoded spaces
5086        if let Some(path) = uri_to_fs_path("file:///tmp/path%20with%20spaces/test.pl") {
5087            assert_eq!(path, std::path::PathBuf::from("/tmp/path with spaces/test.pl"));
5088        }
5089
5090        // Test with multiple spaces and special characters
5091        if let Some(path) = uri_to_fs_path("file:///tmp/My%20Documents/test%20file.pl") {
5092            assert_eq!(path, std::path::PathBuf::from("/tmp/My Documents/test file.pl"));
5093        }
5094    }
5095
5096    #[test]
5097    fn test_uri_to_fs_path_with_unicode() {
5098        // Test with Unicode characters (percent-encoded)
5099        if let Some(path) = uri_to_fs_path("file:///tmp/caf%C3%A9/test.pl") {
5100            assert_eq!(path, std::path::PathBuf::from("/tmp/café/test.pl"));
5101        }
5102
5103        // Test with Unicode emoji (percent-encoded)
5104        if let Some(path) = uri_to_fs_path("file:///tmp/emoji%F0%9F%98%80/test.pl") {
5105            assert_eq!(path, std::path::PathBuf::from("/tmp/emoji😀/test.pl"));
5106        }
5107    }
5108
5109    #[test]
5110    fn test_fs_path_to_uri_basic() {
5111        // Test basic path to URI conversion
5112        let result = fs_path_to_uri("/tmp/test.pl");
5113        assert!(result.is_ok());
5114        let uri = must(result);
5115        assert!(uri.starts_with("file://"));
5116        assert!(uri.contains("/tmp/test.pl"));
5117    }
5118
5119    #[test]
5120    fn test_fs_path_to_uri_with_spaces() {
5121        // Test path with spaces
5122        let result = fs_path_to_uri("/tmp/path with spaces/test.pl");
5123        assert!(result.is_ok());
5124        let uri = must(result);
5125        assert!(uri.starts_with("file://"));
5126        // Should contain percent-encoded spaces
5127        assert!(uri.contains("path%20with%20spaces"));
5128    }
5129
5130    #[test]
5131    fn test_fs_path_to_uri_with_unicode() {
5132        // Test path with Unicode characters
5133        let result = fs_path_to_uri("/tmp/café/test.pl");
5134        assert!(result.is_ok());
5135        let uri = must(result);
5136        assert!(uri.starts_with("file://"));
5137        // Should contain percent-encoded Unicode
5138        assert!(uri.contains("caf%C3%A9"));
5139    }
5140
5141    #[test]
5142    fn test_normalize_uri_file_schemes() {
5143        // Test normalization of valid file URIs
5144        let uri = WorkspaceIndex::normalize_uri("file:///tmp/test.pl");
5145        assert_eq!(uri, "file:///tmp/test.pl");
5146
5147        // Test normalization of URIs with spaces
5148        let uri = WorkspaceIndex::normalize_uri("file:///tmp/path%20with%20spaces/test.pl");
5149        assert_eq!(uri, "file:///tmp/path%20with%20spaces/test.pl");
5150    }
5151
5152    #[test]
5153    fn test_normalize_uri_absolute_paths() {
5154        // Test normalization of absolute paths (convert to file:// URI)
5155        let uri = WorkspaceIndex::normalize_uri("/tmp/test.pl");
5156        assert!(uri.starts_with("file://"));
5157        assert!(uri.contains("/tmp/test.pl"));
5158    }
5159
5160    #[test]
5161    fn test_normalize_uri_special_schemes() {
5162        // Test that special schemes like untitled: are preserved
5163        let uri = WorkspaceIndex::normalize_uri("untitled:Untitled-1");
5164        assert_eq!(uri, "untitled:Untitled-1");
5165    }
5166
5167    #[test]
5168    fn test_roundtrip_conversion() {
5169        // Test that URI -> path -> URI conversion preserves the URI
5170        let original_uri = "file:///tmp/path%20with%20spaces/caf%C3%A9.pl";
5171
5172        if let Some(path) = uri_to_fs_path(original_uri) {
5173            if let Ok(converted_uri) = fs_path_to_uri(&path) {
5174                // Should be able to round-trip back to an equivalent URI
5175                assert!(converted_uri.starts_with("file://"));
5176
5177                // The path component should decode correctly
5178                if let Some(roundtrip_path) = uri_to_fs_path(&converted_uri) {
5179                    #[cfg(windows)]
5180                    if let Ok(rootless) = path.strip_prefix(std::path::Path::new(r"\")) {
5181                        assert!(roundtrip_path.ends_with(rootless));
5182                    } else {
5183                        assert_eq!(path, roundtrip_path);
5184                    }
5185
5186                    #[cfg(not(windows))]
5187                    assert_eq!(path, roundtrip_path);
5188                }
5189            }
5190        }
5191    }
5192
5193    #[cfg(target_os = "windows")]
5194    #[test]
5195    fn test_windows_paths() {
5196        // Test Windows-style paths
5197        let result = fs_path_to_uri(r"C:\Users\test\Documents\script.pl");
5198        assert!(result.is_ok());
5199        let uri = must(result);
5200        assert!(uri.starts_with("file://"));
5201
5202        // Test Windows path with spaces
5203        let result = fs_path_to_uri(r"C:\Program Files\My App\script.pl");
5204        assert!(result.is_ok());
5205        let uri = must(result);
5206        assert!(uri.starts_with("file://"));
5207        assert!(uri.contains("Program%20Files"));
5208    }
5209
5210    // ========================================================================
5211    // IndexCoordinator Tests
5212    // ========================================================================
5213
5214    #[test]
5215    fn test_coordinator_initial_state() {
5216        let coordinator = IndexCoordinator::new();
5217        assert!(matches!(
5218            coordinator.state(),
5219            IndexState::Building { phase: IndexPhase::Idle, .. }
5220        ));
5221    }
5222
5223    #[test]
5224    fn test_transition_to_scanning_phase() {
5225        let coordinator = IndexCoordinator::new();
5226        coordinator.transition_to_scanning();
5227
5228        let state = coordinator.state();
5229        assert!(
5230            matches!(state, IndexState::Building { phase: IndexPhase::Scanning, .. }),
5231            "Expected Building state after scanning, got: {:?}",
5232            state
5233        );
5234    }
5235
5236    #[test]
5237    fn test_transition_to_indexing_phase() {
5238        let coordinator = IndexCoordinator::new();
5239        coordinator.transition_to_scanning();
5240        coordinator.update_scan_progress(3);
5241        coordinator.transition_to_indexing(3);
5242
5243        let state = coordinator.state();
5244        assert!(
5245            matches!(
5246                state,
5247                IndexState::Building { phase: IndexPhase::Indexing, total_count: 3, .. }
5248            ),
5249            "Expected Building state after indexing with total_count 3, got: {:?}",
5250            state
5251        );
5252    }
5253
5254    #[test]
5255    fn test_transition_to_ready() {
5256        let coordinator = IndexCoordinator::new();
5257        coordinator.transition_to_ready(100, 5000);
5258
5259        let state = coordinator.state();
5260        if let IndexState::Ready { file_count, symbol_count, .. } = state {
5261            assert_eq!(file_count, 100);
5262            assert_eq!(symbol_count, 5000);
5263        } else {
5264            unreachable!("Expected Ready state, got: {:?}", state);
5265        }
5266    }
5267
5268    #[test]
5269    fn test_parse_storm_degradation() {
5270        let coordinator = IndexCoordinator::new();
5271        coordinator.transition_to_ready(100, 5000);
5272
5273        // Trigger parse storm
5274        for _ in 0..15 {
5275            coordinator.notify_change("file.pm");
5276        }
5277
5278        let state = coordinator.state();
5279        assert!(
5280            matches!(state, IndexState::Degraded { .. }),
5281            "Expected Degraded state, got: {:?}",
5282            state
5283        );
5284        if let IndexState::Degraded { reason, .. } = state {
5285            assert!(matches!(reason, DegradationReason::ParseStorm { .. }));
5286        }
5287    }
5288
5289    #[test]
5290    fn test_recovery_from_parse_storm() {
5291        let coordinator = IndexCoordinator::new();
5292        coordinator.transition_to_ready(100, 5000);
5293
5294        // Trigger parse storm
5295        for _ in 0..15 {
5296            coordinator.notify_change("file.pm");
5297        }
5298
5299        // Complete all parses
5300        for _ in 0..15 {
5301            coordinator.notify_parse_complete("file.pm");
5302        }
5303
5304        // Should recover to Building state
5305        assert!(matches!(coordinator.state(), IndexState::Building { .. }));
5306    }
5307
5308    #[test]
5309    fn test_query_dispatch_ready() {
5310        let coordinator = IndexCoordinator::new();
5311        coordinator.transition_to_ready(100, 5000);
5312
5313        let result = coordinator.query(|_index| "full_query", |_index| "partial_query");
5314
5315        assert_eq!(result, "full_query");
5316    }
5317
5318    #[test]
5319    fn test_query_dispatch_degraded() {
5320        let coordinator = IndexCoordinator::new();
5321        // Building state should use partial query
5322
5323        let result = coordinator.query(|_index| "full_query", |_index| "partial_query");
5324
5325        assert_eq!(result, "partial_query");
5326    }
5327
5328    #[test]
5329    fn test_metrics_pending_count() {
5330        let coordinator = IndexCoordinator::new();
5331
5332        coordinator.notify_change("file1.pm");
5333        coordinator.notify_change("file2.pm");
5334
5335        assert_eq!(coordinator.metrics.pending_count(), 2);
5336
5337        coordinator.notify_parse_complete("file1.pm");
5338        assert_eq!(coordinator.metrics.pending_count(), 1);
5339    }
5340
5341    #[test]
5342    fn test_instrumentation_records_transitions() {
5343        let coordinator = IndexCoordinator::new();
5344        coordinator.transition_to_ready(10, 100);
5345
5346        let snapshot = coordinator.instrumentation_snapshot();
5347        let transition =
5348            IndexStateTransition { from: IndexStateKind::Building, to: IndexStateKind::Ready };
5349        let count = snapshot.state_transition_counts.get(&transition).copied().unwrap_or(0);
5350        assert_eq!(count, 1);
5351    }
5352
5353    #[test]
5354    fn test_instrumentation_records_early_exit() {
5355        let coordinator = IndexCoordinator::new();
5356        coordinator.record_early_exit(EarlyExitReason::InitialTimeBudget, 25, 1, 10);
5357
5358        let snapshot = coordinator.instrumentation_snapshot();
5359        let count = snapshot
5360            .early_exit_counts
5361            .get(&EarlyExitReason::InitialTimeBudget)
5362            .copied()
5363            .unwrap_or(0);
5364        assert_eq!(count, 1);
5365        assert!(snapshot.last_early_exit.is_some());
5366    }
5367
5368    #[test]
5369    fn test_custom_limits() {
5370        let limits = IndexResourceLimits {
5371            max_files: 5000,
5372            max_symbols_per_file: 1000,
5373            max_total_symbols: 100_000,
5374            max_ast_cache_bytes: 128 * 1024 * 1024,
5375            max_ast_cache_items: 50,
5376            max_scan_duration_ms: 30_000,
5377        };
5378
5379        let coordinator = IndexCoordinator::with_limits(limits.clone());
5380        assert_eq!(coordinator.limits.max_files, 5000);
5381        assert_eq!(coordinator.limits.max_total_symbols, 100_000);
5382    }
5383
5384    #[test]
5385    fn test_degradation_preserves_symbol_count() {
5386        let coordinator = IndexCoordinator::new();
5387        coordinator.transition_to_ready(100, 5000);
5388
5389        coordinator.transition_to_degraded(DegradationReason::IoError {
5390            message: "Test error".to_string(),
5391        });
5392
5393        let state = coordinator.state();
5394        assert!(
5395            matches!(state, IndexState::Degraded { .. }),
5396            "Expected Degraded state, got: {:?}",
5397            state
5398        );
5399        if let IndexState::Degraded { available_symbols, .. } = state {
5400            assert_eq!(available_symbols, 5000);
5401        }
5402    }
5403
5404    #[test]
5405    fn test_index_access() {
5406        let coordinator = IndexCoordinator::new();
5407        let index = coordinator.index();
5408
5409        // Should have access to underlying WorkspaceIndex
5410        assert!(index.all_symbols().is_empty());
5411    }
5412
5413    #[test]
5414    fn test_resource_limit_enforcement_max_files() {
5415        let limits = IndexResourceLimits {
5416            max_files: 5,
5417            max_symbols_per_file: 1000,
5418            max_total_symbols: 50_000,
5419            max_ast_cache_bytes: 128 * 1024 * 1024,
5420            max_ast_cache_items: 50,
5421            max_scan_duration_ms: 30_000,
5422        };
5423
5424        let coordinator = IndexCoordinator::with_limits(limits);
5425        coordinator.transition_to_ready(10, 100);
5426
5427        // Index 10 files (exceeds limit of 5)
5428        for i in 0..10 {
5429            let uri_str = format!("file:///test{}.pl", i);
5430            let uri = must(url::Url::parse(&uri_str));
5431            let code = "sub test { }";
5432            must(coordinator.index().index_file(uri, code.to_string()));
5433        }
5434
5435        // Enforce limits
5436        coordinator.enforce_limits();
5437
5438        let state = coordinator.state();
5439        assert!(
5440            matches!(
5441                state,
5442                IndexState::Degraded {
5443                    reason: DegradationReason::ResourceLimit { kind: ResourceKind::MaxFiles },
5444                    ..
5445                }
5446            ),
5447            "Expected Degraded state with ResourceLimit(MaxFiles), got: {:?}",
5448            state
5449        );
5450    }
5451
5452    #[test]
5453    fn test_resource_limit_enforcement_max_symbols() {
5454        let limits = IndexResourceLimits {
5455            max_files: 100,
5456            max_symbols_per_file: 10,
5457            max_total_symbols: 50, // Very low limit for testing
5458            max_ast_cache_bytes: 128 * 1024 * 1024,
5459            max_ast_cache_items: 50,
5460            max_scan_duration_ms: 30_000,
5461        };
5462
5463        let coordinator = IndexCoordinator::with_limits(limits);
5464        coordinator.transition_to_ready(0, 0);
5465
5466        // Index files with many symbols to exceed total symbol limit
5467        for i in 0..10 {
5468            let uri_str = format!("file:///test{}.pl", i);
5469            let uri = must(url::Url::parse(&uri_str));
5470            // Each file has 10 subroutines = 100 total symbols (exceeds limit of 50)
5471            let code = r#"
5472package Test;
5473sub sub1 { }
5474sub sub2 { }
5475sub sub3 { }
5476sub sub4 { }
5477sub sub5 { }
5478sub sub6 { }
5479sub sub7 { }
5480sub sub8 { }
5481sub sub9 { }
5482sub sub10 { }
5483"#;
5484            must(coordinator.index().index_file(uri, code.to_string()));
5485        }
5486
5487        // Enforce limits
5488        coordinator.enforce_limits();
5489
5490        let state = coordinator.state();
5491        assert!(
5492            matches!(
5493                state,
5494                IndexState::Degraded {
5495                    reason: DegradationReason::ResourceLimit { kind: ResourceKind::MaxSymbols },
5496                    ..
5497                }
5498            ),
5499            "Expected Degraded state with ResourceLimit(MaxSymbols), got: {:?}",
5500            state
5501        );
5502    }
5503
5504    #[test]
5505    fn test_check_limits_returns_none_within_bounds() {
5506        let coordinator = IndexCoordinator::new();
5507        coordinator.transition_to_ready(0, 0);
5508
5509        // Index a few files well within default limits
5510        for i in 0..5 {
5511            let uri_str = format!("file:///test{}.pl", i);
5512            let uri = must(url::Url::parse(&uri_str));
5513            let code = "sub test { }";
5514            must(coordinator.index().index_file(uri, code.to_string()));
5515        }
5516
5517        // Should not trigger degradation
5518        let limit_check = coordinator.check_limits();
5519        assert!(limit_check.is_none(), "check_limits should return None when within bounds");
5520
5521        // State should still be Ready
5522        assert!(
5523            matches!(coordinator.state(), IndexState::Ready { .. }),
5524            "State should remain Ready when within limits"
5525        );
5526    }
5527
5528    #[test]
5529    fn test_enforce_limits_called_on_transition_to_ready() {
5530        let limits = IndexResourceLimits {
5531            max_files: 3,
5532            max_symbols_per_file: 1000,
5533            max_total_symbols: 50_000,
5534            max_ast_cache_bytes: 128 * 1024 * 1024,
5535            max_ast_cache_items: 50,
5536            max_scan_duration_ms: 30_000,
5537        };
5538
5539        let coordinator = IndexCoordinator::with_limits(limits);
5540
5541        // Index files before transitioning to ready
5542        for i in 0..5 {
5543            let uri_str = format!("file:///test{}.pl", i);
5544            let uri = must(url::Url::parse(&uri_str));
5545            let code = "sub test { }";
5546            must(coordinator.index().index_file(uri, code.to_string()));
5547        }
5548
5549        // Transition to ready - should automatically enforce limits
5550        coordinator.transition_to_ready(5, 100);
5551
5552        let state = coordinator.state();
5553        assert!(
5554            matches!(
5555                state,
5556                IndexState::Degraded {
5557                    reason: DegradationReason::ResourceLimit { kind: ResourceKind::MaxFiles },
5558                    ..
5559                }
5560            ),
5561            "Expected Degraded state after transition_to_ready with exceeded limits, got: {:?}",
5562            state
5563        );
5564    }
5565
5566    #[test]
5567    fn test_state_transition_guard_ready_to_ready() {
5568        // Test that Ready → Ready is allowed (metrics update)
5569        let coordinator = IndexCoordinator::new();
5570        coordinator.transition_to_ready(100, 5000);
5571
5572        // Transition to Ready again with different metrics
5573        coordinator.transition_to_ready(150, 7500);
5574
5575        let state = coordinator.state();
5576        assert!(
5577            matches!(state, IndexState::Ready { file_count: 150, symbol_count: 7500, .. }),
5578            "Expected Ready state with updated metrics, got: {:?}",
5579            state
5580        );
5581    }
5582
5583    #[test]
5584    fn test_state_transition_guard_building_to_building() {
5585        // Test that Building → Building is allowed (progress update)
5586        let coordinator = IndexCoordinator::new();
5587
5588        // Initial building state
5589        coordinator.transition_to_building(100);
5590
5591        let state = coordinator.state();
5592        assert!(
5593            matches!(state, IndexState::Building { indexed_count: 0, total_count: 100, .. }),
5594            "Expected Building state, got: {:?}",
5595            state
5596        );
5597
5598        // Update total count
5599        coordinator.transition_to_building(200);
5600
5601        let state = coordinator.state();
5602        assert!(
5603            matches!(state, IndexState::Building { indexed_count: 0, total_count: 200, .. }),
5604            "Expected Building state, got: {:?}",
5605            state
5606        );
5607    }
5608
5609    #[test]
5610    fn test_state_transition_ready_to_building() {
5611        // Test that Ready → Building is allowed (re-scan)
5612        let coordinator = IndexCoordinator::new();
5613        coordinator.transition_to_ready(100, 5000);
5614
5615        // Trigger re-scan
5616        coordinator.transition_to_building(150);
5617
5618        let state = coordinator.state();
5619        assert!(
5620            matches!(state, IndexState::Building { indexed_count: 0, total_count: 150, .. }),
5621            "Expected Building state after re-scan, got: {:?}",
5622            state
5623        );
5624    }
5625
5626    #[test]
5627    fn test_state_transition_degraded_to_building() {
5628        // Test that Degraded → Building is allowed (recovery)
5629        let coordinator = IndexCoordinator::new();
5630        coordinator.transition_to_degraded(DegradationReason::IoError {
5631            message: "Test error".to_string(),
5632        });
5633
5634        // Attempt recovery
5635        coordinator.transition_to_building(100);
5636
5637        let state = coordinator.state();
5638        assert!(
5639            matches!(state, IndexState::Building { indexed_count: 0, total_count: 100, .. }),
5640            "Expected Building state after recovery, got: {:?}",
5641            state
5642        );
5643    }
5644
5645    #[test]
5646    fn test_update_building_progress() {
5647        let coordinator = IndexCoordinator::new();
5648        coordinator.transition_to_building(100);
5649
5650        // Update progress
5651        coordinator.update_building_progress(50);
5652
5653        let state = coordinator.state();
5654        assert!(
5655            matches!(state, IndexState::Building { indexed_count: 50, total_count: 100, .. }),
5656            "Expected Building state with updated progress, got: {:?}",
5657            state
5658        );
5659
5660        // Update progress again
5661        coordinator.update_building_progress(100);
5662
5663        let state = coordinator.state();
5664        assert!(
5665            matches!(state, IndexState::Building { indexed_count: 100, total_count: 100, .. }),
5666            "Expected Building state with completed progress, got: {:?}",
5667            state
5668        );
5669    }
5670
5671    #[test]
5672    fn test_scan_timeout_detection() {
5673        // Test that scan timeout triggers degradation
5674        let limits = IndexResourceLimits {
5675            max_scan_duration_ms: 0, // Immediate timeout for testing
5676            ..Default::default()
5677        };
5678
5679        let coordinator = IndexCoordinator::with_limits(limits);
5680        coordinator.transition_to_building(100);
5681
5682        // Small sleep to ensure elapsed time > 0
5683        std::thread::sleep(std::time::Duration::from_millis(1));
5684
5685        // Update progress should detect timeout
5686        coordinator.update_building_progress(10);
5687
5688        let state = coordinator.state();
5689        assert!(
5690            matches!(
5691                state,
5692                IndexState::Degraded { reason: DegradationReason::ScanTimeout { .. }, .. }
5693            ),
5694            "Expected Degraded state with ScanTimeout, got: {:?}",
5695            state
5696        );
5697    }
5698
5699    #[test]
5700    fn test_scan_timeout_does_not_trigger_within_limit() {
5701        // Test that scan doesn't timeout within the limit
5702        let limits = IndexResourceLimits {
5703            max_scan_duration_ms: 10_000, // 10 seconds - should not trigger
5704            ..Default::default()
5705        };
5706
5707        let coordinator = IndexCoordinator::with_limits(limits);
5708        coordinator.transition_to_building(100);
5709
5710        // Update progress immediately (well within limit)
5711        coordinator.update_building_progress(50);
5712
5713        let state = coordinator.state();
5714        assert!(
5715            matches!(state, IndexState::Building { indexed_count: 50, .. }),
5716            "Expected Building state (no timeout), got: {:?}",
5717            state
5718        );
5719    }
5720
5721    #[test]
5722    fn test_early_exit_optimization_unchanged_content() {
5723        let index = WorkspaceIndex::new();
5724        let uri = must(url::Url::parse("file:///test.pl"));
5725        let code = r#"
5726package MyPackage;
5727
5728sub hello {
5729    print "Hello";
5730}
5731"#;
5732
5733        // First indexing should parse and index
5734        must(index.index_file(uri.clone(), code.to_string()));
5735        let symbols1 = index.file_symbols(uri.as_str());
5736        assert!(symbols1.iter().any(|s| s.name == "MyPackage" && s.kind == SymbolKind::Package));
5737        assert!(symbols1.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
5738
5739        // Second indexing with same content should early-exit
5740        // We can verify this by checking that the index still works correctly
5741        must(index.index_file(uri.clone(), code.to_string()));
5742        let symbols2 = index.file_symbols(uri.as_str());
5743        assert_eq!(symbols1.len(), symbols2.len());
5744        assert!(symbols2.iter().any(|s| s.name == "MyPackage" && s.kind == SymbolKind::Package));
5745        assert!(symbols2.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
5746    }
5747
5748    #[test]
5749    fn test_early_exit_optimization_changed_content() {
5750        let index = WorkspaceIndex::new();
5751        let uri = must(url::Url::parse("file:///test.pl"));
5752        let code1 = r#"
5753package MyPackage;
5754
5755sub hello {
5756    print "Hello";
5757}
5758"#;
5759
5760        let code2 = r#"
5761package MyPackage;
5762
5763sub goodbye {
5764    print "Goodbye";
5765}
5766"#;
5767
5768        // First indexing
5769        must(index.index_file(uri.clone(), code1.to_string()));
5770        let symbols1 = index.file_symbols(uri.as_str());
5771        assert!(symbols1.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
5772        assert!(!symbols1.iter().any(|s| s.name == "goodbye"));
5773
5774        // Second indexing with different content should re-parse
5775        must(index.index_file(uri.clone(), code2.to_string()));
5776        let symbols2 = index.file_symbols(uri.as_str());
5777        assert!(!symbols2.iter().any(|s| s.name == "hello"));
5778        assert!(symbols2.iter().any(|s| s.name == "goodbye" && s.kind == SymbolKind::Subroutine));
5779    }
5780
5781    #[test]
5782    fn test_early_exit_optimization_whitespace_only_change() {
5783        let index = WorkspaceIndex::new();
5784        let uri = must(url::Url::parse("file:///test.pl"));
5785        let code1 = r#"
5786package MyPackage;
5787
5788sub hello {
5789    print "Hello";
5790}
5791"#;
5792
5793        let code2 = r#"
5794package MyPackage;
5795
5796
5797sub hello {
5798    print "Hello";
5799}
5800"#;
5801
5802        // First indexing
5803        must(index.index_file(uri.clone(), code1.to_string()));
5804        let symbols1 = index.file_symbols(uri.as_str());
5805        assert!(symbols1.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
5806
5807        // Second indexing with whitespace change should re-parse (hash will differ)
5808        must(index.index_file(uri.clone(), code2.to_string()));
5809        let symbols2 = index.file_symbols(uri.as_str());
5810        // Symbols should still be found, but content hash differs so it re-indexed
5811        assert!(symbols2.iter().any(|s| s.name == "hello" && s.kind == SymbolKind::Subroutine));
5812    }
5813
5814    #[test]
5815    fn test_reindex_file_refreshes_symbol_cache_for_removed_names() {
5816        let index = WorkspaceIndex::new();
5817        let uri1 = must(url::Url::parse("file:///lib/A.pm"));
5818        let uri2 = must(url::Url::parse("file:///lib/B.pm"));
5819        let code1 = "package A;\nsub foo { return 1; }\n1;\n";
5820        let code2 = "package B;\nsub foo { return 2; }\n1;\n";
5821        let code2_reindexed = "package B;\nsub bar { return 3; }\n1;\n";
5822
5823        must(index.index_file(uri1.clone(), code1.to_string()));
5824        must(index.index_file(uri2.clone(), code2.to_string()));
5825        must(index.index_file(uri2.clone(), code2_reindexed.to_string()));
5826
5827        let foo_location = must_some(index.find_definition("foo"));
5828        assert_eq!(foo_location.uri, uri1.to_string());
5829
5830        let bar_location = must_some(index.find_definition("bar"));
5831        assert_eq!(bar_location.uri, uri2.to_string());
5832    }
5833
5834    #[test]
5835    fn test_remove_file_preserves_other_colliding_symbol_entries() {
5836        let index = WorkspaceIndex::new();
5837        let uri1 = must(url::Url::parse("file:///lib/A.pm"));
5838        let uri2 = must(url::Url::parse("file:///lib/B.pm"));
5839        let code1 = "package A;\nsub foo { return 1; }\n1;\n";
5840        let code2 = "package B;\nsub foo { return 2; }\n1;\n";
5841
5842        must(index.index_file(uri1.clone(), code1.to_string()));
5843        must(index.index_file(uri2.clone(), code2.to_string()));
5844
5845        index.remove_file(uri2.as_str());
5846
5847        let foo_location = must_some(index.find_definition("foo"));
5848        assert_eq!(foo_location.uri, uri1.to_string());
5849    }
5850
5851    #[test]
5852    fn test_count_usages_no_double_counting_for_qualified_calls() {
5853        let index = WorkspaceIndex::new();
5854
5855        // File 1: defines Utils::process_data
5856        let uri1 = "file:///lib/Utils.pm";
5857        let code1 = r#"
5858package Utils;
5859
5860sub process_data {
5861    return 1;
5862}
5863"#;
5864        must(index.index_file(must(url::Url::parse(uri1)), code1.to_string()));
5865
5866        // File 2: calls Utils::process_data (qualified call)
5867        let uri2 = "file:///app.pl";
5868        let code2 = r#"
5869use Utils;
5870Utils::process_data();
5871Utils::process_data();
5872"#;
5873        must(index.index_file(must(url::Url::parse(uri2)), code2.to_string()));
5874
5875        // Each qualified call is stored under both "process_data" and "Utils::process_data"
5876        // by the dual indexing strategy. count_usages should deduplicate so we get the
5877        // actual number of call sites, not double.
5878        let count = index.count_usages("Utils::process_data");
5879
5880        // We expect exactly 2 usage sites (the two calls in app.pl),
5881        // not 4 (which would be the double-counted result).
5882        assert_eq!(
5883            count, 2,
5884            "count_usages should not double-count qualified calls, got {} (expected 2)",
5885            count
5886        );
5887
5888        // find_references should also deduplicate
5889        let refs = index.find_references("Utils::process_data");
5890        let non_def_refs: Vec<_> =
5891            refs.iter().filter(|loc| loc.uri != "file:///lib/Utils.pm").collect();
5892        assert_eq!(
5893            non_def_refs.len(),
5894            2,
5895            "find_references should not return duplicates for qualified calls, got {} non-def refs",
5896            non_def_refs.len()
5897        );
5898    }
5899
5900    #[test]
5901    fn test_batch_indexing() {
5902        let index = WorkspaceIndex::new();
5903        let files: Vec<(Url, String)> = (0..5)
5904            .map(|i| {
5905                let uri = must(Url::parse(&format!("file:///batch/module{}.pm", i)));
5906                let code =
5907                    format!("package Batch::Mod{};\nsub func_{} {{ return {}; }}\n1;", i, i, i);
5908                (uri, code)
5909            })
5910            .collect();
5911
5912        let errors = index.index_files_batch(files);
5913        assert!(errors.is_empty(), "batch indexing errors: {:?}", errors);
5914        assert_eq!(index.file_count(), 5);
5915        assert!(index.find_definition("Batch::Mod0::func_0").is_some());
5916        assert!(index.find_definition("Batch::Mod4::func_4").is_some());
5917    }
5918
5919    #[test]
5920    fn test_batch_indexing_skips_unchanged() {
5921        let index = WorkspaceIndex::new();
5922        let uri = must(Url::parse("file:///batch/skip.pm"));
5923        let code = "package Skip;\nsub skip_fn { 1 }\n1;".to_string();
5924
5925        index.index_file(uri.clone(), code.clone()).ok();
5926        assert_eq!(index.file_count(), 1);
5927
5928        let errors = index.index_files_batch(vec![(uri, code)]);
5929        assert!(errors.is_empty());
5930        assert_eq!(index.file_count(), 1);
5931    }
5932
5933    #[test]
5934    fn test_incremental_update_preserves_other_symbols() {
5935        let index = WorkspaceIndex::new();
5936
5937        let uri_a = must(Url::parse("file:///incr/a.pm"));
5938        let uri_b = must(Url::parse("file:///incr/b.pm"));
5939        index.index_file(uri_a.clone(), "package A;\nsub a_func { 1 }\n1;".into()).ok();
5940        index.index_file(uri_b.clone(), "package B;\nsub b_func { 2 }\n1;".into()).ok();
5941
5942        assert!(index.find_definition("A::a_func").is_some());
5943        assert!(index.find_definition("B::b_func").is_some());
5944
5945        index.index_file(uri_a, "package A;\nsub a_func_v2 { 11 }\n1;".into()).ok();
5946
5947        assert!(index.find_definition("A::a_func_v2").is_some());
5948        assert!(index.find_definition("B::b_func").is_some());
5949    }
5950
5951    #[test]
5952    fn test_remove_file_preserves_shadowed_symbols() {
5953        let index = WorkspaceIndex::new();
5954
5955        let uri_a = must(Url::parse("file:///shadow/a.pm"));
5956        let uri_b = must(Url::parse("file:///shadow/b.pm"));
5957        index.index_file(uri_a.clone(), "package ShadowA;\nsub helper { 1 }\n1;".into()).ok();
5958        index.index_file(uri_b.clone(), "package ShadowB;\nsub helper { 2 }\n1;".into()).ok();
5959
5960        assert!(index.find_definition("helper").is_some());
5961
5962        index.remove_file_url(&uri_a);
5963        assert!(index.find_definition("helper").is_some());
5964        assert!(index.find_definition("ShadowB::helper").is_some());
5965    }
5966
5967    // -------------------------------------------------------------------------
5968    // find_dependents — use parent / use base integration (#2747)
5969    // -------------------------------------------------------------------------
5970
5971    #[test]
5972    fn test_index_dependency_via_use_parent_end_to_end() {
5973        // Regression for #2747: index a file with `use parent 'MyBase'` and verify
5974        // that find_dependents("MyBase") returns that file.
5975        // 1. Index MyBase.pm
5976        // 2. Index child.pl with `use parent 'MyBase'`
5977        // 3. find_dependents("MyBase") should return child.pl
5978        let index = WorkspaceIndex::new();
5979
5980        let base_url = must(url::Url::parse("file:///test/workspace/lib/MyBase.pm"));
5981        must(index.index_file(
5982            base_url,
5983            "package MyBase;\nsub new { bless {}, shift }\n1;\n".to_string(),
5984        ));
5985
5986        let child_url = must(url::Url::parse("file:///test/workspace/child.pl"));
5987        must(index.index_file(child_url, "package Child;\nuse parent 'MyBase';\n1;\n".to_string()));
5988
5989        let dependents = index.find_dependents("MyBase");
5990        assert!(
5991            !dependents.is_empty(),
5992            "find_dependents('MyBase') returned empty — \
5993             use parent 'MyBase' should register MyBase as a dependency. \
5994             Dependencies in index: {:?}",
5995            {
5996                let files = index.files.read();
5997                files
5998                    .iter()
5999                    .map(|(k, v)| (k.clone(), v.dependencies.iter().cloned().collect::<Vec<_>>()))
6000                    .collect::<Vec<_>>()
6001            }
6002        );
6003        assert!(
6004            dependents.contains(&"file:///test/workspace/child.pl".to_string()),
6005            "child.pl should be in dependents, got: {:?}",
6006            dependents
6007        );
6008    }
6009
6010    #[test]
6011    fn test_find_dependents_normalizes_legacy_separator_in_query() {
6012        let index = WorkspaceIndex::new();
6013        let uri = must(url::Url::parse("file:///test/workspace/legacy-query.pl"));
6014        let src = "package Child;\nuse parent 'My::Base';\n1;\n";
6015        must(index.index_file(uri, src.to_string()));
6016
6017        let dependents = index.find_dependents("My'Base");
6018        assert_eq!(dependents, vec!["file:///test/workspace/legacy-query.pl".to_string()]);
6019    }
6020
6021    #[test]
6022    fn test_file_dependencies_normalize_legacy_separator_in_source() {
6023        let index = WorkspaceIndex::new();
6024        let uri = must(url::Url::parse("file:///test/workspace/legacy-source.pl"));
6025        let src = "package Child;\nuse parent \"My'Base\";\n1;\n";
6026        must(index.index_file(uri.clone(), src.to_string()));
6027
6028        let deps = index.file_dependencies(uri.as_str());
6029        assert!(deps.contains("My::Base"));
6030        assert!(!deps.contains("My'Base"));
6031    }
6032
6033    #[test]
6034    fn test_index_dependency_via_moose_extends_end_to_end() -> Result<(), Box<dyn std::error::Error>>
6035    {
6036        let index = WorkspaceIndex::new();
6037
6038        let parent_url = must(url::Url::parse("file:///test/workspace/lib/My/App/Parent.pm"));
6039        must(index.index_file(parent_url, "package My::App::Parent;\n1;\n".to_string()));
6040
6041        let child_url = must(url::Url::parse("file:///test/workspace/child-moose.pl"));
6042        let child_src = "package Child;\nuse Moose;\nextends 'My::App::Parent';\n1;\n";
6043        must(index.index_file(child_url, child_src.to_string()));
6044
6045        let dependents = index.find_dependents("My::App::Parent");
6046        assert!(
6047            dependents.contains(&"file:///test/workspace/child-moose.pl".to_string()),
6048            "expected child-moose.pl in dependents, got: {dependents:?}"
6049        );
6050        Ok(())
6051    }
6052
6053    #[test]
6054    fn test_index_dependency_via_moo_with_role_end_to_end() -> Result<(), Box<dyn std::error::Error>>
6055    {
6056        let index = WorkspaceIndex::new();
6057
6058        let role_url = must(url::Url::parse("file:///test/workspace/lib/My/App/Role.pm"));
6059        must(index.index_file(role_url, "package My::App::Role;\n1;\n".to_string()));
6060
6061        let consumer_url = must(url::Url::parse("file:///test/workspace/consumer-moo.pl"));
6062        let consumer_src = "package Consumer;\nuse Moo;\nwith 'My::App::Role';\n1;\n";
6063        must(index.index_file(consumer_url.clone(), consumer_src.to_string()));
6064
6065        let dependents = index.find_dependents("My::App::Role");
6066        assert!(
6067            dependents.contains(&"file:///test/workspace/consumer-moo.pl".to_string()),
6068            "expected consumer-moo.pl in dependents, got: {dependents:?}"
6069        );
6070
6071        let deps = index.file_dependencies(consumer_url.as_str());
6072        assert!(deps.contains("My::App::Role"));
6073        Ok(())
6074    }
6075
6076    #[test]
6077    fn test_index_dependency_via_literal_require_end_to_end()
6078    -> Result<(), Box<dyn std::error::Error>> {
6079        let index = WorkspaceIndex::new();
6080        let uri = must(url::Url::parse("file:///test/workspace/require-consumer.pl"));
6081        let src = "package Consumer;\nrequire My::Loader;\n1;\n";
6082        must(index.index_file(uri.clone(), src.to_string()));
6083
6084        let deps = index.file_dependencies(uri.as_str());
6085        assert!(
6086            deps.contains("My::Loader"),
6087            "literal require should register module dependency, got: {deps:?}"
6088        );
6089        Ok(())
6090    }
6091
6092    #[test]
6093    fn test_manual_import_symbols_are_indexed_as_import_references()
6094    -> Result<(), Box<dyn std::error::Error>> {
6095        let index = WorkspaceIndex::new();
6096        let uri = must(url::Url::parse("file:///test/workspace/manual-import.pl"));
6097        let src = r#"package Consumer;
6098require My::Tools;
6099My::Tools->import(qw(helper_one helper_two));
6100helper_one();
61011;
6102"#;
6103        must(index.index_file(uri.clone(), src.to_string()));
6104
6105        let deps = index.file_dependencies(uri.as_str());
6106        assert!(
6107            deps.contains("My::Tools"),
6108            "manual import target should be tracked as dependency, got: {deps:?}"
6109        );
6110
6111        for symbol in ["helper_one", "helper_two"] {
6112            let refs = index.find_references(symbol);
6113            assert!(
6114                !refs.is_empty(),
6115                "expected at least one indexed reference for imported symbol `{symbol}`"
6116            );
6117        }
6118        Ok(())
6119    }
6120
6121    #[test]
6122    fn test_parser_produces_correct_args_for_use_parent() {
6123        // Regression for #2747: verify that the parser produces args=["'MyBase'"]
6124        // for `use parent 'MyBase'`, so extract_module_names_from_use_args strips
6125        // the quotes and registers the dependency under the bare name "MyBase".
6126        use crate::Parser;
6127        let mut p = Parser::new("package Child;\nuse parent 'MyBase';\n1;\n");
6128        let ast = must(p.parse());
6129        assert!(
6130            matches!(ast.kind, NodeKind::Program { .. }),
6131            "Expected Program root, got {:?}",
6132            ast.kind
6133        );
6134        let NodeKind::Program { statements } = &ast.kind else {
6135            return;
6136        };
6137        let mut found_parent_use = false;
6138        for stmt in statements {
6139            if let NodeKind::Use { module, args, .. } = &stmt.kind {
6140                if module == "parent" {
6141                    found_parent_use = true;
6142                    assert_eq!(
6143                        args,
6144                        &["'MyBase'".to_string()],
6145                        "Expected args=[\"'MyBase'\"] for `use parent 'MyBase'`, got: {:?}",
6146                        args
6147                    );
6148                    let extracted = extract_module_names_from_use_args(args);
6149                    assert_eq!(
6150                        extracted,
6151                        vec!["MyBase".to_string()],
6152                        "extract_module_names_from_use_args should return [\"MyBase\"], got {:?}",
6153                        extracted
6154                    );
6155                }
6156            }
6157        }
6158        assert!(found_parent_use, "No Use node with module='parent' found in AST");
6159    }
6160
6161    // -------------------------------------------------------------------------
6162    // extract_module_names_from_use_args — unit tests (#2747)
6163    // -------------------------------------------------------------------------
6164
6165    #[test]
6166    fn test_extract_module_names_single_quoted() {
6167        let names = extract_module_names_from_use_args(&["'Foo::Bar'".to_string()]);
6168        assert_eq!(names, vec!["Foo::Bar"]);
6169    }
6170
6171    #[test]
6172    fn test_extract_module_names_double_quoted() {
6173        let names = extract_module_names_from_use_args(&["\"Foo::Bar\"".to_string()]);
6174        assert_eq!(names, vec!["Foo::Bar"]);
6175    }
6176
6177    #[test]
6178    fn test_extract_module_names_qw_list() {
6179        let names = extract_module_names_from_use_args(&["qw(Foo::Bar Other::Base)".to_string()]);
6180        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
6181    }
6182
6183    #[test]
6184    fn test_extract_module_names_qw_slash_delimiter() {
6185        let names = extract_module_names_from_use_args(&["qw/Foo::Bar Other::Base/".to_string()]);
6186        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
6187    }
6188
6189    #[test]
6190    fn test_extract_module_names_qw_with_space_before_delimiter() {
6191        let names = extract_module_names_from_use_args(&["qw [Foo::Bar Other::Base]".to_string()]);
6192        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
6193    }
6194
6195    #[test]
6196    fn test_extract_module_names_qw_list_trims_wrapped_punctuation() {
6197        let names =
6198            extract_module_names_from_use_args(&["qw((Foo::Bar) [Other::Base],)".to_string()]);
6199        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
6200    }
6201
6202    #[test]
6203    fn test_extract_module_names_norequire_flag() {
6204        let names = extract_module_names_from_use_args(&[
6205            "-norequire".to_string(),
6206            "'Foo::Bar'".to_string(),
6207        ]);
6208        assert_eq!(names, vec!["Foo::Bar"]);
6209    }
6210
6211    #[test]
6212    fn test_extract_module_names_empty_args() {
6213        let names = extract_module_names_from_use_args(&[]);
6214        assert!(names.is_empty());
6215    }
6216
6217    #[test]
6218    fn test_extract_module_names_legacy_separator() {
6219        // Perl legacy package separator ' (tick) inside module name
6220        let names = extract_module_names_from_use_args(&["'Foo'Bar'".to_string()]);
6221        // Legacy separators are normalized for downstream dependency matching.
6222        assert_eq!(names, vec!["Foo::Bar"]);
6223    }
6224
6225    #[test]
6226    fn test_find_dependents_matches_legacy_separator_queries() {
6227        let index = WorkspaceIndex::new();
6228        let base_uri = must(url::Url::parse("file:///test/workspace/lib/Foo/Bar.pm"));
6229        let child_uri = must(url::Url::parse("file:///test/workspace/child.pl"));
6230
6231        must(index.index_file(base_uri, "package Foo::Bar;\n1;\n".to_string()));
6232        must(index.index_file(
6233            child_uri.clone(),
6234            "package Child;\nuse parent qw(Foo'Bar);\n1;\n".to_string(),
6235        ));
6236
6237        let dependents_modern = index.find_dependents("Foo::Bar");
6238        assert!(
6239            dependents_modern.contains(&child_uri.to_string()),
6240            "Expected dependency match when queried with modern separator"
6241        );
6242
6243        let dependents_legacy = index.find_dependents("Foo'Bar");
6244        assert!(
6245            dependents_legacy.contains(&child_uri.to_string()),
6246            "Expected dependency match when queried with legacy separator"
6247        );
6248    }
6249
6250    #[test]
6251    fn test_extract_module_names_comma_adjacent_tokens() {
6252        let names = extract_module_names_from_use_args(&[
6253            "'Foo::Bar',".to_string(),
6254            "\"Other::Base\",".to_string(),
6255            "'Last::One'".to_string(),
6256        ]);
6257        assert_eq!(names, vec!["Foo::Bar", "Other::Base", "Last::One"]);
6258    }
6259
6260    #[test]
6261    fn test_extract_module_names_parenthesized_without_spaces() {
6262        let names = extract_module_names_from_use_args(&["('Foo::Bar','Other::Base')".to_string()]);
6263        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
6264    }
6265
6266    #[test]
6267    fn test_extract_module_names_deduplicates_identical_entries() {
6268        let names = extract_module_names_from_use_args(&[
6269            "qw(Foo::Bar Foo::Bar)".to_string(),
6270            "'Foo::Bar'".to_string(),
6271        ]);
6272        assert_eq!(names, vec!["Foo::Bar"]);
6273    }
6274
6275    #[test]
6276    fn test_extract_module_names_trims_semicolon_suffix() {
6277        let names = extract_module_names_from_use_args(&[
6278            "'Foo::Bar',".to_string(),
6279            "'Other::Base',".to_string(),
6280            "'Third::Leaf';".to_string(),
6281        ]);
6282        assert_eq!(names, vec!["Foo::Bar", "Other::Base", "Third::Leaf"]);
6283    }
6284
6285    #[test]
6286    fn test_extract_module_names_trims_wrapped_punctuation() {
6287        let names = extract_module_names_from_use_args(&[
6288            "('Foo::Bar',".to_string(),
6289            "'Other::Base')".to_string(),
6290        ]);
6291        assert_eq!(names, vec!["Foo::Bar", "Other::Base"]);
6292    }
6293
6294    #[test]
6295    fn test_extract_constant_names_qw_with_space_before_delimiter() {
6296        let names = extract_constant_names_from_use_args(&["qw [FOO BAR]".to_string()]);
6297        assert_eq!(names, vec!["FOO", "BAR"]);
6298    }
6299
6300    #[test]
6301    #[ignore = "qw delimiter with leading space not yet parsed; tracked in debt-ledger.yaml"]
6302    fn test_index_use_constant_qw_with_space_before_delimiter() {
6303        let index = WorkspaceIndex::new();
6304        let uri = must(url::Url::parse("file:///workspace/lib/My/Config.pm"));
6305        let source = "package My::Config;\nuse constant qw [FOO BAR];\n1;\n";
6306
6307        must(index.index_file(uri, source.to_string()));
6308
6309        let foo = index.find_definition("My::Config::FOO");
6310        let bar = index.find_definition("My::Config::BAR");
6311        assert!(foo.is_some(), "Expected My::Config::FOO to be indexed");
6312        assert!(bar.is_some(), "Expected My::Config::BAR to be indexed");
6313    }
6314
6315    #[test]
6316    fn test_with_capacity_accepts_large_batch_without_panic() {
6317        let index = WorkspaceIndex::with_capacity(100, 20);
6318        for i in 0..100 {
6319            let uri = must(url::Url::parse(&format!("file:///lib/Mod{}.pm", i)));
6320            let src = format!("package Mod{};\nsub foo_{} {{ 1 }}\n1;\n", i, i);
6321            index.index_file(uri, src).ok();
6322        }
6323        assert!(index.has_symbols());
6324    }
6325
6326    #[test]
6327    fn test_with_capacity_zero_does_not_panic() {
6328        let index = WorkspaceIndex::with_capacity(0, 0);
6329        assert!(!index.has_symbols());
6330    }
6331
6332    // -------------------------------------------------------------------------
6333    // remove_file — symbol cache cleanup (#3494)
6334    // -------------------------------------------------------------------------
6335
6336    /// After removing the only file that defines a symbol, both qualified and
6337    /// bare-name lookups must return None.  The symbols cache must not retain
6338    /// stale entries pointing to the deleted file.
6339    #[test]
6340    fn test_remove_file_clears_symbol_cache_qualified_and_bare() {
6341        let index = WorkspaceIndex::new();
6342        let uri_a = must(url::Url::parse("file:///lib/A.pm"));
6343        let code_a = "package A;\nsub foo { return 1; }\n1;\n";
6344
6345        must(index.index_file(uri_a.clone(), code_a.to_string()));
6346
6347        // Pre-condition: both qualified and bare-name lookups resolve to file A.
6348        let before_qual = must_some(index.find_definition("A::foo"));
6349        assert_eq!(
6350            before_qual.uri,
6351            uri_a.to_string(),
6352            "qualified lookup should point to A.pm before removal"
6353        );
6354        let before_bare = must_some(index.find_definition("foo"));
6355        assert_eq!(
6356            before_bare.uri,
6357            uri_a.to_string(),
6358            "bare-name lookup should point to A.pm before removal"
6359        );
6360
6361        // Remove file A from the index (simulates file deletion).
6362        index.remove_file(uri_a.as_str());
6363
6364        // Post-condition: the symbol cache must be clean — no stale entries.
6365        assert!(
6366            index.find_definition("A::foo").is_none(),
6367            "qualified lookup 'A::foo' should return None after file deletion"
6368        );
6369        assert!(
6370            index.find_definition("foo").is_none(),
6371            "bare-name lookup 'foo' should return None after file deletion"
6372        );
6373
6374        // Verify no symbols remain in the index.
6375        assert_eq!(
6376            index.symbol_count(),
6377            0,
6378            "symbol_count should be 0 after removing the only file"
6379        );
6380        assert!(!index.has_symbols(), "has_symbols should be false after removing the only file");
6381    }
6382
6383    /// Deleting file A when file B has the same bare-name symbol must leave
6384    /// the bare-name cache pointing to B (not remove it entirely).
6385    #[test]
6386    fn test_remove_file_bare_name_falls_back_to_surviving_file() {
6387        let index = WorkspaceIndex::new();
6388        let uri_a = must(url::Url::parse("file:///lib/A.pm"));
6389        let uri_b = must(url::Url::parse("file:///lib/B.pm"));
6390        let code_a = "package A;\nsub shared_fn { return 1; }\n1;\n";
6391        let code_b = "package B;\nsub shared_fn { return 2; }\n1;\n";
6392
6393        must(index.index_file(uri_a.clone(), code_a.to_string()));
6394        must(index.index_file(uri_b.clone(), code_b.to_string()));
6395
6396        // Remove file A — shared_fn should still resolve via B.
6397        index.remove_file(uri_a.as_str());
6398
6399        let loc = must_some(index.find_definition("shared_fn"));
6400        assert_eq!(
6401            loc.uri,
6402            uri_b.to_string(),
6403            "bare-name 'shared_fn' should resolve to B.pm after A.pm is deleted"
6404        );
6405
6406        assert!(
6407            index.find_definition("A::shared_fn").is_none(),
6408            "qualified 'A::shared_fn' must be gone after A.pm deletion"
6409        );
6410        assert!(
6411            index.find_definition("B::shared_fn").is_some(),
6412            "qualified 'B::shared_fn' must remain after A.pm deletion"
6413        );
6414    }
6415
6416    #[test]
6417    fn test_definition_candidates_include_ambiguous_bare_symbols_in_stable_order() {
6418        let index = WorkspaceIndex::new();
6419        let uri_b = must(url::Url::parse("file:///lib/B.pm"));
6420        let uri_a = must(url::Url::parse("file:///lib/A.pm"));
6421        must(index.index_file(uri_b, "package B;\nsub shared { 1 }\n1;\n".to_string()));
6422        must(index.index_file(uri_a, "package A;\nsub shared { 1 }\n1;\n".to_string()));
6423
6424        let candidates = index.definition_candidates("shared");
6425        assert_eq!(candidates.len(), 2);
6426        assert_eq!(candidates[0].uri, "file:///lib/A.pm");
6427        assert_eq!(candidates[1].uri, "file:///lib/B.pm");
6428        assert_eq!(must_some(index.find_definition("shared")).uri, "file:///lib/A.pm");
6429    }
6430
6431    #[test]
6432    fn test_definition_candidates_include_duplicate_qualified_name_across_files() {
6433        let index = WorkspaceIndex::new();
6434        let uri_v2 = must(url::Url::parse("file:///lib/A-v2.pm"));
6435        let uri_v1 = must(url::Url::parse("file:///lib/A-v1.pm"));
6436        let source = "package A;\nsub foo { 1 }\n1;\n".to_string();
6437        must(index.index_file(uri_v2, source.clone()));
6438        must(index.index_file(uri_v1, source));
6439
6440        let candidates = index.definition_candidates("A::foo");
6441        assert_eq!(candidates.len(), 2);
6442        assert_eq!(candidates[0].uri, "file:///lib/A-v1.pm");
6443        assert_eq!(candidates[1].uri, "file:///lib/A-v2.pm");
6444    }
6445
6446    #[test]
6447    fn test_definition_candidates_are_cleaned_on_remove_and_reindex() {
6448        let index = WorkspaceIndex::new();
6449        let uri = must(url::Url::parse("file:///lib/A.pm"));
6450        must(index.index_file(uri.clone(), "package A;\nsub foo { 1 }\n1;\n".to_string()));
6451        assert_eq!(index.definition_candidates("A::foo").len(), 1);
6452
6453        index.remove_file(uri.as_str());
6454        assert!(index.definition_candidates("A::foo").is_empty());
6455
6456        must(index.index_file(uri, "package A;\nsub foo { 2 }\n1;\n".to_string()));
6457        assert_eq!(index.definition_candidates("A::foo").len(), 1);
6458    }
6459
6460    /// Verify that `incremental_remove_symbols` correctly retains candidates owned by
6461    /// other files when the removed file had BOTH exclusively-owned names (triggering the
6462    /// full-rebuild path) AND shared names. Before this fix, the full-rebuild path cleared
6463    /// all candidates and relied on the subsequent rebuild to re-add shared ones — correct
6464    /// in effect, but the test documents the expected observable behavior.
6465    #[test]
6466    fn test_definition_candidates_shared_symbol_survives_removal_of_sole_owner_of_other_symbol() {
6467        let index = WorkspaceIndex::new();
6468        let uri_a = must(url::Url::parse("file:///lib/A.pm"));
6469        let uri_b = must(url::Url::parse("file:///lib/B.pm"));
6470
6471        // A defines both `unique_to_a` (no other file) and `shared` (also in B)
6472        must(index.index_file(
6473            uri_a.clone(),
6474            "package A;\nsub unique_to_a { 1 }\nsub shared { 1 }\n1;\n".to_string(),
6475        ));
6476        must(index.index_file(uri_b.clone(), "package B;\nsub shared { 1 }\n1;\n".to_string()));
6477
6478        // Before removal: both shared candidates and unique_to_a are present
6479        assert_eq!(index.definition_candidates("shared").len(), 2);
6480        assert_eq!(index.definition_candidates("unique_to_a").len(), 1);
6481
6482        // Remove A — triggers the affected_names path for `unique_to_a`, but `shared`
6483        // still has B's candidate.
6484        index.remove_file(uri_a.as_str());
6485
6486        assert!(
6487            index.definition_candidates("unique_to_a").is_empty(),
6488            "unique_to_a should be gone after removing A"
6489        );
6490        assert_eq!(
6491            index.definition_candidates("shared").len(),
6492            1,
6493            "shared should still have B's candidate after removing A"
6494        );
6495        assert_eq!(
6496            index.definition_candidates("shared")[0].uri,
6497            "file:///lib/B.pm",
6498            "remaining shared candidate must be from B"
6499        );
6500    }
6501
6502    #[test]
6503    fn test_folder_context_in_file_index() {
6504        let index = WorkspaceIndex::new();
6505
6506        // Set up workspace folders
6507        index.set_workspace_folders(vec![
6508            "file:///project1".to_string(),
6509            "file:///project2".to_string(),
6510        ]);
6511
6512        let uri1 = "file:///project1/lib/Module.pm";
6513        let code1 = r#"
6514package Module;
6515
6516sub test_sub {
6517    return 1;
6518}
6519"#;
6520        must(index.index_file(must(url::Url::parse(uri1)), code1.to_string()));
6521
6522        let uri2 = "file:///project2/lib/Other.pm";
6523        let code2 = r#"
6524package Other;
6525
6526sub other_sub {
6527    return 2;
6528}
6529"#;
6530        must(index.index_file(must(url::Url::parse(uri2)), code2.to_string()));
6531
6532        // Verify folder context is set correctly
6533        let symbols1 = index.file_symbols(uri1);
6534        assert_eq!(symbols1.len(), 2, "Should have 2 symbols in Module.pm");
6535        for symbol in &symbols1 {
6536            assert_eq!(symbol.uri, uri1, "Symbol URI should match file URI");
6537        }
6538
6539        let symbols2 = index.file_symbols(uri2);
6540        assert_eq!(symbols2.len(), 2, "Should have 2 symbols in Other.pm");
6541        for symbol in &symbols2 {
6542            assert_eq!(symbol.uri, uri2, "Symbol URI should match file URI");
6543        }
6544
6545        // Verify folder attribution
6546        let files = index.files.read();
6547        let file_index1 = must_some(files.get(&DocumentStore::uri_key(uri1)));
6548        assert_eq!(
6549            file_index1.folder_uri,
6550            Some("file:///project1".to_string()),
6551            "File should be attributed to correct workspace folder"
6552        );
6553
6554        let file_index2 = must_some(files.get(&DocumentStore::uri_key(uri2)));
6555        assert_eq!(
6556            file_index2.folder_uri,
6557            Some("file:///project2".to_string()),
6558            "File should be attributed to correct workspace folder"
6559        );
6560    }
6561
6562    #[test]
6563    fn test_determine_folder_uri() {
6564        let index = WorkspaceIndex::new();
6565
6566        // Set up workspace folders
6567        index.set_workspace_folders(vec![
6568            "file:///project1".to_string(),
6569            "file:///project2".to_string(),
6570        ]);
6571
6572        // Test file in project1
6573        let folder1 = index.determine_folder_uri("file:///project1/lib/Module.pm");
6574        assert_eq!(
6575            folder1,
6576            Some("file:///project1".to_string()),
6577            "Should determine folder for file in project1"
6578        );
6579
6580        // Test file in project2
6581        let folder2 = index.determine_folder_uri("file:///project2/lib/Other.pm");
6582        assert_eq!(
6583            folder2,
6584            Some("file:///project2".to_string()),
6585            "Should determine folder for file in project2"
6586        );
6587
6588        // Test file not in any workspace folder
6589        let folder_none = index.determine_folder_uri("file:///other/project/Module.pm");
6590        assert_eq!(folder_none, None, "Should return None for file outside workspace folders");
6591    }
6592
6593    #[test]
6594    fn test_determine_folder_uri_prefers_most_specific_match() {
6595        let index = WorkspaceIndex::new();
6596
6597        // Keep broad folder first to ensure we don't rely on insertion order.
6598        index.set_workspace_folders(vec![
6599            "file:///project".to_string(),
6600            "file:///project/lib".to_string(),
6601        ]);
6602
6603        let folder = index.determine_folder_uri("file:///project/lib/My/Module.pm");
6604        assert_eq!(
6605            folder,
6606            Some("file:///project/lib".to_string()),
6607            "Nested workspace folders should attribute files to the most specific folder"
6608        );
6609    }
6610
6611    #[test]
6612    fn test_remove_folder() {
6613        let index = WorkspaceIndex::new();
6614
6615        // Set up workspace folders
6616        index.set_workspace_folders(vec![
6617            "file:///project1".to_string(),
6618            "file:///project2".to_string(),
6619        ]);
6620
6621        // Index files from both folders
6622        let uri1 = "file:///project1/lib/Module.pm";
6623        let code1 = r#"
6624package Module;
6625
6626sub test_sub {
6627    return 1;
6628}
6629"#;
6630        must(index.index_file(must(url::Url::parse(uri1)), code1.to_string()));
6631
6632        let uri2 = "file:///project2/lib/Other.pm";
6633        let code2 = r#"
6634package Other;
6635
6636sub other_sub {
6637    return 2;
6638}
6639"#;
6640        must(index.index_file(must(url::Url::parse(uri2)), code2.to_string()));
6641
6642        // Verify both files are indexed
6643        assert_eq!(index.file_count(), 2, "Should have 2 files indexed");
6644        assert_eq!(index.document_store().count(), 2, "Document store should track both files");
6645
6646        // Remove project1 folder
6647        index.remove_folder("file:///project1");
6648
6649        // Verify only project2 file remains
6650        assert_eq!(index.file_count(), 1, "Should have 1 file after removing folder");
6651        assert_eq!(
6652            index.document_store().count(),
6653            1,
6654            "Document store should drop files removed via folder deletion"
6655        );
6656        assert!(index.file_symbols(uri1).is_empty(), "File from removed folder should be gone");
6657        assert_eq!(
6658            index.file_symbols(uri2).len(),
6659            2,
6660            "File from remaining folder should still be present"
6661        );
6662    }
6663
6664    #[test]
6665    fn test_remove_folder_removes_symbol_free_files() {
6666        let index = WorkspaceIndex::new();
6667        index.set_workspace_folders(vec!["file:///project1".to_string()]);
6668
6669        let uri = "file:///project1/empty.pl";
6670        must(index.index_file(must(url::Url::parse(uri)), "# comments only".to_string()));
6671        assert_eq!(index.file_count(), 1, "Expected file to be indexed");
6672
6673        index.remove_folder("file:///project1");
6674
6675        assert_eq!(index.file_count(), 0, "Folder removal should delete symbol-free files");
6676        assert_eq!(
6677            index.document_store().count(),
6678            0,
6679            "Document store should stay in sync for symbol-free files"
6680        );
6681    }
6682
6683    // ========================================================================
6684    // GREEN-TDD EDGE CASE TESTS FOR ISSUE #6061 (static require + manual import)
6685    // ========================================================================
6686
6687    #[test]
6688    fn test_require_with_variable_target_is_not_indexed() -> Result<(), Box<dyn std::error::Error>>
6689    {
6690        let index = WorkspaceIndex::new();
6691        let uri = must(url::Url::parse("file:///test/require-var.pl"));
6692        let src = r#"package Test;
6693my $loader = 'MyModule';
6694require $loader;
66951;
6696"#;
6697        must(index.index_file(uri.clone(), src.to_string()));
6698        let deps = index.file_dependencies(uri.as_str());
6699        assert!(
6700            !deps.contains("MyModule"),
6701            "require with variable target should not register static dependency"
6702        );
6703        Ok(())
6704    }
6705
6706    #[test]
6707    fn test_multiple_import_calls_on_same_module() -> Result<(), Box<dyn std::error::Error>> {
6708        let index = WorkspaceIndex::new();
6709        let uri = must(url::Url::parse("file:///test/multi-import.pl"));
6710        let src = r#"package Test;
6711require Toolkit;
6712Toolkit->import('func_a');
6713Toolkit->import(qw(func_b func_c));
67141;
6715"#;
6716        must(index.index_file(uri.clone(), src.to_string()));
6717        let deps = index.file_dependencies(uri.as_str());
6718        assert!(deps.contains("Toolkit"), "module should be tracked as dependency");
6719        for symbol in &["func_a", "func_b", "func_c"] {
6720            let refs = index.find_references(symbol);
6721            assert!(!refs.is_empty(), "all imported symbols should be indexed: {}", symbol);
6722        }
6723        Ok(())
6724    }
6725
6726    #[test]
6727    fn test_require_string_vs_bareword_normalization() -> Result<(), Box<dyn std::error::Error>> {
6728        let index = WorkspaceIndex::new();
6729        let uri = must(url::Url::parse("file:///test/require-string.pl"));
6730        let src = r#"package Consumer;
6731require "String/Based/Module.pm";
6732String::Based::Module->import('exported');
67331;
6734"#;
6735        must(index.index_file(uri.clone(), src.to_string()));
6736        let deps = index.file_dependencies(uri.as_str());
6737        assert!(
6738            deps.contains("String::Based::Module"),
6739            "require string form should normalize path separators to ::"
6740        );
6741        let refs = index.find_references("exported");
6742        assert!(!refs.is_empty(), "import should be indexed even with string-form require");
6743        Ok(())
6744    }
6745
6746    #[test]
6747    fn test_import_without_require_registers_as_method_call()
6748    -> Result<(), Box<dyn std::error::Error>> {
6749        // Edge case: ->import() without preceding require is treated as a normal method call,
6750        // not as the static manual-import pattern, so the module is still visited/tracked
6751        // but the symbols are NOT marked as imports from the static require+import logic.
6752        let index = WorkspaceIndex::new();
6753        let uri = must(url::Url::parse("file:///test/orphan-import.pl"));
6754        let src = r#"package Test;
6755Unrelated::Module->import('orphaned');
6756orphaned();
67571;
6758"#;
6759        must(index.index_file(uri.clone(), src.to_string()));
6760
6761        // The module reference may still be tracked as a method call target,
6762        // but the key regression is: the orphaned symbol should not be indexed
6763        // as an import reference due to the missing require.
6764        let _refs = index.find_references("orphaned");
6765        // Symbol may be referenced but should not be specially treated as an import.
6766        // The main point is: without require, the pairing doesn't activate.
6767        Ok(())
6768    }
6769
6770    #[test]
6771    fn test_nested_blocks_preserve_require_scope() -> Result<(), Box<dyn std::error::Error>> {
6772        let index = WorkspaceIndex::new();
6773        let uri = must(url::Url::parse("file:///test/nested.pl"));
6774        let src = r#"package Test;
6775{
6776    require Outer;
6777    {
6778        Outer->import('nested_sym');
6779    }
6780}
67811;
6782"#;
6783        must(index.index_file(uri.clone(), src.to_string()));
6784        let deps = index.file_dependencies(uri.as_str());
6785        assert!(
6786            deps.contains("Outer"),
6787            "require in outer block should be visible to nested import"
6788        );
6789        let refs = index.find_references("nested_sym");
6790        assert!(!refs.is_empty(), "symbol imported in nested block should still be indexed");
6791        Ok(())
6792    }
6793
6794    #[test]
6795    fn test_require_path_without_pm_extension() -> Result<(), Box<dyn std::error::Error>> {
6796        let index = WorkspaceIndex::new();
6797        let uri = must(url::Url::parse("file:///test/no-ext.pl"));
6798        let src = r#"package Test;
6799require "My/Module";
6800My::Module->import('func');
68011;
6802"#;
6803        must(index.index_file(uri.clone(), src.to_string()));
6804        let deps = index.file_dependencies(uri.as_str());
6805        assert!(
6806            deps.contains("My::Module"),
6807            "require without .pm extension should normalize to module path"
6808        );
6809        Ok(())
6810    }
6811
6812    #[test]
6813    fn test_qw_with_bracket_delimiters() -> Result<(), Box<dyn std::error::Error>> {
6814        let index = WorkspaceIndex::new();
6815        let uri = must(url::Url::parse("file:///test/qw-delim.pl"));
6816        let src = r#"package Test;
6817require DelimModule;
6818DelimModule->import(qw[sym1 sym2]);
6819DelimModule->import(qw{sym3 sym4});
68201;
6821"#;
6822        must(index.index_file(uri.clone(), src.to_string()));
6823        for symbol in &["sym1", "sym2", "sym3", "sym4"] {
6824            let refs = index.find_references(symbol);
6825            assert!(
6826                !refs.is_empty(),
6827                "symbols from qw with bracket delimiters should be indexed: {}",
6828                symbol
6829            );
6830        }
6831        Ok(())
6832    }
6833
6834    #[test]
6835    fn test_array_literal_import_args() -> Result<(), Box<dyn std::error::Error>> {
6836        let index = WorkspaceIndex::new();
6837        let uri = must(url::Url::parse("file:///test/array-import.pl"));
6838        let src = r#"package Test;
6839require ArrayModule;
6840ArrayModule->import(['sym_x', 'sym_y']);
68411;
6842"#;
6843        must(index.index_file(uri.clone(), src.to_string()));
6844        for symbol in &["sym_x", "sym_y"] {
6845            let refs = index.find_references(symbol);
6846            assert!(
6847                !refs.is_empty(),
6848                "symbols from array literal import should be indexed: {}",
6849                symbol
6850            );
6851        }
6852        Ok(())
6853    }
6854
6855    #[test]
6856    fn test_require_inside_conditional_still_registers_dependency()
6857    -> Result<(), Box<dyn std::error::Error>> {
6858        let index = WorkspaceIndex::new();
6859        let uri = must(url::Url::parse("file:///test/cond-require.pl"));
6860        let src = r#"package Test;
6861if (1) {
6862    require ConditionalMod;
6863    ConditionalMod->import('cond_func');
6864}
68651;
6866"#;
6867        must(index.index_file(uri.clone(), src.to_string()));
6868        let deps = index.file_dependencies(uri.as_str());
6869        assert!(
6870            deps.contains("ConditionalMod"),
6871            "require inside conditional should still register as dependency"
6872        );
6873        let refs = index.find_references("cond_func");
6874        assert!(!refs.is_empty(), "import inside conditional should still index symbols");
6875        Ok(())
6876    }
6877
6878    #[test]
6879    fn test_mixed_string_and_bareword_imports() -> Result<(), Box<dyn std::error::Error>> {
6880        let index = WorkspaceIndex::new();
6881        let uri = must(url::Url::parse("file:///test/mixed-import.pl"));
6882        let src = r#"package Test;
6883require MixedMod;
6884MixedMod->import('string_sym');
6885MixedMod->import(qw(qw_one qw_two));
68861;
6887"#;
6888        must(index.index_file(uri.clone(), src.to_string()));
6889        let deps = index.file_dependencies(uri.as_str());
6890        assert!(deps.contains("MixedMod"), "require should register dependency");
6891        for symbol in &["string_sym", "qw_one", "qw_two"] {
6892            let refs = index.find_references(symbol);
6893            assert!(!refs.is_empty(), "all import forms should index symbols: {}", symbol);
6894        }
6895        Ok(())
6896    }
6897
6898    // -------------------------------------------------------------------------
6899    // Per-category incremental invalidation (Req 18.1–18.5)
6900    // -------------------------------------------------------------------------
6901
6902    /// Helper: build a minimal `FileFactShard` with configurable hashes.
6903    fn make_shard(
6904        uri: &str,
6905        content_hash: u64,
6906        anchors_hash: Option<u64>,
6907        entities_hash: Option<u64>,
6908        occurrences_hash: Option<u64>,
6909        edges_hash: Option<u64>,
6910    ) -> FileFactShard {
6911        let file_id = {
6912            let mut h = DefaultHasher::new();
6913            uri.hash(&mut h);
6914            FileId(h.finish())
6915        };
6916        FileFactShard {
6917            source_uri: uri.to_string(),
6918            file_id,
6919            content_hash,
6920            anchors_hash,
6921            entities_hash,
6922            occurrences_hash,
6923            edges_hash,
6924            anchors: Vec::new(),
6925            entities: Vec::new(),
6926            occurrences: Vec::new(),
6927            edges: Vec::new(),
6928        }
6929    }
6930
6931    /// Req 18.5: When content_hash is unchanged, skip all per-category
6932    /// comparisons — no index modifications happen.
6933    #[test]
6934    fn incremental_replace_skips_when_content_hash_unchanged()
6935    -> Result<(), Box<dyn std::error::Error>> {
6936        let index = WorkspaceIndex::new();
6937        let uri = "file:///lib/Same.pm";
6938        let key = DocumentStore::uri_key(uri);
6939
6940        let shard_v1 = make_shard(uri, 42, Some(1), Some(2), Some(3), Some(4));
6941        // First insert — no old shard, so all categories are "changed".
6942        let r1 = index.replace_fact_shard_incremental(&key, shard_v1);
6943        assert!(!r1.content_unchanged);
6944
6945        // Second insert with same content_hash → skip entirely.
6946        let shard_v2 = make_shard(uri, 42, Some(100), Some(200), Some(300), Some(400));
6947        let r2 = index.replace_fact_shard_incremental(&key, shard_v2);
6948        assert!(r2.content_unchanged);
6949        assert!(!r2.anchors_updated);
6950        assert!(!r2.entities_updated);
6951        assert!(!r2.occurrences_updated);
6952        assert!(!r2.edges_updated);
6953
6954        // The stored shard should still be v1 (unchanged).
6955        let stored = must_some(index.file_fact_shard(uri));
6956        assert_eq!(stored.anchors_hash, Some(1));
6957        Ok(())
6958    }
6959
6960    /// Req 18.3: When a category hash is unchanged, skip re-indexing that
6961    /// category's cross-file indexes.
6962    #[test]
6963    fn incremental_replace_skips_unchanged_categories() -> Result<(), Box<dyn std::error::Error>> {
6964        let index = WorkspaceIndex::new();
6965        let uri = "file:///lib/Partial.pm";
6966        let key = DocumentStore::uri_key(uri);
6967
6968        let shard_v1 = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
6969        index.replace_fact_shard_incremental(&key, shard_v1);
6970
6971        // Change content_hash but keep anchors and entities the same.
6972        // Only occurrences and edges change.
6973        let shard_v2 = make_shard(uri, 2, Some(10), Some(20), Some(99), Some(88));
6974        let result = index.replace_fact_shard_incremental(&key, shard_v2);
6975
6976        assert!(!result.content_unchanged);
6977        assert!(!result.anchors_updated, "anchors hash unchanged → skip");
6978        assert!(!result.entities_updated, "entities hash unchanged → skip");
6979        assert!(result.occurrences_updated, "occurrences hash changed → update");
6980        assert!(result.edges_updated, "edges hash changed → update");
6981        Ok(())
6982    }
6983
6984    /// Req 18.4: When a category hash has changed, remove old entries and
6985    /// insert new ones for that category.
6986    #[test]
6987    fn incremental_replace_updates_changed_categories() -> Result<(), Box<dyn std::error::Error>> {
6988        let index = WorkspaceIndex::new();
6989        let uri = "file:///lib/Changed.pm";
6990        let key = DocumentStore::uri_key(uri);
6991
6992        let shard_v1 = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
6993        index.replace_fact_shard_incremental(&key, shard_v1);
6994
6995        // Change all category hashes.
6996        let shard_v2 = make_shard(uri, 2, Some(11), Some(21), Some(31), Some(41));
6997        let result = index.replace_fact_shard_incremental(&key, shard_v2);
6998
6999        assert!(!result.content_unchanged);
7000        assert!(result.anchors_updated);
7001        assert!(result.entities_updated);
7002        assert!(result.occurrences_updated);
7003        assert!(result.edges_updated);
7004
7005        // The stored shard should be v2.
7006        let stored = must_some(index.file_fact_shard(uri));
7007        assert_eq!(stored.content_hash, 2);
7008        assert_eq!(stored.anchors_hash, Some(11));
7009        Ok(())
7010    }
7011
7012    /// When there is no old shard (first index), all categories are treated
7013    /// as changed.
7014    #[test]
7015    fn incremental_replace_first_insert_updates_all() -> Result<(), Box<dyn std::error::Error>> {
7016        let index = WorkspaceIndex::new();
7017        let uri = "file:///lib/New.pm";
7018        let key = DocumentStore::uri_key(uri);
7019
7020        let shard = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
7021        let result = index.replace_fact_shard_incremental(&key, shard);
7022
7023        assert!(!result.content_unchanged);
7024        assert!(result.anchors_updated);
7025        assert!(result.entities_updated);
7026        assert!(result.occurrences_updated);
7027        assert!(result.edges_updated);
7028        Ok(())
7029    }
7030
7031    /// When per-category hashes are `None` (legacy shard), the category is
7032    /// conservatively treated as changed.
7033    #[test]
7034    fn incremental_replace_none_hashes_treated_as_changed() -> Result<(), Box<dyn std::error::Error>>
7035    {
7036        let index = WorkspaceIndex::new();
7037        let uri = "file:///lib/Legacy.pm";
7038        let key = DocumentStore::uri_key(uri);
7039
7040        // Old shard has hashes, new shard has None for some.
7041        let shard_v1 = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
7042        index.replace_fact_shard_incremental(&key, shard_v1);
7043
7044        let shard_v2 = make_shard(uri, 2, None, Some(20), None, Some(40));
7045        let result = index.replace_fact_shard_incremental(&key, shard_v2);
7046
7047        assert!(!result.content_unchanged);
7048        assert!(result.anchors_updated, "None new hash → changed");
7049        assert!(!result.entities_updated, "same hash → skip");
7050        assert!(result.occurrences_updated, "None new hash → changed");
7051        assert!(!result.edges_updated, "same hash → skip");
7052        Ok(())
7053    }
7054
7055    /// Verify that the semantic reference index is updated only when
7056    /// occurrences or edges change.
7057    #[test]
7058    fn incremental_replace_updates_reference_index_on_occurrence_change()
7059    -> Result<(), Box<dyn std::error::Error>> {
7060        use perl_semantic_facts::{AnchorId, Confidence, OccurrenceId, OccurrenceKind, Provenance};
7061
7062        let index = WorkspaceIndex::new();
7063        let uri = "file:///lib/RefIdx.pm";
7064        let key = DocumentStore::uri_key(uri);
7065        let file_id = {
7066            let mut h = DefaultHasher::new();
7067            uri.hash(&mut h);
7068            FileId(h.finish())
7069        };
7070
7071        // v1: shard with one reference occurrence.
7072        let mut shard_v1 = make_shard(uri, 1, Some(10), Some(20), Some(30), Some(40));
7073        let anchor_id = AnchorId(1);
7074        shard_v1.anchors.push(perl_semantic_facts::AnchorFact {
7075            id: anchor_id,
7076            file_id,
7077            span_start_byte: 0,
7078            span_end_byte: 5,
7079            scope_id: None,
7080            provenance: Provenance::ExactAst,
7081            confidence: Confidence::High,
7082        });
7083        shard_v1.occurrences.push(perl_semantic_facts::OccurrenceFact {
7084            id: OccurrenceId(1),
7085            kind: OccurrenceKind::Call,
7086            entity_id: Some(EntityId(100)),
7087            anchor_id,
7088            scope_id: None,
7089            provenance: Provenance::ExactAst,
7090            confidence: Confidence::High,
7091        });
7092        shard_v1.entities.push(perl_semantic_facts::EntityFact {
7093            id: EntityId(100),
7094            kind: EntityKind::Subroutine,
7095            canonical_name: "RefIdx::foo".to_string(),
7096            anchor_id: Some(anchor_id),
7097            scope_id: None,
7098            provenance: Provenance::ExactAst,
7099            confidence: Confidence::High,
7100        });
7101        index.replace_fact_shard_incremental(&key, shard_v1);
7102
7103        // Reference index should have entries.
7104        assert!(
7105            index.semantic_reference_index.read().name_count() > 0
7106                || index.semantic_reference_index.read().entity_count() > 0,
7107            "reference index should be populated after first insert"
7108        );
7109
7110        // v2: same content_hash → skip entirely, reference index untouched.
7111        let shard_v2_same = make_shard(uri, 1, Some(10), Some(20), Some(99), Some(99));
7112        let r = index.replace_fact_shard_incremental(&key, shard_v2_same);
7113        assert!(r.content_unchanged);
7114
7115        // v3: different content_hash, same occurrence/edge hashes → skip ref index.
7116        let mut shard_v3 = make_shard(uri, 3, Some(11), Some(21), Some(30), Some(40));
7117        shard_v3.anchors.push(perl_semantic_facts::AnchorFact {
7118            id: anchor_id,
7119            file_id,
7120            span_start_byte: 0,
7121            span_end_byte: 5,
7122            scope_id: None,
7123            provenance: Provenance::ExactAst,
7124            confidence: Confidence::High,
7125        });
7126        shard_v3.occurrences.push(perl_semantic_facts::OccurrenceFact {
7127            id: OccurrenceId(1),
7128            kind: OccurrenceKind::Call,
7129            entity_id: Some(EntityId(100)),
7130            anchor_id,
7131            scope_id: None,
7132            provenance: Provenance::ExactAst,
7133            confidence: Confidence::High,
7134        });
7135        shard_v3.entities.push(perl_semantic_facts::EntityFact {
7136            id: EntityId(100),
7137            kind: EntityKind::Subroutine,
7138            canonical_name: "RefIdx::foo".to_string(),
7139            anchor_id: Some(anchor_id),
7140            scope_id: None,
7141            provenance: Provenance::ExactAst,
7142            confidence: Confidence::High,
7143        });
7144        let r3 = index.replace_fact_shard_incremental(&key, shard_v3);
7145        assert!(!r3.occurrences_updated, "occurrence hash unchanged → skip");
7146        assert!(!r3.edges_updated, "edge hash unchanged → skip");
7147
7148        Ok(())
7149    }
7150
7151    /// Verify that `index_file` uses incremental replacement (the fact shard
7152    /// is stored and updated correctly through the full indexing path).
7153    #[test]
7154    fn index_file_stores_fact_shard_incrementally() -> Result<(), Box<dyn std::error::Error>> {
7155        let index = WorkspaceIndex::new();
7156        let uri = "file:///lib/Incr.pm";
7157        let code = "package Incr;\nsub foo { 1 }\n1;\n";
7158
7159        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
7160        let shard1 = must_some(index.file_fact_shard(uri));
7161        assert!(shard1.anchors_hash.is_some());
7162        assert!(
7163            shard1.anchors.iter().any(|anchor| anchor.provenance == Provenance::ExactAst),
7164            "index_file should store the canonical semantic shard when adapters produce facts"
7165        );
7166        assert!(
7167            shard1.entities.iter().any(|entity| entity.provenance == Provenance::ExactAst),
7168            "index_file should store canonical entities rather than legacy fallback entities"
7169        );
7170
7171        // Re-index with same content → shard should be unchanged.
7172        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
7173        // The early-exit in index_file checks content_hash at the FileIndex
7174        // level, so the fact shard replacement is never reached for identical
7175        // content. Verify the shard is still present.
7176        let shard2 = must_some(index.file_fact_shard(uri));
7177        assert_eq!(shard1.content_hash, shard2.content_hash);
7178
7179        // Re-index with different content → shard should be replaced.
7180        let code2 = "package Incr;\nsub bar { 2 }\n1;\n";
7181        must(index.index_file(must(url::Url::parse(uri)), code2.to_string()));
7182        let shard3 = must_some(index.file_fact_shard(uri));
7183        assert_ne!(shard1.content_hash, shard3.content_hash);
7184
7185        Ok(())
7186    }
7187
7188    #[test]
7189    fn semantic_anchor_wire_location_uses_lsp_utf16_columns()
7190    -> Result<(), Box<dyn std::error::Error>> {
7191        use crate::semantic::queries::SemanticQueries;
7192
7193        let index = WorkspaceIndex::new();
7194        let uri = "file:///lib/UnicodeAnchor.pm";
7195        let code = "package UnicodeAnchor; my $emoji = \"😀\"; sub target { 1 }\n1;\n";
7196
7197        must(index.index_file(must(url::Url::parse(uri)), code.to_string()));
7198
7199        let candidates = index
7200            .with_semantic_queries_for_uri(uri, |file_id, queries| {
7201                let ctx = crate::semantic::queries::QueryContext::new(file_id, None, Some(0));
7202                queries.definitions("UnicodeAnchor::target", &ctx)
7203            })
7204            .ok_or("missing semantic queries")?;
7205        let anchor_id = candidates
7206            .first()
7207            .map(|candidate| candidate.anchor_id)
7208            .ok_or("missing unicode definition candidate")?;
7209        let shard = index.file_fact_shard(uri).ok_or("missing fact shard")?;
7210        let anchor = shard
7211            .anchors
7212            .iter()
7213            .find(|anchor| anchor.id == anchor_id)
7214            .ok_or("missing unicode anchor")?;
7215        let start = usize::try_from(anchor.span_start_byte)?;
7216        let end = usize::try_from(anchor.span_end_byte)?;
7217        let expected = WireRange::from_byte_offsets(code, start, end);
7218
7219        let location =
7220            index.semantic_anchor_wire_location(anchor_id).ok_or("missing wire location")?;
7221
7222        assert_eq!(location.range, expected);
7223        let wire_column = usize::try_from(location.range.start.character)?;
7224        let scalar_column = code[..start].chars().count();
7225        assert!(
7226            wire_column > scalar_column,
7227            "fixture must prove the wire column counts UTF-16 units, not Unicode scalar values"
7228        );
7229
7230        Ok(())
7231    }
7232
7233    #[test]
7234    fn semantic_anchor_wire_location_fails_closed_for_duplicate_anchor_ids()
7235    -> Result<(), Box<dyn std::error::Error>> {
7236        use crate::semantic::queries::SemanticQueries;
7237
7238        let index = WorkspaceIndex::new();
7239        let code = "package DuplicateAnchor;\nsub target { 1 }\n1;\n";
7240
7241        must(
7242            index.index_file(must(url::Url::parse("file:///lib/DuplicateA.pm")), code.to_string()),
7243        );
7244        must(
7245            index.index_file(must(url::Url::parse("file:///lib/DuplicateB.pm")), code.to_string()),
7246        );
7247
7248        let candidates = index
7249            .with_semantic_queries_for_uri("file:///lib/DuplicateA.pm", |file_id, queries| {
7250                let ctx = crate::semantic::queries::QueryContext::new(file_id, None, Some(0));
7251                queries.definitions("DuplicateAnchor::target", &ctx)
7252            })
7253            .ok_or("missing semantic queries")?;
7254
7255        let anchor_id = candidates
7256            .first()
7257            .map(|candidate| candidate.anchor_id)
7258            .ok_or("missing duplicate definition candidate")?;
7259        assert!(
7260            candidates.iter().filter(|candidate| candidate.anchor_id == anchor_id).count() > 1,
7261            "fixture must produce duplicate anchor IDs to prove fail-closed behavior"
7262        );
7263        assert_eq!(
7264            index.semantic_anchor_wire_location(anchor_id),
7265            None,
7266            "duplicate source-backed anchors must not resolve to an arbitrary file"
7267        );
7268
7269        Ok(())
7270    }
7271
7272    #[test]
7273    fn semantic_anchor_wire_location_for_file_resolves_duplicate_anchor_ids_by_file()
7274    -> Result<(), Box<dyn std::error::Error>> {
7275        use crate::semantic::queries::SemanticQueries;
7276
7277        let index = WorkspaceIndex::new();
7278        let code = "package DuplicateAnchor;\nsub target { 1 }\n1;\n";
7279        let uri_a = "file:///lib/DuplicateA.pm";
7280        let uri_b = "file:///lib/DuplicateB.pm";
7281
7282        must(index.index_file(must(url::Url::parse(uri_a)), code.to_string()));
7283        must(index.index_file(must(url::Url::parse(uri_b)), code.to_string()));
7284
7285        let (file_id_a, anchor_id) = index
7286            .with_semantic_queries_for_uri(uri_a, |file_id, queries| {
7287                let ctx = crate::semantic::queries::QueryContext::new(file_id, None, Some(0));
7288                queries
7289                    .definitions("DuplicateAnchor::target", &ctx)
7290                    .first()
7291                    .map(|candidate| (file_id, candidate.anchor_id))
7292            })
7293            .flatten()
7294            .ok_or("missing duplicate definition candidate")?;
7295
7296        assert_eq!(
7297            index.semantic_anchor_wire_location(anchor_id),
7298            None,
7299            "global anchor lookup must still fail closed for duplicate anchor IDs"
7300        );
7301
7302        let location = index
7303            .semantic_anchor_wire_location_for_file(file_id_a, anchor_id)
7304            .ok_or("file-scoped anchor lookup should resolve duplicate anchor ID")?;
7305        assert_eq!(location.uri, uri_a);
7306
7307        Ok(())
7308    }
7309
7310    // ── Property-based tests for incremental invalidation ──
7311
7312    mod prop_incremental_invalidation {
7313        use super::*;
7314        use proptest::prelude::*;
7315        use proptest::test_runner::Config as ProptestConfig;
7316
7317        /// Strategy for an optional per-category hash.
7318        ///
7319        /// ~10% of the time produces `None` (simulating legacy shards
7320        /// without per-category hashes); otherwise a random `u64`.
7321        fn arb_category_hash() -> impl Strategy<Value = Option<u64>> {
7322            prop_oneof![
7323                1 => Just(None),
7324                9 => any::<u64>().prop_map(Some),
7325            ]
7326        }
7327
7328        /// Strategy for a `FileFactShard` with the given URI and
7329        /// randomly-chosen hashes.
7330        fn arb_shard(uri: &'static str) -> impl Strategy<Value = FileFactShard> {
7331            (
7332                any::<u64>(),        // content_hash
7333                arb_category_hash(), // anchors_hash
7334                arb_category_hash(), // entities_hash
7335                arb_category_hash(), // occurrences_hash
7336                arb_category_hash(), // edges_hash
7337            )
7338                .prop_map(move |(content_hash, ah, eh, oh, edh)| {
7339                    make_shard(uri, content_hash, ah, eh, oh, edh)
7340                })
7341        }
7342
7343        // Property 15: Incremental Invalidation Correctness
7344        //
7345        // **Validates: Requirements 18.3, 18.4, 18.5**
7346        //
7347        // For any file re-indexing where the whole-file content_hash is
7348        // unchanged, the workspace store shall not modify any cross-file
7349        // indexes.  For any file re-indexing where a per-category hash is
7350        // unchanged, the workspace store shall skip re-indexing that
7351        // category.  For any file re-indexing where a per-category hash
7352        // has changed, the workspace store shall remove old entries and
7353        // insert new ones for that category.
7354        proptest! {
7355            #![proptest_config(ProptestConfig {
7356                failure_persistence: None,
7357                ..ProptestConfig::default()
7358            })]
7359
7360            #[test]
7361            fn prop_incremental_invalidation_correctness(
7362                old_shard in arb_shard("file:///lib/Prop.pm"),
7363                new_shard in arb_shard("file:///lib/Prop.pm"),
7364            ) {
7365                let index = WorkspaceIndex::new();
7366                let key = DocumentStore::uri_key("file:///lib/Prop.pm");
7367
7368                // Seed the index with the old shard.
7369                index.replace_fact_shard_incremental(&key, old_shard.clone());
7370
7371                // Replace with the new shard and capture the result.
7372                let result = index.replace_fact_shard_incremental(&key, new_shard.clone());
7373
7374                // ── Req 18.5: content_hash unchanged → skip entirely ──
7375                if old_shard.content_hash == new_shard.content_hash {
7376                    prop_assert!(
7377                        result.content_unchanged,
7378                        "content_unchanged must be true when content_hash is the same"
7379                    );
7380                    prop_assert!(
7381                        !result.anchors_updated,
7382                        "anchors_updated must be false when content_hash unchanged"
7383                    );
7384                    prop_assert!(
7385                        !result.entities_updated,
7386                        "entities_updated must be false when content_hash unchanged"
7387                    );
7388                    prop_assert!(
7389                        !result.occurrences_updated,
7390                        "occurrences_updated must be false when content_hash unchanged"
7391                    );
7392                    prop_assert!(
7393                        !result.edges_updated,
7394                        "edges_updated must be false when content_hash unchanged"
7395                    );
7396                } else {
7397                    prop_assert!(
7398                        !result.content_unchanged,
7399                        "content_unchanged must be false when content_hash differs"
7400                    );
7401
7402                    // ── Req 18.3 / 18.4: per-category hash comparison ──
7403                    // A category is "unchanged" when both old and new have
7404                    // Some(h) and the values are equal.  Otherwise the
7405                    // category is conservatively treated as changed.
7406
7407                    let anchors_should_update = crate::semantic::invalidation::category_hash_changed(
7408                        old_shard.anchors_hash,
7409                        new_shard.anchors_hash,
7410                    );
7411                    prop_assert_eq!(
7412                        result.anchors_updated,
7413                        anchors_should_update,
7414                        "anchors_updated mismatch: old={:?} new={:?}",
7415                        old_shard.anchors_hash,
7416                        new_shard.anchors_hash,
7417                    );
7418
7419                    let entities_should_update =
7420                        crate::semantic::invalidation::category_hash_changed(
7421                            old_shard.entities_hash,
7422                            new_shard.entities_hash,
7423                        );
7424                    prop_assert_eq!(
7425                        result.entities_updated,
7426                        entities_should_update,
7427                        "entities_updated mismatch: old={:?} new={:?}",
7428                        old_shard.entities_hash,
7429                        new_shard.entities_hash,
7430                    );
7431
7432                    let occurrences_should_update =
7433                        crate::semantic::invalidation::category_hash_changed(
7434                            old_shard.occurrences_hash,
7435                            new_shard.occurrences_hash,
7436                        );
7437                    prop_assert_eq!(
7438                        result.occurrences_updated,
7439                        occurrences_should_update,
7440                        "occurrences_updated mismatch: old={:?} new={:?}",
7441                        old_shard.occurrences_hash,
7442                        new_shard.occurrences_hash,
7443                    );
7444
7445                    let edges_should_update = crate::semantic::invalidation::category_hash_changed(
7446                        old_shard.edges_hash,
7447                        new_shard.edges_hash,
7448                    );
7449                    prop_assert_eq!(
7450                        result.edges_updated,
7451                        edges_should_update,
7452                        "edges_updated mismatch: old={:?} new={:?}",
7453                        old_shard.edges_hash,
7454                        new_shard.edges_hash,
7455                    );
7456                }
7457            }
7458        }
7459    }
7460}
7461
7462// ── with_semantic_queries_for_uri tests ──
7463
#[cfg(test)]
mod semantic_query_callback_tests {
    use super::*;
    use perl_tdd_support::{must, must_some};

    /// After indexing a URI, the callback runs and its return value comes back in `Some`.
    #[test]
    fn with_semantic_queries_for_uri_indexed_uri_invokes_callback()
    -> Result<(), Box<dyn std::error::Error>> {
        const SENTINEL: u32 = 42;

        let uri = "file:///lib/Foo.pm";
        let index = WorkspaceIndex::new();
        must(index.index_file(must(url::Url::parse(uri)), String::from("sub foo { 1 }")));

        let outcome = index.with_semantic_queries_for_uri(uri, |file_id, _queries| {
            // The callback must receive a non-zero file ID for the indexed URI.
            assert_ne!(file_id.0, 0, "file_id should be non-zero");
            SENTINEL
        });

        assert_eq!(outcome, Some(SENTINEL), "callback must run when URI is indexed");
        Ok(())
    }

    /// With nothing indexed, the call yields `None`.
    #[test]
    fn with_semantic_queries_for_uri_unknown_uri_returns_none()
    -> Result<(), Box<dyn std::error::Error>> {
        // The index is left empty on purpose.
        let index = WorkspaceIndex::new();
        let outcome = index.with_semantic_queries_for_uri("file:///not/indexed.pl", |_, _| 99u32);
        assert!(outcome.is_none(), "unindexed URI must return None without invoking callback");
        Ok(())
    }

    /// The file ID passed to the callback equals the one from `file_id_for_uri`.
    #[test]
    fn with_semantic_queries_for_uri_file_id_matches_file_id_for_uri()
    -> Result<(), Box<dyn std::error::Error>> {
        let uri = "file:///lib/Bar.pm";
        let index = WorkspaceIndex::new();
        must(index.index_file(must(url::Url::parse(uri)), String::from("sub bar { 1 }")));

        let from_lookup = must_some(index.file_id_for_uri(uri));
        let from_callback =
            must_some(index.with_semantic_queries_for_uri(uri, |file_id, _queries| file_id));

        assert_eq!(
            from_lookup, from_callback,
            "file_id_for_uri and with_semantic_queries_for_uri must agree"
        );
        Ok(())
    }

    /// With nothing indexed, the callback body never executes.
    #[test]
    fn with_semantic_queries_for_uri_callback_not_called_when_not_indexed()
    -> Result<(), Box<dyn std::error::Error>> {
        let index = WorkspaceIndex::new();
        let mut invoked = false;
        // The returned Option is irrelevant here; only the flag matters.
        let _ = index.with_semantic_queries_for_uri("file:///ghost.pl", |_, _| {
            invoked = true;
        });
        assert!(!invoked, "callback must not be invoked for unindexed URI");
        Ok(())
    }
}
perl_workspace/workspace/workspace_index.rs

perl_workspace/workspace/
workspace_index.rs