codeprysm_core/lazy/
manager.rs

1//! Lazy Graph Manager
2//!
3//! Orchestrates lazy-loading of code graph partitions from SQLite into petgraph.
4//! Provides transparent access to nodes and edges, loading partitions on-demand.
5
6use crate::discovery::{DiscoveredRoot, DiscoveryError, RootDiscovery, RootType};
7use crate::graph::{EdgeData, Node, PetCodeGraph};
8use crate::lazy::cache::{CacheMetrics, MemoryBudgetCache, PartitionStats as CachePartitionStats};
9use crate::lazy::cross_refs::{CrossRef, CrossRefError, CrossRefIndex, CrossRefStore};
10use crate::lazy::partition::{PartitionConnection, PartitionError};
11use dashmap::{DashMap, DashSet};
12use parking_lot::RwLock;
13use std::collections::{HashMap, HashSet};
14use std::path::{Path, PathBuf};
15use std::sync::Arc;
16use thiserror::Error;
17
18/// Errors that can occur during lazy graph operations
19#[derive(Debug, Error)]
20pub enum LazyGraphError {
21    #[error("Partition error: {0}")]
22    Partition(#[from] PartitionError),
23
24    #[error("Manifest error: {0}")]
25    Manifest(String),
26
27    #[error("IO error: {0}")]
28    Io(#[from] std::io::Error),
29
30    #[error("JSON error: {0}")]
31    Json(#[from] serde_json::Error),
32
33    #[error("Partition not found: {0}")]
34    PartitionNotFound(String),
35
36    #[error("Node not found: {0}")]
37    NodeNotFound(String),
38
39    #[error("Discovery error: {0}")]
40    Discovery(#[from] DiscoveryError),
41
42    #[error("Cross-ref error: {0}")]
43    CrossRef(#[from] CrossRefError),
44}
45
46/// Manifest entry for a file's partition assignment
47#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
48pub struct ManifestEntry {
49    /// The partition ID this file belongs to
50    pub partition_id: String,
51    /// Content hash of the file (for change detection)
52    pub content_hash: Option<String>,
53}
54
55/// Information about a discovered code root
56#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
57pub struct RootInfo {
58    /// Unique name for this root (typically directory name)
59    pub name: String,
60    /// Type of root: "git" or "code"
61    pub root_type: String,
62    /// Relative path from workspace root
63    pub relative_path: String,
64    /// Remote URL for git repositories
65    #[serde(skip_serializing_if = "Option::is_none")]
66    pub remote_url: Option<String>,
67    /// Branch name for git repositories
68    #[serde(skip_serializing_if = "Option::is_none")]
69    pub branch: Option<String>,
70    /// Commit SHA for git repositories
71    #[serde(skip_serializing_if = "Option::is_none")]
72    pub commit: Option<String>,
73}
74
75impl RootInfo {
76    /// Create a RootInfo from a DiscoveredRoot
77    pub fn from_discovered_root(discovered: &DiscoveredRoot) -> Self {
78        let (root_type, remote_url, branch, commit) = match &discovered.root_type {
79            RootType::GitRepository {
80                remote,
81                branch,
82                commit,
83            } => (
84                "git".to_string(),
85                remote.clone(),
86                branch.clone(),
87                commit.clone(),
88            ),
89            RootType::CodeDirectory => ("code".to_string(), None, None, None),
90        };
91
92        Self {
93            name: discovered.name.clone(),
94            root_type,
95            relative_path: discovered.relative_path.clone(),
96            remote_url,
97            branch,
98            commit,
99        }
100    }
101}
102
103/// Manifest that maps files to partitions
104#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
105pub struct Manifest {
106    /// Schema version for compatibility
107    pub schema_version: String,
108    /// Map from root name to root info (multi-root support)
109    #[serde(default)]
110    pub roots: HashMap<String, RootInfo>,
111    /// Map from file path to manifest entry
112    pub files: HashMap<String, ManifestEntry>,
113    /// Map from partition ID to partition file name
114    pub partitions: HashMap<String, String>,
115}
116
117impl Manifest {
118    /// Create a new empty manifest
119    pub fn new() -> Self {
120        Self {
121            schema_version: "1.0".to_string(),
122            roots: HashMap::new(),
123            files: HashMap::new(),
124            partitions: HashMap::new(),
125        }
126    }
127
128    /// Load manifest from a JSON file
129    pub fn load(path: &Path) -> Result<Self, LazyGraphError> {
130        let content = std::fs::read_to_string(path)?;
131        let manifest: Manifest = serde_json::from_str(&content)?;
132        Ok(manifest)
133    }
134
135    /// Save manifest to a JSON file
136    pub fn save(&self, path: &Path) -> Result<(), LazyGraphError> {
137        let content = serde_json::to_string_pretty(self)?;
138        if let Some(parent) = path.parent() {
139            std::fs::create_dir_all(parent)?;
140        }
141        std::fs::write(path, content)?;
142        Ok(())
143    }
144
145    /// Get the partition ID for a file
146    pub fn get_partition_for_file(&self, file: &str) -> Option<&str> {
147        self.files.get(file).map(|e| e.partition_id.as_str())
148    }
149
150    /// Get the partition database file name
151    pub fn get_partition_file(&self, partition_id: &str) -> Option<&str> {
152        self.partitions.get(partition_id).map(|s| s.as_str())
153    }
154
155    /// Add or update a file entry
156    pub fn set_file(&mut self, file: String, partition_id: String, content_hash: Option<String>) {
157        self.files.insert(
158            file,
159            ManifestEntry {
160                partition_id,
161                content_hash,
162            },
163        );
164    }
165
166    /// Register a partition
167    pub fn register_partition(&mut self, partition_id: String, filename: String) {
168        self.partitions.insert(partition_id, filename);
169    }
170
171    /// Register a root
172    pub fn register_root(&mut self, root_info: RootInfo) {
173        self.roots.insert(root_info.name.clone(), root_info);
174    }
175
176    /// Get a root by name
177    pub fn get_root(&self, name: &str) -> Option<&RootInfo> {
178        self.roots.get(name)
179    }
180
181    /// Get all root names
182    pub fn root_names(&self) -> impl Iterator<Item = &str> {
183        self.roots.keys().map(|s| s.as_str())
184    }
185
186    /// Check if this is a multi-root workspace
187    pub fn is_multi_root(&self) -> bool {
188        self.roots.len() > 1
189    }
190
191    /// Get the number of roots
192    pub fn root_count(&self) -> usize {
193        self.roots.len()
194    }
195}
196
197/// Registry tracking which nodes belong to which partition
198///
199/// Uses interior mutability via DashMap/DashSet for thread-safe concurrent access.
200/// All methods take `&self` to enable concurrent operations without external locking.
201struct PartitionRegistry {
202    /// Map from partition ID to the set of node IDs in that partition
203    partition_to_nodes: DashMap<String, HashSet<String>>,
204    /// Map from node ID to the partition it belongs to
205    node_to_partition: DashMap<String, String>,
206    /// Set of currently loaded partitions
207    loaded_partitions: DashSet<String>,
208    /// Per-partition loading locks to prevent duplicate concurrent loads
209    /// Uses Arc<Mutex> so we can clone and lock without holding the DashMap entry
210    loading_locks: DashMap<String, Arc<parking_lot::Mutex<()>>>,
211}
212
213impl Default for PartitionRegistry {
214    fn default() -> Self {
215        Self {
216            partition_to_nodes: DashMap::new(),
217            node_to_partition: DashMap::new(),
218            loaded_partitions: DashSet::new(),
219            loading_locks: DashMap::new(),
220        }
221    }
222}
223
224impl PartitionRegistry {
225    fn new() -> Self {
226        Self::default()
227    }
228
229    /// Check if a partition is currently loaded
230    fn is_loaded(&self, partition_id: &str) -> bool {
231        self.loaded_partitions.contains(partition_id)
232    }
233
234    /// Mark a partition as loaded and register its nodes
235    fn register_loaded(&self, partition_id: &str, node_ids: Vec<String>) {
236        self.loaded_partitions.insert(partition_id.to_string());
237        let nodes_set: HashSet<String> = node_ids.iter().cloned().collect();
238        for node_id in &node_ids {
239            self.node_to_partition
240                .insert(node_id.clone(), partition_id.to_string());
241        }
242        self.partition_to_nodes
243            .insert(partition_id.to_string(), nodes_set);
244    }
245
246    /// Mark a partition as unloaded and remove its node registrations
247    fn unregister(&self, partition_id: &str) -> Option<HashSet<String>> {
248        self.loaded_partitions.remove(partition_id);
249        if let Some((_, nodes)) = self.partition_to_nodes.remove(partition_id) {
250            for node_id in &nodes {
251                self.node_to_partition.remove(node_id);
252            }
253            Some(nodes)
254        } else {
255            None
256        }
257    }
258
259    /// Get the partition a node belongs to (if known)
260    ///
261    /// Returns owned String because DashMap doesn't support returning references.
262    fn get_node_partition(&self, node_id: &str) -> Option<String> {
263        self.node_to_partition.get(node_id).map(|r| r.clone())
264    }
265
266    /// Get the set of loaded partition IDs
267    fn loaded_partition_ids(&self) -> Vec<String> {
268        self.loaded_partitions.iter().map(|r| r.clone()).collect()
269    }
270
271    /// Get count of loaded partitions
272    fn loaded_count(&self) -> usize {
273        self.loaded_partitions.len()
274    }
275
276    /// Get node IDs belonging to a loaded partition
277    fn get_node_ids(&self, partition_id: &str) -> Option<Vec<String>> {
278        self.partition_to_nodes
279            .get(partition_id)
280            .map(|r| r.iter().cloned().collect())
281    }
282
283    /// Get or create a loading lock for a partition
284    ///
285    /// Used for double-checked locking during partition loading.
286    /// Returns the Arc<Mutex> which the caller can lock.
287    fn get_loading_lock(&self, partition_id: &str) -> Arc<parking_lot::Mutex<()>> {
288        self.loading_locks
289            .entry(partition_id.to_string())
290            .or_insert_with(|| Arc::new(parking_lot::Mutex::new(())))
291            .clone()
292    }
293}
294
295/// The lazy-loading graph manager
296///
297/// Manages partitioned code graphs stored in SQLite, loading them on-demand
298/// into a petgraph-based in-memory representation.
299///
300/// Uses interior mutability via `RwLock<PetCodeGraph>` to enable concurrent
301/// read access to the graph while maintaining thread safety for writes.
302pub struct LazyGraphManager {
303    /// The petgraph instance holding all loaded nodes/edges
304    /// Protected by RwLock for concurrent read access during queries
305    graph: RwLock<PetCodeGraph>,
306
307    /// Registry tracking loaded partitions and node ownership
308    registry: PartitionRegistry,
309
310    /// Manifest for partition lookups
311    manifest: Manifest,
312
313    /// Memory budget cache for partition eviction
314    cache: MemoryBudgetCache,
315
316    /// Cross-partition edge index (always in memory)
317    cross_refs: CrossRefIndex,
318
319    /// Base directory for partition storage (.codeprysm/partitions/)
320    partitions_dir: PathBuf,
321
322    /// Path to manifest file
323    manifest_path: PathBuf,
324
325    /// Path to cross_refs.db
326    cross_refs_path: PathBuf,
327}
328
329impl LazyGraphManager {
330    /// Create a new lazy graph manager with default memory budget (512 MB)
331    ///
332    /// # Arguments
333    /// * `prism_dir` - The .codeprysm directory path
334    pub fn new(prism_dir: &Path) -> Self {
335        Self::with_memory_budget(prism_dir, None)
336    }
337
338    /// Create a new lazy graph manager with a custom memory budget
339    ///
340    /// # Arguments
341    /// * `prism_dir` - The .codeprysm directory path
342    /// * `memory_budget_bytes` - Optional memory budget in bytes (default: 512 MB)
343    pub fn with_memory_budget(prism_dir: &Path, memory_budget_bytes: Option<usize>) -> Self {
344        let partitions_dir = prism_dir.join("partitions");
345        let manifest_path = prism_dir.join("manifest.json");
346        let cross_refs_path = prism_dir.join("cross_refs.db");
347
348        let cache = match memory_budget_bytes {
349            Some(bytes) => MemoryBudgetCache::new(bytes),
350            None => MemoryBudgetCache::with_default_budget(),
351        };
352
353        Self {
354            graph: RwLock::new(PetCodeGraph::new()),
355            registry: PartitionRegistry::new(),
356            manifest: Manifest::new(),
357            cache,
358            cross_refs: CrossRefIndex::new(),
359            partitions_dir,
360            manifest_path,
361            cross_refs_path,
362        }
363    }
364
365    /// Open an existing lazy graph from a .codeprysm directory
366    ///
367    /// Loads the manifest but does not load any partitions yet.
368    pub fn open(prism_dir: &Path) -> Result<Self, LazyGraphError> {
369        Self::open_with_memory_budget(prism_dir, None)
370    }
371
372    /// Open an existing lazy graph with a custom memory budget
373    pub fn open_with_memory_budget(
374        prism_dir: &Path,
375        memory_budget_bytes: Option<usize>,
376    ) -> Result<Self, LazyGraphError> {
377        let partitions_dir = prism_dir.join("partitions");
378        let manifest_path = prism_dir.join("manifest.json");
379        let cross_refs_path = prism_dir.join("cross_refs.db");
380
381        let manifest = if manifest_path.exists() {
382            Manifest::load(&manifest_path)?
383        } else {
384            Manifest::new()
385        };
386
387        // Load cross-refs from SQLite if database exists
388        let cross_refs = if cross_refs_path.exists() {
389            let store = CrossRefStore::open(&cross_refs_path)?;
390            store.load_all()?
391        } else {
392            CrossRefIndex::new()
393        };
394
395        let cache = match memory_budget_bytes {
396            Some(bytes) => MemoryBudgetCache::new(bytes),
397            None => MemoryBudgetCache::with_default_budget(),
398        };
399
400        Ok(Self {
401            graph: RwLock::new(PetCodeGraph::new()),
402            registry: PartitionRegistry::new(),
403            manifest,
404            cache,
405            cross_refs,
406            partitions_dir,
407            manifest_path,
408            cross_refs_path,
409        })
410    }
411
412    /// Initialize a new lazy graph with an empty manifest
413    ///
414    /// Creates the partitions directory if it doesn't exist.
415    pub fn init(prism_dir: &Path) -> Result<Self, LazyGraphError> {
416        Self::init_with_memory_budget(prism_dir, None)
417    }
418
419    /// Initialize a new lazy graph with a custom memory budget
420    pub fn init_with_memory_budget(
421        prism_dir: &Path,
422        memory_budget_bytes: Option<usize>,
423    ) -> Result<Self, LazyGraphError> {
424        let partitions_dir = prism_dir.join("partitions");
425        std::fs::create_dir_all(&partitions_dir)?;
426
427        let manifest_path = prism_dir.join("manifest.json");
428        let cross_refs_path = prism_dir.join("cross_refs.db");
429
430        let manifest = Manifest::new();
431        manifest.save(&manifest_path)?;
432
433        // Create empty cross_refs.db
434        CrossRefStore::create(&cross_refs_path)?;
435
436        let cache = match memory_budget_bytes {
437            Some(bytes) => MemoryBudgetCache::new(bytes),
438            None => MemoryBudgetCache::with_default_budget(),
439        };
440
441        Ok(Self {
442            graph: RwLock::new(PetCodeGraph::new()),
443            registry: PartitionRegistry::new(),
444            manifest,
445            cache,
446            cross_refs: CrossRefIndex::new(),
447            partitions_dir,
448            manifest_path,
449            cross_refs_path,
450        })
451    }
452
453    /// Initialize a lazy graph by discovering roots in a workspace
454    ///
455    /// Uses `RootDiscovery` to find git repositories and code directories,
456    /// then registers them in the manifest for multi-root support.
457    ///
458    /// # Arguments
459    /// * `workspace_path` - The workspace root to discover roots in
460    /// * `prism_dir` - The .codeprysm directory for storage
461    ///
462    /// # Examples
463    /// ```no_run
464    /// use codeprysm_core::lazy::manager::LazyGraphManager;
465    /// use std::path::Path;
466    ///
467    /// // Single repo workspace
468    /// let manager = LazyGraphManager::init_workspace(
469    ///     Path::new("/path/to/repo"),
470    ///     Path::new("/path/to/repo/.codeprysm"),
471    /// ).unwrap();
472    ///
473    /// // Multi-root workspace
474    /// let manager = LazyGraphManager::init_workspace(
475    ///     Path::new("/path/to/workspace"),
476    ///     Path::new("/path/to/workspace/.codeprysm"),
477    /// ).unwrap();
478    /// ```
479    pub fn init_workspace(workspace_path: &Path, prism_dir: &Path) -> Result<Self, LazyGraphError> {
480        Self::init_workspace_with_options(workspace_path, prism_dir, None, None)
481    }
482
483    /// Initialize a lazy graph with custom options
484    ///
485    /// # Arguments
486    /// * `workspace_path` - The workspace root to discover roots in
487    /// * `prism_dir` - The .codeprysm directory for storage
488    /// * `memory_budget_bytes` - Optional memory budget (default: 512 MB)
489    /// * `max_discovery_depth` - Optional max depth for root discovery (default: 3)
490    pub fn init_workspace_with_options(
491        workspace_path: &Path,
492        prism_dir: &Path,
493        memory_budget_bytes: Option<usize>,
494        max_discovery_depth: Option<usize>,
495    ) -> Result<Self, LazyGraphError> {
496        // Create directory structure
497        let partitions_dir = prism_dir.join("partitions");
498        std::fs::create_dir_all(&partitions_dir)?;
499
500        // Discover roots
501        let discovery = match max_discovery_depth {
502            Some(depth) => RootDiscovery::with_defaults().with_max_depth(depth),
503            None => RootDiscovery::with_defaults(),
504        };
505
506        let discovered_roots = discovery.discover(workspace_path)?;
507
508        // Create manifest with discovered roots
509        let mut manifest = Manifest::new();
510        for discovered in &discovered_roots {
511            let root_info = RootInfo::from_discovered_root(discovered);
512            manifest.register_root(root_info);
513        }
514
515        // Save manifest
516        let manifest_path = prism_dir.join("manifest.json");
517        manifest.save(&manifest_path)?;
518
519        // Create empty cross_refs.db
520        let cross_refs_path = prism_dir.join("cross_refs.db");
521        CrossRefStore::create(&cross_refs_path)?;
522
523        // Create cache
524        let cache = match memory_budget_bytes {
525            Some(bytes) => MemoryBudgetCache::new(bytes),
526            None => MemoryBudgetCache::with_default_budget(),
527        };
528
529        Ok(Self {
530            graph: RwLock::new(PetCodeGraph::new()),
531            registry: PartitionRegistry::new(),
532            manifest,
533            cache,
534            cross_refs: CrossRefIndex::new(),
535            partitions_dir,
536            manifest_path,
537            cross_refs_path,
538        })
539    }
540
541    /// Get the discovered roots from the manifest
542    pub fn roots(&self) -> impl Iterator<Item = &RootInfo> {
543        self.manifest.roots.values()
544    }
545
546    /// Check if this is a multi-root workspace
547    pub fn is_multi_root(&self) -> bool {
548        self.manifest.is_multi_root()
549    }
550
551    // =========================================================================
552    // Manifest Operations
553    // =========================================================================
554
555    /// Get the manifest
556    pub fn manifest(&self) -> &Manifest {
557        &self.manifest
558    }
559
560    /// Get mutable access to the manifest
561    pub fn manifest_mut(&mut self) -> &mut Manifest {
562        &mut self.manifest
563    }
564
565    /// Reload manifest from disk
566    pub fn reload_manifest(&mut self) -> Result<(), LazyGraphError> {
567        if self.manifest_path.exists() {
568            self.manifest = Manifest::load(&self.manifest_path)?;
569        }
570        Ok(())
571    }
572
573    /// Save manifest to disk
574    pub fn save_manifest(&self) -> Result<(), LazyGraphError> {
575        self.manifest.save(&self.manifest_path)
576    }
577
578    // =========================================================================
579    // Partition Lookup
580    // =========================================================================
581
582    /// Get the partition ID for a file path
583    pub fn get_partition_for_file(&self, file: &str) -> Option<&str> {
584        self.manifest.get_partition_for_file(file)
585    }
586
587    /// Get the partition ID for a node by looking at its file path
588    ///
589    /// Returns owned String since registry uses interior mutability (DashMap).
590    pub fn get_partition_for_node(&self, node_id: &str) -> Option<String> {
591        // First check if we already know this node's partition
592        if let Some(partition) = self.registry.get_node_partition(node_id) {
593            return Some(partition);
594        }
595
596        // Otherwise, extract file from node ID and look up in manifest
597        // Node IDs are typically "file.py:ClassName:method_name"
598        let file = node_id.split(':').next()?;
599        self.manifest
600            .get_partition_for_file(file)
601            .map(|s| s.to_string())
602    }
603
604    /// Compute the partition ID for a file path (directory-based partitioning)
605    ///
606    /// For multi-root workspaces, use `compute_partition_id_for_root` instead.
607    /// Returns the parent directory path as the partition ID.
608    #[deprecated(note = "Use compute_partition_id_for_root for multi-root support")]
609    pub fn compute_partition_id(file: &str) -> String {
610        Self::compute_directory_partition(file)
611    }
612
613    /// Compute partition ID for a file in a specific root (multi-root support)
614    ///
615    /// Returns partition ID in format `{root_name}_{directory}` to avoid
616    /// collisions between roots with similar directory structures.
617    ///
618    /// # Examples
619    /// ```
620    /// use codeprysm_core::lazy::manager::LazyGraphManager;
621    ///
622    /// let pid = LazyGraphManager::compute_partition_id_for_root("repo-a", "src/core/main.py");
623    /// assert_eq!(pid, "repo-a_src/core");
624    ///
625    /// let pid = LazyGraphManager::compute_partition_id_for_root("repo-b", "src/core/main.py");
626    /// assert_eq!(pid, "repo-b_src/core");
627    ///
628    /// // Root-level files use "{root}_root" partition
629    /// let pid = LazyGraphManager::compute_partition_id_for_root("myrepo", "main.py");
630    /// assert_eq!(pid, "myrepo_root");
631    /// ```
632    pub fn compute_partition_id_for_root(root_name: &str, file: &str) -> String {
633        let dir_part = Self::compute_directory_partition(file);
634        format!("{}_{}", root_name, dir_part)
635    }
636
637    /// Internal helper: compute the directory-based partition component
638    fn compute_directory_partition(file: &str) -> String {
639        Path::new(file)
640            .parent()
641            .filter(|p| !p.as_os_str().is_empty())
642            .map(|p| p.to_string_lossy().to_string())
643            .unwrap_or_else(|| "root".to_string())
644    }
645
646    /// Get the SQLite database path for a partition
647    fn partition_db_path(&self, partition_id: &str) -> PathBuf {
648        // Sanitize partition ID for use as filename
649        let safe_name = partition_id.replace(['/', '\\', ':'], "_");
650        self.partitions_dir.join(format!("{}.db", safe_name))
651    }
652
653    // =========================================================================
654    // Partition Loading
655    // =========================================================================
656
657    /// Check if a partition is currently loaded
658    pub fn is_partition_loaded(&self, partition_id: &str) -> bool {
659        self.registry.is_loaded(partition_id)
660    }
661
662    /// Get the number of loaded partitions
663    pub fn loaded_partition_count(&self) -> usize {
664        self.registry.loaded_count()
665    }
666
667    /// Get the list of loaded partition IDs
668    pub fn loaded_partitions(&self) -> Vec<String> {
669        self.registry.loaded_partition_ids()
670    }
671
672    /// Get all partition IDs from manifest (whether loaded or not)
673    pub fn partition_ids(&self) -> Vec<String> {
674        self.manifest.partitions.keys().cloned().collect()
675    }
676
677    /// Get node IDs that belong to a specific loaded partition
678    ///
679    /// Returns None if the partition is not currently loaded.
680    /// Use this after load_partition() to get the nodes for indexing.
681    pub fn node_ids_in_partition(&self, partition_id: &str) -> Option<Vec<String>> {
682        self.registry.get_node_ids(partition_id)
683    }
684
685    /// Load a partition from SQLite into the petgraph
686    ///
687    /// Uses double-checked locking to prevent duplicate concurrent loads of the same
688    /// partition while allowing concurrent loads of different partitions.
689    ///
690    /// If the partition is already loaded, this is a no-op (cache hit).
691    /// If memory budget is exceeded, evicts least-recently-used partitions first.
692    pub fn load_partition(&self, partition_id: &str) -> Result<(), LazyGraphError> {
693        // First check: Is the partition already loaded? (lock-free read via DashSet)
694        if self.registry.is_loaded(partition_id) {
695            self.cache.touch(partition_id);
696            return Ok(());
697        }
698
699        // Acquire per-partition loading lock to prevent concurrent loads of same partition
700        // This allows different partitions to load concurrently while preventing duplicate work
701        let loading_lock = self.registry.get_loading_lock(partition_id);
702        let _guard = loading_lock.lock();
703
704        // Second check: Re-check after acquiring lock (another thread may have loaded it)
705        if self.registry.is_loaded(partition_id) {
706            self.cache.touch(partition_id);
707            return Ok(());
708        }
709
710        // Cache miss
711        self.cache.touch(partition_id); // records miss since not in cache
712
713        // Open partition database
714        let db_path = self.partition_db_path(partition_id);
715        if !db_path.exists() {
716            return Err(LazyGraphError::PartitionNotFound(partition_id.to_string()));
717        }
718
719        let conn = PartitionConnection::open(&db_path, partition_id)?;
720
721        // Get partition stats for memory estimation
722        let partition_stats = conn.stats()?;
723        let cache_stats =
724            CachePartitionStats::new(partition_stats.node_count, partition_stats.edge_count);
725
726        // Check if we need to evict partitions to make room
727        let needed = self.cache.memory_needed_for(cache_stats.estimated_bytes);
728        if needed > 0 {
729            let candidates = self.cache.get_eviction_candidates_for(needed);
730            for candidate_id in candidates {
731                self.unload_partition(&candidate_id);
732            }
733        }
734
735        // Load all nodes from partition
736        let nodes = conn.query_all_nodes()?;
737        let node_ids: Vec<String> = nodes.iter().map(|n| n.id.clone()).collect();
738
739        // Load all edges from partition
740        let edges = conn.query_all_edges()?;
741
742        // Acquire write lock and add nodes/edges
743        {
744            let mut graph = self.graph.write();
745            for node in nodes {
746                graph.add_node(node);
747            }
748            for edge in edges {
749                graph.add_edge(
750                    &edge.source,
751                    &edge.target,
752                    EdgeData {
753                        edge_type: edge.edge_type,
754                        ref_line: edge.ref_line,
755                        ident: edge.ident,
756                        version_spec: edge.version_spec,
757                        is_dev_dependency: edge.is_dev_dependency,
758                    },
759                );
760            }
761        }
762
763        // Register partition as loaded
764        self.registry.register_loaded(partition_id, node_ids);
765
766        // Track in cache
767        self.cache
768            .record_loaded(partition_id.to_string(), cache_stats);
769
770        Ok(())
771    }
772
773    /// Ensure a partition is loaded (load if not already loaded)
774    pub fn ensure_partition_loaded(&self, partition_id: &str) -> Result<(), LazyGraphError> {
775        self.load_partition(partition_id)
776    }
777
778    /// Load all partitions into memory
779    ///
780    /// This is useful when you need access to the entire graph, such as
781    /// for full reindexing. Note that this bypasses the memory budget
782    /// and may use significant memory for large repositories.
783    ///
784    /// Returns the number of partitions loaded.
785    pub fn load_all_partitions(&self) -> Result<usize, LazyGraphError> {
786        let partition_ids: Vec<String> = self.manifest.partitions.keys().cloned().collect();
787        let mut loaded = 0;
788
789        for partition_id in &partition_ids {
790            if !self.registry.is_loaded(partition_id) {
791                self.load_partition(partition_id)?;
792                loaded += 1;
793            }
794        }
795
796        Ok(loaded)
797    }
798
799    /// Unload a partition from petgraph to free memory
800    ///
801    /// Removes all nodes and edges belonging to this partition.
802    /// Returns the number of nodes unloaded.
803    ///
804    /// Thread-safe: Uses interior mutability via DashMap (registry), Mutex (cache),
805    /// and RwLock (graph).
806    pub fn unload_partition(&self, partition_id: &str) -> usize {
807        // Get the nodes belonging to this partition
808        let nodes = match self.registry.unregister(partition_id) {
809            Some(nodes) => nodes,
810            None => return 0,
811        };
812
813        // Remove from cache (tracks eviction metrics)
814        self.cache.remove(partition_id);
815
816        // Remove all nodes (this also removes their incident edges)
817        let count = nodes.len();
818        {
819            let mut graph = self.graph.write();
820            for node_id in nodes {
821                graph.remove_node(&node_id);
822            }
823        }
824
825        count
826    }
827
828    // =========================================================================
829    // Node Access (Lazy)
830    // =========================================================================
831
832    /// Get a node by ID, loading its partition if necessary
833    ///
834    /// Returns an owned clone of the node for thread safety. This enables
835    /// concurrent read access without holding locks across call boundaries.
836    pub fn get_node(&self, id: &str) -> Result<Option<Node>, LazyGraphError> {
837        // Fast path: check if in graph (read lock)
838        {
839            let graph = self.graph.read();
840            if let Some(node) = graph.get_node(id) {
841                return Ok(Some(node.clone()));
842            }
843        }
844
845        // Slow path: load partition then fetch
846        if let Some(partition_id) = self.get_partition_for_node(id) {
847            self.load_partition(&partition_id)?;
848            let graph = self.graph.read();
849            Ok(graph.get_node(id).cloned())
850        } else {
851            Ok(None)
852        }
853    }
854
855    /// Get a node by ID without loading (only returns if already loaded)
856    ///
857    /// Returns an owned clone of the node for thread safety.
858    pub fn get_node_if_loaded(&self, id: &str) -> Option<Node> {
859        let graph = self.graph.read();
860        graph.get_node(id).cloned()
861    }
862
863    /// Check if a node exists (may require loading partition)
864    pub fn contains_node(&self, id: &str) -> Result<bool, LazyGraphError> {
865        Ok(self.get_node(id)?.is_some())
866    }
867
868    /// Check if a node exists in the currently loaded graph (no lazy loading)
869    pub fn contains_node_if_loaded(&self, id: &str) -> bool {
870        let graph = self.graph.read();
871        graph.contains_node(id)
872    }
873
874    // =========================================================================
875    // Edge Access (Lazy)
876    // =========================================================================
877
878    /// Get incoming edges for a node, loading its partition if necessary
879    ///
880    /// This includes both intra-partition edges (from petgraph) and cross-partition
881    /// edges (from CrossRefIndex). Cross-partition source nodes are loaded as needed.
882    pub fn get_incoming_edges(
883        &self,
884        node_id: &str,
885    ) -> Result<Vec<(Node, EdgeData)>, LazyGraphError> {
886        // Ensure the target node's partition is loaded
887        if let Some(partition_id) = self.get_partition_for_node(node_id) {
888            self.load_partition(&partition_id)?;
889        }
890
891        // Collect intra-partition edges from petgraph (read lock)
892        let mut edges: Vec<(Node, EdgeData)> = {
893            let graph = self.graph.read();
894            graph
895                .incoming_edges(node_id)
896                .map(|(n, e)| (n.clone(), e.clone()))
897                .collect()
898        };
899
900        // Clone cross-refs to avoid borrow conflict when loading partitions
901        let cross_refs: Vec<CrossRef> = self
902            .cross_refs
903            .get_by_target(node_id)
904            .cloned()
905            .unwrap_or_default();
906
907        // Add cross-partition edges
908        for cross_ref in cross_refs {
909            // Load the source partition if not already loaded
910            self.load_partition(&cross_ref.source_partition)?;
911
912            // Get the source node (read lock)
913            let graph = self.graph.read();
914            if let Some(source_node) = graph.get_node(&cross_ref.source_id) {
915                let edge_data = EdgeData {
916                    edge_type: cross_ref.edge_type,
917                    ref_line: cross_ref.ref_line,
918                    ident: cross_ref.ident,
919                    version_spec: cross_ref.version_spec,
920                    is_dev_dependency: cross_ref.is_dev_dependency,
921                };
922                edges.push((source_node.clone(), edge_data));
923            }
924        }
925
926        Ok(edges)
927    }
928
929    /// Get outgoing edges from a node, loading its partition if necessary
930    ///
931    /// This includes both intra-partition edges (from petgraph) and cross-partition
932    /// edges (from CrossRefIndex). Cross-partition target nodes are loaded as needed.
933    pub fn get_outgoing_edges(
934        &self,
935        node_id: &str,
936    ) -> Result<Vec<(Node, EdgeData)>, LazyGraphError> {
937        // Ensure the source node's partition is loaded
938        if let Some(partition_id) = self.get_partition_for_node(node_id) {
939            self.load_partition(&partition_id)?;
940        }
941
942        // Collect intra-partition edges from petgraph (read lock)
943        let mut edges: Vec<(Node, EdgeData)> = {
944            let graph = self.graph.read();
945            graph
946                .outgoing_edges(node_id)
947                .map(|(n, e)| (n.clone(), e.clone()))
948                .collect()
949        };
950
951        // Clone cross-refs to avoid borrow conflict when loading partitions
952        let cross_refs: Vec<CrossRef> = self
953            .cross_refs
954            .get_by_source(node_id)
955            .cloned()
956            .unwrap_or_default();
957
958        // Add cross-partition edges
959        for cross_ref in cross_refs {
960            // Load the target partition if not already loaded
961            self.load_partition(&cross_ref.target_partition)?;
962
963            // Get the target node (read lock)
964            let graph = self.graph.read();
965            if let Some(target_node) = graph.get_node(&cross_ref.target_id) {
966                let edge_data = EdgeData {
967                    edge_type: cross_ref.edge_type,
968                    ref_line: cross_ref.ref_line,
969                    ident: cross_ref.ident,
970                    version_spec: cross_ref.version_spec,
971                    is_dev_dependency: cross_ref.is_dev_dependency,
972                };
973                edges.push((target_node.clone(), edge_data));
974            }
975        }
976
977        Ok(edges)
978    }
979
980    // =========================================================================
981    // Cross-Partition Edge Management
982    // =========================================================================
983
    /// Add a cross-partition edge reference
    ///
    /// Use this when an edge spans two different partitions.
    ///
    /// The reference is handed to the manager's cross-ref index
    /// (`self.cross_refs`). Nothing is written to disk here; persistence goes
    /// through [`Self::save_cross_refs`].
    pub fn add_cross_ref(&mut self, cross_ref: CrossRef) {
        self.cross_refs.add(cross_ref);
    }
990
    /// Add multiple cross-partition edge references
    ///
    /// Accepts any iterable of [`CrossRef`] values and forwards it to the
    /// cross-ref index in a single `add_all` call.
    pub fn add_cross_refs(&mut self, refs: impl IntoIterator<Item = CrossRef>) {
        self.cross_refs.add_all(refs);
    }
995
    /// Get cross-partition edges targeting a specific node
    ///
    /// Pure lookup in the cross-ref index: no partition is loaded. Returns
    /// whatever the index's `get_by_target` returns for `target_id`, borrowed
    /// from the index.
    pub fn get_cross_refs_by_target(&self, target_id: &str) -> Option<&Vec<CrossRef>> {
        self.cross_refs.get_by_target(target_id)
    }
1000
    /// Get cross-partition edges from a specific source node
    ///
    /// Pure lookup in the cross-ref index: no partition is loaded. Returns
    /// whatever the index's `get_by_source` returns for `source_id`, borrowed
    /// from the index.
    pub fn get_cross_refs_by_source(&self, source_id: &str) -> Option<&Vec<CrossRef>> {
        self.cross_refs.get_by_source(source_id)
    }
1005
    /// Remove all cross-partition edges involving a specific partition
    ///
    /// Call this when a partition is being rebuilt.
    ///
    /// Delegates to the cross-ref index's `remove_by_partition`. The stored
    /// copy is not touched here; call [`Self::save_cross_refs`] to persist the
    /// change.
    pub fn remove_cross_refs_by_partition(&mut self, partition: &str) {
        self.cross_refs.remove_by_partition(partition);
    }
1012
    /// Get the number of cross-partition edges
    ///
    /// Reports the length of the cross-ref index held by this manager.
    pub fn cross_ref_count(&self) -> usize {
        self.cross_refs.len()
    }
1017
1018    /// Save cross-partition edges to SQLite
1019    pub fn save_cross_refs(&self) -> Result<(), LazyGraphError> {
1020        let store = if self.cross_refs_path.exists() {
1021            CrossRefStore::open(&self.cross_refs_path)?
1022        } else {
1023            CrossRefStore::create(&self.cross_refs_path)?
1024        };
1025        store.save_all(&self.cross_refs)?;
1026        Ok(())
1027    }
1028
1029    /// Reload cross-partition edges from SQLite
1030    pub fn reload_cross_refs(&mut self) -> Result<(), LazyGraphError> {
1031        if self.cross_refs_path.exists() {
1032            let store = CrossRefStore::open(&self.cross_refs_path)?;
1033            self.cross_refs = store.load_all()?;
1034        }
1035        Ok(())
1036    }
1037
1038    // =========================================================================
1039    // Direct Graph Access
1040    // =========================================================================
1041
    /// Get access to the underlying RwLock-protected PetCodeGraph
    ///
    /// Callers must acquire read or write lock as appropriate.
    /// Use this for operations that don't require lazy loading.
    ///
    /// No partition is loaded by this accessor; only nodes and edges that are
    /// already in memory are visible through the returned lock.
    pub fn graph(&self) -> &RwLock<PetCodeGraph> {
        &self.graph
    }
1049
    /// Get a read lock guard on the underlying PetCodeGraph
    ///
    /// Convenience method for read-only graph operations.
    ///
    /// The lock is held until the returned guard is dropped. Methods on this
    /// manager that take the graph write lock (e.g. `unload_partition`) must
    /// not be called from the same thread while the guard is alive, or the
    /// call will block on this guard.
    pub fn graph_read(&self) -> parking_lot::RwLockReadGuard<'_, PetCodeGraph> {
        self.graph.read()
    }
1056
    /// Get a write lock guard on the underlying PetCodeGraph
    ///
    /// Convenience method for mutable graph operations.
    ///
    /// The lock is exclusive and held until the returned guard is dropped.
    /// Manager methods that lock the graph themselves (`get_node`,
    /// `unload_partition`, ...) must not be called from the same thread while
    /// the guard is alive, or they will block on it.
    pub fn graph_write(&self) -> parking_lot::RwLockWriteGuard<'_, PetCodeGraph> {
        self.graph.write()
    }
1063
1064    // =========================================================================
1065    // Cache Operations
1066    // =========================================================================
1067
    /// Get a snapshot of cache metrics (hit/miss rates, evictions)
    ///
    /// Returns an owned [`CacheMetrics`] value obtained from the partition
    /// cache; later cache activity is not reflected in a value already
    /// returned.
    pub fn cache_metrics(&self) -> CacheMetrics {
        self.cache.metrics()
    }
1072
    /// Reset cache metrics
    ///
    /// Delegates to the partition cache's `reset_metrics`. Takes `&self`, so
    /// the cache performs the reset through its own interior mutability.
    pub fn reset_cache_metrics(&self) {
        self.cache.reset_metrics();
    }
1077
    /// Get current memory usage in bytes
    ///
    /// Value as tracked by the partition cache (`current_memory_bytes`).
    /// NOTE(review): presumably an estimate derived from loaded partitions
    /// rather than a measured allocation size — confirm in the cache module.
    pub fn memory_usage_bytes(&self) -> usize {
        self.cache.current_memory_bytes()
    }
1082
    /// Get memory budget in bytes
    ///
    /// Reports the partition cache's configured maximum (`max_memory_bytes`).
    pub fn memory_budget_bytes(&self) -> usize {
        self.cache.max_memory_bytes()
    }
1087
    /// Get memory usage as a ratio (0.0 - 1.0)
    ///
    /// Delegates to the partition cache's `memory_usage_ratio`.
    /// NOTE(review): 0.0 - 1.0 is the nominal range; whether the value can
    /// exceed 1.0 while the cache is over budget depends on the cache
    /// implementation — confirm.
    pub fn memory_usage_ratio(&self) -> f64 {
        self.cache.memory_usage_ratio()
    }
1092
    /// Check if memory usage exceeds budget
    ///
    /// Delegates to the partition cache's `is_over_budget`; no eviction is
    /// triggered by this query itself.
    pub fn is_over_budget(&self) -> bool {
        self.cache.is_over_budget()
    }
1097
1098    // =========================================================================
1099    // Statistics
1100    // =========================================================================
1101
1102    /// Get statistics about the lazy graph
1103    pub fn stats(&self) -> LazyGraphStats {
1104        let cache_metrics = self.cache.metrics().clone();
1105        let graph = self.graph.read();
1106        LazyGraphStats {
1107            loaded_partitions: self.registry.loaded_count(),
1108            total_partitions: self.manifest.partitions.len(),
1109            loaded_nodes: graph.node_count(),
1110            loaded_edges: graph.edge_count(),
1111            cross_partition_edges: self.cross_refs.len(),
1112            total_files: self.manifest.files.len(),
1113            memory_usage_bytes: self.cache.current_memory_bytes(),
1114            memory_budget_bytes: self.cache.max_memory_bytes(),
1115            cache_hit_rate: cache_metrics.hit_rate(),
1116            cache_evictions: cache_metrics.evictions,
1117        }
1118    }
1119}
1120
/// Statistics about the lazy graph state
///
/// A plain value produced by `LazyGraphManager::stats`; it does not update
/// after it has been created.
#[derive(Debug, Clone)]
pub struct LazyGraphStats {
    /// Number of currently loaded partitions
    /// (as reported by the partition registry).
    pub loaded_partitions: usize,
    /// Total number of partitions in manifest
    pub total_partitions: usize,
    /// Number of nodes currently in memory
    /// (node count of the in-memory graph).
    pub loaded_nodes: usize,
    /// Number of edges currently in memory (intra-partition)
    /// (edge count of the in-memory graph).
    pub loaded_edges: usize,
    /// Number of cross-partition edges in index
    pub cross_partition_edges: usize,
    /// Total number of files tracked in manifest
    pub total_files: usize,
    /// Current memory usage in bytes
    /// (as tracked by the partition cache).
    pub memory_usage_bytes: usize,
    /// Memory budget in bytes
    /// (the partition cache's configured maximum).
    pub memory_budget_bytes: usize,
    /// Cache hit rate (0.0 - 1.0)
    pub cache_hit_rate: f64,
    /// Number of partitions evicted
    /// (taken from the cache metrics' `evictions` counter).
    pub cache_evictions: u64,
}
1145
1146#[cfg(test)]
1147mod tests {
1148    use super::*;
1149    use crate::graph::{CallableKind, NodeType};
1150    use tempfile::TempDir;
1151
1152    fn create_test_node(id: &str, name: &str, file: &str) -> Node {
1153        Node {
1154            id: id.to_string(),
1155            name: name.to_string(),
1156            node_type: NodeType::Callable,
1157            kind: Some(CallableKind::Function.as_str().to_string()),
1158            subtype: None,
1159            file: file.to_string(),
1160            line: 1,
1161            end_line: 10,
1162            text: Some("def test(): pass".to_string()),
1163            hash: None,
1164            metadata: Default::default(),
1165        }
1166    }
1167
1168    #[test]
1169    fn test_manifest_new() {
1170        let manifest = Manifest::new();
1171        assert_eq!(manifest.schema_version, "1.0");
1172        assert!(manifest.files.is_empty());
1173        assert!(manifest.partitions.is_empty());
1174    }
1175
1176    #[test]
1177    fn test_manifest_save_load() {
1178        let temp_dir = TempDir::new().unwrap();
1179        let manifest_path = temp_dir.path().join("manifest.json");
1180
1181        let mut manifest = Manifest::new();
1182        manifest.set_file(
1183            "src/main.py".to_string(),
1184            "src".to_string(),
1185            Some("abc123".to_string()),
1186        );
1187        manifest.register_partition("src".to_string(), "src.db".to_string());
1188
1189        manifest.save(&manifest_path).unwrap();
1190
1191        let loaded = Manifest::load(&manifest_path).unwrap();
1192        assert_eq!(loaded.get_partition_for_file("src/main.py"), Some("src"));
1193        assert_eq!(loaded.get_partition_file("src"), Some("src.db"));
1194    }
1195
1196    #[test]
1197    fn test_manifest_roots() {
1198        let temp_dir = TempDir::new().unwrap();
1199        let manifest_path = temp_dir.path().join("manifest.json");
1200
1201        let mut manifest = Manifest::new();
1202
1203        // Register git root
1204        manifest.register_root(RootInfo {
1205            name: "repo-a".to_string(),
1206            root_type: "git".to_string(),
1207            relative_path: "repo-a".to_string(),
1208            remote_url: Some("https://github.com/org/repo-a".to_string()),
1209            branch: Some("main".to_string()),
1210            commit: Some("abc123".to_string()),
1211        });
1212
1213        // Register code directory root
1214        manifest.register_root(RootInfo {
1215            name: "scripts".to_string(),
1216            root_type: "code".to_string(),
1217            relative_path: "scripts".to_string(),
1218            remote_url: None,
1219            branch: None,
1220            commit: None,
1221        });
1222
1223        assert_eq!(manifest.root_count(), 2);
1224        assert!(manifest.is_multi_root());
1225
1226        // Save and reload
1227        manifest.save(&manifest_path).unwrap();
1228        let loaded = Manifest::load(&manifest_path).unwrap();
1229
1230        assert_eq!(loaded.root_count(), 2);
1231        assert!(loaded.is_multi_root());
1232
1233        let repo_a = loaded.get_root("repo-a").unwrap();
1234        assert_eq!(repo_a.root_type, "git");
1235        assert_eq!(
1236            repo_a.remote_url,
1237            Some("https://github.com/org/repo-a".to_string())
1238        );
1239
1240        let scripts = loaded.get_root("scripts").unwrap();
1241        assert_eq!(scripts.root_type, "code");
1242        assert!(scripts.remote_url.is_none());
1243    }
1244
1245    #[test]
1246    fn test_manifest_single_root() {
1247        let mut manifest = Manifest::new();
1248
1249        manifest.register_root(RootInfo {
1250            name: "myrepo".to_string(),
1251            root_type: "git".to_string(),
1252            relative_path: ".".to_string(),
1253            remote_url: None,
1254            branch: None,
1255            commit: None,
1256        });
1257
1258        assert_eq!(manifest.root_count(), 1);
1259        assert!(!manifest.is_multi_root());
1260
1261        // Root names iterator
1262        let names: Vec<&str> = manifest.root_names().collect();
1263        assert_eq!(names, vec!["myrepo"]);
1264    }
1265
1266    #[test]
1267    #[allow(deprecated)]
1268    fn test_compute_partition_id() {
1269        assert_eq!(
1270            LazyGraphManager::compute_partition_id("src/core/main.py"),
1271            "src/core"
1272        );
1273        assert_eq!(LazyGraphManager::compute_partition_id("main.py"), "root");
1274        assert_eq!(
1275            LazyGraphManager::compute_partition_id("a/b/c/d.rs"),
1276            "a/b/c"
1277        );
1278    }
1279
1280    #[test]
1281    fn test_compute_partition_id_for_root() {
1282        // Multi-root: partition IDs are namespaced by root
1283        assert_eq!(
1284            LazyGraphManager::compute_partition_id_for_root("repo-a", "src/core/main.py"),
1285            "repo-a_src/core"
1286        );
1287        assert_eq!(
1288            LazyGraphManager::compute_partition_id_for_root("repo-b", "src/core/main.py"),
1289            "repo-b_src/core"
1290        );
1291        // No collision between different roots with same directory structure
1292        assert_ne!(
1293            LazyGraphManager::compute_partition_id_for_root("repo-a", "src/main.py"),
1294            LazyGraphManager::compute_partition_id_for_root("repo-b", "src/main.py")
1295        );
1296        // Root-level files
1297        assert_eq!(
1298            LazyGraphManager::compute_partition_id_for_root("myrepo", "main.py"),
1299            "myrepo_root"
1300        );
1301    }
1302
1303    #[test]
1304    fn test_lazy_graph_manager_init() {
1305        let temp_dir = TempDir::new().unwrap();
1306        let prism_dir = temp_dir.path().join(".codeprysm");
1307
1308        let manager = LazyGraphManager::init(&prism_dir).unwrap();
1309
1310        assert!(prism_dir.join("partitions").exists());
1311        assert!(prism_dir.join("manifest.json").exists());
1312        assert_eq!(manager.loaded_partition_count(), 0);
1313    }
1314
1315    #[test]
1316    fn test_lazy_graph_manager_open() {
1317        let temp_dir = TempDir::new().unwrap();
1318        let prism_dir = temp_dir.path().join(".codeprysm");
1319
1320        // Init first
1321        let _manager = LazyGraphManager::init(&prism_dir).unwrap();
1322
1323        // Then open
1324        let manager = LazyGraphManager::open(&prism_dir).unwrap();
1325        assert_eq!(manager.loaded_partition_count(), 0);
1326    }
1327
1328    #[test]
1329    fn test_partition_registry() {
1330        let registry = PartitionRegistry::new();
1331
1332        // Register some nodes (methods now take &self due to interior mutability)
1333        registry.register_loaded(
1334            "src/core",
1335            vec![
1336                "src/core/main.py:func1".to_string(),
1337                "src/core/main.py:func2".to_string(),
1338            ],
1339        );
1340
1341        assert!(registry.is_loaded("src/core"));
1342        assert!(!registry.is_loaded("src/other"));
1343        assert_eq!(
1344            registry.get_node_partition("src/core/main.py:func1"),
1345            Some("src/core".to_string())
1346        );
1347
1348        // Unregister
1349        let nodes = registry.unregister("src/core").unwrap();
1350        assert_eq!(nodes.len(), 2);
1351        assert!(!registry.is_loaded("src/core"));
1352        assert!(
1353            registry
1354                .get_node_partition("src/core/main.py:func1")
1355                .is_none()
1356        );
1357    }
1358
1359    #[test]
1360    fn test_load_unload_partition() {
1361        let temp_dir = TempDir::new().unwrap();
1362        let prism_dir = temp_dir.path().join(".codeprysm");
1363
1364        // Init manager
1365        let mut manager = LazyGraphManager::init(&prism_dir).unwrap();
1366
1367        // Create a partition with some data
1368        let partition_id = "test_partition";
1369        let db_path = manager.partition_db_path(partition_id);
1370        let conn = PartitionConnection::create(&db_path, partition_id).unwrap();
1371
1372        let node1 = create_test_node("test.py:func1", "func1", "test.py");
1373        let node2 = create_test_node("test.py:func2", "func2", "test.py");
1374        conn.insert_node(&node1).unwrap();
1375        conn.insert_node(&node2).unwrap();
1376
1377        // Register in manifest
1378        manager
1379            .manifest_mut()
1380            .set_file("test.py".to_string(), partition_id.to_string(), None);
1381        manager
1382            .manifest_mut()
1383            .register_partition(partition_id.to_string(), "test_partition.db".to_string());
1384
1385        // Load partition
1386        manager.load_partition(partition_id).unwrap();
1387        assert!(manager.is_partition_loaded(partition_id));
1388        assert_eq!(manager.graph_read().node_count(), 2);
1389
1390        // Unload partition
1391        let unloaded = manager.unload_partition(partition_id);
1392        assert_eq!(unloaded, 2);
1393        assert!(!manager.is_partition_loaded(partition_id));
1394        assert_eq!(manager.graph_read().node_count(), 0);
1395    }
1396
1397    #[test]
1398    fn test_get_node_lazy() {
1399        let temp_dir = TempDir::new().unwrap();
1400        let prism_dir = temp_dir.path().join(".codeprysm");
1401
1402        let mut manager = LazyGraphManager::init(&prism_dir).unwrap();
1403
1404        // Create partition
1405        let partition_id = "src";
1406        let db_path = manager.partition_db_path(partition_id);
1407        let conn = PartitionConnection::create(&db_path, partition_id).unwrap();
1408
1409        let node = create_test_node("src/main.py:main", "main", "src/main.py");
1410        conn.insert_node(&node).unwrap();
1411
1412        // Register in manifest
1413        manager
1414            .manifest_mut()
1415            .set_file("src/main.py".to_string(), partition_id.to_string(), None);
1416        manager
1417            .manifest_mut()
1418            .register_partition(partition_id.to_string(), "src.db".to_string());
1419
1420        // Node not loaded yet
1421        assert!(manager.get_node_if_loaded("src/main.py:main").is_none());
1422
1423        // Lazy load via get_node
1424        let node = manager.get_node("src/main.py:main").unwrap();
1425        assert!(node.is_some());
1426        assert_eq!(node.unwrap().name, "main");
1427
1428        // Now it should be loaded
1429        assert!(manager.is_partition_loaded(partition_id));
1430    }
1431
1432    #[test]
1433    fn test_stats() {
1434        let temp_dir = TempDir::new().unwrap();
1435        let prism_dir = temp_dir.path().join(".codeprysm");
1436
1437        let mut manager = LazyGraphManager::init(&prism_dir).unwrap();
1438
1439        // Add some manifest entries
1440        manager
1441            .manifest_mut()
1442            .set_file("src/a.py".to_string(), "src".to_string(), None);
1443        manager
1444            .manifest_mut()
1445            .set_file("src/b.py".to_string(), "src".to_string(), None);
1446        manager
1447            .manifest_mut()
1448            .register_partition("src".to_string(), "src.db".to_string());
1449        manager
1450            .manifest_mut()
1451            .register_partition("tests".to_string(), "tests.db".to_string());
1452
1453        let stats = manager.stats();
1454        assert_eq!(stats.loaded_partitions, 0);
1455        assert_eq!(stats.total_partitions, 2);
1456        assert_eq!(stats.total_files, 2);
1457        assert_eq!(stats.loaded_nodes, 0);
1458        // Check cache stats are included
1459        assert_eq!(stats.memory_usage_bytes, 0);
1460        assert!(stats.memory_budget_bytes > 0);
1461    }
1462
1463    #[test]
1464    fn test_cache_eviction() {
1465        let temp_dir = TempDir::new().unwrap();
1466        let prism_dir = temp_dir.path().join(".codeprysm");
1467
1468        // Create manager with a small memory budget (15 KB) to trigger eviction
1469        // Each partition with 10 nodes is ~7168 bytes (10 * 512 * 1.4)
1470        // With budget of 15KB and min_partitions=2, loading 4 partitions should
1471        // trigger eviction of at least one partition
1472        let mut manager =
1473            LazyGraphManager::init_with_memory_budget(&prism_dir, Some(15_000)).unwrap();
1474
1475        // Create four partitions with nodes
1476        for i in 1..=4 {
1477            let partition_id = format!("partition_{}", i);
1478            let db_path = manager.partition_db_path(&partition_id);
1479            let conn = PartitionConnection::create(&db_path, &partition_id).unwrap();
1480
1481            // Add enough nodes to use some memory (~7KB per partition)
1482            for j in 0..10 {
1483                let node_id = format!("p{}/file.py:func_{}", i, j);
1484                let node =
1485                    create_test_node(&node_id, &format!("func_{}", j), &format!("p{}/file.py", i));
1486                conn.insert_node(&node).unwrap();
1487            }
1488
1489            // Register in manifest
1490            manager
1491                .manifest_mut()
1492                .set_file(format!("p{}/file.py", i), partition_id.clone(), None);
1493            manager
1494                .manifest_mut()
1495                .register_partition(partition_id.clone(), format!("{}.db", partition_id));
1496        }
1497
1498        // Load first two partitions (should fit within budget)
1499        manager.load_partition("partition_1").unwrap();
1500        manager.load_partition("partition_2").unwrap();
1501        assert!(manager.is_partition_loaded("partition_1"));
1502        assert!(manager.is_partition_loaded("partition_2"));
1503
1504        // Load third partition - should trigger eviction
1505        // Current: 2 partitions (~14KB), budget 15KB, loading ~7KB more
1506        // Need to free space, but min_partitions=2 prevents eviction
1507        manager.load_partition("partition_3").unwrap();
1508        assert!(manager.is_partition_loaded("partition_3"));
1509
1510        // Load fourth partition - should definitely trigger eviction
1511        // Now we have 3 partitions, min_partitions=2, so we can evict 1
1512        manager.load_partition("partition_4").unwrap();
1513        assert!(manager.is_partition_loaded("partition_4"));
1514
1515        // Verify eviction happened
1516        let stats = manager.stats();
1517        assert!(stats.cache_evictions > 0, "Expected at least one eviction");
1518
1519        // Should have 2-3 partitions loaded (at least min_partitions kept)
1520        assert!(
1521            stats.loaded_partitions >= 2,
1522            "Should keep at least min_partitions"
1523        );
1524    }
1525
1526    #[test]
1527    fn test_cache_hit_miss_tracking() {
1528        let temp_dir = TempDir::new().unwrap();
1529        let prism_dir = temp_dir.path().join(".codeprysm");
1530
1531        let mut manager = LazyGraphManager::init(&prism_dir).unwrap();
1532
1533        // Create a partition
1534        let partition_id = "test_partition";
1535        let db_path = manager.partition_db_path(partition_id);
1536        let conn = PartitionConnection::create(&db_path, partition_id).unwrap();
1537
1538        let node = create_test_node("test.py:func", "func", "test.py");
1539        conn.insert_node(&node).unwrap();
1540
1541        manager
1542            .manifest_mut()
1543            .set_file("test.py".to_string(), partition_id.to_string(), None);
1544        manager
1545            .manifest_mut()
1546            .register_partition(partition_id.to_string(), "test_partition.db".to_string());
1547
1548        // First load should be a miss
1549        manager.load_partition(partition_id).unwrap();
1550        let metrics = manager.cache_metrics();
1551        assert_eq!(metrics.misses, 1);
1552        assert_eq!(metrics.hits, 0);
1553
1554        // Second load should be a hit
1555        manager.load_partition(partition_id).unwrap();
1556        let metrics = manager.cache_metrics();
1557        assert_eq!(metrics.hits, 1);
1558        assert_eq!(metrics.misses, 1);
1559        assert!((metrics.hit_rate() - 0.5).abs() < 0.01);
1560    }
1561
1562    #[test]
1563    fn test_init_workspace_single_git_repo() {
1564        let temp_dir = TempDir::new().unwrap();
1565        let workspace = temp_dir.path();
1566        let prism_dir = workspace.join(".codeprysm");
1567
1568        // Create a git repo with source files
1569        std::fs::create_dir(workspace.join(".git")).unwrap();
1570        std::fs::write(workspace.join("main.py"), "print('hello')").unwrap();
1571
1572        let manager = LazyGraphManager::init_workspace(workspace, &prism_dir).unwrap();
1573
1574        assert!(!manager.is_multi_root());
1575        assert_eq!(manager.manifest().root_count(), 1);
1576
1577        // Check the root was discovered correctly
1578        let roots: Vec<_> = manager.roots().collect();
1579        assert_eq!(roots.len(), 1);
1580        assert_eq!(roots[0].root_type, "git");
1581        assert_eq!(roots[0].relative_path, ".");
1582    }
1583
1584    #[test]
1585    fn test_init_workspace_multi_root() {
1586        let temp_dir = TempDir::new().unwrap();
1587        let workspace = temp_dir.path();
1588        let prism_dir = workspace.join(".codeprysm");
1589
1590        // Create two git repos
1591        let repo_a = workspace.join("repo-a");
1592        let repo_b = workspace.join("repo-b");
1593
1594        std::fs::create_dir_all(repo_a.join(".git")).unwrap();
1595        std::fs::write(repo_a.join("main.py"), "# repo a").unwrap();
1596
1597        std::fs::create_dir_all(repo_b.join(".git")).unwrap();
1598        std::fs::write(repo_b.join("main.rs"), "fn main() {}").unwrap();
1599
1600        let manager = LazyGraphManager::init_workspace(workspace, &prism_dir).unwrap();
1601
1602        assert!(manager.is_multi_root());
1603        assert_eq!(manager.manifest().root_count(), 2);
1604
1605        // Check roots were discovered
1606        let root_names: Vec<_> = manager.manifest().root_names().collect();
1607        assert!(root_names.contains(&"repo-a"));
1608        assert!(root_names.contains(&"repo-b"));
1609    }
1610
1611    #[test]
1612    fn test_init_workspace_code_directory() {
1613        let temp_dir = TempDir::new().unwrap();
1614        let workspace = temp_dir.path();
1615        let prism_dir = workspace.join(".codeprysm");
1616
1617        // Create a code directory (no .git)
1618        std::fs::write(workspace.join("script.py"), "print('hello')").unwrap();
1619
1620        let manager = LazyGraphManager::init_workspace(workspace, &prism_dir).unwrap();
1621
1622        assert!(!manager.is_multi_root());
1623        let roots: Vec<_> = manager.roots().collect();
1624        assert_eq!(roots.len(), 1);
1625        assert_eq!(roots[0].root_type, "code");
1626    }
1627
1628    #[test]
1629    fn test_root_info_from_discovered_root() {
1630        use crate::discovery::{DiscoveredRoot, RootType};
1631
1632        // Test git repo conversion
1633        let git_root = DiscoveredRoot {
1634            path: std::path::PathBuf::from("/workspace/repo"),
1635            relative_path: "repo".to_string(),
1636            root_type: RootType::GitRepository {
1637                remote: Some("https://github.com/org/repo".to_string()),
1638                branch: Some("main".to_string()),
1639                commit: Some("abc123".to_string()),
1640            },
1641            name: "repo".to_string(),
1642        };
1643
1644        let root_info = RootInfo::from_discovered_root(&git_root);
1645        assert_eq!(root_info.name, "repo");
1646        assert_eq!(root_info.root_type, "git");
1647        assert_eq!(root_info.relative_path, "repo");
1648        assert_eq!(
1649            root_info.remote_url,
1650            Some("https://github.com/org/repo".to_string())
1651        );
1652        assert_eq!(root_info.branch, Some("main".to_string()));
1653        assert_eq!(root_info.commit, Some("abc123".to_string()));
1654
1655        // Test code directory conversion
1656        let code_root = DiscoveredRoot {
1657            path: std::path::PathBuf::from("/workspace/scripts"),
1658            relative_path: "scripts".to_string(),
1659            root_type: RootType::CodeDirectory,
1660            name: "scripts".to_string(),
1661        };
1662
1663        let root_info = RootInfo::from_discovered_root(&code_root);
1664        assert_eq!(root_info.name, "scripts");
1665        assert_eq!(root_info.root_type, "code");
1666        assert!(root_info.remote_url.is_none());
1667        assert!(root_info.branch.is_none());
1668        assert!(root_info.commit.is_none());
1669    }
1670
1671    #[test]
1672    fn test_cross_partition_edges() {
1673        use crate::graph::EdgeType;
1674
1675        let temp_dir = TempDir::new().unwrap();
1676        let prism_dir = temp_dir.path().join(".codeprysm");
1677
1678        let mut manager = LazyGraphManager::init(&prism_dir).unwrap();
1679
1680        // Create partition A with a function
1681        let partition_a = "partition_a";
1682        let db_path_a = manager.partition_db_path(partition_a);
1683        let conn_a = PartitionConnection::create(&db_path_a, partition_a).unwrap();
1684        let node_a = create_test_node("a/main.py:caller", "caller", "a/main.py");
1685        conn_a.insert_node(&node_a).unwrap();
1686
1687        // Create partition B with a function that is called
1688        let partition_b = "partition_b";
1689        let db_path_b = manager.partition_db_path(partition_b);
1690        let conn_b = PartitionConnection::create(&db_path_b, partition_b).unwrap();
1691        let node_b = create_test_node("b/lib.py:helper", "helper", "b/lib.py");
1692        conn_b.insert_node(&node_b).unwrap();
1693
1694        // Register in manifest
1695        manager
1696            .manifest_mut()
1697            .set_file("a/main.py".to_string(), partition_a.to_string(), None);
1698        manager
1699            .manifest_mut()
1700            .set_file("b/lib.py".to_string(), partition_b.to_string(), None);
1701        manager
1702            .manifest_mut()
1703            .register_partition(partition_a.to_string(), "partition_a.db".to_string());
1704        manager
1705            .manifest_mut()
1706            .register_partition(partition_b.to_string(), "partition_b.db".to_string());
1707
1708        // Add a cross-partition edge: a/main.py:caller -> b/lib.py:helper
1709        let cross_ref = CrossRef::new(
1710            "a/main.py:caller".to_string(),
1711            partition_a.to_string(),
1712            "b/lib.py:helper".to_string(),
1713            partition_b.to_string(),
1714            EdgeType::Uses,
1715            Some(10),
1716            Some("helper".to_string()),
1717        );
1718        manager.add_cross_ref(cross_ref);
1719
1720        // Verify cross-ref was added
1721        assert_eq!(manager.cross_ref_count(), 1);
1722        assert_eq!(manager.stats().cross_partition_edges, 1);
1723
1724        // Get outgoing edges from caller - should include cross-partition edge to helper
1725        let outgoing = manager.get_outgoing_edges("a/main.py:caller").unwrap();
1726        assert_eq!(outgoing.len(), 1);
1727        assert_eq!(outgoing[0].0.id, "b/lib.py:helper");
1728        assert_eq!(outgoing[0].1.edge_type, EdgeType::Uses);
1729        assert_eq!(outgoing[0].1.ref_line, Some(10));
1730        assert_eq!(outgoing[0].1.ident, Some("helper".to_string()));
1731
1732        // Verify both partitions are now loaded (cross-ref triggered load)
1733        assert!(manager.is_partition_loaded(partition_a));
1734        assert!(manager.is_partition_loaded(partition_b));
1735
1736        // Get incoming edges to helper - should include cross-partition edge from caller
1737        let incoming = manager.get_incoming_edges("b/lib.py:helper").unwrap();
1738        assert_eq!(incoming.len(), 1);
1739        assert_eq!(incoming[0].0.id, "a/main.py:caller");
1740    }
1741
1742    #[test]
1743    fn test_cross_refs_persistence() {
1744        use crate::graph::EdgeType;
1745
1746        let temp_dir = TempDir::new().unwrap();
1747        let prism_dir = temp_dir.path().join(".codeprysm");
1748
1749        // Create manager and add cross-refs
1750        {
1751            let mut manager = LazyGraphManager::init(&prism_dir).unwrap();
1752
1753            manager.add_cross_ref(CrossRef::new(
1754                "a:x".to_string(),
1755                "p1".to_string(),
1756                "b:y".to_string(),
1757                "p2".to_string(),
1758                EdgeType::Uses,
1759                Some(5),
1760                None,
1761            ));
1762            manager.add_cross_ref(CrossRef::new(
1763                "c:z".to_string(),
1764                "p1".to_string(),
1765                "d:w".to_string(),
1766                "p2".to_string(),
1767                EdgeType::Defines,
1768                None,
1769                Some("w".to_string()),
1770            ));
1771
1772            assert_eq!(manager.cross_ref_count(), 2);
1773
1774            // Save cross-refs to SQLite
1775            manager.save_cross_refs().unwrap();
1776        }
1777
1778        // Open manager again - should load cross-refs
1779        {
1780            let manager = LazyGraphManager::open(&prism_dir).unwrap();
1781            assert_eq!(manager.cross_ref_count(), 2);
1782
1783            // Verify data integrity
1784            let refs_to_b = manager.get_cross_refs_by_target("b:y").unwrap();
1785            assert_eq!(refs_to_b.len(), 1);
1786            assert_eq!(refs_to_b[0].source_id, "a:x");
1787            assert_eq!(refs_to_b[0].edge_type, EdgeType::Uses);
1788            assert_eq!(refs_to_b[0].ref_line, Some(5));
1789        }
1790    }
1791
1792    #[test]
1793    fn test_remove_cross_refs_by_partition() {
1794        use crate::graph::EdgeType;
1795
1796        let temp_dir = TempDir::new().unwrap();
1797        let prism_dir = temp_dir.path().join(".codeprysm");
1798
1799        let mut manager = LazyGraphManager::init(&prism_dir).unwrap();
1800
1801        // Add cross-refs from different partitions
1802        manager.add_cross_ref(CrossRef::new(
1803            "a:x".to_string(),
1804            "p1".to_string(),
1805            "b:y".to_string(),
1806            "p2".to_string(),
1807            EdgeType::Uses,
1808            None,
1809            None,
1810        ));
1811        manager.add_cross_ref(CrossRef::new(
1812            "c:z".to_string(),
1813            "p2".to_string(),
1814            "d:w".to_string(),
1815            "p3".to_string(),
1816            EdgeType::Uses,
1817            None,
1818            None,
1819        ));
1820        manager.add_cross_ref(CrossRef::new(
1821            "e:v".to_string(),
1822            "p3".to_string(),
1823            "f:u".to_string(),
1824            "p4".to_string(),
1825            EdgeType::Uses,
1826            None,
1827            None,
1828        ));
1829
1830        assert_eq!(manager.cross_ref_count(), 3);
1831
1832        // Remove all cross-refs involving p2 (should remove first two)
1833        manager.remove_cross_refs_by_partition("p2");
1834
1835        assert_eq!(manager.cross_ref_count(), 1);
1836
1837        // Remaining should be p3 -> p4
1838        let remaining = manager.get_cross_refs_by_source("e:v").unwrap();
1839        assert_eq!(remaining.len(), 1);
1840        assert_eq!(remaining[0].target_id, "f:u");
1841    }
1842}