use std::collections::HashMap;
use std::path::{Path, PathBuf};

use thiserror::Error;
use tracing::{debug, info, warn};

use crate::builder::{BuilderConfig, GraphBuilder};
use crate::graph::PetCodeGraph;
use crate::lazy::manager::LazyGraphManager;
use crate::lazy::partitioner::GraphPartitioner;
use crate::merkle::{ChangeSet, ExclusionFilter, MerkleTree, MerkleTreeManager};

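/// Errors that can occur while performing incremental graph updates.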
#[derive(Debug, Error)]
pub enum UpdaterError {
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    #[error("Graph error: {0}")]
    LazyGraph(#[from] crate::lazy::manager::LazyGraphError),

    #[error("Partition error: {0}")]
    Partition(#[from] crate::lazy::partitioner::PartitionerError),

    #[error("Repository not found: {0}")]
    RepoNotFound(PathBuf),

    #[error("Queries directory not found: {0}")]
    QueriesNotFound(PathBuf),

    #[error("Builder error: {0}")]
    Builder(#[from] crate::builder::BuilderError),

    #[error("Merkle tree error: {0}")]
    Merkle(#[from] crate::merkle::MerkleError),
}

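/// Convenience alias for results returned by the updater.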
pub type Result<T> = std::result::Result<T, UpdaterError>;

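/// Keeps a persisted code graph in sync with a repository by comparing Merkle
/// trees of file hashes and reparsing only the files that changed.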
pub struct IncrementalUpdater {
    repo_path: PathBuf,
    prism_dir: PathBuf,
    queries_dir: Option<PathBuf>,
    builder_config: BuilderConfig,
    merkle_manager: MerkleTreeManager,
    graph: Option<PetCodeGraph>,
    current_merkle_tree: MerkleTree,
}

impl IncrementalUpdater {
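    /// Creates an updater that uses the builder's embedded queries with the
    /// default exclusion filter and builder configuration.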
    pub fn new_with_embedded_queries(repo_path: &Path, prism_dir: &Path) -> Result<Self> {
        if !repo_path.exists() {
            return Err(UpdaterError::RepoNotFound(repo_path.to_path_buf()));
        }

        let merkle_manager = MerkleTreeManager::default();

        Ok(Self {
            repo_path: repo_path.to_path_buf(),
            prism_dir: prism_dir.to_path_buf(),
            queries_dir: None,
            builder_config: BuilderConfig::default(),
            merkle_manager,
            graph: None,
            current_merkle_tree: HashMap::new(),
        })
    }

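    /// Creates an updater that uses embedded queries with a custom exclusion
    /// filter and builder configuration.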
    pub fn with_embedded_queries(
        repo_path: &Path,
        prism_dir: &Path,
        exclusion_filter: ExclusionFilter,
        builder_config: BuilderConfig,
    ) -> Result<Self> {
        if !repo_path.exists() {
            return Err(UpdaterError::RepoNotFound(repo_path.to_path_buf()));
        }

        let merkle_manager = MerkleTreeManager::new(exclusion_filter);

        Ok(Self {
            repo_path: repo_path.to_path_buf(),
            prism_dir: prism_dir.to_path_buf(),
            queries_dir: None,
            builder_config,
            merkle_manager,
            graph: None,
            current_merkle_tree: HashMap::new(),
        })
    }

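    /// Creates an updater that uses the queries in `queries_dir`, with the
    /// default exclusion filter and builder configuration.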
    pub fn new(repo_path: &Path, prism_dir: &Path, queries_dir: &Path) -> Result<Self> {
        if !repo_path.exists() {
            return Err(UpdaterError::RepoNotFound(repo_path.to_path_buf()));
        }
        if !queries_dir.exists() {
            return Err(UpdaterError::QueriesNotFound(queries_dir.to_path_buf()));
        }

        let merkle_manager = MerkleTreeManager::default();

        Ok(Self {
            repo_path: repo_path.to_path_buf(),
            prism_dir: prism_dir.to_path_buf(),
            queries_dir: Some(queries_dir.to_path_buf()),
            builder_config: BuilderConfig::default(),
            merkle_manager,
            graph: None,
            current_merkle_tree: HashMap::new(),
        })
    }

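    /// Creates an updater with an explicit queries directory, exclusion filter,
    /// and builder configuration.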
    pub fn with_config(
        repo_path: &Path,
        prism_dir: &Path,
        queries_dir: &Path,
        exclusion_filter: ExclusionFilter,
        builder_config: BuilderConfig,
    ) -> Result<Self> {
        if !repo_path.exists() {
            return Err(UpdaterError::RepoNotFound(repo_path.to_path_buf()));
        }
        if !queries_dir.exists() {
            return Err(UpdaterError::QueriesNotFound(queries_dir.to_path_buf()));
        }

        let merkle_manager = MerkleTreeManager::new(exclusion_filter);

        Ok(Self {
            repo_path: repo_path.to_path_buf(),
            prism_dir: prism_dir.to_path_buf(),
            queries_dir: Some(queries_dir.to_path_buf()),
            builder_config,
            merkle_manager,
            graph: None,
            current_merkle_tree: HashMap::new(),
        })
    }

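    /// Loads the partitioned graph from the prism directory and rebuilds the
    /// in-memory Merkle tree from its file nodes. Returns `Ok(false)` when no
    /// graph manifest exists yet.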
    pub fn load_graph_state(&mut self) -> Result<bool> {
        let manifest_path = self.prism_dir.join("manifest.json");
        if !manifest_path.exists() {
            info!("Partitioned graph not found: {:?}", self.prism_dir);
            return Ok(false);
        }

        info!("Loading graph from {:?}", self.prism_dir);

        let manager = LazyGraphManager::open(&self.prism_dir)?;
        manager.load_all_partitions()?;

        let graph = manager.graph_read().clone();

        info!(
            "Loaded graph: {} nodes, {} edges",
            graph.node_count(),
            graph.edge_count()
        );

        self.current_merkle_tree = self.extract_merkle_tree_from_graph(&graph);
        info!(
            "Extracted Merkle tree: {} files",
            self.current_merkle_tree.len()
        );

        self.graph = Some(graph);
        Ok(true)
    }

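    /// Rebuilds a Merkle tree from the hashes stored on the graph's file nodes.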
    fn extract_merkle_tree_from_graph(&self, graph: &PetCodeGraph) -> MerkleTree {
        let mut merkle_tree = HashMap::new();

        for node in graph.iter_nodes().filter(|n| n.is_file()) {
            if let Some(hash) = &node.hash {
                merkle_tree.insert(node.file.clone(), hash.clone());
            }
        }

        merkle_tree
    }

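    /// Hashes the repository, diffs it against the current Merkle tree, and
    /// adopts the new tree as the current state.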
    pub fn detect_repository_changes(&mut self) -> Result<ChangeSet> {
        info!("Detecting repository changes...");

        let new_merkle_tree = self.merkle_manager.build_merkle_tree(&self.repo_path)?;

        let changes = self
            .merkle_manager
            .detect_changes(&self.current_merkle_tree, &new_merkle_tree);

        self.current_merkle_tree = new_merkle_tree;

        Ok(changes)
    }

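    /// Updates the repository graph. Falls back to a full rebuild when forced
    /// or when no existing graph can be loaded; otherwise applies only the
    /// detected changes and persists the result.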
    pub fn update_repository(&mut self, force_rebuild: bool) -> Result<UpdateResult> {
        if force_rebuild {
            info!("Performing force rebuild...");
            return self.full_rebuild();
        }

        if !self.load_graph_state()? {
            info!("No existing graph found, performing initial build...");
            return self.full_rebuild();
        }

        let changes = self.detect_repository_changes()?;

        if !changes.has_changes() {
            info!("No changes detected, graph is up to date");
            return Ok(UpdateResult {
                success: true,
                changes,
                was_full_rebuild: false,
            });
        }

        info!("Processing {} changed files...", changes.total_changes());

        self.process_changes(&changes)?;

        self.save_graph()?;

        info!("Incremental update completed successfully");

        Ok(UpdateResult {
            success: true,
            changes,
            was_full_rebuild: false,
        })
    }

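    /// Applies a change set to the loaded graph: removes nodes for deleted and
    /// modified files, then reparses modified and added files.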
    fn process_changes(&mut self, changes: &ChangeSet) -> Result<()> {
        let start = std::time::Instant::now();

        if !changes.deleted.is_empty() {
            self.process_deleted_files(&changes.deleted);
        }

        if !changes.modified.is_empty() {
            self.process_modified_files(&changes.modified);
        }

        let files_to_reparse: Vec<String> = changes
            .modified
            .iter()
            .chain(changes.added.iter())
            .cloned()
            .collect();

        if !files_to_reparse.is_empty() {
            self.reparse_files(&files_to_reparse)?;
        }

        let elapsed = start.elapsed();
        info!(
            "Change processing completed in {:.2}s",
            elapsed.as_secs_f64()
        );

        Ok(())
    }

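    /// Removes all graph nodes belonging to deleted files.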
    fn process_deleted_files(&mut self, deleted_files: &[String]) {
        info!("Processing {} deleted files...", deleted_files.len());

        let graph = self
            .graph
            .as_mut()
            .expect("Graph must be loaded before processing changes");

        for file_path in deleted_files {
            graph.remove_file_nodes(file_path);
            debug!("Removed nodes for deleted file: {}", file_path);
        }
    }

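    /// Removes stale nodes for modified files ahead of reparsing.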
    fn process_modified_files(&mut self, modified_files: &[String]) {
        let graph = self
            .graph
            .as_mut()
            .expect("Graph must be loaded before processing changes");

        for file_path in modified_files {
            graph.remove_file_nodes(file_path);
            debug!("Removed nodes for modified file: {}", file_path);
        }
    }

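    /// Parses the given files (paths relative to the repository root) and
    /// merges the resulting file graphs into the main graph.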
    fn reparse_files(&mut self, file_paths: &[String]) -> Result<()> {
        info!("Reparsing {} files...", file_paths.len());

        let mut builder = match &self.queries_dir {
            Some(dir) => GraphBuilder::with_config(dir, self.builder_config.clone())?,
            None => GraphBuilder::with_embedded_queries(self.builder_config.clone()),
        };

        let mut file_graphs = Vec::new();

        for rel_path in file_paths {
            let abs_path = self.repo_path.join(rel_path);

            if !abs_path.exists() {
                warn!("File not found during reparse: {}", rel_path);
                continue;
            }

            match builder.parse_file(&abs_path, rel_path) {
                Ok(file_graph) => {
                    file_graphs.push((rel_path.clone(), file_graph));
                }
                Err(e) => {
                    warn!("Error reparsing {}: {}", rel_path, e);
                }
            }
        }

        let graph = self
            .graph
            .as_mut()
            .expect("Graph must be loaded before processing changes");

        for (rel_path, file_graph) in file_graphs {
            Self::merge_file_graph(graph, file_graph);
            debug!("Reparsed file: {}", rel_path);
        }

        Ok(())
    }

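    /// Merges a single file's graph into the main graph, skipping nodes that
    /// already exist.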
    fn merge_file_graph(main_graph: &mut PetCodeGraph, file_graph: PetCodeGraph) {
        for node in file_graph.iter_nodes() {
            if !main_graph.contains_node(&node.id) {
                main_graph.add_node(node.clone());
            }
        }

        for edge in file_graph.iter_edges() {
            main_graph.add_edge_from_struct(&edge);
        }
    }

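    /// Partitions the in-memory graph and writes it to the prism directory.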
    fn save_graph(&self) -> Result<()> {
        let graph = self.graph.as_ref().expect("Graph must exist to save");

        info!("Saving graph to {:?}", self.prism_dir);

        let root_name = self
            .repo_path
            .file_name()
            .map(|s| s.to_string_lossy().to_string())
            .unwrap_or_else(|| "default".to_string());

        let (_, stats) =
            GraphPartitioner::partition_with_stats(graph, &self.prism_dir, Some(&root_name))?;

        info!(
            "Saved graph: {} nodes, {} partitions, {} cross-partition edges",
            stats.total_nodes, stats.partition_count, stats.cross_partition_edges
        );

        Ok(())
    }

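    /// Rebuilds the graph from scratch, stamps file nodes with their Merkle
    /// hashes, and persists the partitioned result.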
    fn full_rebuild(&mut self) -> Result<UpdateResult> {
        info!("Performing full rebuild...");

        let merkle_tree = self.merkle_manager.build_merkle_tree(&self.repo_path)?;

        let mut builder = match &self.queries_dir {
            Some(dir) => GraphBuilder::with_config(dir, self.builder_config.clone())?,
            None => GraphBuilder::with_embedded_queries(self.builder_config.clone()),
        };
        let mut graph = builder.build_from_directory(&self.repo_path)?;

        let file_nodes: Vec<(String, String)> = graph
            .iter_nodes()
            .filter(|n| n.is_file())
            .map(|n| (n.id.clone(), n.file.clone()))
            .collect();

        for (node_id, file_path) in file_nodes {
            if let Some(hash) = merkle_tree.get(&file_path) {
                if let Some(node_mut) = graph.get_node_mut(&node_id) {
                    node_mut.hash = Some(hash.clone());
                }
            }
        }

        self.graph = Some(graph);
        self.current_merkle_tree = merkle_tree.clone();

        self.save_graph()?;

        let changes = ChangeSet {
            added: merkle_tree.keys().cloned().collect(),
            modified: vec![],
            deleted: vec![],
        };

        info!("Full rebuild completed");

        Ok(UpdateResult {
            success: true,
            changes,
            was_full_rebuild: true,
        })
    }

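    /// Returns the loaded graph, if any.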
    pub fn graph(&self) -> Option<&PetCodeGraph> {
        self.graph.as_ref()
    }

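    /// Returns a mutable reference to the loaded graph, if any.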
    pub fn graph_mut(&mut self) -> Option<&mut PetCodeGraph> {
        self.graph.as_mut()
    }

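    /// Returns the Merkle tree captured during the last load or update.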
    pub fn merkle_tree(&self) -> &MerkleTree {
        &self.current_merkle_tree
    }
}

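/// Outcome of a repository update.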
#[derive(Debug, Clone)]
pub struct UpdateResult {
    pub success: bool,
    pub changes: ChangeSet,
    pub was_full_rebuild: bool,
}

impl UpdateResult {
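    /// Returns `true` if any files changed or a full rebuild was performed.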
    pub fn has_changes(&self) -> bool {
        self.changes.has_changes() || self.was_full_rebuild
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::TempDir;

    fn setup_test_repo() -> (TempDir, PathBuf) {
        let temp_dir = TempDir::new().unwrap();
        let repo_path = temp_dir.path().to_path_buf();

        let py_file = repo_path.join("test.py");
        let mut file = File::create(&py_file).unwrap();
        writeln!(file, "def hello():").unwrap();
        writeln!(file, "    print('Hello, World!')").unwrap();

        (temp_dir, repo_path)
    }

    fn get_queries_dir() -> PathBuf {
        let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        let queries_dir = manifest_dir.join("queries");

        if queries_dir.exists() {
            queries_dir
        } else {
            manifest_dir
                .parent()
                .unwrap()
                .parent()
                .unwrap()
                .join("src")
                .join("queries")
        }
    }

    #[test]
    fn test_updater_creation() {
        let (_temp_dir, repo_path) = setup_test_repo();
        let prism_dir = repo_path.join(".codeprysm");
        std::fs::create_dir_all(&prism_dir).unwrap();
        let queries_dir = get_queries_dir();

        if !queries_dir.exists() {
            return;
        }

        let result = IncrementalUpdater::new(&repo_path, &prism_dir, &queries_dir);
        assert!(result.is_ok());
    }

    #[test]
    fn test_updater_missing_repo() {
        let prism_dir = PathBuf::from("/tmp/.codeprysm");
        let queries_dir = get_queries_dir();

        if !queries_dir.exists() {
            return;
        }

        let result =
            IncrementalUpdater::new(Path::new("/nonexistent/repo"), &prism_dir, &queries_dir);

        assert!(matches!(result, Err(UpdaterError::RepoNotFound(_))));
    }

    #[test]
    fn test_extract_merkle_tree_from_graph() {
        let (_temp_dir, repo_path) = setup_test_repo();
        let prism_dir = repo_path.join(".codeprysm");
        std::fs::create_dir_all(&prism_dir).unwrap();
        let queries_dir = get_queries_dir();

        if !queries_dir.exists() {
            return;
        }

        let updater = IncrementalUpdater::new(&repo_path, &prism_dir, &queries_dir).unwrap();

        let mut graph = PetCodeGraph::new();
        graph.add_node(crate::graph::Node::source_file(
            "test.py".to_string(),
            "test.py".to_string(),
            "abc123".to_string(),
            100,
        ));
        graph.add_node(crate::graph::Node::source_file(
            "main.py".to_string(),
            "main.py".to_string(),
            "def456".to_string(),
            100,
        ));

        let merkle_tree = updater.extract_merkle_tree_from_graph(&graph);

        assert_eq!(merkle_tree.len(), 2);
        assert_eq!(merkle_tree.get("test.py"), Some(&"abc123".to_string()));
        assert_eq!(merkle_tree.get("main.py"), Some(&"def456".to_string()));
    }

    #[test]
    fn test_update_result() {
        let result = UpdateResult {
            success: true,
            changes: ChangeSet {
                modified: vec!["a.py".to_string()],
                added: vec![],
                deleted: vec![],
            },
            was_full_rebuild: false,
        };

        assert!(result.has_changes());

        let result_no_changes = UpdateResult {
            success: true,
            changes: ChangeSet::new(),
            was_full_rebuild: false,
        };

        assert!(!result_no_changes.has_changes());

        let result_full_rebuild = UpdateResult {
            success: true,
            changes: ChangeSet::new(),
            was_full_rebuild: true,
        };

        assert!(result_full_rebuild.has_changes());
    }
}