1use crate::graph::{Edge, Node, PetCodeGraph};
29use crate::lazy::cross_refs::{CrossRef, CrossRefIndex, CrossRefStore};
30use crate::lazy::manager::{LazyGraphManager, Manifest, RootInfo};
31use crate::lazy::partition::PartitionConnection;
32use std::collections::{HashMap, HashSet};
33use std::path::Path;
34use thiserror::Error;
35
36#[derive(Debug, Error)]
38pub enum PartitionerError {
39 #[error("Partition error: {0}")]
40 Partition(#[from] crate::lazy::partition::PartitionError),
41
42 #[error("Cross-ref error: {0}")]
43 CrossRef(#[from] crate::lazy::cross_refs::CrossRefError),
44
45 #[error("IO error: {0}")]
46 Io(#[from] std::io::Error),
47
48 #[error("JSON error: {0}")]
49 Json(#[from] serde_json::Error),
50
51 #[error("Manifest error: {0}")]
52 Manifest(#[from] crate::lazy::manager::LazyGraphError),
53}
54
/// Counters describing the outcome of one partitioning run.
///
/// `PartialEq`, `Eq` and `Default` are derived so that callers and tests can
/// compare whole snapshots and start from an all-zero value.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct PartitioningStats {
    /// Number of nodes in the input graph.
    pub total_nodes: usize,
    /// Number of edges in the input graph. Edges with an endpoint that maps
    /// to no partition are counted here but in neither of the edge counters
    /// below.
    pub total_edges: usize,
    /// Number of partitions that were written.
    pub partition_count: usize,
    /// Number of cross-partition references recorded (edges whose endpoints
    /// belong to different partitions).
    pub cross_partition_edges: usize,
    /// Number of edges whose endpoints belong to the same partition.
    pub intra_partition_edges: usize,
}
69
/// Stateless namespace for the partitioning routines.
///
/// The type carries no data; all functionality lives in associated functions.
/// The derives are there so the public type is printable and trivially
/// constructible/copyable like any other unit value.
#[derive(Debug, Clone, Copy, Default)]
pub struct GraphPartitioner;
74
75impl GraphPartitioner {
76 pub fn partition(
86 graph: &PetCodeGraph,
87 prism_dir: &Path,
88 root_name: Option<&str>,
89 ) -> Result<Manifest, PartitionerError> {
90 let (manifest, _stats) = Self::partition_with_stats(graph, prism_dir, root_name)?;
91 Ok(manifest)
92 }
93
94 pub fn partition_with_stats(
98 graph: &PetCodeGraph,
99 prism_dir: &Path,
100 root_name: Option<&str>,
101 ) -> Result<(Manifest, PartitioningStats), PartitionerError> {
102 let root = root_name.unwrap_or("default");
103 let partitions_dir = prism_dir.join("partitions");
104
105 std::fs::create_dir_all(&partitions_dir)?;
107
108 let node_partitions = Self::group_nodes_by_partition(graph, root);
110
111 let node_to_partition: HashMap<String, String> = graph
113 .iter_nodes()
114 .map(|node| {
115 let partition_id =
116 LazyGraphManager::compute_partition_id_for_root(root, &node.file);
117 (node.id.clone(), partition_id)
118 })
119 .collect();
120
121 let (intra_edges, cross_refs) = Self::classify_edges(graph, &node_to_partition);
123
124 let mut manifest = Manifest::new();
126 let mut partition_filenames: HashMap<String, String> = HashMap::new();
127
128 for (partition_id, nodes) in &node_partitions {
129 let safe_name = partition_id.replace(['/', '\\', ':'], "_");
131 let filename = format!("{}.db", safe_name);
132 let db_path = partitions_dir.join(&filename);
133
134 let conn = PartitionConnection::create(&db_path, partition_id)?;
136
137 let node_vec: Vec<Node> = nodes.to_vec();
139 conn.insert_nodes(&node_vec)?;
140
141 if let Some(edges) = intra_edges.get(partition_id) {
143 let edge_vec: Vec<Edge> = edges.to_vec();
144 conn.insert_edges(&edge_vec)?;
145 }
146
147 partition_filenames.insert(partition_id.clone(), filename.clone());
149 manifest.register_partition(partition_id.clone(), filename);
150
151 for node in nodes {
153 if !node.file.is_empty() {
154 manifest.set_file(node.file.clone(), partition_id.clone(), node.hash.clone());
155 }
156 }
157 }
158
159 let cross_refs_path = prism_dir.join("cross_refs.db");
161 let cross_ref_store = CrossRefStore::create(&cross_refs_path)?;
162 let cross_ref_vec: Vec<CrossRef> = cross_refs.iter().cloned().collect();
163 cross_ref_store.add_refs(&cross_ref_vec)?;
164
165 manifest.register_root(RootInfo {
167 name: root.to_string(),
168 root_type: "code".to_string(), relative_path: ".".to_string(),
170 remote_url: None,
171 branch: None,
172 commit: None,
173 });
174
175 let manifest_path = prism_dir.join("manifest.json");
177 manifest.save(&manifest_path)?;
178
179 let stats = PartitioningStats {
181 total_nodes: graph.node_count(),
182 total_edges: graph.edge_count(),
183 partition_count: node_partitions.len(),
184 cross_partition_edges: cross_refs.len(),
185 intra_partition_edges: intra_edges.values().map(|v| v.len()).sum(),
186 };
187
188 Ok((manifest, stats))
189 }
190
191 pub fn partition_with_root_info(
196 graph: &PetCodeGraph,
197 prism_dir: &Path,
198 root_info: RootInfo,
199 ) -> Result<(Manifest, PartitioningStats), PartitionerError> {
200 let root = &root_info.name;
201 let partitions_dir = prism_dir.join("partitions");
202
203 std::fs::create_dir_all(&partitions_dir)?;
205
206 let node_partitions = Self::group_nodes_by_partition(graph, root);
208
209 let node_to_partition: HashMap<String, String> = graph
211 .iter_nodes()
212 .map(|node| {
213 let partition_id =
214 LazyGraphManager::compute_partition_id_for_root(root, &node.file);
215 (node.id.clone(), partition_id)
216 })
217 .collect();
218
219 let (intra_edges, cross_refs) = Self::classify_edges(graph, &node_to_partition);
221
222 let mut manifest = Manifest::new();
224
225 for (partition_id, nodes) in &node_partitions {
226 let safe_name = partition_id.replace(['/', '\\', ':'], "_");
227 let filename = format!("{}.db", safe_name);
228 let db_path = partitions_dir.join(&filename);
229
230 let conn = PartitionConnection::create(&db_path, partition_id)?;
231
232 let node_vec: Vec<Node> = nodes.to_vec();
233 conn.insert_nodes(&node_vec)?;
234
235 if let Some(edges) = intra_edges.get(partition_id) {
236 let edge_vec: Vec<Edge> = edges.to_vec();
237 conn.insert_edges(&edge_vec)?;
238 }
239
240 manifest.register_partition(partition_id.clone(), filename);
241
242 for node in nodes {
243 if !node.file.is_empty() {
244 manifest.set_file(node.file.clone(), partition_id.clone(), node.hash.clone());
245 }
246 }
247 }
248
249 let cross_refs_path = prism_dir.join("cross_refs.db");
251 let cross_ref_store = CrossRefStore::create(&cross_refs_path)?;
252 let cross_ref_vec: Vec<CrossRef> = cross_refs.iter().cloned().collect();
253 cross_ref_store.add_refs(&cross_ref_vec)?;
254
255 manifest.register_root(root_info);
257
258 let manifest_path = prism_dir.join("manifest.json");
260 manifest.save(&manifest_path)?;
261
262 let stats = PartitioningStats {
263 total_nodes: graph.node_count(),
264 total_edges: graph.edge_count(),
265 partition_count: node_partitions.len(),
266 cross_partition_edges: cross_refs.len(),
267 intra_partition_edges: intra_edges.values().map(|v| v.len()).sum(),
268 };
269
270 Ok((manifest, stats))
271 }
272
273 fn group_nodes_by_partition(
277 graph: &PetCodeGraph,
278 root_name: &str,
279 ) -> HashMap<String, Vec<Node>> {
280 let mut partitions: HashMap<String, Vec<Node>> = HashMap::new();
281
282 for node in graph.iter_nodes() {
283 let partition_id =
284 LazyGraphManager::compute_partition_id_for_root(root_name, &node.file);
285 partitions
286 .entry(partition_id)
287 .or_default()
288 .push(node.clone());
289 }
290
291 partitions
292 }
293
294 fn classify_edges(
300 graph: &PetCodeGraph,
301 node_to_partition: &HashMap<String, String>,
302 ) -> (HashMap<String, Vec<Edge>>, CrossRefIndex) {
303 let mut intra_edges: HashMap<String, Vec<Edge>> = HashMap::new();
304 let mut cross_refs = CrossRefIndex::new();
305
306 for edge in graph.iter_edges() {
307 let source_partition = node_to_partition.get(&edge.source);
308 let target_partition = node_to_partition.get(&edge.target);
309
310 match (source_partition, target_partition) {
311 (Some(src_part), Some(tgt_part)) if src_part == tgt_part => {
312 intra_edges.entry(src_part.clone()).or_default().push(edge);
314 }
315 (Some(src_part), Some(tgt_part)) => {
316 cross_refs.add(CrossRef::new(
318 edge.source.clone(),
319 src_part.clone(),
320 edge.target.clone(),
321 tgt_part.clone(),
322 edge.edge_type,
323 edge.ref_line,
324 edge.ident.clone(),
325 ));
326 }
327 _ => {
328 }
332 }
333 }
334
335 (intra_edges, cross_refs)
336 }
337
338 pub fn update_partition(
348 graph: &PetCodeGraph,
349 prism_dir: &Path,
350 partition_id: &str,
351 root_name: &str,
352 ) -> Result<(), PartitionerError> {
353 let partitions_dir = prism_dir.join("partitions");
354 let safe_name = partition_id.replace(['/', '\\', ':'], "_");
355 let db_path = partitions_dir.join(format!("{}.db", safe_name));
356
357 let conn = if db_path.exists() {
359 PartitionConnection::open(&db_path, partition_id)?
360 } else {
361 std::fs::create_dir_all(&partitions_dir)?;
362 PartitionConnection::create(&db_path, partition_id)?
363 };
364
365 conn.clear()?;
367
368 let node_to_partition: HashMap<String, String> = graph
370 .iter_nodes()
371 .map(|node| {
372 let pid = LazyGraphManager::compute_partition_id_for_root(root_name, &node.file);
373 (node.id.clone(), pid)
374 })
375 .collect();
376
377 let nodes: Vec<Node> = graph
379 .iter_nodes()
380 .filter(|node| {
381 let pid = LazyGraphManager::compute_partition_id_for_root(root_name, &node.file);
382 pid == partition_id
383 })
384 .cloned()
385 .collect();
386
387 conn.insert_nodes(&nodes)?;
388
389 let (intra_edges, _) = Self::classify_edges(graph, &node_to_partition);
391 if let Some(edges) = intra_edges.get(partition_id) {
392 conn.insert_edges(edges)?;
393 }
394
395 Ok(())
396 }
397
398 pub fn get_unique_files(graph: &PetCodeGraph) -> HashSet<String> {
402 graph
403 .iter_nodes()
404 .filter(|n| !n.file.is_empty())
405 .map(|n| n.file.clone())
406 .collect()
407 }
408}
409
410#[cfg(test)]
411mod tests {
412 use super::*;
413 use crate::graph::{CallableKind, EdgeData, Node};
414 use tempfile::TempDir;
415
416 fn create_test_node(id: &str, name: &str, file: &str) -> Node {
417 Node::callable(
418 id.to_string(),
419 name.to_string(),
420 CallableKind::Function,
421 file.to_string(),
422 1,
423 10,
424 )
425 }
426
427 fn create_test_graph() -> PetCodeGraph {
428 let mut graph = PetCodeGraph::new();
429
430 graph.add_node(Node::source_file(
432 "src/core/main.py".to_string(),
433 "src/core/main.py".to_string(),
434 "abc123".to_string(),
435 100,
436 ));
437 graph.add_node(Node::source_file(
438 "src/utils/helper.py".to_string(),
439 "src/utils/helper.py".to_string(),
440 "def456".to_string(),
441 50,
442 ));
443
444 graph.add_node(create_test_node(
446 "src/core/main.py:main",
447 "main",
448 "src/core/main.py",
449 ));
450 graph.add_node(create_test_node(
451 "src/core/main.py:process",
452 "process",
453 "src/core/main.py",
454 ));
455 graph.add_node(create_test_node(
456 "src/utils/helper.py:helper",
457 "helper",
458 "src/utils/helper.py",
459 ));
460
461 graph.add_edge(
464 "src/core/main.py",
465 "src/core/main.py:main",
466 EdgeData::contains(),
467 );
468 graph.add_edge(
469 "src/core/main.py",
470 "src/core/main.py:process",
471 EdgeData::contains(),
472 );
473 graph.add_edge(
474 "src/utils/helper.py",
475 "src/utils/helper.py:helper",
476 EdgeData::contains(),
477 );
478
479 graph.add_edge(
481 "src/core/main.py:main",
482 "src/core/main.py:process",
483 EdgeData::uses(Some(5), Some("process".to_string())),
484 );
485
486 graph.add_edge(
488 "src/core/main.py:main",
489 "src/utils/helper.py:helper",
490 EdgeData::uses(Some(10), Some("helper".to_string())),
491 );
492
493 graph
494 }
495
/// Partitioning the fixture graph yields a non-empty manifest that knows both
/// source files, and creates all three on-disk artifacts.
#[test]
fn test_partition_basic() {
    let workspace = TempDir::new().unwrap();
    let out_dir = workspace.path().join(".codeprysm");

    let fixture = create_test_graph();
    let manifest = GraphPartitioner::partition(&fixture, &out_dir, Some("myrepo")).unwrap();

    assert!(!manifest.partitions.is_empty());

    // Both source files must be assigned to some partition.
    for file in ["src/core/main.py", "src/utils/helper.py"] {
        assert!(manifest.get_partition_for_file(file).is_some());
    }

    // Partition directory, cross-reference store and manifest must exist.
    for artifact in ["partitions", "cross_refs.db", "manifest.json"] {
        assert!(out_dir.join(artifact).exists());
    }
}
528
/// The statistics for the fixture graph: 5 nodes, 5 edges, 2 partitions,
/// 1 cross-partition edge and 4 intra-partition edges.
#[test]
fn test_partition_with_stats() {
    let workspace = TempDir::new().unwrap();
    let out_dir = workspace.path().join(".codeprysm");

    let fixture = create_test_graph();
    let result = GraphPartitioner::partition_with_stats(&fixture, &out_dir, Some("myrepo"));
    let (manifest, stats) = result.unwrap();

    assert_eq!(stats.total_nodes, 5);
    assert_eq!(stats.total_edges, 5);
    assert_eq!(stats.partition_count, 2);

    // Only main -> helper crosses a partition boundary.
    assert_eq!(stats.cross_partition_edges, 1);

    // Three `contains` edges plus main -> process stay inside a partition.
    assert_eq!(stats.intra_partition_edges, 4);

    assert_eq!(manifest.partitions.len(), 2);
}
552
/// Exactly one database file is written per partition of the fixture graph.
#[test]
fn test_partition_creates_correct_files() {
    let workspace = TempDir::new().unwrap();
    let out_dir = workspace.path().join(".codeprysm");

    GraphPartitioner::partition(&create_test_graph(), &out_dir, Some("myrepo")).unwrap();

    // Count the readable directory entries under `partitions/`.
    let db_files = std::fs::read_dir(out_dir.join("partitions"))
        .unwrap()
        .filter_map(Result::ok)
        .count();

    assert_eq!(db_files, 2);
}
571
/// What was written by `partition` can be read back: the manifest maps files
/// to the expected partitions and the cross-reference store holds the single
/// main -> helper reference.
#[test]
fn test_partition_roundtrip() {
    let workspace = TempDir::new().unwrap();
    let out_dir = workspace.path().join(".codeprysm");

    let fixture = create_test_graph();
    GraphPartitioner::partition(&fixture, &out_dir, Some("myrepo")).unwrap();

    // Manifest, reloaded from disk.
    let manifest = Manifest::load(&out_dir.join("manifest.json")).unwrap();

    let main_partition = manifest.get_partition_for_file("src/core/main.py").unwrap();
    assert!(main_partition.contains("src/core"));

    let helper_partition = manifest
        .get_partition_for_file("src/utils/helper.py")
        .unwrap();
    assert!(helper_partition.contains("src/utils"));

    // Cross references, reloaded from disk.
    let store = CrossRefStore::open(&out_dir.join("cross_refs.db")).unwrap();
    let loaded_refs = store.load_all().unwrap();

    assert_eq!(loaded_refs.len(), 1);

    let incoming = loaded_refs
        .get_by_target("src/utils/helper.py:helper")
        .unwrap();
    assert_eq!(incoming.len(), 1);
    assert_eq!(incoming[0].source_id, "src/core/main.py:main");
}
608
/// The fixture graph is grouped into `myrepo_src/core` (file node plus two
/// functions) and `myrepo_src/utils` (file node plus one function).
#[test]
fn test_group_nodes_by_partition() {
    let fixture = create_test_graph();
    let groups = GraphPartitioner::group_nodes_by_partition(&fixture, "myrepo");

    assert_eq!(groups.len(), 2);

    assert!(groups.contains_key("myrepo_src/core"));
    assert!(groups.contains_key("myrepo_src/utils"));

    assert_eq!(groups["myrepo_src/core"].len(), 3);
    assert_eq!(groups["myrepo_src/utils"].len(), 2);
}
628
/// Edge classification on the fixture graph: three intra-partition edges in
/// `myrepo_src/core`, one in `myrepo_src/utils`, and one cross reference.
#[test]
fn test_classify_edges() {
    let fixture = create_test_graph();

    // Node id -> partition id, built the same way the partitioner does.
    let mut owner_of: HashMap<String, String> = HashMap::new();
    for node in fixture.iter_nodes() {
        let pid = LazyGraphManager::compute_partition_id_for_root("myrepo", &node.file);
        owner_of.insert(node.id.clone(), pid);
    }

    let (intra, cross) = GraphPartitioner::classify_edges(&fixture, &owner_of);

    assert!(intra.contains_key("myrepo_src/core"));
    assert!(intra.contains_key("myrepo_src/utils"));

    // Two `contains` edges plus main -> process.
    assert_eq!(intra["myrepo_src/core"].len(), 3);

    // One `contains` edge.
    assert_eq!(intra["myrepo_src/utils"].len(), 1);

    // main -> helper.
    assert_eq!(cross.len(), 1);
}
658
/// A caller-supplied `RootInfo` is registered in the manifest unchanged.
#[test]
fn test_partition_with_root_info() {
    let workspace = TempDir::new().unwrap();
    let out_dir = workspace.path().join(".codeprysm");

    let fixture = create_test_graph();

    let git_root = RootInfo {
        name: "test-repo".to_string(),
        root_type: "git".to_string(),
        relative_path: ".".to_string(),
        remote_url: Some("https://github.com/org/repo".to_string()),
        branch: Some("main".to_string()),
        commit: Some("abc123".to_string()),
    };

    let result = GraphPartitioner::partition_with_root_info(&fixture, &out_dir, git_root);
    let (manifest, stats) = result.unwrap();

    let registered = manifest.get_root("test-repo").unwrap();
    assert_eq!(registered.root_type, "git");
    assert_eq!(
        registered.remote_url,
        Some("https://github.com/org/repo".to_string())
    );
    assert_eq!(registered.branch, Some("main".to_string()));

    assert_eq!(stats.partition_count, 2);
}
690
/// An empty graph (with the default root name) produces zeroed statistics and
/// a manifest without files.
#[test]
fn test_partition_empty_graph() {
    let workspace = TempDir::new().unwrap();
    let out_dir = workspace.path().join(".codeprysm");

    let empty = PetCodeGraph::new();
    let result = GraphPartitioner::partition_with_stats(&empty, &out_dir, None);
    let (manifest, stats) = result.unwrap();

    assert_eq!(stats.total_nodes, 0);
    assert_eq!(stats.total_edges, 0);
    assert_eq!(stats.partition_count, 0);
    assert!(manifest.files.is_empty());
}
705
/// A single node in a top-level file ends up in the `myrepo_root` partition.
#[test]
fn test_partition_single_file() {
    let workspace = TempDir::new().unwrap();
    let out_dir = workspace.path().join(".codeprysm");

    let mut single = PetCodeGraph::new();
    single.add_node(create_test_node("main.py:func", "func", "main.py"));

    let result = GraphPartitioner::partition_with_stats(&single, &out_dir, Some("myrepo"));
    let (manifest, stats) = result.unwrap();

    assert_eq!(stats.total_nodes, 1);
    assert_eq!(stats.partition_count, 1);

    // A file without a directory component maps to the "<root>_root" partition.
    let partition = manifest.get_partition_for_file("main.py").unwrap();
    assert_eq!(partition, "myrepo_root");
}
724
/// After `update_partition` with a graph that has one more function, the
/// partition database contains both nodes.
#[test]
fn test_update_partition() {
    let workspace = TempDir::new().unwrap();
    let out_dir = workspace.path().join(".codeprysm");

    // Initial state: a single function in src/core/main.py.
    let mut before = PetCodeGraph::new();
    before.add_node(create_test_node(
        "src/core/main.py:func1",
        "func1",
        "src/core/main.py",
    ));

    GraphPartitioner::partition(&before, &out_dir, Some("myrepo")).unwrap();

    // Updated state: the same function plus a second one in the same file.
    let mut after = PetCodeGraph::new();
    for (id, name) in [
        ("src/core/main.py:func1", "func1"),
        ("src/core/main.py:func2", "func2"),
    ] {
        after.add_node(create_test_node(id, name, "src/core/main.py"));
    }

    GraphPartitioner::update_partition(&after, &out_dir, "myrepo_src/core", "myrepo").unwrap();

    // Re-open the rewritten partition and check its node count.
    let db_path = out_dir.join("partitions/myrepo_src_core.db");
    let reopened = PartitionConnection::open(&db_path, "myrepo_src/core").unwrap();
    let db_stats = reopened.stats().unwrap();

    assert_eq!(db_stats.node_count, 2);
}
763
/// The fixture graph references exactly two distinct files.
#[test]
fn test_get_unique_files() {
    let fixture = create_test_graph();
    let unique = GraphPartitioner::get_unique_files(&fixture);

    assert_eq!(unique.len(), 2);
    for expected in ["src/core/main.py", "src/utils/helper.py"] {
        assert!(unique.contains(expected));
    }
}
773}