//! Graph construction: [`GraphBuilder`] walks a source tree and builds a
//! [`PetCodeGraph`] from extracted definition/reference tags, while
//! [`ComponentBuilder`] discovers manifest files and layers component and
//! dependency structure on top of the graph.

use std::collections::HashMap;
use std::path::{Path, PathBuf};

use ignore::WalkBuilder;
use thiserror::Error;
use tracing::{debug, info, warn};

use crate::discovery::{DiscoveredRoot, RootDiscovery};
use crate::graph::{
    CallableKind, ContainerKind, DataKind, Edge, EdgeType, Node, NodeMetadata, NodeType,
    PetCodeGraph,
};
use crate::manifest::{DependencyType, LocalDependency, ManifestInfo, ManifestParser};
use crate::merkle::compute_file_hash;
use crate::parser::{
    ContainmentContext, ManifestLanguage, MetadataExtractor, SupportedLanguage, TagExtractor,
    generate_node_id,
};
use crate::tags::{TagParseResult, parse_tag_string};

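/// Errors that can occur while building a code graph.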
#[derive(Debug, Error)]
pub enum BuilderError {
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    #[error("Parser error: {0}")]
    Parser(#[from] crate::parser::ParserError),

    #[error("Query directory not found: {}", .0.display())]
    QueryDirNotFound(PathBuf),

    #[error("No supported files found in directory: {}", .0.display())]
    NoFilesFound(PathBuf),
}

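/// Options that control how much of the source tree is turned into nodes.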
#[derive(Debug, Clone)]
pub struct BuilderConfig {
    /// Skip `Data`-typed definition nodes entirely.
    pub skip_data_nodes: bool,
    /// Skip definitions nested at or beyond this containment depth.
    pub max_containment_depth: Option<usize>,
    /// Stop after processing this many files.
    pub max_files: Option<usize>,
    /// Glob patterns (relative paths) excluded from the file walk.
    pub exclude_patterns: Vec<String>,
}

impl Default for BuilderConfig {
    fn default() -> Self {
        Self {
            skip_data_nodes: false,
            max_containment_depth: None,
            max_files: None,
            exclude_patterns: vec![
                "**/.git/**".to_string(),
                "**/node_modules/**".to_string(),
                "**/target/**".to_string(),
                "**/__pycache__/**".to_string(),
                "**/.venv/**".to_string(),
                "**/venv/**".to_string(),
                "**/.tox/**".to_string(),
                "**/dist/**".to_string(),
                "**/build/**".to_string(),
            ],
        }
    }
}

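/// A single reference occurrence, recorded during file processing and
/// resolved into a USES edge afterwards.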
#[derive(Debug, Clone)]
struct ReferenceInfo {
    source_id: String,
    line: usize,
}

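/// Builds a [`PetCodeGraph`] from a directory of source files by extracting
/// definition and reference tags and wiring up CONTAINS, DEFINES, and USES
/// edges.
///
/// Illustrative usage (a minimal sketch; the `./my-repo` path is a
/// placeholder):
///
/// ```ignore
/// let mut builder = GraphBuilder::new_with_embedded_queries();
/// let graph = builder.build_from_directory(Path::new("./my-repo"))?;
/// println!("{} nodes, {} edges", graph.node_count(), graph.edge_count());
/// ```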
pub struct GraphBuilder {
    queries_dir: Option<PathBuf>,
    config: BuilderConfig,
}

impl GraphBuilder {
    /// Creates a builder that uses the tag queries embedded in the binary.
    pub fn new_with_embedded_queries() -> Self {
        Self {
            queries_dir: None,
            config: BuilderConfig::default(),
        }
    }

    /// Creates a builder with embedded queries and a custom configuration.
    pub fn with_embedded_queries(config: BuilderConfig) -> Self {
        Self {
            queries_dir: None,
            config,
        }
    }

    /// Creates a builder that loads tag queries from `queries_dir`.
    pub fn new(queries_dir: &Path) -> Result<Self, BuilderError> {
        Self::with_config(queries_dir, BuilderConfig::default())
    }

    /// Creates a builder with an on-disk query directory and a custom configuration.
    pub fn with_config(queries_dir: &Path, config: BuilderConfig) -> Result<Self, BuilderError> {
        if !queries_dir.exists() {
            return Err(BuilderError::QueryDirNotFound(queries_dir.to_path_buf()));
        }

        Ok(Self {
            queries_dir: Some(queries_dir.to_path_buf()),
            config,
        })
    }

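    /// Builds a graph for a single repository rooted at `directory`.
    ///
    /// Creates a repository node, processes every supported file, and then
    /// resolves recorded references into USES edges.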
    pub fn build_from_directory(&mut self, directory: &Path) -> Result<PetCodeGraph, BuilderError> {
        let mut graph = PetCodeGraph::new();

        let repo_name = get_repo_name(directory);
        let (git_remote, git_branch, git_commit) = extract_git_metadata(directory);
        let repo_metadata = NodeMetadata::default().with_git(git_remote, git_branch, git_commit);
        let repo_node = Node::repository(repo_name.clone(), repo_metadata);
        graph.add_node(repo_node);

        info!("Created repository node: {}", repo_name);

        let mut defines: HashMap<String, String> = HashMap::new();
        let mut references: HashMap<String, Vec<ReferenceInfo>> = HashMap::new();

        let mut file_count = 0;
        let mut skipped_data_nodes = 0;
        let mut skipped_depth_nodes = 0;

        info!("Processing files in {}", directory.display());

        let files: Vec<PathBuf> = self.collect_files(directory)?;

        if files.is_empty() {
            return Err(BuilderError::NoFilesFound(directory.to_path_buf()));
        }

        info!("Found {} files to process", files.len());

        for file_path in files {
            if let Some(max) = self.config.max_files {
                if file_count >= max {
                    info!("Reached maximum file limit of {}", max);
                    break;
                }
            }

            let rel_path = file_path
                .strip_prefix(directory)
                .unwrap_or(&file_path)
                .to_string_lossy()
                .to_string();

            match self.process_file(
                &file_path,
                &rel_path,
                &repo_name,
                &mut graph,
                &mut defines,
                &mut references,
                &mut skipped_data_nodes,
                &mut skipped_depth_nodes,
            ) {
                Ok(_) => {
                    file_count += 1;
                    if file_count % 100 == 0 {
                        debug!("Processed {} files", file_count);
                    }
                }
                Err(e) => {
                    warn!("Error processing {}: {}", rel_path, e);
                }
            }
        }

        info!("Processed {} files", file_count);

        self.resolve_references(&mut graph, &defines, &references);

        let contains_count = graph.edges_by_type(EdgeType::Contains).count();
        let uses_count = graph.edges_by_type(EdgeType::Uses).count();
        let defines_count = graph.edges_by_type(EdgeType::Defines).count();

        info!("Graph summary:");
        info!(" - Nodes: {}", graph.node_count());
        info!(" - CONTAINS edges: {}", contains_count);
        info!(" - USES edges: {}", uses_count);
        info!(" - DEFINES edges: {}", defines_count);
        info!(" - Total edges: {}", graph.edge_count());

        if skipped_data_nodes > 0 || skipped_depth_nodes > 0 {
            info!("Performance filtering:");
            if skipped_data_nodes > 0 {
                info!(" - Skipped Data nodes: {}", skipped_data_nodes);
            }
            if skipped_depth_nodes > 0 {
                info!(" - Skipped nodes (max depth): {}", skipped_depth_nodes);
            }
        }

        Ok(graph)
    }

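    /// Builds a graph for a workspace that may contain several code roots.
    ///
    /// Each discovered root is built with [`Self::build_from_directory`] and
    /// merged under a single workspace node. If exactly one root sits at the
    /// workspace path itself, this falls back to the standard single-root build.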
    pub fn build_from_workspace(
        &mut self,
        workspace_path: &Path,
    ) -> Result<(PetCodeGraph, Vec<DiscoveredRoot>), BuilderError> {
        let workspace_path = workspace_path.canonicalize().map_err(BuilderError::Io)?;

        info!("Building workspace graph from {:?}", workspace_path);

        let discovery = RootDiscovery::with_defaults();
        let roots = discovery
            .discover(&workspace_path)
            .map_err(|e| BuilderError::Io(std::io::Error::other(e.to_string())))?;

        info!("Discovered {} code root(s)", roots.len());

        if roots.len() == 1 && roots[0].relative_path == "." {
            info!("Single root at workspace path, using standard build");
            let graph = self.build_from_directory(&workspace_path)?;
            return Ok((graph, roots));
        }

        let workspace_name = workspace_path
            .file_name()
            .map(|n| n.to_string_lossy().to_string())
            .unwrap_or_else(|| "workspace".to_string());

        info!(
            "Creating workspace '{}' with {} roots",
            workspace_name,
            roots.len()
        );

        let mut workspace_graph = PetCodeGraph::new();

        let workspace_node = Node::workspace(workspace_name.clone());
        let workspace_id = workspace_node.id.clone();
        workspace_graph.add_node(workspace_node);

        for root in &roots {
            info!("Processing root: {} ({:?})", root.name, root.root_type);

            let root_graph = match self.build_from_directory(&root.path) {
                Ok(g) => g,
                Err(e) => {
                    warn!("Failed to build graph for {}: {}", root.name, e);
                    continue;
                }
            };

            let root_node_id = self.find_root_node_id(&root_graph, root);

            self.merge_root_graph(
                &mut workspace_graph,
                root_graph,
                &workspace_id,
                &root_node_id,
            );

            info!("Merged root '{}' into workspace graph", root.name);
        }

        info!(
            "Workspace graph complete: {} nodes, {} edges across {} roots",
            workspace_graph.node_count(),
            workspace_graph.edge_count(),
            roots.len()
        );

        Ok((workspace_graph, roots))
    }

    fn find_root_node_id(&self, graph: &PetCodeGraph, root: &DiscoveredRoot) -> String {
        graph
            .iter_nodes()
            .find(|n| n.is_repository())
            .map(|n| n.id.clone())
            .unwrap_or_else(|| root.name.clone())
    }

    fn merge_root_graph(
        &self,
        workspace_graph: &mut PetCodeGraph,
        root_graph: PetCodeGraph,
        workspace_id: &str,
        root_node_id: &str,
    ) {
        for node in root_graph.iter_nodes() {
            workspace_graph.add_node(node.clone());
        }

        for edge in root_graph.iter_edges() {
            workspace_graph.add_edge_from_struct(&edge);
        }

        workspace_graph.add_edge_from_struct(&Edge::contains(
            workspace_id.to_string(),
            root_node_id.to_string(),
        ));
    }

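    /// Walks `directory` and returns the supported source files to process,
    /// honoring gitignore rules, a `.codeprysmignore` file, and the
    /// configured exclude patterns.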
    fn collect_files(&self, directory: &Path) -> Result<Vec<PathBuf>, BuilderError> {
        let mut files = Vec::new();
        let glob_set = self.build_exclude_glob_set();

        let walker = WalkBuilder::new(directory)
            .follow_links(false)
            .hidden(true)
            .git_ignore(true)
            .git_global(true)
            .git_exclude(true)
            .add_custom_ignore_filename(".codeprysmignore")
            .build();

        for entry in walker {
            let entry = match entry {
                Ok(e) => e,
                Err(e) => {
                    debug!("Error walking directory: {}", e);
                    continue;
                }
            };

            let file_type = match entry.file_type() {
                Some(ft) => ft,
                None => continue,
            };
            if !file_type.is_file() {
                continue;
            }

            let path = entry.path();

            if SupportedLanguage::from_path(path).is_none() {
                continue;
            }

            let rel_path = path
                .strip_prefix(directory)
                .unwrap_or(path)
                .to_string_lossy();
            if glob_set.is_match(rel_path.as_ref()) {
                continue;
            }

            files.push(path.to_path_buf());
        }

        files.sort();

        Ok(files)
    }

    fn build_exclude_glob_set(&self) -> globset::GlobSet {
        let mut builder = globset::GlobSetBuilder::new();
        for pattern in &self.config.exclude_patterns {
            if let Ok(glob) = globset::Glob::new(pattern) {
                builder.add(glob);
            }
        }
        builder
            .build()
            .unwrap_or_else(|_| globset::GlobSet::empty())
    }

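    /// Parses a single file: adds a node for the file itself, creates
    /// definition nodes with containment edges, and records references for
    /// later resolution.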
    #[allow(clippy::too_many_arguments)]
    fn process_file(
        &mut self,
        file_path: &Path,
        rel_path: &str,
        repo_name: &str,
        graph: &mut PetCodeGraph,
        defines: &mut HashMap<String, String>,
        references: &mut HashMap<String, Vec<ReferenceInfo>>,
        skipped_data_nodes: &mut usize,
        skipped_depth_nodes: &mut usize,
    ) -> Result<(), BuilderError> {
        let language = match SupportedLanguage::from_path(file_path) {
            Some(lang) => lang,
            None => return Ok(()),
        };

        let source = std::fs::read_to_string(file_path)?;

        let file_hash = compute_file_hash(file_path)?;

        let line_count = source.lines().count();

        let file_node = Node::source_file(
            rel_path.to_string(),
            rel_path.to_string(),
            file_hash,
            line_count,
        );
        graph.add_node(file_node);

        if !repo_name.is_empty() {
            graph
                .add_edge_from_struct(&Edge::contains(repo_name.to_string(), rel_path.to_string()));
        }

        let mut extractor = match &self.queries_dir {
            Some(dir) => TagExtractor::from_queries_dir(language, dir)?,
            None => TagExtractor::from_embedded(language)?,
        };
        let metadata_extractor = MetadataExtractor::new(language);

        let tags = extractor.extract(&source)?;

        let mut definition_tags: Vec<_> = tags
            .iter()
            .filter(|t| t.tag.starts_with("name.") && t.tag.contains(".definition."))
            .collect();

        let reference_tags: Vec<_> = tags
            .iter()
            .filter(|t| t.tag.starts_with("name.") && t.tag.contains(".reference."))
            .collect();

        definition_tags.sort_by_key(|t| (t.start_line, t.end_line));

        let mut containment_ctx = ContainmentContext::new();

        // First pass: create definition nodes and CONTAINS/DEFINES edges.
        for tag in &definition_tags {
            let tag_string = normalize_tag_string(&tag.tag);
            let tag_info = match parse_tag_string(&tag_string) {
                Ok(info) => info,
                Err(e) => {
                    warn!(
                        "Could not parse tag type '{}' in {}:{}: {}",
                        tag.tag,
                        rel_path,
                        tag.line_number(),
                        e
                    );
                    continue;
                }
            };

            if self.config.skip_data_nodes && tag_info.node_type == NodeType::Data {
                *skipped_data_nodes += 1;
                continue;
            }

            containment_ctx.update(tag.containment_start_line());

            if let Some(max_depth) = self.config.max_containment_depth {
                let current_depth = containment_ctx.depth();
                if current_depth >= max_depth {
                    *skipped_depth_nodes += 1;
                    continue;
                }
            }

            let (containment_path, parent_id) = if let Some(impl_type) = &tag.impl_target {
                let impl_type_id = format!("{}:{}", rel_path, impl_type);
                (vec![impl_type.as_str()], impl_type_id)
            } else {
                let path = containment_ctx.get_containment_path();
                let parent = containment_ctx
                    .get_current_parent_id()
                    .map(String::from)
                    .unwrap_or_else(|| rel_path.to_string());
                (path, parent)
            };

            if containment_path.last() == Some(&tag.name.as_str()) {
                continue;
            }

            let node_id = generate_node_id(
                rel_path,
                &containment_path,
                &tag.name,
                Some(tag.line_number()),
            );

            defines.insert(tag.name.clone(), node_id.clone());

            let node = self.create_node_from_tag(
                &node_id,
                &tag.name,
                &tag_info,
                rel_path,
                tag.line_number(),
                tag.end_line_number(),
                &metadata_extractor,
            );

            if graph.contains_node(&node_id) {
                continue;
            }

            graph.add_node(node);

            graph.add_edge_from_struct(&Edge::contains(parent_id.clone(), node_id.clone()));

            if tag_info.node_type == NodeType::Data && parent_id != rel_path {
                graph.add_edge_from_struct(&Edge::defines(parent_id.clone(), node_id.clone()));
            }

            let node_type_str = tag_info.node_type.as_str();
            if node_type_str == "Container" || node_type_str == "Callable" {
                containment_ctx.push_container(
                    node_id,
                    node_type_str.to_string(),
                    tag.containment_start_line(),
                    tag.containment_end_line(),
                    tag.name.clone(),
                );
            }
        }

        // Second pass: record references; they become USES edges in
        // `resolve_references` once all definitions are known.
        for tag in &reference_tags {
            let tag_string = normalize_tag_string(&tag.tag);
            let _tag_info = match parse_tag_string(&tag_string) {
                Ok(info) => info,
                Err(_) => continue,
            };

            let source_id = self.find_enclosing_context(&definition_tags, tag.start_line, rel_path);

            references
                .entry(tag.name.clone())
                .or_default()
                .push(ReferenceInfo {
                    source_id,
                    line: tag.line_number(),
                });
        }

        Ok(())
    }

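    /// Converts a parsed definition tag into a Container, Callable, or Data node.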
    #[allow(clippy::too_many_arguments)]
    fn create_node_from_tag(
        &self,
        node_id: &str,
        name: &str,
        tag_info: &TagParseResult,
        file: &str,
        line: usize,
        end_line: usize,
        metadata_extractor: &MetadataExtractor,
    ) -> Node {
        let metadata = metadata_extractor.extract_from_name(name);

        match tag_info.node_type {
            NodeType::Container => {
                let kind = match &tag_info.kind {
                    Some(crate::graph::NodeKind::Container(k)) => *k,
                    _ => ContainerKind::Type,
                };
                if kind == ContainerKind::File {
                    Node::source_file(
                        node_id.to_string(),
                        file.to_string(),
                        String::new(),
                        end_line,
                    )
                } else {
                    Node::container(
                        node_id.to_string(),
                        name.to_string(),
                        kind,
                        tag_info.subtype.clone(),
                        file.to_string(),
                        line,
                        end_line,
                    )
                    .with_metadata(metadata)
                }
            }
            NodeType::Callable => {
                let kind = match &tag_info.kind {
                    Some(crate::graph::NodeKind::Callable(k)) => *k,
                    _ => CallableKind::Function,
                };
                let node = Node::callable(
                    node_id.to_string(),
                    name.to_string(),
                    kind,
                    file.to_string(),
                    line,
                    end_line,
                );

                let mut meta = metadata;
                if let Some(scope) = &tag_info.scope {
                    meta.scope = Some(scope.clone());
                }
                node.with_metadata(meta)
            }
            NodeType::Data => {
                let kind = match &tag_info.kind {
                    Some(crate::graph::NodeKind::Data(k)) => *k,
                    _ => DataKind::Value,
                };
                Node::data(
                    node_id.to_string(),
                    name.to_string(),
                    kind,
                    tag_info.subtype.clone(),
                    file.to_string(),
                    line,
                    end_line,
                )
                .with_metadata(metadata)
            }
        }
    }

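    /// Returns the node id of the innermost Container or Callable definition
    /// whose span encloses `line`, or the file path itself when the reference
    /// sits at file scope.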
    fn find_enclosing_context(
        &self,
        definition_tags: &[&crate::parser::ExtractedTag],
        line: usize,
        file: &str,
    ) -> String {
        // Find the innermost Container/Callable definition whose span encloses the line.
        let mut enclosing: Option<&crate::parser::ExtractedTag> = None;

        for tag in definition_tags {
            let tag_start = tag.containment_start_line();
            let tag_end = tag.containment_end_line();
            if tag_start <= line && tag_end >= line {
                let tag_string = normalize_tag_string(&tag.tag);
                if let Ok(info) = parse_tag_string(&tag_string) {
                    if info.node_type == NodeType::Container || info.node_type == NodeType::Callable
                    {
                        if let Some(current) = enclosing {
                            let current_start = current.containment_start_line();
                            let current_end = current.containment_end_line();
                            if tag_start >= current_start && tag_end <= current_end {
                                enclosing = Some(tag);
                            }
                        } else {
                            enclosing = Some(tag);
                        }
                    }
                }
            }
        }

        if let Some(enc) = enclosing {
            // Rebuild the containment path of the enclosing definition to derive its node id.
            let mut path = Vec::new();
            let enc_start = enc.containment_start_line();
            let enc_end = enc.containment_end_line();

            for tag in definition_tags {
                let tag_start = tag.containment_start_line();
                let tag_end = tag.containment_end_line();
                if tag_start < enc_start && tag_end >= enc_end {
                    let tag_string = normalize_tag_string(&tag.tag);
                    if let Ok(info) = parse_tag_string(&tag_string) {
                        if info.node_type == NodeType::Container
                            || info.node_type == NodeType::Callable
                        {
                            path.push(tag.name.as_str());
                        }
                    }
                }
            }

            path.push(enc.name.as_str());
            generate_node_id(file, &path[..path.len() - 1], path.last().unwrap(), None)
        } else {
            file.to_string()
        }
    }

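    /// Converts recorded references into USES edges for every name that has a
    /// known definition; unresolved names are counted as forward/external references.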
    fn resolve_references(
        &self,
        graph: &mut PetCodeGraph,
        defines: &HashMap<String, String>,
        references: &HashMap<String, Vec<ReferenceInfo>>,
    ) {
        info!("Creating USES relationships...");
        let mut uses_count = 0;
        let mut forward_refs = 0;
        let mut skipped_missing_source = 0;

        for (name, refs) in references {
            if let Some(target_id) = defines.get(name) {
                for ref_info in refs {
                    if ref_info.source_id != *target_id {
                        if graph.contains_node(&ref_info.source_id) {
                            graph.add_edge_from_struct(&Edge::uses(
                                ref_info.source_id.clone(),
                                target_id.clone(),
                                Some(ref_info.line),
                                Some(name.clone()),
                            ));
                            uses_count += 1;
                        } else {
                            skipped_missing_source += 1;
                            debug!(
                                "Skipped USES edge: source '{}' not found (ref to '{}')",
                                ref_info.source_id, name
                            );
                        }
                    }
                }
            } else {
                forward_refs += refs.len();
                debug!(
                    "Forward/external reference to '{}' ({} occurrences)",
                    name,
                    refs.len()
                );
            }
        }

        info!("Created {} USES relationships", uses_count);
        if forward_refs > 0 {
            info!("Skipped {} forward/external references", forward_refs);
        }
        if skipped_missing_source > 0 {
            info!(
                "Skipped {} references with missing source nodes",
                skipped_missing_source
            );
        }
    }

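    /// Parses a single file into a standalone graph with no repository parent node.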
    pub fn parse_file(
        &mut self,
        file_path: &Path,
        rel_path: &str,
    ) -> Result<PetCodeGraph, BuilderError> {
        let mut graph = PetCodeGraph::new();
        let mut defines = HashMap::new();
        let mut references = HashMap::new();
        let mut skipped_data = 0;
        let mut skipped_depth = 0;

        self.process_file(
            file_path,
            rel_path,
            "", // empty repo name: no repository containment edge is added
            &mut graph,
            &mut defines,
            &mut references,
            &mut skipped_data,
            &mut skipped_depth,
        )?;

        debug!(
            "Parsed {}: {} nodes, {} edges",
            rel_path,
            graph.node_count(),
            graph.edge_count()
        );

        Ok(graph)
    }
}

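/// A component discovered from a manifest file (e.g. `package.json`, `Cargo.toml`).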
#[derive(Debug, Clone)]
pub struct DiscoveredComponent {
    /// Graph node id, e.g. `component:{repo}:{directory}`.
    pub node_id: String,
    /// Component name from the manifest, or the directory/repo name as a fallback.
    pub name: String,
    /// Manifest path relative to the scanned root.
    pub manifest_path: String,
    /// Directory containing the manifest, relative to the scanned root.
    pub directory: String,
    /// Parsed manifest contents.
    pub info: ManifestInfo,
}

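/// Discovers manifest-defined components in a repository and adds them to an
/// existing graph, including the CONTAINS hierarchy and DEPENDS_ON edges
/// between local components.
///
/// Illustrative usage (a minimal sketch; `repo_root`, `graph`, and `"my-repo"`
/// are placeholders):
///
/// ```ignore
/// let mut component_builder = ComponentBuilder::new()?;
/// let components = component_builder.discover_components(repo_root, &[])?;
/// component_builder.add_to_graph(&mut graph, "my-repo", &components)?;
/// ```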
pub struct ComponentBuilder {
    parser: ManifestParser,
    path_index: HashMap<PathBuf, String>,
}

impl ComponentBuilder {
    pub fn new() -> Result<Self, BuilderError> {
        let parser = ManifestParser::new()
            .map_err(|e| BuilderError::Io(std::io::Error::other(e.to_string())))?;

        Ok(Self {
            parser,
            path_index: HashMap::new(),
        })
    }

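    /// Walks `root` and parses every recognized manifest file into a
    /// [`DiscoveredComponent`], skipping paths matched by `exclude_patterns`.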
    pub fn discover_components(
        &mut self,
        root: &Path,
        exclude_patterns: &[String],
    ) -> Result<Vec<DiscoveredComponent>, BuilderError> {
        let root = root.canonicalize().map_err(BuilderError::Io)?;
        let repo_name = get_repo_name(&root);

        let mut components = Vec::new();
        let glob_set = build_exclude_glob_set(exclude_patterns);

        info!("Discovering components in {}", root.display());

        let walker = WalkBuilder::new(&root)
            .follow_links(false)
            .hidden(true)
            .git_ignore(true)
            .git_global(true)
            .git_exclude(true)
            .add_custom_ignore_filename(".codeprysmignore")
            .build();

        for entry in walker {
            let entry = match entry {
                Ok(e) => e,
                Err(e) => {
                    debug!("Error walking directory: {}", e);
                    continue;
                }
            };

            let file_type = match entry.file_type() {
                Some(ft) => ft,
                None => continue,
            };
            if !file_type.is_file() {
                continue;
            }

            let path = entry.path();

            if ManifestLanguage::from_path(path).is_none() {
                continue;
            }

            let rel_path = path.strip_prefix(&root).unwrap_or(path);
            let rel_path_str = rel_path.to_string_lossy();
            if glob_set.is_match(rel_path_str.as_ref()) {
                debug!("Skipping excluded manifest: {}", rel_path_str);
                continue;
            }

            match self.parse_manifest_file(path, &root, &repo_name) {
                Ok(Some(component)) => {
                    debug!(
                        "Discovered component: {} at {}",
                        component.name, component.manifest_path
                    );
                    components.push(component);
                }
                Ok(None) => {
                    debug!("No component info in {}", rel_path_str);
                }
                Err(e) => {
                    warn!("Failed to parse manifest {}: {}", rel_path_str, e);
                }
            }
        }

        info!("Discovered {} components", components.len());
        Ok(components)
    }

    fn parse_manifest_file(
        &mut self,
        path: &Path,
        root: &Path,
        repo_name: &str,
    ) -> Result<Option<DiscoveredComponent>, BuilderError> {
        let content = std::fs::read_to_string(path)?;
        let rel_path = path
            .strip_prefix(root)
            .unwrap_or(path)
            .to_string_lossy()
            .to_string();

        let info = self
            .parser
            .parse(path, &content)
            .map_err(|e| BuilderError::Io(std::io::Error::other(e.to_string())))?;

        let manifest_dir = path
            .parent()
            .and_then(|p| p.strip_prefix(root).ok())
            .map(|p| p.to_string_lossy().to_string())
            .unwrap_or_default();

        let name = info.component_name.clone().unwrap_or_else(|| {
            if manifest_dir.is_empty() {
                repo_name.to_string()
            } else {
                manifest_dir
                    .rsplit('/')
                    .find(|s| !s.is_empty())
                    .unwrap_or(&manifest_dir)
                    .to_string()
            }
        });

        if info.is_empty() && info.component_name.is_none() {
            return Ok(None);
        }

        let node_id = if manifest_dir.is_empty() {
            format!("component:{}", repo_name)
        } else {
            format!(
                "component:{}:{}",
                repo_name,
                manifest_dir.replace('\\', "/")
            )
        };

        Ok(Some(DiscoveredComponent {
            node_id,
            name,
            manifest_path: rel_path,
            directory: manifest_dir,
            info,
        }))
    }

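    /// Adds the discovered components to `graph`: one node per component,
    /// CONTAINS edges from workspace roots (or the repository) to their
    /// members, and DEPENDS_ON edges for resolvable local dependencies.
    /// Returns the number of component nodes added.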
    pub fn add_to_graph(
        &mut self,
        graph: &mut PetCodeGraph,
        repo_name: &str,
        components: &[DiscoveredComponent],
    ) -> Result<usize, BuilderError> {
        self.build_path_index(components);

        let mut added = 0;
        for component in components {
            self.add_component_node(graph, component);
            added += 1;
        }

        self.build_containment_hierarchy(graph, repo_name, components);

        self.create_dependency_edges(graph, components);

        info!(
            "Added {} components with {} dependency edges",
            added,
            graph.edges_by_type(EdgeType::DependsOn).count()
        );

        Ok(added)
    }

    fn build_path_index(&mut self, components: &[DiscoveredComponent]) {
        self.path_index.clear();

        for component in components {
            let dir_path = PathBuf::from(&component.directory);
            self.path_index
                .insert(dir_path.clone(), component.node_id.clone());

            let normalized = component.directory.replace('\\', "/");
            if normalized != component.directory {
                self.path_index
                    .insert(PathBuf::from(normalized), component.node_id.clone());
            }
        }

        debug!("Built path index with {} entries", self.path_index.len());
    }

    fn add_component_node(&self, graph: &mut PetCodeGraph, component: &DiscoveredComponent) {
        let metadata = NodeMetadata::default().with_component(
            Some(component.info.is_workspace_root),
            Some(component.info.is_publishable()),
            Some(component.manifest_path.clone()),
        );

        let node = Node::component(
            component.node_id.clone(),
            component.name.clone(),
            component.manifest_path.clone(),
            metadata,
        );

        graph.add_node(node);
    }

    fn build_containment_hierarchy(
        &self,
        graph: &mut PetCodeGraph,
        repo_name: &str,
        components: &[DiscoveredComponent],
    ) {
        let workspace_roots: Vec<_> = components
            .iter()
            .filter(|c| c.info.is_workspace_root)
            .collect();

        for component in components {
            let parent_id = self.find_parent_component(component, &workspace_roots, repo_name);

            if graph.contains_node(&parent_id) && graph.contains_node(&component.node_id) {
                graph.add_edge_from_struct(&Edge::contains(parent_id, component.node_id.clone()));
            }
        }
    }

    fn find_parent_component(
        &self,
        component: &DiscoveredComponent,
        workspace_roots: &[&DiscoveredComponent],
        repo_name: &str,
    ) -> String {
        for root in workspace_roots {
            if root.node_id == component.node_id {
                continue;
            }

            for pattern in &root.info.workspace_members {
                if self.matches_workspace_pattern(&component.directory, pattern, &root.directory) {
                    return root.node_id.clone();
                }
            }
        }

        repo_name.to_string()
    }

    fn matches_workspace_pattern(
        &self,
        component_dir: &str,
        pattern: &str,
        workspace_dir: &str,
    ) -> bool {
        let component_dir = component_dir.replace('\\', "/");
        let pattern = pattern.replace('\\', "/");
        let workspace_dir = workspace_dir.replace('\\', "/");

        let full_pattern = if workspace_dir.is_empty() {
            pattern.clone()
        } else {
            format!("{}/{}", workspace_dir, pattern)
        };

        if full_pattern.ends_with("/*") {
            let prefix = full_pattern.trim_end_matches("/*");
            component_dir.starts_with(prefix) && component_dir != prefix
        } else if full_pattern.contains('*') {
            let prefix = full_pattern.split('*').next().unwrap_or("");
            !prefix.is_empty() && component_dir.starts_with(prefix)
        } else {
            component_dir == full_pattern
        }
    }

    fn create_dependency_edges(
        &self,
        graph: &mut PetCodeGraph,
        components: &[DiscoveredComponent],
    ) {
        for component in components {
            for dep in &component.info.local_dependencies {
                if let Some(target_id) = self.resolve_dependency(component, dep) {
                    if graph.contains_node(&component.node_id) && graph.contains_node(&target_id) {
                        let version_spec = self.format_version_spec(dep);
                        let edge = Edge::depends_on(
                            component.node_id.clone(),
                            target_id,
                            Some(dep.name.clone()),
                            version_spec,
                            Some(dep.is_dev),
                        );
                        graph.add_edge_from_struct(&edge);
                    }
                } else {
                    debug!(
                        "Could not resolve dependency '{}' from {} (path: {:?})",
                        dep.name, component.node_id, dep.path
                    );
                }
            }
        }
    }

    fn resolve_dependency(
        &self,
        from: &DiscoveredComponent,
        dep: &LocalDependency,
    ) -> Option<String> {
        if let Some(ref dep_path) = dep.path {
            let mut resolved = self.resolve_dependency_path(&from.directory, dep_path);

            // Project references point at a project file (e.g. a .csproj), so
            // index by its containing directory instead.
            if dep.dep_type == DependencyType::ProjectReference {
                if let Some(parent) = resolved.parent() {
                    resolved = parent.to_path_buf();
                }
            }

            if let Some(id) = self.path_index.get(&resolved) {
                return Some(id.clone());
            }

            if let Some(id) = self.path_index.get(&PathBuf::from(dep_path)) {
                return Some(id.clone());
            }
        }

        if dep.dep_type == DependencyType::Workspace {
            // Workspace dependencies carry no path; match the directory name
            // against the dependency name (or its scoped suffix).
            for (path, id) in &self.path_index {
                let dir_name = path
                    .file_name()
                    .map(|n| n.to_string_lossy().to_string())
                    .unwrap_or_default();
                if dir_name == dep.name || dep.name.ends_with(&format!("/{}", dir_name)) {
                    return Some(id.clone());
                }
            }
        }

        None
    }

    fn resolve_dependency_path(&self, from_dir: &str, dep_path: &str) -> PathBuf {
        let from_dir = from_dir.replace('\\', "/");
        let dep_path = dep_path.replace('\\', "/");

        if let Some(stripped) = dep_path.strip_prefix('/') {
            return PathBuf::from(stripped);
        }

        let from_parts: Vec<&str> = from_dir.split('/').filter(|s| !s.is_empty()).collect();
        let dep_parts: Vec<&str> = dep_path.split('/').filter(|s| !s.is_empty()).collect();

        let mut result: Vec<&str> = from_parts.clone();

        for part in dep_parts {
            match part {
                ".." => {
                    result.pop();
                }
                "." => {}
                _ => {
                    result.push(part);
                }
            }
        }

        PathBuf::from(result.join("/"))
    }

    fn format_version_spec(&self, dep: &LocalDependency) -> Option<String> {
        match dep.dep_type {
            DependencyType::Path => dep.path.as_ref().map(|p| format!("path:{}", p)),
            DependencyType::Workspace => Some("workspace:*".to_string()),
            DependencyType::ProjectReference => dep.path.as_ref().map(|p| format!("project:{}", p)),
            DependencyType::Replace => dep.path.as_ref().map(|p| format!("replace:{}", p)),
            DependencyType::Subdirectory => dep.path.as_ref().map(|p| format!("subdir:{}", p)),
        }
    }

    /// Returns the directory-to-node-id index built by [`Self::add_to_graph`].
    pub fn path_index(&self) -> &HashMap<PathBuf, String> {
        &self.path_index
    }
}

fn build_exclude_glob_set(patterns: &[String]) -> globset::GlobSet {
    let mut builder = globset::GlobSetBuilder::new();
    for pattern in patterns {
        if let Ok(glob) = globset::Glob::new(pattern) {
            builder.add(glob);
        }
    }
    for pattern in &[
        "**/.git/**",
        "**/node_modules/**",
        "**/target/**",
        "**/__pycache__/**",
        "**/.venv/**",
        "**/venv/**",
        "**/.tox/**",
        "**/dist/**",
        "**/build/**",
    ] {
        if let Ok(glob) = globset::Glob::new(pattern) {
            builder.add(glob);
        }
    }
    builder
        .build()
        .unwrap_or_else(|_| globset::GlobSet::empty())
}

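/// Normalizes a raw capture name by stripping a leading `@` and a `name.`
/// prefix, e.g. `name.definition.callable.function` becomes
/// `definition.callable.function`.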
fn normalize_tag_string(tag: &str) -> String {
    let tag = tag.strip_prefix('@').unwrap_or(tag);

    if let Some(stripped) = tag.strip_prefix("name.") {
        return stripped.to_string();
    }

    tag.to_string()
}

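/// Reads the remote URL, branch, and commit of the repository at `repo_path`
/// by shelling out to `git`; each value is `None` when the directory is not a
/// git repository or the command fails.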
fn extract_git_metadata(repo_path: &Path) -> (Option<String>, Option<String>, Option<String>) {
    let git_dir = repo_path.join(".git");
    if !git_dir.exists() {
        return (None, None, None);
    }

    let remote = std::process::Command::new("git")
        .args(["remote", "get-url", "origin"])
        .current_dir(repo_path)
        .output()
        .ok()
        .filter(|o| o.status.success())
        .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
        .filter(|s| !s.is_empty());

    let branch = std::process::Command::new("git")
        .args(["rev-parse", "--abbrev-ref", "HEAD"])
        .current_dir(repo_path)
        .output()
        .ok()
        .filter(|o| o.status.success())
        .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
        .filter(|s| !s.is_empty());

    let commit = std::process::Command::new("git")
        .args(["rev-parse", "HEAD"])
        .current_dir(repo_path)
        .output()
        .ok()
        .filter(|o| o.status.success())
        .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
        .filter(|s| !s.is_empty());

    (remote, branch, commit)
}

fn get_repo_name(directory: &Path) -> String {
    directory
        .file_name()
        .map(|n| n.to_string_lossy().to_string())
        .unwrap_or_else(|| "repository".to_string())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_normalize_tag_string() {
        assert_eq!(
            normalize_tag_string("@definition.callable.function"),
            "definition.callable.function"
        );
        assert_eq!(
            normalize_tag_string("definition.callable.function"),
            "definition.callable.function"
        );
        assert_eq!(
            normalize_tag_string("name.definition.callable.function"),
            "definition.callable.function"
        );
    }

    #[test]
    fn test_builder_config_default() {
        let config = BuilderConfig::default();
        assert!(!config.skip_data_nodes);
        assert!(config.max_containment_depth.is_none());
        assert!(config.max_files.is_none());
        assert!(!config.exclude_patterns.is_empty());
    }

    #[test]
    fn test_builder_new_missing_queries_dir() {
        let result = GraphBuilder::new(Path::new("/nonexistent/queries"));
        assert!(matches!(result, Err(BuilderError::QueryDirNotFound(_))));
    }

    #[test]
    fn test_component_builder_new() {
        let builder = ComponentBuilder::new();
        assert!(builder.is_ok());
    }

    #[test]
    fn test_resolve_dependency_path_relative() {
        let builder = ComponentBuilder::new().unwrap();

        let result = builder.resolve_dependency_path("packages/core", "../utils");
        assert_eq!(result, PathBuf::from("packages/utils"));

        let result = builder.resolve_dependency_path("packages/core", "./lib");
        assert_eq!(result, PathBuf::from("packages/core/lib"));

        let result = builder.resolve_dependency_path("", "packages/shared");
        assert_eq!(result, PathBuf::from("packages/shared"));

        let result = builder.resolve_dependency_path("deep/nested/path", "../../sibling");
        assert_eq!(result, PathBuf::from("deep/sibling"));
    }

    #[test]
    fn test_resolve_dependency_path_windows() {
        let builder = ComponentBuilder::new().unwrap();

        let result = builder.resolve_dependency_path("packages\\core", "..\\utils");
        assert_eq!(result, PathBuf::from("packages/utils"));
    }

    #[test]
    fn test_matches_workspace_pattern() {
        let builder = ComponentBuilder::new().unwrap();

        assert!(builder.matches_workspace_pattern("packages/core", "packages/*", ""));
        assert!(builder.matches_workspace_pattern("packages/utils", "packages/*", ""));
        assert!(!builder.matches_workspace_pattern("packages", "packages/*", ""));
        assert!(!builder.matches_workspace_pattern("other/core", "packages/*", ""));

        assert!(builder.matches_workspace_pattern("apps/web/core", "core", "apps/web"));

        assert!(builder.matches_workspace_pattern("crates/codeprysm-core", "crates/*", ""));
        assert!(builder.matches_workspace_pattern("crates/codeprysm-search", "crates/*", ""));
    }

    #[test]
    fn test_format_version_spec() {
        let builder = ComponentBuilder::new().unwrap();

        let path_dep = LocalDependency::with_path(
            "my-dep".to_string(),
            "../shared".to_string(),
            DependencyType::Path,
        );
        assert_eq!(
            builder.format_version_spec(&path_dep),
            Some("path:../shared".to_string())
        );

        let workspace_dep = LocalDependency::new("my-dep".to_string(), DependencyType::Workspace);
        assert_eq!(
            builder.format_version_spec(&workspace_dep),
            Some("workspace:*".to_string())
        );

        let project_ref_dep = LocalDependency::with_path(
            "Shared".to_string(),
            "../Shared/Shared.csproj".to_string(),
            DependencyType::ProjectReference,
        );
        assert_eq!(
            builder.format_version_spec(&project_ref_dep),
            Some("project:../Shared/Shared.csproj".to_string())
        );
    }

    #[test]
    fn test_build_exclude_glob_set() {
        let default_set = build_exclude_glob_set(&[]);

        assert!(default_set.is_match("node_modules/foo"));
        assert!(default_set.is_match("target/debug"));
        assert!(default_set.is_match(".git/objects"));
        assert!(default_set.is_match("__pycache__/module"));

        let custom_set = build_exclude_glob_set(&["vendor/**".to_string()]);
        assert!(custom_set.is_match("vendor/github.com"));
    }

    #[test]
    fn test_discovered_component_creation() {
        let info = ManifestInfo {
            component_name: Some("my-package".to_string()),
            version: Some("1.0.0".to_string()),
            is_workspace_root: false,
            workspace_members: vec![],
            local_dependencies: vec![],
            ecosystem: Some("npm".to_string()),
        };

        let component = DiscoveredComponent {
            node_id: "component:my-repo:packages/core".to_string(),
            name: "my-package".to_string(),
            manifest_path: "packages/core/package.json".to_string(),
            directory: "packages/core".to_string(),
            info,
        };

        assert_eq!(component.node_id, "component:my-repo:packages/core");
        assert_eq!(component.name, "my-package");
        assert!(!component.info.is_workspace_root);
        assert!(component.info.is_publishable());
    }

    #[test]
    fn test_add_component_node() {
        let builder = ComponentBuilder::new().unwrap();
        let mut graph = PetCodeGraph::new();

        let info = ManifestInfo {
            component_name: Some("test-component".to_string()),
            version: Some("0.1.0".to_string()),
            is_workspace_root: true,
            workspace_members: vec!["packages/*".to_string()],
            local_dependencies: vec![],
            ecosystem: Some("cargo".to_string()),
        };

        let component = DiscoveredComponent {
            node_id: "component:test-repo".to_string(),
            name: "test-component".to_string(),
            manifest_path: "Cargo.toml".to_string(),
            directory: "".to_string(),
            info,
        };

        builder.add_component_node(&mut graph, &component);

        assert!(graph.contains_node("component:test-repo"));
        let node = graph.get_node("component:test-repo").unwrap();
        assert_eq!(node.name, "test-component");
        assert_eq!(node.node_type, NodeType::Container);
        assert_eq!(node.kind, Some("component".to_string()));

        assert_eq!(node.metadata.is_workspace_root, Some(true));
        assert_eq!(node.metadata.is_publishable, Some(true));
        assert_eq!(node.metadata.manifest_path, Some("Cargo.toml".to_string()));
    }

    #[test]
    fn test_build_path_index() {
        let mut builder = ComponentBuilder::new().unwrap();

        let components = vec![
            DiscoveredComponent {
                node_id: "component:repo:packages/core".to_string(),
                name: "core".to_string(),
                manifest_path: "packages/core/package.json".to_string(),
                directory: "packages/core".to_string(),
                info: ManifestInfo::new(),
            },
            DiscoveredComponent {
                node_id: "component:repo:packages/utils".to_string(),
                name: "utils".to_string(),
                manifest_path: "packages/utils/package.json".to_string(),
                directory: "packages/utils".to_string(),
                info: ManifestInfo::new(),
            },
        ];

        builder.build_path_index(&components);

        let index = builder.path_index();
        assert_eq!(index.len(), 2);
        assert_eq!(
            index.get(&PathBuf::from("packages/core")),
            Some(&"component:repo:packages/core".to_string())
        );
        assert_eq!(
            index.get(&PathBuf::from("packages/utils")),
            Some(&"component:repo:packages/utils".to_string())
        );
    }

    #[test]
    fn test_resolve_dependency() {
        let mut builder = ComponentBuilder::new().unwrap();

        let components = vec![
            DiscoveredComponent {
                node_id: "component:repo:packages/core".to_string(),
                name: "core".to_string(),
                manifest_path: "packages/core/package.json".to_string(),
                directory: "packages/core".to_string(),
                info: ManifestInfo::new(),
            },
            DiscoveredComponent {
                node_id: "component:repo:packages/utils".to_string(),
                name: "utils".to_string(),
                manifest_path: "packages/utils/package.json".to_string(),
                directory: "packages/utils".to_string(),
                info: ManifestInfo::new(),
            },
        ];

        builder.build_path_index(&components);

        let from = &components[0];
        let dep = LocalDependency::with_path(
            "utils".to_string(),
            "../utils".to_string(),
            DependencyType::Path,
        );

        let resolved = builder.resolve_dependency(from, &dep);
        assert_eq!(resolved, Some("component:repo:packages/utils".to_string()));
    }

    #[test]
    fn test_dependency_edges_created() {
        let mut builder = ComponentBuilder::new().unwrap();
        let mut graph = PetCodeGraph::new();

        let dep = LocalDependency::with_path(
            "utils".to_string(),
            "../utils".to_string(),
            DependencyType::Path,
        );

        let mut core_info = ManifestInfo::new();
        core_info.component_name = Some("core".to_string());
        core_info.local_dependencies.push(dep);

        let mut utils_info = ManifestInfo::new();
        utils_info.component_name = Some("utils".to_string());

        let components = vec![
            DiscoveredComponent {
                node_id: "component:repo:packages/core".to_string(),
                name: "core".to_string(),
                manifest_path: "packages/core/package.json".to_string(),
                directory: "packages/core".to_string(),
                info: core_info,
            },
            DiscoveredComponent {
                node_id: "component:repo:packages/utils".to_string(),
                name: "utils".to_string(),
                manifest_path: "packages/utils/package.json".to_string(),
                directory: "packages/utils".to_string(),
                info: utils_info,
            },
        ];

        builder
            .add_to_graph(&mut graph, "repo", &components)
            .unwrap();

        let deps_edges: Vec<_> = graph.edges_by_type(EdgeType::DependsOn).collect();
        assert_eq!(deps_edges.len(), 1);

        let (source, target, data) = &deps_edges[0];
        assert_eq!(source.id, "component:repo:packages/core");
        assert_eq!(target.id, "component:repo:packages/utils");
        assert_eq!(data.ident, Some("utils".to_string()));
        assert_eq!(data.version_spec, Some("path:../utils".to_string()));
    }
}