codeprysm_core/
graph.rs

1//! Graph Schema Definitions for Code Graph Model v2
2//!
3//! This module defines the schema for the Container/Callable/Data node model
4//! with declarative tag-based categorization and semantic metadata.
5//!
6//! Schema Version: 2.0
7//!
8//! This module provides the `PetCodeGraph` implementation using petgraph for efficient
9//! traversal and graph algorithms.
10
11use petgraph::Direction;
12use petgraph::stable_graph::{EdgeIndex, NodeIndex, StableGraph};
13use petgraph::visit::{EdgeRef, IntoEdgeReferences};
14use serde::{Deserialize, Serialize};
15use std::collections::HashMap;
16
/// Version identifier of the graph schema implemented by this module.
pub const GRAPH_SCHEMA_VERSION: &str = "2.0";
19
20// ============================================================================
21// Edge Types
22// ============================================================================
23
24/// Types of relationships between code entities.
25#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
26#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
27pub enum EdgeType {
28    /// Hierarchical containment (File→Module, Module→Class, Class→Method)
29    Contains,
30    /// Dependencies (Callable→Callable, Callable→Data, Data→Data)
31    Uses,
32    /// Definition relationships (Container→Data, Callable→Data)
33    Defines,
34    /// Component dependency (Component→Component for local workspace dependencies)
35    /// Used for: workspace:*, path dependencies, ProjectReference, replace directives
36    DependsOn,
37}
38
39impl EdgeType {
40    /// Get the string representation matching Python format
41    pub fn as_str(&self) -> &'static str {
42        match self {
43            EdgeType::Contains => "CONTAINS",
44            EdgeType::Uses => "USES",
45            EdgeType::Defines => "DEFINES",
46            EdgeType::DependsOn => "DEPENDS_ON",
47        }
48    }
49}
50
51// ============================================================================
52// Node Types
53// ============================================================================
54
55/// High-level node type classification.
56///
57/// Note: The legacy `FILE` type has been removed. Files are now represented as
58/// `Container` nodes with `kind="file"`. For backward compatibility, deserializing
59/// "FILE" from JSON/SQLite is handled via custom deserialization logic that
60/// converts it to `Container` with the appropriate kind.
61#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
62pub enum NodeType {
63    /// Structural organization entity (namespace, module, class, file, etc.)
64    Container,
65    /// Executable code entity (function, method, constructor)
66    Callable,
67    /// State and value entity (constant, variable, field, parameter)
68    Data,
69}
70
71impl NodeType {
72    /// Get the string representation matching Python format
73    pub fn as_str(&self) -> &'static str {
74        match self {
75            NodeType::Container => "Container",
76            NodeType::Callable => "Callable",
77            NodeType::Data => "Data",
78        }
79    }
80}
81
82/// Custom deserializer to handle legacy "FILE" type.
83/// Converts legacy "FILE" to Container (the kind must be set separately).
84impl<'de> Deserialize<'de> for NodeType {
85    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
86    where
87        D: serde::Deserializer<'de>,
88    {
89        let s = String::deserialize(deserializer)?;
90        match s.as_str() {
91            "Container" => Ok(NodeType::Container),
92            "Callable" => Ok(NodeType::Callable),
93            "Data" => Ok(NodeType::Data),
94            // Legacy: "FILE" is now Container with kind="file"
95            // Note: The kind field must be set to "file" by the caller
96            "FILE" => Ok(NodeType::Container),
97            _ => Err(serde::de::Error::unknown_variant(
98                &s,
99                &["Container", "Callable", "Data"],
100            )),
101        }
102    }
103}
104
105// ============================================================================
106// Kind Enums
107// ============================================================================
108
109/// Kinds of Container nodes - structural organization entities.
110#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
111#[serde(rename_all = "lowercase")]
112pub enum ContainerKind {
113    /// Workspace root container (for multi-repo workspaces)
114    Workspace,
115    /// Repository root container (with git metadata)
116    Repository,
117    /// Source file container
118    File,
119    /// Namespace, package, or module
120    Namespace,
121    /// Module or compilation unit
122    Module,
123    /// Package declaration
124    Package,
125    /// Type definition (class, struct, interface, enum, etc.)
126    Type,
127    /// Component (npm package, Cargo crate, Go module, C# project, etc.)
128    /// Represents a logical package with its own manifest file.
129    Component,
130}
131
132impl ContainerKind {
133    /// Get the string representation matching Python format
134    pub fn as_str(&self) -> &'static str {
135        match self {
136            ContainerKind::Workspace => "workspace",
137            ContainerKind::Repository => "repository",
138            ContainerKind::File => "file",
139            ContainerKind::Namespace => "namespace",
140            ContainerKind::Module => "module",
141            ContainerKind::Package => "package",
142            ContainerKind::Type => "type",
143            ContainerKind::Component => "component",
144        }
145    }
146}
147
148/// Kinds of Callable nodes - executable code entities.
149#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
150#[serde(rename_all = "lowercase")]
151pub enum CallableKind {
152    /// Function or procedure
153    Function,
154    /// Class or instance method
155    Method,
156    /// Constructor or initializer
157    Constructor,
158    /// Macro (Rust, C/C++)
159    Macro,
160}
161
162impl CallableKind {
163    /// Get the string representation matching Python format
164    pub fn as_str(&self) -> &'static str {
165        match self {
166            CallableKind::Function => "function",
167            CallableKind::Method => "method",
168            CallableKind::Constructor => "constructor",
169            CallableKind::Macro => "macro",
170        }
171    }
172}
173
174/// Kinds of Data nodes - state and value entities.
175#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
176#[serde(rename_all = "lowercase")]
177pub enum DataKind {
178    /// Constant or const value
179    Constant,
180    /// Variable or value binding
181    Value,
182    /// Class or struct field
183    Field,
184    /// Property (C#, Python @property, etc.)
185    Property,
186    /// Function/method parameter
187    Parameter,
188    /// Local variable within a callable
189    Local,
190}
191
192impl DataKind {
193    /// Get the string representation matching Python format
194    pub fn as_str(&self) -> &'static str {
195        match self {
196            DataKind::Constant => "constant",
197            DataKind::Value => "value",
198            DataKind::Field => "field",
199            DataKind::Property => "property",
200            DataKind::Parameter => "parameter",
201            DataKind::Local => "local",
202        }
203    }
204}
205
206/// Unified kind enum that can represent any node kind.
207#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
208#[serde(untagged)]
209pub enum NodeKind {
210    Container(ContainerKind),
211    Callable(CallableKind),
212    Data(DataKind),
213}
214
215impl NodeKind {
216    /// Get the string representation
217    pub fn as_str(&self) -> &'static str {
218        match self {
219            NodeKind::Container(k) => k.as_str(),
220            NodeKind::Callable(k) => k.as_str(),
221            NodeKind::Data(k) => k.as_str(),
222        }
223    }
224
225    /// Get the parent node type for this kind
226    pub fn node_type(&self) -> NodeType {
227        match self {
228            NodeKind::Container(_) => NodeType::Container,
229            NodeKind::Callable(_) => NodeType::Callable,
230            NodeKind::Data(_) => NodeType::Data,
231        }
232    }
233}
234
235// ============================================================================
236// Node Metadata
237// ============================================================================
238
239/// Optional metadata for code entities.
240///
241/// All fields are optional to support graceful degradation across languages.
242#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
243pub struct NodeMetadata {
244    /// Visibility: "public", "private", "protected", "internal"
245    #[serde(skip_serializing_if = "Option::is_none")]
246    pub visibility: Option<String>,
247
248    /// Async callable (function/method/constructor) - execution modifier
249    #[serde(rename = "async", skip_serializing_if = "Option::is_none")]
250    pub is_async: Option<bool>,
251
252    /// Static member
253    #[serde(rename = "static", skip_serializing_if = "Option::is_none")]
254    pub is_static: Option<bool>,
255
256    /// Abstract class/method
257    #[serde(rename = "abstract", skip_serializing_if = "Option::is_none")]
258    pub is_abstract: Option<bool>,
259
260    /// Virtual method
261    #[serde(rename = "virtual", skip_serializing_if = "Option::is_none")]
262    pub is_virtual: Option<bool>,
263
264    /// Python decorators or C# attributes
265    #[serde(skip_serializing_if = "Option::is_none")]
266    pub decorators: Option<Vec<String>>,
267
268    /// Other language-specific modifiers (e.g., final, sealed, inline)
269    #[serde(skip_serializing_if = "Option::is_none")]
270    pub modifiers: Option<Vec<String>>,
271
272    /// Semantic scope from overlay tags (e.g., "test", "benchmark", "example")
273    #[serde(skip_serializing_if = "Option::is_none")]
274    pub scope: Option<String>,
275
276    // --- Git metadata (for Repository containers) ---
277    /// Git remote URL (e.g., "https://github.com/org/repo.git")
278    #[serde(skip_serializing_if = "Option::is_none")]
279    pub git_remote: Option<String>,
280
281    /// Git branch name (e.g., "main", "feature/xyz")
282    #[serde(skip_serializing_if = "Option::is_none")]
283    pub git_branch: Option<String>,
284
285    /// Git commit SHA (e.g., "abc123def456...")
286    #[serde(skip_serializing_if = "Option::is_none")]
287    pub git_commit: Option<String>,
288
289    // --- Component metadata (for Component containers) ---
290    /// Whether this component is a workspace root (defines workspace members)
291    #[serde(skip_serializing_if = "Option::is_none")]
292    pub is_workspace_root: Option<bool>,
293
294    /// Whether this component is publishable to a registry
295    #[serde(skip_serializing_if = "Option::is_none")]
296    pub is_publishable: Option<bool>,
297
298    /// Path to the manifest file relative to repo root (for quick lookup)
299    #[serde(skip_serializing_if = "Option::is_none")]
300    pub manifest_path: Option<String>,
301}
302
303impl NodeMetadata {
304    /// Create empty metadata
305    pub fn new() -> Self {
306        Self::default()
307    }
308
309    /// Check if metadata has any values set
310    pub fn is_empty(&self) -> bool {
311        self.visibility.is_none()
312            && self.is_async.is_none()
313            && self.is_static.is_none()
314            && self.is_abstract.is_none()
315            && self.is_virtual.is_none()
316            && self.decorators.is_none()
317            && self.modifiers.is_none()
318            && self.scope.is_none()
319            && self.git_remote.is_none()
320            && self.git_branch.is_none()
321            && self.git_commit.is_none()
322            && self.is_workspace_root.is_none()
323            && self.is_publishable.is_none()
324            && self.manifest_path.is_none()
325    }
326
327    /// Create git metadata for a repository container
328    pub fn with_git(
329        mut self,
330        remote: Option<String>,
331        branch: Option<String>,
332        commit: Option<String>,
333    ) -> Self {
334        self.git_remote = remote;
335        self.git_branch = branch;
336        self.git_commit = commit;
337        self
338    }
339
340    /// Create component metadata for a component container
341    ///
342    /// # Arguments
343    /// * `is_workspace_root` - Whether this component defines workspace members
344    /// * `is_publishable` - Whether this component is publishable to a registry
345    /// * `manifest_path` - Path to the manifest file relative to repo root
346    pub fn with_component(
347        mut self,
348        is_workspace_root: Option<bool>,
349        is_publishable: Option<bool>,
350        manifest_path: Option<String>,
351    ) -> Self {
352        self.is_workspace_root = is_workspace_root;
353        self.is_publishable = is_publishable;
354        self.manifest_path = manifest_path;
355        self
356    }
357}
358
359// ============================================================================
360// Node
361// ============================================================================
362
363/// A node in the code graph representing a code entity.
364#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
365pub struct Node {
366    /// Hierarchical node ID (e.g., "file.py:Module:Class:Method")
367    pub id: String,
368
369    /// Entity name
370    pub name: String,
371
372    /// Node type: FILE, Container, Callable, or Data
373    #[serde(rename = "type")]
374    pub node_type: NodeType,
375
376    /// Kind within the node type (e.g., "function", "class", "field")
377    /// None for FILE nodes
378    #[serde(skip_serializing_if = "Option::is_none")]
379    pub kind: Option<String>,
380
381    /// Language-specific subtype (e.g., "struct", "interface", "class" for Container/type)
382    #[serde(skip_serializing_if = "Option::is_none")]
383    pub subtype: Option<String>,
384
385    /// Source file path
386    pub file: String,
387
388    /// Starting line number (1-indexed)
389    pub line: usize,
390
391    /// Ending line number (1-indexed)
392    pub end_line: usize,
393
394    /// Source code text
395    #[serde(skip_serializing_if = "Option::is_none")]
396    pub text: Option<String>,
397
398    /// Semantic metadata
399    #[serde(default, skip_serializing_if = "NodeMetadata::is_empty")]
400    pub metadata: NodeMetadata,
401
402    /// File content hash (only for FILE nodes)
403    #[serde(skip_serializing_if = "Option::is_none")]
404    pub hash: Option<String>,
405}
406
407impl Node {
408    /// Create a new Workspace container node (root for multi-repo workspaces)
409    pub fn workspace(name: String) -> Self {
410        Self {
411            id: name.clone(),
412            name: name.clone(),
413            node_type: NodeType::Container,
414            kind: Some(ContainerKind::Workspace.as_str().to_string()),
415            subtype: None,
416            file: String::new(), // Workspace has no file
417            line: 0,
418            end_line: 0,
419            text: None,
420            metadata: NodeMetadata::default(),
421            hash: None,
422        }
423    }
424
425    /// Create a new Repository container node (root of the graph hierarchy)
426    pub fn repository(name: String, metadata: NodeMetadata) -> Self {
427        Self {
428            id: name.clone(),
429            name: name.clone(),
430            node_type: NodeType::Container,
431            kind: Some(ContainerKind::Repository.as_str().to_string()),
432            subtype: None,
433            file: String::new(), // Repository has no file
434            line: 0,
435            end_line: 0,
436            text: None,
437            metadata,
438            hash: None,
439        }
440    }
441
442    /// Create a new Component container node (npm package, Cargo crate, Go module, etc.)
443    ///
444    /// # Arguments
445    /// * `id` - Hierarchical node ID (e.g., "my-repo:packages/core")
446    /// * `name` - Component name from manifest (e.g., "@myorg/core")
447    /// * `manifest_path` - Path to the manifest file relative to repo root
448    /// * `metadata` - Component metadata (is_workspace_root, is_publishable)
449    pub fn component(
450        id: String,
451        name: String,
452        manifest_path: String,
453        metadata: NodeMetadata,
454    ) -> Self {
455        Self {
456            id,
457            name,
458            node_type: NodeType::Container,
459            kind: Some(ContainerKind::Component.as_str().to_string()),
460            subtype: None,
461            file: manifest_path, // The manifest file path
462            line: 1,
463            end_line: 1,
464            text: None,
465            metadata,
466            hash: None,
467        }
468    }
469
470    /// Create a new source file Container node (replaces legacy FILE node type)
471    pub fn source_file(id: String, file_path: String, hash: String, line_count: usize) -> Self {
472        Self {
473            id,
474            name: file_path.clone(),
475            node_type: NodeType::Container,
476            kind: Some(ContainerKind::File.as_str().to_string()),
477            subtype: None,
478            file: file_path,
479            line: 1,
480            end_line: line_count.max(1),
481            text: None,
482            metadata: NodeMetadata::default(),
483            hash: Some(hash),
484        }
485    }
486
487    /// Create a new Container node
488    pub fn container(
489        id: String,
490        name: String,
491        kind: ContainerKind,
492        subtype: Option<String>,
493        file: String,
494        line: usize,
495        end_line: usize,
496    ) -> Self {
497        Self {
498            id,
499            name,
500            node_type: NodeType::Container,
501            kind: Some(kind.as_str().to_string()),
502            subtype,
503            file,
504            line,
505            end_line,
506            text: None,
507            metadata: NodeMetadata::default(),
508            hash: None,
509        }
510    }
511
512    /// Create a new Callable node
513    pub fn callable(
514        id: String,
515        name: String,
516        kind: CallableKind,
517        file: String,
518        line: usize,
519        end_line: usize,
520    ) -> Self {
521        Self {
522            id,
523            name,
524            node_type: NodeType::Callable,
525            kind: Some(kind.as_str().to_string()),
526            subtype: None,
527            file,
528            line,
529            end_line,
530            text: None,
531            metadata: NodeMetadata::default(),
532            hash: None,
533        }
534    }
535
536    /// Create a new Data node
537    pub fn data(
538        id: String,
539        name: String,
540        kind: DataKind,
541        subtype: Option<String>,
542        file: String,
543        line: usize,
544        end_line: usize,
545    ) -> Self {
546        Self {
547            id,
548            name,
549            node_type: NodeType::Data,
550            kind: Some(kind.as_str().to_string()),
551            subtype,
552            file,
553            line,
554            end_line,
555            text: None,
556            metadata: NodeMetadata::default(),
557            hash: None,
558        }
559    }
560
561    /// Set the source text
562    pub fn with_text(mut self, text: String) -> Self {
563        self.text = Some(text);
564        self
565    }
566
567    /// Set the metadata
568    pub fn with_metadata(mut self, metadata: NodeMetadata) -> Self {
569        self.metadata = metadata;
570        self
571    }
572
573    /// Check if this is a file node (Container with kind="file")
574    pub fn is_file(&self) -> bool {
575        self.node_type == NodeType::Container && self.kind.as_deref() == Some("file")
576    }
577
578    /// Check if this is a repository container node
579    pub fn is_repository(&self) -> bool {
580        self.node_type == NodeType::Container && self.kind.as_deref() == Some("repository")
581    }
582
583    /// Check if this is a workspace container node
584    pub fn is_workspace(&self) -> bool {
585        self.node_type == NodeType::Container && self.kind.as_deref() == Some("workspace")
586    }
587
588    /// Check if this is a component container node
589    pub fn is_component(&self) -> bool {
590        self.node_type == NodeType::Container && self.kind.as_deref() == Some("component")
591    }
592
593    /// Check if this is a Container node (any kind)
594    pub fn is_container(&self) -> bool {
595        self.node_type == NodeType::Container
596    }
597
598    /// Check if this is a Callable node
599    pub fn is_callable(&self) -> bool {
600        self.node_type == NodeType::Callable
601    }
602
603    /// Check if this is a Data node
604    pub fn is_data(&self) -> bool {
605        self.node_type == NodeType::Data
606    }
607
608    /// Get the container kind if this is a Container node
609    pub fn container_kind(&self) -> Option<ContainerKind> {
610        if self.node_type == NodeType::Container {
611            self.kind.as_deref().and_then(parse_container_kind)
612        } else {
613            None
614        }
615    }
616}
617
618// ============================================================================
619// Edge
620// ============================================================================
621
622/// An edge in the code graph representing a relationship between nodes.
623#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
624pub struct Edge {
625    /// Source node ID
626    pub source: String,
627
628    /// Target node ID
629    pub target: String,
630
631    /// Relationship type
632    #[serde(rename = "type")]
633    pub edge_type: EdgeType,
634
635    /// Line number where the reference occurs (for USES edges)
636    #[serde(skip_serializing_if = "Option::is_none")]
637    pub ref_line: Option<usize>,
638
639    /// The identifier text at the reference site (for USES/DEPENDS_ON edges)
640    #[serde(skip_serializing_if = "Option::is_none")]
641    pub ident: Option<String>,
642
643    // --- DependsOn edge metadata (for Component dependencies) ---
644    /// Version specification (e.g., "workspace:*", "^1.0.0", "path:../core")
645    #[serde(skip_serializing_if = "Option::is_none")]
646    pub version_spec: Option<String>,
647
648    /// Whether this is a development dependency (devDependencies, dev-dependencies, etc.)
649    #[serde(skip_serializing_if = "Option::is_none")]
650    pub is_dev_dependency: Option<bool>,
651}
652
653impl Edge {
654    /// Create a CONTAINS edge (parent contains child)
655    pub fn contains(parent: String, child: String) -> Self {
656        Self {
657            source: parent,
658            target: child,
659            edge_type: EdgeType::Contains,
660            ref_line: None,
661            ident: None,
662            version_spec: None,
663            is_dev_dependency: None,
664        }
665    }
666
667    /// Create a USES edge (source uses/references target)
668    pub fn uses(
669        source: String,
670        target: String,
671        ref_line: Option<usize>,
672        ident: Option<String>,
673    ) -> Self {
674        Self {
675            source,
676            target,
677            edge_type: EdgeType::Uses,
678            ref_line,
679            ident,
680            version_spec: None,
681            is_dev_dependency: None,
682        }
683    }
684
685    /// Create a DEFINES edge (container defines member)
686    pub fn defines(container: String, member: String) -> Self {
687        Self {
688            source: container,
689            target: member,
690            edge_type: EdgeType::Defines,
691            ref_line: None,
692            ident: None,
693            version_spec: None,
694            is_dev_dependency: None,
695        }
696    }
697
698    /// Create a DEPENDS_ON edge (component depends on another component)
699    ///
700    /// # Arguments
701    /// * `source` - The dependent component node ID
702    /// * `target` - The dependency component node ID
703    /// * `ident` - The dependency name as specified in manifest (e.g., "@myorg/core")
704    /// * `version_spec` - Version specification (e.g., "workspace:*", "^1.0.0", "path:../core")
705    /// * `is_dev` - Whether this is a development dependency
706    pub fn depends_on(
707        source: String,
708        target: String,
709        ident: Option<String>,
710        version_spec: Option<String>,
711        is_dev: Option<bool>,
712    ) -> Self {
713        Self {
714            source,
715            target,
716            edge_type: EdgeType::DependsOn,
717            ref_line: None,
718            ident,
719            version_spec,
720            is_dev_dependency: is_dev,
721        }
722    }
723}
724
725// ============================================================================
726// Helper Functions
727// ============================================================================
728
729/// Validate that a kind value is valid for the given node type.
730pub fn validate_node_kind(node_type: NodeType, kind: &str) -> bool {
731    match node_type {
732        NodeType::Container => matches!(
733            kind,
734            "workspace"
735                | "repository"
736                | "file"
737                | "namespace"
738                | "module"
739                | "package"
740                | "type"
741                | "component"
742        ),
743        NodeType::Callable => matches!(kind, "function" | "method" | "constructor" | "macro"),
744        NodeType::Data => matches!(
745            kind,
746            "constant" | "value" | "field" | "property" | "parameter" | "local"
747        ),
748    }
749}
750
751/// Determine the node type from a kind value.
752pub fn get_node_type_from_kind(kind: &str) -> Option<NodeType> {
753    match kind {
754        "workspace" | "repository" | "file" | "namespace" | "module" | "package" | "type"
755        | "component" => Some(NodeType::Container),
756        "function" | "method" | "constructor" | "macro" => Some(NodeType::Callable),
757        "constant" | "value" | "field" | "property" | "parameter" | "local" => Some(NodeType::Data),
758        _ => None,
759    }
760}
761
762/// Parse a kind string into the appropriate Kind enum
763pub fn parse_container_kind(kind: &str) -> Option<ContainerKind> {
764    match kind {
765        "workspace" => Some(ContainerKind::Workspace),
766        "repository" => Some(ContainerKind::Repository),
767        "file" => Some(ContainerKind::File),
768        "namespace" => Some(ContainerKind::Namespace),
769        "module" => Some(ContainerKind::Module),
770        "package" => Some(ContainerKind::Package),
771        "type" => Some(ContainerKind::Type),
772        "component" => Some(ContainerKind::Component),
773        _ => None,
774    }
775}
776
777/// Parse a kind string into CallableKind
778pub fn parse_callable_kind(kind: &str) -> Option<CallableKind> {
779    match kind {
780        "function" => Some(CallableKind::Function),
781        "method" => Some(CallableKind::Method),
782        "constructor" => Some(CallableKind::Constructor),
783        "macro" => Some(CallableKind::Macro),
784        _ => None,
785    }
786}
787
788/// Parse a kind string into DataKind
789pub fn parse_data_kind(kind: &str) -> Option<DataKind> {
790    match kind {
791        "constant" => Some(DataKind::Constant),
792        "value" => Some(DataKind::Value),
793        "field" => Some(DataKind::Field),
794        "property" => Some(DataKind::Property),
795        "parameter" => Some(DataKind::Parameter),
796        "local" => Some(DataKind::Local),
797        _ => None,
798    }
799}
800
801// ============================================================================
802// PetGraph-Based Code Graph (for efficient traversal and algorithms)
803// ============================================================================
804
805/// Edge data stored as edge weights in petgraph.
806///
807/// This struct carries the relationship information for edges in the graph,
808/// enabling efficient traversal while preserving edge semantics.
809#[derive(Debug, Clone, PartialEq, Eq)]
810pub struct EdgeData {
811    /// Relationship type (CONTAINS, USES, DEFINES, DEPENDS_ON)
812    pub edge_type: EdgeType,
813    /// Line number where the reference occurs (for USES edges)
814    pub ref_line: Option<usize>,
815    /// The identifier text at the reference site (for USES/DEPENDS_ON edges)
816    pub ident: Option<String>,
817    /// Version specification (for DEPENDS_ON edges)
818    pub version_spec: Option<String>,
819    /// Whether this is a development dependency (for DEPENDS_ON edges)
820    pub is_dev_dependency: Option<bool>,
821}
822
823impl EdgeData {
824    /// Create a CONTAINS edge data
825    pub fn contains() -> Self {
826        Self {
827            edge_type: EdgeType::Contains,
828            ref_line: None,
829            ident: None,
830            version_spec: None,
831            is_dev_dependency: None,
832        }
833    }
834
835    /// Create a USES edge data
836    pub fn uses(ref_line: Option<usize>, ident: Option<String>) -> Self {
837        Self {
838            edge_type: EdgeType::Uses,
839            ref_line,
840            ident,
841            version_spec: None,
842            is_dev_dependency: None,
843        }
844    }
845
846    /// Create a DEFINES edge data
847    pub fn defines() -> Self {
848        Self {
849            edge_type: EdgeType::Defines,
850            ref_line: None,
851            ident: None,
852            version_spec: None,
853            is_dev_dependency: None,
854        }
855    }
856
857    /// Create a DEPENDS_ON edge data (component dependency)
858    ///
859    /// # Arguments
860    /// * `ident` - The dependency name as specified in manifest
861    /// * `version_spec` - Version specification (e.g., "workspace:*", "^1.0.0")
862    /// * `is_dev` - Whether this is a development dependency
863    pub fn depends_on(
864        ident: Option<String>,
865        version_spec: Option<String>,
866        is_dev: Option<bool>,
867    ) -> Self {
868        Self {
869            edge_type: EdgeType::DependsOn,
870            ref_line: None,
871            ident,
872            version_spec,
873            is_dev_dependency: is_dev,
874        }
875    }
876}
877
878impl From<&Edge> for EdgeData {
879    fn from(edge: &Edge) -> Self {
880        Self {
881            edge_type: edge.edge_type,
882            ref_line: edge.ref_line,
883            ident: edge.ident.clone(),
884            version_spec: edge.version_spec.clone(),
885            is_dev_dependency: edge.is_dev_dependency,
886        }
887    }
888}
889
890/// A petgraph-based code graph for efficient traversal and graph algorithms.
891///
892/// This implementation uses `petgraph::StableGraph` which:
893/// - Supports O(1) neighbor access via adjacency lists
894/// - Provides stable indices (node/edge removal doesn't invalidate others)
895/// - Enables built-in graph algorithms (BFS, DFS, topological sort, etc.)
896///
897/// Use this for runtime operations that require graph traversal.
898#[derive(Debug, Clone)]
899pub struct PetCodeGraph {
900    /// The underlying petgraph instance
901    graph: StableGraph<Node, EdgeData, petgraph::Directed>,
902
903    /// Map from node ID (string) to petgraph NodeIndex for O(1) lookup
904    node_index_map: HashMap<String, NodeIndex>,
905
906    /// Schema version for compatibility
907    schema_version: String,
908}
909
/// The default graph is the empty graph produced by [`PetCodeGraph::new`].
impl Default for PetCodeGraph {
    fn default() -> Self {
        // Delegate so the schema version is always populated.
        Self::new()
    }
}
915
916impl PetCodeGraph {
917    /// Create a new empty petgraph-based code graph
918    pub fn new() -> Self {
919        Self {
920            graph: StableGraph::new(),
921            node_index_map: HashMap::new(),
922            schema_version: GRAPH_SCHEMA_VERSION.to_string(),
923        }
924    }
925
926    /// Get the schema version
927    pub fn schema_version(&self) -> &str {
928        &self.schema_version
929    }
930
931    // ------------------------------------------------------------------------
932    // Node Operations
933    // ------------------------------------------------------------------------
934
935    /// Add a node to the graph, returning its NodeIndex.
936    ///
937    /// If a node with the same ID already exists, it will be replaced.
938    pub fn add_node(&mut self, node: Node) -> NodeIndex {
939        let node_id = node.id.clone();
940
941        // Remove existing node if present (replace semantics)
942        if let Some(&existing_idx) = self.node_index_map.get(&node_id) {
943            self.graph.remove_node(existing_idx);
944        }
945
946        let idx = self.graph.add_node(node);
947        self.node_index_map.insert(node_id, idx);
948        idx
949    }
950
951    /// Get a node by its string ID
952    pub fn get_node(&self, id: &str) -> Option<&Node> {
953        self.node_index_map
954            .get(id)
955            .and_then(|&idx| self.graph.node_weight(idx))
956    }
957
958    /// Get a mutable node by its string ID
959    pub fn get_node_mut(&mut self, id: &str) -> Option<&mut Node> {
960        self.node_index_map
961            .get(id)
962            .copied()
963            .and_then(|idx| self.graph.node_weight_mut(idx))
964    }
965
966    /// Get a node by its NodeIndex
967    pub fn get_node_by_index(&self, idx: NodeIndex) -> Option<&Node> {
968        self.graph.node_weight(idx)
969    }
970
971    /// Get the NodeIndex for a node ID
972    pub fn get_node_index(&self, id: &str) -> Option<NodeIndex> {
973        self.node_index_map.get(id).copied()
974    }
975
976    /// Check if the graph contains a node with the given ID
977    pub fn contains_node(&self, id: &str) -> bool {
978        self.node_index_map.contains_key(id)
979    }
980
981    /// Remove a node and all its incident edges
982    pub fn remove_node(&mut self, id: &str) -> Option<Node> {
983        if let Some(idx) = self.node_index_map.remove(id) {
984            self.graph.remove_node(idx)
985        } else {
986            None
987        }
988    }
989
990    /// Get the number of nodes
991    pub fn node_count(&self) -> usize {
992        self.graph.node_count()
993    }
994
995    /// Iterate over all nodes
996    pub fn iter_nodes(&self) -> impl Iterator<Item = &Node> {
997        self.graph.node_weights()
998    }
999
1000    /// Get nodes by type
1001    pub fn nodes_by_type(&self, node_type: NodeType) -> impl Iterator<Item = &Node> {
1002        self.graph
1003            .node_weights()
1004            .filter(move |n| n.node_type == node_type)
1005    }
1006
1007    // ------------------------------------------------------------------------
1008    // Edge Operations
1009    // ------------------------------------------------------------------------
1010
1011    /// Add an edge between two nodes by their string IDs.
1012    ///
1013    /// Returns `Some(EdgeIndex)` if both nodes exist, `None` otherwise.
1014    pub fn add_edge(
1015        &mut self,
1016        source_id: &str,
1017        target_id: &str,
1018        data: EdgeData,
1019    ) -> Option<EdgeIndex> {
1020        let source_idx = self.node_index_map.get(source_id)?;
1021        let target_idx = self.node_index_map.get(target_id)?;
1022        Some(self.graph.add_edge(*source_idx, *target_idx, data))
1023    }
1024
1025    /// Add an edge using an Edge struct.
1026    ///
1027    /// Returns `Some(EdgeIndex)` if both nodes exist, `None` otherwise.
1028    pub fn add_edge_from_struct(&mut self, edge: &Edge) -> Option<EdgeIndex> {
1029        self.add_edge(
1030            &edge.source,
1031            &edge.target,
1032            EdgeData {
1033                edge_type: edge.edge_type,
1034                ref_line: edge.ref_line,
1035                ident: edge.ident.clone(),
1036                version_spec: edge.version_spec.clone(),
1037                is_dev_dependency: edge.is_dev_dependency,
1038            },
1039        )
1040    }
1041
1042    /// Add an edge using NodeIndices directly
1043    pub fn add_edge_by_index(
1044        &mut self,
1045        source: NodeIndex,
1046        target: NodeIndex,
1047        data: EdgeData,
1048    ) -> EdgeIndex {
1049        self.graph.add_edge(source, target, data)
1050    }
1051
1052    /// Get all incoming edges for a node (edges where this node is the target)
1053    pub fn incoming_edges(&self, id: &str) -> impl Iterator<Item = (&Node, &EdgeData)> {
1054        let idx = self.node_index_map.get(id).copied();
1055        self.graph
1056            .edges_directed(
1057                idx.unwrap_or(NodeIndex::new(usize::MAX)),
1058                Direction::Incoming,
1059            )
1060            .filter_map(move |edge_ref| {
1061                let source_node = self.graph.node_weight(edge_ref.source())?;
1062                Some((source_node, edge_ref.weight()))
1063            })
1064    }
1065
1066    /// Get all outgoing edges from a node (edges where this node is the source)
1067    pub fn outgoing_edges(&self, id: &str) -> impl Iterator<Item = (&Node, &EdgeData)> {
1068        let idx = self.node_index_map.get(id).copied();
1069        self.graph
1070            .edges_directed(
1071                idx.unwrap_or(NodeIndex::new(usize::MAX)),
1072                Direction::Outgoing,
1073            )
1074            .filter_map(move |edge_ref| {
1075                let target_node = self.graph.node_weight(edge_ref.target())?;
1076                Some((target_node, edge_ref.weight()))
1077            })
1078    }
1079
1080    /// Get the number of edges
1081    pub fn edge_count(&self) -> usize {
1082        self.graph.edge_count()
1083    }
1084
1085    /// Iterate over all edges, returning Edge structs.
1086    ///
1087    /// Note: This creates Edge structs on-the-fly. For performance-critical code,
1088    /// consider using `edges_by_type()` or `outgoing_edges()` instead.
1089    pub fn iter_edges(&self) -> impl Iterator<Item = Edge> + '_ {
1090        self.graph.edge_references().filter_map(move |edge_ref| {
1091            let source = self.graph.node_weight(edge_ref.source())?;
1092            let target = self.graph.node_weight(edge_ref.target())?;
1093            let edge_data = edge_ref.weight();
1094            Some(Edge {
1095                source: source.id.clone(),
1096                target: target.id.clone(),
1097                edge_type: edge_data.edge_type,
1098                ref_line: edge_data.ref_line,
1099                ident: edge_data.ident.clone(),
1100                version_spec: edge_data.version_spec.clone(),
1101                is_dev_dependency: edge_data.is_dev_dependency,
1102            })
1103        })
1104    }
1105
1106    /// Get edges by type
1107    pub fn edges_by_type(
1108        &self,
1109        edge_type: EdgeType,
1110    ) -> impl Iterator<Item = (&Node, &Node, &EdgeData)> {
1111        self.graph.edge_references().filter_map(move |edge_ref| {
1112            if edge_ref.weight().edge_type == edge_type {
1113                let source = self.graph.node_weight(edge_ref.source())?;
1114                let target = self.graph.node_weight(edge_ref.target())?;
1115                Some((source, target, edge_ref.weight()))
1116            } else {
1117                None
1118            }
1119        })
1120    }
1121
1122    // ------------------------------------------------------------------------
1123    // Traversal Operations
1124    // ------------------------------------------------------------------------
1125
1126    /// Get all neighbor nodes (both incoming and outgoing)
1127    pub fn neighbors(&self, id: &str) -> impl Iterator<Item = &Node> {
1128        let idx = self.node_index_map.get(id).copied();
1129        self.graph
1130            .neighbors_undirected(idx.unwrap_or(NodeIndex::new(usize::MAX)))
1131            .filter_map(|neighbor_idx| self.graph.node_weight(neighbor_idx))
1132    }
1133
1134    /// Get children (outgoing CONTAINS edges)
1135    pub fn children(&self, id: &str) -> impl Iterator<Item = &Node> {
1136        self.outgoing_edges(id)
1137            .filter(|(_, edge_data)| edge_data.edge_type == EdgeType::Contains)
1138            .map(|(node, _)| node)
1139    }
1140
1141    /// Get parent (incoming CONTAINS edge) - typically only one
1142    pub fn parent(&self, id: &str) -> Option<&Node> {
1143        self.incoming_edges(id)
1144            .find(|(_, edge_data)| edge_data.edge_type == EdgeType::Contains)
1145            .map(|(node, _)| node)
1146    }
1147
1148    // ------------------------------------------------------------------------
1149    // File Operations
1150    // ------------------------------------------------------------------------
1151
1152    /// Remove all nodes from a file and their incident edges
1153    pub fn remove_file_nodes(&mut self, file_path: &str) {
1154        // Collect node IDs to remove
1155        let ids_to_remove: Vec<String> = self
1156            .graph
1157            .node_weights()
1158            .filter(|n| n.file == file_path)
1159            .map(|n| n.id.clone())
1160            .collect();
1161
1162        // Remove nodes (edges are automatically removed by petgraph)
1163        for id in ids_to_remove {
1164            self.remove_node(&id);
1165        }
1166    }
1167
1168    // ------------------------------------------------------------------------
1169    // Low-level Access (for advanced use cases)
1170    // ------------------------------------------------------------------------
1171
1172    /// Get a reference to the underlying petgraph
1173    pub fn inner(&self) -> &StableGraph<Node, EdgeData, petgraph::Directed> {
1174        &self.graph
1175    }
1176
1177    /// Get a mutable reference to the underlying petgraph
1178    pub fn inner_mut(&mut self) -> &mut StableGraph<Node, EdgeData, petgraph::Directed> {
1179        &mut self.graph
1180    }
1181
1182    /// Get a reference to the node index map
1183    pub fn node_index_map(&self) -> &HashMap<String, NodeIndex> {
1184        &self.node_index_map
1185    }
1186}
1187
1188#[cfg(test)]
1189mod tests {
1190    use super::*;
1191
1192    #[test]
1193    fn test_edge_type_serialization() {
1194        let edge_type = EdgeType::Contains;
1195        let json = serde_json::to_string(&edge_type).unwrap();
1196        assert_eq!(json, "\"CONTAINS\"");
1197
1198        let edge_type = EdgeType::Uses;
1199        let json = serde_json::to_string(&edge_type).unwrap();
1200        assert_eq!(json, "\"USES\"");
1201    }
1202
1203    #[test]
1204    fn test_node_type_serialization() {
1205        // Test serialization
1206        let node_type = NodeType::Container;
1207        let json = serde_json::to_string(&node_type).unwrap();
1208        assert_eq!(json, "\"Container\"");
1209
1210        let node_type = NodeType::Callable;
1211        let json = serde_json::to_string(&node_type).unwrap();
1212        assert_eq!(json, "\"Callable\"");
1213
1214        let node_type = NodeType::Data;
1215        let json = serde_json::to_string(&node_type).unwrap();
1216        assert_eq!(json, "\"Data\"");
1217    }
1218
1219    #[test]
1220    fn test_node_type_legacy_file_deserialization() {
1221        // Test that legacy "FILE" deserializes to Container (for backward compatibility)
1222        let node_type: NodeType = serde_json::from_str("\"FILE\"").unwrap();
1223        assert_eq!(node_type, NodeType::Container);
1224
1225        // Normal deserialization should still work
1226        let node_type: NodeType = serde_json::from_str("\"Container\"").unwrap();
1227        assert_eq!(node_type, NodeType::Container);
1228    }
1229
1230    #[test]
1231    fn test_container_kind_serialization() {
1232        let kind = ContainerKind::Type;
1233        let json = serde_json::to_string(&kind).unwrap();
1234        assert_eq!(json, "\"type\"");
1235    }
1236
1237    #[test]
1238    fn test_node_metadata_empty() {
1239        let metadata = NodeMetadata::default();
1240        assert!(metadata.is_empty());
1241
1242        let metadata = NodeMetadata {
1243            visibility: Some("public".to_string()),
1244            ..Default::default()
1245        };
1246        assert!(!metadata.is_empty());
1247    }
1248
1249    #[test]
1250    fn test_node_creation() {
1251        let node = Node::callable(
1252            "test.py:my_func".to_string(),
1253            "my_func".to_string(),
1254            CallableKind::Function,
1255            "test.py".to_string(),
1256            10,
1257            20,
1258        );
1259
1260        assert_eq!(node.id, "test.py:my_func");
1261        assert_eq!(node.name, "my_func");
1262        assert_eq!(node.node_type, NodeType::Callable);
1263        assert_eq!(node.kind, Some("function".to_string()));
1264        assert_eq!(node.file, "test.py");
1265        assert_eq!(node.line, 10);
1266        assert_eq!(node.end_line, 20);
1267    }
1268
1269    #[test]
1270    fn test_file_node() {
1271        let node = Node::source_file(
1272            "test.py".to_string(),
1273            "test.py".to_string(),
1274            "abc123".to_string(),
1275            100,
1276        );
1277
1278        assert!(node.is_file());
1279        assert_eq!(node.hash, Some("abc123".to_string()));
1280        // Files are now Containers with kind="file"
1281        assert_eq!(node.kind.as_deref(), Some("file"));
1282        assert_eq!(node.node_type, NodeType::Container);
1283    }
1284
1285    #[test]
1286    fn test_edge_creation() {
1287        let edge = Edge::contains("parent".to_string(), "child".to_string());
1288        assert_eq!(edge.edge_type, EdgeType::Contains);
1289        assert!(edge.ref_line.is_none());
1290
1291        let edge = Edge::uses(
1292            "caller".to_string(),
1293            "callee".to_string(),
1294            Some(42),
1295            Some("func_name".to_string()),
1296        );
1297        assert_eq!(edge.edge_type, EdgeType::Uses);
1298        assert_eq!(edge.ref_line, Some(42));
1299        assert_eq!(edge.ident, Some("func_name".to_string()));
1300    }
1301
1302    #[test]
1303    fn test_validate_node_kind() {
1304        assert!(validate_node_kind(NodeType::Container, "type"));
1305        assert!(validate_node_kind(NodeType::Container, "namespace"));
1306        assert!(!validate_node_kind(NodeType::Container, "function"));
1307
1308        assert!(validate_node_kind(NodeType::Callable, "function"));
1309        assert!(validate_node_kind(NodeType::Callable, "method"));
1310        assert!(!validate_node_kind(NodeType::Callable, "type"));
1311
1312        assert!(validate_node_kind(NodeType::Data, "field"));
1313        assert!(validate_node_kind(NodeType::Data, "parameter"));
1314        assert!(!validate_node_kind(NodeType::Data, "function"));
1315    }
1316
1317    #[test]
1318    fn test_get_node_type_from_kind() {
1319        assert_eq!(get_node_type_from_kind("type"), Some(NodeType::Container));
1320        assert_eq!(
1321            get_node_type_from_kind("function"),
1322            Some(NodeType::Callable)
1323        );
1324        assert_eq!(get_node_type_from_kind("field"), Some(NodeType::Data));
1325        assert_eq!(get_node_type_from_kind("unknown"), None);
1326    }
1327
1328    #[test]
1329    fn test_nodes_by_type() {
1330        let mut graph = PetCodeGraph::new();
1331
1332        // File is now a Container with kind="file"
1333        graph.add_node(Node::source_file(
1334            "a.py".to_string(),
1335            "a.py".to_string(),
1336            "x".to_string(),
1337            100,
1338        ));
1339        graph.add_node(Node::callable(
1340            "a.py:f".to_string(),
1341            "f".to_string(),
1342            CallableKind::Function,
1343            "a.py".to_string(),
1344            1,
1345            1,
1346        ));
1347        graph.add_node(Node::container(
1348            "a.py:C".to_string(),
1349            "C".to_string(),
1350            ContainerKind::Type,
1351            None,
1352            "a.py".to_string(),
1353            1,
1354            1,
1355        ));
1356
1357        // Files are now Containers, so use is_file() to filter
1358        let files: Vec<_> = graph.iter_nodes().filter(|n| n.is_file()).collect();
1359        assert_eq!(files.len(), 1);
1360
1361        let callables: Vec<_> = graph.nodes_by_type(NodeType::Callable).collect();
1362        assert_eq!(callables.len(), 1);
1363
1364        // Containers now includes file + class = 2
1365        let containers: Vec<_> = graph.nodes_by_type(NodeType::Container).collect();
1366        assert_eq!(containers.len(), 2);
1367    }
1368
1369    #[test]
1370    fn test_repository_node() {
1371        let metadata = NodeMetadata::default().with_git(
1372            Some("https://github.com/org/repo.git".to_string()),
1373            Some("main".to_string()),
1374            Some("abc123".to_string()),
1375        );
1376        let node = Node::repository("my-repo".to_string(), metadata);
1377
1378        assert!(node.is_repository());
1379        assert!(node.is_container());
1380        assert!(!node.is_file());
1381        assert_eq!(node.id, "my-repo");
1382        assert_eq!(node.name, "my-repo");
1383        assert_eq!(node.kind, Some("repository".to_string()));
1384        assert_eq!(
1385            node.metadata.git_remote,
1386            Some("https://github.com/org/repo.git".to_string())
1387        );
1388        assert_eq!(node.metadata.git_branch, Some("main".to_string()));
1389        assert_eq!(node.metadata.git_commit, Some("abc123".to_string()));
1390        assert_eq!(node.container_kind(), Some(ContainerKind::Repository));
1391    }
1392
1393    #[test]
1394    fn test_source_file_node() {
1395        let node = Node::source_file(
1396            "src/main.rs".to_string(),
1397            "src/main.rs".to_string(),
1398            "sha256:abc123".to_string(),
1399            100,
1400        );
1401
1402        assert!(node.is_file());
1403        assert!(node.is_container());
1404        assert!(!node.is_repository());
1405        assert_eq!(node.node_type, NodeType::Container);
1406        assert_eq!(node.kind, Some("file".to_string()));
1407        assert_eq!(node.hash, Some("sha256:abc123".to_string()));
1408        assert_eq!(node.line, 1);
1409        assert_eq!(node.end_line, 100);
1410        assert_eq!(node.container_kind(), Some(ContainerKind::File));
1411    }
1412
1413    #[test]
1414    fn test_is_file() {
1415        // File nodes are Container with kind="file"
1416        let file = Node::source_file(
1417            "test.py".to_string(),
1418            "test.py".to_string(),
1419            "abc".to_string(),
1420            50,
1421        );
1422        assert!(file.is_file());
1423
1424        // Non-file containers should not be files
1425        let class = Node::container(
1426            "test.py:MyClass".to_string(),
1427            "MyClass".to_string(),
1428            ContainerKind::Type,
1429            Some("class".to_string()),
1430            "test.py".to_string(),
1431            1,
1432            10,
1433        );
1434        assert!(!class.is_file());
1435    }
1436
1437    #[test]
1438    fn test_container_kind_parsing() {
1439        assert_eq!(
1440            parse_container_kind("repository"),
1441            Some(ContainerKind::Repository)
1442        );
1443        assert_eq!(parse_container_kind("file"), Some(ContainerKind::File));
1444        assert_eq!(
1445            parse_container_kind("namespace"),
1446            Some(ContainerKind::Namespace)
1447        );
1448        assert_eq!(parse_container_kind("module"), Some(ContainerKind::Module));
1449        assert_eq!(
1450            parse_container_kind("package"),
1451            Some(ContainerKind::Package)
1452        );
1453        assert_eq!(parse_container_kind("type"), Some(ContainerKind::Type));
1454        assert_eq!(parse_container_kind("invalid"), None);
1455    }
1456
1457    #[test]
1458    fn test_validate_container_kinds() {
1459        assert!(validate_node_kind(NodeType::Container, "repository"));
1460        assert!(validate_node_kind(NodeType::Container, "file"));
1461        assert!(validate_node_kind(NodeType::Container, "type"));
1462        assert!(!validate_node_kind(NodeType::Container, "invalid"));
1463    }
1464
1465    #[test]
1466    fn test_get_node_type_from_new_kinds() {
1467        assert_eq!(
1468            get_node_type_from_kind("repository"),
1469            Some(NodeType::Container)
1470        );
1471        assert_eq!(get_node_type_from_kind("file"), Some(NodeType::Container));
1472    }
1473
1474    #[test]
1475    fn test_git_metadata() {
1476        let metadata = NodeMetadata::default().with_git(
1477            Some("origin".to_string()),
1478            Some("develop".to_string()),
1479            Some("deadbeef".to_string()),
1480        );
1481
1482        assert!(!metadata.is_empty());
1483        assert_eq!(metadata.git_remote, Some("origin".to_string()));
1484        assert_eq!(metadata.git_branch, Some("develop".to_string()));
1485        assert_eq!(metadata.git_commit, Some("deadbeef".to_string()));
1486    }
1487
1488    // ========================================================================
1489    // PetCodeGraph Tests
1490    // ========================================================================
1491
1492    #[test]
1493    fn test_pet_code_graph_new() {
1494        let graph = PetCodeGraph::new();
1495        assert_eq!(graph.node_count(), 0);
1496        assert_eq!(graph.edge_count(), 0);
1497        assert_eq!(graph.schema_version(), GRAPH_SCHEMA_VERSION);
1498    }
1499
1500    #[test]
1501    fn test_pet_code_graph_add_node() {
1502        let mut graph = PetCodeGraph::new();
1503
1504        let node = Node::callable(
1505            "test.py:my_func".to_string(),
1506            "my_func".to_string(),
1507            CallableKind::Function,
1508            "test.py".to_string(),
1509            1,
1510            10,
1511        );
1512
1513        let idx = graph.add_node(node);
1514        assert_eq!(graph.node_count(), 1);
1515        assert!(graph.contains_node("test.py:my_func"));
1516
1517        let retrieved = graph.get_node("test.py:my_func").unwrap();
1518        assert_eq!(retrieved.name, "my_func");
1519
1520        // Check index lookup
1521        let by_index = graph.get_node_by_index(idx).unwrap();
1522        assert_eq!(by_index.id, "test.py:my_func");
1523    }
1524
1525    #[test]
1526    fn test_pet_code_graph_remove_node() {
1527        let mut graph = PetCodeGraph::new();
1528
1529        graph.add_node(Node::callable(
1530            "test.py:func1".to_string(),
1531            "func1".to_string(),
1532            CallableKind::Function,
1533            "test.py".to_string(),
1534            1,
1535            5,
1536        ));
1537        graph.add_node(Node::callable(
1538            "test.py:func2".to_string(),
1539            "func2".to_string(),
1540            CallableKind::Function,
1541            "test.py".to_string(),
1542            6,
1543            10,
1544        ));
1545
1546        assert_eq!(graph.node_count(), 2);
1547
1548        let removed = graph.remove_node("test.py:func1");
1549        assert!(removed.is_some());
1550        assert_eq!(removed.unwrap().name, "func1");
1551        assert_eq!(graph.node_count(), 1);
1552        assert!(!graph.contains_node("test.py:func1"));
1553        assert!(graph.contains_node("test.py:func2"));
1554    }
1555
1556    #[test]
1557    fn test_pet_code_graph_add_edge() {
1558        let mut graph = PetCodeGraph::new();
1559
1560        graph.add_node(Node::source_file(
1561            "test.py".to_string(),
1562            "test.py".to_string(),
1563            "abc".to_string(),
1564            100,
1565        ));
1566        graph.add_node(Node::callable(
1567            "test.py:func".to_string(),
1568            "func".to_string(),
1569            CallableKind::Function,
1570            "test.py".to_string(),
1571            1,
1572            10,
1573        ));
1574
1575        let edge_idx = graph.add_edge("test.py", "test.py:func", EdgeData::contains());
1576        assert!(edge_idx.is_some());
1577        assert_eq!(graph.edge_count(), 1);
1578
1579        // Adding edge with non-existent nodes returns None
1580        let invalid_edge = graph.add_edge("nonexistent", "test.py:func", EdgeData::contains());
1581        assert!(invalid_edge.is_none());
1582    }
1583
1584    #[test]
1585    fn test_pet_code_graph_incoming_outgoing_edges() {
1586        let mut graph = PetCodeGraph::new();
1587
1588        graph.add_node(Node::source_file(
1589            "test.py".to_string(),
1590            "test.py".to_string(),
1591            "abc".to_string(),
1592            100,
1593        ));
1594        graph.add_node(Node::callable(
1595            "test.py:func1".to_string(),
1596            "func1".to_string(),
1597            CallableKind::Function,
1598            "test.py".to_string(),
1599            1,
1600            5,
1601        ));
1602        graph.add_node(Node::callable(
1603            "test.py:func2".to_string(),
1604            "func2".to_string(),
1605            CallableKind::Function,
1606            "test.py".to_string(),
1607            6,
1608            10,
1609        ));
1610
1611        graph.add_edge("test.py", "test.py:func1", EdgeData::contains());
1612        graph.add_edge("test.py", "test.py:func2", EdgeData::contains());
1613        graph.add_edge(
1614            "test.py:func1",
1615            "test.py:func2",
1616            EdgeData::uses(Some(3), Some("func2".to_string())),
1617        );
1618
1619        // Check outgoing edges from test.py
1620        let outgoing: Vec<_> = graph.outgoing_edges("test.py").collect();
1621        assert_eq!(outgoing.len(), 2);
1622
1623        // Check incoming edges to func2
1624        let incoming: Vec<_> = graph.incoming_edges("test.py:func2").collect();
1625        assert_eq!(incoming.len(), 2); // from test.py (CONTAINS) and func1 (USES)
1626
1627        // Verify USES edge has metadata
1628        let uses_edge = incoming
1629            .iter()
1630            .find(|(_, e)| e.edge_type == EdgeType::Uses)
1631            .unwrap();
1632        assert_eq!(uses_edge.1.ref_line, Some(3));
1633        assert_eq!(uses_edge.1.ident, Some("func2".to_string()));
1634    }
1635
1636    #[test]
1637    fn test_pet_code_graph_children_parent() {
1638        let mut graph = PetCodeGraph::new();
1639
1640        graph.add_node(Node::container(
1641            "test.py:MyClass".to_string(),
1642            "MyClass".to_string(),
1643            ContainerKind::Type,
1644            Some("class".to_string()),
1645            "test.py".to_string(),
1646            1,
1647            50,
1648        ));
1649        graph.add_node(Node::callable(
1650            "test.py:MyClass:method1".to_string(),
1651            "method1".to_string(),
1652            CallableKind::Method,
1653            "test.py".to_string(),
1654            2,
1655            10,
1656        ));
1657        graph.add_node(Node::callable(
1658            "test.py:MyClass:method2".to_string(),
1659            "method2".to_string(),
1660            CallableKind::Method,
1661            "test.py".to_string(),
1662            11,
1663            20,
1664        ));
1665
1666        graph.add_edge(
1667            "test.py:MyClass",
1668            "test.py:MyClass:method1",
1669            EdgeData::contains(),
1670        );
1671        graph.add_edge(
1672            "test.py:MyClass",
1673            "test.py:MyClass:method2",
1674            EdgeData::contains(),
1675        );
1676
1677        // Check children
1678        let children: Vec<_> = graph.children("test.py:MyClass").collect();
1679        assert_eq!(children.len(), 2);
1680
1681        // Check parent
1682        let parent = graph.parent("test.py:MyClass:method1").unwrap();
1683        assert_eq!(parent.id, "test.py:MyClass");
1684    }
1685
1686    #[test]
1687    fn test_pet_code_graph_neighbors() {
1688        let mut graph = PetCodeGraph::new();
1689
1690        graph.add_node(Node::callable(
1691            "a".to_string(),
1692            "a".to_string(),
1693            CallableKind::Function,
1694            "test.py".to_string(),
1695            1,
1696            5,
1697        ));
1698        graph.add_node(Node::callable(
1699            "b".to_string(),
1700            "b".to_string(),
1701            CallableKind::Function,
1702            "test.py".to_string(),
1703            6,
1704            10,
1705        ));
1706        graph.add_node(Node::callable(
1707            "c".to_string(),
1708            "c".to_string(),
1709            CallableKind::Function,
1710            "test.py".to_string(),
1711            11,
1712            15,
1713        ));
1714
1715        graph.add_edge("a", "b", EdgeData::uses(None, None));
1716        graph.add_edge("c", "a", EdgeData::uses(None, None));
1717
1718        // a's neighbors are b (outgoing) and c (incoming)
1719        let neighbors: Vec<_> = graph.neighbors("a").collect();
1720        assert_eq!(neighbors.len(), 2);
1721    }
1722
1723    #[test]
1724    fn test_pet_code_graph_remove_file_nodes() {
1725        let mut graph = PetCodeGraph::new();
1726
1727        graph.add_node(Node::source_file(
1728            "test.py".to_string(),
1729            "test.py".to_string(),
1730            "abc".to_string(),
1731            100,
1732        ));
1733        graph.add_node(Node::callable(
1734            "test.py:func1".to_string(),
1735            "func1".to_string(),
1736            CallableKind::Function,
1737            "test.py".to_string(),
1738            1,
1739            5,
1740        ));
1741        graph.add_node(Node::callable(
1742            "other.py:func".to_string(),
1743            "func".to_string(),
1744            CallableKind::Function,
1745            "other.py".to_string(),
1746            1,
1747            5,
1748        ));
1749
1750        graph.add_edge("test.py", "test.py:func1", EdgeData::contains());
1751        graph.add_edge(
1752            "test.py:func1",
1753            "other.py:func",
1754            EdgeData::uses(Some(3), None),
1755        );
1756
1757        assert_eq!(graph.node_count(), 3);
1758        assert_eq!(graph.edge_count(), 2);
1759
1760        graph.remove_file_nodes("test.py");
1761
1762        assert_eq!(graph.node_count(), 1);
1763        assert_eq!(graph.edge_count(), 0); // petgraph removes edges automatically
1764        assert!(!graph.contains_node("test.py"));
1765        assert!(!graph.contains_node("test.py:func1"));
1766        assert!(graph.contains_node("other.py:func"));
1767    }
1768
1769    #[test]
1770    fn test_pet_code_graph_edges_by_type() {
1771        let mut graph = PetCodeGraph::new();
1772
1773        graph.add_node(Node::container(
1774            "class".to_string(),
1775            "MyClass".to_string(),
1776            ContainerKind::Type,
1777            None,
1778            "test.py".to_string(),
1779            1,
1780            50,
1781        ));
1782        graph.add_node(Node::callable(
1783            "method".to_string(),
1784            "method".to_string(),
1785            CallableKind::Method,
1786            "test.py".to_string(),
1787            2,
1788            10,
1789        ));
1790        graph.add_node(Node::data(
1791            "field".to_string(),
1792            "my_field".to_string(),
1793            DataKind::Field,
1794            None,
1795            "test.py".to_string(),
1796            3,
1797            3,
1798        ));
1799
1800        graph.add_edge("class", "method", EdgeData::contains());
1801        graph.add_edge("class", "field", EdgeData::defines());
1802        graph.add_edge(
1803            "method",
1804            "field",
1805            EdgeData::uses(Some(5), Some("my_field".to_string())),
1806        );
1807
1808        let contains_edges: Vec<_> = graph.edges_by_type(EdgeType::Contains).collect();
1809        assert_eq!(contains_edges.len(), 1);
1810
1811        let uses_edges: Vec<_> = graph.edges_by_type(EdgeType::Uses).collect();
1812        assert_eq!(uses_edges.len(), 1);
1813
1814        let defines_edges: Vec<_> = graph.edges_by_type(EdgeType::Defines).collect();
1815        assert_eq!(defines_edges.len(), 1);
1816    }
1817
1818    #[test]
1819    fn test_pet_code_graph_node_replace_semantics() {
1820        let mut graph = PetCodeGraph::new();
1821
1822        // Add initial node
1823        graph.add_node(Node::callable(
1824            "test.py:func".to_string(),
1825            "func".to_string(),
1826            CallableKind::Function,
1827            "test.py".to_string(),
1828            1,
1829            5,
1830        ));
1831
1832        assert_eq!(graph.node_count(), 1);
1833        assert_eq!(graph.get_node("test.py:func").unwrap().end_line, 5);
1834
1835        // Replace with updated node (same ID)
1836        graph.add_node(Node::callable(
1837            "test.py:func".to_string(),
1838            "func".to_string(),
1839            CallableKind::Function,
1840            "test.py".to_string(),
1841            1,
1842            10, // Different end_line
1843        ));
1844
1845        // Should still have 1 node, but with updated content
1846        assert_eq!(graph.node_count(), 1);
1847        assert_eq!(graph.get_node("test.py:func").unwrap().end_line, 10);
1848    }
1849
1850    #[test]
1851    fn test_edge_data_constructors() {
1852        let contains = EdgeData::contains();
1853        assert_eq!(contains.edge_type, EdgeType::Contains);
1854        assert!(contains.ref_line.is_none());
1855        assert!(contains.ident.is_none());
1856
1857        let uses = EdgeData::uses(Some(42), Some("foo".to_string()));
1858        assert_eq!(uses.edge_type, EdgeType::Uses);
1859        assert_eq!(uses.ref_line, Some(42));
1860        assert_eq!(uses.ident, Some("foo".to_string()));
1861
1862        let defines = EdgeData::defines();
1863        assert_eq!(defines.edge_type, EdgeType::Defines);
1864        assert!(defines.ref_line.is_none());
1865        assert!(defines.ident.is_none());
1866    }
1867
1868    #[test]
1869    fn test_edge_data_from_edge() {
1870        let edge = Edge::uses(
1871            "source".to_string(),
1872            "target".to_string(),
1873            Some(10),
1874            Some("call".to_string()),
1875        );
1876
1877        let edge_data = EdgeData::from(&edge);
1878        assert_eq!(edge_data.edge_type, EdgeType::Uses);
1879        assert_eq!(edge_data.ref_line, Some(10));
1880        assert_eq!(edge_data.ident, Some("call".to_string()));
1881    }
1882
1883    // ========================================================================
1884    // Phase 1.3: Component & DependsOn Serialization Tests
1885    // ========================================================================
1886
1887    #[test]
1888    fn test_edge_type_depends_on_serialization() {
1889        // Test serialization
1890        let edge_type = EdgeType::DependsOn;
1891        let json = serde_json::to_string(&edge_type).unwrap();
1892        assert_eq!(json, "\"DEPENDS_ON\"");
1893
1894        // Test deserialization
1895        let parsed: EdgeType = serde_json::from_str("\"DEPENDS_ON\"").unwrap();
1896        assert_eq!(parsed, EdgeType::DependsOn);
1897    }
1898
1899    #[test]
1900    fn test_container_kind_component_serialization() {
1901        // Test serialization
1902        let kind = ContainerKind::Component;
1903        let json = serde_json::to_string(&kind).unwrap();
1904        assert_eq!(json, "\"component\"");
1905
1906        // Test deserialization
1907        let parsed: ContainerKind = serde_json::from_str("\"component\"").unwrap();
1908        assert_eq!(parsed, ContainerKind::Component);
1909    }
1910
1911    #[test]
1912    fn test_depends_on_edge_serialization_round_trip() {
1913        let edge = Edge::depends_on(
1914            "pkg/frontend".to_string(),
1915            "pkg/core".to_string(),
1916            Some("@myorg/core".to_string()),
1917            Some("workspace:*".to_string()),
1918            Some(true),
1919        );
1920
1921        // Serialize
1922        let json = serde_json::to_string_pretty(&edge).unwrap();
1923
1924        // Verify JSON contains expected fields
1925        assert!(json.contains("\"DEPENDS_ON\""), "Should contain edge type");
1926        assert!(
1927            json.contains("\"version_spec\""),
1928            "Should contain version_spec"
1929        );
1930        assert!(
1931            json.contains("\"workspace:*\""),
1932            "Should contain version value"
1933        );
1934        assert!(
1935            json.contains("\"is_dev_dependency\""),
1936            "Should contain is_dev_dependency"
1937        );
1938        assert!(json.contains("true"), "Should contain dev dep value");
1939
1940        // Deserialize and verify round-trip
1941        let parsed: Edge = serde_json::from_str(&json).unwrap();
1942        assert_eq!(parsed.source, "pkg/frontend");
1943        assert_eq!(parsed.target, "pkg/core");
1944        assert_eq!(parsed.edge_type, EdgeType::DependsOn);
1945        assert_eq!(parsed.ident, Some("@myorg/core".to_string()));
1946        assert_eq!(parsed.version_spec, Some("workspace:*".to_string()));
1947        assert_eq!(parsed.is_dev_dependency, Some(true));
1948    }
1949
1950    #[test]
1951    fn test_depends_on_edge_minimal_serialization() {
1952        // DependsOn with no optional fields
1953        let edge = Edge::depends_on("pkg/a".to_string(), "pkg/b".to_string(), None, None, None);
1954
1955        let json = serde_json::to_string(&edge).unwrap();
1956
1957        // Optional fields should be skipped
1958        assert!(
1959            !json.contains("version_spec"),
1960            "Should skip None version_spec"
1961        );
1962        assert!(
1963            !json.contains("is_dev_dependency"),
1964            "Should skip None is_dev_dependency"
1965        );
1966        assert!(!json.contains("ident"), "Should skip None ident");
1967
1968        // Round-trip
1969        let parsed: Edge = serde_json::from_str(&json).unwrap();
1970        assert_eq!(parsed.edge_type, EdgeType::DependsOn);
1971        assert_eq!(parsed.version_spec, None);
1972        assert_eq!(parsed.is_dev_dependency, None);
1973    }
1974
1975    #[test]
1976    fn test_component_node_serialization_round_trip() {
1977        let metadata = NodeMetadata::default().with_component(
1978            Some(true), // is_workspace_root
1979            Some(true), // is_publishable
1980            Some("packages/core/package.json".to_string()),
1981        );
1982
1983        let node = Node::component(
1984            "my-repo:packages/core".to_string(),
1985            "@myorg/core".to_string(),
1986            "packages/core/package.json".to_string(),
1987            metadata,
1988        );
1989
1990        // Serialize
1991        let json = serde_json::to_string_pretty(&node).unwrap();
1992
1993        // Verify JSON structure
1994        assert!(json.contains("\"component\""), "Should have component kind");
1995        assert!(
1996            json.contains("\"is_workspace_root\""),
1997            "Should have workspace root field"
1998        );
1999        assert!(
2000            json.contains("\"is_publishable\""),
2001            "Should have publishable field"
2002        );
2003        assert!(
2004            json.contains("\"manifest_path\""),
2005            "Should have manifest path"
2006        );
2007
2008        // Round-trip
2009        let parsed: Node = serde_json::from_str(&json).unwrap();
2010        assert!(parsed.is_component());
2011        assert_eq!(parsed.id, "my-repo:packages/core");
2012        assert_eq!(parsed.name, "@myorg/core");
2013        assert_eq!(parsed.kind, Some("component".to_string()));
2014        assert_eq!(parsed.metadata.is_workspace_root, Some(true));
2015        assert_eq!(parsed.metadata.is_publishable, Some(true));
2016        assert_eq!(
2017            parsed.metadata.manifest_path,
2018            Some("packages/core/package.json".to_string())
2019        );
2020    }
2021
2022    #[test]
2023    fn test_node_metadata_component_fields() {
2024        let metadata = NodeMetadata::default().with_component(
2025            Some(false), // not workspace root
2026            Some(true),  // publishable
2027            Some("Cargo.toml".to_string()),
2028        );
2029
2030        assert!(!metadata.is_empty());
2031        assert_eq!(metadata.is_workspace_root, Some(false));
2032        assert_eq!(metadata.is_publishable, Some(true));
2033        assert_eq!(metadata.manifest_path, Some("Cargo.toml".to_string()));
2034
2035        // Serialize and verify
2036        let json = serde_json::to_string(&metadata).unwrap();
2037        assert!(json.contains("\"is_workspace_root\":false"));
2038        assert!(json.contains("\"is_publishable\":true"));
2039        assert!(json.contains("\"manifest_path\":\"Cargo.toml\""));
2040
2041        // Round-trip
2042        let parsed: NodeMetadata = serde_json::from_str(&json).unwrap();
2043        assert_eq!(parsed.is_workspace_root, Some(false));
2044        assert_eq!(parsed.is_publishable, Some(true));
2045        assert_eq!(parsed.manifest_path, Some("Cargo.toml".to_string()));
2046    }
2047
2048    #[test]
2049    fn test_edge_data_depends_on_constructor() {
2050        let edge_data = EdgeData::depends_on(
2051            Some("my-dep".to_string()),
2052            Some("^1.0.0".to_string()),
2053            Some(false),
2054        );
2055
2056        assert_eq!(edge_data.edge_type, EdgeType::DependsOn);
2057        assert_eq!(edge_data.ident, Some("my-dep".to_string()));
2058        assert_eq!(edge_data.version_spec, Some("^1.0.0".to_string()));
2059        assert_eq!(edge_data.is_dev_dependency, Some(false));
2060        assert!(edge_data.ref_line.is_none()); // DependsOn doesn't use ref_line
2061    }
2062
2063    #[test]
2064    fn test_edge_data_from_depends_on_edge() {
2065        let edge = Edge::depends_on(
2066            "source".to_string(),
2067            "target".to_string(),
2068            Some("dep-name".to_string()),
2069            Some("path:../lib".to_string()),
2070            Some(true),
2071        );
2072
2073        let edge_data = EdgeData::from(&edge);
2074        assert_eq!(edge_data.edge_type, EdgeType::DependsOn);
2075        assert_eq!(edge_data.ident, Some("dep-name".to_string()));
2076        assert_eq!(edge_data.version_spec, Some("path:../lib".to_string()));
2077        assert_eq!(edge_data.is_dev_dependency, Some(true));
2078    }
2079
2080    #[test]
2081    fn test_validate_node_kind_component() {
2082        assert!(validate_node_kind(NodeType::Container, "component"));
2083        assert!(!validate_node_kind(NodeType::Callable, "component"));
2084        assert!(!validate_node_kind(NodeType::Data, "component"));
2085    }
2086
2087    #[test]
2088    fn test_get_node_type_from_kind_component() {
2089        assert_eq!(
2090            get_node_type_from_kind("component"),
2091            Some(NodeType::Container)
2092        );
2093    }
2094
2095    #[test]
2096    fn test_parse_container_kind_component() {
2097        assert_eq!(
2098            parse_container_kind("component"),
2099            Some(ContainerKind::Component)
2100        );
2101    }
2102
2103    #[test]
2104    fn test_workspace_node() {
2105        let node = Node::workspace("my-workspace".to_string());
2106
2107        assert!(node.is_workspace());
2108        assert!(node.is_container());
2109        assert!(!node.is_component());
2110        assert!(!node.is_file());
2111        assert_eq!(node.id, "my-workspace");
2112        assert_eq!(node.kind, Some("workspace".to_string()));
2113        assert_eq!(node.container_kind(), Some(ContainerKind::Workspace));
2114    }
2115
2116    #[test]
2117    fn test_container_kind_workspace_serialization() {
2118        let kind = ContainerKind::Workspace;
2119        let json = serde_json::to_string(&kind).unwrap();
2120        assert_eq!(json, "\"workspace\"");
2121
2122        let parsed: ContainerKind = serde_json::from_str("\"workspace\"").unwrap();
2123        assert_eq!(parsed, ContainerKind::Workspace);
2124    }
2125}