Skip to main content

sqry_core/graph/
node.rs

1//! Node types for the unified code graph
2//!
3//! This module defines the core node types that represent code entities
4//! (functions, classes, modules, etc.) in the unified graph architecture.
5
6use serde::{Deserialize, Serialize};
7use std::fmt;
8use std::sync::Arc;
9
10/// Language identifier
11#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
12pub enum Language {
13    /// C language
14    C,
15    /// C++ language
16    Cpp,
17    /// C# language
18    CSharp,
19    /// CSS language
20    Css,
21    /// JavaScript language
22    JavaScript,
23    /// Python language
24    Python,
25    /// TypeScript language
26    TypeScript,
27    /// Rust language
28    Rust,
29    /// Go language
30    Go,
31    /// Java language
32    Java,
33    /// Ruby language
34    Ruby,
35    /// PHP language
36    Php,
37    /// Swift language
38    Swift,
39    /// Kotlin language
40    Kotlin,
41    /// Scala language
42    Scala,
43    /// SQL language
44    Sql,
45    /// Dart language
46    Dart,
47    /// Lua language
48    Lua,
49    /// Perl language
50    Perl,
51    /// Shell (Bash) language
52    Shell,
53    /// Groovy language
54    Groovy,
55    /// Elixir language
56    Elixir,
57    /// R language
58    R,
59    /// Haskell language
60    Haskell,
61    /// HTML language
62    Html,
63    /// Svelte language
64    Svelte,
65    /// Vue language
66    Vue,
67    /// Zig language
68    Zig,
69    /// Terraform (HCL) language
70    Terraform,
71    /// Puppet language
72    Puppet,
73    /// Pulumi language
74    Pulumi,
75    /// Virtual language for HTTP endpoints
76    Http,
77    /// Oracle PL/SQL language
78    Plsql,
79    /// Salesforce Apex language
80    Apex,
81    /// SAP ABAP language
82    Abap,
83    /// `ServiceNow` (Xanadu) language
84    ServiceNow,
85    /// JSON configuration files
86    Json,
87}
88
89impl fmt::Display for Language {
90    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
91        match self {
92            Language::C => write!(f, "c"),
93            Language::Cpp => write!(f, "cpp"),
94            Language::CSharp => write!(f, "csharp"),
95            Language::Css => write!(f, "css"),
96            Language::JavaScript => write!(f, "js"),
97            Language::Python => write!(f, "py"),
98            Language::TypeScript => write!(f, "ts"),
99            Language::Rust => write!(f, "rust"),
100            Language::Go => write!(f, "go"),
101            Language::Java => write!(f, "java"),
102            Language::Ruby => write!(f, "ruby"),
103            Language::Php => write!(f, "php"),
104            Language::Swift => write!(f, "swift"),
105            Language::Kotlin => write!(f, "kotlin"),
106            Language::Scala => write!(f, "scala"),
107            Language::Sql => write!(f, "sql"),
108            Language::Dart => write!(f, "dart"),
109            Language::Lua => write!(f, "lua"),
110            Language::Perl => write!(f, "perl"),
111            Language::Shell => write!(f, "shell"),
112            Language::Groovy => write!(f, "groovy"),
113            Language::Elixir => write!(f, "elixir"),
114            Language::R => write!(f, "r"),
115            Language::Haskell => write!(f, "haskell"),
116            Language::Html => write!(f, "html"),
117            Language::Svelte => write!(f, "svelte"),
118            Language::Vue => write!(f, "vue"),
119            Language::Zig => write!(f, "zig"),
120            Language::Terraform => write!(f, "terraform"),
121            Language::Puppet => write!(f, "puppet"),
122            Language::Pulumi => write!(f, "pulumi"),
123            Language::Http => write!(f, "http"),
124            Language::Plsql => write!(f, "plsql"),
125            Language::Apex => write!(f, "apex"),
126            Language::Abap => write!(f, "abap"),
127            Language::ServiceNow => write!(f, "servicenow"),
128            Language::Json => write!(f, "json"),
129        }
130    }
131}
132
133impl Language {
134    /// Parse a language identifier or common alias into a `Language`.
135    #[must_use]
136    pub fn from_id(value: &str) -> Option<Self> {
137        match value.trim().to_ascii_lowercase().as_str() {
138            "c" => Some(Self::C),
139            "cpp" | "c++" => Some(Self::Cpp),
140            "csharp" | "c#" | "cs" => Some(Self::CSharp),
141            "css" => Some(Self::Css),
142            "javascript" | "js" => Some(Self::JavaScript),
143            "python" | "py" => Some(Self::Python),
144            "typescript" | "ts" => Some(Self::TypeScript),
145            "rust" | "rs" => Some(Self::Rust),
146            "go" | "golang" => Some(Self::Go),
147            "java" => Some(Self::Java),
148            "ruby" | "rb" => Some(Self::Ruby),
149            "php" => Some(Self::Php),
150            "swift" => Some(Self::Swift),
151            "kotlin" | "kt" => Some(Self::Kotlin),
152            "scala" => Some(Self::Scala),
153            "sql" => Some(Self::Sql),
154            "dart" => Some(Self::Dart),
155            "lua" => Some(Self::Lua),
156            "perl" | "pl" => Some(Self::Perl),
157            "shell" | "bash" | "sh" => Some(Self::Shell),
158            "groovy" => Some(Self::Groovy),
159            "elixir" | "ex" | "exs" => Some(Self::Elixir),
160            "r" => Some(Self::R),
161            "haskell" | "hs" => Some(Self::Haskell),
162            "html" => Some(Self::Html),
163            "svelte" => Some(Self::Svelte),
164            "vue" => Some(Self::Vue),
165            "zig" => Some(Self::Zig),
166            "terraform" | "hcl" => Some(Self::Terraform),
167            "puppet" => Some(Self::Puppet),
168            "pulumi" => Some(Self::Pulumi),
169            "http" => Some(Self::Http),
170            "plsql" => Some(Self::Plsql),
171            "apex" | "salesforce" => Some(Self::Apex),
172            "abap" => Some(Self::Abap),
173            "servicenow" => Some(Self::ServiceNow),
174            "json" => Some(Self::Json),
175            _ => None,
176        }
177    }
178}
179
180/// Universal node identifier with string interning for memory efficiency
181///
182/// Per AGENTS.md:149-151, uses `Arc<str>` to reduce memory usage for
183/// symbol-heavy data structures (saves 10-50 MB for typical repos).
184///
185/// # Examples
186///
187/// ```
188/// use sqry_core::graph::node::{NodeId, Language};
189/// use std::sync::Arc;
190///
191/// let node_id = NodeId::new(
192///     Language::Cpp,
193///     "src/main.cpp",
194///     "main"
195/// );
196///
197/// // Arc<str> makes cloning cheap (only refcount increment)
198/// let cloned = node_id.clone();
199/// assert_eq!(node_id, cloned);
200/// ```
201#[derive(Debug, Clone, Hash, Eq, PartialEq, Ord, PartialOrd)]
202pub struct NodeId {
203    /// Language of origin
204    pub language: Language,
205    /// File path (interned via `Arc<str>`)
206    pub file: Arc<str>,
207    /// Qualified name (interned via `Arc<str>`)
208    /// Examples: "`std::vector::push_back`", "MyClass.process", "__main__"
209    pub qualified_name: Arc<str>,
210}
211
212impl NodeId {
213    /// Create a new `NodeId` with string interning
214    ///
215    /// Automatically interns strings via `Arc<str>` for memory efficiency.
216    ///
217    /// # Examples
218    ///
219    /// ```
220    /// use sqry_core::graph::node::{NodeId, Language};
221    ///
222    /// let id = NodeId::new(Language::Python, "api.py", "User.authenticate");
223    /// println!("{}", id); // "py:api.py:User.authenticate"
224    /// ```
225    pub fn new(language: Language, file: impl AsRef<str>, qualified_name: impl AsRef<str>) -> Self {
226        Self {
227            language,
228            file: Arc::from(file.as_ref()),
229            qualified_name: Arc::from(qualified_name.as_ref()),
230        }
231    }
232
233    /// Get the symbol name without namespace qualification
234    ///
235    /// # Examples
236    ///
237    /// ```
238    /// use sqry_core::graph::node::{NodeId, Language};
239    ///
240    /// let id = NodeId::new(Language::Cpp, "main.cpp", "std::vector::push_back");
241    /// assert_eq!(id.symbol_name(), "push_back");
242    /// ```
243    #[must_use]
244    pub fn symbol_name(&self) -> &str {
245        // Try C++ style first (::), then Python/Java style (.)
246        if let Some(name) = self.qualified_name.rsplit("::").next()
247            && name != self.qualified_name.as_ref()
248        {
249            return name;
250        }
251
252        if let Some(name) = self.qualified_name.rsplit('.').next() {
253            return name;
254        }
255
256        &self.qualified_name
257    }
258}
259
260impl fmt::Display for NodeId {
261    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
262        write!(f, "{}:{}:{}", self.language, self.file, self.qualified_name)
263    }
264}
265
266/// Source code span (line and column information)
267#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq, Default, Serialize, Deserialize)]
268pub struct Span {
269    /// Starting position
270    pub start: Position,
271    /// Ending position
272    pub end: Position,
273}
274
275impl Span {
276    /// Create a new span
277    #[must_use]
278    pub fn new(start: Position, end: Position) -> Self {
279        Self { start, end }
280    }
281
282    /// Create a span from byte offsets (legacy compatibility)
283    #[must_use]
284    pub fn from_bytes(start: usize, end: usize) -> Self {
285        Self {
286            start: Position {
287                line: 0,
288                column: start,
289            },
290            end: Position {
291                line: 0,
292                column: end,
293            },
294        }
295    }
296}
297
298/// Position in source code (line and column)
299#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq, Default, Serialize, Deserialize)]
300pub struct Position {
301    /// Line number (0-indexed)
302    pub line: usize,
303    /// Column number (0-indexed)
304    pub column: usize,
305}
306
307impl Position {
308    /// Create a new position
309    #[must_use]
310    pub fn new(line: usize, column: usize) -> Self {
311        Self { line, column }
312    }
313}
314
315/// Type of code entity
316#[derive(Debug, Clone, PartialEq)]
317pub enum NodeKind {
318    /// Function or method
319    Function {
320        /// Function parameters
321        params: Vec<Param>,
322        /// Return type (if known)
323        return_type: Option<Type>,
324        /// Whether the function is async
325        is_async: bool,
326    },
327    /// Class or struct
328    Class {
329        /// Base classes
330        bases: Vec<NodeId>,
331        /// Implemented interfaces
332        interfaces: Vec<NodeId>,
333    },
334    /// Module or namespace
335    Module {
336        /// Exported symbols
337        exports: Vec<NodeId>,
338    },
339    /// Variable, constant, or field
340    Variable {
341        /// Variable type (if known)
342        var_type: Option<Type>,
343    },
344}
345
346/// Function parameter
347#[derive(Debug, Clone, PartialEq)]
348pub struct Param {
349    /// Parameter name
350    pub name: String,
351    /// Parameter type (if known)
352    pub param_type: Option<Type>,
353}
354
/// Minimal type descriptor; for now it carries only the type's name.
#[derive(Debug, Clone, PartialEq)]
pub struct Type {
    /// Name of the type.
    pub name: String,
}
361
/// Optional extra information attached to a node.
///
/// `Default` yields no visibility, no doc comment and an empty attribute list.
#[derive(Debug, Clone, Default)]
pub struct NodeMetadata {
    /// Visibility of the entity (public, private, etc.), if recorded.
    pub visibility: Option<String>,
    /// Documentation string of the entity, if any.
    pub doc_comment: Option<String>,
    /// Attributes or decorators applied to the entity.
    pub attributes: Vec<String>,
}
372
373/// A node in the code graph representing a code entity
374#[derive(Debug, Clone)]
375pub struct CodeNode {
376    /// Unique identifier
377    pub id: NodeId,
378    /// Node type (function, class, module, etc.)
379    pub kind: NodeKind,
380    /// Source location
381    pub span: Span,
382    /// Additional metadata
383    pub metadata: NodeMetadata,
384}
385
/// Unit tests for the node types defined in this module.
#[cfg(test)]
mod tests {
    use super::*;

    /// Every `Language` variant, in declaration order.
    ///
    /// The compiler does not enforce completeness of this list; extend it
    /// whenever a variant is added to `Language`.
    const ALL_LANGUAGES: &[Language] = &[
        Language::C,
        Language::Cpp,
        Language::CSharp,
        Language::Css,
        Language::JavaScript,
        Language::Python,
        Language::TypeScript,
        Language::Rust,
        Language::Go,
        Language::Java,
        Language::Ruby,
        Language::Php,
        Language::Swift,
        Language::Kotlin,
        Language::Scala,
        Language::Sql,
        Language::Dart,
        Language::Lua,
        Language::Perl,
        Language::Shell,
        Language::Groovy,
        Language::Elixir,
        Language::R,
        Language::Haskell,
        Language::Html,
        Language::Svelte,
        Language::Vue,
        Language::Zig,
        Language::Terraform,
        Language::Puppet,
        Language::Pulumi,
        Language::Http,
        Language::Plsql,
        Language::Apex,
        Language::Abap,
        Language::ServiceNow,
        Language::Json,
    ];

    #[test]
    fn test_node_id_creation() {
        let id = NodeId::new(Language::Cpp, "src/main.cpp", "main");
        assert_eq!(id.language, Language::Cpp);
        assert_eq!(id.file.as_ref(), "src/main.cpp");
        assert_eq!(id.qualified_name.as_ref(), "main");
    }

    #[test]
    fn test_node_id_display() {
        let id = NodeId::new(Language::Python, "api.py", "User.authenticate");
        assert_eq!(id.to_string(), "py:api.py:User.authenticate");
    }

    #[test]
    fn test_node_id_hash() {
        use std::collections::HashSet;

        let id1 = NodeId::new(Language::JavaScript, "api.js", "fetchUsers");
        let id2 = NodeId::new(Language::JavaScript, "api.js", "fetchUsers");
        let id3 = NodeId::new(Language::JavaScript, "api.js", "createUser");

        // The ids are not used afterwards, so they can be moved into the set.
        let mut set = HashSet::new();
        set.insert(id1);
        set.insert(id2);
        set.insert(id3);

        assert_eq!(set.len(), 2); // id1 and id2 are equal
    }

    #[test]
    fn test_node_id_clone_cheap() {
        let id1 = NodeId::new(Language::Cpp, "src/utils.cpp", "std::vector::push_back");
        let id2 = id1.clone();

        // Arc<str> means the underlying string is NOT copied
        assert!(Arc::ptr_eq(&id1.file, &id2.file));
        assert!(Arc::ptr_eq(&id1.qualified_name, &id2.qualified_name));
    }

    #[test]
    fn test_symbol_name_extraction() {
        let id1 = NodeId::new(Language::Cpp, "main.cpp", "std::vector::push_back");
        assert_eq!(id1.symbol_name(), "push_back");

        let id2 = NodeId::new(Language::Python, "api.py", "User.authenticate");
        assert_eq!(id2.symbol_name(), "authenticate");

        let id3 = NodeId::new(Language::JavaScript, "api.js", "fetchUsers");
        assert_eq!(id3.symbol_name(), "fetchUsers");
    }

    #[test]
    fn test_span_creation() {
        let span = Span::new(Position::new(10, 0), Position::new(20, 1));

        assert_eq!(span.start.line, 10);
        assert_eq!(span.start.column, 0);
        assert_eq!(span.end.line, 20);
        assert_eq!(span.end.column, 1);
    }

    #[test]
    fn test_span_from_bytes() {
        // Byte offsets land in the column fields on line 0.
        let span = Span::from_bytes(5, 42);

        assert_eq!(span.start, Position::new(0, 5));
        assert_eq!(span.end, Position::new(0, 42));
    }

    #[test]
    fn test_language_display() {
        assert_eq!(Language::Cpp.to_string(), "cpp");
        assert_eq!(Language::JavaScript.to_string(), "js");
        assert_eq!(Language::Python.to_string(), "py");
        assert_eq!(Language::Ruby.to_string(), "ruby");
        assert_eq!(Language::Php.to_string(), "php");
        assert_eq!(Language::Swift.to_string(), "swift");
        assert_eq!(Language::Kotlin.to_string(), "kotlin");
        assert_eq!(Language::Scala.to_string(), "scala");
        assert_eq!(Language::Http.to_string(), "http");
    }

    #[test]
    fn test_language_from_id() {
        assert_eq!(Language::from_id("javascript"), Some(Language::JavaScript));
        assert_eq!(Language::from_id("js"), Some(Language::JavaScript));
        assert_eq!(Language::from_id("c#"), Some(Language::CSharp));
        assert_eq!(Language::from_id("rb"), Some(Language::Ruby));
        assert_eq!(Language::from_id("json"), Some(Language::Json));
        assert_eq!(Language::from_id("unknown"), None);
    }

    #[test]
    fn test_language_from_id_normalizes_input() {
        // Whitespace is trimmed and ASCII case is ignored.
        assert_eq!(Language::from_id("  RUST "), Some(Language::Rust));
        assert_eq!(Language::from_id("C++"), Some(Language::Cpp));
        assert_eq!(Language::from_id("GoLang\n"), Some(Language::Go));
        assert_eq!(Language::from_id(""), None);
    }

    #[test]
    fn test_language_display_round_trips_through_from_id() {
        // Whatever `Display` emits must be parseable back to the same variant.
        for language in ALL_LANGUAGES {
            let rendered = language.to_string();
            assert_eq!(
                Language::from_id(&rendered),
                Some(*language),
                "`{rendered}` did not parse back to {language:?}"
            );
        }
    }
}