Skip to main content

sqry_core/graph/
node.rs

1//! Node types for the unified code graph
2//!
3//! This module defines the core node types that represent code entities
4//! (functions, classes, modules, etc.) in the unified graph architecture.
5
6use serde::{Deserialize, Serialize};
7use std::fmt;
8use std::sync::Arc;
9
/// Identifier for the source language (or virtual language) a graph node belongs to.
///
/// Variant order is significant: the derived `Ord`/`PartialOrd` implementations
/// compare variants by declaration order, so reordering or inserting variants in
/// the middle changes how values sort.
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
pub enum Language {
    /// C language
    C,
    /// C++ language
    Cpp,
    /// C# language
    CSharp,
    /// CSS language
    Css,
    /// JavaScript language
    JavaScript,
    /// Python language
    Python,
    /// TypeScript language
    TypeScript,
    /// Rust language
    Rust,
    /// Go language
    Go,
    /// Java language
    Java,
    /// Ruby language
    Ruby,
    /// PHP language
    Php,
    /// Swift language
    Swift,
    /// Kotlin language
    Kotlin,
    /// Scala language
    Scala,
    /// SQL language
    Sql,
    /// Dart language
    Dart,
    /// Lua language
    Lua,
    /// Perl language
    Perl,
    /// Shell (Bash) language
    Shell,
    /// Groovy language
    Groovy,
    /// Elixir language
    Elixir,
    /// R language
    R,
    /// Haskell language
    Haskell,
    /// HTML language
    Html,
    /// Svelte language
    Svelte,
    /// Vue language
    Vue,
    /// Zig language
    Zig,
    /// Terraform (HCL) language
    Terraform,
    /// Puppet language
    Puppet,
    /// Pulumi language
    Pulumi,
    /// Virtual language for HTTP endpoints (not a programming language)
    Http,
    /// Oracle PL/SQL language
    Plsql,
    /// Salesforce Apex language
    Apex,
    /// SAP ABAP language
    Abap,
    /// `ServiceNow` (Xanadu) language
    ServiceNow,
}
86
87impl fmt::Display for Language {
88    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
89        match self {
90            Language::C => write!(f, "c"),
91            Language::Cpp => write!(f, "cpp"),
92            Language::CSharp => write!(f, "csharp"),
93            Language::Css => write!(f, "css"),
94            Language::JavaScript => write!(f, "js"),
95            Language::Python => write!(f, "py"),
96            Language::TypeScript => write!(f, "ts"),
97            Language::Rust => write!(f, "rust"),
98            Language::Go => write!(f, "go"),
99            Language::Java => write!(f, "java"),
100            Language::Ruby => write!(f, "ruby"),
101            Language::Php => write!(f, "php"),
102            Language::Swift => write!(f, "swift"),
103            Language::Kotlin => write!(f, "kotlin"),
104            Language::Scala => write!(f, "scala"),
105            Language::Sql => write!(f, "sql"),
106            Language::Dart => write!(f, "dart"),
107            Language::Lua => write!(f, "lua"),
108            Language::Perl => write!(f, "perl"),
109            Language::Shell => write!(f, "shell"),
110            Language::Groovy => write!(f, "groovy"),
111            Language::Elixir => write!(f, "elixir"),
112            Language::R => write!(f, "r"),
113            Language::Haskell => write!(f, "haskell"),
114            Language::Html => write!(f, "html"),
115            Language::Svelte => write!(f, "svelte"),
116            Language::Vue => write!(f, "vue"),
117            Language::Zig => write!(f, "zig"),
118            Language::Terraform => write!(f, "terraform"),
119            Language::Puppet => write!(f, "puppet"),
120            Language::Pulumi => write!(f, "pulumi"),
121            Language::Http => write!(f, "http"),
122            Language::Plsql => write!(f, "plsql"),
123            Language::Apex => write!(f, "apex"),
124            Language::Abap => write!(f, "abap"),
125            Language::ServiceNow => write!(f, "servicenow"),
126        }
127    }
128}
129
130impl Language {
131    /// Parse a language identifier or common alias into a `Language`.
132    #[must_use]
133    pub fn from_id(value: &str) -> Option<Self> {
134        match value.trim().to_ascii_lowercase().as_str() {
135            "c" => Some(Self::C),
136            "cpp" | "c++" => Some(Self::Cpp),
137            "csharp" | "c#" | "cs" => Some(Self::CSharp),
138            "css" => Some(Self::Css),
139            "javascript" | "js" => Some(Self::JavaScript),
140            "python" | "py" => Some(Self::Python),
141            "typescript" | "ts" => Some(Self::TypeScript),
142            "rust" | "rs" => Some(Self::Rust),
143            "go" | "golang" => Some(Self::Go),
144            "java" => Some(Self::Java),
145            "ruby" | "rb" => Some(Self::Ruby),
146            "php" => Some(Self::Php),
147            "swift" => Some(Self::Swift),
148            "kotlin" | "kt" => Some(Self::Kotlin),
149            "scala" => Some(Self::Scala),
150            "sql" => Some(Self::Sql),
151            "dart" => Some(Self::Dart),
152            "lua" => Some(Self::Lua),
153            "perl" | "pl" => Some(Self::Perl),
154            "shell" | "bash" | "sh" => Some(Self::Shell),
155            "groovy" => Some(Self::Groovy),
156            "elixir" | "ex" | "exs" => Some(Self::Elixir),
157            "r" => Some(Self::R),
158            "haskell" | "hs" => Some(Self::Haskell),
159            "html" => Some(Self::Html),
160            "svelte" => Some(Self::Svelte),
161            "vue" => Some(Self::Vue),
162            "zig" => Some(Self::Zig),
163            "terraform" | "hcl" => Some(Self::Terraform),
164            "puppet" => Some(Self::Puppet),
165            "pulumi" => Some(Self::Pulumi),
166            "http" => Some(Self::Http),
167            "plsql" => Some(Self::Plsql),
168            "apex" | "salesforce" => Some(Self::Apex),
169            "abap" => Some(Self::Abap),
170            "servicenow" => Some(Self::ServiceNow),
171            _ => None,
172        }
173    }
174}
175
/// Universal node identifier: a (language, file, qualified name) triple.
///
/// Per AGENTS.md:149-151, the string fields are stored as `Arc<str>` to reduce
/// memory usage for symbol-heavy data structures (that document cites savings
/// of 10-50 MB for typical repos). Cloning a `NodeId` only bumps reference
/// counts; the string data itself is shared between the clones.
///
/// The derived `Ord`/`PartialOrd` compare fields in declaration order:
/// `language`, then `file`, then `qualified_name`.
///
/// # Examples
///
/// ```
/// use sqry_core::graph::node::{NodeId, Language};
/// use std::sync::Arc;
///
/// let node_id = NodeId::new(
///     Language::Cpp,
///     "src/main.cpp",
///     "main"
/// );
///
/// // Arc<str> makes cloning cheap (only refcount increment)
/// let cloned = node_id.clone();
/// assert_eq!(node_id, cloned);
/// ```
#[derive(Debug, Clone, Hash, Eq, PartialEq, Ord, PartialOrd)]
pub struct NodeId {
    /// Language of origin
    pub language: Language,
    /// File path (shared via `Arc<str>`)
    pub file: Arc<str>,
    /// Qualified name (shared via `Arc<str>`)
    /// Examples: "`std::vector::push_back`", "MyClass.process", "__main__"
    pub qualified_name: Arc<str>,
}
207
208impl NodeId {
209    /// Create a new `NodeId` with string interning
210    ///
211    /// Automatically interns strings via `Arc<str>` for memory efficiency.
212    ///
213    /// # Examples
214    ///
215    /// ```
216    /// use sqry_core::graph::node::{NodeId, Language};
217    ///
218    /// let id = NodeId::new(Language::Python, "api.py", "User.authenticate");
219    /// println!("{}", id); // "py:api.py:User.authenticate"
220    /// ```
221    pub fn new(language: Language, file: impl AsRef<str>, qualified_name: impl AsRef<str>) -> Self {
222        Self {
223            language,
224            file: Arc::from(file.as_ref()),
225            qualified_name: Arc::from(qualified_name.as_ref()),
226        }
227    }
228
229    /// Get the symbol name without namespace qualification
230    ///
231    /// # Examples
232    ///
233    /// ```
234    /// use sqry_core::graph::node::{NodeId, Language};
235    ///
236    /// let id = NodeId::new(Language::Cpp, "main.cpp", "std::vector::push_back");
237    /// assert_eq!(id.symbol_name(), "push_back");
238    /// ```
239    #[must_use]
240    pub fn symbol_name(&self) -> &str {
241        // Try C++ style first (::), then Python/Java style (.)
242        if let Some(name) = self.qualified_name.rsplit("::").next()
243            && name != self.qualified_name.as_ref()
244        {
245            return name;
246        }
247
248        if let Some(name) = self.qualified_name.rsplit('.').next() {
249            return name;
250        }
251
252        &self.qualified_name
253    }
254}
255
256impl fmt::Display for NodeId {
257    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
258        write!(f, "{}:{}:{}", self.language, self.file, self.qualified_name)
259    }
260}
261
/// Source code span: a pair of positions delimiting a region of a file.
///
/// The derived `Default` yields a span whose `start` and `end` are both the
/// default `Position`.
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq, Default, Serialize, Deserialize)]
pub struct Span {
    /// Starting position
    pub start: Position,
    /// Ending position
    pub end: Position,
}
270
271impl Span {
272    /// Create a new span
273    #[must_use]
274    pub fn new(start: Position, end: Position) -> Self {
275        Self { start, end }
276    }
277
278    /// Create a span from byte offsets (legacy compatibility)
279    #[must_use]
280    pub fn from_bytes(start: usize, end: usize) -> Self {
281        Self {
282            start: Position {
283                line: 0,
284                column: start,
285            },
286            end: Position {
287                line: 0,
288                column: end,
289            },
290        }
291    }
292}
293
/// Position in source code (line and column).
///
/// The derived `Default` is line 0, column 0.
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq, Default, Serialize, Deserialize)]
pub struct Position {
    /// Line number (0-indexed)
    pub line: usize,
    /// Column number (0-indexed)
    pub column: usize,
}
302
303impl Position {
304    /// Create a new position
305    #[must_use]
306    pub fn new(line: usize, column: usize) -> Self {
307        Self { line, column }
308    }
309}
310
/// Type of code entity a graph node represents, with kind-specific details.
#[derive(Debug, Clone, PartialEq)]
pub enum NodeKind {
    /// Function or method
    Function {
        /// Function parameters, in the order stored by the producer of the node
        params: Vec<Param>,
        /// Return type (`None` if not known)
        return_type: Option<Type>,
        /// Whether the function is async
        is_async: bool,
    },
    /// Class or struct
    Class {
        /// Base classes, referenced by node identifier
        bases: Vec<NodeId>,
        /// Implemented interfaces, referenced by node identifier
        interfaces: Vec<NodeId>,
    },
    /// Module or namespace
    Module {
        /// Exported symbols, referenced by node identifier
        exports: Vec<NodeId>,
    },
    /// Variable, constant, or field
    Variable {
        /// Variable type (`None` if not known)
        var_type: Option<Type>,
    },
}
341
/// A single function parameter: its name plus an optional type.
#[derive(Debug, Clone, PartialEq)]
pub struct Param {
    /// Parameter name
    pub name: String,
    /// Parameter type (`None` if not known)
    pub param_type: Option<Type>,
}
350
/// Type information (simplified for now).
///
/// Currently a type is represented only by its name.
#[derive(Debug, Clone, PartialEq)]
pub struct Type {
    /// Type name
    pub name: String,
}
357
/// Additional, optional metadata attached to a node.
///
/// The derived `Default` has no visibility, no doc comment and an empty
/// attribute list.
#[derive(Debug, Clone, Default)]
pub struct NodeMetadata {
    /// Visibility (public, private, etc.), stored as free-form text
    pub visibility: Option<String>,
    /// Documentation string
    pub doc_comment: Option<String>,
    /// Attributes/decorators
    pub attributes: Vec<String>,
}
368
/// A node in the code graph representing a single code entity.
///
/// Combines the entity's identity, its kind-specific details, its source
/// location and any extra metadata.
#[derive(Debug, Clone)]
pub struct CodeNode {
    /// Unique identifier
    pub id: NodeId,
    /// Node type (function, class, module, etc.)
    pub kind: NodeKind,
    /// Source location
    pub span: Span,
    /// Additional metadata
    pub metadata: NodeMetadata,
}
381
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// `NodeId::new` stores the language and both strings as given.
    #[test]
    fn test_node_id_creation() {
        let NodeId {
            language,
            file,
            qualified_name,
        } = NodeId::new(Language::Cpp, "src/main.cpp", "main");

        assert_eq!(language, Language::Cpp);
        assert_eq!(&*file, "src/main.cpp");
        assert_eq!(&*qualified_name, "main");
    }

    /// `Display` renders `language:file:qualified_name`.
    #[test]
    fn test_node_id_display() {
        let rendered = NodeId::new(Language::Python, "api.py", "User.authenticate").to_string();
        assert_eq!(rendered, "py:api.py:User.authenticate");
    }

    /// Equal identifiers hash and compare equal, so a set deduplicates them.
    #[test]
    fn test_node_id_hash() {
        let first = NodeId::new(Language::JavaScript, "api.js", "fetchUsers");
        let duplicate = NodeId::new(Language::JavaScript, "api.js", "fetchUsers");
        let other = NodeId::new(Language::JavaScript, "api.js", "createUser");

        let unique = HashSet::from([first, duplicate, other]);

        // `first` and `duplicate` collapse into a single entry.
        assert_eq!(unique.len(), 2);
    }

    /// Cloning shares the underlying string buffers instead of copying them.
    #[test]
    fn test_node_id_clone_cheap() {
        let original = NodeId::new(Language::Cpp, "src/utils.cpp", "std::vector::push_back");
        let copy = original.clone();

        assert!(Arc::ptr_eq(&original.file, &copy.file));
        assert!(Arc::ptr_eq(&original.qualified_name, &copy.qualified_name));
    }

    /// `symbol_name` strips `::` and `.` qualification and leaves bare names alone.
    #[test]
    fn test_symbol_name_extraction() {
        let cases = [
            (Language::Cpp, "main.cpp", "std::vector::push_back", "push_back"),
            (Language::Python, "api.py", "User.authenticate", "authenticate"),
            (Language::JavaScript, "api.js", "fetchUsers", "fetchUsers"),
        ];

        for (language, file, qualified, expected) in cases {
            let id = NodeId::new(language, file, qualified);
            assert_eq!(id.symbol_name(), expected);
        }
    }

    /// `Span::new` keeps the start and end positions it was given.
    #[test]
    fn test_span_creation() {
        let Span { start, end } = Span::new(Position::new(10, 0), Position::new(20, 1));

        assert_eq!(start.line, 10);
        assert_eq!(end.line, 20);
    }

    /// Spot-check the short identifiers produced by `Display`.
    #[test]
    fn test_language_display() {
        let expectations = [
            (Language::Cpp, "cpp"),
            (Language::JavaScript, "js"),
            (Language::Python, "py"),
            (Language::Ruby, "ruby"),
            (Language::Php, "php"),
            (Language::Swift, "swift"),
            (Language::Kotlin, "kotlin"),
            (Language::Scala, "scala"),
            (Language::Http, "http"),
        ];

        for (language, expected) in expectations {
            assert_eq!(language.to_string(), expected);
        }
    }

    /// Canonical names and aliases parse; unknown identifiers yield `None`.
    #[test]
    fn test_language_from_id() {
        let expectations = [
            ("javascript", Some(Language::JavaScript)),
            ("js", Some(Language::JavaScript)),
            ("c#", Some(Language::CSharp)),
            ("rb", Some(Language::Ruby)),
            ("unknown", None),
        ];

        for (input, expected) in expectations {
            assert_eq!(Language::from_id(input), expected);
        }
    }
}