Skip to main content

cartog_core/
lib.rs

//! Core types and utilities for the cartog code graph indexer.
//!
//! Defines the shared data model ([`Symbol`], [`Edge`], [`SymbolKind`], [`EdgeKind`])
//! used by all other cartog crates. Symbol IDs are deterministic and invariant to
//! line movements — built from `file_path:kind:qualified_name`.
//!
//! Also provides [`detect_language`] for mapping file extensions to language names
//! without pulling in tree-sitter grammar dependencies.
9
10use std::path::Path;
11
12use serde::Serialize;
13
14#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
15pub struct Symbol {
16    pub id: String,
17    pub name: String,
18    pub kind: SymbolKind,
19    pub file_path: String,
20    pub start_line: u32,
21    pub end_line: u32,
22    pub start_byte: u32,
23    pub end_byte: u32,
24    pub parent_id: Option<String>,
25    pub signature: Option<String>,
26    pub visibility: Visibility,
27    pub is_async: bool,
28    pub docstring: Option<String>,
29    pub in_degree: u32,
30    pub content_hash: Option<String>,
31    pub subtree_hash: Option<String>,
32}
33
34impl Symbol {
35    /// Create a new symbol with a stable ID: `file_path:kind:qualified_name`.
36    ///
37    /// `parent_name` is the unqualified name chain of the parent symbol (e.g. `"Outer.Inner"`).
38    /// It is used to build the stable ID and also stored as `parent_id` (the parent's full ID).
39    ///
40    /// Optional fields (`signature`, `docstring`) default to `None`,
41    /// `visibility` defaults to `Public`, and `is_async` defaults to `false`.
42    /// Use the builder-style setters to override.
43    #[allow(clippy::too_many_arguments)]
44    pub fn new(
45        name: impl Into<String>,
46        kind: SymbolKind,
47        file_path: &str,
48        start_line: u32,
49        end_line: u32,
50        start_byte: u32,
51        end_byte: u32,
52        parent_name: Option<&str>,
53    ) -> Self {
54        let name = name.into();
55        let id = symbol_id(file_path, kind.as_str(), &name, parent_name);
56        Self {
57            id,
58            name,
59            kind,
60            file_path: file_path.to_string(),
61            start_line,
62            end_line,
63            start_byte,
64            end_byte,
65            parent_id: None,
66            signature: None,
67            visibility: Visibility::Public,
68            is_async: false,
69            docstring: None,
70            in_degree: 0,
71            content_hash: None,
72            subtree_hash: None,
73        }
74    }
75
76    /// Set the parent symbol ID.
77    pub fn with_parent(mut self, parent_id: Option<&str>) -> Self {
78        self.parent_id = parent_id.map(str::to_string);
79        self
80    }
81
82    /// Set the function/method signature.
83    pub fn with_signature(mut self, signature: Option<String>) -> Self {
84        self.signature = signature;
85        self
86    }
87
88    /// Set the visibility.
89    pub fn with_visibility(mut self, visibility: Visibility) -> Self {
90        self.visibility = visibility;
91        self
92    }
93
94    /// Mark as async.
95    pub fn with_async(mut self, is_async: bool) -> Self {
96        self.is_async = is_async;
97        self
98    }
99
100    /// Set the docstring.
101    pub fn with_docstring(mut self, docstring: Option<String>) -> Self {
102        self.docstring = docstring;
103        self
104    }
105}
106
107#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
108#[serde(rename_all = "snake_case")]
109pub enum SymbolKind {
110    Function,
111    Class,
112    Method,
113    Variable,
114    Import,
115    Interface,
116    Enum,
117    TypeAlias,
118    Trait,
119    Module,
120    Document,
121}
122
123impl SymbolKind {
124    pub fn as_str(&self) -> &'static str {
125        match self {
126            Self::Function => "function",
127            Self::Class => "class",
128            Self::Method => "method",
129            Self::Variable => "variable",
130            Self::Import => "import",
131            Self::Interface => "interface",
132            Self::Enum => "enum",
133            Self::TypeAlias => "type_alias",
134            Self::Trait => "trait",
135            Self::Module => "module",
136            Self::Document => "document",
137        }
138    }
139}
140
141impl std::str::FromStr for SymbolKind {
142    type Err = anyhow::Error;
143
144    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
145        match s {
146            "function" => Ok(Self::Function),
147            "class" => Ok(Self::Class),
148            "method" => Ok(Self::Method),
149            "variable" => Ok(Self::Variable),
150            "import" => Ok(Self::Import),
151            "interface" => Ok(Self::Interface),
152            "enum" => Ok(Self::Enum),
153            "type_alias" => Ok(Self::TypeAlias),
154            "trait" => Ok(Self::Trait),
155            "module" => Ok(Self::Module),
156            "document" => Ok(Self::Document),
157            _ => Err(anyhow::anyhow!("unknown symbol kind: '{s}'")),
158        }
159    }
160}
161
162impl std::fmt::Display for SymbolKind {
163    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
164        f.write_str(self.as_str())
165    }
166}
167
168#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
169#[serde(rename_all = "snake_case")]
170pub enum Visibility {
171    Public,
172    Private,
173    Protected,
174}
175
176impl Visibility {
177    pub fn as_str(&self) -> &'static str {
178        match self {
179            Self::Public => "public",
180            Self::Private => "private",
181            Self::Protected => "protected",
182        }
183    }
184
185    /// Parse a visibility string, defaulting to `Public` for unknown values.
186    pub fn from_str_lossy(s: &str) -> Self {
187        match s {
188            "private" => Self::Private,
189            "protected" => Self::Protected,
190            _ => Self::Public,
191        }
192    }
193}
194
195impl std::fmt::Display for Visibility {
196    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
197        f.write_str(self.as_str())
198    }
199}
200
201#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
202pub struct Edge {
203    pub source_id: String,
204    pub target_name: String,
205    pub target_id: Option<String>,
206    pub kind: EdgeKind,
207    pub file_path: String,
208    pub line: u32,
209}
210
211impl Edge {
212    /// Create a new edge with `target_id` set to `None` (resolved later by `db.resolve_edges()`).
213    pub fn new(
214        source_id: impl Into<String>,
215        target_name: impl Into<String>,
216        kind: EdgeKind,
217        file_path: &str,
218        line: u32,
219    ) -> Self {
220        Self {
221            source_id: source_id.into(),
222            target_name: target_name.into(),
223            target_id: None,
224            kind,
225            file_path: file_path.to_string(),
226            line,
227        }
228    }
229}
230
231#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
232#[serde(rename_all = "snake_case")]
233pub enum EdgeKind {
234    Calls,
235    Imports,
236    Inherits,
237    References,
238    Raises,
239    Implements,
240    TypeOf,
241}
242
243impl EdgeKind {
244    pub fn as_str(&self) -> &'static str {
245        match self {
246            Self::Calls => "calls",
247            Self::Imports => "imports",
248            Self::Inherits => "inherits",
249            Self::References => "references",
250            Self::Raises => "raises",
251            Self::Implements => "implements",
252            Self::TypeOf => "type_of",
253        }
254    }
255}
256
257impl std::str::FromStr for EdgeKind {
258    type Err = anyhow::Error;
259
260    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
261        match s {
262            "calls" => Ok(Self::Calls),
263            "imports" => Ok(Self::Imports),
264            "inherits" => Ok(Self::Inherits),
265            "references" => Ok(Self::References),
266            "raises" => Ok(Self::Raises),
267            "implements" => Ok(Self::Implements),
268            "type_of" => Ok(Self::TypeOf),
269            _ => Err(anyhow::anyhow!("unknown edge kind: '{s}'")),
270        }
271    }
272}
273
274impl std::fmt::Display for EdgeKind {
275    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
276        f.write_str(self.as_str())
277    }
278}
279
280#[derive(Debug, Clone, PartialEq, Serialize)]
281pub struct FileInfo {
282    pub path: String,
283    pub last_modified: f64,
284    pub hash: String,
285    pub language: String,
286    pub num_symbols: u32,
287}
288
289#[derive(Debug, Clone, Serialize)]
290pub struct ChangesResult {
291    pub changed_files: Vec<String>,
292    pub symbols: Vec<Symbol>,
293}
294
/// Build a stable symbol ID of the form `file_path:kind:qualified_name`.
///
/// The qualified name is `name` prefixed with the parent chain (joined by
/// `.`) when `parent_name` is given:
/// - Top-level function: `src/auth.py:function:validate`
/// - Method in class:    `src/auth.py:method:TokenService.validate`
/// - Nested class:       `src/auth.py:class:Outer.Inner`
///
/// No line or byte position goes into the ID, so it stays the same when the
/// symbol moves within its file. The inputs are concatenated verbatim; no
/// escaping or validation is applied.
pub fn symbol_id(file_path: &str, kind: &str, name: &str, parent_name: Option<&str>) -> String {
    let qualified_name = parent_name.map_or_else(
        || name.to_owned(),
        |parent| format!("{parent}.{name}"),
    );
    format!("{file_path}:{kind}:{qualified_name}")
}
309
/// Infer a language name from a path's file extension.
///
/// Returns `None` when the path has no extension, the extension is not valid
/// UTF-8, or the extension is not one of those listed below. Matching is
/// case-sensitive, so `main.PY` is not recognized.
pub fn detect_language(path: &Path) -> Option<&'static str> {
    let extension = path.extension().and_then(|ext| ext.to_str())?;
    let language = match extension {
        "py" | "pyi" => "python",
        "ts" => "typescript",
        "tsx" => "tsx",
        "js" | "jsx" | "mjs" | "cjs" => "javascript",
        "rs" => "rust",
        "go" => "go",
        "rb" => "ruby",
        "java" => "java",
        "md" => "markdown",
        _ => return None,
    };
    Some(language)
}
326
#[cfg(test)]
mod tests {
    use super::*;

    /// Every `SymbolKind` variant; keep in sync with the enum.
    const ALL_SYMBOL_KINDS: [SymbolKind; 11] = [
        SymbolKind::Function,
        SymbolKind::Class,
        SymbolKind::Method,
        SymbolKind::Variable,
        SymbolKind::Import,
        SymbolKind::Interface,
        SymbolKind::Enum,
        SymbolKind::TypeAlias,
        SymbolKind::Trait,
        SymbolKind::Module,
        SymbolKind::Document,
    ];

    /// Every `EdgeKind` variant; keep in sync with the enum.
    const ALL_EDGE_KINDS: [EdgeKind; 7] = [
        EdgeKind::Calls,
        EdgeKind::Imports,
        EdgeKind::Inherits,
        EdgeKind::References,
        EdgeKind::Raises,
        EdgeKind::Implements,
        EdgeKind::TypeOf,
    ];

    #[test]
    fn stable_id_top_level() {
        assert_eq!(
            symbol_id("src/auth.py", "function", "validate", None),
            "src/auth.py:function:validate"
        );
    }

    #[test]
    fn stable_id_with_parent() {
        assert_eq!(
            symbol_id("src/auth.py", "method", "validate", Some("TokenService")),
            "src/auth.py:method:TokenService.validate"
        );
    }

    #[test]
    fn stable_id_nested_parent() {
        assert_eq!(
            symbol_id("src/auth.py", "method", "do_work", Some("Outer.Inner")),
            "src/auth.py:method:Outer.Inner.do_work"
        );
    }

    #[test]
    fn stable_id_invariant_to_line_changes() {
        let sym_at_line_10 = Symbol::new(
            "validate",
            SymbolKind::Function,
            "src/auth.py",
            10,
            20,
            100,
            500,
            None,
        );
        let sym_at_line_50 = Symbol::new(
            "validate",
            SymbolKind::Function,
            "src/auth.py",
            50,
            60,
            800,
            1200,
            None,
        );
        assert_eq!(sym_at_line_10.id, sym_at_line_50.id);
    }

    #[test]
    fn stable_id_differs_by_kind() {
        let func_id = symbol_id("f.py", "function", "foo", None);
        let var_id = symbol_id("f.py", "variable", "foo", None);
        assert_ne!(func_id, var_id);
    }

    /// `parent_name` feeds the ID only; the parent link is set separately.
    #[test]
    fn new_uses_parent_name_for_id_only() {
        let sym = Symbol::new(
            "validate",
            SymbolKind::Method,
            "src/auth.py",
            1,
            2,
            3,
            4,
            Some("TokenService"),
        );
        assert_eq!(sym.id, "src/auth.py:method:TokenService.validate");
        assert_eq!(sym.name, "validate");
        assert_eq!(sym.parent_id, None);

        let linked = sym.with_parent(Some("src/auth.py:class:TokenService"));
        assert_eq!(
            linked.parent_id.as_deref(),
            Some("src/auth.py:class:TokenService")
        );
    }

    #[test]
    fn new_applies_documented_defaults() {
        let sym = Symbol::new("f", SymbolKind::Function, "a.py", 1, 1, 0, 1, None);
        assert_eq!(sym.visibility, Visibility::Public);
        assert!(!sym.is_async);
        assert_eq!(sym.signature, None);
        assert_eq!(sym.docstring, None);
        assert_eq!(sym.in_degree, 0);
        assert_eq!(sym.content_hash, None);
        assert_eq!(sym.subtree_hash, None);
    }

    /// Guards the `as_str` / `FromStr` / `Display` string tables against drift.
    #[test]
    fn symbol_kind_round_trips_through_str() {
        for kind in ALL_SYMBOL_KINDS.iter().copied() {
            let parsed: SymbolKind = kind
                .as_str()
                .parse()
                .expect("as_str output must parse back");
            assert_eq!(parsed, kind);
            assert_eq!(kind.to_string(), kind.as_str());
        }
    }

    /// Guards the `as_str` / `FromStr` / `Display` string tables against drift.
    #[test]
    fn edge_kind_round_trips_through_str() {
        for kind in ALL_EDGE_KINDS.iter().copied() {
            let parsed: EdgeKind = kind
                .as_str()
                .parse()
                .expect("as_str output must parse back");
            assert_eq!(parsed, kind);
            assert_eq!(kind.to_string(), kind.as_str());
        }
    }

    #[test]
    fn unknown_kinds_are_rejected() {
        assert!("".parse::<SymbolKind>().is_err());
        assert!("Function".parse::<SymbolKind>().is_err());
        assert!("struct".parse::<SymbolKind>().is_err());
        assert!("".parse::<EdgeKind>().is_err());
        assert!("Calls".parse::<EdgeKind>().is_err());
        assert!("typeof".parse::<EdgeKind>().is_err());
    }

    #[test]
    fn visibility_from_str_lossy_defaults_to_public() {
        assert_eq!(Visibility::from_str_lossy("private"), Visibility::Private);
        assert_eq!(
            Visibility::from_str_lossy("protected"),
            Visibility::Protected
        );
        assert_eq!(Visibility::from_str_lossy("public"), Visibility::Public);
        assert_eq!(Visibility::from_str_lossy(""), Visibility::Public);
        assert_eq!(Visibility::from_str_lossy("internal"), Visibility::Public);
    }

    #[test]
    fn edge_new_is_unresolved() {
        let edge = Edge::new("a.py:function:f", "g", EdgeKind::Calls, "a.py", 7);
        assert_eq!(edge.source_id, "a.py:function:f");
        assert_eq!(edge.target_name, "g");
        assert_eq!(edge.target_id, None);
        assert_eq!(edge.kind, EdgeKind::Calls);
        assert_eq!(edge.file_path, "a.py");
        assert_eq!(edge.line, 7);
    }

    #[test]
    fn test_detect_language() {
        assert_eq!(detect_language(Path::new("src/main.py")), Some("python"));
        assert_eq!(detect_language(Path::new("lib.pyi")), Some("python"));
        assert_eq!(detect_language(Path::new("app.ts")), Some("typescript"));
        assert_eq!(detect_language(Path::new("App.tsx")), Some("tsx"));
        assert_eq!(detect_language(Path::new("index.js")), Some("javascript"));
        assert_eq!(detect_language(Path::new("util.mjs")), Some("javascript"));
        assert_eq!(detect_language(Path::new("main.rs")), Some("rust"));
        assert_eq!(detect_language(Path::new("server.go")), Some("go"));
        assert_eq!(detect_language(Path::new("app.rb")), Some("ruby"));
        assert_eq!(detect_language(Path::new("README.md")), Some("markdown"));
        assert_eq!(detect_language(Path::new("Makefile")), None);
        assert_eq!(detect_language(Path::new("Main.java")), Some("java"));
    }

    #[test]
    fn detect_language_remaining_js_extensions_and_unknowns() {
        assert_eq!(detect_language(Path::new("view.jsx")), Some("javascript"));
        assert_eq!(detect_language(Path::new("conf.cjs")), Some("javascript"));
        assert_eq!(detect_language(Path::new("notes.txt")), None);
        assert_eq!(detect_language(Path::new("")), None);
    }
}