Skip to main content

nyx_scanner/symbol/
mod.rs

1//! Core language and function identity types.
2//!
3//! [`Lang`] is the 10-language enum (Rust, C, C++, Java, Go, PHP, Python,
4//! Ruby, TypeScript, JavaScript). [`FuncKey`] is the canonical cross-file
5//! function identity: name, arity, language, container (class/struct/module),
6//! and an optional disambiguator for overloaded functions.
7//!
8//! [`FuncKey`] is the node type in the call graph and the lookup key in
9//! [`crate::summary::GlobalSummaries`]. [`FuncKind`] distinguishes constructors,
10//! methods, closures, and free functions so callers can apply language-specific
11//! resolution heuristics.
12
13use serde::{Deserialize, Serialize};
14use std::fmt;
15
/// Supported source-code languages.
///
/// Each variant serialises under its lowercased name (see the
/// `rename_all = "lowercase"` attribute below), which is the same string
/// that `Lang::as_str` returns (e.g. `TypeScript` <-> `"typescript"`).
///
/// `Default` is provided only so that [`FuncKey`] can derive `Default` for
/// test ergonomics; production code always constructs a `Lang` explicitly
/// (via `from_slug` / `from_extension`).  `Rust` was chosen as the default
/// purely because it is the host language of the scanner; tests that rely
/// on lang-specific behaviour should set `lang` explicitly, not rely on the
/// default.
#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Lang {
    /// Rust.  Also the `Default` variant (see the type-level note above).
    #[default]
    Rust,
    /// C.
    C,
    /// C++ (slug `"cpp"`).
    Cpp,
    /// Java.
    Java,
    /// Go.
    Go,
    /// PHP.
    Php,
    /// Python.
    Python,
    /// Ruby.
    Ruby,
    /// TypeScript.
    TypeScript,
    /// JavaScript.
    JavaScript,
}
39
40impl Lang {
41    /// Parse a language slug (as returned by `lang_for_path`) into a `Lang`.
42    pub fn from_slug(s: &str) -> Option<Lang> {
43        match s {
44            "rust" => Some(Lang::Rust),
45            "c" => Some(Lang::C),
46            "cpp" => Some(Lang::Cpp),
47            "java" => Some(Lang::Java),
48            "go" => Some(Lang::Go),
49            "php" => Some(Lang::Php),
50            "python" => Some(Lang::Python),
51            "ruby" => Some(Lang::Ruby),
52            "typescript" | "ts" => Some(Lang::TypeScript),
53            "javascript" | "js" => Some(Lang::JavaScript),
54            _ => None,
55        }
56    }
57
58    /// Derive `Lang` from a file extension (e.g. `"rs"`, `"py"`).
59    ///
60    /// Mirrors the extension→language mapping in `ast::lang_for_path()` so that
61    /// callers outside `ast` can obtain a `Lang` from a path without needing a
62    /// `FuncSummary`.
63    pub fn from_extension(ext: &str) -> Option<Lang> {
64        match ext {
65            "rs" => Some(Lang::Rust),
66            "c" => Some(Lang::C),
67            "cpp" => Some(Lang::Cpp),
68            "java" => Some(Lang::Java),
69            "go" => Some(Lang::Go),
70            "php" => Some(Lang::Php),
71            "py" => Some(Lang::Python),
72            "ts" => Some(Lang::TypeScript),
73            "js" => Some(Lang::JavaScript),
74            "rb" => Some(Lang::Ruby),
75            _ => None,
76        }
77    }
78
79    /// Canonical slug string for this language.
80    pub fn as_str(&self) -> &'static str {
81        match self {
82            Lang::Rust => "rust",
83            Lang::C => "c",
84            Lang::Cpp => "cpp",
85            Lang::Java => "java",
86            Lang::Go => "go",
87            Lang::Php => "php",
88            Lang::Python => "python",
89            Lang::Ruby => "ruby",
90            Lang::TypeScript => "typescript",
91            Lang::JavaScript => "javascript",
92        }
93    }
94}
95
96impl fmt::Display for Lang {
97    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
98        f.write_str(self.as_str())
99    }
100}
101
/// The structural role a function plays in its source.
///
/// Used alongside `container` and `disambig` to distinguish same-name
/// definitions.
///
/// `Function` is the `Default` variant.  Because [`FuncKey`] marks its `kind`
/// field `#[serde(default)]`, JSON written before this field existed (i.e.
/// with `kind` absent) loads cleanly as `Function`.
///
/// No serde renaming is applied here, so variants serialise under their
/// Rust names (`"Function"`, `"Method"`, ...), not under the short slugs
/// returned by `FuncKind::as_str`.
// NOTE(review): no `#[serde(other)]` variant is declared, so an unrecognised
// variant string is presumably still a deserialisation error; only a
// *missing* field falls back.  Confirm against the serde version in use.
#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub enum FuncKind {
    /// Free/top-level function (Rust `fn`, Go `func`, Python module-level `def`).
    #[default]
    Function,
    /// Method bound to a class / impl / struct / interface receiver.
    Method,
    /// Constructor (`__init__`, `new`, class constructor, Java `<init>`).
    Constructor,
    /// Anonymous / closure / lambda / arrow function.
    Closure,
    /// Getter (property getter, Ruby `attr_reader` style).
    Getter,
    /// Setter (property setter, Ruby `attr_writer` style).
    Setter,
    /// Implicit top-level / module body ("main script").
    TopLevel,
}
125
126impl FuncKind {
127    /// Short slug for display / logging.
128    pub fn as_str(&self) -> &'static str {
129        match self {
130            FuncKind::Function => "fn",
131            FuncKind::Method => "method",
132            FuncKind::Constructor => "ctor",
133            FuncKind::Closure => "closure",
134            FuncKind::Getter => "getter",
135            FuncKind::Setter => "setter",
136            FuncKind::TopLevel => "toplevel",
137        }
138    }
139
140    /// Parse a kind slug (as written by `as_str`) back into a `FuncKind`.
141    /// Unrecognized slugs fall back to `Function` to keep round-trips lenient.
142    pub fn from_slug(s: &str) -> FuncKind {
143        match s {
144            "fn" => FuncKind::Function,
145            "method" => FuncKind::Method,
146            "ctor" => FuncKind::Constructor,
147            "closure" => FuncKind::Closure,
148            "getter" => FuncKind::Getter,
149            "setter" => FuncKind::Setter,
150            "toplevel" => FuncKind::TopLevel,
151            _ => FuncKind::Function,
152        }
153    }
154}
155
/// Uniquely identifies a function across the entire project.
///
/// Identity is a 6-tuple: `(lang, namespace, container, name, arity, disambig)`
/// plus a structural `kind` tag.  Every field is deliberately narrow so
/// legitimately-distinct definitions never collide:
///
/// * `lang`: prevents cross-language aliasing.
/// * `namespace`: project-relative source file path.
/// * `container`: enclosing class / impl / module / namespace / outer function
///   (qualified with `::` for nested containers).  Empty string for free
///   top-level functions.
/// * `name`: leaf identifier as written in source.
/// * `arity`: parameter count (including `self`/`this`) for languages that
///   discriminate by arity.  `None` for unknown.
/// * `disambig`: numeric discriminator for same-name definitions in the same
///   container (closure byte offset, nested-function occurrence index).
///   `None` for the common case of a single definition.
/// * `kind`: structural role (see [`FuncKind`]).  Separates e.g. a getter
///   named `size` from a method `size()`.
///
/// All seven fields take part in the derived `Hash` / `PartialEq` / `Eq`,
/// so two keys denote the same function only when every field matches.
///
/// Backward-compat: `container`, `disambig`, and `kind` all have serde
/// defaults, so JSON summaries written by the old identity model still
/// deserialise cleanly and land on `FuncKind::Function` with empty
/// container/disambig.
#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub struct FuncKey {
    /// Source language of the definition.
    pub lang: Lang,
    /// Project-relative file path (e.g. `"src/lib.rs"`).
    pub namespace: String,
    /// Enclosing container path (class / impl / module / nested function).
    /// Empty for free top-level functions.  Segments joined with `::`.
    #[serde(default)]
    pub container: String,
    /// Leaf identifier as written in source.
    pub name: String,
    /// Parameter count, or `None` when unknown.
    pub arity: Option<usize>,
    /// Numeric discriminator for same-name siblings (closures, duplicate defs).
    /// Typically the function node's start byte offset.
    #[serde(default)]
    pub disambig: Option<u32>,
    /// Structural role: Function, Method, Constructor, Closure, etc.
    #[serde(default)]
    pub kind: FuncKind,
}
199
200impl FuncKey {
201    /// Construct a plain free-function key (no container, no disambig).
202    /// Kept as a convenience for call sites and tests that do not need the
203    /// extra discriminators.
204    pub fn new_function(
205        lang: Lang,
206        namespace: impl Into<String>,
207        name: impl Into<String>,
208        arity: Option<usize>,
209    ) -> Self {
210        FuncKey {
211            lang,
212            namespace: namespace.into(),
213            container: String::new(),
214            name: name.into(),
215            arity,
216            disambig: None,
217            kind: FuncKind::Function,
218        }
219    }
220
221    /// Fully-qualified name like `"Class::method"` or just `"func"` for free
222    /// functions.  Used for diagnostics and container-aware callee matching.
223    pub fn qualified_name(&self) -> String {
224        if self.container.is_empty() {
225            self.name.clone()
226        } else {
227            format!("{}::{}", self.container, self.name)
228        }
229    }
230}
231
232impl fmt::Display for FuncKey {
233    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
234        write!(f, "{}::{}::", self.lang, self.namespace)?;
235        if !self.container.is_empty() {
236            write!(f, "{}::", self.container)?;
237        }
238        write!(f, "{}", self.name)?;
239        if let Some(a) = self.arity {
240            write!(f, "/{a}")?;
241        }
242        if let Some(d) = self.disambig {
243            write!(f, "#{d}")?;
244        }
245        if self.kind != FuncKind::Function {
246            write!(f, "[{}]", self.kind.as_str())?;
247        }
248        Ok(())
249    }
250}
251
/// Strip the `root` prefix from `abs_path` to produce a stable
/// project-relative path.
///
/// Trailing `/` characters on `root` and leading `/` characters on the
/// remainder are ignored, so `"/proj"` and `"/proj/"` behave the same and
/// the result never starts with a separator.
///
/// `root` is only treated as a prefix when it ends on a path-component
/// boundary: the remainder must be empty or begin with `/`.  A plain string
/// prefix is not enough; root `"/proj/foo"` must not match
/// `"/proj/foobar/x.rs"` (which would otherwise yield the bogus namespace
/// `"bar/x.rs"`).
///
/// Falls back to the full, unmodified `abs_path` when `root` is `None` or is
/// not a component-wise prefix (e.g. in tests with synthetic paths).
///
/// Only `/` is recognised as a separator.
pub fn normalize_namespace(abs_path: &str, root: Option<&str>) -> String {
    let relative = root
        .map(|r| r.trim_end_matches('/'))
        .and_then(|r| abs_path.strip_prefix(r))
        // Reject matches that end in the middle of a path component.
        .filter(|rest| rest.is_empty() || rest.starts_with('/'));

    match relative {
        Some(rest) => rest.trim_start_matches('/').to_string(),
        None => abs_path.to_string(),
    }
}
264
265#[cfg(test)]
266mod tests;