nyx_scanner/symbol/mod.rs
1//! Core language and function identity types.
2//!
3//! [`Lang`] is the 10-language enum (Rust, C, C++, Java, Go, PHP, Python,
//! Ruby, TypeScript, JavaScript). [`FuncKey`] is the canonical cross-file
//! function identity: language, namespace (project-relative file path),
//! container (class/struct/module), name, arity, an optional disambiguator
//! for same-name definitions, and a structural [`FuncKind`] tag.
7//!
8//! [`FuncKey`] is the node type in the call graph and the lookup key in
9//! [`crate::summary::GlobalSummaries`]. [`FuncKind`] distinguishes constructors,
10//! methods, closures, and free functions so callers can apply language-specific
11//! resolution heuristics.
12
13use serde::{Deserialize, Serialize};
14use std::fmt;
15
/// Supported source-code languages.
///
/// With `#[serde(rename_all = "lowercase")]` each variant is serialized as its
/// lowercased name (`Cpp` -> `"cpp"`, `TypeScript` -> `"typescript"`), which
/// is the same spelling `Lang::as_str` returns.
///
/// `Default` is provided only so that [`FuncKey`] can derive `Default` for
/// test ergonomics; production code always constructs a `Lang` explicitly
/// (via `from_slug` / `from_extension`). `Rust` was chosen as the default
/// purely because it is the host language of the scanner; tests that rely
/// on lang-specific behaviour should set `lang` explicitly, not rely on the
/// default.
#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Lang {
    /// Rust (slug `"rust"`); also the `Default` variant.
    #[default]
    Rust,
    /// C (slug `"c"`).
    C,
    /// C++ (slug `"cpp"`).
    Cpp,
    /// Java (slug `"java"`).
    Java,
    /// Go (slug `"go"`).
    Go,
    /// PHP (slug `"php"`).
    Php,
    /// Python (slug `"python"`).
    Python,
    /// Ruby (slug `"ruby"`).
    Ruby,
    /// TypeScript (slug `"typescript"`).
    TypeScript,
    /// JavaScript (slug `"javascript"`).
    JavaScript,
}
39
40impl Lang {
41 /// Parse a language slug (as returned by `lang_for_path`) into a `Lang`.
42 pub fn from_slug(s: &str) -> Option<Lang> {
43 match s {
44 "rust" => Some(Lang::Rust),
45 "c" => Some(Lang::C),
46 "cpp" => Some(Lang::Cpp),
47 "java" => Some(Lang::Java),
48 "go" => Some(Lang::Go),
49 "php" => Some(Lang::Php),
50 "python" => Some(Lang::Python),
51 "ruby" => Some(Lang::Ruby),
52 "typescript" | "ts" => Some(Lang::TypeScript),
53 "javascript" | "js" => Some(Lang::JavaScript),
54 _ => None,
55 }
56 }
57
58 /// Derive `Lang` from a file extension (e.g. `"rs"`, `"py"`).
59 ///
60 /// Mirrors the extension→language mapping in `ast::lang_for_path()` so that
61 /// callers outside `ast` can obtain a `Lang` from a path without needing a
62 /// `FuncSummary`.
63 pub fn from_extension(ext: &str) -> Option<Lang> {
64 match ext {
65 "rs" => Some(Lang::Rust),
66 "c" => Some(Lang::C),
67 "cpp" => Some(Lang::Cpp),
68 "java" => Some(Lang::Java),
69 "go" => Some(Lang::Go),
70 "php" => Some(Lang::Php),
71 "py" => Some(Lang::Python),
72 "ts" => Some(Lang::TypeScript),
73 "js" => Some(Lang::JavaScript),
74 "rb" => Some(Lang::Ruby),
75 _ => None,
76 }
77 }
78
79 /// Canonical slug string for this language.
80 pub fn as_str(&self) -> &'static str {
81 match self {
82 Lang::Rust => "rust",
83 Lang::C => "c",
84 Lang::Cpp => "cpp",
85 Lang::Java => "java",
86 Lang::Go => "go",
87 Lang::Php => "php",
88 Lang::Python => "python",
89 Lang::Ruby => "ruby",
90 Lang::TypeScript => "typescript",
91 Lang::JavaScript => "javascript",
92 }
93 }
94}
95
96impl fmt::Display for Lang {
97 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
98 f.write_str(self.as_str())
99 }
100}
101
/// The structural role a function plays in its source.
///
/// Used alongside `container` and `disambig` to distinguish same-name
/// definitions. `Function` is the `Default` variant, and the `kind` field of
/// [`FuncKey`] is marked `#[serde(default)]`, so JSON written without a
/// `kind` field deserializes to `Function` and old summaries load cleanly.
///
/// Note that there is no serde rename attribute here, so the serialized form
/// uses the variant names as written (`"Function"`, `"Method"`, ...), not the
/// short slugs returned by `FuncKind::as_str`.
#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub enum FuncKind {
    /// Free/top-level function (Rust `fn`, Go `func`, Python module-level `def`).
    /// This is the `Default` variant.
    #[default]
    Function,
    /// Method bound to a class / impl / struct / interface receiver.
    Method,
    /// Constructor (`__init__`, `new`, class constructor, Java `<init>`).
    Constructor,
    /// Anonymous / closure / lambda / arrow function.
    Closure,
    /// Getter (property getter, Ruby `attr_reader` style).
    Getter,
    /// Setter (property setter, Ruby `attr_writer` style).
    Setter,
    /// Implicit top-level / module body ("main script").
    TopLevel,
}
125
126impl FuncKind {
127 /// Short slug for display / logging.
128 pub fn as_str(&self) -> &'static str {
129 match self {
130 FuncKind::Function => "fn",
131 FuncKind::Method => "method",
132 FuncKind::Constructor => "ctor",
133 FuncKind::Closure => "closure",
134 FuncKind::Getter => "getter",
135 FuncKind::Setter => "setter",
136 FuncKind::TopLevel => "toplevel",
137 }
138 }
139
140 /// Parse a kind slug (as written by `as_str`) back into a `FuncKind`.
141 /// Unrecognized slugs fall back to `Function` to keep round-trips lenient.
142 pub fn from_slug(s: &str) -> FuncKind {
143 match s {
144 "fn" => FuncKind::Function,
145 "method" => FuncKind::Method,
146 "ctor" => FuncKind::Constructor,
147 "closure" => FuncKind::Closure,
148 "getter" => FuncKind::Getter,
149 "setter" => FuncKind::Setter,
150 "toplevel" => FuncKind::TopLevel,
151 _ => FuncKind::Function,
152 }
153 }
154}
155
/// Uniquely identifies a function across the entire project.
///
/// Identity is a 6-tuple: `(lang, namespace, container, name, arity, disambig)`
/// plus a structural `kind` tag. Every field is deliberately narrow so
/// legitimately-distinct definitions never collide:
///
/// * `lang` — prevents cross-language aliasing.
/// * `namespace` — project-relative source file path.
/// * `container` — enclosing class / impl / module / namespace / outer function
///   (qualified with `::` for nested containers). Empty string for free
///   top-level functions.
/// * `name` — leaf identifier as written in source.
/// * `arity` — parameter count (including `self`/`this`) for languages that
///   discriminate by arity. `None` for unknown.
/// * `disambig` — numeric discriminator for same-name definitions in the same
///   container (closure byte offset, nested-function occurrence index).
///   `None` for the common case of a single definition.
/// * `kind` — structural role (see [`FuncKind`]). Separates e.g. a getter
///   named `size` from a method `size()`.
///
/// `PartialEq`, `Eq` and `Hash` are derived, so every field above (including
/// `kind`) takes part when keys are compared or used as map keys.
///
/// Backward-compat: `container`, `disambig`, and `kind` all have serde
/// defaults, so JSON summaries written by the old identity model still
/// deserialise cleanly and land on `FuncKind::Function` with empty
/// container/disambig.
#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub struct FuncKey {
    /// Source language of the definition.
    pub lang: Lang,
    /// Project-relative file path (e.g. `"src/lib.rs"`).
    pub namespace: String,
    /// Enclosing container path (class / impl / module / nested function).
    /// Empty for free top-level functions. Segments joined with `::`.
    /// Defaults to the empty string when absent from serialized input.
    #[serde(default)]
    pub container: String,
    /// Leaf identifier as written in source.
    pub name: String,
    /// Parameter count, or `None` when unknown.
    pub arity: Option<usize>,
    /// Numeric discriminator for same-name siblings (closures, duplicate defs).
    /// Typically the function node's start byte offset.
    /// Defaults to `None` when absent from serialized input.
    #[serde(default)]
    pub disambig: Option<u32>,
    /// Structural role: Function, Method, Constructor, Closure, etc.
    /// Defaults to `FuncKind::Function` when absent from serialized input.
    #[serde(default)]
    pub kind: FuncKind,
}
199
200impl FuncKey {
201 /// Construct a plain free-function key (no container, no disambig).
202 /// Kept as a convenience for call sites and tests that do not need the
203 /// extra discriminators.
204 pub fn new_function(
205 lang: Lang,
206 namespace: impl Into<String>,
207 name: impl Into<String>,
208 arity: Option<usize>,
209 ) -> Self {
210 FuncKey {
211 lang,
212 namespace: namespace.into(),
213 container: String::new(),
214 name: name.into(),
215 arity,
216 disambig: None,
217 kind: FuncKind::Function,
218 }
219 }
220
221 /// Fully-qualified name like `"Class::method"` or just `"func"` for free
222 /// functions. Used for diagnostics and container-aware callee matching.
223 pub fn qualified_name(&self) -> String {
224 if self.container.is_empty() {
225 self.name.clone()
226 } else {
227 format!("{}::{}", self.container, self.name)
228 }
229 }
230}
231
232impl fmt::Display for FuncKey {
233 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
234 write!(f, "{}::{}::", self.lang, self.namespace)?;
235 if !self.container.is_empty() {
236 write!(f, "{}::", self.container)?;
237 }
238 write!(f, "{}", self.name)?;
239 if let Some(a) = self.arity {
240 write!(f, "/{a}")?;
241 }
242 if let Some(d) = self.disambig {
243 write!(f, "#{d}")?;
244 }
245 if self.kind != FuncKind::Function {
246 write!(f, "[{}]", self.kind.as_str())?;
247 }
248 Ok(())
249 }
250}
251
/// Strip `root` prefix from `abs_path` to produce a stable project-relative path.
///
/// Trailing `/` characters on `root` are ignored, and the leading `/` left
/// over after stripping is removed from the result.
///
/// The prefix is only accepted when it ends on a path-component boundary:
/// whatever follows `root` in `abs_path` must be empty or start with `/`.
/// Without that check a root of `/home/u/proj` would also "match"
/// `/home/u/project2/src/lib.rs` and yield the bogus namespace
/// `ect2/src/lib.rs`.
///
/// Falls back to the full path if `root` is `None` or stripping fails
/// (e.g. in tests with synthetic paths).
pub fn normalize_namespace(abs_path: &str, root: Option<&str>) -> String {
    if let Some(r) = root {
        let r = r.trim_end_matches('/');
        if let Some(rest) = abs_path.strip_prefix(r) {
            // Reject matches that end in the middle of a path component.
            if rest.is_empty() || rest.starts_with('/') {
                return rest.trim_start_matches('/').to_string();
            }
        }
    }
    abs_path.to_string()
}
264
265#[cfg(test)]
266mod tests;