sdivi_patterns/queries/mod.rs
1//! Per-category node-kind classification rules.
2//!
3//! Each sub-module declares the tree-sitter node kinds that map to a built-in
4//! pattern category. Two classifiers are provided:
5//!
6//! - [`category_for_node_kind`] — node-kind-only, fast, does not inspect source text.
7//! - [`classify_hint`] — node-kind + callee-text, uses per-language regex tables.
8//! Prefer this for foreign extractors that have full [`PatternHintInput`] access.
9//!
10//! Category names are stable from Milestone 6 forward. Renaming any name is a
11//! breaking change requiring a `MIGRATION_NOTES.md` entry.
12
13pub mod async_patterns;
14pub mod class_hierarchy;
15pub mod data_access;
16pub mod error_handling;
17pub mod logging;
18pub mod resource_management;
19pub mod state_management;
20pub mod type_assertions;
21
22use crate::hint_input::PatternHintInput;
23
/// Every built-in category name, listed in stable alphabetical order.
///
/// `logging` is a catalog-only category for `snapshot_version "1.0"`. It is
/// listed so that embedders can emit
/// `PatternInstanceInput { category: "logging", … }` and have those instances
/// round-trip through `compute_pattern_metrics` and `compute_delta`.
///
/// The node-kind-only classifier [`category_for_node_kind`] never yields
/// `Some("logging")`, because the relevant node kinds overlap with
/// `data_access` and `resource_management`. The callee-text-aware
/// [`classify_hint`] is the classifier that can return `"logging"`.
///
/// # Examples
///
/// ```rust
/// use sdivi_patterns::queries::ALL_CATEGORIES;
///
/// assert_eq!(ALL_CATEGORIES.len(), 8);
/// for name in ["async_patterns", "data_access", "error_handling", "logging"] {
///     assert!(ALL_CATEGORIES.contains(&name));
/// }
/// ```
pub const ALL_CATEGORIES: &[&str] = &[
    // Keep this list sorted alphabetically; the order is documented as stable.
    "async_patterns",
    "class_hierarchy",
    "data_access",
    "error_handling",
    "logging",
    "resource_management",
    "state_management",
    "type_assertions",
];
53
54/// Maps a tree-sitter `node_kind` to the built-in category it belongs to.
55///
56/// Returns `None` if the node kind does not belong to any category.
57/// The `_language` parameter is reserved for future per-language overrides.
58///
59/// **Used internally by `classify_hint` for non-call/non-macro node kinds;
60/// the native pipeline no longer calls this function directly (as of M33).**
61/// Foreign extractors with full [`PatternHintInput`] access should call
62/// [`classify_hint`] instead. This function is preserved for callers that have a
63/// node kind but no source text — and for backward compatibility with embedders
64/// that integrated against the M29 API.
65///
66/// # Examples
67///
68/// ```rust
69/// use sdivi_patterns::queries::category_for_node_kind;
70///
71/// assert_eq!(category_for_node_kind("try_expression", "rust"), Some("error_handling"));
72/// assert_eq!(category_for_node_kind("await_expression", "rust"), Some("async_patterns"));
73/// assert_eq!(category_for_node_kind("unknown_node", "rust"), None);
74/// ```
75///
76/// # See also
77///
78/// [`classify_hint`] — callee-text-aware classifier that returns `["logging"]` for
79/// matching callees and disambiguates Rust `macro_invocation` into `logging` vs
80/// `resource_management`. This is what the native pipeline calls since M33.
81pub fn category_for_node_kind(node_kind: &str, _language: &str) -> Option<&'static str> {
82 if async_patterns::NODE_KINDS.contains(&node_kind) {
83 Some("async_patterns")
84 } else if class_hierarchy::NODE_KINDS.contains(&node_kind) {
85 Some("class_hierarchy")
86 } else if data_access::NODE_KINDS.contains(&node_kind) {
87 Some("data_access")
88 } else if error_handling::NODE_KINDS.contains(&node_kind) {
89 Some("error_handling")
90 } else if resource_management::NODE_KINDS.contains(&node_kind) {
91 Some("resource_management")
92 } else if state_management::NODE_KINDS.contains(&node_kind) {
93 Some("state_management")
94 } else if type_assertions::NODE_KINDS.contains(&node_kind) {
95 Some("type_assertions")
96 } else {
97 None
98 }
99}
100
101/// Classify a [`PatternHintInput`] using both node kind and callee-text inspection.
102///
103/// Returns a `Vec` of category name(s) the hint belongs to. In v0 the return is
104/// always 0 or 1 entries — the regex tables are designed to be disjoint per language.
105/// The `Vec` return is forward-looking: a future category that legitimately co-occurs
106/// with another (e.g. `console.error(err)` as both `logging` and `error_handling`)
107/// can be added without an API break.
108///
109/// ## Dispatch order for `call_expression` / `call`
110///
111/// Priority: `async_patterns` > `logging` > `data_access`. The first match wins.
112/// The order is load-bearing only if the disjoint-regex invariant is ever relaxed.
113///
114/// ## `macro_invocation`
115///
116/// Defaults to `resource_management`. Rust logging macros (`tracing::*!`,
117/// `log::*!`, `println!`, `eprintln!`, `print!`, `eprint!`, `dbg!`) are
118/// reclassified as `logging` via [`resource_management::excludes_callee`].
119///
120/// ## Other node kinds
121///
122/// Falls through to [`category_for_node_kind`] — the existing node-kind-only table.
123///
124/// # Examples
125///
126/// ```rust
127/// use sdivi_patterns::queries::classify_hint;
128/// use sdivi_patterns::PatternHintInput;
129///
130/// let hint = PatternHintInput {
131/// node_kind: "call_expression".to_string(),
132/// text: "console.log(\"x\")".to_string(),
133/// };
134/// assert_eq!(classify_hint(&hint, "typescript"), vec!["logging"]);
135///
136/// let mac = PatternHintInput {
137/// node_kind: "macro_invocation".to_string(),
138/// text: "vec![1, 2, 3]".to_string(),
139/// };
140/// assert_eq!(classify_hint(&mac, "rust"), vec!["resource_management"]);
141/// ```
142pub fn classify_hint(hint: &PatternHintInput, language: &str) -> Vec<&'static str> {
143 match hint.node_kind.as_str() {
144 "call_expression" | "call" => {
145 if async_patterns::matches_callee(&hint.text, language) {
146 return vec!["async_patterns"];
147 }
148 if logging::matches_callee(&hint.text, language) {
149 return vec!["logging"];
150 }
151 if data_access::matches_callee(&hint.text, language) {
152 return vec!["data_access"];
153 }
154 vec![]
155 }
156 "macro_invocation" => {
157 if resource_management::excludes_callee(&hint.text, language)
158 && logging::matches_callee(&hint.text, language)
159 {
160 return vec!["logging"];
161 }
162 vec!["resource_management"]
163 }
164 other => category_for_node_kind(other, language)
165 .map(|c| vec![c])
166 .unwrap_or_default(),
167 }
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that the node-kind-only classifier maps `(node_kind, language)`
    /// to `expected`.
    ///
    /// `#[track_caller]` makes a failure report the calling test's line rather
    /// than this helper's.
    #[track_caller]
    fn expect_category(node_kind: &str, language: &str, expected: Option<&'static str>) {
        assert_eq!(category_for_node_kind(node_kind, language), expected);
    }

    #[test]
    fn try_expression_is_error_handling() {
        expect_category("try_expression", "rust", Some("error_handling"));
    }

    #[test]
    fn await_expression_is_async_patterns() {
        expect_category("await_expression", "rust", Some("async_patterns"));
    }

    #[test]
    fn closure_expression_is_state_management() {
        expect_category("closure_expression", "rust", Some("state_management"));
    }

    #[test]
    fn macro_invocation_is_resource_management() {
        expect_category("macro_invocation", "rust", Some("resource_management"));
    }

    #[test]
    fn unknown_node_kind_returns_none() {
        expect_category("unknown_xyz", "rust", None);
    }

    #[test]
    fn all_categories_has_eight_entries() {
        assert_eq!(ALL_CATEGORIES.len(), 8);
        assert!(ALL_CATEGORIES.contains(&"data_access"));
    }

    #[test]
    fn logging_is_in_all_categories() {
        assert!(ALL_CATEGORIES.contains(&"logging"));
    }

    #[test]
    fn class_hierarchy_is_in_all_categories() {
        assert!(ALL_CATEGORIES.contains(&"class_hierarchy"));
    }

    #[test]
    fn class_declaration_is_class_hierarchy() {
        expect_category("class_declaration", "typescript", Some("class_hierarchy"));
    }

    #[test]
    fn class_definition_is_class_hierarchy() {
        expect_category("class_definition", "python", Some("class_hierarchy"));
    }

    #[test]
    fn impl_item_is_class_hierarchy() {
        expect_category("impl_item", "rust", Some("class_hierarchy"));
    }

    #[test]
    fn interface_declaration_is_class_hierarchy() {
        expect_category("interface_declaration", "java", Some("class_hierarchy"));
    }

    #[test]
    fn abstract_class_declaration_is_class_hierarchy() {
        expect_category(
            "abstract_class_declaration",
            "typescript",
            Some("class_hierarchy"),
        );
    }

    // M30 sentinel: exercises `category_for_node_kind` (node-kind-only, unchanged).
    // M33 promoted `logging` via `classify_hint`; `category_for_node_kind` is
    // intentionally unchanged. See `tests/m33_sentinels.rs` for the M33 counterpart.
    #[test]
    fn category_for_node_kind_never_returns_logging() {
        // Returning `logging` requires callee-text inspection (see `classify_hint`),
        // which this function does not do. Unchanged through M32 and M33.
        const CALL_LIKE_KINDS: [&str; 3] = ["call_expression", "call", "macro_invocation"];
        const LANGUAGES: [&str; 6] = ["rust", "python", "typescript", "javascript", "go", "java"];

        for kind in CALL_LIKE_KINDS {
            for lang in LANGUAGES {
                assert_ne!(
                    category_for_node_kind(kind, lang),
                    Some("logging"),
                    "logging is catalog-only in v0 for category_for_node_kind; \
                     routing for ({kind}, {lang}) would steal from data_access/resource_management"
                );
            }
        }
    }

    // M33 positive sentinels live in `tests/m33_sentinels.rs` (file ceiling).

    #[test]
    fn call_expression_is_data_access() {
        expect_category("call_expression", "typescript", Some("data_access"));
        expect_category("call", "python", Some("data_access"));
    }
}