Skip to main content

sdivi_patterns/queries/
mod.rs

//! Per-category node-kind classification rules.
//!
//! Each sub-module declares the tree-sitter node kinds that map to a built-in
//! pattern category. Two classifiers are provided:
//!
//! - [`category_for_node_kind`] — node-kind-only, fast, does not inspect source text.
//! - [`classify_hint`] — node-kind + callee-text, uses per-language regex tables.
//!   Prefer this for foreign extractors that have full [`PatternHintInput`] access.
//!
//! Category names are stable from Milestone 6 forward. Renaming any name is a
//! breaking change requiring a `MIGRATION_NOTES.md` entry.
12
13pub mod async_patterns;
14pub mod class_hierarchy;
15pub mod data_access;
16pub mod error_handling;
17pub mod logging;
18pub mod resource_management;
19pub mod state_management;
20pub mod type_assertions;
21
22use crate::hint_input::PatternHintInput;
23
/// Names of every built-in pattern category, sorted alphabetically.
///
/// The ordering is stable, so callers may rely on it when iterating.
///
/// `logging` is listed even though [`category_for_node_kind`] never yields
/// `Some("logging")`: for `snapshot_version "1.0"` it is a catalog-only category,
/// because the node kinds involved overlap with `data_access` and
/// `resource_management`. Keeping it in this list lets embedders emit
/// `PatternInstanceInput { category: "logging", … }` and have those instances
/// round-trip through `compute_pattern_metrics` and `compute_delta`.
///
/// # Examples
///
/// ```rust
/// use sdivi_patterns::queries::ALL_CATEGORIES;
///
/// assert!(ALL_CATEGORIES.contains(&"error_handling"));
/// assert!(ALL_CATEGORIES.contains(&"async_patterns"));
/// assert!(ALL_CATEGORIES.contains(&"data_access"));
/// assert!(ALL_CATEGORIES.contains(&"logging"));
/// assert_eq!(ALL_CATEGORIES.len(), 8);
/// ```
pub const ALL_CATEGORIES: &[&str] = &[
    "async_patterns",
    "class_hierarchy",
    "data_access",
    "error_handling",
    "logging",
    "resource_management",
    "state_management",
    "type_assertions",
];
53
54/// Maps a tree-sitter `node_kind` to the built-in category it belongs to.
55///
56/// Returns `None` if the node kind does not belong to any category.
57/// The `_language` parameter is reserved for future per-language overrides.
58///
59/// **Used internally by `classify_hint` for non-call/non-macro node kinds;
60/// the native pipeline no longer calls this function directly (as of M33).**
61/// Foreign extractors with full [`PatternHintInput`] access should call
62/// [`classify_hint`] instead. This function is preserved for callers that have a
63/// node kind but no source text — and for backward compatibility with embedders
64/// that integrated against the M29 API.
65///
66/// # Examples
67///
68/// ```rust
69/// use sdivi_patterns::queries::category_for_node_kind;
70///
71/// assert_eq!(category_for_node_kind("try_expression", "rust"), Some("error_handling"));
72/// assert_eq!(category_for_node_kind("await_expression", "rust"), Some("async_patterns"));
73/// assert_eq!(category_for_node_kind("unknown_node", "rust"), None);
74/// ```
75///
76/// # See also
77///
78/// [`classify_hint`] — callee-text-aware classifier that returns `["logging"]` for
79/// matching callees and disambiguates Rust `macro_invocation` into `logging` vs
80/// `resource_management`. This is what the native pipeline calls since M33.
81pub fn category_for_node_kind(node_kind: &str, _language: &str) -> Option<&'static str> {
82    if async_patterns::NODE_KINDS.contains(&node_kind) {
83        Some("async_patterns")
84    } else if class_hierarchy::NODE_KINDS.contains(&node_kind) {
85        Some("class_hierarchy")
86    } else if data_access::NODE_KINDS.contains(&node_kind) {
87        Some("data_access")
88    } else if error_handling::NODE_KINDS.contains(&node_kind) {
89        Some("error_handling")
90    } else if resource_management::NODE_KINDS.contains(&node_kind) {
91        Some("resource_management")
92    } else if state_management::NODE_KINDS.contains(&node_kind) {
93        Some("state_management")
94    } else if type_assertions::NODE_KINDS.contains(&node_kind) {
95        Some("type_assertions")
96    } else {
97        None
98    }
99}
100
101/// Classify a [`PatternHintInput`] using both node kind and callee-text inspection.
102///
103/// Returns a `Vec` of category name(s) the hint belongs to. In v0 the return is
104/// always 0 or 1 entries — the regex tables are designed to be disjoint per language.
105/// The `Vec` return is forward-looking: a future category that legitimately co-occurs
106/// with another (e.g. `console.error(err)` as both `logging` and `error_handling`)
107/// can be added without an API break.
108///
109/// ## Dispatch order for `call_expression` / `call`
110///
111/// Priority: `async_patterns` > `logging` > `data_access`. The first match wins.
112/// The order is load-bearing only if the disjoint-regex invariant is ever relaxed.
113///
114/// ## `macro_invocation`
115///
116/// Defaults to `resource_management`. Rust logging macros (`tracing::*!`,
117/// `log::*!`, `println!`, `eprintln!`, `print!`, `eprint!`, `dbg!`) are
118/// reclassified as `logging` via [`resource_management::excludes_callee`].
119///
120/// ## Other node kinds
121///
122/// Falls through to [`category_for_node_kind`] — the existing node-kind-only table.
123///
124/// # Examples
125///
126/// ```rust
127/// use sdivi_patterns::queries::classify_hint;
128/// use sdivi_patterns::PatternHintInput;
129///
130/// let hint = PatternHintInput {
131///     node_kind: "call_expression".to_string(),
132///     text: "console.log(\"x\")".to_string(),
133/// };
134/// assert_eq!(classify_hint(&hint, "typescript"), vec!["logging"]);
135///
136/// let mac = PatternHintInput {
137///     node_kind: "macro_invocation".to_string(),
138///     text: "vec![1, 2, 3]".to_string(),
139/// };
140/// assert_eq!(classify_hint(&mac, "rust"), vec!["resource_management"]);
141/// ```
142pub fn classify_hint(hint: &PatternHintInput, language: &str) -> Vec<&'static str> {
143    match hint.node_kind.as_str() {
144        "call_expression" | "call" => {
145            if async_patterns::matches_callee(&hint.text, language) {
146                return vec!["async_patterns"];
147            }
148            if logging::matches_callee(&hint.text, language) {
149                return vec!["logging"];
150            }
151            if data_access::matches_callee(&hint.text, language) {
152                return vec!["data_access"];
153            }
154            vec![]
155        }
156        "macro_invocation" => {
157            if resource_management::excludes_callee(&hint.text, language)
158                && logging::matches_callee(&hint.text, language)
159            {
160                return vec!["logging"];
161            }
162            vec!["resource_management"]
163        }
164        other => category_for_node_kind(other, language)
165            .map(|c| vec![c])
166            .unwrap_or_default(),
167    }
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn try_expression_is_error_handling() {
        let category = category_for_node_kind("try_expression", "rust");
        assert_eq!(category, Some("error_handling"));
    }

    #[test]
    fn await_expression_is_async_patterns() {
        let category = category_for_node_kind("await_expression", "rust");
        assert_eq!(category, Some("async_patterns"));
    }

    #[test]
    fn closure_expression_is_state_management() {
        let category = category_for_node_kind("closure_expression", "rust");
        assert_eq!(category, Some("state_management"));
    }

    #[test]
    fn macro_invocation_is_resource_management() {
        let category = category_for_node_kind("macro_invocation", "rust");
        assert_eq!(category, Some("resource_management"));
    }

    #[test]
    fn unknown_node_kind_returns_none() {
        let category = category_for_node_kind("unknown_xyz", "rust");
        assert_eq!(category, None);
    }

    #[test]
    fn all_categories_has_eight_entries() {
        assert_eq!(ALL_CATEGORIES.len(), 8);
        assert!(ALL_CATEGORIES.contains(&"data_access"));
    }

    #[test]
    fn logging_is_in_all_categories() {
        assert!(ALL_CATEGORIES.contains(&"logging"));
    }

    #[test]
    fn class_hierarchy_is_in_all_categories() {
        assert!(ALL_CATEGORIES.contains(&"class_hierarchy"));
    }

    #[test]
    fn class_declaration_is_class_hierarchy() {
        let category = category_for_node_kind("class_declaration", "typescript");
        assert_eq!(category, Some("class_hierarchy"));
    }

    #[test]
    fn class_definition_is_class_hierarchy() {
        let category = category_for_node_kind("class_definition", "python");
        assert_eq!(category, Some("class_hierarchy"));
    }

    #[test]
    fn impl_item_is_class_hierarchy() {
        let category = category_for_node_kind("impl_item", "rust");
        assert_eq!(category, Some("class_hierarchy"));
    }

    #[test]
    fn interface_declaration_is_class_hierarchy() {
        let category = category_for_node_kind("interface_declaration", "java");
        assert_eq!(category, Some("class_hierarchy"));
    }

    #[test]
    fn abstract_class_declaration_is_class_hierarchy() {
        let category = category_for_node_kind("abstract_class_declaration", "typescript");
        assert_eq!(category, Some("class_hierarchy"));
    }

    // M30 sentinel for the node-kind-only classifier. M33 promoted `logging`
    // through `classify_hint`; `category_for_node_kind` was deliberately left
    // alone. The M33 counterpart lives in `tests/m33_sentinels.rs`.
    #[test]
    fn category_for_node_kind_never_returns_logging() {
        // Returning `logging` needs callee-text inspection (see `classify_hint`),
        // so the node-kind-only lookup must never produce it. Unchanged through
        // M32 and M33.
        const CALL_LIKE_KINDS: [&str; 3] = ["call_expression", "call", "macro_invocation"];
        const LANGUAGES: [&str; 6] = ["rust", "python", "typescript", "javascript", "go", "java"];

        for kind in CALL_LIKE_KINDS {
            for lang in LANGUAGES {
                assert_ne!(
                    category_for_node_kind(kind, lang),
                    Some("logging"),
                    "logging is catalog-only in v0 for category_for_node_kind; \
                     routing for ({kind}, {lang}) would steal from data_access/resource_management"
                );
            }
        }
    }

    // M33 positive sentinels are kept in `tests/m33_sentinels.rs` (file ceiling).

    #[test]
    fn call_expression_is_data_access() {
        let typescript_call = category_for_node_kind("call_expression", "typescript");
        assert_eq!(typescript_call, Some("data_access"));

        let python_call = category_for_node_kind("call", "python");
        assert_eq!(python_call, Some("data_access"));
    }
}