Skip to main content

sdivi_patterns/queries/
mod.rs

1//! Per-category node-kind classification rules.
2//!
3//! Each sub-module declares the tree-sitter node kinds that map to a built-in
4//! pattern category. Two classifiers are provided:
5//!
6//! - [`category_for_node_kind`] — node-kind-only, fast, does not inspect source text.
7//! - [`classify_hint`] — node-kind + callee-text, uses per-language regex tables.
8//!   Prefer this for foreign extractors that have full [`PatternHintInput`] access.
9//!
10//! Category names are stable from Milestone 6 forward. Renaming any name is a
11//! breaking change requiring a `MIGRATION_NOTES.md` entry.
12
13pub mod async_patterns;
14pub mod class_hierarchy;
15pub mod collection_pipelines;
16pub mod comprehensions;
17pub mod concurrency;
18pub mod data_access;
19pub mod decorators;
20pub mod error_handling;
21pub mod framework_hooks;
22pub mod http_routing;
23pub mod logging;
24pub mod null_safety;
25pub mod resource_management;
26pub mod schema_validation;
27pub mod serialization;
28pub mod state_management;
29pub mod state_store;
30pub mod testing;
31pub mod type_assertions;
32
33use crate::hint_input::PatternHintInput;
34
35/// All built-in category names in stable alphabetical order.
36///
37/// ## Classification paths
38///
39/// - **Callee-text only** (via [`classify_hint`]; [`category_for_node_kind`] never returns these):
40///   `logging`, `testing`, `serialization`, `schema_validation`, `state_store`,
41///   `framework_hooks`, `http_routing`, and `collection_pipelines`.
42/// - **Node-kind only** (via [`category_for_node_kind`]; no callee-text table):
43///   `class_hierarchy`, `comprehensions`, `decorators`, `error_handling`,
44///   `null_safety`, `resource_management`, `state_management`, and `type_assertions`.
45/// - **Hybrid** (both paths active): `async_patterns` (`await_expression` node kind
46///   plus TypeScript/JavaScript Promise-chain callee regex at CALL_DISPATCH P1),
47///   `data_access` (`call_expression`/`call` node kinds plus per-language callee
48///   regex), and `concurrency` (`go_statement`/`select_statement` node kinds plus
49///   `Promise.all`/`asyncio.gather` callee regex at CALL_DISPATCH P11).
50///
51/// # Examples
52///
53/// ```rust
54/// use sdivi_patterns::queries::ALL_CATEGORIES;
55///
56/// assert!(ALL_CATEGORIES.contains(&"comprehensions"));
57/// assert!(ALL_CATEGORIES.contains(&"concurrency"));
58/// assert!(ALL_CATEGORIES.contains(&"serialization"));
59/// assert!(ALL_CATEGORIES.contains(&"testing"));
60/// assert_eq!(ALL_CATEGORIES.len(), 19);
61/// ```
pub const ALL_CATEGORIES: &[&str] = &[
    // One entry per `pub mod` category sub-module declared in this file
    // (19 in total), listed alphabetically. Keep the list sorted when a
    // category is added so the "stable alphabetical order" contract holds.
    "async_patterns",
    "class_hierarchy",
    "collection_pipelines",
    "comprehensions",
    "concurrency",
    "data_access",
    "decorators",
    "error_handling",
    "framework_hooks",
    "http_routing",
    "logging",
    "null_safety",
    "resource_management",
    "schema_validation",
    "serialization",
    "state_management",
    "state_store",
    "testing",
    "type_assertions",
];
83
84/// Maps a tree-sitter `node_kind` to the built-in category it belongs to.
85///
86/// Returns `None` if the node kind does not belong to any category.
87/// The `_language` parameter is reserved for future per-language overrides.
88///
89/// **Used internally by `classify_hint` for non-call/non-macro node kinds;
90/// the native pipeline no longer calls this function directly (as of M33).**
91/// Foreign extractors with full [`PatternHintInput`] access should call
92/// [`classify_hint`] instead. This function is preserved for callers that have a
93/// node kind but no source text — and for backward compatibility with embedders
94/// that integrated against the M29 API.
95///
96/// In particular, `call_expression` nodes always return `Some("data_access")` here because
97/// callee text is unavailable; callers that have the source text should use [`classify_hint`] to
98/// get the callee-aware result.
99///
100/// # Examples
101///
102/// ```rust
103/// use sdivi_patterns::queries::category_for_node_kind;
104///
105/// assert_eq!(category_for_node_kind("try_expression", "rust"), Some("error_handling"));
106/// assert_eq!(category_for_node_kind("await_expression", "rust"), Some("async_patterns"));
107/// assert_eq!(category_for_node_kind("unknown_node", "rust"), None);
108/// ```
109///
110/// # See also
111///
112/// [`classify_hint`] — callee-aware classifier; preferred for most callers (M33+).
113pub fn category_for_node_kind(node_kind: &str, _language: &str) -> Option<&'static str> {
114    if async_patterns::NODE_KINDS.contains(&node_kind) {
115        Some("async_patterns")
116    } else if class_hierarchy::NODE_KINDS.contains(&node_kind) {
117        Some("class_hierarchy")
118    } else if comprehensions::NODE_KINDS.contains(&node_kind) {
119        Some("comprehensions")
120    } else if concurrency::NODE_KINDS.contains(&node_kind) {
121        Some("concurrency")
122    } else if data_access::NODE_KINDS.contains(&node_kind) {
123        Some("data_access")
124    } else if decorators::NODE_KINDS.contains(&node_kind) {
125        Some("decorators")
126    } else if error_handling::NODE_KINDS.contains(&node_kind) {
127        Some("error_handling")
128    } else if null_safety::NODE_KINDS.contains(&node_kind) {
129        Some("null_safety")
130    } else if resource_management::NODE_KINDS.contains(&node_kind) {
131        Some("resource_management")
132    } else if state_management::NODE_KINDS.contains(&node_kind) {
133        Some("state_management")
134    } else if type_assertions::NODE_KINDS.contains(&node_kind) {
135        Some("type_assertions")
136    } else {
137        None
138    }
139}
140
// Callee-text dispatch table used by `classify_hint` for `call_expression` / `call`
// nodes. Each entry pairs a category name with that category's `matches_callee`
// predicate, which is invoked as `matches_callee(text, language)`.
//
// Entries are tried top to bottom and the first predicate returning `true` wins,
// so the order below IS the priority order:
//
//   P1=async_patterns > P2=testing > P3=serialization > P4=schema_validation
//   > P5=state_store > P6=framework_hooks > P7=http_routing > P8=logging
//   > P9=data_access > P10=collection_pipelines > P11=concurrency
//
// Future milestones insert at their slot.
#[allow(clippy::type_complexity)] // fn pointer tuple slice; each entry is one category
const CALL_DISPATCH: &[(&str, fn(&str, &str) -> bool)] = &[
    ("async_patterns", async_patterns::matches_callee),
    ("testing", testing::matches_callee),
    ("serialization", serialization::matches_callee),
    ("schema_validation", schema_validation::matches_callee),
    ("state_store", state_store::matches_callee),
    ("framework_hooks", framework_hooks::matches_callee),
    ("http_routing", http_routing::matches_callee),
    ("logging", logging::matches_callee),
    ("data_access", data_access::matches_callee),
    ("collection_pipelines", collection_pipelines::matches_callee),
    ("concurrency", concurrency::matches_callee),
];
156
157/// Classify a [`PatternHintInput`] using both node kind and callee-text inspection.
158///
159/// Returns a `Vec` of category name(s) the hint belongs to. In v0 the return is
160/// always 0 or 1 entries — the regex tables are designed to be disjoint per language.
161/// The `Vec` return is forward-looking: a future category that legitimately co-occurs
162/// with another (e.g. `console.error(err)` as both `logging` and `error_handling`)
163/// can be added without an API break.
164///
165/// ## Dispatch order for `call_expression` / `call`
166///
/// Iterates the private `CALL_DISPATCH` table in order; the first matching entry wins (priorities P1 through P11 are active at M44).
168///
169/// ## `macro_invocation`
170///
171/// Defaults to `resource_management`. Rust logging macros (`tracing::*!`,
172/// `log::*!`, `println!`, `eprintln!`, `print!`, `eprint!`, `dbg!`) are
173/// reclassified as `logging` via [`resource_management::excludes_callee`].
174///
175/// ## Other node kinds
176///
177/// Falls through to [`category_for_node_kind`] — the existing node-kind-only table.
178///
179/// # Examples
180///
181/// ```rust
182/// use sdivi_patterns::queries::classify_hint;
183/// use sdivi_patterns::PatternHintInput;
184///
185/// let hint = PatternHintInput {
186///     node_kind: "call_expression".to_string(),
187///     text: "console.log(\"x\")".to_string(),
188/// };
189/// assert_eq!(classify_hint(&hint, "typescript"), vec!["logging"]);
190///
191/// let mac = PatternHintInput {
192///     node_kind: "macro_invocation".to_string(),
193///     text: "vec![1, 2, 3]".to_string(),
194/// };
195/// assert_eq!(classify_hint(&mac, "rust"), vec!["resource_management"]);
196/// ```
197pub fn classify_hint(hint: &PatternHintInput, language: &str) -> Vec<&'static str> {
198    match hint.node_kind.as_str() {
199        "call_expression" | "call" => {
200            for &(category, matches) in CALL_DISPATCH {
201                if matches(&hint.text, language) {
202                    return vec![category];
203                }
204            }
205            vec![]
206        }
207        "macro_invocation" => {
208            if resource_management::excludes_callee(&hint.text, language)
209                && logging::matches_callee(&hint.text, language)
210            {
211                return vec!["logging"];
212            }
213            vec!["resource_management"]
214        }
215        other => category_for_node_kind(other, language)
216            .map(|c| vec![c])
217            .unwrap_or_default(),
218    }
219}
220
221#[cfg(test)]
222mod tests;
223#[cfg(test)]
224mod tests_m45_2;