sdivi_patterns/queries/schema_validation.rs
1//! Callee-text classification for runtime schema / validation declarations.
2//!
//! Detects schema-library call sites in TypeScript/JavaScript (Zod, Yup,
//! Valibot, Superstruct) and Pydantic field functions in Python. io-ts is
//! not detected: the namespace prefix table below has no entry for it.
5//! Detection is callee-text only — no tree-sitter node-kind matching.
6//! `call_expression`/`call` nodes are already collected by the TS/JS/Python
7//! adapters; this module provides callee-text discrimination in `CALL_DISPATCH`
8//! at slot P4.
9//!
10//! ## Language support
11//!
12//! - **TypeScript / JavaScript:** Zod (`z.object`, `z.string`, `z.enum`),
13//! Yup (`yup.object()`, `yup.string()`), Valibot (`v.object`, `v.pipe`),
14//! Superstruct (`s.object`). Detected via the namespace prefix regex
15//! `^(z|yup|v|s)\.\w`. Additionally, `.safeParse(` is matched as a
16//! Zod-specific validated-parse call.
17//!
18//! - **Python:** Pydantic `Field(...)`, `constr(...)`, `conint(...)` calls.
19//! Note: `class Foo(BaseModel)` is a `class_definition` already counted
20//! under `class_hierarchy`; only the *call* forms are captured here.
21//!
22//! ## Precision over recall
23//!
24//! The TS/JS regex is anchored to the schema library *namespace* (`z.`/`yup.`/`v.`/`s.`)
25//! rather than method names alone. This deliberately avoids bare `.string()`/`.object()`
26//! calls on arbitrary receivers — do not (re)introduce `\.(object|string|array|…)\(`,
//! which floods the bucket. The trade-off: `SomeSchema.parse(x)`, where the receiver
//! name is arbitrary, is not captured — recognizing it would require receiver-type
//! information that SDIVI does not compute. This is a known, accepted recall gap.
30//!
31//! ## Pydantic class coverage
32//!
33//! `class Foo(BaseModel)` is a `class_definition` node, already counted under
34//! `class_hierarchy` (M6). Python coverage here is intentionally partial:
35//! only call forms (`Field(...)`, `constr(...)`, `conint(...)`) are classified.
36//! class-validator decorators (`@IsString()`) belong to `decorators` (M36.1/M36.2);
37//! see `docs/pattern-categories.md` for the intentional split.
38
39use std::sync::LazyLock;
40
41use regex::Regex;
42
/// Tree-sitter node kinds claimed by the schema-validation category.
///
/// Deliberately empty: nothing in this category is recognized by node kind.
/// Classification is done purely on callee text by [`matches_callee`], which is
/// consulted from `classify_hint`'s `CALL_DISPATCH` loop at slot P4. The
/// `call_expression` / `call` nodes it inspects are already gathered by the
/// TypeScript, JavaScript, and Python adapters.
pub const NODE_KINDS: &[&str] = &[];
50
51// TypeScript / JavaScript:
52// ^(z|yup|v|s)\.\w — namespace-anchored: Zod (z.), Yup (yup.), Valibot (v.),
53// Superstruct (s.) followed by any word character.
54// \.safeParse\( — Zod-specific validated-parse call; receiver is arbitrary
55// but `.safeParse(` uniquely identifies schema parsing.
56static TS_JS_RE: LazyLock<Regex> = LazyLock::new(|| {
57 Regex::new(r"^(z|yup|v|s)\.\w|\.safeParse\(").expect("schema_validation TS/JS regex is valid")
58});
59
60// Python:
61// \bField\( — Pydantic Field() constructor
62// \bconstr\( — Pydantic string constraint helper
63// \bconint\( — Pydantic integer constraint helper
64static PYTHON_RE: LazyLock<Regex> = LazyLock::new(|| {
65 Regex::new(r"\bField\(|\bconstr\(|\bconint\(").expect("schema_validation Python regex is valid")
66});
67
68/// Return `true` when `text` looks like a schema-validation callee for `language`.
69///
70/// TypeScript and JavaScript share one regex table (namespace-anchored library
71/// prefixes and `.safeParse(`); Python detects Pydantic field-constraint calls.
72/// Rust, Go, and Java always return `false` in v0 — schema-library detection
73/// for those languages is deferred.
74///
75/// # Examples
76///
77/// ```rust
78/// use sdivi_patterns::queries::schema_validation::matches_callee;
79///
80/// assert!(matches_callee("z.object({})", "typescript"));
81/// assert!(matches_callee("yup.string().required()", "javascript"));
82/// assert!(matches_callee("UserSchema.safeParse(input)", "typescript"));
83/// assert!(matches_callee("Field(default=0)", "python"));
84/// assert!(!matches_callee("Math.max(a, b)", "typescript"));
85/// assert!(!matches_callee("len(x)", "python"));
86/// ```
87pub fn matches_callee(text: &str, language: &str) -> bool {
88 match language {
89 "typescript" | "javascript" => TS_JS_RE.is_match(text),
90 "python" => PYTHON_RE.is_match(text),
91 _ => false,
92 }
93}
94
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that every callee in `callees` is classified as schema validation
    /// for `language`.
    fn assert_all_match(callees: &[&str], language: &str) {
        for &callee in callees {
            assert!(
                matches_callee(callee, language),
                "{callee:?} should match for {language}"
            );
        }
    }

    /// Assert that no callee in `callees` is classified as schema validation
    /// for `language`.
    fn assert_none_match(callees: &[&str], language: &str) {
        for &callee in callees {
            assert!(
                !matches_callee(callee, language),
                "{callee:?} should not match for {language}"
            );
        }
    }

    #[test]
    fn zod_namespace_matches_typescript() {
        assert_all_match(
            &[
                "z.object({})",
                "z.string()",
                "z.enum(['a','b'])",
                "z.union([])",
            ],
            "typescript",
        );
    }

    #[test]
    fn yup_namespace_matches() {
        assert_all_match(&["yup.object().shape({})"], "typescript");
        assert_all_match(&["yup.string().required()"], "javascript");
    }

    #[test]
    fn valibot_namespace_matches() {
        assert_all_match(&["v.object({})"], "typescript");
        assert_all_match(&["v.pipe(v.string(), v.minLength(1))"], "javascript");
    }

    #[test]
    fn superstruct_namespace_matches() {
        assert_all_match(&["s.object({})"], "typescript");
    }

    #[test]
    fn safe_parse_matches() {
        assert_all_match(&["UserSchema.safeParse(input)"], "typescript");
        assert_all_match(&["schema.safeParse(data)"], "javascript");
    }

    #[test]
    fn pydantic_field_matches_python() {
        assert_all_match(
            &[
                "Field(default=0)",
                "Field(...)",
                "constr(min_length=1)",
                "conint(gt=0)",
            ],
            "python",
        );
    }

    #[test]
    fn bare_method_call_does_not_match() {
        // Receiver-less method text and unrelated namespaces must stay out of the bucket.
        assert_none_match(
            &[".string()", ".object()", "parse(x)", "Math.max(a, b)"],
            "typescript",
        );
    }

    #[test]
    fn generic_python_calls_do_not_match() {
        assert_none_match(&["len(x)", "open(path)"], "python");
    }

    #[test]
    fn other_languages_return_false() {
        // Even an unmistakable Zod callee is rejected for unsupported languages.
        ["rust", "go", "java"].iter().for_each(|lang| {
            assert!(
                !matches_callee("z.object({})", lang),
                "z.object should not match for {lang}"
            );
        });
    }

    #[test]
    fn node_kinds_is_empty() {
        // This category is callee-text only, so NODE_KINDS must stay empty; the
        // check is constant by design, hence the lint allowance below.
        #[allow(clippy::const_is_empty)]
        let no_node_kinds = NODE_KINDS.is_empty();
        assert!(no_node_kinds);
    }
}