Skip to main content

sdivi_patterns/queries/
schema_validation.rs

1//! Callee-text classification for runtime schema / validation declarations.
2//!
3//! Detects schema-library call sites in TypeScript/JavaScript (Zod, Yup,
4//! Valibot, Superstruct; io-ts is not matched yet) and Pydantic field functions in Python.
5//! Detection is callee-text only — no tree-sitter node-kind matching.
6//! `call_expression`/`call` nodes are already collected by the TS/JS/Python
7//! adapters; this module provides callee-text discrimination in `CALL_DISPATCH`
8//! at slot P4.
9//!
10//! ## Language support
11//!
12//! - **TypeScript / JavaScript:** Zod (`z.object`, `z.string`, `z.enum`),
13//!   Yup (`yup.object()`, `yup.string()`), Valibot (`v.object`, `v.pipe`),
14//!   Superstruct (`s.object`). Detected via the namespace prefix regex
15//!   `^(z|yup|v|s)\.\w`. Additionally, `.safeParse(` is matched as a
16//!   Zod-specific validated-parse call.
17//!
18//! - **Python:** Pydantic `Field(...)`, `constr(...)`, `conint(...)` calls.
19//!   Note: `class Foo(BaseModel)` is a `class_definition` already counted
20//!   under `class_hierarchy`; only the *call* forms are captured here.
21//!
22//! ## Precision over recall
23//!
24//! The TS/JS regex is anchored to the schema library *namespace* (`z.`/`yup.`/`v.`/`s.`)
25//! rather than method names alone. This deliberately avoids bare `.string()`/`.object()`
26//! calls on arbitrary receivers — do not (re)introduce `\.(object|string|array|…)\(`,
27//! which floods the bucket. The trade-off: `SomeSchema.parse(x)` where the receiver
28//! name is arbitrary is not captured, because that would require receiver-type
29//! information that SDIVI does not compute. This is a known recall gap.
30//!
31//! ## Pydantic class coverage
32//!
33//! `class Foo(BaseModel)` is a `class_definition` node, already counted under
34//! `class_hierarchy` (M6). Python coverage here is intentionally partial:
35//! only call forms (`Field(...)`, `constr(...)`, `conint(...)`) are classified.
36//! class-validator decorators (`@IsString()`) belong to `decorators` (M36.1/M36.2);
37//! see `docs/pattern-categories.md` for the intentional split.
38
39use std::sync::LazyLock;
40
41use regex::Regex;
42
/// Tree-sitter node kinds contributed by the schema-validation category.
///
/// Intentionally empty: nothing in this category is recognised by node kind.
/// Matching is done purely on callee text through [`matches_callee`]. The
/// `call_expression` and `call` nodes it inspects are already gathered by the
/// TypeScript/JavaScript/Python adapters, and the classification itself runs
/// in the `CALL_DISPATCH` loop of `classify_hint` at slot P4.
pub const NODE_KINDS: &[&str] = &[];
50
51// TypeScript / JavaScript:
52//   ^(z|yup|v|s)\.\w  — namespace-anchored: Zod (z.), Yup (yup.), Valibot (v.),
53//                        Superstruct (s.) followed by any word character.
54//   \.safeParse\(     — Zod-specific validated-parse call; receiver is arbitrary
55//                        but `.safeParse(` uniquely identifies schema parsing.
56static TS_JS_RE: LazyLock<Regex> = LazyLock::new(|| {
57    Regex::new(r"^(z|yup|v|s)\.\w|\.safeParse\(").expect("schema_validation TS/JS regex is valid")
58});
59
60// Python:
61//   \bField\(   — Pydantic Field() constructor
62//   \bconstr\(  — Pydantic string constraint helper
63//   \bconint\(  — Pydantic integer constraint helper
64static PYTHON_RE: LazyLock<Regex> = LazyLock::new(|| {
65    Regex::new(r"\bField\(|\bconstr\(|\bconint\(").expect("schema_validation Python regex is valid")
66});
67
68/// Return `true` when `text` looks like a schema-validation callee for `language`.
69///
70/// TypeScript and JavaScript share one regex table (namespace-anchored library
71/// prefixes and `.safeParse(`); Python detects Pydantic field-constraint calls.
72/// Rust, Go, and Java always return `false` in v0 — schema-library detection
73/// for those languages is deferred.
74///
75/// # Examples
76///
77/// ```rust
78/// use sdivi_patterns::queries::schema_validation::matches_callee;
79///
80/// assert!(matches_callee("z.object({})", "typescript"));
81/// assert!(matches_callee("yup.string().required()", "javascript"));
82/// assert!(matches_callee("UserSchema.safeParse(input)", "typescript"));
83/// assert!(matches_callee("Field(default=0)", "python"));
84/// assert!(!matches_callee("Math.max(a, b)", "typescript"));
85/// assert!(!matches_callee("len(x)", "python"));
86/// ```
87pub fn matches_callee(text: &str, language: &str) -> bool {
88    match language {
89        "typescript" | "javascript" => TS_JS_RE.is_match(text),
90        "python" => PYTHON_RE.is_match(text),
91        _ => false,
92    }
93}
94
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that every entry of `callees` is classified as a match for `language`.
    fn assert_all_match(language: &str, callees: &[&str]) {
        for callee in callees {
            assert!(
                matches_callee(callee, language),
                "{callee:?} should match for {language}"
            );
        }
    }

    /// Assert that no entry of `callees` is classified as a match for `language`.
    fn assert_none_match(language: &str, callees: &[&str]) {
        for callee in callees {
            assert!(
                !matches_callee(callee, language),
                "{callee:?} should not match for {language}"
            );
        }
    }

    #[test]
    fn zod_namespace_matches_typescript() {
        assert_all_match(
            "typescript",
            &["z.object({})", "z.string()", "z.enum(['a','b'])", "z.union([])"],
        );
    }

    #[test]
    fn yup_namespace_matches() {
        assert_all_match("typescript", &["yup.object().shape({})"]);
        assert_all_match("javascript", &["yup.string().required()"]);
    }

    #[test]
    fn valibot_namespace_matches() {
        assert_all_match("typescript", &["v.object({})"]);
        assert_all_match("javascript", &["v.pipe(v.string(), v.minLength(1))"]);
    }

    #[test]
    fn superstruct_namespace_matches() {
        assert_all_match("typescript", &["s.object({})"]);
    }

    #[test]
    fn safe_parse_matches() {
        assert_all_match("typescript", &["UserSchema.safeParse(input)"]);
        assert_all_match("javascript", &["schema.safeParse(data)"]);
    }

    #[test]
    fn pydantic_field_matches_python() {
        assert_all_match(
            "python",
            &[
                "Field(default=0)",
                "Field(...)",
                "constr(min_length=1)",
                "conint(gt=0)",
            ],
        );
    }

    #[test]
    fn bare_method_call_does_not_match() {
        // Receiver-less or non-schema receivers must stay out of the bucket.
        assert_none_match(
            "typescript",
            &[".string()", ".object()", "parse(x)", "Math.max(a, b)"],
        );
    }

    #[test]
    fn generic_python_calls_do_not_match() {
        assert_none_match("python", &["len(x)", "open(path)"]);
    }

    #[test]
    fn other_languages_return_false() {
        // A callee that matches for TypeScript must be rejected for unsupported languages.
        let unsupported = ["rust", "go", "java"];
        for lang in unsupported {
            let matched = matches_callee("z.object({})", lang);
            assert!(!matched, "z.object should not match for {lang}");
        }
    }

    #[test]
    fn node_kinds_is_empty() {
        // This category is callee-only (classified via classify_hint), so
        // NODE_KINDS must stay empty; the check guards that contract. The lint
        // is silenced because asserting on a const's emptiness is the point here.
        #[allow(clippy::const_is_empty)]
        let empty = NODE_KINDS.is_empty();
        assert!(empty);
    }
}