Skip to main content

sdivi_patterns/queries/
serialization.rs

1//! Callee-text classification for serialization boundary calls.
2//!
3//! Detects (de)serialization calls with anchored receiver names:
4//!
5//! - **TypeScript / JavaScript:** `JSON.parse(…)`, `JSON.stringify(…)`,
6//!   `structuredClone(…)`.
//! - **Python:** `json.loads(…)`, `json.dumps(…)`, `json.load(…)`, `json.dump(…)`,
//!   `pickle.loads(…)`, `pickle.dumps(…)`, `pickle.load(…)`, `pickle.dump(…)`.
9//! - **Go:** `json.Marshal(…)`, `json.Unmarshal(…)`, `json.MarshalIndent(…)`,
10//!   `json.NewEncoder(…)`, `json.NewDecoder(…)`.
11//!
12//! ## CALL_DISPATCH slot
13//!
14//! Registered at P3 — below `testing` (P2) and above `schema_validation` (P4).
15//! Receiver-anchored and specific: resolves before broader categories.
16//!
17//! ## Design: receiver-anchored only
18//!
//! Bare `.parse(` is intentionally not matched — it collides with schema validators
//! (`schema.parse`) and config parsers. Only calls whose receiver is `JSON`, `json`,
//! or `pickle`, plus the bare global `structuredClone`, are classified here.
22//!
23//! ## Seeds forward
24//!
25//! Protobuf/Avro/MessagePack codecs and ORM serialize hooks are adjacent; defer
26//! until requested.
27
28use std::sync::LazyLock;
29
30use regex::Regex;
31
/// Tree-sitter node kinds associated with the serialization category.
///
/// Deliberately empty: this category has no node-kind triggers. Calls are
/// recognised purely from their callee text by [`matches_callee`], which is
/// consulted from `classify_hint`'s `CALL_DISPATCH` loop at slot P3.
pub const NODE_KINDS: &[&str] = &[];
37
38// TypeScript / JavaScript — JSON built-in and structuredClone.
39// Anchored at `^` on the `JSON.` receiver; structuredClone is a bare global.
40static TS_JS_RE: LazyLock<Regex> = LazyLock::new(|| {
41    Regex::new(r"^(JSON\.(parse|stringify)\(|structuredClone\()")
42        .expect("serialization TS/JS regex is valid")
43});
44
45// Python — json and pickle stdlib modules.
46static PYTHON_RE: LazyLock<Regex> = LazyLock::new(|| {
47    Regex::new(r"^(json|pickle)\.(loads|dumps|load|dump)\(")
48        .expect("serialization Python regex is valid")
49});
50
51// Go — encoding/json standard library.
52static GO_RE: LazyLock<Regex> = LazyLock::new(|| {
53    Regex::new(r"^json\.(Marshal|Unmarshal|MarshalIndent|NewEncoder|NewDecoder)\(")
54        .expect("serialization Go regex is valid")
55});
56
57/// Return `true` when `text` looks like a (de)serialization boundary call.
58///
59/// Covers `JSON.parse`/`JSON.stringify`/`structuredClone` (TS/JS), `json.*`/`pickle.*`
60/// (Python), and `json.*` encoding calls (Go). See module doc for the receiver-anchored
61/// design rationale.
62///
63/// # Examples
64///
65/// ```rust
66/// use sdivi_patterns::queries::serialization::matches_callee;
67///
68/// assert!(matches_callee("JSON.parse(s)", "typescript"));
69/// assert!(matches_callee("JSON.stringify(obj)", "javascript"));
70/// assert!(matches_callee("structuredClone(data)", "typescript"));
71/// assert!(matches_callee("json.loads(s)", "python"));
72/// assert!(matches_callee("json.dumps(o)", "python"));
73/// assert!(matches_callee("pickle.dumps(o)", "python"));
74/// assert!(matches_callee("json.Marshal(v)", "go"));
75/// assert!(matches_callee("json.Unmarshal(b, &v)", "go"));
76/// assert!(matches_callee("json.NewDecoder(r)", "go"));
77/// assert!(!matches_callee("schema.parse(x)", "typescript"));
78/// assert!(!matches_callee("JSON.error(x)", "typescript"));
79/// assert!(!matches_callee("len(x)", "python"));
80/// ```
81pub fn matches_callee(text: &str, language: &str) -> bool {
82    match language {
83        "typescript" | "javascript" => TS_JS_RE.is_match(text),
84        "python" => PYTHON_RE.is_match(text),
85        "go" => GO_RE.is_match(text),
86        _ => false,
87    }
88}
89
/// Unit tests for [`matches_callee`] and [`NODE_KINDS`].
///
/// Besides one positive case per supported callee, these pin the properties the
/// receiver-anchored design relies on: start-of-text anchoring, the mandatory
/// opening parenthesis, case sensitivity, and per-language isolation.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- TypeScript / JavaScript positives ----

    #[test]
    fn json_parse_matches_ts() {
        assert!(matches_callee("JSON.parse(s)", "typescript"));
    }

    #[test]
    fn json_stringify_matches_js() {
        assert!(matches_callee("JSON.stringify(obj)", "javascript"));
    }

    #[test]
    fn structured_clone_matches_ts() {
        assert!(matches_callee("structuredClone(data)", "typescript"));
    }

    #[test]
    fn json_parse_matches_js() {
        assert!(matches_callee("JSON.parse(text)", "javascript"));
    }

    // ---- Python positives ----

    #[test]
    fn json_loads_matches_python() {
        assert!(matches_callee("json.loads(s)", "python"));
    }

    #[test]
    fn json_dumps_matches_python() {
        assert!(matches_callee("json.dumps(o)", "python"));
    }

    #[test]
    fn json_load_matches_python() {
        assert!(matches_callee("json.load(f)", "python"));
    }

    #[test]
    fn json_dump_matches_python() {
        assert!(matches_callee("json.dump(o, f)", "python"));
    }

    #[test]
    fn pickle_loads_matches_python() {
        assert!(matches_callee("pickle.loads(b)", "python"));
    }

    #[test]
    fn pickle_dumps_matches_python() {
        assert!(matches_callee("pickle.dumps(o)", "python"));
    }

    #[test]
    fn pickle_load_matches_python() {
        // The Python pattern shares one method list between `json` and `pickle`,
        // so the file-object variants are accepted for `pickle` as well.
        assert!(matches_callee("pickle.load(f)", "python"));
    }

    #[test]
    fn pickle_dump_matches_python() {
        assert!(matches_callee("pickle.dump(o, f)", "python"));
    }

    // ---- Go positives ----

    #[test]
    fn json_marshal_matches_go() {
        assert!(matches_callee("json.Marshal(v)", "go"));
    }

    #[test]
    fn json_unmarshal_matches_go() {
        assert!(matches_callee("json.Unmarshal(b, &v)", "go"));
    }

    #[test]
    fn json_marshal_indent_matches_go() {
        assert!(matches_callee("json.MarshalIndent(v, \"\", \"  \")", "go"));
    }

    #[test]
    fn json_new_encoder_matches_go() {
        assert!(matches_callee("json.NewEncoder(w)", "go"));
    }

    #[test]
    fn json_new_decoder_matches_go() {
        assert!(matches_callee("json.NewDecoder(r)", "go"));
    }

    // ---- Negatives: unrelated callees ----

    #[test]
    fn schema_parse_does_not_match_ts() {
        assert!(!matches_callee("schema.parse(x)", "typescript"));
    }

    #[test]
    fn json_other_method_does_not_match_ts() {
        assert!(!matches_callee("JSON.error(x)", "typescript"));
    }

    #[test]
    fn len_does_not_match_python() {
        assert!(!matches_callee("len(x)", "python"));
    }

    #[test]
    fn requests_does_not_match_python() {
        assert!(!matches_callee("requests.get(url)", "python"));
    }

    #[test]
    fn json_other_does_not_match_go() {
        assert!(!matches_callee("json.Something(v)", "go"));
    }

    #[test]
    fn fmt_println_does_not_match_go() {
        assert!(!matches_callee("fmt.Println(x)", "go"));
    }

    // ---- Negatives: anchoring and exact-name guarantees ----

    #[test]
    fn nested_receiver_does_not_match() {
        // Patterns are anchored at the start of the callee text, so a matching
        // receiver that is itself a member of another object is rejected.
        assert!(!matches_callee("window.JSON.parse(s)", "typescript"));
        assert!(!matches_callee("self.json.loads(s)", "python"));
        assert!(!matches_callee("encoding.json.Marshal(v)", "go"));
    }

    #[test]
    fn prefixed_receiver_name_does_not_match() {
        // A receiver that merely ends with an accepted name is not accepted.
        assert!(!matches_callee("MyJSON.parse(s)", "javascript"));
        assert!(!matches_callee("myjson.loads(s)", "python"));
        assert!(!matches_callee("simplejson.dumps(o)", "python"));
    }

    #[test]
    fn longer_method_name_does_not_match() {
        // The opening parenthesis must directly follow the accepted method name.
        assert!(!matches_callee("JSON.parseAsync(s)", "typescript"));
        assert!(!matches_callee("structuredCloneDeep(x)", "typescript"));
        assert!(!matches_callee("json.loadsx(s)", "python"));
        assert!(!matches_callee("json.MarshalJSON(v)", "go"));
    }

    #[test]
    fn callee_without_call_paren_does_not_match() {
        assert!(!matches_callee("JSON.parse", "typescript"));
        assert!(!matches_callee("json.loads", "python"));
        assert!(!matches_callee("json.Marshal", "go"));
    }

    #[test]
    fn receiver_and_method_are_case_sensitive() {
        assert!(!matches_callee("json.parse(s)", "typescript"));
        assert!(!matches_callee("JSON.Parse(s)", "typescript"));
        assert!(!matches_callee("JSON.loads(s)", "python"));
        assert!(!matches_callee("json.marshal(v)", "go"));
    }

    // ---- Negatives: language selection ----

    #[test]
    fn patterns_do_not_cross_languages() {
        assert!(!matches_callee("JSON.parse(s)", "python"));
        assert!(!matches_callee("json.dumps(o)", "typescript"));
        assert!(!matches_callee("json.loads(s)", "go"));
        assert!(!matches_callee("json.Marshal(v)", "python"));
    }

    #[test]
    fn rust_returns_false() {
        assert!(!matches_callee("serde_json::to_string(&v)", "rust"));
    }

    #[test]
    fn unknown_language_returns_false() {
        // Even a callee that matches a supported language is rejected when the
        // language tag is not one of the recognised values.
        assert!(!matches_callee("JSON.parse(s)", ""));
        assert!(!matches_callee("JSON.parse(s)", "ruby"));
        assert!(!matches_callee("json.loads(s)", "Python"));
    }

    // ---- NODE_KINDS contract ----

    #[test]
    fn node_kinds_is_empty() {
        // NODE_KINDS is intentionally empty: this category is callee-only (classified
        // via classify_hint). The assertion guards that contract against regressions.
        #[allow(clippy::const_is_empty)]
        let empty = NODE_KINDS.is_empty();
        assert!(empty);
    }
}