1use serde_json::Value;
2
/// Summary of the capabilities an input payload appears to need.
///
/// Every flag starts out `false` (see [`Default`]) and is only ever switched
/// on by the scanner; a `true` flag means at least one key or string value in
/// the scanned input matched the corresponding heuristic.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct InputCapabilityScan {
    /// Set when some string value looks like a filesystem path.
    pub requires_filesystem: bool,
    /// Set when a network-related key or a URL-like value was seen.
    pub requires_network: bool,
    /// Set when an environment-related key was seen.
    pub requires_environment: bool,
}
9
/// One string value found in the input, together with the context it was
/// found in.
#[derive(Clone, Debug)]
struct StringToken {
    /// Lowercased key the string was stored under, if it had one.
    key: Option<String>,
    /// `true` when this key or any enclosing key is a path-related key.
    in_path_context: bool,
    /// `true` when this key or any enclosing key is a model-related key.
    in_model_context: bool,
    /// The string value exactly as it appeared in the input (untrimmed).
    value: String,
}
17
18pub fn scan_input_capabilities(input: &Value) -> InputCapabilityScan {
19 let mut scan = InputCapabilityScan::default();
20 let mut tokens = Vec::new();
21 collect_strings(input, None, false, false, &mut tokens, &mut scan);
22
23 for token in tokens {
24 let value = token.value.trim();
25 if value.is_empty() {
26 continue;
27 }
28 if is_url(value) {
29 scan.requires_network = true;
30 }
31 if looks_like_filesystem_path(
32 value,
33 token.key.as_deref(),
34 token.in_path_context,
35 token.in_model_context,
36 ) {
37 scan.requires_filesystem = true;
38 }
39 }
40
41 scan
42}
43
/// Returns `true` when `key` is exactly one of the path-related key names.
///
/// The comparison is exact and case-sensitive.
fn is_path_key(key: &str) -> bool {
    matches!(
        key,
        "path" | "file" | "filepath" | "directory" | "dir" | "filename"
    )
}
47
/// Returns `true` when `key` is exactly one of the model-related key names.
///
/// The comparison is exact and case-sensitive.
fn is_model_key(key: &str) -> bool {
    matches!(
        key,
        "model"
            | "model_id"
            | "provider"
            | "provider_model"
            | "engine"
            | "primary"
            | "fallback"
            | "model_name"
    )
}
61
/// Returns `true` when `key` is exactly one of the network-related key names.
///
/// The comparison is exact and case-sensitive.
fn is_network_key(key: &str) -> bool {
    matches!(key, "url" | "endpoint" | "host" | "api")
}
65
/// Returns `true` when `key` is exactly one of the environment-related key
/// names.
///
/// The comparison is exact and case-sensitive.
fn is_environment_key(key: &str) -> bool {
    matches!(key, "env" | "environment" | "env_var" | "env_key")
}
69
/// Returns `true` when `v`, ignoring surrounding whitespace, starts with one
/// of the schemes `http://`, `https://`, `ws://` or `wss://`.
///
/// The scheme comparison is ASCII case-insensitive. Only the leading bytes of
/// the string are inspected, so no lowercased copy of the input is allocated.
fn is_url(v: &str) -> bool {
    const SCHEMES: [&str; 4] = ["http://", "https://", "ws://", "wss://"];

    let bytes = v.trim().as_bytes();
    SCHEMES.iter().any(|scheme| {
        // `get` yields `None` when the input is shorter than the scheme, so
        // short inputs simply do not match.
        bytes
            .get(..scheme.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(scheme.as_bytes()))
    })
}
77
78fn collect_strings(
79 value: &Value,
80 key_ctx: Option<&str>,
81 in_path_context: bool,
82 in_model_context: bool,
83 out: &mut Vec<StringToken>,
84 scan: &mut InputCapabilityScan,
85) {
86 match value {
87 Value::String(s) => out.push(StringToken {
88 key: key_ctx.map(|k| k.to_string()),
89 in_path_context,
90 in_model_context,
91 value: s.clone(),
92 }),
93 Value::Array(arr) => {
94 for item in arr {
95 collect_strings(item, key_ctx, in_path_context, in_model_context, out, scan);
96 }
97 }
98 Value::Object(map) => {
99 for (key, item) in map {
100 let lower_key = key.to_lowercase();
101 if is_network_key(&lower_key) {
102 scan.requires_network = true;
103 }
104 if is_environment_key(&lower_key) {
105 scan.requires_environment = true;
106 }
107
108 let next_path_context = in_path_context || is_path_key(&lower_key);
109 let next_model_context = in_model_context || is_model_key(&lower_key);
110 collect_strings(
111 item,
112 Some(&lower_key),
113 next_path_context,
114 next_model_context,
115 out,
116 scan,
117 );
118 }
119 }
120 _ => {}
121 }
122}
123
124fn looks_like_filesystem_path(
125 v: &str,
126 key: Option<&str>,
127 in_path_context: bool,
128 in_model_context: bool,
129) -> bool {
130 if in_path_context || key.is_some_and(is_path_key) {
131 return true;
132 }
133 if is_url(v) {
134 return false;
135 }
136 if v.starts_with('/')
137 || v.starts_with("./")
138 || v.starts_with("../")
139 || v.starts_with("~/")
140 || v.starts_with(".\\")
141 || v.starts_with("..\\")
142 || v.starts_with("~\\")
143 || v.starts_with("\\\\")
144 {
145 return true;
146 }
147 if v.len() > 2
148 && v.as_bytes().get(1) == Some(&b':')
149 && matches!(v.as_bytes().get(2), Some(b'\\' | b'/'))
150 {
151 return v.as_bytes()[0].is_ascii_alphabetic();
152 }
153 if in_model_context {
154 return false;
155 }
156 if v.contains('/') && !key.is_some_and(is_model_key) {
158 return true;
159 }
160 false
161}
162
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Runs the path heuristic with no key and no inherited context.
    fn bare_path_check(candidate: &str) -> bool {
        looks_like_filesystem_path(candidate, None, false, false)
    }

    // ---- end-to-end scans -------------------------------------------------

    #[test]
    fn url_and_websocket_values_require_network_only() {
        let input = json!({
            "endpoint": "https://example.com/v1",
            "socket": "wss://stream.example.com",
        });
        let scan = scan_input_capabilities(&input);
        assert!(scan.requires_network);
        assert!(!scan.requires_filesystem);
    }

    #[test]
    fn model_identifier_is_not_filesystem_but_explicit_path_is() {
        let cases = [("openai/gpt-4o", false), ("/etc/passwd", true)];
        for (model_value, expect_filesystem) in cases {
            let scan = scan_input_capabilities(&json!({ "model": model_value }));
            assert_eq!(
                scan.requires_filesystem, expect_filesystem,
                "model value {model_value:?}"
            );
        }
    }

    #[test]
    fn regex_like_string_is_not_filesystem() {
        let input = json!({"pattern": "\\d+\\w+\\s*"});
        assert!(!scan_input_capabilities(&input).requires_filesystem);
    }

    #[test]
    fn environment_keys_require_environment_capability() {
        let input = json!({"env_var": "SECRET_TOKEN"});
        assert!(scan_input_capabilities(&input).requires_environment);
    }

    // ---- is_url -----------------------------------------------------------

    #[test]
    fn is_url_detects_http() {
        for candidate in ["http://example.com", "HTTP://EXAMPLE.COM"] {
            assert!(is_url(candidate), "{candidate:?}");
        }
    }

    #[test]
    fn is_url_detects_https() {
        assert!(is_url("https://example.com/path"));
    }

    #[test]
    fn is_url_detects_ws() {
        for candidate in ["ws://localhost:8080", "wss://secure.example.com"] {
            assert!(is_url(candidate), "{candidate:?}");
        }
    }

    #[test]
    fn is_url_rejects_non_urls() {
        for candidate in ["not a url", "/etc/passwd", "ftp://something", ""] {
            assert!(!is_url(candidate), "{candidate:?}");
        }
    }

    // ---- looks_like_filesystem_path ---------------------------------------

    #[test]
    fn path_context_always_returns_true() {
        assert!(looks_like_filesystem_path("anything", None, true, false));
    }

    #[test]
    fn path_key_always_returns_true() {
        let path_keys = ["path", "file", "directory", "dir", "filename", "filepath"];
        for key in path_keys {
            assert!(
                looks_like_filesystem_path("anything", Some(key), false, false),
                "key {key:?}"
            );
        }
    }

    #[test]
    fn url_is_not_filesystem_path() {
        assert!(!bare_path_check("https://example.com"));
    }

    #[test]
    fn absolute_paths_detected() {
        for candidate in ["/etc/passwd", "./relative", "../parent", "~/home"] {
            assert!(bare_path_check(candidate), "{candidate:?}");
        }
    }

    #[test]
    fn backslash_paths_detected() {
        for candidate in [".\\windows", "..\\parent", "~\\user", "\\\\server\\share"] {
            assert!(bare_path_check(candidate), "{candidate:?}");
        }
    }

    #[test]
    fn windows_drive_path_detected() {
        for candidate in ["C:\\Users\\test", "D:/path"] {
            assert!(bare_path_check(candidate), "{candidate:?}");
        }
    }

    #[test]
    fn model_context_suppresses_slash_heuristic() {
        let is_path = looks_like_filesystem_path("openai/gpt-4", None, false, true);
        assert!(!is_path);
    }

    #[test]
    fn slash_separated_without_model_context_is_path() {
        assert!(bare_path_check("some/path/here"));
    }

    #[test]
    fn model_key_suppresses_slash_heuristic() {
        let is_path = looks_like_filesystem_path("openai/gpt-4", Some("model"), false, false);
        assert!(!is_path);
    }

    #[test]
    fn plain_string_is_not_path() {
        assert!(!bare_path_check("hello world"));
    }

    // ---- key classifiers --------------------------------------------------

    #[test]
    fn helper_key_functions() {
        for key in ["path", "file"] {
            assert!(is_path_key(key), "{key:?}");
        }
        assert!(!is_path_key("name"));

        for key in ["model", "engine", "primary"] {
            assert!(is_model_key(key), "{key:?}");
        }
        assert!(!is_model_key("name"));

        for key in ["url", "endpoint"] {
            assert!(is_network_key(key), "{key:?}");
        }
        assert!(!is_network_key("name"));

        for key in ["env", "env_var"] {
            assert!(is_environment_key(key), "{key:?}");
        }
        assert!(!is_environment_key("name"));
    }

    // ---- traversal behaviour ----------------------------------------------

    #[test]
    fn nested_object_with_path_context() {
        let input = json!({
            "file": {
                "name": "config.toml"
            }
        });
        assert!(scan_input_capabilities(&input).requires_filesystem);
    }

    #[test]
    fn array_values_scanned() {
        let input = json!({
            "urls": ["https://a.com", "https://b.com"]
        });
        assert!(scan_input_capabilities(&input).requires_network);
    }

    #[test]
    fn network_key_sets_network_flag() {
        let input = json!({
            "url": "some-value"
        });
        assert!(scan_input_capabilities(&input).requires_network);
    }

    #[test]
    fn null_and_boolean_values_ignored() {
        let input = json!({
            "flag": true,
            "nothing": null,
            "count": 42
        });
        // The default scan has every flag cleared.
        assert_eq!(
            scan_input_capabilities(&input),
            InputCapabilityScan::default()
        );
    }

    #[test]
    fn empty_string_values_skipped() {
        let input = json!({
            "data": ""
        });
        let scan = scan_input_capabilities(&input);
        assert!(!scan.requires_filesystem);
        assert!(!scan.requires_network);
    }

    #[test]
    fn input_capability_scan_default() {
        let all_clear = InputCapabilityScan {
            requires_filesystem: false,
            requires_network: false,
            requires_environment: false,
        };
        assert_eq!(InputCapabilityScan::default(), all_clear);
    }

    #[test]
    fn windows_drive_path_non_alpha_not_detected() {
        assert!(!bare_path_check("1:\\path"));
    }
}