Skip to main content

roboticus_core/
input_capability_scan.rs

1use serde_json::Value;
2
/// Summary of the capabilities a JSON input appears to need.
///
/// Produced by [`scan_input_capabilities`]. All flags are `false` in the
/// derived `Default` value and are only ever switched on during a scan.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct InputCapabilityScan {
    /// Set when a string value is judged to be a filesystem path.
    pub requires_filesystem: bool,
    /// Set when a string value is a URL or an object key is network-related.
    pub requires_network: bool,
    /// Set when an object key is environment-related.
    pub requires_environment: bool,
}
9
/// A string leaf found in the input together with the context it was found in.
#[derive(Clone, Debug)]
struct StringToken {
    /// Lower-cased key of the object entry the string was reached through,
    /// or `None` when no object key applies.
    key: Option<String>,
    /// `true` when the string sits under a path-like key at any depth.
    in_path_context: bool,
    /// `true` when the string sits under a model-like key at any depth.
    in_model_context: bool,
    /// The string exactly as it appeared in the input (not trimmed).
    value: String,
}
17
18pub fn scan_input_capabilities(input: &Value) -> InputCapabilityScan {
19    let mut scan = InputCapabilityScan::default();
20    let mut tokens = Vec::new();
21    collect_strings(input, None, false, false, &mut tokens, &mut scan);
22
23    for token in tokens {
24        let value = token.value.trim();
25        if value.is_empty() {
26            continue;
27        }
28        if is_url(value) {
29            scan.requires_network = true;
30        }
31        if looks_like_filesystem_path(
32            value,
33            token.key.as_deref(),
34            token.in_path_context,
35            token.in_model_context,
36        ) {
37            scan.requires_filesystem = true;
38        }
39    }
40
41    scan
42}
43
/// Returns `true` when `key` is one of the names treated as holding a
/// filesystem path. The match is exact and case-sensitive.
fn is_path_key(key: &str) -> bool {
    matches!(
        key,
        "path" | "file" | "filepath" | "directory" | "dir" | "filename"
    )
}
47
/// Returns `true` when `key` is one of the names treated as holding a model
/// or provider identifier. The match is exact and case-sensitive.
fn is_model_key(key: &str) -> bool {
    matches!(
        key,
        "model"
            | "model_id"
            | "provider"
            | "provider_model"
            | "engine"
            | "primary"
            | "fallback"
            | "model_name"
    )
}
61
/// Returns `true` when `key` is one of the names treated as network-related.
/// The match is exact and case-sensitive.
fn is_network_key(key: &str) -> bool {
    matches!(key, "url" | "endpoint" | "host" | "api")
}
65
/// Returns `true` when `key` is one of the names treated as
/// environment-related. The match is exact and case-sensitive.
fn is_environment_key(key: &str) -> bool {
    matches!(key, "env" | "environment" | "env_var" | "env_key")
}
69
/// Returns `true` when `v`, ignoring surrounding whitespace, begins with an
/// `http://`, `https://`, `ws://`, or `wss://` scheme prefix.
///
/// The scheme comparison is ASCII case-insensitive, so `HTTP://` also matches.
/// Only the leading bytes are inspected; no lowercased copy of the input is
/// allocated, which keeps the check cheap for very long strings.
fn is_url(v: &str) -> bool {
    const SCHEME_PREFIXES: [&str; 4] = ["http://", "https://", "ws://", "wss://"];

    // Compare on bytes: a `str` slice could land inside a multi-byte character
    // and panic, whereas a byte slice of the prefix length is always valid.
    let bytes = v.trim().as_bytes();
    SCHEME_PREFIXES.iter().any(|prefix| {
        bytes
            .get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix.as_bytes()))
    })
}
77
78fn collect_strings(
79    value: &Value,
80    key_ctx: Option<&str>,
81    in_path_context: bool,
82    in_model_context: bool,
83    out: &mut Vec<StringToken>,
84    scan: &mut InputCapabilityScan,
85) {
86    match value {
87        Value::String(s) => out.push(StringToken {
88            key: key_ctx.map(|k| k.to_string()),
89            in_path_context,
90            in_model_context,
91            value: s.clone(),
92        }),
93        Value::Array(arr) => {
94            for item in arr {
95                collect_strings(item, key_ctx, in_path_context, in_model_context, out, scan);
96            }
97        }
98        Value::Object(map) => {
99            for (key, item) in map {
100                let lower_key = key.to_lowercase();
101                if is_network_key(&lower_key) {
102                    scan.requires_network = true;
103                }
104                if is_environment_key(&lower_key) {
105                    scan.requires_environment = true;
106                }
107
108                let next_path_context = in_path_context || is_path_key(&lower_key);
109                let next_model_context = in_model_context || is_model_key(&lower_key);
110                collect_strings(
111                    item,
112                    Some(&lower_key),
113                    next_path_context,
114                    next_model_context,
115                    out,
116                    scan,
117                );
118            }
119        }
120        _ => {}
121    }
122}
123
124fn looks_like_filesystem_path(
125    v: &str,
126    key: Option<&str>,
127    in_path_context: bool,
128    in_model_context: bool,
129) -> bool {
130    if in_path_context || key.is_some_and(is_path_key) {
131        return true;
132    }
133    if is_url(v) {
134        return false;
135    }
136    if v.starts_with('/')
137        || v.starts_with("./")
138        || v.starts_with("../")
139        || v.starts_with("~/")
140        || v.starts_with(".\\")
141        || v.starts_with("..\\")
142        || v.starts_with("~\\")
143        || v.starts_with("\\\\")
144    {
145        return true;
146    }
147    if v.len() > 2
148        && v.as_bytes().get(1) == Some(&b':')
149        && matches!(v.as_bytes().get(2), Some(b'\\' | b'/'))
150    {
151        return v.as_bytes()[0].is_ascii_alphabetic();
152    }
153    if in_model_context {
154        return false;
155    }
156    // Treat slash-separated values as path-like after excluding URLs/model context.
157    if v.contains('/') && !key.is_some_and(is_model_key) {
158        return true;
159    }
160    false
161}
162
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Runs the path heuristic with no key and neither context flag set.
    fn bare_path_check(value: &str) -> bool {
        looks_like_filesystem_path(value, None, false, false)
    }

    #[test]
    fn url_and_websocket_values_require_network_only() {
        let input = json!({
            "endpoint": "https://example.com/v1",
            "socket": "wss://stream.example.com",
        });
        let scan = scan_input_capabilities(&input);
        assert!(scan.requires_network);
        assert!(!scan.requires_filesystem);
    }

    #[test]
    fn model_identifier_is_not_filesystem_but_explicit_path_is() {
        let identifier = json!({"model": "openai/gpt-4o"});
        assert!(!scan_input_capabilities(&identifier).requires_filesystem);

        let absolute = json!({"model": "/etc/passwd"});
        assert!(scan_input_capabilities(&absolute).requires_filesystem);
    }

    #[test]
    fn regex_like_string_is_not_filesystem() {
        let input = json!({"pattern": "\\d+\\w+\\s*"});
        assert!(!scan_input_capabilities(&input).requires_filesystem);
    }

    #[test]
    fn environment_keys_require_environment_capability() {
        let input = json!({"env_var": "SECRET_TOKEN"});
        assert!(scan_input_capabilities(&input).requires_environment);
    }

    // ── is_url coverage ─────────────────────────────────────────────────

    #[test]
    fn is_url_detects_http() {
        for candidate in ["http://example.com", "HTTP://EXAMPLE.COM"] {
            assert!(is_url(candidate), "expected a URL: {:?}", candidate);
        }
    }

    #[test]
    fn is_url_detects_https() {
        assert!(is_url("https://example.com/path"));
    }

    #[test]
    fn is_url_detects_ws() {
        for candidate in ["ws://localhost:8080", "wss://secure.example.com"] {
            assert!(is_url(candidate), "expected a URL: {:?}", candidate);
        }
    }

    #[test]
    fn is_url_rejects_non_urls() {
        for candidate in ["not a url", "/etc/passwd", "ftp://something", ""] {
            assert!(!is_url(candidate), "expected a non-URL: {:?}", candidate);
        }
    }

    // ── looks_like_filesystem_path coverage ──────────────────────────────

    #[test]
    fn path_context_always_returns_true() {
        assert!(looks_like_filesystem_path("anything", None, true, false));
    }

    #[test]
    fn path_key_always_returns_true() {
        let path_keys = ["path", "file", "directory", "dir", "filename", "filepath"];
        for key in path_keys {
            let detected = looks_like_filesystem_path("anything", Some(key), false, false);
            assert!(detected, "key {:?} should force a path verdict", key);
        }
    }

    #[test]
    fn url_is_not_filesystem_path() {
        assert!(!bare_path_check("https://example.com"));
    }

    #[test]
    fn absolute_paths_detected() {
        for candidate in ["/etc/passwd", "./relative", "../parent", "~/home"] {
            assert!(bare_path_check(candidate), "expected a path: {:?}", candidate);
        }
    }

    #[test]
    fn backslash_paths_detected() {
        let candidates = [".\\windows", "..\\parent", "~\\user", "\\\\server\\share"];
        for candidate in candidates {
            assert!(bare_path_check(candidate), "expected a path: {:?}", candidate);
        }
    }

    #[test]
    fn windows_drive_path_detected() {
        for candidate in ["C:\\Users\\test", "D:/path"] {
            assert!(bare_path_check(candidate), "expected a path: {:?}", candidate);
        }
    }

    #[test]
    fn model_context_suppresses_slash_heuristic() {
        let detected = looks_like_filesystem_path("openai/gpt-4", None, false, true);
        assert!(!detected);
    }

    #[test]
    fn slash_separated_without_model_context_is_path() {
        assert!(bare_path_check("some/path/here"));
    }

    #[test]
    fn model_key_suppresses_slash_heuristic() {
        let detected = looks_like_filesystem_path("openai/gpt-4", Some("model"), false, false);
        assert!(!detected);
    }

    #[test]
    fn plain_string_is_not_path() {
        assert!(!bare_path_check("hello world"));
    }

    // ── is_path_key / is_model_key / is_network_key / is_environment_key ──

    #[test]
    fn helper_key_functions() {
        for key in ["path", "file"] {
            assert!(is_path_key(key), "expected a path key: {:?}", key);
        }
        assert!(!is_path_key("name"));

        for key in ["model", "engine", "primary"] {
            assert!(is_model_key(key), "expected a model key: {:?}", key);
        }
        assert!(!is_model_key("name"));

        for key in ["url", "endpoint"] {
            assert!(is_network_key(key), "expected a network key: {:?}", key);
        }
        assert!(!is_network_key("name"));

        for key in ["env", "env_var"] {
            assert!(is_environment_key(key), "expected an environment key: {:?}", key);
        }
        assert!(!is_environment_key("name"));
    }

    // ── collect_strings / scan_input_capabilities integration ───────────

    #[test]
    fn nested_object_with_path_context() {
        let input = json!({
            "file": {
                "name": "config.toml"
            }
        });
        assert!(scan_input_capabilities(&input).requires_filesystem);
    }

    #[test]
    fn array_values_scanned() {
        let input = json!({
            "urls": ["https://a.com", "https://b.com"]
        });
        assert!(scan_input_capabilities(&input).requires_network);
    }

    #[test]
    fn network_key_sets_network_flag() {
        let input = json!({
            "url": "some-value"
        });
        assert!(scan_input_capabilities(&input).requires_network);
    }

    #[test]
    fn null_and_boolean_values_ignored() {
        let input = json!({
            "flag": true,
            "nothing": null,
            "count": 42
        });
        let InputCapabilityScan {
            requires_filesystem,
            requires_network,
            requires_environment,
        } = scan_input_capabilities(&input);
        assert!(!requires_filesystem);
        assert!(!requires_network);
        assert!(!requires_environment);
    }

    #[test]
    fn empty_string_values_skipped() {
        let input = json!({
            "data": ""
        });
        let scan = scan_input_capabilities(&input);
        assert!(!scan.requires_filesystem);
        assert!(!scan.requires_network);
    }

    #[test]
    fn input_capability_scan_default() {
        let InputCapabilityScan {
            requires_filesystem,
            requires_network,
            requires_environment,
        } = InputCapabilityScan::default();
        assert!(!requires_filesystem);
        assert!(!requires_network);
        assert!(!requires_environment);
    }

    #[test]
    fn windows_drive_path_non_alpha_not_detected() {
        // Byte at index 0 is non-alphabetic
        assert!(!bare_path_check("1:\\path"));
    }
}