Skip to main content

split_brain_harness/
input_validation.rs

//! Centralized input validation layer.
//!
//! All external inputs — user text to the harness, tool forge inputs,
//! endpoint URLs, soul paths — pass through this module before any
//! processing begins. Fail fast, fail loudly.
6use crate::capability::CapabilityRequest;
7
8// ---------------------------------------------------------------------------
9// Hard limits (tunable via constants, not config — these are security floors)
10// ---------------------------------------------------------------------------
11
/// Upper bound, in bytes, on text handed to the harness `analyze()` entry point (32 KiB).
pub const MAX_HARNESS_INPUT_BYTES: usize = 32 * 1024;

/// Upper bound, in bytes, on text handed to a mock tool implementation (64 KiB).
pub const MAX_FORGE_INPUT_BYTES: usize = 64 * 1024;

/// Upper bound, in bytes, on a capability name.
pub const MAX_CAPABILITY_NAME_BYTES: usize = 64;

/// Upper bound, in bytes, on a capability request's `reason` field (1 KiB).
pub const MAX_REASON_BYTES: usize = 1024;

/// Upper bound, in bytes, on an input or output contract description.
pub const MAX_CONTRACT_BYTES: usize = 256;
26
27// ---------------------------------------------------------------------------
28// Error type
29// ---------------------------------------------------------------------------
30
/// Error returned by every validator in this module.
///
/// Wraps a single human-readable message describing which input was
/// rejected and why. The message is public so callers and tests can
/// inspect it directly via `.0`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError(pub String);

impl std::fmt::Display for ValidationError {
    /// Writes the wrapped message verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}

// Implementing the standard error trait lets callers propagate this type
// with `?` into `Box<dyn std::error::Error>` and similar generic error sinks.
impl std::error::Error for ValidationError {}
39
40// ---------------------------------------------------------------------------
41// Harness input
42// ---------------------------------------------------------------------------
43
44/// Validate text entering the harness pipeline.
45///
46/// Rejects: oversized inputs, null bytes, non-printable control characters
47/// (ASCII 0x01–0x1F, excluding \t, \n, \r which appear in normal text).
48pub fn validate_harness_input(input: &str) -> Result<(), ValidationError> {
49    if input.len() > MAX_HARNESS_INPUT_BYTES {
50        return Err(ValidationError(format!(
51            "input too long: {} bytes (max {})",
52            input.len(),
53            MAX_HARNESS_INPUT_BYTES
54        )));
55    }
56    check_string_chars(input, "harness input")?;
57    Ok(())
58}
59
60// ---------------------------------------------------------------------------
61// Forge input
62// ---------------------------------------------------------------------------
63
64/// Validate text passed to a mock tool implementation.
65///
66/// Rejects: oversized inputs, null bytes, non-printable control characters.
67pub fn validate_forge_input(input: &str) -> Result<(), ValidationError> {
68    if input.len() > MAX_FORGE_INPUT_BYTES {
69        return Err(ValidationError(format!(
70            "forge input too long: {} bytes (max {})",
71            input.len(),
72            MAX_FORGE_INPUT_BYTES
73        )));
74    }
75    check_string_chars(input, "forge input")?;
76    Ok(())
77}
78
79// ---------------------------------------------------------------------------
80// Capability request fields
81// ---------------------------------------------------------------------------
82
83/// Validate per-field length and content rules on a CapabilityRequest.
84///
85/// Called in addition to `CapabilityRequest::validate()` which handles
86/// structural/schema correctness. This covers length-based abuse vectors.
87pub fn validate_capability_fields(req: &CapabilityRequest) -> Result<(), ValidationError> {
88    check_field_len("capability", &req.capability, MAX_CAPABILITY_NAME_BYTES)?;
89    check_field_len("reason", &req.reason, MAX_REASON_BYTES)?;
90    check_field_len("input_contract", &req.input_contract, MAX_CONTRACT_BYTES)?;
91    check_field_len("output_contract", &req.output_contract, MAX_CONTRACT_BYTES)?;
92
93    check_string_chars(&req.capability, "capability")?;
94    check_string_chars(&req.reason, "reason")?;
95    Ok(())
96}
97
98// ---------------------------------------------------------------------------
99// Endpoint URL allowlist
100// ---------------------------------------------------------------------------
101
102/// Validate a backend endpoint URL.
103///
104/// Only `http://` and `https://` are accepted. `file://`, `javascript:`,
105/// data URIs, and other schemes are rejected.
106pub fn validate_endpoint(url: &str) -> Result<(), ValidationError> {
107    let lower = url.to_lowercase();
108    if lower.starts_with("http://") || lower.starts_with("https://") {
109        Ok(())
110    } else {
111        Err(ValidationError(format!(
112            "endpoint must use http:// or https:// — got: {url}"
113        )))
114    }
115}
116
117// ---------------------------------------------------------------------------
118// Soul path
119// ---------------------------------------------------------------------------
120
121/// Validate a user-supplied soul path.
122///
123/// Empty path is allowed (means: use embedded soul).
124/// Non-empty paths must:
125/// - not contain `../` traversal sequences
126/// - end with `.md`
127pub fn validate_soul_path(path: &str) -> Result<(), ValidationError> {
128    if path.is_empty() {
129        return Ok(());
130    }
131    if path.contains("../") || path.contains("..\\") || path.starts_with("..") {
132        return Err(ValidationError(format!(
133            "soul_path contains path traversal: {path}"
134        )));
135    }
136    if !path.ends_with(".md") {
137        return Err(ValidationError(format!(
138            "soul_path must be a .md file: {path}"
139        )));
140    }
141    Ok(())
142}
143
144// ---------------------------------------------------------------------------
145// Shared helpers
146// ---------------------------------------------------------------------------
147
148fn check_field_len(name: &str, value: &str, max: usize) -> Result<(), ValidationError> {
149    if value.len() > max {
150        Err(ValidationError(format!(
151            "{name} too long: {} bytes (max {max})",
152            value.len()
153        )))
154    } else {
155        Ok(())
156    }
157}
158
159/// Reject null bytes and non-printable ASCII controls (except \t, \n, \r).
160fn check_string_chars(s: &str, label: &str) -> Result<(), ValidationError> {
161    for (i, ch) in s.char_indices() {
162        if ch == '\0' {
163            return Err(ValidationError(format!(
164                "{label}: null byte at byte offset {i}"
165            )));
166        }
167        // Reject C0 control characters other than the normal whitespace trio
168        if ch.is_control() && ch != '\n' && ch != '\r' && ch != '\t' {
169            return Err(ValidationError(format!(
170                "{label}: disallowed control character {:?} at byte offset {i}",
171                ch
172            )));
173        }
174    }
175    Ok(())
176}
177
178// ---------------------------------------------------------------------------
179// Tests
180// ---------------------------------------------------------------------------
181
#[cfg(test)]
mod tests {
    use super::*;
    use crate::capability::{CapabilityConstraints, CapabilityRequest};

    /// A request whose every field satisfies the validators; individual
    /// tests mutate one field at a time.
    fn clean_req() -> CapabilityRequest {
        CapabilityRequest {
            kind: "capability_request".into(),
            capability: "word_count".into(),
            input_contract: "utf8 text".into(),
            output_contract: "json".into(),
            constraints: CapabilityConstraints::default(),
            reason: "text reasoning insufficient".into(),
        }
    }

    // --- validate_harness_input ---

    #[test]
    fn harness_input_valid_text() {
        assert!(validate_harness_input("hello world").is_ok());
    }

    #[test]
    fn harness_input_with_newlines_allowed() {
        assert!(validate_harness_input("line one\nline two\r\n").is_ok());
    }

    #[test]
    fn harness_input_too_long() {
        let big = "a".repeat(MAX_HARNESS_INPUT_BYTES + 1);
        let err = validate_harness_input(&big).unwrap_err();
        assert!(err.0.contains("too long"));
    }

    #[test]
    fn harness_input_at_limit_accepted() {
        // The limit is inclusive: exactly MAX bytes must pass.
        let exact = "a".repeat(MAX_HARNESS_INPUT_BYTES);
        assert!(validate_harness_input(&exact).is_ok());
    }

    #[test]
    fn harness_input_limit_counts_bytes_not_chars() {
        // 'é' is two bytes in UTF-8, so just over half the limit in
        // characters already exceeds the byte budget.
        let big = "é".repeat(MAX_HARNESS_INPUT_BYTES / 2 + 1);
        let err = validate_harness_input(&big).unwrap_err();
        assert!(err.0.contains("too long"));
    }

    #[test]
    fn harness_input_null_byte_rejected() {
        let err = validate_harness_input("hello\x00world").unwrap_err();
        assert!(err.0.contains("null byte"));
    }

    #[test]
    fn harness_input_control_char_rejected() {
        // ASCII 0x01 (SOH) should be rejected
        let err = validate_harness_input("hello\x01world").unwrap_err();
        assert!(err.0.contains("control character"));
    }

    #[test]
    fn harness_input_del_rejected() {
        // DEL (0x7F) lies outside 0x01–0x1F but is still a control character.
        let err = validate_harness_input("hello\x7fworld").unwrap_err();
        assert!(err.0.contains("control character"));
    }

    #[test]
    fn harness_input_tab_allowed() {
        assert!(validate_harness_input("col1\tcol2").is_ok());
    }

    // --- validate_forge_input ---

    #[test]
    fn forge_input_valid() {
        assert!(validate_forge_input("log line 200 OK").is_ok());
    }

    #[test]
    fn forge_input_too_long() {
        let big = "x".repeat(MAX_FORGE_INPUT_BYTES + 1);
        let err = validate_forge_input(&big).unwrap_err();
        assert!(err.0.contains("too long"));
    }

    #[test]
    fn forge_input_at_limit_accepted() {
        let exact = "x".repeat(MAX_FORGE_INPUT_BYTES);
        assert!(validate_forge_input(&exact).is_ok());
    }

    #[test]
    fn forge_input_null_byte_rejected() {
        assert!(validate_forge_input("a\x00b").is_err());
    }

    // --- validate_capability_fields ---

    #[test]
    fn capability_fields_valid() {
        assert!(validate_capability_fields(&clean_req()).is_ok());
    }

    #[test]
    fn capability_fields_at_limits_accepted() {
        let mut req = clean_req();
        req.capability = "x".repeat(MAX_CAPABILITY_NAME_BYTES);
        req.reason = "r".repeat(MAX_REASON_BYTES);
        req.input_contract = "c".repeat(MAX_CONTRACT_BYTES);
        req.output_contract = "c".repeat(MAX_CONTRACT_BYTES);
        assert!(validate_capability_fields(&req).is_ok());
    }

    #[test]
    fn capability_name_too_long() {
        let mut req = clean_req();
        req.capability = "x".repeat(MAX_CAPABILITY_NAME_BYTES + 1);
        let err = validate_capability_fields(&req).unwrap_err();
        assert!(err.0.contains("capability"));
    }

    #[test]
    fn reason_too_long() {
        let mut req = clean_req();
        req.reason = "r".repeat(MAX_REASON_BYTES + 1);
        let err = validate_capability_fields(&req).unwrap_err();
        assert!(err.0.contains("reason"));
    }

    #[test]
    fn input_contract_too_long() {
        let mut req = clean_req();
        req.input_contract = "c".repeat(MAX_CONTRACT_BYTES + 1);
        let err = validate_capability_fields(&req).unwrap_err();
        assert!(err.0.contains("input_contract"));
    }

    #[test]
    fn output_contract_too_long() {
        let mut req = clean_req();
        req.output_contract = "c".repeat(MAX_CONTRACT_BYTES + 1);
        let err = validate_capability_fields(&req).unwrap_err();
        assert!(err.0.contains("output_contract"));
    }

    #[test]
    fn capability_null_byte_rejected() {
        let mut req = clean_req();
        req.capability = "foo\x00bar".into();
        let err = validate_capability_fields(&req).unwrap_err();
        assert!(err.0.contains("null byte"));
    }

    #[test]
    fn reason_control_char_rejected() {
        let mut req = clean_req();
        req.reason = "bad\x01reason".into();
        let err = validate_capability_fields(&req).unwrap_err();
        assert!(err.0.contains("control character"));
    }

    // --- validate_endpoint ---

    #[test]
    fn https_endpoint_accepted() {
        assert!(validate_endpoint("https://api.example.com/v1").is_ok());
    }

    #[test]
    fn http_endpoint_accepted() {
        assert!(validate_endpoint("http://localhost:11434").is_ok());
    }

    #[test]
    fn uppercase_scheme_accepted() {
        // Scheme matching is case-insensitive.
        assert!(validate_endpoint("HTTPS://api.example.com/v1").is_ok());
    }

    #[test]
    fn file_url_rejected() {
        let err = validate_endpoint("file:///etc/passwd").unwrap_err();
        assert!(err.0.contains("http://"));
    }

    #[test]
    fn javascript_url_rejected() {
        assert!(validate_endpoint("javascript:alert(1)").is_err());
    }

    #[test]
    fn data_uri_rejected() {
        assert!(validate_endpoint("data:text/plain,hello").is_err());
    }

    #[test]
    fn bare_hostname_rejected() {
        assert!(validate_endpoint("localhost:8080").is_err());
    }

    #[test]
    fn empty_endpoint_rejected() {
        assert!(validate_endpoint("").is_err());
    }

    // --- validate_soul_path ---

    #[test]
    fn empty_soul_path_accepted() {
        assert!(validate_soul_path("").is_ok());
    }

    #[test]
    fn valid_soul_path_accepted() {
        assert!(validate_soul_path("/home/user/soul.md").is_ok());
    }

    #[test]
    fn path_traversal_rejected() {
        assert!(validate_soul_path("../../etc/passwd.md").is_err());
    }

    #[test]
    fn relative_traversal_rejected() {
        assert!(validate_soul_path("../config/soul.md").is_err());
    }

    #[test]
    fn embedded_traversal_rejected() {
        // Traversal hidden in the middle of an otherwise ordinary path.
        let err = validate_soul_path("souls/../../etc/passwd.md").unwrap_err();
        assert!(err.0.contains("path traversal"));
    }

    #[test]
    fn backslash_traversal_rejected() {
        // Windows-style separators must be caught as well.
        let err = validate_soul_path("souls\\..\\secrets\\soul.md").unwrap_err();
        assert!(err.0.contains("path traversal"));
    }

    #[test]
    fn non_md_extension_rejected() {
        let err = validate_soul_path("/home/user/soul.txt").unwrap_err();
        assert!(err.0.contains(".md"));
    }
}