Skip to main content

chipzen_sdk/
validate.rs

1//! `chipzen-sdk validate <path>` — pre-upload conformance checks.
2//!
3//! Mirrors the Python and JavaScript validators' check shape and
4//! severity model so a `(severity, name, message)` tuple from any of
5//! the three SDKs renders identically in client tooling.
6//!
7//! Smoke-test / conformance scenarios are deferred to Phase 3 PR 3
8//! (the conformance harness needs the IP-protected starter to land
9//! first so it knows what binary shape to drive).
10
11use anyhow::{Context, Result};
12use serde::Deserialize;
13use std::collections::HashSet;
14use std::fs;
15use std::path::Path;
16
/// Outcome level attached to a single validation check.
///
/// Forms the first element of the `(severity, name, message)` tuple
/// shape described in the module docs.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Severity {
    /// The check succeeded.
    Pass,
    /// The check found something worth flagging that is not a failure.
    Warn,
    /// The check failed.
    Fail,
}
23
24#[derive(Debug, Clone)]
25pub struct ValidationResult {
26    pub severity: Severity,
27    pub name: String,
28    pub message: String,
29}
30
/// Default value for [`ValidateOptions::max_upload_bytes`]: 500 * 1024 * 1024 bytes.
pub const DEFAULT_MAX_UPLOAD_BYTES: u64 = 500 * 1024 * 1024;

/// Tunables for [`validate_bot`].
#[derive(Debug, Clone)]
pub struct ValidateOptions {
    /// Hard-fail upload size threshold, in bytes. Defaults to 500 MB
    /// (platform cap).
    pub max_upload_bytes: u64,
}

impl Default for ValidateOptions {
    /// Builds the options with the size cap set to [`DEFAULT_MAX_UPLOAD_BYTES`].
    fn default() -> Self {
        ValidateOptions {
            max_upload_bytes: DEFAULT_MAX_UPLOAD_BYTES,
        }
    }
}
47
/// Crate names that cause a hard failure when they are listed in a
/// bot's `Cargo.toml`. Mirrors the BLOCKED_MODULES sets in the Python
/// and JavaScript validators.
///
/// **This is a courtesy linter, not a security gate.** Known gaps:
///
/// - [`std::process::Command`] — the most common way to spawn a
///   process from Rust — lives in the standard library, so it
///   **cannot be blocked at the Cargo dep level at all**. Blocking
///   `execve` / `clone` / `fork` is the job of the server-side seccomp
///   policy.
/// - `[build-dependencies]` and transitive dependencies are not
///   covered by this list.
/// - Macro-generated code can hide what looks like a banned API
///   behind a sanctioned one.
///
/// The authoritative gate is the runtime sandbox (`--cap-drop=ALL` +
/// read-only rootfs + seccomp-bpf restricting outbound network egress
/// to the platform WebSocket endpoint). This list only surfaces the
/// most common upload-blocking issues quickly on a developer's
/// laptop; relying on it for security would be a mistake.
const BLOCKED_DEPS: &[&str] = &[
    // --- Process spawning / OS escape ---
    // "subprocess" is a placeholder; cargo deps don't usually have this name.
    "subprocess",
    "duct",
    "command-group",
    // --- Raw socket / packet-level networking ---
    "pnet",
    "pcap",
    "raw_socket",
];
80
/// Crate names that produce a warning (not a failure) when listed in
/// a bot's `Cargo.toml`.
///
/// These are filesystem-access crates: usable, but the sandbox is
/// restrictive about reads/writes outside /bot/.
const WARN_DEPS: &[&str] = &[
    "tempfile",
    "tempdir",
    "memmap2",
];
86
87pub fn validate_bot(path: &Path, opts: &ValidateOptions) -> Result<Vec<ValidationResult>> {
88    let mut results = Vec::new();
89
90    let metadata = match fs::metadata(path) {
91        Ok(m) => m,
92        Err(_) => {
93            results.push(fail(
94                "file_structure",
95                format!("Path not found: {}", path.display()),
96            ));
97            return Ok(results);
98        }
99    };
100    if !metadata.is_dir() {
101        results.push(fail(
102            "file_structure",
103            format!("Path is not a directory: {}", path.display()),
104        ));
105        return Ok(results);
106    }
107
108    results.extend(check_size(path, opts.max_upload_bytes)?);
109
110    let cargo_toml_path = path.join("Cargo.toml");
111    let main_rs = path.join("src").join("main.rs");
112    let lib_rs = path.join("src").join("lib.rs");
113    if !cargo_toml_path.is_file() {
114        results.push(fail(
115            "file_structure",
116            "Cargo.toml not found in project root".to_string(),
117        ));
118        return Ok(results);
119    }
120    if !main_rs.is_file() && !lib_rs.is_file() {
121        results.push(fail(
122            "file_structure",
123            "Neither src/main.rs nor src/lib.rs found".to_string(),
124        ));
125        return Ok(results);
126    }
127    results.push(pass(
128        "file_structure",
129        format!(
130            "Cargo.toml + {} present",
131            if main_rs.is_file() {
132                "src/main.rs"
133            } else {
134                "src/lib.rs"
135            }
136        ),
137    ));
138
139    let cargo_text = fs::read_to_string(&cargo_toml_path).context("reading Cargo.toml")?;
140    let manifest: CargoManifest = match toml::from_str(&cargo_text) {
141        Ok(m) => m,
142        Err(e) => {
143            results.push(fail(
144                "cargo_metadata",
145                format!("Cargo.toml is not parseable: {e}"),
146            ));
147            return Ok(results);
148        }
149    };
150
151    results.extend(check_cargo_metadata(&manifest));
152    results.extend(check_dependencies(&manifest));
153
154    let entry = if main_rs.is_file() { main_rs } else { lib_rs };
155    let source =
156        fs::read_to_string(&entry).with_context(|| format!("reading {}", entry.display()))?;
157    results.push(check_bot_impl(&source));
158    results.push(check_decide_method(&source));
159
160    Ok(results)
161}
162
163// ---------------------------------------------------------------------------
164// Per-check implementations
165// ---------------------------------------------------------------------------
166
167fn check_size(dir: &Path, max_bytes: u64) -> Result<Vec<ValidationResult>> {
168    let total = dir_total_bytes(dir)?;
169    let mb = total as f64 / (1024.0 * 1024.0);
170    let limit_mb = max_bytes / (1024 * 1024);
171    if total > max_bytes {
172        return Ok(vec![fail(
173            "size",
174            format!("Directory is {mb:.1} MB, exceeds {limit_mb} MB upload limit"),
175        )]);
176    }
177    Ok(vec![pass(
178        "size",
179        format!("Size OK ({mb:.1} MB uncompressed / {limit_mb} MB limit)"),
180    )])
181}
182
183fn check_cargo_metadata(manifest: &CargoManifest) -> Vec<ValidationResult> {
184    let mut out = Vec::new();
185    let pkg = match manifest.package.as_ref() {
186        Some(p) => p,
187        None => {
188            out.push(fail(
189                "cargo_metadata",
190                "Cargo.toml is missing the [package] table".to_string(),
191            ));
192            return out;
193        }
194    };
195    if pkg.name.is_empty() {
196        out.push(fail("cargo_metadata", "package.name is empty".to_string()));
197    } else {
198        out.push(pass(
199            "cargo_metadata",
200            format!("package.name = {:?}, version = {:?}", pkg.name, pkg.version),
201        ));
202    }
203    out
204}
205
206fn check_dependencies(manifest: &CargoManifest) -> Vec<ValidationResult> {
207    let mut out = Vec::new();
208    let deps: HashSet<&str> = manifest.dependencies.keys().map(String::as_str).collect();
209
210    if !deps.contains("chipzen-bot") {
211        out.push(fail(
212            "imports",
213            "chipzen-bot dependency missing from Cargo.toml — add `chipzen-bot = \"0.2\"`"
214                .to_string(),
215        ));
216        return out;
217    }
218
219    let blocked: Vec<&&str> = BLOCKED_DEPS.iter().filter(|d| deps.contains(*d)).collect();
220    if !blocked.is_empty() {
221        let names: Vec<String> = blocked.iter().map(|d| (***d).to_string()).collect();
222        out.push(fail(
223            "imports",
224            format!(
225                "Blocked dependencies detected in Cargo.toml: {}",
226                names.join(", ")
227            ),
228        ));
229    } else {
230        out.push(pass(
231            "imports",
232            "No blocked dependencies detected".to_string(),
233        ));
234    }
235
236    for w in WARN_DEPS.iter().filter(|d| deps.contains(*d)) {
237        out.push(warn(
238            "imports",
239            format!("Depends on {w:?} — usable but the platform sandbox restricts what it can do"),
240        ));
241    }
242    out
243}
244
245fn check_bot_impl(source: &str) -> ValidationResult {
246    // Look for `impl Bot for X` (with or without a path prefix). We
247    // strip line and block comments first so a comment about Bot
248    // doesn't false-match.
249    let stripped = strip_comments(source);
250    let re_present = stripped
251        .lines()
252        .any(|l| l.contains("impl") && l.contains("Bot for"));
253    if re_present {
254        pass("bot_impl", "impl Bot for ... found".to_string())
255    } else {
256        fail(
257            "bot_impl",
258            "No `impl Bot for ...` found in entry point".to_string(),
259        )
260    }
261}
262
263fn check_decide_method(source: &str) -> ValidationResult {
264    // Imperfect (regex-based) — a smoke test in PR 3 will catch the
265    // actual runtime case via the conformance harness. For now,
266    // verify the symbol appears outside comments.
267    let stripped = strip_comments(source);
268    if stripped.contains("fn decide") {
269        pass("decide_method", "fn decide(...) found".to_string())
270    } else {
271        fail(
272            "decide_method",
273            "Entry point does not implement fn decide(...)".to_string(),
274        )
275    }
276}
277
278// ---------------------------------------------------------------------------
279// Helpers
280// ---------------------------------------------------------------------------
281
282fn pass(name: &str, message: String) -> ValidationResult {
283    ValidationResult {
284        severity: Severity::Pass,
285        name: name.to_string(),
286        message,
287    }
288}
289
290fn warn(name: &str, message: String) -> ValidationResult {
291    ValidationResult {
292        severity: Severity::Warn,
293        name: name.to_string(),
294        message,
295    }
296}
297
298fn fail(name: &str, message: String) -> ValidationResult {
299    ValidationResult {
300        severity: Severity::Fail,
301        name: name.to_string(),
302        message,
303    }
304}
305
/// Which flavour of comment a `scrub_comments` pass removes.
#[derive(Clone, Copy, PartialEq, Eq)]
enum ScrubTarget {
    /// `// ...` up to (not including) the end of the line.
    Line,
    /// `/* ... */`, including nested block comments.
    Block,
}

/// Returns `source` with `/* ... */` block comments and `// ...` line
/// comments removed.
///
/// Block comments are removed first, then line comments. Both passes
/// leave string, raw-string and char literals untouched, so a URL such
/// as `"http://x"` or a glob such as `"src/*"` is not mistaken for a
/// comment. The result is re-joined with `\n` and carries no trailing
/// newline.
///
/// This is a lightweight scanner for the substring checks in this
/// file, not a full Rust lexer.
fn strip_comments(source: &str) -> String {
    let no_block = strip_block_comments(source);
    scrub_comments(&no_block, ScrubTarget::Line)
        .lines()
        .collect::<Vec<_>>()
        .join("\n")
}

/// Returns `source` with every `/* ... */` block comment removed.
///
/// Nested block comments are handled (Rust allows them). A `/*` that
/// appears inside a `//` line comment or inside a string/char literal
/// does not open a comment. An unterminated block comment swallows
/// the rest of the input.
fn strip_block_comments(source: &str) -> String {
    scrub_comments(source, ScrubTarget::Block)
}

/// Single left-to-right scan that deletes comments of kind `target`
/// and copies everything else verbatim.
///
/// Comments of the *other* kind and all literals are recognised and
/// skipped over, so their contents can never start a comment.
fn scrub_comments(source: &str, target: ScrubTarget) -> String {
    let bytes = source.as_bytes();
    let mut out = String::with_capacity(source.len());
    // Start of the pending run of text that will be copied to `out`.
    // Only ever set to the position of an ASCII byte or to the end of
    // input, so slicing `source` there is always on a char boundary.
    let mut kept_from = 0;
    let mut i = 0;

    while i < bytes.len() {
        let next = bytes.get(i + 1).copied();
        match bytes[i] {
            b'/' if matches!(next, Some(b'/' | b'*')) => {
                let (kind, end) = if next == Some(b'/') {
                    let eol = source[i..].find('\n').map_or(bytes.len(), |off| i + off);
                    (ScrubTarget::Line, eol)
                } else {
                    (ScrubTarget::Block, block_comment_end(bytes, i))
                };
                if kind == target {
                    out.push_str(&source[kept_from..i]);
                    kept_from = end;
                }
                i = end;
            }
            b'"' => i = string_literal_end(bytes, i),
            b'\'' => i = char_literal_end(source, i),
            b if is_ident_byte(b) => {
                // Consume the whole identifier so an `r` in the middle
                // of a name is never taken for a raw-string prefix.
                let start = i;
                while i < bytes.len() && is_ident_byte(bytes[i]) {
                    i += 1;
                }
                if matches!(&bytes[start..i], b"r" | b"br" | b"cr") {
                    if let Some(end) = raw_string_end(bytes, i) {
                        i = end;
                    }
                }
            }
            _ => i += 1,
        }
    }

    out.push_str(&source[kept_from..]);
    out
}

/// True for bytes that can be part of an identifier (non-ASCII bytes
/// are treated as identifier bytes).
fn is_ident_byte(b: u8) -> bool {
    b == b'_' || b.is_ascii_alphanumeric() || b >= 0x80
}

/// Given `open` pointing at `/*`, returns the index just past the
/// matching `*/`, honouring nesting; end of input if unterminated.
fn block_comment_end(bytes: &[u8], open: usize) -> usize {
    let mut depth = 0usize;
    let mut i = open;
    while i < bytes.len() {
        match (bytes[i], bytes.get(i + 1).copied()) {
            (b'/', Some(b'*')) => {
                depth += 1;
                i += 2;
            }
            (b'*', Some(b'/')) => {
                // `depth` is at least 1 here: the scan starts on `/*`.
                depth -= 1;
                i += 2;
                if depth == 0 {
                    return i;
                }
            }
            _ => i += 1,
        }
    }
    bytes.len()
}

/// Given `open` pointing at `"`, returns the index just past the
/// closing quote (backslash escapes honoured); end of input if
/// unterminated.
fn string_literal_end(bytes: &[u8], open: usize) -> usize {
    let mut i = open + 1;
    while i < bytes.len() {
        match bytes[i] {
            b'\\' => i += 2,
            b'"' => return i + 1,
            _ => i += 1,
        }
    }
    bytes.len()
}

/// Given `after_prefix` pointing just past an `r` / `br` / `cr`
/// prefix, returns the index just past the raw string literal, or
/// `None` when no raw string starts there (e.g. a raw identifier).
fn raw_string_end(bytes: &[u8], after_prefix: usize) -> Option<usize> {
    let hashes = bytes[after_prefix..]
        .iter()
        .take_while(|&&b| b == b'#')
        .count();
    let quote = after_prefix + hashes;
    if bytes.get(quote).copied() != Some(b'"') {
        return None;
    }
    let mut i = quote + 1;
    while i < bytes.len() {
        let closes = bytes[i] == b'"'
            && bytes
                .get(i + 1..i + 1 + hashes)
                .is_some_and(|tail| tail.iter().all(|&b| b == b'#'));
        if closes {
            return Some(i + 1 + hashes);
        }
        i += 1;
    }
    Some(bytes.len())
}

/// Given `open` pointing at `'`, returns the index just past a char
/// literal starting there, or `open + 1` when the quote is not a char
/// literal (a lifetime or loop label).
fn char_literal_end(source: &str, open: usize) -> usize {
    let rest = &source[open + 1..];
    let body_len = match rest.chars().next() {
        // Escaped char: skip the backslash and the escaped character,
        // then look for the closing quote.
        Some('\\') => rest
            .get(2..)
            .and_then(|tail| tail.find('\''))
            .map(|off| 2 + off),
        // Plain char: exactly one character followed by a quote.
        Some(c) if rest[c.len_utf8()..].starts_with('\'') => Some(c.len_utf8()),
        _ => None,
    };
    body_len.map_or(open + 1, |len| open + 1 + len + 1)
}
344
345fn dir_total_bytes(dir: &Path) -> Result<u64> {
346    let mut total: u64 = 0;
347    for entry in fs::read_dir(dir).with_context(|| format!("reading {}", dir.display()))? {
348        let entry = entry?;
349        let name = entry.file_name();
350        let name = name.to_string_lossy();
351        if name == "target" || name == ".git" {
352            continue;
353        }
354        let ft = entry.file_type()?;
355        let path = entry.path();
356        if ft.is_dir() {
357            total = total.saturating_add(dir_total_bytes(&path)?);
358        } else if ft.is_file() {
359            total = total.saturating_add(entry.metadata()?.len());
360        }
361    }
362    Ok(total)
363}
364
365// ---------------------------------------------------------------------------
366// Cargo.toml deserialization (only the fields we care about)
367// ---------------------------------------------------------------------------
368
369#[derive(Debug, Deserialize)]
370struct CargoManifest {
371    package: Option<PackageMetadata>,
372    #[serde(default)]
373    dependencies: std::collections::BTreeMap<String, toml::Value>,
374}
375
376#[derive(Debug, Deserialize)]
377struct PackageMetadata {
378    name: String,
379    #[serde(default)]
380    version: String,
381}