Skip to main content

arkhe_subset_rust_check/
lib.rs

1//! # arkhe-subset-rust-check — Subset-Rust Purity Lint (E14.L1-Deny)
2//!
3//! AST-level purity check for `Action::compute` bodies (E14.L1 — Compute
4//! Determinism Closure, L1 realisation). Detects determinism-
5//! breaking calls — clock / RNG / I/O / FFI — and bans `unsafe` blocks
6//! inside the scanned function. Returns a list of [`PurityViolation`]s.
7//!
8//! ## Cross-link to E14.L2-Allow (cryptographer naming convention)
9//!
10//! Spec body terminology (cryptographer cross-review):
11//!
12//! - **E14.L1-Deny** — build-time AST deny-list (this crate).
13//! - **E14.L2-Allow** — runtime host-import allow-list (WASM capability
14//!   table, `arkhe:hook/{state, emit, fuel, ...}`). The L2 allow-list is
15//!   the inverse of this L1 deny-list — compute-internal → external
16//!   communication has exactly one channel (the host imports).
17//!
//! Together, the two layers form the **dual enforcement of the E14
//! determinism contract**: non-deterministic *inputs* are rejected at L1
//! (clock / RNG / I/O / FFI / threading), and non-deterministic
//! *operations* are rejected at L2 (FP / SIMD / wasm-side threading).
22//!
23//! ## Crate shape
24//!
//! Mirrors the `arkhe-trait-default-check` precedent: a syn-based lib
//! that runs on stable Rust and is integrated via the `#[arkhe_pure]`
//! attribute macro shipped from `arkhe-forge-macros`. The macro path
//! catches violations on every `cargo check`. The coverage assertion
//! ("every `Action::compute` has the attribute") is delegated to a
//! separate workspace-wide scan in `arkhe-trait-default-check`.
31//!
32//!
33//! ## Spec anchor
34//!
35//! - E14 Compute Determinism Closure (MC) — Runtime axiom layer.
36//! - E14.L1-Deny — L1 `Action::compute` realisation (this crate + `#[arkhe_pure]`).
37
38#![forbid(unsafe_code)]
39#![warn(missing_docs)]
40
41use std::collections::BTreeSet;
42use syn::{visit::Visit, ExprPath, ExprUnsafe, ItemFn, Path};
43
/// Deny-list configuration for the purity scan: exact paths, namespace
/// prefixes, and an optional `unsafe`-block ban. The stock policy built
/// by [`Policy::deny_compute_impurity`] covers the 4-rule deny scope
/// (Clock + RNG + I/O + FFI).
///
/// ## Known limitation: single-ident suffix-match false-positive
///
/// A single-segment path (e.g. `thread_rng()`) is matched by looking for
/// any deny-list entry that ends in `::<ident>`. That closes the common
/// use-import escape (`use rand::thread_rng; thread_rng()`), but it also
/// collides with a **user-defined local fn of the same name**: a shell
/// crate defining `fn random() -> u32` has its `random()` calls flagged
/// as `rand::random`.
///
/// Mitigation:
/// - Write the fully-qualified path in shell code (`my_crate::random()`
///   instead of `random()`).
/// - Or use `Policy::empty()` and rely on a downstream lint.
/// - Long-term — receiver-type aware HIR resolution lands when the
///   crate migrates to the `dylint_linting` cdylib.
#[derive(Clone, Debug)]
pub struct Policy {
    /// Fully-qualified path strings that must not appear as call targets
    /// or constant accesses. Two match heuristics apply:
    /// - exact equality between the entry and the rendered path of the
    ///   visited `ExprPath` (e.g. `std::time::Instant::now`),
    /// - imported-ident form (`use rand::thread_rng; thread_rng()`): a
    ///   visited path consisting of a single ident matches any entry
    ///   whose last segment is that ident.
    pub denied_paths: BTreeSet<String>,
    /// Namespace prefixes for whole-module bans (`std::fs`, `std::net`,
    /// `libc`, ...). A visited expression path matches a prefix `P` when
    /// it is exactly `P` or begins with `P::`. Type-position paths are
    /// not affected because the visitor only overrides
    /// `visit_expr_path`.
    pub denied_prefixes: BTreeSet<String>,
    /// When `true`, each `unsafe { ... }` block inside the scanned
    /// function is reported. This closes the FFI escape route (raw
    /// `extern "C"` calls always need `unsafe`) as well as `transmute`
    /// and raw-pointer dereferences.
    pub deny_unsafe: bool,
}

impl Policy {
    /// Policy with no entries and no `unsafe` ban — every function
    /// passes. Useful for tests.
    pub fn empty() -> Self {
        Self {
            deny_unsafe: false,
            denied_prefixes: BTreeSet::new(),
            denied_paths: BTreeSet::new(),
        }
    }

    /// Default deny list (4 categories): Clock + RNG + I/O + FFI, plus the
    /// `unsafe` block ban. Future rounds may add Threading + Sync/atomic
    /// + replay hazards as non-breaking additions.
    pub fn deny_compute_impurity() -> Self {
        // Exact path entries — clock / RNG / specific I/O.
        const EXACT_PATHS: &[&str] = &[
            // Clock — chain replay must not depend on wall-clock time.
            "std::time::Instant::now",
            "std::time::SystemTime::now",
            "std::time::UNIX_EPOCH",
            "chrono::Utc::now",
            "chrono::Local::now",
            "minstant::Instant::now",
            "quanta::Clock::now",
            "coarsetime::Instant::now",
            "instant::Instant::now",
            // RNG OS-entropy paths. Deterministic seeded RNGs (e.g.
            // `rand_chacha::ChaCha20Rng::seed_from_u64(42)`) are
            // intentionally NOT banned (cryptographer review).
            // `from_entropy` / `from_os_rng` constructor bans depend on
            // HIR-level method-call resolution.
            "rand::random",
            "rand::thread_rng",
            "rand::rngs::OsRng",
            "rand::rngs::ThreadRng",
            "getrandom::getrandom",
            "getrandom::fill", // 0.3+ API; 0.2 / 0.3 dual-pin coverage.
            "rdrand::RdRand",
            // Specific I/O — chain replay must not read stdin / OS state.
            "std::io::stdin",
            "std::io::stdout",
            "std::io::stderr",
        ];
        // Namespace prefixes — entire I/O / FFI modules.
        const PREFIXES: &[&str] = &[
            // I/O — filesystem / network / process / env.
            "std::fs",
            "std::net",
            "std::process",
            "std::env",
            "tokio::fs",
            "tokio::net",
            "tokio::io",
            "tokio::time",
            "async_std::fs",
            "async_std::net",
            "async_std::io",
            "async_std::task",
            "mio",
            "socket2",
            // FFI — libc.
            "libc",
        ];

        Self {
            denied_paths: EXACT_PATHS.iter().map(|entry| String::from(*entry)).collect(),
            denied_prefixes: PREFIXES.iter().map(|entry| String::from(*entry)).collect(),
            // Bans `extern "C"` calls (always behind unsafe), raw-pointer
            // deref, transmute, and other dangerous primitives. An escape
            // hatch via a `#[arkhe(unsafe_audit_cleared = "ticket-id")]`
            // attribute is a Round-3 candidate (cryptographer dispatch).
            deny_unsafe: true,
        }
    }
}

impl Default for Policy {
    /// The default policy is the full compute-impurity deny list.
    fn default() -> Self {
        Policy::deny_compute_impurity()
    }
}
174
/// A single purity violation found inside the scanned function: a
/// forbidden call site, an access under a banned namespace prefix, or an
/// `unsafe` block. It records which deny-list entry matched and a label
/// describing the offending site for diagnostics.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PurityViolation {
    /// The matching deny-list entry (exact path or prefix), or the
    /// literal `"unsafe-block"` when the unsafe ban fired.
    pub denied_path: String,
    /// Human-readable rendering of the offending site.
    pub site: String,
    /// Reason category — one of `clock`, `rng`, `io`, `ffi`, `unsafe`,
    /// `other`.
    pub reason: &'static str,
}
188
189/// Scan a function for E14.L1-Deny purity violations under `policy`.
190pub fn check_purity(item: &ItemFn, policy: &Policy) -> Vec<PurityViolation> {
191    let mut visitor = PurityVisitor {
192        policy,
193        violations: Vec::new(),
194    };
195    visitor.visit_item_fn(item);
196    visitor.violations
197}
198
199/// Convenience wrapper — scan with the default `deny_compute_impurity` policy.
200pub fn check_purity_default(item: &ItemFn) -> Vec<PurityViolation> {
201    check_purity(item, &Policy::deny_compute_impurity())
202}
203
204struct PurityVisitor<'p> {
205    policy: &'p Policy,
206    violations: Vec<PurityViolation>,
207}
208
209impl<'ast, 'p> Visit<'ast> for PurityVisitor<'p> {
210    /// Match path expressions like `std::time::Instant::now` (when used
211    /// as a function reference) and `std::time::UNIX_EPOCH` (constant
212    /// access). Method-call form (`receiver.method()`) is intentionally
213    /// not handled in this implementation — it would require HIR-level
214    /// receiver-type resolution to avoid colliding with shell-defined
215    /// methods of the same name (e.g. a shell type with its own `.now()`).
216    /// Cryptographer review may extend the visitor with `*::method`
217    /// pattern entries once a precise receiver-type heuristic is agreed.
218    ///
219    /// Default visitor recursion is preserved for child nodes; we never
220    /// override `visit_path` so generic / type-position paths are skipped.
221    fn visit_expr_path(&mut self, node: &'ast ExprPath) {
222        let path_str = path_to_string(&node.path);
223        if let Some((denied, kind)) = self.match_against_deny_list(&node.path, &path_str) {
224            self.violations.push(PurityViolation {
225                denied_path: denied.to_string(),
226                site: format!("{} ({kind})", path_str),
227                reason: classify_reason(denied),
228            });
229        }
230        syn::visit::visit_expr_path(self, node);
231    }
232
233    /// Ban `unsafe { ... }` blocks inside the scanned function. Closes the
234    /// FFI / raw-pointer / transmute attack surface for E14.L1-Deny when
235    /// `policy.deny_unsafe` is true.
236    fn visit_expr_unsafe(&mut self, node: &'ast ExprUnsafe) {
237        if self.policy.deny_unsafe {
238            self.violations.push(PurityViolation {
239                denied_path: "unsafe-block".to_string(),
240                site: "unsafe { ... }".to_string(),
241                reason: "unsafe",
242            });
243        }
244        syn::visit::visit_expr_unsafe(self, node);
245    }
246}
247
/// Which deny-list heuristic produced a match; rendered into the `site`
/// label of a violation.
#[derive(Clone, Copy)]
enum MatchKind {
    /// The full rendered path equals a `denied_paths` entry.
    Exact,
    /// A single-ident path equals the last segment of a `denied_paths`
    /// entry.
    SingleIdentSuffix,
    /// The path equals, or lies under, a `denied_prefixes` entry.
    Prefix,
}

impl core::fmt::Display for MatchKind {
    /// Write the short lowercase label for this match kind.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let label = match *self {
            MatchKind::Prefix => "prefix",
            MatchKind::SingleIdentSuffix => "imported-ident",
            MatchKind::Exact => "exact",
        };
        f.write_str(label)
    }
}
264
265impl<'p> PurityVisitor<'p> {
266    /// Return the matching deny-list entry plus the match kind, if any.
267    ///
268    /// Match order:
269    /// 1. Exact full-path equality against `denied_paths`.
270    /// 2. Single-ident form (`use rand::thread_rng; thread_rng()`) —
271    ///    matches when the visited path is a single segment AND a
272    ///    `denied_paths` entry ends with `::<that_ident>`.
273    /// 3. Namespace prefix match against `denied_prefixes` —
274    ///    `path == prefix` or `path` starts with `prefix::`.
275    fn match_against_deny_list<'d>(
276        &'d self,
277        path: &Path,
278        path_str: &str,
279    ) -> Option<(&'d str, MatchKind)> {
280        if let Some(entry) = self.policy.denied_paths.get(path_str) {
281            return Some((entry.as_str(), MatchKind::Exact));
282        }
283        if path.segments.len() == 1 {
284            let ident = &path.segments[0].ident;
285            let needle = format!("::{ident}");
286            for denied in &self.policy.denied_paths {
287                if denied.ends_with(&needle) {
288                    return Some((denied.as_str(), MatchKind::SingleIdentSuffix));
289                }
290            }
291        }
292        for prefix in &self.policy.denied_prefixes {
293            if path_str == prefix.as_str() || path_str.starts_with(&format!("{prefix}::")) {
294                return Some((prefix.as_str(), MatchKind::Prefix));
295            }
296        }
297        None
298    }
299}
300
301fn path_to_string(path: &Path) -> String {
302    let mut out = String::new();
303    if path.leading_colon.is_some() {
304        out.push_str("::");
305    }
306    let segs: Vec<String> = path.segments.iter().map(|s| s.ident.to_string()).collect();
307    out.push_str(&segs.join("::"));
308    out
309}
310
/// Map a deny-list entry to its reason category: `clock`, `rng`, `io`,
/// `ffi`, `unsafe`, or `other`.
///
/// Classification works on whole `::`-separated segments rather than raw
/// substrings, so unrelated names that merely contain a keyword (for
/// example `runtime::spawn`, `offset::x`, `brand::new`) are not
/// misclassified, and module-level entries such as `std::time` or
/// `tokio::io` are recognised by their final segment. Categories are
/// tested in the order clock, rng, io, ffi; the first match wins.
fn classify_reason(denied: &str) -> &'static str {
    if denied == "unsafe-block" {
        return "unsafe";
    }

    let segments: Vec<&str> = denied.trim_start_matches("::").split("::").collect();
    // First segment — the crate (or `std`) the entry lives in.
    let root = segments.first().copied().unwrap_or("");
    let has_segment = |names: &[&str]| segments.iter().any(|segment| names.contains(segment));

    let is_clock = has_segment(&["time", "Instant", "SystemTime", "UNIX_EPOCH"])
        || matches!(
            root,
            "chrono" | "minstant" | "quanta" | "coarsetime" | "instant"
        );
    let is_rng = root == "rand"
        || root.starts_with("rand_")
        || matches!(root, "getrandom" | "rdrand" | "fastrand")
        || has_segment(&["OsRng", "ThreadRng"]);
    let is_io = has_segment(&["fs", "net", "io", "process", "env"])
        || matches!(root, "mio" | "socket2")
        // `async_std::task` is grouped with I/O by the default policy.
        || (root == "async_std" && segments.get(1) == Some(&"task"));
    let is_ffi = root == "libc";

    if is_clock {
        "clock"
    } else if is_rng {
        "rng"
    } else if is_io {
        "io"
    } else if is_ffi {
        "ffi"
    } else {
        "other"
    }
}
347
#[cfg(test)]
#[allow(clippy::panic, clippy::unwrap_used)]
mod tests {
    use super::*;
    use syn::parse_quote;

    /// True when any violation carries `entry` as its matched deny-list
    /// entry.
    fn denies(found: &[PurityViolation], entry: &str) -> bool {
        found.iter().any(|v| v.denied_path == entry)
    }

    /// True when any violation carries the reason category `reason`.
    fn has_reason(found: &[PurityViolation], reason: &str) -> bool {
        found.iter().any(|v| v.reason == reason)
    }

    // ---- Functions that must pass -------------------------------------

    #[test]
    fn pure_compute_passes() {
        let f: ItemFn = parse_quote! {
            fn compute(a: u32, b: u32) -> u32 {
                a.wrapping_add(b).wrapping_mul(2)
            }
        };
        let violations = check_purity_default(&f);
        assert!(
            violations.is_empty(),
            "pure compute must not trigger violations: {violations:?}"
        );
    }

    // ---- Exact-path and imported-ident matches ------------------------

    #[test]
    fn instant_now_full_path_rejected() {
        let f: ItemFn = parse_quote! {
            fn compute() -> u128 {
                let _now = std::time::Instant::now();
                0
            }
        };
        let violations = check_purity_default(&f);
        assert_eq!(violations.len(), 1);
        let only = &violations[0];
        assert_eq!(only.denied_path, "std::time::Instant::now");
        assert_eq!(only.reason, "clock");
    }

    #[test]
    fn use_imported_thread_rng_single_ident_rejected() {
        let f: ItemFn = parse_quote! {
            fn compute() -> u32 {
                let _r = thread_rng();
                0
            }
        };
        let violations = check_purity_default(&f);
        assert_eq!(violations.len(), 1);
        let only = &violations[0];
        assert_eq!(only.denied_path, "rand::thread_rng");
        assert_eq!(only.reason, "rng");
    }

    #[test]
    fn os_rng_full_path_rejected() {
        let f: ItemFn = parse_quote! {
            fn compute() -> u32 {
                let _ = rand::rngs::OsRng;
                0
            }
        };
        let violations = check_purity_default(&f);
        assert!(denies(&violations, "rand::rngs::OsRng"));
    }

    #[test]
    fn unix_epoch_constant_access_rejected() {
        let f: ItemFn = parse_quote! {
            fn compute() -> u128 {
                let _ = std::time::UNIX_EPOCH;
                0
            }
        };
        let violations = check_purity_default(&f);
        assert!(denies(&violations, "std::time::UNIX_EPOCH"));
    }

    // ---- Sites the visitor must ignore --------------------------------

    #[test]
    fn type_position_path_does_not_match() {
        let f: ItemFn = parse_quote! {
            fn compute() -> u32 {
                let _x: Option<std::time::Instant> = None;
                0
            }
        };
        let violations = check_purity_default(&f);
        assert!(
            violations.is_empty(),
            "type-position path must not match: {violations:?}"
        );
    }

    #[test]
    fn shell_defined_now_method_does_not_match() {
        let f: ItemFn = parse_quote! {
            fn compute(s: ShellState) -> u32 {
                let _ = s.now();
                0
            }
        };
        let violations = check_purity_default(&f);
        assert!(
            violations.is_empty(),
            "shell .now() method must not falsely trigger: {violations:?}"
        );
    }

    // ---- Namespace-prefix matches -------------------------------------

    #[test]
    fn fs_namespace_prefix_rejected() {
        let f: ItemFn = parse_quote! {
            fn compute() -> u32 {
                let _ = std::fs::read_to_string("/etc/passwd");
                0
            }
        };
        let violations = check_purity_default(&f);
        assert!(
            denies(&violations, "std::fs"),
            "std::fs::* prefix must trigger: {violations:?}"
        );
        assert!(has_reason(&violations, "io"));
    }

    #[test]
    fn net_namespace_prefix_rejected() {
        let f: ItemFn = parse_quote! {
            fn compute() -> u32 {
                let _ = std::net::TcpStream::connect("0.0.0.0:1");
                0
            }
        };
        let violations = check_purity_default(&f);
        assert!(denies(&violations, "std::net"));
    }

    #[test]
    fn process_namespace_prefix_rejected() {
        let f: ItemFn = parse_quote! {
            fn compute() -> u32 {
                let _ = std::process::id();
                0
            }
        };
        let violations = check_purity_default(&f);
        assert!(denies(&violations, "std::process"));
    }

    #[test]
    fn env_namespace_prefix_rejected() {
        let f: ItemFn = parse_quote! {
            fn compute() -> u32 {
                let _ = std::env::var("HOME");
                0
            }
        };
        let violations = check_purity_default(&f);
        assert!(denies(&violations, "std::env"));
    }

    #[test]
    fn libc_namespace_prefix_rejected() {
        let f: ItemFn = parse_quote! {
            fn compute() -> u32 {
                let _ = libc::getpid();
                0
            }
        };
        let violations = check_purity_default(&f);
        assert!(denies(&violations, "libc"));
        assert!(has_reason(&violations, "ffi"));
    }

    // ---- Unsafe ban ---------------------------------------------------

    #[test]
    fn unsafe_block_rejected() {
        let f: ItemFn = parse_quote! {
            fn compute(x: u32) -> u32 {
                unsafe {
                    let p = &x as *const u32;
                    *p
                }
            }
        };
        let violations = check_purity_default(&f);
        assert!(
            denies(&violations, "unsafe-block"),
            "unsafe block must trigger: {violations:?}"
        );
        assert!(has_reason(&violations, "unsafe"));
    }

    // ---- Reason classification ----------------------------------------

    #[test]
    fn tokio_time_prefix_rejected() {
        let f: ItemFn = parse_quote! {
            fn compute() -> u32 {
                let _ = tokio::time::Instant::now();
                0
            }
        };
        let violations = check_purity_default(&f);
        // The same violation must carry both the prefix and the category.
        let flagged_as_clock = violations
            .iter()
            .any(|v| v.denied_path == "tokio::time" && v.reason == "clock");
        assert!(
            flagged_as_clock,
            "tokio::time::* must trigger as clock: {violations:?}"
        );
    }

    #[test]
    fn tokio_io_prefix_classified_as_io() {
        // B1 regression guard — prefix entries such as `tokio::io` and
        // `async_std::io` have no trailing `::`, so they must be
        // recognised by their final segment rather than fall through to
        // "other" (cryptographer Round 1).
        for prefix in ["tokio::io", "async_std::io"] {
            assert_eq!(classify_reason(prefix), "io");
        }
    }

    #[test]
    fn classify_reason_categorises_correctly() {
        let expectations = [
            ("std::time::Instant::now", "clock"),
            ("rand::thread_rng", "rng"),
            ("std::fs", "io"),
            ("libc", "ffi"),
            ("unsafe-block", "unsafe"),
            ("blake3::hash", "other"),
        ];
        for (entry, expected) in expectations {
            assert_eq!(classify_reason(entry), expected);
        }
    }

    // ---- Policy variants ----------------------------------------------

    #[test]
    fn empty_policy_accepts_anything() {
        let f: ItemFn = parse_quote! {
            fn compute() -> u32 {
                let _ = std::time::Instant::now();
                let _ = rand::thread_rng();
                let _ = std::fs::read_to_string("/etc/passwd");
                unsafe { let _: u8 = 1; }
                0
            }
        };
        let permissive = Policy::empty();
        let violations = check_purity(&f, &permissive);
        assert!(violations.is_empty());
    }

    #[test]
    fn local_fn_named_random_currently_false_positives() {
        // Pins the documented suffix-match limitation: a user-defined
        // local fn named `random` is reported as `rand::random`, because
        // a purely syntactic visitor cannot resolve which item the bare
        // ident refers to. Once the crate migrates to the dylint cdylib
        // path this test inverts (the local fn becomes correctly
        // allowed).
        let f: ItemFn = parse_quote! {
            fn compute() -> u32 {
                fn random() -> u32 { 42 }
                random()
            }
        };
        let violations = check_purity_default(&f);
        assert!(
            denies(&violations, "rand::random"),
            "bare-ident `random()` is a known false positive"
        );
    }

    #[test]
    fn getrandom_fill_rejected() {
        let f: ItemFn = parse_quote! {
            fn compute(buf: &mut [u8]) -> () {
                let _ = getrandom::fill(buf);
            }
        };
        let violations = check_purity_default(&f);
        assert!(denies(&violations, "getrandom::fill"));
    }

    #[test]
    fn pure_with_blake3_passes() {
        let f: ItemFn = parse_quote! {
            fn compute(input: &[u8]) -> [u8; 32] {
                let mut h = blake3::Hasher::new();
                h.update(input);
                *h.finalize().as_bytes()
            }
        };
        let violations = check_purity_default(&f);
        assert!(violations.is_empty());
    }
}