Skip to main content

candor_classify/
lib.rs

//! candor-classify — the curated effect classifier (crate + path -> effect), extracted to a STABLE
//! crate so both the nightly `rustc_private` lint AND a stable backend share ONE source of truth
//! (no drift). Pure string logic; no rustc internals. The effect vocabulary lives in candor-report.
4
5use candor_report::EFFECTS;
6
/// Project-supplied rules, consulted only when the built-in `classify` returns `None`.
///
/// Each entry of `extra` is an `(effect, is_crate, prefix)` triple:
/// * `is_crate == true`  — the rule fires when `crate_name` starts with `prefix`;
/// * `is_crate == false` — the rule fires when `path` starts with `prefix`.
///
/// Rules are tried in slice order and the effect of the FIRST rule that fires is returned.
/// Returns `None` when no rule fires (which includes an empty `extra`). Matching is a plain
/// `str::starts_with`, so a rule with an empty `prefix` fires for every callee.
pub fn classify_extra(
    crate_name: &str,
    path: &str,
    extra: &[(&'static str, bool, String)],
) -> Option<&'static str> {
    extra
        .iter()
        .find(|(_, is_crate, prefix)| {
            // The flag selects WHICH string the prefix is tested against.
            let subject = if *is_crate { crate_name } else { path };
            subject.starts_with(prefix.as_str())
        })
        .map(|(effect, _, _)| *effect)
}
21
/// The exact third-party crates `classify` has effect rules for, matched by crate name.
///
/// Together with the crate-name prefixes `classify` recognizes, this is the single source of
/// truth for "what candor knows": it is emitted beside the JSON report
/// (`<prefix>.calibrated.json`) so the Claude Code receipt's coverage check reads candor's real
/// coverage instead of a hand-copied list.
///
/// Keep in lockstep with `classify` — the `calibrated_set_covers_classifier` test enforces
/// that every named crate the classifier matches appears here.
pub const CALIBRATED_CRATES: [&str; 47] = [
    // Network. `aws_config` resolves credentials over the network on `.load()`; `git2` remote
    // ops (fetch/push/connect) contact the network; `async_net` is smol's net layer.
    "reqwest", "isahc", "ureq",
    "aws_config", "git2",
    "tokio_tcp", "tokio_udp", "async_net",
    "async_nats", "lapin", "lettre", "tungstenite", "elasticsearch", "tonic", "rdkafka",
    // Database (same names, same order, as `DB_CRATES`).
    "sqlx", "rusqlite", "postgres", "tokio_postgres", "diesel", "redis", "mongodb",
    "mysql", "mysql_async", "sea_orm", "deadpool_postgres",
    // Filesystem (`async_fs` = smol; `fs_err` = `std::fs` wrapper; tempfile; glob).
    "memmap2", "fs_err", "async_fs", "tempfile", "glob",
    // Entropy.
    "rand", "getrandom", "fastrand",
    // Subprocess (`async_process` = smol; duct).
    "portable_pty", "async_process", "duct",
    // Env (dotenvy/dotenv).
    "dotenvy", "dotenv",
    // Clock (time) / log / clipboard.
    "chrono", "time", "tracing", "log", "arboard",
    // Compiler diagnostic emission (a dylint lint's output) — see the Log rules in `classify`.
    "rustc_lint", "rustc_errors",
    // Raw syscalls via FFI — the syscall-name table that lights up the FFI-thin tier (nix etc.).
    "libc",
];
48
/// Crate-name PREFIXES `classify` recognizes (it tests them with `crate_name.starts_with(..)`),
/// as opposed to the exact crate names listed in `CALIBRATED_CRATES`.
pub const CALIBRATED_PREFIXES: [&str; 3] = [
    // AWS SDK service crates and the smithy runtime beneath them.
    "aws_sdk_",
    "aws_smithy",
    // cap-std family.
    "cap_",
];
50
/// Crates `classify` matches by PATH prefix rather than by crate-name equality: their effectful
/// modules are recognised (e.g. `tokio::net::` / `async_std::fs::` / `mio::net::`), so they are
/// absent from `CALIBRATED_CRATES` (which the liveness test probes by crate name).
///
/// The coverage check must still treat them as *covered* — otherwise it would mislabel the most
/// common async crates as blind spots.
pub const PATH_CALIBRATED_CRATES: [&str; 3] = [
    "tokio",
    "async_std",
    "mio",
];
56
/// Database client crates whose execution verbs are I/O (see the DB branch in `classify`, which
/// gates on `DB_CRATES.contains(&crate_name)`).
///
/// Kept at module level so the `db_crates_are_calibrated` test can enforce
/// `DB_CRATES ⊆ CALIBRATED_CRATES`.
pub const DB_CRATES: [&str; 11] = [
    "sqlx",
    "rusqlite",
    "postgres",
    "tokio_postgres",
    "diesel",
    "redis",
    "mongodb",
    "mysql",
    "mysql_async",
    "sea_orm",
    "deadpool_postgres",
];
63
64/// Classify a resolved callee by the crate it belongs to and its full path.
65pub fn classify(crate_name: &str, path: &str) -> Option<&'static str> {
66    if crate_name.starts_with("aws_sdk_") || crate_name.starts_with("aws_smithy") {
67        // Only request dispatch is network I/O; builder setters/accessors are pure.
68        if path.ends_with("::send") || path.ends_with("::send_with") {
69            return Some("Net");
70        }
71        return None;
72    }
73    // aws-config resolves credentials/region on `.load()` — it reaches the IMDS metadata
74    // endpoint / STS over the network (and reads ~/.aws + env). Builders (`defaults()`,
75    // `SdkConfig::builder()`, `BehaviorVersion::latest()`) are pure; the `load` is the I/O.
76    // (Found hardening on a real app, ebman: `builder.load().await` was classified pure.)
77    if crate_name == "aws_config" {
78        if path.ends_with("::load") || path.ends_with("::load_defaults") {
79            return Some("Net");
80        }
81        return None;
82    }
83    // git2 (libgit2 FFI): remote operations contact the network; everything else is local
84    // to the .git directory. Match the remote verbs precisely — NOT bare `::clone`, which is
85    // the `Clone`-trait dup of a `Remote` handle (pure), not `Repository::clone`. (Found
86    // hardening on gitui: `remote.fetch`/`remote.push` were classified network-free — a git
87    // client reporting it makes no network calls.)
88    if crate_name == "git2" {
89        if path.ends_with("::fetch")
90            || path.ends_with("::push")
91            || path.ends_with("::download")
92            || path.ends_with("::connect")
93            || path.ends_with("::connect_auth")
94            || path.ends_with("::ls")
95            || path.ends_with("::upload")
96        {
97            return Some("Net");
98        }
99        return None;
100    }
101    // libc — raw syscalls via FFI. The FFI-thin tier (nix, and the syscall layer beneath rusqlite/git2)
102    // is invisible to a name classifier unless we model libc directly: a 35-crate calibration
103    // (eval/calibration) showed nix reporting ZERO library effects because every wrapper bottoms out in
104    // an unrecognised `libc::*` call. Classify by syscall name, but ONLY the UNAMBIGUOUS ones — the
105    // socket family is Net, path/dir syscalls are Fs, spawn/exec/wait is Exec, SysV/pipe IPC is Ipc,
106    // env/clock/entropy each their own. We deliberately SKIP the generic file-descriptor ops
107    // (read/write/close/lseek/dup/fcntl/ioctl/poll/select/epoll*/mmap): they operate on ANY fd — file,
108    // socket, or pipe — so a fixed label would mis-categorise as often as it helps. An honest
109    // no-classify (under-report) beats emitting the WRONG effect. Pure conversions (htons/inet_pton/
110    // gmtime) are also skipped.
111    if crate_name == "libc" {
112        let f = path.rsplit("::").next().unwrap_or(path);
113        // path / directory / metadata syscalls (incl. *64 and *at variants)
114        const FS: &[&str] = &[
115            "open", "open64", "openat", "openat2", "creat", "creat64", "stat", "stat64", "lstat",
116            "lstat64", "fstatat", "fstatat64", "newfstatat", "statx", "access", "faccessat",
117            "faccessat2", "mkdir", "mkdirat", "rmdir", "unlink", "unlinkat", "rename", "renameat",
118            "renameat2", "link", "linkat", "symlink", "symlinkat", "readlink", "readlinkat", "chmod",
119            "fchmodat", "chown", "lchown", "fchownat", "truncate", "truncate64", "ftruncate",
120            "ftruncate64", "opendir", "fdopendir", "readdir", "readdir64", "readdir_r", "closedir",
121            "rewinddir", "seekdir", "telldir", "scandir", "mkstemp", "mkstemps", "mkostemp", "mkdtemp",
122            "mknod", "mknodat", "chdir", "fchdir", "getcwd", "get_current_dir_name", "chroot",
123            "pivot_root", "statfs", "statfs64", "fstatfs", "fstatfs64", "statvfs", "fstatvfs", "mount",
124            "umount", "umount2", "fsync", "fdatasync", "sync", "syncfs", "sync_file_range", "fallocate",
125            "posix_fallocate", "posix_fadvise", "sendfile", "sendfile64", "copy_file_range", "flock",
126            "getdents", "getdents64", "utime", "utimes", "lutimes", "futimens", "utimensat", "futimesat",
127            "realpath",
128        ];
129        // socket family — these operate only on sockets, so Net is unambiguous (AF_UNIX domain isn't
130        // visible at the call, so a Unix socket reads as Net rather than Ipc; acceptable over-general).
131        const NET: &[&str] = &[
132            "socket", "setsockopt", "getsockopt", "bind", "listen", "accept", "accept4", "connect",
133            "shutdown", "send", "sendto", "sendmsg", "sendmmsg", "recv", "recvfrom", "recvmsg",
134            "recvmmsg", "getpeername", "getsockname", "getaddrinfo", "freeaddrinfo", "getnameinfo",
135        ];
136        // process creation / replacement / reaping
137        const EXEC: &[&str] = &[
138            "fork", "vfork", "clone", "clone3", "execl", "execlp", "execle", "execv", "execvp",
139            "execvpe", "execve", "execveat", "fexecve", "posix_spawn", "posix_spawnp", "system",
140            "popen", "pclose", "wait", "waitpid", "wait3", "wait4", "waitid",
141        ];
142        // pipes / FIFOs / SysV + POSIX message queues, semaphores, shared memory; socketpair (AF_UNIX)
143        const IPC: &[&str] = &[
144            "pipe", "pipe2", "mkfifo", "mkfifoat", "socketpair", "msgget", "msgsnd", "msgrcv", "msgctl",
145            "semget", "semop", "semtimedop", "semctl", "shmget", "shmat", "shmdt", "shmctl", "mq_open",
146            "mq_send", "mq_receive", "mq_timedsend", "mq_timedreceive", "mq_close", "mq_unlink",
147        ];
148        const ENV: &[&str] = &["getenv", "secure_getenv", "setenv", "putenv", "unsetenv", "clearenv"];
149        const CLOCK: &[&str] =
150            &["time", "gettimeofday", "clock_gettime", "clock_getres", "nanosleep", "clock_nanosleep"];
151        const RAND: &[&str] = &["getrandom", "getentropy", "arc4random", "arc4random_buf", "arc4random_uniform"];
152        if FS.contains(&f) {
153            return Some("Fs");
154        }
155        if NET.contains(&f) {
156            return Some("Net");
157        }
158        if EXEC.contains(&f) {
159            return Some("Exec");
160        }
161        if IPC.contains(&f) {
162            return Some("Ipc");
163        }
164        if ENV.contains(&f) {
165            return Some("Env");
166        }
167        if CLOCK.contains(&f) {
168            return Some("Clock");
169        }
170        if RAND.contains(&f) {
171            return Some("Rand");
172        }
173        return None;
174    }
175    // C-library FFI bindings: libsqlite3 (under rusqlite) and libgit2 (under git2). Like the libc tier,
176    // these crates are thin Rust over a C library, so their real I/O is invisible until the C entry
177    // points are named. Match by the DISTINCTIVE C function name (`sqlite3_*` / `git_*`) via the call's
178    // LEAF — independent of the binding crate's alias: rusqlite calls `ffi::sqlite3_step`, git2 calls
179    // `raw::git_remote_fetch`, and the nightly lint resolves the same to `libsqlite3_sys`/`libgit2_sys`;
180    // all spellings share the leaf. Only the I/O-performing entry points are listed — the in-memory
181    // accessors (`sqlite3_bind_*`/`sqlite3_column_*`, `git_*_oid`/strarray/options builders) stay pure,
182    // so a non-listed `sqlite3_`/`git_` leaf returns None (under-report, never a wrong effect). Calibrated
183    // + validated against rusqlite 0.39 / git2 0.20 source (eval/calibration).
184    {
185        let leaf = path.rsplit("::").next().unwrap_or(path);
186        if let Some(rest) = leaf.strip_prefix("sqlite3_") {
187            let _ = rest;
188            // SQLite C API operations that touch the database (open/exec/step/prepare/backup/blob/wal).
189            const DB: &[&str] = &[
190                "sqlite3_open", "sqlite3_open_v2", "sqlite3_open16", "sqlite3_close", "sqlite3_close_v2",
191                "sqlite3_exec", "sqlite3_step", "sqlite3_prepare", "sqlite3_prepare_v2",
192                "sqlite3_prepare_v3", "sqlite3_prepare16", "sqlite3_prepare16_v2", "sqlite3_prepare16_v3",
193                "sqlite3_get_table", "sqlite3_backup_init", "sqlite3_backup_step", "sqlite3_backup_finish",
194                "sqlite3_blob_open", "sqlite3_blob_read", "sqlite3_blob_write", "sqlite3_blob_reopen",
195                "sqlite3_load_extension", "sqlite3_wal_checkpoint", "sqlite3_wal_checkpoint_v2",
196            ];
197            return DB.contains(&leaf).then_some("Db");
198        }
199        if leaf.starts_with("git_") {
200            // libgit2: remote/transport operations contact the network …
201            const NET: &[&str] = &[
202                "git_clone", "git_remote_connect", "git_remote_connect_ext", "git_remote_fetch",
203                "git_remote_download", "git_remote_upload", "git_remote_push", "git_remote_ls",
204            ];
205            // … and repository/index/odb/checkout/ref/config operations touch the on-disk .git store.
206            const FS: &[&str] = &[
207                "git_repository_open", "git_repository_open_ext", "git_repository_open_bare",
208                "git_repository_init", "git_repository_init_ext", "git_repository_discover",
209                "git_checkout_tree", "git_checkout_head", "git_checkout_index", "git_index_read",
210                "git_index_write", "git_index_write_tree", "git_index_write_tree_to",
211                "git_index_add_bypath", "git_index_add_all", "git_odb_open", "git_odb_read",
212                "git_odb_write", "git_odb_open_wstream", "git_odb_open_rstream",
213                "git_blob_create_fromdisk", "git_blob_create_fromworkdir", "git_blob_create_from_disk",
214                "git_blob_create_from_workdir", "git_blob_create_from_stream", "git_commit_create",
215                "git_commit_create_v", "git_reference_create", "git_reference_set_target",
216                "git_reference_delete", "git_config_open_default", "git_config_open_ondisk",
217                "git_config_add_file_ondisk", "git_tag_create", "git_treebuilder_write",
218                "git_packbuilder_write",
219            ];
220            if NET.contains(&leaf) {
221                return Some("Net");
222            }
223            if FS.contains(&leaf) {
224                return Some("Fs");
225            }
226            return None;
227        }
228        if let Some(op) = leaf.strip_prefix("SSL_") {
229            // OpenSSL (libssl, under the `openssl`/`native-tls` crates, called `ffi::SSL_*`). The TLS
230            // handshake and record I/O run over the peer socket -> Net. Unlike libc read/write, an SSL_*
231            // op is ~always over a network BIO (the rare memory-BIO/sans-IO case is the honest exception
232            // we accept). The crypto surface (EVP_*/SHA*/AES*) and pure setup (SSL_CTX_new/SSL_set_fd) are
233            // NOT here; `BIO_*` is skipped (a BIO may be memory or socket). Validated vs openssl 0.9 source.
234            const SSL_NET: &[&str] = &[
235                "connect", "accept", "do_handshake", "read", "read_ex", "write", "write_ex", "peek",
236                "peek_ex", "shutdown",
237            ];
238            return SSL_NET.contains(&op).then_some("Net");
239        }
240    }
241    // HTTP clients use the same builder pattern as the AWS SDK: only the dispatch is
242    // I/O. (Found by the eval: ebman's reqwest calls to the Anthropic API + webhooks
243    // were silently classified network-free because reqwest wasn't recognized.)
244    if crate_name == "reqwest" || crate_name == "isahc" {
245        if path.ends_with("::send") || path.ends_with("::execute") {
246            return Some("Net");
247        }
248        return None;
249    }
250    if crate_name == "ureq" && path.ends_with("::call") {
251        return Some("Net");
252    }
253    // Message-queue clients fully encapsulate the socket (the underlying tokio::net lives
254    // inside the crate, unseen), so a user's connect/publish/consume calls ARE the I/O
255    // boundary — to a remote broker, hence Net. Match the broker round-trip verbs (snake_case
256    // methods); the CamelCase option/property builders stay pure. (Found hardening on consumer
257    // apps: lapin `basic_publish`/`queue_declare` and async-nats `publish`/`subscribe` were
258    // classified pure — a message-queue client reporting no I/O.)
259    if crate_name == "async_nats" {
260        if path.ends_with("::connect")
261            || path.contains("::publish")
262            || path.ends_with("::subscribe")
263            || path.ends_with("::queue_subscribe")
264            || path.contains("::request")
265            || path.ends_with("::flush")
266        {
267            return Some("Net");
268        }
269        return None;
270    }
271    if crate_name == "lapin" {
272        if path.ends_with("::connect")
273            || path.ends_with("::create_channel")
274            || path.contains("::basic_")
275            || path.contains("::queue_")
276            || path.contains("::exchange_")
277            || path.contains("::tx_")
278            || path.ends_with("::confirm_select")
279            || path.ends_with("::close")
280        {
281            return Some("Net");
282        }
283        return None;
284    }
285    // SMTP email — lettre's `Transport::send` is the network dispatch; Message building is
286    // pure. (Found hardening on a lettre consumer: `mailer.send(&email)` classified pure.)
287    if crate_name == "lettre" {
288        if path.ends_with("::send") || path.ends_with("::send_raw") {
289            return Some("Net");
290        }
291        return None;
292    }
293    // WebSockets — tungstenite (the modern successor to the old `websocket` crate). connect
294    // and the socket read/write/send are network; Message constructors are pure. (Found on a
295    // tungstenite consumer: connect + send + read classified pure.)
296    if crate_name == "tungstenite" {
297        if path.ends_with("::connect")
298            || path.ends_with("::read")
299            || path.ends_with("::write")
300            || path.ends_with("::send")
301            || path.ends_with("::close")
302            || path.ends_with("::flush")
303            || path.ends_with("::read_message")
304            || path.ends_with("::write_message")
305        {
306            return Some("Net");
307        }
308        return None;
309    }
310    // elasticsearch: request builders are pure; only the `.send()` dispatch is HTTP I/O
311    // (same shape as reqwest / the AWS SDK). (Found on an elasticsearch consumer.)
312    if crate_name == "elasticsearch" && path.ends_with("::send") {
313        return Some("Net");
314    }
315    // gRPC — tonic. The transport connect and the Grpc client RPC dispatch are network;
316    // codecs and request/response wrappers are pure. (connect repro-confirmed on a consumer;
317    // the unary/streaming RPC verbs are from the tonic::client::Grpc API.)
318    if crate_name == "tonic" {
319        if path.ends_with("::connect")
320            || path.ends_with("::unary")
321            || path.ends_with("::server_streaming")
322            || path.ends_with("::client_streaming")
323            || path.ends_with("::streaming")
324        {
325            return Some("Net");
326        }
327        return None;
328    }
329    // Kafka — rdkafka (FFI to librdkafka). Producer send + consumer poll/recv/subscribe/
330    // commit are network round-trips to the brokers. (API-calibrated + unit-tested; a real
331    // repro needs librdkafka/cmake, deferred.)
332    if crate_name == "rdkafka" {
333        if path.ends_with("::send")
334            || path.ends_with("::send_result")
335            || path.ends_with("::recv")
336            || path.ends_with("::poll")
337            || path.ends_with("::subscribe")
338            || path.ends_with("::commit")
339            || path.ends_with("::commit_message")
340            || path.ends_with("::commit_consumer_state")
341            || path.ends_with("::store_offset")
342            || path.ends_with("::seek")
343            || path.ends_with("::fetch_metadata")
344            || path.ends_with("::fetch_watermarks")
345            || path.ends_with("::flush")
346        {
347            return Some("Net");
348        }
349        return None;
350    }
351    // cap-std: capability-oriented std. I/O goes *through* a held capability handle
352    // (Dir/Pool/Clock/...), so these calls ARE the effect. Recognising them means a
353    // cap-std project's real I/O is detected and matches the capability it declared
354    // (via `declared_caps`/`capstd_cap`) — conformance against unforgeable capabilities.
355    if crate_name.starts_with("cap_") {
356        if path.contains("::net::Unix") || path.contains("::os::") {
357            return Some("Ipc");
358        }
359        if path.contains("::net") {
360            return Some("Net");
361        }
362        if path.contains("::time") {
363            return Some("Clock");
364        }
365        if path.contains("::fs") || crate_name == "cap_tempfile" || crate_name == "cap_directories" {
366            return Some("Fs");
367        }
368        return None;
369    }
370    // Local IPC (Unix-domain sockets) is I/O but not *network* — keep it distinct so
371    // CANDOR_NO_AMBIENT and audits don't conflate it with internet access. async-std puts its
372    // Unix sockets under `os::unix::net` (mirroring std); async-net (smol's net layer) under
373    // `unix`.
374    if path.starts_with("tokio::net::Unix")
375        || path.starts_with("std::os::unix::net")
376        || path.starts_with("async_std::os::unix::net")
377        || path.starts_with("async_net::unix")
378    {
379        return Some("Ipc");
380    }
381    // Raw sockets. Match the I/O *types* only — `std::net` also holds pure data types
382    // (SocketAddr, IpAddr, …) whose construction must NOT be flagged.
383    if path.starts_with("std::net::TcpStream")
384        || path.starts_with("std::net::TcpListener")
385        || path.starts_with("std::net::UdpSocket")
386        || path.starts_with("tokio::net::")
387    {
388        return Some("Net");
389    }
390    // Legacy tokio 0.1 socket crates — `tokio_tcp`/`tokio_udp` are *entirely* networking
391    // (no pure types to over-flag), so the whole crate is Net. (Found hardening on websocat,
392    // which is still on tokio 0.1: its `tokio_tcp::TcpStream::connect` was classified
393    // network-free — a network tool confidently reporting 0 Net.)
394    if matches!(crate_name, "tokio_tcp" | "tokio_udp") {
395        return Some("Net");
396    }
397    // The other async runtimes mirror tokio's module layout, and their `net` modules hold only
398    // socket I/O types (the pure `SocketAddr`/`IpAddr` are re-exports that resolve to `std::net`,
399    // so they're excluded by def-path). `mio` is the low-level non-blocking-socket layer under
400    // tokio/others; `async_net` is smol's net crate. Closes the async-std/smol/mio gap the
401    // tokio_tcp note flagged. (Calibrated by module structure — these crates ARE networking — not
402    // a live repro; the TCP/UDP types are defined in-crate so the def-path prefix is exact.)
403    if path.starts_with("async_std::net::")
404        || path.starts_with("mio::net::")
405        || crate_name == "async_net"
406    {
407        return Some("Net");
408    }
409    // Database clients. Like the AWS/HTTP builders, only the execution verbs are I/O;
410    // query *construction* is pure. Best-effort across crates (tune via CANDOR_CONFIG).
411    // Note: bare `::query` is deliberately omitted — it executes in postgres/rusqlite but
412    // only *builds* in sqlx, so including it would false-positive sqlx's `query()` builder.
413    if DB_CRATES.contains(&crate_name) {
414        // Postgres / SQLite-family clients: `query`/`batch_execute`/`prepare`/etc. ARE the
415        // execution (round-trips to the server). sqlx is the outlier where bare `query()`
416        // only BUILDS — it keeps the narrow set below. (Found by running on a real
417        // tokio-postgres app, pgman: candor had reported only 4 of ~20 DB call sites.)
418        if matches!(crate_name, "postgres" | "tokio_postgres" | "deadpool_postgres" | "rusqlite") {
419            const PG: [&str; 13] = [
420                "::query", "::query_one", "::query_opt", "::query_raw", "::execute",
421                "::batch_execute", "::simple_query", "::prepare", "::prepare_typed",
422                "::copy_in", "::copy_out", "::transaction", "::connect",
423            ];
424            if PG.iter().any(|v| path.ends_with(v)) {
425                return Some("Db");
426            }
427            return None;
428        }
429        // redis: the way redis is ACTUALLY used is the high-level `Commands`/`AsyncCommands`
430        // traits (`con.get`/`set`/`hset`/`lpush`/…) — every method is a round-trip — plus
431        // connection establishment. The shared VERBS below only catch the low-level
432        // `cmd("GET").query(con)`, so without this a normal redis user's calls classify as
433        // PURE. (Found hardening on redis-rs: a fn doing `con.get`/`set` reported no effects.)
434        if crate_name == "redis"
435            && (path.contains("Commands::")
436                || path.contains("::get_connection")
437                || path.contains("::get_async_connection")
438                || path.contains("::get_multiplexed_async_connection")
439                || path.contains("ConnectionManager")
440                || path.ends_with("::query")
441                || path.ends_with("::query_async")
442                || path.ends_with("::req_command")
443                || path.ends_with("::req_packed_command")
444                || path.ends_with("::req_packed_commands"))
445        {
446            return Some("Db");
447        }
448        // mongodb: a document-store API with none of the SQL verbs — the user calls
449        // `coll.find_one`/`insert_one`/`aggregate`/… and `Client::with_uri_str`. Without
450        // these a mongodb user's calls classify PURE. (Found hardening: a fn doing
451        // `find_one`+`insert_one` reported no effects.) Handle accessors (name/namespace)
452        // and option/doc builders don't match these verbs, so they stay pure.
453        if crate_name == "mongodb" {
454            const MONGO: [&str; 27] = [
455                "::with_uri_str", "::connect", "::find", "::find_one", "::insert_one",
456                "::insert_many", "::update_one", "::update_many", "::delete_one",
457                "::delete_many", "::replace_one", "::aggregate", "::count_documents",
458                "::estimated_document_count", "::count", "::distinct", "::run_command",
459                "::find_one_and_update", "::find_one_and_delete", "::find_one_and_replace",
460                "::list_collections", "::list_collection_names", "::list_databases",
461                "::list_database_names", "::create_collection", "::create_index", "::watch",
462            ];
463            if MONGO.iter().any(|v| path.ends_with(v)) {
464                return Some("Db");
465            }
466            return None;
467        }
468        // mysql / mysql_async: the `query`/`exec` families + `get_conn`/`ping` execute
469        // immediately — no build-then-execute split like sqlx, so matching `::query` is safe
470        // here. Same DB-verb-dialect gap class as redis/mongodb; calibrated from the Queryable
471        // API (unit-tested; a real-app repro is the remaining confirmation).
472        if matches!(crate_name, "mysql" | "mysql_async") {
473            const MY: [&str; 16] = [
474                "::query", "::query_first", "::query_iter", "::query_map", "::query_fold",
475                "::query_drop", "::exec", "::exec_first", "::exec_iter", "::exec_map",
476                "::exec_fold", "::exec_drop", "::exec_batch", "::prep", "::ping", "::get_conn",
477            ];
478            if MY.iter().any(|v| path.ends_with(v)) {
479                return Some("Db");
480            }
481            return None;
482        }
483        // sea_orm: an ORM whose execution is split from building (like sqlx). The query
484        // BUILDERS (`Entity::find`, `Entity::insert`) are pure; execution happens at `.all`/
485        // `.one`/`.count`/`.stream` and `Insert/Update/Delete::exec`. The write path via an
486        // ActiveModel (`model.insert(db)`) executes too — distinguished from the `EntityTrait`
487        // builder by the trait in the path (`ActiveModelTrait::`). (Found hardening on a
488        // sea_orm consumer app: `.all(db)` reads and `ActiveModel::insert` writes were pure.)
489        if crate_name == "sea_orm" {
490            if path.ends_with("::all")
491                || path.ends_with("::one")
492                || path.ends_with("::count")
493                || path.ends_with("::stream")
494                || path.ends_with("::exec")
495                || path.ends_with("::exec_with_returning")
496                || path.ends_with("::exec_without_returning")
497                || path.ends_with("::connect")
498                || path.ends_with("::execute")
499                || path.ends_with("::execute_unprepared")
500                || path.ends_with("::query_one")
501                || path.ends_with("::query_all")
502                || path.ends_with("::fetch_page")
503                || path.ends_with("::num_items")
504                || path.contains("ActiveModelTrait::")
505            {
506                return Some("Db");
507            }
508            return None;
509        }
510        const VERBS: [&str; 16] = [
511            "::execute", "::query_row", "::query_map", "::query_one", "::fetch_one",
512            "::fetch_all", "::fetch_optional", "::fetch", "::connect", "::acquire",
513            "::begin", "::commit", "::rollback", "::load", "::get_result", "::get_results",
514        ];
515        if VERBS.iter().any(|v| path.ends_with(v)) {
516            return Some("Db");
517        }
518        return None;
519    }
520    // Filesystem. `tokio::fs`/`async_std::fs` are the async mirrors of `std::fs`; `async_fs` is
521    // smol's fs crate; `fs_err` is a drop-in `std::fs` wrapper (its whole surface is fs I/O).
522    if path.starts_with("std::fs::")
523        || path.starts_with("tokio::fs::")
524        || path.starts_with("async_std::fs::")
525        || crate_name == "async_fs"
526        || crate_name == "fs_err"
527        || crate_name == "memmap2"
528    {
529        return Some("Fs");
530    }
531    // tempfile: creating a temp file/dir touches the disk. Match the create/persist verbs (the
532    // `Builder` setters — prefix/suffix/rand_bytes — stay pure). `persist`/`keep` rename/retain
533    // the file on disk; `close` removes it.
534    if crate_name == "tempfile"
535        && (path.ends_with("::tempfile")
536            || path.ends_with("::tempfile_in")
537            || path.ends_with("::tempdir")
538            || path.ends_with("::tempdir_in")
539            || path.ends_with("NamedTempFile::new")
540            || path.ends_with("NamedTempFile::new_in")
541            || path.ends_with("TempDir::new")
542            || path.ends_with("TempDir::new_in")
543            || path.ends_with("::persist")
544            || path.ends_with("::persist_noclobber")
545            || path.ends_with("::keep"))
546    {
547        return Some("Fs");
548    }
549    // glob: walks the filesystem to expand a pattern (the returned iterator reads directories).
550    // `Pattern::matches` is pure string matching — match only the directory-walking entry points.
551    if crate_name == "glob" && (path.ends_with("::glob") || path.ends_with("::glob_with")) {
552        return Some("Fs");
553    }
554    // Randomness / entropy. `getrandom`/`fastrand` are effectful end-to-end. `rand` is NOT — it
555    // mixes entropy/generation (effectful) with *pure* distribution constructors (`Uniform::new`,
556    // `Normal::new`) and deterministic-seed constructors (`seed_from_u64`). Flagging the whole crate
557    // over-reported those as `Rand`; match only the calls that actually consume randomness — the
558    // entropy sources (`OsRng`, `thread_rng`/`rng`, `from_entropy`/`from_os_rng`) and the generation
559    // verbs (`gen*`/`random*`/`fill*`/`sample*`/`next_u*`). A `Uniform::new` is now correctly pure.
560    if crate_name == "getrandom" || crate_name == "fastrand" {
561        return Some("Rand");
562    }
563    if crate_name == "rand" {
564        let rng_verb = path.ends_with("::gen")
565            || path.ends_with("::gen_range")
566            || path.ends_with("::gen_bool")
567            || path.ends_with("::gen_ratio")
568            || path.ends_with("::random")
569            || path.ends_with("::random_range")
570            || path.ends_with("::random_bool")
571            || path.ends_with("::random_ratio")
572            || path.ends_with("::random_iter") // rand 0.9 iterator generator
573            || path.ends_with("::gen_iter")
574            || path.ends_with("::fill")
575            || path.ends_with("::fill_bytes")
576            || path.ends_with("::try_fill")
577            || path.ends_with("::try_fill_bytes")
578            || path.ends_with("::sample")
579            || path.ends_with("::sample_iter")
580            || path.ends_with("::next_u32")
581            || path.ends_with("::next_u64")
582            || path.ends_with("::thread_rng")
583            || path.ends_with("::rng")
584            || path.ends_with("::from_entropy")
585            || path.ends_with("::from_os_rng");
586        if rng_verb || path.contains("OsRng") {
587            return Some("Rand");
588        }
589        return None;
590    }
591    // Subprocess spawning. `tokio::process` is the async mirror of `std::process` — it exists
592    // only to spawn/control subprocesses (`Command`/`Child`, no pure data types like std's
593    // `Stdio`/`ExitStatus`/`exit`), so spawning through it is Exec just the same. Without this an
594    // async app's `tokio::process::Command::new(..).spawn()` classified pure — a silent under-report
595    // of subprocess execution, the dangerous direction (mirrors the tokio::fs/tokio::net coverage).
596    if path.starts_with("std::process::Command")
597        || path.starts_with("std::process::Child")
598        || path.starts_with("tokio::process::Command")
599        || path.starts_with("tokio::process::Child")
600        || path.starts_with("async_std::process::Command")
601        || path.starts_with("async_std::process::Child")
602        || crate_name == "async_process"
603        || crate_name == "portable_pty"
604    {
605        return Some("Exec");
606    }
607    // duct: a subprocess-orchestration crate. `cmd()`/`cmd!` only *build* an Expression; the
608    // spawn/wait happens at `run`/`read`/`start`. Match the execution verbs, not the builder.
609    if crate_name == "duct"
610        && (path.ends_with("::run")
611            || path.ends_with("::read")
612            || path.ends_with("::start")
613            || path.ends_with("::read_chars"))
614    {
615        return Some("Exec");
616    }
617    if path.starts_with("std::env::") {
618        return Some("Env");
619    }
620    // dotenvy / dotenv: load environment variables (reading a `.env` file and mutating the process
621    // environment). Match the load/read entry points; `Error`/builder types stay pure.
622    if matches!(crate_name, "dotenvy" | "dotenv")
623        && (path.ends_with("::dotenv")
624            || path.ends_with("::dotenv_override")
625            || path.ends_with("::from_path")
626            || path.ends_with("::from_path_override")
627            || path.ends_with("::from_filename")
628            || path.ends_with("::from_filename_override")
629            || path.ends_with("::from_read")
630            || path.ends_with("::from_read_override")
631            || path.ends_with("::load")
632            || path.ends_with("::var")
633            || path.ends_with("::vars"))
634    {
635        return Some("Env");
636    }
637    // Wall-clock reads. Match the `now` accessor precisely (ends_with), not any path
638    // containing the substring "now". The `time` crate (distinct from `std::time`/`chrono`)
639    // reads the clock via `now_utc`/`now_local` (and the deprecated `Instant::now`).
640    if (crate_name == "chrono" || path.starts_with("std::time::")) && path.ends_with("::now") {
641        return Some("Clock");
642    }
643    if crate_name == "time"
644        && (path.ends_with("::now_utc") || path.ends_with("::now_local") || path.ends_with("::now"))
645    {
646        return Some("Clock");
647    }
648    if crate_name == "tracing" {
649        return Some("Log");
650    }
651    // The `log` facade: its macros route through `log::__private_api`; the crate's types
652    // (`Level`, `LevelFilter`) are pure, so match the logging entry, not the whole crate.
653    if crate_name == "log" && path.contains("::__private_api") {
654        return Some("Log");
655    }
656    // Compiler diagnostic emission — the ONE genuinely effectful operation in the otherwise-pure
657    // rustc_* surface (a dylint lint's actual OUTPUT: it writes warnings/errors to the compiler's
658    // diagnostic sink). Classified `Log` (same family as `tracing`/`log` — program output). Match the
659    // emission verbs precisely; rustc_lint/rustc_errors are mostly pure types (Lint, LintId, the Diag
660    // BUILDERS), and only the terminal `emit`/`emit_span_lint` actually produces output.
661    if crate_name == "rustc_lint"
662        && (path.ends_with("::emit_span_lint")
663            || path.ends_with("::span_lint")
664            || path.ends_with("::span_lint_hir"))
665    {
666        return Some("Log");
667    }
668    if crate_name == "rustc_errors"
669        && (path.ends_with("::emit")
670            || path.ends_with("::emit_diagnostic")
671            || path.ends_with("::emit_now"))
672    {
673        return Some("Log");
674    }
675    if crate_name == "arboard" {
676        return Some("Clipboard");
677    }
678    None
679}
680
681pub fn cap_from_name(name: &str) -> Option<&'static str> {
682    EFFECTS.iter().copied().find(|e| *e == name)
683}
684
/// Translate a cap-std capability *type* into the effect it authorises.
///
/// Holding one of these values (e.g. `&Dir`) is the real, unforgeable right to perform that
/// effect, so candor treats it as a declared capability, exactly like its own `&Fs` token.
///
/// Only crates whose name starts with `cap_` are considered; any other crate, and any type
/// name not listed below, yields `None`.
pub fn capstd_cap(crate_name: &str, type_name: &str) -> Option<&'static str> {
    let in_cap_family = crate_name.starts_with("cap_");
    match (in_cap_family, type_name) {
        // Not a cap-* crate: the type name is irrelevant.
        (false, _) => None,
        (true, "Dir") => Some("Fs"),
        (true, "TcpListener" | "TcpStream" | "UdpSocket" | "Pool") => Some("Net"),
        (true, "UnixListener" | "UnixStream" | "UnixDatagram") => Some("Ipc"),
        (true, "SystemClock" | "MonotonicClock") => Some("Clock"),
        // A cap-* crate, but not a type that maps to an effect.
        (true, _) => None,
    }
}