candor_classify/lib.rs
1//! candor-classify — the curated effect classifier (crate+path -> effect), extracted to a STABLE
2//! crate so both the nightly `rustc_private` lint AND a stable backend share ONE source of truth
3//! (no drift). Pure string logic; no rustc internals. The effect vocabulary lives in candor-report.
4
5use candor_report::EFFECTS;
6
7/// The canonical CANDOR_POLICY DSL parser (SPEC §6.2), shared by the nightly gate and candor-query.
8pub mod policy;
9
/// Looks a callee up in the project-supplied rule table; meant to be consulted only after the
/// built-in `classify` has returned `None`.
///
/// Each rule is an `(effect, is_crate, prefix)` triple: when `is_crate` is true the prefix is
/// tested against `crate_name`, otherwise against `path`. Rules are tried in slice order and the
/// first one whose prefix matches supplies the effect; `None` means no rule matched.
pub fn classify_extra(
    crate_name: &str,
    path: &str,
    extra: &[(&'static str, bool, String)],
) -> Option<&'static str> {
    extra.iter().find_map(|(effect, match_crate, prefix)| {
        // Select the string this rule is keyed on, then do a plain prefix test against it.
        let subject = if *match_crate { crate_name } else { path };
        subject.starts_with(prefix.as_str()).then_some(*effect)
    })
}
24
/// Every third-party crate that `classify` carries effect rules for, matched by exact crate name
/// (the crate-name PREFIXES it recognizes are listed in `CALIBRATED_PREFIXES`). This is the single
/// source of truth for "what candor knows": it is emitted beside the JSON report
/// (`<prefix>.calibrated.json`) so the Claude Code receipt's coverage check reads candor's real
/// coverage instead of a hand-copied list.
///
/// Keep it in lockstep with `classify` — the `db_crates_are_calibrated` and
/// `calibrated_crates_are_live` tests (in this crate's `tests` module) enforce both directions.
//
// NOTE(review): the crate-gated `match` inside `classify` also has arms for crates that are not
// listed here (e.g. `hyper`, `quinn`, `rlimit`) — confirm whether that omission is intentional.
pub const CALIBRATED_CRATES: [&str; 79] = [
    // Network. `aws_config` resolves credentials over the network on `.load()`; git2's remote ops
    // (fetch/push/connect) contact the network; `async_net` is smol's net layer; `pnet` is raw
    // L2/L3 packet capture.
    "reqwest", "isahc", "ureq", "curl",
    "aws_config", "git2",
    "tokio_tcp", "tokio_udp", "async_net",
    "async_nats", "lapin", "lettre", "tungstenite",
    "elasticsearch", "tonic", "rdkafka", "pnet",
    // Directory traversal: `ignore` is the gitignore-aware walker behind ripgrep/fd (its walk
    // executors are Fs). Filesystem watching: `notify` wraps inotify/FSEvents/kqueue and powers
    // watchexec/cargo-watch.
    "ignore", "notify",
    // Database clients — mirrors DB_CRATES (see the DB branch in `classify`).
    "sqlx", "rusqlite", "postgres", "tokio_postgres",
    "diesel", "redis", "mongodb",
    "mysql", "mysql_async", "sea_orm", "deadpool_postgres",
    // Filesystem (async_fs = smol; fs_err = std::fs wrapper; tempfile; glob).
    "memmap2", "fs_err", "async_fs", "tempfile", "glob",
    // Entropy.
    "rand", "getrandom", "fastrand",
    // Entropy, continued: the password-hashing tier (salt mints + bcrypt's internal salt) plus
    // the OsRng source.
    "argon2", "bcrypt", "scrypt", "pbkdf2", "password_hash", "rand_core",
    // Subprocess (async_process = smol; duct).
    "portable_pty", "async_process", "duct",
    // Env (dotenvy/dotenv).
    "dotenvy", "dotenv",
    // Clock (incl. `time`) / log / clipboard.
    "chrono", "time", "tracing", "log", "arboard",
    // Compiler diagnostic emission (a dylint lint's output) — see the Log rules in `classify`.
    "rustc_lint", "rustc_errors",
    // Raw syscalls via FFI: the syscall-name table that lights up the FFI-thin tier. `nix` is
    // routed through the same table by leaf name, so a consumer of nix is covered without nix's
    // own source.
    "libc", "nix", "rustix",
    // Coverage-differential additions (verb-keyed; see the per-crate rules near the end of
    // `classify`):
    //   - sync TLS core + native-tls variants (Net)
    "rustls", "native_tls_crate", "tokio_native_tls",
    //   - env/dir resolution + argv + LS_COLORS (Env)
    "etcetera", "wild", "lscolors",
    //   - sqlx-core execution terminals (Net/Db); directory walk + timestamp mutation +
    //     same-file (Fs)
    "sqlx_core", "walkdir", "filetime", "clircle",
    //   - process-spawn helpers (Exec); signal handler + interactive-tty prompts (Ipc);
    //     env_logger (Log); jiff/backoff clock reads (Clock)
    "execute", "ctrlc", "clap", "jiff", "env_logger",
    "dialoguer", "console", "terminal_colorsaurus", "backoff", "grep_cli",
];
68
/// Crate-name PREFIXES that `classify` recognizes (tested with `starts_with` rather than
/// equality), the prefix counterpart to the exact names in `CALIBRATED_CRATES`.
pub const CALIBRATED_PREFIXES: [&str; 3] = [
    "aws_sdk_",
    "aws_smithy",
    "cap_",
];
70
/// Crates that `classify` covers by PATH prefix instead of crate-name equality: their effectful
/// modules are recognised directly (e.g. `tokio::net::`, `async_std::fs::`, `mio::net::`). That is
/// why they do not appear in `CALIBRATED_CRATES`, which the liveness test probes by crate name.
/// The coverage check must still count them as *covered* — otherwise the most common async crates
/// would be mislabelled as blind spots.
pub const PATH_CALIBRATED_CRATES: [&str; 3] = [
    "tokio",
    "async_std",
    "mio",
];
76
/// Representative path tails probed by the `calibrated_crates_are_live` liveness test. Each tail
/// is appended to a crate name, and at least one must match for every `CALIBRATED_CRATES` entry —
/// an entry with no matching tail is dead.
///
/// Exported as ONE source of truth because the nightly lint crate (`src/lib.rs`) runs the SAME
/// liveness test. When the two probe lists were duplicated they drifted: a rule keyed on a
/// distinctive tail (pnet `::datalink::channel`, ignore `::WalkBuilder::build_parallel`, notify
/// `::RecommendedWatcher::new`) added to only one list silently broke the other crate's
/// `cargo test`.
pub const CALIBRATION_PROBE_TAILS: &[&str] = &[
    // Generic dispatch / execution verbs.
    "::X::send", "::X::execute", "::X::call",
    "::X::query", "::X::fetch_one", "::Remote::fetch",
    // Distinctive per-crate tails (pnet / ignore / notify).
    "::datalink::channel",
    "::WalkBuilder::build_parallel",
    "::RecommendedWatcher::new",
    "::X::connect", "::Utc::now", "::X::load",
    "::__private_api::log", "::tempfile", "::glob",
    "::X::run", "::dotenv", "::random",
    "::emit", "::X::emit_span_lint", "::X::anything",
    "::SaltString::generate", "::hash", "::OsRng::fill_bytes",
    // Verb-precise crates: their whole-crate rules were narrowed to the effectful surface (pure
    // accessors/ctors/data-types now return None), so the probe has to name an EFFECTFUL path.
    "::Mmap::map", "::event", "::u32",
    "::Clipboard::get_text", "::spawn_command",
    // Coverage-differential crates — each needs at least one effectful tail. Tails already listed
    // cover native_tls_crate/tokio_native_tls/sqlx_core (via `::X::connect`), execute (via
    // `::X::execute`) and jiff (via `::now`).
    "::read_tls", "::home_dir", "::args",
    "::from_env", "::IntoIter::next", "::set_file_mtime",
    "::surely_conflicts_with", "::set_handler", "::get_matches",
    "::init", "::interact",
    "::write_line", "::background_color", "::retry", "::build",
];
98
/// Database client crates whose execution verbs count as I/O (see the DB branch in `classify`).
/// Kept at module level so the `db_crates_are_calibrated` test can enforce
/// `DB_CRATES ⊆ CALIBRATED_CRATES`.
pub const DB_CRATES: [&str; 11] = [
    "sqlx", "rusqlite",
    "postgres", "tokio_postgres",
    "diesel", "redis", "mongodb",
    "mysql", "mysql_async",
    "sea_orm", "deadpool_postgres",
];
105
/// Leaf names that only transfer *ownership* of a file descriptor and therefore never count as an
/// effect. They fall into three groups:
///
/// - ADOPT an already-open descriptor: `from_raw_fd` / `from_raw_socket` / `from_raw_handle`;
/// - EXTRACT or BORROW one: `into_raw_*` and `as_raw_*`;
/// - UNWRAP an async wrapper back to its std type: `into_std`.
///
/// None of them issue a syscall or perform I/O. candor's cardinal sin is calling a PURE function
/// effectful, and these leaves collide with the coarse std-type PREFIX rules
/// (`std::net::TcpStream` / `std::fs::File` / `std::os::unix::net` → Net/Fs/Ipc) even though the
/// descriptor was opened ELSEWHERE. The portable_pty/async_process Exec rule already exempts
/// `from_raw_fd`; this list generalises that carve-out across the net/fs/ipc prefix rules.
/// (Found by a real-world sweep of tokio: `TcpStream::into_std`, `*::from_raw_fd` and
/// `*::as_raw_fd` all fabricated Net/Fs/Ipc.)
const PURE_FD_TRANSFER: &[&str] = &[
    // adopt
    "from_raw_fd",
    "from_raw_socket",
    "from_raw_handle",
    // extract
    "into_raw_fd",
    "into_raw_socket",
    "into_raw_handle",
    // borrow
    "as_raw_fd",
    "as_raw_socket",
    "as_raw_handle",
    // unwrap async -> std
    "into_std",
    // `SocketAddr::from_pathname` (std/async-std unix net) only builds an address STRUCT from a
    // path; no socket is opened. Without this entry the `std::os::unix::net` prefix rule would
    // fabricate Ipc on it. (Found sweeping socket2: `SockAddr::as_unix` → `from_pathname`
    // reported Ipc.)
    "from_pathname",
];
126
127/// Classify a resolved callee by the crate it belongs to and its full path.
128pub fn classify(crate_name: &str, path: &str) -> Option<&'static str> {
129 // Pure fd ownership-transfer/extraction leaves are never an effect, regardless of which std I/O
130 // type they hang off — exempt them BEFORE the coarse prefix rules can fabricate Net/Fs/Ipc.
131 if PURE_FD_TRANSFER.contains(&path.rsplit("::").next().unwrap_or(path)) {
132 return None;
133 }
134 if crate_name.starts_with("aws_sdk_") || crate_name.starts_with("aws_smithy") {
135 // Only request dispatch is network I/O; builder setters/accessors are pure.
136 if path.ends_with("::send") || path.ends_with("::send_with") {
137 return Some("Net");
138 }
139 return None;
140 }
141 // aws-config resolves credentials/region on `.load()` — it reaches the IMDS metadata
142 // endpoint / STS over the network (and reads ~/.aws + env). Builders (`defaults()`,
143 // `SdkConfig::builder()`, `BehaviorVersion::latest()`) are pure; the `load` is the I/O.
144 // (Found hardening on a real app, ebman: `builder.load().await` was classified pure.)
145 if crate_name == "aws_config" {
146 if path.ends_with("::load") || path.ends_with("::load_defaults") {
147 return Some("Net");
148 }
149 return None;
150 }
151 // git2 (libgit2 FFI): remote operations contact the network; everything else is local
152 // to the .git directory. Match the remote verbs precisely — NOT bare `::clone`, which is
153 // the `Clone`-trait dup of a `Remote` handle (pure), not `Repository::clone`. (Found
154 // hardening on gitui: `remote.fetch`/`remote.push` were classified network-free — a git
155 // client reporting it makes no network calls.)
156 if crate_name == "git2" {
157 if path.ends_with("::fetch")
158 || path.ends_with("::push")
159 || path.ends_with("::download")
160 || path.ends_with("::connect")
161 || path.ends_with("::connect_auth")
162 || path.ends_with("::ls")
163 || path.ends_with("::upload")
164 {
165 return Some("Net");
166 }
167 return None;
168 }
169 // libc — raw syscalls via FFI. The FFI-thin tier (nix, and the syscall layer beneath rusqlite/git2)
170 // is invisible to a name classifier unless we model libc directly: a 35-crate calibration
171 // (eval/calibration) showed nix reporting ZERO library effects because every wrapper bottoms out in
172 // an unrecognised `libc::*` call. Classify by syscall name, but ONLY the UNAMBIGUOUS ones — the
173 // socket family is Net, path/dir syscalls are Fs, spawn/exec/wait is Exec, SysV/pipe IPC is Ipc,
174 // env/clock/entropy each their own. We deliberately SKIP the generic file-descriptor ops
175 // (read/write/close/lseek/dup/fcntl/ioctl/poll/select/epoll*/mmap): they operate on ANY fd — file,
176 // socket, or pipe — so a fixed label would mis-categorise as often as it helps. An honest
177 // no-classify (under-report) beats emitting the WRONG effect. Pure conversions (htons/inet_pton/
178 // gmtime) are also skipped.
179 //
180 // `nix` (the idiomatic SAFE libc wrapper, in ~every Rust systems/CLI crate) is routed through the
181 // SAME table: its functions keep the syscall leaf name (`nix::fcntl::open`, `nix::sys::socket::connect`,
182 // `nix::unistd::execvp`). Without this, a CONSUMER of nix analysed without nix's own source (the
183 // stable scanner, single-crate) sees `nix::*` cross-crate and under-reports — serialport-rs opens its
184 // device via `nix::fcntl::open` and reported ZERO Fs. The nightly lint reaches `libc::*` THROUGH nix's
185 // body; this gives the scanner the same coverage directly. (Found sweeping serialport-rs.)
186 // `rustix` is the same shape as nix but does RAW syscalls (no libc underneath), so its functions MUST
187 // be classified directly. Its leaf names are the syscall names too (`rustix::time::clock_settime`,
188 // `rustix::fs::mkfifoat`/`symlink`/`stat`, `rustix::net::connect`) — route it through the same table.
189 // The rustix-specific `*at`/variant leaves it doesn't share with libc just under-report (the safe
190 // direction). VALIDATED, not speculative: coreutils' `date` reads/sets the clock via
191 // `rustix::time::clock_getres`/`clock_settime` and reported Clock=0; the file I/O that goes through
192 // std::fs was already correct, which is why only the rustix-only effects (Clock/Ipc) were missing.
193 if crate_name == "libc" || crate_name == "nix" || crate_name == "rustix" {
194 let f = path.rsplit("::").next().unwrap_or(path);
195 // path / directory / metadata syscalls (incl. *64 and *at variants)
196 const FS: &[&str] = &[
197 "open", "open64", "openat", "openat2", "creat", "creat64", "stat", "stat64", "lstat",
198 "lstat64", "fstatat", "fstatat64", "newfstatat", "statx", "access", "faccessat",
199 "faccessat2", "mkdir", "mkdirat", "rmdir", "unlink", "unlinkat", "rename", "renameat",
200 "renameat2", "link", "linkat", "symlink", "symlinkat", "readlink", "readlinkat", "chmod",
201 "fchmodat", "chown", "lchown", "fchownat", "truncate", "truncate64", "ftruncate",
202 "ftruncate64", "opendir", "fdopendir", "readdir", "readdir64", "readdir_r", "closedir",
203 "rewinddir", "seekdir", "telldir", "scandir", "mkstemp", "mkstemps", "mkostemp", "mkdtemp",
204 "mknod", "mknodat", "chdir", "fchdir", "getcwd", "get_current_dir_name", "chroot",
205 "pivot_root", "statfs", "statfs64", "fstatfs", "fstatfs64", "statvfs", "fstatvfs", "mount",
206 "umount", "umount2", "fsync", "fdatasync", "sync", "syncfs", "sync_file_range", "fallocate",
207 "posix_fallocate", "posix_fadvise", "sendfile", "sendfile64", "copy_file_range", "flock",
208 "getdents", "getdents64", "utime", "utimes", "lutimes", "futimens", "utimensat", "futimesat",
209 "realpath",
210 ];
211 // socket family — these operate only on sockets, so Net is unambiguous (AF_UNIX domain isn't
212 // visible at the call, so a Unix socket reads as Net rather than Ipc; acceptable over-general).
213 const NET: &[&str] = &[
214 "socket", "setsockopt", "getsockopt", "bind", "listen", "accept", "accept4", "connect",
215 "shutdown", "send", "sendto", "sendmsg", "sendmmsg", "recv", "recvfrom", "recvmsg",
216 "recvmmsg", "getpeername", "getsockname", "getaddrinfo", "freeaddrinfo", "getnameinfo",
217 ];
218 // process creation / replacement / reaping
219 const EXEC: &[&str] = &[
220 "fork", "vfork", "clone", "clone3", "execl", "execlp", "execle", "execv", "execvp",
221 "execvpe", "execve", "execveat", "fexecve", "posix_spawn", "posix_spawnp", "system",
222 "popen", "pclose", "wait", "waitpid", "wait3", "wait4", "waitid",
223 ];
224 // pipes / FIFOs / SysV + POSIX message queues, semaphores, shared memory; socketpair (AF_UNIX)
225 const IPC: &[&str] = &[
226 "pipe", "pipe2", "mkfifo", "mkfifoat", "socketpair", "msgget", "msgsnd", "msgrcv", "msgctl",
227 "semget", "semop", "semtimedop", "semctl", "shmget", "shmat", "shmdt", "shmctl", "mq_open",
228 "mq_send", "mq_receive", "mq_timedsend", "mq_timedreceive", "mq_close", "mq_unlink",
229 ];
230 const ENV: &[&str] = &["getenv", "secure_getenv", "setenv", "putenv", "unsetenv", "clearenv"];
231 const CLOCK: &[&str] = &[
232 "time", "gettimeofday", "clock_gettime", "clock_getres", "nanosleep", "clock_nanosleep",
233 // SETTING the system clock is a clock effect too (was unclassified — found on coreutils `date`,
234 // which sets it via `clock_settime`).
235 "clock_settime", "settimeofday", "stime", "adjtime", "adjtimex", "clock_adjtime",
236 ];
237 const RAND: &[&str] = &["getrandom", "getentropy", "arc4random", "arc4random_buf", "arc4random_uniform"];
238 if FS.contains(&f) {
239 return Some("Fs");
240 }
241 if NET.contains(&f) {
242 return Some("Net");
243 }
244 if EXEC.contains(&f) {
245 return Some("Exec");
246 }
247 if IPC.contains(&f) {
248 return Some("Ipc");
249 }
250 if ENV.contains(&f) {
251 return Some("Env");
252 }
253 if CLOCK.contains(&f) {
254 return Some("Clock");
255 }
256 if RAND.contains(&f) {
257 return Some("Rand");
258 }
259 return None;
260 }
261 // C-library FFI bindings: libsqlite3 (under rusqlite) and libgit2 (under git2). Like the libc tier,
262 // these crates are thin Rust over a C library, so their real I/O is invisible until the C entry
263 // points are named. Match by the DISTINCTIVE C function name (`sqlite3_*` / `git_*`) via the call's
264 // LEAF — independent of the binding crate's alias: rusqlite calls `ffi::sqlite3_step`, git2 calls
265 // `raw::git_remote_fetch`, and the nightly lint resolves the same to `libsqlite3_sys`/`libgit2_sys`;
266 // all spellings share the leaf. Only the I/O-performing entry points are listed — the in-memory
267 // accessors (`sqlite3_bind_*`/`sqlite3_column_*`, `git_*_oid`/strarray/options builders) stay pure,
268 // so a non-listed `sqlite3_`/`git_` leaf returns None (under-report, never a wrong effect). Calibrated
269 // + validated against rusqlite 0.39 / git2 0.20 source (eval/calibration).
270 {
271 let leaf = path.rsplit("::").next().unwrap_or(path);
272 if let Some(rest) = leaf.strip_prefix("sqlite3_") {
273 let _ = rest;
274 // SQLite C API operations that touch the database (open/exec/step/prepare/backup/blob/wal).
275 const DB: &[&str] = &[
276 "sqlite3_open", "sqlite3_open_v2", "sqlite3_open16", "sqlite3_close", "sqlite3_close_v2",
277 "sqlite3_exec", "sqlite3_step", "sqlite3_prepare", "sqlite3_prepare_v2",
278 "sqlite3_prepare_v3", "sqlite3_prepare16", "sqlite3_prepare16_v2", "sqlite3_prepare16_v3",
279 "sqlite3_get_table", "sqlite3_backup_init", "sqlite3_backup_step", "sqlite3_backup_finish",
280 "sqlite3_blob_open", "sqlite3_blob_read", "sqlite3_blob_write", "sqlite3_blob_reopen",
281 "sqlite3_load_extension", "sqlite3_wal_checkpoint", "sqlite3_wal_checkpoint_v2",
282 ];
283 return DB.contains(&leaf).then_some("Db");
284 }
285 if leaf.starts_with("git_") {
286 // libgit2: remote/transport operations contact the network … (incl. submodule clone/update,
287 // which `git_clone`/fetch the subrepo over its remote — `allow_fetch` defaults on; an A/B on
288 // git2 0.20 caught `Submodule::update`/`clone` reporting no `Net`).
289 const NET: &[&str] = &[
290 "git_clone", "git_remote_connect", "git_remote_connect_ext", "git_remote_fetch",
291 "git_remote_download", "git_remote_upload", "git_remote_push", "git_remote_ls",
292 "git_submodule_clone", "git_submodule_update",
293 ];
294 // … and repository/index/odb/checkout/ref/config operations touch the on-disk .git store.
295 const FS: &[&str] = &[
296 "git_repository_open", "git_repository_open_ext", "git_repository_open_bare",
297 "git_repository_init", "git_repository_init_ext", "git_repository_discover",
298 "git_checkout_tree", "git_checkout_head", "git_checkout_index", "git_index_read",
299 "git_index_write", "git_index_write_tree", "git_index_write_tree_to",
300 "git_index_add_bypath", "git_index_add_all", "git_odb_open", "git_odb_read",
301 "git_odb_write", "git_odb_open_wstream", "git_odb_open_rstream",
302 "git_blob_create_fromdisk", "git_blob_create_fromworkdir", "git_blob_create_from_disk",
303 "git_blob_create_from_workdir", "git_blob_create_from_stream", "git_commit_create",
304 "git_commit_create_v", "git_reference_create", "git_reference_set_target",
305 "git_reference_delete", "git_config_open_default", "git_config_open_ondisk",
306 "git_config_add_file_ondisk", "git_tag_create", "git_treebuilder_write",
307 "git_packbuilder_write",
308 ];
309 if NET.contains(&leaf) {
310 return Some("Net");
311 }
312 if FS.contains(&leaf) {
313 return Some("Fs");
314 }
315 return None;
316 }
317 if leaf.starts_with("curl_") {
318 // libcurl (under the `curl` crate, called `curl_sys::curl_*`). Only the entry points that
319 // PERFORM network I/O: the blocking transfer (`curl_easy_perform`), raw socket send/recv,
320 // the HTTP/2 keepalive PING (`upkeep`), and the multi-interface transfer pumps. The large
321 // pure surface (setopt/init/cleanup/reset/getinfo/escape/multi_add_handle/fdset/info_read)
322 // stays unclassified, as do `curl_multi_wait`/`poll` (readiness WAIT on sockets, no payload —
323 // the loop's `perform` is the tagged boundary, per the I/O-boundary principle). An A/B on
324 // curl 0.4 caught the whole crate reporting ZERO Net (`Easy::perform` read as pure).
325 const NET: &[&str] = &[
326 "curl_easy_perform", "curl_easy_send", "curl_easy_recv", "curl_easy_upkeep",
327 "curl_multi_perform", "curl_multi_socket_action",
328 ];
329 return NET.contains(&leaf).then_some("Net");
330 }
331 if let Some(op) = leaf.strip_prefix("SSL_") {
332 // OpenSSL (libssl, under the `openssl`/`native-tls` crates, called `ffi::SSL_*`). The TLS
333 // handshake and record I/O run over the peer socket -> Net. Unlike libc read/write, an SSL_*
334 // op is ~always over a network BIO (the rare memory-BIO/sans-IO case is the honest exception
335 // we accept). The crypto surface (EVP_*/SHA*/AES*) and pure setup (SSL_CTX_new/SSL_set_fd) are
336 // NOT here; `BIO_*` is skipped (a BIO may be memory or socket). Validated vs openssl 0.9 source.
337 const SSL_NET: &[&str] = &[
338 "connect", "accept", "do_handshake", "read", "read_ex", "write", "write_ex", "peek",
339 "peek_ex", "shutdown",
340 ];
341 return SSL_NET.contains(&op).then_some("Net");
342 }
343 }
344 // HTTP clients use the same builder pattern as the AWS SDK: only the dispatch is
345 // I/O. (Found by the eval: ebman's reqwest calls to the Anthropic API + webhooks
346 // were silently classified network-free because reqwest wasn't recognized.)
347 if crate_name == "reqwest" || crate_name == "isahc" {
348 // The builder chain is pure; the dispatch (`::send`/`::execute`) is the I/O. PLUS the one-shot
349 // CONVENIENCE functions `reqwest::get` / `reqwest::blocking::get` / `isahc::get`, which send
350 // immediately — they're not the `Client::get` builder (a different path, `reqwest::Client::get`),
351 // so an exact match avoids false-positiving the builder. (Found running on `xh`: a one-shot
352 // `reqwest::get(url)` was classified network-free.)
353 if path.ends_with("::send")
354 || path.ends_with("::execute")
355 || path == "reqwest::get"
356 || path == "reqwest::blocking::get"
357 || path == "isahc::get"
358 {
359 return Some("Net");
360 }
361 return None;
362 }
363 if crate_name == "ureq" && path.ends_with("::call") {
364 return Some("Net");
365 }
366 // The `curl` crate (libcurl's safe binding — cargo's own HTTP client): the dispatch verbs are
367 // `perform` (Easy/Easy2/Transfer/Multi), raw-socket `send`/`recv`, the keepalive `upkeep`, and the
368 // multi-interface `action` (socket_action). The big setopt-style builder surface stays pure.
369 // `Multi::timeout` is deliberately NOT matched: `Easy::timeout` is a pure CURLOPT_TIMEOUT setter
370 // sharing the leaf — an under-report on the rare event-loop kick beats mis-tagging every consumer
371 // that sets a timeout. (Consumer-side companion to the curl_* FFI tier, same A/B finding.)
372 if crate_name == "curl"
373 && (path.ends_with("::perform")
374 || path.ends_with("::send")
375 || path.ends_with("::recv")
376 || path.ends_with("::upkeep")
377 || path.ends_with("::action"))
378 {
379 return Some("Net");
380 }
381 // The modern async-HTTP / TLS / QUIC / DNS stack — the LAYER reqwest/ureq/isahc build on, and that
382 // crates use DIRECTLY. Found by the independent-method differential on `oha` (2026-06-17): candor
383 // honestly DISCLOSED these as blind but never CLASSIFIED them, leaving real Net reaches uncovered.
384 // Verb-keyed (the pure type/builder/codec surface stays None) and CRATE-GATED, so generic verbs
385 // (request/connect/get/read/write/accept) never fabricate across unrelated crates. Same precision
386 // discipline as the reqwest/curl rules above; complements the scan_builder_entry_effect entries.
387 match crate_name {
388 // hyper 1.x client connection I/O (the builder/Body/Request types stay pure).
389 "hyper" if path.ends_with("::send_request") || path.ends_with("::handshake") => return Some("Net"),
390 // hyper-util's pooled legacy Client + its TCP connectors.
391 "hyper_util" if path.ends_with("::request") || path.ends_with("::connect") => return Some("Net"),
392 // hickory (trust-dns) resolver — issues DNS queries over the network.
393 "hickory_resolver"
394 if path.ends_with("::lookup_ip") || path.ends_with("::lookup") || path.ends_with("_lookup")
395 || path.ends_with("::resolve") => return Some("Net"),
396 // HTTP/3 over QUIC.
397 "h3" if path.ends_with("::send_request") || path.ends_with("::recv_data")
398 || path.ends_with("::recv_response") || path.ends_with("::send_data") => return Some("Net"),
399 // QUIC transport (UDP socket send/recv): connection setup, datagrams, AND the stream byte I/O
400 // (`RecvStream::read*` / `SendStream::write*` / `finish`). Opening a stream is caught above, but a
401 // fn that only HOLDS a stream and reads/writes it would otherwise read silent-pure (review: a Net
402 // under-report). Crate-gated to quinn, where these verbs are unambiguously the socket I/O.
403 "quinn" if path.ends_with("::connect") || path.ends_with("::accept") || path.ends_with("::open_bi")
404 || path.ends_with("::open_uni") || path.ends_with("::accept_bi") || path.ends_with("::accept_uni")
405 || path.ends_with("::send_datagram") || path.ends_with("::read_datagram")
406 || path.ends_with("::read") || path.ends_with("::read_chunk") || path.ends_with("::read_chunks")
407 || path.ends_with("::read_to_end") || path.ends_with("::write") || path.ends_with("::write_all")
408 || path.ends_with("::write_chunk") || path.ends_with("::write_chunks")
409 || path.ends_with("::finish") => return Some("Net"),
410 // TLS-over-TCP stream adapters — the actual socket handshake/I/O (the config/cert types stay pure).
411 "tokio_rustls" | "native_tls"
412 if path.ends_with("::connect") || path.ends_with("::accept") || path.ends_with("::handshake") =>
413 return Some("Net"),
414 // AF_VSOCK host<->guest sockets — inter-process / VM comms.
415 "tokio_vsock" if path.ends_with("::connect") || path.ends_with("::bind") || path.ends_with("::accept") =>
416 return Some("Ipc"),
417 // Loads the OS trust store from disk (cert files / keychain).
418 "rustls_native_certs" if path.ends_with("::load_native_certs") => return Some("Fs"),
419 // `rlimit` reads/mutates the process's kernel resource limits — the closest bucket is Env (host/
420 // process config); no dedicated process-state bucket exists, so getrlimit (read) and setrlimit
421 // (mutate) share it. NOTE: `num_cpus::get`/`get_physical` are deliberately NOT modeled — asking the
422 // OS for the CPU count is a near-pure topology query, and std's equivalent `thread::
423 // available_parallelism` classifies pure; modeling it as Env would spray Env over every thread-pool
424 // constructor (review: a high-noise over-report) for no capability a reviewer cares about.
425 "rlimit" if path.ends_with("::getrlimit") || path.ends_with("::setrlimit")
426 || path.ends_with("::increase_nofile_limit") => return Some("Env"),
427 // rustls — the SYNC TLS core (tokio_rustls/native_tls above are the async/system adapters). The
428 // record-layer I/O is `read_tls`/`write_tls` (pull/push raw bytes through a held `io::Read`/`Write`)
429 // and `complete_io` (loops them until the handshake/buffers drain). The config/cert/builder types
430 // (`ClientConfig`/`ServerConfig`/`ConfigBuilder`) are PURE. `process_new_packets` is deliberately
431 // EXCLUDED — it only decrypts ALREADY-buffered bytes (no socket touch; docs say call it AFTER
432 // read_tls), so flagging it would over-report Net on the pure decrypt step.
433 "rustls" if path.ends_with("::read_tls") || path.ends_with("::write_tls")
434 || path.ends_with("::complete_io") => return Some("Net"),
435 // native-tls under its alternate crate name + the tokio async wrapper (the `native_tls` arm above
436 // is the common name). The TLS handshake over a TcpStream is Net; the builder/cert types are pure.
437 "native_tls_crate" | "tokio_native_tls"
438 if path.ends_with("::connect") || path.ends_with("::accept")
439 || path.ends_with("::handshake") => return Some("Net"),
440 _ => {}
441 }
442 // Message-queue clients fully encapsulate the socket (the underlying tokio::net lives
443 // inside the crate, unseen), so a user's connect/publish/consume calls ARE the I/O
444 // boundary — to a remote broker, hence Net. Match the broker round-trip verbs (snake_case
445 // methods); the CamelCase option/property builders stay pure. (Found hardening on consumer
446 // apps: lapin `basic_publish`/`queue_declare` and async-nats `publish`/`subscribe` were
447 // classified pure — a message-queue client reporting no I/O.)
448 if crate_name == "async_nats" {
449 if path.ends_with("::connect")
450 || path.contains("::publish")
451 || path.ends_with("::subscribe")
452 || path.ends_with("::queue_subscribe")
453 || path.contains("::request")
454 || path.ends_with("::flush")
455 {
456 return Some("Net");
457 }
458 return None;
459 }
460 if crate_name == "lapin" {
461 if path.ends_with("::connect")
462 || path.ends_with("::create_channel")
463 || path.contains("::basic_")
464 || path.contains("::queue_")
465 || path.contains("::exchange_")
466 || path.contains("::tx_")
467 || path.ends_with("::confirm_select")
468 || path.ends_with("::close")
469 {
470 return Some("Net");
471 }
472 return None;
473 }
474 // SMTP email — lettre's `Transport::send` is the network dispatch; Message building is
475 // pure. (Found hardening on a lettre consumer: `mailer.send(&email)` classified pure.)
476 if crate_name == "lettre" {
477 if path.ends_with("::send") || path.ends_with("::send_raw") {
478 return Some("Net");
479 }
480 return None;
481 }
482 // WebSockets — tungstenite (the modern successor to the old `websocket` crate). connect
483 // and the socket read/write/send are network; Message constructors are pure. (Found on a
484 // tungstenite consumer: connect + send + read classified pure.)
485 if crate_name == "tungstenite" {
486 if path.ends_with("::connect")
487 || path.ends_with("::read")
488 || path.ends_with("::write")
489 || path.ends_with("::send")
490 || path.ends_with("::close")
491 || path.ends_with("::flush")
492 || path.ends_with("::read_message")
493 || path.ends_with("::write_message")
494 {
495 return Some("Net");
496 }
497 return None;
498 }
499 // elasticsearch: request builders are pure; only the `.send()` dispatch is HTTP I/O
500 // (same shape as reqwest / the AWS SDK). (Found on an elasticsearch consumer.)
501 if crate_name == "elasticsearch" && path.ends_with("::send") {
502 return Some("Net");
503 }
504 // gRPC — tonic. The transport connect and the Grpc client RPC dispatch are network;
505 // codecs and request/response wrappers are pure. (connect repro-confirmed on a consumer;
506 // the unary/streaming RPC verbs are from the tonic::client::Grpc API.)
507 if crate_name == "tonic" {
508 if path.ends_with("::connect")
509 || path.ends_with("::unary")
510 || path.ends_with("::server_streaming")
511 || path.ends_with("::client_streaming")
512 || path.ends_with("::streaming")
513 {
514 return Some("Net");
515 }
516 return None;
517 }
518 // Kafka — rdkafka (FFI to librdkafka). Producer send + consumer poll/recv/subscribe/
519 // commit are network round-trips to the brokers. (API-calibrated + unit-tested; a real
520 // repro needs librdkafka/cmake, deferred.)
521 if crate_name == "rdkafka" {
522 if path.ends_with("::send")
523 || path.ends_with("::send_result")
524 || path.ends_with("::recv")
525 || path.ends_with("::poll")
526 || path.ends_with("::subscribe")
527 || path.ends_with("::commit")
528 || path.ends_with("::commit_message")
529 || path.ends_with("::commit_consumer_state")
530 || path.ends_with("::store_offset")
531 || path.ends_with("::seek")
532 || path.ends_with("::fetch_metadata")
533 || path.ends_with("::fetch_watermarks")
534 || path.ends_with("::flush")
535 {
536 return Some("Net");
537 }
538 return None;
539 }
540 // cap-std: capability-oriented std. I/O goes *through* a held capability handle
541 // (Dir/Pool/Clock/...), so these calls ARE the effect. Recognising them means a
542 // cap-std project's real I/O is detected and matches the capability it declared
543 // (via `declared_caps`/`capstd_cap`) — conformance against unforgeable capabilities.
544 if crate_name.starts_with("cap_") {
545 if path.contains("::net::Unix") || path.contains("::os::") {
546 return Some("Ipc");
547 }
548 if path.contains("::net") {
549 return Some("Net");
550 }
551 if path.contains("::time") {
552 return Some("Clock");
553 }
554 if path.contains("::fs") || crate_name == "cap_tempfile" || crate_name == "cap_directories" {
555 return Some("Fs");
556 }
557 return None;
558 }
559 // Local IPC (Unix-domain sockets) is I/O but not *network* — keep it distinct so
560 // CANDOR_NO_AMBIENT and audits don't conflate it with internet access. async-std puts its
561 // Unix sockets under `os::unix::net` (mirroring std); async-net (smol's net layer) under
562 // `unix`.
563 if path.starts_with("tokio::net::Unix")
564 || path.starts_with("std::os::unix::net")
565 || path.starts_with("async_std::os::unix::net")
566 || path.starts_with("async_net::unix")
567 {
568 return Some("Ipc");
569 }
570 // Raw packet capture / raw sockets — libpnet (the dominant low-level networking crate; powers
571 // bandwhich, sniffers, custom-protocol tools). `datalink::channel` opens an L2 socket and
572 // `transport::transport_channel` an L3/L4 raw socket — both ARE network I/O. Packet construction
573 // (pnet_packet / pnet_base, MacAddr, Ethernet frames…) is pure and stays unclassified. The actual
574 // frame read/write happens via methods on the returned Sender/Receiver (trait-object dispatch the
575 // syntactic backend can't resolve), so the channel-open call is the precise Net boundary. (Found
576 // scanning bandwhich — a packet sniffer — which reported Net 0.)
577 if crate_name == "pnet" || crate_name == "pnet_datalink" || crate_name == "pnet_transport" {
578 if path.ends_with("::channel") || path.ends_with("::transport_channel") {
579 return Some("Net");
580 }
581 return None;
582 }
583 // Directory traversal — `ignore` (BurntSushi's gitignore-aware walker; powers ripgrep, fd). The walk
584 // EXECUTORS read the directory tree from disk = Fs. Type-precise on purpose: the configuration builders
585 // (`OverrideBuilder::build`, `GitignoreBuilder::build`, the `WalkBuilder` setters) and `DirEntry`
586 // accessors are PURE — only `WalkBuilder::build`/`build_parallel` (which kick off the walk) and
587 // `WalkParallel::run` (which drives it) touch the filesystem. A bare `build` would wrongly flag the
588 // config builders. (Found scanning fd — a file finder — which reported Fs 2: its own `fs::read_dir`
589 // was caught, but the `ignore`-based traversal that IS fd was invisible cross-crate.)
590 if crate_name == "ignore" {
591 if path == "ignore::WalkBuilder::build"
592 || path == "ignore::WalkBuilder::build_parallel"
593 || path.ends_with("::WalkParallel::run")
594 // `add_ignore(path)` LOOKS like a config setter but reads that ignore file from disk at call
595 // time (it returns the read error) — unlike the pure `add_custom_ignore_filename(name)` which
596 // only stores a filename string. The lone Fs-touching builder method in the otherwise-pure setter
597 // surface, so it was silently pure under the covered-crate floor.
598 || path == "ignore::WalkBuilder::add_ignore"
599 {
600 return Some("Fs");
601 }
602 return None;
603 }
604 // Filesystem watching — `notify` (the de-facto fs-watch crate: watchexec, cargo-watch, mdbook). A
605 // watcher opens an OS notification handle (inotify / FSEvents / kqueue / ReadDirectoryChanges) and
606 // registers paths — observing filesystem state changes = Fs. The lifecycle boundary: any
607 // `*Watcher::new` constructor (RecommendedWatcher/PollWatcher/INotifyWatcher/FsEventWatcher/…), the
608 // `recommended_watcher` convenience fn, and the `watch`/`unwatch` registration verbs. `Config`/`Event`/
609 // `EventKind` data types stay pure. (Found scanning watchexec: its watcher-`create` read Fs 0.)
610 if crate_name == "notify" {
611 if path.ends_with("Watcher::new")
612 || path.ends_with("::recommended_watcher")
613 || path.ends_with("::watch")
614 || path.ends_with("::unwatch")
615 {
616 return Some("Fs");
617 }
618 return None;
619 }
620 // std DNS resolution — `("host", 80).to_socket_addrs()` / `std::net::lookup_host("host")` perform a
621 // real getaddrinfo query (Net), but the classify table covered only the socket I/O *types*, so they
622 // floored silently (sweep [37]; the syntactic engine modelled DNS only at the libc layer).
623 if path.ends_with("::to_socket_addrs")
624 || path == "std::net::lookup_host"
625 || path.ends_with("ToSocketAddrs::to_socket_addrs")
626 {
627 return Some("Net");
628 }
629 // Raw sockets. Match the I/O *types* only — `std::net` also holds pure data types
630 // (SocketAddr, IpAddr, …) whose construction must NOT be flagged.
631 if path.starts_with("std::net::TcpStream")
632 || path.starts_with("std::net::TcpListener")
633 || path.starts_with("std::net::UdpSocket")
634 || path.starts_with("tokio::net::")
635 {
636 // …but the PURE accessors read back local/option state — no network I/O — so the whole-type Net
637 // rule fabricated Net on them (sweep [24], the cardinal sin; mirrors the arboard/memmap2 accessor
638 // carve-outs). local_addr/peer_addr return bound/connected addresses; nodelay/ttl/take_error read
639 // socket options/state. Every genuine verb (connect/read/write/send/recv/accept) stays Net.
640 if path.ends_with("::local_addr")
641 || path.ends_with("::peer_addr")
642 || path.ends_with("::nodelay")
643 || path.ends_with("::ttl")
644 || path.ends_with("::take_error")
645 {
646 return None;
647 }
648 return Some("Net");
649 }
650 // Legacy tokio 0.1 socket crates — `tokio_tcp`/`tokio_udp` are *entirely* networking
651 // (no pure types to over-flag), so the whole crate is Net. (Found hardening on websocat,
652 // which is still on tokio 0.1: its `tokio_tcp::TcpStream::connect` was classified
653 // network-free — a network tool confidently reporting 0 Net.)
654 if matches!(crate_name, "tokio_tcp" | "tokio_udp") {
655 return Some("Net");
656 }
657 // The other async runtimes mirror tokio's module layout, and their `net` modules hold only
658 // socket I/O types (the pure `SocketAddr`/`IpAddr` are re-exports that resolve to `std::net`,
659 // so they're excluded by def-path). `mio` is the low-level non-blocking-socket layer under
660 // tokio/others; `async_net` is smol's net crate. Closes the async-std/smol/mio gap the
661 // tokio_tcp note flagged. (Calibrated by module structure — these crates ARE networking — not
662 // a live repro; the TCP/UDP types are defined in-crate so the def-path prefix is exact.)
663 if path.starts_with("async_std::net::")
664 || path.starts_with("mio::net::")
665 || crate_name == "async_net"
666 {
667 return Some("Net");
668 }
669 // Database clients. Like the AWS/HTTP builders, only the execution verbs are I/O;
670 // query *construction* is pure. Best-effort across crates (tune via CANDOR_CONFIG).
671 // Note: bare `::query` is deliberately omitted — it executes in postgres/rusqlite but
672 // only *builds* in sqlx, so including it would false-positive sqlx's `query()` builder.
673 if DB_CRATES.contains(&crate_name) {
674 // Postgres / SQLite-family clients: `query`/`batch_execute`/`prepare`/etc. ARE the
675 // execution (round-trips to the server). sqlx is the outlier where bare `query()`
676 // only BUILDS — it keeps the narrow set below. (Found by running on a real
677 // tokio-postgres app, pgman: candor had reported only 4 of ~20 DB call sites.)
678 if matches!(crate_name, "postgres" | "tokio_postgres" | "deadpool_postgres" | "rusqlite") {
679 const PG: [&str; 19] = [
680 "::query", "::query_one", "::query_opt", "::query_raw", "::execute",
681 "::batch_execute", "::simple_query", "::prepare", "::prepare_typed",
682 "::copy_in", "::copy_out", "::transaction", "::connect",
683 // rusqlite's dialect of the same verbs (a verb-probe found the CANONICAL rusqlite
684 // consumer API classifying pure): `query_row` is the one-row read, `query_map`/
685 // `query_and_then` the many-row reads, `execute_batch` is rusqlite's name for
686 // batch_execute, `prepare_cached` round-trips like prepare. `query_typed` is
687 // tokio_postgres 0.7.10+.
688 "::query_row", "::query_map", "::query_and_then", "::execute_batch",
689 "::prepare_cached", "::query_typed",
690 ];
691 if PG.iter().any(|v| path.ends_with(v)) {
692 return Some("Db");
693 }
694 // rusqlite only: opening the database IS the connection establishment (`Connection::
695 // open`/`open_in_memory`/`open_with_flags` — the embedded analog of `::connect`).
696 if crate_name == "rusqlite"
697 && (path.ends_with("::open")
698 || path.ends_with("::open_in_memory")
699 || path.ends_with("::open_with_flags"))
700 {
701 return Some("Db");
702 }
703 return None;
704 }
705 // redis: the way redis is ACTUALLY used is the high-level `Commands`/`AsyncCommands`
706 // traits (`con.get`/`set`/`hset`/`lpush`/…) — every method is a round-trip — plus
707 // connection establishment. The shared VERBS below only catch the low-level
708 // `cmd("GET").query(con)`, so without this a normal redis user's calls classify as
709 // PURE. (Found hardening on redis-rs: a fn doing `con.get`/`set` reported no effects.)
710 if crate_name == "redis"
711 && (path.contains("Commands::")
712 || path.contains("::get_connection")
713 || path.contains("::get_async_connection")
714 || path.contains("::get_multiplexed_async_connection")
715 // a live `ConnectionManager` round-trips (Db), but `ConnectionManagerConfig` is a pure
716 // in-memory builder (set_number_of_retries/set_max_delay) — exclude it (adversarial review).
717 // `ConnectionManager::clone` is an Arc refcount bump — no Db round-trip (sweep [27]).
718 || (path.contains("ConnectionManager") && !path.contains("ConnectionManagerConfig")
719 && !path.ends_with("::clone"))
720 || path.ends_with("::query")
721 || path.ends_with("::query_async")
722 || path.ends_with("::req_command")
723 || path.ends_with("::req_packed_command")
724 || path.ends_with("::req_packed_commands"))
725 {
726 return Some("Db");
727 }
728 // mongodb: a document-store API with none of the SQL verbs — the user calls
729 // `coll.find_one`/`insert_one`/`aggregate`/… and `Client::with_uri_str`. Without
730 // these a mongodb user's calls classify PURE. (Found hardening: a fn doing
731 // `find_one`+`insert_one` reported no effects.) Handle accessors (name/namespace)
732 // and option/doc builders don't match these verbs, so they stay pure.
733 if crate_name == "mongodb" {
734 const MONGO: [&str; 27] = [
735 "::with_uri_str", "::connect", "::find", "::find_one", "::insert_one",
736 "::insert_many", "::update_one", "::update_many", "::delete_one",
737 "::delete_many", "::replace_one", "::aggregate", "::count_documents",
738 "::estimated_document_count", "::count", "::distinct", "::run_command",
739 "::find_one_and_update", "::find_one_and_delete", "::find_one_and_replace",
740 "::list_collections", "::list_collection_names", "::list_databases",
741 "::list_database_names", "::create_collection", "::create_index", "::watch",
742 ];
743 if MONGO.iter().any(|v| path.ends_with(v)) {
744 return Some("Db");
745 }
746 return None;
747 }
748 // mysql / mysql_async: the `query`/`exec` families + `get_conn`/`ping` execute
749 // immediately — no build-then-execute split like sqlx, so matching `::query` is safe
750 // here. Same DB-verb-dialect gap class as redis/mongodb; calibrated from the Queryable
751 // API (unit-tested; a real-app repro is the remaining confirmation).
752 if matches!(crate_name, "mysql" | "mysql_async") {
753 const MY: [&str; 16] = [
754 "::query", "::query_first", "::query_iter", "::query_map", "::query_fold",
755 "::query_drop", "::exec", "::exec_first", "::exec_iter", "::exec_map",
756 "::exec_fold", "::exec_drop", "::exec_batch", "::prep", "::ping", "::get_conn",
757 ];
758 if MY.iter().any(|v| path.ends_with(v)) {
759 return Some("Db");
760 }
761 return None;
762 }
763 // sea_orm: an ORM whose execution is split from building (like sqlx). The query
764 // BUILDERS (`Entity::find`, `Entity::insert`) are pure; execution happens at `.all`/
765 // `.one`/`.count`/`.stream` and `Insert/Update/Delete::exec`. The write path via an
766 // ActiveModel (`model.insert(db)`) executes too — distinguished from the `EntityTrait`
767 // builder by the trait in the path (`ActiveModelTrait::`). (Found hardening on a
768 // sea_orm consumer app: `.all(db)` reads and `ActiveModel::insert` writes were pure.)
769 if crate_name == "sea_orm" {
770 // sea_orm RE-EXPORTS sea_query (`sea_orm::sea_query::…`), whose builder algebra collides with
771 // the execution verbs: `Func::count(col)` builds a COUNT() expr, `Condition::all()` AND-groups
772 // filters, `Expr::count(…)` — all PURE, none touch a db. The `::all`/`::count`/`::one` execution
773 // rule fabricated Db on them (sweep [5]). sea_query is pure query construction end-to-end, so
774 // exclude the whole re-exported namespace first.
775 if path.contains("sea_query") {
776 return None;
777 }
778 if path.ends_with("::all")
779 || path.ends_with("::one")
780 || path.ends_with("::count")
781 || path.ends_with("::stream")
782 || path.ends_with("::exec")
783 || path.ends_with("::exec_with_returning")
784 || path.ends_with("::exec_without_returning")
785 || path.ends_with("::connect")
786 || path.ends_with("::execute")
787 || path.ends_with("::execute_unprepared")
788 || path.ends_with("::query_one")
789 || path.ends_with("::query_all")
790 || path.ends_with("::fetch_page")
791 || path.ends_with("::num_items")
792 || path.contains("ActiveModelTrait::")
793 {
794 return Some("Db");
795 }
796 return None;
797 }
798 // (Reached by sqlx + diesel — the build-vs-execute-split crates — and by redis paths the redis
799 // arm missed.) `first` is diesel's LIMIT-1 round trip, `load_iter` its 2.x streaming execution,
800 // `fetch_many` sqlx's multi-result stream. All crate-gated, so a std `Vec::first` never resolves here.
801 const VERBS: [&str; 19] = [
802 "::execute", "::query_row", "::query_map", "::query_one", "::fetch_one",
803 "::fetch_all", "::fetch_optional", "::fetch", "::fetch_many", "::connect",
804 "::acquire", "::begin", "::commit", "::rollback", "::load", "::load_iter",
805 "::first", "::get_result", "::get_results",
806 ];
807 if VERBS.iter().any(|v| path.ends_with(v)) {
808 return Some("Db");
809 }
810 return None;
811 }
812 // std::path::Path / PathBuf STAT-family methods hit the filesystem (each is a stat/readlink/
813 // readdir syscall) — unlike the rest of the std::path surface, which is pure string manipulation
814 // (join/file_name/extension/parent/…). Verb-precise so the scanner's receiver inference can safely
815 // route a `path.symlink_metadata()` method call here. (A blackout screen caught gix-dir — an entire
816 // directory WALKER — reporting ZERO Fs because all its I/O is Path-method calls; same class as
817 // fd's residual `Path::symlink_metadata` under-report.)
818 if let Some(m) = path
819 .strip_prefix("std::path::Path::")
820 .or_else(|| path.strip_prefix("std::path::PathBuf::"))
821 {
822 const STAT: &[&str] = &[
823 "metadata", "symlink_metadata", "canonicalize", "read_link", "read_dir", "exists",
824 "try_exists", "is_file", "is_dir", "is_symlink",
825 ];
826 return STAT.contains(&m).then_some("Fs");
827 }
828 // Filesystem. `tokio::fs`/`async_std::fs` are the async mirrors of `std::fs`; `async_fs` is
829 // smol's fs crate; `fs_err` is a drop-in `std::fs` wrapper (its whole surface is fs I/O).
830 if path.starts_with("std::fs::")
831 || path.starts_with("tokio::fs::")
832 || path.starts_with("async_std::fs::")
833 || crate_name == "async_fs"
834 || crate_name == "fs_err"
835 {
836 return Some("Fs");
837 }
838 // memmap2: only `MmapOptions::map*` (and the in-place `Mmap::flush`/`make_*` protection
839 // changes / `remap`) actually issue the mmap/msync/mprotect/mremap syscall = Fs. The rest of the
840 // crate is PURE: `MmapOptions::new`/setters BUILD the request, and once a region is mapped, reads
841 // over it (`Mmap::len`/`is_empty`/`as_ptr`/`as_mut_ptr`/`deref` into the byte slice) are plain
842 // memory access with no syscall. Whole-crate Fs fabricated Fs on those reads (a `m.len()` the
843 // scanner's receiver inference routes to `memmap2::Mmap::len`). Match the syscall-issuing verbs;
844 // everything else returns None (pure). `map*` covers `map`/`map_mut`/`map_exec`/`map_copy`/
845 // `map_copy_read_only`/`map_raw`/`map_raw_read_only`/`map_anon`.
846 if crate_name == "memmap2" {
847 let m = path.rsplit("::").next().unwrap_or(path);
848 if m.starts_with("map")
849 || m == "flush"
850 || m == "flush_async"
851 || m == "flush_range"
852 || m == "flush_async_range"
853 || m == "remap"
854 || m.starts_with("make_")
855 || m == "advise"
856 || m == "advise_range"
857 || m == "lock"
858 || m == "unlock"
859 {
860 return Some("Fs");
861 }
862 return None;
863 }
864 // tempfile: creating a temp file/dir touches the disk. Match the create/persist verbs (the
865 // `Builder` setters — prefix/suffix/rand_bytes — stay pure). `persist`/`keep` rename/retain
866 // the file on disk; `close` removes it.
867 if crate_name == "tempfile"
868 && (path.ends_with("::tempfile")
869 || path.ends_with("::tempfile_in")
870 || path.ends_with("::tempdir")
871 || path.ends_with("::tempdir_in")
872 || path.ends_with("NamedTempFile::new")
873 || path.ends_with("NamedTempFile::new_in")
874 || path.ends_with("TempDir::new")
875 || path.ends_with("TempDir::new_in")
876 || path.ends_with("::persist")
877 || path.ends_with("::persist_noclobber")
878 || path.ends_with("::keep"))
879 {
880 return Some("Fs");
881 }
882 // glob: walks the filesystem to expand a pattern (the returned iterator reads directories).
883 // `Pattern::matches` is pure string matching — match only the directory-walking entry points.
884 if crate_name == "glob" && (path.ends_with("::glob") || path.ends_with("::glob_with")) {
885 return Some("Fs");
886 }
887 // Password-hashing / KDF crates — the entropy tier (the TS engine's CTA lesson: an invisible
888 // argon2 landed on exactly the call a security review cares about). In this engine's
889 // verb-precise style the ENTROPY is the salt mint: `SaltString::generate(OsRng)` in the
890 // password-hash API family, and bcrypt's `hash`/`hash_with_result` (salt minted internally).
891 // Verification and explicit-salt hashing are deterministic recomputation — pure. `rand_core`
892 // carries the OsRng source itself (otherwise the most common salt mint is invisible).
893 if matches!(crate_name, "argon2" | "scrypt" | "pbkdf2" | "password_hash") {
894 if path.contains("SaltString::generate") {
895 return Some("Rand");
896 }
897 return None;
898 }
899 if crate_name == "bcrypt" {
900 if path.ends_with("::hash") || path.ends_with("::hash_with_result") {
901 return Some("Rand");
902 }
903 return None;
904 }
905 if crate_name == "rand_core" {
906 if path.contains("OsRng")
907 || path.ends_with("::next_u32")
908 || path.ends_with("::next_u64")
909 || path.ends_with("::fill_bytes")
910 {
911 return Some("Rand");
912 }
913 return None;
914 }
915 // Randomness / entropy. `getrandom` is effectful end-to-end. `rand` is NOT — it
916 // mixes entropy/generation (effectful) with *pure* distribution constructors (`Uniform::new`,
917 // `Normal::new`) and deterministic-seed constructors (`seed_from_u64`). Flagging the whole crate
918 // over-reported those as `Rand`; match only the calls that actually consume randomness — the
919 // entropy sources (`OsRng`, `thread_rng`/`rng`, `from_entropy`/`from_os_rng`) and the generation
920 // verbs (`gen*`/`random*`/`fill*`/`sample*`/`next_u*`). A `Uniform::new` is now correctly pure.
921 if crate_name == "getrandom" {
922 return Some("Rand");
923 }
924 // fastrand: like `rand`, it mixes entropy-consuming generation (effectful) with PURE deterministic
925 // pieces. `Rng::with_seed(42)` is a DETERMINISTIC seeded constructor (consumes no entropy — the same
926 // seed gives the same stream), and `Rng::fork`/`Rng::clone` just split/copy existing state. Those are
927 // PURE; whole-crate Rand fabricated Rand on them. The effect is the value-drawing methods (`u32`/
928 // `usize`/`bool`/`f64`/`char`/`alphanumeric`/`choice`/`choose_multiple`/`shuffle`/`fill`/the range
929 // forms) AND the entropy-seeded entry points: bare `Rng::new()` (seeds from the global entropy-backed
930 // generator), `fastrand::seed`, and the top-level `fastrand::u32(..)` free functions (which draw from
931 // the thread-local generator). `with_seed` is exempted explicitly; any other method on an `Rng`
932 // (i.e. a value draw) is Rand.
933 if crate_name == "fastrand" {
934 let m = path.rsplit("::").next().unwrap_or(path);
935 // Provably pure: deterministic seeded ctor + state split/copy.
936 if m == "with_seed" || m == "fork" || m == "clone" {
937 return None;
938 }
939 // Everything else fastrand exposes either draws a value or seeds from entropy → Rand. (The crate
940 // has no pure data types beyond the `Rng` handle itself, so a non-draw stray would have to be a
941 // method we don't recognise — keep the effect, the safe direction.)
942 return Some("Rand");
943 }
944 if crate_name == "rand" {
945 let rng_verb = path.ends_with("::gen")
946 || path.ends_with("::gen_range")
947 || path.ends_with("::gen_bool")
948 || path.ends_with("::gen_ratio")
949 || path.ends_with("::random")
950 || path.ends_with("::random_range")
951 || path.ends_with("::random_bool")
952 || path.ends_with("::random_ratio")
953 || path.ends_with("::random_iter") // rand 0.9 iterator generator
954 || path.ends_with("::gen_iter")
955 || path.ends_with("::fill")
956 || path.ends_with("::fill_bytes")
957 || path.ends_with("::try_fill")
958 || path.ends_with("::try_fill_bytes")
959 || path.ends_with("::sample")
960 || path.ends_with("::sample_iter")
961 || path.ends_with("::next_u32")
962 || path.ends_with("::next_u64")
963 || path.ends_with("::thread_rng")
964 || path.ends_with("::rng")
965 || path.ends_with("::from_entropy")
966 || path.ends_with("::from_os_rng");
967 // `OsRng` is the OS entropy SOURCE, but `clone`/`fork`/`default` just copy or construct the
968 // (zero-sized) handle and draw no entropy — pure, exactly like the `fastrand` arm's clone/fork
969 // exemption above. The actual draws (`fill_bytes`/`next_u*`/…) are caught by `rng_verb`. Without
970 // this exemption the blanket `contains("OsRng")` fabricated `Rand` on `OsRng::clone` (adversarial
971 // review: OsRng is a unit struct, cloning consumes nothing).
972 let m = path.rsplit("::").next().unwrap_or(path);
973 let os_rng = path.contains("OsRng") && !matches!(m, "clone" | "fork" | "default");
974 if rng_verb || os_rng {
975 return Some("Rand");
976 }
977 return None;
978 }
979 // Subprocess spawning. `tokio::process` is the async mirror of `std::process` — it exists
980 // only to spawn/control subprocesses (`Command`/`Child`, no pure data types like std's
981 // `Stdio`/`ExitStatus`/`exit`), so spawning through it is Exec just the same. Without this an
982 // async app's `tokio::process::Command::new(..).spawn()` classified pure — a silent under-report
983 // of subprocess execution, the dangerous direction (mirrors the tokio::fs/tokio::net coverage).
984 if path.starts_with("std::process::Command")
985 || path.starts_with("std::process::Child")
986 || path.starts_with("tokio::process::Command")
987 || path.starts_with("tokio::process::Child")
988 || path.starts_with("async_std::process::Command")
989 || path.starts_with("async_std::process::Child")
990 {
991 // PURE read-backs of the builder's stored fields / the cached pid — no spawn, no syscall — so the
992 // whole-type Exec rule fabricated Exec on them (sweep [23]; mirrors the portable_pty getter carve-
993 // out just below). get_program/get_args/get_envs/get_current_dir read the Command; Child::id reads
994 // the cached pid. Every genuine verb (new/spawn/output/status/wait/kill) stays Exec.
995 if path.ends_with("::get_program")
996 || path.ends_with("::get_args")
997 || path.ends_with("::get_envs")
998 || path.ends_with("::get_current_dir")
999 || path.ends_with("Child::id")
1000 {
1001 return None;
1002 }
1003 return Some("Exec");
1004 }
1005 // portable_pty / async_process are whole-crate Exec EXCEPT for the proven-pure surface they expose:
1006 // the `CommandBuilder` GETTERS (`get_argv`/`get_cwd`/`get_env`/`as_unix_command_line`…) read back
1007 // configuration, and the PURE DATA types (`PtySize::default`, `ExitStatus`/`Stdio`/`CommandBuilder`
1008 // construction/setters). The earlier `is_cmd_naming_method` fix stopped the head-refinement LEAK, but
1009 // the BASE Exec still fabricated on these accessors (a `cmd.get_cwd()` the scanner routes to
1010 // `portable_pty::CommandBuilder::get_cwd`). Subtract the read-back getters and the obvious pure
1011 // ctors/setters; the spawn/wait/exec surface (`spawn_command`/`openpty`/`wait`/`kill`/`exec`…) keeps
1012 // Exec. SUBTRACT only what is provably pure — when unrecognised, KEEP Exec (the safe direction).
1013 if crate_name == "async_process" || crate_name == "portable_pty" {
1014 let m = path.rsplit("::").next().unwrap_or(path);
1015 // configuration read-back getters — pure (no spawn).
1016 if m.starts_with("get_") || m == "as_unix_command_line" {
1017 return None;
1018 }
1019 // pure data-type ctors/setters/derives that NAME no program and spawn nothing.
1020 if matches!(
1021 m,
1022 "default" | "new" | "piped" | "null" | "inherit" | "from_raw_fd"
1023 | "arg" | "args" | "arg0" | "env" | "envs" | "env_clear" | "env_remove"
1024 | "cwd" | "current_dir" | "rows" | "cols"
1025 | "clone" | "fmt" | "eq" | "ne" | "hash"
1026 ) {
1027 return None;
1028 }
1029 return Some("Exec");
1030 }
1031 // duct: a subprocess-orchestration crate. `cmd()`/`cmd!` only *build* an Expression; the
1032 // spawn/wait happens at `run`/`read`/`start`. Match the execution verbs, not the builder.
1033 if crate_name == "duct"
1034 && (path.ends_with("::run")
1035 || path.ends_with("::read")
1036 || path.ends_with("::start")
1037 || path.ends_with("::read_chars"))
1038 {
1039 return Some("Exec");
1040 }
1041 if path.starts_with("std::env::") {
1042 return Some("Env");
1043 }
1044 // dotenvy / dotenv: load environment variables (reading a `.env` file and mutating the process
1045 // environment). Match the load/read entry points; `Error`/builder types stay pure.
1046 if matches!(crate_name, "dotenvy" | "dotenv")
1047 && (path.ends_with("::dotenv")
1048 || path.ends_with("::dotenv_override")
1049 || path.ends_with("::from_path")
1050 || path.ends_with("::from_path_override")
1051 || path.ends_with("::from_filename")
1052 || path.ends_with("::from_filename_override")
1053 || path.ends_with("::from_read")
1054 || path.ends_with("::from_read_override")
1055 || path.ends_with("::load")
1056 || path.ends_with("::var")
1057 || path.ends_with("::vars"))
1058 {
1059 return Some("Env");
1060 }
1061 // Wall-clock reads. Match the `now` accessor precisely (ends_with), not any path
1062 // containing the substring "now". The `time` crate (distinct from `std::time`/`chrono`)
1063 // reads the clock via `now_utc`/`now_local` (and the deprecated `Instant::now`).
1064 if (crate_name == "chrono" || path.starts_with("std::time::")) && path.ends_with("::now") {
1065 return Some("Clock");
1066 }
1067 if crate_name == "time"
1068 && (path.ends_with("::now_utc") || path.ends_with("::now_local") || path.ends_with("::now"))
1069 {
1070 return Some("Clock");
1071 }
1072 // `tracing`: same principle as the `log` facade below — the crate's TYPES are pure data, so match
1073 // the emit, not the whole crate. The actual program output is the macro-expanded
1074 // `Subscriber::event`/`event!`/`Span::*enter*` dispatch and the `Span::new*`/`Span::record`
1075 // recording path that drives the subscriber. The data-type accessors — `Level::as_str`,
1076 // `Span::is_disabled`/`metadata`/`id`, and constructing/reading `Level`/`LevelFilter`/`Span`/
1077 // `Event`/`Metadata`/`Field`/`FieldSet`/`Id` — are PURE (no output is produced), so whole-crate Log
1078 // fabricated Log on them. Match the emit verbs; everything else returns None.
1079 if crate_name == "tracing" {
1080 let m = path.rsplit("::").next().unwrap_or(path);
1081 // The user-facing emit MACROS (`tracing::info!`/`warn!`/…) — candor-scan is pre-expansion, so it
1082 // sees the raw macro path `tracing::info`, not the expanded `__tracing`/`Subscriber::event` the
1083 // deep (post-expansion) engine sees. Only the macro names; the pure DATA types (Level/Span/Event)
1084 // have other tails and stay None.
1085 if m == "trace" || m == "debug" || m == "info" || m == "warn" || m == "error"
1086 || m == "trace_span" || m == "debug_span" || m == "info_span" || m == "warn_span"
1087 || m == "error_span" || m == "span"
1088 || m == "event"
1089 || m == "new_span"
1090 || m == "record"
1091 || m == "record_follows_from"
1092 || m == "enter"
1093 || m == "exit"
1094 || m == "in_scope"
1095 || m == "entered"
1096 || path.contains("::__macro_support")
1097 || path.contains("::__tracing")
1098 || path.contains("Subscriber::event")
1099 || path.contains("Subscriber::new_span")
1100 || path.contains("Subscriber::enter")
1101 || path.contains("Subscriber::exit")
1102 {
1103 return Some("Log");
1104 }
1105 return None;
1106 }
1107 // The `log` facade: its macros route through `log::__private_api`; the crate's types
1108 // (`Level`, `LevelFilter`) are pure, so match the logging entry, not the whole crate.
1109 if crate_name == "log" {
1110 // Expanded macro form (deep engine) OR the raw user-facing macro names (candor-scan, pre-expansion).
1111 // `log::Level`/`LevelFilter`/`Record`/`Metadata` have other tails, so the type surface stays pure.
1112 let m = path.rsplit("::").next().unwrap_or(path);
1113 if path.contains("::__private_api")
1114 || m == "error" || m == "warn" || m == "info" || m == "debug" || m == "trace" || m == "log"
1115 {
1116 return Some("Log");
1117 }
1118 }
1119 // Compiler diagnostic emission — the ONE genuinely effectful operation in the otherwise-pure
1120 // rustc_* surface (a dylint lint's actual OUTPUT: it writes warnings/errors to the compiler's
1121 // diagnostic sink). Classified `Log` (same family as `tracing`/`log` — program output). Match the
1122 // emission verbs precisely; rustc_lint/rustc_errors are mostly pure types (Lint, LintId, the Diag
1123 // BUILDERS), and only the terminal `emit`/`emit_span_lint` actually produces output.
1124 if crate_name == "rustc_lint"
1125 && (path.ends_with("::emit_span_lint")
1126 || path.ends_with("::span_lint")
1127 || path.ends_with("::span_lint_hir"))
1128 {
1129 return Some("Log");
1130 }
1131 if crate_name == "rustc_errors"
1132 && (path.ends_with("::emit")
1133 || path.ends_with("::emit_diagnostic")
1134 || path.ends_with("::emit_now"))
1135 {
1136 return Some("Log");
1137 }
1138 // arboard: the effectful surface is the `Clipboard` handle's read/write verbs (each talks to the
1139 // OS clipboard / X11/Wayland/Win32/NSPasteboard server). The data types — chiefly `arboard::Error`
1140 // (whose `Display`/`to_string` formatting is pure) and the `ImageData`/`GetExtLinux`/`SetExtLinux`
1141 // option types — are PURE, so whole-crate Clipboard fabricated Clipboard on e.g. an error
1142 // `to_string()`. Match the handle verbs; everything else returns None. `Clipboard::new` opens the
1143 // connection to the clipboard server, so it's an effect too; `get`/`set` return the
1144 // builder-then-read `Get`/`Set` cursors whose `text`/`image`/`html` terminals do the I/O.
1145 if crate_name == "arboard" {
1146 let m = path.rsplit("::").next().unwrap_or(path);
1147 if m == "new"
1148 || m == "get"
1149 || m == "set"
1150 || m == "clear"
1151 || m == "get_text"
1152 || m == "set_text"
1153 || m == "set_html"
1154 || m == "get_image"
1155 || m == "set_image"
1156 || m == "text"
1157 || m == "image"
1158 || m == "html"
1159 {
1160 return Some("Clipboard");
1161 }
1162 return None;
1163 }
1164 // ── Coverage-differential additions (calibrated against each crate's real API; see the per-crate
1165 // notes). All verb-keyed + crate-gated, with the pure builder/config/data surface returning None.
1166
1167 // `etcetera` — XDG/known-folder base+app directory resolution. Each dir ACCESSOR reads the
1168 // environment at call time (`$HOME`/`$XDG_*` on Unix, `%APPDATA%`/`%LOCALAPPDATA%` on Windows), and
1169 // the `choose_*`/`home_dir` entry points read `$HOME`. The `AppStrategyArgs` data struct and the
1170 // strategy types themselves are PURE. (Found DISCLOSED-but-unmodeled in 3/4 differential projects.)
1171 if crate_name == "etcetera" {
1172 let m = path.rsplit("::").next().unwrap_or(path);
1173 if m == "home_dir"
1174 || m == "choose_base_strategy" || m == "choose_native_strategy" || m == "choose_app_strategy"
1175 || m == "config_dir" || m == "data_dir" || m == "cache_dir"
1176 || m == "state_dir" || m == "runtime_dir" || m == "data_local_dir"
1177 {
1178 return Some("Env");
1179 }
1180 return None;
1181 }
1182 // `sqlx-core` (crate `sqlx_core`) — the execution terminals under the sqlx core (the `sqlx` builder
1183 // table maps `sqlx::query*`; here it's the core `Executor`/`Connection`/`Pool` round-trips). Opening
1184 // the connection is the network boundary (Net); the query/transaction round-trips are Db. The
1185 // `*Options`/query-builder/row data types are PURE. Crate-gated so the generic verbs never spread.
1186 if crate_name == "sqlx_core" {
1187 if path.ends_with("::connect") || path.ends_with("::connect_with") {
1188 return Some("Net");
1189 }
1190 if path.ends_with("::fetch") || path.ends_with("::fetch_all") || path.ends_with("::fetch_one")
1191 || path.ends_with("::fetch_optional") || path.ends_with("::fetch_many")
1192 || path.ends_with("::execute") || path.ends_with("::execute_many")
1193 || path.ends_with("::prepare") || path.ends_with("::prepare_with")
1194 || path.ends_with("::acquire") || path.ends_with("::begin") || path.ends_with("::ping")
1195 {
1196 return Some("Db");
1197 }
1198 return None;
1199 }
1200 // `walkdir` — recursive directory traversal. The disk read (`read_dir` + `stat`) happens lazily in
1201 // `IntoIter::next` (driving the iterator), and `DirEntry::metadata` issues a `stat`. The
1202 // `WalkDir::new`/`max_depth`/`follow_links`/`sort_by` BUILDERS, `WalkDir::into_iter` (constructs the
1203 // iterator, no I/O until pulled), and the cached `DirEntry::path`/`file_name`/`file_type`/`depth`
1204 // accessors (`file_type` makes NO syscall) are PURE. (Companion to the already-modeled `ignore`.)
1205 if crate_name == "walkdir" {
1206 if path.ends_with("::IntoIter::next") || path.ends_with("::DirEntry::metadata") {
1207 return Some("Fs");
1208 }
1209 return None;
1210 }
1211 // `filetime` — file-timestamp mutation. The `set_*` free fns issue utimes/utimensat/futimens (Fs).
1212 // `FileTime::now` reads the system clock (Clock). The `FileTime::from_*`/`zero` value constructors
1213 // (incl. `from_last_modification_time(&Metadata)` etc., which read an ALREADY-loaded `&Metadata`, not
1214 // the disk) and the `seconds`/`nanoseconds` accessors are PURE.
1215 if crate_name == "filetime" {
1216 if path.ends_with("::set_file_mtime") || path.ends_with("::set_file_atime")
1217 || path.ends_with("::set_file_times") || path.ends_with("::set_symlink_file_times")
1218 || path.ends_with("::set_file_handle_times")
1219 {
1220 return Some("Fs");
1221 }
1222 if path.ends_with("::FileTime::now") {
1223 return Some("Clock");
1224 }
1225 return None;
1226 }
1227 // `execute` — the `Execute` trait that extends `std::process::Command` with run helpers. The
1228 // `execute*` verbs SPAWN a child process (Exec). The `execute::command`/`shell` free fns and the
1229 // `command!`/`command_args!` macros only BUILD a Command (no spawn) and stay PURE.
1230 if crate_name == "execute" {
1231 if path.contains("::execute") {
1232 return Some("Exec");
1233 }
1234 return None;
1235 }
1236 // `ctrlc` — installs an OS signal handler (Unix SIGINT/SIGTERM/SIGHUP, Windows CTRL_C_EVENT) and
1237 // spawns its handler thread. Signals are an inter-process control channel, so the closest bucket is
1238 // Ipc (candor has no dedicated Signal effect; same judgment as routing SysV/pipe IPC to Ipc).
1239 if crate_name == "ctrlc" {
1240 if path.ends_with("::set_handler") || path.ends_with("::try_set_handler") {
1241 return Some("Ipc");
1242 }
1243 return None;
1244 }
1245 // `clap` — argument parsing. ONLY the terminals that read `std::env::args_os` at call time are an
1246 // effect (Env): `get_matches`/`get_matches_mut`/`try_get_matches` and the derive `parse`/`try_parse`.
1247 // clap is MOSTLY PURE: the ENTIRE builder surface (`Command::new`/`arg`/`about`/`Arg::new`) stays
1248 // None, and crucially the `*_from`/`*_parse_from` variants take an EXPLICIT iterator (they do NOT
1249 // read argv) so they stay pure too. (`Arg::env` reads an env var at builder time but bare `::env` is
1250 // too generic to gate safely, so it's left unmodeled — under-report over fabrication.)
1251 if crate_name == "clap" {
1252 if path.ends_with("::get_matches") || path.ends_with("::get_matches_mut")
1253 || path.ends_with("::try_get_matches")
1254 || path.ends_with("::parse") || path.ends_with("::try_parse")
1255 {
1256 return Some("Env");
1257 }
1258 return None;
1259 }
1260 // `jiff` — date/time. `Timestamp::now`/`Zoned::now`/`Zoned::now_with` read the wall clock (Clock).
1261 // `tz::TimeZone::system`/`get` and `tz::db().get` read the system tzdb files from disk
1262 // (`/etc/localtime`, `/usr/share/zoneinfo`; `system` is also `$TZ`-overridable — Fs is the dominant
1263 // op, modeled as Fs). The `Span`/`civil` date math and `Timestamp`/`Zoned` arithmetic are PURE.
1264 if crate_name == "jiff" {
1265 if path.ends_with("::now") || path.ends_with("::now_with") {
1266 return Some("Clock");
1267 }
1268 if path.ends_with("::TimeZone::system") || path.ends_with("::TimeZone::get")
1269 || path.ends_with("::TimeZoneDatabase::get")
1270 {
1271 return Some("Fs");
1272 }
1273 return None;
1274 }
1275 // `env_logger` — installs the global logger and emits to stderr; reads `RUST_LOG`/`RUST_LOG_STYLE`.
1276 // The init terminals are the effect (Log — program output, same family as `log`/`tracing`). The
1277 // `Builder::new`/`build` and the format/filter/target config setters are PURE.
1278 if crate_name == "env_logger" {
1279 if path.ends_with("::init") || path.ends_with("::try_init")
1280 || path.ends_with("::init_from_env") || path.ends_with("::try_init_from_env")
1281 {
1282 return Some("Log");
1283 }
1284 return None;
1285 }
1286 // `dialoguer` — interactive terminal prompts. The `interact*` verbs read stdin + write the tty (a
1287 // console dialogue with the user — Ipc, like the other local-channel effects). The
1288 // `with_prompt`/`default`/`items`/`validate_with` BUILDERS are PURE.
1289 if crate_name == "dialoguer" {
1290 if path.ends_with("::interact") || path.ends_with("::interact_on")
1291 || path.ends_with("::interact_text") || path.ends_with("::interact_text_on")
1292 || path.ends_with("::interact_opt") || path.ends_with("::interact_on_opt")
1293 {
1294 return Some("Ipc");
1295 }
1296 return None;
1297 }
1298 // `console` — terminal handle + styling. The `Term` read/write verbs do tty I/O (Ipc, the user
1299 // dialogue channel; note there is NO `write_str` — `Term` impls `io::Write`). The free-fn terminal
1300 // detection (`colors_enabled`/`user_attended`) reads `CLICOLOR`/`CLICOLOR_FORCE` (Env). The `Style`
1301 // color/format methods and the text utils (`strip_ansi_codes`/`pad_str`/`measure_text_width`) are PURE.
1302 if crate_name == "console" {
1303 if path.ends_with("::write_line") || path.ends_with("::read_line")
1304 || path.ends_with("::read_line_initial_text") || path.ends_with("::read_char")
1305 || path.ends_with("::read_key") || path.ends_with("::read_key_raw")
1306 || path.ends_with("::read_secure_line")
1307 {
1308 return Some("Ipc");
1309 }
1310 if path.ends_with("::colors_enabled") || path.ends_with("::colors_enabled_stderr")
1311 || path.ends_with("::user_attended") || path.ends_with("::user_attended_stderr")
1312 {
1313 return Some("Env");
1314 }
1315 return None;
1316 }
1317 // `terminal_colorsaurus` — queries the terminal's colours by writing OSC 10/11 escapes and reading the
1318 // reply (bidirectional tty dialogue — Ipc, consistent with dialoguer/console). Nothing else is I/O.
1319 if crate_name == "terminal_colorsaurus" {
1320 if path.ends_with("::background_color") || path.ends_with("::foreground_color")
1321 || path.ends_with("::color_palette") || path.ends_with("::theme_mode")
1322 {
1323 return Some("Ipc");
1324 }
1325 return None;
1326 }
1327 // `backoff` — retry-with-backoff. `retry`/`retry_notify` consult the clock and `thread::sleep`
1328 // between attempts (Clock). The `ExponentialBackoff`/builder config is PURE. (The user closure's own
1329 // effects are out of scope here — we model only backoff's own Clock effect.)
1330 if crate_name == "backoff" {
1331 if path.ends_with("::retry") || path.ends_with("::retry_notify") {
1332 return Some("Clock");
1333 }
1334 return None;
1335 }
1336 // `lscolors` — LS_COLORS parsing. ONLY `from_env` reads the environment (Env). `from_string`/
1337 // `style_for_path`/`style_for*` and the `Style` type take explicit input and are PURE.
1338 if crate_name == "lscolors" {
1339 if path.ends_with("::from_env") {
1340 return Some("Env");
1341 }
1342 return None;
1343 }
1344 // `wild` — argv with glob expansion. `args`/`args_os` read `std::env::args(_os)` (Env). Nothing else.
1345 if crate_name == "wild" {
1346 if path.ends_with("::args") || path.ends_with("::args_os") {
1347 return Some("Env");
1348 }
1349 return None;
1350 }
1351 // `grep_cli` — only the firm effect is modeled: `CommandReaderBuilder::build` spawns a child process
1352 // (Exec). The `is_readable_stdin`/`is_tty_*` fd probes (isatty/fstat on the std descriptors) are
1353 // deliberately NOT modeled — candor doesn't classify `IsTerminal`/isatty as an effect anywhere, and
1354 // they read no data; flagging them would be an inconsistent over-report.
1355 if crate_name == "grep_cli" {
1356 if path.ends_with("::build") {
1357 return Some("Exec");
1358 }
1359 return None;
1360 }
1361 // `clircle` — detects whether two handles are the same file (cycle protection). `Identifier::try_from`
1362 // (File/Stdio) issues an `fstat`, and `surely_conflicts_with` does an `lseek` (`stream_position`) — both
1363 // Fs. The `PartialEq`/`Hash` comparisons read stored dev/ino and are PURE. (The named methods
1364 // `are_identical`/`same_file` do NOT exist in the crate — not modeled.)
1365 if crate_name == "clircle" {
1366 if path.ends_with("::try_from") || path.ends_with("::surely_conflicts_with") {
1367 return Some("Fs");
1368 }
1369 return None;
1370 }
1371 None
1372}
1373
1374pub fn cap_from_name(name: &str) -> Option<&'static str> {
1375 EFFECTS.iter().copied().find(|e| *e == name)
1376}
1377
/// Refines the bare `Exec` cliff (spec §4 ⟨0.5⟩) for a *literal, statically-known* subprocess head.
///
/// The head is first reduced to its basename (`/usr/bin/curl` → `curl`; both `/` and `\` act as
/// separators) and then looked up in a small curated table. The returned effects are ADDED to a
/// caller that already carries `Exec` — a subprocess is still spawned, so `Exec` is never
/// dropped. An unrecognised or dynamically-built head returns `&[]` and keeps the bare cliff
/// (never guess).
///
/// Rows of the table:
/// * network clients → `["Net"]`;
/// * database shells → `["Db"]`;
/// * **candor engines** → `["Env", "Fs"]`: spec §7 item 12 (the analyzer self-boundary)
///   guarantees an engine reads `Fs`/`Env` only, so this row is spec-supplied, not curation.
///
/// The curated rows follow the same under-report rule as the crate classifier. INVARIANT: every
/// head listed is an external tool that does NOT run the analysed project's own code, so
/// `make`/`npm`/`cargo` are deliberately absent and stay the cliff. The reference engines share
/// this table so the `Exec` boundary — the one boundary every engine hits — refines identically
/// (the §4-consistency argument).
pub fn classify_command_head(cmd: &str) -> &'static [&'static str] {
    // Only UNAMBIGUOUS single-effect tools belong in these tables. A multi-modal head
    // (`git status` is local, `git push` is Net; `rsync` local-vs-remote) would FABRICATE the
    // effect for its common case — the under-report rule forbids it, so such heads are left out
    // and keep the bare cliff.
    const NET_HEADS: &[&str] = &["curl", "wget", "http", "ssh", "scp", "sftp", "ftp", "telnet"];
    const DB_HEADS: &[&str] =
        &["psql", "mysql", "sqlite3", "mongosh", "mongo", "redis-cli", "cqlsh", "influx"];
    // candor engines — Fs/Env only, guaranteed by spec §7 item 12 (the analyzer self-boundary).
    const CANDOR_HEADS: &[&str] = &[
        "candor",
        "candor-run.sh",
        "candor-scan",
        "candor-query",
        "candor-java",
        "candor-classify",
        "candor-report",
        "cargo-candor",
    ];

    // Basename = everything after the last path separator (either flavour); the whole string
    // when there is no separator at all.
    let basename = match cmd.rfind(['/', '\\']) {
        Some(sep) => &cmd[sep + 1..],
        None => cmd,
    };

    if NET_HEADS.contains(&basename) {
        return &["Net"];
    }
    if DB_HEADS.contains(&basename) {
        return &["Db"];
    }
    if CANDOR_HEADS.contains(&basename) {
        return &["Env", "Fs"];
    }
    &[]
}
1401
/// Reports whether a subprocess-builder method merely MODIFIES the command (`.arg`, `.env`,
/// `.current_dir`, …) instead of NAMING the program (`Command::new`, `duct::cmd`).
///
/// A WHOLE-CRATE-Exec crate (`portable_pty`, `duct`, `async_process`) classifies *every* method
/// as `Exec`, so the head-refinement has to skip these modifiers: an arg or env-var-name literal
/// that happens to equal a known head (`.env("psql", …)`, `.arg("curl")`) would otherwise
/// FABRICATE that effect — the §1 under-report rule. `method` is the call path's last segment.
pub fn is_cmd_builder_method(method: &str) -> bool {
    const MODIFIERS: &[&str] = &[
        "arg",
        "args",
        "arg0",
        "env",
        "envs",
        "env_clear",
        "env_remove",
        "current_dir",
        "cwd",
        "stdin",
        "stdout",
        "stderr",
        "pre_exec",
        "creation_flags",
        "uid",
        "gid",
        "groups",
        "process_group",
    ];
    MODIFIERS.contains(&method)
}
1416
/// Reports whether a subprocess method NAMES the program, i.e. its first string literal IS the
/// command head to refine: `Command::new("curl")`, `duct::cmd("curl", …)`.
///
/// Head-refinement must fire ONLY for these methods — this is an ALLOWLIST, not "any method
/// except known modifiers". A whole-crate-Exec crate classifies EVERY method as `Exec`, so a
/// denylist leaked NON-naming methods that are not modifiers: a getter such as
/// `CommandBuilder::get_env("psql")` (reading back an env-var KEY, not a program) fed `"psql"` to
/// the head classifier and FABRICATED `Db` (review find). Only `new`/`cmd` name a program;
/// everything else (modifiers, `get_*` getters, custom builder methods) keeps the bare `Exec`
/// cliff — under-refine (safe) rather than fabricate. `std::process::Command` is verb-precise, so
/// getters never fire `Exec` there anyway; the allowlist makes the whole-crate-Exec crates safe
/// as well.
pub fn is_cmd_naming_method(method: &str) -> bool {
    method == "new" || method == "cmd"
}
1429
/// The masking guard (AS-EFF-008) for `Net`: reports whether `method` is a connection-
/// "establishing" verb, i.e. one that takes the HOST/URL as an argument.
///
/// A classified Net call through such a verb with no captured host literal leaves the endpoint
/// structurally INVISIBLE (a runtime-built host), so the surface is incomplete and the gate must
/// fail closed — otherwise a benign sibling literal would mask the runtime endpoint.
///
/// This is an ALLOWLIST, the SAFE direction: a USE-verb on an already-connected socket
/// (`stream.write`/`read`/`flush`, `socket.send`/`recv`) is not listed, so a missing literal
/// there (the host was fixed at `connect`) never false-positives. Under-catching an unusual
/// establishing verb is a missed mask (sound-with-disclosure), never a broken gate.
///
/// `method` is the call path's last segment.
pub fn is_net_establishing(method: &str) -> bool {
    const ESTABLISHING_VERBS: &[&str] = &[
        "connect",
        "connect_timeout",
        "get",
        "post",
        "put",
        "patch",
        "delete",
        "head",
        "request",
        "send_to",
        "lookup_host",
        "to_socket_addrs",
    ];
    ESTABLISHING_VERBS.iter().any(|verb| *verb == method)
}
1455
/// The masking guard (AS-EFF-008), the `Fs` analog of `is_net_establishing`: reports whether an
/// `Fs`-classified call takes the filesystem PATH as a string argument.
///
/// When it does and no literal was captured, the path is structurally INVISIBLE (a runtime-built
/// path), the surface is incomplete, and the gate fails closed.
///
/// This is an ALLOWLIST of the path-NAMING free functions / constructors (`fs::write`/`read`/
/// `File::open`/…), the SAFE direction:
/// * a path-stat METHOD whose path is the RECEIVER (`p.metadata()`, `p.exists()`) is invoked
///   method-form and the caller gates on `!is_method`, so this function never sees it;
/// * an op on an already-opened handle (`file.write_all`, `mmap.flush`, `tempfile()` — a random
///   name, no path arg) is not listed, so a missing literal there never false-positives.
///
/// Under-catching an unusual path-naming fn is a missed mask (sound-with-disclosure), never a
/// broken gate. `leaf` is the method/fn leaf (the path's last segment).
pub fn is_fs_path_arg(leaf: &str) -> bool {
    // std::fs / tokio::fs / async_std::fs / fs_err free functions taking a path argument.
    const PATH_FREE_FNS: &[&str] = &[
        "write",
        "read",
        "read_to_string",
        "read_dir",
        "read_link",
        "copy",
        "rename",
        "remove_file",
        "remove_dir",
        "remove_dir_all",
        "create_dir",
        "create_dir_all",
        "hard_link",
        "soft_link",
        "symlink",
        "symlink_file",
        "symlink_dir",
        "symlink_metadata",
        "canonicalize",
        "metadata",
        "set_permissions",
        "exists",
        "try_exists",
    ];
    // File / OpenOptions constructors taking a path argument.
    const PATH_CONSTRUCTORS: &[&str] = &["open", "create", "create_new"];

    PATH_FREE_FNS.contains(&leaf) || PATH_CONSTRUCTORS.contains(&leaf)
}
1499
/// The masking guard (AS-EFF-008), the `Db` analog of `is_net_establishing`: reports whether a
/// `Db`-classified call takes the raw SQL QUERY as a string argument.
///
/// When it does and no literal was captured, the table is structurally INVISIBLE (a
/// runtime-built query), the surface is incomplete, and the gate fails closed.
///
/// This is an ALLOWLIST of the SQL-string-bearing execution/prepare verbs, the SAFE direction.
/// Deliberately NOT listed:
/// * build-then-execute terminals that take NO SQL string (sqlx/diesel/sea_orm `fetch*`/`load*`/
///   `first`/`all`/`one`/`stream`, the document-store `find*`/`insert*`/…);
/// * non-query ops (`connect`/`open`/`acquire`/`begin`/`commit`/`ping`/`get_conn`).
///
/// Their query is built structurally (never a maskable string literal), so a missing literal
/// must not false-positive. Under-catching an unusual query verb is a missed mask
/// (sound-with-disclosure), never a broken gate. `leaf` is the method leaf (the path's last
/// segment).
pub fn is_db_query_arg(leaf: &str) -> bool {
    const SQL_STRING_VERBS: &[&str] = &[
        "execute",
        "execute_batch",
        "execute_unprepared",
        "batch_execute",
        "simple_query",
        "query",
        "query_one",
        "query_opt",
        "query_raw",
        "query_row",
        "query_map",
        "query_and_then",
        "query_typed",
        "query_all",
        "prepare",
        "prepare_typed",
        "prepare_cached",
        "exec",
        "exec_first",
        "exec_iter",
        "exec_map",
        "exec_fold",
        "exec_drop",
        "exec_batch",
        "prep",
        "run_command",
    ];
    SQL_STRING_VERBS.iter().any(|verb| *verb == leaf)
}
1541
/// Maps a cap-std capability *type* to the effect it authorises.
///
/// Holding one of these types (e.g. `&Dir`) is the real, unforgeable right to perform that
/// effect, so candor treats it as a declared capability, exactly like its own `&Fs` token.
///
/// Returns `None` when `type_name` is not one of the recognised capability types, or when
/// `crate_name` does not start with `cap_`.
pub fn capstd_cap(crate_name: &str, type_name: &str) -> Option<&'static str> {
    let effect = match type_name {
        "Dir" => "Fs",
        "TcpListener" | "TcpStream" | "UdpSocket" | "Pool" => "Net",
        "UnixListener" | "UnixStream" | "UnixDatagram" => "Ipc",
        "SystemClock" | "MonotonicClock" => "Clock",
        _ => return None,
    };
    // The type name alone is not enough: it only counts when it comes from a `cap_*` crate.
    crate_name.starts_with("cap_").then_some(effect)
}
1557
/// Extracts the table names a SQL string literal STATICALLY reaches — the `Db` analog of the
/// `Net` host / `Exec` command / `Fs` path literal surface (feeds
/// `allow Db in <scope> <table>…`, AS-EFF-008).
///
/// Conservative by construction, because a wrong capture here would FABRICATE:
/// * the string must open with a SQL statement keyword, otherwise the result is empty;
/// * only identifiers in table position are taken — after `FROM`/`JOIN`/`INTO` anywhere, after a
///   statement-leading `UPDATE`/`TRUNCATE`, and after `TABLE` (create/drop/alter), skipping
///   `ONLY`/`IF NOT EXISTS`;
/// * `UPDATE` mid-statement is deliberately ignored (`FOR UPDATE SKIP LOCKED` must not yield a
///   table "skip");
/// * a dynamically-built query yields nothing — the gate's opaque case — never a guess.
///
/// Output is lower-cased, quote/backtick-stripped, keeps `schema.table` qualified, and is
/// deduplicated in first-seen order.
///
/// SPEC §2 pins this algorithm token-for-token across engines; the cross-impl vector battery
/// (candor-spec conformance/tables/vectors.json, run.sh Part 4b) enforces the JVM/TS mirrors.
pub fn tables_in_sql(sql: &str) -> Vec<String> {
    // Leading tokens that qualify the literal as SQL at all.
    const STMT: &[&str] = &[
        "select", "insert", "update", "delete", "create", "drop", "alter", "truncate", "merge",
        "replace", "with",
    ];
    // Tokens that can FOLLOW a table-introducing keyword without being a table.
    const SKIP: &[&str] = &["only", "if", "not", "exists", "table"];
    // Identifier-position tokens that are grammar, not a table (subqueries, locking clauses…).
    const STOP: &[&str] = &[
        "select", "set", "where", "values", "on", "using", "group", "order", "by", "limit",
        "returning", "as", "inner", "outer", "left", "right", "cross", "lateral", "natural",
        "union", "all", "distinct", "case", "when", "null", "default", "skip", "nowait", "of",
        "from", "join", "into", "update", "delete", "insert",
    ];

    // Turns one token into a table name, or `None` when it cannot be one: surrounding quotes are
    // trimmed, the remainder must look like a (possibly qualified) identifier and must not be a
    // grammar word; interior `"`/backticks (from `"s"."t"`) are dropped from the result.
    fn table_ident(raw: &str) -> Option<String> {
        let bare = raw.trim_matches(|c: char| c == '"' || c == '`' || c == '\'');
        let starts_ok = bare.starts_with(|c: char| c.is_ascii_alphabetic() || c == '_');
        let body_ok = bare
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '.' | '$' | '"' | '`'));
        if !(starts_ok && body_ok) || STOP.contains(&bare) {
            return None;
        }
        Some(bare.chars().filter(|c| *c != '"' && *c != '`').collect())
    }

    // Appends `table` unless it was already captured (keeps first-seen order).
    fn remember(found: &mut Vec<String>, table: String) {
        if !found.contains(&table) {
            found.push(table);
        }
    }

    // Normalise: lower-case, blank out parens/semicolons, and let `,` survive as its OWN token.
    // The standalone comma is what lets `FROM t1, t2` continue the table list without
    // fabricating from other comma-ridden positions (column lists, ON clauses).
    let lowered = sql.to_lowercase();
    let mut spaced = String::with_capacity(lowered.len());
    for ch in lowered.chars() {
        match ch {
            '(' | ')' | ';' => spaced.push(' '),
            ',' => spaced.push_str(" , "),
            other => spaced.push(other),
        }
    }
    let words: Vec<&str> = spaced.split_whitespace().collect();

    // Not SQL (or empty) — nothing to certify, nothing fabricated.
    if !words.first().is_some_and(|head| STMT.contains(head)) {
        return Vec::new();
    }

    let mut found = Vec::new();
    for (idx, word) in words.iter().enumerate() {
        // `update`/`truncate` introduce a table only when they lead the statement
        // (`update t set …`, `truncate [table] t`); the others do so anywhere.
        let opens_table_slot = matches!(*word, "from" | "join" | "into" | "table")
            || (idx == 0 && matches!(*word, "update" | "truncate"));
        if !opens_table_slot {
            continue;
        }

        // Step over filler such as `ONLY` / `IF NOT EXISTS` / `TABLE`.
        let filler = words[idx + 1..].iter().take_while(|w| SKIP.contains(*w)).count();
        let mut cursor = idx + 1 + filler;

        let Some(table) = words.get(cursor).and_then(|w| table_ident(w)) else {
            continue;
        };
        remember(&mut found, table);

        // Comma-ADJACENT continuation only: `FROM t1, t2, t3` takes all three, while an alias
        // breaks the chain (`FROM t1 a, t2` keeps just t1 — an under-report, never a guess:
        // skipping an alias to chase the comma would fabricate tables out of
        // `INSERT INTO t (a, b)`'s column list, whose parens are spaces by the time we tokenize).
        while words.get(cursor + 1).copied() == Some(",") {
            let Some(more) = words.get(cursor + 2).and_then(|w| table_ident(w)) else {
                break;
            };
            remember(&mut found, more);
            cursor += 2;
        }
    }
    found
}
1641
1642#[cfg(test)]
1643mod tests {
1644 #[test]
1645 fn sql_table_extraction_is_conservative() {
1646 use super::tables_in_sql as t;
1647 assert_eq!(t("SELECT id FROM users WHERE x = 1"), vec!["users"]);
1648 assert_eq!(t("select * from ledger.entries e join customers c on c.id = e.cid"),
1649 vec!["ledger.entries", "customers"]);
1650 assert_eq!(t("INSERT INTO audit_log (a) VALUES (?1)"), vec!["audit_log"]);
1651 assert_eq!(t("UPDATE accounts SET v = ?"), vec!["accounts"]);
1652 assert_eq!(t("DELETE FROM sessions WHERE id = ?"), vec!["sessions"]);
1653 assert_eq!(t("CREATE TABLE IF NOT EXISTS cache (k TEXT)"), vec!["cache"]);
1654 assert_eq!(t("TRUNCATE TABLE staging"), vec!["staging"]);
1655 // FOR UPDATE locking clause must not yield a phantom table (mid-statement update ignored)
1656 assert_eq!(t("SELECT * FROM jobs FOR UPDATE SKIP LOCKED"), vec!["jobs"]);
1657 // a subquery in FROM position yields nothing for that position
1658 assert_eq!(t("SELECT * FROM (SELECT 1) q"), Vec::<String>::new());
1659 // not SQL -> nothing (never fabricate)
1660 assert_eq!(t("/tmp/some/path"), Vec::<String>::new());
1661 assert_eq!(t("hello world from nowhere"), Vec::<String>::new());
1662 // comma-ADJACENT continuation: a FROM list takes every table in the chain…
1663 assert_eq!(t("SELECT a FROM t1, t2, s.t3 WHERE x = 1"), vec!["t1", "t2", "s.t3"]);
1664 // …but an alias breaks it (under-report, never a guess)…
1665 assert_eq!(t("SELECT a FROM t1 a1, t2 WHERE x = 1"), vec!["t1"]);
1666 // …which is exactly what keeps a column list from fabricating (parens are spaces by now).
1667 assert_eq!(t("INSERT INTO t (a, b) VALUES (1, 2)"), vec!["t"]);
1668 // a subquery after the comma stops the chain too
1669 assert_eq!(t("SELECT a FROM t1, (SELECT 1) q"), vec!["t1"]);
1670 }
1671
1672 use super::*;
1673
1674 #[test]
1675 fn db_crates_are_calibrated() {
1676 // The calibrated set must cover every DB client the classifier knows, or the receipt's coverage
1677 // check would flag a recognized crate as a blind spot. (Was nightly-lint-only; now runs on stable.)
1678 for c in DB_CRATES {
1679 assert!(
1680 CALIBRATED_CRATES.contains(&c),
1681 "DB crate `{c}` is matched by classify() but missing from CALIBRATED_CRATES"
1682 );
1683 }
1684 }
1685
1686 #[test]
1687 fn calibrated_crates_are_live() {
1688 // Conversely, every crate advertised as calibrated must actually be matched by classify() for
1689 // some representative path — a dead entry would silently suppress a real coverage warning.
1690 for c in CALIBRATED_CRATES {
1691 assert!(
1692 CALIBRATION_PROBE_TAILS.iter().any(|t| classify(c, &format!("{c}{t}")).is_some()),
1693 "calibrated crate `{c}` is matched by no path in classify() — dead list entry"
1694 );
1695 }
1696 }
1697
1698 #[test]
1699 fn async_http_stack_classifies() {
1700 // The modern async-HTTP/TLS/QUIC/DNS stack (found by the independent-method differential on oha):
1701 // verb-keyed Net/Ipc/Fs/Env, crate-gated so generic verbs never fabricate across crates.
1702 assert_eq!(classify("hyper", "hyper::client::conn::http1::SendRequest::send_request"), Some("Net"));
1703 assert_eq!(classify("hyper", "hyper::client::conn::http1::handshake"), Some("Net"));
1704 assert_eq!(classify("hyper_util", "hyper_util::client::legacy::Client::request"), Some("Net"));
1705 assert_eq!(classify("hickory_resolver", "hickory_resolver::Resolver::lookup_ip"), Some("Net"));
1706 assert_eq!(classify("quinn", "quinn::Endpoint::connect"), Some("Net"));
1707 assert_eq!(classify("quinn", "quinn::RecvStream::read_to_end"), Some("Net")); // stream byte I/O, not just open
1708 assert_eq!(classify("quinn", "quinn::SendStream::write_all"), Some("Net"));
1709 assert_eq!(classify("tokio_rustls", "tokio_rustls::TlsConnector::connect"), Some("Net"));
1710 assert_eq!(classify("native_tls", "native_tls::TlsConnector::connect"), Some("Net"));
1711 assert_eq!(classify("tokio_vsock", "tokio_vsock::VsockStream::connect"), Some("Ipc"));
1712 assert_eq!(classify("rustls_native_certs", "rustls_native_certs::load_native_certs"), Some("Fs"));
1713 assert_eq!(classify("rlimit", "rlimit::setrlimit"), Some("Env"));
1714 // num_cpus is deliberately PURE (consistency with std::thread::available_parallelism; avoids Env spray)
1715 assert_eq!(classify("num_cpus", "num_cpus::get"), None);
1716 assert_eq!(classify("num_cpus", "num_cpus::get_physical"), None);
1717 // pure surface stays None (no fabrication): builder/type/config paths, and other crates' generic verbs
1718 assert_eq!(classify("hyper", "hyper::Request::builder"), None);
1719 assert_eq!(classify("hyper", "hyper::body::Bytes::new"), None);
1720 assert_eq!(classify("native_tls", "native_tls::TlsConnectorBuilder::min_protocol_version"), None);
1721 assert_eq!(classify("serde", "serde::Deserialize::request"), None); // generic verb, wrong crate
1722 }
1723
1724 #[test]
1725 fn coverage_differential_crates_classify() {
1726 // Crates the coverage differential found DISCLOSED-but-unmodeled. Each rule is verb-keyed +
1727 // crate-gated; the EFFECT verbs map to the right bucket and the PURE surface stays None (a
1728 // wrongly-flagged pure crate is a fabrication, so the negatives matter as much as the positives).
1729
1730 // rustls (sync TLS core) — record I/O is Net; config/cert + the buffered-decrypt step are pure.
1731 assert_eq!(classify("rustls", "rustls::ClientConnection::read_tls"), Some("Net"));
1732 assert_eq!(classify("rustls", "rustls::ConnectionCommon::write_tls"), Some("Net"));
1733 assert_eq!(classify("rustls", "rustls::Connection::complete_io"), Some("Net"));
1734 assert_eq!(classify("rustls", "rustls::ConnectionCommon::process_new_packets"), None); // buffered decrypt, no I/O
1735 assert_eq!(classify("rustls", "rustls::ClientConfig::builder"), None); // pure config
1736
1737 // native-tls variants — handshake is Net; builder is pure.
1738 assert_eq!(classify("native_tls_crate", "native_tls_crate::TlsConnector::connect"), Some("Net"));
1739 assert_eq!(classify("tokio_native_tls", "tokio_native_tls::TlsAcceptor::accept"), Some("Net"));
1740 assert_eq!(classify("native_tls_crate", "native_tls_crate::TlsConnectorBuilder::min_protocol_version"), None);
1741
1742 // etcetera — dir resolution reads env; the args data type is pure.
1743 assert_eq!(classify("etcetera", "etcetera::home_dir"), Some("Env"));
1744 assert_eq!(classify("etcetera", "etcetera::base_strategy::choose_base_strategy"), Some("Env"));
1745 assert_eq!(classify("etcetera", "etcetera::base_strategy::Xdg::config_dir"), Some("Env"));
1746 assert_eq!(classify("etcetera", "etcetera::app_strategy::AppStrategyArgs::new"), None); // pure data
1747
1748 // sqlx-core — connect is Net, execute/fetch round-trips are Db; options/builders pure.
1749 assert_eq!(classify("sqlx_core", "sqlx_core::connection::Connection::connect"), Some("Net"));
1750 assert_eq!(classify("sqlx_core", "sqlx_core::executor::Executor::fetch_one"), Some("Db"));
1751 assert_eq!(classify("sqlx_core", "sqlx_core::executor::Executor::execute"), Some("Db"));
1752 assert_eq!(classify("sqlx_core", "sqlx_core::pool::Pool::acquire"), Some("Db"));
1753 assert_eq!(classify("sqlx_core", "sqlx_core::pool::PoolOptions::max_connections"), None); // pure builder
1754
1755 // walkdir — the lazy read happens in next()/metadata(); builders + cached accessors pure.
1756 assert_eq!(classify("walkdir", "walkdir::IntoIter::next"), Some("Fs"));
1757 assert_eq!(classify("walkdir", "walkdir::DirEntry::metadata"), Some("Fs"));
1758 assert_eq!(classify("walkdir", "walkdir::WalkDir::new"), None); // builder
1759 assert_eq!(classify("walkdir", "walkdir::WalkDir::into_iter"), None); // no I/O until pulled
1760 assert_eq!(classify("walkdir", "walkdir::DirEntry::file_type"), None); // cached, no syscall
1761
1762 // filetime — set_* are utimes (Fs), now is Clock; from_* constructors pure.
1763 assert_eq!(classify("filetime", "filetime::set_file_mtime"), Some("Fs"));
1764 assert_eq!(classify("filetime", "filetime::set_file_handle_times"), Some("Fs"));
1765 assert_eq!(classify("filetime", "filetime::FileTime::now"), Some("Clock"));
1766 assert_eq!(classify("filetime", "filetime::FileTime::from_unix_time"), None);
1767 assert_eq!(classify("filetime", "filetime::FileTime::from_last_modification_time"), None); // reads &Metadata, not disk
1768
1769 // execute — the execute* verbs spawn (Exec); command/shell builders pure.
1770 assert_eq!(classify("execute", "execute::Execute::execute"), Some("Exec"));
1771 assert_eq!(classify("execute", "execute::Execute::execute_output"), Some("Exec"));
1772 assert_eq!(classify("execute", "execute::Execute::execute_multiple_output"), Some("Exec"));
1773 assert_eq!(classify("execute", "execute::command"), None); // only builds a Command
1774 assert_eq!(classify("execute", "execute::shell"), None);
1775
1776 // ctrlc — install signal handler (Ipc).
1777 assert_eq!(classify("ctrlc", "ctrlc::set_handler"), Some("Ipc"));
1778 assert_eq!(classify("ctrlc", "ctrlc::try_set_handler"), Some("Ipc"));
1779
1780 // clap — only the argv-reading terminals are Env; the whole builder + *_from variants pure.
1781 assert_eq!(classify("clap", "clap::Command::get_matches"), Some("Env"));
1782 assert_eq!(classify("clap", "clap::Command::try_get_matches"), Some("Env"));
1783 assert_eq!(classify("clap", "clap::Parser::parse"), Some("Env"));
1784 assert_eq!(classify("clap", "clap::Command::new"), None); // builder
1785 assert_eq!(classify("clap", "clap::Arg::about"), None); // builder
1786 assert_eq!(classify("clap", "clap::Command::get_matches_from"), None); // explicit args, no argv read
1787
1788 // jiff — now* is Clock; tz lookups read the tzdb (Fs); span/civil math pure.
1789 assert_eq!(classify("jiff", "jiff::Timestamp::now"), Some("Clock"));
1790 assert_eq!(classify("jiff", "jiff::Zoned::now_with"), Some("Clock"));
1791 assert_eq!(classify("jiff", "jiff::tz::TimeZone::system"), Some("Fs"));
1792 assert_eq!(classify("jiff", "jiff::tz::TimeZone::get"), Some("Fs"));
1793 assert_eq!(classify("jiff", "jiff::Span::checked_add"), None); // pure arithmetic
1794
1795 // env_logger — init installs the logger + reads RUST_LOG (Log); config setters pure.
1796 assert_eq!(classify("env_logger", "env_logger::init"), Some("Log"));
1797 assert_eq!(classify("env_logger", "env_logger::try_init"), Some("Log"));
1798 assert_eq!(classify("env_logger", "env_logger::Builder::init"), Some("Log"));
1799 assert_eq!(classify("env_logger", "env_logger::Builder::format_timestamp"), None); // config
1800 assert_eq!(classify("env_logger", "env_logger::Builder::build"), None); // pure build
1801
1802 // dialoguer — interact* is tty I/O (Ipc); builders pure.
1803 assert_eq!(classify("dialoguer", "dialoguer::Input::interact_text"), Some("Ipc"));
1804 assert_eq!(classify("dialoguer", "dialoguer::Confirm::interact"), Some("Ipc"));
1805 assert_eq!(classify("dialoguer", "dialoguer::Select::interact_opt"), Some("Ipc"));
1806 assert_eq!(classify("dialoguer", "dialoguer::Input::with_prompt"), None); // builder
1807
1808 // console — Term I/O is Ipc, detection is Env, Style is pure.
1809 assert_eq!(classify("console", "console::Term::write_line"), Some("Ipc"));
1810 assert_eq!(classify("console", "console::Term::read_key"), Some("Ipc"));
1811 assert_eq!(classify("console", "console::colors_enabled"), Some("Env"));
1812 assert_eq!(classify("console", "console::Style::cyan"), None); // pure styling
1813 assert_eq!(classify("console", "console::strip_ansi_codes"), None); // pure text util
1814
1815 // terminal_colorsaurus — tty colour query (Ipc).
1816 assert_eq!(classify("terminal_colorsaurus", "terminal_colorsaurus::background_color"), Some("Ipc"));
1817 assert_eq!(classify("terminal_colorsaurus", "terminal_colorsaurus::color_palette"), Some("Ipc"));
1818
1819 // backoff — retry sleeps + reads the clock (Clock); config pure.
1820 assert_eq!(classify("backoff", "backoff::retry"), Some("Clock"));
1821 assert_eq!(classify("backoff", "backoff::retry_notify"), Some("Clock"));
1822 assert_eq!(classify("backoff", "backoff::ExponentialBackoff::default"), None);
1823
1824 // lscolors — ONLY from_env reads the environment; from_string/style_for_path pure.
1825 assert_eq!(classify("lscolors", "lscolors::LsColors::from_env"), Some("Env"));
1826 assert_eq!(classify("lscolors", "lscolors::LsColors::from_string"), None);
1827 assert_eq!(classify("lscolors", "lscolors::LsColors::style_for_path"), None);
1828
1829 // wild — argv readers (Env).
1830 assert_eq!(classify("wild", "wild::args"), Some("Env"));
1831 assert_eq!(classify("wild", "wild::args_os"), Some("Env"));
1832
1833 // grep_cli — only the firm Exec (CommandReader spawn); the isatty probes stay unmodeled.
1834 assert_eq!(classify("grep_cli", "grep_cli::CommandReaderBuilder::build"), Some("Exec"));
1835 assert_eq!(classify("grep_cli", "grep_cli::is_readable_stdin"), None); // isatty/fstat, not modeled
1836 assert_eq!(classify("grep_cli", "grep_cli::is_tty_stdout"), None);
1837
1838 // clircle — same-file detection issues fstat/lseek (Fs); equality is pure.
1839 assert_eq!(classify("clircle", "clircle::Identifier::try_from"), Some("Fs"));
1840 assert_eq!(classify("clircle", "clircle::Clircle::surely_conflicts_with"), Some("Fs"));
1841 }
1842
#[test]
fn log_tracing_emit_macros_classify_pre_expansion() {
    // candor-scan works pre-expansion, so it is handed the raw macro path (`log::info`,
    // `tracing::warn`) instead of the expanded dispatch the deep engine observes. The table
    // pins the user-facing macro names and, after them, the data-type surface.
    const CASES: &[(&str, &str, Option<&str>)] = &[
        // emit / span macros, seen by their bare path
        ("log", "log::info", Some("Log")),
        ("log", "log::error", Some("Log")),
        ("tracing", "tracing::warn", Some("Log")),
        ("tracing", "tracing::info_span", Some("Log")),
        // the pure data-type surface must remain None (no fabricated Log)
        ("log", "log::Level::as_str", None),
        ("tracing", "tracing::Level::INFO", None),
    ];
    for &(krate, path, expected) in CASES {
        assert_eq!(classify(krate, path), expected, "classify({:?}, {:?})", krate, path);
    }
}
1855
#[test]
fn classify_core_effects() {
    // Representative smoke test over the classifier's main families, kept here so the published
    // crate does not ship untested (these checks used to exist only in the nightly-only
    // src/lib.rs). Each row is (crate, path, expected effect); rows run in order and the first
    // mismatch fails the test, naming the offending pair.
    const CASES: &[(&str, &str, Option<&str>)] = &[
        ("std", "std::fs::read_to_string", Some("Fs")),
        // std::path: the stat-family methods are Fs (each one is a stat/readdir syscall), while
        // the pure string-manipulation surface stays unclassified (the blackout screen's gix-dir
        // find).
        ("std", "std::path::Path::symlink_metadata", Some("Fs")),
        ("std", "std::path::PathBuf::read_dir", Some("Fs")),
        ("std", "std::path::Path::exists", Some("Fs")),
        ("std", "std::path::Path::join", None), // pure string manipulation
        ("std", "std::path::PathBuf::file_name", None),
        ("std", "std::path::Path::parent", None),
        ("std", "std::process::Command::new", Some("Exec")),
        ("std", "std::env::var", Some("Env")),
        ("reqwest", "reqwest::Client::execute", Some("Net")),
        // the one-shot convenience fns send right away → Net; the `Client::get` builder is pure.
        ("reqwest", "reqwest::get", Some("Net")),
        ("reqwest", "reqwest::blocking::get", Some("Net")),
        ("reqwest", "reqwest::Client::get", None),
        ("reqwest", "reqwest::RequestBuilder::header", None),
        // nix goes through the libc syscall table (same leaves): I/O is classified, generic fd
        // ops are skipped.
        ("nix", "nix::fcntl::open", Some("Fs")),
        ("nix", "nix::sys::socket::connect", Some("Net")),
        ("nix", "nix::unistd::execvp", Some("Exec")),
        ("nix", "nix::unistd::write", None), // generic fd op — deliberately unclassified
        ("nix", "nix::unistd::getpid", None), // not I/O
        // rustix issues raw syscalls (no libc underneath) → classified directly by leaf, same table.
        ("rustix", "rustix::time::clock_settime", Some("Clock")),
        ("rustix", "rustix::fs::symlink", Some("Fs")),
        ("rustix", "rustix::net::connect", Some("Net")),
        ("rustix", "rustix::io::read", None), // generic fd op
        // pnet raw packet capture: channel openers are Net, packet construction is pure.
        ("pnet", "pnet::datalink::channel", Some("Net")),
        ("pnet", "pnet::transport::transport_channel", Some("Net")),
        ("pnet_datalink", "pnet_datalink::channel", Some("Net")),
        ("pnet", "pnet::packet::ethernet::EthernetPacket::new", None),
        ("pnet_base", "pnet_base::MacAddr::new", None),
        // ignore (gitignore-aware walker): walk executors are Fs, config builders are pure.
        ("ignore", "ignore::WalkBuilder::build_parallel", Some("Fs")),
        ("ignore", "ignore::WalkBuilder::build", Some("Fs")),
        ("ignore", "ignore::WalkParallel::run", Some("Fs")),
        ("ignore", "ignore::WalkBuilder::add_ignore", Some("Fs")), // reads the ignore file
        ("ignore", "ignore::overrides::OverrideBuilder::build", None), // pure config
        ("ignore", "ignore::gitignore::GitignoreBuilder::build", None), // pure config
        ("ignore", "ignore::DirEntry::path", None), // pure accessor
        // notify fs-watching: watcher constructors and watch/unwatch are Fs, data types are pure.
        ("notify", "notify::RecommendedWatcher::new", Some("Fs")),
        ("notify", "notify::PollWatcher::new", Some("Fs")),
        ("notify", "notify::recommended_watcher", Some("Fs")),
        ("notify", "notify::INotifyWatcher::watch", Some("Fs")),
        ("notify", "notify::Config::default", None), // pure config
        ("notify", "notify::Event::new", None), // pure data type
        ("rusqlite", "rusqlite::Connection::execute", Some("Db")),
        // the rusqlite verb DIALECT (a verb probe found the canonical consumer API classifying pure):
        ("rusqlite", "rusqlite::Connection::query_row", Some("Db")),
        ("rusqlite", "rusqlite::Statement::query_map", Some("Db")),
        ("rusqlite", "rusqlite::Connection::execute_batch", Some("Db")),
        ("rusqlite", "rusqlite::Connection::prepare_cached", Some("Db")),
        ("rusqlite", "rusqlite::Connection::open", Some("Db")),
        ("rusqlite", "rusqlite::Connection::open_in_memory", Some("Db")),
        // …while `open` remains rusqlite-only (postgres has no open; nothing else may borrow it):
        ("postgres", "postgres::Client::open", None),
        ("tokio_postgres", "tokio_postgres::Client::query_typed", Some("Db")),
        // diesel's LIMIT-1 + streaming executions; sqlx's multi-result stream:
        ("diesel", "diesel::RunQueryDsl::first", Some("Db")),
        ("diesel", "diesel::RunQueryDsl::load_iter", Some("Db")),
        ("sqlx", "sqlx::query::Query::fetch_many", Some("Db")),
        // sqlx's bare `query()` builder must STAY pure (the original sqlx lesson):
        ("sqlx", "sqlx::query", None),
        // tracing: the emit/span-lifecycle dispatch is Log, the pure DATA-type accessors are not
        // (whole-crate Log fabricated Log on `Level::as_str` / `Span::is_disabled` — the data
        // types are pure, the same principle as the `log` facade).
        ("tracing", "tracing::event", Some("Log")),
        ("tracing", "tracing::Span::new_span", Some("Log")),
        ("tracing", "tracing::Span::record", Some("Log")),
        ("tracing", "tracing::Span::enter", Some("Log")),
        ("tracing", "tracing::Level::as_str", None), // pure accessor
        ("tracing", "tracing::Span::is_disabled", None), // pure state read
        ("tracing", "tracing::Span::metadata", None), // pure accessor
        ("tracing", "tracing::metadata::Level::TRACE", None), // pure data type
        ("tracing", "tracing::field::Field::name", None), // pure data type
        // memmap2: only the syscall-issuing map/flush/protect verbs are Fs; reads over an
        // already-mapped region (len/as_ptr/is_empty) and the request builder are PURE
        // (whole-crate Fs fabricated Fs).
        ("memmap2", "memmap2::MmapOptions::map", Some("Fs")),
        ("memmap2", "memmap2::MmapOptions::map_mut", Some("Fs")),
        ("memmap2", "memmap2::Mmap::flush", Some("Fs")),
        ("memmap2", "memmap2::MmapMut::make_read_only", Some("Fs")),
        ("memmap2", "memmap2::Mmap::len", None), // length read — pure
        ("memmap2", "memmap2::Mmap::is_empty", None), // pure
        ("memmap2", "memmap2::Mmap::as_ptr", None), // pointer — pure
        ("memmap2", "memmap2::MmapOptions::new", None), // request builder — pure
        // arboard: the Clipboard handle's read/write verbs are Clipboard; `arboard::Error`
        // formatting and option data types are PURE (whole-crate Clipboard fabricated Clipboard
        // on `Error::to_string`).
        ("arboard", "arboard::Clipboard::new", Some("Clipboard")),
        ("arboard", "arboard::Clipboard::get_text", Some("Clipboard")),
        ("arboard", "arboard::Clipboard::set_text", Some("Clipboard")),
        ("arboard", "arboard::Clipboard::clear", Some("Clipboard")),
        ("arboard", "arboard::Error::to_string", None), // error formatting — pure
        ("arboard", "arboard::Error::fmt", None), // Display impl — pure
        ("arboard", "arboard::ImageData::to_owned_img", None), // pure data type
        // fastrand: value draws and entropy-seeded entry points are Rand; the DETERMINISTIC
        // seeded ctor `with_seed` and state split/copy (`fork`/`clone`) are PURE (whole-crate
        // Rand fabricated Rand).
        ("fastrand", "fastrand::u32", Some("Rand")), // top-level draw
        ("fastrand", "fastrand::Rng::usize", Some("Rand")),
        ("fastrand", "fastrand::Rng::shuffle", Some("Rand")),
        ("fastrand", "fastrand::Rng::new", Some("Rand")), // entropy-seeded
        ("fastrand", "fastrand::Rng::with_seed", None), // deterministic ctor — pure
        ("fastrand", "fastrand::Rng::fork", None), // state split — pure
        ("fastrand", "fastrand::Rng::clone", None), // state copy — pure
        // portable_pty / async_process: spawn/wait keep Exec; config GETTERS and pure data
        // ctors/setters do NOT (base Exec fabricated on `CommandBuilder::get_cwd` /
        // `PtySize::default` / `Stdio::piped`).
        ("portable_pty", "portable_pty::PtySystem::openpty", Some("Exec")),
        ("portable_pty", "portable_pty::SlavePty::spawn_command", Some("Exec")),
        ("portable_pty", "portable_pty::CommandBuilder::get_argv", None), // getter
        ("portable_pty", "portable_pty::CommandBuilder::get_cwd", None), // getter
        ("portable_pty", "portable_pty::PtySize::default", None), // pure data type
        ("portable_pty", "portable_pty::CommandBuilder::new", None), // builder ctor
        ("async_process", "async_process::Command::spawn", Some("Exec")),
        ("async_process", "async_process::Command::output", Some("Exec")),
        ("async_process", "async_process::Stdio::piped", None), // pure data type
        ("async_process", "async_process::Stdio::null", None), // pure data type
        // FFI tiers (matched by distinctive leaf, alias-independent)
        ("libc", "libc::open", Some("Fs")),
        ("libc", "libc::connect", Some("Net")),
        ("libc", "libc::read", None), // generic fd op — deliberately unclassified
        ("ffi", "ffi::sqlite3_step", Some("Db")),
        ("raw", "raw::git_remote_fetch", Some("Net")),
        // libgit2 clone + submodule clone/update fetch over the network (an A/B on git2 0.20
        // caught `Submodule::update`/`clone` and `Repository::clone` reporting no Net — the
        // latter because the `src/build.rs` module was being dropped as if it were the Cargo
        // build script).
        ("raw", "raw::git_clone", Some("Net")),
        ("raw", "raw::git_submodule_clone", Some("Net")),
        ("raw", "raw::git_submodule_update", Some("Net")),
        ("raw", "raw::git_submodule_open", None), // local subrepo open — not Net
        // libcurl: the transfer/raw-socket entry points are Net (an A/B on curl 0.4 caught the
        // whole crate reporting ZERO Net); the big setopt/init/getinfo surface — and the
        // readiness-wait multi_wait/poll — stay unclassified (the loop's perform is the boundary).
        ("curl_sys", "curl_sys::curl_easy_perform", Some("Net")),
        ("curl_sys", "curl_sys::curl_easy_send", Some("Net")),
        ("curl_sys", "curl_sys::curl_multi_perform", Some("Net")),
        ("curl_sys", "curl_sys::curl_multi_socket_action", Some("Net")),
        ("curl_sys", "curl_sys::curl_easy_setopt", None), // in-memory option write
        ("curl_sys", "curl_sys::curl_easy_init", None), // handle alloc
        ("curl_sys", "curl_sys::curl_multi_wait", None), // readiness wait, no payload
        // consumer-side `curl` crate rule: the dispatch verbs are Net, the setopt builders pure.
        ("curl", "curl::easy::Easy::perform", Some("Net")),
        ("curl", "curl::multi::Multi::perform", Some("Net")),
        ("curl", "curl::easy::Easy::send", Some("Net")),
        ("curl", "curl::easy::Easy::url", None), // CURLOPT setter — pure
        ("curl", "curl::easy::Easy::timeout", None), // pure setter; Multi::timeout under-reported by design
        ("ffi", "ffi::SSL_connect", Some("Net")),
        // pure crates stay pure
        ("serde", "serde::Serialize::serialize", None),
        ("std", "std::vec::Vec::push", None),
        // ── sweep 2026-06-17: fabrication carve-outs + DNS coverage (each fails pre-fix) ──
        // [24] std::net socket accessors are pure; the I/O verbs stay Net.
        ("std", "std::net::TcpStream::connect", Some("Net")),
        ("std", "std::net::TcpStream::local_addr", None),
        ("std", "std::net::TcpStream::nodelay", None),
        ("std", "std::net::TcpStream::ttl", None),
        ("std", "std::net::UdpSocket::peer_addr", None),
        // [37] std DNS resolution is Net (was floored).
        ("std", "std::net::lookup_host", Some("Net")),
        ("std", "core::net::ToSocketAddrs::to_socket_addrs", Some("Net")),
        // [23] std::process getters are pure; spawn/new stay Exec.
        ("std", "std::process::Command::get_program", None),
        ("std", "std::process::Command::get_args", None),
        ("std", "std::process::Child::id", None),
        ("std", "std::process::Command::spawn", Some("Exec")),
        // [27] redis ConnectionManager::clone is an Arc bump (pure); a query round-trips.
        ("redis", "redis::aio::ConnectionManager::clone", None),
        ("redis", "redis::aio::ConnectionManager::send_packed_command", Some("Db")),
        // [5] sea_orm re-exported sea_query builder algebra is pure; execution verbs stay Db.
        ("sea_orm", "sea_orm::sea_query::Func::count", None),
        ("sea_orm", "sea_orm::sea_query::Condition::all", None),
        ("sea_orm", "sea_orm::Select::all", Some("Db")),
    ];
    for &(krate, path, expected) in CASES {
        assert_eq!(classify(krate, path), expected, "classify({:?}, {:?})", krate, path);
    }
}
2035
#[test]
fn rand_osrng_handle_ops_are_pure_but_draws_are_rand() {
    // Adversarial-review fabrication: a blanket `contains("OsRng")` labelled `OsRng::clone` as
    // Rand, yet OsRng is a unit struct, so clone/fork/default pull no entropy. The genuine draws
    // must keep firing.
    const CASES: &[(&str, &str, Option<&str>)] = &[
        ("rand", "rand::rngs::OsRng::clone", None),
        ("rand", "rand::rngs::OsRng::default", None),
        ("rand", "rand::rngs::OsRng::fill_bytes", Some("Rand")), // a real draw
        ("rand", "rand::rngs::OsRng::next_u32", Some("Rand")),
        ("rand", "rand::Rng::gen", Some("Rand")), // verb path unaffected
        ("rand", "rand::distributions::Uniform::new", None), // pure ctor still pure
    ];
    for &(krate, path, expected) in CASES {
        assert_eq!(classify(krate, path), expected, "classify({:?}, {:?})", krate, path);
    }
}
2047
#[test]
fn redis_connection_manager_config_builder_is_pure() {
    // Adversarial-review fabrication: `contains("ConnectionManager")` also matched the pure
    // *Config* builder. The builder rows come first; the LIVE manager rows after them must
    // still round-trip (Db).
    const CASES: &[(&str, &str, Option<&str>)] = &[
        ("redis", "redis::aio::ConnectionManagerConfig::new", None),
        ("redis", "redis::aio::ConnectionManagerConfig::set_max_delay", None),
        ("redis", "redis::aio::ConnectionManager::new", Some("Db")),
        ("redis", "redis::Commands::get", Some("Db")),
    ];
    for &(krate, path, expected) in CASES {
        assert_eq!(classify(krate, path), expected, "classify({:?}, {:?})", krate, path);
    }
}
2057
#[test]
fn pure_fd_transfer_is_not_an_effect() {
    // ADOPTING / EXTRACTING / BORROWING a descriptor that is already open (or unwrapping an async
    // type back into its std type) issues NO syscall, so it has to be PURE even though it hangs
    // off a std I/O type whose prefix rule would otherwise fire Net/Fs/Ipc. (Real tokio sweep:
    // `into_std`, `from_raw_fd`, `as_raw_fd` all fabricated effects.)
    const PURE_TRANSFERS: &[(&str, &str)] = &[
        ("std", "std::net::TcpStream::from_raw_fd"),
        ("std", "std::net::TcpStream::into_raw_fd"),
        ("std", "std::net::TcpStream::as_raw_fd"),
        ("std", "std::net::TcpListener::from_raw_fd"),
        ("std", "std::net::UdpSocket::from_raw_socket"),
        ("std", "std::fs::File::from_raw_fd"),
        ("std", "std::fs::File::into_raw_fd"),
        ("std", "std::fs::File::as_raw_handle"),
        ("std", "std::os::unix::net::UnixStream::from_raw_fd"),
        // `SocketAddr::from_pathname` only builds an address struct and opens no socket — pure.
        // (socket2 sweep.)
        ("std", "std::os::unix::net::SocketAddr::from_pathname"),
        ("tokio", "tokio::net::TcpStream::from_raw_fd"),
        ("tokio", "tokio::net::TcpStream::into_std"), // unwrap → std type, pure
        ("tokio", "tokio::fs::File::into_std"),
    ];
    for &(krate, path) in PURE_TRANSFERS {
        assert_eq!(classify(krate, path), None, "classify({:?}, {:?})", krate, path);
    }

    // …whereas a REAL open/connect on the SAME types still fires its effect — the carve-out is
    // leaf-precise.
    const REAL_IO: &[(&str, &str, &str)] = &[
        ("std", "std::net::TcpStream::connect", "Net"),
        ("std", "std::fs::File::open", "Fs"),
        ("std", "std::fs::read", "Fs"),
        ("std", "std::os::unix::net::UnixStream::connect", "Ipc"),
        ("tokio", "tokio::net::TcpStream::connect", "Net"),
    ];
    for &(krate, path, effect) in REAL_IO {
        assert_eq!(classify(krate, path), Some(effect), "classify({:?}, {:?})", krate, path);
    }
}
2085
#[test]
fn command_head_refines_the_exec_cliff() {
    use super::classify_command_head as h;

    // Each row pairs a command head with the effects it is expected to refine to.
    let heads: &[(&str, &[&str])] = &[
        // unambiguous external tools classify by basename (spec §4 ⟨0.5⟩)
        ("curl", &["Net"]),
        ("telnet", &["Net"]),
        ("sftp", &["Net"]),
        ("/usr/local/bin/psql", &["Db"]), // basename match strips the path
        ("mongo", &["Db"]),
        ("cqlsh", &["Db"]),
        // a candor engine is Fs/Env — spec-SUPPLIED by §7 item 12, not curation
        ("candor-scan", &["Env", "Fs"]),
        ("candor-run.sh", &["Env", "Fs"]),
        // an unrecognised head contributes nothing — the bare Exec cliff stands (never guess).
        // `make`/`npm` run the project's own code; `git`/`rsync` are multi-modal (local vs
        // remote) — all of them keep the cliff rather than fabricate an effect for the common
        // case.
        ("some-unknown-tool", &[]),
        ("make", &[]),
        ("npm", &[]),
        ("git", &[]),
        ("rsync", &[]),
    ];
    for &(head, effects) in heads {
        assert_eq!(h(head), effects, "classify_command_head({:?})", head);
    }

    // a builder MODIFIER (`.arg`/`.env`) names no program, so its literal must NOT refine (a
    // whole-crate-Exec crate classifies every method; `.env("psql",..)` must not fabricate Db).
    for &modifier in &["env", "arg", "current_dir"] {
        assert!(is_cmd_builder_method(modifier), "{:?} should be a builder method", modifier);
    }
    assert!(!is_cmd_builder_method("new")); // Command::new NAMES the program
    assert!(!is_cmd_builder_method("cmd")); // duct::cmd NAMES the program

    // The gate that ADMITS a literal to classify_command_head is an ALLOWLIST of program-NAMING
    // methods, not the builder denylist. The inversion matters: a whole-crate-Exec crate
    // (portable_pty) classifies EVERY method as Exec, so a getter such as `cmd.get_env("psql")`
    // — absent from the builder denylist — would have leaked "psql" to the head and FABRICATED
    // Db. Only `new`/`cmd` name a program, so only they may refine.
    for &namer in &["new", "cmd"] {
        assert!(is_cmd_naming_method(namer), "{:?} should be a naming method", namer);
    }
    assert!(!is_cmd_naming_method("get_env")); // a GETTER, not a namer — the leak this closes
    for &modifier in &["arg", "env", "current_dir"] {
        assert!(!is_cmd_naming_method(modifier), "{:?} must not be a naming method", modifier);
    }
}
2121
#[test]
fn net_establishing_allowlist() {
    // sweep [3]/[7]: the masking guard's establishing-verb allowlist. Host-bearing
    // connect/request verbs establish (a runtime host there is invisible); USE-verbs on an
    // already-connected socket do NOT.
    let establishing =
        ["connect", "connect_timeout", "get", "post", "request", "send_to", "to_socket_addrs"];
    for &verb in &establishing {
        assert!(is_net_establishing(verb), "{:?} should be establishing", verb);
    }

    // use-verbs (host fixed at connect) must NOT count as establishing — otherwise
    // `connect("h").write()` flags.
    let use_verbs = ["write", "read", "send", "flush", "recv", "peek"];
    for &verb in &use_verbs {
        assert!(!is_net_establishing(verb), "{:?} must not be establishing", verb);
    }
}
2133
#[test]
fn fs_path_arg_allowlist() {
    // The Fs masking guard's allowlist of path-naming fns: free fns / constructors receive the
    // path as a string arg (a runtime path there is invisible to the gate). Stat methods (path
    // on the receiver) and handle ops carry no path arg and must NOT flag — those are caught by
    // the caller's `!is_method` gate; the allowlist itself only enumerates the path-NAMING leaves.
    let path_naming = [
        "write",
        "read",
        "read_to_string",
        "open",
        "create",
        "create_new",
        "remove_file",
        "rename",
        "copy",
        "create_dir_all",
        "canonicalize",
        "metadata",
    ];
    for &leaf in &path_naming {
        assert!(is_fs_path_arg(leaf), "{:?} should be path-naming", leaf);
    }

    // handle ops / pure builders take NO path arg — never path-naming.
    let not_path_naming = ["write_all", "flush", "read_exact", "new", "sync_all", "set_len"];
    for &leaf in &not_path_naming {
        assert!(!is_fs_path_arg(leaf), "{:?} must not be path-naming", leaf);
    }
}
2148
#[test]
fn db_query_arg_allowlist() {
    // The Db masking guard's allowlist of query-bearing verbs: these receive the raw SQL as a
    // string arg (a runtime query there is invisible to the gate). Build-then-execute terminals
    // and non-query ops carry no SQL string and must NOT flag.
    let query_bearing = [
        "execute",
        "query",
        "query_one",
        "prepare",
        "batch_execute",
        "execute_batch",
        "query_row",
        "query_map",
        "exec",
    ];
    for &verb in &query_bearing {
        assert!(is_db_query_arg(verb), "{:?} should be query-bearing", verb);
    }

    let not_query_bearing = [
        // build-then-execute terminals (query built structurally, no SQL string) must NOT flag.
        "fetch_all",
        "load",
        "first",
        "all",
        "one",
        "stream",
        // connection / lifecycle ops take no SQL — must NOT flag.
        "connect",
        "open",
        "begin",
        "commit",
        "ping",
        "get_conn",
    ];
    for &verb in &not_query_bearing {
        assert!(!is_db_query_arg(verb), "{:?} must not be query-bearing", verb);
    }
}
2164}