Skip to main content

candor_classify/
lib.rs

1//! candor-classify — the curated effect classifier (crate+path -> effect), extracted to a STABLE
2//! crate so both the nightly `rustc_private` lint AND a stable backend share ONE source of truth
3//! (no drift). Pure string logic; no rustc internals. The effect vocabulary lives in candor-report.
4
5use candor_report::EFFECTS;
6
7/// The canonical CANDOR_POLICY DSL parser (SPEC §6.2), shared by the nightly gate and candor-query.
8pub mod policy;
9
/// Looks a callee up in the project-supplied rule table; meant to be consulted only when the
/// built-in `classify` returns `None`.
///
/// Each rule is an `(effect, is_crate, prefix)` triple. When `is_crate` is true the prefix is
/// tested against `crate_name`, otherwise against `path`. Rules are tried in slice order and the
/// effect of the first rule whose prefix matches is returned; `None` means no rule matched.
pub fn classify_extra(
    crate_name: &str,
    path: &str,
    extra: &[(&'static str, bool, String)],
) -> Option<&'static str> {
    extra
        .iter()
        .find(|rule| {
            // Pick which string this rule is keyed on, then do a plain prefix test.
            let subject = if rule.1 { crate_name } else { path };
            subject.starts_with(rule.2.as_str())
        })
        .map(|rule| rule.0)
}
24
/// The exact third-party crates `classify` has effect rules for (the crate-name PREFIXES it
/// recognizes are in `CALIBRATED_PREFIXES`). This is the single source of truth for "what candor
/// knows": it is emitted beside the JSON report (`<prefix>.calibrated.json`) so the Claude Code
/// receipt's coverage check reads candor's real coverage instead of a hand-copied list.
///
/// Keep in lockstep with `classify` — the `db_crates_are_calibrated` and
/// `calibrated_crates_are_live` tests (in this crate's `tests` module) enforce both directions.
/// The array length is part of the type, so it must be bumped whenever an entry is added/removed.
pub const CALIBRATED_CRATES: [&str; 79] = [
    // -- network --
    // aws_config resolves credentials over the network on `.load()`; git2 remote ops
    // (fetch/push/connect) contact the network; async_net is smol's net layer; pnet is raw L2/L3
    // packet capture.
    "reqwest", "isahc", "ureq", "curl", "aws_config", "git2",
    "tokio_tcp", "tokio_udp", "async_net",
    "async_nats", "lapin", "lettre", "tungstenite", "elasticsearch", "tonic", "rdkafka", "pnet",
    // -- directory traversal + filesystem watching --
    // ignore = gitignore-aware walker (powers ripgrep/fd; its walk executors are Fs);
    // notify = inotify/FSEvents/kqueue wrapper (powers watchexec/cargo-watch).
    "ignore", "notify",
    // -- database (mirrors DB_CRATES) --
    "sqlx", "rusqlite", "postgres", "tokio_postgres", "diesel", "redis", "mongodb",
    "mysql", "mysql_async", "sea_orm", "deadpool_postgres",
    // -- filesystem (async_fs = smol; fs_err = std::fs wrapper; tempfile; glob) --
    "memmap2", "fs_err", "async_fs", "tempfile", "glob",
    // -- entropy --
    "rand", "getrandom", "fastrand",
    // -- entropy: the password-hashing tier (salt mints + bcrypt's internal salt) + the OsRng source --
    "argon2", "bcrypt", "scrypt", "pbkdf2", "password_hash", "rand_core",
    // -- subprocess (async_process = smol; duct) --
    "portable_pty", "async_process", "duct",
    // -- env (dotenvy/dotenv) --
    "dotenvy", "dotenv",
    // -- clock (time) / log / clipboard --
    "chrono", "time", "tracing", "log", "arboard",
    // -- compiler diagnostic emission (a dylint lint's output); see the Log rules in classify --
    "rustc_lint", "rustc_errors",
    // -- raw syscalls via FFI --
    // The syscall-name table that lights up the FFI-thin tier (nix is routed through the same
    // table by leaf name, so a consumer of nix is covered without nix's own source).
    "libc", "nix", "rustix",
    // -- coverage-differential additions (verb-keyed; per-crate rules near the end of classify) --
    // sync TLS core + native-tls variants (Net):
    "rustls", "native_tls_crate", "tokio_native_tls",
    // env/dir resolution + argv + LS_COLORS (Env):
    "etcetera", "wild", "lscolors",
    // sqlx-core execution terminals (Net/Db); directory walk + timestamp mutation + same-file (Fs):
    "sqlx_core", "walkdir", "filetime", "clircle",
    // process-spawn helpers (Exec); signal handler + interactive-tty prompts (Ipc);
    // env_logger (Log); jiff/backoff clock reads (Clock):
    "execute", "ctrlc", "clap", "jiff", "env_logger",
    "dialoguer", "console", "terminal_colorsaurus", "backoff", "grep_cli",
];
68
/// Crate-name PREFIXES that `classify` recognizes — the prefix-matched companion to the
/// exact-name table `CALIBRATED_CRATES`. The array length is part of the type, so bump it when
/// adding or removing a prefix.
pub const CALIBRATED_PREFIXES: [&str; 3] = [
    "aws_sdk_",
    "aws_smithy",
    "cap_",
];
70
/// Crates that `classify` recognises through PATH prefixes of their effectful modules (e.g.
/// `tokio::net::` / `async_std::fs::` / `mio::net::`) rather than by crate-name equality.
///
/// Because of that they do not appear in `CALIBRATED_CRATES` (which the liveness test probes by
/// crate name). The coverage check must nevertheless treat them as *covered* — otherwise it would
/// mislabel the most common async crates as blind spots.
pub const PATH_CALIBRATED_CRATES: [&str; 3] = [
    "tokio",
    "async_std",
    "mio",
];
76
/// Representative path tails probed by the `calibrated_crates_are_live` liveness test. Each tail
/// is appended to a crate name; for every `CALIBRATED_CRATES` entry at least one of the resulting
/// paths must match, else the entry is dead.
///
/// Exported as ONE source of truth because the nightly lint crate (`src/lib.rs`) runs the SAME
/// liveness test. When the two probe lists were duplicated they drifted: a rule keyed on a
/// distinctive tail (pnet `::datalink::channel`, ignore `::WalkBuilder::build_parallel`, notify
/// `::RecommendedWatcher::new`) added to only one list silently broke the other crate's
/// `cargo test`.
pub const CALIBRATION_PROBE_TAILS: &[&str] = &[
    // Generic dispatch / query verbs.
    "::X::send",
    "::X::execute",
    "::X::call",
    "::X::query",
    "::X::fetch_one",
    "::Remote::fetch",
    // Distinctive tails for pnet / ignore / notify.
    "::datalink::channel",
    "::WalkBuilder::build_parallel",
    "::RecommendedWatcher::new",
    "::X::connect", "::Utc::now", "::X::load", "::__private_api::log", "::tempfile", "::glob",
    "::X::run", "::dotenv", "::random", "::emit", "::X::emit_span_lint", "::X::anything",
    "::SaltString::generate", "::hash", "::OsRng::fill_bytes",
    // Verb-precise crates: their whole-crate rules were narrowed to the effectful surface (the
    // pure accessors/ctors/data-types now return None), so the probe must name an EFFECTFUL path.
    "::Mmap::map", "::event", "::u32", "::Clipboard::get_text", "::spawn_command",
    // Coverage-differential crates (each needs >= 1 effectful tail). Existing tails already cover
    // native_tls_crate/tokio_native_tls/sqlx_core via ::X::connect, execute via ::X::execute,
    // and jiff via ::now.
    "::read_tls", "::home_dir", "::args", "::from_env", "::IntoIter::next", "::set_file_mtime",
    "::surely_conflicts_with", "::set_handler", "::get_matches", "::init", "::interact",
    "::write_line", "::background_color", "::retry", "::build",
];
98
/// Database client crates whose execution verbs count as I/O (see the DB branch in `classify`).
///
/// Kept at module level so the `db_crates_are_calibrated` test can enforce
/// `DB_CRATES ⊆ CALIBRATED_CRATES`. The array length is part of the type; bump it with the list.
pub const DB_CRATES: [&str; 11] = [
    "sqlx",
    "rusqlite",
    "postgres",
    "tokio_postgres",
    "diesel",
    "redis",
    "mongodb",
    "mysql",
    "mysql_async",
    "sea_orm",
    "deadpool_postgres",
];
105
/// Leaf names of pure file-descriptor *ownership-transfer* calls, which `classify` exempts before
/// any other rule runs.
///
/// They ADOPT an already-open descriptor (`from_raw_*`), EXTRACT or BORROW one (`into_raw_*` /
/// `as_raw_*`), or UNWRAP an async wrapper back to its std type (`into_std`) — none of them issue
/// a syscall or perform I/O. candor's cardinal sin is calling a PURE function effectful, and these
/// leaves collide with the coarse std-type PREFIX rules (`std::net::TcpStream` / `std::fs::File` /
/// `std::os::unix::net` → Net/Fs/Ipc) even though the descriptor was opened ELSEWHERE. The
/// portable_pty/async_process Exec rule already exempts `from_raw_fd`; this table generalises the
/// same carve-out across the net/fs/ipc prefix rules. (Found by a real-world sweep of tokio:
/// `TcpStream::into_std`, `*::from_raw_fd`, `*::as_raw_fd` all fabricated Net/Fs/Ipc.)
const PURE_FD_TRANSFER: &[&str] = &[
    // adopt an existing descriptor
    "from_raw_fd",
    "from_raw_socket",
    "from_raw_handle",
    // give up ownership of the descriptor
    "into_raw_fd",
    "into_raw_socket",
    "into_raw_handle",
    // borrow the descriptor
    "as_raw_fd",
    "as_raw_socket",
    "as_raw_handle",
    // unwrap an async wrapper back to its std type
    "into_std",
    // `SocketAddr::from_pathname` (std/async-std unix net) only builds an address STRUCT from a
    // path; it opens no socket, yet the `std::os::unix::net` prefix rule would otherwise fabricate
    // Ipc on it. (Found sweeping socket2: `SockAddr::as_unix` → `from_pathname` reported Ipc.)
    "from_pathname",
];
126
127/// Classify a resolved callee by the crate it belongs to and its full path.
128pub fn classify(crate_name: &str, path: &str) -> Option<&'static str> {
129    // Pure fd ownership-transfer/extraction leaves are never an effect, regardless of which std I/O
130    // type they hang off — exempt them BEFORE the coarse prefix rules can fabricate Net/Fs/Ipc.
131    if PURE_FD_TRANSFER.contains(&path.rsplit("::").next().unwrap_or(path)) {
132        return None;
133    }
134    if crate_name.starts_with("aws_sdk_") || crate_name.starts_with("aws_smithy") {
135        // Only request dispatch is network I/O; builder setters/accessors are pure.
136        if path.ends_with("::send") || path.ends_with("::send_with") {
137            return Some("Net");
138        }
139        return None;
140    }
141    // aws-config resolves credentials/region on `.load()` — it reaches the IMDS metadata
142    // endpoint / STS over the network (and reads ~/.aws + env). Builders (`defaults()`,
143    // `SdkConfig::builder()`, `BehaviorVersion::latest()`) are pure; the `load` is the I/O.
144    // (Found hardening on a real app, ebman: `builder.load().await` was classified pure.)
145    if crate_name == "aws_config" {
146        if path.ends_with("::load") || path.ends_with("::load_defaults") {
147            return Some("Net");
148        }
149        return None;
150    }
151    // git2 (libgit2 FFI): remote operations contact the network; everything else is local
152    // to the .git directory. Match the remote verbs precisely — NOT bare `::clone`, which is
153    // the `Clone`-trait dup of a `Remote` handle (pure), not `Repository::clone`. (Found
154    // hardening on gitui: `remote.fetch`/`remote.push` were classified network-free — a git
155    // client reporting it makes no network calls.)
156    if crate_name == "git2" {
157        if path.ends_with("::fetch")
158            || path.ends_with("::push")
159            || path.ends_with("::download")
160            || path.ends_with("::connect")
161            || path.ends_with("::connect_auth")
162            || path.ends_with("::ls")
163            || path.ends_with("::upload")
164        {
165            return Some("Net");
166        }
167        return None;
168    }
169    // libc — raw syscalls via FFI. The FFI-thin tier (nix, and the syscall layer beneath rusqlite/git2)
170    // is invisible to a name classifier unless we model libc directly: a 35-crate calibration
171    // (eval/calibration) showed nix reporting ZERO library effects because every wrapper bottoms out in
172    // an unrecognised `libc::*` call. Classify by syscall name, but ONLY the UNAMBIGUOUS ones — the
173    // socket family is Net, path/dir syscalls are Fs, spawn/exec/wait is Exec, SysV/pipe IPC is Ipc,
174    // env/clock/entropy each their own. We deliberately SKIP the generic file-descriptor ops
175    // (read/write/close/lseek/dup/fcntl/ioctl/poll/select/epoll*/mmap): they operate on ANY fd — file,
176    // socket, or pipe — so a fixed label would mis-categorise as often as it helps. An honest
177    // no-classify (under-report) beats emitting the WRONG effect. Pure conversions (htons/inet_pton/
178    // gmtime) are also skipped.
179    //
180    // `nix` (the idiomatic SAFE libc wrapper, in ~every Rust systems/CLI crate) is routed through the
181    // SAME table: its functions keep the syscall leaf name (`nix::fcntl::open`, `nix::sys::socket::connect`,
182    // `nix::unistd::execvp`). Without this, a CONSUMER of nix analysed without nix's own source (the
183    // stable scanner, single-crate) sees `nix::*` cross-crate and under-reports — serialport-rs opens its
184    // device via `nix::fcntl::open` and reported ZERO Fs. The nightly lint reaches `libc::*` THROUGH nix's
185    // body; this gives the scanner the same coverage directly. (Found sweeping serialport-rs.)
186    // `rustix` is the same shape as nix but does RAW syscalls (no libc underneath), so its functions MUST
187    // be classified directly. Its leaf names are the syscall names too (`rustix::time::clock_settime`,
188    // `rustix::fs::mkfifoat`/`symlink`/`stat`, `rustix::net::connect`) — route it through the same table.
189    // The rustix-specific `*at`/variant leaves it doesn't share with libc just under-report (the safe
190    // direction). VALIDATED, not speculative: coreutils' `date` reads/sets the clock via
191    // `rustix::time::clock_getres`/`clock_settime` and reported Clock=0; the file I/O that goes through
192    // std::fs was already correct, which is why only the rustix-only effects (Clock/Ipc) were missing.
193    if crate_name == "libc" || crate_name == "nix" || crate_name == "rustix" {
194        let f = path.rsplit("::").next().unwrap_or(path);
195        // path / directory / metadata syscalls (incl. *64 and *at variants)
196        const FS: &[&str] = &[
197            "open", "open64", "openat", "openat2", "creat", "creat64", "stat", "stat64", "lstat",
198            "lstat64", "fstatat", "fstatat64", "newfstatat", "statx", "access", "faccessat",
199            "faccessat2", "mkdir", "mkdirat", "rmdir", "unlink", "unlinkat", "rename", "renameat",
200            "renameat2", "link", "linkat", "symlink", "symlinkat", "readlink", "readlinkat", "chmod",
201            "fchmodat", "chown", "lchown", "fchownat", "truncate", "truncate64", "ftruncate",
202            "ftruncate64", "opendir", "fdopendir", "readdir", "readdir64", "readdir_r", "closedir",
203            "rewinddir", "seekdir", "telldir", "scandir", "mkstemp", "mkstemps", "mkostemp", "mkdtemp",
204            "mknod", "mknodat", "chdir", "fchdir", "getcwd", "get_current_dir_name", "chroot",
205            "pivot_root", "statfs", "statfs64", "fstatfs", "fstatfs64", "statvfs", "fstatvfs", "mount",
206            "umount", "umount2", "fsync", "fdatasync", "sync", "syncfs", "sync_file_range", "fallocate",
207            "posix_fallocate", "posix_fadvise", "sendfile", "sendfile64", "copy_file_range", "flock",
208            "getdents", "getdents64", "utime", "utimes", "lutimes", "futimens", "utimensat", "futimesat",
209            "realpath",
210        ];
211        // socket family — these operate only on sockets, so Net is unambiguous (AF_UNIX domain isn't
212        // visible at the call, so a Unix socket reads as Net rather than Ipc; acceptable over-general).
213        const NET: &[&str] = &[
214            "socket", "setsockopt", "getsockopt", "bind", "listen", "accept", "accept4", "connect",
215            "shutdown", "send", "sendto", "sendmsg", "sendmmsg", "recv", "recvfrom", "recvmsg",
216            "recvmmsg", "getpeername", "getsockname", "getaddrinfo", "freeaddrinfo", "getnameinfo",
217        ];
218        // process creation / replacement / reaping
219        const EXEC: &[&str] = &[
220            "fork", "vfork", "clone", "clone3", "execl", "execlp", "execle", "execv", "execvp",
221            "execvpe", "execve", "execveat", "fexecve", "posix_spawn", "posix_spawnp", "system",
222            "popen", "pclose", "wait", "waitpid", "wait3", "wait4", "waitid",
223        ];
224        // pipes / FIFOs / SysV + POSIX message queues, semaphores, shared memory; socketpair (AF_UNIX)
225        const IPC: &[&str] = &[
226            "pipe", "pipe2", "mkfifo", "mkfifoat", "socketpair", "msgget", "msgsnd", "msgrcv", "msgctl",
227            "semget", "semop", "semtimedop", "semctl", "shmget", "shmat", "shmdt", "shmctl", "mq_open",
228            "mq_send", "mq_receive", "mq_timedsend", "mq_timedreceive", "mq_close", "mq_unlink",
229        ];
230        const ENV: &[&str] = &["getenv", "secure_getenv", "setenv", "putenv", "unsetenv", "clearenv"];
231        const CLOCK: &[&str] = &[
232            "time", "gettimeofday", "clock_gettime", "clock_getres", "nanosleep", "clock_nanosleep",
233            // SETTING the system clock is a clock effect too (was unclassified — found on coreutils `date`,
234            // which sets it via `clock_settime`).
235            "clock_settime", "settimeofday", "stime", "adjtime", "adjtimex", "clock_adjtime",
236        ];
237        const RAND: &[&str] = &["getrandom", "getentropy", "arc4random", "arc4random_buf", "arc4random_uniform"];
238        if FS.contains(&f) {
239            return Some("Fs");
240        }
241        if NET.contains(&f) {
242            return Some("Net");
243        }
244        if EXEC.contains(&f) {
245            return Some("Exec");
246        }
247        if IPC.contains(&f) {
248            return Some("Ipc");
249        }
250        if ENV.contains(&f) {
251            return Some("Env");
252        }
253        if CLOCK.contains(&f) {
254            return Some("Clock");
255        }
256        if RAND.contains(&f) {
257            return Some("Rand");
258        }
259        return None;
260    }
261    // C-library FFI bindings: libsqlite3 (under rusqlite) and libgit2 (under git2). Like the libc tier,
262    // these crates are thin Rust over a C library, so their real I/O is invisible until the C entry
263    // points are named. Match by the DISTINCTIVE C function name (`sqlite3_*` / `git_*`) via the call's
264    // LEAF — independent of the binding crate's alias: rusqlite calls `ffi::sqlite3_step`, git2 calls
265    // `raw::git_remote_fetch`, and the nightly lint resolves the same to `libsqlite3_sys`/`libgit2_sys`;
266    // all spellings share the leaf. Only the I/O-performing entry points are listed — the in-memory
267    // accessors (`sqlite3_bind_*`/`sqlite3_column_*`, `git_*_oid`/strarray/options builders) stay pure,
268    // so a non-listed `sqlite3_`/`git_` leaf returns None (under-report, never a wrong effect). Calibrated
269    // + validated against rusqlite 0.39 / git2 0.20 source (eval/calibration).
270    {
271        let leaf = path.rsplit("::").next().unwrap_or(path);
272        if let Some(rest) = leaf.strip_prefix("sqlite3_") {
273            let _ = rest;
274            // SQLite C API operations that touch the database (open/exec/step/prepare/backup/blob/wal).
275            const DB: &[&str] = &[
276                "sqlite3_open", "sqlite3_open_v2", "sqlite3_open16", "sqlite3_close", "sqlite3_close_v2",
277                "sqlite3_exec", "sqlite3_step", "sqlite3_prepare", "sqlite3_prepare_v2",
278                "sqlite3_prepare_v3", "sqlite3_prepare16", "sqlite3_prepare16_v2", "sqlite3_prepare16_v3",
279                "sqlite3_get_table", "sqlite3_backup_init", "sqlite3_backup_step", "sqlite3_backup_finish",
280                "sqlite3_blob_open", "sqlite3_blob_read", "sqlite3_blob_write", "sqlite3_blob_reopen",
281                "sqlite3_load_extension", "sqlite3_wal_checkpoint", "sqlite3_wal_checkpoint_v2",
282            ];
283            return DB.contains(&leaf).then_some("Db");
284        }
285        if leaf.starts_with("git_") {
286            // libgit2: remote/transport operations contact the network … (incl. submodule clone/update,
287            // which `git_clone`/fetch the subrepo over its remote — `allow_fetch` defaults on; an A/B on
288            // git2 0.20 caught `Submodule::update`/`clone` reporting no `Net`).
289            const NET: &[&str] = &[
290                "git_clone", "git_remote_connect", "git_remote_connect_ext", "git_remote_fetch",
291                "git_remote_download", "git_remote_upload", "git_remote_push", "git_remote_ls",
292                "git_submodule_clone", "git_submodule_update",
293            ];
294            // … and repository/index/odb/checkout/ref/config operations touch the on-disk .git store.
295            const FS: &[&str] = &[
296                "git_repository_open", "git_repository_open_ext", "git_repository_open_bare",
297                "git_repository_init", "git_repository_init_ext", "git_repository_discover",
298                "git_checkout_tree", "git_checkout_head", "git_checkout_index", "git_index_read",
299                "git_index_write", "git_index_write_tree", "git_index_write_tree_to",
300                "git_index_add_bypath", "git_index_add_all", "git_odb_open", "git_odb_read",
301                "git_odb_write", "git_odb_open_wstream", "git_odb_open_rstream",
302                "git_blob_create_fromdisk", "git_blob_create_fromworkdir", "git_blob_create_from_disk",
303                "git_blob_create_from_workdir", "git_blob_create_from_stream", "git_commit_create",
304                "git_commit_create_v", "git_reference_create", "git_reference_set_target",
305                "git_reference_delete", "git_config_open_default", "git_config_open_ondisk",
306                "git_config_add_file_ondisk", "git_tag_create", "git_treebuilder_write",
307                "git_packbuilder_write",
308            ];
309            if NET.contains(&leaf) {
310                return Some("Net");
311            }
312            if FS.contains(&leaf) {
313                return Some("Fs");
314            }
315            return None;
316        }
317        if leaf.starts_with("curl_") {
318            // libcurl (under the `curl` crate, called `curl_sys::curl_*`). Only the entry points that
319            // PERFORM network I/O: the blocking transfer (`curl_easy_perform`), raw socket send/recv,
320            // the HTTP/2 keepalive PING (`upkeep`), and the multi-interface transfer pumps. The large
321            // pure surface (setopt/init/cleanup/reset/getinfo/escape/multi_add_handle/fdset/info_read)
322            // stays unclassified, as do `curl_multi_wait`/`poll` (readiness WAIT on sockets, no payload —
323            // the loop's `perform` is the tagged boundary, per the I/O-boundary principle). An A/B on
324            // curl 0.4 caught the whole crate reporting ZERO Net (`Easy::perform` read as pure).
325            const NET: &[&str] = &[
326                "curl_easy_perform", "curl_easy_send", "curl_easy_recv", "curl_easy_upkeep",
327                "curl_multi_perform", "curl_multi_socket_action",
328            ];
329            return NET.contains(&leaf).then_some("Net");
330        }
331        if let Some(op) = leaf.strip_prefix("SSL_") {
332            // OpenSSL (libssl, under the `openssl`/`native-tls` crates, called `ffi::SSL_*`). The TLS
333            // handshake and record I/O run over the peer socket -> Net. Unlike libc read/write, an SSL_*
334            // op is ~always over a network BIO (the rare memory-BIO/sans-IO case is the honest exception
335            // we accept). The crypto surface (EVP_*/SHA*/AES*) and pure setup (SSL_CTX_new/SSL_set_fd) are
336            // NOT here; `BIO_*` is skipped (a BIO may be memory or socket). Validated vs openssl 0.9 source.
337            const SSL_NET: &[&str] = &[
338                "connect", "accept", "do_handshake", "read", "read_ex", "write", "write_ex", "peek",
339                "peek_ex", "shutdown",
340            ];
341            return SSL_NET.contains(&op).then_some("Net");
342        }
343    }
344    // HTTP clients use the same builder pattern as the AWS SDK: only the dispatch is
345    // I/O. (Found by the eval: ebman's reqwest calls to the Anthropic API + webhooks
346    // were silently classified network-free because reqwest wasn't recognized.)
347    if crate_name == "reqwest" || crate_name == "isahc" {
348        // The builder chain is pure; the dispatch (`::send`/`::execute`) is the I/O. PLUS the one-shot
349        // CONVENIENCE functions `reqwest::get` / `reqwest::blocking::get` / `isahc::get`, which send
350        // immediately — they're not the `Client::get` builder (a different path, `reqwest::Client::get`),
351        // so an exact match avoids false-positiving the builder. (Found running on `xh`: a one-shot
352        // `reqwest::get(url)` was classified network-free.)
353        if path.ends_with("::send")
354            || path.ends_with("::execute")
355            || path == "reqwest::get"
356            || path == "reqwest::blocking::get"
357            || path == "isahc::get"
358        {
359            return Some("Net");
360        }
361        return None;
362    }
363    if crate_name == "ureq" && path.ends_with("::call") {
364        return Some("Net");
365    }
366    // The `curl` crate (libcurl's safe binding — cargo's own HTTP client): the dispatch verbs are
367    // `perform` (Easy/Easy2/Transfer/Multi), raw-socket `send`/`recv`, the keepalive `upkeep`, and the
368    // multi-interface `action` (socket_action). The big setopt-style builder surface stays pure.
369    // `Multi::timeout` is deliberately NOT matched: `Easy::timeout` is a pure CURLOPT_TIMEOUT setter
370    // sharing the leaf — an under-report on the rare event-loop kick beats mis-tagging every consumer
371    // that sets a timeout. (Consumer-side companion to the curl_* FFI tier, same A/B finding.)
372    if crate_name == "curl"
373        && (path.ends_with("::perform")
374            || path.ends_with("::send")
375            || path.ends_with("::recv")
376            || path.ends_with("::upkeep")
377            || path.ends_with("::action"))
378    {
379        return Some("Net");
380    }
381    // The modern async-HTTP / TLS / QUIC / DNS stack — the LAYER reqwest/ureq/isahc build on, and that
382    // crates use DIRECTLY. Found by the independent-method differential on `oha` (2026-06-17): candor
383    // honestly DISCLOSED these as blind but never CLASSIFIED them, leaving real Net reaches uncovered.
384    // Verb-keyed (the pure type/builder/codec surface stays None) and CRATE-GATED, so generic verbs
385    // (request/connect/get/read/write/accept) never fabricate across unrelated crates. Same precision
386    // discipline as the reqwest/curl rules above; complements the scan_builder_entry_effect entries.
387    match crate_name {
388        // hyper 1.x client connection I/O (the builder/Body/Request types stay pure).
389        "hyper" if path.ends_with("::send_request") || path.ends_with("::handshake") => return Some("Net"),
390        // hyper-util's pooled legacy Client + its TCP connectors.
391        "hyper_util" if path.ends_with("::request") || path.ends_with("::connect") => return Some("Net"),
392        // hickory (trust-dns) resolver — issues DNS queries over the network.
393        "hickory_resolver"
394            if path.ends_with("::lookup_ip") || path.ends_with("::lookup") || path.ends_with("_lookup")
395                || path.ends_with("::resolve") => return Some("Net"),
396        // HTTP/3 over QUIC.
397        "h3" if path.ends_with("::send_request") || path.ends_with("::recv_data")
398            || path.ends_with("::recv_response") || path.ends_with("::send_data") => return Some("Net"),
399        // QUIC transport (UDP socket send/recv): connection setup, datagrams, AND the stream byte I/O
400        // (`RecvStream::read*` / `SendStream::write*` / `finish`). Opening a stream is caught above, but a
401        // fn that only HOLDS a stream and reads/writes it would otherwise read silent-pure (review: a Net
402        // under-report). Crate-gated to quinn, where these verbs are unambiguously the socket I/O.
403        "quinn" if path.ends_with("::connect") || path.ends_with("::accept") || path.ends_with("::open_bi")
404            || path.ends_with("::open_uni") || path.ends_with("::accept_bi") || path.ends_with("::accept_uni")
405            || path.ends_with("::send_datagram") || path.ends_with("::read_datagram")
406            || path.ends_with("::read") || path.ends_with("::read_chunk") || path.ends_with("::read_chunks")
407            || path.ends_with("::read_to_end") || path.ends_with("::write") || path.ends_with("::write_all")
408            || path.ends_with("::write_chunk") || path.ends_with("::write_chunks")
409            || path.ends_with("::finish") => return Some("Net"),
410        // TLS-over-TCP stream adapters — the actual socket handshake/I/O (the config/cert types stay pure).
411        "tokio_rustls" | "native_tls"
412            if path.ends_with("::connect") || path.ends_with("::accept") || path.ends_with("::handshake") =>
413            return Some("Net"),
414        // AF_VSOCK host<->guest sockets — inter-process / VM comms.
415        "tokio_vsock" if path.ends_with("::connect") || path.ends_with("::bind") || path.ends_with("::accept") =>
416            return Some("Ipc"),
417        // Loads the OS trust store from disk (cert files / keychain).
418        "rustls_native_certs" if path.ends_with("::load_native_certs") => return Some("Fs"),
419        // `rlimit` reads/mutates the process's kernel resource limits — the closest bucket is Env (host/
420        // process config); no dedicated process-state bucket exists, so getrlimit (read) and setrlimit
421        // (mutate) share it. NOTE: `num_cpus::get`/`get_physical` are deliberately NOT modeled — asking the
422        // OS for the CPU count is a near-pure topology query, and std's equivalent `thread::
423        // available_parallelism` classifies pure; modeling it as Env would spray Env over every thread-pool
424        // constructor (review: a high-noise over-report) for no capability a reviewer cares about.
425        "rlimit" if path.ends_with("::getrlimit") || path.ends_with("::setrlimit")
426            || path.ends_with("::increase_nofile_limit") => return Some("Env"),
427        // rustls — the SYNC TLS core (tokio_rustls/native_tls above are the async/system adapters). The
428        // record-layer I/O is `read_tls`/`write_tls` (pull/push raw bytes through a held `io::Read`/`Write`)
429        // and `complete_io` (loops them until the handshake/buffers drain). The config/cert/builder types
430        // (`ClientConfig`/`ServerConfig`/`ConfigBuilder`) are PURE. `process_new_packets` is deliberately
431        // EXCLUDED — it only decrypts ALREADY-buffered bytes (no socket touch; docs say call it AFTER
432        // read_tls), so flagging it would over-report Net on the pure decrypt step.
433        "rustls" if path.ends_with("::read_tls") || path.ends_with("::write_tls")
434            || path.ends_with("::complete_io") => return Some("Net"),
435        // native-tls under its alternate crate name + the tokio async wrapper (the `native_tls` arm above
436        // is the common name). The TLS handshake over a TcpStream is Net; the builder/cert types are pure.
437        "native_tls_crate" | "tokio_native_tls"
438            if path.ends_with("::connect") || path.ends_with("::accept")
439                || path.ends_with("::handshake") => return Some("Net"),
440        _ => {}
441    }
442    // Message-queue clients fully encapsulate the socket (the underlying tokio::net lives
443    // inside the crate, unseen), so a user's connect/publish/consume calls ARE the I/O
444    // boundary — to a remote broker, hence Net. Match the broker round-trip verbs (snake_case
445    // methods); the CamelCase option/property builders stay pure. (Found hardening on consumer
446    // apps: lapin `basic_publish`/`queue_declare` and async-nats `publish`/`subscribe` were
447    // classified pure — a message-queue client reporting no I/O.)
448    if crate_name == "async_nats" {
449        if path.ends_with("::connect")
450            || path.contains("::publish")
451            || path.ends_with("::subscribe")
452            || path.ends_with("::queue_subscribe")
453            || path.contains("::request")
454            || path.ends_with("::flush")
455        {
456            return Some("Net");
457        }
458        return None;
459    }
460    if crate_name == "lapin" {
461        if path.ends_with("::connect")
462            || path.ends_with("::create_channel")
463            || path.contains("::basic_")
464            || path.contains("::queue_")
465            || path.contains("::exchange_")
466            || path.contains("::tx_")
467            || path.ends_with("::confirm_select")
468            || path.ends_with("::close")
469        {
470            return Some("Net");
471        }
472        return None;
473    }
474    // SMTP email — lettre's `Transport::send` is the network dispatch; Message building is
475    // pure. (Found hardening on a lettre consumer: `mailer.send(&email)` classified pure.)
476    if crate_name == "lettre" {
477        if path.ends_with("::send") || path.ends_with("::send_raw") {
478            return Some("Net");
479        }
480        return None;
481    }
482    // WebSockets — tungstenite (the modern successor to the old `websocket` crate). connect
483    // and the socket read/write/send are network; Message constructors are pure. (Found on a
484    // tungstenite consumer: connect + send + read classified pure.)
485    if crate_name == "tungstenite" {
486        if path.ends_with("::connect")
487            || path.ends_with("::read")
488            || path.ends_with("::write")
489            || path.ends_with("::send")
490            || path.ends_with("::close")
491            || path.ends_with("::flush")
492            || path.ends_with("::read_message")
493            || path.ends_with("::write_message")
494        {
495            return Some("Net");
496        }
497        return None;
498    }
499    // elasticsearch: request builders are pure; only the `.send()` dispatch is HTTP I/O
500    // (same shape as reqwest / the AWS SDK). (Found on an elasticsearch consumer.)
501    if crate_name == "elasticsearch" && path.ends_with("::send") {
502        return Some("Net");
503    }
504    // gRPC — tonic. The transport connect and the Grpc client RPC dispatch are network;
505    // codecs and request/response wrappers are pure. (connect repro-confirmed on a consumer;
506    // the unary/streaming RPC verbs are from the tonic::client::Grpc API.)
507    if crate_name == "tonic" {
508        if path.ends_with("::connect")
509            || path.ends_with("::unary")
510            || path.ends_with("::server_streaming")
511            || path.ends_with("::client_streaming")
512            || path.ends_with("::streaming")
513        {
514            return Some("Net");
515        }
516        return None;
517    }
518    // Kafka — rdkafka (FFI to librdkafka). Producer send + consumer poll/recv/subscribe/
519    // commit are network round-trips to the brokers. (API-calibrated + unit-tested; a real
520    // repro needs librdkafka/cmake, deferred.)
521    if crate_name == "rdkafka" {
522        if path.ends_with("::send")
523            || path.ends_with("::send_result")
524            || path.ends_with("::recv")
525            || path.ends_with("::poll")
526            || path.ends_with("::subscribe")
527            || path.ends_with("::commit")
528            || path.ends_with("::commit_message")
529            || path.ends_with("::commit_consumer_state")
530            || path.ends_with("::store_offset")
531            || path.ends_with("::seek")
532            || path.ends_with("::fetch_metadata")
533            || path.ends_with("::fetch_watermarks")
534            || path.ends_with("::flush")
535        {
536            return Some("Net");
537        }
538        return None;
539    }
540    // cap-std: capability-oriented std. I/O goes *through* a held capability handle
541    // (Dir/Pool/Clock/...), so these calls ARE the effect. Recognising them means a
542    // cap-std project's real I/O is detected and matches the capability it declared
543    // (via `declared_caps`/`capstd_cap`) — conformance against unforgeable capabilities.
544    if crate_name.starts_with("cap_") {
545        if path.contains("::net::Unix") || path.contains("::os::") {
546            return Some("Ipc");
547        }
548        if path.contains("::net") {
549            return Some("Net");
550        }
551        if path.contains("::time") {
552            return Some("Clock");
553        }
554        if path.contains("::fs") || crate_name == "cap_tempfile" || crate_name == "cap_directories" {
555            return Some("Fs");
556        }
557        return None;
558    }
559    // Local IPC (Unix-domain sockets) is I/O but not *network* — keep it distinct so
560    // CANDOR_NO_AMBIENT and audits don't conflate it with internet access. async-std puts its
561    // Unix sockets under `os::unix::net` (mirroring std); async-net (smol's net layer) under
562    // `unix`.
563    if path.starts_with("tokio::net::Unix")
564        || path.starts_with("std::os::unix::net")
565        || path.starts_with("async_std::os::unix::net")
566        || path.starts_with("async_net::unix")
567    {
568        return Some("Ipc");
569    }
570    // Raw packet capture / raw sockets — libpnet (the dominant low-level networking crate; powers
571    // bandwhich, sniffers, custom-protocol tools). `datalink::channel` opens an L2 socket and
572    // `transport::transport_channel` an L3/L4 raw socket — both ARE network I/O. Packet construction
573    // (pnet_packet / pnet_base, MacAddr, Ethernet frames…) is pure and stays unclassified. The actual
574    // frame read/write happens via methods on the returned Sender/Receiver (trait-object dispatch the
575    // syntactic backend can't resolve), so the channel-open call is the precise Net boundary. (Found
576    // scanning bandwhich — a packet sniffer — which reported Net 0.)
577    if crate_name == "pnet" || crate_name == "pnet_datalink" || crate_name == "pnet_transport" {
578        if path.ends_with("::channel") || path.ends_with("::transport_channel") {
579            return Some("Net");
580        }
581        return None;
582    }
583    // Directory traversal — `ignore` (BurntSushi's gitignore-aware walker; powers ripgrep, fd). The walk
584    // EXECUTORS read the directory tree from disk = Fs. Type-precise on purpose: the configuration builders
585    // (`OverrideBuilder::build`, `GitignoreBuilder::build`, the `WalkBuilder` setters) and `DirEntry`
586    // accessors are PURE — only `WalkBuilder::build`/`build_parallel` (which kick off the walk) and
587    // `WalkParallel::run` (which drives it) touch the filesystem. A bare `build` would wrongly flag the
588    // config builders. (Found scanning fd — a file finder — which reported Fs 2: its own `fs::read_dir`
589    // was caught, but the `ignore`-based traversal that IS fd was invisible cross-crate.)
590    if crate_name == "ignore" {
591        if path == "ignore::WalkBuilder::build"
592            || path == "ignore::WalkBuilder::build_parallel"
593            || path.ends_with("::WalkParallel::run")
594            // `add_ignore(path)` LOOKS like a config setter but reads that ignore file from disk at call
595            // time (it returns the read error) — unlike the pure `add_custom_ignore_filename(name)` which
596            // only stores a filename string. The lone Fs-touching builder method in the otherwise-pure setter
597            // surface, so it was silently pure under the covered-crate floor.
598            || path == "ignore::WalkBuilder::add_ignore"
599        {
600            return Some("Fs");
601        }
602        return None;
603    }
604    // Filesystem watching — `notify` (the de-facto fs-watch crate: watchexec, cargo-watch, mdbook). A
605    // watcher opens an OS notification handle (inotify / FSEvents / kqueue / ReadDirectoryChanges) and
606    // registers paths — observing filesystem state changes = Fs. The lifecycle boundary: any
607    // `*Watcher::new` constructor (RecommendedWatcher/PollWatcher/INotifyWatcher/FsEventWatcher/…), the
608    // `recommended_watcher` convenience fn, and the `watch`/`unwatch` registration verbs. `Config`/`Event`/
609    // `EventKind` data types stay pure. (Found scanning watchexec: its watcher-`create` read Fs 0.)
610    if crate_name == "notify" {
611        if path.ends_with("Watcher::new")
612            || path.ends_with("::recommended_watcher")
613            || path.ends_with("::watch")
614            || path.ends_with("::unwatch")
615        {
616            return Some("Fs");
617        }
618        return None;
619    }
620    // std DNS resolution — `("host", 80).to_socket_addrs()` / `std::net::lookup_host("host")` perform a
621    // real getaddrinfo query (Net), but the classify table covered only the socket I/O *types*, so they
622    // floored silently (sweep [37]; the syntactic engine modelled DNS only at the libc layer).
623    if path.ends_with("::to_socket_addrs")
624        || path == "std::net::lookup_host"
625        || path.ends_with("ToSocketAddrs::to_socket_addrs")
626    {
627        return Some("Net");
628    }
629    // Raw sockets. Match the I/O *types* only — `std::net` also holds pure data types
630    // (SocketAddr, IpAddr, …) whose construction must NOT be flagged.
631    if path.starts_with("std::net::TcpStream")
632        || path.starts_with("std::net::TcpListener")
633        || path.starts_with("std::net::UdpSocket")
634        || path.starts_with("tokio::net::")
635    {
636        // …but the PURE accessors read back local/option state — no network I/O — so the whole-type Net
637        // rule fabricated Net on them (sweep [24], the cardinal sin; mirrors the arboard/memmap2 accessor
638        // carve-outs). local_addr/peer_addr return bound/connected addresses; nodelay/ttl/take_error read
639        // socket options/state. Every genuine verb (connect/read/write/send/recv/accept) stays Net.
640        if path.ends_with("::local_addr")
641            || path.ends_with("::peer_addr")
642            || path.ends_with("::nodelay")
643            || path.ends_with("::ttl")
644            || path.ends_with("::take_error")
645        {
646            return None;
647        }
648        return Some("Net");
649    }
650    // Legacy tokio 0.1 socket crates — `tokio_tcp`/`tokio_udp` are *entirely* networking
651    // (no pure types to over-flag), so the whole crate is Net. (Found hardening on websocat,
652    // which is still on tokio 0.1: its `tokio_tcp::TcpStream::connect` was classified
653    // network-free — a network tool confidently reporting 0 Net.)
654    if matches!(crate_name, "tokio_tcp" | "tokio_udp") {
655        return Some("Net");
656    }
657    // The other async runtimes mirror tokio's module layout, and their `net` modules hold only
658    // socket I/O types (the pure `SocketAddr`/`IpAddr` are re-exports that resolve to `std::net`,
659    // so they're excluded by def-path). `mio` is the low-level non-blocking-socket layer under
660    // tokio/others; `async_net` is smol's net crate. Closes the async-std/smol/mio gap the
661    // tokio_tcp note flagged. (Calibrated by module structure — these crates ARE networking — not
662    // a live repro; the TCP/UDP types are defined in-crate so the def-path prefix is exact.)
663    if path.starts_with("async_std::net::")
664        || path.starts_with("mio::net::")
665        || crate_name == "async_net"
666    {
667        return Some("Net");
668    }
669    // Database clients. Like the AWS/HTTP builders, only the execution verbs are I/O;
670    // query *construction* is pure. Best-effort across crates (tune via CANDOR_CONFIG).
671    // Note: bare `::query` is deliberately omitted — it executes in postgres/rusqlite but
672    // only *builds* in sqlx, so including it would false-positive sqlx's `query()` builder.
673    if DB_CRATES.contains(&crate_name) {
674        // Postgres / SQLite-family clients: `query`/`batch_execute`/`prepare`/etc. ARE the
675        // execution (round-trips to the server). sqlx is the outlier where bare `query()`
676        // only BUILDS — it keeps the narrow set below. (Found by running on a real
677        // tokio-postgres app, pgman: candor had reported only 4 of ~20 DB call sites.)
678        if matches!(crate_name, "postgres" | "tokio_postgres" | "deadpool_postgres" | "rusqlite") {
679            const PG: [&str; 19] = [
680                "::query", "::query_one", "::query_opt", "::query_raw", "::execute",
681                "::batch_execute", "::simple_query", "::prepare", "::prepare_typed",
682                "::copy_in", "::copy_out", "::transaction", "::connect",
683                // rusqlite's dialect of the same verbs (a verb-probe found the CANONICAL rusqlite
684                // consumer API classifying pure): `query_row` is the one-row read, `query_map`/
685                // `query_and_then` the many-row reads, `execute_batch` is rusqlite's name for
686                // batch_execute, `prepare_cached` round-trips like prepare. `query_typed` is
687                // tokio_postgres 0.7.10+.
688                "::query_row", "::query_map", "::query_and_then", "::execute_batch",
689                "::prepare_cached", "::query_typed",
690            ];
691            if PG.iter().any(|v| path.ends_with(v)) {
692                return Some("Db");
693            }
694            // rusqlite only: opening the database IS the connection establishment (`Connection::
695            // open`/`open_in_memory`/`open_with_flags` — the embedded analog of `::connect`).
696            if crate_name == "rusqlite"
697                && (path.ends_with("::open")
698                    || path.ends_with("::open_in_memory")
699                    || path.ends_with("::open_with_flags"))
700            {
701                return Some("Db");
702            }
703            return None;
704        }
705        // redis: the way redis is ACTUALLY used is the high-level `Commands`/`AsyncCommands`
706        // traits (`con.get`/`set`/`hset`/`lpush`/…) — every method is a round-trip — plus
707        // connection establishment. The shared VERBS below only catch the low-level
708        // `cmd("GET").query(con)`, so without this a normal redis user's calls classify as
709        // PURE. (Found hardening on redis-rs: a fn doing `con.get`/`set` reported no effects.)
710        if crate_name == "redis"
711            && (path.contains("Commands::")
712                || path.contains("::get_connection")
713                || path.contains("::get_async_connection")
714                || path.contains("::get_multiplexed_async_connection")
715                // a live `ConnectionManager` round-trips (Db), but `ConnectionManagerConfig` is a pure
716                // in-memory builder (set_number_of_retries/set_max_delay) — exclude it (adversarial review).
717                // `ConnectionManager::clone` is an Arc refcount bump — no Db round-trip (sweep [27]).
718                || (path.contains("ConnectionManager") && !path.contains("ConnectionManagerConfig")
719                    && !path.ends_with("::clone"))
720                || path.ends_with("::query")
721                || path.ends_with("::query_async")
722                || path.ends_with("::req_command")
723                || path.ends_with("::req_packed_command")
724                || path.ends_with("::req_packed_commands"))
725        {
726            return Some("Db");
727        }
728        // mongodb: a document-store API with none of the SQL verbs — the user calls
729        // `coll.find_one`/`insert_one`/`aggregate`/… and `Client::with_uri_str`. Without
730        // these a mongodb user's calls classify PURE. (Found hardening: a fn doing
731        // `find_one`+`insert_one` reported no effects.) Handle accessors (name/namespace)
732        // and option/doc builders don't match these verbs, so they stay pure.
733        if crate_name == "mongodb" {
734            const MONGO: [&str; 27] = [
735                "::with_uri_str", "::connect", "::find", "::find_one", "::insert_one",
736                "::insert_many", "::update_one", "::update_many", "::delete_one",
737                "::delete_many", "::replace_one", "::aggregate", "::count_documents",
738                "::estimated_document_count", "::count", "::distinct", "::run_command",
739                "::find_one_and_update", "::find_one_and_delete", "::find_one_and_replace",
740                "::list_collections", "::list_collection_names", "::list_databases",
741                "::list_database_names", "::create_collection", "::create_index", "::watch",
742            ];
743            if MONGO.iter().any(|v| path.ends_with(v)) {
744                return Some("Db");
745            }
746            return None;
747        }
748        // mysql / mysql_async: the `query`/`exec` families + `get_conn`/`ping` execute
749        // immediately — no build-then-execute split like sqlx, so matching `::query` is safe
750        // here. Same DB-verb-dialect gap class as redis/mongodb; calibrated from the Queryable
751        // API (unit-tested; a real-app repro is the remaining confirmation).
752        if matches!(crate_name, "mysql" | "mysql_async") {
753            const MY: [&str; 16] = [
754                "::query", "::query_first", "::query_iter", "::query_map", "::query_fold",
755                "::query_drop", "::exec", "::exec_first", "::exec_iter", "::exec_map",
756                "::exec_fold", "::exec_drop", "::exec_batch", "::prep", "::ping", "::get_conn",
757            ];
758            if MY.iter().any(|v| path.ends_with(v)) {
759                return Some("Db");
760            }
761            return None;
762        }
763        // sea_orm: an ORM whose execution is split from building (like sqlx). The query
764        // BUILDERS (`Entity::find`, `Entity::insert`) are pure; execution happens at `.all`/
765        // `.one`/`.count`/`.stream` and `Insert/Update/Delete::exec`. The write path via an
766        // ActiveModel (`model.insert(db)`) executes too — distinguished from the `EntityTrait`
767        // builder by the trait in the path (`ActiveModelTrait::`). (Found hardening on a
768        // sea_orm consumer app: `.all(db)` reads and `ActiveModel::insert` writes were pure.)
769        if crate_name == "sea_orm" {
770            // sea_orm RE-EXPORTS sea_query (`sea_orm::sea_query::…`), whose builder algebra collides with
771            // the execution verbs: `Func::count(col)` builds a COUNT() expr, `Condition::all()` AND-groups
772            // filters, `Expr::count(…)` — all PURE, none touch a db. The `::all`/`::count`/`::one` execution
773            // rule fabricated Db on them (sweep [5]). sea_query is pure query construction end-to-end, so
774            // exclude the whole re-exported namespace first.
775            if path.contains("sea_query") {
776                return None;
777            }
778            if path.ends_with("::all")
779                || path.ends_with("::one")
780                || path.ends_with("::count")
781                || path.ends_with("::stream")
782                || path.ends_with("::exec")
783                || path.ends_with("::exec_with_returning")
784                || path.ends_with("::exec_without_returning")
785                || path.ends_with("::connect")
786                || path.ends_with("::execute")
787                || path.ends_with("::execute_unprepared")
788                || path.ends_with("::query_one")
789                || path.ends_with("::query_all")
790                || path.ends_with("::fetch_page")
791                || path.ends_with("::num_items")
792                || path.contains("ActiveModelTrait::")
793            {
794                return Some("Db");
795            }
796            return None;
797        }
798        // (Reached by sqlx + diesel — the build-vs-execute-split crates — and by unmatched redis paths.)
799        // `first` is diesel's LIMIT-1 round trip and `load_iter` its 2.x streaming execution; `fetch_many`
800        // is sqlx's multi-result stream. All crate-gated, so a std `Vec::first` never resolves here.
801        const VERBS: [&str; 19] = [
802            "::execute", "::query_row", "::query_map", "::query_one", "::fetch_one",
803            "::fetch_all", "::fetch_optional", "::fetch", "::fetch_many", "::connect",
804            "::acquire", "::begin", "::commit", "::rollback", "::load", "::load_iter",
805            "::first", "::get_result", "::get_results",
806        ];
807        if VERBS.iter().any(|v| path.ends_with(v)) {
808            return Some("Db");
809        }
810        return None;
811    }
812    // std::path::Path / PathBuf STAT-family methods hit the filesystem (each is a stat/readlink/
813    // readdir syscall) — unlike the rest of the std::path surface, which is pure string manipulation
814    // (join/file_name/extension/parent/…). Verb-precise so the scanner's receiver inference can safely
815    // route a `path.symlink_metadata()` method call here. (A blackout screen caught gix-dir — an entire
816    // directory WALKER — reporting ZERO Fs because all its I/O is Path-method calls; same class as
817    // fd's residual `Path::symlink_metadata` under-report.)
818    if let Some(m) = path
819        .strip_prefix("std::path::Path::")
820        .or_else(|| path.strip_prefix("std::path::PathBuf::"))
821    {
822        const STAT: &[&str] = &[
823            "metadata", "symlink_metadata", "canonicalize", "read_link", "read_dir", "exists",
824            "try_exists", "is_file", "is_dir", "is_symlink",
825        ];
826        return STAT.contains(&m).then_some("Fs");
827    }
828    // Filesystem. `tokio::fs`/`async_std::fs` are the async mirrors of `std::fs`; `async_fs` is
829    // smol's fs crate; `fs_err` is a drop-in `std::fs` wrapper (its whole surface is fs I/O).
830    if path.starts_with("std::fs::")
831        || path.starts_with("tokio::fs::")
832        || path.starts_with("async_std::fs::")
833        || crate_name == "async_fs"
834        || crate_name == "fs_err"
835    {
836        return Some("Fs");
837    }
838    // memmap2: only `MmapOptions::map*` (and the in-place `Mmap::flush`/`make_*` protection
839    // changes / `remap`) actually issue the mmap/msync/mprotect/mremap syscall = Fs. The rest of the
840    // crate is PURE: `MmapOptions::new`/setters BUILD the request, and once a region is mapped, reads
841    // over it (`Mmap::len`/`is_empty`/`as_ptr`/`as_mut_ptr`/`deref` into the byte slice) are plain
842    // memory access with no syscall. Whole-crate Fs fabricated Fs on those reads (a `m.len()` the
843    // scanner's receiver inference routes to `memmap2::Mmap::len`). Match the syscall-issuing verbs;
844    // everything else returns None (pure). `map*` covers `map`/`map_mut`/`map_exec`/`map_copy`/
845    // `map_copy_read_only`/`map_raw`/`map_raw_read_only`/`map_anon`.
846    if crate_name == "memmap2" {
847        let m = path.rsplit("::").next().unwrap_or(path);
848        if m.starts_with("map")
849            || m == "flush"
850            || m == "flush_async"
851            || m == "flush_range"
852            || m == "flush_async_range"
853            || m == "remap"
854            || m.starts_with("make_")
855            || m == "advise"
856            || m == "advise_range"
857            || m == "lock"
858            || m == "unlock"
859        {
860            return Some("Fs");
861        }
862        return None;
863    }
864    // tempfile: creating a temp file/dir touches the disk. Match the create/persist verbs (the
865    // `Builder` setters — prefix/suffix/rand_bytes — stay pure). `persist`/`keep` rename/retain
866    // the file on disk; `close` removes it.
867    if crate_name == "tempfile"
868        && (path.ends_with("::tempfile")
869            || path.ends_with("::tempfile_in")
870            || path.ends_with("::tempdir")
871            || path.ends_with("::tempdir_in")
872            || path.ends_with("NamedTempFile::new")
873            || path.ends_with("NamedTempFile::new_in")
874            || path.ends_with("TempDir::new")
875            || path.ends_with("TempDir::new_in")
876            || path.ends_with("::persist")
877            || path.ends_with("::persist_noclobber")
878            || path.ends_with("::keep"))
879    {
880        return Some("Fs");
881    }
882    // glob: walks the filesystem to expand a pattern (the returned iterator reads directories).
883    // `Pattern::matches` is pure string matching — match only the directory-walking entry points.
884    if crate_name == "glob" && (path.ends_with("::glob") || path.ends_with("::glob_with")) {
885        return Some("Fs");
886    }
887    // Password-hashing / KDF crates — the entropy tier (the TS engine's CTA lesson: an invisible
888    // argon2 landed on exactly the call a security review cares about). In this engine's
889    // verb-precise style the ENTROPY is the salt mint: `SaltString::generate(OsRng)` in the
890    // password-hash API family, and bcrypt's `hash`/`hash_with_result` (salt minted internally).
891    // Verification and explicit-salt hashing are deterministic recomputation — pure. `rand_core`
892    // carries the OsRng source itself (otherwise the most common salt mint is invisible).
893    if matches!(crate_name, "argon2" | "scrypt" | "pbkdf2" | "password_hash") {
894        if path.contains("SaltString::generate") {
895            return Some("Rand");
896        }
897        return None;
898    }
899    if crate_name == "bcrypt" {
900        if path.ends_with("::hash") || path.ends_with("::hash_with_result") {
901            return Some("Rand");
902        }
903        return None;
904    }
905    if crate_name == "rand_core" {
906        if path.contains("OsRng")
907            || path.ends_with("::next_u32")
908            || path.ends_with("::next_u64")
909            || path.ends_with("::fill_bytes")
910        {
911            return Some("Rand");
912        }
913        return None;
914    }
915    // Randomness / entropy. `getrandom` is effectful end-to-end; `fastrand` (below) and `rand` are NOT — `rand`
916    // mixes entropy/generation (effectful) with *pure* distribution constructors (`Uniform::new`,
917    // `Normal::new`) and deterministic-seed constructors (`seed_from_u64`). Flagging the whole crate
918    // over-reported those as `Rand`; match only the calls that actually consume randomness — the
919    // entropy sources (`OsRng`, `thread_rng`/`rng`, `from_entropy`/`from_os_rng`) and the generation
920    // verbs (`gen*`/`random*`/`fill*`/`sample*`/`next_u*`). A `Uniform::new` is now correctly pure.
921    if crate_name == "getrandom" {
922        return Some("Rand");
923    }
924    // fastrand: like `rand`, it mixes entropy-consuming generation (effectful) with PURE deterministic
925    // pieces. `Rng::with_seed(42)` is a DETERMINISTIC seeded constructor (consumes no entropy — the same
926    // seed gives the same stream), and `Rng::fork`/`Rng::clone` just split/copy existing state. Those are
927    // PURE; whole-crate Rand fabricated Rand on them. The effect is the value-drawing methods (`u32`/
928    // `usize`/`bool`/`f64`/`char`/`alphanumeric`/`choice`/`choose_multiple`/`shuffle`/`fill`/the range
929    // forms) AND the entropy-seeded entry points: bare `Rng::new()` (seeds from the global entropy-backed
930    // generator), `fastrand::seed`, and the top-level `fastrand::u32(..)` free functions (which draw from
931    // the thread-local generator). `with_seed`/`fork`/`clone` are exempted explicitly; any other
932    // method on an `Rng` (i.e. a value draw) is Rand.
933    if crate_name == "fastrand" {
934        let m = path.rsplit("::").next().unwrap_or(path);
935        // Provably pure: deterministic seeded ctor + state split/copy.
936        if m == "with_seed" || m == "fork" || m == "clone" {
937            return None;
938        }
939        // Everything else fastrand exposes either draws a value or seeds from entropy → Rand. (The crate
940        // has no pure data types beyond the `Rng` handle itself, so a non-draw stray would have to be a
941        // method we don't recognise — keep the effect, the safe direction.)
942        return Some("Rand");
943    }
944    if crate_name == "rand" {
945        let rng_verb = path.ends_with("::gen")
946            || path.ends_with("::gen_range")
947            || path.ends_with("::gen_bool")
948            || path.ends_with("::gen_ratio")
949            || path.ends_with("::random")
950            || path.ends_with("::random_range")
951            || path.ends_with("::random_bool")
952            || path.ends_with("::random_ratio")
953            || path.ends_with("::random_iter") // rand 0.9 iterator generator
954            || path.ends_with("::gen_iter")
955            || path.ends_with("::fill")
956            || path.ends_with("::fill_bytes")
957            || path.ends_with("::try_fill")
958            || path.ends_with("::try_fill_bytes")
959            || path.ends_with("::sample")
960            || path.ends_with("::sample_iter")
961            || path.ends_with("::next_u32")
962            || path.ends_with("::next_u64")
963            || path.ends_with("::thread_rng")
964            || path.ends_with("::rng")
965            || path.ends_with("::from_entropy")
966            || path.ends_with("::from_os_rng");
967        // `OsRng` is the OS entropy SOURCE, but `clone`/`fork`/`default` just copy or construct the
968        // (zero-sized) handle and draw no entropy — pure, exactly like the `fastrand` arm's clone/fork
969        // exemption above. The actual draws (`fill_bytes`/`next_u*`/…) are caught by `rng_verb`. Without
970        // this exemption the blanket `contains("OsRng")` fabricated `Rand` on `OsRng::clone` (adversarial
971        // review: OsRng is a unit struct, cloning consumes nothing).
972        let m = path.rsplit("::").next().unwrap_or(path);
973        let os_rng = path.contains("OsRng") && !matches!(m, "clone" | "fork" | "default");
974        if rng_verb || os_rng {
975            return Some("Rand");
976        }
977        return None;
978    }
979    // Subprocess spawning. `tokio::process` is the async mirror of `std::process` — it exists
980    // only to spawn/control subprocesses (`Command`/`Child`, no pure data types like std's
981    // `Stdio`/`ExitStatus`/`exit`), so spawning through it is Exec just the same. Without this an
982    // async app's `tokio::process::Command::new(..).spawn()` classified pure — a silent under-report
983    // of subprocess execution, the dangerous direction (mirrors the tokio::fs/tokio::net coverage).
984    if path.starts_with("std::process::Command")
985        || path.starts_with("std::process::Child")
986        || path.starts_with("tokio::process::Command")
987        || path.starts_with("tokio::process::Child")
988        || path.starts_with("async_std::process::Command")
989        || path.starts_with("async_std::process::Child")
990    {
991        // PURE read-backs of the builder's stored fields / the cached pid — no spawn, no syscall — so the
992        // whole-type Exec rule fabricated Exec on them (sweep [23]; mirrors the portable_pty getter carve-
993        // out just below). get_program/get_args/get_envs/get_current_dir read the Command; Child::id reads
994        // the cached pid. Every genuine verb (new/spawn/output/status/wait/kill) stays Exec.
995        if path.ends_with("::get_program")
996            || path.ends_with("::get_args")
997            || path.ends_with("::get_envs")
998            || path.ends_with("::get_current_dir")
999            || path.ends_with("Child::id")
1000        {
1001            return None;
1002        }
1003        return Some("Exec");
1004    }
1005    // portable_pty / async_process are whole-crate Exec EXCEPT for the proven-pure surface they expose:
1006    // the `CommandBuilder` GETTERS (`get_argv`/`get_cwd`/`get_env`/`as_unix_command_line`…) read back
1007    // configuration, and the PURE DATA types (`PtySize::default`, `ExitStatus`/`Stdio`/`CommandBuilder`
1008    // construction/setters). The earlier `is_cmd_naming_method` fix stopped the head-refinement LEAK, but
1009    // the BASE Exec still fabricated on these accessors (a `cmd.get_cwd()` the scanner routes to
1010    // `portable_pty::CommandBuilder::get_cwd`). Subtract the read-back getters and the obvious pure
1011    // ctors/setters; the spawn/wait/exec surface (`spawn_command`/`openpty`/`wait`/`kill`/`exec`…) keeps
1012    // Exec. SUBTRACT only what is provably pure — when unrecognised, KEEP Exec (the safe direction).
1013    if crate_name == "async_process" || crate_name == "portable_pty" {
1014        let m = path.rsplit("::").next().unwrap_or(path);
1015        // configuration read-back getters — pure (no spawn).
1016        if m.starts_with("get_") || m == "as_unix_command_line" {
1017            return None;
1018        }
1019        // pure data-type ctors/setters/derives that NAME no program and spawn nothing.
1020        if matches!(
1021            m,
1022            "default" | "new" | "piped" | "null" | "inherit" | "from_raw_fd"
1023                | "arg" | "args" | "arg0" | "env" | "envs" | "env_clear" | "env_remove"
1024                | "cwd" | "current_dir" | "rows" | "cols"
1025                | "clone" | "fmt" | "eq" | "ne" | "hash"
1026        ) {
1027            return None;
1028        }
1029        return Some("Exec");
1030    }
1031    // duct: a subprocess-orchestration crate. `cmd()`/`cmd!` only *build* an Expression; the
1032    // spawn/wait happens at `run`/`read`/`start`. Match the execution verbs, not the builder.
1033    if crate_name == "duct"
1034        && (path.ends_with("::run")
1035            || path.ends_with("::read")
1036            || path.ends_with("::start")
1037            || path.ends_with("::read_chars"))
1038    {
1039        return Some("Exec");
1040    }
1041    if path.starts_with("std::env::") {
1042        return Some("Env");
1043    }
1044    // dotenvy / dotenv: load environment variables (reading a `.env` file and mutating the process
1045    // environment). Match the load/read entry points; `Error`/builder types stay pure.
1046    if matches!(crate_name, "dotenvy" | "dotenv")
1047        && (path.ends_with("::dotenv")
1048            || path.ends_with("::dotenv_override")
1049            || path.ends_with("::from_path")
1050            || path.ends_with("::from_path_override")
1051            || path.ends_with("::from_filename")
1052            || path.ends_with("::from_filename_override")
1053            || path.ends_with("::from_read")
1054            || path.ends_with("::from_read_override")
1055            || path.ends_with("::load")
1056            || path.ends_with("::var")
1057            || path.ends_with("::vars"))
1058    {
1059        return Some("Env");
1060    }
1061    // Wall-clock reads. Match the `now` accessor precisely (ends_with), not any path
1062    // containing the substring "now". The `time` crate (distinct from `std::time`/`chrono`)
1063    // reads the clock via `now_utc`/`now_local` (and the deprecated `Instant::now`).
1064    if (crate_name == "chrono" || path.starts_with("std::time::")) && path.ends_with("::now") {
1065        return Some("Clock");
1066    }
1067    if crate_name == "time"
1068        && (path.ends_with("::now_utc") || path.ends_with("::now_local") || path.ends_with("::now"))
1069    {
1070        return Some("Clock");
1071    }
1072    // `tracing`: same principle as the `log` facade below — the crate's TYPES are pure data, so match
1073    // the emit, not the whole crate. The actual program output is the macro-expanded
1074    // `Subscriber::event`/`event!`/`Span::*enter*` dispatch and the `Span::new*`/`Span::record`
1075    // recording path that drives the subscriber. The data-type accessors — `Level::as_str`,
1076    // `Span::is_disabled`/`metadata`/`id`, and constructing/reading `Level`/`LevelFilter`/`Span`/
1077    // `Event`/`Metadata`/`Field`/`FieldSet`/`Id` — are PURE (no output is produced), so whole-crate Log
1078    // fabricated Log on them. Match the emit verbs; everything else returns None.
1079    if crate_name == "tracing" {
1080        let m = path.rsplit("::").next().unwrap_or(path);
1081        // The user-facing emit MACROS (`tracing::info!`/`warn!`/…) — candor-scan is pre-expansion, so it
1082        // sees the raw macro path `tracing::info`, not the expanded `__tracing`/`Subscriber::event` the
1083        // deep (post-expansion) engine sees. Only the macro names; the pure DATA types (Level/Span/Event)
1084        // have other tails and stay None.
1085        if m == "trace" || m == "debug" || m == "info" || m == "warn" || m == "error"
1086            || m == "trace_span" || m == "debug_span" || m == "info_span" || m == "warn_span"
1087            || m == "error_span" || m == "span"
1088            || m == "event"
1089            || m == "new_span"
1090            || m == "record"
1091            || m == "record_follows_from"
1092            || m == "enter"
1093            || m == "exit"
1094            || m == "in_scope"
1095            || m == "entered"
1096            || path.contains("::__macro_support")
1097            || path.contains("::__tracing")
1098            || path.contains("Subscriber::event")
1099            || path.contains("Subscriber::new_span")
1100            || path.contains("Subscriber::enter")
1101            || path.contains("Subscriber::exit")
1102        {
1103            return Some("Log");
1104        }
1105        return None;
1106    }
1107    // The `log` facade: its macros route through `log::__private_api`; the crate's types
1108    // (`Level`, `LevelFilter`) are pure, so match the logging entry, not the whole crate.
1109    if crate_name == "log" {
1110        // Expanded macro form (deep engine) OR the raw user-facing macro names (candor-scan, pre-expansion).
1111        // `log::Level`/`LevelFilter`/`Record`/`Metadata` have other tails, so the type surface stays pure.
1112        let m = path.rsplit("::").next().unwrap_or(path);
1113        if path.contains("::__private_api")
1114            || m == "error" || m == "warn" || m == "info" || m == "debug" || m == "trace" || m == "log"
1115        {
1116            return Some("Log");
1117        }
1118    }
1119    // Compiler diagnostic emission — the ONE genuinely effectful operation in the otherwise-pure
1120    // rustc_* surface (a dylint lint's actual OUTPUT: it writes warnings/errors to the compiler's
1121    // diagnostic sink). Classified `Log` (same family as `tracing`/`log` — program output). Match the
1122    // emission verbs precisely; rustc_lint/rustc_errors are mostly pure types (Lint, LintId, the Diag
1123    // BUILDERS), and only the terminal `emit`/`emit_span_lint` actually produces output.
1124    if crate_name == "rustc_lint"
1125        && (path.ends_with("::emit_span_lint")
1126            || path.ends_with("::span_lint")
1127            || path.ends_with("::span_lint_hir"))
1128    {
1129        return Some("Log");
1130    }
1131    if crate_name == "rustc_errors"
1132        && (path.ends_with("::emit")
1133            || path.ends_with("::emit_diagnostic")
1134            || path.ends_with("::emit_now"))
1135    {
1136        return Some("Log");
1137    }
1138    // arboard: the effectful surface is the `Clipboard` handle's read/write verbs (each talks to the
1139    // OS clipboard / X11/Wayland/Win32/NSPasteboard server). The data types — chiefly `arboard::Error`
1140    // (whose `Display`/`to_string` formatting is pure) and the `ImageData`/`GetExtLinux`/`SetExtLinux`
1141    // option types — are PURE, so whole-crate Clipboard fabricated Clipboard on e.g. an error
1142    // `to_string()`. Match the handle verbs; everything else returns None. `Clipboard::new` opens the
1143    // connection to the clipboard server, so it's an effect too; `get`/`set` return the
1144    // builder-then-read `Get`/`Set` cursors whose `text`/`image`/`html` terminals do the I/O.
1145    if crate_name == "arboard" {
1146        let m = path.rsplit("::").next().unwrap_or(path);
1147        if m == "new"
1148            || m == "get"
1149            || m == "set"
1150            || m == "clear"
1151            || m == "get_text"
1152            || m == "set_text"
1153            || m == "set_html"
1154            || m == "get_image"
1155            || m == "set_image"
1156            || m == "text"
1157            || m == "image"
1158            || m == "html"
1159        {
1160            return Some("Clipboard");
1161        }
1162        return None;
1163    }
1164    // ── Coverage-differential additions (calibrated against each crate's real API; see the per-crate
1165    //    notes). All verb-keyed + crate-gated, with the pure builder/config/data surface returning None.
1166
1167    // `etcetera` — XDG/known-folder base+app directory resolution. Each dir ACCESSOR reads the
1168    // environment at call time (`$HOME`/`$XDG_*` on Unix, `%APPDATA%`/`%LOCALAPPDATA%` on Windows), and
1169    // the `choose_*`/`home_dir` entry points read `$HOME`. The `AppStrategyArgs` data struct and the
1170    // strategy types themselves are PURE. (Found DISCLOSED-but-unmodeled in 3/4 differential projects.)
1171    if crate_name == "etcetera" {
1172        let m = path.rsplit("::").next().unwrap_or(path);
1173        if m == "home_dir"
1174            || m == "choose_base_strategy" || m == "choose_native_strategy" || m == "choose_app_strategy"
1175            || m == "config_dir" || m == "data_dir" || m == "cache_dir"
1176            || m == "state_dir" || m == "runtime_dir" || m == "data_local_dir"
1177        {
1178            return Some("Env");
1179        }
1180        return None;
1181    }
1182    // `sqlx-core` (crate `sqlx_core`) — the execution terminals under the sqlx core (the `sqlx` builder
1183    // table maps `sqlx::query*`; here it's the core `Executor`/`Connection`/`Pool` round-trips). Opening
1184    // the connection is the network boundary (Net); the query/transaction round-trips are Db. The
1185    // `*Options`/query-builder/row data types are PURE. Crate-gated so the generic verbs never spread.
1186    if crate_name == "sqlx_core" {
1187        if path.ends_with("::connect") || path.ends_with("::connect_with") {
1188            return Some("Net");
1189        }
1190        if path.ends_with("::fetch") || path.ends_with("::fetch_all") || path.ends_with("::fetch_one")
1191            || path.ends_with("::fetch_optional") || path.ends_with("::fetch_many")
1192            || path.ends_with("::execute") || path.ends_with("::execute_many")
1193            || path.ends_with("::prepare") || path.ends_with("::prepare_with")
1194            || path.ends_with("::acquire") || path.ends_with("::begin") || path.ends_with("::ping")
1195        {
1196            return Some("Db");
1197        }
1198        return None;
1199    }
1200    // `walkdir` — recursive directory traversal. The disk read (`read_dir` + `stat`) happens lazily in
1201    // `IntoIter::next` (driving the iterator), and `DirEntry::metadata` issues a `stat`. The
1202    // `WalkDir::new`/`max_depth`/`follow_links`/`sort_by` BUILDERS, `WalkDir::into_iter` (constructs the
1203    // iterator, no I/O until pulled), and the cached `DirEntry::path`/`file_name`/`file_type`/`depth`
1204    // accessors (`file_type` makes NO syscall) are PURE. (Companion to the already-modeled `ignore`.)
1205    if crate_name == "walkdir" {
1206        if path.ends_with("::IntoIter::next") || path.ends_with("::DirEntry::metadata") {
1207            return Some("Fs");
1208        }
1209        return None;
1210    }
1211    // `filetime` — file-timestamp mutation. The `set_*` free fns issue utimes/utimensat/futimens (Fs).
1212    // `FileTime::now` reads the system clock (Clock). The `FileTime::from_*`/`zero` value constructors
1213    // (incl. `from_last_modification_time(&Metadata)` etc., which read an ALREADY-loaded `&Metadata`, not
1214    // the disk) and the `seconds`/`nanoseconds` accessors are PURE.
1215    if crate_name == "filetime" {
1216        if path.ends_with("::set_file_mtime") || path.ends_with("::set_file_atime")
1217            || path.ends_with("::set_file_times") || path.ends_with("::set_symlink_file_times")
1218            || path.ends_with("::set_file_handle_times")
1219        {
1220            return Some("Fs");
1221        }
1222        if path.ends_with("::FileTime::now") {
1223            return Some("Clock");
1224        }
1225        return None;
1226    }
1227    // `execute` — the `Execute` trait that extends `std::process::Command` with run helpers. The
1228    // `execute*` verbs SPAWN a child process (Exec). The `execute::command`/`shell` free fns and the
1229    // `command!`/`command_args!` macros only BUILD a Command (no spawn) and stay PURE.
1230    if crate_name == "execute" {
1231        if path.contains("::execute") {
1232            return Some("Exec");
1233        }
1234        return None;
1235    }
1236    // `ctrlc` — installs an OS signal handler (Unix SIGINT/SIGTERM/SIGHUP, Windows CTRL_C_EVENT) and
1237    // spawns its handler thread. Signals are an inter-process control channel, so the closest bucket is
1238    // Ipc (candor has no dedicated Signal effect; same judgment as routing SysV/pipe IPC to Ipc).
1239    if crate_name == "ctrlc" {
1240        if path.ends_with("::set_handler") || path.ends_with("::try_set_handler") {
1241            return Some("Ipc");
1242        }
1243        return None;
1244    }
1245    // `clap` — argument parsing. ONLY the terminals that read `std::env::args_os` at call time are an
1246    // effect (Env): `get_matches`/`get_matches_mut`/`try_get_matches` and the derive `parse`/`try_parse`.
1247    // clap is MOSTLY PURE: the ENTIRE builder surface (`Command::new`/`arg`/`about`/`Arg::new`) stays
1248    // None, and crucially the `*_from`/`*_parse_from` variants take an EXPLICIT iterator (they do NOT
1249    // read argv) so they stay pure too. (`Arg::env` reads an env var at builder time but bare `::env` is
1250    // too generic to gate safely, so it's left unmodeled — under-report over fabrication.)
1251    if crate_name == "clap" {
1252        if path.ends_with("::get_matches") || path.ends_with("::get_matches_mut")
1253            || path.ends_with("::try_get_matches")
1254            || path.ends_with("::parse") || path.ends_with("::try_parse")
1255        {
1256            return Some("Env");
1257        }
1258        return None;
1259    }
1260    // `jiff` — date/time. `Timestamp::now`/`Zoned::now`/`Zoned::now_with` read the wall clock (Clock).
1261    // `tz::TimeZone::system`/`get` and `tz::db().get` read the system tzdb files from disk
1262    // (`/etc/localtime`, `/usr/share/zoneinfo`; `system` is also `$TZ`-overridable — Fs is the dominant
1263    // op, modeled as Fs). The `Span`/`civil` date math and `Timestamp`/`Zoned` arithmetic are PURE.
1264    if crate_name == "jiff" {
1265        if path.ends_with("::now") || path.ends_with("::now_with") {
1266            return Some("Clock");
1267        }
1268        if path.ends_with("::TimeZone::system") || path.ends_with("::TimeZone::get")
1269            || path.ends_with("::TimeZoneDatabase::get")
1270        {
1271            return Some("Fs");
1272        }
1273        return None;
1274    }
1275    // `env_logger` — installs the global logger and emits to stderr; reads `RUST_LOG`/`RUST_LOG_STYLE`.
1276    // The init terminals are the effect (Log — program output, same family as `log`/`tracing`). The
1277    // `Builder::new`/`build` and the format/filter/target config setters are PURE.
1278    if crate_name == "env_logger" {
1279        if path.ends_with("::init") || path.ends_with("::try_init")
1280            || path.ends_with("::init_from_env") || path.ends_with("::try_init_from_env")
1281        {
1282            return Some("Log");
1283        }
1284        return None;
1285    }
1286    // `dialoguer` — interactive terminal prompts. The `interact*` verbs read stdin + write the tty (a
1287    // console dialogue with the user — Ipc, like the other local-channel effects). The
1288    // `with_prompt`/`default`/`items`/`validate_with` BUILDERS are PURE.
1289    if crate_name == "dialoguer" {
1290        if path.ends_with("::interact") || path.ends_with("::interact_on")
1291            || path.ends_with("::interact_text") || path.ends_with("::interact_text_on")
1292            || path.ends_with("::interact_opt") || path.ends_with("::interact_on_opt")
1293        {
1294            return Some("Ipc");
1295        }
1296        return None;
1297    }
1298    // `console` — terminal handle + styling. The `Term` read/write verbs do tty I/O (Ipc, the user
1299    // dialogue channel; note there is NO `write_str` — `Term` impls `io::Write`). The free-fn terminal
1300    // detection (`colors_enabled`/`user_attended`) reads `CLICOLOR`/`CLICOLOR_FORCE` (Env). The `Style`
1301    // color/format methods and the text utils (`strip_ansi_codes`/`pad_str`/`measure_text_width`) are PURE.
1302    if crate_name == "console" {
1303        if path.ends_with("::write_line") || path.ends_with("::read_line")
1304            || path.ends_with("::read_line_initial_text") || path.ends_with("::read_char")
1305            || path.ends_with("::read_key") || path.ends_with("::read_key_raw")
1306            || path.ends_with("::read_secure_line")
1307        {
1308            return Some("Ipc");
1309        }
1310        if path.ends_with("::colors_enabled") || path.ends_with("::colors_enabled_stderr")
1311            || path.ends_with("::user_attended") || path.ends_with("::user_attended_stderr")
1312        {
1313            return Some("Env");
1314        }
1315        return None;
1316    }
1317    // `terminal_colorsaurus` — queries the terminal's colours by writing OSC 10/11 escapes and reading the
1318    // reply (bidirectional tty dialogue — Ipc, consistent with dialoguer/console). Nothing else is I/O.
1319    if crate_name == "terminal_colorsaurus" {
1320        if path.ends_with("::background_color") || path.ends_with("::foreground_color")
1321            || path.ends_with("::color_palette") || path.ends_with("::theme_mode")
1322        {
1323            return Some("Ipc");
1324        }
1325        return None;
1326    }
1327    // `backoff` — retry-with-backoff. `retry`/`retry_notify` consult the clock and `thread::sleep`
1328    // between attempts (Clock). The `ExponentialBackoff`/builder config is PURE. (The user closure's own
1329    // effects are out of scope here — we model only backoff's own Clock effect.)
1330    if crate_name == "backoff" {
1331        if path.ends_with("::retry") || path.ends_with("::retry_notify") {
1332            return Some("Clock");
1333        }
1334        return None;
1335    }
1336    // `lscolors` — LS_COLORS parsing. ONLY `from_env` reads the environment (Env). `from_string`/
1337    // `style_for_path`/`style_for*` and the `Style` type take explicit input and are PURE.
1338    if crate_name == "lscolors" {
1339        if path.ends_with("::from_env") {
1340            return Some("Env");
1341        }
1342        return None;
1343    }
1344    // `wild` — argv with glob expansion. `args`/`args_os` read `std::env::args(_os)` (Env). Nothing else.
1345    if crate_name == "wild" {
1346        if path.ends_with("::args") || path.ends_with("::args_os") {
1347            return Some("Env");
1348        }
1349        return None;
1350    }
1351    // `grep_cli` — only the firm effect is modeled: `CommandReaderBuilder::build` spawns a child process
1352    // (Exec). The `is_readable_stdin`/`is_tty_*` fd probes (isatty/fstat on the std descriptors) are
1353    // deliberately NOT modeled — candor doesn't classify `IsTerminal`/isatty as an effect anywhere, and
1354    // they read no data; flagging them would be an inconsistent over-report.
1355    if crate_name == "grep_cli" {
1356        if path.ends_with("::build") {
1357            return Some("Exec");
1358        }
1359        return None;
1360    }
1361    // `clircle` — detects whether two handles are the same file (cycle protection). `Identifier::try_from`
1362    // (File/Stdio) issues an `fstat`, and `surely_conflicts_with` does an `lseek` (`stream_position`) — both
1363    // Fs. The `PartialEq`/`Hash` comparisons read stored dev/ino and are PURE. (The named methods
1364    // `are_identical`/`same_file` do NOT exist in the crate — not modeled.)
1365    if crate_name == "clircle" {
1366        if path.ends_with("::try_from") || path.ends_with("::surely_conflicts_with") {
1367            return Some("Fs");
1368        }
1369        return None;
1370    }
1371    None
1372}
1373
1374pub fn cap_from_name(name: &str) -> Option<&'static str> {
1375    EFFECTS.iter().copied().find(|e| *e == name)
1376}
1377
/// Refine the `Exec` cliff (spec §4 ⟨0.5⟩) for a *literal, statically-known* subprocess head.
///
/// The head is reduced to its basename — whatever follows the last `/` or `\`, so
/// `/usr/bin/curl` is looked up as `curl` — and matched against a small curated table. The effects
/// returned are meant to be ADDED to a caller that already carries `Exec`: a subprocess is still
/// spawned, so `Exec` itself is never dropped. An unrecognised or dynamically-built head yields
/// `&[]` and keeps the bare cliff (never guess).
///
/// Table rules:
/// * Only UNAMBIGUOUS single-effect tools are listed. A multi-modal head (`git status` is local,
///   `git push` is Net; `rsync` local-vs-remote) would FABRICATE the effect for its common case,
///   which the under-report rule forbids, so such heads stay the bare cliff.
/// * INVARIANT: every head is an external tool that does NOT run the analysed project's own code,
///   which is why `make`/`npm`/`cargo` are deliberately absent.
/// * The candor engines map to `Env` + `Fs` only; spec §7 item 12 (the analyzer self-boundary)
///   guarantees that, so that row is spec-supplied rather than curated.
///
/// The reference engines share this table so the `Exec` boundary refines identically everywhere
/// (the §4-consistency argument).
pub fn classify_command_head(cmd: &str) -> &'static [&'static str] {
    const NET_TOOLS: &[&str] = &["curl", "wget", "http", "ssh", "scp", "sftp", "ftp", "telnet"];
    const DB_TOOLS: &[&str] =
        &["psql", "mysql", "sqlite3", "mongosh", "mongo", "redis-cli", "cqlsh", "influx"];
    // candor engines — Fs/Env only, guaranteed by spec §7 item 12 (the analyzer self-boundary)
    const CANDOR_ENGINES: &[&str] = &[
        "candor",
        "candor-run.sh",
        "candor-scan",
        "candor-query",
        "candor-java",
        "candor-classify",
        "candor-report",
        "cargo-candor",
    ];

    // Basename: the slice after the last path separator (both separators are one byte wide),
    // or the whole string when there is no separator at all.
    let basename = match cmd.rfind(['/', '\\']) {
        Some(sep) => &cmd[sep + 1..],
        None => cmd,
    };

    if NET_TOOLS.contains(&basename) {
        &["Net"]
    } else if DB_TOOLS.contains(&basename) {
        &["Db"]
    } else if CANDOR_ENGINES.contains(&basename) {
        &["Env", "Fs"]
    } else {
        &[]
    }
}
1401
/// Whether `method` (the call path's last segment) merely MODIFIES a subprocess command —
/// `.arg`, `.env`, `.current_dir`, stdio wiring, uid/gid and similar knobs — instead of NAMING
/// the program (`Command::new`, `duct::cmd`).
///
/// Head refinement must skip these calls: wherever the classifier reports `Exec` on a builder
/// method, an argument or env-var-name literal that happens to equal a known head
/// (`.env("psql", …)`, `.arg("curl")`) would otherwise FABRICATE that head's effect, violating
/// the §1 under-report rule.
pub fn is_cmd_builder_method(method: &str) -> bool {
    const MODIFIERS: &[&str] = &[
        // arguments
        "arg",
        "args",
        "arg0",
        // environment
        "env",
        "envs",
        "env_clear",
        "env_remove",
        // working directory
        "current_dir",
        "cwd",
        // stdio wiring
        "stdin",
        "stdout",
        "stderr",
        // platform-specific spawn configuration
        "pre_exec",
        "creation_flags",
        "uid",
        "gid",
        "groups",
        "process_group",
    ];
    MODIFIERS.contains(&method)
}
1416
/// Whether `method` NAMES the program of a subprocess, i.e. its first string literal IS the
/// command head to refine: `Command::new("curl")`, `duct::cmd("curl", …)`.
///
/// This is deliberately an ALLOWLIST rather than "any method except the known modifiers". A
/// denylist leaked non-naming methods that are not modifiers either: a getter such as
/// `CommandBuilder::get_env("psql")` reads back an env-var KEY, yet it fed `"psql"` to the head
/// classifier and FABRICATED `Db` (review find). Only `new` and `cmd` name a program; everything
/// else — modifiers, `get_*` getters, custom builder methods — keeps the bare `Exec` cliff:
/// under-refine (safe) rather than fabricate.
pub fn is_cmd_naming_method(method: &str) -> bool {
    method == "new" || method == "cmd"
}
1429
/// The masking guard (AS-EFF-008) for `Net`: whether `method` (the path's last segment) is a
/// connection-ESTABLISHING verb, i.e. one that takes the HOST/URL as an argument.
///
/// A classified Net call of this kind with no captured host literal leaves the endpoint
/// structurally INVISIBLE (a runtime-built host); the surface is then incomplete and the gate
/// must fail closed, otherwise a benign sibling literal would mask the runtime endpoint.
///
/// The list is an ALLOWLIST — the safe direction. USE-verbs on an already-connected socket
/// (`stream.write`/`read`/`flush`, `socket.send`/`recv`) are absent, so a missing literal there
/// (the host was fixed at `connect`) never false-positives. Under-catching an unusual
/// establishing verb is a missed mask (sound-with-disclosure), never a broken gate.
pub fn is_net_establishing(method: &str) -> bool {
    const ESTABLISHING_VERBS: &[&str] = &[
        // socket-level connection / addressing
        "connect",
        "connect_timeout",
        "send_to",
        "lookup_host",
        "to_socket_addrs",
        // HTTP-style request verbs that take the URL
        "get",
        "post",
        "put",
        "patch",
        "delete",
        "head",
        "request",
    ];
    ESTABLISHING_VERBS.iter().any(|verb| *verb == method)
}
1455
/// The masking guard (AS-EFF-008), the `Fs` analog of `is_net_establishing`: whether `leaf` (the
/// method/fn leaf, i.e. the path's last segment) is an `Fs` call that takes the filesystem PATH as
/// a string argument.
///
/// For such a call a missing literal means the path is structurally INVISIBLE (built at
/// runtime), so the surface is incomplete and the gate fails closed.
///
/// The list is an ALLOWLIST of path-NAMING free functions and constructors (`fs::write`/`read`/
/// `File::open`/…) — the safe direction:
/// * a path-stat METHOD whose path is the RECEIVER (`p.metadata()`, `p.exists()`) is invoked
///   method-form, and the caller gates on `!is_method`, so this function never sees it;
/// * an operation on an already-opened handle (`file.write_all`, `mmap.flush`) or `tempfile()`
///   (a random name, no path argument) is not listed, so a missing literal there never
///   false-positives.
///
/// Under-catching an unusual path-naming fn is a missed mask (sound-with-disclosure), never a
/// broken gate.
pub fn is_fs_path_arg(leaf: &str) -> bool {
    // std::fs / tokio::fs / async_std::fs / fs_err free functions taking a path argument
    const PATH_FREE_FNS: &[&str] = &[
        "write",
        "read",
        "read_to_string",
        "read_dir",
        "read_link",
        "copy",
        "rename",
        "remove_file",
        "remove_dir",
        "remove_dir_all",
        "create_dir",
        "create_dir_all",
        "hard_link",
        "soft_link",
        "symlink",
        "symlink_file",
        "symlink_dir",
        "symlink_metadata",
        "canonicalize",
        "metadata",
        "set_permissions",
        "exists",
        "try_exists",
    ];
    // File / OpenOptions constructors taking a path argument
    const PATH_CTORS: &[&str] = &["open", "create", "create_new"];

    PATH_FREE_FNS.contains(&leaf) || PATH_CTORS.contains(&leaf)
}
1499
/// The masking guard (AS-EFF-008), the `Db` analog of `is_net_establishing`: whether `leaf` (the
/// method leaf, i.e. the path's last segment) is a `Db` call that takes the raw SQL QUERY as a
/// string argument.
///
/// For such a call a missing literal means the table is structurally INVISIBLE (a runtime-built
/// query), so the surface is incomplete and the gate fails closed.
///
/// The list is an ALLOWLIST of SQL-string-bearing execution/prepare verbs — the safe direction.
/// Deliberately absent:
/// * build-then-execute terminals that take NO SQL string (sqlx/diesel/sea_orm `fetch*`/`load*`/
///   `first`/`all`/`one`/`stream`, the document-store `find*`/`insert*`/…);
/// * non-query operations (`connect`/`open`/`acquire`/`begin`/`commit`/`ping`/`get_conn`).
///
/// Their query is built structurally (never a maskable string literal), so a missing literal
/// there must not false-positive. Under-catching an unusual query verb is a missed mask
/// (sound-with-disclosure), never a broken gate.
pub fn is_db_query_arg(leaf: &str) -> bool {
    const EXECUTE_VERBS: &[&str] =
        &["execute", "execute_batch", "execute_unprepared", "batch_execute"];
    const QUERY_VERBS: &[&str] = &[
        "simple_query",
        "query",
        "query_one",
        "query_opt",
        "query_raw",
        "query_row",
        "query_map",
        "query_and_then",
        "query_typed",
        "query_all",
    ];
    const PREPARE_VERBS: &[&str] = &["prepare", "prepare_typed", "prepare_cached", "prep"];
    const EXEC_VERBS: &[&str] = &[
        "exec",
        "exec_first",
        "exec_iter",
        "exec_map",
        "exec_fold",
        "exec_drop",
        "exec_batch",
    ];
    const OTHER_VERBS: &[&str] = &["run_command"];

    [EXECUTE_VERBS, QUERY_VERBS, PREPARE_VERBS, EXEC_VERBS, OTHER_VERBS]
        .iter()
        .any(|family| family.contains(&leaf))
}
1541
/// Map a cap-std capability *type* to the effect it authorises.
///
/// Holding one of these types (e.g. `&Dir`) is the real, unforgeable right to perform that
/// effect, so candor treats it as a declared capability, exactly like its own `&Fs` token.
///
/// Returns `None` unless `crate_name` starts with `cap_` AND `type_name` is one of the known
/// capability types (directory → `Fs`, TCP/UDP sockets and `Pool` → `Net`, Unix-domain sockets →
/// `Ipc`, clocks → `Clock`).
pub fn capstd_cap(crate_name: &str, type_name: &str) -> Option<&'static str> {
    let effect = match type_name {
        "Dir" => "Fs",
        "TcpListener" | "TcpStream" | "UdpSocket" | "Pool" => "Net",
        "UnixListener" | "UnixStream" | "UnixDatagram" => "Ipc",
        "SystemClock" | "MonotonicClock" => "Clock",
        _ => return None,
    };
    // The type names above only count as capabilities when they come from a `cap_*` crate.
    crate_name.starts_with("cap_").then_some(effect)
}
1557
/// Table names a SQL string literal STATICALLY reaches — the `Db` analog of the `Net` host /
/// `Exec` command / `Fs` path literal surface (feeds `allow Db in <scope> <table>…`, AS-EFF-008).
///
/// Conservative by construction, because a wrong capture here would FABRICATE:
///
/// * the string must open with a SQL statement keyword, otherwise nothing is returned;
/// * only identifiers in table position are taken — `FROM`/`JOIN` anywhere, `INTO` anywhere,
///   statement-leading `UPDATE`/`TRUNCATE`, and `TABLE` (create/drop/alter), skipping
///   `ONLY`/`IF NOT EXISTS`;
/// * `UPDATE` mid-statement is deliberately ignored (`FOR UPDATE SKIP LOCKED` must not yield a
///   table "skip");
/// * a comma-separated list continues only while each comma is directly adjacent to the previous
///   table (`FROM t1, t2`); an alias ends the list.
///
/// A dynamically-built query yields nothing — the gate's opaque case — never a guess.
///
/// Returns the table names in first-seen order: lower-cased, quote/backtick-stripped,
/// `schema.table` kept qualified, deduped. An empty or non-SQL input returns an empty `Vec`.
///
/// SPEC §2 pins this algorithm token-for-token across engines; the cross-impl vector battery
/// (candor-spec conformance/tables/vectors.json, run.sh Part 4b) enforces the JVM/TS mirrors.
/// Any change to the OUTPUT of this function must therefore land in the spec and mirrors too.
pub fn tables_in_sql(sql: &str) -> Vec<String> {
    // Keywords a string must OPEN with to be treated as SQL at all.
    const STMT: &[&str] = &[
        "select", "insert", "update", "delete", "create", "drop", "alter", "truncate", "merge",
        "replace", "with",
    ];
    // Tokens that can FOLLOW a table-introducing keyword without being a table.
    const SKIP: &[&str] = &["only", "if", "not", "exists", "table"];
    // Identifier-position tokens that are grammar, not a table (subqueries, locking clauses…).
    const STOP: &[&str] = &[
        "select", "set", "where", "values", "on", "using", "group", "order", "by", "limit",
        "returning", "as", "inner", "outer", "left", "right", "cross", "lateral", "natural",
        "union", "all", "distinct", "case", "when", "null", "default", "skip", "nowait", "of",
        "from", "join", "into", "update", "delete", "insert",
    ];

    /// Appends `name` unless it was already captured (keeps first-seen order).
    fn push_unique(out: &mut Vec<String>, name: String) {
        if !out.contains(&name) {
            out.push(name);
        }
    }

    // Normalize into one buffer: parens and `;` become spaces, while `,` survives as its OWN
    // token. That is what lets `FROM t1, t2` continue the table list without fabricating from
    // other comma-ridden positions (column lists, ON clauses).
    let lowered = sql.to_lowercase();
    let mut cleaned = String::with_capacity(lowered.len());
    for c in lowered.chars() {
        match c {
            '(' | ')' | ';' => cleaned.push(' '),
            ',' => cleaned.push_str(" , "),
            _ => cleaned.push(c),
        }
    }
    let toks: Vec<&str> = cleaned.split_whitespace().collect();

    // Not SQL (or empty) — nothing to certify, nothing fabricated.
    if !toks.first().is_some_and(|lead| STMT.contains(lead)) {
        return Vec::new();
    }

    // A token is a table name when, after trimming surrounding quotes, it starts like an
    // identifier, contains only identifier/qualifier characters, and is not a grammar word.
    let ident = |raw: &str| -> Option<String> {
        let t = raw.trim_matches(|c| matches!(c, '"' | '`' | '\''));
        let ok_first = t.chars().next().is_some_and(|c| c.is_ascii_alphabetic() || c == '_');
        let ok_rest =
            t.chars().all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '.' | '$' | '"' | '`'));
        // Inner quotes (`"schema"."table"`) are removed only after validation.
        (ok_first && ok_rest && !STOP.contains(&t)).then(|| t.replace(['"', '`'], ""))
    };

    let mut out: Vec<String> = Vec::new();
    for (i, tok) in toks.iter().enumerate() {
        // `update`/`truncate` introduce a table only when they lead the statement.
        let table_pos = matches!(*tok, "from" | "join" | "into" | "table")
            || (i == 0 && matches!(*tok, "update" | "truncate"));
        if !table_pos {
            continue;
        }
        // Step over `ONLY` / `IF NOT EXISTS` / `TABLE` between the keyword and the table.
        let skipped = toks[i + 1..].iter().take_while(|t| SKIP.contains(*t)).count();
        let mut j = i + 1 + skipped;
        let Some(head) = toks.get(j).and_then(|t| ident(t)) else { continue };
        push_unique(&mut out, head);
        // Comma-ADJACENT continuation only: `FROM t1, t2, t3` takes all three, while an alias
        // breaks the chain (`FROM t1 a, t2` keeps just t1 — an under-report, never a guess:
        // skipping an alias to chase the comma would fabricate tables out of
        // `INSERT INTO t (a, b)`'s column list, whose parens are spaces by the time we tokenize).
        while toks.get(j + 1) == Some(&",") {
            let Some(more) = toks.get(j + 2).and_then(|t| ident(t)) else { break };
            push_unique(&mut out, more);
            j += 2;
        }
    }
    out
}
1641
1642#[cfg(test)]
1643mod tests {
1644    #[test]
1645    fn sql_table_extraction_is_conservative() {
1646        use super::tables_in_sql as t;
1647        assert_eq!(t("SELECT id FROM users WHERE x = 1"), vec!["users"]);
1648        assert_eq!(t("select * from ledger.entries e join customers c on c.id = e.cid"),
1649                   vec!["ledger.entries", "customers"]);
1650        assert_eq!(t("INSERT INTO audit_log (a) VALUES (?1)"), vec!["audit_log"]);
1651        assert_eq!(t("UPDATE accounts SET v = ?"), vec!["accounts"]);
1652        assert_eq!(t("DELETE FROM sessions WHERE id = ?"), vec!["sessions"]);
1653        assert_eq!(t("CREATE TABLE IF NOT EXISTS cache (k TEXT)"), vec!["cache"]);
1654        assert_eq!(t("TRUNCATE TABLE staging"), vec!["staging"]);
1655        // FOR UPDATE locking clause must not yield a phantom table (mid-statement update ignored)
1656        assert_eq!(t("SELECT * FROM jobs FOR UPDATE SKIP LOCKED"), vec!["jobs"]);
1657        // a subquery in FROM position yields nothing for that position
1658        assert_eq!(t("SELECT * FROM (SELECT 1) q"), Vec::<String>::new());
1659        // not SQL -> nothing (never fabricate)
1660        assert_eq!(t("/tmp/some/path"), Vec::<String>::new());
1661        assert_eq!(t("hello world from nowhere"), Vec::<String>::new());
1662        // comma-ADJACENT continuation: a FROM list takes every table in the chain…
1663        assert_eq!(t("SELECT a FROM t1, t2, s.t3 WHERE x = 1"), vec!["t1", "t2", "s.t3"]);
1664        // …but an alias breaks it (under-report, never a guess)…
1665        assert_eq!(t("SELECT a FROM t1 a1, t2 WHERE x = 1"), vec!["t1"]);
1666        // …which is exactly what keeps a column list from fabricating (parens are spaces by now).
1667        assert_eq!(t("INSERT INTO t (a, b) VALUES (1, 2)"), vec!["t"]);
1668        // a subquery after the comma stops the chain too
1669        assert_eq!(t("SELECT a FROM t1, (SELECT 1) q"), vec!["t1"]);
1670    }
1671
1672    use super::*;
1673
1674    #[test]
1675    fn db_crates_are_calibrated() {
1676        // The calibrated set must cover every DB client the classifier knows, or the receipt's coverage
1677        // check would flag a recognized crate as a blind spot. (Was nightly-lint-only; now runs on stable.)
1678        for c in DB_CRATES {
1679            assert!(
1680                CALIBRATED_CRATES.contains(&c),
1681                "DB crate `{c}` is matched by classify() but missing from CALIBRATED_CRATES"
1682            );
1683        }
1684    }
1685
1686    #[test]
1687    fn calibrated_crates_are_live() {
1688        // Conversely, every crate advertised as calibrated must actually be matched by classify() for
1689        // some representative path — a dead entry would silently suppress a real coverage warning.
1690        for c in CALIBRATED_CRATES {
1691            assert!(
1692                CALIBRATION_PROBE_TAILS.iter().any(|t| classify(c, &format!("{c}{t}")).is_some()),
1693                "calibrated crate `{c}` is matched by no path in classify() — dead list entry"
1694            );
1695        }
1696    }
1697
1698    #[test]
1699    fn async_http_stack_classifies() {
1700        // The modern async-HTTP/TLS/QUIC/DNS stack (found by the independent-method differential on oha):
1701        // verb-keyed Net/Ipc/Fs/Env, crate-gated so generic verbs never fabricate across crates.
1702        assert_eq!(classify("hyper", "hyper::client::conn::http1::SendRequest::send_request"), Some("Net"));
1703        assert_eq!(classify("hyper", "hyper::client::conn::http1::handshake"), Some("Net"));
1704        assert_eq!(classify("hyper_util", "hyper_util::client::legacy::Client::request"), Some("Net"));
1705        assert_eq!(classify("hickory_resolver", "hickory_resolver::Resolver::lookup_ip"), Some("Net"));
1706        assert_eq!(classify("quinn", "quinn::Endpoint::connect"), Some("Net"));
1707        assert_eq!(classify("quinn", "quinn::RecvStream::read_to_end"), Some("Net")); // stream byte I/O, not just open
1708        assert_eq!(classify("quinn", "quinn::SendStream::write_all"), Some("Net"));
1709        assert_eq!(classify("tokio_rustls", "tokio_rustls::TlsConnector::connect"), Some("Net"));
1710        assert_eq!(classify("native_tls", "native_tls::TlsConnector::connect"), Some("Net"));
1711        assert_eq!(classify("tokio_vsock", "tokio_vsock::VsockStream::connect"), Some("Ipc"));
1712        assert_eq!(classify("rustls_native_certs", "rustls_native_certs::load_native_certs"), Some("Fs"));
1713        assert_eq!(classify("rlimit", "rlimit::setrlimit"), Some("Env"));
1714        // num_cpus is deliberately PURE (consistency with std::thread::available_parallelism; avoids Env spray)
1715        assert_eq!(classify("num_cpus", "num_cpus::get"), None);
1716        assert_eq!(classify("num_cpus", "num_cpus::get_physical"), None);
1717        // pure surface stays None (no fabrication): builder/type/config paths, and other crates' generic verbs
1718        assert_eq!(classify("hyper", "hyper::Request::builder"), None);
1719        assert_eq!(classify("hyper", "hyper::body::Bytes::new"), None);
1720        assert_eq!(classify("native_tls", "native_tls::TlsConnectorBuilder::min_protocol_version"), None);
1721        assert_eq!(classify("serde", "serde::Deserialize::request"), None); // generic verb, wrong crate
1722    }
1723
1724    #[test]
1725    fn coverage_differential_crates_classify() {
1726        // Crates the coverage differential found DISCLOSED-but-unmodeled. Each rule is verb-keyed +
1727        // crate-gated; the EFFECT verbs map to the right bucket and the PURE surface stays None (a
1728        // wrongly-flagged pure crate is a fabrication, so the negatives matter as much as the positives).
1729
1730        // rustls (sync TLS core) — record I/O is Net; config/cert + the buffered-decrypt step are pure.
1731        assert_eq!(classify("rustls", "rustls::ClientConnection::read_tls"), Some("Net"));
1732        assert_eq!(classify("rustls", "rustls::ConnectionCommon::write_tls"), Some("Net"));
1733        assert_eq!(classify("rustls", "rustls::Connection::complete_io"), Some("Net"));
1734        assert_eq!(classify("rustls", "rustls::ConnectionCommon::process_new_packets"), None); // buffered decrypt, no I/O
1735        assert_eq!(classify("rustls", "rustls::ClientConfig::builder"), None); // pure config
1736
1737        // native-tls variants — handshake is Net; builder is pure.
1738        assert_eq!(classify("native_tls_crate", "native_tls_crate::TlsConnector::connect"), Some("Net"));
1739        assert_eq!(classify("tokio_native_tls", "tokio_native_tls::TlsAcceptor::accept"), Some("Net"));
1740        assert_eq!(classify("native_tls_crate", "native_tls_crate::TlsConnectorBuilder::min_protocol_version"), None);
1741
1742        // etcetera — dir resolution reads env; the args data type is pure.
1743        assert_eq!(classify("etcetera", "etcetera::home_dir"), Some("Env"));
1744        assert_eq!(classify("etcetera", "etcetera::base_strategy::choose_base_strategy"), Some("Env"));
1745        assert_eq!(classify("etcetera", "etcetera::base_strategy::Xdg::config_dir"), Some("Env"));
1746        assert_eq!(classify("etcetera", "etcetera::app_strategy::AppStrategyArgs::new"), None); // pure data
1747
1748        // sqlx-core — connect is Net, execute/fetch round-trips are Db; options/builders pure.
1749        assert_eq!(classify("sqlx_core", "sqlx_core::connection::Connection::connect"), Some("Net"));
1750        assert_eq!(classify("sqlx_core", "sqlx_core::executor::Executor::fetch_one"), Some("Db"));
1751        assert_eq!(classify("sqlx_core", "sqlx_core::executor::Executor::execute"), Some("Db"));
1752        assert_eq!(classify("sqlx_core", "sqlx_core::pool::Pool::acquire"), Some("Db"));
1753        assert_eq!(classify("sqlx_core", "sqlx_core::pool::PoolOptions::max_connections"), None); // pure builder
1754
1755        // walkdir — the lazy read happens in next()/metadata(); builders + cached accessors pure.
1756        assert_eq!(classify("walkdir", "walkdir::IntoIter::next"), Some("Fs"));
1757        assert_eq!(classify("walkdir", "walkdir::DirEntry::metadata"), Some("Fs"));
1758        assert_eq!(classify("walkdir", "walkdir::WalkDir::new"), None); // builder
1759        assert_eq!(classify("walkdir", "walkdir::WalkDir::into_iter"), None); // no I/O until pulled
1760        assert_eq!(classify("walkdir", "walkdir::DirEntry::file_type"), None); // cached, no syscall
1761
1762        // filetime — set_* are utimes (Fs), now is Clock; from_* constructors pure.
1763        assert_eq!(classify("filetime", "filetime::set_file_mtime"), Some("Fs"));
1764        assert_eq!(classify("filetime", "filetime::set_file_handle_times"), Some("Fs"));
1765        assert_eq!(classify("filetime", "filetime::FileTime::now"), Some("Clock"));
1766        assert_eq!(classify("filetime", "filetime::FileTime::from_unix_time"), None);
1767        assert_eq!(classify("filetime", "filetime::FileTime::from_last_modification_time"), None); // reads &Metadata, not disk
1768
1769        // execute — the execute* verbs spawn (Exec); command/shell builders pure.
1770        assert_eq!(classify("execute", "execute::Execute::execute"), Some("Exec"));
1771        assert_eq!(classify("execute", "execute::Execute::execute_output"), Some("Exec"));
1772        assert_eq!(classify("execute", "execute::Execute::execute_multiple_output"), Some("Exec"));
1773        assert_eq!(classify("execute", "execute::command"), None); // only builds a Command
1774        assert_eq!(classify("execute", "execute::shell"), None);
1775
1776        // ctrlc — install signal handler (Ipc).
1777        assert_eq!(classify("ctrlc", "ctrlc::set_handler"), Some("Ipc"));
1778        assert_eq!(classify("ctrlc", "ctrlc::try_set_handler"), Some("Ipc"));
1779
1780        // clap — only the argv-reading terminals are Env; the whole builder + *_from variants pure.
1781        assert_eq!(classify("clap", "clap::Command::get_matches"), Some("Env"));
1782        assert_eq!(classify("clap", "clap::Command::try_get_matches"), Some("Env"));
1783        assert_eq!(classify("clap", "clap::Parser::parse"), Some("Env"));
1784        assert_eq!(classify("clap", "clap::Command::new"), None); // builder
1785        assert_eq!(classify("clap", "clap::Arg::about"), None); // builder
1786        assert_eq!(classify("clap", "clap::Command::get_matches_from"), None); // explicit args, no argv read
1787
1788        // jiff — now* is Clock; tz lookups read the tzdb (Fs); span/civil math pure.
1789        assert_eq!(classify("jiff", "jiff::Timestamp::now"), Some("Clock"));
1790        assert_eq!(classify("jiff", "jiff::Zoned::now_with"), Some("Clock"));
1791        assert_eq!(classify("jiff", "jiff::tz::TimeZone::system"), Some("Fs"));
1792        assert_eq!(classify("jiff", "jiff::tz::TimeZone::get"), Some("Fs"));
1793        assert_eq!(classify("jiff", "jiff::Span::checked_add"), None); // pure arithmetic
1794
1795        // env_logger — init installs the logger + reads RUST_LOG (Log); config setters pure.
1796        assert_eq!(classify("env_logger", "env_logger::init"), Some("Log"));
1797        assert_eq!(classify("env_logger", "env_logger::try_init"), Some("Log"));
1798        assert_eq!(classify("env_logger", "env_logger::Builder::init"), Some("Log"));
1799        assert_eq!(classify("env_logger", "env_logger::Builder::format_timestamp"), None); // config
1800        assert_eq!(classify("env_logger", "env_logger::Builder::build"), None); // pure build
1801
1802        // dialoguer — interact* is tty I/O (Ipc); builders pure.
1803        assert_eq!(classify("dialoguer", "dialoguer::Input::interact_text"), Some("Ipc"));
1804        assert_eq!(classify("dialoguer", "dialoguer::Confirm::interact"), Some("Ipc"));
1805        assert_eq!(classify("dialoguer", "dialoguer::Select::interact_opt"), Some("Ipc"));
1806        assert_eq!(classify("dialoguer", "dialoguer::Input::with_prompt"), None); // builder
1807
1808        // console — Term I/O is Ipc, detection is Env, Style is pure.
1809        assert_eq!(classify("console", "console::Term::write_line"), Some("Ipc"));
1810        assert_eq!(classify("console", "console::Term::read_key"), Some("Ipc"));
1811        assert_eq!(classify("console", "console::colors_enabled"), Some("Env"));
1812        assert_eq!(classify("console", "console::Style::cyan"), None); // pure styling
1813        assert_eq!(classify("console", "console::strip_ansi_codes"), None); // pure text util
1814
1815        // terminal_colorsaurus — tty colour query (Ipc).
1816        assert_eq!(classify("terminal_colorsaurus", "terminal_colorsaurus::background_color"), Some("Ipc"));
1817        assert_eq!(classify("terminal_colorsaurus", "terminal_colorsaurus::color_palette"), Some("Ipc"));
1818
1819        // backoff — retry sleeps + reads the clock (Clock); config pure.
1820        assert_eq!(classify("backoff", "backoff::retry"), Some("Clock"));
1821        assert_eq!(classify("backoff", "backoff::retry_notify"), Some("Clock"));
1822        assert_eq!(classify("backoff", "backoff::ExponentialBackoff::default"), None);
1823
1824        // lscolors — ONLY from_env reads the environment; from_string/style_for_path pure.
1825        assert_eq!(classify("lscolors", "lscolors::LsColors::from_env"), Some("Env"));
1826        assert_eq!(classify("lscolors", "lscolors::LsColors::from_string"), None);
1827        assert_eq!(classify("lscolors", "lscolors::LsColors::style_for_path"), None);
1828
1829        // wild — argv readers (Env).
1830        assert_eq!(classify("wild", "wild::args"), Some("Env"));
1831        assert_eq!(classify("wild", "wild::args_os"), Some("Env"));
1832
1833        // grep_cli — only the firm Exec (CommandReader spawn); the isatty probes stay unmodeled.
1834        assert_eq!(classify("grep_cli", "grep_cli::CommandReaderBuilder::build"), Some("Exec"));
1835        assert_eq!(classify("grep_cli", "grep_cli::is_readable_stdin"), None); // isatty/fstat, not modeled
1836        assert_eq!(classify("grep_cli", "grep_cli::is_tty_stdout"), None);
1837
1838        // clircle — same-file detection issues fstat/lseek (Fs); equality is pure.
1839        assert_eq!(classify("clircle", "clircle::Identifier::try_from"), Some("Fs"));
1840        assert_eq!(classify("clircle", "clircle::Clircle::surely_conflicts_with"), Some("Fs"));
1841    }
1842
1843    #[test]
1844    fn log_tracing_emit_macros_classify_pre_expansion() {
1845        // candor-scan is pre-expansion: it sees the raw macro path (`log::info`, `tracing::warn`), not the
1846        // expanded dispatch the deep engine sees. Both the user-facing macro names AND the type surface:
1847        assert_eq!(classify("log", "log::info"), Some("Log"));
1848        assert_eq!(classify("log", "log::error"), Some("Log"));
1849        assert_eq!(classify("tracing", "tracing::warn"), Some("Log"));
1850        assert_eq!(classify("tracing", "tracing::info_span"), Some("Log"));
1851        // pure data-type surface stays None (no fabricated Log)
1852        assert_eq!(classify("log", "log::Level::as_str"), None);
1853        assert_eq!(classify("tracing", "tracing::Level::INFO"), None);
1854    }
1855
1856    #[test]
1857    fn classify_core_effects() {
1858        // A representative smoke test of the classifier's main families, so the published crate is not
1859        // shipped untested (these used to live only in the nightly-only src/lib.rs).
1860        assert_eq!(classify("std", "std::fs::read_to_string"), Some("Fs"));
1861        // std::path stat-family methods are Fs (each is a stat/readdir syscall); the pure
1862        // string-manipulation surface stays unclassified (the blackout screen's gix-dir find).
1863        assert_eq!(classify("std", "std::path::Path::symlink_metadata"), Some("Fs"));
1864        assert_eq!(classify("std", "std::path::PathBuf::read_dir"), Some("Fs"));
1865        assert_eq!(classify("std", "std::path::Path::exists"), Some("Fs"));
1866        assert_eq!(classify("std", "std::path::Path::join"), None); // pure string manipulation
1867        assert_eq!(classify("std", "std::path::PathBuf::file_name"), None);
1868        assert_eq!(classify("std", "std::path::Path::parent"), None);
1869        assert_eq!(classify("std", "std::process::Command::new"), Some("Exec"));
1870        assert_eq!(classify("std", "std::env::var"), Some("Env"));
1871        assert_eq!(classify("reqwest", "reqwest::Client::execute"), Some("Net"));
1872        // one-shot convenience fns send immediately → Net; the `Client::get` builder stays pure.
1873        assert_eq!(classify("reqwest", "reqwest::get"), Some("Net"));
1874        assert_eq!(classify("reqwest", "reqwest::blocking::get"), Some("Net"));
1875        assert_eq!(classify("reqwest", "reqwest::Client::get"), None);
1876        assert_eq!(classify("reqwest", "reqwest::RequestBuilder::header"), None);
1877        // nix routes through the libc syscall table (same leaves): I/O classified, generic fd ops skipped.
1878        assert_eq!(classify("nix", "nix::fcntl::open"), Some("Fs"));
1879        assert_eq!(classify("nix", "nix::sys::socket::connect"), Some("Net"));
1880        assert_eq!(classify("nix", "nix::unistd::execvp"), Some("Exec"));
1881        assert_eq!(classify("nix", "nix::unistd::write"), None); // generic fd op — deliberately unclassified
1882        assert_eq!(classify("nix", "nix::unistd::getpid"), None); // not I/O
1883        // rustix does raw syscalls (no libc underneath) → classified directly by leaf, same table.
1884        assert_eq!(classify("rustix", "rustix::time::clock_settime"), Some("Clock"));
1885        assert_eq!(classify("rustix", "rustix::fs::symlink"), Some("Fs"));
1886        assert_eq!(classify("rustix", "rustix::net::connect"), Some("Net"));
1887        assert_eq!(classify("rustix", "rustix::io::read"), None); // generic fd op
1888        // pnet raw packet capture: channel openers are Net, packet construction stays pure.
1889        assert_eq!(classify("pnet", "pnet::datalink::channel"), Some("Net"));
1890        assert_eq!(classify("pnet", "pnet::transport::transport_channel"), Some("Net"));
1891        assert_eq!(classify("pnet_datalink", "pnet_datalink::channel"), Some("Net"));
1892        assert_eq!(classify("pnet", "pnet::packet::ethernet::EthernetPacket::new"), None);
1893        assert_eq!(classify("pnet_base", "pnet_base::MacAddr::new"), None);
1894        // ignore (gitignore-aware walker): walk executors are Fs, config builders stay pure.
1895        assert_eq!(classify("ignore", "ignore::WalkBuilder::build_parallel"), Some("Fs"));
1896        assert_eq!(classify("ignore", "ignore::WalkBuilder::build"), Some("Fs"));
1897        assert_eq!(classify("ignore", "ignore::WalkParallel::run"), Some("Fs"));
1898        assert_eq!(classify("ignore", "ignore::WalkBuilder::add_ignore"), Some("Fs")); // reads the ignore file
1899        assert_eq!(classify("ignore", "ignore::overrides::OverrideBuilder::build"), None); // pure config
1900        assert_eq!(classify("ignore", "ignore::gitignore::GitignoreBuilder::build"), None); // pure config
1901        assert_eq!(classify("ignore", "ignore::DirEntry::path"), None); // pure accessor
1902        // notify fs-watching: watcher constructors + watch/unwatch are Fs, data types stay pure.
1903        assert_eq!(classify("notify", "notify::RecommendedWatcher::new"), Some("Fs"));
1904        assert_eq!(classify("notify", "notify::PollWatcher::new"), Some("Fs"));
1905        assert_eq!(classify("notify", "notify::recommended_watcher"), Some("Fs"));
1906        assert_eq!(classify("notify", "notify::INotifyWatcher::watch"), Some("Fs"));
1907        assert_eq!(classify("notify", "notify::Config::default"), None); // pure config
1908        assert_eq!(classify("notify", "notify::Event::new"), None); // pure data type
1909        assert_eq!(classify("rusqlite", "rusqlite::Connection::execute"), Some("Db"));
1910        // the rusqlite verb DIALECT (a verb probe found the canonical consumer API classifying pure):
1911        assert_eq!(classify("rusqlite", "rusqlite::Connection::query_row"), Some("Db"));
1912        assert_eq!(classify("rusqlite", "rusqlite::Statement::query_map"), Some("Db"));
1913        assert_eq!(classify("rusqlite", "rusqlite::Connection::execute_batch"), Some("Db"));
1914        assert_eq!(classify("rusqlite", "rusqlite::Connection::prepare_cached"), Some("Db"));
1915        assert_eq!(classify("rusqlite", "rusqlite::Connection::open"), Some("Db"));
1916        assert_eq!(classify("rusqlite", "rusqlite::Connection::open_in_memory"), Some("Db"));
1917        // …but `open` stays rusqlite-only (postgres has no open; nothing else may borrow it):
1918        assert_eq!(classify("postgres", "postgres::Client::open"), None);
1919        assert_eq!(classify("tokio_postgres", "tokio_postgres::Client::query_typed"), Some("Db"));
1920        // diesel's LIMIT-1 + streaming executions; sqlx's multi-result stream:
1921        assert_eq!(classify("diesel", "diesel::RunQueryDsl::first"), Some("Db"));
1922        assert_eq!(classify("diesel", "diesel::RunQueryDsl::load_iter"), Some("Db"));
1923        assert_eq!(classify("sqlx", "sqlx::query::Query::fetch_many"), Some("Db"));
1924        // sqlx's bare `query()` builder must STAY pure (the original sqlx lesson):
1925        assert_eq!(classify("sqlx", "sqlx::query"), None);
1926        // tracing: the emit/span-lifecycle dispatch is Log; the pure DATA-type accessors are not
1927        // (whole-crate Log fabricated Log on `Level::as_str` / `Span::is_disabled` — the data types are
1928        // pure, same principle as the `log` facade).
1929        assert_eq!(classify("tracing", "tracing::event"), Some("Log"));
1930        assert_eq!(classify("tracing", "tracing::Span::new_span"), Some("Log"));
1931        assert_eq!(classify("tracing", "tracing::Span::record"), Some("Log"));
1932        assert_eq!(classify("tracing", "tracing::Span::enter"), Some("Log"));
1933        assert_eq!(classify("tracing", "tracing::Level::as_str"), None); // pure accessor
1934        assert_eq!(classify("tracing", "tracing::Span::is_disabled"), None); // pure state read
1935        assert_eq!(classify("tracing", "tracing::Span::metadata"), None); // pure accessor
1936        assert_eq!(classify("tracing", "tracing::metadata::Level::TRACE"), None); // pure data type
1937        assert_eq!(classify("tracing", "tracing::field::Field::name"), None); // pure data type
1938        // memmap2: only the syscall-issuing map/flush/protect verbs are Fs; reads over an already-mapped
1939        // region (len/as_ptr/is_empty) and the request builder are PURE (whole-crate Fs fabricated Fs).
1940        assert_eq!(classify("memmap2", "memmap2::MmapOptions::map"), Some("Fs"));
1941        assert_eq!(classify("memmap2", "memmap2::MmapOptions::map_mut"), Some("Fs"));
1942        assert_eq!(classify("memmap2", "memmap2::Mmap::flush"), Some("Fs"));
1943        assert_eq!(classify("memmap2", "memmap2::MmapMut::make_read_only"), Some("Fs"));
1944        assert_eq!(classify("memmap2", "memmap2::Mmap::len"), None); // length read — pure
1945        assert_eq!(classify("memmap2", "memmap2::Mmap::is_empty"), None); // pure
1946        assert_eq!(classify("memmap2", "memmap2::Mmap::as_ptr"), None); // pointer — pure
1947        assert_eq!(classify("memmap2", "memmap2::MmapOptions::new"), None); // request builder — pure
1948        // arboard: the Clipboard handle's read/write verbs are Clipboard; `arboard::Error` formatting
1949        // and option data types are PURE (whole-crate Clipboard fabricated Clipboard on `Error::to_string`).
1950        assert_eq!(classify("arboard", "arboard::Clipboard::new"), Some("Clipboard"));
1951        assert_eq!(classify("arboard", "arboard::Clipboard::get_text"), Some("Clipboard"));
1952        assert_eq!(classify("arboard", "arboard::Clipboard::set_text"), Some("Clipboard"));
1953        assert_eq!(classify("arboard", "arboard::Clipboard::clear"), Some("Clipboard"));
1954        assert_eq!(classify("arboard", "arboard::Error::to_string"), None); // error formatting — pure
1955        assert_eq!(classify("arboard", "arboard::Error::fmt"), None); // Display impl — pure
1956        assert_eq!(classify("arboard", "arboard::ImageData::to_owned_img"), None); // pure data type
1957        // fastrand: value draws + entropy-seeded entry points are Rand; the DETERMINISTIC seeded ctor
1958        // `with_seed` and state split/copy (`fork`/`clone`) are PURE (whole-crate Rand fabricated Rand).
1959        assert_eq!(classify("fastrand", "fastrand::u32"), Some("Rand")); // top-level draw
1960        assert_eq!(classify("fastrand", "fastrand::Rng::usize"), Some("Rand"));
1961        assert_eq!(classify("fastrand", "fastrand::Rng::shuffle"), Some("Rand"));
1962        assert_eq!(classify("fastrand", "fastrand::Rng::new"), Some("Rand")); // entropy-seeded
1963        assert_eq!(classify("fastrand", "fastrand::Rng::with_seed"), None); // deterministic ctor — pure
1964        assert_eq!(classify("fastrand", "fastrand::Rng::fork"), None); // state split — pure
1965        assert_eq!(classify("fastrand", "fastrand::Rng::clone"), None); // state copy — pure
1966        // portable_pty / async_process: spawn/wait keep Exec; config GETTERS and pure data ctors/setters
1967        // do NOT (base Exec fabricated on `CommandBuilder::get_cwd` / `PtySize::default` / `Stdio::piped`).
1968        assert_eq!(classify("portable_pty", "portable_pty::PtySystem::openpty"), Some("Exec"));
1969        assert_eq!(classify("portable_pty", "portable_pty::SlavePty::spawn_command"), Some("Exec"));
1970        assert_eq!(classify("portable_pty", "portable_pty::CommandBuilder::get_argv"), None); // getter
1971        assert_eq!(classify("portable_pty", "portable_pty::CommandBuilder::get_cwd"), None); // getter
1972        assert_eq!(classify("portable_pty", "portable_pty::PtySize::default"), None); // pure data type
1973        assert_eq!(classify("portable_pty", "portable_pty::CommandBuilder::new"), None); // builder ctor
1974        assert_eq!(classify("async_process", "async_process::Command::spawn"), Some("Exec"));
1975        assert_eq!(classify("async_process", "async_process::Command::output"), Some("Exec"));
1976        assert_eq!(classify("async_process", "async_process::Stdio::piped"), None); // pure data type
1977        assert_eq!(classify("async_process", "async_process::Stdio::null"), None); // pure data type
1978        // FFI tiers (matched by distinctive leaf, alias-independent)
1979        assert_eq!(classify("libc", "libc::open"), Some("Fs"));
1980        assert_eq!(classify("libc", "libc::connect"), Some("Net"));
1981        assert_eq!(classify("libc", "libc::read"), None); // generic fd op — deliberately unclassified
1982        assert_eq!(classify("ffi", "ffi::sqlite3_step"), Some("Db"));
1983        assert_eq!(classify("raw", "raw::git_remote_fetch"), Some("Net"));
1984        // libgit2 clone + submodule clone/update fetch over the network (an A/B on git2 0.20 caught
1985        // `Submodule::update`/`clone` and `Repository::clone` reporting no Net — the latter because the
1986        // `src/build.rs` module was being dropped as if it were the Cargo build script).
1987        assert_eq!(classify("raw", "raw::git_clone"), Some("Net"));
1988        assert_eq!(classify("raw", "raw::git_submodule_clone"), Some("Net"));
1989        assert_eq!(classify("raw", "raw::git_submodule_update"), Some("Net"));
1990        assert_eq!(classify("raw", "raw::git_submodule_open"), None); // local subrepo open — not Net
1991        // libcurl: the transfer/raw-socket entry points are Net (an A/B on curl 0.4 caught the whole
1992        // crate reporting ZERO Net); the big setopt/init/getinfo surface — and the readiness-wait
1993        // multi_wait/poll — stay unclassified (the loop's perform is the boundary).
1994        assert_eq!(classify("curl_sys", "curl_sys::curl_easy_perform"), Some("Net"));
1995        assert_eq!(classify("curl_sys", "curl_sys::curl_easy_send"), Some("Net"));
1996        assert_eq!(classify("curl_sys", "curl_sys::curl_multi_perform"), Some("Net"));
1997        assert_eq!(classify("curl_sys", "curl_sys::curl_multi_socket_action"), Some("Net"));
1998        assert_eq!(classify("curl_sys", "curl_sys::curl_easy_setopt"), None); // in-memory option write
1999        assert_eq!(classify("curl_sys", "curl_sys::curl_easy_init"), None); // handle alloc
2000        assert_eq!(classify("curl_sys", "curl_sys::curl_multi_wait"), None); // readiness wait, no payload
2001        // consumer-side `curl` crate rule: the dispatch verbs are Net, the setopt builders pure.
2002        assert_eq!(classify("curl", "curl::easy::Easy::perform"), Some("Net"));
2003        assert_eq!(classify("curl", "curl::multi::Multi::perform"), Some("Net"));
2004        assert_eq!(classify("curl", "curl::easy::Easy::send"), Some("Net"));
2005        assert_eq!(classify("curl", "curl::easy::Easy::url"), None); // CURLOPT setter — pure
2006        assert_eq!(classify("curl", "curl::easy::Easy::timeout"), None); // pure setter; Multi::timeout under-reported by design
2007        assert_eq!(classify("ffi", "ffi::SSL_connect"), Some("Net"));
2008        // pure crates stay pure
2009        assert_eq!(classify("serde", "serde::Serialize::serialize"), None);
2010        assert_eq!(classify("std", "std::vec::Vec::push"), None);
2011
2012        // ── sweep 2026-06-17: fabrication carve-outs + DNS coverage (each fails pre-fix) ──
2013        // [24] std::net socket accessors are pure; the I/O verbs stay Net.
2014        assert_eq!(classify("std", "std::net::TcpStream::connect"), Some("Net"));
2015        assert_eq!(classify("std", "std::net::TcpStream::local_addr"), None);
2016        assert_eq!(classify("std", "std::net::TcpStream::nodelay"), None);
2017        assert_eq!(classify("std", "std::net::TcpStream::ttl"), None);
2018        assert_eq!(classify("std", "std::net::UdpSocket::peer_addr"), None);
2019        // [37] std DNS resolution is Net (was floored).
2020        assert_eq!(classify("std", "std::net::lookup_host"), Some("Net"));
2021        assert_eq!(classify("std", "core::net::ToSocketAddrs::to_socket_addrs"), Some("Net"));
2022        // [23] std::process getters are pure; spawn/new stay Exec.
2023        assert_eq!(classify("std", "std::process::Command::get_program"), None);
2024        assert_eq!(classify("std", "std::process::Command::get_args"), None);
2025        assert_eq!(classify("std", "std::process::Child::id"), None);
2026        assert_eq!(classify("std", "std::process::Command::spawn"), Some("Exec"));
2027        // [27] redis ConnectionManager::clone is an Arc bump (pure); a query round-trips.
2028        assert_eq!(classify("redis", "redis::aio::ConnectionManager::clone"), None);
2029        assert_eq!(classify("redis", "redis::aio::ConnectionManager::send_packed_command"), Some("Db"));
2030        // [5] sea_orm re-exported sea_query builder algebra is pure; execution verbs stay Db.
2031        assert_eq!(classify("sea_orm", "sea_orm::sea_query::Func::count"), None);
2032        assert_eq!(classify("sea_orm", "sea_orm::sea_query::Condition::all"), None);
2033        assert_eq!(classify("sea_orm", "sea_orm::Select::all"), Some("Db"));
2034    }
2035
2036    #[test]
2037    fn rand_osrng_handle_ops_are_pure_but_draws_are_rand() {
2038        // Adversarial-review fabrication: the blanket `contains("OsRng")` tagged `OsRng::clone` Rand,
2039        // but OsRng is a unit struct — clone/fork/default draw no entropy. The real draws still fire.
2040        assert_eq!(classify("rand", "rand::rngs::OsRng::clone"), None);
2041        assert_eq!(classify("rand", "rand::rngs::OsRng::default"), None);
2042        assert_eq!(classify("rand", "rand::rngs::OsRng::fill_bytes"), Some("Rand")); // a real draw
2043        assert_eq!(classify("rand", "rand::rngs::OsRng::next_u32"), Some("Rand"));
2044        assert_eq!(classify("rand", "rand::Rng::gen"), Some("Rand")); // verb path unaffected
2045        assert_eq!(classify("rand", "rand::distributions::Uniform::new"), None); // pure ctor still pure
2046    }
2047
2048    #[test]
2049    fn redis_connection_manager_config_builder_is_pure() {
2050        // Adversarial-review fabrication: `contains("ConnectionManager")` hit the pure *Config* builder.
2051        assert_eq!(classify("redis", "redis::aio::ConnectionManagerConfig::new"), None);
2052        assert_eq!(classify("redis", "redis::aio::ConnectionManagerConfig::set_max_delay"), None);
2053        // the LIVE manager still round-trips (Db).
2054        assert_eq!(classify("redis", "redis::aio::ConnectionManager::new"), Some("Db"));
2055        assert_eq!(classify("redis", "redis::Commands::get"), Some("Db"));
2056    }
2057
2058    #[test]
2059    fn pure_fd_transfer_is_not_an_effect() {
2060        // ADOPTING / EXTRACTING / BORROWING an already-open descriptor (or unwrapping an async type back
2061        // to its std type) issues NO syscall — it must be PURE even though it hangs off a std I/O type
2062        // whose prefix rule would otherwise fire Net/Fs/Ipc. (Real tokio sweep: `into_std`, `from_raw_fd`,
2063        // `as_raw_fd` all fabricated effects.)
2064        assert_eq!(classify("std", "std::net::TcpStream::from_raw_fd"), None);
2065        assert_eq!(classify("std", "std::net::TcpStream::into_raw_fd"), None);
2066        assert_eq!(classify("std", "std::net::TcpStream::as_raw_fd"), None);
2067        assert_eq!(classify("std", "std::net::TcpListener::from_raw_fd"), None);
2068        assert_eq!(classify("std", "std::net::UdpSocket::from_raw_socket"), None);
2069        assert_eq!(classify("std", "std::fs::File::from_raw_fd"), None);
2070        assert_eq!(classify("std", "std::fs::File::into_raw_fd"), None);
2071        assert_eq!(classify("std", "std::fs::File::as_raw_handle"), None);
2072        assert_eq!(classify("std", "std::os::unix::net::UnixStream::from_raw_fd"), None);
2073        // `SocketAddr::from_pathname` builds an address struct, opens no socket — pure. (socket2 sweep.)
2074        assert_eq!(classify("std", "std::os::unix::net::SocketAddr::from_pathname"), None);
2075        assert_eq!(classify("tokio", "tokio::net::TcpStream::from_raw_fd"), None);
2076        assert_eq!(classify("tokio", "tokio::net::TcpStream::into_std"), None); // unwrap → std type, pure
2077        assert_eq!(classify("tokio", "tokio::fs::File::into_std"), None);
2078        // …but a REAL open/connect on the SAME types still fires the effect — the carve-out is leaf-precise.
2079        assert_eq!(classify("std", "std::net::TcpStream::connect"), Some("Net"));
2080        assert_eq!(classify("std", "std::fs::File::open"), Some("Fs"));
2081        assert_eq!(classify("std", "std::fs::read"), Some("Fs"));
2082        assert_eq!(classify("std", "std::os::unix::net::UnixStream::connect"), Some("Ipc"));
2083        assert_eq!(classify("tokio", "tokio::net::TcpStream::connect"), Some("Net"));
2084    }
2085
2086    #[test]
2087    fn command_head_refines_the_exec_cliff() {
2088        use super::classify_command_head as h;
2089        // unambiguous external tools classify by basename (spec §4 ⟨0.5⟩)
2090        assert_eq!(h("curl"), &["Net"]);
2091        assert_eq!(h("telnet"), &["Net"]);
2092        assert_eq!(h("sftp"), &["Net"]);
2093        assert_eq!(h("/usr/local/bin/psql"), &["Db"]); // basename match strips the path
2094        assert_eq!(h("mongo"), &["Db"]);
2095        assert_eq!(h("cqlsh"), &["Db"]);
2096        // a candor engine is Fs/Env — spec-SUPPLIED by §7 item 12, not curation
2097        assert_eq!(h("candor-scan"), &["Env", "Fs"]);
2098        assert_eq!(h("candor-run.sh"), &["Env", "Fs"]);
2099        // an unrecognised head adds nothing — the bare Exec cliff stands (never guess). `make`/`npm`
2100        // run the project's own code; `git`/`rsync` are multi-modal (local vs remote) — all keep the
2101        // cliff rather than fabricate an effect for the common case.
2102        assert_eq!(h("some-unknown-tool"), &[] as &[&str]);
2103        assert_eq!(h("make"), &[] as &[&str]);
2104        assert_eq!(h("npm"), &[] as &[&str]);
2105        assert_eq!(h("git"), &[] as &[&str]);
2106        assert_eq!(h("rsync"), &[] as &[&str]);
2107        // a builder MODIFIER (`.arg`/`.env`) names no program — its literal must NOT refine (a
2108        // whole-crate-Exec crate classifies every method; `.env("psql",..)` must not fabricate Db).
2109        assert!(is_cmd_builder_method("env") && is_cmd_builder_method("arg") && is_cmd_builder_method("current_dir"));
2110        assert!(!is_cmd_builder_method("new")); // Command::new NAMES the program
2111        assert!(!is_cmd_builder_method("cmd")); // duct::cmd NAMES the program
2112        // The gate that ADMITS a literal to classify_command_head is an ALLOWLIST of program-NAMING
2113        // methods, not the builder denylist. Inversion matters: a whole-crate-Exec crate (portable_pty)
2114        // classifies EVERY method as Exec, so a getter like `cmd.get_env("psql")` — absent from the
2115        // builder denylist — would have leaked "psql" to the head and FABRICATED Db. Only `new`/`cmd`
2116        // name a program, so only they may refine.
2117        assert!(is_cmd_naming_method("new") && is_cmd_naming_method("cmd"));
2118        assert!(!is_cmd_naming_method("get_env")); // a GETTER, not a namer — the leak this closes
2119        assert!(!is_cmd_naming_method("arg") && !is_cmd_naming_method("env") && !is_cmd_naming_method("current_dir"));
2120    }
2121
2122    #[test]
2123    fn net_establishing_allowlist() {
2124        // sweep [3]/[7]: the masking guard's establishing-verb allowlist — host-bearing connect/request
2125        // verbs establish (a runtime host there is invisible); USE-verbs on a connected socket do NOT.
2126        assert!(is_net_establishing("connect") && is_net_establishing("connect_timeout"));
2127        assert!(is_net_establishing("get") && is_net_establishing("post") && is_net_establishing("request"));
2128        assert!(is_net_establishing("send_to") && is_net_establishing("to_socket_addrs"));
2129        // use-verbs (host fixed at connect) must NOT be establishing — else `connect("h").write()` flags.
2130        assert!(!is_net_establishing("write") && !is_net_establishing("read") && !is_net_establishing("send"));
2131        assert!(!is_net_establishing("flush") && !is_net_establishing("recv") && !is_net_establishing("peek"));
2132    }
2133
2134    #[test]
2135    fn fs_path_arg_allowlist() {
2136        // The Fs masking guard's path-naming-fn allowlist — free fns / constructors take the path as a
2137        // string arg (a runtime path there is invisible to the gate). Stat methods (path on the receiver)
2138        // and handle ops carry no path arg and must NOT flag — but they're caught by the caller's
2139        // `!is_method` gate; the allowlist itself just enumerates the path-NAMING leaves.
2140        assert!(is_fs_path_arg("write") && is_fs_path_arg("read") && is_fs_path_arg("read_to_string"));
2141        assert!(is_fs_path_arg("open") && is_fs_path_arg("create") && is_fs_path_arg("create_new"));
2142        assert!(is_fs_path_arg("remove_file") && is_fs_path_arg("rename") && is_fs_path_arg("copy"));
2143        assert!(is_fs_path_arg("create_dir_all") && is_fs_path_arg("canonicalize") && is_fs_path_arg("metadata"));
2144        // handle ops / pure builders take NO path arg — never path-naming.
2145        assert!(!is_fs_path_arg("write_all") && !is_fs_path_arg("flush") && !is_fs_path_arg("read_exact"));
2146        assert!(!is_fs_path_arg("new") && !is_fs_path_arg("sync_all") && !is_fs_path_arg("set_len"));
2147    }
2148
2149    #[test]
2150    fn db_query_arg_allowlist() {
2151        // The Db masking guard's query-bearing-verb allowlist — these take the raw SQL as a string arg
2152        // (a runtime query there is invisible to the gate). Build-then-execute terminals and non-query
2153        // ops carry no SQL string and must NOT flag.
2154        assert!(is_db_query_arg("execute") && is_db_query_arg("query") && is_db_query_arg("query_one"));
2155        assert!(is_db_query_arg("prepare") && is_db_query_arg("batch_execute") && is_db_query_arg("execute_batch"));
2156        assert!(is_db_query_arg("query_row") && is_db_query_arg("query_map") && is_db_query_arg("exec"));
2157        // build-then-execute terminals (query built structurally, no SQL string) must NOT flag.
2158        assert!(!is_db_query_arg("fetch_all") && !is_db_query_arg("load") && !is_db_query_arg("first"));
2159        assert!(!is_db_query_arg("all") && !is_db_query_arg("one") && !is_db_query_arg("stream"));
2160        // connection / lifecycle ops take no SQL — must NOT flag.
2161        assert!(!is_db_query_arg("connect") && !is_db_query_arg("open") && !is_db_query_arg("begin"));
2162        assert!(!is_db_query_arg("commit") && !is_db_query_arg("ping") && !is_db_query_arg("get_conn"));
2163    }
2164}