Skip to main content

candor_classify/
lib.rs

1//! candor-classify — the curated effect classifier (crate+path -> effect), extracted to a STABLE
2//! crate so both the nightly `rustc_private` lint AND a stable backend share ONE source of truth
3//! (no drift). Pure string logic; no rustc internals. The effect vocabulary lives in candor-report.
4
5use candor_report::EFFECTS;
6
7/// The canonical CANDOR_POLICY DSL parser (SPEC §6.2), shared by the nightly gate and candor-query.
8pub mod policy;
9
/// Project-supplied effect rules; callers consult these only when the built-in `classify`
/// returns `None`.
///
/// Every `extra` entry is `(effect, is_crate, prefix)`. When `is_crate` is set the prefix is
/// tested against `crate_name`, otherwise against `path`. Entries are tried in slice order and
/// the first one that matches decides the effect; `None` means no rule applied.
pub fn classify_extra(
    crate_name: &str,
    path: &str,
    extra: &[(&'static str, bool, String)],
) -> Option<&'static str> {
    extra
        .iter()
        .find(|(_, is_crate, prefix)| {
            // Pick which string the rule is keyed on, then do the single prefix test.
            let subject = if *is_crate { crate_name } else { path };
            subject.starts_with(prefix.as_str())
        })
        .map(|(effect, _, _)| *effect)
}
24
/// The third-party crates `classify` has effect rules for, matched by exact crate name (the
/// crate-name PREFIXES it recognizes are listed alongside, in `CALIBRATED_PREFIXES`).
///
/// This is the single source of truth for "what candor knows": it is emitted beside the JSON
/// report (`<prefix>.calibrated.json`) so that the Claude Code receipt's coverage check reads
/// candor's real coverage instead of a hand-copied list.
///
/// Keep it in lockstep with `classify` below — the `db_crates_are_calibrated` and
/// `calibrated_crates_are_live` tests (in this crate's `tests` module) enforce both directions.
pub const CALIBRATED_CRATES: [&str; 59] = [
    // --- network ---
    "reqwest", "isahc", "ureq", "curl",
    // aws_config resolves credentials over the network on `.load()`; git2 remote ops
    // (fetch/push/connect) contact the network.
    "aws_config", "git2",
    // async_net is smol's net layer.
    "tokio_tcp", "tokio_udp", "async_net",
    "async_nats", "lapin", "lettre", "tungstenite", "elasticsearch", "tonic", "rdkafka",
    // pnet is raw L2/L3 packet capture.
    "pnet",
    // --- directory traversal + filesystem watching ---
    // ignore = gitignore-aware walker (powers ripgrep/fd; its walk executors are Fs);
    // notify = inotify/FSEvents/kqueue wrapper (powers watchexec/cargo-watch).
    "ignore", "notify",
    // --- database (see `DB_CRATES`) ---
    "sqlx", "rusqlite", "postgres", "tokio_postgres", "diesel", "redis", "mongodb", "mysql",
    "mysql_async", "sea_orm", "deadpool_postgres",
    // --- filesystem (async_fs = smol; fs_err = std::fs wrapper; tempfile; glob) ---
    "memmap2", "fs_err", "async_fs", "tempfile", "glob",
    // --- entropy ---
    "rand", "getrandom", "fastrand",
    // the password-hashing tier (salt mints + bcrypt's internal salt) + the OsRng source
    "argon2", "bcrypt", "scrypt", "pbkdf2", "password_hash", "rand_core",
    // --- subprocess (async_process = smol; duct) ---
    "portable_pty", "async_process", "duct",
    // --- env ---
    "dotenvy", "dotenv",
    // --- clock / log / clipboard ---
    "chrono", "time", "tracing", "log", "arboard",
    // --- compiler diagnostic emission (a dylint lint's output) — see the Log rules in classify ---
    "rustc_lint", "rustc_errors",
    // --- raw syscalls via FFI ---
    // The syscall-name table that lights up the FFI-thin tier (nix is routed through the same
    // table by leaf name, so a consumer of nix is covered without nix's own source).
    "libc", "nix", "rustix",
];
58
/// Crate-name PREFIXES `classify` recognizes: it tests these with `starts_with` on the crate
/// name (`aws_sdk_*`, `aws_smithy*`, `cap_*`) instead of requiring an exact crate-name match.
pub const CALIBRATED_PREFIXES: [&str; 3] = [
    "aws_sdk_",
    "aws_smithy",
    "cap_",
];
60
/// Crates that `classify` recognises by PATH prefix instead of by crate-name equality — their
/// effectful modules are what match (e.g. `tokio::net::`/`async_std::fs::`/`mio::net::`).
///
/// They are therefore absent from `CALIBRATED_CRATES`, which the liveness test probes by crate
/// name. The coverage check must nevertheless count them as *covered*; otherwise the most common
/// async crates would be mislabelled as blind spots.
pub const PATH_CALIBRATED_CRATES: [&str; 3] = [
    "tokio",
    "async_std",
    "mio",
];
66
/// Representative path tails, each appended to a crate name, that the
/// `calibrated_crates_are_live` liveness test probes. Every `CALIBRATED_CRATES` entry needs at
/// least one tail that matches; an entry with none is dead.
///
/// Exported as ONE source of truth because the nightly lint crate (`src/lib.rs`) runs the SAME
/// liveness test. When the two probe lists were duplicated they drifted: a rule keyed on a
/// distinctive tail (pnet `::datalink::channel`, ignore `::WalkBuilder::build_parallel`, notify
/// `::RecommendedWatcher::new`) added to only one list silently broke the other crate's
/// `cargo test`.
pub const CALIBRATION_PROBE_TAILS: &[&str] = &[
    "::X::send",
    "::X::execute",
    "::X::call",
    "::X::query",
    "::X::fetch_one",
    "::Remote::fetch",
    "::datalink::channel",
    "::WalkBuilder::build_parallel",
    "::RecommendedWatcher::new",
    "::X::connect",
    "::Utc::now",
    "::X::load",
    "::__private_api::log",
    "::tempfile",
    "::glob",
    "::X::run",
    "::dotenv",
    "::random",
    "::emit",
    "::X::emit_span_lint",
    "::X::anything",
    "::SaltString::generate",
    "::hash",
    "::OsRng::fill_bytes",
    // Verb-precise crates: their whole-crate rules were narrowed to the effectful surface (pure
    // accessors/ctors/data-types now return None), so the probe has to name an EFFECTFUL path.
    "::Mmap::map",
    "::event",
    "::u32",
    "::Clipboard::get_text",
    "::spawn_command",
];
83
/// Database client crates whose execution verbs count as I/O (see the DB branch in `classify`).
///
/// Kept at module level so that `db_crates_are_calibrated` can enforce
/// `DB_CRATES ⊆ CALIBRATED_CRATES`.
pub const DB_CRATES: [&str; 11] = [
    "sqlx",
    "rusqlite",
    "postgres",
    "tokio_postgres",
    "diesel",
    "redis",
    "mongodb",
    "mysql",
    "mysql_async",
    "sea_orm",
    "deadpool_postgres",
];
90
/// Leaf names of pure file-descriptor *ownership-transfer* calls, which `classify` exempts before
/// any prefix rule runs. None of them issues a syscall or performs I/O:
///
/// - ADOPT an already-open descriptor: `from_raw_fd`/`from_raw_socket`/`from_raw_handle`;
/// - EXTRACT/BORROW one: `into_raw_fd`/`into_raw_socket`/`into_raw_handle`,
///   `as_raw_fd`/`as_raw_socket`/`as_raw_handle`;
/// - UNWRAP an async wrapper back to its std type: `into_std`.
///
/// candor's cardinal sin is calling a PURE function effectful, and these leaves collide with the
/// coarse std-type PREFIX rules (`std::net::TcpStream`/`std::fs::File`/`std::os::unix::net` →
/// Net/Fs/Ipc) even though the descriptor was opened ELSEWHERE. The portable_pty/async_process
/// Exec rule already exempts `from_raw_fd`; this generalises the same carve-out across the
/// net/fs/ipc prefix rules. (Found by a real-world sweep of tokio: `TcpStream::into_std`,
/// `*::from_raw_fd`, `*::as_raw_fd` all fabricated Net/Fs/Ipc.)
const PURE_FD_TRANSFER: &[&str] = &[
    // adopt
    "from_raw_fd",
    "from_raw_socket",
    "from_raw_handle",
    // extract
    "into_raw_fd",
    "into_raw_socket",
    "into_raw_handle",
    // borrow
    "as_raw_fd",
    "as_raw_socket",
    "as_raw_handle",
    // unwrap an async wrapper
    "into_std",
    // `SocketAddr::from_pathname` (std/async-std unix net) only builds an address STRUCT from a
    // path; no socket is opened. Without this entry the `std::os::unix::net` prefix rule would
    // fabricate Ipc on it. (Found sweeping socket2: `SockAddr::as_unix` → `from_pathname`
    // reported Ipc.)
    "from_pathname",
];
111
112/// Classify a resolved callee by the crate it belongs to and its full path.
113pub fn classify(crate_name: &str, path: &str) -> Option<&'static str> {
114    // Pure fd ownership-transfer/extraction leaves are never an effect, regardless of which std I/O
115    // type they hang off — exempt them BEFORE the coarse prefix rules can fabricate Net/Fs/Ipc.
116    if PURE_FD_TRANSFER.contains(&path.rsplit("::").next().unwrap_or(path)) {
117        return None;
118    }
119    if crate_name.starts_with("aws_sdk_") || crate_name.starts_with("aws_smithy") {
120        // Only request dispatch is network I/O; builder setters/accessors are pure.
121        if path.ends_with("::send") || path.ends_with("::send_with") {
122            return Some("Net");
123        }
124        return None;
125    }
126    // aws-config resolves credentials/region on `.load()` — it reaches the IMDS metadata
127    // endpoint / STS over the network (and reads ~/.aws + env). Builders (`defaults()`,
128    // `SdkConfig::builder()`, `BehaviorVersion::latest()`) are pure; the `load` is the I/O.
129    // (Found hardening on a real app, ebman: `builder.load().await` was classified pure.)
130    if crate_name == "aws_config" {
131        if path.ends_with("::load") || path.ends_with("::load_defaults") {
132            return Some("Net");
133        }
134        return None;
135    }
136    // git2 (libgit2 FFI): remote operations contact the network; everything else is local
137    // to the .git directory. Match the remote verbs precisely — NOT bare `::clone`, which is
138    // the `Clone`-trait dup of a `Remote` handle (pure), not `Repository::clone`. (Found
139    // hardening on gitui: `remote.fetch`/`remote.push` were classified network-free — a git
140    // client reporting it makes no network calls.)
141    if crate_name == "git2" {
142        if path.ends_with("::fetch")
143            || path.ends_with("::push")
144            || path.ends_with("::download")
145            || path.ends_with("::connect")
146            || path.ends_with("::connect_auth")
147            || path.ends_with("::ls")
148            || path.ends_with("::upload")
149        {
150            return Some("Net");
151        }
152        return None;
153    }
154    // libc — raw syscalls via FFI. The FFI-thin tier (nix, and the syscall layer beneath rusqlite/git2)
155    // is invisible to a name classifier unless we model libc directly: a 35-crate calibration
156    // (eval/calibration) showed nix reporting ZERO library effects because every wrapper bottoms out in
157    // an unrecognised `libc::*` call. Classify by syscall name, but ONLY the UNAMBIGUOUS ones — the
158    // socket family is Net, path/dir syscalls are Fs, spawn/exec/wait is Exec, SysV/pipe IPC is Ipc,
159    // env/clock/entropy each their own. We deliberately SKIP the generic file-descriptor ops
160    // (read/write/close/lseek/dup/fcntl/ioctl/poll/select/epoll*/mmap): they operate on ANY fd — file,
161    // socket, or pipe — so a fixed label would mis-categorise as often as it helps. An honest
162    // no-classify (under-report) beats emitting the WRONG effect. Pure conversions (htons/inet_pton/
163    // gmtime) are also skipped.
164    //
165    // `nix` (the idiomatic SAFE libc wrapper, in ~every Rust systems/CLI crate) is routed through the
166    // SAME table: its functions keep the syscall leaf name (`nix::fcntl::open`, `nix::sys::socket::connect`,
167    // `nix::unistd::execvp`). Without this, a CONSUMER of nix analysed without nix's own source (the
168    // stable scanner, single-crate) sees `nix::*` cross-crate and under-reports — serialport-rs opens its
169    // device via `nix::fcntl::open` and reported ZERO Fs. The nightly lint reaches `libc::*` THROUGH nix's
170    // body; this gives the scanner the same coverage directly. (Found sweeping serialport-rs.)
171    // `rustix` is the same shape as nix but does RAW syscalls (no libc underneath), so its functions MUST
172    // be classified directly. Its leaf names are the syscall names too (`rustix::time::clock_settime`,
173    // `rustix::fs::mkfifoat`/`symlink`/`stat`, `rustix::net::connect`) — route it through the same table.
174    // The rustix-specific `*at`/variant leaves it doesn't share with libc just under-report (the safe
175    // direction). VALIDATED, not speculative: coreutils' `date` reads/sets the clock via
176    // `rustix::time::clock_getres`/`clock_settime` and reported Clock=0; the file I/O that goes through
177    // std::fs was already correct, which is why only the rustix-only effects (Clock/Ipc) were missing.
178    if crate_name == "libc" || crate_name == "nix" || crate_name == "rustix" {
179        let f = path.rsplit("::").next().unwrap_or(path);
180        // path / directory / metadata syscalls (incl. *64 and *at variants)
181        const FS: &[&str] = &[
182            "open", "open64", "openat", "openat2", "creat", "creat64", "stat", "stat64", "lstat",
183            "lstat64", "fstatat", "fstatat64", "newfstatat", "statx", "access", "faccessat",
184            "faccessat2", "mkdir", "mkdirat", "rmdir", "unlink", "unlinkat", "rename", "renameat",
185            "renameat2", "link", "linkat", "symlink", "symlinkat", "readlink", "readlinkat", "chmod",
186            "fchmodat", "chown", "lchown", "fchownat", "truncate", "truncate64", "ftruncate",
187            "ftruncate64", "opendir", "fdopendir", "readdir", "readdir64", "readdir_r", "closedir",
188            "rewinddir", "seekdir", "telldir", "scandir", "mkstemp", "mkstemps", "mkostemp", "mkdtemp",
189            "mknod", "mknodat", "chdir", "fchdir", "getcwd", "get_current_dir_name", "chroot",
190            "pivot_root", "statfs", "statfs64", "fstatfs", "fstatfs64", "statvfs", "fstatvfs", "mount",
191            "umount", "umount2", "fsync", "fdatasync", "sync", "syncfs", "sync_file_range", "fallocate",
192            "posix_fallocate", "posix_fadvise", "sendfile", "sendfile64", "copy_file_range", "flock",
193            "getdents", "getdents64", "utime", "utimes", "lutimes", "futimens", "utimensat", "futimesat",
194            "realpath",
195        ];
196        // socket family — these operate only on sockets, so Net is unambiguous (AF_UNIX domain isn't
197        // visible at the call, so a Unix socket reads as Net rather than Ipc; acceptable over-general).
198        const NET: &[&str] = &[
199            "socket", "setsockopt", "getsockopt", "bind", "listen", "accept", "accept4", "connect",
200            "shutdown", "send", "sendto", "sendmsg", "sendmmsg", "recv", "recvfrom", "recvmsg",
201            "recvmmsg", "getpeername", "getsockname", "getaddrinfo", "freeaddrinfo", "getnameinfo",
202        ];
203        // process creation / replacement / reaping
204        const EXEC: &[&str] = &[
205            "fork", "vfork", "clone", "clone3", "execl", "execlp", "execle", "execv", "execvp",
206            "execvpe", "execve", "execveat", "fexecve", "posix_spawn", "posix_spawnp", "system",
207            "popen", "pclose", "wait", "waitpid", "wait3", "wait4", "waitid",
208        ];
209        // pipes / FIFOs / SysV + POSIX message queues, semaphores, shared memory; socketpair (AF_UNIX)
210        const IPC: &[&str] = &[
211            "pipe", "pipe2", "mkfifo", "mkfifoat", "socketpair", "msgget", "msgsnd", "msgrcv", "msgctl",
212            "semget", "semop", "semtimedop", "semctl", "shmget", "shmat", "shmdt", "shmctl", "mq_open",
213            "mq_send", "mq_receive", "mq_timedsend", "mq_timedreceive", "mq_close", "mq_unlink",
214        ];
215        const ENV: &[&str] = &["getenv", "secure_getenv", "setenv", "putenv", "unsetenv", "clearenv"];
216        const CLOCK: &[&str] = &[
217            "time", "gettimeofday", "clock_gettime", "clock_getres", "nanosleep", "clock_nanosleep",
218            // SETTING the system clock is a clock effect too (was unclassified — found on coreutils `date`,
219            // which sets it via `clock_settime`).
220            "clock_settime", "settimeofday", "stime", "adjtime", "adjtimex", "clock_adjtime",
221        ];
222        const RAND: &[&str] = &["getrandom", "getentropy", "arc4random", "arc4random_buf", "arc4random_uniform"];
223        if FS.contains(&f) {
224            return Some("Fs");
225        }
226        if NET.contains(&f) {
227            return Some("Net");
228        }
229        if EXEC.contains(&f) {
230            return Some("Exec");
231        }
232        if IPC.contains(&f) {
233            return Some("Ipc");
234        }
235        if ENV.contains(&f) {
236            return Some("Env");
237        }
238        if CLOCK.contains(&f) {
239            return Some("Clock");
240        }
241        if RAND.contains(&f) {
242            return Some("Rand");
243        }
244        return None;
245    }
246    // C-library FFI bindings: libsqlite3 (under rusqlite) and libgit2 (under git2). Like the libc tier,
247    // these crates are thin Rust over a C library, so their real I/O is invisible until the C entry
248    // points are named. Match by the DISTINCTIVE C function name (`sqlite3_*` / `git_*`) via the call's
249    // LEAF — independent of the binding crate's alias: rusqlite calls `ffi::sqlite3_step`, git2 calls
250    // `raw::git_remote_fetch`, and the nightly lint resolves the same to `libsqlite3_sys`/`libgit2_sys`;
251    // all spellings share the leaf. Only the I/O-performing entry points are listed — the in-memory
252    // accessors (`sqlite3_bind_*`/`sqlite3_column_*`, `git_*_oid`/strarray/options builders) stay pure,
253    // so a non-listed `sqlite3_`/`git_` leaf returns None (under-report, never a wrong effect). Calibrated
254    // + validated against rusqlite 0.39 / git2 0.20 source (eval/calibration).
255    {
256        let leaf = path.rsplit("::").next().unwrap_or(path);
257        if let Some(rest) = leaf.strip_prefix("sqlite3_") {
258            let _ = rest;
259            // SQLite C API operations that touch the database (open/exec/step/prepare/backup/blob/wal).
260            const DB: &[&str] = &[
261                "sqlite3_open", "sqlite3_open_v2", "sqlite3_open16", "sqlite3_close", "sqlite3_close_v2",
262                "sqlite3_exec", "sqlite3_step", "sqlite3_prepare", "sqlite3_prepare_v2",
263                "sqlite3_prepare_v3", "sqlite3_prepare16", "sqlite3_prepare16_v2", "sqlite3_prepare16_v3",
264                "sqlite3_get_table", "sqlite3_backup_init", "sqlite3_backup_step", "sqlite3_backup_finish",
265                "sqlite3_blob_open", "sqlite3_blob_read", "sqlite3_blob_write", "sqlite3_blob_reopen",
266                "sqlite3_load_extension", "sqlite3_wal_checkpoint", "sqlite3_wal_checkpoint_v2",
267            ];
268            return DB.contains(&leaf).then_some("Db");
269        }
270        if leaf.starts_with("git_") {
271            // libgit2: remote/transport operations contact the network … (incl. submodule clone/update,
272            // which `git_clone`/fetch the subrepo over its remote — `allow_fetch` defaults on; an A/B on
273            // git2 0.20 caught `Submodule::update`/`clone` reporting no `Net`).
274            const NET: &[&str] = &[
275                "git_clone", "git_remote_connect", "git_remote_connect_ext", "git_remote_fetch",
276                "git_remote_download", "git_remote_upload", "git_remote_push", "git_remote_ls",
277                "git_submodule_clone", "git_submodule_update",
278            ];
279            // … and repository/index/odb/checkout/ref/config operations touch the on-disk .git store.
280            const FS: &[&str] = &[
281                "git_repository_open", "git_repository_open_ext", "git_repository_open_bare",
282                "git_repository_init", "git_repository_init_ext", "git_repository_discover",
283                "git_checkout_tree", "git_checkout_head", "git_checkout_index", "git_index_read",
284                "git_index_write", "git_index_write_tree", "git_index_write_tree_to",
285                "git_index_add_bypath", "git_index_add_all", "git_odb_open", "git_odb_read",
286                "git_odb_write", "git_odb_open_wstream", "git_odb_open_rstream",
287                "git_blob_create_fromdisk", "git_blob_create_fromworkdir", "git_blob_create_from_disk",
288                "git_blob_create_from_workdir", "git_blob_create_from_stream", "git_commit_create",
289                "git_commit_create_v", "git_reference_create", "git_reference_set_target",
290                "git_reference_delete", "git_config_open_default", "git_config_open_ondisk",
291                "git_config_add_file_ondisk", "git_tag_create", "git_treebuilder_write",
292                "git_packbuilder_write",
293            ];
294            if NET.contains(&leaf) {
295                return Some("Net");
296            }
297            if FS.contains(&leaf) {
298                return Some("Fs");
299            }
300            return None;
301        }
302        if leaf.starts_with("curl_") {
303            // libcurl (under the `curl` crate, called `curl_sys::curl_*`). Only the entry points that
304            // PERFORM network I/O: the blocking transfer (`curl_easy_perform`), raw socket send/recv,
305            // the HTTP/2 keepalive PING (`upkeep`), and the multi-interface transfer pumps. The large
306            // pure surface (setopt/init/cleanup/reset/getinfo/escape/multi_add_handle/fdset/info_read)
307            // stays unclassified, as do `curl_multi_wait`/`poll` (readiness WAIT on sockets, no payload —
308            // the loop's `perform` is the tagged boundary, per the I/O-boundary principle). An A/B on
309            // curl 0.4 caught the whole crate reporting ZERO Net (`Easy::perform` read as pure).
310            const NET: &[&str] = &[
311                "curl_easy_perform", "curl_easy_send", "curl_easy_recv", "curl_easy_upkeep",
312                "curl_multi_perform", "curl_multi_socket_action",
313            ];
314            return NET.contains(&leaf).then_some("Net");
315        }
316        if let Some(op) = leaf.strip_prefix("SSL_") {
317            // OpenSSL (libssl, under the `openssl`/`native-tls` crates, called `ffi::SSL_*`). The TLS
318            // handshake and record I/O run over the peer socket -> Net. Unlike libc read/write, an SSL_*
319            // op is ~always over a network BIO (the rare memory-BIO/sans-IO case is the honest exception
320            // we accept). The crypto surface (EVP_*/SHA*/AES*) and pure setup (SSL_CTX_new/SSL_set_fd) are
321            // NOT here; `BIO_*` is skipped (a BIO may be memory or socket). Validated vs openssl 0.9 source.
322            const SSL_NET: &[&str] = &[
323                "connect", "accept", "do_handshake", "read", "read_ex", "write", "write_ex", "peek",
324                "peek_ex", "shutdown",
325            ];
326            return SSL_NET.contains(&op).then_some("Net");
327        }
328    }
329    // HTTP clients use the same builder pattern as the AWS SDK: only the dispatch is
330    // I/O. (Found by the eval: ebman's reqwest calls to the Anthropic API + webhooks
331    // were silently classified network-free because reqwest wasn't recognized.)
332    if crate_name == "reqwest" || crate_name == "isahc" {
333        // The builder chain is pure; the dispatch (`::send`/`::execute`) is the I/O. PLUS the one-shot
334        // CONVENIENCE functions `reqwest::get` / `reqwest::blocking::get` / `isahc::get`, which send
335        // immediately — they're not the `Client::get` builder (a different path, `reqwest::Client::get`),
336        // so an exact match avoids false-positiving the builder. (Found running on `xh`: a one-shot
337        // `reqwest::get(url)` was classified network-free.)
338        if path.ends_with("::send")
339            || path.ends_with("::execute")
340            || path == "reqwest::get"
341            || path == "reqwest::blocking::get"
342            || path == "isahc::get"
343        {
344            return Some("Net");
345        }
346        return None;
347    }
348    if crate_name == "ureq" && path.ends_with("::call") {
349        return Some("Net");
350    }
351    // The `curl` crate (libcurl's safe binding — cargo's own HTTP client): the dispatch verbs are
352    // `perform` (Easy/Easy2/Transfer/Multi), raw-socket `send`/`recv`, the keepalive `upkeep`, and the
353    // multi-interface `action` (socket_action). The big setopt-style builder surface stays pure.
354    // `Multi::timeout` is deliberately NOT matched: `Easy::timeout` is a pure CURLOPT_TIMEOUT setter
355    // sharing the leaf — an under-report on the rare event-loop kick beats mis-tagging every consumer
356    // that sets a timeout. (Consumer-side companion to the curl_* FFI tier, same A/B finding.)
357    if crate_name == "curl"
358        && (path.ends_with("::perform")
359            || path.ends_with("::send")
360            || path.ends_with("::recv")
361            || path.ends_with("::upkeep")
362            || path.ends_with("::action"))
363    {
364        return Some("Net");
365    }
366    // The modern async-HTTP / TLS / QUIC / DNS stack — the LAYER reqwest/ureq/isahc build on, and that
367    // crates use DIRECTLY. Found by the independent-method differential on `oha` (2026-06-17): candor
368    // honestly DISCLOSED these as blind but never CLASSIFIED them, leaving real Net reaches uncovered.
369    // Verb-keyed (the pure type/builder/codec surface stays None) and CRATE-GATED, so generic verbs
370    // (request/connect/get/read/write/accept) never fabricate across unrelated crates. Same precision
371    // discipline as the reqwest/curl rules above; complements the scan_builder_entry_effect entries.
372    match crate_name {
373        // hyper 1.x client connection I/O (the builder/Body/Request types stay pure).
374        "hyper" if path.ends_with("::send_request") || path.ends_with("::handshake") => return Some("Net"),
375        // hyper-util's pooled legacy Client + its TCP connectors.
376        "hyper_util" if path.ends_with("::request") || path.ends_with("::connect") => return Some("Net"),
377        // hickory (trust-dns) resolver — issues DNS queries over the network.
378        "hickory_resolver"
379            if path.ends_with("::lookup_ip") || path.ends_with("::lookup") || path.ends_with("_lookup")
380                || path.ends_with("::resolve") => return Some("Net"),
381        // HTTP/3 over QUIC.
382        "h3" if path.ends_with("::send_request") || path.ends_with("::recv_data")
383            || path.ends_with("::recv_response") || path.ends_with("::send_data") => return Some("Net"),
384        // QUIC transport (UDP socket send/recv).
385        "quinn" if path.ends_with("::connect") || path.ends_with("::accept") || path.ends_with("::open_bi")
386            || path.ends_with("::open_uni") || path.ends_with("::accept_bi") || path.ends_with("::accept_uni")
387            || path.ends_with("::send_datagram") || path.ends_with("::read_datagram") => return Some("Net"),
388        // TLS-over-TCP stream adapters — the actual socket handshake/I/O (the config/cert types stay pure).
389        "tokio_rustls" | "native_tls"
390            if path.ends_with("::connect") || path.ends_with("::accept") || path.ends_with("::handshake") =>
391            return Some("Net"),
392        // AF_VSOCK host<->guest sockets — inter-process / VM comms.
393        "tokio_vsock" if path.ends_with("::connect") || path.ends_with("::bind") || path.ends_with("::accept") =>
394            return Some("Ipc"),
395        // Loads the OS trust store from disk (cert files / keychain).
396        "rustls_native_certs" if path.ends_with("::load_native_certs") => return Some("Fs"),
397        // Reads host/process config from the OS (CPU count, cgroup quota; resource limits).
398        "num_cpus" if path.ends_with("::get") || path.ends_with("::get_physical") => return Some("Env"),
399        "rlimit" if path.ends_with("::getrlimit") || path.ends_with("::setrlimit")
400            || path.ends_with("::increase_nofile_limit") => return Some("Env"),
401        _ => {}
402    }
403    // Message-queue clients fully encapsulate the socket (the underlying tokio::net lives
404    // inside the crate, unseen), so a user's connect/publish/consume calls ARE the I/O
405    // boundary — to a remote broker, hence Net. Match the broker round-trip verbs (snake_case
406    // methods); the CamelCase option/property builders stay pure. (Found hardening on consumer
407    // apps: lapin `basic_publish`/`queue_declare` and async-nats `publish`/`subscribe` were
408    // classified pure — a message-queue client reporting no I/O.)
409    if crate_name == "async_nats" {
410        if path.ends_with("::connect")
411            || path.contains("::publish")
412            || path.ends_with("::subscribe")
413            || path.ends_with("::queue_subscribe")
414            || path.contains("::request")
415            || path.ends_with("::flush")
416        {
417            return Some("Net");
418        }
419        return None;
420    }
421    if crate_name == "lapin" {
422        if path.ends_with("::connect")
423            || path.ends_with("::create_channel")
424            || path.contains("::basic_")
425            || path.contains("::queue_")
426            || path.contains("::exchange_")
427            || path.contains("::tx_")
428            || path.ends_with("::confirm_select")
429            || path.ends_with("::close")
430        {
431            return Some("Net");
432        }
433        return None;
434    }
435    // SMTP email — lettre's `Transport::send` is the network dispatch; Message building is
436    // pure. (Found hardening on a lettre consumer: `mailer.send(&email)` classified pure.)
437    if crate_name == "lettre" {
438        if path.ends_with("::send") || path.ends_with("::send_raw") {
439            return Some("Net");
440        }
441        return None;
442    }
443    // WebSockets — tungstenite (the modern successor to the old `websocket` crate). connect
444    // and the socket read/write/send are network; Message constructors are pure. (Found on a
445    // tungstenite consumer: connect + send + read classified pure.)
446    if crate_name == "tungstenite" {
447        if path.ends_with("::connect")
448            || path.ends_with("::read")
449            || path.ends_with("::write")
450            || path.ends_with("::send")
451            || path.ends_with("::close")
452            || path.ends_with("::flush")
453            || path.ends_with("::read_message")
454            || path.ends_with("::write_message")
455        {
456            return Some("Net");
457        }
458        return None;
459    }
460    // elasticsearch: request builders are pure; only the `.send()` dispatch is HTTP I/O
461    // (same shape as reqwest / the AWS SDK). (Found on an elasticsearch consumer.)
462    if crate_name == "elasticsearch" && path.ends_with("::send") {
463        return Some("Net");
464    }
465    // gRPC — tonic. The transport connect and the Grpc client RPC dispatch are network;
466    // codecs and request/response wrappers are pure. (connect repro-confirmed on a consumer;
467    // the unary/streaming RPC verbs are from the tonic::client::Grpc API.)
468    if crate_name == "tonic" {
469        if path.ends_with("::connect")
470            || path.ends_with("::unary")
471            || path.ends_with("::server_streaming")
472            || path.ends_with("::client_streaming")
473            || path.ends_with("::streaming")
474        {
475            return Some("Net");
476        }
477        return None;
478    }
479    // Kafka — rdkafka (FFI to librdkafka). Producer send + consumer poll/recv/subscribe/
480    // commit are network round-trips to the brokers. (API-calibrated + unit-tested; a real
481    // repro needs librdkafka/cmake, deferred.)
482    if crate_name == "rdkafka" {
483        if path.ends_with("::send")
484            || path.ends_with("::send_result")
485            || path.ends_with("::recv")
486            || path.ends_with("::poll")
487            || path.ends_with("::subscribe")
488            || path.ends_with("::commit")
489            || path.ends_with("::commit_message")
490            || path.ends_with("::commit_consumer_state")
491            || path.ends_with("::store_offset")
492            || path.ends_with("::seek")
493            || path.ends_with("::fetch_metadata")
494            || path.ends_with("::fetch_watermarks")
495            || path.ends_with("::flush")
496        {
497            return Some("Net");
498        }
499        return None;
500    }
501    // cap-std: capability-oriented std. I/O goes *through* a held capability handle
502    // (Dir/Pool/Clock/...), so these calls ARE the effect. Recognising them means a
503    // cap-std project's real I/O is detected and matches the capability it declared
504    // (via `declared_caps`/`capstd_cap`) — conformance against unforgeable capabilities.
505    if crate_name.starts_with("cap_") {
506        if path.contains("::net::Unix") || path.contains("::os::") {
507            return Some("Ipc");
508        }
509        if path.contains("::net") {
510            return Some("Net");
511        }
512        if path.contains("::time") {
513            return Some("Clock");
514        }
515        if path.contains("::fs") || crate_name == "cap_tempfile" || crate_name == "cap_directories" {
516            return Some("Fs");
517        }
518        return None;
519    }
520    // Local IPC (Unix-domain sockets) is I/O but not *network* — keep it distinct so
521    // CANDOR_NO_AMBIENT and audits don't conflate it with internet access. async-std puts its
522    // Unix sockets under `os::unix::net` (mirroring std); async-net (smol's net layer) under
523    // `unix`.
524    if path.starts_with("tokio::net::Unix")
525        || path.starts_with("std::os::unix::net")
526        || path.starts_with("async_std::os::unix::net")
527        || path.starts_with("async_net::unix")
528    {
529        return Some("Ipc");
530    }
531    // Raw packet capture / raw sockets — libpnet (the dominant low-level networking crate; powers
532    // bandwhich, sniffers, custom-protocol tools). `datalink::channel` opens an L2 socket and
533    // `transport::transport_channel` an L3/L4 raw socket — both ARE network I/O. Packet construction
534    // (pnet_packet / pnet_base, MacAddr, Ethernet frames…) is pure and stays unclassified. The actual
535    // frame read/write happens via methods on the returned Sender/Receiver (trait-object dispatch the
536    // syntactic backend can't resolve), so the channel-open call is the precise Net boundary. (Found
537    // scanning bandwhich — a packet sniffer — which reported Net 0.)
538    if crate_name == "pnet" || crate_name == "pnet_datalink" || crate_name == "pnet_transport" {
539        if path.ends_with("::channel") || path.ends_with("::transport_channel") {
540            return Some("Net");
541        }
542        return None;
543    }
544    // Directory traversal — `ignore` (BurntSushi's gitignore-aware walker; powers ripgrep, fd). The walk
545    // EXECUTORS read the directory tree from disk = Fs. Type-precise on purpose: the configuration builders
546    // (`OverrideBuilder::build`, `GitignoreBuilder::build`, the `WalkBuilder` setters) and `DirEntry`
547    // accessors are PURE — only `WalkBuilder::build`/`build_parallel` (which kick off the walk) and
548    // `WalkParallel::run` (which drives it) touch the filesystem. A bare `build` would wrongly flag the
549    // config builders. (Found scanning fd — a file finder — which reported Fs 2: its own `fs::read_dir`
550    // was caught, but the `ignore`-based traversal that IS fd was invisible cross-crate.)
551    if crate_name == "ignore" {
552        if path == "ignore::WalkBuilder::build"
553            || path == "ignore::WalkBuilder::build_parallel"
554            || path.ends_with("::WalkParallel::run")
555            // `add_ignore(path)` LOOKS like a config setter but reads that ignore file from disk at call
556            // time (it returns the read error) — unlike the pure `add_custom_ignore_filename(name)` which
557            // only stores a filename string. The lone Fs-touching builder method in the otherwise-pure setter
558            // surface, so it was silently pure under the covered-crate floor.
559            || path == "ignore::WalkBuilder::add_ignore"
560        {
561            return Some("Fs");
562        }
563        return None;
564    }
565    // Filesystem watching — `notify` (the de-facto fs-watch crate: watchexec, cargo-watch, mdbook). A
566    // watcher opens an OS notification handle (inotify / FSEvents / kqueue / ReadDirectoryChanges) and
567    // registers paths — observing filesystem state changes = Fs. The lifecycle boundary: any
568    // `*Watcher::new` constructor (RecommendedWatcher/PollWatcher/INotifyWatcher/FsEventWatcher/…), the
569    // `recommended_watcher` convenience fn, and the `watch`/`unwatch` registration verbs. `Config`/`Event`/
570    // `EventKind` data types stay pure. (Found scanning watchexec: its watcher-`create` read Fs 0.)
571    if crate_name == "notify" {
572        if path.ends_with("Watcher::new")
573            || path.ends_with("::recommended_watcher")
574            || path.ends_with("::watch")
575            || path.ends_with("::unwatch")
576        {
577            return Some("Fs");
578        }
579        return None;
580    }
581    // std DNS resolution — `("host", 80).to_socket_addrs()` / `std::net::lookup_host("host")` perform a
582    // real getaddrinfo query (Net), but the classify table covered only the socket I/O *types*, so they
583    // floored silently (sweep [37]; the syntactic engine modelled DNS only at the libc layer).
584    if path.ends_with("::to_socket_addrs")
585        || path == "std::net::lookup_host"
586        || path.ends_with("ToSocketAddrs::to_socket_addrs")
587    {
588        return Some("Net");
589    }
590    // Raw sockets. Match the I/O *types* only — `std::net` also holds pure data types
591    // (SocketAddr, IpAddr, …) whose construction must NOT be flagged.
592    if path.starts_with("std::net::TcpStream")
593        || path.starts_with("std::net::TcpListener")
594        || path.starts_with("std::net::UdpSocket")
595        || path.starts_with("tokio::net::")
596    {
597        // …but the PURE accessors read back local/option state — no network I/O — so the whole-type Net
598        // rule fabricated Net on them (sweep [24], the cardinal sin; mirrors the arboard/memmap2 accessor
599        // carve-outs). local_addr/peer_addr return bound/connected addresses; nodelay/ttl/take_error read
600        // socket options/state. Every genuine verb (connect/read/write/send/recv/accept) stays Net.
601        if path.ends_with("::local_addr")
602            || path.ends_with("::peer_addr")
603            || path.ends_with("::nodelay")
604            || path.ends_with("::ttl")
605            || path.ends_with("::take_error")
606        {
607            return None;
608        }
609        return Some("Net");
610    }
611    // Legacy tokio 0.1 socket crates — `tokio_tcp`/`tokio_udp` are *entirely* networking
612    // (no pure types to over-flag), so the whole crate is Net. (Found hardening on websocat,
613    // which is still on tokio 0.1: its `tokio_tcp::TcpStream::connect` was classified
614    // network-free — a network tool confidently reporting 0 Net.)
615    if matches!(crate_name, "tokio_tcp" | "tokio_udp") {
616        return Some("Net");
617    }
618    // The other async runtimes mirror tokio's module layout, and their `net` modules hold only
619    // socket I/O types (the pure `SocketAddr`/`IpAddr` are re-exports that resolve to `std::net`,
620    // so they're excluded by def-path). `mio` is the low-level non-blocking-socket layer under
621    // tokio/others; `async_net` is smol's net crate. Closes the async-std/smol/mio gap the
622    // tokio_tcp note flagged. (Calibrated by module structure — these crates ARE networking — not
623    // a live repro; the TCP/UDP types are defined in-crate so the def-path prefix is exact.)
624    if path.starts_with("async_std::net::")
625        || path.starts_with("mio::net::")
626        || crate_name == "async_net"
627    {
628        return Some("Net");
629    }
630    // Database clients. Like the AWS/HTTP builders, only the execution verbs are I/O;
631    // query *construction* is pure. Best-effort across crates (tune via CANDOR_CONFIG).
632    // Note: bare `::query` is deliberately omitted from the shared VERBS — it executes in postgres/rusqlite but
633    // only *builds* in sqlx, so including it would false-positive sqlx's `query()` builder.
634    if DB_CRATES.contains(&crate_name) {
635        // Postgres / SQLite-family clients: `query`/`batch_execute`/`prepare`/etc. ARE the
636        // execution (round-trips to the server). sqlx is the outlier where bare `query()`
637        // only BUILDS — it keeps the narrow set below. (Found by running on a real
638        // tokio-postgres app, pgman: candor had reported only 4 of ~20 DB call sites.)
639        if matches!(crate_name, "postgres" | "tokio_postgres" | "deadpool_postgres" | "rusqlite") {
640            const PG: [&str; 19] = [
641                "::query", "::query_one", "::query_opt", "::query_raw", "::execute",
642                "::batch_execute", "::simple_query", "::prepare", "::prepare_typed",
643                "::copy_in", "::copy_out", "::transaction", "::connect",
644                // rusqlite's dialect of the same verbs (a verb-probe found the CANONICAL rusqlite
645                // consumer API classifying pure): `query_row` is the one-row read, `query_map`/
646                // `query_and_then` the many-row reads, `execute_batch` is rusqlite's name for
647                // batch_execute, `prepare_cached` round-trips like prepare. `query_typed` is
648                // tokio_postgres 0.7.10+.
649                "::query_row", "::query_map", "::query_and_then", "::execute_batch",
650                "::prepare_cached", "::query_typed",
651            ];
652            if PG.iter().any(|v| path.ends_with(v)) {
653                return Some("Db");
654            }
655            // rusqlite only: opening the database IS the connection establishment (`Connection::
656            // open`/`open_in_memory`/`open_with_flags` — the embedded analog of `::connect`).
657            if crate_name == "rusqlite"
658                && (path.ends_with("::open")
659                    || path.ends_with("::open_in_memory")
660                    || path.ends_with("::open_with_flags"))
661            {
662                return Some("Db");
663            }
664            return None;
665        }
666        // redis: the way redis is ACTUALLY used is the high-level `Commands`/`AsyncCommands`
667        // traits (`con.get`/`set`/`hset`/`lpush`/…) — every method is a round-trip — plus
668        // connection establishment. The shared VERBS below only catch the low-level
669        // `cmd("GET").query(con)`, so without this a normal redis user's calls classify as
670        // PURE. (Found hardening on redis-rs: a fn doing `con.get`/`set` reported no effects.)
671        if crate_name == "redis"
672            && (path.contains("Commands::")
673                || path.contains("::get_connection")
674                || path.contains("::get_async_connection")
675                || path.contains("::get_multiplexed_async_connection")
676                // a live `ConnectionManager` round-trips (Db), but `ConnectionManagerConfig` is a pure
677                // in-memory builder (set_number_of_retries/set_max_delay) — exclude it (adversarial review).
678                // `ConnectionManager::clone` is an Arc refcount bump — no Db round-trip (sweep [27]).
679                || (path.contains("ConnectionManager") && !path.contains("ConnectionManagerConfig")
680                    && !path.ends_with("::clone"))
681                || path.ends_with("::query")
682                || path.ends_with("::query_async")
683                || path.ends_with("::req_command")
684                || path.ends_with("::req_packed_command")
685                || path.ends_with("::req_packed_commands"))
686        {
687            return Some("Db");
688        }
689        // mongodb: a document-store API with none of the SQL verbs — the user calls
690        // `coll.find_one`/`insert_one`/`aggregate`/… and `Client::with_uri_str`. Without
691        // these a mongodb user's calls classify PURE. (Found hardening: a fn doing
692        // `find_one`+`insert_one` reported no effects.) Handle accessors (name/namespace)
693        // and option/doc builders don't match these verbs, so they stay pure.
694        if crate_name == "mongodb" {
695            const MONGO: [&str; 27] = [
696                "::with_uri_str", "::connect", "::find", "::find_one", "::insert_one",
697                "::insert_many", "::update_one", "::update_many", "::delete_one",
698                "::delete_many", "::replace_one", "::aggregate", "::count_documents",
699                "::estimated_document_count", "::count", "::distinct", "::run_command",
700                "::find_one_and_update", "::find_one_and_delete", "::find_one_and_replace",
701                "::list_collections", "::list_collection_names", "::list_databases",
702                "::list_database_names", "::create_collection", "::create_index", "::watch",
703            ];
704            if MONGO.iter().any(|v| path.ends_with(v)) {
705                return Some("Db");
706            }
707            return None;
708        }
709        // mysql / mysql_async: the `query`/`exec` families + `get_conn`/`ping` execute
710        // immediately — no build-then-execute split like sqlx, so matching `::query` is safe
711        // here. Same DB-verb-dialect gap class as redis/mongodb; calibrated from the Queryable
712        // API (unit-tested; a real-app repro is the remaining confirmation).
713        if matches!(crate_name, "mysql" | "mysql_async") {
714            const MY: [&str; 16] = [
715                "::query", "::query_first", "::query_iter", "::query_map", "::query_fold",
716                "::query_drop", "::exec", "::exec_first", "::exec_iter", "::exec_map",
717                "::exec_fold", "::exec_drop", "::exec_batch", "::prep", "::ping", "::get_conn",
718            ];
719            if MY.iter().any(|v| path.ends_with(v)) {
720                return Some("Db");
721            }
722            return None;
723        }
724        // sea_orm: an ORM whose execution is split from building (like sqlx). The query
725        // BUILDERS (`Entity::find`, `Entity::insert`) are pure; execution happens at `.all`/
726        // `.one`/`.count`/`.stream` and `Insert/Update/Delete::exec`. The write path via an
727        // ActiveModel (`model.insert(db)`) executes too — distinguished from the `EntityTrait`
728        // builder by the trait in the path (`ActiveModelTrait::`). (Found hardening on a
729        // sea_orm consumer app: `.all(db)` reads and `ActiveModel::insert` writes were pure.)
730        if crate_name == "sea_orm" {
731            // sea_orm RE-EXPORTS sea_query (`sea_orm::sea_query::…`), whose builder algebra collides with
732            // the execution verbs: `Func::count(col)` builds a COUNT() expr, `Condition::all()` AND-groups
733            // filters, `Expr::count(…)` — all PURE, none touch a db. The `::all`/`::count`/`::one` execution
734            // rule fabricated Db on them (sweep [5]). sea_query is pure query construction end-to-end, so
735            // exclude the whole re-exported namespace first.
736            if path.contains("sea_query") {
737                return None;
738            }
739            if path.ends_with("::all")
740                || path.ends_with("::one")
741                || path.ends_with("::count")
742                || path.ends_with("::stream")
743                || path.ends_with("::exec")
744                || path.ends_with("::exec_with_returning")
745                || path.ends_with("::exec_without_returning")
746                || path.ends_with("::connect")
747                || path.ends_with("::execute")
748                || path.ends_with("::execute_unprepared")
749                || path.ends_with("::query_one")
750                || path.ends_with("::query_all")
751                || path.ends_with("::fetch_page")
752                || path.ends_with("::num_items")
753                || path.contains("ActiveModelTrait::")
754            {
755                return Some("Db");
756            }
757            return None;
758        }
759        // (Reached by sqlx + diesel — the build-vs-execute-split crates.) `first` is diesel's
760        // LIMIT-1 round trip and `load_iter` its 2.x streaming execution; `fetch_many` is sqlx's
761        // multi-result stream. All crate-gated, so a std `Vec::first` never resolves here.
762        const VERBS: [&str; 19] = [
763            "::execute", "::query_row", "::query_map", "::query_one", "::fetch_one",
764            "::fetch_all", "::fetch_optional", "::fetch", "::fetch_many", "::connect",
765            "::acquire", "::begin", "::commit", "::rollback", "::load", "::load_iter",
766            "::first", "::get_result", "::get_results",
767        ];
768        if VERBS.iter().any(|v| path.ends_with(v)) {
769            return Some("Db");
770        }
771        return None;
772    }
773    // std::path::Path / PathBuf STAT-family methods hit the filesystem (each is a stat/readlink/
774    // readdir syscall) — unlike the rest of the std::path surface, which is pure string manipulation
775    // (join/file_name/extension/parent/…). Verb-precise so the scanner's receiver inference can safely
776    // route a `path.symlink_metadata()` method call here. (A blackout screen caught gix-dir — an entire
777    // directory WALKER — reporting ZERO Fs because all its I/O is Path-method calls; same class as
778    // fd's residual `Path::symlink_metadata` under-report.)
779    if let Some(m) = path
780        .strip_prefix("std::path::Path::")
781        .or_else(|| path.strip_prefix("std::path::PathBuf::"))
782    {
783        const STAT: &[&str] = &[
784            "metadata", "symlink_metadata", "canonicalize", "read_link", "read_dir", "exists",
785            "try_exists", "is_file", "is_dir", "is_symlink",
786        ];
787        return STAT.contains(&m).then_some("Fs");
788    }
789    // Filesystem. `tokio::fs`/`async_std::fs` are the async mirrors of `std::fs`; `async_fs` is
790    // smol's fs crate; `fs_err` is a drop-in `std::fs` wrapper (its whole surface is fs I/O).
791    if path.starts_with("std::fs::")
792        || path.starts_with("tokio::fs::")
793        || path.starts_with("async_std::fs::")
794        || crate_name == "async_fs"
795        || crate_name == "fs_err"
796    {
797        return Some("Fs");
798    }
799    // memmap2: only `MmapOptions::map*` (and the in-place `Mmap::flush`/`make_*` protection
800    // changes / `remap`) actually issue the mmap/msync/mprotect/mremap syscall = Fs. The rest of the
801    // crate is PURE: `MmapOptions::new`/setters BUILD the request, and once a region is mapped, reads
802    // over it (`Mmap::len`/`is_empty`/`as_ptr`/`as_mut_ptr`/`deref` into the byte slice) are plain
803    // memory access with no syscall. Whole-crate Fs fabricated Fs on those reads (a `m.len()` the
804    // scanner's receiver inference routes to `memmap2::Mmap::len`). Match the syscall-issuing verbs;
805    // everything else returns None (pure). `map*` covers `map`/`map_mut`/`map_exec`/`map_copy`/
806    // `map_copy_read_only`/`map_raw`/`map_raw_read_only`/`map_anon`.
807    if crate_name == "memmap2" {
808        let m = path.rsplit("::").next().unwrap_or(path);
809        if m.starts_with("map")
810            || m == "flush"
811            || m == "flush_async"
812            || m == "flush_range"
813            || m == "flush_async_range"
814            || m == "remap"
815            || m.starts_with("make_")
816            || m == "advise"
817            || m == "advise_range"
818            || m == "lock"
819            || m == "unlock"
820        {
821            return Some("Fs");
822        }
823        return None;
824    }
825    // tempfile: creating a temp file/dir touches the disk. Match the create/persist verbs (the
826    // `Builder` setters — prefix/suffix/rand_bytes — stay pure). `persist`/`keep` rename/retain
827    // the file on disk; `close` removes it.
828    if crate_name == "tempfile"
829        && (path.ends_with("::tempfile")
830            || path.ends_with("::tempfile_in")
831            || path.ends_with("::tempdir")
832            || path.ends_with("::tempdir_in")
833            || path.ends_with("NamedTempFile::new")
834            || path.ends_with("NamedTempFile::new_in")
835            || path.ends_with("TempDir::new")
836            || path.ends_with("TempDir::new_in")
837            || path.ends_with("::persist")
838            || path.ends_with("::persist_noclobber")
839            || path.ends_with("::keep"))
840    {
841        return Some("Fs");
842    }
843    // glob: walks the filesystem to expand a pattern (the returned iterator reads directories).
844    // `Pattern::matches` is pure string matching — match only the directory-walking entry points.
845    if crate_name == "glob" && (path.ends_with("::glob") || path.ends_with("::glob_with")) {
846        return Some("Fs");
847    }
848    // Password-hashing / KDF crates — the entropy tier (the TS engine's CTA lesson: an invisible
849    // argon2 landed on exactly the call a security review cares about). In this engine's
850    // verb-precise style the ENTROPY is the salt mint: `SaltString::generate(OsRng)` in the
851    // password-hash API family, and bcrypt's `hash`/`hash_with_result` (salt minted internally).
852    // Verification and explicit-salt hashing are deterministic recomputation — pure. `rand_core`
853    // carries the OsRng source itself (otherwise the most common salt mint is invisible).
854    if matches!(crate_name, "argon2" | "scrypt" | "pbkdf2" | "password_hash") {
855        if path.contains("SaltString::generate") {
856            return Some("Rand");
857        }
858        return None;
859    }
860    if crate_name == "bcrypt" {
861        if path.ends_with("::hash") || path.ends_with("::hash_with_result") {
862            return Some("Rand");
863        }
864        return None;
865    }
866    if crate_name == "rand_core" {
867        if path.contains("OsRng")
868            || path.ends_with("::next_u32")
869            || path.ends_with("::next_u64")
870            || path.ends_with("::fill_bytes")
871        {
872            return Some("Rand");
873        }
874        return None;
875    }
876    // Randomness / entropy. `getrandom` is effectful end-to-end (`fastrand` has its own arm below). `rand` is NOT — it
877    // mixes entropy/generation (effectful) with *pure* distribution constructors (`Uniform::new`,
878    // `Normal::new`) and deterministic-seed constructors (`seed_from_u64`). Flagging the whole crate
879    // over-reported those as `Rand`; match only the calls that actually consume randomness — the
880    // entropy sources (`OsRng`, `thread_rng`/`rng`, `from_entropy`/`from_os_rng`) and the generation
881    // verbs (`gen*`/`random*`/`fill*`/`sample*`/`next_u*`). A `Uniform::new` is now correctly pure.
882    if crate_name == "getrandom" {
883        return Some("Rand");
884    }
885    // fastrand: like `rand`, it mixes entropy-consuming generation (effectful) with PURE deterministic
886    // pieces. `Rng::with_seed(42)` is a DETERMINISTIC seeded constructor (consumes no entropy — the same
887    // seed gives the same stream), and `Rng::fork`/`Rng::clone` just split/copy existing state. Those are
888    // PURE; whole-crate Rand fabricated Rand on them. The effect is the value-drawing methods (`u32`/
889    // `usize`/`bool`/`f64`/`char`/`alphanumeric`/`choice`/`choose_multiple`/`shuffle`/`fill`/the range
890    // forms) AND the entropy-seeded entry points: bare `Rng::new()` (seeds from the global entropy-backed
891    // generator), `fastrand::seed`, and the top-level `fastrand::u32(..)` free functions (which draw from
892    // the thread-local generator). `with_seed` is exempted explicitly; any other method on an `Rng`
893    // (i.e. a value draw) is Rand.
894    if crate_name == "fastrand" {
895        let m = path.rsplit("::").next().unwrap_or(path);
896        // Provably pure: deterministic seeded ctor + state split/copy.
897        if m == "with_seed" || m == "fork" || m == "clone" {
898            return None;
899        }
900        // Everything else fastrand exposes either draws a value or seeds from entropy → Rand. (The crate
901        // has no pure data types beyond the `Rng` handle itself, so a non-draw stray would have to be a
902        // method we don't recognise — keep the effect, the safe direction.)
903        return Some("Rand");
904    }
905    if crate_name == "rand" {
906        let rng_verb = path.ends_with("::gen")
907            || path.ends_with("::gen_range")
908            || path.ends_with("::gen_bool")
909            || path.ends_with("::gen_ratio")
910            || path.ends_with("::random")
911            || path.ends_with("::random_range")
912            || path.ends_with("::random_bool")
913            || path.ends_with("::random_ratio")
914            || path.ends_with("::random_iter") // rand 0.9 iterator generator
915            || path.ends_with("::gen_iter")
916            || path.ends_with("::fill")
917            || path.ends_with("::fill_bytes")
918            || path.ends_with("::try_fill")
919            || path.ends_with("::try_fill_bytes")
920            || path.ends_with("::sample")
921            || path.ends_with("::sample_iter")
922            || path.ends_with("::next_u32")
923            || path.ends_with("::next_u64")
924            || path.ends_with("::thread_rng")
925            || path.ends_with("::rng")
926            || path.ends_with("::from_entropy")
927            || path.ends_with("::from_os_rng");
928        // `OsRng` is the OS entropy SOURCE, but `clone`/`fork`/`default` just copy or construct the
929        // (zero-sized) handle and draw no entropy — pure, exactly like the `fastrand` arm's clone/fork
930        // exemption above. The actual draws (`fill_bytes`/`next_u*`/…) are caught by `rng_verb`. Without
931        // this exemption the blanket `contains("OsRng")` fabricated `Rand` on `OsRng::clone` (adversarial
932        // review: OsRng is a unit struct, cloning consumes nothing).
933        let m = path.rsplit("::").next().unwrap_or(path);
934        let os_rng = path.contains("OsRng") && !matches!(m, "clone" | "fork" | "default");
935        if rng_verb || os_rng {
936            return Some("Rand");
937        }
938        return None;
939    }
940    // Subprocess spawning. `tokio::process` is the async mirror of `std::process` — it exists
941    // only to spawn/control subprocesses (`Command`/`Child`, no pure data types like std's
942    // `Stdio`/`ExitStatus`/`exit`), so spawning through it is Exec just the same. Without this an
943    // async app's `tokio::process::Command::new(..).spawn()` classified pure — a silent under-report
944    // of subprocess execution, the dangerous direction (mirrors the tokio::fs/tokio::net coverage).
945    if path.starts_with("std::process::Command")
946        || path.starts_with("std::process::Child")
947        || path.starts_with("tokio::process::Command")
948        || path.starts_with("tokio::process::Child")
949        || path.starts_with("async_std::process::Command")
950        || path.starts_with("async_std::process::Child")
951    {
952        // PURE read-backs of the builder's stored fields / the cached pid — no spawn, no syscall — so the
953        // whole-type Exec rule fabricated Exec on them (sweep [23]; mirrors the portable_pty getter carve-
954        // out just below). get_program/get_args/get_envs/get_current_dir read the Command; Child::id reads
955        // the cached pid. Every genuine verb (new/spawn/output/status/wait/kill) stays Exec.
956        if path.ends_with("::get_program")
957            || path.ends_with("::get_args")
958            || path.ends_with("::get_envs")
959            || path.ends_with("::get_current_dir")
960            || path.ends_with("Child::id")
961        {
962            return None;
963        }
964        return Some("Exec");
965    }
966    // portable_pty / async_process are whole-crate Exec EXCEPT for the proven-pure surface they expose:
967    // the `CommandBuilder` GETTERS (`get_argv`/`get_cwd`/`get_env`/`as_unix_command_line`…) read back
968    // configuration, and the PURE DATA types (`PtySize::default`, `ExitStatus`/`Stdio`/`CommandBuilder`
969    // construction/setters). The earlier `is_cmd_naming_method` fix stopped the head-refinement LEAK, but
970    // the BASE Exec still fabricated on these accessors (a `cmd.get_cwd()` the scanner routes to
971    // `portable_pty::CommandBuilder::get_cwd`). Subtract the read-back getters and the obvious pure
972    // ctors/setters; the spawn/wait/exec surface (`spawn_command`/`openpty`/`wait`/`kill`/`exec`…) keeps
973    // Exec. SUBTRACT only what is provably pure — when unrecognised, KEEP Exec (the safe direction).
974    if crate_name == "async_process" || crate_name == "portable_pty" {
975        let m = path.rsplit("::").next().unwrap_or(path);
976        // configuration read-back getters — pure (no spawn).
977        if m.starts_with("get_") || m == "as_unix_command_line" {
978            return None;
979        }
980        // pure data-type ctors/setters/derives that NAME no program and spawn nothing.
981        if matches!(
982            m,
983            "default" | "new" | "piped" | "null" | "inherit" | "from_raw_fd"
984                | "arg" | "args" | "arg0" | "env" | "envs" | "env_clear" | "env_remove"
985                | "cwd" | "current_dir" | "rows" | "cols"
986                | "clone" | "fmt" | "eq" | "ne" | "hash"
987        ) {
988            return None;
989        }
990        return Some("Exec");
991    }
992    // duct: a subprocess-orchestration crate. `cmd()`/`cmd!` only *build* an Expression; the
993    // spawn/wait happens at `run`/`read`/`start`. Match the execution verbs, not the builder.
994    if crate_name == "duct"
995        && (path.ends_with("::run")
996            || path.ends_with("::read")
997            || path.ends_with("::start")
998            || path.ends_with("::read_chars"))
999    {
1000        return Some("Exec");
1001    }
1002    if path.starts_with("std::env::") {
1003        return Some("Env");
1004    }
1005    // dotenvy / dotenv: load environment variables (reading a `.env` file and mutating the process
1006    // environment). Match the load/read entry points; `Error`/builder types stay pure.
1007    if matches!(crate_name, "dotenvy" | "dotenv")
1008        && (path.ends_with("::dotenv")
1009            || path.ends_with("::dotenv_override")
1010            || path.ends_with("::from_path")
1011            || path.ends_with("::from_path_override")
1012            || path.ends_with("::from_filename")
1013            || path.ends_with("::from_filename_override")
1014            || path.ends_with("::from_read")
1015            || path.ends_with("::from_read_override")
1016            || path.ends_with("::load")
1017            || path.ends_with("::var")
1018            || path.ends_with("::vars"))
1019    {
1020        return Some("Env");
1021    }
1022    // Wall-clock reads. Match the `now` accessor precisely (ends_with), not any path
1023    // containing the substring "now". The `time` crate (distinct from `std::time`/`chrono`)
1024    // reads the clock via `now_utc`/`now_local` (and the deprecated `Instant::now`).
1025    if (crate_name == "chrono" || path.starts_with("std::time::")) && path.ends_with("::now") {
1026        return Some("Clock");
1027    }
1028    if crate_name == "time"
1029        && (path.ends_with("::now_utc") || path.ends_with("::now_local") || path.ends_with("::now"))
1030    {
1031        return Some("Clock");
1032    }
1033    // `tracing`: same principle as the `log` facade below — the crate's TYPES are pure data, so match
1034    // the emit, not the whole crate. The actual program output is the macro-expanded
1035    // `Subscriber::event`/`event!`/`Span::*enter*` dispatch and the `Span::new*`/`Span::record`
1036    // recording path that drives the subscriber. The data-type accessors — `Level::as_str`,
1037    // `Span::is_disabled`/`metadata`/`id`, and constructing/reading `Level`/`LevelFilter`/`Span`/
1038    // `Event`/`Metadata`/`Field`/`FieldSet`/`Id` — are PURE (no output is produced), so whole-crate Log
1039    // fabricated Log on them. Match the emit verbs; everything else returns None.
1040    if crate_name == "tracing" {
1041        let m = path.rsplit("::").next().unwrap_or(path);
1042        // The user-facing emit MACROS (`tracing::info!`/`warn!`/…) — candor-scan is pre-expansion, so it
1043        // sees the raw macro path `tracing::info`, not the expanded `__tracing`/`Subscriber::event` the
1044        // deep (post-expansion) engine sees. Only the macro names; the pure DATA types (Level/Span/Event)
1045        // have other tails and stay None.
1046        if m == "trace" || m == "debug" || m == "info" || m == "warn" || m == "error"
1047            || m == "trace_span" || m == "debug_span" || m == "info_span" || m == "warn_span"
1048            || m == "error_span" || m == "span"
1049            || m == "event"
1050            || m == "new_span"
1051            || m == "record"
1052            || m == "record_follows_from"
1053            || m == "enter"
1054            || m == "exit"
1055            || m == "in_scope"
1056            || m == "entered"
1057            || path.contains("::__macro_support")
1058            || path.contains("::__tracing")
1059            || path.contains("Subscriber::event")
1060            || path.contains("Subscriber::new_span")
1061            || path.contains("Subscriber::enter")
1062            || path.contains("Subscriber::exit")
1063        {
1064            return Some("Log");
1065        }
1066        return None;
1067    }
1068    // The `log` facade: its macros route through `log::__private_api`; the crate's types
1069    // (`Level`, `LevelFilter`) are pure, so match the logging entry, not the whole crate.
1070    if crate_name == "log" {
1071        // Expanded macro form (deep engine) OR the raw user-facing macro names (candor-scan, pre-expansion).
1072        // `log::Level`/`LevelFilter`/`Record`/`Metadata` have other tails, so the type surface stays pure.
1073        let m = path.rsplit("::").next().unwrap_or(path);
1074        if path.contains("::__private_api")
1075            || m == "error" || m == "warn" || m == "info" || m == "debug" || m == "trace" || m == "log"
1076        {
1077            return Some("Log");
1078        }
1079    }
1080    // Compiler diagnostic emission — the ONE genuinely effectful operation in the otherwise-pure
1081    // rustc_* surface (a dylint lint's actual OUTPUT: it writes warnings/errors to the compiler's
1082    // diagnostic sink). Classified `Log` (same family as `tracing`/`log` — program output). Match the
1083    // emission verbs precisely; rustc_lint/rustc_errors are mostly pure types (Lint, LintId, the Diag
1084    // BUILDERS), and only the terminal `emit`/`emit_span_lint` actually produces output.
1085    if crate_name == "rustc_lint"
1086        && (path.ends_with("::emit_span_lint")
1087            || path.ends_with("::span_lint")
1088            || path.ends_with("::span_lint_hir"))
1089    {
1090        return Some("Log");
1091    }
1092    if crate_name == "rustc_errors"
1093        && (path.ends_with("::emit")
1094            || path.ends_with("::emit_diagnostic")
1095            || path.ends_with("::emit_now"))
1096    {
1097        return Some("Log");
1098    }
1099    // arboard: the effectful surface is the `Clipboard` handle's read/write verbs (each talks to the
1100    // OS clipboard / X11/Wayland/Win32/NSPasteboard server). The data types — chiefly `arboard::Error`
1101    // (whose `Display`/`to_string` formatting is pure) and the `ImageData`/`GetExtLinux`/`SetExtLinux`
1102    // option types — are PURE, so whole-crate Clipboard fabricated Clipboard on e.g. an error
1103    // `to_string()`. Match the handle verbs; everything else returns None. `Clipboard::new` opens the
1104    // connection to the clipboard server, so it's an effect too; `get`/`set` return the
1105    // builder-then-read `Get`/`Set` cursors whose `text`/`image`/`html` terminals do the I/O.
1106    if crate_name == "arboard" {
1107        let m = path.rsplit("::").next().unwrap_or(path);
1108        if m == "new"
1109            || m == "get"
1110            || m == "set"
1111            || m == "clear"
1112            || m == "get_text"
1113            || m == "set_text"
1114            || m == "set_html"
1115            || m == "get_image"
1116            || m == "set_image"
1117            || m == "text"
1118            || m == "image"
1119            || m == "html"
1120        {
1121            return Some("Clipboard");
1122        }
1123        return None;
1124    }
1125    None
1126}
1127
1128pub fn cap_from_name(name: &str) -> Option<&'static str> {
1129    EFFECTS.iter().copied().find(|e| *e == name)
1130}
1131
/// Refine the `Exec` cliff (spec §4 ⟨0.5⟩) for a *literal, statically-known* subprocess head.
///
/// `cmd` is reduced to its basename — everything after the last `/` or `\` — and looked up in a
/// small curated table (`/usr/bin/curl` → `curl`). The returned effects are meant to be ADDED to
/// a caller that already carries `Exec`: a subprocess is still spawned, so `Exec` itself is never
/// dropped. An unrecognised or dynamically-built head yields `&[]` and keeps the bare cliff
/// (never guess).
///
/// The candor engines map to `Env` + `Fs` because spec §7 item 12 (the analyzer self-boundary)
/// guarantees that is all they touch; that row is spec-supplied, not curation. Every other row is
/// curated under the same under-report rule as the crate classifier.
///
/// INVARIANT: every head listed is an external tool that does NOT run the analysed project's own
/// code, which is why `make`/`npm`/`cargo` are deliberately absent and stay the cliff. The
/// reference engines share this table so the `Exec` boundary refines identically everywhere (the
/// §4-consistency argument).
pub fn classify_command_head(cmd: &str) -> &'static [&'static str] {
    // Only UNAMBIGUOUS tools belong in these tables. A multi-modal head (`git status` is local,
    // `git push` is Net; `rsync` local-vs-remote) would FABRICATE the effect of its common case,
    // which the under-report rule forbids — such heads keep the bare cliff.
    const NET_TOOLS: &[&str] = &["curl", "wget", "http", "ssh", "scp", "sftp", "ftp", "telnet"];
    const DB_TOOLS: &[&str] =
        &["psql", "mysql", "sqlite3", "mongosh", "mongo", "redis-cli", "cqlsh", "influx"];
    // candor engines — Fs/Env only, guaranteed by spec §7 item 12 (the analyzer self-boundary).
    const CANDOR_ENGINES: &[&str] = &[
        "candor",
        "candor-run.sh",
        "candor-scan",
        "candor-query",
        "candor-java",
        "candor-classify",
        "candor-report",
        "cargo-candor",
    ];

    // Basename: the segment after the last path separator (either flavour), or all of `cmd`.
    let head = cmd.rsplit(['/', '\\']).next().unwrap_or(cmd);

    if NET_TOOLS.contains(&head) {
        return &["Net"];
    }
    if DB_TOOLS.contains(&head) {
        return &["Db"];
    }
    if CANDOR_ENGINES.contains(&head) {
        return &["Env", "Fs"];
    }
    &[]
}
1155
/// Reports whether `method` — the last segment of a call path — is a subprocess-builder method
/// that only MODIFIES the command (`.arg`, `.env`, `.current_dir`, …) rather than NAMING the
/// program (`Command::new`, `duct::cmd`).
///
/// A WHOLE-CRATE-Exec crate (`portable_pty`, `duct`, `async_process`) classifies *every* method
/// as `Exec`, so head-refinement has to skip these modifiers: an argument or env-var-name literal
/// that merely happens to equal a known head (`.env("psql", …)`, `.arg("curl")`) would otherwise
/// FABRICATE that head's effect, violating the §1 under-report rule.
pub fn is_cmd_builder_method(method: &str) -> bool {
    const MODIFIERS: &[&str] = &[
        "arg",
        "args",
        "arg0",
        "env",
        "envs",
        "env_clear",
        "env_remove",
        "current_dir",
        "cwd",
        "stdin",
        "stdout",
        "stderr",
        "pre_exec",
        "creation_flags",
        "uid",
        "gid",
        "groups",
        "process_group",
    ];
    MODIFIERS.contains(&method)
}
1170
/// Reports whether `method` NAMES the program being spawned, so that its first string literal IS
/// the command head to refine: `Command::new("curl")`, `duct::cmd("curl", …)`.
///
/// This is deliberately an ALLOWLIST rather than "any method except the known modifiers". A
/// whole-crate-Exec crate classifies EVERY method as `Exec`, so a denylist leaked non-naming
/// methods that are not modifiers either — a getter such as `CommandBuilder::get_env("psql")`
/// (reading back an env-var KEY, not a program) fed `"psql"` to the head classifier and
/// FABRICATED `Db` (review find). Only `new` and `cmd` name a program; everything else
/// (modifiers, `get_*` getters, custom builder methods) keeps the bare `Exec` cliff —
/// under-refining is safe, fabricating is not. `std::process::Command` is verb-precise, so
/// getters never fire `Exec` there anyway; the allowlist makes the whole-crate-Exec crates safe
/// too.
pub fn is_cmd_naming_method(method: &str) -> bool {
    method == "new" || method == "cmd"
}
1183
/// The masking guard (AS-EFF-008): reports whether `method` — the call path's last segment — is
/// a connection-ESTABLISHING Net verb, i.e. one that takes the HOST/URL as an argument.
///
/// A classified Net call through such a verb with no captured host literal leaves the endpoint
/// structurally INVISIBLE (a runtime-built host); the surface is then incomplete and the gate
/// must fail closed, otherwise a benign sibling literal would mask the runtime endpoint.
///
/// The list is an ALLOWLIST, which is the safe direction: USE-verbs on an already-connected
/// socket (`stream.write`/`read`/`flush`, `socket.send`/`recv`) are not listed, so a missing
/// literal there (the host was fixed at `connect`) never false-positives. Failing to list an
/// unusual establishing verb is a missed mask (sound-with-disclosure), never a broken gate.
pub fn is_net_establishing(method: &str) -> bool {
    const ESTABLISHING_VERBS: &[&str] = &[
        "connect",
        "connect_timeout",
        "get",
        "post",
        "put",
        "patch",
        "delete",
        "head",
        "request",
        "send_to",
        "lookup_host",
        "to_socket_addrs",
    ];
    ESTABLISHING_VERBS.iter().any(|verb| *verb == method)
}
1209
/// Map a cap-std capability *type* to the effect it authorises.
///
/// Holding one of these values (e.g. `&Dir`) is the real, unforgeable right to perform that
/// effect, so candor treats it as a declared capability, exactly like its own `&Fs` token.
///
/// Only crates whose name starts with `cap_` are considered; any other crate, and any type name
/// not listed below, yields `None`.
pub fn capstd_cap(crate_name: &str, type_name: &str) -> Option<&'static str> {
    if crate_name.starts_with("cap_") {
        match type_name {
            "Dir" => Some("Fs"),
            "TcpListener" | "TcpStream" | "UdpSocket" | "Pool" => Some("Net"),
            "UnixListener" | "UnixStream" | "UnixDatagram" => Some("Ipc"),
            "SystemClock" | "MonotonicClock" => Some("Clock"),
            _ => None,
        }
    } else {
        None
    }
}
1225
/// Table names a SQL string literal STATICALLY reaches — the `Db` analog of the `Net` host /
/// `Exec` command / `Fs` path literal surface (feeds `allow Db in <scope> <table>…`, AS-EFF-008).
///
/// Conservative by construction, because a wrong capture here would FABRICATE:
///
/// * the string must open with a SQL statement keyword, otherwise nothing is returned;
/// * only identifiers in table position are taken — after `FROM`/`JOIN`/`INTO`/`TABLE` anywhere,
///   and after a statement-leading `UPDATE`/`TRUNCATE` — skipping `ONLY`/`IF NOT EXISTS`/`TABLE`;
/// * `UPDATE` mid-statement is deliberately ignored (`FOR UPDATE SKIP LOCKED` must not yield a
///   table "skip");
/// * a comma-ADJACENT list continues the capture (`FROM t1, t2`), an alias ends it;
/// * a dynamically-built query yields nothing — the gate's opaque case — never a guess.
///
/// Output is lower-cased, quote/backtick-stripped, keeps `schema.table` qualified, and is
/// deduplicated in first-seen order.
///
/// SPEC §2 pins this algorithm token-for-token across engines; the cross-impl vector battery
/// (candor-spec conformance/tables/vectors.json, run.sh Part 4b) enforces the JVM/TS mirrors, so
/// the OUTPUT of this function must not change without changing the spec and the mirrors.
///
/// NOTE(review): `from` counts as table position wherever it occurs, so function syntax such as
/// `EXTRACT(YEAR FROM created_at)` yields `created_at` — confirm whether the spec vectors intend
/// that.
pub fn tables_in_sql(sql: &str) -> Vec<String> {
    const STMT: &[&str] = &[
        "select", "insert", "update", "delete", "create", "drop", "alter", "truncate", "merge",
        "replace", "with",
    ];
    // Tokens that can FOLLOW a table-introducing keyword without being a table.
    const SKIP: &[&str] = &["only", "if", "not", "exists", "table"];
    // Identifier-position tokens that are grammar, not a table (subqueries, locking clauses…).
    const STOP: &[&str] = &[
        "select", "set", "where", "values", "on", "using", "group", "order", "by", "limit",
        "returning", "as", "inner", "outer", "left", "right", "cross", "lateral", "natural",
        "union", "all", "distinct", "case", "when", "null", "default", "skip", "nowait", "of",
        "from", "join", "into", "update", "delete", "insert",
    ];

    /// Validate one token as a table identifier and normalise it. Surrounding quotes/backticks
    /// are trimmed; the remainder must start with an ASCII letter or `_`, contain only ASCII
    /// alphanumerics or `_ . $ " `` ` ``, and not be a `STOP` word. Inner quotes are then removed.
    fn table_ident(raw: &str) -> Option<String> {
        let bare = raw.trim_matches(|c| matches!(c, '"' | '`' | '\''));
        let starts_ok = bare.chars().next().is_some_and(|c| c.is_ascii_alphabetic() || c == '_');
        let rest_ok = bare
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '.' | '$' | '"' | '`'));
        if starts_ok && rest_ok && !STOP.contains(&bare) {
            Some(bare.replace(['"', '`'], ""))
        } else {
            None
        }
    }

    // Normalise into one pre-sized buffer (no per-character allocation). Parens and `;` become
    // spaces. `,` survives as its OWN token: it is what lets `FROM t1, t2` continue the table
    // list without fabricating from other comma-ridden positions (column lists, ON clauses).
    let lowered = sql.to_lowercase();
    let mut cleaned = String::with_capacity(lowered.len());
    for c in lowered.chars() {
        match c {
            '(' | ')' | ';' => cleaned.push(' '),
            ',' => cleaned.push_str(" , "),
            _ => cleaned.push(c),
        }
    }
    let toks: Vec<&str> = cleaned.split_whitespace().collect();

    // Empty input, or a first token that is not a statement keyword: not SQL — nothing to
    // certify, nothing fabricated.
    if !toks.first().is_some_and(|kw| STMT.contains(kw)) {
        return Vec::new();
    }

    let mut out: Vec<String> = Vec::new();
    for (i, &tok) in toks.iter().enumerate() {
        let table_pos = match tok {
            "from" | "join" | "into" | "table" => true,
            // statement-leading only: `update t set …`, `truncate [table] t`.
            "update" | "truncate" => i == 0,
            _ => false,
        };
        if !table_pos {
            continue;
        }

        // Step over filler words between the keyword and the table name.
        let mut j = i + 1;
        while toks.get(j).is_some_and(|t| SKIP.contains(t)) {
            j += 1;
        }
        let Some(first) = toks.get(j).copied().and_then(table_ident) else { continue };
        if !out.contains(&first) {
            out.push(first);
        }

        // Comma-ADJACENT continuation only: `FROM t1, t2, t3` takes all three, while an alias
        // breaks the chain (`FROM t1 a, t2` keeps just t1 — an under-report, never a guess:
        // skipping an alias to chase the comma would fabricate tables out of
        // `INSERT INTO t (a, b)`'s column list, whose parens are spaces by the time we tokenize).
        while toks.get(j + 1).copied() == Some(",") {
            let Some(more) = toks.get(j + 2).copied().and_then(table_ident) else { break };
            if !out.contains(&more) {
                out.push(more);
            }
            j += 2;
        }
    }
    out
}
1309
1310#[cfg(test)]
1311mod tests {
#[test]
fn sql_table_extraction_is_conservative() {
    use super::tables_in_sql;

    // (SQL literal, tables expected in first-seen order). An empty expectation means the
    // extractor must report nothing at all for that input.
    let cases: &[(&str, &[&str])] = &[
        ("SELECT id FROM users WHERE x = 1", &["users"]),
        (
            "select * from ledger.entries e join customers c on c.id = e.cid",
            &["ledger.entries", "customers"],
        ),
        ("INSERT INTO audit_log (a) VALUES (?1)", &["audit_log"]),
        ("UPDATE accounts SET v = ?", &["accounts"]),
        ("DELETE FROM sessions WHERE id = ?", &["sessions"]),
        ("CREATE TABLE IF NOT EXISTS cache (k TEXT)", &["cache"]),
        ("TRUNCATE TABLE staging", &["staging"]),
        // FOR UPDATE locking clause must not yield a phantom table (mid-statement update ignored)
        ("SELECT * FROM jobs FOR UPDATE SKIP LOCKED", &["jobs"]),
        // a subquery in FROM position yields nothing for that position
        ("SELECT * FROM (SELECT 1) q", &[]),
        // not SQL -> nothing (never fabricate)
        ("/tmp/some/path", &[]),
        ("hello world from nowhere", &[]),
        // comma-ADJACENT continuation: a FROM list takes every table in the chain…
        ("SELECT a FROM t1, t2, s.t3 WHERE x = 1", &["t1", "t2", "s.t3"]),
        // …but an alias breaks it (under-report, never a guess)…
        ("SELECT a FROM t1 a1, t2 WHERE x = 1", &["t1"]),
        // …which is exactly what keeps a column list from fabricating (parens are spaces by now).
        ("INSERT INTO t (a, b) VALUES (1, 2)", &["t"]),
        // a subquery after the comma stops the chain too
        ("SELECT a FROM t1, (SELECT 1) q", &["t1"]),
    ];

    for &(sql, expected) in cases {
        assert_eq!(tables_in_sql(sql), expected, "tables extracted from `{sql}`");
    }
}
1339
1340    use super::*;
1341
#[test]
fn db_crates_are_calibrated() {
    // Every DB client in `DB_CRATES` has to appear in `CALIBRATED_CRATES`; otherwise the receipt's
    // coverage check would flag a crate the classifier recognizes as a blind spot.
    // (Was nightly-lint-only; now runs on stable.)
    DB_CRATES.iter().for_each(|c| {
        assert!(
            CALIBRATED_CRATES.contains(c),
            "DB crate `{c}` is matched by classify() but missing from CALIBRATED_CRATES"
        );
    });
}
1353
#[test]
fn calibrated_crates_are_live() {
    // The converse direction: each crate advertised as calibrated must be matched by classify()
    // for at least one representative probe path (`<crate><tail>`). A dead entry would silently
    // suppress a real coverage warning.
    for c in CALIBRATED_CRATES.iter().copied() {
        let mut probe_paths = CALIBRATION_PROBE_TAILS.iter().map(|t| format!("{c}{t}"));
        let is_live = probe_paths.any(|path| classify(c, &path).is_some());
        assert!(
            is_live,
            "calibrated crate `{c}` is matched by no path in classify() — dead list entry"
        );
    }
}
1365
#[test]
fn async_http_stack_classifies() {
    // The modern async-HTTP/TLS/QUIC/DNS stack (found by the independent-method differential on
    // oha): verb-keyed Net/Ipc/Fs/Env, crate-gated so generic verbs never fabricate across crates.
    let effectful: &[(&str, &str, &str)] = &[
        ("hyper", "hyper::client::conn::http1::SendRequest::send_request", "Net"),
        ("hyper", "hyper::client::conn::http1::handshake", "Net"),
        ("hyper_util", "hyper_util::client::legacy::Client::request", "Net"),
        ("hickory_resolver", "hickory_resolver::Resolver::lookup_ip", "Net"),
        ("quinn", "quinn::Endpoint::connect", "Net"),
        ("tokio_rustls", "tokio_rustls::TlsConnector::connect", "Net"),
        ("native_tls", "native_tls::TlsConnector::connect", "Net"),
        ("tokio_vsock", "tokio_vsock::VsockStream::connect", "Ipc"),
        ("rustls_native_certs", "rustls_native_certs::load_native_certs", "Fs"),
        ("num_cpus", "num_cpus::get", "Env"),
        ("rlimit", "rlimit::setrlimit", "Env"),
    ];
    for &(krate, path, effect) in effectful {
        assert_eq!(classify(krate, path), Some(effect), "effectful path `{path}`");
    }

    // Pure surface stays None (no fabrication): builder/type/config paths, and a generic verb
    // reached through the wrong crate.
    let pure: &[(&str, &str)] = &[
        ("hyper", "hyper::Request::builder"),
        ("hyper", "hyper::body::Bytes::new"),
        ("native_tls", "native_tls::TlsConnectorBuilder::min_protocol_version"),
        ("serde", "serde::Deserialize::request"), // generic verb, wrong crate
    ];
    for &(krate, path) in pure {
        assert_eq!(classify(krate, path), None, "pure path `{path}`");
    }
}
1387
#[test]
fn log_tracing_emit_macros_classify_pre_expansion() {
    // candor-scan is pre-expansion: it sees the raw macro path (`log::info`, `tracing::warn`),
    // not the expanded dispatch the deep engine sees. The table covers both halves of the
    // contract: the user-facing macro names classify as Log, the data-type surface does not.
    let expectations = [
        ("log", "log::info", Some("Log")),
        ("log", "log::error", Some("Log")),
        ("tracing", "tracing::warn", Some("Log")),
        ("tracing", "tracing::info_span", Some("Log")),
        // pure data-type surface stays None (no fabricated Log)
        ("log", "log::Level::as_str", None),
        ("tracing", "tracing::Level::INFO", None),
    ];
    for (krate, path, expected) in expectations {
        assert_eq!(classify(krate, path), expected, "path `{path}`");
    }
}
1400
1401    #[test]
1402    fn classify_core_effects() {
1403        // A representative smoke test of the classifier's main families, so the published crate is not
1404        // shipped untested (these used to live only in the nightly-only src/lib.rs).
1405        assert_eq!(classify("std", "std::fs::read_to_string"), Some("Fs"));
1406        // std::path stat-family methods are Fs (each is a stat/readdir syscall); the pure
1407        // string-manipulation surface stays unclassified (the blackout screen's gix-dir find).
1408        assert_eq!(classify("std", "std::path::Path::symlink_metadata"), Some("Fs"));
1409        assert_eq!(classify("std", "std::path::PathBuf::read_dir"), Some("Fs"));
1410        assert_eq!(classify("std", "std::path::Path::exists"), Some("Fs"));
1411        assert_eq!(classify("std", "std::path::Path::join"), None); // pure string manipulation
1412        assert_eq!(classify("std", "std::path::PathBuf::file_name"), None);
1413        assert_eq!(classify("std", "std::path::Path::parent"), None);
1414        assert_eq!(classify("std", "std::process::Command::new"), Some("Exec"));
1415        assert_eq!(classify("std", "std::env::var"), Some("Env"));
1416        assert_eq!(classify("reqwest", "reqwest::Client::execute"), Some("Net"));
1417        // one-shot convenience fns send immediately → Net; the `Client::get` builder stays pure.
1418        assert_eq!(classify("reqwest", "reqwest::get"), Some("Net"));
1419        assert_eq!(classify("reqwest", "reqwest::blocking::get"), Some("Net"));
1420        assert_eq!(classify("reqwest", "reqwest::Client::get"), None);
1421        assert_eq!(classify("reqwest", "reqwest::RequestBuilder::header"), None);
1422        // nix routes through the libc syscall table (same leaves): I/O classified, generic fd ops skipped.
1423        assert_eq!(classify("nix", "nix::fcntl::open"), Some("Fs"));
1424        assert_eq!(classify("nix", "nix::sys::socket::connect"), Some("Net"));
1425        assert_eq!(classify("nix", "nix::unistd::execvp"), Some("Exec"));
1426        assert_eq!(classify("nix", "nix::unistd::write"), None); // generic fd op — deliberately unclassified
1427        assert_eq!(classify("nix", "nix::unistd::getpid"), None); // not I/O
1428        // rustix does raw syscalls (no libc underneath) → classified directly by leaf, same table.
1429        assert_eq!(classify("rustix", "rustix::time::clock_settime"), Some("Clock"));
1430        assert_eq!(classify("rustix", "rustix::fs::symlink"), Some("Fs"));
1431        assert_eq!(classify("rustix", "rustix::net::connect"), Some("Net"));
1432        assert_eq!(classify("rustix", "rustix::io::read"), None); // generic fd op
1433        // pnet raw packet capture: channel openers are Net, packet construction stays pure.
1434        assert_eq!(classify("pnet", "pnet::datalink::channel"), Some("Net"));
1435        assert_eq!(classify("pnet", "pnet::transport::transport_channel"), Some("Net"));
1436        assert_eq!(classify("pnet_datalink", "pnet_datalink::channel"), Some("Net"));
1437        assert_eq!(classify("pnet", "pnet::packet::ethernet::EthernetPacket::new"), None);
1438        assert_eq!(classify("pnet_base", "pnet_base::MacAddr::new"), None);
1439        // ignore (gitignore-aware walker): walk executors are Fs, config builders stay pure.
1440        assert_eq!(classify("ignore", "ignore::WalkBuilder::build_parallel"), Some("Fs"));
1441        assert_eq!(classify("ignore", "ignore::WalkBuilder::build"), Some("Fs"));
1442        assert_eq!(classify("ignore", "ignore::WalkParallel::run"), Some("Fs"));
1443        assert_eq!(classify("ignore", "ignore::WalkBuilder::add_ignore"), Some("Fs")); // reads the ignore file
1444        assert_eq!(classify("ignore", "ignore::overrides::OverrideBuilder::build"), None); // pure config
1445        assert_eq!(classify("ignore", "ignore::gitignore::GitignoreBuilder::build"), None); // pure config
1446        assert_eq!(classify("ignore", "ignore::DirEntry::path"), None); // pure accessor
1447        // notify fs-watching: watcher constructors + watch/unwatch are Fs, data types stay pure.
1448        assert_eq!(classify("notify", "notify::RecommendedWatcher::new"), Some("Fs"));
1449        assert_eq!(classify("notify", "notify::PollWatcher::new"), Some("Fs"));
1450        assert_eq!(classify("notify", "notify::recommended_watcher"), Some("Fs"));
1451        assert_eq!(classify("notify", "notify::INotifyWatcher::watch"), Some("Fs"));
1452        assert_eq!(classify("notify", "notify::Config::default"), None); // pure config
1453        assert_eq!(classify("notify", "notify::Event::new"), None); // pure data type
1454        assert_eq!(classify("rusqlite", "rusqlite::Connection::execute"), Some("Db"));
1455        // the rusqlite verb DIALECT (a verb probe found the canonical consumer API classifying pure):
1456        assert_eq!(classify("rusqlite", "rusqlite::Connection::query_row"), Some("Db"));
1457        assert_eq!(classify("rusqlite", "rusqlite::Statement::query_map"), Some("Db"));
1458        assert_eq!(classify("rusqlite", "rusqlite::Connection::execute_batch"), Some("Db"));
1459        assert_eq!(classify("rusqlite", "rusqlite::Connection::prepare_cached"), Some("Db"));
1460        assert_eq!(classify("rusqlite", "rusqlite::Connection::open"), Some("Db"));
1461        assert_eq!(classify("rusqlite", "rusqlite::Connection::open_in_memory"), Some("Db"));
1462        // …but `open` stays rusqlite-only (postgres has no open; nothing else may borrow it):
1463        assert_eq!(classify("postgres", "postgres::Client::open"), None);
1464        assert_eq!(classify("tokio_postgres", "tokio_postgres::Client::query_typed"), Some("Db"));
1465        // diesel's LIMIT-1 + streaming executions; sqlx's multi-result stream:
1466        assert_eq!(classify("diesel", "diesel::RunQueryDsl::first"), Some("Db"));
1467        assert_eq!(classify("diesel", "diesel::RunQueryDsl::load_iter"), Some("Db"));
1468        assert_eq!(classify("sqlx", "sqlx::query::Query::fetch_many"), Some("Db"));
1469        // sqlx's bare `query()` builder must STAY pure (the original sqlx lesson):
1470        assert_eq!(classify("sqlx", "sqlx::query"), None);
1471        // tracing: the emit/span-lifecycle dispatch is Log; the pure DATA-type accessors are not
1472        // (whole-crate Log fabricated Log on `Level::as_str` / `Span::is_disabled` — the data types are
1473        // pure, same principle as the `log` facade).
1474        assert_eq!(classify("tracing", "tracing::event"), Some("Log"));
1475        assert_eq!(classify("tracing", "tracing::Span::new_span"), Some("Log"));
1476        assert_eq!(classify("tracing", "tracing::Span::record"), Some("Log"));
1477        assert_eq!(classify("tracing", "tracing::Span::enter"), Some("Log"));
1478        assert_eq!(classify("tracing", "tracing::Level::as_str"), None); // pure accessor
1479        assert_eq!(classify("tracing", "tracing::Span::is_disabled"), None); // pure state read
1480        assert_eq!(classify("tracing", "tracing::Span::metadata"), None); // pure accessor
1481        assert_eq!(classify("tracing", "tracing::metadata::Level::TRACE"), None); // pure data type
1482        assert_eq!(classify("tracing", "tracing::field::Field::name"), None); // pure data type
1483        // memmap2: only the syscall-issuing map/flush/protect verbs are Fs; reads over an already-mapped
1484        // region (len/as_ptr/is_empty) and the request builder are PURE (whole-crate Fs fabricated Fs).
1485        assert_eq!(classify("memmap2", "memmap2::MmapOptions::map"), Some("Fs"));
1486        assert_eq!(classify("memmap2", "memmap2::MmapOptions::map_mut"), Some("Fs"));
1487        assert_eq!(classify("memmap2", "memmap2::Mmap::flush"), Some("Fs"));
1488        assert_eq!(classify("memmap2", "memmap2::MmapMut::make_read_only"), Some("Fs"));
1489        assert_eq!(classify("memmap2", "memmap2::Mmap::len"), None); // length read — pure
1490        assert_eq!(classify("memmap2", "memmap2::Mmap::is_empty"), None); // pure
1491        assert_eq!(classify("memmap2", "memmap2::Mmap::as_ptr"), None); // pointer — pure
1492        assert_eq!(classify("memmap2", "memmap2::MmapOptions::new"), None); // request builder — pure
1493        // arboard: the Clipboard handle's read/write verbs are Clipboard; `arboard::Error` formatting
1494        // and option data types are PURE (whole-crate Clipboard fabricated Clipboard on `Error::to_string`).
1495        assert_eq!(classify("arboard", "arboard::Clipboard::new"), Some("Clipboard"));
1496        assert_eq!(classify("arboard", "arboard::Clipboard::get_text"), Some("Clipboard"));
1497        assert_eq!(classify("arboard", "arboard::Clipboard::set_text"), Some("Clipboard"));
1498        assert_eq!(classify("arboard", "arboard::Clipboard::clear"), Some("Clipboard"));
1499        assert_eq!(classify("arboard", "arboard::Error::to_string"), None); // error formatting — pure
1500        assert_eq!(classify("arboard", "arboard::Error::fmt"), None); // Display impl — pure
1501        assert_eq!(classify("arboard", "arboard::ImageData::to_owned_img"), None); // pure data type
1502        // fastrand: value draws + entropy-seeded entry points are Rand; the DETERMINISTIC seeded ctor
1503        // `with_seed` and state split/copy (`fork`/`clone`) are PURE (whole-crate Rand fabricated Rand).
1504        assert_eq!(classify("fastrand", "fastrand::u32"), Some("Rand")); // top-level draw
1505        assert_eq!(classify("fastrand", "fastrand::Rng::usize"), Some("Rand"));
1506        assert_eq!(classify("fastrand", "fastrand::Rng::shuffle"), Some("Rand"));
1507        assert_eq!(classify("fastrand", "fastrand::Rng::new"), Some("Rand")); // entropy-seeded
1508        assert_eq!(classify("fastrand", "fastrand::Rng::with_seed"), None); // deterministic ctor — pure
1509        assert_eq!(classify("fastrand", "fastrand::Rng::fork"), None); // state split — pure
1510        assert_eq!(classify("fastrand", "fastrand::Rng::clone"), None); // state copy — pure
1511        // portable_pty / async_process: spawn/wait keep Exec; config GETTERS and pure data ctors/setters
1512        // do NOT (base Exec fabricated on `CommandBuilder::get_cwd` / `PtySize::default` / `Stdio::piped`).
1513        assert_eq!(classify("portable_pty", "portable_pty::PtySystem::openpty"), Some("Exec"));
1514        assert_eq!(classify("portable_pty", "portable_pty::SlavePty::spawn_command"), Some("Exec"));
1515        assert_eq!(classify("portable_pty", "portable_pty::CommandBuilder::get_argv"), None); // getter
1516        assert_eq!(classify("portable_pty", "portable_pty::CommandBuilder::get_cwd"), None); // getter
1517        assert_eq!(classify("portable_pty", "portable_pty::PtySize::default"), None); // pure data type
1518        assert_eq!(classify("portable_pty", "portable_pty::CommandBuilder::new"), None); // builder ctor
1519        assert_eq!(classify("async_process", "async_process::Command::spawn"), Some("Exec"));
1520        assert_eq!(classify("async_process", "async_process::Command::output"), Some("Exec"));
1521        assert_eq!(classify("async_process", "async_process::Stdio::piped"), None); // pure data type
1522        assert_eq!(classify("async_process", "async_process::Stdio::null"), None); // pure data type
1523        // FFI tiers (matched by distinctive leaf, alias-independent)
1524        assert_eq!(classify("libc", "libc::open"), Some("Fs"));
1525        assert_eq!(classify("libc", "libc::connect"), Some("Net"));
1526        assert_eq!(classify("libc", "libc::read"), None); // generic fd op — deliberately unclassified
1527        assert_eq!(classify("ffi", "ffi::sqlite3_step"), Some("Db"));
1528        assert_eq!(classify("raw", "raw::git_remote_fetch"), Some("Net"));
1529        // libgit2 clone + submodule clone/update fetch over the network (an A/B on git2 0.20 caught
1530        // `Submodule::update`/`clone` and `Repository::clone` reporting no Net — the latter because the
1531        // `src/build.rs` module was being dropped as if it were the Cargo build script).
1532        assert_eq!(classify("raw", "raw::git_clone"), Some("Net"));
1533        assert_eq!(classify("raw", "raw::git_submodule_clone"), Some("Net"));
1534        assert_eq!(classify("raw", "raw::git_submodule_update"), Some("Net"));
1535        assert_eq!(classify("raw", "raw::git_submodule_open"), None); // local subrepo open — not Net
1536        // libcurl: the transfer/raw-socket entry points are Net (an A/B on curl 0.4 caught the whole
1537        // crate reporting ZERO Net); the big setopt/init/getinfo surface — and the readiness-wait
1538        // multi_wait/poll — stay unclassified (the loop's perform is the boundary).
1539        assert_eq!(classify("curl_sys", "curl_sys::curl_easy_perform"), Some("Net"));
1540        assert_eq!(classify("curl_sys", "curl_sys::curl_easy_send"), Some("Net"));
1541        assert_eq!(classify("curl_sys", "curl_sys::curl_multi_perform"), Some("Net"));
1542        assert_eq!(classify("curl_sys", "curl_sys::curl_multi_socket_action"), Some("Net"));
1543        assert_eq!(classify("curl_sys", "curl_sys::curl_easy_setopt"), None); // in-memory option write
1544        assert_eq!(classify("curl_sys", "curl_sys::curl_easy_init"), None); // handle alloc
1545        assert_eq!(classify("curl_sys", "curl_sys::curl_multi_wait"), None); // readiness wait, no payload
1546        // consumer-side `curl` crate rule: the dispatch verbs are Net, the setopt builders pure.
1547        assert_eq!(classify("curl", "curl::easy::Easy::perform"), Some("Net"));
1548        assert_eq!(classify("curl", "curl::multi::Multi::perform"), Some("Net"));
1549        assert_eq!(classify("curl", "curl::easy::Easy::send"), Some("Net"));
1550        assert_eq!(classify("curl", "curl::easy::Easy::url"), None); // CURLOPT setter — pure
1551        assert_eq!(classify("curl", "curl::easy::Easy::timeout"), None); // pure setter; Multi::timeout under-reported by design
1552        assert_eq!(classify("ffi", "ffi::SSL_connect"), Some("Net"));
1553        // pure crates stay pure
1554        assert_eq!(classify("serde", "serde::Serialize::serialize"), None);
1555        assert_eq!(classify("std", "std::vec::Vec::push"), None);
1556
1557        // ── sweep 2026-06-17: fabrication carve-outs + DNS coverage (each fails pre-fix) ──
1558        // [24] std::net socket accessors are pure; the I/O verbs stay Net.
1559        assert_eq!(classify("std", "std::net::TcpStream::connect"), Some("Net"));
1560        assert_eq!(classify("std", "std::net::TcpStream::local_addr"), None);
1561        assert_eq!(classify("std", "std::net::TcpStream::nodelay"), None);
1562        assert_eq!(classify("std", "std::net::TcpStream::ttl"), None);
1563        assert_eq!(classify("std", "std::net::UdpSocket::peer_addr"), None);
1564        // [37] std DNS resolution is Net (was floored).
1565        assert_eq!(classify("std", "std::net::lookup_host"), Some("Net"));
1566        assert_eq!(classify("std", "core::net::ToSocketAddrs::to_socket_addrs"), Some("Net"));
1567        // [23] std::process getters are pure; spawn/new stay Exec.
1568        assert_eq!(classify("std", "std::process::Command::get_program"), None);
1569        assert_eq!(classify("std", "std::process::Command::get_args"), None);
1570        assert_eq!(classify("std", "std::process::Child::id"), None);
1571        assert_eq!(classify("std", "std::process::Command::spawn"), Some("Exec"));
1572        // [27] redis ConnectionManager::clone is an Arc bump (pure); a query round-trips.
1573        assert_eq!(classify("redis", "redis::aio::ConnectionManager::clone"), None);
1574        assert_eq!(classify("redis", "redis::aio::ConnectionManager::send_packed_command"), Some("Db"));
1575        // [5] sea_orm re-exported sea_query builder algebra is pure; execution verbs stay Db.
1576        assert_eq!(classify("sea_orm", "sea_orm::sea_query::Func::count"), None);
1577        assert_eq!(classify("sea_orm", "sea_orm::sea_query::Condition::all"), None);
1578        assert_eq!(classify("sea_orm", "sea_orm::Select::all"), Some("Db"));
1579    }
1580
1581    #[test]
1582    fn rand_osrng_handle_ops_are_pure_but_draws_are_rand() {
1583        // Adversarial-review fabrication: the blanket `contains("OsRng")` tagged `OsRng::clone` Rand,
1584        // but OsRng is a unit struct — clone/fork/default draw no entropy. The real draws still fire.
1585        assert_eq!(classify("rand", "rand::rngs::OsRng::clone"), None);
1586        assert_eq!(classify("rand", "rand::rngs::OsRng::default"), None);
1587        assert_eq!(classify("rand", "rand::rngs::OsRng::fill_bytes"), Some("Rand")); // a real draw
1588        assert_eq!(classify("rand", "rand::rngs::OsRng::next_u32"), Some("Rand"));
1589        assert_eq!(classify("rand", "rand::Rng::gen"), Some("Rand")); // verb path unaffected
1590        assert_eq!(classify("rand", "rand::distributions::Uniform::new"), None); // pure ctor still pure
1591    }
1592
1593    #[test]
1594    fn redis_connection_manager_config_builder_is_pure() {
1595        // Adversarial-review fabrication: `contains("ConnectionManager")` hit the pure *Config* builder.
1596        assert_eq!(classify("redis", "redis::aio::ConnectionManagerConfig::new"), None);
1597        assert_eq!(classify("redis", "redis::aio::ConnectionManagerConfig::set_max_delay"), None);
1598        // the LIVE manager still round-trips (Db).
1599        assert_eq!(classify("redis", "redis::aio::ConnectionManager::new"), Some("Db"));
1600        assert_eq!(classify("redis", "redis::Commands::get"), Some("Db"));
1601    }
1602
1603    #[test]
1604    fn pure_fd_transfer_is_not_an_effect() {
1605        // ADOPTING / EXTRACTING / BORROWING an already-open descriptor (or unwrapping an async type back
1606        // to its std type) issues NO syscall — it must be PURE even though it hangs off a std I/O type
1607        // whose prefix rule would otherwise fire Net/Fs/Ipc. (Real tokio sweep: `into_std`, `from_raw_fd`,
1608        // `as_raw_fd` all fabricated effects.)
1609        assert_eq!(classify("std", "std::net::TcpStream::from_raw_fd"), None);
1610        assert_eq!(classify("std", "std::net::TcpStream::into_raw_fd"), None);
1611        assert_eq!(classify("std", "std::net::TcpStream::as_raw_fd"), None);
1612        assert_eq!(classify("std", "std::net::TcpListener::from_raw_fd"), None);
1613        assert_eq!(classify("std", "std::net::UdpSocket::from_raw_socket"), None);
1614        assert_eq!(classify("std", "std::fs::File::from_raw_fd"), None);
1615        assert_eq!(classify("std", "std::fs::File::into_raw_fd"), None);
1616        assert_eq!(classify("std", "std::fs::File::as_raw_handle"), None);
1617        assert_eq!(classify("std", "std::os::unix::net::UnixStream::from_raw_fd"), None);
1618        // `SocketAddr::from_pathname` builds an address struct, opens no socket — pure. (socket2 sweep.)
1619        assert_eq!(classify("std", "std::os::unix::net::SocketAddr::from_pathname"), None);
1620        assert_eq!(classify("tokio", "tokio::net::TcpStream::from_raw_fd"), None);
1621        assert_eq!(classify("tokio", "tokio::net::TcpStream::into_std"), None); // unwrap → std type, pure
1622        assert_eq!(classify("tokio", "tokio::fs::File::into_std"), None);
1623        // …but a REAL open/connect on the SAME types still fires the effect — the carve-out is leaf-precise.
1624        assert_eq!(classify("std", "std::net::TcpStream::connect"), Some("Net"));
1625        assert_eq!(classify("std", "std::fs::File::open"), Some("Fs"));
1626        assert_eq!(classify("std", "std::fs::read"), Some("Fs"));
1627        assert_eq!(classify("std", "std::os::unix::net::UnixStream::connect"), Some("Ipc"));
1628        assert_eq!(classify("tokio", "tokio::net::TcpStream::connect"), Some("Net"));
1629    }
1630
1631    #[test]
1632    fn command_head_refines_the_exec_cliff() {
1633        use super::classify_command_head as h;
1634        // unambiguous external tools classify by basename (spec §4 ⟨0.5⟩)
1635        assert_eq!(h("curl"), &["Net"]);
1636        assert_eq!(h("telnet"), &["Net"]);
1637        assert_eq!(h("sftp"), &["Net"]);
1638        assert_eq!(h("/usr/local/bin/psql"), &["Db"]); // basename match strips the path
1639        assert_eq!(h("mongo"), &["Db"]);
1640        assert_eq!(h("cqlsh"), &["Db"]);
1641        // a candor engine is Fs/Env — spec-SUPPLIED by §7 item 12, not curation
1642        assert_eq!(h("candor-scan"), &["Env", "Fs"]);
1643        assert_eq!(h("candor-run.sh"), &["Env", "Fs"]);
1644        // an unrecognised head adds nothing — the bare Exec cliff stands (never guess). `make`/`npm`
1645        // run the project's own code; `git`/`rsync` are multi-modal (local vs remote) — all keep the
1646        // cliff rather than fabricate an effect for the common case.
1647        assert_eq!(h("some-unknown-tool"), &[] as &[&str]);
1648        assert_eq!(h("make"), &[] as &[&str]);
1649        assert_eq!(h("npm"), &[] as &[&str]);
1650        assert_eq!(h("git"), &[] as &[&str]);
1651        assert_eq!(h("rsync"), &[] as &[&str]);
1652        // a builder MODIFIER (`.arg`/`.env`) names no program — its literal must NOT refine (a
1653        // whole-crate-Exec crate classifies every method; `.env("psql",..)` must not fabricate Db).
1654        assert!(is_cmd_builder_method("env") && is_cmd_builder_method("arg") && is_cmd_builder_method("current_dir"));
1655        assert!(!is_cmd_builder_method("new")); // Command::new NAMES the program
1656        assert!(!is_cmd_builder_method("cmd")); // duct::cmd NAMES the program
1657        // The gate that ADMITS a literal to classify_command_head is an ALLOWLIST of program-NAMING
1658        // methods, not the builder denylist. Inversion matters: a whole-crate-Exec crate (portable_pty)
1659        // classifies EVERY method as Exec, so a getter like `cmd.get_env("psql")` — absent from the
1660        // builder denylist — would have leaked "psql" to the head and FABRICATED Db. Only `new`/`cmd`
1661        // name a program, so only they may refine.
1662        assert!(is_cmd_naming_method("new") && is_cmd_naming_method("cmd"));
1663        assert!(!is_cmd_naming_method("get_env")); // a GETTER, not a namer — the leak this closes
1664        assert!(!is_cmd_naming_method("arg") && !is_cmd_naming_method("env") && !is_cmd_naming_method("current_dir"));
1665    }
1666
1667    #[test]
1668    fn net_establishing_allowlist() {
1669        // sweep [3]/[7]: the masking guard's establishing-verb allowlist — host-bearing connect/request
1670        // verbs establish (a runtime host there is invisible); USE-verbs on a connected socket do NOT.
1671        assert!(is_net_establishing("connect") && is_net_establishing("connect_timeout"));
1672        assert!(is_net_establishing("get") && is_net_establishing("post") && is_net_establishing("request"));
1673        assert!(is_net_establishing("send_to") && is_net_establishing("to_socket_addrs"));
1674        // use-verbs (host fixed at connect) must NOT be establishing — else `connect("h").write()` flags.
1675        assert!(!is_net_establishing("write") && !is_net_establishing("read") && !is_net_establishing("send"));
1676        assert!(!is_net_establishing("flush") && !is_net_establishing("recv") && !is_net_establishing("peek"));
1677    }
1678}