Skip to main content

candor_classify/
lib.rs

1//! candor-classify — the curated effect classifier (crate+path -> effect), extracted to a STABLE
2//! crate so both the nightly `rustc_private` lint AND a stable backend share ONE source of truth
3//! (no drift). Pure string logic; no rustc internals. The effect vocabulary lives in candor-report.
4
5use candor_report::EFFECTS;
6
7/// The canonical CANDOR_POLICY DSL parser (SPEC §6.2), shared by the nightly gate and candor-query.
8pub mod policy;
9
/// Looks a callee up in the project-supplied rule list; this is consulted only when the
/// built-in `classify` returns `None`.
///
/// Each rule is an `(effect, is_crate, prefix)` triple. When `is_crate` is set the prefix is
/// tested against `crate_name`, otherwise against `path`. Rules are tried in slice order and the
/// effect of the first rule whose prefix matches is returned; `None` means no rule matched.
pub fn classify_extra(
    crate_name: &str,
    path: &str,
    extra: &[(&'static str, bool, String)],
) -> Option<&'static str> {
    extra
        .iter()
        .find(|(_, is_crate, prefix)| {
            // The flag selects WHICH string the prefix is compared against.
            let subject = if *is_crate { crate_name } else { path };
            subject.starts_with(prefix.as_str())
        })
        .map(|(effect, _, _)| *effect)
}
24
/// Every third-party crate that `classify` carries effect rules for, matched by exact crate name
/// (the crate-name PREFIXES it recognizes live in `CALIBRATED_PREFIXES`). Together they are the
/// single source of truth for "what candor knows": the data is emitted beside the JSON report as
/// `<prefix>.calibrated.json`, so the Claude Code receipt's coverage check reads candor's real
/// coverage instead of a hand-copied list.
///
/// Must stay in lockstep with `classify` — the `db_crates_are_calibrated` and
/// `calibrated_crates_are_live` tests (in this crate's `tests` module) enforce both directions.
pub const CALIBRATED_CRATES: [&str; 53] = [
    // Network. aws_config resolves credentials over the network on `.load()`; git2's remote ops
    // (fetch/push/connect) contact the network; async_net is smol's net layer; pnet is raw L2/L3
    // packet capture.
    "reqwest",
    "isahc",
    "ureq",
    "curl",
    "aws_config",
    "git2",
    "tokio_tcp",
    "tokio_udp",
    "async_net",
    "async_nats",
    "lapin",
    "lettre",
    "tungstenite",
    "elasticsearch",
    "tonic",
    "rdkafka",
    "pnet",
    // Directory traversal: ignore is the gitignore-aware walker that powers ripgrep/fd; its walk
    // executors are Fs.
    "ignore",
    // Filesystem watching: notify wraps inotify/FSEvents/kqueue and powers watchexec/cargo-watch.
    "notify",
    // Database clients (see DB_CRATES in classify).
    "sqlx",
    "rusqlite",
    "postgres",
    "tokio_postgres",
    "diesel",
    "redis",
    "mongodb",
    "mysql",
    "mysql_async",
    "sea_orm",
    "deadpool_postgres",
    // Filesystem (async_fs = smol; fs_err = std::fs wrapper; tempfile; glob).
    "memmap2",
    "fs_err",
    "async_fs",
    "tempfile",
    "glob",
    // Entropy.
    "rand",
    "getrandom",
    "fastrand",
    // Subprocess (async_process = smol; duct).
    "portable_pty",
    "async_process",
    "duct",
    // Environment (dotenvy/dotenv).
    "dotenvy",
    "dotenv",
    // Clock (chrono, time) / log / clipboard.
    "chrono",
    "time",
    "tracing",
    "log",
    "arboard",
    // Compiler diagnostic emission (a dylint lint's output) — see the Log rules in classify.
    "rustc_lint",
    "rustc_errors",
    // Raw syscalls via FFI — the syscall-name table that lights up the FFI-thin tier. nix is
    // routed through the same table by leaf name, so a consumer of nix is covered without nix's
    // own source.
    "libc",
    "nix",
    "rustix",
];
56
/// Crate-name PREFIXES that `classify` recognizes (matched with `starts_with` on the crate name
/// rather than by exact equality): the AWS SDK service crates, the AWS smithy crates, and the
/// cap-std family.
pub const CALIBRATED_PREFIXES: [&str; 3] = [
    "aws_sdk_",
    "aws_smithy",
    "cap_",
];
58
/// Crates that `classify` recognizes by PATH prefix instead of by crate-name equality — only their
/// effectful modules match (e.g. `tokio::net::`, `async_std::fs::`, `mio::net::`). Because of that
/// they are not listed in `CALIBRATED_CRATES`, which the liveness test probes by crate name.
///
/// The coverage check must nevertheless count them as *covered*; otherwise the most common async
/// crates would be mislabelled as blind spots.
pub const PATH_CALIBRATED_CRATES: [&str; 3] = [
    "tokio",
    "async_std",
    "mio",
];
64
/// Representative path tails probed by the `calibrated_crates_are_live` liveness test. Each tail is
/// appended to a crate name, and every `CALIBRATED_CRATES` entry needs at least one tail that
/// matches — an entry with no matching tail is dead.
///
/// Kept here as ONE exported source of truth because the nightly lint crate (`src/lib.rs`) runs the
/// SAME liveness test. When the two probe lists were duplicated they drifted: a rule keyed on a
/// distinctive tail (pnet `::datalink::channel`, ignore `::WalkBuilder::build_parallel`, notify
/// `::RecommendedWatcher::new`) that was added to only one list silently broke the other crate's
/// `cargo test`.
pub const CALIBRATION_PROBE_TAILS: &[&str] = &[
    "::X::send",
    "::X::execute",
    "::X::call",
    "::X::query",
    "::X::fetch_one",
    "::Remote::fetch",
    "::datalink::channel",
    "::WalkBuilder::build_parallel",
    "::RecommendedWatcher::new",
    "::X::connect",
    "::Utc::now",
    "::X::load",
    "::__private_api::log",
    "::tempfile",
    "::glob",
    "::X::run",
    "::dotenv",
    "::random",
    "::emit",
    "::X::emit_span_lint",
    "::X::anything",
];
77
/// Database client crates whose execution verbs count as I/O (handled by the DB branch in
/// `classify`). Declared at module level so the `db_crates_are_calibrated` test can enforce
/// `DB_CRATES ⊆ CALIBRATED_CRATES`.
pub const DB_CRATES: [&str; 11] = [
    "sqlx",
    "rusqlite",
    "postgres",
    "tokio_postgres",
    "diesel",
    "redis",
    "mongodb",
    "mysql",
    "mysql_async",
    "sea_orm",
    "deadpool_postgres",
];
84
85/// Classify a resolved callee by the crate it belongs to and its full path.
86pub fn classify(crate_name: &str, path: &str) -> Option<&'static str> {
87    if crate_name.starts_with("aws_sdk_") || crate_name.starts_with("aws_smithy") {
88        // Only request dispatch is network I/O; builder setters/accessors are pure.
89        if path.ends_with("::send") || path.ends_with("::send_with") {
90            return Some("Net");
91        }
92        return None;
93    }
94    // aws-config resolves credentials/region on `.load()` — it reaches the IMDS metadata
95    // endpoint / STS over the network (and reads ~/.aws + env). Builders (`defaults()`,
96    // `SdkConfig::builder()`, `BehaviorVersion::latest()`) are pure; the `load` is the I/O.
97    // (Found hardening on a real app, ebman: `builder.load().await` was classified pure.)
98    if crate_name == "aws_config" {
99        if path.ends_with("::load") || path.ends_with("::load_defaults") {
100            return Some("Net");
101        }
102        return None;
103    }
104    // git2 (libgit2 FFI): remote operations contact the network; everything else is local
105    // to the .git directory. Match the remote verbs precisely — NOT bare `::clone`, which is
106    // the `Clone`-trait dup of a `Remote` handle (pure), not `Repository::clone`. (Found
107    // hardening on gitui: `remote.fetch`/`remote.push` were classified network-free — a git
108    // client reporting it makes no network calls.)
109    if crate_name == "git2" {
110        if path.ends_with("::fetch")
111            || path.ends_with("::push")
112            || path.ends_with("::download")
113            || path.ends_with("::connect")
114            || path.ends_with("::connect_auth")
115            || path.ends_with("::ls")
116            || path.ends_with("::upload")
117        {
118            return Some("Net");
119        }
120        return None;
121    }
122    // libc — raw syscalls via FFI. The FFI-thin tier (nix, and the syscall layer beneath rusqlite/git2)
123    // is invisible to a name classifier unless we model libc directly: a 35-crate calibration
124    // (eval/calibration) showed nix reporting ZERO library effects because every wrapper bottoms out in
125    // an unrecognised `libc::*` call. Classify by syscall name, but ONLY the UNAMBIGUOUS ones — the
126    // socket family is Net, path/dir syscalls are Fs, spawn/exec/wait is Exec, SysV/pipe IPC is Ipc,
127    // env/clock/entropy each their own. We deliberately SKIP the generic file-descriptor ops
128    // (read/write/close/lseek/dup/fcntl/ioctl/poll/select/epoll*/mmap): they operate on ANY fd — file,
129    // socket, or pipe — so a fixed label would mis-categorise as often as it helps. An honest
130    // no-classify (under-report) beats emitting the WRONG effect. Pure conversions (htons/inet_pton/
131    // gmtime) are also skipped.
132    //
133    // `nix` (the idiomatic SAFE libc wrapper, in ~every Rust systems/CLI crate) is routed through the
134    // SAME table: its functions keep the syscall leaf name (`nix::fcntl::open`, `nix::sys::socket::connect`,
135    // `nix::unistd::execvp`). Without this, a CONSUMER of nix analysed without nix's own source (the
136    // stable scanner, single-crate) sees `nix::*` cross-crate and under-reports — serialport-rs opens its
137    // device via `nix::fcntl::open` and reported ZERO Fs. The nightly lint reaches `libc::*` THROUGH nix's
138    // body; this gives the scanner the same coverage directly. (Found sweeping serialport-rs.)
139    // `rustix` is the same shape as nix but does RAW syscalls (no libc underneath), so its functions MUST
140    // be classified directly. Its leaf names are the syscall names too (`rustix::time::clock_settime`,
141    // `rustix::fs::mkfifoat`/`symlink`/`stat`, `rustix::net::connect`) — route it through the same table.
142    // The rustix-specific `*at`/variant leaves it doesn't share with libc just under-report (the safe
143    // direction). VALIDATED, not speculative: coreutils' `date` reads/sets the clock via
144    // `rustix::time::clock_getres`/`clock_settime` and reported Clock=0; the file I/O that goes through
145    // std::fs was already correct, which is why only the rustix-only effects (Clock/Ipc) were missing.
146    if crate_name == "libc" || crate_name == "nix" || crate_name == "rustix" {
147        let f = path.rsplit("::").next().unwrap_or(path);
148        // path / directory / metadata syscalls (incl. *64 and *at variants)
149        const FS: &[&str] = &[
150            "open", "open64", "openat", "openat2", "creat", "creat64", "stat", "stat64", "lstat",
151            "lstat64", "fstatat", "fstatat64", "newfstatat", "statx", "access", "faccessat",
152            "faccessat2", "mkdir", "mkdirat", "rmdir", "unlink", "unlinkat", "rename", "renameat",
153            "renameat2", "link", "linkat", "symlink", "symlinkat", "readlink", "readlinkat", "chmod",
154            "fchmodat", "chown", "lchown", "fchownat", "truncate", "truncate64", "ftruncate",
155            "ftruncate64", "opendir", "fdopendir", "readdir", "readdir64", "readdir_r", "closedir",
156            "rewinddir", "seekdir", "telldir", "scandir", "mkstemp", "mkstemps", "mkostemp", "mkdtemp",
157            "mknod", "mknodat", "chdir", "fchdir", "getcwd", "get_current_dir_name", "chroot",
158            "pivot_root", "statfs", "statfs64", "fstatfs", "fstatfs64", "statvfs", "fstatvfs", "mount",
159            "umount", "umount2", "fsync", "fdatasync", "sync", "syncfs", "sync_file_range", "fallocate",
160            "posix_fallocate", "posix_fadvise", "sendfile", "sendfile64", "copy_file_range", "flock",
161            "getdents", "getdents64", "utime", "utimes", "lutimes", "futimens", "utimensat", "futimesat",
162            "realpath",
163        ];
164        // socket family — these operate only on sockets, so Net is unambiguous (AF_UNIX domain isn't
165        // visible at the call, so a Unix socket reads as Net rather than Ipc; acceptable over-general).
166        const NET: &[&str] = &[
167            "socket", "setsockopt", "getsockopt", "bind", "listen", "accept", "accept4", "connect",
168            "shutdown", "send", "sendto", "sendmsg", "sendmmsg", "recv", "recvfrom", "recvmsg",
169            "recvmmsg", "getpeername", "getsockname", "getaddrinfo", "freeaddrinfo", "getnameinfo",
170        ];
171        // process creation / replacement / reaping
172        const EXEC: &[&str] = &[
173            "fork", "vfork", "clone", "clone3", "execl", "execlp", "execle", "execv", "execvp",
174            "execvpe", "execve", "execveat", "fexecve", "posix_spawn", "posix_spawnp", "system",
175            "popen", "pclose", "wait", "waitpid", "wait3", "wait4", "waitid",
176        ];
177        // pipes / FIFOs / SysV + POSIX message queues, semaphores, shared memory; socketpair (AF_UNIX)
178        const IPC: &[&str] = &[
179            "pipe", "pipe2", "mkfifo", "mkfifoat", "socketpair", "msgget", "msgsnd", "msgrcv", "msgctl",
180            "semget", "semop", "semtimedop", "semctl", "shmget", "shmat", "shmdt", "shmctl", "mq_open",
181            "mq_send", "mq_receive", "mq_timedsend", "mq_timedreceive", "mq_close", "mq_unlink",
182        ];
183        const ENV: &[&str] = &["getenv", "secure_getenv", "setenv", "putenv", "unsetenv", "clearenv"];
184        const CLOCK: &[&str] = &[
185            "time", "gettimeofday", "clock_gettime", "clock_getres", "nanosleep", "clock_nanosleep",
186            // SETTING the system clock is a clock effect too (was unclassified — found on coreutils `date`,
187            // which sets it via `clock_settime`).
188            "clock_settime", "settimeofday", "stime", "adjtime", "adjtimex", "clock_adjtime",
189        ];
190        const RAND: &[&str] = &["getrandom", "getentropy", "arc4random", "arc4random_buf", "arc4random_uniform"];
191        if FS.contains(&f) {
192            return Some("Fs");
193        }
194        if NET.contains(&f) {
195            return Some("Net");
196        }
197        if EXEC.contains(&f) {
198            return Some("Exec");
199        }
200        if IPC.contains(&f) {
201            return Some("Ipc");
202        }
203        if ENV.contains(&f) {
204            return Some("Env");
205        }
206        if CLOCK.contains(&f) {
207            return Some("Clock");
208        }
209        if RAND.contains(&f) {
210            return Some("Rand");
211        }
212        return None;
213    }
214    // C-library FFI bindings: libsqlite3 (under rusqlite) and libgit2 (under git2). Like the libc tier,
215    // these crates are thin Rust over a C library, so their real I/O is invisible until the C entry
216    // points are named. Match by the DISTINCTIVE C function name (`sqlite3_*` / `git_*`) via the call's
217    // LEAF — independent of the binding crate's alias: rusqlite calls `ffi::sqlite3_step`, git2 calls
218    // `raw::git_remote_fetch`, and the nightly lint resolves the same to `libsqlite3_sys`/`libgit2_sys`;
219    // all spellings share the leaf. Only the I/O-performing entry points are listed — the in-memory
220    // accessors (`sqlite3_bind_*`/`sqlite3_column_*`, `git_*_oid`/strarray/options builders) stay pure,
221    // so a non-listed `sqlite3_`/`git_` leaf returns None (under-report, never a wrong effect). Calibrated
222    // + validated against rusqlite 0.39 / git2 0.20 source (eval/calibration).
223    {
224        let leaf = path.rsplit("::").next().unwrap_or(path);
225        if let Some(rest) = leaf.strip_prefix("sqlite3_") {
226            let _ = rest;
227            // SQLite C API operations that touch the database (open/exec/step/prepare/backup/blob/wal).
228            const DB: &[&str] = &[
229                "sqlite3_open", "sqlite3_open_v2", "sqlite3_open16", "sqlite3_close", "sqlite3_close_v2",
230                "sqlite3_exec", "sqlite3_step", "sqlite3_prepare", "sqlite3_prepare_v2",
231                "sqlite3_prepare_v3", "sqlite3_prepare16", "sqlite3_prepare16_v2", "sqlite3_prepare16_v3",
232                "sqlite3_get_table", "sqlite3_backup_init", "sqlite3_backup_step", "sqlite3_backup_finish",
233                "sqlite3_blob_open", "sqlite3_blob_read", "sqlite3_blob_write", "sqlite3_blob_reopen",
234                "sqlite3_load_extension", "sqlite3_wal_checkpoint", "sqlite3_wal_checkpoint_v2",
235            ];
236            return DB.contains(&leaf).then_some("Db");
237        }
238        if leaf.starts_with("git_") {
239            // libgit2: remote/transport operations contact the network … (incl. submodule clone/update,
240            // which `git_clone`/fetch the subrepo over its remote — `allow_fetch` defaults on; an A/B on
241            // git2 0.20 caught `Submodule::update`/`clone` reporting no `Net`).
242            const NET: &[&str] = &[
243                "git_clone", "git_remote_connect", "git_remote_connect_ext", "git_remote_fetch",
244                "git_remote_download", "git_remote_upload", "git_remote_push", "git_remote_ls",
245                "git_submodule_clone", "git_submodule_update",
246            ];
247            // … and repository/index/odb/checkout/ref/config operations touch the on-disk .git store.
248            const FS: &[&str] = &[
249                "git_repository_open", "git_repository_open_ext", "git_repository_open_bare",
250                "git_repository_init", "git_repository_init_ext", "git_repository_discover",
251                "git_checkout_tree", "git_checkout_head", "git_checkout_index", "git_index_read",
252                "git_index_write", "git_index_write_tree", "git_index_write_tree_to",
253                "git_index_add_bypath", "git_index_add_all", "git_odb_open", "git_odb_read",
254                "git_odb_write", "git_odb_open_wstream", "git_odb_open_rstream",
255                "git_blob_create_fromdisk", "git_blob_create_fromworkdir", "git_blob_create_from_disk",
256                "git_blob_create_from_workdir", "git_blob_create_from_stream", "git_commit_create",
257                "git_commit_create_v", "git_reference_create", "git_reference_set_target",
258                "git_reference_delete", "git_config_open_default", "git_config_open_ondisk",
259                "git_config_add_file_ondisk", "git_tag_create", "git_treebuilder_write",
260                "git_packbuilder_write",
261            ];
262            if NET.contains(&leaf) {
263                return Some("Net");
264            }
265            if FS.contains(&leaf) {
266                return Some("Fs");
267            }
268            return None;
269        }
270        if leaf.starts_with("curl_") {
271            // libcurl (under the `curl` crate, called `curl_sys::curl_*`). Only the entry points that
272            // PERFORM network I/O: the blocking transfer (`curl_easy_perform`), raw socket send/recv,
273            // the HTTP/2 keepalive PING (`upkeep`), and the multi-interface transfer pumps. The large
274            // pure surface (setopt/init/cleanup/reset/getinfo/escape/multi_add_handle/fdset/info_read)
275            // stays unclassified, as do `curl_multi_wait`/`poll` (readiness WAIT on sockets, no payload —
276            // the loop's `perform` is the tagged boundary, per the I/O-boundary principle). An A/B on
277            // curl 0.4 caught the whole crate reporting ZERO Net (`Easy::perform` read as pure).
278            const NET: &[&str] = &[
279                "curl_easy_perform", "curl_easy_send", "curl_easy_recv", "curl_easy_upkeep",
280                "curl_multi_perform", "curl_multi_socket_action",
281            ];
282            return NET.contains(&leaf).then_some("Net");
283        }
284        if let Some(op) = leaf.strip_prefix("SSL_") {
285            // OpenSSL (libssl, under the `openssl`/`native-tls` crates, called `ffi::SSL_*`). The TLS
286            // handshake and record I/O run over the peer socket -> Net. Unlike libc read/write, an SSL_*
287            // op is ~always over a network BIO (the rare memory-BIO/sans-IO case is the honest exception
288            // we accept). The crypto surface (EVP_*/SHA*/AES*) and pure setup (SSL_CTX_new/SSL_set_fd) are
289            // NOT here; `BIO_*` is skipped (a BIO may be memory or socket). Validated vs openssl 0.9 source.
290            const SSL_NET: &[&str] = &[
291                "connect", "accept", "do_handshake", "read", "read_ex", "write", "write_ex", "peek",
292                "peek_ex", "shutdown",
293            ];
294            return SSL_NET.contains(&op).then_some("Net");
295        }
296    }
297    // HTTP clients use the same builder pattern as the AWS SDK: only the dispatch is
298    // I/O. (Found by the eval: ebman's reqwest calls to the Anthropic API + webhooks
299    // were silently classified network-free because reqwest wasn't recognized.)
300    if crate_name == "reqwest" || crate_name == "isahc" {
301        // The builder chain is pure; the dispatch (`::send`/`::execute`) is the I/O. PLUS the one-shot
302        // CONVENIENCE functions `reqwest::get` / `reqwest::blocking::get` / `isahc::get`, which send
303        // immediately — they're not the `Client::get` builder (a different path, `reqwest::Client::get`),
304        // so an exact match avoids false-positiving the builder. (Found running on `xh`: a one-shot
305        // `reqwest::get(url)` was classified network-free.)
306        if path.ends_with("::send")
307            || path.ends_with("::execute")
308            || path == "reqwest::get"
309            || path == "reqwest::blocking::get"
310            || path == "isahc::get"
311        {
312            return Some("Net");
313        }
314        return None;
315    }
316    if crate_name == "ureq" && path.ends_with("::call") {
317        return Some("Net");
318    }
319    // The `curl` crate (libcurl's safe binding — cargo's own HTTP client): the dispatch verbs are
320    // `perform` (Easy/Easy2/Transfer/Multi), raw-socket `send`/`recv`, the keepalive `upkeep`, and the
321    // multi-interface `action` (socket_action). The big setopt-style builder surface stays pure.
322    // `Multi::timeout` is deliberately NOT matched: `Easy::timeout` is a pure CURLOPT_TIMEOUT setter
323    // sharing the leaf — an under-report on the rare event-loop kick beats mis-tagging every consumer
324    // that sets a timeout. (Consumer-side companion to the curl_* FFI tier, same A/B finding.)
325    if crate_name == "curl"
326        && (path.ends_with("::perform")
327            || path.ends_with("::send")
328            || path.ends_with("::recv")
329            || path.ends_with("::upkeep")
330            || path.ends_with("::action"))
331    {
332        return Some("Net");
333    }
334    // Message-queue clients fully encapsulate the socket (the underlying tokio::net lives
335    // inside the crate, unseen), so a user's connect/publish/consume calls ARE the I/O
336    // boundary — to a remote broker, hence Net. Match the broker round-trip verbs (snake_case
337    // methods); the CamelCase option/property builders stay pure. (Found hardening on consumer
338    // apps: lapin `basic_publish`/`queue_declare` and async-nats `publish`/`subscribe` were
339    // classified pure — a message-queue client reporting no I/O.)
340    if crate_name == "async_nats" {
341        if path.ends_with("::connect")
342            || path.contains("::publish")
343            || path.ends_with("::subscribe")
344            || path.ends_with("::queue_subscribe")
345            || path.contains("::request")
346            || path.ends_with("::flush")
347        {
348            return Some("Net");
349        }
350        return None;
351    }
352    if crate_name == "lapin" {
353        if path.ends_with("::connect")
354            || path.ends_with("::create_channel")
355            || path.contains("::basic_")
356            || path.contains("::queue_")
357            || path.contains("::exchange_")
358            || path.contains("::tx_")
359            || path.ends_with("::confirm_select")
360            || path.ends_with("::close")
361        {
362            return Some("Net");
363        }
364        return None;
365    }
366    // SMTP email — lettre's `Transport::send` is the network dispatch; Message building is
367    // pure. (Found hardening on a lettre consumer: `mailer.send(&email)` classified pure.)
368    if crate_name == "lettre" {
369        if path.ends_with("::send") || path.ends_with("::send_raw") {
370            return Some("Net");
371        }
372        return None;
373    }
374    // WebSockets — tungstenite (the modern successor to the old `websocket` crate). connect
375    // and the socket read/write/send are network; Message constructors are pure. (Found on a
376    // tungstenite consumer: connect + send + read classified pure.)
377    if crate_name == "tungstenite" {
378        if path.ends_with("::connect")
379            || path.ends_with("::read")
380            || path.ends_with("::write")
381            || path.ends_with("::send")
382            || path.ends_with("::close")
383            || path.ends_with("::flush")
384            || path.ends_with("::read_message")
385            || path.ends_with("::write_message")
386        {
387            return Some("Net");
388        }
389        return None;
390    }
391    // elasticsearch: request builders are pure; only the `.send()` dispatch is HTTP I/O
392    // (same shape as reqwest / the AWS SDK). (Found on an elasticsearch consumer.)
393    if crate_name == "elasticsearch" && path.ends_with("::send") {
394        return Some("Net");
395    }
396    // gRPC — tonic. The transport connect and the Grpc client RPC dispatch are network;
397    // codecs and request/response wrappers are pure. (connect repro-confirmed on a consumer;
398    // the unary/streaming RPC verbs are from the tonic::client::Grpc API.)
399    if crate_name == "tonic" {
400        if path.ends_with("::connect")
401            || path.ends_with("::unary")
402            || path.ends_with("::server_streaming")
403            || path.ends_with("::client_streaming")
404            || path.ends_with("::streaming")
405        {
406            return Some("Net");
407        }
408        return None;
409    }
410    // Kafka — rdkafka (FFI to librdkafka). Producer send + consumer poll/recv/subscribe/
411    // commit are network round-trips to the brokers. (API-calibrated + unit-tested; a real
412    // repro needs librdkafka/cmake, deferred.)
413    if crate_name == "rdkafka" {
414        if path.ends_with("::send")
415            || path.ends_with("::send_result")
416            || path.ends_with("::recv")
417            || path.ends_with("::poll")
418            || path.ends_with("::subscribe")
419            || path.ends_with("::commit")
420            || path.ends_with("::commit_message")
421            || path.ends_with("::commit_consumer_state")
422            || path.ends_with("::store_offset")
423            || path.ends_with("::seek")
424            || path.ends_with("::fetch_metadata")
425            || path.ends_with("::fetch_watermarks")
426            || path.ends_with("::flush")
427        {
428            return Some("Net");
429        }
430        return None;
431    }
432    // cap-std: capability-oriented std. I/O goes *through* a held capability handle
433    // (Dir/Pool/Clock/...), so these calls ARE the effect. Recognising them means a
434    // cap-std project's real I/O is detected and matches the capability it declared
435    // (via `declared_caps`/`capstd_cap`) — conformance against unforgeable capabilities.
436    if crate_name.starts_with("cap_") {
437        if path.contains("::net::Unix") || path.contains("::os::") {
438            return Some("Ipc");
439        }
440        if path.contains("::net") {
441            return Some("Net");
442        }
443        if path.contains("::time") {
444            return Some("Clock");
445        }
446        if path.contains("::fs") || crate_name == "cap_tempfile" || crate_name == "cap_directories" {
447            return Some("Fs");
448        }
449        return None;
450    }
451    // Local IPC (Unix-domain sockets) is I/O but not *network* — keep it distinct so
452    // CANDOR_NO_AMBIENT and audits don't conflate it with internet access. async-std puts its
453    // Unix sockets under `os::unix::net` (mirroring std); async-net (smol's net layer) under
454    // `unix`.
455    if path.starts_with("tokio::net::Unix")
456        || path.starts_with("std::os::unix::net")
457        || path.starts_with("async_std::os::unix::net")
458        || path.starts_with("async_net::unix")
459    {
460        return Some("Ipc");
461    }
462    // Raw packet capture / raw sockets — libpnet (the dominant low-level networking crate; powers
463    // bandwhich, sniffers, custom-protocol tools). `datalink::channel` opens an L2 socket and
464    // `transport::transport_channel` an L3/L4 raw socket — both ARE network I/O. Packet construction
465    // (pnet_packet / pnet_base, MacAddr, Ethernet frames…) is pure and stays unclassified. The actual
466    // frame read/write happens via methods on the returned Sender/Receiver (trait-object dispatch the
467    // syntactic backend can't resolve), so the channel-open call is the precise Net boundary. (Found
468    // scanning bandwhich — a packet sniffer — which reported Net 0.)
469    if crate_name == "pnet" || crate_name == "pnet_datalink" || crate_name == "pnet_transport" {
470        if path.ends_with("::channel") || path.ends_with("::transport_channel") {
471            return Some("Net");
472        }
473        return None;
474    }
475    // Directory traversal — `ignore` (BurntSushi's gitignore-aware walker; powers ripgrep, fd). The walk
476    // EXECUTORS read the directory tree from disk = Fs. Type-precise on purpose: the configuration builders
477    // (`OverrideBuilder::build`, `GitignoreBuilder::build`, the `WalkBuilder` setters) and `DirEntry`
478    // accessors are PURE — only `WalkBuilder::build`/`build_parallel` (which kick off the walk) and
479    // `WalkParallel::run` (which drives it) touch the filesystem. A bare `build` would wrongly flag the
480    // config builders. (Found scanning fd — a file finder — which reported Fs 2: its own `fs::read_dir`
481    // was caught, but the `ignore`-based traversal that IS fd was invisible cross-crate.)
482    if crate_name == "ignore" {
483        if path == "ignore::WalkBuilder::build"
484            || path == "ignore::WalkBuilder::build_parallel"
485            || path.ends_with("::WalkParallel::run")
486        {
487            return Some("Fs");
488        }
489        return None;
490    }
491    // Filesystem watching — `notify` (the de-facto fs-watch crate: watchexec, cargo-watch, mdbook). A
492    // watcher opens an OS notification handle (inotify / FSEvents / kqueue / ReadDirectoryChanges) and
493    // registers paths — observing filesystem state changes = Fs. The lifecycle boundary: any
494    // `*Watcher::new` constructor (RecommendedWatcher/PollWatcher/INotifyWatcher/FsEventWatcher/…), the
495    // `recommended_watcher` convenience fn, and the `watch`/`unwatch` registration verbs. `Config`/`Event`/
496    // `EventKind` data types stay pure. (Found scanning watchexec: its watcher-`create` read Fs 0.)
497    if crate_name == "notify" {
498        if path.ends_with("Watcher::new")
499            || path.ends_with("::recommended_watcher")
500            || path.ends_with("::watch")
501            || path.ends_with("::unwatch")
502        {
503            return Some("Fs");
504        }
505        return None;
506    }
507    // Raw sockets. Match the I/O *types* only — `std::net` also holds pure data types
508    // (SocketAddr, IpAddr, …) whose construction must NOT be flagged.
509    if path.starts_with("std::net::TcpStream")
510        || path.starts_with("std::net::TcpListener")
511        || path.starts_with("std::net::UdpSocket")
512        || path.starts_with("tokio::net::")
513    {
514        return Some("Net");
515    }
516    // Legacy tokio 0.1 socket crates — `tokio_tcp`/`tokio_udp` are *entirely* networking
517    // (no pure types to over-flag), so the whole crate is Net. (Found hardening on websocat,
518    // which is still on tokio 0.1: its `tokio_tcp::TcpStream::connect` was classified
519    // network-free — a network tool confidently reporting 0 Net.)
520    if matches!(crate_name, "tokio_tcp" | "tokio_udp") {
521        return Some("Net");
522    }
523    // The other async runtimes mirror tokio's module layout, and their `net` modules hold only
524    // socket I/O types (the pure `SocketAddr`/`IpAddr` are re-exports that resolve to `std::net`,
525    // so they're excluded by def-path). `mio` is the low-level non-blocking-socket layer under
526    // tokio/others; `async_net` is smol's net crate. Closes the async-std/smol/mio gap the
527    // tokio_tcp note flagged. (Calibrated by module structure — these crates ARE networking — not
528    // a live repro; the TCP/UDP types are defined in-crate so the def-path prefix is exact.)
529    if path.starts_with("async_std::net::")
530        || path.starts_with("mio::net::")
531        || crate_name == "async_net"
532    {
533        return Some("Net");
534    }
535    // Database clients. Like the AWS/HTTP builders, only the execution verbs are I/O;
536    // query *construction* is pure. Best-effort across crates (tune via CANDOR_CONFIG).
537    // Note: bare `::query` is deliberately omitted — it executes in postgres/rusqlite but
538    // only *builds* in sqlx, so including it would false-positive sqlx's `query()` builder.
539    if DB_CRATES.contains(&crate_name) {
540        // Postgres / SQLite-family clients: `query`/`batch_execute`/`prepare`/etc. ARE the
541        // execution (round-trips to the server). sqlx is the outlier where bare `query()`
542        // only BUILDS — it keeps the narrow set below. (Found by running on a real
543        // tokio-postgres app, pgman: candor had reported only 4 of ~20 DB call sites.)
544        if matches!(crate_name, "postgres" | "tokio_postgres" | "deadpool_postgres" | "rusqlite") {
545            const PG: [&str; 19] = [
546                "::query", "::query_one", "::query_opt", "::query_raw", "::execute",
547                "::batch_execute", "::simple_query", "::prepare", "::prepare_typed",
548                "::copy_in", "::copy_out", "::transaction", "::connect",
549                // rusqlite's dialect of the same verbs (a verb-probe found the CANONICAL rusqlite
550                // consumer API classifying pure): `query_row` is the one-row read, `query_map`/
551                // `query_and_then` the many-row reads, `execute_batch` is rusqlite's name for
552                // batch_execute, `prepare_cached` round-trips like prepare. `query_typed` is
553                // tokio_postgres 0.7.10+.
554                "::query_row", "::query_map", "::query_and_then", "::execute_batch",
555                "::prepare_cached", "::query_typed",
556            ];
557            if PG.iter().any(|v| path.ends_with(v)) {
558                return Some("Db");
559            }
560            // rusqlite only: opening the database IS the connection establishment (`Connection::
561            // open`/`open_in_memory`/`open_with_flags` — the embedded analog of `::connect`).
562            if crate_name == "rusqlite"
563                && (path.ends_with("::open")
564                    || path.ends_with("::open_in_memory")
565                    || path.ends_with("::open_with_flags"))
566            {
567                return Some("Db");
568            }
569            return None;
570        }
571        // redis: the way redis is ACTUALLY used is the high-level `Commands`/`AsyncCommands`
572        // traits (`con.get`/`set`/`hset`/`lpush`/…) — every method is a round-trip — plus
573        // connection establishment. The shared VERBS below only catch the low-level
574        // `cmd("GET").query(con)`, so without this a normal redis user's calls classify as
575        // PURE. (Found hardening on redis-rs: a fn doing `con.get`/`set` reported no effects.)
576        if crate_name == "redis"
577            && (path.contains("Commands::")
578                || path.contains("::get_connection")
579                || path.contains("::get_async_connection")
580                || path.contains("::get_multiplexed_async_connection")
581                || path.contains("ConnectionManager")
582                || path.ends_with("::query")
583                || path.ends_with("::query_async")
584                || path.ends_with("::req_command")
585                || path.ends_with("::req_packed_command")
586                || path.ends_with("::req_packed_commands"))
587        {
588            return Some("Db");
589        }
590        // mongodb: a document-store API with none of the SQL verbs — the user calls
591        // `coll.find_one`/`insert_one`/`aggregate`/… and `Client::with_uri_str`. Without
592        // these a mongodb user's calls classify PURE. (Found hardening: a fn doing
593        // `find_one`+`insert_one` reported no effects.) Handle accessors (name/namespace)
594        // and option/doc builders don't match these verbs, so they stay pure.
595        if crate_name == "mongodb" {
596            const MONGO: [&str; 27] = [
597                "::with_uri_str", "::connect", "::find", "::find_one", "::insert_one",
598                "::insert_many", "::update_one", "::update_many", "::delete_one",
599                "::delete_many", "::replace_one", "::aggregate", "::count_documents",
600                "::estimated_document_count", "::count", "::distinct", "::run_command",
601                "::find_one_and_update", "::find_one_and_delete", "::find_one_and_replace",
602                "::list_collections", "::list_collection_names", "::list_databases",
603                "::list_database_names", "::create_collection", "::create_index", "::watch",
604            ];
605            if MONGO.iter().any(|v| path.ends_with(v)) {
606                return Some("Db");
607            }
608            return None;
609        }
610        // mysql / mysql_async: the `query`/`exec` families + `get_conn`/`ping` execute
611        // immediately — no build-then-execute split like sqlx, so matching `::query` is safe
612        // here. Same DB-verb-dialect gap class as redis/mongodb; calibrated from the Queryable
613        // API (unit-tested; a real-app repro is the remaining confirmation).
614        if matches!(crate_name, "mysql" | "mysql_async") {
615            const MY: [&str; 16] = [
616                "::query", "::query_first", "::query_iter", "::query_map", "::query_fold",
617                "::query_drop", "::exec", "::exec_first", "::exec_iter", "::exec_map",
618                "::exec_fold", "::exec_drop", "::exec_batch", "::prep", "::ping", "::get_conn",
619            ];
620            if MY.iter().any(|v| path.ends_with(v)) {
621                return Some("Db");
622            }
623            return None;
624        }
625        // sea_orm: an ORM whose execution is split from building (like sqlx). The query
626        // BUILDERS (`Entity::find`, `Entity::insert`) are pure; execution happens at `.all`/
627        // `.one`/`.count`/`.stream` and `Insert/Update/Delete::exec`. The write path via an
628        // ActiveModel (`model.insert(db)`) executes too — distinguished from the `EntityTrait`
629        // builder by the trait in the path (`ActiveModelTrait::`). (Found hardening on a
630        // sea_orm consumer app: `.all(db)` reads and `ActiveModel::insert` writes were pure.)
631        if crate_name == "sea_orm" {
632            if path.ends_with("::all")
633                || path.ends_with("::one")
634                || path.ends_with("::count")
635                || path.ends_with("::stream")
636                || path.ends_with("::exec")
637                || path.ends_with("::exec_with_returning")
638                || path.ends_with("::exec_without_returning")
639                || path.ends_with("::connect")
640                || path.ends_with("::execute")
641                || path.ends_with("::execute_unprepared")
642                || path.ends_with("::query_one")
643                || path.ends_with("::query_all")
644                || path.ends_with("::fetch_page")
645                || path.ends_with("::num_items")
646                || path.contains("ActiveModelTrait::")
647            {
648                return Some("Db");
649            }
650            return None;
651        }
652        // (Reached by sqlx + diesel — the build-vs-execute-split crates.) `first` is diesel's
653        // LIMIT-1 round trip and `load_iter` its 2.x streaming execution; `fetch_many` is sqlx's
654        // multi-result stream. All crate-gated, so a std `Vec::first` never resolves here.
655        const VERBS: [&str; 19] = [
656            "::execute", "::query_row", "::query_map", "::query_one", "::fetch_one",
657            "::fetch_all", "::fetch_optional", "::fetch", "::fetch_many", "::connect",
658            "::acquire", "::begin", "::commit", "::rollback", "::load", "::load_iter",
659            "::first", "::get_result", "::get_results",
660        ];
661        if VERBS.iter().any(|v| path.ends_with(v)) {
662            return Some("Db");
663        }
664        return None;
665    }
666    // std::path::Path / PathBuf STAT-family methods hit the filesystem (each is a stat/readlink/
667    // readdir syscall) — unlike the rest of the std::path surface, which is pure string manipulation
668    // (join/file_name/extension/parent/…). Verb-precise so the scanner's receiver inference can safely
669    // route a `path.symlink_metadata()` method call here. (A blackout screen caught gix-dir — an entire
670    // directory WALKER — reporting ZERO Fs because all its I/O is Path-method calls; same class as
671    // fd's residual `Path::symlink_metadata` under-report.)
672    if let Some(m) = path
673        .strip_prefix("std::path::Path::")
674        .or_else(|| path.strip_prefix("std::path::PathBuf::"))
675    {
676        const STAT: &[&str] = &[
677            "metadata", "symlink_metadata", "canonicalize", "read_link", "read_dir", "exists",
678            "try_exists", "is_file", "is_dir", "is_symlink",
679        ];
680        return STAT.contains(&m).then_some("Fs");
681    }
682    // Filesystem. `tokio::fs`/`async_std::fs` are the async mirrors of `std::fs`; `async_fs` is
683    // smol's fs crate; `fs_err` is a drop-in `std::fs` wrapper (its whole surface is fs I/O).
684    if path.starts_with("std::fs::")
685        || path.starts_with("tokio::fs::")
686        || path.starts_with("async_std::fs::")
687        || crate_name == "async_fs"
688        || crate_name == "fs_err"
689        || crate_name == "memmap2"
690    {
691        return Some("Fs");
692    }
693    // tempfile: creating a temp file/dir touches the disk. Match the create/persist verbs (the
694    // `Builder` setters — prefix/suffix/rand_bytes — stay pure). `persist`/`keep` rename/retain
695    // the file on disk; `close` removes it.
696    if crate_name == "tempfile"
697        && (path.ends_with("::tempfile")
698            || path.ends_with("::tempfile_in")
699            || path.ends_with("::tempdir")
700            || path.ends_with("::tempdir_in")
701            || path.ends_with("NamedTempFile::new")
702            || path.ends_with("NamedTempFile::new_in")
703            || path.ends_with("TempDir::new")
704            || path.ends_with("TempDir::new_in")
705            || path.ends_with("::persist")
706            || path.ends_with("::persist_noclobber")
707            || path.ends_with("::keep"))
708    {
709        return Some("Fs");
710    }
711    // glob: walks the filesystem to expand a pattern (the returned iterator reads directories).
712    // `Pattern::matches` is pure string matching — match only the directory-walking entry points.
713    if crate_name == "glob" && (path.ends_with("::glob") || path.ends_with("::glob_with")) {
714        return Some("Fs");
715    }
716    // Randomness / entropy. `getrandom`/`fastrand` are effectful end-to-end. `rand` is NOT — it
717    // mixes entropy/generation (effectful) with *pure* distribution constructors (`Uniform::new`,
718    // `Normal::new`) and deterministic-seed constructors (`seed_from_u64`). Flagging the whole crate
719    // over-reported those as `Rand`; match only the calls that actually consume randomness — the
720    // entropy sources (`OsRng`, `thread_rng`/`rng`, `from_entropy`/`from_os_rng`) and the generation
721    // verbs (`gen*`/`random*`/`fill*`/`sample*`/`next_u*`). A `Uniform::new` is now correctly pure.
722    if crate_name == "getrandom" || crate_name == "fastrand" {
723        return Some("Rand");
724    }
725    if crate_name == "rand" {
726        let rng_verb = path.ends_with("::gen")
727            || path.ends_with("::gen_range")
728            || path.ends_with("::gen_bool")
729            || path.ends_with("::gen_ratio")
730            || path.ends_with("::random")
731            || path.ends_with("::random_range")
732            || path.ends_with("::random_bool")
733            || path.ends_with("::random_ratio")
734            || path.ends_with("::random_iter") // rand 0.9 iterator generator
735            || path.ends_with("::gen_iter")
736            || path.ends_with("::fill")
737            || path.ends_with("::fill_bytes")
738            || path.ends_with("::try_fill")
739            || path.ends_with("::try_fill_bytes")
740            || path.ends_with("::sample")
741            || path.ends_with("::sample_iter")
742            || path.ends_with("::next_u32")
743            || path.ends_with("::next_u64")
744            || path.ends_with("::thread_rng")
745            || path.ends_with("::rng")
746            || path.ends_with("::from_entropy")
747            || path.ends_with("::from_os_rng");
748        if rng_verb || path.contains("OsRng") {
749            return Some("Rand");
750        }
751        return None;
752    }
753    // Subprocess spawning. `tokio::process` is the async mirror of `std::process` — it exists
754    // only to spawn/control subprocesses (`Command`/`Child`, no pure data types like std's
755    // `Stdio`/`ExitStatus`/`exit`), so spawning through it is Exec just the same. Without this an
756    // async app's `tokio::process::Command::new(..).spawn()` classified pure — a silent under-report
757    // of subprocess execution, the dangerous direction (mirrors the tokio::fs/tokio::net coverage).
758    if path.starts_with("std::process::Command")
759        || path.starts_with("std::process::Child")
760        || path.starts_with("tokio::process::Command")
761        || path.starts_with("tokio::process::Child")
762        || path.starts_with("async_std::process::Command")
763        || path.starts_with("async_std::process::Child")
764        || crate_name == "async_process"
765        || crate_name == "portable_pty"
766    {
767        return Some("Exec");
768    }
769    // duct: a subprocess-orchestration crate. `cmd()`/`cmd!` only *build* an Expression; the
770    // spawn/wait happens at `run`/`read`/`start`. Match the execution verbs, not the builder.
771    if crate_name == "duct"
772        && (path.ends_with("::run")
773            || path.ends_with("::read")
774            || path.ends_with("::start")
775            || path.ends_with("::read_chars"))
776    {
777        return Some("Exec");
778    }
779    if path.starts_with("std::env::") {
780        return Some("Env");
781    }
782    // dotenvy / dotenv: load environment variables (reading a `.env` file and mutating the process
783    // environment). Match the load/read entry points; `Error`/builder types stay pure.
784    if matches!(crate_name, "dotenvy" | "dotenv")
785        && (path.ends_with("::dotenv")
786            || path.ends_with("::dotenv_override")
787            || path.ends_with("::from_path")
788            || path.ends_with("::from_path_override")
789            || path.ends_with("::from_filename")
790            || path.ends_with("::from_filename_override")
791            || path.ends_with("::from_read")
792            || path.ends_with("::from_read_override")
793            || path.ends_with("::load")
794            || path.ends_with("::var")
795            || path.ends_with("::vars"))
796    {
797        return Some("Env");
798    }
799    // Wall-clock reads. Match the `now` accessor precisely (ends_with), not any path
800    // containing the substring "now". The `time` crate (distinct from `std::time`/`chrono`)
801    // reads the clock via `now_utc`/`now_local` (and the deprecated `Instant::now`).
802    if (crate_name == "chrono" || path.starts_with("std::time::")) && path.ends_with("::now") {
803        return Some("Clock");
804    }
805    if crate_name == "time"
806        && (path.ends_with("::now_utc") || path.ends_with("::now_local") || path.ends_with("::now"))
807    {
808        return Some("Clock");
809    }
810    if crate_name == "tracing" {
811        return Some("Log");
812    }
813    // The `log` facade: its macros route through `log::__private_api`; the crate's types
814    // (`Level`, `LevelFilter`) are pure, so match the logging entry, not the whole crate.
815    if crate_name == "log" && path.contains("::__private_api") {
816        return Some("Log");
817    }
818    // Compiler diagnostic emission — the ONE genuinely effectful operation in the otherwise-pure
819    // rustc_* surface (a dylint lint's actual OUTPUT: it writes warnings/errors to the compiler's
820    // diagnostic sink). Classified `Log` (same family as `tracing`/`log` — program output). Match the
821    // emission verbs precisely; rustc_lint/rustc_errors are mostly pure types (Lint, LintId, the Diag
822    // BUILDERS), and only the terminal `emit`/`emit_span_lint` actually produces output.
823    if crate_name == "rustc_lint"
824        && (path.ends_with("::emit_span_lint")
825            || path.ends_with("::span_lint")
826            || path.ends_with("::span_lint_hir"))
827    {
828        return Some("Log");
829    }
830    if crate_name == "rustc_errors"
831        && (path.ends_with("::emit")
832            || path.ends_with("::emit_diagnostic")
833            || path.ends_with("::emit_now"))
834    {
835        return Some("Log");
836    }
837    if crate_name == "arboard" {
838        return Some("Clipboard");
839    }
840    None
841}
842
843pub fn cap_from_name(name: &str) -> Option<&'static str> {
844    EFFECTS.iter().copied().find(|e| *e == name)
845}
846
/// Translate a cap-std capability *type* into the effect that holding it authorises.
///
/// A value of one of these types (e.g. `&Dir`) is the real, unforgeable right to perform the
/// effect, so candor treats it as a declared capability — exactly like its own `&Fs` token.
///
/// Only crates whose name starts with `cap_` are considered; for any other crate, or for a type
/// name that is not in the table below, the result is `None`.
pub fn capstd_cap(crate_name: &str, type_name: &str) -> Option<&'static str> {
    // (capability type name, effect it grants)
    const GRANTS: &[(&str, &str)] = &[
        ("Dir", "Fs"),
        ("TcpListener", "Net"),
        ("TcpStream", "Net"),
        ("UdpSocket", "Net"),
        ("Pool", "Net"),
        ("UnixListener", "Ipc"),
        ("UnixStream", "Ipc"),
        ("UnixDatagram", "Ipc"),
        ("SystemClock", "Clock"),
        ("MonotonicClock", "Clock"),
    ];

    if !crate_name.starts_with("cap_") {
        return None;
    }
    GRANTS
        .iter()
        .find(|(ty, _)| *ty == type_name)
        .map(|&(_, effect)| effect)
}
862
#[cfg(test)]
mod tests {
    use super::*;

    /// Every DB client the classifier matches must also be advertised in `CALIBRATED_CRATES`;
    /// otherwise the receipt's coverage check would report a recognized crate as a blind spot.
    /// (Was nightly-lint-only; now runs on stable.)
    #[test]
    fn db_crates_are_calibrated() {
        for db_crate in DB_CRATES.iter() {
            let advertised = CALIBRATED_CRATES.contains(db_crate);
            assert!(
                advertised,
                "DB crate `{c}` is matched by classify() but missing from CALIBRATED_CRATES",
                c = db_crate
            );
        }
    }

    /// The converse direction: every crate advertised as calibrated must be matched by
    /// `classify()` for at least one probe path — a dead entry would silently suppress a real
    /// coverage warning.
    #[test]
    fn calibrated_crates_are_live() {
        for krate in CALIBRATED_CRATES.iter().copied() {
            // Probe paths are `<crate><tail>` for each known tail; one classified hit is enough.
            let is_live = CALIBRATION_PROBE_TAILS
                .iter()
                .map(|tail| format!("{krate}{tail}"))
                .any(|probe| classify(krate, &probe).is_some());
            assert!(
                is_live,
                "calibrated crate `{c}` is matched by no path in classify() — dead list entry",
                c = krate
            );
        }
    }

    /// A representative smoke test of the classifier's main families, so the published crate is
    /// not shipped untested (these used to live only in the nightly-only src/lib.rs).
    #[test]
    fn classify_core_effects() {
        // (crate name, def-path, expected effect) — checked in order, top to bottom.
        const CASES: &[(&str, &str, Option<&str>)] = &[
            ("std", "std::fs::read_to_string", Some("Fs")),
            // std::path stat-family methods are Fs (each is a stat/readdir syscall); the pure
            // string-manipulation surface stays unclassified (the blackout screen's gix-dir find).
            ("std", "std::path::Path::symlink_metadata", Some("Fs")),
            ("std", "std::path::PathBuf::read_dir", Some("Fs")),
            ("std", "std::path::Path::exists", Some("Fs")),
            ("std", "std::path::Path::join", None), // pure string manipulation
            ("std", "std::path::PathBuf::file_name", None),
            ("std", "std::path::Path::parent", None),
            ("std", "std::process::Command::new", Some("Exec")),
            ("std", "std::env::var", Some("Env")),
            ("reqwest", "reqwest::Client::execute", Some("Net")),
            // one-shot convenience fns send immediately → Net; the `Client::get` builder stays pure.
            ("reqwest", "reqwest::get", Some("Net")),
            ("reqwest", "reqwest::blocking::get", Some("Net")),
            ("reqwest", "reqwest::Client::get", None),
            ("reqwest", "reqwest::RequestBuilder::header", None),
            // nix routes through the libc syscall table (same leaves): I/O classified, generic
            // fd ops skipped.
            ("nix", "nix::fcntl::open", Some("Fs")),
            ("nix", "nix::sys::socket::connect", Some("Net")),
            ("nix", "nix::unistd::execvp", Some("Exec")),
            ("nix", "nix::unistd::write", None), // generic fd op — deliberately unclassified
            ("nix", "nix::unistd::getpid", None), // not I/O
            // rustix does raw syscalls (no libc underneath) → classified directly by leaf, same table.
            ("rustix", "rustix::time::clock_settime", Some("Clock")),
            ("rustix", "rustix::fs::symlink", Some("Fs")),
            ("rustix", "rustix::net::connect", Some("Net")),
            ("rustix", "rustix::io::read", None), // generic fd op
            // pnet raw packet capture: channel openers are Net, packet construction stays pure.
            ("pnet", "pnet::datalink::channel", Some("Net")),
            ("pnet", "pnet::transport::transport_channel", Some("Net")),
            ("pnet_datalink", "pnet_datalink::channel", Some("Net")),
            ("pnet", "pnet::packet::ethernet::EthernetPacket::new", None),
            ("pnet_base", "pnet_base::MacAddr::new", None),
            // ignore (gitignore-aware walker): walk executors are Fs, config builders stay pure.
            ("ignore", "ignore::WalkBuilder::build_parallel", Some("Fs")),
            ("ignore", "ignore::WalkBuilder::build", Some("Fs")),
            ("ignore", "ignore::WalkParallel::run", Some("Fs")),
            ("ignore", "ignore::overrides::OverrideBuilder::build", None), // pure config
            ("ignore", "ignore::gitignore::GitignoreBuilder::build", None), // pure config
            ("ignore", "ignore::DirEntry::path", None), // pure accessor
            // notify fs-watching: watcher constructors + watch/unwatch are Fs, data types stay pure.
            ("notify", "notify::RecommendedWatcher::new", Some("Fs")),
            ("notify", "notify::PollWatcher::new", Some("Fs")),
            ("notify", "notify::recommended_watcher", Some("Fs")),
            ("notify", "notify::INotifyWatcher::watch", Some("Fs")),
            ("notify", "notify::Config::default", None), // pure config
            ("notify", "notify::Event::new", None), // pure data type
            ("rusqlite", "rusqlite::Connection::execute", Some("Db")),
            // the rusqlite verb DIALECT (a verb probe found the canonical consumer API
            // classifying pure):
            ("rusqlite", "rusqlite::Connection::query_row", Some("Db")),
            ("rusqlite", "rusqlite::Statement::query_map", Some("Db")),
            ("rusqlite", "rusqlite::Connection::execute_batch", Some("Db")),
            ("rusqlite", "rusqlite::Connection::prepare_cached", Some("Db")),
            ("rusqlite", "rusqlite::Connection::open", Some("Db")),
            ("rusqlite", "rusqlite::Connection::open_in_memory", Some("Db")),
            // …but `open` stays rusqlite-only (postgres has no open; nothing else may borrow it):
            ("postgres", "postgres::Client::open", None),
            ("tokio_postgres", "tokio_postgres::Client::query_typed", Some("Db")),
            // diesel's LIMIT-1 + streaming executions; sqlx's multi-result stream:
            ("diesel", "diesel::RunQueryDsl::first", Some("Db")),
            ("diesel", "diesel::RunQueryDsl::load_iter", Some("Db")),
            ("sqlx", "sqlx::query::Query::fetch_many", Some("Db")),
            // sqlx's bare `query()` builder must STAY pure (the original sqlx lesson):
            ("sqlx", "sqlx::query", None),
            ("tracing", "tracing::event", Some("Log")),
            // FFI tiers (matched by distinctive leaf, alias-independent)
            ("libc", "libc::open", Some("Fs")),
            ("libc", "libc::connect", Some("Net")),
            ("libc", "libc::read", None), // generic fd op — deliberately unclassified
            ("ffi", "ffi::sqlite3_step", Some("Db")),
            ("raw", "raw::git_remote_fetch", Some("Net")),
            // libgit2 clone + submodule clone/update fetch over the network (an A/B on git2 0.20
            // caught `Submodule::update`/`clone` and `Repository::clone` reporting no Net — the
            // latter because the `src/build.rs` module was being dropped as if it were the Cargo
            // build script).
            ("raw", "raw::git_clone", Some("Net")),
            ("raw", "raw::git_submodule_clone", Some("Net")),
            ("raw", "raw::git_submodule_update", Some("Net")),
            ("raw", "raw::git_submodule_open", None), // local subrepo open — not Net
            // libcurl: the transfer/raw-socket entry points are Net (an A/B on curl 0.4 caught the
            // whole crate reporting ZERO Net); the big setopt/init/getinfo surface — and the
            // readiness-wait multi_wait/poll — stay unclassified (the loop's perform is the boundary).
            ("curl_sys", "curl_sys::curl_easy_perform", Some("Net")),
            ("curl_sys", "curl_sys::curl_easy_send", Some("Net")),
            ("curl_sys", "curl_sys::curl_multi_perform", Some("Net")),
            ("curl_sys", "curl_sys::curl_multi_socket_action", Some("Net")),
            ("curl_sys", "curl_sys::curl_easy_setopt", None), // in-memory option write
            ("curl_sys", "curl_sys::curl_easy_init", None), // handle alloc
            ("curl_sys", "curl_sys::curl_multi_wait", None), // readiness wait, no payload
            // consumer-side `curl` crate rule: the dispatch verbs are Net, the setopt builders pure.
            ("curl", "curl::easy::Easy::perform", Some("Net")),
            ("curl", "curl::multi::Multi::perform", Some("Net")),
            ("curl", "curl::easy::Easy::send", Some("Net")),
            ("curl", "curl::easy::Easy::url", None), // CURLOPT setter — pure
            // pure setter; Multi::timeout under-reported by design
            ("curl", "curl::easy::Easy::timeout", None),
            ("ffi", "ffi::SSL_connect", Some("Net")),
            // pure crates stay pure
            ("serde", "serde::Serialize::serialize", None),
            ("std", "std::vec::Vec::push", None),
        ];

        for &(krate, path, expected) in CASES {
            // The message names the failing case, since every row shares this one assert site.
            assert_eq!(classify(krate, path), expected, "classify({krate:?}, {path:?})");
        }
    }
}