use std::borrow::Cow;
use std::sync::Arc;
use pedant_types::{
Capability, CapabilityFinding, CapabilityProfile, ExecutionContext, FindingOrigin,
SourceLocation,
};
use crate::ir::FileIr;
/// Signature of a per-prefix validator used by the prefix tables in
/// `check_string_for_key_prefix` and `check_string_for_credential_prefix`.
///
/// It is called with the full candidate string and the remainder after the
/// matched prefix, and returns `true` when the candidate is accepted.
type PrefixValidator = fn(&str, &str) -> bool;
/// Path prefixes that map to `Capability::Network`.
///
/// Part of `ALL_PREFIX_TABLES`, which `resolve_capabilities` searches.
const NETWORK_PREFIXES: &[(&str, Capability)] = &[
("std::net", Capability::Network),
("tokio::net", Capability::Network),
("reqwest", Capability::Network),
("hyper", Capability::Network),
("ureq", Capability::Network),
("curl", Capability::Network),
("tungstenite", Capability::Network),
];
/// Path prefixes that map to filesystem capabilities.
///
/// Every entry reports `Capability::FileRead` except `tempfile`, which reports
/// `Capability::FileWrite`. Individual `std::fs` / `tokio::fs` write functions
/// are classified as `FileWrite` by `is_fs_write_function`, which
/// `resolve_capabilities` consults before this table.
const FILESYSTEM_PREFIXES: &[(&str, Capability)] = &[
("std::fs", Capability::FileRead),
("tokio::fs", Capability::FileRead),
("walkdir", Capability::FileRead),
("glob", Capability::FileRead),
("tempfile", Capability::FileWrite),
];
/// Path prefixes that map to `Capability::ProcessExec`.
///
/// Part of `ALL_PREFIX_TABLES`, which `resolve_capabilities` searches.
const PROCESS_PREFIXES: &[(&str, Capability)] = &[
("std::process", Capability::ProcessExec),
("tokio::process", Capability::ProcessExec),
("duct", Capability::ProcessExec),
];
/// Path prefixes that map to `Capability::EnvAccess`.
///
/// Prefixes are matched on whole path segments (see `path_matches_prefix`), so
/// `std::env::var` does not cover `std::env::var_os`, and `std::env::vars`
/// does not cover `std::env::vars_os`. Each environment-reading function of
/// `std::env` is therefore listed explicitly.
///
/// NOTE(review): the table lists individual functions rather than the whole
/// `std::env` module, presumably so unrelated items such as `std::env::args`
/// are not reported — confirm before widening it further.
const ENV_PREFIXES: &[(&str, Capability)] = &[
    ("std::env::var", Capability::EnvAccess),
    ("std::env::var_os", Capability::EnvAccess),
    ("std::env::vars", Capability::EnvAccess),
    ("std::env::vars_os", Capability::EnvAccess),
    ("dotenvy", Capability::EnvAccess),
    ("envy", Capability::EnvAccess),
];
/// Path prefixes that map to `Capability::Ffi`.
///
/// Part of `ALL_PREFIX_TABLES`, which `resolve_capabilities` searches.
const FFI_PREFIXES: &[(&str, Capability)] = &[
("libc", Capability::Ffi),
("nix", Capability::Ffi),
("winapi", Capability::Ffi),
("windows_sys", Capability::Ffi),
];
/// Path prefixes that map to `Capability::Crypto`.
///
/// Part of `ALL_PREFIX_TABLES`, which `resolve_capabilities` searches.
const CRYPTO_PREFIXES: &[(&str, Capability)] = &[
("ring", Capability::Crypto),
("rustls", Capability::Crypto),
("openssl", Capability::Crypto),
("aes", Capability::Crypto),
("sha2", Capability::Crypto),
("hmac", Capability::Crypto),
("ed25519_dalek", Capability::Crypto),
("x25519_dalek", Capability::Crypto),
];
/// Path prefixes that map to `Capability::SystemTime`.
///
/// Both `std::time::SystemTime` and `std::time::Instant` are reported under
/// the same capability, as are the `chrono` and `time` crates.
const SYSTEM_TIME_PREFIXES: &[(&str, Capability)] = &[
("std::time::SystemTime", Capability::SystemTime),
("std::time::Instant", Capability::SystemTime),
("chrono", Capability::SystemTime),
("time", Capability::SystemTime),
];
/// Every prefix table, in the order `resolve_capabilities` searches them.
///
/// The search stops at the first matching entry, so earlier tables (and
/// earlier entries within a table) take precedence.
const ALL_PREFIX_TABLES: &[&[(&str, Capability)]] = &[
NETWORK_PREFIXES,
FILESYSTEM_PREFIXES,
PROCESS_PREFIXES,
ENV_PREFIXES,
FFI_PREFIXES,
CRYPTO_PREFIXES,
SYSTEM_TIME_PREFIXES,
];
/// Function names that, directly under one of `FS_WRITE_PREFIXES`, count as a
/// filesystem write.
const FS_WRITE_SUFFIXES: &[&str] = &[
    "copy",
    "create_dir",
    "create_dir_all",
    "hard_link",
    "remove_dir",
    "remove_dir_all",
    "remove_file",
    "rename",
    "set_permissions",
    "write",
];

/// Module prefixes (including the trailing separator) whose functions are
/// checked against `FS_WRITE_SUFFIXES`.
const FS_WRITE_PREFIXES: &[&str] = &["std::fs::", "tokio::fs::"];

/// Returns `true` when `path` is exactly one of the filesystem module
/// prefixes followed by one of the known write function names.
///
/// The remainder after the prefix must equal the function name in full, so
/// deeper paths such as `std::fs::write::x` do not match.
fn is_fs_write_function(path: &str) -> bool {
    for module_prefix in FS_WRITE_PREFIXES {
        if let Some(function_name) = path.strip_prefix(*module_prefix) {
            if FS_WRITE_SUFFIXES.contains(&function_name) {
                return true;
            }
        }
    }
    false
}
use crate::ir::PATH_SEPARATOR;
fn path_matches_prefix(path: &str, prefix: &str) -> bool {
path == prefix
|| (path.starts_with(prefix)
&& path
.as_bytes()
.get(prefix.len()..prefix.len() + PATH_SEPARATOR.len())
== Some(PATH_SEPARATOR.as_bytes()))
}
/// Maps a path to the capability it implies, if any.
///
/// Known filesystem write functions are checked first and yield
/// `Capability::FileWrite`. Otherwise the prefix tables are scanned in
/// `ALL_PREFIX_TABLES` order and the first matching entry wins. Returns
/// `None` when nothing matches.
fn resolve_capabilities(path: &str) -> Option<Capability> {
    if is_fs_write_function(path) {
        return Some(Capability::FileWrite);
    }
    for table in ALL_PREFIX_TABLES {
        for &(prefix, capability) in table.iter() {
            if path_matches_prefix(path, prefix) {
                return Some(capability);
            }
        }
    }
    None
}
/// URL scheme prefixes that mark a string literal as a network endpoint.
const URL_SCHEMES: &[&str] = &["http://", "https://", "ws://", "wss://"];

/// Returns `true` when `value` looks like a network endpoint: it starts with
/// one of `URL_SCHEMES`, or passes `looks_like_ipv4` or `looks_like_ipv6`.
///
/// Strings shorter than eight bytes are rejected before any other check.
///
/// NOTE(review): the eight-byte minimum also rejects seven-byte dotted quads
/// such as `8.8.8.8` — confirm that this is intended.
fn check_string_for_endpoint(value: &str) -> bool {
    const MIN_ENDPOINT_LEN: usize = 8;

    if value.len() < MIN_ENDPOINT_LEN {
        return false;
    }
    let has_url_scheme = URL_SCHEMES.iter().any(|scheme| value.starts_with(*scheme));
    has_url_scheme || looks_like_ipv4(value) || looks_like_ipv6(value)
}
/// Splits `s` at its last `:` and returns the part before it, provided the
/// part after it parses as a `u16`.
///
/// Returns `None` when `s` has no `:` or the trailing part is not a `u16`.
fn strip_port_suffix(s: &str) -> Option<&str> {
    let (host, port) = s.rsplit_once(':')?;
    port.parse::<u16>().ok()?;
    Some(host)
}
/// Returns `true` when `s` is four dot-separated parts that each parse as a
/// `u8`, optionally followed by `:port`.
///
/// If `s` contains a `:`, the text after the last one must be a valid `u16`
/// (checked by `strip_port_suffix`); otherwise the string is rejected.
fn looks_like_ipv4(s: &str) -> bool {
    let host = if s.contains(':') {
        let Some(without_port) = strip_port_suffix(s) else {
            return false;
        };
        without_port
    } else {
        s
    };

    let mut octets = 0usize;
    for part in host.split('.') {
        if part.parse::<u8>().is_err() {
            return false;
        }
        octets += 1;
    }
    octets == 4
}
/// Removes bracket and port decoration from a possible IPv6 literal.
///
/// * `[addr]` yields `addr`.
/// * `[addr]:port` yields `addr` (everything before the last `]:`).
/// * A string starting with `[` but matching neither form yields the text
///   after the `[`.
/// * Anything else is returned unchanged.
fn extract_ipv6_body(s: &str) -> &str {
    match s.strip_prefix('[') {
        Some(inner) => inner
            .strip_suffix(']')
            .or_else(|| inner.rfind("]:").map(|pos| &inner[..pos]))
            .unwrap_or(inner),
        None => s,
    }
}

/// Returns `true` when `s`, after `extract_ipv6_body` strips any brackets and
/// port, is a syntactically valid IPv6 address.
///
/// Validation is delegated to the standard library parser, so colon-separated
/// hex that is not an address (clock times like `12:34:56`, MAC addresses,
/// over-long groups) is rejected, while compressed and IPv4-mapped forms are
/// accepted. Bodies shorter than three bytes are rejected up front, which
/// keeps a bare `::` from being reported as an endpoint.
fn looks_like_ipv6(s: &str) -> bool {
    const MIN_BODY_LEN: usize = 3;

    let body = extract_ipv6_body(s);
    body.len() >= MIN_BODY_LEN && body.parse::<std::net::Ipv6Addr>().is_ok()
}
/// Returns `true` when `value` contains the opening of a PEM header
/// (`-----BEGIN ` including the trailing space) anywhere in the string.
fn check_string_for_pem(value: &str) -> bool {
    const PEM_HEADER_START: &str = "-----BEGIN ";
    value.contains(PEM_HEADER_START)
}
/// Shortens `value` for display by keeping its first 16 and last 4 characters
/// and replacing the middle with `…`.
///
/// * Values of at most 40 bytes are returned borrowed and unchanged.
/// * Longer values whose head and tail would meet or overlap (20 characters
///   or fewer, which can happen with multi-byte text) are also returned
///   unchanged. The result is therefore never longer than the input and never
///   repeats part of it.
/// * Otherwise an owned `head…tail` string is returned.
///
/// Head and tail are measured in characters, so slicing always lands on
/// UTF-8 character boundaries.
pub fn truncate_evidence(value: &str) -> Cow<'_, str> {
    const MAX_VERBATIM_BYTES: usize = 40;
    const HEAD_CHARS: usize = 16;
    const TAIL_CHARS: usize = 4;

    if value.len() <= MAX_VERBATIM_BYTES {
        return Cow::Borrowed(value);
    }

    // Byte offset just past the head: the start of the character after it.
    let head_end = value
        .char_indices()
        .nth(HEAD_CHARS)
        .map(|(offset, _)| offset);
    // Byte offset of the first tail character, counted from the end.
    let tail_start = value
        .char_indices()
        .rev()
        .nth(TAIL_CHARS - 1)
        .map(|(offset, _)| offset);

    match (head_end, tail_start) {
        // Only elide when at least one character lies between head and tail.
        (Some(head_end), Some(tail_start)) if head_end < tail_start => {
            Cow::Owned(format!("{}…{}", &value[..head_end], &value[tail_start..]))
        }
        _ => Cow::Borrowed(value),
    }
}
/// Returns `true` when `value` consists only of ASCII hex digits and its
/// length is 64, 96, or an even number of at least 128.
fn check_string_for_hex_key(value: &str) -> bool {
    let plausible_length = match value.len() {
        64 | 96 => true,
        n => n >= 128 && n % 2 == 0,
    };
    plausible_length && value.bytes().all(|byte| byte.is_ascii_hexdigit())
}
/// Builds a 256-entry lookup table, indexed by byte value, that is `true`
/// for exactly the bytes of the Base58 alphabet used below.
const fn base58_table() -> [bool; 256] {
    let symbols = b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
    let mut allowed = [false; 256];
    // `while` rather than `for`: iterators are not usable in a `const fn`.
    let mut idx = 0;
    while idx < symbols.len() {
        allowed[symbols[idx] as usize] = true;
        idx += 1;
    }
    allowed
}

/// Membership table for the Base58 alphabet, computed at compile time.
const BASE58_TABLE: [bool; 256] = base58_table();

/// Returns `true` when every byte of `value` is in the Base58 alphabet.
/// The empty string is accepted.
fn is_base58(value: &str) -> bool {
    value.bytes().all(|byte| BASE58_TABLE[usize::from(byte)])
}
fn check_string_for_base58_key(value: &str) -> bool {
let len = value.len();
match value.as_bytes().first() {
Some(b'5' | b'K' | b'L') if (51..=52).contains(&len) => is_base58(value),
_ if (64..=88).contains(&len) => is_base58(value),
_ => false,
}
}
fn check_string_for_key_prefix(value: &str) -> bool {
const KEY_PREFIXES: &[(&str, PrefixValidator)] = &[
("AGE-SECRET-KEY-1", |v, _| v.len() > 16),
("xprv", |v, suffix| v.len() >= 111 && is_base58(suffix)),
("ed25519:", |v, _| v.len() > 8),
("0x", |v, suffix| {
v.len() == 66 && suffix.bytes().all(|b| b.is_ascii_hexdigit())
}),
];
KEY_PREFIXES.iter().any(|(prefix, validate)| {
value
.strip_prefix(prefix)
.is_some_and(|suffix| validate(value, suffix))
})
}
/// `PrefixValidator` for the `sk…` credential prefixes: accepts a suffix of
/// at least 24 bytes made up only of ASCII letters and digits.
///
/// The full string is unused; the parameter exists to fit the validator
/// signature.
fn validate_sk_suffix(_full: &str, suffix: &str) -> bool {
    const MIN_SUFFIX_LEN: usize = 24;

    if suffix.len() < MIN_SUFFIX_LEN {
        return false;
    }
    suffix.chars().all(|c| c.is_ascii_alphanumeric())
}
/// Returns `true` when `value` has the shape of a known service credential.
///
/// * Strings with a prefix accepted by `starts_with_github_prefix` must be
///   exactly 40 bytes, with only ASCII letters and digits after the 4-byte
///   prefix. No other rule is tried for them.
/// * `AKIA…` must be exactly 20 bytes, with only ASCII uppercase letters and
///   digits after the prefix.
/// * `sk_live_…`, `sk_test_…` and `sk-…` are checked by `validate_sk_suffix`.
fn check_string_for_credential_prefix(value: &str) -> bool {
    const CREDENTIAL_PREFIXES: &[(&str, PrefixValidator)] = &[
        ("AKIA", |full, rest| {
            full.len() == 20
                && rest
                    .bytes()
                    .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit())
        }),
        ("sk_live_", validate_sk_suffix),
        ("sk_test_", validate_sk_suffix),
        ("sk-", validate_sk_suffix),
    ];

    if starts_with_github_prefix(value) {
        // The prefix check succeeded, so byte 4 is a character boundary.
        return value.len() == 40 && value[4..].bytes().all(|b| b.is_ascii_alphanumeric());
    }

    for (prefix, is_valid) in CREDENTIAL_PREFIXES {
        if let Some(rest) = value.strip_prefix(*prefix) {
            if is_valid(value, rest) {
                return true;
            }
        }
    }
    false
}
/// Returns `true` when `value` begins with one of GitHub's four-byte token
/// prefixes: `ghp_` (personal access), `gho_` (OAuth), `ghu_`
/// (user-to-server), `ghs_` (server-to-server) or `ghr_` (refresh).
///
/// `str::get` is used so that inputs shorter than four bytes, or with a
/// multi-byte character straddling byte 4, yield `false` instead of
/// panicking.
fn starts_with_github_prefix(value: &str) -> bool {
    matches!(
        value.get(..4),
        Some("ghp_" | "gho_" | "ghu_" | "ghs_" | "ghr_")
    )
}
/// Appends `CapabilityFinding`s that share the same file, origin and
/// execution context to a borrowed output vector.
struct FindingEmitter<'a> {
    /// Destination for emitted findings.
    findings: &'a mut Vec<CapabilityFinding>,
    /// File recorded in each finding's location.
    file: &'a Arc<str>,
    /// Origin recorded on every finding from this emitter.
    origin: FindingOrigin,
    /// Execution context copied onto every finding.
    execution_context: Option<ExecutionContext>,
}

impl FindingEmitter<'_> {
    /// Pushes a single finding.
    ///
    /// `column` is stored as `column + 1`.
    /// NOTE(review): presumably this converts a 0-based column to 1-based —
    /// confirm against the span producer.
    /// `language` and `reachable` are left as `None`.
    fn emit(&mut self, capability: Capability, line: usize, column: usize, evidence: &str) {
        let location = SourceLocation {
            file: Arc::clone(self.file),
            line,
            column: column + 1,
        };
        let finding = CapabilityFinding {
            capability,
            location,
            evidence: Arc::from(evidence),
            origin: Some(self.origin),
            language: None,
            execution_context: self.execution_context,
            reachable: None,
        };
        self.findings.push(finding);
    }

    /// Runs `mapper` over `facts` in order and emits a finding for each fact
    /// it maps to `Some((capability, line, column, evidence))`; facts mapped
    /// to `None` are skipped.
    fn emit_from_facts<'a, T: 'a>(
        &mut self,
        facts: &'a [T],
        mapper: impl FnMut(&'a T) -> Option<(Capability, usize, usize, &'a str)>,
    ) {
        for (capability, line, column, evidence) in facts.iter().filter_map(mapper) {
            self.emit(capability, line, column, evidence);
        }
    }
}
/// Predicates that recognise key or credential material in a string literal.
///
/// `detect_string_literals` reports `Capability::Crypto` (with truncated
/// evidence) when any one of them returns `true`.
const KEY_MATERIAL_CHECKS: &[fn(&str) -> bool] = &[
check_string_for_hex_key,
check_string_for_base58_key,
check_string_for_key_prefix,
check_string_for_credential_prefix,
];
/// A string-literal predicate paired with the capability reported when the
/// predicate matches.
type StringLiteralCheck = (fn(&str) -> bool, Capability);
/// Content checks applied to each string literal by `detect_string_literals`.
///
/// Only the first entry whose predicate matches produces a finding, so order
/// matters for a literal that would satisfy more than one check.
const STRING_LITERAL_CHECKS: &[StringLiteralCheck] = &[
(check_string_for_endpoint, Capability::Network),
(check_string_for_pem, Capability::Crypto),
];
pub fn detect_capabilities(
ir: &FileIr,
execution_context: Option<ExecutionContext>,
) -> CapabilityProfile {
let file_path = &ir.file_path;
let mut findings = Vec::new();
detect_use_paths(ir, file_path, execution_context, &mut findings);
detect_unsafe_sites(ir, file_path, execution_context, &mut findings);
detect_extern_blocks(ir, file_path, execution_context, &mut findings);
detect_attributes(ir, file_path, execution_context, &mut findings);
detect_string_literals(ir, file_path, execution_context, &mut findings);
CapabilityProfile {
findings: findings.into_boxed_slice(),
}
}
/// Emits an `Import`-origin finding for every use path in `ir` that
/// `resolve_capabilities` maps to a capability, using the path itself as
/// evidence. Paths with no mapping are skipped.
fn detect_use_paths(
    ir: &FileIr,
    file_path: &Arc<str>,
    execution_context: Option<ExecutionContext>,
    findings: &mut Vec<CapabilityFinding>,
) {
    FindingEmitter {
        findings,
        file: file_path,
        origin: FindingOrigin::Import,
        execution_context,
    }
    .emit_from_facts(&ir.use_paths, |import| {
        let capability = resolve_capabilities(&import.path)?;
        let span = &import.span;
        Some((capability, span.line, span.column, import.path.as_ref()))
    });
}
/// Emits a `CodeSite`-origin `Capability::UnsafeCode` finding for every
/// unsafe site recorded in `ir`, using the site's own evidence text.
fn detect_unsafe_sites(
    ir: &FileIr,
    file_path: &Arc<str>,
    execution_context: Option<ExecutionContext>,
    findings: &mut Vec<CapabilityFinding>,
) {
    let mut unsafe_emitter = FindingEmitter {
        findings,
        file: file_path,
        origin: FindingOrigin::CodeSite,
        execution_context,
    };
    unsafe_emitter.emit_from_facts(&ir.unsafe_sites, |unsafe_site| {
        let span = &unsafe_site.span;
        let evidence = unsafe_site.evidence.as_ref();
        Some((Capability::UnsafeCode, span.line, span.column, evidence))
    });
}
/// Emits a `CodeSite`-origin `Capability::Ffi` finding for every extern
/// block recorded in `ir`, with the fixed evidence text `extern block`.
fn detect_extern_blocks(
    ir: &FileIr,
    file_path: &Arc<str>,
    execution_context: Option<ExecutionContext>,
    findings: &mut Vec<CapabilityFinding>,
) {
    const EVIDENCE: &str = "extern block";

    FindingEmitter {
        findings,
        file: file_path,
        origin: FindingOrigin::CodeSite,
        execution_context,
    }
    .emit_from_facts(&ir.extern_blocks, |extern_block| {
        let span = &extern_block.span;
        Some((Capability::Ffi, span.line, span.column, EVIDENCE))
    });
}
fn detect_attributes(
ir: &FileIr,
file_path: &Arc<str>,
execution_context: Option<ExecutionContext>,
findings: &mut Vec<CapabilityFinding>,
) {
let mut emitter = FindingEmitter {
findings,
file: file_path,
origin: FindingOrigin::Attribute,
execution_context,
};
emitter.emit_from_facts(&ir.attributes, |attr| {
let (cap, evidence) = match &*attr.name {
"link" => (Capability::Ffi, "#[link]"),
"proc_macro" => (Capability::ProcMacro, "#[proc_macro]"),
"proc_macro_derive" => (Capability::ProcMacro, "#[proc_macro_derive]"),
"proc_macro_attribute" => (Capability::ProcMacro, "#[proc_macro_attribute]"),
_ => return None,
};
Some((cap, attr.span.line, attr.span.column, evidence))
});
}
/// Emits `StringLiteral`-origin findings for the string literals in `ir`.
///
/// Each literal can produce up to two findings, in this order:
///
/// 1. The capability of the first matching entry in `STRING_LITERAL_CHECKS`,
///    with the full literal as evidence.
/// 2. `Capability::Crypto` when any `KEY_MATERIAL_CHECKS` predicate matches,
///    with evidence shortened by `truncate_evidence`.
fn detect_string_literals(
    ir: &FileIr,
    file_path: &Arc<str>,
    execution_context: Option<ExecutionContext>,
    findings: &mut Vec<CapabilityFinding>,
) {
    let mut emitter = FindingEmitter {
        findings,
        file: file_path,
        origin: FindingOrigin::StringLiteral,
        execution_context,
    };

    for literal in &ir.string_literals {
        let text: &str = &literal.value;
        let (line, column) = (literal.span.line, literal.span.column);

        let content_capability = STRING_LITERAL_CHECKS
            .iter()
            .find_map(|&(check, capability)| check(text).then_some(capability));
        if let Some(capability) = content_capability {
            emitter.emit(capability, line, column, text);
        }

        let is_key_material = KEY_MATERIAL_CHECKS.iter().any(|check| check(text));
        if is_key_material {
            emitter.emit(Capability::Crypto, line, column, &truncate_evidence(text));
        }
    }
}