// pyrograph 0.1.0
//
// GPU-accelerated taint analysis for supply chain malware detection
// Documentation
use super::*;
use crate::labels::{label_node, SanitizerDef, SinkDef, SourceDef};

pub fn default_label_set() -> LabelSet {
    LabelSet {
        sources: vec![
            // Match both fully-qualified and short names
            SourceDef { id: "rust-env-var".into(), pattern: "env::var".into(), category: "credential".into() },
            SourceDef { id: "rust-env-vars".into(), pattern: "env::vars".into(), category: "credential".into() },
            SourceDef { id: "rust-fs-read-to-string".into(), pattern: "read_to_string".into(), category: "file".into() },
            SourceDef { id: "rust-fs-read".into(), pattern: "fs::read".into(), category: "file".into() },
            SourceDef { id: "rust-home-dir".into(), pattern: "dirs::home_dir".into(), category: "system".into() },
            SourceDef { id: "rust-config-dir".into(), pattern: "dirs::config_dir".into(), category: "system".into() },
            SourceDef { id: "rust-cargo-creds".into(), pattern: ".cargo/credentials".into(), category: "sensitive-file".into() },
            SourceDef { id: "rust-ssh-key".into(), pattern: ".ssh/id_".into(), category: "sensitive-file".into() },
            SourceDef { id: "rust-ssh-dir".into(), pattern: ".ssh/".into(), category: "sensitive-file".into() },
            SourceDef { id: "rust-npmrc".into(), pattern: ".npmrc".into(), category: "sensitive-file".into() },
            SourceDef { id: "rust-gitconfig".into(), pattern: ".gitconfig".into(), category: "sensitive-file".into() },
                        SourceDef { id: "rust-etc-shadow".into(), pattern: "/etc/shadow".into(), category: "sensitive-file".into() },
            // Network read operations as sources — data FROM the network is attacker-controlled
            SourceDef { id: "rust-tcp-read".into(), pattern: ".read".into(), category: "network-input".into() },
            SourceDef { id: "rust-recv-from".into(), pattern: ".recv_from".into(), category: "network-input".into() },
            SourceDef { id: "rust-recv".into(), pattern: ".recv".into(), category: "network-input".into() },
            // URLs in build.rs are suspicious — legit builds use crate deps, not raw HTTP
            SourceDef { id: "rust-http-url".into(), pattern: "http://".into(), category: "network-input".into() },
            SourceDef { id: "rust-https-url".into(), pattern: "https://".into(), category: "network-input".into() },
            // Shell commands as sources (running curl/wget in build.rs IS the attack)
            SourceDef { id: "rust-curl".into(), pattern: "curl".into(), category: "shell".into() },
            SourceDef { id: "rust-wget".into(), pattern: "wget".into(), category: "shell".into() },
            SourceDef { id: "rust-sh".into(), pattern: "/bin/sh".into(), category: "shell".into() },
            SourceDef { id: "rust-bash".into(), pattern: "/bin/bash".into(), category: "shell".into() },
            // Compile-time sources — content injected during build
            SourceDef { id: "rust-include".into(), pattern: "include!".into(), category: "file".into() },
            SourceDef { id: "rust-include-bytes".into(), pattern: "include_bytes!".into(), category: "file".into() },
            SourceDef { id: "rust-env-macro".into(), pattern: "env!".into(), category: "credential".into() },
            SourceDef { id: "rust-out-dir".into(), pattern: "OUT_DIR".into(), category: "system".into() },
        ],
        sinks: vec![
            SinkDef { id: "rust-libc-socket".into(), pattern: "socket".into(), category: "network".into() },
            SinkDef { id: "rust-libc-connect".into(), pattern: "connect".into(), category: "network".into() },
            SinkDef { id: "rust-libc-dup2".into(), pattern: "dup2".into(), category: "system".into() },
            SinkDef { id: "rust-libc-execve".into(), pattern: "execve".into(), category: "exec".into() },
            SinkDef { id: "rust-nix-openpty".into(), pattern: "openpty".into(), category: "system".into() },
            SinkDef { id: "rust-ssh-connect".into(), pattern: "client::connect".into(), category: "network".into() },
            SinkDef { id: "rust-unix-stream-connect".into(), pattern: "UnixStream::connect".into(), category: "network".into() },

            // Match both fully-qualified and short names (after `use` imports)
            SinkDef { id: "rust-command-new".into(), pattern: "Command::new".into(), category: "exec".into() },
            SinkDef { id: "rust-command-spawn".into(), pattern: ".spawn".into(), category: "exec".into() },
            SinkDef { id: "rust-command-output".into(), pattern: ".output".into(), category: "exec".into() },
            // .status removed: too generic (matches HTTP response.status()).
            // Command execution is already caught by Command::new + .spawn/.output.
            SinkDef { id: "rust-tcp-connect".into(), pattern: "TcpStream::connect".into(), category: "network".into() },
            SinkDef { id: "rust-udp-bind".into(), pattern: "UdpSocket::bind".into(), category: "network".into() },
            SinkDef { id: "rust-reqwest-get".into(), pattern: "reqwest::get".into(), category: "network".into() },
            SinkDef { id: "rust-reqwest-blocking-get".into(), pattern: "reqwest::blocking::get".into(), category: "network".into() },
            SinkDef { id: "rust-reqwest-blocking-post".into(), pattern: "reqwest::blocking::post".into(), category: "network".into() },
            SinkDef { id: "rust-client-post".into(), pattern: "Client::post".into(), category: "network".into() },
            SinkDef { id: "rust-post-method".into(), pattern: ".post".into(), category: "network".into() },
            SinkDef { id: "rust-send-method".into(), pattern: ".send".into(), category: "network".into() },
            SinkDef { id: "rust-fs-write".into(), pattern: "fs::write".into(), category: "file".into() },
            SinkDef { id: "rust-file-create".into(), pattern: "File::create".into(), category: "file".into() },
            SinkDef { id: "rust-open-options-new".into(), pattern: "OpenOptions::new".into(), category: "file".into() },
            SinkDef { id: "rust-open-options-open".into(), pattern: ".open".into(), category: "file".into() },
            SinkDef { id: "rust-write-all".into(), pattern: "write_all".into(), category: "network".into() },
            SinkDef { id: "rust-send-to".into(), pattern: "send_to".into(), category: "network".into() },
            SinkDef { id: "rust-include-str".into(), pattern: "include_str!".into(), category: "file".into() },
            // libc/syscall sinks — low-level OS interaction
            SinkDef { id: "rust-libc-syscall".into(), pattern: "syscall".into(), category: "exec".into() },
            SinkDef { id: "rust-libc-ptrace".into(), pattern: "ptrace".into(), category: "exec".into() },
            SinkDef { id: "rust-libc-mprotect".into(), pattern: "mprotect".into(), category: "exec".into() },
            SinkDef { id: "rust-libc-dlopen".into(), pattern: "dlopen".into(), category: "exec".into() },
            SinkDef { id: "rust-libc-memfd".into(), pattern: "memfd_create".into(), category: "exec".into() },
        ],
        sanitizers: vec![
            SanitizerDef { id: "rust-parse".into(), pattern: "parse".into() },
            SanitizerDef { id: "rust-from-str".into(), pattern: "from_str".into() },
            SanitizerDef { id: "rust-file-name".into(), pattern: ".file_name".into() },
        ],
    }
}

/// Labels every node in `graph` that matches a definition in `label_set`.
///
/// Node ids `0..node_count` are visited in order. For each node that exists,
/// `label_node` is tried on the node's `name` first; the `alias` is consulted
/// only when the name produced no label and an alias is present. A node with
/// no match is left untouched, so any label it already carries is kept.
pub(super) fn apply_labels(graph: &mut TaintGraph, label_set: &LabelSet) {
    let id_limit = graph.node_count() as u32;
    for node_id in 0..id_limit {
        // Resolve the label while only reading the graph; the mutable access
        // below happens after this lookup has finished.
        let resolved = graph.node(node_id).and_then(|current| {
            let by_name = label_node(label_set, &current.name);
            if by_name.is_some() {
                return by_name;
            }
            let alias = current.alias.as_deref()?;
            label_node(label_set, alias)
        });

        let Some(found) = resolved else {
            continue;
        };

        if let Some(target) = graph.node_mut(node_id) {
            target.label = Some(found);
        }
    }
}