pyrograph 0.1.0

GPU-accelerated taint analysis for supply chain malware detection
Documentation
use crate::labels::{LabelSet, TaintLabel};
use std::sync::Arc;

/// Identifier of a node inside a `TaintGraph`.
///
/// `TaintGraph::add_node` assigns ids sequentially, so an id is also the node's index in
/// insertion order.
pub type NodeId = u32;

/// Category of a graph node.
///
/// The enum is fieldless, so it is `Copy` (no `.clone()` needed to pass it around) and `Hash`
/// (usable as a map/set key). The numeric codes mentioned on each variant are the ones
/// `pack_node_data` writes into the kind byte of the packed GPU node word.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NodeKind {
    /// A variable; packed as kind code `1`.
    Variable,
    /// A call; packed as kind code `2`.
    Call,
    /// An import; packed as kind code `3`.
    Import,
    /// A literal; packed as kind code `4`.
    Literal,
}

/// Category of a directed edge between two nodes.
///
/// `TaintGraph::to_gpu_buffers` exports `Assignment`, `Argument` and `Return` edges and omits
/// `Call` edges.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EdgeKind {
    /// Assignment edge; included in the GPU edge buffer.
    Assignment,
    /// Call edge; the only kind left out of the GPU edge buffer.
    Call,
    /// Return edge; included in the GPU edge buffer.
    Return,
    /// Argument edge; included in the GPU edge buffer.
    Argument,
}

/// A single vertex of the taint graph.
#[derive(Debug, Clone)]
pub struct Node {
    /// Identifier assigned by `TaintGraph::add_node`; equals the node's index in insertion order.
    pub id: NodeId,
    /// Category of the node.
    pub kind: NodeKind,
    /// Name supplied by the caller of `TaintGraph::add_node`.
    pub name: String,
    /// Optional alias. `TaintGraph::add_node` always initialises it to `None`; whoever sets it
    /// does so later (e.g. through `TaintGraph::node_mut`).
    /// NOTE(review): presumably an import/binding alias — confirm with the code that fills it in.
    pub alias: Option<String>,
    /// Taint classification, if any. `TaintGraph::sources` and `TaintGraph::sinks` filter on it.
    pub label: Option<TaintLabel>,
}

/// A directed edge described by both of its endpoints.
///
/// NOTE(review): nothing in the visible part of this file uses this type — `TaintGraph` keeps
/// its edges in per-source adjacency lists of a private target/kind pair instead.
#[derive(Debug, Clone)]
pub struct Edge {
    /// Id of the node the edge starts at.
    pub from: NodeId,
    /// Id of the node the edge points to.
    pub to: NodeId,
    /// Category of the edge.
    pub kind: EdgeKind,
}

/// Directed graph of nodes with outgoing-edge adjacency lists and an optional shared label set.
#[derive(Debug)]
pub struct TaintGraph {
    /// All nodes in insertion order; a node's id is its index in this vector.
    nodes: Vec<Node>,
    /// Outgoing edges per node: `adjacency[i]` belongs to `nodes[i]`. `add_node` pushes one
    /// (initially empty) list for every node it creates.
    adjacency: Vec<Vec<EdgeTarget>>,
    /// Label set installed through `set_label_set`; `None` until then.
    labels: Option<Arc<LabelSet>>,
}

/// One entry of a node's adjacency list: the head of an outgoing edge and the edge's kind.
/// The tail is implied by which adjacency list the entry lives in.
#[derive(Debug, Clone)]
struct EdgeTarget {
    /// Id of the node the edge points to.
    to: NodeId,
    /// Category of the edge.
    kind: EdgeKind,
}

/// `TaintGraph::default()` produces the same empty graph as `TaintGraph::new()`.
impl Default for TaintGraph {
    /// Delegates to `TaintGraph::new`.
    fn default() -> Self {
        Self::new()
    }
}

impl TaintGraph {
    pub fn new() -> Self {
        Self {
            nodes: Vec::new(),
            adjacency: Vec::new(),
            labels: None,
        }
    }

    pub fn add_node(&mut self, kind: NodeKind, name: String, label: Option<TaintLabel>) -> NodeId {
        let id = self.nodes.len() as NodeId;
        self.nodes.push(Node {
            id,
            kind,
            name,
            alias: None,
            label,
        });
        self.adjacency.push(Vec::new());
        id
    }

    pub fn add_edge(&mut self, from: NodeId, to: NodeId, kind: EdgeKind) {
        if let Some(adj) = self.adjacency.get_mut(from as usize) {
            adj.push(EdgeTarget { to, kind });
        }
    }

    pub fn sources(&self) -> impl Iterator<Item = &Node> {
        self.nodes.iter().filter(|n| {
            n.label.as_ref().is_some_and(|l| l.is_source())
        })
    }

    pub fn sinks(&self) -> impl Iterator<Item = &Node> {
        self.nodes.iter().filter(|n| {
            n.label.as_ref().is_some_and(|l| l.is_sink())
        })
    }

    pub fn node(&self, id: NodeId) -> Option<&Node> {
        self.nodes.get(id as usize)
    }

    pub fn neighbors(&self, id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
        self.adjacency.get(id as usize)
            .into_iter()
            .flat_map(|targets| targets.iter().map(|t| t.to))
    }

    pub fn edges_from(&self, id: NodeId) -> impl Iterator<Item = (NodeId, EdgeKind)> + '_ {
        self.adjacency
            .get(id as usize)
            .into_iter()
            .flat_map(|targets| targets.iter().map(|t| (t.to, t.kind.clone())))
    }

    /// Flatten graph into GPU-ready buffers (CSR format)
    /// node_buf: [kind: u8, label_type: u8, label_id: u16] packed into u32
    /// edge_offset_buf: [start_index_of_edges_for_node_i]
    /// edge_target_buf: [target_node_id]
    pub fn to_gpu_buffers(&self) -> (Vec<u32>, Vec<u32>, Vec<u32>) {
        let mut node_buf = Vec::with_capacity(self.nodes.len());
        let mut edge_offset_buf = Vec::with_capacity(self.nodes.len() + 1);
        let mut edge_target_buf = Vec::new();

        let mut current_offset = 0u32;
        for (i, node) in self.nodes.iter().enumerate() {
            // Pack node data
            let packed = pack_node_data(node);
            node_buf.push(packed);

            edge_offset_buf.push(current_offset);
            if let Some(adj) = self.adjacency.get(i) {
                for target in adj {
                    if matches!(target.kind, EdgeKind::Assignment | EdgeKind::Argument | EdgeKind::Return) {
                        edge_target_buf.push(target.to);
                    }
                }
                current_offset += adj.iter().filter(|t| matches!(t.kind, EdgeKind::Assignment | EdgeKind::Argument | EdgeKind::Return)).count() as u32;
            }
        }
        edge_offset_buf.push(current_offset);

        (node_buf, edge_offset_buf, edge_target_buf)
    }

    pub fn node_count(&self) -> usize {
        self.nodes.len()
    }

    pub fn nodes(&self) -> &[Node] {
        &self.nodes
    }

    pub fn node_mut(&mut self, id: NodeId) -> Option<&mut Node> {
        self.nodes.get_mut(id as usize)
    }

    pub fn set_label_set(&mut self, label_set: LabelSet) {
        self.labels = Some(Arc::new(label_set));
    }

    pub fn label_set(&self) -> Option<&LabelSet> {
        self.labels.as_deref()
    }
}

/// Packs a node's kind and label into a single `u32` for the GPU node buffer.
///
/// Bit layout, most significant first:
///
/// * bits 24..32 — kind code (`Variable` = 1, `Call` = 2, `Import` = 3, `Literal` = 4)
/// * bits 16..24 — label type (0 = unlabelled, 1 = source, 2 = sink, 3 = both, 4 = sanitizer)
/// * bits  0..16 — label id; for `Both` the source id is shifted into the upper byte and the
///   sink id is OR-ed into the lower bits
///
/// The label id is masked to 16 bits, so wider ids are truncated.
/// NOTE(review): for `Both`, a sink id above 255 would overlap the source byte — confirm that
/// the id types defined in `labels` cannot exceed 8 bits.
fn pack_node_data(node: &Node) -> u32 {
    let kind_bits: u32 = match node.kind {
        NodeKind::Variable => 1,
        NodeKind::Call => 2,
        NodeKind::Import => 3,
        NodeKind::Literal => 4,
    };

    let (type_bits, id_bits): (u32, u32) = match node.label {
        Some(TaintLabel::Source(source)) => (1, source as u32),
        Some(TaintLabel::Sink(sink)) => (2, sink as u32),
        Some(TaintLabel::Both(source, sink)) => (3, ((source as u32) << 8) | (sink as u32)),
        Some(TaintLabel::Sanitizer(sanitizer)) => (4, sanitizer as u32),
        None => (0, 0),
    };

    let kind_field = kind_bits << 24;
    let type_field = type_bits << 16;
    let id_field = id_bits & 0xFFFF;
    kind_field | type_field | id_field
}

/// Extracts the label-type byte (bits 16..24) from a packed node word.
///
/// The result is always in `0..=255`; compare it against the `LABEL_TYPE_*` constants.
pub fn unpack_label_type(packed: u32) -> u32 {
    // In big-endian order byte 0 holds bits 24..32 and byte 1 holds bits 16..24.
    let [_kind, label_type, ..] = packed.to_be_bytes();
    u32::from(label_type)
}

// Label-type codes stored in bits 16..24 of a packed node word. They mirror the values that
// `pack_node_data` writes and that `unpack_label_type` returns; 0 (no constant) means the node
// carries no label.

/// Label-type code written for `TaintLabel::Source`.
pub const LABEL_TYPE_SOURCE: u32 = 1;
/// Label-type code written for `TaintLabel::Sink`.
pub const LABEL_TYPE_SINK: u32 = 2;
/// Label-type code written for `TaintLabel::Both`.
pub const LABEL_TYPE_BOTH: u32 = 3;
/// Label-type code written for `TaintLabel::Sanitizer`.
pub const LABEL_TYPE_SANITIZER: u32 = 4;