use std::collections::BTreeMap;
use external_dataflow_engine::{
ifds_gpu::{ifds_gpu_step, IfdsShape, OP_ID as WEIR_IFDS_GPU_OP_ID},
reachability_witness::{ExtractedPath, PathSeed},
};
use vyre_foundation::ir::Program;
use crate::{
dataflow::{DynamicPrimitiveSoundness, Soundness},
security::facts::{
AnalysisFact, AnalysisFactError, AnalysisFactTable, AnalysisSourceSpan, FactId, FactKind,
FindingProofBundle,
SourceToSinkFindingRequest,
},
};
/// Backend identifier stored in `WeirIfdsSecurityDispatch::backend_id` by
/// `route_security_taint_through_weir_ifds`.
pub const WEIR_IFDS_SECURITY_BACKEND_ID: &str = "weir-ifds-gpu";
/// String identifiers for the buffers an IFDS security dispatch refers to.
///
/// `validate` rejects any value that is empty after trimming whitespace and
/// rejects `frontier_in == frontier_out`. Only `frontier_in` and
/// `frontier_out` are consumed in this file (they are handed to
/// `ifds_gpu_step`); the remaining fields are carried along unchanged.
// NOTE(review): the `pg_*` names suggest program-graph buffers and the
// `fact_*` names suggest fact-table columns — confirm against the engine
// that binds these identifiers.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct WeirIfdsSecurityBuffers {
/// Identifier of the edge-offsets buffer.
pub pg_edge_offsets: String,
/// Identifier of the edge-targets buffer.
pub pg_edge_targets: String,
/// Identifier of the edge-kind-mask buffer.
pub pg_edge_kind_mask: String,
/// Identifier of the node-tags buffer.
pub pg_node_tags: String,
/// Identifier of the fact-ids buffer.
pub fact_ids: String,
/// Identifier of the fact-kinds buffer.
pub fact_kinds: String,
/// Identifier of the fact-subjects buffer.
pub fact_subjects: String,
/// Identifier of the fact-objects buffer.
pub fact_objects: String,
/// Frontier buffer passed as the first buffer argument to `ifds_gpu_step`.
pub frontier_in: String,
/// Frontier buffer passed as the second buffer argument to `ifds_gpu_step`;
/// must differ from `frontier_in`.
pub frontier_out: String,
}
impl WeirIfdsSecurityBuffers {
    /// Creates a buffer set, converting every argument into an owned `String`.
    ///
    /// No validation happens here; blank or aliased identifiers are only
    /// rejected later by `validate`, which the routing function calls.
    // One argument per buffer keeps call sites explicit, hence the lint opt-out.
    #[allow(clippy::too_many_arguments)]
    #[must_use]
    pub fn new(
        pg_edge_offsets: impl Into<String>,
        pg_edge_targets: impl Into<String>,
        pg_edge_kind_mask: impl Into<String>,
        pg_node_tags: impl Into<String>,
        fact_ids: impl Into<String>,
        fact_kinds: impl Into<String>,
        fact_subjects: impl Into<String>,
        fact_objects: impl Into<String>,
        frontier_in: impl Into<String>,
        frontier_out: impl Into<String>,
    ) -> Self {
        let pg_edge_offsets = pg_edge_offsets.into();
        let pg_edge_targets = pg_edge_targets.into();
        let pg_edge_kind_mask = pg_edge_kind_mask.into();
        let pg_node_tags = pg_node_tags.into();
        let fact_ids = fact_ids.into();
        let fact_kinds = fact_kinds.into();
        let fact_subjects = fact_subjects.into();
        let fact_objects = fact_objects.into();
        let frontier_in = frontier_in.into();
        let frontier_out = frontier_out.into();
        Self {
            pg_edge_offsets,
            pg_edge_targets,
            pg_edge_kind_mask,
            pg_node_tags,
            fact_ids,
            fact_kinds,
            fact_subjects,
            fact_objects,
            frontier_in,
            frontier_out,
        }
    }

    /// Checks that every buffer identifier is non-blank and that the two
    /// frontier buffers are distinct.
    ///
    /// # Errors
    ///
    /// * `MissingBuffer` naming the first field (in declaration order) whose
    ///   value is empty after trimming.
    /// * `AliasedFrontierBuffers` when `frontier_in` equals `frontier_out`.
    fn validate(&self) -> Result<(), WeirIfdsSecurityRouteError> {
        // Declaration order matters: the first blank entry is the one reported.
        let named_buffers: [(&'static str, &str); 10] = [
            ("pg_edge_offsets", self.pg_edge_offsets.as_str()),
            ("pg_edge_targets", self.pg_edge_targets.as_str()),
            ("pg_edge_kind_mask", self.pg_edge_kind_mask.as_str()),
            ("pg_node_tags", self.pg_node_tags.as_str()),
            ("fact_ids", self.fact_ids.as_str()),
            ("fact_kinds", self.fact_kinds.as_str()),
            ("fact_subjects", self.fact_subjects.as_str()),
            ("fact_objects", self.fact_objects.as_str()),
            ("frontier_in", self.frontier_in.as_str()),
            ("frontier_out", self.frontier_out.as_str()),
        ];
        let first_blank = named_buffers
            .iter()
            .copied()
            .find(|(_, identifier)| identifier.trim().is_empty());
        if let Some((field, _)) = first_blank {
            return Err(WeirIfdsSecurityRouteError::MissingBuffer { field });
        }

        if self.frontier_in != self.frontier_out {
            return Ok(());
        }
        Err(WeirIfdsSecurityRouteError::AliasedFrontierBuffers {
            buffer: self.frontier_in.clone(),
        })
    }
}
/// A validated source-to-sink query prepared for the Weir IFDS backend.
///
/// Produced by `route_security_taint_through_weir_ifds`; the per-field notes
/// below describe what that function stores.
#[derive(Clone, Debug, PartialEq)]
pub struct WeirIfdsSecurityDispatch {
/// Set to the IFDS GPU op id (`WEIR_IFDS_GPU_OP_ID`) by the routing function.
pub query_id: String,
/// Set to `WEIR_IFDS_SECURITY_BACKEND_ID` by the routing function.
pub backend_id: String,
/// Shape forwarded to `ifds_gpu_step` when building the step program.
pub shape: IfdsShape,
/// Node count reported by `shape.node_count()` at routing time.
pub node_count: u32,
/// Buffer identifiers that passed `WeirIfdsSecurityBuffers::validate`.
pub buffers: WeirIfdsSecurityBuffers,
/// Id of the fact that was accepted as the source.
pub source_fact_id: FactId,
/// Id of the fact that was accepted as the sink.
pub sink_fact_id: FactId,
/// Witness seeds; the routing function stores exactly one source/sink seed.
pub witness_seeds: Vec<PathSeed>,
/// Soundness records; the routing function stores a single `Soundness::Exact`
/// entry keyed by the IFDS GPU op id.
pub primitive_soundness: Vec<DynamicPrimitiveSoundness>,
}
impl WeirIfdsSecurityDispatch {
pub fn step_program(&self) -> Result<Program, WeirIfdsSecurityRouteError> {
ifds_gpu_step(self.shape, &self.buffers.frontier_in, &self.buffers.frontier_out)
.map_err(|reason| WeirIfdsSecurityRouteError::BuildProgram { reason })
}
}
/// Failures raised while routing a source-to-sink query through Weir IFDS
/// or while building the resulting step program.
#[derive(Clone, Debug, Eq, PartialEq, thiserror::Error)]
pub enum WeirIfdsSecurityRouteError {
/// The fact table failed its own validation; the underlying error is
/// forwarded transparently.
#[error(transparent)]
InvalidFacts {
#[from]
source: AnalysisFactError,
},
/// A buffer identifier was empty after trimming whitespace.
#[error("missing Weir IFDS buffer `{field}`. Fix: provide graph and fact buffers before routing through IFDS.")]
MissingBuffer {
/// Name of the offending `WeirIfdsSecurityBuffers` field.
field: &'static str,
},
/// `frontier_in` and `frontier_out` held the same identifier.
#[error("frontier buffer `{buffer}` is aliased. Fix: use distinct IFDS frontier input and output buffers.")]
AliasedFrontierBuffers {
/// The shared identifier.
buffer: String,
},
/// The requested source or sink fact id was not found in the fact table.
#[error("missing {role} fact {fact_id:?}. Fix: route only fact-backed source-to-sink queries.")]
MissingRoleFact {
/// `"source"` or `"sink"`.
role: &'static str,
/// The id that was looked up.
fact_id: FactId,
},
/// The fact exists but its kind does not match the role it was requested for.
#[error("{role} fact {fact_id:?} had kind {actual:?}. Fix: normalize source and sink facts before IFDS routing.")]
InvalidRoleFactKind {
/// `"source"` or `"sink"`.
role: &'static str,
/// The id that was looked up.
fact_id: FactId,
/// The kind the fact actually has.
actual: FactKind,
},
/// The fact's `subject` could not be converted to a `u32` node id.
#[error("{role} fact subject {subject} does not fit a u32 Weir node id. Fix: remap corpus node ids before IFDS routing.")]
NodeIdOverflow {
/// `"source"` or `"sink"`.
role: &'static str,
/// The unconvertible subject value.
subject: u64,
},
/// The node id is greater than or equal to the shape's node count.
#[error("{role} node {node_id} is outside IFDS node_count {node_count}. Fix: route with a shape matching the graph buffers.")]
NodeOutOfDomain {
/// `"source"` or `"sink"`.
role: &'static str,
/// The out-of-range node id.
node_id: u32,
/// The node count reported by the shape.
node_count: u32,
},
/// `IfdsShape::node_count` reported an error; `reason` is its message.
#[error("{reason}")]
InvalidShape {
reason: String,
},
/// `ifds_gpu_step` reported an error; `reason` is its message.
#[error("{reason}")]
BuildProgram {
reason: String,
},
}
/// One statement of a witness path, enriched with the bytes it spans.
///
/// Built by `security_witness_path_from_weir` from each statement of the
/// extracted path.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SecurityWitnessStatement {
/// Copied from the extracted statement's `adapter`.
pub adapter: String,
/// Copied from the extracted statement's `description`.
pub description: String,
/// File key used to look up the source bytes.
pub file: String,
/// Copied from the extracted statement's `node_id`.
pub node_id: u32,
/// Start byte offset of the statement within `file`.
pub byte_start: u32,
/// End byte offset (exclusive) of the statement within `file`.
pub byte_end: u32,
/// Edge kind of the transition leading into this statement; `None` for the
/// first statement of the path.
pub incoming_edge_kind: Option<u32>,
/// Copy of the file bytes in `byte_start..byte_end`.
pub source_bytes: Vec<u8>,
}
/// A finding paired with the statement-level witness path that supports it.
///
/// Built by `security_witness_path_from_weir`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SecurityFindingWitnessPath {
/// Copied from the finding bundle.
pub finding_id: String,
/// Caller-supplied rule id; never blank after trimming.
pub rule_id: String,
/// Copied from the finding bundle.
pub query_id: String,
/// Copied from the finding bundle.
pub backend_id: String,
/// Copied from the finding bundle.
pub soundness: Soundness,
/// Span of the first proof step whose role is `"source"`.
pub source_span: AnalysisSourceSpan,
/// Span of the first proof step whose role is `"sink"`.
pub sink_span: AnalysisSourceSpan,
/// One edge kind per transition between consecutive statements.
pub edge_kinds: Vec<u32>,
/// Statements of the path, in extraction order; never empty.
pub statements: Vec<SecurityWitnessStatement>,
}
/// Failures raised by `security_witness_path_from_weir`.
#[derive(Clone, Debug, Eq, PartialEq, thiserror::Error)]
pub enum SecurityWitnessPathError {
/// The rule id was empty after trimming whitespace.
#[error("rule_id is blank. Fix: attach stable rule ids to Weir witness paths.")]
EmptyRuleId,
/// The extracted path contained no statements.
#[error("Weir witness path is empty. Fix: only attach successful non-empty extracted paths.")]
EmptyExtractedPath,
/// The number of edge kinds differs from the number of statements minus one.
#[error("edge kind count {edge_kinds} does not match path hop count {hops}. Fix: provide one edge kind per witness transition.")]
EdgeKindCountMismatch {
/// Number of edge kinds supplied by the caller.
edge_kinds: usize,
/// Number of transitions in the extracted path.
hops: usize,
},
/// The finding's proof path has no step with the requested role.
#[error("finding {finding_id} has no `{role}` proof step. Fix: build fact-backed source and sink proof roles before witness attachment.")]
MissingProofRole {
/// Id of the finding that was searched.
finding_id: String,
/// `"source"` or `"sink"`.
role: &'static str,
},
/// The source-file map has no entry for a file referenced by the path.
#[error("source bytes for `{file}` are missing. Fix: pass source bytes for every file referenced by the Weir path.")]
MissingSourceBytes {
/// The file key that was not found.
file: String,
},
/// A statement's byte range is reversed or extends past the end of the file.
#[error("statement span {byte_start}..{byte_end} is invalid for `{file}` with {source_len} bytes. Fix: use the same source snapshot for extraction and reporting.")]
InvalidStatementSpan {
/// File the statement refers to.
file: String,
/// Start offset recorded on the statement.
byte_start: u32,
/// End offset recorded on the statement.
byte_end: u32,
/// Length in bytes of the supplied file contents.
source_len: usize,
},
}
pub fn security_witness_path_from_weir(
finding: &FindingProofBundle,
rule_id: impl Into<String>,
extracted_path: &ExtractedPath,
edge_kinds: &[u32],
source_files: &BTreeMap<String, Vec<u8>>,
) -> Result<SecurityFindingWitnessPath, SecurityWitnessPathError> {
let rule_id = rule_id.into();
if rule_id.trim().is_empty() {
return Err(SecurityWitnessPathError::EmptyRuleId);
}
if extracted_path.statements.is_empty() {
return Err(SecurityWitnessPathError::EmptyExtractedPath);
}
let hops = extracted_path.statements.len().saturating_sub(1);
if edge_kinds.len() != hops {
return Err(SecurityWitnessPathError::EdgeKindCountMismatch {
edge_kinds: edge_kinds.len(),
hops,
});
}
let source_span = proof_role_span(finding, "source")?;
let sink_span = proof_role_span(finding, "sink")?;
let mut statements = Vec::with_capacity(extracted_path.statements.len());
for (index, statement) in extracted_path.statements.iter().enumerate() {
let file_bytes =
source_files
.get(&statement.file)
.ok_or_else(|| SecurityWitnessPathError::MissingSourceBytes {
file: statement.file.clone(),
})?;
let start = statement.byte_start as usize;
let end = statement.byte_end as usize;
if end < start || end > file_bytes.len() {
return Err(SecurityWitnessPathError::InvalidStatementSpan {
file: statement.file.clone(),
byte_start: statement.byte_start,
byte_end: statement.byte_end,
source_len: file_bytes.len(),
});
}
statements.push(SecurityWitnessStatement {
adapter: statement.adapter.clone(),
description: statement.description.clone(),
file: statement.file.clone(),
node_id: statement.node_id,
byte_start: statement.byte_start,
byte_end: statement.byte_end,
incoming_edge_kind: index.checked_sub(1).map(|edge_index| edge_kinds[edge_index]),
source_bytes: file_bytes[start..end].to_vec(),
});
}
Ok(SecurityFindingWitnessPath {
finding_id: finding.finding_id.clone(),
rule_id,
query_id: finding.query_id.clone(),
backend_id: finding.backend_id.clone(),
soundness: finding.soundness,
source_span,
sink_span,
edge_kinds: edge_kinds.to_vec(),
statements,
})
}
/// Validates a source-to-sink request and packages it as a Weir IFDS dispatch.
///
/// The resulting dispatch carries the IFDS GPU op id as `query_id`,
/// [`WEIR_IFDS_SECURITY_BACKEND_ID`] as `backend_id`, a single witness seed
/// built from the source and sink facts, and a single `Soundness::Exact`
/// record for the IFDS GPU op.
///
/// # Errors
///
/// Checks run in this order and the first failure is returned:
///
/// 1. fact-table validation (`InvalidFacts`),
/// 2. buffer validation (`MissingBuffer`, `AliasedFrontierBuffers`),
/// 3. shape node count (`InvalidShape`),
/// 4. source fact lookup and kind, then sink fact lookup and kind
///    (`MissingRoleFact`, `InvalidRoleFactKind`),
/// 5. source node id, then sink node id (`NodeIdOverflow`, `NodeOutOfDomain`).
pub fn route_security_taint_through_weir_ifds(
    table: &AnalysisFactTable,
    request: &SourceToSinkFindingRequest,
    shape: IfdsShape,
    buffers: WeirIfdsSecurityBuffers,
) -> Result<WeirIfdsSecurityDispatch, WeirIfdsSecurityRouteError> {
    table.validate()?;
    buffers.validate()?;
    let node_count = match shape.node_count() {
        Ok(count) => count,
        Err(reason) => return Err(WeirIfdsSecurityRouteError::InvalidShape { reason }),
    };

    // Both facts are resolved before either node id is checked, so a missing
    // sink is reported ahead of an out-of-range source node.
    let source_fact = require_role_fact(table, request.source_fact_id, "source", FactKind::Source)?;
    let sink_fact = require_role_fact(table, request.sink_fact_id, "sink", FactKind::Sink)?;
    let source_node = fact_node_id(source_fact, "source", node_count)?;
    let sink_node = fact_node_id(sink_fact, "sink", node_count)?;

    let witness_seeds = vec![PathSeed {
        source_file: fact_file(source_fact),
        source_node,
        sink_file: fact_file(sink_fact),
        sink_node,
    }];
    let primitive_soundness = vec![DynamicPrimitiveSoundness::new(
        WEIR_IFDS_GPU_OP_ID,
        Soundness::Exact,
    )];

    Ok(WeirIfdsSecurityDispatch {
        query_id: WEIR_IFDS_GPU_OP_ID.to_string(),
        backend_id: WEIR_IFDS_SECURITY_BACKEND_ID.to_string(),
        shape,
        node_count,
        buffers,
        source_fact_id: source_fact.id,
        sink_fact_id: sink_fact.id,
        witness_seeds,
        primitive_soundness,
    })
}
/// Looks up `fact_id` in `table` and confirms the fact has the `expected` kind.
///
/// `role` is only used to label errors.
///
/// # Errors
///
/// * `MissingRoleFact` when the table has no fact with that id.
/// * `InvalidRoleFactKind` when the fact's kind differs from `expected`.
fn require_role_fact<'a>(
    table: &'a AnalysisFactTable,
    fact_id: FactId,
    role: &'static str,
    expected: FactKind,
) -> Result<&'a AnalysisFact, WeirIfdsSecurityRouteError> {
    match table.get(fact_id) {
        None => Err(WeirIfdsSecurityRouteError::MissingRoleFact { role, fact_id }),
        Some(found) if found.kind != expected => {
            Err(WeirIfdsSecurityRouteError::InvalidRoleFactKind {
                role,
                fact_id,
                actual: found.kind,
            })
        }
        Some(found) => Ok(found),
    }
}
/// Converts a fact's `subject` into a `u32` node id that lies inside the
/// IFDS node domain `0..node_count`.
///
/// `role` is only used to label errors.
///
/// # Errors
///
/// * `NodeIdOverflow` when `subject` does not fit in a `u32`.
/// * `NodeOutOfDomain` when the converted id is `>= node_count`.
fn fact_node_id(
    fact: &AnalysisFact,
    role: &'static str,
    node_count: u32,
) -> Result<u32, WeirIfdsSecurityRouteError> {
    let node_id = match u32::try_from(fact.subject) {
        Ok(narrowed) => narrowed,
        Err(_) => {
            return Err(WeirIfdsSecurityRouteError::NodeIdOverflow {
                role,
                subject: fact.subject,
            })
        }
    };
    if node_id < node_count {
        Ok(node_id)
    } else {
        Err(WeirIfdsSecurityRouteError::NodeOutOfDomain {
            role,
            node_id,
            node_count,
        })
    }
}
/// Returns the file label for a fact.
///
/// Prefers the payload entry `"file"`, then `"path"`; when neither is present
/// it falls back to `file:<file_id>` built from the fact's span.
fn fact_file(fact: &AnalysisFact) -> String {
    let payload = &fact.payload;
    let labelled = payload.get("file").or_else(|| payload.get("path"));
    match labelled {
        Some(name) => name.clone(),
        None => format!("file:{}", fact.span.file_id),
    }
}
/// Returns a clone of the span of the first proof step whose role equals `role`.
///
/// # Errors
///
/// Returns `MissingProofRole` (carrying the finding id and the role) when no
/// step in the finding's proof path has that role.
fn proof_role_span(
    finding: &FindingProofBundle,
    role: &'static str,
) -> Result<AnalysisSourceSpan, SecurityWitnessPathError> {
    for step in finding.proof_path.iter() {
        if step.role == role {
            return Ok(step.span.clone());
        }
    }
    Err(SecurityWitnessPathError::MissingProofRole {
        finding_id: finding.finding_id.clone(),
        role,
    })
}