use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use crate::DeclId;
use crate::flow::{ConstantValue, StringShape, TaintCleanser, TaintKind, ValueSet};
/// Identifier of a [`Fact`], stored as a newtype around its string form.
///
/// Ids produced by `mint_fact` have the shape `fact:` followed by the lowercase
/// hex encoding of a SHA-256 digest (see `compute_fact_id`). The inner field is
/// public, so nothing in this type enforces that shape for hand-built values.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Serialize, Deserialize)]
pub struct FactId(pub String);
/// Field-less discriminant naming the family a [`Fact`] belongs to.
///
/// There is one variant per [`FactPayload`] variant, with the same name;
/// [`FactPayload::kind`] performs that mapping. Values serialize as the
/// snake_case variant name (for example `"dependency_edge"`).
///
/// `PartialOrd`/`Ord` are derived, so the ordering follows the declaration
/// order below; reordering variants changes how kinds sort.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FactKind {
Declaration,
Reference,
DependencyEdge,
DynamicSqlEvidence,
DbLinkReference,
Opacity,
ResolutionReport,
Privilege,
ConstantValue,
ValueSet,
StringShape,
Taint,
Sanitizer,
ExceptionHandler,
CursorForLoop,
MissingInstrumentation,
HardcodedCredential,
InvokerRights,
RefCursorReturn,
DmlInFunction,
UnboundedBulkCollect,
DeprecatedFeature,
DeterministicMisuse,
MutatingTableTrigger,
LogWithoutReraise,
CrossSchemaWrite,
SensitivePublicSynonym,
IsNullOnIndexedColumn,
}
/// A single fact: an id, its kind, where it came from, and its kind-specific data.
///
/// `mint_fact` is the constructor used throughout this file: it derives `kind`
/// from the payload and `id` from a hash of kind, provenance and payload. All
/// fields are public, so a `Fact` assembled by hand is not checked for a `kind`
/// that matches `payload` or for an `id` that matches the hash.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct Fact {
/// Identifier of this fact; `FactStore::push` de-duplicates on it.
pub id: FactId,
/// Family of the fact; `mint_fact` sets it to `payload.kind()`.
pub kind: FactKind,
/// Producer and optional source attribution.
pub provenance: FactProvenance,
/// Kind-specific data.
pub payload: FactPayload,
}
/// Attribution attached to every [`Fact`]: the producer and, optionally, the source.
///
/// The serialized form of this struct is one of the inputs hashed by
/// `compute_fact_id`, so facts with identical payloads but different provenance
/// (including a different non-empty `run_id`) receive different ids.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct FactProvenance {
/// Presumably the name of the producing component (the tests in this file use
/// `"plsql-lineage"`) — confirm with the producers.
pub component: String,
/// Version string recorded for that component (the tests use `"0.1.0"`).
pub component_version: String,
/// Run identifier. Left out of the serialized form when empty, and read back
/// as an empty string when the field is missing.
#[serde(default, skip_serializing_if = "String::is_empty")]
pub run_id: String,
/// Optional logical id of the source; omitted from the serialized form when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub source_logical_id: Option<String>,
/// Optional source file path; omitted from the serialized form when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub source_file: Option<String>,
}
impl FactProvenance {
    /// Returns this provenance with both source-attribution fields filled in.
    ///
    /// `source_logical_id` and `source_file` are converted into owned strings
    /// and stored as `Some(..)`, replacing whatever was there before. Every
    /// other field is carried over unchanged.
    #[must_use]
    pub fn with_source(
        self,
        source_logical_id: impl Into<String>,
        source_file: impl Into<String>,
    ) -> Self {
        Self {
            source_logical_id: Some(source_logical_id.into()),
            source_file: Some(source_file.into()),
            ..self
        }
    }
}
/// Kind-specific data carried by a [`Fact`].
///
/// Serialized as an internally tagged enum: each value becomes an object whose
/// `family` field holds the snake_case variant name (for example
/// `"family":"dependency_edge"`) next to the variant's own fields.
/// [`FactPayload::kind`] maps every variant to the same-named [`FactKind`].
///
/// The serialized form is also hashed into the fact id by `compute_fact_id`, so
/// renaming a variant or field, or reordering fields, changes the ids minted
/// for that variant.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "family", rename_all = "snake_case")]
pub enum FactPayload {
/// Pairs a `DeclId` with a logical id string.
Declaration {
decl: DeclId,
logical_id: String,
},
/// A reference from a declaration (`from_decl`) to a logical id (`to_logical_id`).
Reference {
from_decl: DeclId,
to_logical_id: String,
},
/// A directed edge between two logical ids. `edge_kind` is a free-form string;
/// the tests in this file use `"Calls"` and `"Reads"`.
DependencyEdge {
from_logical_id: String,
to_logical_id: String,
edge_kind: String,
},
DynamicSqlEvidence {
site: String,
},
DbLinkReference {
object: String,
db_link: String,
},
Opacity {
target_logical_id: String,
reason: String,
},
ResolutionReport {
reference: String,
strategy: String,
},
Privilege {
grantee: String,
privilege: String,
on: String,
},
// The next five variants each carry a value type imported from `crate::flow`
// together with a `name` and a `unit_logical_id`.
// NOTE(review): presumably `name` identifies a variable inside the unit named
// by `unit_logical_id` — confirm against the producers.
ConstantValue {
unit_logical_id: String,
name: String,
value: ConstantValue,
},
ValueSet {
unit_logical_id: String,
name: String,
value_set: ValueSet,
},
StringShape {
unit_logical_id: String,
name: String,
shape: StringShape,
},
Taint {
unit_logical_id: String,
name: String,
kinds: Vec<TaintKind>,
},
Sanitizer {
unit_logical_id: String,
name: String,
cleansed_by: Vec<TaintCleanser>,
},
// Every remaining variant carries a `unit_logical_id` plus zero or more
// string/bool details.
// NOTE(review): the variant names suggest per-unit findings; the exact meaning
// of each detail field is not visible in this file — confirm with the producers.
ExceptionHandler {
unit_logical_id: String,
scope: String,
body_class: String,
},
CursorForLoop {
unit_logical_id: String,
loop_var: String,
has_body_dml: bool,
},
MissingInstrumentation {
unit_logical_id: String,
},
HardcodedCredential {
unit_logical_id: String,
marker: String,
},
InvokerRights {
unit_logical_id: String,
},
RefCursorReturn {
unit_logical_id: String,
},
DmlInFunction {
unit_logical_id: String,
},
UnboundedBulkCollect {
unit_logical_id: String,
},
DeprecatedFeature {
unit_logical_id: String,
feature: String,
},
DeterministicMisuse {
unit_logical_id: String,
construct: String,
},
MutatingTableTrigger {
unit_logical_id: String,
table: String,
},
LogWithoutReraise {
unit_logical_id: String,
},
CrossSchemaWrite {
unit_logical_id: String,
target: String,
},
SensitivePublicSynonym {
unit_logical_id: String,
synonym: String,
target: String,
},
IsNullOnIndexedColumn {
unit_logical_id: String,
column: String,
},
}
impl FactPayload {
#[must_use]
pub fn kind(&self) -> FactKind {
match self {
FactPayload::Declaration { .. } => FactKind::Declaration,
FactPayload::Reference { .. } => FactKind::Reference,
FactPayload::DependencyEdge { .. } => FactKind::DependencyEdge,
FactPayload::DynamicSqlEvidence { .. } => FactKind::DynamicSqlEvidence,
FactPayload::DbLinkReference { .. } => FactKind::DbLinkReference,
FactPayload::Opacity { .. } => FactKind::Opacity,
FactPayload::ResolutionReport { .. } => FactKind::ResolutionReport,
FactPayload::Privilege { .. } => FactKind::Privilege,
FactPayload::ConstantValue { .. } => FactKind::ConstantValue,
FactPayload::ValueSet { .. } => FactKind::ValueSet,
FactPayload::StringShape { .. } => FactKind::StringShape,
FactPayload::Taint { .. } => FactKind::Taint,
FactPayload::Sanitizer { .. } => FactKind::Sanitizer,
FactPayload::ExceptionHandler { .. } => FactKind::ExceptionHandler,
FactPayload::CursorForLoop { .. } => FactKind::CursorForLoop,
FactPayload::MissingInstrumentation { .. } => FactKind::MissingInstrumentation,
FactPayload::HardcodedCredential { .. } => FactKind::HardcodedCredential,
FactPayload::InvokerRights { .. } => FactKind::InvokerRights,
FactPayload::RefCursorReturn { .. } => FactKind::RefCursorReturn,
FactPayload::DmlInFunction { .. } => FactKind::DmlInFunction,
FactPayload::UnboundedBulkCollect { .. } => FactKind::UnboundedBulkCollect,
FactPayload::DeprecatedFeature { .. } => FactKind::DeprecatedFeature,
FactPayload::DeterministicMisuse { .. } => FactKind::DeterministicMisuse,
FactPayload::MutatingTableTrigger { .. } => FactKind::MutatingTableTrigger,
FactPayload::LogWithoutReraise { .. } => FactKind::LogWithoutReraise,
FactPayload::CrossSchemaWrite { .. } => FactKind::CrossSchemaWrite,
FactPayload::SensitivePublicSynonym { .. } => FactKind::SensitivePublicSynonym,
FactPayload::IsNullOnIndexedColumn { .. } => FactKind::IsNullOnIndexedColumn,
}
}
}
/// Builds a [`Fact`] from its provenance and payload.
///
/// The kind is taken from `payload.kind()`, and the id is computed by
/// `compute_fact_id` from that kind together with the provenance and payload,
/// so equal inputs always yield an equal id.
#[must_use]
pub fn mint_fact(provenance: FactProvenance, payload: FactPayload) -> Fact {
    let kind = payload.kind();
    Fact {
        // Computed first, while `provenance` and `payload` are only borrowed;
        // both are moved into the struct by the fields below.
        id: compute_fact_id(kind, &provenance, &payload),
        kind,
        provenance,
        payload,
    }
}
/// Derives the id for a fact from its kind, provenance and payload.
///
/// Each of the three inputs is serialized to JSON; the three strings are fed to
/// SHA-256 with a `|` byte between them, and the digest is rendered as
/// lowercase hex behind a `fact:` prefix.
///
/// A serialization failure is not reported: the affected part contributes an
/// empty string to the hash (`unwrap_or_default`). That best-effort behavior is
/// kept as is so the function stays infallible.
fn compute_fact_id(kind: FactKind, provenance: &FactProvenance, payload: &FactPayload) -> FactId {
    const PREFIX: &str = "fact:";
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let kind_json = serde_json::to_string(&kind).unwrap_or_default();
    let prov_json = serde_json::to_string(provenance).unwrap_or_default();
    let payload_json = serde_json::to_string(payload).unwrap_or_default();

    let mut hasher = Sha256::new();
    hasher.update(kind_json.as_bytes());
    hasher.update(b"|");
    hasher.update(prov_json.as_bytes());
    hasher.update(b"|");
    hasher.update(payload_json.as_bytes());
    let digest = hasher.finalize();

    // Two hex digits per digest byte, written straight into the pre-sized
    // buffer; this avoids allocating a temporary `String` per byte via `format!`.
    let mut id = String::with_capacity(PREFIX.len() + digest.len() * 2);
    id.push_str(PREFIX);
    for byte in digest {
        // Both indices are in 0..=15, so the table lookups cannot go out of bounds.
        id.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        id.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
    }
    FactId(id)
}
/// In-memory collection of [`Fact`]s, kept in insertion order.
///
/// `FactStore::push` skips facts whose id is already present. The `facts` field
/// is public, so code that appends to it directly bypasses that check.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct FactStore {
/// Stored facts, in the order they were first pushed.
pub facts: Vec<Fact>,
}
impl FactStore {
    /// Adds `fact` unless a fact with the same id is already stored, and
    /// returns the fact's id either way.
    ///
    /// When the id is already present the store is left untouched: the fact
    /// that was pushed first is kept and `fact` is dropped. Duplicate detection
    /// is a linear scan over `facts`, so each call costs time proportional to
    /// the number of stored facts.
    pub fn push(&mut self, fact: Fact) -> FactId {
        if self.facts.iter().any(|stored| stored.id == fact.id) {
            // Duplicate: hand the id back by move, no clone needed.
            return fact.id;
        }
        let id = fact.id.clone();
        self.facts.push(fact);
        id
    }

    /// Iterates over the stored facts whose `kind` equals `kind`, in insertion order.
    pub fn by_kind(&self, kind: FactKind) -> impl Iterator<Item = &Fact> {
        self.facts.iter().filter(move |fact| fact.kind == kind)
    }

    /// Number of facts currently stored.
    #[must_use]
    pub fn len(&self) -> usize {
        self.facts.len()
    }

    /// Returns `true` when no facts are stored.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.facts.is_empty()
    }
}
// Unit tests for fact minting, id derivation, the serialized JSON shape and
// `FactStore` behavior.
#[cfg(test)]
mod tests {
use super::*;
// Shared fixture: provenance with an empty `run_id` and no source attribution.
fn prov() -> FactProvenance {
FactProvenance {
component: "plsql-lineage".into(),
component_version: "0.1.0".into(),
run_id: String::new(),
source_logical_id: None,
source_file: None,
}
}
// Shared fixture: a `DependencyEdge` payload from `hr.foo` to `hr.bar`.
fn payload() -> FactPayload {
FactPayload::DependencyEdge {
from_logical_id: "hr.foo".into(),
to_logical_id: "hr.bar".into(),
edge_kind: "Calls".into(),
}
}
// Minted ids carry the `fact:` prefix.
#[test]
fn mint_fact_produces_fact_prefixed_id() {
let f = mint_fact(prov(), payload());
assert!(f.id.0.starts_with("fact:"));
}
// Equal inputs produce equal ids.
#[test]
fn mint_fact_is_deterministic_for_same_inputs() {
let a = mint_fact(prov(), payload());
let b = mint_fact(prov(), payload());
assert_eq!(a.id, b.id);
}
// Changing one payload field changes the id.
#[test]
fn mint_fact_changes_id_when_payload_changes() {
let a = mint_fact(prov(), payload());
let mut diff = payload();
if let FactPayload::DependencyEdge { edge_kind, .. } = &mut diff {
*edge_kind = "Reads".into();
}
let b = mint_fact(prov(), diff);
assert_ne!(a.id, b.id);
}
// Provenance is part of the hash: a different component version changes the id.
#[test]
fn mint_fact_changes_id_when_provenance_changes() {
let a = mint_fact(prov(), payload());
let mut other_prov = prov();
other_prov.component_version = "9.9.9".into();
let b = mint_fact(other_prov, payload());
assert_ne!(a.id, b.id);
}
// The stored `kind` and `payload.kind()` agree after minting.
#[test]
fn payload_kind_method_returns_matching_family() {
let f = mint_fact(prov(), payload());
assert_eq!(f.kind, FactKind::DependencyEdge);
assert_eq!(f.payload.kind(), FactKind::DependencyEdge);
}
// Pushing the same fact twice stores it once.
#[test]
fn store_pushes_and_dedupes_by_id() {
let mut store = FactStore::default();
let f = mint_fact(prov(), payload());
store.push(f.clone());
store.push(f);
assert_eq!(store.len(), 1);
}
// `by_kind` yields only facts of the requested kind.
#[test]
fn store_filters_by_kind() {
let mut store = FactStore::default();
let decl = mint_fact(
prov(),
FactPayload::Declaration {
decl: DeclId::new(1),
logical_id: "hr.foo".into(),
},
);
let edge = mint_fact(prov(), payload());
store.push(decl);
store.push(edge);
assert_eq!(store.by_kind(FactKind::Declaration).count(), 1);
assert_eq!(store.by_kind(FactKind::DependencyEdge).count(), 1);
assert_eq!(store.by_kind(FactKind::Privilege).count(), 0);
}
// Pins the JSON shape: snake_case `kind`, a `family` tag on the payload, and
// the `fact:` prefix somewhere in the output.
#[test]
fn fact_serialises_with_family_tag() {
let f = mint_fact(prov(), payload());
let json = serde_json::to_string(&f).unwrap();
assert!(json.contains("\"kind\":\"dependency_edge\""));
assert!(json.contains("\"family\":\"dependency_edge\""));
assert!(json.contains("fact:"));
}
// Serialize then deserialize yields an equal `Fact`.
#[test]
fn fact_round_trips_through_serde() {
let f = mint_fact(prov(), payload());
let json = serde_json::to_string(&f).unwrap();
let back: Fact = serde_json::from_str(&json).unwrap();
assert_eq!(back, f);
}
// An empty `run_id` is left out of the serialized form.
#[test]
fn run_id_omitted_when_empty() {
let f = mint_fact(prov(), payload());
let json = serde_json::to_string(&f).unwrap();
assert!(!json.contains("\"run_id\""));
}
// Source attribution is absent from the JSON when unset, and survives a round
// trip when set through `with_source`.
#[test]
fn source_attribution_omitted_when_absent_and_round_trips_when_present() {
let without_source = serde_json::to_string(&mint_fact(prov(), payload())).unwrap();
assert!(!without_source.contains("source_logical_id"));
assert!(!without_source.contains("source_file"));
let with_source = prov().with_source("hr.pkg", "src/hr/pkg.pks");
let fact = mint_fact(with_source, payload());
let json = serde_json::to_string(&fact).unwrap();
assert!(json.contains("\"source_logical_id\":\"hr.pkg\""));
assert!(json.contains("\"source_file\":\"src/hr/pkg.pks\""));
let back: Fact = serde_json::from_str(&json).unwrap();
assert_eq!(back.provenance.source_logical_id.as_deref(), Some("hr.pkg"));
assert_eq!(
back.provenance.source_file.as_deref(),
Some("src/hr/pkg.pks")
);
}
// End-to-end check for the `ExceptionHandler` family: kind mapping, JSON tags,
// serde round trip and store filtering.
#[test]
fn exception_handler_fact_kind_and_serde() {
let f = mint_fact(
prov(),
FactPayload::ExceptionHandler {
unit_logical_id: "hr.pay_pkg.run".into(),
scope: "others".into(),
body_class: "noop".into(),
},
);
assert_eq!(f.kind, FactKind::ExceptionHandler);
assert_eq!(f.payload.kind(), FactKind::ExceptionHandler);
let json = serde_json::to_string(&f).unwrap();
assert!(json.contains("\"kind\":\"exception_handler\""));
assert!(json.contains("\"family\":\"exception_handler\""));
let back: Fact = serde_json::from_str(&json).unwrap();
assert_eq!(back, f);
let mut store = FactStore::default();
store.push(f);
assert_eq!(store.by_kind(FactKind::ExceptionHandler).count(), 1);
assert_eq!(store.by_kind(FactKind::Privilege).count(), 0);
}
}