pub mod divergence;
pub use divergence::*;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use dirtydata_core::actions::node_name;
use dirtydata_core::hash;
use dirtydata_core::ir::Graph;
use dirtydata_core::types::{ConfidenceScore, ConfigValue, Hash, NodeKind, StableId, Timestamp};
/// A single statement about one graph node: how much it can be trusted and
/// the evidence that trust level is based on.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Observation {
    /// Id of the node this observation describes.
    pub target: StableId,
    /// Name of the node, as returned by `node_name` when the observation was made.
    pub target_name: String,
    /// Trust level derived from `evidence`.
    pub confidence: ConfidenceScore,
    /// What was actually seen (or why nothing could be seen).
    pub evidence: Evidence,
    /// When the observation was made, in milliseconds since the Unix epoch.
    pub timestamp: Timestamp,
}
/// The concrete finding that backs an [`Observation`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Evidence {
    /// The file's content hash equals the hash the node was configured with.
    FileHashMatch {
        path: PathBuf,
        hash: Hash,
    },
    /// No hash comparison was made; only file metadata was recorded.
    FileStatOnly {
        path: PathBuf,
        /// File length in bytes.
        size: u64,
        /// Modification time in whole seconds since the Unix epoch.
        mtime: u64,
    },
    /// The file's content hash differs from the configured one.
    FileHashMismatch {
        path: PathBuf,
        expected: Hash,
        actual: Hash,
    },
    /// The file's extension is not one the observer recognises.
    /// `ext` is the extension as found on the path (empty when there is none).
    ExtensionUnknown {
        path: PathBuf,
        ext: String,
    },
    /// The node is backed by a foreign plugin.
    InherentNondeterminism {
        plugin_name: String,
    },
    /// Nothing was measured; the string states the reasoning used instead.
    InferredFromContext(String),
    /// Nothing could be observed; the string explains why.
    Unobservable(String),
}
/// The full set of observations for a graph, serialisable as one document.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ObserverState {
    /// Latest observation for each node, keyed by the node's id.
    pub observations: HashMap<StableId, Observation>,
}
pub struct Observer;
impl Observer {
pub fn observe_graph(graph: &Graph, project_root: &Path) -> ObserverState {
let mut state = ObserverState::default();
for (id, node) in &graph.nodes {
let name = node_name(node);
match &node.kind {
NodeKind::Source => {
if let Some(ConfigValue::String(file_path)) = node.config.get("file") {
let path = project_root.join(file_path);
let expected_hash_str = node.config.get("expected_hash").and_then(|v| {
if let ConfigValue::String(s) = v {
Some(s)
} else {
None
}
});
let obs = Self::observe_file(
*id,
name.clone(),
&path,
expected_hash_str.map(|s| s.as_str()),
);
state.observations.insert(*id, obs);
} else {
state.observations.insert(
*id,
Observation {
target: *id,
target_name: name,
confidence: ConfidenceScore::Unknown,
evidence: Evidence::Unobservable("Live audio input".into()),
timestamp: Timestamp(
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_millis() as i64,
),
},
);
}
}
NodeKind::Foreign(plugin_name) => {
state.observations.insert(
*id,
Observation {
target: *id,
target_name: name,
confidence: ConfidenceScore::Suspicious,
evidence: Evidence::InherentNondeterminism {
plugin_name: plugin_name.clone(),
},
timestamp: Timestamp(
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_millis() as i64,
),
},
);
}
_ => {
state.observations.insert(
*id,
Observation {
target: *id,
target_name: name,
confidence: ConfidenceScore::Inferred,
evidence: Evidence::InferredFromContext(
"Internal deterministic node".into(),
),
timestamp: Timestamp(
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_millis() as i64,
),
},
);
}
}
}
state
}
pub fn observe_file(
target: StableId,
target_name: String,
path: &Path,
expected_hash_hex: Option<&str>,
) -> Observation {
let ts = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_millis() as i64;
if !path.exists() {
return Observation {
target,
target_name,
confidence: ConfidenceScore::Unknown,
evidence: Evidence::Unobservable(format!("File not found: {}", path.display())),
timestamp: Timestamp(ts),
};
}
let meta = match std::fs::metadata(path) {
Ok(m) => m,
Err(e) => {
return Observation {
target,
target_name,
confidence: ConfidenceScore::Unknown,
evidence: Evidence::Unobservable(format!("Cannot read metadata: {}", e)),
timestamp: Timestamp(ts),
}
}
};
let ext = path.extension().and_then(|s| s.to_str()).unwrap_or("");
if ext != "wav" && ext != "flac" && ext != "json" {
return Observation {
target,
target_name,
confidence: ConfidenceScore::Suspicious,
evidence: Evidence::ExtensionUnknown {
path: path.to_path_buf(),
ext: ext.to_string(),
},
timestamp: Timestamp(ts),
};
}
if let Some(expected_hex) = expected_hash_hex {
let file_bytes = match std::fs::read(path) {
Ok(b) => b,
Err(_) => {
return Observation {
target,
target_name,
confidence: ConfidenceScore::Unknown,
evidence: Evidence::Unobservable("Failed to read file contents".into()),
timestamp: Timestamp(ts),
}
}
};
let actual_hash = hash::hash_bytes(&file_bytes);
let mut expected_hash = [0u8; 32];
let valid_hex =
expected_hex.len() == 64 && expected_hex.chars().all(|c| c.is_ascii_hexdigit());
if valid_hex {
for i in 0..32 {
if let Ok(b) = u8::from_str_radix(&expected_hex[i * 2..i * 2 + 2], 16) {
expected_hash[i] = b;
}
}
if expected_hash == actual_hash {
return Observation {
target,
target_name,
confidence: ConfidenceScore::Verified,
evidence: Evidence::FileHashMatch {
path: path.to_path_buf(),
hash: actual_hash,
},
timestamp: Timestamp(ts),
};
} else {
return Observation {
target,
target_name,
confidence: ConfidenceScore::Unknown,
evidence: Evidence::FileHashMismatch {
path: path.to_path_buf(),
expected: expected_hash,
actual: actual_hash,
},
timestamp: Timestamp(ts),
};
}
}
}
let mtime = meta
.modified()
.unwrap_or(std::time::SystemTime::UNIX_EPOCH)
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_secs();
Observation {
target,
target_name,
confidence: ConfidenceScore::Inferred,
evidence: Evidence::FileStatOnly {
path: path.to_path_buf(),
size: meta.len(),
mtime,
},
timestamp: Timestamp(ts),
}
}
}
impl ObserverState {
    /// Writes the state as pretty-printed JSON to
    /// `<project_root>/.dirtydata/observations.json`.
    ///
    /// The `.dirtydata` directory is created if it does not exist yet, so the
    /// first save in a fresh project succeeds.
    ///
    /// # Errors
    ///
    /// Returns an error if serialisation fails, the directory cannot be
    /// created, or the file cannot be written.
    pub fn save(&self, project_root: &Path) -> Result<(), std::io::Error> {
        // Serialise first so a serialisation failure leaves the disk untouched.
        let data = serde_json::to_string_pretty(self)?;
        let dir = project_root.join(".dirtydata");
        std::fs::create_dir_all(&dir)?;
        std::fs::write(dir.join("observations.json"), data)
    }

    /// Reads the state from `<project_root>/.dirtydata/observations.json`.
    ///
    /// A missing file is not an error: it yields an empty default state.
    ///
    /// # Errors
    ///
    /// Returns an error if the file exists but cannot be read, or if its
    /// contents are not valid JSON for this type.
    pub fn load(project_root: &Path) -> Result<Self, std::io::Error> {
        let path = project_root.join(".dirtydata").join("observations.json");
        // Decide "missing" from the read itself rather than a separate
        // `exists()` probe, which is racy and hides other I/O failures.
        let data = match std::fs::read_to_string(&path) {
            Ok(data) => data,
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
                return Ok(Self::default());
            }
            Err(err) => return Err(err),
        };
        let state = serde_json::from_str(&data)?;
        Ok(state)
    }
}