1pub mod divergence;
12
13pub use divergence::*;
14use serde::{Deserialize, Serialize};
15use std::collections::HashMap;
16use std::path::{Path, PathBuf};
17
18use dirtydata_core::actions::node_name;
19use dirtydata_core::hash;
20use dirtydata_core::ir::Graph;
21use dirtydata_core::types::{ConfidenceScore, ConfigValue, Hash, NodeKind, StableId, Timestamp};
22
/// One observation about a single graph node: which node was looked at,
/// what was found, and how much that finding can be trusted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Observation {
    /// Identifier of the node this observation is about.
    pub target: StableId,
    /// Name of the node as returned by `node_name` when the observation was made.
    pub target_name: String,
    /// How much trust the observer places in the node given `evidence`.
    pub confidence: ConfidenceScore,
    /// The concrete finding that `confidence` is based on.
    pub evidence: Evidence,
    /// When the observation was made. `Observer` fills this with milliseconds
    /// since the UNIX epoch.
    pub timestamp: Timestamp,
}
32
/// The concrete finding behind an [`Observation`].
///
/// Variant and field names are serialized via serde, so renaming them changes
/// the on-disk format.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Evidence {
    /// The file's contents were hashed and the result equals the expected hash.
    FileHashMatch { path: PathBuf, hash: Hash },
    /// The file exists but its contents were not compared against a hash;
    /// only filesystem metadata was recorded.
    FileStatOnly {
        path: PathBuf,
        /// File length as reported by the filesystem metadata.
        size: u64,
        /// Last-modification time in seconds since the UNIX epoch.
        mtime: u64,
    },
    /// The file's contents were hashed and the result differs from the expected hash.
    FileHashMismatch {
        path: PathBuf,
        /// Hash decoded from the node's configured expectation.
        expected: Hash,
        /// Hash computed from the file's current contents.
        actual: Hash,
    },
    /// The file's extension is not one the observer recognises
    /// (`Observer::observe_file` accepts `wav`, `flac` and `json`).
    ExtensionUnknown { path: PathBuf, ext: String },
    /// Recorded by `Observer::observe_graph` for `NodeKind::Foreign` nodes;
    /// `plugin_name` is the name carried by that node kind.
    InherentNondeterminism { plugin_name: String },
    /// Nothing was checked directly; the payload is a free-form explanation.
    InferredFromContext(String),
    /// The target could not be observed; the payload is a free-form reason
    /// (missing file, unreadable metadata or contents, no file configured).
    Unobservable(String),
}
59
/// The set of observations produced for a graph, keyed by node id.
///
/// `Default` yields an empty state. The state is persisted as JSON by
/// `ObserverState::save` and read back by `ObserverState::load`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ObserverState {
    /// One observation per node id; inserting again for the same id replaces
    /// the previous entry.
    pub observations: HashMap<StableId, Observation>,
}
66
/// Stateless namespace type: all observation logic lives in associated
/// functions on this unit struct.
pub struct Observer;
68
69impl Observer {
70 pub fn observe_graph(graph: &Graph, project_root: &Path) -> ObserverState {
72 let mut state = ObserverState::default();
73
74 for (id, node) in &graph.nodes {
75 let name = node_name(node);
76
77 match &node.kind {
78 NodeKind::Source => {
80 if let Some(ConfigValue::String(file_path)) = node.config.get("file") {
81 let path = project_root.join(file_path);
82
83 let expected_hash_str = node.config.get("expected_hash").and_then(|v| {
85 if let ConfigValue::String(s) = v {
86 Some(s)
87 } else {
88 None
89 }
90 });
91
92 let obs = Self::observe_file(
93 *id,
94 name.clone(),
95 &path,
96 expected_hash_str.map(|s| s.as_str()),
97 );
98 state.observations.insert(*id, obs);
99 } else {
100 state.observations.insert(
102 *id,
103 Observation {
104 target: *id,
105 target_name: name,
106 confidence: ConfidenceScore::Unknown,
107 evidence: Evidence::Unobservable("Live audio input".into()),
108 timestamp: Timestamp(
109 std::time::SystemTime::now()
110 .duration_since(std::time::UNIX_EPOCH)
111 .unwrap()
112 .as_millis() as i64,
113 ),
114 },
115 );
116 }
117 }
118
119 NodeKind::Foreign(plugin_name) => {
121 state.observations.insert(
122 *id,
123 Observation {
124 target: *id,
125 target_name: name,
126 confidence: ConfidenceScore::Suspicious,
127 evidence: Evidence::InherentNondeterminism {
128 plugin_name: plugin_name.clone(),
129 },
130 timestamp: Timestamp(
131 std::time::SystemTime::now()
132 .duration_since(std::time::UNIX_EPOCH)
133 .unwrap()
134 .as_millis() as i64,
135 ),
136 },
137 );
138 }
139
140 _ => {
142 state.observations.insert(
143 *id,
144 Observation {
145 target: *id,
146 target_name: name,
147 confidence: ConfidenceScore::Inferred,
148 evidence: Evidence::InferredFromContext(
149 "Internal deterministic node".into(),
150 ),
151 timestamp: Timestamp(
152 std::time::SystemTime::now()
153 .duration_since(std::time::UNIX_EPOCH)
154 .unwrap()
155 .as_millis() as i64,
156 ),
157 },
158 );
159 }
160 }
161 }
162
163 state
164 }
165
166 pub fn observe_file(
168 target: StableId,
169 target_name: String,
170 path: &Path,
171 expected_hash_hex: Option<&str>,
172 ) -> Observation {
173 let ts = std::time::SystemTime::now()
174 .duration_since(std::time::UNIX_EPOCH)
175 .unwrap()
176 .as_millis() as i64;
177
178 if !path.exists() {
179 return Observation {
180 target,
181 target_name,
182 confidence: ConfidenceScore::Unknown,
183 evidence: Evidence::Unobservable(format!("File not found: {}", path.display())),
184 timestamp: Timestamp(ts),
185 };
186 }
187
188 let meta = match std::fs::metadata(path) {
189 Ok(m) => m,
190 Err(e) => {
191 return Observation {
192 target,
193 target_name,
194 confidence: ConfidenceScore::Unknown,
195 evidence: Evidence::Unobservable(format!("Cannot read metadata: {}", e)),
196 timestamp: Timestamp(ts),
197 }
198 }
199 };
200
201 let ext = path.extension().and_then(|s| s.to_str()).unwrap_or("");
202 if ext != "wav" && ext != "flac" && ext != "json" {
203 return Observation {
204 target,
205 target_name,
206 confidence: ConfidenceScore::Suspicious,
207 evidence: Evidence::ExtensionUnknown {
208 path: path.to_path_buf(),
209 ext: ext.to_string(),
210 },
211 timestamp: Timestamp(ts),
212 };
213 }
214
215 if let Some(expected_hex) = expected_hash_hex {
217 let file_bytes = match std::fs::read(path) {
218 Ok(b) => b,
219 Err(_) => {
220 return Observation {
221 target,
222 target_name,
223 confidence: ConfidenceScore::Unknown,
224 evidence: Evidence::Unobservable("Failed to read file contents".into()),
225 timestamp: Timestamp(ts),
226 }
227 }
228 };
229
230 let actual_hash = hash::hash_bytes(&file_bytes);
231 let mut expected_hash = [0u8; 32];
232
233 let valid_hex =
235 expected_hex.len() == 64 && expected_hex.chars().all(|c| c.is_ascii_hexdigit());
236 if valid_hex {
237 for i in 0..32 {
238 if let Ok(b) = u8::from_str_radix(&expected_hex[i * 2..i * 2 + 2], 16) {
239 expected_hash[i] = b;
240 }
241 }
242
243 if expected_hash == actual_hash {
244 return Observation {
245 target,
246 target_name,
247 confidence: ConfidenceScore::Verified,
248 evidence: Evidence::FileHashMatch {
249 path: path.to_path_buf(),
250 hash: actual_hash,
251 },
252 timestamp: Timestamp(ts),
253 };
254 } else {
255 return Observation {
256 target,
257 target_name,
258 confidence: ConfidenceScore::Unknown,
259 evidence: Evidence::FileHashMismatch {
260 path: path.to_path_buf(),
261 expected: expected_hash,
262 actual: actual_hash,
263 },
264 timestamp: Timestamp(ts),
265 };
266 }
267 }
268 }
269
270 let mtime = meta
272 .modified()
273 .unwrap_or(std::time::SystemTime::UNIX_EPOCH)
274 .duration_since(std::time::UNIX_EPOCH)
275 .unwrap()
276 .as_secs();
277 Observation {
278 target,
279 target_name,
280 confidence: ConfidenceScore::Inferred,
281 evidence: Evidence::FileStatOnly {
282 path: path.to_path_buf(),
283 size: meta.len(),
284 mtime,
285 },
286 timestamp: Timestamp(ts),
287 }
288 }
289}
290
291impl ObserverState {
292 pub fn save(&self, project_root: &Path) -> Result<(), std::io::Error> {
293 let path = project_root.join(".dirtydata").join("observations.json");
294 let data = serde_json::to_string_pretty(self)?;
295 std::fs::write(path, data)
296 }
297
298 pub fn load(project_root: &Path) -> Result<Self, std::io::Error> {
299 let path = project_root.join(".dirtydata").join("observations.json");
300 if !path.exists() {
301 return Ok(Self::default());
302 }
303 let data = std::fs::read_to_string(path)?;
304 let state = serde_json::from_str(&data)?;
305 Ok(state)
306 }
307}