1use std::{
29 fs::{self, OpenOptions},
30 io::Write,
31 path::PathBuf,
32};
33
34use chrono::{DateTime, Utc};
35use serde::{Deserialize, Serialize};
36use serde_json::Value;
37use sha2::{Digest, Sha256};
38use thiserror::Error;
39
40use crate::corpus::sanitize_tool_name;
41
/// One persisted fuzz-corpus record: a tool input plus metadata about why it was kept.
///
/// Serialized to and from JSON through `serde`; [`FuzzCorpus::save`] writes one file per entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FuzzCorpusEntry {
    /// Tool name; [`FuzzCorpus::save`] uses it to choose the per-tool directory.
    pub tool: String,
    /// JSON input associated with the tool (presumably the call arguments — confirm against the
    /// caller). [`input_key`] of this value becomes the file name on disk.
    pub input: Value,
    /// Why this input was recorded.
    pub trigger: CorpusTrigger,
    /// Fingerprint string. The tests fill it with [`response_fingerprint`] of a response;
    /// presumably production callers do the same — confirm.
    pub fingerprint: String,
    /// UTC timestamp of the entry; [`FuzzCorpus::list`] returns entries sorted by it, ascending.
    pub timestamp: DateTime<Utc>,
}
64
/// Reason a [`FuzzCorpusEntry`] was recorded.
///
/// Serialized as an internally tagged object: the variant name, converted to `snake_case`, is
/// stored under the `"type"` key (for example `{"type": "new_fingerprint"}`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum CorpusTrigger {
    /// The input was kept because of a finding.
    Finding {
        /// Free-form label for the finding (the tests use `"crash"`); the set of valid values is
        /// not defined in this file.
        kind: String,
    },
    /// Presumably: the input produced a response fingerprint that had not been seen before —
    /// confirm against the code that constructs this variant.
    NewFingerprint,
}
83
/// Errors returned by [`FuzzCorpus`] operations.
#[derive(Debug, Error)]
pub enum FuzzCorpusError {
    /// Creating a corpus directory failed.
    #[error("create directory {path}: {source}")]
    CreateDir {
        /// Directory that could not be created.
        path: PathBuf,
        /// Underlying I/O error.
        source: std::io::Error,
    },
    /// Reading a directory listing or a corpus file failed.
    #[error("read {path}: {source}")]
    Read {
        /// Directory or file that could not be read.
        path: PathBuf,
        /// Underlying I/O error.
        source: std::io::Error,
    },
    /// Opening or writing a corpus file failed.
    #[error("write {path}: {source}")]
    Write {
        /// File that could not be written.
        path: PathBuf,
        /// Underlying I/O error.
        source: std::io::Error,
    },
    /// A `serde_json` error; `#[from]` lets `?` convert it automatically.
    #[error("serialise corpus entry: {0}")]
    Serialize(#[from] serde_json::Error),
}
104
/// Shorthand for results whose error type is [`FuzzCorpusError`].
pub type Result<T> = std::result::Result<T, FuzzCorpusError>;
106
/// Handle to an on-disk fuzz corpus: one subdirectory per tool under `root`, one JSON file per
/// entry inside it.
#[derive(Debug, Clone)]
pub struct FuzzCorpus {
    /// Directory under which the per-tool subdirectories live.
    root: PathBuf,
}
114
115impl FuzzCorpus {
116 pub fn new(root: impl Into<PathBuf>) -> Self {
120 Self { root: root.into() }
121 }
122
123 pub fn tool_dir(&self, tool: &str) -> PathBuf {
127 self.root.join(sanitize_tool_name(tool))
128 }
129
130 pub fn list(&self, tool: &str) -> Result<Vec<FuzzCorpusEntry>> {
134 let dir = self.tool_dir(tool);
135 if !dir.is_dir() {
136 return Ok(Vec::new());
137 }
138 let mut out = Vec::new();
139 for entry in fs::read_dir(&dir).map_err(|source| FuzzCorpusError::Read {
140 path: dir.clone(),
141 source,
142 })? {
143 let entry = entry.map_err(|source| FuzzCorpusError::Read {
144 path: dir.clone(),
145 source,
146 })?;
147 let path = entry.path();
148 if path.extension().is_some_and(|ext| ext == "json") {
149 let bytes = fs::read(&path).map_err(|source| FuzzCorpusError::Read {
150 path: path.clone(),
151 source,
152 })?;
153 if let Ok(parsed) = serde_json::from_slice::<FuzzCorpusEntry>(&bytes) {
154 out.push(parsed);
155 }
156 }
157 }
158 out.sort_by_key(|e| e.timestamp);
159 Ok(out)
160 }
161
162 pub fn save(&self, entry: &FuzzCorpusEntry) -> Result<PathBuf> {
166 let dir = self.tool_dir(&entry.tool);
167 fs::create_dir_all(&dir).map_err(|source| FuzzCorpusError::CreateDir {
168 path: dir.clone(),
169 source,
170 })?;
171 let key = input_key(&entry.input);
172 let path = dir.join(format!("{key}.json"));
173 let body = serde_json::to_vec_pretty(entry)?;
174 let mut options = OpenOptions::new();
178 options.write(true).create(true).truncate(true);
179 #[cfg(unix)]
180 {
181 use std::os::unix::fs::OpenOptionsExt;
182 options.mode(0o600);
183 }
184 let mut file = options
185 .open(&path)
186 .map_err(|source| FuzzCorpusError::Write {
187 path: path.clone(),
188 source,
189 })?;
190 file.write_all(&body)
191 .map_err(|source| FuzzCorpusError::Write {
192 path: path.clone(),
193 source,
194 })?;
195 Ok(path)
196 }
197
198 pub fn total(&self) -> Result<usize> {
201 if !self.root.is_dir() {
202 return Ok(0);
203 }
204 let mut total = 0;
205 for entry in fs::read_dir(&self.root).map_err(|source| FuzzCorpusError::Read {
206 path: self.root.clone(),
207 source,
208 })? {
209 let entry = entry.map_err(|source| FuzzCorpusError::Read {
210 path: self.root.clone(),
211 source,
212 })?;
213 if entry.path().is_dir() {
214 let count = fs::read_dir(entry.path())
215 .map(|i| {
216 i.flatten()
217 .filter(|e| e.path().extension().is_some_and(|x| x == "json"))
218 .count()
219 })
220 .unwrap_or(0);
221 total += count;
222 }
223 }
224 Ok(total)
225 }
226}
227
228pub fn input_key(input: &Value) -> String {
231 let canonical = crate::finding::canonical_json(input);
232 let hash = Sha256::digest(canonical.as_bytes());
233 hex::encode(hash)[..16].to_string()
234}
235
236pub fn response_fingerprint(response: &Value) -> String {
248 let mut hasher = Sha256::new();
249 let is_error = response.get("isError").and_then(Value::as_bool);
250 hasher.update(format!("isError={is_error:?}|").as_bytes());
251 if let Some(arr) = response.get("content").and_then(Value::as_array) {
252 for item in arr {
253 let kind = item.get("type").and_then(Value::as_str).unwrap_or("?");
254 hasher.update(format!("type={kind}|").as_bytes());
255 }
256 if let Some(first_text) = arr
257 .first()
258 .and_then(|v| v.get("text"))
259 .and_then(Value::as_str)
260 {
261 let prefix: String = first_text.chars().take(64).collect();
262 hasher.update(prefix.as_bytes());
263 }
264 }
265 hex::encode(hasher.finalize())[..16].to_string()
266}
267
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use serde_json::json;
    use tempfile::tempdir;

    /// Builds an entry stamped with the current time whose fingerprint comes from a fixed
    /// single-item text response.
    fn entry(tool: &str, input: Value, trigger: CorpusTrigger) -> FuzzCorpusEntry {
        let canned_response = json!({"content": [{"type": "text", "text": "ok"}]});
        FuzzCorpusEntry {
            tool: tool.to_string(),
            input,
            trigger,
            fingerprint: response_fingerprint(&canned_response),
            timestamp: Utc::now(),
        }
    }

    /// Creates a corpus rooted in a fresh temporary directory. The returned guard must stay
    /// alive for as long as the corpus is used, because dropping it deletes the directory.
    fn scratch_corpus() -> (tempfile::TempDir, FuzzCorpus) {
        let guard = tempdir().expect("tempdir");
        let corpus = FuzzCorpus::new(guard.path().to_path_buf());
        (guard, corpus)
    }

    #[test]
    fn save_then_list_round_trips() {
        let (_guard, corpus) = scratch_corpus();
        let trigger = CorpusTrigger::Finding {
            kind: "crash".to_string(),
        };
        let saved = entry("x", json!({"a": 1}), trigger);
        corpus.save(&saved).expect("save");

        let listed = corpus.list("x").expect("list");
        assert_eq!(listed.len(), 1);
        assert_eq!(listed[0].input, json!({"a": 1}));
    }

    #[test]
    fn identical_inputs_dedup_on_disk() {
        let (_guard, corpus) = scratch_corpus();
        for _ in 0..3 {
            let repeated = entry(
                "x",
                json!({"a": 1, "b": "constant"}),
                CorpusTrigger::NewFingerprint,
            );
            corpus.save(&repeated).expect("save");
        }
        let listed = corpus.list("x").expect("list");
        assert_eq!(
            listed.len(),
            1,
            "identical inputs must dedup to a single file (key = SHA-256 of canonical JSON)"
        );
    }

    #[test]
    fn list_returns_empty_for_unknown_tool() {
        let (_guard, corpus) = scratch_corpus();
        let listed = corpus.list("never-saved").expect("list");
        assert!(listed.is_empty());
    }

    #[test]
    fn fingerprint_changes_when_is_error_flips() {
        let ok = response_fingerprint(&json!({"content": [], "isError": false}));
        let failed = response_fingerprint(&json!({"content": [], "isError": true}));
        assert_ne!(ok, failed);
    }

    #[test]
    fn fingerprint_changes_when_content_type_changes() {
        let text = response_fingerprint(&json!({"content": [{"type": "text", "text": "x"}]}));
        let image = response_fingerprint(&json!({"content": [{"type": "image", "data": "x"}]}));
        assert_ne!(text, image);
    }

    #[test]
    fn fingerprint_stable_for_same_response() {
        let response = json!({"content": [{"type": "text", "text": "foo"}], "isError": false});
        let first = response_fingerprint(&response);
        let second = response_fingerprint(&response);
        assert_eq!(first, second);
    }

    #[test]
    fn fingerprint_first_text_prefix_separates_distinct_messages() {
        let denied = response_fingerprint(
            &json!({"content": [{"type": "text", "text": "permission denied"}]}),
        );
        let missing =
            response_fingerprint(&json!({"content": [{"type": "text", "text": "not found"}]}));
        assert_ne!(denied, missing);
    }

    #[test]
    fn total_counts_across_tool_subdirs() {
        let (_guard, corpus) = scratch_corpus();
        let samples = [
            ("x", json!({"a": 1})),
            ("y", json!({"b": 2})),
            ("y", json!({"b": 3})),
        ];
        for (tool, input) in samples {
            corpus
                .save(&entry(tool, input, CorpusTrigger::NewFingerprint))
                .unwrap();
        }
        assert_eq!(corpus.total().unwrap(), 3);
    }
}