use std::{
fs::{self, OpenOptions},
io::Write,
path::PathBuf,
};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use sha2::{Digest, Sha256};
use thiserror::Error;
use crate::corpus::sanitize_tool_name;
/// One persisted fuzz-corpus record: a tool input together with the reason it was kept.
///
/// Serialised to and from JSON through serde; [`FuzzCorpus::save`] writes one file per entry
/// and [`FuzzCorpus::list`] reads them back.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FuzzCorpusEntry {
/// Name of the tool the input belongs to; selects the per-tool directory on disk.
pub tool: String,
/// The JSON input itself; [`input_key`] of this value determines the on-disk file name.
pub input: Value,
/// Why this input was recorded.
pub trigger: CorpusTrigger,
/// Fingerprint string stored with the entry. The tests in this file fill it with
/// [`response_fingerprint`]; NOTE(review): confirm production callers do the same.
pub fingerprint: String,
/// UTC time attached to the entry; [`FuzzCorpus::list`] sorts its result by this field.
pub timestamp: DateTime<Utc>,
}
/// Reason an input was added to the corpus.
///
/// Serialised as an internally tagged object: the variant name is written in snake_case
/// under a `"type"` key (`"finding"`, `"new_fingerprint"`), next to any variant fields.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum CorpusTrigger {
/// The input was kept because it produced a finding.
Finding {
/// Free-form label for the finding (the tests use `"crash"`).
/// NOTE(review): the set of valid labels is not visible in this file.
kind: String,
},
/// Presumably the input produced a response fingerprint not seen before — confirm
/// against the code that constructs this variant.
NewFingerprint,
}
/// Errors produced by [`FuzzCorpus`] operations.
///
/// The I/O variants carry the path that was being accessed so the rendered message
/// identifies the failing file or directory.
#[derive(Debug, Error)]
pub enum FuzzCorpusError {
/// Creating the per-tool directory failed (raised by [`FuzzCorpus::save`]).
#[error("create directory {path}: {source}")]
CreateDir {
/// Directory that could not be created.
path: PathBuf,
/// Underlying I/O error.
source: std::io::Error,
},
/// Reading a directory listing or an entry file failed
/// (raised by [`FuzzCorpus::list`] and [`FuzzCorpus::total`]).
#[error("read {path}: {source}")]
Read {
/// File or directory that could not be read.
path: PathBuf,
/// Underlying I/O error.
source: std::io::Error,
},
/// Opening or writing an entry file failed (raised by [`FuzzCorpus::save`]).
#[error("write {path}: {source}")]
Write {
/// File that could not be written.
path: PathBuf,
/// Underlying I/O error.
source: std::io::Error,
},
/// Encoding an entry as JSON failed; converted automatically from
/// `serde_json::Error` by the `?` operator.
#[error("serialise corpus entry: {0}")]
Serialize(#[from] serde_json::Error),
}
/// Shorthand for results whose error type is [`FuzzCorpusError`].
pub type Result<T> = std::result::Result<T, FuzzCorpusError>;
/// Handle to an on-disk fuzz corpus.
///
/// Entries live under `root/<sanitised tool name>/<input key>.json`; the handle itself
/// only stores the root path.
#[derive(Debug, Clone)]
pub struct FuzzCorpus {
/// Directory under which the per-tool subdirectories are placed.
root: PathBuf,
}
impl FuzzCorpus {
    /// Creates a corpus handle rooted at `root`.
    ///
    /// Nothing is touched on disk here; directories are created on demand by
    /// [`FuzzCorpus::save`].
    pub fn new(root: impl Into<PathBuf>) -> Self {
        Self { root: root.into() }
    }

    /// Returns the directory that holds the entries for `tool`.
    ///
    /// The tool name is passed through `sanitize_tool_name` before being joined onto
    /// the corpus root.
    pub fn tool_dir(&self, tool: &str) -> PathBuf {
        self.root.join(sanitize_tool_name(tool))
    }

    /// Loads every stored entry for `tool`, ordered by ascending timestamp.
    ///
    /// A tool whose directory does not exist yields an empty list. Only files with a
    /// `.json` extension are considered, and files that fail to deserialise are skipped
    /// rather than reported.
    ///
    /// # Errors
    ///
    /// Returns [`FuzzCorpusError::Read`] when the directory listing or one of the
    /// `.json` files cannot be read.
    pub fn list(&self, tool: &str) -> Result<Vec<FuzzCorpusEntry>> {
        let dir = self.tool_dir(tool);
        if !dir.is_dir() {
            return Ok(Vec::new());
        }

        let listing = fs::read_dir(&dir).map_err(|source| FuzzCorpusError::Read {
            path: dir.clone(),
            source,
        })?;

        let mut entries = Vec::new();
        for item in listing {
            let item = item.map_err(|source| FuzzCorpusError::Read {
                path: dir.clone(),
                source,
            })?;
            let path = item.path();
            if !path.extension().is_some_and(|ext| ext == "json") {
                continue;
            }
            let bytes = fs::read(&path).map_err(|source| FuzzCorpusError::Read {
                path: path.clone(),
                source,
            })?;
            // Deliberately best-effort: a file that is not a valid entry is ignored so a
            // single bad file does not make the whole corpus unreadable.
            if let Ok(parsed) = serde_json::from_slice::<FuzzCorpusEntry>(&bytes) {
                entries.push(parsed);
            }
        }

        entries.sort_by_key(|e| e.timestamp);
        Ok(entries)
    }

    /// Persists `entry` and returns the path of the file that now holds it.
    ///
    /// The file name is derived from [`input_key`] of the entry's input, so saving the
    /// same input again replaces the earlier file instead of adding a new one.
    ///
    /// The JSON body is first written to a uniquely named staging file in the same
    /// directory and then renamed over the destination. Readers therefore see either the
    /// previous complete file or the new complete file, never a truncated one. The
    /// staging file uses a `.tmp` extension, so [`FuzzCorpus::list`] and
    /// [`FuzzCorpus::total`] ignore it. On Unix the file is created with mode `0o600`.
    ///
    /// # Errors
    ///
    /// * [`FuzzCorpusError::CreateDir`] if the tool directory cannot be created.
    /// * [`FuzzCorpusError::Serialize`] if the entry cannot be encoded as JSON.
    /// * [`FuzzCorpusError::Write`] if the staging file cannot be written or cannot be
    ///   renamed into place; the staging file is removed on a best-effort basis.
    pub fn save(&self, entry: &FuzzCorpusEntry) -> Result<PathBuf> {
        // Distinguishes staging files created by concurrent saves inside one process.
        static STAGING_SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

        let dir = self.tool_dir(&entry.tool);
        fs::create_dir_all(&dir).map_err(|source| FuzzCorpusError::CreateDir {
            path: dir.clone(),
            source,
        })?;

        let key = input_key(&entry.input);
        let path = dir.join(format!("{key}.json"));
        let body = serde_json::to_vec_pretty(entry)?;

        let seq = STAGING_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
        let staging = dir.join(format!(".{key}.{}.{seq}.tmp", std::process::id()));

        let mut options = OpenOptions::new();
        options.write(true).create(true).truncate(true);
        #[cfg(unix)]
        {
            use std::os::unix::fs::OpenOptionsExt;
            options.mode(0o600);
        }

        // The file handle is dropped at the end of the closure, i.e. before the rename.
        let staged = options
            .open(&staging)
            .and_then(|mut file| file.write_all(&body));
        if let Err(source) = staged {
            let _ = fs::remove_file(&staging);
            return Err(FuzzCorpusError::Write {
                path: staging,
                source,
            });
        }

        if let Err(source) = fs::rename(&staging, &path) {
            let _ = fs::remove_file(&staging);
            return Err(FuzzCorpusError::Write { path, source });
        }
        Ok(path)
    }

    /// Counts the `.json` files found in the immediate subdirectories of the corpus root.
    ///
    /// Returns `0` when the root directory does not exist. Files are counted by extension
    /// only; they are not parsed. A subdirectory that cannot be listed contributes zero
    /// and unreadable directory entries inside it are skipped.
    ///
    /// # Errors
    ///
    /// Returns [`FuzzCorpusError::Read`] when the root directory itself cannot be listed.
    pub fn total(&self) -> Result<usize> {
        if !self.root.is_dir() {
            return Ok(0);
        }

        let tool_dirs = fs::read_dir(&self.root).map_err(|source| FuzzCorpusError::Read {
            path: self.root.clone(),
            source,
        })?;

        let mut total = 0;
        for tool_dir in tool_dirs {
            let tool_dir = tool_dir
                .map_err(|source| FuzzCorpusError::Read {
                    path: self.root.clone(),
                    source,
                })?
                .path();
            if !tool_dir.is_dir() {
                continue;
            }
            // Best-effort per subdirectory: listing failures count as zero entries.
            if let Ok(files) = fs::read_dir(&tool_dir) {
                total += files
                    .flatten()
                    .filter(|f| f.path().extension().is_some_and(|x| x == "json"))
                    .count();
            }
        }
        Ok(total)
    }
}
/// Derives the corpus file key for `input`.
///
/// The input is rendered with `crate::finding::canonical_json`, hashed with SHA-256, and
/// the first 16 characters of the lowercase hex digest are returned.
pub fn input_key(input: &Value) -> String {
    let digest = Sha256::digest(crate::finding::canonical_json(input).as_bytes());
    // The hex digest is 64 ASCII characters, so cutting at 16 always lands on a
    // character boundary.
    let mut key = hex::encode(digest);
    key.truncate(16);
    key
}
/// Computes a short fingerprint of a response value.
///
/// The SHA-256 input is built, in order, from:
/// 1. the `isError` field read as a boolean (its `Option<bool>` debug form, so a missing
///    or non-boolean field hashes as `None`);
/// 2. the `type` string of every element of the `content` array (`"?"` when absent);
/// 3. the first 64 characters of the `text` string of the first `content` element, if any.
///
/// The first 16 hex characters of the digest are returned.
///
/// NOTE(review): the text prefix is appended directly after the type tags with no
/// delimiter, so a text that itself looks like `type=...|` can produce the same byte
/// stream as a response with additional content items.
pub fn response_fingerprint(response: &Value) -> String {
    let error_flag = response.get("isError").and_then(Value::as_bool);

    let mut state = Sha256::new();
    state.update(format!("isError={error_flag:?}|").as_bytes());

    if let Some(items) = response.get("content").and_then(Value::as_array) {
        items
            .iter()
            .map(|item| item.get("type").and_then(Value::as_str).unwrap_or("?"))
            .for_each(|kind| state.update(format!("type={kind}|").as_bytes()));

        let leading_text = items
            .first()
            .and_then(|item| item.get("text"))
            .and_then(Value::as_str);
        if let Some(text) = leading_text {
            // Limit by characters, not bytes, so multi-byte text is never split.
            let prefix: String = text.chars().take(64).collect();
            state.update(prefix.as_bytes());
        }
    }

    let mut fingerprint = hex::encode(state.finalize());
    fingerprint.truncate(16);
    fingerprint
}
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use serde_json::json;
    use tempfile::tempdir;

    /// Builds an entry for `tool` with a fixed "ok" text response fingerprint and the
    /// current time as its timestamp.
    fn entry(tool: &str, input: Value, trigger: CorpusTrigger) -> FuzzCorpusEntry {
        let canned_response = json!({"content": [{"type": "text", "text": "ok"}]});
        FuzzCorpusEntry {
            tool: tool.to_owned(),
            input,
            trigger,
            fingerprint: response_fingerprint(&canned_response),
            timestamp: Utc::now(),
        }
    }

    /// A saved entry comes back from `list` with its input intact.
    #[test]
    fn save_then_list_round_trips() {
        let scratch = tempdir().expect("tempdir");
        let corpus = FuzzCorpus::new(scratch.path().to_path_buf());
        let trigger = CorpusTrigger::Finding {
            kind: "crash".to_string(),
        };
        let saved = entry("x", json!({"a": 1}), trigger);
        corpus.save(&saved).expect("save");

        let listed = corpus.list("x").expect("list");
        assert_eq!(listed.len(), 1);
        assert_eq!(listed[0].input, json!({"a": 1}));
    }

    /// Saving the same input repeatedly leaves exactly one file behind.
    #[test]
    fn identical_inputs_dedup_on_disk() {
        let scratch = tempdir().expect("tempdir");
        let corpus = FuzzCorpus::new(scratch.path().to_path_buf());
        (0..3).for_each(|_| {
            let repeated = entry(
                "x",
                json!({"a": 1, "b": "constant"}),
                CorpusTrigger::NewFingerprint,
            );
            corpus.save(&repeated).expect("save");
        });

        let listed = corpus.list("x").expect("list");
        assert_eq!(
            listed.len(),
            1,
            "identical inputs must dedup to a single file (key = SHA-256 of canonical JSON)"
        );
    }

    /// Listing a tool that was never saved is an empty success, not an error.
    #[test]
    fn list_returns_empty_for_unknown_tool() {
        let scratch = tempdir().expect("tempdir");
        let corpus = FuzzCorpus::new(scratch.path().to_path_buf());
        let listed = corpus.list("never-saved").expect("list");
        assert!(listed.is_empty());
    }

    /// The `isError` flag participates in the fingerprint.
    #[test]
    fn fingerprint_changes_when_is_error_flips() {
        let ok = response_fingerprint(&json!({"content": [], "isError": false}));
        let failed = response_fingerprint(&json!({"content": [], "isError": true}));
        assert_ne!(ok, failed);
    }

    /// The content item type participates in the fingerprint.
    #[test]
    fn fingerprint_changes_when_content_type_changes() {
        let text = response_fingerprint(&json!({"content": [{"type": "text", "text": "x"}]}));
        let image = response_fingerprint(&json!({"content": [{"type": "image", "data": "x"}]}));
        assert_ne!(text, image);
    }

    /// Fingerprinting is deterministic for a given response.
    #[test]
    fn fingerprint_stable_for_same_response() {
        let response = json!({"content": [{"type": "text", "text": "foo"}], "isError": false});
        let first = response_fingerprint(&response);
        let second = response_fingerprint(&response);
        assert_eq!(first, second);
    }

    /// Different leading text yields different fingerprints.
    #[test]
    fn fingerprint_first_text_prefix_separates_distinct_messages() {
        let denied = response_fingerprint(
            &json!({"content": [{"type": "text", "text": "permission denied"}]}),
        );
        let missing =
            response_fingerprint(&json!({"content": [{"type": "text", "text": "not found"}]}));
        assert_ne!(denied, missing);
    }

    /// `total` sums entries over every tool subdirectory.
    #[test]
    fn total_counts_across_tool_subdirs() {
        let scratch = tempdir().expect("tempdir");
        let corpus = FuzzCorpus::new(scratch.path().to_path_buf());
        let fixtures = [
            ("x", json!({"a": 1})),
            ("y", json!({"b": 2})),
            ("y", json!({"b": 3})),
        ];
        for (tool, input) in fixtures {
            corpus
                .save(&entry(tool, input, CorpusTrigger::NewFingerprint))
                .unwrap();
        }
        assert_eq!(corpus.total().unwrap(), 3);
    }
}