use std::{
fs::{self, OpenOptions},
io::{self, Write},
path::{Path, PathBuf},
thread,
time::{Duration, Instant},
};
use thiserror::Error;
use crate::{
finding::Finding,
redact::Redact,
target::{default_lock_timeout_ms, OutputConfig},
};
/// Errors produced while writing findings to, or reading them back from, the corpus.
///
/// Every I/O-related variant carries the path that was being operated on together
/// with the underlying error, so the rendered message is self-contained.
#[derive(Debug, Error)]
pub enum CorpusError {
/// A directory required for the corpus (lock directory or per-tool directory)
/// could not be created.
#[error("failed to create corpus directory {path}: {source}")]
CreateDir { path: PathBuf, source: io::Error },
/// Creating the lock file failed for a reason other than the file already existing.
#[error("failed to acquire corpus lock {path}: {source}")]
Lock { path: PathBuf, source: io::Error },
/// The lock file still existed when the configured lock timeout ran out.
#[error("timed out acquiring corpus lock {0}")]
LockTimeout(PathBuf),
/// A finding could not be serialized to JSON; `id` is the id of that finding.
#[error("failed to serialize finding {id}: {source}")]
Serialize {
id: String,
source: serde_json::Error,
},
/// Opening or writing a corpus file failed.
#[error("failed to write corpus file {path}: {source}")]
Write { path: PathBuf, source: io::Error },
/// Listing a corpus directory or reading a corpus file failed.
#[error("failed to read corpus file {path}: {source}")]
Read { path: PathBuf, source: io::Error },
/// A corpus file was read but its contents could not be deserialized as a finding.
#[error("failed to parse corpus file {path}: {source}")]
Parse {
path: PathBuf,
source: serde_json::Error,
},
/// No stored finding has an id equal to, or starting with, the requested id.
#[error("finding `{0}` not found in corpus")]
NotFound(String),
}
/// Convenience alias for results whose error type is [`CorpusError`].
pub type Result<T> = std::result::Result<T, CorpusError>;
/// Handle to an on-disk corpus of findings stored as JSON files under a root directory.
#[derive(Debug, Clone)]
pub struct Corpus {
/// Directory under which findings are stored, in one subdirectory per `finding.tool`.
root: PathBuf,
/// Upper bound on how long `write_finding` waits to acquire the corpus lock.
lock_timeout: Duration,
}
impl Corpus {
    /// Creates a corpus rooted at `root`, using `default_lock_timeout_ms()`
    /// milliseconds as the lock timeout.
    pub fn new(root: impl Into<PathBuf>) -> Self {
        Self {
            root: root.into(),
            lock_timeout: Duration::from_millis(default_lock_timeout_ms()),
        }
    }

    /// Creates a corpus from `config`, taking the root directory from
    /// `config.corpus_dir` and the lock timeout from `config.lock_timeout_ms`.
    pub fn from_config(config: &OutputConfig) -> Self {
        Self {
            root: config.corpus_dir.clone(),
            lock_timeout: Duration::from_millis(config.lock_timeout_ms),
        }
    }

    /// Returns this corpus with its lock timeout replaced by `timeout`.
    #[must_use]
    pub fn with_lock_timeout(mut self, timeout: Duration) -> Self {
        self.lock_timeout = timeout;
        self
    }

    /// Stores the redacted form of `finding` as `<root>/<tool>/<id>.json` and
    /// returns the path that was written.
    ///
    /// Writers are serialized through a `.lock` file placed in the parent
    /// directory of the corpus root (or in `.wallfacer` when the root has no
    /// parent). The lock is held until this function returns. An existing file
    /// for the same id is overwritten.
    ///
    /// # Errors
    ///
    /// * [`CorpusError::CreateDir`] if the lock directory or the per-tool
    ///   directory cannot be created.
    /// * [`CorpusError::Lock`] / [`CorpusError::LockTimeout`] if the lock cannot
    ///   be acquired.
    /// * [`CorpusError::Serialize`] if the redacted finding cannot be serialized.
    /// * [`CorpusError::Write`] if the file cannot be opened or written.
    pub fn write_finding(&self, finding: &Finding) -> Result<PathBuf> {
        let wallfacer_dir = self
            .root
            .parent()
            .map(Path::to_path_buf)
            .unwrap_or_else(|| PathBuf::from(".wallfacer"));
        fs::create_dir_all(&wallfacer_dir).map_err(|source| CorpusError::CreateDir {
            path: wallfacer_dir.clone(),
            source,
        })?;

        // Dropped (and the lock file removed) when this function returns.
        let _lock = CorpusLock::acquire(wallfacer_dir.join(".lock"), self.lock_timeout)?;

        // NOTE(review): `finding.tool` and the finding id are joined into the path
        // unchecked — presumably both are trusted identifiers without path
        // separators; confirm against the producers of `Finding`.
        let tool_dir = self.root.join(&finding.tool);
        fs::create_dir_all(&tool_dir).map_err(|source| CorpusError::CreateDir {
            path: tool_dir.clone(),
            source,
        })?;

        // Only the redacted copy is ever serialized to disk.
        let redacted = finding.redacted();
        let path = tool_dir.join(format!("{}.json", redacted.id));
        let body =
            serde_json::to_string_pretty(&redacted).map_err(|source| CorpusError::Serialize {
                id: redacted.id.clone(),
                source,
            })?;
        write_secure(&path, body.as_bytes())?;
        Ok(path)
    }

    /// Loads every finding stored under the corpus root, sorted by id.
    ///
    /// Returns an empty list when the root is not an existing directory.
    /// Subdirectories are searched recursively and only files with a `json`
    /// extension are considered.
    ///
    /// # Errors
    ///
    /// Returns [`CorpusError::Read`] or [`CorpusError::Parse`] as soon as any
    /// directory or file cannot be read or parsed; no partial list is returned.
    pub fn list_findings(&self) -> Result<Vec<Finding>> {
        let mut findings = Vec::new();
        if !self.root.is_dir() {
            return Ok(findings);
        }
        visit_json_files(&self.root, &mut |path| {
            findings.push(read_finding_file(path)?);
            Ok(())
        })?;
        findings.sort_by(|left, right| left.id.cmp(&right.id));
        Ok(findings)
    }

    /// Looks up a finding whose id equals `id` or starts with `id`.
    ///
    /// Candidates are examined in ascending id order, so an exact match (which
    /// sorts before every longer id sharing that prefix) wins; otherwise the
    /// first id with the given prefix is returned. An empty `id` never matches.
    ///
    /// # Errors
    ///
    /// * [`CorpusError::NotFound`] if `id` is empty or no stored id has that prefix.
    /// * Any error from [`Corpus::list_findings`].
    pub fn find_by_id(&self, id: &str) -> Result<Finding> {
        // Every string starts with "", so an empty query would otherwise silently
        // return whichever finding happens to sort first.
        if id.is_empty() {
            return Err(CorpusError::NotFound(String::new()));
        }
        self.list_findings()?
            .into_iter()
            // An id is a prefix of itself, so this also covers exact matches.
            .find(|finding| finding.id.starts_with(id))
            .ok_or_else(|| CorpusError::NotFound(id.to_string()))
    }
}
fn write_secure(path: &Path, body: &[u8]) -> Result<()> {
let mut options = OpenOptions::new();
options.write(true).create(true).truncate(true);
#[cfg(unix)]
{
use std::os::unix::fs::OpenOptionsExt;
options.mode(0o600);
}
let mut file = options.open(path).map_err(|source| CorpusError::Write {
path: path.to_path_buf(),
source,
})?;
file.write_all(body).map_err(|source| CorpusError::Write {
path: path.to_path_buf(),
source,
})?;
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
let _ = fs::set_permissions(path, fs::Permissions::from_mode(0o600));
}
Ok(())
}
fn visit_json_files(path: &Path, visitor: &mut impl FnMut(&Path) -> Result<()>) -> Result<()> {
for entry in fs::read_dir(path).map_err(|source| CorpusError::Read {
path: path.to_path_buf(),
source,
})? {
let entry = entry.map_err(|source| CorpusError::Read {
path: path.to_path_buf(),
source,
})?;
let path = entry.path();
if path.is_dir() {
visit_json_files(&path, visitor)?;
} else if path
.extension()
.is_some_and(|extension| extension == "json")
{
visitor(&path)?;
}
}
Ok(())
}
/// Reads the file at `path` as UTF-8 text and deserializes it from JSON into a
/// [`Finding`].
///
/// # Errors
///
/// * [`CorpusError::Read`] if the file cannot be read as a string.
/// * [`CorpusError::Parse`] if the contents do not deserialize into a `Finding`.
fn read_finding_file(path: &Path) -> Result<Finding> {
    let contents = match fs::read_to_string(path) {
        Ok(text) => text,
        Err(source) => {
            return Err(CorpusError::Read {
                path: path.to_path_buf(),
                source,
            });
        }
    };

    match serde_json::from_str::<Finding>(&contents) {
        Ok(finding) => Ok(finding),
        Err(source) => Err(CorpusError::Parse {
            path: path.to_path_buf(),
            source,
        }),
    }
}
/// Guard representing ownership of the corpus lock file.
///
/// Obtained from `CorpusLock::acquire`, which creates the file at `path`;
/// dropping the guard deletes that file again.
struct CorpusLock {
/// Location of the lock file this guard created.
path: PathBuf,
}
/// Delay before the first retry when the lock file already exists.
const LOCK_BACKOFF_INITIAL: Duration = Duration::from_millis(25);
/// Ceiling for the retry delay, which `CorpusLock::acquire` doubles after each failed attempt.
const LOCK_BACKOFF_CAP: Duration = Duration::from_millis(1_000);
impl CorpusLock {
    /// Acquires the lock by exclusively creating the file at `path`, retrying
    /// until `timeout` has elapsed.
    ///
    /// While the file already exists, the call sleeps and retries with a delay
    /// that starts at `LOCK_BACKOFF_INITIAL`, doubles after each attempt and is
    /// capped at `LOCK_BACKOFF_CAP`; the final sleep is shortened so the call
    /// does not overshoot the deadline. A `timeout` of zero makes exactly one
    /// attempt. A `timeout` so large that the deadline cannot be represented
    /// (for example `Duration::MAX`) is treated as "wait indefinitely" instead
    /// of panicking.
    ///
    /// This function never removes an existing lock file, so a file left behind
    /// by a holder that did not run its destructor keeps the lock taken until
    /// it is deleted externally.
    ///
    /// # Errors
    ///
    /// * [`CorpusError::LockTimeout`] if the file still exists at the deadline.
    /// * [`CorpusError::Lock`] if creating the file fails for any reason other
    ///   than it already existing (e.g. the parent directory is missing).
    fn acquire(path: PathBuf, timeout: Duration) -> Result<Self> {
        // `Instant + Duration` panics on overflow; `None` means "no deadline".
        let deadline = Instant::now().checked_add(timeout);
        let mut backoff = LOCK_BACKOFF_INITIAL;
        loop {
            // `create_new` makes creation atomic: exactly one contender succeeds.
            match OpenOptions::new().write(true).create_new(true).open(&path) {
                Ok(_) => return Ok(Self { path }),
                Err(error) if error.kind() == io::ErrorKind::AlreadyExists => {
                    // Never sleep past the deadline; zero time left means we timed out.
                    let wait = deadline.map_or(backoff, |deadline| {
                        backoff.min(deadline.saturating_duration_since(Instant::now()))
                    });
                    if wait.is_zero() {
                        return Err(CorpusError::LockTimeout(path));
                    }
                    thread::sleep(wait);
                    backoff = (backoff * 2).min(LOCK_BACKOFF_CAP);
                }
                Err(source) => return Err(CorpusError::Lock { path, source }),
            }
        }
    }
}
impl Drop for CorpusLock {
    /// Releases the lock by deleting the lock file.
    ///
    /// Removal is best-effort: a failure (for example because the file is
    /// already gone) is ignored, since a destructor has no way to report it.
    fn drop(&mut self) {
        let lock_file = self.path.as_path();
        let _ = fs::remove_file(lock_file);
    }
}