use std::{
env,
ffi::OsString,
fs,
path::{Path, PathBuf},
time::Duration,
};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use void_crawl_core::{DownloadOutcome, ScanConfig, Verdict, VoidCrawlError, scanner};
use crate::{server::VoidCrawlServer, sessions::PendingDownload};
/// Timeout, in seconds, used when a request does not supply `timeout_secs`.
pub const DEFAULT_TIMEOUT_SECS: u64 = 120;
/// Name of the environment variable that gates every download tool in this
/// module; the tools refuse to run unless it holds an enabling value.
pub const ENABLE_ENV: &str = "VOIDCRAWL_ALLOW_DOWNLOADS";
/// Reports whether the download tools are switched on for this process.
///
/// Reads [`ENABLE_ENV`] from the environment and hands the value to
/// [`enabled_from`]. A variable that is unset or cannot be read as Unicode
/// counts as "not set".
fn downloads_enabled() -> bool {
    let raw = env::var(ENABLE_ENV).ok();
    enabled_from(raw.as_deref())
}
/// Decides whether a raw gate value opts in to downloads.
///
/// The gate is opt-in: `None` (variable unset) and blank values are disabled.
/// After trimming surrounding whitespace, the explicit negatives `0`, `false`,
/// `no` and `off` (compared ASCII case-insensitively) are also disabled, so a
/// user writing `=no` or `=off` never switches downloads on by accident.
/// Every other non-empty value (`1`, `true`, `yes`, `on`, ...) enables the gate.
fn enabled_from(value: Option<&str>) -> bool {
    // Spellings that explicitly mean "keep the feature off".
    const DISABLED_TOKENS: [&str; 4] = ["0", "false", "no", "off"];

    let Some(raw) = value else {
        return false;
    };
    let token = raw.trim();
    !token.is_empty() && !DISABLED_TOKENS.iter().any(|off| token.eq_ignore_ascii_case(off))
}
/// Arguments for a one-shot download of a single URL.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct DownloadArgs {
/// URL to download.
pub url: String,
/// Directory that receives the file once it scans clean. When omitted, a
/// `voidcrawl-downloads` directory under the system temp directory is used.
#[serde(default)]
pub output_dir: Option<String>,
/// Size limit passed to both the download and the scanner. When omitted,
/// the scanner's default limit is used.
#[serde(default)]
pub max_bytes: Option<u64>,
/// Download timeout in seconds. When omitted, 120 seconds is used.
#[serde(default)]
pub timeout_secs: Option<u64>,
}
/// Outcome of a download after it has been scanned.
#[derive(Debug, Serialize, JsonSchema)]
pub struct DownloadResult {
/// The requested URL, or `session:<session_id>` for a session download.
pub url: String,
/// `true` when the file scanned clean and was moved to its destination.
pub ok: bool,
/// Scan verdict: `"clean"` or `"flagged"`.
pub verdict: String,
/// Why the file was flagged; absent for clean files.
pub reason: Option<String>,
/// Final location of the file; present only for clean files.
pub path: Option<String>,
/// MIME type reported by the scanner, if any.
pub detected_mime: Option<String>,
/// File size reported by the scanner.
pub size: u64,
/// Wait reported by the tab pool when acquiring a tab (milliseconds, per
/// the field name); always `0` for session downloads.
pub waited_ms: u64,
}
/// Downloads `args.url` in a pooled tab, scans the file, and publishes it.
///
/// The file is first written into a temporary quarantine directory created
/// inside the output directory; [`finalize`] then scans it and, if it is
/// clean, moves it next to the quarantine. The tab is handed back to the pool
/// whether or not the download succeeded, and the quarantine handle is dropped
/// once the result is known.
///
/// # Errors
///
/// Fails when downloads are disabled via [`ENABLE_ENV`], when the output or
/// quarantine directory cannot be created, when no pool/tab can be obtained,
/// when the download itself fails, or when [`finalize`] fails.
pub async fn run(
    server: &VoidCrawlServer,
    args: DownloadArgs,
) -> Result<DownloadResult, VoidCrawlError> {
    if !downloads_enabled() {
        let message = format!(
            "file downloads are disabled; set {ENABLE_ENV}=1 to enable the `download` tool"
        );
        return Err(VoidCrawlError::Other(message));
    }

    // Resolve where clean files end up, falling back to a shared temp location.
    let output_dir = args
        .output_dir
        .as_deref()
        .map_or_else(|| env::temp_dir().join("voidcrawl-downloads"), PathBuf::from);
    if let Err(e) = fs::create_dir_all(&output_dir) {
        return Err(VoidCrawlError::Other(format!("create {}: {e}", output_dir.display())));
    }

    // The quarantine lives inside the output directory so the later move of a
    // clean file is a rename between sibling directories.
    let quarantine = match tempfile::tempdir_in(&output_dir) {
        Ok(dir) => dir,
        Err(e) => return Err(VoidCrawlError::Other(format!("create quarantine dir: {e}"))),
    };

    let max_bytes = args.max_bytes.unwrap_or(scanner::DEFAULT_MAX_BYTES);
    let timeout = Duration::from_secs(args.timeout_secs.unwrap_or(DEFAULT_TIMEOUT_SECS));

    let pool = server.state().pool().await?;
    let (tab, waited_ms) = pool.acquire_timed().await?;
    let outcome =
        tab.page.download_to_dir(&args.url, quarantine.path(), timeout, max_bytes).await;
    // Return the tab before looking at the outcome so a failed download does
    // not keep it checked out.
    pool.release(tab).await;
    let downloaded = outcome?;

    let result = finalize(args.url, &downloaded, &output_dir, max_bytes, waited_ms);
    drop(quarantine);
    result
}
/// Scans a quarantined download and turns the verdict into a [`DownloadResult`].
///
/// * Clean files are renamed into `output_dir` under a name chosen by
///   [`unique_dest`] (the downloaded file's own name, or `download` if it has
///   none) and reported with `ok: true` and the final path.
/// * Flagged files are left where they are and reported with `ok: false`, the
///   scanner's reason and no path.
///
/// `url` and `waited_ms` are copied into the result unchanged; `max_bytes` and
/// the download's content type are forwarded to the scanner.
///
/// # Errors
///
/// Fails when the scan fails, when no free destination name is found, or when
/// the rename of a clean file fails.
fn finalize(
    url: String,
    downloaded: &DownloadOutcome,
    output_dir: &Path,
    max_bytes: u64,
    waited_ms: u64,
) -> Result<DownloadResult, VoidCrawlError> {
    let report = scanner::scan_path(
        &downloaded.path,
        &ScanConfig { max_bytes, claimed_mime: downloaded.content_type.clone() },
    )?;

    // Work out the verdict-specific fields first; the shared fields are filled
    // in once below.
    let (ok, verdict, reason, path) = match report.verdict {
        Verdict::Flagged { reason } => (false, "flagged", Some(reason), None),
        Verdict::Clean => {
            let name = match downloaded.path.file_name() {
                Some(file_name) => OsString::from(file_name),
                None => OsString::from("download"),
            };
            let dest = unique_dest(output_dir, &name)?;
            if let Err(e) = fs::rename(&downloaded.path, &dest) {
                return Err(VoidCrawlError::Other(format!("move clean file: {e}")));
            }
            (true, "clean", None, Some(dest.to_string_lossy().into_owned()))
        }
    };

    Ok(DownloadResult {
        url,
        ok,
        verdict: verdict.into(),
        reason,
        path,
        detected_mime: report.detected_mime,
        size: report.size,
        waited_ms,
    })
}
/// Arguments for arming a download capture on an existing session.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct DownloadArmArgs {
/// Identifier of the session whose page should be armed.
pub session_id: String,
/// Directory that receives the file once it scans clean. When omitted, a
/// `voidcrawl-downloads` directory under the system temp directory is used.
#[serde(default)]
pub output_dir: Option<String>,
/// Size limit passed to the capture and later to the scanner. When omitted,
/// the scanner's default limit is used.
#[serde(default)]
pub max_bytes: Option<u64>,
}
/// Acknowledgement returned after a download capture has been armed.
#[derive(Debug, Serialize, JsonSchema)]
pub struct DownloadArmResult {
/// Whether the capture is armed.
pub armed: bool,
/// Human-readable hint describing the next step.
pub message: String,
}
/// Arguments for collecting a previously armed session download.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct DownloadWaitArgs {
/// Identifier of the session that was armed.
pub session_id: String,
/// How long to wait for the download, in seconds. When omitted, 120 seconds
/// is used.
#[serde(default)]
pub timeout_secs: Option<u64>,
}
/// Builds the error returned by the session download tools when the
/// [`ENABLE_ENV`] gate is closed.
fn disabled_err() -> VoidCrawlError {
    let message = format!("file downloads are disabled; set {ENABLE_ENV}=1 to enable");
    VoidCrawlError::Other(message)
}
/// Arms a download capture on a session's page so that a following user action
/// (e.g. a click) can trigger the download.
///
/// A quarantine directory is created inside the output directory, the page is
/// armed to capture into it, and the capture is parked in the session's
/// `pending_download` slot until [`wait`] collects it.
///
/// The slot's lock is held from the "already armed?" check until the new
/// capture is stored. Checking and storing under two separate lock
/// acquisitions would let two concurrent calls both pass the check, with the
/// later one silently replacing (and dropping) the earlier capture.
///
/// Lock order in this function: `pending_download` first, then `page`.
/// NOTE(review): confirm no other code path takes these two locks in the
/// opposite order while holding both.
///
/// # Errors
///
/// Fails when downloads are disabled via [`ENABLE_ENV`], when the session does
/// not exist, when a download is already armed on it, when the output or
/// quarantine directory cannot be created, or when arming the page fails.
pub async fn arm(
    server: &VoidCrawlServer,
    args: DownloadArmArgs,
) -> Result<DownloadArmResult, VoidCrawlError> {
    if !downloads_enabled() {
        return Err(disabled_err());
    }
    let session =
        server.state().sessions.get(&args.session_id).await.ok_or_else(|| {
            VoidCrawlError::Other(format!("no such session: {}", args.session_id))
        })?;

    // Keep the guard for the rest of the function: the check below and the
    // store at the end must be one atomic step.
    let mut pending = session.pending_download.lock().await;
    if pending.is_some() {
        return Err(VoidCrawlError::Other(
            "a download is already armed on this session; call download_wait first".into(),
        ));
    }

    let output_dir = match &args.output_dir {
        Some(d) => PathBuf::from(d),
        None => env::temp_dir().join("voidcrawl-downloads"),
    };
    fs::create_dir_all(&output_dir)
        .map_err(|e| VoidCrawlError::Other(format!("create {}: {e}", output_dir.display())))?;
    let quarantine = tempfile::tempdir_in(&output_dir)
        .map_err(|e| VoidCrawlError::Other(format!("create quarantine dir: {e}")))?;
    let max_bytes = args.max_bytes.unwrap_or(scanner::DEFAULT_MAX_BYTES);

    let capture = {
        // Scope the page lock to the arming call only.
        let page = session.page.lock().await;
        page.arm_download(quarantine.path(), max_bytes).await?
    };

    *pending = Some(PendingDownload { capture, quarantine, output_dir, max_bytes });
    Ok(DownloadArmResult {
        armed: true,
        message: "download armed — perform the click that triggers it, then call download_wait"
            .into(),
    })
}
/// Collects the download previously armed on a session, scans it, and
/// publishes it.
///
/// The pending capture is taken out of the session (so it can be collected
/// only once), polled for up to the requested timeout, and the page's download
/// behaviour is reset whether or not the poll succeeded. The captured file
/// then goes through [`finalize`]; the result's `url` is `session:<id>` and its
/// `waited_ms` is `0` because no pool tab is acquired here.
///
/// # Errors
///
/// Fails when downloads are disabled via [`ENABLE_ENV`], when the session does
/// not exist, when nothing is armed on it, when the capture fails or times
/// out, or when [`finalize`] fails.
pub async fn wait(
    server: &VoidCrawlServer,
    args: DownloadWaitArgs,
) -> Result<DownloadResult, VoidCrawlError> {
    if !downloads_enabled() {
        return Err(disabled_err());
    }

    let Some(session) = server.state().sessions.get(&args.session_id).await else {
        return Err(VoidCrawlError::Other(format!("no such session: {}", args.session_id)));
    };

    // Take the slot's contents in its own statement so the lock is released
    // before anything else is awaited.
    let armed = session.pending_download.lock().await.take();
    let Some(PendingDownload { capture, quarantine, output_dir, max_bytes }) = armed else {
        return Err(VoidCrawlError::Other(
            "no armed download for this session; call download_arm first".into(),
        ));
    };

    let timeout = Duration::from_secs(args.timeout_secs.unwrap_or(DEFAULT_TIMEOUT_SECS));
    let outcome = capture.poll(timeout).await;

    // Undo the arming on the page before surfacing any poll error.
    session.page.lock().await.reset_download_behavior().await;

    let downloaded = outcome?;
    let source = format!("session:{}", args.session_id);
    let result = finalize(source, &downloaded, &output_dir, max_bytes, 0);
    drop(quarantine);
    result
}
/// Picks a path inside `dir` for `name` that does not exist yet.
///
/// Returns `dir/name` when that path is free. Otherwise tries `name.1`,
/// `name.2`, ... up to `name.9999` (the counter is appended after the complete
/// file name, extension included) and returns the first free candidate.
///
/// The counter is appended to the raw `OsString`, so a name that is not valid
/// UTF-8 keeps its exact bytes in every candidate instead of having them
/// replaced by U+FFFD.
///
/// Existence is only probed, not reserved: another writer can still claim the
/// returned path before the caller uses it.
///
/// # Errors
///
/// Returns [`VoidCrawlError::Other`] when the base name and all 9999 suffixed
/// names are already taken.
fn unique_dest(dir: &Path, name: &OsString) -> Result<PathBuf, VoidCrawlError> {
    let base = dir.join(name);
    if !base.exists() {
        return Ok(base);
    }
    (1..10_000)
        .map(|i| {
            let mut candidate = name.clone();
            candidate.push(format!(".{i}"));
            dir.join(candidate)
        })
        .find(|p| !p.exists())
        .ok_or_else(|| {
            // Lossy conversion is fine here: it is only used for display.
            let shown = name.to_string_lossy();
            VoidCrawlError::Other(format!("no free filename for {shown:?} in {}", dir.display()))
        })
}
#[cfg(test)]
mod tests {
    #![allow(clippy::unwrap_used, clippy::expect_used, clippy::panic, reason = "test harness")]
    use super::enabled_from;

    /// With no value at all the gate must stay closed.
    #[test]
    fn gate_is_off_by_default() {
        let enabled = enabled_from(None);
        assert!(!enabled);
    }

    /// Blank values, `0`, and any casing of `false` keep the gate closed.
    #[test]
    fn gate_rejects_falsey_tokens() {
        const FALSEY: [&str; 6] = ["", " ", "0", "false", "FALSE", "False"];
        for v in FALSEY {
            let enabled = enabled_from(Some(v));
            assert!(!enabled, "{v:?} should be disabled");
        }
    }

    /// Common affirmative spellings open the gate.
    #[test]
    fn gate_accepts_truthy_tokens() {
        const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];
        for v in TRUTHY {
            let enabled = enabled_from(Some(v));
            assert!(enabled, "{v:?} should be enabled");
        }
    }
}