use serde::{Deserialize, Serialize};
use std::io::{self, BufRead, Write};
use std::path::PathBuf;
use crate::antibot::ChallengeSignal;
/// Two-way handoff decision: either `Skip`, or `Pause` with context.
///
/// Serde represents this as an internally tagged value: the variant name is
/// stored under the `kind` key, spelled in `snake_case` (`skip` / `pause`).
// NOTE(review): nothing in the visible code constructs this type; confirm who
// produces and consumes it.
// NOTE(review): `reason` is `&'static str`, so the derived `Deserialize`
// presumably only works for input that lives for `'static` — confirm that is
// intended.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum HandoffDecision {
/// Variant without payload.
Skip,
/// Variant carrying the context of the page to pause on.
Pause {
/// Static label describing why the pause was requested.
reason: &'static str,
/// URL associated with the pause.
url: url::Url,
/// Optional filesystem path of a screenshot; `None` when there is none.
screenshot_path: Option<PathBuf>,
},
}
/// Name of the environment variable that switches human handoff on.
pub const CRAWLEX_HANDOFF_ENV: &str = "CRAWLEX_HANDOFF";

/// Reports whether human handoff is switched on through the environment.
///
/// Returns `true` only when [`CRAWLEX_HANDOFF_ENV`] is set to exactly `1`,
/// `true`, or `on`. The comparison is case-sensitive and performs no
/// trimming. An unset variable, a non-Unicode value, or any other value
/// yields `false`.
pub fn handoff_enabled() -> bool {
    match std::env::var(CRAWLEX_HANDOFF_ENV) {
        Ok(value) => ["1", "true", "on"].contains(&value.as_str()),
        // Covers both "not present" and "not valid Unicode".
        Err(_) => false,
    }
}
/// Decides whether `signal` warrants handing the crawl over to a human.
///
/// Returns `true` only when handoff is enabled (see `handoff_enabled`) and
/// the signal's level is `HardBlock` or `ChallengePage`. Every other level,
/// or a disabled handoff, yields `false`.
pub fn should_handoff(signal: &ChallengeSignal) -> bool {
    // Check the opt-in first so a disabled handoff short-circuits.
    if !handoff_enabled() {
        return false;
    }

    matches!(
        signal.level,
        crate::antibot::ChallengeLevel::HardBlock | crate::antibot::ChallengeLevel::ChallengePage
    )
}
/// Everything needed to prompt an operator for a human handoff: the page URL,
/// an optional screenshot path, a reason label, and an optional vendor.
#[derive(Debug, Clone)]
pub struct HandoffRequest {
/// URL printed in the prompt and forwarded into the policy decision.
pub url: url::Url,
/// Optional screenshot path; when `None` the prompt omits the `snapshot` line.
pub screenshot_path: Option<PathBuf>,
/// Static reason label; `from_signal` fills it from the signal level's `as_str()`.
pub reason: &'static str,
/// Challenge vendor, if known; when `None` the prompt omits the `vendor` line.
pub vendor: Option<crate::antibot::ChallengeVendor>,
}
impl HandoffRequest {
    /// Builds a request from a detected challenge signal.
    ///
    /// The URL is cloned from the signal, the reason is the string form of the
    /// signal's level, and the vendor is always populated (`Some`).
    pub fn from_signal(signal: &ChallengeSignal, screenshot_path: Option<PathBuf>) -> Self {
        Self {
            url: signal.url.clone(),
            screenshot_path,
            reason: signal.level.as_str(),
            vendor: Some(signal.vendor),
        }
    }

    /// Consumes the request and turns it into a
    /// `crate::policy::Decision::HumanHandoff`.
    ///
    /// The reason and the vendor (when present) are converted to owned
    /// strings; the URL and screenshot path are moved over unchanged.
    pub fn into_policy_decision(self) -> crate::policy::Decision {
        crate::policy::Decision::HumanHandoff {
            reason: self.reason.to_string(),
            vendor: self.vendor.map(|v| v.as_str().to_string()),
            url: self.url,
            screenshot_path: self.screenshot_path,
        }
    }

    /// Returns whether the crawl should pause for this request.
    ///
    /// This only consults `handoff_enabled()`; none of the request's own
    /// fields influence the answer.
    pub fn should_pause(&self) -> bool {
        handoff_enabled()
    }

    /// Writes the handoff banner to `out`, then blocks until one line arrives
    /// on `rd`.
    ///
    /// The banner lists the reason, the vendor (if any), the URL, and the
    /// screenshot path (if any), followed by instructions for the operator.
    /// `out` is flushed before reading so the prompt is visible while waiting.
    ///
    /// # Errors
    ///
    /// * Any error from writing to or flushing `out`, or from reading `rd`.
    /// * [`io::ErrorKind::Interrupted`] when `rd` is already at end of input,
    ///   i.e. no line can ever arrive.
    pub fn render_prompt<W: Write, R: BufRead>(&self, out: &mut W, rd: &mut R) -> io::Result<()> {
        const RULE: &str = "────────────────────────────────────────";

        writeln!(out)?;
        writeln!(out, "{}", RULE)?;
        writeln!(out, " crawlex :: human-handoff requested")?;
        writeln!(out, "{}", RULE)?;
        writeln!(out, " reason : {}", self.reason)?;
        if let Some(vendor) = self.vendor {
            writeln!(out, " vendor : {}", vendor.as_str())?;
        }
        writeln!(out, " url : {}", self.url)?;
        if let Some(path) = &self.screenshot_path {
            writeln!(out, " snapshot : {}", path.display())?;
        }
        writeln!(out)?;
        writeln!(
            out,
            " Solve the challenge in your own browser, copy session"
        )?;
        writeln!(
            out,
            " cookies back via `crawlex sessions import`, then press"
        )?;
        writeln!(out, " Enter here to resume the crawl.")?;
        writeln!(out, "{}", RULE)?;
        out.flush()?;

        // Only the arrival of a line matters, never its content, so read raw
        // bytes. `read_line` would fail with `InvalidData` on a stray
        // non-UTF-8 byte and abort the handoff for no reason.
        let mut discarded = Vec::new();
        let bytes_read = rd.read_until(b'\n', &mut discarded)?;
        if bytes_read == 0 {
            return Err(io::Error::new(
                io::ErrorKind::Interrupted,
                "stdin closed during handoff",
            ));
        }
        Ok(())
    }

    /// Shows the prompt on the process's standard error and waits for a line
    /// on standard input.
    ///
    /// Both streams are locked for the duration of the call.
    ///
    /// # Errors
    ///
    /// Propagates every error described on `render_prompt`.
    pub fn pause_and_wait(&self) -> io::Result<()> {
        // The prompt goes to stderr, not stdout.
        let stderr = io::stderr();
        let mut out = stderr.lock();
        let stdin = io::stdin();
        let mut input = stdin.lock();
        self.render_prompt(&mut out, &mut input)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// URL shared by every fixture in this module.
    fn sample_url() -> url::Url {
        url::Url::parse("https://example.com/login").unwrap()
    }

    /// With the variable removed from the environment, handoff is off.
    #[test]
    fn handoff_disabled_by_default() {
        std::env::remove_var(CRAWLEX_HANDOFF_ENV);
        let enabled = handoff_enabled();
        assert!(!enabled);
    }

    /// Even a hard block must not trigger a handoff while the opt-in is unset.
    #[test]
    fn should_handoff_requires_enabled() {
        std::env::remove_var(CRAWLEX_HANDOFF_ENV);
        let hard_block = ChallengeSignal {
            vendor: crate::antibot::ChallengeVendor::CloudflareJsChallenge,
            level: crate::antibot::ChallengeLevel::HardBlock,
            url: sample_url(),
            origin: "https://example.com".into(),
            proxy: None,
            session_id: "s".into(),
            first_seen: std::time::SystemTime::now(),
            metadata: serde_json::Value::Null,
        };
        assert!(!should_handoff(&hard_block));
    }

    /// The rendered banner mentions the title, URL, reason and screenshot.
    #[test]
    fn prompt_writes_url_and_reason() {
        let request = HandoffRequest {
            url: sample_url(),
            screenshot_path: Some(PathBuf::from("/tmp/shot.png")),
            reason: "hard_block",
            vendor: Some(crate::antibot::ChallengeVendor::CloudflareJsChallenge),
        };
        let mut sink = Vec::new();
        // A single newline stands in for the operator pressing Enter.
        let mut input = Cursor::new(b"\n".to_vec());
        request.render_prompt(&mut sink, &mut input).unwrap();

        let rendered = String::from_utf8(sink).unwrap();
        for needle in [
            "human-handoff requested",
            "example.com/login",
            "hard_block",
            "/tmp/shot.png",
        ] {
            assert!(rendered.contains(needle), "prompt is missing {:?}", needle);
        }
    }

    /// An input that is already at EOF surfaces as `Interrupted`.
    #[test]
    fn eof_is_translated_to_interrupted() {
        let request = HandoffRequest {
            url: sample_url(),
            screenshot_path: None,
            reason: "hard_block",
            vendor: None,
        };
        let mut sink = Vec::new();
        let mut empty_input = Cursor::new(Vec::<u8>::new());
        let outcome = request
            .render_prompt(&mut sink, &mut empty_input)
            .map_err(|e| e.kind());
        assert_eq!(outcome, Err(io::ErrorKind::Interrupted));
    }
}