use super::error::FetchError;
use super::fetch::{FetchRequest, FetchResponse, Fetcher, ReqwestFetcher};
use super::source::BookSource;
use async_trait::async_trait;
use chromiumoxide::cdp::browser_protocol::network::Cookie;
use chromiumoxide::cdp::browser_protocol::page::BringToFrontParams;
use chromiumoxide::{Browser, BrowserConfig, Page};
use futures_util::StreamExt;
use std::collections::BTreeMap;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use std::time::{Duration, Instant};
use tokio::sync::Mutex;
/// Process-wide flag recording that a browser solve attempt has failed.
///
/// `EscalatingFetcher::fetch_full` sets it when `BrowserFetcher::solve` returns
/// an error and, while it is set, answers challenged requests with an error
/// instead of launching the browser again. Nothing in this file clears it.
static SOLVE_FAILED: AtomicBool = AtomicBool::new(false);
/// Result of a successful browser challenge solve.
#[derive(Debug, Clone)]
pub struct Clearance {
/// Ready-to-send `Cookie` header value (`name=value` pairs joined with `"; "`).
pub cookie_header: String,
/// Value of `navigator.userAgent` in the solving browser; empty when it could
/// not be read.
pub user_agent: String,
}
/// Minimal cookie record copied out of the browser's CDP cookie type.
#[derive(Debug, Clone)]
pub struct BrowserCookie {
/// Cookie domain as reported by the browser (may carry a leading `.`).
pub domain: String,
/// Cookie name.
pub name: String,
/// Cookie value.
pub value: String,
}
/// Everything captured from the browser at the end of an interactive login.
#[derive(Debug, Clone, Default)]
pub struct LoginOutcome {
/// Cookies read from the login page.
pub cookies: Vec<BrowserCookie>,
/// Snapshot of the page's `localStorage` (empty when it could not be read).
pub local_storage: BTreeMap<String, String>,
/// `document.documentElement.outerHTML` of the page (empty when unavailable).
pub html: String,
/// `location.href` of the page, falling back to the URL that was opened.
pub url: String,
}
impl LoginOutcome {
    /// Groups the captured cookies by registrable domain and renders each group
    /// as a single string.
    ///
    /// A leading `.` is stripped from each cookie's domain before it is passed to
    /// `registrable_domain`. Within one group a later cookie with the same name
    /// replaces an earlier one. Each group is rendered with `pairs_to_str`.
    pub fn cookies_by_registrable_domain(&self) -> BTreeMap<String, String> {
        use crate::cookie::{pairs_to_str, registrable_domain};

        // First pass: bucket name/value pairs under their registrable domain.
        let grouped = self.cookies.iter().fold(
            BTreeMap::<String, BTreeMap<String, String>>::new(),
            |mut buckets, cookie| {
                let host = cookie.domain.trim_start_matches('.');
                buckets
                    .entry(registrable_domain(host))
                    .or_default()
                    .insert(cookie.name.clone(), cookie.value.clone());
                buckets
            },
        );

        // Second pass: render every bucket as one cookie string.
        let mut rendered = BTreeMap::new();
        for (domain, pairs) in grouped {
            rendered.insert(domain, pairs_to_str(&pairs));
        }
        rendered
    }
}
/// Conditions under which a login is considered complete without an explicit
/// "done" signal. Any single match is sufficient.
#[derive(Debug, Clone, Default)]
pub struct LoginCriteria {
/// Cookie names; a cookie with one of these names and a non-empty value matches.
pub cookie_names: Vec<String>,
/// `localStorage` keys; a key present with a non-empty value matches.
pub local_storage_keys: Vec<String>,
}
/// Pair of shared flags used to steer an interactive login from outside.
///
/// Cloning shares the same underlying flags.
#[derive(Debug, Clone, Default)]
pub struct LoginSignal {
    /// Set to tell the login loop to finish and capture the current state.
    pub done: Arc<AtomicBool>,
    /// Set to tell the login loop to abort.
    pub cancel: Arc<AtomicBool>,
}

impl LoginSignal {
    /// Clears both flags so the signal can be reused for another login.
    pub fn reset(&self) {
        for flag in [&self.done, &self.cancel] {
            flag.store(false, Ordering::Relaxed);
        }
    }
}
/// Answer returned by [`BrowserUi::authorize`].
///
/// Within this file only `Deny` is inspected: it aborts the browser solve.
/// How `Once` and `Always` differ is up to the `BrowserUi` implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AuthDecision {
/// Allow this time. NOTE(review): presumably without remembering — confirm in the UI implementation.
Once,
/// Allow. NOTE(review): presumably remembered for later requests — confirm in the UI implementation.
Always,
/// Refuse browser assistance.
Deny,
}
/// Hooks that let the host application interact with the user while the
/// browser is solving a challenge.
#[async_trait]
pub trait BrowserUi: Send + Sync {
/// Asks whether the browser may be used for the source called `source_name`.
/// Awaited before a solve is started when no clearance is cached.
async fn authorize(&self, source_name: &str) -> AuthDecision;
/// Called at most once per solve, after the grace period, when a challenge is
/// still visible at `url`. Setting `cancel` to `true` makes the solver give up.
fn prompt_click(&self, url: &str, cancel: Arc<AtomicBool>);
/// Called after a solve's browser session has been torn down, whether or not
/// the solve succeeded.
fn done(&self);
}
/// Tunables for [`BrowserFetcher`].
#[derive(Clone)]
pub struct BrowserOptions {
/// Browser user-data directory passed to the launched browser.
pub profile_dir: PathBuf,
/// How long a solve waits before prompting the user about a visible challenge.
pub grace: Duration,
/// Maximum duration of a challenge solve.
pub total_timeout: Duration,
/// Maximum duration of an interactive login.
pub login_timeout: Duration,
/// Delay between polls of the browser state.
pub poll_interval: Duration,
/// Optional UI hooks; `None` disables authorization prompts and notifications.
pub ui: Option<Arc<dyn BrowserUi>>,
}
impl Default for BrowserOptions {
    /// Defaults: 5 s grace, 60 s solve timeout, 300 s login timeout, 800 ms poll
    /// interval, no UI hooks, and the profile directory chosen by
    /// `default_profile_dir`.
    fn default() -> Self {
        let secs = Duration::from_secs;
        let profile_dir = default_profile_dir();
        Self {
            ui: None,
            profile_dir,
            poll_interval: Duration::from_millis(800),
            grace: secs(5),
            total_timeout: secs(60),
            login_timeout: secs(300),
        }
    }
}
/// Picks the default browser profile directory.
///
/// Uses `<home>/.novel/browser-profile`, where `<home>` is the first of the
/// `HOME` and `USERPROFILE` environment variables that is set to a non-empty
/// value. Falls back to `trnovel-browser-profile` inside the system temp
/// directory when neither is usable.
fn default_profile_dir() -> PathBuf {
    // An empty variable would otherwise yield a path relative to the current
    // working directory, so empty values are treated the same as unset ones.
    let home = ["HOME", "USERPROFILE"]
        .iter()
        .filter_map(|key| std::env::var_os(key))
        .find(|value| !value.is_empty());

    match home {
        Some(home) => PathBuf::from(home).join(".novel").join("browser-profile"),
        None => std::env::temp_dir().join("trnovel-browser-profile"),
    }
}
/// Looks for an installed Chromium-family browser executable.
///
/// Delegates to the platform-specific `detect_browser_impl`; returns `None`
/// when no candidate is found.
pub fn detect_browser() -> Option<PathBuf> {
detect_browser_impl()
}
/// macOS: returns the first well-known application-bundle executable that
/// exists as a regular file, in the listed priority order.
#[cfg(target_os = "macos")]
fn detect_browser_impl() -> Option<PathBuf> {
    const CANDIDATES: &[&str] = &[
        "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
        "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
        "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
        "/Applications/Chromium.app/Contents/MacOS/Chromium",
        "/Applications/Vivaldi.app/Contents/MacOS/Vivaldi",
    ];

    for candidate in CANDIDATES {
        let path = PathBuf::from(candidate);
        if path.is_file() {
            return Some(path);
        }
    }
    None
}
/// Windows: probes each known install root (`ProgramFiles`,
/// `ProgramFiles(x86)`, `LOCALAPPDATA`) for each known browser executable and
/// returns the first one that exists as a regular file.
///
/// Roots are tried in the listed order; within a root, browsers are tried in
/// the listed order. Unset environment variables are skipped.
#[cfg(target_os = "windows")]
fn detect_browser_impl() -> Option<PathBuf> {
    const REL: &[&str] = &[
        r"Google\Chrome\Application\chrome.exe",
        r"Microsoft\Edge\Application\msedge.exe",
        r"BraveSoftware\Brave-Browser\Application\brave.exe",
        r"Chromium\Application\chrome.exe",
    ];

    ["ProgramFiles", "ProgramFiles(x86)", "LOCALAPPDATA"]
        .iter()
        .filter_map(|var| std::env::var_os(var))
        .map(PathBuf::from)
        .flat_map(|root| REL.iter().map(move |rel| root.join(rel)))
        .find(|candidate| candidate.is_file())
}
/// Linux: searches every directory on `PATH` for a known browser binary name
/// and returns the first regular file found.
///
/// The search is directory-major: all names are tried in the first `PATH`
/// directory before moving on to the next one. Returns `None` when `PATH` is
/// unset or nothing matches.
#[cfg(target_os = "linux")]
fn detect_browser_impl() -> Option<PathBuf> {
    const NAMES: &[&str] = &[
        "google-chrome",
        "google-chrome-stable",
        "chromium",
        "chromium-browser",
        "microsoft-edge",
        "brave-browser",
    ];

    let search_path = std::env::var_os("PATH")?;
    std::env::split_paths(&search_path)
        .flat_map(|dir| NAMES.iter().map(move |name| dir.join(name)))
        .find(|candidate| candidate.is_file())
}
/// Fallback for platforms other than macOS, Windows and Linux: no detection is
/// attempted and `None` is always returned.
#[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))]
fn detect_browser_impl() -> Option<PathBuf> {
None
}
/// Drives a visible (headed) browser to solve challenges and perform
/// interactive logins.
pub struct BrowserFetcher {
/// Path of the browser executable to launch.
exe: PathBuf,
/// Timeouts, profile directory and optional UI hooks.
opts: BrowserOptions,
}
impl BrowserFetcher {
    /// Upper bound on how long teardown waits for the browser process to exit
    /// after it has been asked to close.
    const EXIT_TIMEOUT: Duration = Duration::from_secs(5);

    /// Builds a fetcher around the first browser found by `detect_browser`, or
    /// returns `None` when no browser is installed.
    pub fn detect(opts: BrowserOptions) -> Option<Self> {
        detect_browser().map(|exe| Self { exe, opts })
    }

    /// Builds a fetcher that launches the given executable.
    pub fn with_executable(exe: PathBuf, opts: BrowserOptions) -> Self {
        Self { exe, opts }
    }

    /// Returns the configured UI hooks, if any.
    pub fn ui(&self) -> Option<&Arc<dyn BrowserUi>> {
        self.opts.ui.as_ref()
    }

    /// Opens `url` in a headed browser and polls until a `cf_clearance` cookie
    /// appears.
    ///
    /// The browser is always closed before returning, and `BrowserUi::done` is
    /// invoked afterwards when UI hooks are configured.
    ///
    /// # Errors
    ///
    /// * `FetchError::Browser` when the browser cannot be configured, launched,
    ///   or the page cannot be opened.
    /// * `FetchError::Challenged` when the user cancels or `total_timeout`
    ///   elapses without clearance.
    pub async fn solve(&self, url: &str) -> Result<Clearance, FetchError> {
        let (browser, handler_task) = self.launch().await?;
        let result = self.solve_inner(&browser, url).await;
        Self::shutdown(browser, handler_task).await;
        if let Some(ui) = &self.opts.ui {
            ui.done();
        }
        result
    }

    /// Opens `url` in a headed browser and waits for the user to log in.
    ///
    /// The login is considered complete when `signal.done` is set or when any
    /// entry of `criteria` is satisfied. The browser is always closed before
    /// returning.
    ///
    /// # Errors
    ///
    /// * `FetchError::Browser` when the browser cannot be configured, launched,
    ///   or the page cannot be opened.
    /// * `FetchError::Challenged` when the user cancels, `login_timeout`
    ///   elapses, or the connection to the browser is lost before any login
    ///   state could be captured.
    pub async fn login(
        &self,
        url: &str,
        criteria: &LoginCriteria,
        signal: &LoginSignal,
    ) -> Result<LoginOutcome, FetchError> {
        let (browser, handler_task) = self.launch().await?;
        let result = self.login_inner(&browser, url, criteria, signal).await;
        Self::shutdown(browser, handler_task).await;
        result
    }

    /// Launches a headed browser on the configured profile and spawns the task
    /// that drives its event handler.
    async fn launch(&self) -> Result<(Browser, tokio::task::JoinHandle<()>), FetchError> {
        // Best effort: remove the profile's singleton files so a previous
        // session's leftovers do not get in the way. Missing files are fine.
        for name in ["SingletonLock", "SingletonSocket", "SingletonCookie"] {
            let _ = std::fs::remove_file(self.opts.profile_dir.join(name));
        }

        let config = BrowserConfig::builder()
            .chrome_executable(&self.exe)
            .user_data_dir(&self.opts.profile_dir)
            .with_head()
            .arg("--no-first-run")
            .arg("--no-default-browser-check")
            .arg("--disable-blink-features=AutomationControlled")
            .build()
            .map_err(FetchError::Browser)?;

        let (browser, mut handler) = Browser::launch(config).await.map_err(browser_err)?;
        // The handler stream must be polled for the browser connection to make
        // progress; the events themselves are not needed here.
        let handler_task = tokio::spawn(async move { while handler.next().await.is_some() {} });
        Ok((browser, handler_task))
    }

    /// Closes the browser, gives the process a bounded amount of time to exit,
    /// and stops the handler task. All failures are ignored (best effort).
    async fn shutdown(mut browser: Browser, handler_task: tokio::task::JoinHandle<()>) {
        let _ = browser.close().await;
        // Wait for the process to exit so it is not killed mid-shutdown when the
        // handle is dropped; the timeout keeps a hung browser from blocking us.
        let _ = tokio::time::timeout(Self::EXIT_TIMEOUT, browser.wait()).await;
        handler_task.abort();
    }

    /// Polling loop behind [`Self::solve`].
    async fn solve_inner(&self, browser: &Browser, url: &str) -> Result<Clearance, FetchError> {
        let page = browser.new_page(url).await.map_err(browser_err)?;
        // Falls back to an empty string when the user agent cannot be read.
        let user_agent: String = page
            .evaluate("navigator.userAgent")
            .await
            .ok()
            .and_then(|v| v.into_value::<String>().ok())
            .unwrap_or_default();

        // Handed to the UI on prompt; the UI sets it to abort the solve.
        let cancel = Arc::new(AtomicBool::new(false));
        let start = Instant::now();
        let mut prompted = false;

        loop {
            if cancel.load(Ordering::Relaxed) {
                return Err(FetchError::Challenged(format!("用户取消解挑战 @ {url}")));
            }

            if let Ok(cookies) = page.get_cookies().await
                && let Some(cookie_header) = clearance_header(&cookies)
            {
                return Ok(Clearance {
                    cookie_header,
                    user_agent,
                });
            }

            let elapsed = start.elapsed();
            if elapsed >= self.opts.total_timeout {
                return Err(FetchError::Challenged(format!("浏览器解挑战超时 @ {url}")));
            }

            // After the grace period, if a challenge is still showing, bring the
            // page to the front and ask the user (once) to interact with it.
            if !prompted && elapsed >= self.opts.grace && challenge_visible(&page).await {
                let _ = page.execute(BringToFrontParams::default()).await;
                if let Some(ui) = &self.opts.ui {
                    ui.prompt_click(url, cancel.clone());
                }
                prompted = true;
            }

            tokio::time::sleep(self.opts.poll_interval).await;
        }
    }

    /// Polling loop behind [`Self::login`].
    async fn login_inner(
        &self,
        browser: &Browser,
        url: &str,
        criteria: &LoginCriteria,
        signal: &LoginSignal,
    ) -> Result<LoginOutcome, FetchError> {
        let page = browser.new_page(url).await.map_err(browser_err)?;
        let _ = page.execute(BringToFrontParams::default()).await;

        let start = Instant::now();
        let mut consecutive_failures = 0u32;
        // Most recent successfully read cookies. Kept across iterations so they
        // can still be returned if the browser goes away after the user signals
        // completion.
        let mut last_good: Vec<Cookie> = Vec::new();

        loop {
            if signal.cancel.load(Ordering::Relaxed) {
                return Err(FetchError::Challenged(format!("用户取消登录 @ {url}")));
            }

            match page.get_cookies().await {
                Ok(fresh) => {
                    consecutive_failures = 0;
                    last_good = fresh;
                }
                Err(_) => {
                    consecutive_failures += 1;
                    if signal.done.load(Ordering::Relaxed) {
                        // The user said they are done but the page is no longer
                        // reachable: return what was last seen, if anything.
                        if !last_good.is_empty() {
                            return Ok(LoginOutcome {
                                cookies: last_good.into_iter().map(to_browser_cookie).collect(),
                                local_storage: BTreeMap::new(),
                                html: String::new(),
                                url: url.to_string(),
                            });
                        }
                        return Err(FetchError::Challenged(format!(
                            "浏览器已关闭、未能读取登录态 @ {url}(请重试,登录完成后先回终端按 Enter 再关浏览器)"
                        )));
                    }
                    if consecutive_failures >= 3 {
                        return Err(FetchError::Challenged(format!(
                            "浏览器已关闭或连接中断 @ {url}"
                        )));
                    }
                    tokio::time::sleep(self.opts.poll_interval).await;
                    continue;
                }
            }

            let local_storage = read_local_storage(&page).await;
            let cookie_match = criteria.cookie_names.iter().any(|wanted| {
                last_good
                    .iter()
                    .any(|c| &c.name == wanted && !c.value.is_empty())
            });
            let storage_match = criteria
                .local_storage_keys
                .iter()
                .any(|key| local_storage.get(key).is_some_and(|v| !v.is_empty()));

            if signal.done.load(Ordering::Relaxed) || cookie_match || storage_match {
                let html = page
                    .evaluate("document.documentElement.outerHTML")
                    .await
                    .ok()
                    .and_then(|v| v.into_value::<String>().ok())
                    .unwrap_or_default();
                let final_url = page
                    .evaluate("location.href")
                    .await
                    .ok()
                    .and_then(|v| v.into_value::<String>().ok())
                    .unwrap_or_else(|| url.to_string());
                return Ok(LoginOutcome {
                    cookies: last_good.into_iter().map(to_browser_cookie).collect(),
                    local_storage,
                    html,
                    url: final_url,
                });
            }

            if start.elapsed() >= self.opts.login_timeout {
                return Err(FetchError::Challenged(format!("浏览器登录超时 @ {url}")));
            }
            tokio::time::sleep(self.opts.poll_interval).await;
        }
    }
}
fn to_browser_cookie(c: Cookie) -> BrowserCookie {
BrowserCookie {
domain: c.domain,
name: c.name,
value: c.value,
}
}
fn browser_err(e: chromiumoxide::error::CdpError) -> FetchError {
FetchError::Browser(e.to_string())
}
/// Reads the page's `localStorage` as a key/value map.
///
/// The page serialises its storage to a JSON string, which is parsed here.
/// Any failure (script error, unexpected result type, invalid JSON) yields an
/// empty map.
async fn read_local_storage(page: &Page) -> BTreeMap<String, String> {
    const JS: &str = r#"(function(){var o={};try{for(var i=0;i<localStorage.length;i++){var k=localStorage.key(i);o[k]=localStorage.getItem(k);}}catch(e){}return JSON.stringify(o);})()"#;

    let Ok(result) = page.evaluate(JS).await else {
        return BTreeMap::new();
    };
    let Ok(json) = result.into_value::<String>() else {
        return BTreeMap::new();
    };
    serde_json::from_str::<BTreeMap<String, String>>(&json).unwrap_or_default()
}
/// Builds a `Cookie` header value from the clearance-related cookies.
///
/// Returns `None` unless a cookie named `cf_clearance` is present. Otherwise
/// returns `cf_clearance` together with every cookie whose name starts with
/// `__cf`, as `name=value` pairs joined with `"; "` in their original order.
fn clearance_header(cookies: &[Cookie]) -> Option<String> {
    if !cookies.iter().any(|c| c.name == "cf_clearance") {
        return None;
    }

    let header = cookies
        .iter()
        .filter(|c| c.name == "cf_clearance" || c.name.starts_with("__cf"))
        .map(|c| format!("{}={}", c.name, c.value))
        .collect::<Vec<_>>()
        .join("; ");
    Some(header)
}
/// Reports whether the page currently appears to show a challenge.
///
/// The check runs in the page: the title contains "Just a moment" or "请稍候",
/// or an iframe from `challenges.cloudflare.com` is present. Any evaluation
/// failure counts as "not visible".
async fn challenge_visible(page: &Page) -> bool {
    const JS: &str = r#"document.title.indexOf('Just a moment')>=0
|| document.title.indexOf('请稍候')>=0
|| !!document.querySelector('iframe[src*="challenges.cloudflare.com"]')"#;

    match page.evaluate(JS).await {
        Ok(result) => result.into_value::<bool>().unwrap_or(false),
        Err(_) => false,
    }
}
/// Fetcher that uses plain HTTP first and escalates to a real browser only
/// when the HTTP fetch reports a challenge.
pub struct EscalatingFetcher {
/// Plain HTTP fetcher used for every request.
reqwest: ReqwestFetcher,
/// Browser used to solve challenges; `None` disables escalation.
browser: Option<BrowserFetcher>,
/// Clearance obtained from the browser, reused for later requests.
clearance: Mutex<Option<Clearance>>,
/// Source name shown to the user when asking for authorization.
name: String,
}
impl EscalatingFetcher {
    /// Creates a fetcher for `source`, optionally backed by `browser` for
    /// challenge solving.
    ///
    /// # Errors
    ///
    /// Propagates any error from `ReqwestFetcher::new`.
    pub fn new(source: &BookSource, browser: Option<BrowserFetcher>) -> Result<Self, FetchError> {
        Ok(Self {
            reqwest: ReqwestFetcher::new(source)?,
            browser,
            clearance: Mutex::new(None),
            name: source.name.clone(),
        })
    }

    /// Adds the cached clearance, if any, to the request headers.
    ///
    /// * `Cookie` is only set when the request does not already carry one; an
    ///   existing value is left untouched (the clearance cookies are not merged
    ///   into it).
    /// * `User-Agent` is replaced with the browser's user agent, because the
    ///   clearance was issued to that browser. It is left alone when the solver
    ///   could not read the user agent, so that an empty header never replaces
    ///   the request's own.
    async fn apply_clearance(&self, req: &mut FetchRequest) {
        let guard = self.clearance.lock().await;
        let Some(clearance) = guard.as_ref() else {
            return;
        };

        req.headers
            .entry("Cookie".into())
            .or_insert_with(|| clearance.cookie_header.clone());

        if !clearance.user_agent.is_empty() {
            req.headers
                .insert("User-Agent".into(), clearance.user_agent.clone());
        }
    }
}
#[async_trait]
impl Fetcher for EscalatingFetcher {
    /// Fetches `req` and returns only the response body.
    async fn fetch(&self, req: FetchRequest) -> Result<String, FetchError> {
        let response = self.fetch_full(req).await?;
        Ok(response.body)
    }

    /// Fetches `req` over plain HTTP, escalating to the browser on a challenge.
    ///
    /// Flow:
    /// 1. Apply any cached clearance and try the plain fetch.
    /// 2. Any outcome other than `FetchError::Challenged` is returned as is.
    /// 3. On a challenge, give up if no browser is configured or an earlier
    ///    solve has failed in this process.
    /// 4. If no clearance is cached yet, ask the UI for authorization (when UI
    ///    hooks exist) and run the browser solve. The clearance lock is held
    ///    throughout, so concurrent callers wait instead of solving again.
    /// 5. Apply the clearance and retry the plain fetch once.
    async fn fetch_full(&self, mut req: FetchRequest) -> Result<FetchResponse, FetchError> {
        self.apply_clearance(&mut req).await;

        let first_attempt = self.reqwest.fetch_full(req.clone()).await;
        let msg = match first_attempt {
            Err(FetchError::Challenged(msg)) => msg,
            other => return other,
        };

        let Some(browser) = self.browser.as_ref() else {
            return Err(FetchError::Challenged(msg));
        };
        if SOLVE_FAILED.load(Ordering::Relaxed) {
            return Err(FetchError::Challenged(format!(
                "{msg}(浏览器辅助不可用,已降级;可重启 app 重试)"
            )));
        }

        {
            // Scoped so the lock is released before `apply_clearance` takes it
            // again below.
            let mut slot = self.clearance.lock().await;
            if slot.is_none() {
                if let Some(ui) = browser.ui()
                    && ui.authorize(&self.name).await == AuthDecision::Deny
                {
                    return Err(FetchError::Challenged(format!(
                        "{msg}(用户未授权浏览器辅助)"
                    )));
                }
                let abs = self.reqwest.resolve(&req.url);
                let solved = browser
                    .solve(&abs)
                    .await
                    .inspect_err(|_| SOLVE_FAILED.store(true, Ordering::Relaxed))?;
                *slot = Some(solved);
            }
        }

        self.apply_clearance(&mut req).await;
        self.reqwest.fetch_full(req).await
    }
}
// Unit tests for browser detection, cookie grouping and the plain-HTTP path of
// `EscalatingFetcher`. None of them launches a browser.
#[cfg(test)]
mod tests {
use super::{BrowserCookie, EscalatingFetcher, LoginOutcome, detect_browser};
use crate::fetch::{FetchRequest, Fetcher};
use crate::testutil::{book_source, spawn_fixed_server};
// Smoke test: detection must complete on any machine, whatever it finds.
#[test]
fn detect_browser_does_not_panic() {
let _ = detect_browser();
}
// Cookies from different hosts of one site are merged under the registrable
// domain; a leading dot on the cookie domain is ignored.
#[test]
fn login_outcome_groups_cookies_by_registrable_domain() {
let out = LoginOutcome {
cookies: vec![
BrowserCookie {
domain: ".www.site.com".into(),
name: "sid".into(),
value: "1".into(),
},
BrowserCookie {
domain: "api.site.com".into(),
name: "t".into(),
value: "2".into(),
},
BrowserCookie {
domain: "a.example.co.uk".into(),
name: "x".into(),
value: "9".into(),
},
],
..Default::default()
};
let by = out.cookies_by_registrable_domain();
assert_eq!(by.get("site.com").map(String::as_str), Some("sid=1; t=2"));
assert_eq!(by.get("example.co.uk").map(String::as_str), Some("x=9"));
}
// With no browser configured, `fetch_full` must pass through the real status
// code and response headers from the HTTP fetch.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn escalating_fetcher_fetch_full_passes_status_and_headers() {
let (base, server) = spawn_fixed_server(
"HTTP/1.1 201 Created\r\nSet-Cookie: sid=zzz; Path=/\r\nContent-Length: 2\r\nConnection: close\r\n\r\nok"
.to_string(),
);
let fetcher = EscalatingFetcher::new(&book_source(&base), None).unwrap();
let resp = fetcher.fetch_full(FetchRequest::get("/x")).await.unwrap();
server.join().unwrap();
assert_eq!(resp.status, 201, "应透传真实状态码,而非默认 200");
assert_eq!(
resp.headers.get("set-cookie").map(String::as_str),
Some("sid=zzz; Path=/"),
"应透传响应头(Set-Cookie),而非默认空 headers"
);
}
}