drission 0.3.1

Rust 反检测浏览器自动化 + 内置验证码识别:ddddocr 离线 OCR 与图片滑块缺口距离(极验/顶象),默认 Camoufox/Firefox、自动过 Cloudflare 盾、高并发爬虫与 XHR 监听拦截,DrissionPage 风格 API。Anti-detect browser automation in Rust with built-in ddddocr captcha OCR + slider-gap (GeeTest) solving, Camoufox, Cloudflare bypass — a Rust DrissionPage.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
//! **Chrome for Testing** 自动下载与分发(CDP 后端)。
//!
//! 对标 CloakBrowser / Camoufox 的「首次运行自动下载浏览器二进制」体验,但走 Google 官方
//! **Chrome for Testing** 分发(三平台齐全):解析平台 → 查询 last-known-good JSON →
//! 选匹配资产 → 流式下载 zip → 解压(unix 保留可执行位与符号链接,mac `.app` 内有符号链接)
//! → 定位 chrome 可执行文件。与 [`super::locate`](定位系统已装 Chrome)互补。
//!
//! 解析优先级([`ensure_chrome`]):
//! 1. 环境变量 `CHROME_BIN` / `DRISSION_CHROME`(经 [`super::locate`]);
//! 2. 系统已安装的 Chrome / Edge / Brave / Chromium(经 [`super::locate`],Windows 含注册表);
//! 3. 缓存目录(`~/.cache/drission/chrome/<platform>`)中已下载的 Chrome for Testing;
//! 4. 从 Chrome for Testing 下载当前平台最新 Stable。
//!
//! 跨平台预取:[`download_chrome_for`] 可下载**任意**平台(如在 mac 上预取 `win64`),
//! 用于分发 / 打包(对应「mac 和 win 都要」)。

use std::path::{Path, PathBuf};

use futures_util::StreamExt;
use serde::Deserialize;
use tokio::io::AsyncWriteExt;

use crate::{Error, Result};

/// Index of Chrome for Testing's last-known-good versions (includes per-platform download links).
const CFT_ENDPOINT: &str = "https://googlechromelabs.github.io/chrome-for-testing/last-known-good-versions-with-downloads.json";
/// Index of the latest version per milestone (major version); used to **pin a specific major
/// version** such as 137 so it lines up with the wreq TLS fingerprint.
const CFT_MILESTONE_ENDPOINT: &str = "https://googlechromelabs.github.io/chrome-for-testing/latest-versions-per-milestone-with-downloads.json";
/// User-Agent sent with the index and download requests: `drission-rs/<crate version>`.
const USER_AGENT: &str = concat!("drission-rs/", env!("CARGO_PKG_VERSION"));

/// Default download channel.
pub const DEFAULT_CHANNEL: &str = "Stable";

/// Top-level shape of the last-known-good index JSON fetched from `CFT_ENDPOINT`.
#[derive(Debug, Deserialize)]
struct CftIndex {
    /// Release channels keyed by channel name (looked up with e.g. `"Stable"`).
    channels: std::collections::HashMap<String, Channel>,
}

/// Milestone index: `milestones["137"] = { version, downloads }` (reuses the shape of
/// [`Channel`]).
#[derive(Debug, Deserialize)]
struct CftMilestoneIndex {
    /// Entries keyed by the milestone (major version) as a string, e.g. `"137"`.
    milestones: std::collections::HashMap<String, Channel>,
}

/// One index entry (a release channel or a milestone): its version string and download lists.
#[derive(Debug, Deserialize)]
struct Channel {
    /// Version string of this entry; deserializes to an empty string when the field is absent.
    #[serde(default)]
    version: String,
    /// Download lists attached to this entry.
    downloads: Downloads,
}

/// Download lists of an index entry; only the `chrome` list is read here.
#[derive(Debug, Deserialize)]
struct Downloads {
    /// `chrome` assets, one per platform; deserializes to an empty list when the field is absent.
    #[serde(default)]
    chrome: Vec<Download>,
}

/// A single downloadable asset.
#[derive(Debug, Deserialize)]
struct Download {
    /// Platform tag of the asset; compared against the requested platform when picking one.
    platform: String,
    /// Download URL of the asset's zip archive.
    url: String,
}

/// 当前平台对应的 Chrome for Testing 平台标记(资产命名 `chrome-<platform>.zip`)。
///
/// `mac-arm64` / `mac-x64` / `win64` / `win32` / `linux64`。
pub fn cft_platform() -> Result<&'static str> {
    let p = match (std::env::consts::OS, std::env::consts::ARCH) {
        ("macos", "aarch64") => "mac-arm64",
        ("macos", "x86_64") => "mac-x64",
        ("windows", "x86_64") => "win64",
        ("windows", "x86") => "win32",
        ("linux", "x86_64") => "linux64",
        (os, arch) => {
            return Err(Error::UnsupportedPlatform(format!(
                "Chrome for Testing 无 {os}/{arch} 资产"
            )));
        }
    };
    Ok(p)
}

/// Cache root directory: `~/.cache/drission`, overridable through the `DRISSION_CACHE`
/// environment variable.
///
/// The override is read as an OS string, so paths that are not valid UTF-8 are honoured. An
/// empty value is treated as "not set" instead of silently turning the cache into a path
/// relative to the current working directory. When no home directory can be determined, `.`
/// is used as the base.
pub fn cache_root() -> PathBuf {
    if let Some(custom) = std::env::var_os("DRISSION_CACHE").filter(|v| !v.is_empty()) {
        return PathBuf::from(custom);
    }
    dirs::home_dir()
        .unwrap_or_else(|| PathBuf::from("."))
        .join(".cache")
        .join("drission")
}

/// File name of the Chrome executable inside the extracted archive for `platform` (used to
/// locate it in the extraction directory).
///
/// Tags starting with `mac` and `win` get their platform-specific names; everything else is
/// treated like Linux.
fn chrome_exe_name(platform: &str) -> &'static str {
    match platform {
        p if p.starts_with("mac") => "Google Chrome for Testing",
        p if p.starts_with("win") => "chrome.exe",
        _ => "chrome",
    }
}

/// Makes sure a usable Chrome exists on this machine and returns the path of its executable,
/// downloading Chrome for Testing when necessary.
///
/// Priority (see the [module docs](self)): environment variable / system install → cache →
/// download of the current platform's Stable build.
pub async fn ensure_chrome() -> Result<PathBuf> {
    match super::locate::chrome_path() {
        // Steps 1 + 2: `locate` already covers CHROME_BIN / DRISSION_CHROME, install paths, the
        // Windows registry and a PATH scan. A real system browser is preferred.
        Ok(system_chrome) => {
            tracing::debug!(path = %system_chrome.display(), "使用系统已定位的 Chrome");
            Ok(system_chrome)
        }
        // Steps 3 + 4: cache hit, or download for the current platform.
        Err(_) => download_chrome_for(cft_platform()?, DEFAULT_CHANNEL).await,
    }
}

/// 确保**指定平台**的 Chrome for Testing 已下载到缓存,返回其可执行文件路径。
///
/// - 当前平台:返回的路径可直接传给 [`super::ChromiumBrowser::launch_with`] 启动;
/// - **其它平台**(跨平台预取,如在 mac 上取 `win64`):返回的可执行文件**无法在本机运行**,
///   仅用于分发 / 打包。
///
/// 缓存命中(`~/.cache/drission/chrome/<platform>` 下已有可执行文件)则直接复用、不重复下载。
pub async fn download_chrome_for(platform: &str, channel: &str) -> Result<PathBuf> {
    let chrome_root = cache_root().join("chrome");
    let dest = chrome_root.join(platform);
    let exe_name = chrome_exe_name(platform);

    // 缓存命中(已解压)。
    if let Some(found) = find_executable(&dest, exe_name) {
        tracing::debug!(path = %found.display(), "复用缓存中的 Chrome for Testing");
        return Ok(found);
    }

    tokio::fs::create_dir_all(&dest).await?;
    let zip_path = chrome_root.join(format!("chrome-{platform}.zip"));

    // 若已有预下载好的 zip(如外部下载器预置到 `~/.cache/drission/chrome/chrome-<platform>.zip`),
    // 直接解压复用,免重复网络下载;否则查 Chrome for Testing 索引并流式下载。
    if zip_path.exists() {
        tracing::info!(path = %zip_path.display(), "发现预下载的 Chrome zip,直接解压复用");
    } else {
        let (version, url) = pick_asset(platform, channel).await?;
        tracing::info!(%version, %platform, %channel, "缓存未命中,开始下载 Chrome for Testing …");
        download(&url, &zip_path).await?;
    }

    let dest_clone = dest.clone();
    let zip_clone = zip_path.clone();
    tokio::task::spawn_blocking(move || extract_zip(&zip_clone, &dest_clone))
        .await
        .map_err(|e| Error::Other(format!("解压任务 join 失败: {e}")))??;

    // 删除下载的 zip(失败不致命)。
    let _ = tokio::fs::remove_file(&zip_path).await;

    let found = find_executable(&dest, exe_name).ok_or_else(|| {
        Error::BrowserNotFound(format!(
            "解压后未找到 Chrome 可执行文件({exe_name}),目录: {}",
            dest.display()
        ))
    })?;

    // unix:确保主可执行文件有执行位(下载非本机平台时无意义但无害)。
    #[cfg(unix)]
    ensure_executable_bit(&found);

    Ok(found)
}

/// 确保**指定主版本(里程碑)**的 Chrome for Testing 已下载到缓存,返回可执行文件路径。
///
/// 用于把内置浏览器**锁定到某个 Chrome 主版本**(如 `137`),让其 TLS/JA3 指纹与 `wreq` 的
/// 模拟档(同主版本、同为 BoringSSL)对齐 —— 这样浏览器过盾拿到的 `cf_clearance` 能被 `wreq`
/// 协议请求复用更久(详见调用方 pgid_auto 的说明)。
///
/// 缓存目录 `~/.cache/drission/chrome/<platform>-m<milestone>`(与 Stable 缓存隔离,不互相覆盖)。
pub async fn download_chrome_milestone(platform: &str, milestone: &str) -> Result<PathBuf> {
    let chrome_root = cache_root().join("chrome");
    let dest = chrome_root.join(format!("{platform}-m{milestone}"));
    let exe_name = chrome_exe_name(platform);

    if let Some(found) = find_executable(&dest, exe_name) {
        tracing::debug!(path = %found.display(), "复用缓存中的 Chrome for Testing(指定里程碑)");
        return Ok(found);
    }

    tokio::fs::create_dir_all(&dest).await?;
    let zip_path = chrome_root.join(format!("chrome-{platform}-m{milestone}.zip"));
    if zip_path.exists() {
        tracing::info!(path = %zip_path.display(), "发现预下载的 Chrome zip(里程碑),直接解压复用");
    } else {
        let (version, url) = pick_asset_milestone(platform, milestone).await?;
        tracing::info!(%version, %platform, %milestone, "缓存未命中,下载指定里程碑 Chrome for Testing …");
        download(&url, &zip_path).await?;
    }

    let dest_clone = dest.clone();
    let zip_clone = zip_path.clone();
    tokio::task::spawn_blocking(move || extract_zip(&zip_clone, &dest_clone))
        .await
        .map_err(|e| Error::Other(format!("解压任务 join 失败: {e}")))??;
    let _ = tokio::fs::remove_file(&zip_path).await;

    let found = find_executable(&dest, exe_name).ok_or_else(|| {
        Error::BrowserNotFound(format!(
            "解压后未找到 Chrome 可执行文件({exe_name}),目录: {}",
            dest.display()
        ))
    })?;
    #[cfg(unix)]
    ensure_executable_bit(&found);
    Ok(found)
}

/// 查询里程碑索引,返回指定平台 / 主版本的 (version, url)。
async fn pick_asset_milestone(platform: &str, milestone: &str) -> Result<(String, String)> {
    let client = reqwest::Client::builder().user_agent(USER_AGENT).build()?;
    let index: CftMilestoneIndex = client
        .get(CFT_MILESTONE_ENDPOINT)
        .send()
        .await?
        .error_for_status()?
        .json()
        .await?;
    let m = index.milestones.get(milestone).ok_or_else(|| {
        Error::BrowserNotFound(format!("Chrome for Testing 无里程碑 `{milestone}`"))
    })?;
    for dl in &m.downloads.chrome {
        if dl.platform == platform {
            return Ok((m.version.clone(), dl.url.clone()));
        }
    }
    Err(Error::BrowserNotFound(format!(
        "里程碑 `{milestone}` 无平台 `{platform}` 的 chrome 资产"
    )))
}

/// 查询 Chrome for Testing 索引,返回指定平台 / 渠道的 (version, url)。
async fn pick_asset(platform: &str, channel: &str) -> Result<(String, String)> {
    let client = reqwest::Client::builder().user_agent(USER_AGENT).build()?;
    let index: CftIndex = client
        .get(CFT_ENDPOINT)
        .send()
        .await?
        .error_for_status()?
        .json()
        .await?;

    let ch = index.channels.get(channel).ok_or_else(|| {
        Error::BrowserNotFound(format!(
            "Chrome for Testing 无渠道 `{channel}`(可选 Stable/Beta/Dev/Canary)"
        ))
    })?;

    for dl in &ch.downloads.chrome {
        if dl.platform == platform {
            return Ok((ch.version.clone(), dl.url.clone()));
        }
    }
    Err(Error::BrowserNotFound(format!(
        "Chrome for Testing 渠道 `{channel}` 无平台 `{platform}` 的 chrome 资产"
    )))
}

/// 流式下载到文件(每 16 MiB 打一次进度日志)。
async fn download(url: &str, dest: &Path) -> Result<()> {
    let client = reqwest::Client::builder().user_agent(USER_AGENT).build()?;
    let resp = client.get(url).send().await?.error_for_status()?;
    let total = resp.content_length().unwrap_or(0);

    let mut file = tokio::fs::File::create(dest).await?;
    let mut downloaded: u64 = 0;
    let mut last_logged: u64 = 0;
    let mut stream = resp.bytes_stream();
    while let Some(chunk) = stream.next().await {
        let chunk = chunk?;
        file.write_all(&chunk).await?;
        downloaded += chunk.len() as u64;
        if downloaded - last_logged > 16 * 1024 * 1024 {
            last_logged = downloaded;
            if total > 0 {
                tracing::info!(
                    "下载进度 {:.1}% ({}/{} MiB)",
                    downloaded as f64 / total as f64 * 100.0,
                    downloaded / 1024 / 1024,
                    total / 1024 / 1024
                );
            }
        }
    }
    file.flush().await?;
    Ok(())
}

/// Recursively searches `root` (with a bounded depth) for a regular file named `target`.
///
/// Within each directory the files are checked before any subdirectory is entered. A depth of 8
/// is enough for the nesting of a mac `.app` bundle
/// (`chrome-mac-*/...app/Contents/MacOS/<exe>`). Unreadable directories and entries are skipped.
fn find_executable(root: &Path, target: &str) -> Option<PathBuf> {
    const MAX_DEPTH: usize = 8;

    fn search(dir: &Path, wanted: &str, remaining: usize) -> Option<PathBuf> {
        if remaining == 0 {
            return None;
        }
        let mut pending = Vec::new();
        for item in std::fs::read_dir(dir).ok()?.flatten() {
            let kind = match item.file_type() {
                Ok(kind) => kind,
                Err(_) => continue,
            };
            if kind.is_file() && item.file_name().to_string_lossy() == wanted {
                return Some(item.path());
            }
            if kind.is_dir() {
                pending.push(item.path());
            }
        }
        // No match at this level: descend into the subdirectories in the order they were seen.
        pending
            .into_iter()
            .find_map(|child| search(&child, wanted, remaining - 1))
    }

    search(root, target, MAX_DEPTH)
}

/// Synchronously extracts the zip at `zip_path` into `dest` (on unix, permissions and symbolic
/// links are preserved).
///
/// Entries whose name is not a safe enclosed path are skipped. On non-unix platforms symlink
/// entries are written out as ordinary files and no permissions are applied.
fn extract_zip(zip_path: &Path, dest: &Path) -> Result<()> {
    let mut archive = zip::ZipArchive::new(std::fs::File::open(zip_path)?)?;

    for index in 0..archive.len() {
        let mut entry = archive.by_index(index)?;
        // Skip unsafe / invalid paths.
        let Some(rel) = entry.enclosed_name() else {
            continue;
        };
        let outpath = dest.join(rel);
        #[cfg(unix)]
        let mode = entry.unix_mode();

        if entry.is_dir() {
            std::fs::create_dir_all(&outpath)?;
            continue;
        }
        if let Some(parent) = outpath.parent() {
            std::fs::create_dir_all(parent)?;
        }

        #[cfg(unix)]
        {
            // File-type bits 0o120000 under mask 0o170000 mark a symbolic link; the entry's
            // content is the link target.
            if matches!(mode, Some(m) if m & 0o170000 == 0o120000) {
                use std::io::Read;
                let mut target = String::new();
                entry.read_to_string(&mut target)?;
                // Remove a pre-existing entry first so creating the symlink does not fail.
                let _ = std::fs::remove_file(&outpath);
                std::os::unix::fs::symlink(&target, &outpath)?;
                continue;
            }
        }

        // Regular file (on non-unix platforms symlink entries also land here as plain files).
        let mut out = std::fs::File::create(&outpath)?;
        std::io::copy(&mut entry, &mut out)?;

        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            if let Some(m) = mode {
                std::fs::set_permissions(&outpath, std::fs::Permissions::from_mode(m))?;
            }
        }
    }
    Ok(())
}

/// Makes sure the file carries the owner exec bit (`u+x`) on unix; a safety net in case
/// extraction dropped it.
///
/// The check looks at the owner bit specifically: a file that is executable only for group or
/// others still cannot be launched by its owner. When the bit is missing, the mode is OR-ed with
/// `0o755`. Errors (missing file, failed `chmod`) are ignored on purpose, as this is best-effort.
#[cfg(unix)]
fn ensure_executable_bit(path: &Path) {
    use std::os::unix::fs::PermissionsExt;

    const OWNER_EXEC: u32 = 0o100;

    let Ok(meta) = std::fs::metadata(path) else {
        return;
    };
    let mut perm = meta.permissions();
    let mode = perm.mode();
    if mode & OWNER_EXEC == 0 {
        perm.set_mode(mode | 0o755);
        let _ = std::fs::set_permissions(path, perm);
    }
}

/// Unit tests for the pure helpers of this module (no network access).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn cft_platform_is_known() {
        // Hosts that have a Chrome for Testing asset, and the tag each one maps to.
        const SUPPORTED: [((&str, &str), &str); 5] = [
            (("macos", "aarch64"), "mac-arm64"),
            (("macos", "x86_64"), "mac-x64"),
            (("windows", "x86_64"), "win64"),
            (("windows", "x86"), "win32"),
            (("linux", "x86_64"), "linux64"),
        ];
        let host = (std::env::consts::OS, std::env::consts::ARCH);
        let expected = SUPPORTED
            .iter()
            .find(|(pair, _)| *pair == host)
            .map(|(_, tag)| *tag);

        // On hosts without an asset (e.g. linux/aarch64) the function must report
        // `UnsupportedPlatform` instead of making the whole test suite fail.
        match (cft_platform(), expected) {
            (Ok(tag), Some(want)) => assert_eq!(tag, want),
            (Err(err), None) => assert!(matches!(err, Error::UnsupportedPlatform(_))),
            (Ok(tag), None) => panic!("unexpected tag `{tag}` for unsupported host {host:?}"),
            (Err(_), Some(want)) => panic!("当前平台应被支持: expected `{want}` for {host:?}"),
        }
    }

    #[test]
    fn chrome_exe_name_per_platform() {
        assert_eq!(chrome_exe_name("mac-arm64"), "Google Chrome for Testing");
        assert_eq!(chrome_exe_name("mac-x64"), "Google Chrome for Testing");
        assert_eq!(chrome_exe_name("win64"), "chrome.exe");
        assert_eq!(chrome_exe_name("win32"), "chrome.exe");
        assert_eq!(chrome_exe_name("linux64"), "chrome");
    }

    #[test]
    fn cache_root_under_home_or_override() {
        let r = cache_root();
        // `var_os` so an override that is not valid UTF-8 still counts as "set".
        assert!(r.ends_with("drission") || std::env::var_os("DRISSION_CACHE").is_some());
    }

    #[test]
    fn find_executable_locates_nested_file() {
        // Build a target file inside nested directories and check the bounded recursion finds it.
        let base = std::env::temp_dir().join(format!(
            "drission_fetch_{}_{}",
            std::process::id(),
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .map(|d| d.as_nanos())
                .unwrap_or(0)
        ));
        let nested = base.join("chrome-x").join("a.app").join("Contents");
        std::fs::create_dir_all(&nested).expect("建嵌套目录");
        let exe = nested.join("target-exe");
        std::fs::write(&exe, b"x").expect("写目标文件");

        assert_eq!(
            find_executable(&base, "target-exe").as_deref(),
            Some(exe.as_path())
        );
        assert!(find_executable(&base, "no-such-file").is_none());

        let _ = std::fs::remove_dir_all(&base);
    }
}