use anyhow::{Context, Result};
use reqwest::header::{HeaderMap, HeaderValue};
use std::path::PathBuf;
/// User-Agent string sent with every request made by [`fetch_as_markdown`].
///
/// It identifies the client as the WeChat (`MicroMessenger`) in-app browser
/// running on an iPhone with a `zh_CN` language setting.
// NOTE(review): presumably needed so the server returns the regular mobile
// article page instead of a verification page — confirm against the live service.
pub const WX_UA: &str = "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.40(0x18002830) NetType/WIFI Language/zh_CN";
/// Fetches `url`, converts the page to Markdown and delivers the result.
///
/// When `output` is `Some(path)` the Markdown is written to that file and a
/// confirmation line is printed; when it is `None` the Markdown itself is
/// printed to standard output.
///
/// # Errors
///
/// Returns an error if fetching/converting the page fails or if the output
/// file cannot be written.
pub async fn run(url: String, output: Option<PathBuf>) -> Result<()> {
    let markdown = fetch_as_markdown(&url).await?;

    // No destination file: dump the document to stdout and finish.
    let target = match output {
        Some(target) => target,
        None => {
            println!("{markdown}");
            return Ok(());
        }
    };

    tokio::fs::write(&target, markdown.as_bytes())
        .await
        .with_context(|| format!("写入 {} 失败", target.display()))?;
    println!("✓ 已写入 {}", target.display());
    Ok(())
}
/// Downloads the page at `url` and returns its content converted to Markdown.
///
/// The request is sent with the [`WX_UA`] user agent, HTML-oriented `accept`
/// and Chinese `accept-language` headers, gzip/brotli decoding enabled and a
/// 30 second timeout. The response body is passed through
/// `clean_wechat_html` before being handed to `html2md`.
///
/// # Errors
///
/// Returns an error if the HTTP client cannot be built, the request fails,
/// the server answers with an error status, or the body cannot be read as
/// text.
pub async fn fetch_as_markdown(url: &str) -> Result<String> {
    const ACCEPT: &str = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
    const ACCEPT_LANGUAGE: &str = "zh-CN,zh;q=0.9";

    let mut default_headers = HeaderMap::new();
    default_headers.insert("accept", HeaderValue::from_static(ACCEPT));
    default_headers.insert("accept-language", HeaderValue::from_static(ACCEPT_LANGUAGE));

    let request_timeout = std::time::Duration::from_secs(30);
    let client = reqwest::Client::builder()
        .user_agent(WX_UA)
        .default_headers(default_headers)
        .gzip(true)
        .brotli(true)
        .timeout(request_timeout)
        .build()?;

    let response = client
        .get(url)
        .send()
        .await
        .with_context(|| format!("请求 {url} 失败"))?;

    // Treat 4xx/5xx answers as failures before touching the body.
    let page = response.error_for_status()?.text().await?;

    Ok(html2md::parse_html(&clean_wechat_html(&page)))
}
/// Reduces a fetched page to the markup worth converting.
///
/// All `<script>` and `<style>` blocks are removed first. If the remaining
/// markup contains the `js_content` element, only that element is returned;
/// otherwise the whole stripped document is returned.
fn clean_wechat_html(html: &str) -> String {
    let without_scripts = strip_block(html, "<script", "</script>");
    let without_styles = strip_block(&without_scripts, "<style", "</style>");
    extract_js_content(&without_styles).unwrap_or(without_styles)
}
/// Extracts the `<div ... id="js_content" ...> ... </div>` element from `html`.
///
/// The function locates the first `id="js_content"` attribute, walks back to
/// the nearest preceding `<div` tag and then scans forward, counting nested
/// `<div>` / `</div>` pairs, until the element that was opened is closed.
///
/// Only real `div` tags are counted: the characters `<div` must be followed
/// by whitespace, `>` or `/`. Tags whose name merely starts with "div"
/// (for example `<divider>`) are ignored, because they are never closed by
/// `</div>` and would otherwise unbalance the depth counter.
///
/// Returns `None` when the marker is absent, when no `<div` precedes it, or
/// when the element is never closed.
fn extract_js_content(html: &str) -> Option<String> {
    const OPEN: &str = "<div";
    const CLOSE: &str = "</div>";

    /// True when `tail` begins with an opening `div` tag (not a longer tag name).
    fn opens_div(tail: &str) -> bool {
        tail.strip_prefix(OPEN).map_or(false, |after| {
            matches!(
                after.bytes().next(),
                Some(b'>' | b'/' | b' ' | b'\t' | b'\n' | b'\r' | b'\x0c')
            )
        })
    }

    let marker_at = html.find("id=\"js_content\"")?;

    // Nearest genuine `<div` before the marker; skip look-alikes such as `<divider`.
    let div_start = html[..marker_at]
        .rmatch_indices(OPEN)
        .map(|(at, _)| at)
        .find(|&at| opens_div(&html[at..]))?;
    let sub = &html[div_start..];

    // `sub` starts with a verified `<div`, so `depth` becomes 1 on the first
    // iteration and can only reach 0 on the close tag that ends the element.
    let mut depth = 0usize;
    let mut cursor = 0;
    while let Some(offset) = sub[cursor..].find('<') {
        let at = cursor + offset;
        let tail = &sub[at..];
        if opens_div(tail) {
            depth += 1;
            cursor = at + OPEN.len();
        } else if tail.starts_with(CLOSE) {
            cursor = at + CLOSE.len();
            depth -= 1;
            if depth == 0 {
                return Some(sub[..cursor].to_string());
            }
        } else {
            // Some other tag (or a stray '<'): step past it and keep scanning.
            cursor = at + 1;
        }
    }
    None
}
/// Returns `input` with every region from `start_tag` through the next
/// `end_tag` (inclusive) removed.
///
/// Matching is a plain, case-sensitive substring search. If a `start_tag` is
/// found without a following `end_tag`, everything from that `start_tag` to
/// the end of the input is dropped.
fn strip_block(input: &str, start_tag: &str, end_tag: &str) -> String {
    let mut kept = String::with_capacity(input.len());
    let mut remaining = input;

    loop {
        let open_at = match remaining.find(start_tag) {
            Some(pos) => pos,
            None => {
                // No further blocks: the tail is kept verbatim.
                kept.push_str(remaining);
                return kept;
            }
        };

        let (before, from_open) = remaining.split_at(open_at);
        kept.push_str(before);

        match from_open.find(end_tag) {
            Some(close_at) => remaining = &from_open[close_at + end_tag.len()..],
            // Unterminated block: discard the rest of the input.
            None => return kept,
        }
    }
}