bms_table/fetch/
reqwest.rs

1//! 基于 `reqwest` 的网络获取模块
2//!
3//! 提供一站式从网页或头部 JSON 源拉取并解析 BMS 难度表的能力:
4//! - 获取网页并从 HTML 提取 bmstable 头部地址(如有);
5//! - 下载并解析头部 JSON;
6//! - 根据头部中的 `data_url` 下载谱面数据并解析;
7//! - 返回包含表头与谱面集合的 `BmsTable`。
8//!
9//! # 示例
10//!
11//! ```rust,no_run
12//! # #[tokio::main]
13//! # async fn main() -> anyhow::Result<()> {
14//! use bms_table::fetch::reqwest::{fetch_table, make_lenient_client};
15//! let client = make_lenient_client()?;
16//! let table = fetch_table(&client, "https://stellabms.xyz/sl/table.html").await?;
17//! assert!(!table.data.charts.is_empty());
18//! # Ok(())
19//! # }
20//! ```
21#![cfg(feature = "reqwest")]
22
23use anyhow::{Result, anyhow};
24use serde_json::Value;
25use std::collections::BTreeMap;
26use std::time::Duration;
27use url::Url;
28
29use crate::{BmsTable, BmsTableInfo, BmsTableRaw, fetch::replace_control_chars};
30
31/// 从网页或头部 JSON 源拉取并解析完整的 BMS 难度表。
32///
33/// # 参数
34///
35/// - `web_url`:网页地址或直接指向头部 JSON 的地址。
36///
37/// # 返回
38///
39/// 解析后的 [`crate::BmsTable`],包含表头与谱面数据。
40///
41/// # 错误
42///
43/// - 网络请求失败(连接失败、超时等)
44/// - 响应内容无法解析为 HTML/JSON 或结构不符合预期
45/// - 头部 JSON 未包含 `data_url` 字段或其类型不正确
46pub async fn fetch_table_full(
47    client: &reqwest::Client,
48    web_url: &str,
49) -> Result<(BmsTable, BmsTableRaw)> {
50    let web_url = Url::parse(web_url)?;
51    let web_response = client
52        .get(web_url.clone())
53        .send()
54        .await
55        .map_err(|e| anyhow!("When fetching web: {e}"))?
56        .text()
57        .await
58        .map_err(|e| anyhow!("When parsing web response: {e}"))?;
59    let (header_url, header_json, header_raw) =
60        match crate::fetch::get_web_header_json_value(&web_response)? {
61            crate::fetch::HeaderQueryContent::Url(header_url_string) => {
62                let header_url = web_url.join(&header_url_string)?;
63                let header_response = client
64                    .get(header_url.clone())
65                    .send()
66                    .await
67                    .map_err(|e| anyhow!("When fetching header: {e}"))?;
68                let header_response_string = header_response
69                    .text()
70                    .await
71                    .map_err(|e| anyhow!("When parsing header response: {e}"))?;
72                let crate::fetch::HeaderQueryContent::Json(header_json) =
73                    crate::fetch::get_web_header_json_value(&header_response_string)?
74                else {
75                    return Err(anyhow!(
76                        "Cycled header found. web_url: {web_url}, header_url: {header_url_string}"
77                    ));
78                };
79                (header_url, header_json, header_response_string)
80            }
81            crate::fetch::HeaderQueryContent::Json(value) => {
82                let header_raw = serde_json::to_string(&value)?;
83                (web_url, value, header_raw)
84            }
85        };
86    let data_url_str = header_json
87        .get("data_url")
88        .ok_or_else(|| anyhow!("\"data_url\" not found in header json!"))?
89        .as_str()
90        .ok_or_else(|| anyhow!("\"data_url\" is not a string!"))?;
91    let data_url = header_url.join(data_url_str)?;
92    let data_response = client
93        .get(data_url.clone())
94        .send()
95        .await
96        .map_err(|e| anyhow!("When fetching web: {e}"))?
97        .text()
98        .await
99        .map_err(|e| anyhow!("When parsing web response: {e}"))?;
100    // 在解析前移除非法控制字符,但保持原始 data_raw 不变
101    let data_cleaned = replace_control_chars(&data_response);
102    let data_json: Value = serde_json::from_str(&data_cleaned)?;
103    // 直接使用库内反序列化生成 BmsTable
104    let header: crate::BmsTableHeader = serde_json::from_value(header_json)
105        .map_err(|e| anyhow!("When parsing header json: {e}"))?;
106    let data: crate::BmsTableData =
107        serde_json::from_value(data_json).map_err(|e| anyhow!("When parsing data json: {e}"))?;
108    Ok((
109        BmsTable { header, data },
110        BmsTableRaw {
111            header_json_url: header_url,
112            header_raw,
113            data_json_url: data_url,
114            data_raw: data_response,
115        },
116    ))
117}
118
119/// 从网页或头部 JSON 源拉取并解析完整的 BMS 难度表。
120///
121/// 参考 [`fetch_table_full`]。
122pub async fn fetch_table(client: &reqwest::Client, web_url: &str) -> Result<BmsTable> {
123    let (table, _raw) = fetch_table_full(client, web_url).await?;
124    Ok(table)
125}
126
127/// 获取 BMS 难度表列表。
128///
129/// 从提供的 `web_url` 下载 JSON 数组并解析为 [`crate::BmsTableInfo`] 列表。
130/// 仅要求每个元素包含 `name`、`symbol` 与 `url`(字符串),其他字段将被收集到 `extra` 中。
131pub async fn fetch_table_list(
132    client: &reqwest::Client,
133    web_url: &str,
134) -> Result<Vec<BmsTableInfo>> {
135    let (out, _raw) = fetch_table_list_full(client, web_url).await?;
136    Ok(out)
137}
138
139/// 获取 BMS 难度表列表及其原始 JSON 字符串。
140///
141/// 返回解析后的列表项数组与响应的原始 JSON 文本,便于记录或调试。
142pub async fn fetch_table_list_full(
143    client: &reqwest::Client,
144    web_url: &str,
145) -> Result<(Vec<BmsTableInfo>, String)> {
146    let web_url = Url::parse(web_url)?;
147    let response_text = client
148        .get(web_url)
149        .send()
150        .await
151        .map_err(|e| anyhow!("When fetching table list: {e}"))?
152        .text()
153        .await
154        .map_err(|e| anyhow!("When parsing table list response: {e}"))?;
155
156    // 在解析前移除非法控制字符,但保持原始响应文本不变
157    let cleaned = replace_control_chars(&response_text);
158    let value: Value = serde_json::from_str(&cleaned)?;
159    let arr = value
160        .as_array()
161        .ok_or_else(|| anyhow!("Table list root is not an array"))?;
162
163    let mut out = Vec::with_capacity(arr.len());
164    for (idx, item) in arr.iter().enumerate() {
165        let obj = item
166            .as_object()
167            .ok_or_else(|| anyhow!("Table list item #{idx} is not an object"))?;
168
169        let name = obj
170            .get("name")
171            .and_then(|v| v.as_str())
172            .ok_or_else(|| anyhow!("Missing required field 'name' at index {idx}"))?;
173        let symbol = obj
174            .get("symbol")
175            .and_then(|v| v.as_str())
176            .ok_or_else(|| anyhow!("Missing required field 'symbol' at index {idx}"))?;
177        let url_str = obj
178            .get("url")
179            .and_then(|v| v.as_str())
180            .ok_or_else(|| anyhow!("Missing required field 'url' at index {idx}"))?;
181        let url = Url::parse(url_str)?;
182
183        #[cfg(feature = "serde")]
184        let extra = {
185            let mut m: BTreeMap<String, Value> = BTreeMap::new();
186            for (k, v) in obj.iter() {
187                if k != "name" && k != "symbol" && k != "url" {
188                    m.insert(k.clone(), v.clone());
189                }
190            }
191            m
192        };
193
194        let entry = BmsTableInfo {
195            name: name.to_string(),
196            symbol: symbol.to_string(),
197            url,
198            #[cfg(feature = "serde")]
199            extra,
200        };
201        out.push(entry);
202    }
203
204    Ok((out, response_text))
205}
206
207/// 创建一个规则宽松、兼容性更强的 HTTP 客户端。
208///
209/// - 设置浏览器 UA;
210/// - 配置超时与重定向;
211/// - 接受无效证书(用于少数不规范站点);
212/// - 接受无效主机名(用于少数不规范站点);
213///
214/// 注意:生产环境应审慎使用 `danger_accept_invalid_certs`。
215pub fn make_lenient_client() -> Result<reqwest::Client> {
216    let client = reqwest::Client::builder()
217        .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119 Safari/537.36 bms-table-rs")
218        .timeout(Duration::from_secs(60))
219        .redirect(reqwest::redirect::Policy::limited(100))
220        .danger_accept_invalid_certs(true)
221        .danger_accept_invalid_hostnames(true)
222        .build()
223        .map_err(|e| anyhow!("When building client: {e}"))?;
224    Ok(client)
225}