bms_table/fetch/
reqwest.rs

1//! 基于 `reqwest` 的网络获取模块
2//!
3//! 提供一站式从网页或头部 JSON 源拉取并解析 BMS 难度表的能力:
4//! - 获取网页并从 HTML 提取 bmstable 头部地址(如有);
5//! - 下载并解析头部 JSON;
6//! - 根据头部中的 `data_url` 下载谱面数据并解析;
7//! - 返回包含表头与谱面集合的 `BmsTable`。
8//!
9//! # 示例
10//!
11//! ```rust,no_run
12//! # #[tokio::main]
13//! # async fn main() -> anyhow::Result<()> {
14//! use bms_table::fetch::reqwest::{fetch_table, make_lenient_client};
15//! let client = make_lenient_client()?;
16//! let table = fetch_table(&client, "https://stellabms.xyz/sl/table.html").await?;
17//! assert!(!table.data.charts.is_empty());
18//! # Ok(())
19//! # }
20//! ```
21#![cfg(feature = "reqwest")]
22
23use anyhow::{Result, anyhow};
24use serde_json::Value;
25use std::collections::HashMap;
26use std::time::Duration;
27use url::Url;
28
29use crate::{BmsTable, BmsTableIndexItem, BmsTableRaw};
30
31/// 从网页或头部 JSON 源拉取并解析完整的 BMS 难度表。
32///
33/// # 参数
34///
35/// - `web_url`:网页地址或直接指向头部 JSON 的地址。
36///
37/// # 返回
38///
39/// 解析后的 [`crate::BmsTable`],包含表头与谱面数据。
40///
41/// # 错误
42///
43/// - 网络请求失败(连接失败、超时等)
44/// - 响应内容无法解析为 HTML/JSON 或结构不符合预期
45/// - 头部 JSON 未包含 `data_url` 字段或其类型不正确
46pub async fn fetch_table_full(
47    client: &reqwest::Client,
48    web_url: &str,
49) -> Result<(BmsTable, BmsTableRaw)> {
50    let web_url = Url::parse(web_url)?;
51    let web_response = client
52        .get(web_url.clone())
53        .send()
54        .await
55        .map_err(|e| anyhow!("When fetching web: {e}"))?
56        .text()
57        .await
58        .map_err(|e| anyhow!("When parsing web response: {e}"))?;
59    let (header_url, header_json, header_raw) =
60        match crate::fetch::get_web_header_json_value(&web_response)? {
61            crate::fetch::HeaderQueryContent::Url(header_url_string) => {
62                let header_url = web_url.join(&header_url_string)?;
63                let header_response = client
64                    .get(header_url.clone())
65                    .send()
66                    .await
67                    .map_err(|e| anyhow!("When fetching header: {e}"))?;
68                let header_response_string = header_response
69                    .text()
70                    .await
71                    .map_err(|e| anyhow!("When parsing header response: {e}"))?;
72                let crate::fetch::HeaderQueryContent::Json(header_json) =
73                    crate::fetch::get_web_header_json_value(&header_response_string)?
74                else {
75                    return Err(anyhow!(
76                        "Cycled header found. web_url: {web_url}, header_url: {header_url_string}"
77                    ));
78                };
79                (header_url, header_json, header_response_string)
80            }
81            crate::fetch::HeaderQueryContent::Json(value) => {
82                let header_raw = serde_json::to_string(&value)?;
83                (web_url, value, header_raw)
84            }
85        };
86    let data_url_str = header_json
87        .get("data_url")
88        .ok_or_else(|| anyhow!("\"data_url\" not found in header json!"))?
89        .as_str()
90        .ok_or_else(|| anyhow!("\"data_url\" is not a string!"))?;
91    let data_url = header_url.join(data_url_str)?;
92    let data_response = client
93        .get(data_url)
94        .send()
95        .await
96        .map_err(|e| anyhow!("When fetching web: {e}"))?
97        .text()
98        .await
99        .map_err(|e| anyhow!("When parsing web response: {e}"))?;
100    let data_json: Value = serde_json::from_str(&data_response)?;
101    // 直接使用库内反序列化生成 BmsTable
102    let header: crate::BmsTableHeader = serde_json::from_value(header_json)
103        .map_err(|e| anyhow!("When parsing header json: {e}"))?;
104    let data: crate::BmsTableData =
105        serde_json::from_value(data_json).map_err(|e| anyhow!("When parsing data json: {e}"))?;
106    Ok((
107        BmsTable { header, data },
108        BmsTableRaw {
109            header_raw,
110            data_raw: data_response,
111        },
112    ))
113}
114
115/// 从网页或头部 JSON 源拉取并解析完整的 BMS 难度表。
116///
117/// 参考 [`fetch_table_full`]。
118pub async fn fetch_table(client: &reqwest::Client, web_url: &str) -> Result<BmsTable> {
119    let (table, _raw) = fetch_table_full(client, web_url).await?;
120    Ok(table)
121}
122
123/// 获取 BMS 表索引列表。
124///
125/// 从提供的 `web_url` 下载 JSON 数组并解析为 [`crate::BmsTableIndexItem`] 列表。
126/// 仅要求每个元素包含 `name`、`symbol` 与 `url`(字符串),其他字段将被收集到 `extra` 中。
127pub async fn fetch_table_index(
128    client: &reqwest::Client,
129    web_url: &str,
130) -> Result<Vec<BmsTableIndexItem>> {
131    let (out, _raw) = fetch_table_index_full(client, web_url).await?;
132    Ok(out)
133}
134
135/// 获取 BMS 表索引列表及其原始 JSON 字符串。
136///
137/// 返回解析后的索引项数组与响应的原始 JSON 文本,便于记录或调试。
138pub async fn fetch_table_index_full(
139    client: &reqwest::Client,
140    web_url: &str,
141) -> Result<(Vec<BmsTableIndexItem>, String)> {
142    let web_url = Url::parse(web_url)?;
143    let response_text = client
144        .get(web_url)
145        .send()
146        .await
147        .map_err(|e| anyhow!("When fetching table index: {e}"))?
148        .text()
149        .await
150        .map_err(|e| anyhow!("When parsing table index response: {e}"))?;
151
152    let value: Value = serde_json::from_str(&response_text)?;
153    let arr = value
154        .as_array()
155        .ok_or_else(|| anyhow!("Table index root is not an array"))?;
156
157    let mut out = Vec::with_capacity(arr.len());
158    for (idx, item) in arr.iter().enumerate() {
159        let obj = item
160            .as_object()
161            .ok_or_else(|| anyhow!("Table index item #{idx} is not an object"))?;
162
163        let name = obj
164            .get("name")
165            .and_then(|v| v.as_str())
166            .ok_or_else(|| anyhow!("Missing required field 'name' at index {idx}"))?;
167        let symbol = obj
168            .get("symbol")
169            .and_then(|v| v.as_str())
170            .ok_or_else(|| anyhow!("Missing required field 'symbol' at index {idx}"))?;
171        let url_str = obj
172            .get("url")
173            .and_then(|v| v.as_str())
174            .ok_or_else(|| anyhow!("Missing required field 'url' at index {idx}"))?;
175        let url = Url::parse(url_str)?;
176
177        #[cfg(feature = "serde")]
178        let extra = {
179            let mut m: HashMap<String, Value> = HashMap::new();
180            for (k, v) in obj.iter() {
181                if k != "name" && k != "symbol" && k != "url" {
182                    m.insert(k.clone(), v.clone());
183                }
184            }
185            m
186        };
187
188        let entry = BmsTableIndexItem {
189            name: name.to_string(),
190            symbol: symbol.to_string(),
191            url,
192            #[cfg(feature = "serde")]
193            extra,
194        };
195        out.push(entry);
196    }
197
198    Ok((out, response_text))
199}
200
201/// 创建一个规则宽松、兼容性更强的 HTTP 客户端。
202///
203/// - 设置浏览器 UA;
204/// - 配置超时与重定向;
205/// - 接受无效证书(用于少数不规范站点);
206///
207/// 注意:生产环境应审慎使用 `danger_accept_invalid_certs`。
208pub fn make_lenient_client() -> Result<reqwest::Client> {
209    let client = reqwest::Client::builder()
210        .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119 Safari/537.36 bms-table-rs")
211        .timeout(Duration::from_secs(30))
212        .redirect(reqwest::redirect::Policy::limited(10))
213        .danger_accept_invalid_certs(true)
214        .build()
215        .map_err(|e| anyhow!("When building client: {e}"))?;
216    Ok(client)
217}