bms_table/fetch/
reqwest.rs

1//! Network fetching module based on `reqwest`
2//!
3//! Provides an all-in-one ability to fetch and parse BMS difficulty tables from a web page or a header JSON source:
4//! - Fetch the page and extract the bmstable header URL from HTML (if present);
5//! - Download and parse the header JSON;
6//! - Download and parse chart data according to `data_url` in the header;
7//! - Return a parsed `BmsTable` plus the raw JSON strings used for parsing.
8//!
9//! # Example
10//!
11//! ```rust,no_run
12//! # #[tokio::main]
13//! # async fn main() -> anyhow::Result<()> {
14//! use bms_table::fetch::reqwest::Fetcher;
15//! let fetcher = Fetcher::lenient()?;
16//! let table = fetcher.fetch_table("https://stellabms.xyz/sl/table.html").await?.table;
17//! assert!(!table.data.charts.is_empty());
18//! # Ok(())
19//! # }
20//! ```
21#![cfg(feature = "reqwest")]
22
23use std::time::Duration;
24
25use anyhow::{Context, Result, anyhow};
26use reqwest::{
27    Client, IntoUrl,
28    header::{HeaderMap, HeaderName, HeaderValue},
29};
30use serde::de::DeserializeOwned;
31
32use crate::{
33    BmsTable, BmsTableData, BmsTableHeader, BmsTableList, BmsTableRaw,
34    fetch::{
35        FetchedTable, FetchedTableList, HeaderQueryContent, TableFetcher,
36        header_query_with_fallback, parse_json_str_with_fallback,
37    },
38};
39
40/// Fetcher wrapper around a reusable [`reqwest::Client`].
41///
42/// Provides an ergonomic, one-stop API for fetching a table (or table list) from a web URL.
43#[derive(Clone)]
44pub struct Fetcher {
45    /// Underlying HTTP client.
46    client: Client,
47}
48
49impl Fetcher {
50    /// Create a fetcher from an existing [`reqwest::Client`].
51    #[must_use]
52    pub const fn new(client: Client) -> Self {
53        Self { client }
54    }
55
56    /// Create a fetcher with a more compatible, browser-like HTTP client configuration.
57    ///
58    /// # Errors
59    ///
60    /// Returns an error if building the underlying HTTP client fails.
61    pub fn lenient() -> Result<Self> {
62        Ok(Self::new(make_lenient_client()?))
63    }
64
65    /// Borrow the underlying [`reqwest::Client`].
66    #[must_use]
67    pub const fn client(&self) -> &Client {
68        &self.client
69    }
70
71    /// Fetch and parse a complete BMS difficulty table.
72    ///
73    /// # Errors
74    ///
75    /// Returns an error if fetching or parsing the table fails.
76    pub async fn fetch_table(&self, web_url: impl IntoUrl) -> Result<FetchedTable> {
77        let web_url = web_url.into_url().context("When parsing target url")?;
78
79        let web_page_text = self.fetch_text(web_url.clone(), "web page").await?;
80
81        let (web_header_query, web_used_text) =
82            header_query_with_fallback::<BmsTableHeader>(&web_page_text)
83                .context("When extracting header query from web page")?;
84
85        let (header_json_url, header, header_raw) = match web_header_query {
86            HeaderQueryContent::Url(header_url_string) => {
87                let header_json_url = web_url
88                    .join(&header_url_string)
89                    .context("When resolving header json url")?;
90
91                let header_text = self
92                    .fetch_text(header_json_url.clone(), "header json")
93                    .await?;
94
95                let (header_query2, header_used_text) =
96                    header_query_with_fallback::<BmsTableHeader>(&header_text)
97                        .context("When parsing header json")?;
98
99                let HeaderQueryContent::Value(header) = header_query2 else {
100                    return Err(anyhow!(
101                        "Cycled header found. web_url: {web_url}, header_url: {header_url_string}"
102                    ));
103                };
104
105                (header_json_url, header, header_used_text)
106            }
107            HeaderQueryContent::Value(header) => (web_url, header, web_used_text),
108        };
109
110        let data_json_url = header_json_url
111            .join(&header.data_url)
112            .context("When resolving data json url")?;
113
114        let (data, data_raw) = self
115            .fetch_json_with_fallback::<BmsTableData>(
116                data_json_url.clone(),
117                "data json",
118                "data json",
119            )
120            .await?;
121
122        Ok(FetchedTable {
123            table: BmsTable { header, data },
124            raw: BmsTableRaw {
125                header_json_url,
126                header_raw,
127                data_json_url,
128                data_raw,
129            },
130        })
131    }
132
133    /// Fetch a list of BMS difficulty tables.
134    ///
135    /// # Errors
136    ///
137    /// Returns an error if fetching or parsing the list fails.
138    pub async fn fetch_table_list(&self, web_url: impl IntoUrl) -> Result<FetchedTableList> {
139        let list_url = web_url.into_url().context("When parsing table list url")?;
140
141        let (list, raw_used) = self
142            .fetch_json_with_fallback::<BmsTableList>(list_url, "table list", "table list json")
143            .await?;
144        Ok(FetchedTableList {
145            tables: list.listes,
146            raw_json: raw_used,
147        })
148    }
149
150    /// Fetch a URL as text, attaching contextual error messages.
151    ///
152    /// # Errors
153    ///
154    /// Returns an error if the request fails or the body cannot be read as text.
155    async fn fetch_text(&self, url: reqwest::Url, fetch_ctx: &'static str) -> Result<String> {
156        self.client
157            .get(url)
158            .send()
159            .await
160            .with_context(|| format!("When fetching {fetch_ctx}"))?
161            .text()
162            .await
163            .with_context(|| format!("When reading {fetch_ctx} body"))
164    }
165
166    /// Fetch a URL and parse JSON with a control-character cleaning fallback.
167    ///
168    /// # Errors
169    ///
170    /// Returns an error if fetching fails, or the response cannot be parsed as JSON.
171    async fn fetch_json_with_fallback<T: DeserializeOwned>(
172        &self,
173        url: reqwest::Url,
174        fetch_ctx: &'static str,
175        parse_ctx: &'static str,
176    ) -> Result<(T, String)> {
177        let text = self.fetch_text(url, fetch_ctx).await?;
178        parse_json_str_with_fallback::<T>(&text)
179            .with_context(|| format!("When parsing {parse_ctx}"))
180    }
181}
182
183impl TableFetcher for Fetcher {
184    async fn fetch_table(&self, web_url: url::Url) -> Result<FetchedTable> {
185        Fetcher::fetch_table(self, web_url).await
186    }
187
188    async fn fetch_table_list(&self, web_url: url::Url) -> Result<FetchedTableList> {
189        Fetcher::fetch_table_list(self, web_url).await
190    }
191}
192
193/// Create a more lenient and compatible HTTP client.
194///
195/// - Set a browser-like UA;
196/// - Configure timeouts and redirects;
197/// - Accept invalid certificates (for a few non-compliant sites);
198/// - Accept invalid hostnames (for a few non-compliant sites);
199///
200/// Note: use `danger_accept_invalid_certs` with caution in production.
201///
202/// # Errors
203///
204/// Returns an error when building the HTTP client fails.
205fn make_lenient_client() -> Result<Client> {
206    let mut headers = HeaderMap::new();
207    headers.insert(
208        HeaderName::from_static("accept"),
209        HeaderValue::from_static(
210            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
211        ),
212    );
213    headers.insert(
214        HeaderName::from_static("accept-language"),
215        HeaderValue::from_static("zh-CN,zh;q=0.9,en;q=0.8"),
216    );
217    headers.insert(
218        HeaderName::from_static("upgrade-insecure-requests"),
219        HeaderValue::from_static("1"),
220    );
221    headers.insert(
222        HeaderName::from_static("connection"),
223        HeaderValue::from_static("keep-alive"),
224    );
225
226    let client = Client::builder()
227        .default_headers(headers)
228        .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119 Safari/537.36 bms-table-rs")
229        .timeout(Duration::from_secs(60))
230        .redirect(reqwest::redirect::Policy::limited(100))
231        // Automatically include Referer on redirects, closer to browser behavior
232        .referer(true)
233        // Enable cookie store, closer to real user sessions
234        .cookie_store(true)
235        // Keep lenient TLS settings for compatibility with some non-compliant sites
236        .danger_accept_invalid_certs(true)
237        .danger_accept_invalid_hostnames(true)
238        .build()
239        .context("When building client")?;
240    Ok(client)
241}