1#![cfg(feature = "scraper")]
26
27pub mod reqwest;
28
29use anyhow::{Result, anyhow};
30use scraper::{Html, Selector};
31use serde_json::Value;
32
33pub enum HeaderQueryContent {
38 Url(String),
42 Json(Value),
44}
45
/// Returns a copy of `s` with every control character removed.
///
/// Despite the name, characters are dropped rather than substituted; this
/// includes `\n`, `\r` and `\t`, since `char::is_control` covers them too.
pub(crate) fn replace_control_chars(s: &str) -> String {
    // The result can never be longer than the input.
    let mut kept = String::with_capacity(s.len());
    for ch in s.chars() {
        if ch.is_control() {
            continue;
        }
        kept.push(ch);
    }
    kept
}
53
54pub fn get_web_header_json_value(response_str: &str) -> anyhow::Result<HeaderQueryContent> {
67 let cleaned = replace_control_chars(response_str);
69 match serde_json::from_str::<Value>(&cleaned) {
70 Ok(header_json) => Ok(HeaderQueryContent::Json(header_json)),
71 Err(_) => {
72 let bmstable_url = extract_bmstable_url(response_str)?;
73 Ok(HeaderQueryContent::Url(bmstable_url))
74 }
75 }
76}
77
78pub fn extract_bmstable_url(html_content: &str) -> Result<String> {
86 let document = Html::parse_document(html_content);
87
88 let Ok(meta_selector) = Selector::parse("meta") else {
90 return Err(anyhow!("未找到meta标签"));
91 };
92
93 for element in document.select(&meta_selector) {
95 let is_bmstable = element
97 .value()
98 .attr("name")
99 .is_some_and(|v| v.eq_ignore_ascii_case("bmstable"))
100 || element
101 .value()
102 .attr("property")
103 .is_some_and(|v| v.eq_ignore_ascii_case("bmstable"));
104 if is_bmstable
105 && let Some(content_attr) = element.value().attr("content")
106 && !content_attr.is_empty()
107 {
108 return Ok(content_attr.to_string());
109 }
110 }
111
112 if let Ok(link_selector) = Selector::parse("link") {
114 for element in document.select(&link_selector) {
115 let rel = element.value().attr("rel");
116 let href = element.value().attr("href");
117 if rel.is_some_and(|v| v.eq_ignore_ascii_case("bmstable"))
118 && let Some(href) = href
119 && !href.is_empty()
120 {
121 return Ok(href.to_string());
122 }
123 }
124 }
125
126 let lower_contains_header_json = |s: &str| {
129 let ls = s.to_ascii_lowercase();
130 ls.contains("header") && ls.ends_with(".json")
131 };
132
133 if let Ok(a_selector) = Selector::parse("a") {
135 for element in document.select(&a_selector) {
136 if let Some(href) = element.value().attr("href")
137 && lower_contains_header_json(href)
138 {
139 return Ok(href.to_string());
140 }
141 }
142 }
143
144 if let Ok(link_selector) = Selector::parse("link") {
146 for element in document.select(&link_selector) {
147 if let Some(href) = element.value().attr("href")
148 && lower_contains_header_json(href)
149 {
150 return Ok(href.to_string());
151 }
152 }
153 }
154
155 if let Ok(script_selector) = Selector::parse("script") {
157 for element in document.select(&script_selector) {
158 if let Some(src) = element.value().attr("src")
159 && lower_contains_header_json(src)
160 {
161 return Ok(src.to_string());
162 }
163 }
164 }
165
166 for element in document.select(&meta_selector) {
168 if let Some(content_attr) = element.value().attr("content")
169 && lower_contains_header_json(content_attr)
170 {
171 return Ok(content_attr.to_string());
172 }
173 }
174
175 if let Some((start, end)) = find_header_json_in_text(html_content) {
177 let candidate = &html_content[start..end];
178 return Ok(candidate.to_string());
179 }
180
181 Err(anyhow!("未找到bmstable字段或header JSON线索"))
182}
183
/// Locates a `…header….json` token in free text, ASCII case-insensitively.
///
/// A token is a run of characters free of `"`, `'` and whitespace. The first
/// token that contains `header` followed, within the same token, by `.json`
/// is reported.
///
/// Returns `Some((start, end))`, byte offsets into `s` that always lie on
/// UTF-8 character boundaries, so `&s[start..end]` is safe:
///
/// * `start` is just past the nearest delimiter preceding `header`; if no
///   delimiter precedes it, `start` is the offset of `header` itself.
/// * `end` is just past the first `.json` following `header`; anything after
///   it in the token (for example a query string) is excluded.
///
/// Returns `None` when no such token exists.
fn find_header_json_in_text(s: &str) -> Option<(usize, usize)> {
    const NEEDLE: &str = "header";
    const SUFFIX: &str = ".json";

    /// Characters that terminate a URL-like token on either side.
    fn is_delimiter(c: char) -> bool {
        c == '"' || c == '\'' || c.is_whitespace()
    }

    // ASCII lowercasing keeps byte lengths, so offsets are valid for `s` too.
    let lower = s.to_ascii_lowercase();
    let mut pos = 0;
    while let Some(idx) = lower[pos..].find(NEEDLE) {
        let global_idx = pos + idx;

        // Only look for ".json" inside the token holding this "header";
        // otherwise an unrelated ".json" far away (e.g. after a <header> tag)
        // would be glued to it and returned as a bogus candidate.
        let token_end = lower[global_idx..]
            .find(is_delimiter)
            .map_or(lower.len(), |rel| global_idx + rel);

        if let Some(json_rel) = lower[global_idx..token_end].find(SUFFIX) {
            let end = global_idx + json_rel + SUFFIX.len();
            // Step over the whole delimiter: it may be a multi-byte
            // whitespace (e.g. U+3000), so "+ 1" would land mid-character.
            let start = lower[..global_idx]
                .char_indices()
                .rev()
                .find(|&(_, c)| is_delimiter(c))
                .map_or(global_idx, |(i, c)| i + c.len_utf8());
            return Some((start, end));
        }

        pos = global_idx + NEEDLE.len();
    }
    None
}