1#![cfg(feature = "scraper")]
26
27pub mod reqwest;
28
29use anyhow::{Result, anyhow};
30use scraper::{Html, Selector};
31use serde_json::Value;
32
/// Result of interpreting a web response while looking for a table header.
///
/// Produced by [`get_web_header_json_value`]: the response is either already
/// JSON, or it is treated as HTML from which a URL was extracted.
pub enum HeaderQueryContent {
    /// The response was not valid JSON; this is the URL extracted from it
    /// (as HTML) by [`extract_bmstable_url`].
    Url(String),
    /// The response parsed as JSON; this is the parsed value.
    Json(Value),
}
45
46pub fn get_web_header_json_value(response_str: &str) -> anyhow::Result<HeaderQueryContent> {
59 match serde_json::from_str::<Value>(response_str) {
61 Ok(header_json) => Ok(HeaderQueryContent::Json(header_json)),
62 Err(_) => {
63 let bmstable_url = extract_bmstable_url(response_str)?;
64 Ok(HeaderQueryContent::Url(bmstable_url))
65 }
66 }
67}
68
69pub fn extract_bmstable_url(html_content: &str) -> Result<String> {
77 let document = Html::parse_document(html_content);
78
79 let Ok(meta_selector) = Selector::parse("meta") else {
81 return Err(anyhow!("未找到meta标签"));
82 };
83
84 for element in document.select(&meta_selector) {
86 let is_bmstable = element
88 .value()
89 .attr("name")
90 .is_some_and(|v| v.eq_ignore_ascii_case("bmstable"))
91 || element
92 .value()
93 .attr("property")
94 .is_some_and(|v| v.eq_ignore_ascii_case("bmstable"));
95 if is_bmstable
96 && let Some(content_attr) = element.value().attr("content")
97 && !content_attr.is_empty()
98 {
99 return Ok(content_attr.to_string());
100 }
101 }
102
103 if let Ok(link_selector) = Selector::parse("link") {
105 for element in document.select(&link_selector) {
106 let rel = element.value().attr("rel");
107 let href = element.value().attr("href");
108 if rel.is_some_and(|v| v.eq_ignore_ascii_case("bmstable"))
109 && let Some(href) = href
110 && !href.is_empty()
111 {
112 return Ok(href.to_string());
113 }
114 }
115 }
116
117 let lower_contains_header_json = |s: &str| {
120 let ls = s.to_ascii_lowercase();
121 ls.contains("header") && ls.ends_with(".json")
122 };
123
124 if let Ok(a_selector) = Selector::parse("a") {
126 for element in document.select(&a_selector) {
127 if let Some(href) = element.value().attr("href")
128 && lower_contains_header_json(href)
129 {
130 return Ok(href.to_string());
131 }
132 }
133 }
134
135 if let Ok(link_selector) = Selector::parse("link") {
137 for element in document.select(&link_selector) {
138 if let Some(href) = element.value().attr("href")
139 && lower_contains_header_json(href)
140 {
141 return Ok(href.to_string());
142 }
143 }
144 }
145
146 if let Ok(script_selector) = Selector::parse("script") {
148 for element in document.select(&script_selector) {
149 if let Some(src) = element.value().attr("src")
150 && lower_contains_header_json(src)
151 {
152 return Ok(src.to_string());
153 }
154 }
155 }
156
157 for element in document.select(&meta_selector) {
159 if let Some(content_attr) = element.value().attr("content")
160 && lower_contains_header_json(content_attr)
161 {
162 return Ok(content_attr.to_string());
163 }
164 }
165
166 if let Some((start, end)) = find_header_json_in_text(html_content) {
168 let candidate = &html_content[start..end];
169 return Ok(candidate.to_string());
170 }
171
172 Err(anyhow!("未找到bmstable字段或header JSON线索"))
173}
174
/// Scans raw text for something that looks like a path to a header JSON file.
///
/// The text is searched (ASCII case-insensitively) for the word `header`.
/// For each occurrence, the surrounding token is considered: it runs up to
/// the next `"`, `'` or whitespace character. If that token contains `.json`
/// after `header`, the match is accepted.
///
/// Returns `Some((start, end))` as byte offsets into `s`, where
/// `&s[start..end]` begins just after the nearest preceding `"`, `'` or
/// whitespace character (or at `header` itself when there is none) and ends
/// right after the first `.json` following `header`. Both offsets are
/// guaranteed to lie on UTF-8 character boundaries. Returns `None` when no
/// occurrence qualifies.
fn find_header_json_in_text(s: &str) -> Option<(usize, usize)> {
    const NEEDLE: &str = "header";
    const SUFFIX: &str = ".json";

    /// Characters that terminate a candidate on either side.
    fn is_delimiter(c: char) -> bool {
        c == '"' || c == '\'' || c.is_whitespace()
    }

    // ASCII lowercasing keeps byte length and character boundaries intact, so
    // offsets computed on `lower` are valid for `s` as well.
    let lower = s.to_ascii_lowercase();
    let mut pos = 0;
    while let Some(rel) = lower[pos..].find(NEEDLE) {
        let hit = pos + rel;

        // Only look for ".json" inside the token that contains "header";
        // otherwise an unrelated ".json" far later in the text would produce
        // a candidate spanning quotes and whitespace.
        let token_end = lower[hit..]
            .find(is_delimiter)
            .map_or(lower.len(), |offset| hit + offset);

        if let Some(json_rel) = lower[hit..token_end].find(SUFFIX) {
            let end = hit + json_rel + SUFFIX.len();
            // Step past the delimiter by its real encoded width: whitespace
            // such as U+3000 is multi-byte, so `index + 1` could land inside
            // a character.
            let start = lower[..hit]
                .char_indices()
                .rev()
                .find(|&(_, c)| is_delimiter(c))
                .map_or(hit, |(index, c)| index + c.len_utf8());
            return Some((start, end));
        }

        pos = hit + NEEDLE.len();
    }
    None
}