1use encoding_rs::SHIFT_JIS;
23
24use crate::proxy::{send_follow_redirects, url_encode, BoxError, ProxyMode};
25
/// One data row of the room table, as built by `parse_rooms_html`.
///
/// The four text fields hold the table cell text with surrounding
/// whitespace removed.
#[derive(Debug, Clone)]
pub struct RoomInfo {
    /// Room number, taken from the first cell of the row.
    pub room: u32,
    /// Value of the second cell; `parse_rooms_html` stores `0` when that
    /// cell is not a valid `u32`.
    pub max_connections: u32,
    /// Text of the third cell (presumably one of the `MapDisplay`
    /// constants -- confirm against the live page).
    pub map_display: String,
    /// Text of the fourth cell, kept as an unparsed string.
    pub public_date: String,
    /// Text of the fifth cell (presumably one of the `Patrol` constants --
    /// confirm against the live page).
    pub patrol: String,
    /// Text of the sixth cell.
    pub remarks: String,
}
40
/// One data row of the logged-in-users table, as built by
/// `parse_logged_in_users_html`.
#[derive(Debug, Clone)]
pub struct LoggedInUser {
    /// Numeric value of the first cell of the row.
    pub order: u32,
    /// Text of the second cell.
    pub username: String,
    /// Numeric value of the third cell.
    pub room: u32,
    /// Numeric value of the fourth cell; the parser stores `0` when that
    /// cell is not a valid `u32`.
    pub state: u32,
}
49
50#[derive(Debug, Clone)]
52pub struct ScrapeResult {
53 pub logged_in_users: Option<Vec<LoggedInUser>>,
55 pub rooms: Vec<RoomInfo>,
56}
57
/// String constants for the "map display" column.
///
/// Presumably meant to be compared with `RoomInfo::map_display` or placed
/// in `RoomFilter::map_display` -- confirm against callers.
// The module name is part of the public path, so the lint is silenced
// instead of renaming it.
#[allow(non_snake_case)]
pub mod MapDisplay {
    /// The single character U+53EF ("可").
    pub const ENABLED: &str = "\u{53ef}";
    /// The single character U+5426 ("否").
    pub const DISABLED: &str = "\u{5426}";
}
67
/// String constants for the "patrol" column.
///
/// Presumably meant to be compared with `RoomInfo::patrol` or placed in
/// `RoomFilter::patrol` -- confirm against callers.
// The module name is part of the public path, so the lint is silenced
// instead of renaming it.
#[allow(non_snake_case)]
pub mod Patrol {
    /// The single character U+6709 ("有").
    pub const YES: &str = "\u{6709}";
    /// The single character U+00D7 (the multiplication sign "×").
    pub const NO: &str = "\u{00d7}";
}
73
/// String constants for the "remarks" column.
///
/// Each value is a single character; what each one stands for is not
/// visible in this file. Presumably used with `RoomFilter::remarks` or
/// `RoomFilter::remarks_contains` -- confirm against callers.
// The module name is part of the public path, so the lint is silenced
// instead of renaming it.
#[allow(non_snake_case)]
pub mod Remarks {
    /// The single character U+30E9 ("ラ").
    pub const RA: &str = "\u{30e9}";
    /// The single character U+57FC ("埼").
    pub const SAI: &str = "\u{57fc}";
    /// The single character U+5168 ("全").
    pub const ZEN: &str = "\u{5168}";
}
80
/// Optional constraints on a `RoomInfo`.
///
/// Every field defaults to `None`, meaning "no constraint". All
/// constraints that are set must hold for a room to match.
#[derive(Debug, Clone, Default)]
pub struct RoomFilter {
    /// Room number must equal this value.
    pub room: Option<u32>,
    /// Room number must be at least this value (inclusive).
    pub room_min: Option<u32>,
    /// Room number must be at most this value (inclusive).
    pub room_max: Option<u32>,
    /// `max_connections` must be at least this value (inclusive).
    pub min_max_conn: Option<u32>,
    /// `max_connections` must be at most this value (inclusive).
    pub max_max_conn: Option<u32>,
    /// `map_display` must equal this string exactly.
    pub map_display: Option<String>,
    /// `public_date` must equal this string exactly.
    pub public_date: Option<String>,
    /// `public_date` must contain this substring.
    pub public_date_contains: Option<String>,
    /// `patrol` must equal this string exactly.
    pub patrol: Option<String>,
    /// `remarks` must equal this string exactly.
    pub remarks: Option<String>,
    /// `remarks` must contain this substring.
    pub remarks_contains: Option<String>,
}
100
101impl RoomFilter {
102 pub fn matches(&self, r: &RoomInfo) -> bool {
103 if let Some(n) = self.room { if r.room != n { return false; } }
104 if let Some(n) = self.room_min { if r.room < n { return false; } }
105 if let Some(n) = self.room_max { if r.room > n { return false; } }
106 if let Some(n) = self.min_max_conn { if r.max_connections < n { return false; } }
107 if let Some(n) = self.max_max_conn { if r.max_connections > n { return false; } }
108 if let Some(ref s) = self.map_display { if r.map_display != *s { return false; } }
109 if let Some(ref s) = self.public_date { if r.public_date != *s { return false; } }
110 if let Some(ref s) = self.public_date_contains { if !r.public_date.contains(s.as_str()) { return false; } }
111 if let Some(ref s) = self.patrol { if r.patrol != *s { return false; } }
112 if let Some(ref s) = self.remarks { if r.remarks != *s { return false; } }
113 if let Some(ref s) = self.remarks_contains { if !r.remarks.contains(s.as_str()) { return false; } }
114 true
115 }
116}
117
/// Optional constraints on a `LoggedInUser`.
///
/// Every field defaults to `None`, meaning "no constraint". All
/// constraints that are set must hold for a user to match.
#[derive(Debug, Clone, Default)]
pub struct UserFilter {
    /// `order` must equal this value.
    pub order: Option<u32>,
    /// `order` must be at least this value (inclusive).
    pub order_min: Option<u32>,
    /// `order` must be at most this value (inclusive).
    pub order_max: Option<u32>,
    /// `username` must equal this string exactly.
    pub username: Option<String>,
    /// `username` must contain this substring.
    pub username_contains: Option<String>,
    /// `room` must equal this value.
    pub room: Option<u32>,
    /// `room` must be at least this value (inclusive).
    pub room_min: Option<u32>,
    /// `room` must be at most this value (inclusive).
    pub room_max: Option<u32>,
    /// `state` must equal this value.
    pub state: Option<u32>,
}
131
132impl UserFilter {
133 pub fn matches(&self, u: &LoggedInUser) -> bool {
134 if let Some(n) = self.order { if u.order != n { return false; } }
135 if let Some(n) = self.order_min { if u.order < n { return false; } }
136 if let Some(n) = self.order_max { if u.order > n { return false; } }
137 if let Some(ref s) = self.username { if u.username != *s { return false; } }
138 if let Some(ref s) = self.username_contains { if !u.username.contains(s.as_str()) { return false; } }
139 if let Some(n) = self.room { if u.room != n { return false; } }
140 if let Some(n) = self.room_min { if u.room < n { return false; } }
141 if let Some(n) = self.room_max { if u.room > n { return false; } }
142 if let Some(n) = self.state { if u.state != n { return false; } }
143 true
144 }
145}
146
147#[derive(Debug, Clone, Default)]
149pub struct ScrapeOptions {
150 pub room_filter: Option<RoomFilter>,
151 pub user_filter: Option<UserFilter>,
152}
153
154impl ScrapeOptions {
155 pub fn with_room_filter(mut self, f: RoomFilter) -> Self {
156 self.room_filter = Some(f);
157 self
158 }
159 pub fn with_user_filter(mut self, f: UserFilter) -> Self {
160 self.user_filter = Some(f);
161 self
162 }
163}
164
165fn inner_text<'a>(node: &tl::Node<'a>, parser: &'a tl::Parser<'a>) -> String {
170 match node {
171 tl::Node::Raw(b) => b.as_utf8_str().into_owned(),
172 tl::Node::Tag(tag) => tag
173 .children().top().iter()
174 .filter_map(|h| h.get(parser))
175 .map(|n| inner_text(n, parser))
176 .collect(),
177 _ => String::new(),
178 }
179}
180
181fn to_html<'a>(node: &tl::Node<'a>, parser: &'a tl::Parser<'a>) -> String {
182 node.outer_html(parser).to_string()
183}
184
185fn children_html<'a>(tag: &tl::HTMLTag<'a>, parser: &'a tl::Parser<'a>) -> String {
186 tag.children().top().iter()
187 .filter_map(|h| h.get(parser))
188 .map(|n| to_html(n, parser))
189 .collect()
190}
191
/// Strips leading and trailing padding from `s` and returns the remainder
/// as a new `String`.
///
/// Padding is any Unicode whitespace character, with the ideographic
/// space (U+3000) and the no-break space (U+00A0) listed explicitly.
/// Interior padding is left untouched.
fn trim_full(s: String) -> String {
    fn is_padding(c: char) -> bool {
        matches!(c, '\u{3000}' | '\u{00a0}') || c.is_whitespace()
    }

    s.trim_matches(is_padding).to_owned()
}
196
197fn parse_tr_cells(tr_html: &str) -> Vec<String> {
198 let dom = match tl::parse(tr_html, tl::ParserOptions::default()) {
199 Ok(d) => d,
200 Err(_) => return vec![],
201 };
202 let p = dom.parser();
203 dom.query_selector("td")
204 .into_iter()
205 .flatten()
206 .filter_map(|h| h.get(p))
207 .map(|n| inner_text(n, p).trim().to_string())
208 .collect()
209}
210
211fn parse_logged_in_users_html(dom: &tl::VDom) -> Option<Vec<LoggedInUser>> {
216 let parser = dom.parser();
217 let node = dom
218 .query_selector(r#"td[valign="top"]"#)?
219 .next()?
220 .get(parser)?;
221
222 let td_html = match node {
223 tl::Node::Tag(tag) => children_html(tag, parser),
224 _ => return None,
225 };
226
227 const NO_USERS: &str =
229 "\u{30ed}\u{30b0}\u{30a4}\u{30f3}\u{4e2d}\u{306e}\
230 \u{30e6}\u{30fc}\u{30b6}\u{30fc}\u{306f}\
231 \u{3044}\u{307e}\u{305b}\u{3093}";
232
233 if td_html.contains(NO_USERS) {
234 return None;
235 }
236
237 let dom2 = tl::parse(&td_html, tl::ParserOptions::default()).ok()?;
238 let p2 = dom2.parser();
239
240 let mut users = Vec::new();
241 let mut is_header = true;
242
243 for tr_handle in dom2.query_selector("tr").into_iter().flatten() {
244 let tr_html = match tr_handle.get(p2) {
245 Some(tl::Node::Tag(tag)) => children_html(tag, p2),
246 _ => continue,
247 };
248 let cells = parse_tr_cells(&tr_html);
249 if cells.len() < 4 {
250 continue;
251 }
252 if is_header {
253 is_header = false;
254 continue;
255 }
256
257 let order = match cells[0].parse::<u32>() {
258 Ok(n) => n,
259 Err(_) => continue,
260 };
261 let room = match cells[2].parse::<u32>() {
262 Ok(n) => n,
263 Err(_) => continue,
264 };
265 let state = cells[3].parse::<u32>().unwrap_or(0);
266
267 users.push(LoggedInUser { order, username: cells[1].clone(), room, state });
268 }
269
270 if users.is_empty() { None } else { Some(users) }
271}
272
273fn parse_rooms_html(dom: &tl::VDom) -> Vec<RoomInfo> {
274 let parser = dom.parser();
275
276 let center_html = match dom
277 .query_selector(r#"td[align="center"]"#)
278 .and_then(|mut q| q.next())
279 .and_then(|h| h.get(parser))
280 {
281 Some(tl::Node::Tag(tag)) => children_html(tag, parser),
282 _ => return vec![],
283 };
284
285 let dom2 = match tl::parse(¢er_html, tl::ParserOptions::default()) {
286 Ok(d) => d,
287 Err(_) => return vec![],
288 };
289 let p2 = dom2.parser();
290
291 let mut rooms = Vec::new();
292 let mut is_header = true;
293
294 for tr_handle in dom2.query_selector("tr").into_iter().flatten() {
295 let tr_html = match tr_handle.get(p2) {
296 Some(tl::Node::Tag(tag)) => children_html(tag, p2),
297 _ => continue,
298 };
299 let cells = parse_tr_cells(&tr_html);
300 if cells.len() < 6 {
301 continue;
302 }
303 if is_header {
304 is_header = false;
305 continue;
306 }
307
308 let room = match cells[0].parse::<u32>() {
309 Ok(n) => n,
310 Err(_) => continue,
311 };
312 let max_conn = cells[1].parse::<u32>().unwrap_or(0);
313
314 rooms.push(RoomInfo {
315 room,
316 max_connections: max_conn,
317 map_display: trim_full(cells[2].clone()),
318 public_date: trim_full(cells[3].clone()),
319 patrol: trim_full(cells[4].clone()),
320 remarks: trim_full(cells[5].clone()),
321 });
322 }
323 rooms
324}
325
/// Entry page; `scrape_inner` requests it first to obtain a `JSESSIONID`.
const BASE_URL: &str = "http://www7019ug.sakura.ne.jp/CHaserOnline003/MeetingPlace";

/// Endpoint that receives the `user` / `pass` query parameters.
const CHECK_URL: &str = "http://www7019ug.sakura.ne.jp/CHaserOnline003/MeetingPlace/UserCheck";
332
333async fn scrape_inner(
338 user: &str,
339 pass: &str,
340 opts: ScrapeOptions,
341 proxy_mode: ProxyMode,
342) -> Result<ScrapeResult, BoxError> {
343 let (_, jsession) = send_follow_redirects(BASE_URL, &[], &proxy_mode).await?;
345 let jsessionid = jsession.ok_or("JSESSIONID not found")?;
346
347 let check_url = format!(
350 "{}?user={}&pass={}",
351 CHECK_URL,
352 url_encode(user),
353 url_encode(pass),
354 );
355 let cookie = format!("JSESSIONID={}", jsessionid);
356 let (body, _) = send_follow_redirects(
357 &check_url,
358 &[("Cookie", cookie)],
359 &proxy_mode,
360 ).await?;
361
362 let (html, _, _) = SHIFT_JIS.decode(&body);
363 let dom = tl::parse(&html, tl::ParserOptions::default())?;
364
365 let logged_in_users = parse_logged_in_users_html(&dom).and_then(|users| {
366 let filtered: Vec<LoggedInUser> = match &opts.user_filter {
367 Some(f) => users.into_iter().filter(|u| f.matches(u)).collect(),
368 None => users,
369 };
370 if filtered.is_empty() { None } else { Some(filtered) }
371 });
372
373 let rooms = {
374 let all = parse_rooms_html(&dom);
375 match &opts.room_filter {
376 Some(f) => all.into_iter().filter(|r| f.matches(r)).collect(),
377 None => all,
378 }
379 };
380
381 Ok(ScrapeResult { logged_in_users, rooms })
382}
383
384pub async fn scrape(
390 user: &str,
391 pass: &str,
392 opts: ScrapeOptions,
393) -> Result<ScrapeResult, BoxError> {
394 scrape_inner(user, pass, opts, ProxyMode::Auto).await
395}
396
397pub async fn scrape_with_proxy(
400 user: &str,
401 pass: &str,
402 proxy_uri: &str,
403 opts: ScrapeOptions,
404) -> Result<ScrapeResult, BoxError> {
405 scrape_inner(user, pass, opts, ProxyMode::from_option(Some(proxy_uri))).await
406}