Skip to main content

chaser_util/
room_list.rs

1//! CHaser Online MeetingPlace scraper library
2//!
3//! # Quick start
4//!
5//! ```no_run
6//! use chaser_util::room_list::{scrape, scrape_with_proxy, ScrapeOptions};
7//!
8//! #[tokio::main]
9//! async fn main() {
10//!     // Auto proxy detection (Windows registry / macOS SCF / env vars)
11//!     let result = scrape("hot", "hot", ScrapeOptions::default()).await.unwrap();
12//!
13//!     // Manual proxy
14//!     let result = scrape_with_proxy(
15//!         "hot", "hot",
16//!         "http://proxy.example.com:8080",
17//!         ScrapeOptions::default(),
18//!     ).await.unwrap();
19//! }
20//! ```
21
22use encoding_rs::SHIFT_JIS;
23
24use crate::proxy::{send_follow_redirects, url_encode, BoxError, ProxyMode};
25
26// ----------------------------------------------------------------
27// Public data types
28// ----------------------------------------------------------------
29
/// One row of the public room table.
///
/// Text fields hold the cell contents as scraped, with surrounding whitespace
/// removed; they are not interpreted any further.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RoomInfo {
    /// Room number (first column).
    pub room: u32,
    /// Maximum number of connections (second column); `0` when the cell does
    /// not parse as an unsigned integer.
    pub max_connections: u32,
    /// Map-display cell text (third column).
    pub map_display: String,
    /// Public-date cell text (fourth column), kept as a plain string.
    pub public_date: String,
    /// Patrol cell text (fifth column).
    pub patrol: String,
    /// Remarks cell text (sixth column).
    pub remarks: String,
}
40
/// One row of the logged-in user table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LoggedInUser {
    /// Value of the first column (the row's order number).
    pub order: u32,
    /// User name (second column), with surrounding whitespace removed.
    pub username: String,
    /// Room number (third column).
    pub room: u32,
    /// Numeric state (fourth column); `0` when the cell does not parse as an
    /// unsigned integer.
    pub state: u32,
}
49
50/// Full scrape result.
51#[derive(Debug, Clone)]
52pub struct ScrapeResult {
53    /// `None` means no users are currently logged in.
54    pub logged_in_users: Option<Vec<LoggedInUser>>,
55    pub rooms:           Vec<RoomInfo>,
56}
57
58// ----------------------------------------------------------------
59// Constants
60// ----------------------------------------------------------------
61
/// Text constants for the map-display column.
///
/// NOTE(review): presumably the values found in `RoomInfo::map_display` —
/// confirm against the live page.
// The module is named like a type so call sites read `MapDisplay::ENABLED`.
#[allow(non_snake_case)]
pub mod MapDisplay {
    /// The character "可" (U+53EF).
    pub const ENABLED: &str = "\u{53ef}";
    /// The character "否" (U+5426).
    pub const DISABLED: &str = "\u{5426}";
}
67
/// Text constants for the patrol column.
///
/// NOTE(review): presumably the values found in `RoomInfo::patrol` — confirm
/// against the live page.
// The module is named like a type so call sites read `Patrol::YES`.
#[allow(non_snake_case)]
pub mod Patrol {
    /// The character "有" (U+6709).
    pub const YES: &str = "\u{6709}";
    /// The multiplication sign "×" (U+00D7), not the ASCII letter `x`.
    pub const NO: &str = "\u{00d7}";
}
73
/// Single-character markers for the remarks column.
///
/// NOTE(review): presumably these appear inside `RoomInfo::remarks`, which
/// would make them suitable for `RoomFilter::remarks_contains` — confirm
/// against the live page.
// The module is named like a type so call sites read `Remarks::RA`.
#[allow(non_snake_case)]
pub mod Remarks {
    /// The character "ラ" (U+30E9).
    pub const RA: &str = "\u{30e9}";
    /// The character "埼" (U+57FC).
    pub const SAI: &str = "\u{57fc}";
    /// The character "全" (U+5168).
    pub const ZEN: &str = "\u{5168}";
}
80
81// ----------------------------------------------------------------
82// Filter types
83// ----------------------------------------------------------------
84
/// Criteria for selecting rows of the room list.
///
/// Every field defaults to `None`, which disables that criterion; all
/// criteria that are set must hold for a room to match.
#[derive(Debug, Clone, Default)]
pub struct RoomFilter {
    /// Exact room number.
    pub room: Option<u32>,
    /// Lowest accepted room number (inclusive).
    pub room_min: Option<u32>,
    /// Highest accepted room number (inclusive).
    pub room_max: Option<u32>,
    /// Lowest accepted `max_connections` (inclusive).
    pub min_max_conn: Option<u32>,
    /// Highest accepted `max_connections` (inclusive).
    pub max_max_conn: Option<u32>,
    /// Exact map-display text.
    pub map_display: Option<String>,
    /// Exact public-date text.
    pub public_date: Option<String>,
    /// Substring that must occur in the public-date text.
    pub public_date_contains: Option<String>,
    /// Exact patrol text.
    pub patrol: Option<String>,
    /// Exact remarks text.
    pub remarks: Option<String>,
    /// Substring that must occur in the remarks text.
    pub remarks_contains: Option<String>,
}
100
101impl RoomFilter {
102    pub fn matches(&self, r: &RoomInfo) -> bool {
103        if let Some(n)     = self.room                 { if r.room != n                          { return false; } }
104        if let Some(n)     = self.room_min             { if r.room < n                           { return false; } }
105        if let Some(n)     = self.room_max             { if r.room > n                           { return false; } }
106        if let Some(n)     = self.min_max_conn         { if r.max_connections < n                { return false; } }
107        if let Some(n)     = self.max_max_conn         { if r.max_connections > n                { return false; } }
108        if let Some(ref s) = self.map_display          { if r.map_display != *s                  { return false; } }
109        if let Some(ref s) = self.public_date          { if r.public_date != *s                  { return false; } }
110        if let Some(ref s) = self.public_date_contains { if !r.public_date.contains(s.as_str()) { return false; } }
111        if let Some(ref s) = self.patrol               { if r.patrol != *s                       { return false; } }
112        if let Some(ref s) = self.remarks              { if r.remarks != *s                      { return false; } }
113        if let Some(ref s) = self.remarks_contains     { if !r.remarks.contains(s.as_str())     { return false; } }
114        true
115    }
116}
117
/// Criteria for selecting rows of the logged-in user list.
///
/// Every field defaults to `None`, which disables that criterion; all
/// criteria that are set must hold for a user to match.
#[derive(Debug, Clone, Default)]
pub struct UserFilter {
    /// Exact order number.
    pub order: Option<u32>,
    /// Lowest accepted order number (inclusive).
    pub order_min: Option<u32>,
    /// Highest accepted order number (inclusive).
    pub order_max: Option<u32>,
    /// Exact user name.
    pub username: Option<String>,
    /// Substring that must occur in the user name.
    pub username_contains: Option<String>,
    /// Exact room number.
    pub room: Option<u32>,
    /// Lowest accepted room number (inclusive).
    pub room_min: Option<u32>,
    /// Highest accepted room number (inclusive).
    pub room_max: Option<u32>,
    /// Exact state value.
    pub state: Option<u32>,
}
131
132impl UserFilter {
133    pub fn matches(&self, u: &LoggedInUser) -> bool {
134        if let Some(n)     = self.order             { if u.order != n                           { return false; } }
135        if let Some(n)     = self.order_min         { if u.order < n                            { return false; } }
136        if let Some(n)     = self.order_max         { if u.order > n                            { return false; } }
137        if let Some(ref s) = self.username          { if u.username != *s                      { return false; } }
138        if let Some(ref s) = self.username_contains { if !u.username.contains(s.as_str())     { return false; } }
139        if let Some(n)     = self.room              { if u.room != n                            { return false; } }
140        if let Some(n)     = self.room_min          { if u.room < n                             { return false; } }
141        if let Some(n)     = self.room_max          { if u.room > n                             { return false; } }
142        if let Some(n)     = self.state             { if u.state != n                           { return false; } }
143        true
144    }
145}
146
147/// Scraping options: filters applied after fetching.
148#[derive(Debug, Clone, Default)]
149pub struct ScrapeOptions {
150    pub room_filter: Option<RoomFilter>,
151    pub user_filter: Option<UserFilter>,
152}
153
154impl ScrapeOptions {
155    pub fn with_room_filter(mut self, f: RoomFilter) -> Self {
156        self.room_filter = Some(f);
157        self
158    }
159    pub fn with_user_filter(mut self, f: UserFilter) -> Self {
160        self.user_filter = Some(f);
161        self
162    }
163}
164
165// ----------------------------------------------------------------
166// tl helpers
167// ----------------------------------------------------------------
168
169fn inner_text<'a>(node: &tl::Node<'a>, parser: &'a tl::Parser<'a>) -> String {
170    match node {
171        tl::Node::Raw(b)   => b.as_utf8_str().into_owned(),
172        tl::Node::Tag(tag) => tag
173            .children().top().iter()
174            .filter_map(|h| h.get(parser))
175            .map(|n| inner_text(n, parser))
176            .collect(),
177        _ => String::new(),
178    }
179}
180
181fn to_html<'a>(node: &tl::Node<'a>, parser: &'a tl::Parser<'a>) -> String {
182    node.outer_html(parser).to_string()
183}
184
185fn children_html<'a>(tag: &tl::HTMLTag<'a>, parser: &'a tl::Parser<'a>) -> String {
186    tag.children().top().iter()
187        .filter_map(|h| h.get(parser))
188        .map(|n| to_html(n, parser))
189        .collect()
190}
191
/// Returns `s` with leading and trailing whitespace removed.
///
/// This includes the ideographic space (U+3000) and the no-break space
/// (U+00A0): both have the Unicode `White_Space` property, which is what
/// `str::trim` strips. Interior whitespace is left untouched.
fn trim_full(s: String) -> String {
    s.trim().to_owned()
}
196
197fn parse_tr_cells(tr_html: &str) -> Vec<String> {
198    let dom = match tl::parse(tr_html, tl::ParserOptions::default()) {
199        Ok(d) => d,
200        Err(_) => return vec![],
201    };
202    let p = dom.parser();
203    dom.query_selector("td")
204        .into_iter()
205        .flatten()
206        .filter_map(|h| h.get(p))
207        .map(|n| inner_text(n, p).trim().to_string())
208        .collect()
209}
210
211// ----------------------------------------------------------------
212// HTML parsers
213// ----------------------------------------------------------------
214
215fn parse_logged_in_users_html(dom: &tl::VDom) -> Option<Vec<LoggedInUser>> {
216    let parser = dom.parser();
217    let node   = dom
218        .query_selector(r#"td[valign="top"]"#)?
219        .next()?
220        .get(parser)?;
221
222    let td_html = match node {
223        tl::Node::Tag(tag) => children_html(tag, parser),
224        _ => return None,
225    };
226
227    // "ログイン中のユーザーはいません"
228    const NO_USERS: &str =
229        "\u{30ed}\u{30b0}\u{30a4}\u{30f3}\u{4e2d}\u{306e}\
230         \u{30e6}\u{30fc}\u{30b6}\u{30fc}\u{306f}\
231         \u{3044}\u{307e}\u{305b}\u{3093}";
232
233    if td_html.contains(NO_USERS) {
234        return None;
235    }
236
237    let dom2 = tl::parse(&td_html, tl::ParserOptions::default()).ok()?;
238    let p2   = dom2.parser();
239
240    let mut users     = Vec::new();
241    let mut is_header = true;
242
243    for tr_handle in dom2.query_selector("tr").into_iter().flatten() {
244        let tr_html = match tr_handle.get(p2) {
245            Some(tl::Node::Tag(tag)) => children_html(tag, p2),
246            _ => continue,
247        };
248        let cells = parse_tr_cells(&tr_html);
249        if cells.len() < 4 {
250            continue;
251        }
252        if is_header {
253            is_header = false;
254            continue;
255        }
256
257        let order = match cells[0].parse::<u32>() {
258            Ok(n) => n,
259            Err(_) => continue,
260        };
261        let room  = match cells[2].parse::<u32>() {
262            Ok(n) => n,
263            Err(_) => continue,
264        };
265        let state = cells[3].parse::<u32>().unwrap_or(0);
266
267        users.push(LoggedInUser { order, username: cells[1].clone(), room, state });
268    }
269
270    if users.is_empty() { None } else { Some(users) }
271}
272
273fn parse_rooms_html(dom: &tl::VDom) -> Vec<RoomInfo> {
274    let parser = dom.parser();
275
276    let center_html = match dom
277        .query_selector(r#"td[align="center"]"#)
278        .and_then(|mut q| q.next())
279        .and_then(|h| h.get(parser))
280    {
281        Some(tl::Node::Tag(tag)) => children_html(tag, parser),
282        _ => return vec![],
283    };
284
285    let dom2 = match tl::parse(&center_html, tl::ParserOptions::default()) {
286        Ok(d)  => d,
287        Err(_) => return vec![],
288    };
289    let p2 = dom2.parser();
290
291    let mut rooms     = Vec::new();
292    let mut is_header = true;
293
294    for tr_handle in dom2.query_selector("tr").into_iter().flatten() {
295        let tr_html = match tr_handle.get(p2) {
296            Some(tl::Node::Tag(tag)) => children_html(tag, p2),
297            _ => continue,
298        };
299        let cells = parse_tr_cells(&tr_html);
300        if cells.len() < 6 {
301            continue;
302        }
303        if is_header {
304            is_header = false;
305            continue;
306        }
307
308        let room = match cells[0].parse::<u32>() {
309            Ok(n) => n,
310            Err(_) => continue,
311        };
312        let max_conn = cells[1].parse::<u32>().unwrap_or(0);
313
314        rooms.push(RoomInfo {
315            room,
316            max_connections: max_conn,
317            map_display:     trim_full(cells[2].clone()),
318            public_date:     trim_full(cells[3].clone()),
319            patrol:          trim_full(cells[4].clone()),
320            remarks:         trim_full(cells[5].clone()),
321        });
322    }
323    rooms
324}
325
326// ----------------------------------------------------------------
327// URLs
328// ----------------------------------------------------------------
329
/// MeetingPlace top page; fetched first to obtain a `JSESSIONID`.
const BASE_URL: &str = "http://www7019ug.sakura.ne.jp/CHaserOnline003/MeetingPlace";
/// Credential-check endpoint; `user` and `pass` are appended as query
/// parameters by the scraper.
const CHECK_URL: &str = "http://www7019ug.sakura.ne.jp/CHaserOnline003/MeetingPlace/UserCheck";
332
333// ----------------------------------------------------------------
334// Core scrape logic
335// ----------------------------------------------------------------
336
337async fn scrape_inner(
338    user:       &str,
339    pass:       &str,
340    opts:       ScrapeOptions,
341    proxy_mode: ProxyMode,
342) -> Result<ScrapeResult, BoxError> {
343    // Step 1: Fetch the top page to obtain JSESSIONID
344    let (_, jsession) = send_follow_redirects(BASE_URL, &[], &proxy_mode).await?;
345    let jsessionid = jsession.ok_or("JSESSIONID not found")?;
346
347    // Step 2: Authenticate
348    // FIX: user and pass are percent-encoded to prevent query parameter injection.
349    let check_url = format!(
350        "{}?user={}&pass={}",
351        CHECK_URL,
352        url_encode(user),
353        url_encode(pass),
354    );
355    let cookie    = format!("JSESSIONID={}", jsessionid);
356    let (body, _) = send_follow_redirects(
357        &check_url,
358        &[("Cookie", cookie)],
359        &proxy_mode,
360    ).await?;
361
362    let (html, _, _) = SHIFT_JIS.decode(&body);
363    let dom = tl::parse(&html, tl::ParserOptions::default())?;
364
365    let logged_in_users = parse_logged_in_users_html(&dom).and_then(|users| {
366        let filtered: Vec<LoggedInUser> = match &opts.user_filter {
367            Some(f) => users.into_iter().filter(|u| f.matches(u)).collect(),
368            None    => users,
369        };
370        if filtered.is_empty() { None } else { Some(filtered) }
371    });
372
373    let rooms = {
374        let all = parse_rooms_html(&dom);
375        match &opts.room_filter {
376            Some(f) => all.into_iter().filter(|r| f.matches(r)).collect(),
377            None    => all,
378        }
379    };
380
381    Ok(ScrapeResult { logged_in_users, rooms })
382}
383
384// ----------------------------------------------------------------
385// Public API
386// ----------------------------------------------------------------
387
388/// Scrape with automatic proxy detection.
389pub async fn scrape(
390    user: &str,
391    pass: &str,
392    opts: ScrapeOptions,
393) -> Result<ScrapeResult, BoxError> {
394    scrape_inner(user, pass, opts, ProxyMode::Auto).await
395}
396
397/// Scrape with a manually specified proxy.
398/// Pass `""` for direct connection.
399pub async fn scrape_with_proxy(
400    user:      &str,
401    pass:      &str,
402    proxy_uri: &str,
403    opts:      ScrapeOptions,
404) -> Result<ScrapeResult, BoxError> {
405    scrape_inner(user, pass, opts, ProxyMode::from_option(Some(proxy_uri))).await
406}