tail-fin-591 0.7.8

591.com.tw Taiwan rentals adapter for tail-fin: listings, communities, price history, crawl
Documentation
//! `Site` implementation for 591.com.tw (Taiwan rentals + sales).
//!
//! All 591 commands are anonymous direct-HTTP calls — no cookies, no
//! CSRF token, no browser session. The `Site` trait impl exists for
//! daemon registration (descriptive metadata) and to provide a
//! lightweight liveness probe + 5xx/429 classification. Cookie-refresh
//! is a no-op because 591 issues no cookies we'd need to harvest.

use std::time::Duration;

use async_trait::async_trait;
use serde_json::Value;
use tail_fin_core::{
    AuthFailureKind, BrowserSession, FailureIndicators, SessionStatus, Site, SiteError,
};

/// Zero-sized handle for the 591.com.tw adapter.
///
/// The type carries no state; it exists so the `Site` trait can be
/// implemented for 591 and the resulting value handed to whatever
/// registers sites. Because it is a unit struct it is free to copy,
/// and the derives below let it be logged (`Debug`) and constructed
/// generically (`Default`).
#[derive(Debug, Clone, Copy, Default)]
pub struct FiveNineOneSite;

/// 591 homepage. Returned by `refresh_url` and pinged by `validate`;
/// keeping a single definition guarantees the advertised probe target
/// and the URL actually probed cannot drift apart.
const HOMEPAGE_URL: &str = "https://www.591.com.tw/";

/// Pause reported whenever 591 answers HTTP 429. Shared by `validate`
/// and `detect_auth_failure` so both paths recommend the same back-off.
const RATE_LIMIT_BACKOFF: Duration = Duration::from_secs(180);

#[async_trait]
impl Site for FiveNineOneSite {
    fn id(&self) -> &'static str {
        "591"
    }

    fn display_name(&self) -> &'static str {
        "591 Taiwan Rentals"
    }

    fn cookie_domain_patterns(&self) -> &'static [&'static str] {
        // 591 issues no cookies that any tail-fin command requires,
        // but the daemon still hits this method to filter cookie
        // snapshots. Returning the parent-domain glob keeps any cookie
        // the browser happens to receive (e.g. analytics) scoped
        // correctly without claiming we depend on them.
        &["*.591.com.tw"]
    }

    fn refresh_url(&self) -> &'static str {
        // Liveness probe target only — 591 needs no cookie refresh.
        // The default Site::refresh impl is overridden below to skip
        // navigation entirely.
        HOMEPAGE_URL
    }

    fn refresh_interval_min(&self) -> Duration {
        // No real refresh happens (see `refresh` override). The
        // interval just paces the daemon's `refresh_if_stale` calls;
        // an hour is plenty since we never harvest cookies.
        Duration::from_secs(3600)
    }

    async fn refresh(&self, _session: &BrowserSession) -> Result<Vec<Value>, SiteError> {
        // No-op: 591 doesn't issue cookies any command depends on, so
        // there's nothing to harvest. Skipping the default impl avoids
        // a needless Chrome navigation every refresh interval.
        Ok(vec![])
    }

    async fn validate(&self, session: &BrowserSession) -> Result<SessionStatus, SiteError> {
        // Liveness probe: ping the homepage and translate the HTTP
        // status into a `SessionStatus`. A transport-level failure of
        // the ping itself is reported as `ValidationFailed`.
        let status = session
            .http_ping(HOMEPAGE_URL)
            .await
            .map_err(|e| SiteError::ValidationFailed {
                site: self.id(),
                reason: format!("homepage ping: {e}"),
            })?;

        Ok(match status {
            200 => SessionStatus::Valid,
            // 401/403 from a cookie-less site means anti-bot rather
            // than an expired session. Surface as Expired so callers
            // pause; the next refresh is a no-op so this doesn't burn
            // a Chrome cycle.
            401 | 403 => SessionStatus::Expired,
            429 => SessionStatus::Blocked {
                reason: "591 rate limit".into(),
                retry_after: Some(RATE_LIMIT_BACKOFF),
            },
            // NOTE(review): 0 presumably means the ping produced no
            // HTTP status at all — confirm against `http_ping`.
            0 => SessionStatus::Unknown,
            // Everything else (including 5xx and non-200 2xx/3xx) is
            // reported as degrading, with the raw code in the hint.
            other => SessionStatus::Degrading {
                estimated_expiry: None,
                hint: format!("unexpected HTTP {other}"),
            },
        })
    }

    fn detect_auth_failure(&self, indicators: &FailureIndicators) -> Option<AuthFailureKind> {
        // 591's anti-bot layer (rare for our anonymous endpoints but
        // possible) returns 401/403. Classify as CookieExpired so
        // SessionManager triggers a "refresh" — which for us is a
        // no-op, but the side effect of resetting last_refresh lets
        // the call retry without further escalation.
        match indicators.status {
            Some(401) | Some(403) => Some(AuthFailureKind::CookieExpired),
            Some(429) => Some(AuthFailureKind::RateLimited {
                retry_after: RATE_LIMIT_BACKOFF,
            }),
            // Any other status (or none at all) is not an auth problem.
            _ => None,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `FailureIndicators` carrying only an HTTP status; every
    /// other field keeps its `Default` value.
    fn with_status(code: u16) -> FailureIndicators {
        FailureIndicators {
            status: Some(code),
            ..Default::default()
        }
    }

    /// The descriptive metadata reported by the site is stable.
    #[test]
    fn identity_fields() {
        let site = FiveNineOneSite;
        assert_eq!(site.id(), "591");
        assert_eq!(site.display_name(), "591 Taiwan Rentals");
        assert_eq!(site.cookie_domain_patterns(), &["*.591.com.tw"]);
        assert_eq!(site.refresh_url(), "https://www.591.com.tw/");
    }

    /// Pre-PR-#199 the interval was 5 minutes (Chrome refresh cadence
    /// for the now-dead CSRF cookie). It is 1h now because `refresh()`
    /// is a no-op and there is nothing to refresh.
    #[test]
    fn refresh_interval_is_one_hour() {
        let one_hour = Duration::from_secs(3600);
        assert_eq!(FiveNineOneSite.refresh_interval_min(), one_hour);
    }

    /// Both 401 and 403 are classified as `CookieExpired`.
    #[test]
    fn detect_cookie_expired_on_401_and_403() {
        for code in [401, 403] {
            let verdict = FiveNineOneSite.detect_auth_failure(&with_status(code));
            assert!(matches!(verdict, Some(AuthFailureKind::CookieExpired)));
        }
    }

    /// 429 is classified as `RateLimited` with a 180-second back-off.
    #[test]
    fn detect_rate_limited_on_429() {
        let verdict = FiveNineOneSite.detect_auth_failure(&with_status(429));
        if let Some(AuthFailureKind::RateLimited { retry_after }) = &verdict {
            assert_eq!(*retry_after, Duration::from_secs(180));
        } else {
            panic!("expected RateLimited, got {verdict:?}");
        }
    }

    /// Statuses outside 401/403/429 are not treated as auth failures.
    #[test]
    fn detect_returns_none_for_ok_and_unknown() {
        for code in [200, 500] {
            let verdict = FiveNineOneSite.detect_auth_failure(&with_status(code));
            assert!(verdict.is_none());
        }
    }
}