// tail-fin-xhs 0.7.8
//
// Xiaohongshu adapter for tail-fin: search, notes, comments, feed
// Documentation
//! `Site` implementation for Xiaohongshu (小红书 / RED).
//!
//! xhs uses a signed-request model: every API call carries X-S / X-T
//! headers derived from the `a1` cookie (device fingerprint) plus a
//! session token. Authentication itself is anchored on the `web_session`
//! cookie — when that's missing or stale, the API returns JSON with
//! `code: 10001` ("登录已过期") at HTTP 200.
//!
//! Because xhs expresses auth failures in the response body rather than
//! HTTP status, `detect_auth_failure` has to read `body_preview` —
//! status-only classification misses the common cases.

use std::time::Duration;

use async_trait::async_trait;
use tail_fin_core::{
    AuthFailureKind, BrowserSession, FailureIndicators, SessionStatus, Site, SiteError,
};

/// Site adapter for Xiaohongshu (小红书 / RED).
///
/// This is a field-less unit type: it carries no state of its own, so it is
/// free to copy, compare by construction, and create via `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct XhsSite;

/// Cookie-domain pattern used for cookie scoping and for validation lookups.
const COOKIE_DOMAIN: &str = "*.xiaohongshu.com";

/// The single-entry pattern list returned by `cookie_domain_patterns`.
const COOKIE_DOMAIN_PATTERNS: &[&str] = &[COOKIE_DOMAIN];

/// Homepage URL: the refresh target and the validation ping target.
const HOMEPAGE_URL: &str = "https://www.xiaohongshu.com/";

/// Back-off reported whenever an HTTP 429 is observed.
const RATE_LIMIT_BACKOFF: Duration = Duration::from_secs(300);

#[async_trait]
impl Site for XhsSite {
    /// Stable machine identifier for this site.
    fn id(&self) -> &'static str {
        "xhs"
    }

    /// Human-readable site name.
    fn display_name(&self) -> &'static str {
        "Xiaohongshu"
    }

    /// Cookie-domain patterns that belong to this site.
    fn cookie_domain_patterns(&self) -> &'static [&'static str] {
        COOKIE_DOMAIN_PATTERNS
    }

    /// URL that is loaded to refresh cookies (the homepage).
    fn refresh_url(&self) -> &'static str {
        HOMEPAGE_URL
    }

    /// Minimum interval between two refreshes (180 seconds).
    fn refresh_interval_min(&self) -> Duration {
        Duration::from_secs(180)
    }

    // refresh: use default (navigate + get_cookies_for_domain).
    // xhs emits Set-Cookie on homepage load, including a1 rotation.

    /// Validates the session in two steps: cookie presence, then a homepage
    /// ping.
    ///
    /// Auth endpoints need signed headers (X-S / X-T) to get a meaningful
    /// response, so no API endpoint is probed here. Instead both the session
    /// anchor (`web_session`) and the signing anchor (`a1`) must be present
    /// with a non-empty value; if either is missing the session is reported
    /// as `Expired` without any network request. Otherwise the homepage is
    /// pinged and the returned status is classified:
    ///
    /// * `200` → `Valid`
    /// * `401` / `403` → `Expired`
    /// * `429` → `Blocked` with a 300-second retry hint
    /// * `0` → `Unknown`
    /// * anything else → `Degrading` with a hint naming the status
    ///
    /// # Errors
    ///
    /// Returns `SiteError::ValidationFailed` when the cookie lookup or the
    /// homepage ping itself fails.
    async fn validate(&self, session: &BrowserSession) -> Result<SessionStatus, SiteError> {
        let cookies = session
            .get_cookies_for_domain(COOKIE_DOMAIN)
            .await
            .map_err(|e| SiteError::ValidationFailed {
                site: self.id(),
                reason: format!("get_cookies_for_domain: {e}"),
            })?;

        // A cookie only counts when it exists AND carries a non-empty string value.
        let has_cookie = |name: &str| {
            cookies.iter().any(|c| {
                c.get("name").and_then(|v| v.as_str()) == Some(name)
                    && c.get("value")
                        .and_then(|v| v.as_str())
                        .map(|v| !v.is_empty())
                        .unwrap_or(false)
            })
        };

        // `a1` alone isn't enough — it's set even for logged-out visitors.
        // `web_session` is the authenticated-user anchor.
        if !has_cookie("web_session") || !has_cookie("a1") {
            return Ok(SessionStatus::Expired);
        }

        let status = session
            .http_ping(HOMEPAGE_URL)
            .await
            .map_err(|e| SiteError::ValidationFailed {
                site: self.id(),
                reason: format!("homepage ping: {e}"),
            })?;

        Ok(match status {
            200 => SessionStatus::Valid,
            401 | 403 => SessionStatus::Expired,
            429 => SessionStatus::Blocked {
                reason: "Xiaohongshu rate limit".into(),
                retry_after: Some(RATE_LIMIT_BACKOFF),
            },
            // NOTE(review): 0 presumably means "no HTTP response obtained" —
            // confirm against `BrowserSession::http_ping`.
            0 => SessionStatus::Unknown,
            other => SessionStatus::Degrading {
                estimated_expiry: None,
                hint: format!("unexpected HTTP {other} from homepage"),
            },
        })
    }

    /// Classifies a failed request as an auth failure, if it looks like one.
    ///
    /// The response body is inspected first, because xhs reports auth errors
    /// as HTTP 200 + JSON body with a numeric `code`. Only when the body
    /// gives no signal does the HTTP status decide. Returns `None` when
    /// neither the body nor the status indicates an auth problem.
    fn detect_auth_failure(&self, indicators: &FailureIndicators) -> Option<AuthFailureKind> {
        // Known codes (observed, non-exhaustive):
        //   -100 / 10001 → login expired
        //   461          → signing failure (X-S / X-T rejected — often
        //                  means a1 rotation caught us mid-request)
        //   406          → anti-bot / captcha
        let body = indicators.body_preview.as_str();

        if has_json_code(body, -100) || has_json_code(body, 10001) || body.contains("登录已过期")
        {
            return Some(AuthFailureKind::CookieExpired);
        }

        if has_json_code(body, 461) {
            return Some(AuthFailureKind::AntibotBlock {
                challenge_type: "xhs-signing-rejected".into(),
            });
        }

        // The bare "captcha" substring is a deliberately loose match on the
        // message text; it is checked after the more specific codes above.
        if has_json_code(body, 406) || body.contains("captcha") {
            return Some(AuthFailureKind::AntibotBlock {
                challenge_type: "xhs-captcha".into(),
            });
        }

        // Body gave no signal: fall back to plain HTTP status classification.
        match indicators.status {
            Some(401) | Some(403) => Some(AuthFailureKind::CookieExpired),
            Some(429) => Some(AuthFailureKind::RateLimited {
                retry_after: RATE_LIMIT_BACKOFF,
            }),
            _ => None,
        }
    }
}

/// Returns `true` when `body` contains a JSON `"code"` member whose integer
/// value is exactly `expected`.
///
/// Every occurrence of the `"code"` key is examined, so nested objects are
/// covered. Whitespace around the `:` separator is tolerated, and the whole
/// run of digits is compared — `"code":4610` does not count as code `461`.
/// Non-integer values (strings, missing digits) never match.
fn has_json_code(body: &str, expected: i64) -> bool {
    const KEY: &str = "\"code\"";

    body.match_indices(KEY).any(|(pos, _)| {
        // `KEY` is ASCII, so `pos + KEY.len()` is always a char boundary.
        body[pos + KEY.len()..]
            .trim_start()
            .strip_prefix(':')
            .map(str::trim_start)
            .and_then(|rest| {
                // The number is an optional leading '-' followed by ASCII digits.
                let end = rest
                    .char_indices()
                    .find(|&(i, c)| !(c.is_ascii_digit() || (i == 0 && c == '-')))
                    .map_or(rest.len(), |(i, _)| i);
                rest[..end].parse::<i64>().ok()
            })
            == Some(expected)
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Indicators that carry only an HTTP status; everything else is default.
    fn status_only(code: u16) -> FailureIndicators {
        FailureIndicators {
            status: Some(code),
            ..Default::default()
        }
    }

    /// Indicators that carry an HTTP status plus a response-body preview.
    fn with_body(code: u16, preview: &str) -> FailureIndicators {
        FailureIndicators {
            status: Some(code),
            body_preview: preview.into(),
            ..Default::default()
        }
    }

    /// Runs the classifier and unwraps the `challenge_type` of an
    /// `AntibotBlock`, panicking on any other outcome.
    fn antibot_challenge(fi: &FailureIndicators) -> String {
        match XhsSite.detect_auth_failure(fi) {
            Some(AuthFailureKind::AntibotBlock { challenge_type }) => challenge_type,
            other => panic!("expected AntibotBlock, got {other:?}"),
        }
    }

    #[test]
    fn identity_fields() {
        let site = XhsSite;
        assert_eq!(site.id(), "xhs");
        assert_eq!(site.display_name(), "Xiaohongshu");
        assert_eq!(site.cookie_domain_patterns(), &["*.xiaohongshu.com"]);
        assert_eq!(site.refresh_url(), "https://www.xiaohongshu.com/");
    }

    #[test]
    fn detect_login_expired_via_code_10001() {
        let response = with_body(200, r#"{"code":10001,"msg":"登录已过期","data":{}}"#);
        let verdict = XhsSite.detect_auth_failure(&response);
        assert!(matches!(verdict, Some(AuthFailureKind::CookieExpired)));
    }

    #[test]
    fn detect_login_expired_via_code_negative_100() {
        let response = with_body(200, r#"{"code":-100,"success":false}"#);
        let verdict = XhsSite.detect_auth_failure(&response);
        assert!(matches!(verdict, Some(AuthFailureKind::CookieExpired)));
    }

    #[test]
    fn detect_signing_rejected_is_antibot() {
        let response = with_body(200, r#"{"code":461,"msg":"sign error"}"#);
        assert_eq!(antibot_challenge(&response), "xhs-signing-rejected");
    }

    #[test]
    fn detect_captcha_is_antibot() {
        let response = with_body(200, r#"{"code":406,"msg":"please complete captcha"}"#);
        assert_eq!(antibot_challenge(&response), "xhs-captcha");
    }

    #[test]
    fn detect_rate_limited_on_429() {
        let verdict = XhsSite
            .detect_auth_failure(&status_only(429))
            .expect("should classify 429");
        if let AuthFailureKind::RateLimited { retry_after } = verdict {
            assert_eq!(retry_after, Duration::from_secs(300));
        } else {
            panic!("expected RateLimited, got {verdict:?}");
        }
    }

    #[test]
    fn detect_cookie_expired_on_401_and_403() {
        for http_status in [401, 403] {
            let verdict = XhsSite.detect_auth_failure(&status_only(http_status));
            assert!(matches!(verdict, Some(AuthFailureKind::CookieExpired)));
        }
    }

    #[test]
    fn detect_returns_none_for_ok_empty_body() {
        // A plain 200 with no body preview is not an auth failure.
        let ok_without_body = XhsSite.detect_auth_failure(&status_only(200));
        assert!(ok_without_body.is_none());

        // Neither is a fully default (status-less, body-less) indicator set.
        let nothing_at_all = XhsSite.detect_auth_failure(&FailureIndicators::default());
        assert!(nothing_at_all.is_none());
    }
}