//! tail-fin-xhs 0.7.8
//!
//! Xiaohongshu adapter for tail-fin: search, notes, comments, feed.
pub mod auth;
pub mod js;
pub mod parsing;
pub mod site;
pub mod types;
pub mod util;

use std::time::Duration;

use tail_fin_common::page::ensure_on_domain;
use tail_fin_common::BrowserSession;
use tail_fin_common::TailFinError;

pub use auth::extract_xhs_cookies;
pub use site::XhsSite;
pub use types::{Comment, FeedItem, MediaItem, Note, Notification, SearchNote, UserNote};
pub use util::extract_note_id;

/// JavaScript snippet that scrolls the page down by one viewport height.
///
/// Scrolls the `.note-scroller` element when one exists, otherwise the
/// document root, and evaluates to the string `'scrolled'`.
const SCROLL_JS: &str = r#"(() => {
    const el = document.querySelector('.note-scroller') || document.documentElement;
    el.scrollBy(0, window.innerHeight);
    return 'scrolled';
})()"#;

/// JavaScript snippet that waits for `window.__INITIAL_STATE__` to appear
/// (SSR hydration).
///
/// Polls every 500 ms, at most 20 times (roughly 10 seconds in total). The
/// promise resolves to `true` as soon as the state object is present and to
/// `false` if it never shows up within that window. The script itself never
/// throws on timeout, so callers that discard the result effectively get a
/// bounded delay rather than an error.
const WAIT_FOR_STATE_JS: &str = r#"(async () => {
    for (let i = 0; i < 20; i++) {
        if (window.__INITIAL_STATE__) return true;
        await new Promise(r => setTimeout(r, 500));
    }
    return false;
})()"#;

/// Client for reading Xiaohongshu (XHS) data through a browser session.
///
/// Every operation drives the wrapped [`BrowserSession`]: it navigates to a
/// page on `www.xiaohongshu.com`, evaluates JavaScript in that page, and
/// parses the evaluated result into the crate's typed models.
pub struct XhsClient {
    /// Browser session used for all navigation and script evaluation.
    session: BrowserSession,
}

impl XhsClient {
    pub fn new(session: BrowserSession) -> Self {
        Self { session }
    }

    pub async fn note(&self, id: &str) -> Result<Note, TailFinError> {
        let note_id = crate::util::extract_note_id(id);
        let url = format!("https://www.xiaohongshu.com/explore/{}", note_id);

        ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
        self.session.navigate(&url).await?;
        let _ = self.session.wait_for_network_idle(15000, 1000).await;
        self.session
            .eval(WAIT_FOR_STATE_JS)
            .await
            .map_err(TailFinError::Browser)?;

        let raw = self
            .session
            .eval(js::note::JS)
            .await
            .map_err(TailFinError::Browser)?;
        parsing::check_page_status(&raw)?;
        parsing::parse_note(&raw)
    }

    pub async fn search(&self, query: &str, count: usize) -> Result<Vec<SearchNote>, TailFinError> {
        ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
        let url = format!(
            "https://www.xiaohongshu.com/search_result?keyword={}&source=web_search_result_note",
            urlencoding::encode(query)
        );
        self.session.navigate(&url).await?;
        let _ = self.session.wait_for_network_idle(15000, 1000).await;
        self.session
            .eval(WAIT_FOR_STATE_JS)
            .await
            .map_err(TailFinError::Browser)?;
        let raw = self
            .session
            .eval(js::search::JS)
            .await
            .map_err(TailFinError::Browser)?;
        parsing::check_page_status(&raw)?;
        Ok(parsing::parse_search(&raw, count))
    }

    pub async fn comments(
        &self,
        note_id: &str,
        count: usize,
        with_replies: bool,
    ) -> Result<Vec<Comment>, TailFinError> {
        let id = crate::util::extract_note_id(note_id);
        let url = format!("https://www.xiaohongshu.com/explore/{}", id);

        ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
        self.session.navigate(&url).await?;
        let _ = self.session.wait_for_network_idle(15000, 1000).await;
        self.session
            .eval(WAIT_FOR_STATE_JS)
            .await
            .map_err(TailFinError::Browser)?;

        let raw = self
            .session
            .eval(js::comments::JS)
            .await
            .map_err(TailFinError::Browser)?;
        parsing::check_page_status(&raw)?;
        let mut comments = parsing::parse_comments(&raw, count);

        if with_replies && !comments.is_empty() {
            let replies_raw = self
                .session
                .eval(js::comments::EXPAND_REPLIES_JS)
                .await
                .map_err(TailFinError::Browser)?;
            parsing::merge_replies(&mut comments, &replies_raw);
        }

        Ok(comments)
    }

    pub async fn user_notes(
        &self,
        user_id: &str,
        count: usize,
    ) -> Result<Vec<UserNote>, TailFinError> {
        let url = format!(
            "https://www.xiaohongshu.com/user/profile/{}",
            user_id.trim()
        );
        ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
        self.session.navigate(&url).await?;
        let _ = self.session.wait_for_network_idle(15000, 1000).await;
        self.session
            .eval(WAIT_FOR_STATE_JS)
            .await
            .map_err(TailFinError::Browser)?;

        let raw = self
            .session
            .eval(js::user::JS)
            .await
            .map_err(TailFinError::Browser)?;
        parsing::check_page_status(&raw)?;
        let mut notes = parsing::parse_user_notes(&raw, count);

        if notes.len() < count {
            for _ in 0..4 {
                self.session
                    .eval(SCROLL_JS)
                    .await
                    .map_err(TailFinError::Browser)?;
                tokio::time::sleep(Duration::from_millis(1500)).await;
                let raw = self
                    .session
                    .eval(js::user::JS)
                    .await
                    .map_err(TailFinError::Browser)?;
                let next = parsing::parse_user_notes(&raw, count);
                if next.len() <= notes.len() {
                    break;
                }
                notes = next;
            }
        }
        Ok(notes.into_iter().take(count).collect())
    }

    pub async fn feed(&self, count: usize) -> Result<Vec<FeedItem>, TailFinError> {
        ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;

        let inject_result = self
            .session
            .eval(js::pinia::INJECT_FEED)
            .await
            .map_err(TailFinError::Browser)?;

        if inject_result.get("error").is_some() {
            return Err(TailFinError::Api(format!(
                "XHS: unable to access app state: {}",
                inject_result
                    .get("error")
                    .and_then(|v| v.as_str())
                    .unwrap_or("unknown")
            )));
        }

        let max_rounds = (count / 5).clamp(3, 15);
        for _ in 0..max_rounds {
            self.session
                .eval(SCROLL_JS)
                .await
                .map_err(TailFinError::Browser)?;
            tokio::time::sleep(Duration::from_millis(1500)).await;
            let captured = self
                .session
                .eval("window.__TF_CAPTURED?.length || 0")
                .await
                .map_err(TailFinError::Browser)?
                .as_u64()
                .unwrap_or(0);
            if captured >= count as u64 {
                break;
            }
        }

        let raw = self
            .session
            .eval(js::pinia::COLLECT_FEED)
            .await
            .map_err(TailFinError::Browser)?;
        Ok(parsing::parse_feed(&raw, count))
    }

    pub async fn notifications(&self, count: usize) -> Result<Vec<Notification>, TailFinError> {
        ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
        self.session
            .navigate("https://www.xiaohongshu.com/user/notifications")
            .await?;
        let _ = self.session.wait_for_network_idle(15000, 1000).await;

        let inject_result = self
            .session
            .eval(js::pinia::INJECT_NOTIFICATIONS)
            .await
            .map_err(TailFinError::Browser)?;

        if inject_result.get("error").is_some() {
            return Err(TailFinError::Api(format!(
                "XHS: unable to access app state: {}",
                inject_result
                    .get("error")
                    .and_then(|v| v.as_str())
                    .unwrap_or("unknown")
            )));
        }

        for _ in 0..8 {
            tokio::time::sleep(Duration::from_secs(1)).await;
            let captured = self
                .session
                .eval("window.__TF_CAPTURED_NOTIF?.length || 0")
                .await
                .map_err(TailFinError::Browser)?
                .as_u64()
                .unwrap_or(0);
            if captured > 0 {
                break;
            }
        }

        let raw = self
            .session
            .eval(js::pinia::COLLECT_NOTIFICATIONS)
            .await
            .map_err(TailFinError::Browser)?;
        Ok(parsing::parse_notifications(&raw, count))
    }

    pub async fn media(&self, note_id: &str) -> Result<Vec<MediaItem>, TailFinError> {
        let id = crate::util::extract_note_id(note_id);
        let url = format!("https://www.xiaohongshu.com/explore/{}", id);
        ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
        self.session.navigate(&url).await?;
        let _ = self.session.wait_for_network_idle(15000, 1000).await;
        self.session
            .eval(WAIT_FOR_STATE_JS)
            .await
            .map_err(TailFinError::Browser)?;
        let raw = self
            .session
            .eval(js::download::JS)
            .await
            .map_err(TailFinError::Browser)?;
        parsing::check_page_status(&raw)?;
        Ok(parsing::parse_media(&raw))
    }
}