//! Crate root (`lib.rs`) of `tail_fin_xhs`.

1pub mod auth;
2pub mod js;
3pub mod parsing;
4pub mod site;
5pub mod types;
6pub mod util;
7
8use std::time::Duration;
9
10use tail_fin_common::page::ensure_on_domain;
11use tail_fin_common::BrowserSession;
12use tail_fin_common::TailFinError;
13
14pub use auth::extract_xhs_cookies;
15pub use site::XhsSite;
16pub use types::{Comment, FeedItem, MediaItem, Note, Notification, SearchNote, UserNote};
17pub use util::extract_note_id;
18
/// Browser-side script that scrolls one viewport height down.
///
/// It scrolls the `.note-scroller` element when the page has one and the document root
/// otherwise, then evaluates to the string `'scrolled'`.
const SCROLL_JS: &str = r#"(() => {
    const el = document.querySelector('.note-scroller') || document.documentElement;
    el.scrollBy(0, window.innerHeight);
    return 'scrolled';
})()"#;
24
/// Browser-side script that waits for `window.__INITIAL_STATE__` to appear (SSR hydration).
///
/// It polls up to 20 times, 500 ms apart (about 10 seconds in total). The promise resolves to
/// `true` as soon as the state object is present and to `false` if it never shows up; the
/// script itself performs no other fallback.
const WAIT_FOR_STATE_JS: &str = r#"(async () => {
    for (let i = 0; i < 20; i++) {
        if (window.__INITIAL_STATE__) return true;
        await new Promise(r => setTimeout(r, 500));
    }
    return false;
})()"#;
34
35pub struct XhsClient {
36    session: BrowserSession,
37}
38
39impl XhsClient {
40    pub fn new(session: BrowserSession) -> Self {
41        Self { session }
42    }
43
44    pub async fn note(&self, id: &str) -> Result<Note, TailFinError> {
45        let note_id = crate::util::extract_note_id(id);
46        let url = format!("https://www.xiaohongshu.com/explore/{}", note_id);
47
48        ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
49        self.session.navigate(&url).await?;
50        let _ = self.session.wait_for_network_idle(15000, 1000).await;
51        self.session
52            .eval(WAIT_FOR_STATE_JS)
53            .await
54            .map_err(TailFinError::Browser)?;
55
56        let raw = self
57            .session
58            .eval(js::note::JS)
59            .await
60            .map_err(TailFinError::Browser)?;
61        parsing::check_page_status(&raw)?;
62        parsing::parse_note(&raw)
63    }
64
65    pub async fn search(&self, query: &str, count: usize) -> Result<Vec<SearchNote>, TailFinError> {
66        ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
67        let url = format!(
68            "https://www.xiaohongshu.com/search_result?keyword={}&source=web_search_result_note",
69            urlencoding::encode(query)
70        );
71        self.session.navigate(&url).await?;
72        let _ = self.session.wait_for_network_idle(15000, 1000).await;
73        self.session
74            .eval(WAIT_FOR_STATE_JS)
75            .await
76            .map_err(TailFinError::Browser)?;
77        let raw = self
78            .session
79            .eval(js::search::JS)
80            .await
81            .map_err(TailFinError::Browser)?;
82        parsing::check_page_status(&raw)?;
83        Ok(parsing::parse_search(&raw, count))
84    }
85
86    pub async fn comments(
87        &self,
88        note_id: &str,
89        count: usize,
90        with_replies: bool,
91    ) -> Result<Vec<Comment>, TailFinError> {
92        let id = crate::util::extract_note_id(note_id);
93        let url = format!("https://www.xiaohongshu.com/explore/{}", id);
94
95        ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
96        self.session.navigate(&url).await?;
97        let _ = self.session.wait_for_network_idle(15000, 1000).await;
98        self.session
99            .eval(WAIT_FOR_STATE_JS)
100            .await
101            .map_err(TailFinError::Browser)?;
102
103        let raw = self
104            .session
105            .eval(js::comments::JS)
106            .await
107            .map_err(TailFinError::Browser)?;
108        parsing::check_page_status(&raw)?;
109        let mut comments = parsing::parse_comments(&raw, count);
110
111        if with_replies && !comments.is_empty() {
112            let replies_raw = self
113                .session
114                .eval(js::comments::EXPAND_REPLIES_JS)
115                .await
116                .map_err(TailFinError::Browser)?;
117            parsing::merge_replies(&mut comments, &replies_raw);
118        }
119
120        Ok(comments)
121    }
122
123    pub async fn user_notes(
124        &self,
125        user_id: &str,
126        count: usize,
127    ) -> Result<Vec<UserNote>, TailFinError> {
128        let url = format!(
129            "https://www.xiaohongshu.com/user/profile/{}",
130            user_id.trim()
131        );
132        ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
133        self.session.navigate(&url).await?;
134        let _ = self.session.wait_for_network_idle(15000, 1000).await;
135        self.session
136            .eval(WAIT_FOR_STATE_JS)
137            .await
138            .map_err(TailFinError::Browser)?;
139
140        let raw = self
141            .session
142            .eval(js::user::JS)
143            .await
144            .map_err(TailFinError::Browser)?;
145        parsing::check_page_status(&raw)?;
146        let mut notes = parsing::parse_user_notes(&raw, count);
147
148        if notes.len() < count {
149            for _ in 0..4 {
150                self.session
151                    .eval(SCROLL_JS)
152                    .await
153                    .map_err(TailFinError::Browser)?;
154                tokio::time::sleep(Duration::from_millis(1500)).await;
155                let raw = self
156                    .session
157                    .eval(js::user::JS)
158                    .await
159                    .map_err(TailFinError::Browser)?;
160                let next = parsing::parse_user_notes(&raw, count);
161                if next.len() <= notes.len() {
162                    break;
163                }
164                notes = next;
165            }
166        }
167        Ok(notes.into_iter().take(count).collect())
168    }
169
170    pub async fn feed(&self, count: usize) -> Result<Vec<FeedItem>, TailFinError> {
171        ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
172
173        let inject_result = self
174            .session
175            .eval(js::pinia::INJECT_FEED)
176            .await
177            .map_err(TailFinError::Browser)?;
178
179        if inject_result.get("error").is_some() {
180            return Err(TailFinError::Api(format!(
181                "XHS: unable to access app state: {}",
182                inject_result
183                    .get("error")
184                    .and_then(|v| v.as_str())
185                    .unwrap_or("unknown")
186            )));
187        }
188
189        let max_rounds = (count / 5).clamp(3, 15);
190        for _ in 0..max_rounds {
191            self.session
192                .eval(SCROLL_JS)
193                .await
194                .map_err(TailFinError::Browser)?;
195            tokio::time::sleep(Duration::from_millis(1500)).await;
196            let captured = self
197                .session
198                .eval("window.__TF_CAPTURED?.length || 0")
199                .await
200                .map_err(TailFinError::Browser)?
201                .as_u64()
202                .unwrap_or(0);
203            if captured >= count as u64 {
204                break;
205            }
206        }
207
208        let raw = self
209            .session
210            .eval(js::pinia::COLLECT_FEED)
211            .await
212            .map_err(TailFinError::Browser)?;
213        Ok(parsing::parse_feed(&raw, count))
214    }
215
216    pub async fn notifications(&self, count: usize) -> Result<Vec<Notification>, TailFinError> {
217        ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
218        self.session
219            .navigate("https://www.xiaohongshu.com/user/notifications")
220            .await?;
221        let _ = self.session.wait_for_network_idle(15000, 1000).await;
222
223        let inject_result = self
224            .session
225            .eval(js::pinia::INJECT_NOTIFICATIONS)
226            .await
227            .map_err(TailFinError::Browser)?;
228
229        if inject_result.get("error").is_some() {
230            return Err(TailFinError::Api(format!(
231                "XHS: unable to access app state: {}",
232                inject_result
233                    .get("error")
234                    .and_then(|v| v.as_str())
235                    .unwrap_or("unknown")
236            )));
237        }
238
239        for _ in 0..8 {
240            tokio::time::sleep(Duration::from_secs(1)).await;
241            let captured = self
242                .session
243                .eval("window.__TF_CAPTURED_NOTIF?.length || 0")
244                .await
245                .map_err(TailFinError::Browser)?
246                .as_u64()
247                .unwrap_or(0);
248            if captured > 0 {
249                break;
250            }
251        }
252
253        let raw = self
254            .session
255            .eval(js::pinia::COLLECT_NOTIFICATIONS)
256            .await
257            .map_err(TailFinError::Browser)?;
258        Ok(parsing::parse_notifications(&raw, count))
259    }
260
261    pub async fn media(&self, note_id: &str) -> Result<Vec<MediaItem>, TailFinError> {
262        let id = crate::util::extract_note_id(note_id);
263        let url = format!("https://www.xiaohongshu.com/explore/{}", id);
264        ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
265        self.session.navigate(&url).await?;
266        let _ = self.session.wait_for_network_idle(15000, 1000).await;
267        self.session
268            .eval(WAIT_FOR_STATE_JS)
269            .await
270            .map_err(TailFinError::Browser)?;
271        let raw = self
272            .session
273            .eval(js::download::JS)
274            .await
275            .map_err(TailFinError::Browser)?;
276        parsing::check_page_status(&raw)?;
277        Ok(parsing::parse_media(&raw))
278    }
279}