1pub mod auth;
2pub mod js;
3pub mod parsing;
4pub mod site;
5pub mod types;
6pub mod util;
7
8use std::time::Duration;
9
10use tail_fin_common::page::ensure_on_domain;
11use tail_fin_common::BrowserSession;
12use tail_fin_common::TailFinError;
13
14pub use auth::extract_xhs_cookies;
15pub use site::XhsSite;
16pub use types::{Comment, FeedItem, MediaItem, Note, Notification, SearchNote, UserNote};
17pub use util::extract_note_id;
18
/// JavaScript snippet evaluated in the page to scroll down by one viewport.
///
/// It scrolls the element matching `.note-scroller` when one exists, otherwise
/// `document.documentElement`, by `window.innerHeight` pixels vertically, and
/// evaluates to the string `'scrolled'`.
const SCROLL_JS: &str = r#"(() => {
 const el = document.querySelector('.note-scroller') || document.documentElement;
 el.scrollBy(0, window.innerHeight);
 return 'scrolled';
})()"#;
24
/// JavaScript snippet that polls for `window.__INITIAL_STATE__`.
///
/// It checks up to 20 times, sleeping 500 ms after each unsuccessful check
/// (roughly 10 seconds in total). The async function resolves to `true` as soon
/// as the global is truthy and to `false` if it never appears.
const WAIT_FOR_STATE_JS: &str = r#"(async () => {
 for (let i = 0; i < 20; i++) {
 if (window.__INITIAL_STATE__) return true;
 await new Promise(r => setTimeout(r, 500));
 }
 return false;
})()"#;
34
/// Client that drives a [`BrowserSession`] against `www.xiaohongshu.com`.
///
/// Every operation works by navigating the session and/or evaluating
/// JavaScript in the page, then parsing the evaluated result.
pub struct XhsClient {
    /// Browser session used for all navigation and script evaluation.
    session: BrowserSession,
}
38
39impl XhsClient {
40 pub fn new(session: BrowserSession) -> Self {
41 Self { session }
42 }
43
44 pub async fn note(&self, id: &str) -> Result<Note, TailFinError> {
45 let note_id = crate::util::extract_note_id(id);
46 let url = format!("https://www.xiaohongshu.com/explore/{}", note_id);
47
48 ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
49 self.session.navigate(&url).await?;
50 let _ = self.session.wait_for_network_idle(15000, 1000).await;
51 self.session
52 .eval(WAIT_FOR_STATE_JS)
53 .await
54 .map_err(TailFinError::Browser)?;
55
56 let raw = self
57 .session
58 .eval(js::note::JS)
59 .await
60 .map_err(TailFinError::Browser)?;
61 parsing::check_page_status(&raw)?;
62 parsing::parse_note(&raw)
63 }
64
65 pub async fn search(&self, query: &str, count: usize) -> Result<Vec<SearchNote>, TailFinError> {
66 ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
67 let url = format!(
68 "https://www.xiaohongshu.com/search_result?keyword={}&source=web_search_result_note",
69 urlencoding::encode(query)
70 );
71 self.session.navigate(&url).await?;
72 let _ = self.session.wait_for_network_idle(15000, 1000).await;
73 self.session
74 .eval(WAIT_FOR_STATE_JS)
75 .await
76 .map_err(TailFinError::Browser)?;
77 let raw = self
78 .session
79 .eval(js::search::JS)
80 .await
81 .map_err(TailFinError::Browser)?;
82 parsing::check_page_status(&raw)?;
83 Ok(parsing::parse_search(&raw, count))
84 }
85
86 pub async fn comments(
87 &self,
88 note_id: &str,
89 count: usize,
90 with_replies: bool,
91 ) -> Result<Vec<Comment>, TailFinError> {
92 let id = crate::util::extract_note_id(note_id);
93 let url = format!("https://www.xiaohongshu.com/explore/{}", id);
94
95 ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
96 self.session.navigate(&url).await?;
97 let _ = self.session.wait_for_network_idle(15000, 1000).await;
98 self.session
99 .eval(WAIT_FOR_STATE_JS)
100 .await
101 .map_err(TailFinError::Browser)?;
102
103 let raw = self
104 .session
105 .eval(js::comments::JS)
106 .await
107 .map_err(TailFinError::Browser)?;
108 parsing::check_page_status(&raw)?;
109 let mut comments = parsing::parse_comments(&raw, count);
110
111 if with_replies && !comments.is_empty() {
112 let replies_raw = self
113 .session
114 .eval(js::comments::EXPAND_REPLIES_JS)
115 .await
116 .map_err(TailFinError::Browser)?;
117 parsing::merge_replies(&mut comments, &replies_raw);
118 }
119
120 Ok(comments)
121 }
122
123 pub async fn user_notes(
124 &self,
125 user_id: &str,
126 count: usize,
127 ) -> Result<Vec<UserNote>, TailFinError> {
128 let url = format!(
129 "https://www.xiaohongshu.com/user/profile/{}",
130 user_id.trim()
131 );
132 ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
133 self.session.navigate(&url).await?;
134 let _ = self.session.wait_for_network_idle(15000, 1000).await;
135 self.session
136 .eval(WAIT_FOR_STATE_JS)
137 .await
138 .map_err(TailFinError::Browser)?;
139
140 let raw = self
141 .session
142 .eval(js::user::JS)
143 .await
144 .map_err(TailFinError::Browser)?;
145 parsing::check_page_status(&raw)?;
146 let mut notes = parsing::parse_user_notes(&raw, count);
147
148 if notes.len() < count {
149 for _ in 0..4 {
150 self.session
151 .eval(SCROLL_JS)
152 .await
153 .map_err(TailFinError::Browser)?;
154 tokio::time::sleep(Duration::from_millis(1500)).await;
155 let raw = self
156 .session
157 .eval(js::user::JS)
158 .await
159 .map_err(TailFinError::Browser)?;
160 let next = parsing::parse_user_notes(&raw, count);
161 if next.len() <= notes.len() {
162 break;
163 }
164 notes = next;
165 }
166 }
167 Ok(notes.into_iter().take(count).collect())
168 }
169
170 pub async fn feed(&self, count: usize) -> Result<Vec<FeedItem>, TailFinError> {
171 ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
172
173 let inject_result = self
174 .session
175 .eval(js::pinia::INJECT_FEED)
176 .await
177 .map_err(TailFinError::Browser)?;
178
179 if inject_result.get("error").is_some() {
180 return Err(TailFinError::Api(format!(
181 "XHS: unable to access app state: {}",
182 inject_result
183 .get("error")
184 .and_then(|v| v.as_str())
185 .unwrap_or("unknown")
186 )));
187 }
188
189 let max_rounds = (count / 5).clamp(3, 15);
190 for _ in 0..max_rounds {
191 self.session
192 .eval(SCROLL_JS)
193 .await
194 .map_err(TailFinError::Browser)?;
195 tokio::time::sleep(Duration::from_millis(1500)).await;
196 let captured = self
197 .session
198 .eval("window.__TF_CAPTURED?.length || 0")
199 .await
200 .map_err(TailFinError::Browser)?
201 .as_u64()
202 .unwrap_or(0);
203 if captured >= count as u64 {
204 break;
205 }
206 }
207
208 let raw = self
209 .session
210 .eval(js::pinia::COLLECT_FEED)
211 .await
212 .map_err(TailFinError::Browser)?;
213 Ok(parsing::parse_feed(&raw, count))
214 }
215
216 pub async fn notifications(&self, count: usize) -> Result<Vec<Notification>, TailFinError> {
217 ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
218 self.session
219 .navigate("https://www.xiaohongshu.com/user/notifications")
220 .await?;
221 let _ = self.session.wait_for_network_idle(15000, 1000).await;
222
223 let inject_result = self
224 .session
225 .eval(js::pinia::INJECT_NOTIFICATIONS)
226 .await
227 .map_err(TailFinError::Browser)?;
228
229 if inject_result.get("error").is_some() {
230 return Err(TailFinError::Api(format!(
231 "XHS: unable to access app state: {}",
232 inject_result
233 .get("error")
234 .and_then(|v| v.as_str())
235 .unwrap_or("unknown")
236 )));
237 }
238
239 for _ in 0..8 {
240 tokio::time::sleep(Duration::from_secs(1)).await;
241 let captured = self
242 .session
243 .eval("window.__TF_CAPTURED_NOTIF?.length || 0")
244 .await
245 .map_err(TailFinError::Browser)?
246 .as_u64()
247 .unwrap_or(0);
248 if captured > 0 {
249 break;
250 }
251 }
252
253 let raw = self
254 .session
255 .eval(js::pinia::COLLECT_NOTIFICATIONS)
256 .await
257 .map_err(TailFinError::Browser)?;
258 Ok(parsing::parse_notifications(&raw, count))
259 }
260
261 pub async fn media(&self, note_id: &str) -> Result<Vec<MediaItem>, TailFinError> {
262 let id = crate::util::extract_note_id(note_id);
263 let url = format!("https://www.xiaohongshu.com/explore/{}", id);
264 ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
265 self.session.navigate(&url).await?;
266 let _ = self.session.wait_for_network_idle(15000, 1000).await;
267 self.session
268 .eval(WAIT_FOR_STATE_JS)
269 .await
270 .map_err(TailFinError::Browser)?;
271 let raw = self
272 .session
273 .eval(js::download::JS)
274 .await
275 .map_err(TailFinError::Browser)?;
276 parsing::check_page_status(&raw)?;
277 Ok(parsing::parse_media(&raw))
278 }
279}