pub mod auth;
pub mod js;
pub mod parsing;
pub mod site;
pub mod types;
pub mod util;
use std::time::Duration;
use tail_fin_common::page::ensure_on_domain;
use tail_fin_common::BrowserSession;
use tail_fin_common::TailFinError;
pub use auth::extract_xhs_cookies;
pub use site::XhsSite;
pub use types::{Comment, FeedItem, MediaItem, Note, Notification, SearchNote, UserNote};
pub use util::extract_note_id;
/// JavaScript snippet evaluated in the page to trigger lazy loading.
///
/// Scrolls the `.note-scroller` element when one exists, otherwise
/// `document.documentElement`, down by one viewport height
/// (`window.innerHeight`). The expression evaluates to the string `'scrolled'`.
const SCROLL_JS: &str = r#"(() => {
const el = document.querySelector('.note-scroller') || document.documentElement;
el.scrollBy(0, window.innerHeight);
return 'scrolled';
})()"#;
/// JavaScript snippet that waits for the page's `window.__INITIAL_STATE__`.
///
/// Checks up to 20 times, pausing 500 ms after each unsuccessful check (about
/// 10 seconds in total). Resolves to `true` as soon as
/// `window.__INITIAL_STATE__` is truthy and to `false` if it never appears.
const WAIT_FOR_STATE_JS: &str = r#"(async () => {
for (let i = 0; i < 20; i++) {
if (window.__INITIAL_STATE__) return true;
await new Promise(r => setTimeout(r, 500));
}
return false;
})()"#;
/// Client that drives a [`BrowserSession`] against `www.xiaohongshu.com` and
/// turns the data extracted from the page into typed results.
pub struct XhsClient {
// Browser session used for every navigation and script evaluation.
session: BrowserSession,
}
impl XhsClient {
pub fn new(session: BrowserSession) -> Self {
Self { session }
}
pub async fn note(&self, id: &str) -> Result<Note, TailFinError> {
let note_id = crate::util::extract_note_id(id);
let url = format!("https://www.xiaohongshu.com/explore/{}", note_id);
ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
self.session.navigate(&url).await?;
let _ = self.session.wait_for_network_idle(15000, 1000).await;
self.session
.eval(WAIT_FOR_STATE_JS)
.await
.map_err(TailFinError::Browser)?;
let raw = self
.session
.eval(js::note::JS)
.await
.map_err(TailFinError::Browser)?;
parsing::check_page_status(&raw)?;
parsing::parse_note(&raw)
}
pub async fn search(&self, query: &str, count: usize) -> Result<Vec<SearchNote>, TailFinError> {
ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
let url = format!(
"https://www.xiaohongshu.com/search_result?keyword={}&source=web_search_result_note",
urlencoding::encode(query)
);
self.session.navigate(&url).await?;
let _ = self.session.wait_for_network_idle(15000, 1000).await;
self.session
.eval(WAIT_FOR_STATE_JS)
.await
.map_err(TailFinError::Browser)?;
let raw = self
.session
.eval(js::search::JS)
.await
.map_err(TailFinError::Browser)?;
parsing::check_page_status(&raw)?;
Ok(parsing::parse_search(&raw, count))
}
pub async fn comments(
&self,
note_id: &str,
count: usize,
with_replies: bool,
) -> Result<Vec<Comment>, TailFinError> {
let id = crate::util::extract_note_id(note_id);
let url = format!("https://www.xiaohongshu.com/explore/{}", id);
ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
self.session.navigate(&url).await?;
let _ = self.session.wait_for_network_idle(15000, 1000).await;
self.session
.eval(WAIT_FOR_STATE_JS)
.await
.map_err(TailFinError::Browser)?;
let raw = self
.session
.eval(js::comments::JS)
.await
.map_err(TailFinError::Browser)?;
parsing::check_page_status(&raw)?;
let mut comments = parsing::parse_comments(&raw, count);
if with_replies && !comments.is_empty() {
let replies_raw = self
.session
.eval(js::comments::EXPAND_REPLIES_JS)
.await
.map_err(TailFinError::Browser)?;
parsing::merge_replies(&mut comments, &replies_raw);
}
Ok(comments)
}
pub async fn user_notes(
&self,
user_id: &str,
count: usize,
) -> Result<Vec<UserNote>, TailFinError> {
let url = format!(
"https://www.xiaohongshu.com/user/profile/{}",
user_id.trim()
);
ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
self.session.navigate(&url).await?;
let _ = self.session.wait_for_network_idle(15000, 1000).await;
self.session
.eval(WAIT_FOR_STATE_JS)
.await
.map_err(TailFinError::Browser)?;
let raw = self
.session
.eval(js::user::JS)
.await
.map_err(TailFinError::Browser)?;
parsing::check_page_status(&raw)?;
let mut notes = parsing::parse_user_notes(&raw, count);
if notes.len() < count {
for _ in 0..4 {
self.session
.eval(SCROLL_JS)
.await
.map_err(TailFinError::Browser)?;
tokio::time::sleep(Duration::from_millis(1500)).await;
let raw = self
.session
.eval(js::user::JS)
.await
.map_err(TailFinError::Browser)?;
let next = parsing::parse_user_notes(&raw, count);
if next.len() <= notes.len() {
break;
}
notes = next;
}
}
Ok(notes.into_iter().take(count).collect())
}
pub async fn feed(&self, count: usize) -> Result<Vec<FeedItem>, TailFinError> {
ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
let inject_result = self
.session
.eval(js::pinia::INJECT_FEED)
.await
.map_err(TailFinError::Browser)?;
if inject_result.get("error").is_some() {
return Err(TailFinError::Api(format!(
"XHS: unable to access app state: {}",
inject_result
.get("error")
.and_then(|v| v.as_str())
.unwrap_or("unknown")
)));
}
let max_rounds = (count / 5).clamp(3, 15);
for _ in 0..max_rounds {
self.session
.eval(SCROLL_JS)
.await
.map_err(TailFinError::Browser)?;
tokio::time::sleep(Duration::from_millis(1500)).await;
let captured = self
.session
.eval("window.__TF_CAPTURED?.length || 0")
.await
.map_err(TailFinError::Browser)?
.as_u64()
.unwrap_or(0);
if captured >= count as u64 {
break;
}
}
let raw = self
.session
.eval(js::pinia::COLLECT_FEED)
.await
.map_err(TailFinError::Browser)?;
Ok(parsing::parse_feed(&raw, count))
}
pub async fn notifications(&self, count: usize) -> Result<Vec<Notification>, TailFinError> {
ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
self.session
.navigate("https://www.xiaohongshu.com/user/notifications")
.await?;
let _ = self.session.wait_for_network_idle(15000, 1000).await;
let inject_result = self
.session
.eval(js::pinia::INJECT_NOTIFICATIONS)
.await
.map_err(TailFinError::Browser)?;
if inject_result.get("error").is_some() {
return Err(TailFinError::Api(format!(
"XHS: unable to access app state: {}",
inject_result
.get("error")
.and_then(|v| v.as_str())
.unwrap_or("unknown")
)));
}
for _ in 0..8 {
tokio::time::sleep(Duration::from_secs(1)).await;
let captured = self
.session
.eval("window.__TF_CAPTURED_NOTIF?.length || 0")
.await
.map_err(TailFinError::Browser)?
.as_u64()
.unwrap_or(0);
if captured > 0 {
break;
}
}
let raw = self
.session
.eval(js::pinia::COLLECT_NOTIFICATIONS)
.await
.map_err(TailFinError::Browser)?;
Ok(parsing::parse_notifications(&raw, count))
}
pub async fn media(&self, note_id: &str) -> Result<Vec<MediaItem>, TailFinError> {
let id = crate::util::extract_note_id(note_id);
let url = format!("https://www.xiaohongshu.com/explore/{}", id);
ensure_on_domain(&self.session, &["www.xiaohongshu.com"]).await?;
self.session.navigate(&url).await?;
let _ = self.session.wait_for_network_idle(15000, 1000).await;
self.session
.eval(WAIT_FOR_STATE_JS)
.await
.map_err(TailFinError::Browser)?;
let raw = self
.session
.eval(js::download::JS)
.await
.map_err(TailFinError::Browser)?;
parsing::check_page_status(&raw)?;
Ok(parsing::parse_media(&raw))
}
}