use serde_json::Value;
use tail_fin_common::BrowserSession;
use tail_fin_common::TailFinError;
use crate::types::{SearchListing, SearchParams};
/// Number of rows `crawl` requests per page; also the multiplier that turns a
/// page index into the `firstRow` offset.
/// NOTE(review): presumably matches the page size 591 serves — confirm.
const PAGE_SIZE: usize = 30;
/// Base URL of the rental list page that `search` appends its query string to.
const LIST_URL: &str = "https://rent.591.com.tw/list";
pub async fn search(
session: &BrowserSession,
params: &SearchParams,
) -> Result<(u32, Vec<SearchListing>), TailFinError> {
let mut page_url = format!("{}?region={}", LIST_URL, params.region_id);
if let Some(kind) = params.kind {
page_url.push_str(&format!("&kind={}", kind));
}
if let Some(p) = params.price_max {
page_url.push_str(&format!("&price={}", p));
}
if let Some(p) = params.price_min {
page_url.push_str(&format!("&price_min={}", p));
}
if let Some(order) = ¶ms.order {
page_url.push_str(&format!("&order={}", order));
}
if params.first_row > 0 {
page_url.push_str(&format!("&firstRow={}", params.first_row));
}
let _ = session.navigate(&page_url).await;
tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
let actual_url_js = r#"document.URL"#;
let actual_url_raw = session.eval(actual_url_js).await.ok();
let actual_page_url = match &actual_url_raw {
Some(Value::String(s)) => s.clone(),
_ => page_url.clone(),
};
let limit = params.limit.max(1);
let page_url_json = serde_json::to_string(&actual_page_url).unwrap_or_default();
let js = format!(
r#"
(async () => {{
try {{
const resp = await fetch({page_url_json}, {{
credentials: 'include',
headers: {{ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' }}
}});
if (!resp.ok) {{
return JSON.stringify({{ __error: 'HTML fetch HTTP ' + resp.status }});
}}
const html = await resp.text();
// Extract the __NUXT__ IIFE expression from the embedded script tag.
const marker = 'window.__NUXT__=';
const start = html.indexOf(marker);
if (start === -1) {{
return JSON.stringify({{ __error: 'NUXT state not found in HTML', html_len: html.length }});
}}
const scriptEnd = html.indexOf('</script>', start);
const expr = html.substring(start + marker.length, scriptEnd).trim().replace(/;\s*$/, '');
let nuxt;
try {{
nuxt = eval('(' + expr + ')');
}} catch(e) {{
return JSON.stringify({{ __error: 'eval failed: ' + e.message }});
}}
if (!nuxt) return JSON.stringify({{ __error: 'eval returned falsy' }});
// Walk the Nuxt3 SSR data payload — each key is a hash of the useFetch call.
// Find the entry with status:1 and a data.items array of rental listings.
let foundItems = null;
let foundTotal = 0;
if (nuxt.data && typeof nuxt.data === 'object') {{
for (const key of Object.keys(nuxt.data)) {{
const entry = nuxt.data[key];
if (entry && entry.status === 1 && entry.data &&
Array.isArray(entry.data.items) && entry.data.items.length > 0 &&
typeof entry.data.items[0].id !== 'undefined' &&
typeof entry.data.items[0].kind_name !== 'undefined') {{
foundItems = entry.data.items;
foundTotal = entry.data.total_count || entry.data.items.length;
break;
}}
}}
}}
if (!foundItems) {{
return JSON.stringify({{ __error: 'listings not in NUXT state' }});
}}
// Apply price filter client-side — 591 SSR ignores price params server-side.
const priceMin = {price_min};
const priceMax = {price_max};
if (priceMin > 0 || priceMax > 0) {{
foundItems = foundItems.filter(item => {{
const raw = item.price ? String(item.price).replace(/,/g, '') : '';
const p = parseInt(raw, 10);
if (isNaN(p)) return true;
if (priceMin > 0 && p < priceMin) return false;
if (priceMax > 0 && p > priceMax) return false;
return true;
}});
foundTotal = foundItems.length;
}}
// Normalize SSR field names to match parse_listing() expectations.
const normalized = foundItems.slice(0, {limit}).map(item => ({{
post_id: item.id,
title: item.title || '',
price: item.price ? String(item.price) : undefined,
price_unit: item.unit || item.price_unit || undefined,
address: item.address || undefined,
area: item.area ? String(item.area) : undefined,
kind_name: item.kind_name || undefined,
room: item.room_str || item.room || undefined,
floor: item.floor_str || item.floor || undefined,
photo_list: Array.isArray(item.photoList) ? item.photoList : undefined,
tags: Array.isArray(item.tags) ? item.tags : undefined,
post_time: item.refreshTime || item.post_time || undefined,
}}));
return JSON.stringify({{ status: 1, data: {{ total_count: foundTotal, data: normalized }} }});
}} catch (e) {{
return JSON.stringify({{ __error: e.message }});
}}
}})()
"#,
page_url_json = page_url_json,
limit = limit,
price_min = params.price_min.unwrap_or(0),
price_max = params.price_max.unwrap_or(0),
);
let raw = session.eval(&js).await?;
let text = match &raw {
Value::String(s) => s.clone(),
other => other.to_string(),
};
let json: Value =
serde_json::from_str(&text).map_err(|e| TailFinError::Parse(e.to_string()))?;
if let Some(err) = json.get("__error").and_then(|v| v.as_str()) {
return Err(TailFinError::Api(format!("591 search: {}", err)));
}
if json.get("status").and_then(|v| v.as_i64()).unwrap_or(0) != 1 {
return Err(TailFinError::Api("591 search returned non-1 status".into()));
}
let total = json
.get("data")
.and_then(|d| d.get("total_count"))
.and_then(|v| v.as_u64())
.unwrap_or(0) as u32;
let items_val = json
.pointer("/data/data")
.cloned()
.unwrap_or(Value::Array(vec![]));
let listings: Vec<SearchListing> = items_val
.as_array()
.map(|arr| arr.iter().filter_map(parse_listing).collect())
.unwrap_or_default();
Ok((total, listings))
}
fn parse_listing(item: &Value) -> Option<SearchListing> {
let post_id = item.get("post_id")?.as_u64()?;
let title = item.get("title")?.as_str()?.to_string();
let price = item
.get("price")
.and_then(|v| v.as_str())
.map(|s| s.to_string());
let price_unit = item
.get("price_unit")
.and_then(|v| v.as_str())
.map(|s| s.to_string());
let address = item
.get("address")
.and_then(|v| v.as_str())
.map(|s| s.to_string());
let area = item
.get("area")
.and_then(|v| v.as_str())
.map(|s| s.to_string());
let kind_name = item
.get("kind_name")
.and_then(|v| v.as_str())
.map(|s| s.to_string());
let room = item
.get("room")
.and_then(|v| v.as_str())
.map(|s| s.to_string());
let floor = item
.get("floor")
.and_then(|v| v.as_str())
.map(|s| s.to_string());
let post_time = item
.get("post_time")
.and_then(|v| v.as_str())
.map(|s| s.to_string());
let photo_list = item
.get("photo_list")
.and_then(|v| v.as_array())
.map(|arr| {
arr.iter()
.filter_map(|p| p.as_str().map(|s| s.to_string()))
.collect::<Vec<_>>()
})
.filter(|v| !v.is_empty());
let tags = item
.get("tags")
.and_then(|v| v.as_array())
.map(|arr| {
arr.iter()
.filter_map(|t| t.as_str().map(|s| s.to_string()))
.collect::<Vec<_>>()
})
.filter(|v| !v.is_empty());
Some(SearchListing {
post_id,
title,
price,
price_unit,
address,
area,
kind_name,
room,
floor,
photo_list,
tags,
post_time,
})
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Every field supplied with the expected JSON type is carried through.
    #[test]
    fn test_parse_listing_full() {
        let raw = json!({
            "post_id": 12345678,
            "title": "精裝套房近捷運",
            "price": "18000",
            "price_unit": "元/月",
            "address": "台北市中正區",
            "area": "12.5",
            "kind_name": "獨立套房",
            "room": "1房1廳1衛",
            "floor": "5F/12F",
            "photo_list": ["https://img.591.com.tw/a.jpg", "https://img.591.com.tw/b.jpg"],
            "tags": ["近捷運", "含管理費"],
            "post_time": "2026-04-10"
        });

        let parsed = parse_listing(&raw).unwrap();

        assert_eq!(parsed.post_id, 12345678);
        assert_eq!(parsed.title, "精裝套房近捷運");
        assert_eq!(parsed.price.as_deref(), Some("18000"));
        assert_eq!(parsed.price_unit.as_deref(), Some("元/月"));
        assert_eq!(parsed.address.as_deref(), Some("台北市中正區"));
        assert_eq!(parsed.area.as_deref(), Some("12.5"));
        assert_eq!(parsed.kind_name.as_deref(), Some("獨立套房"));
        assert_eq!(parsed.room.as_deref(), Some("1房1廳1衛"));
        assert_eq!(parsed.floor.as_deref(), Some("5F/12F"));
        assert_eq!(parsed.photo_list.as_ref().map(Vec::len), Some(2));
        assert_eq!(parsed.tags.as_ref().map(Vec::len), Some(2));
        assert_eq!(parsed.post_time.as_deref(), Some("2026-04-10"));
    }

    /// Only the two required fields are present; optional ones stay `None`.
    #[test]
    fn test_parse_listing_minimal() {
        let raw = json!({ "post_id": 99, "title": "小套房" });

        let parsed = parse_listing(&raw).unwrap();

        assert_eq!(parsed.post_id, 99);
        assert_eq!(parsed.title, "小套房");
        assert_eq!(parsed.price, None);
        assert_eq!(parsed.tags, None);
        assert_eq!(parsed.photo_list, None);
    }

    /// A row without `post_id` is rejected.
    #[test]
    fn test_parse_listing_missing_post_id_returns_none() {
        let raw = json!({ "title": "沒有 post_id" });
        assert!(parse_listing(&raw).is_none());
    }

    /// A row without `title` is rejected.
    #[test]
    fn test_parse_listing_missing_title_returns_none() {
        let raw = json!({ "post_id": 1 });
        assert!(parse_listing(&raw).is_none());
    }

    /// Empty arrays are reported as absent rather than as empty vectors.
    #[test]
    fn test_parse_listing_empty_tags_filtered() {
        let raw = json!({
            "post_id": 1,
            "title": "test",
            "tags": [],
            "photo_list": []
        });

        let parsed = parse_listing(&raw).unwrap();

        assert_eq!(parsed.tags, None);
        assert_eq!(parsed.photo_list, None);
    }
}
/// Tuning knobs for [`crawl`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CrawlOptions {
    /// Maximum number of pages to fetch; `0` disables the limit.
    pub max_pages: usize,
    /// Pause between consecutive pages, in milliseconds; `0` disables the pause.
    pub delay_ms: u64,
    /// How many additional attempts are made for a page after its first
    /// attempt fails.
    pub retries: u32,
    /// Zero-based index of the first page to fetch.
    pub start_page: usize,
}
impl Default for CrawlOptions {
fn default() -> Self {
Self {
max_pages: 0,
delay_ms: 1000,
retries: 3,
start_page: 0,
}
}
}
/// Fetches consecutive result pages via [`search`] and hands each non-empty
/// page to `on_page`.
///
/// Starting at `opts.start_page`, every page is requested with
/// `first_row = page * PAGE_SIZE` and `limit = PAGE_SIZE`. A failing page is
/// retried up to `opts.retries` more times with a two-second pause before each
/// retry. Crawling stops after a page with fewer than `PAGE_SIZE` rows, or
/// once `opts.max_pages` pages have been fetched (when that limit is
/// non-zero).
///
/// `on_page` receives `(page, first_row, listings)`.
///
/// Returns the total number of listings passed to `on_page`.
///
/// # Errors
///
/// Returns the error of the last attempt once a page has failed on every
/// attempt; listings already delivered to `on_page` are not rolled back.
pub async fn crawl<F>(
    session: &BrowserSession,
    params: &SearchParams,
    opts: &CrawlOptions,
    mut on_page: F,
) -> Result<usize, TailFinError>
where
    F: FnMut(usize, usize, &[SearchListing]),
{
    let retry_pause = tokio::time::Duration::from_secs(2);
    let mut delivered = 0;
    let mut pages_done = 0;
    let mut page = opts.start_page;

    loop {
        let first_row = page * PAGE_SIZE;
        let page_params = SearchParams {
            first_row,
            limit: PAGE_SIZE,
            ..params.clone()
        };

        // Retry loop: yields the page's listings, or returns the final error
        // once `opts.retries` retries have been used up.
        let mut attempt = 0;
        let listings = loop {
            match search(session, &page_params).await {
                Ok((_, items)) => break items,
                Err(e) => {
                    if attempt == opts.retries {
                        return Err(e);
                    }
                    eprintln!(
                        "[crawl] page {} attempt {}/{} failed: {}; retrying in 2s",
                        page + 1,
                        attempt + 1,
                        opts.retries,
                        e
                    );
                    tokio::time::sleep(retry_pause).await;
                    attempt += 1;
                }
            }
        };

        let n = listings.len();
        if !listings.is_empty() {
            on_page(page, first_row, &listings);
            delivered += n;
        }
        pages_done += 1;

        // NOTE(review): `search` applies price bounds inside the page script,
        // so with a price filter set a page can come back shorter than
        // PAGE_SIZE before the real last page — confirm stopping here is intended.
        let short_page = n < PAGE_SIZE;
        let page_budget_spent = opts.max_pages > 0 && pages_done >= opts.max_pages;
        if short_page || page_budget_spent {
            return Ok(delivered);
        }

        if opts.delay_ms > 0 {
            tokio::time::sleep(tokio::time::Duration::from_millis(opts.delay_ms)).await;
        }
        page += 1;
    }
}