tail-fin-cli 0.5.1

Multi-site browser automation CLI — attaches to Chrome or auto-launches a stealth browser to drive 15+ sites
use std::io::Write;

use clap::Subcommand;
use tail_fin_common::TailFinError;

use crate::session::{browser_session, launch_browser, print_json, Ctx};

/// Builds the checkpoint-map key for a region / house-kind combination.
///
/// The key has the form `"<region>:<kind>"` when a kind filter is given and
/// `"<region>:all"` when `kind` is `None`.
pub fn checkpoint_591_key(region: u32, kind: Option<u32>) -> String {
    // Resolve the part after the colon first, then assemble the key once.
    let suffix = kind.map_or_else(|| String::from("all"), |k| k.to_string());
    format!("{}:{}", region, suffix)
}

/// Loads a crawl checkpoint map from the JSON file at `path`.
///
/// This is a best-effort read: if the file cannot be read, or its contents do
/// not deserialize into a string-to-count map, an empty map is returned
/// instead of an error.
pub fn load_591_checkpoint(path: &std::path::Path) -> std::collections::HashMap<String, usize> {
    match std::fs::read_to_string(path) {
        // Unparseable content is treated the same as a missing file.
        Ok(raw) => serde_json::from_str(&raw).unwrap_or_default(),
        Err(_) => std::collections::HashMap::new(),
    }
}

/// Writes the checkpoint map `cp` to `path` as JSON, replacing any existing file.
///
/// Saving stays best-effort (the function returns nothing and never panics),
/// but failures are no longer silent: a serialization or write error is
/// reported on stderr so the user learns that the checkpoint on disk is stale.
pub fn save_591_checkpoint(path: &std::path::Path, cp: &std::collections::HashMap<String, usize>) {
    let serialized = match serde_json::to_string(cp) {
        Ok(s) => s,
        Err(e) => {
            eprintln!("warning: could not serialize checkpoint: {}", e);
            return;
        }
    };
    if let Err(e) = std::fs::write(path, serialized) {
        eprintln!(
            "warning: could not write checkpoint {}: {}",
            path.display(),
            e
        );
    }
}

// Subcommands of the `591` command group.
//
// Plain `//` comment on purpose: clap's derive turns `///` doc comments into
// help text, so the enum itself is left without one. The `///` comments on
// the variants and fields below are the user-visible `--help` strings.
#[derive(Subcommand)]
pub enum Rent591Action {
    /// List hot communities in a region (no browser required)
    Hot {
        /// Region ID (1 = Taipei City, 2 = New Taipei, 3 = Taoyuan, ...)
        #[arg(long, default_value_t = 1)]
        region: u32,
        /// Maximum number of results
        #[arg(long, default_value_t = 20)]
        limit: usize,
    },
    /// Get detailed info for a community by ID
    Community {
        /// Community ID (from `hot` output)
        id: u64,
    },
    /// Get actual transaction price history for a community
    PriceHistory {
        /// Community ID
        id: u64,
        /// Maximum number of results
        #[arg(long, default_value_t = 20)]
        limit: usize,
    },
    /// Get active sale listings near a community
    Sales {
        /// Community ID
        id: u64,
        /// Maximum number of results
        #[arg(long, default_value_t = 20)]
        limit: usize,
    },
    /// List all supported region IDs and names
    Regions,
    /// Crawl all rental listings, paginating automatically. Outputs JSONL (one listing per line).
    Crawl {
        /// Region IDs to crawl (repeat for multiple: --region 1 --region 2). Default: 1.
        #[arg(long)]
        region: Vec<u32>,

        /// Crawl all 22 Taiwan regions (overrides --region).
        #[arg(long)]
        all_regions: bool,

        /// House kind: 0=all, 1=整層住家, 2=獨立套房, 3=分租套房, 8=雅房
        #[arg(long)]
        kind: Option<u32>,

        /// Maximum monthly rent (NTD)
        #[arg(long)]
        price_max: Option<u32>,

        /// Minimum monthly rent (NTD)
        #[arg(long)]
        price_min: Option<u32>,

        /// Sort by: posttime (default) or price
        #[arg(long)]
        order: Option<String>,

        /// Stop after this many pages per region (0 = no limit).
        #[arg(long, default_value_t = 0)]
        max_pages: usize,

        /// Milliseconds between pages (default: 1000).
        #[arg(long, default_value_t = 1000)]
        delay: u64,

        /// Write JSONL output to this file (default: stdout).
        #[arg(long)]
        output: Option<String>,

        /// Resume from checkpoint file (skip already-completed pages).
        #[arg(long)]
        resume: bool,

        /// Checkpoint file path (default: <output>.checkpoint.json or ./591-crawl.checkpoint.json).
        #[arg(long)]
        checkpoint: Option<String>,
    },
    /// Search rental listings (attaches via --connect, or launches a browser when it is omitted)
    Search {
        /// Region ID (1 = Taipei City). Use `regions` to see all IDs.
        #[arg(long, default_value_t = 1)]
        region: u32,
        /// House kind: 0=all, 1=整層住家, 2=獨立套房, 3=分租套房, 8=雅房
        #[arg(long)]
        kind: Option<u32>,
        /// Maximum monthly rent (NTD)
        #[arg(long)]
        price_max: Option<u32>,
        /// Minimum monthly rent (NTD)
        #[arg(long)]
        price_min: Option<u32>,
        /// Sort by: posttime (default) or price
        #[arg(long)]
        order: Option<String>,
        /// Maximum number of results
        #[arg(long, default_value_t = 20)]
        limit: usize,
    },
}

pub async fn run(action: Rent591Action, ctx: &Ctx) -> Result<(), TailFinError> {
    let client = tail_fin_591::Client591::new()?;
    match action {
        Rent591Action::Hot { region, limit } => {
            let communities = client.hot(region, limit).await?;
            print_json(&serde_json::json!({
                "region_id": region,
                "communities": communities,
                "count": communities.len(),
            }))?;
        }
        Rent591Action::Community { id } => match client.community(id).await? {
            Some(detail) => print_json(&detail)?,
            None => {
                return Err(TailFinError::Api(format!("community {} not found", id)));
            }
        },
        Rent591Action::PriceHistory { id, limit } => {
            let records = client.price_history(id, limit).await?;
            print_json(&serde_json::json!({
                "community_id": id,
                "records": records,
                "count": records.len(),
            }))?;
        }
        Rent591Action::Sales { id, limit } => {
            let (total, listings) = client.sales(id, limit).await?;
            print_json(&serde_json::json!({
                "community_id": id,
                "total": total,
                "listings": listings,
                "count": listings.len(),
            }))?;
        }
        Rent591Action::Regions => {
            print_json(tail_fin_591::REGIONS)?;
        }
        Rent591Action::Crawl {
            region,
            all_regions,
            kind,
            price_max,
            price_min,
            order,
            max_pages,
            delay,
            output,
            resume,
            checkpoint,
        } => {
            // Determine regions to crawl.
            let regions: Vec<u32> = if all_regions {
                (1u32..=22).collect()
            } else if region.is_empty() {
                vec![1]
            } else {
                region
            };

            // Determine checkpoint file path.
            let cp_path = if let Some(ref p) = checkpoint {
                std::path::PathBuf::from(p)
            } else if let Some(ref o) = output {
                std::path::PathBuf::from(format!("{}.checkpoint.json", o))
            } else {
                std::path::PathBuf::from("./591-crawl.checkpoint.json")
            };

            // Load checkpoint if --resume, else empty.
            let mut cp: std::collections::HashMap<String, usize> = if resume {
                load_591_checkpoint(&cp_path)
            } else {
                std::collections::HashMap::new()
            };

            // Open output writer.
            let stdout_handle;
            let file_handle;
            let writer: Box<dyn std::io::Write + Send> = if let Some(ref path) = output {
                file_handle = std::fs::OpenOptions::new()
                    .create(true)
                    .append(true)
                    .open(path)
                    .map_err(|e| TailFinError::Api(format!("open output file: {}", e)))?;
                Box::new(std::io::BufWriter::new(file_handle))
            } else {
                stdout_handle = std::io::stdout();
                Box::new(std::io::BufWriter::new(stdout_handle))
            };

            // We need the writer inside the closure (FnMut, sync).
            // Use a Mutex so the future is Send (required by the adapter trait).
            let writer = std::sync::Mutex::new(writer);

            // Deduplication set across all regions.
            let mut seen_ids: std::collections::HashSet<u64> = std::collections::HashSet::new();

            let session = match ctx.connect.as_deref() {
                Some(host) => browser_session(host, ctx.headed).await?,
                None => launch_browser(ctx.headed).await?,
            };

            let mut grand_total: usize = 0;

            for region_id in &regions {
                let region_id = *region_id;
                let cp_key = checkpoint_591_key(region_id, kind);
                let start_page = cp.get(&cp_key).copied().unwrap_or(0) / 30;

                let params = tail_fin_591::SearchParams {
                    region_id,
                    kind,
                    price_max,
                    price_min,
                    order: order.clone(),
                    limit: 30,
                    first_row: 0,
                };
                let opts = tail_fin_591::CrawlOptions {
                    max_pages,
                    delay_ms: delay,
                    retries: 3,
                    start_page,
                };

                let region_seen = &mut seen_ids;
                let region_cp = &mut cp;
                let region_writer = &writer;

                let total_for_region = tail_fin_591::crawl(
                    &session,
                    &params,
                    &opts,
                    |page_num, first_row, listings| {
                        // Deduplicate.
                        let new_listings: Vec<_> = listings
                            .iter()
                            .filter(|l| region_seen.insert(l.post_id))
                            .collect();

                        // Write JSONL lines.
                        {
                            let mut w = region_writer.lock().unwrap();
                            for listing in &new_listings {
                                let line = serde_json::to_string(listing).unwrap_or_default();
                                let _ = writeln!(*w, "{}", line);
                            }
                            let _ = w.flush();
                        }

                        // Update and save checkpoint.
                        region_cp.insert(checkpoint_591_key(region_id, kind), first_row + 30);
                        save_591_checkpoint(&cp_path, region_cp);

                        eprintln!(
                            "[region {} page {}] {} listings (first_row={})",
                            region_id,
                            page_num + 1,
                            listings.len(),
                            first_row
                        );
                    },
                )
                .await?;

                eprintln!("[region {}] done — {} total", region_id, total_for_region);
                grand_total += total_for_region;
            }

            eprintln!("[crawl complete] {} total listings", grand_total);
        }
        Rent591Action::Search {
            region,
            kind,
            price_max,
            price_min,
            order,
            limit,
        } => {
            let session = match ctx.connect.as_deref() {
                Some(host) => browser_session(host, ctx.headed).await?,
                None => {
                    eprintln!("No --connect specified; launching headless Chrome...");
                    launch_browser(ctx.headed).await?
                }
            };
            let params = tail_fin_591::SearchParams {
                region_id: region,
                kind,
                price_max,
                price_min,
                order,
                limit,
                first_row: 0,
            };
            let (total, listings) = tail_fin_591::search(&session, &params).await?;
            print_json(&serde_json::json!({
                "region_id": region,
                "total": total,
                "listings": listings,
                "count": listings.len(),
            }))?;
        }
    }
    Ok(())
}

pub struct Adapter;

impl crate::adapter::CliAdapter for Adapter {
    fn name(&self) -> &'static str {
        "591"
    }

    fn about(&self) -> &'static str {
        "591 rental platform operations"
    }

    fn command(&self) -> clap::Command {
        <Rent591Action as clap::Subcommand>::augment_subcommands(
            clap::Command::new("591").about("591 rental platform operations"),
        )
    }

    fn dispatch<'a>(
        &'a self,
        matches: &'a clap::ArgMatches,
        ctx: &'a crate::session::Ctx,
    ) -> std::pin::Pin<
        Box<
            dyn std::future::Future<Output = Result<(), tail_fin_common::TailFinError>> + Send + 'a,
        >,
    > {
        Box::pin(async move {
            let action = <Rent591Action as clap::FromArgMatches>::from_arg_matches(matches)
                .map_err(|e| tail_fin_common::TailFinError::Api(e.to_string()))?;
            run(action, ctx).await
        })
    }
}