// tail-fin-pcc 0.7.5
//
// Taiwan government procurement (PCC) adapter for tail-fin: tender search, company lookup, and budget tracking.
// Documentation
//! `Site` implementation for PCC (政府電子採購網) via community mirrors.
//!
//! The canonical `web.pcc.gov.tw` site sits behind Cloudflare with
//! aggressive anti-bot protection, so tail-fin instead hits the
//! community mirrors (`pcc.g0v.ronny.tw`, `pcc-api.openfun.app`), which
//! expose the same tender data without the anti-bot envelope. All
//! commands are anonymous.
//!
//! Because the mirrors are the actual targets, `cookie_domain_patterns`
//! is deliberately empty: there are no cookies to refresh, and the
//! mirrors don't share a parent domain we could filter on. Validation
//! is a network-liveness check against the primary mirror.

use std::time::Duration;

use async_trait::async_trait;
use tail_fin_core::{
    AuthFailureKind, BrowserSession, FailureIndicators, SessionStatus, Site, SiteError,
};

/// Stateless handle for the PCC (政府電子採購網) adapter.
///
/// The type is a unit struct: it carries no data, so it is free to copy,
/// compare, and default-construct. All behaviour lives in its `Site`
/// implementation.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct PccSite;

/// Primary community mirror. Used both as the refresh target and as the
/// liveness-probe target so the two can never drift apart.
const PRIMARY_MIRROR_URL: &str = "https://pcc.g0v.ronny.tw/";

/// Back-off reported to callers whenever the mirror answers HTTP 429.
const RATE_LIMIT_BACKOFF: Duration = Duration::from_secs(300);

/// Minimum spacing between refreshes.
const REFRESH_INTERVAL_MIN: Duration = Duration::from_secs(600);

#[async_trait]
impl Site for PccSite {
    /// Stable machine identifier for this site.
    fn id(&self) -> &'static str {
        "pcc"
    }

    /// Human-readable label for this site.
    fn display_name(&self) -> &'static str {
        "PCC 政府採購 (via g0v mirrors)"
    }

    /// Cookie domain filters — intentionally empty.
    fn cookie_domain_patterns(&self) -> &'static [&'static str] {
        // No shared parent domain across mirrors, and commands don't
        // need cookies. Leaving this empty is safe because the `refresh`
        // override in this impl always returns an empty cookie list.
        &[]
    }

    /// URL of the primary mirror; also the target of the `validate` ping.
    fn refresh_url(&self) -> &'static str {
        PRIMARY_MIRROR_URL
    }

    /// Minimum spacing between refreshes (ten minutes).
    fn refresh_interval_min(&self) -> Duration {
        // Anonymous sites don't need frequent refreshes.
        REFRESH_INTERVAL_MIN
    }

    /// Touches the primary mirror and reports zero cookies.
    ///
    /// Calls `BrowserSession::refresh_cookies` with [`Site::refresh_url`]
    /// and discards whatever it yields; on success the result is always an
    /// empty list because the mirrors are anonymous.
    ///
    /// NOTE(review): `refresh_cookies` is presumably a plain navigation
    /// (the error text below says "navigate failed") — confirm against
    /// `tail_fin_core`.
    ///
    /// # Errors
    ///
    /// Returns `SiteError::RefreshFailed` when the session call fails.
    async fn refresh(&self, session: &BrowserSession) -> Result<Vec<serde_json::Value>, SiteError> {
        let visit = session.refresh_cookies(self.refresh_url()).await;
        if let Err(e) = visit {
            return Err(SiteError::RefreshFailed {
                site: self.id(),
                reason: format!("navigate failed: {e}"),
            });
        }
        Ok(Vec::new())
    }

    /// Liveness check: pings the primary mirror and maps the status code.
    ///
    /// | status | result                                         |
    /// |--------|------------------------------------------------|
    /// | 200    | `SessionStatus::Valid`                         |
    /// | 429    | `SessionStatus::Blocked` with a 300 s back-off |
    /// | 0      | `SessionStatus::Unknown`                       |
    /// | other  | `SessionStatus::Degrading` with a hint         |
    ///
    /// # Errors
    ///
    /// Returns `SiteError::ValidationFailed` when the ping itself fails.
    async fn validate(&self, session: &BrowserSession) -> Result<SessionStatus, SiteError> {
        // Ping the same URL that `refresh` targets.
        let code = session
            .http_ping(self.refresh_url())
            .await
            .map_err(|e| SiteError::ValidationFailed {
                site: self.id(),
                reason: format!("mirror ping: {e}"),
            })?;

        let verdict = match code {
            200 => SessionStatus::Valid,
            429 => SessionStatus::Blocked {
                reason: "PCC mirror rate limit".into(),
                retry_after: Some(RATE_LIMIT_BACKOFF),
            },
            // NOTE(review): 0 is presumably what `http_ping` reports when
            // no HTTP status was obtained — confirm.
            0 => SessionStatus::Unknown,
            other => SessionStatus::Degrading {
                estimated_expiry: None,
                hint: format!("unexpected HTTP {other} from mirror"),
            },
        };
        Ok(verdict)
    }

    /// Classifies a failed request. Only HTTP 429 is treated as a signal
    /// (rate limiting, 300 s back-off); everything else yields `None`.
    fn detect_auth_failure(&self, indicators: &FailureIndicators) -> Option<AuthFailureKind> {
        // Mirror is anonymous — no auth failures expected. 429 is the
        // only signal callers should back off on.
        match indicators.status {
            Some(429) => Some(AuthFailureKind::RateLimited {
                retry_after: RATE_LIMIT_BACKOFF,
            }),
            _ => None,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `FailureIndicators` carrying only the given HTTP status;
    /// every other field takes its `Default` value.
    fn indicators(status: u16) -> FailureIndicators {
        FailureIndicators {
            status: Some(status),
            ..Default::default()
        }
    }

    /// All static identity accessors return their documented values.
    #[test]
    fn identity_fields() {
        let s = PccSite;
        assert_eq!(s.id(), "pcc");
        assert_eq!(s.display_name(), "PCC 政府採購 (via g0v mirrors)");
        assert!(s.cookie_domain_patterns().is_empty());
        assert_eq!(s.refresh_url(), "https://pcc.g0v.ronny.tw/");
        assert_eq!(s.refresh_interval_min(), Duration::from_secs(600));
    }

    /// HTTP 429 is reported as rate limiting with a 300 s back-off.
    #[test]
    fn detect_rate_limited_on_429() {
        match PccSite.detect_auth_failure(&indicators(429)) {
            Some(AuthFailureKind::RateLimited { retry_after }) => {
                assert_eq!(retry_after, Duration::from_secs(300));
            }
            other => panic!("expected RateLimited, got {other:?}"),
        }
    }

    /// Statuses other than 429 are never classified as auth failures.
    #[test]
    fn detect_returns_none_for_all_other_statuses() {
        // Anonymous mirror — no 401/403 expected.
        for status in [200, 401, 403, 500] {
            assert!(
                PccSite.detect_auth_failure(&indicators(status)).is_none(),
                "status {status} should not be classified as an auth failure"
            );
        }
    }

    /// A failure with no HTTP status at all is not classified either.
    #[test]
    fn detect_returns_none_without_status() {
        let no_status = FailureIndicators {
            status: None,
            ..Default::default()
        };
        assert!(PccSite.detect_auth_failure(&no_status).is_none());
    }
}