tail_fin_core/site.rs
1//! Cross-site session lifecycle abstractions.
2//!
3//! Every site crate (`tail-fin-twitter`, `tail-fin-sa`, `tail-fin-reddit`,
4//! ...) implements the [`Site`] trait. Orchestration layers ([`SessionManager`]
5//! in [`crate::session`], plus Flock A2A agents) consume the trait and don't
6//! need to know which site they're talking to beyond the trait contract.
7//!
8//! See `docs/superpowers/plans/2026-04-17-site-trait-phase-1.md` for the
9//! architecture rationale.
10
11use async_trait::async_trait;
12use night_fury_core::BrowserSession;
13use serde_json::Value;
14use std::time::{Duration, SystemTime};
15
16use crate::error::{AuthFailureKind, SiteError};
17
/// Session liveness state as reported by [`Site::validate`].
///
/// Implements `PartialEq`/`Eq` so callers and tests can compare statuses
/// directly (e.g. `status == SessionStatus::Valid` or `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SessionStatus {
    /// Session is healthy; continue using.
    Valid,

    /// Session still works but may expire soon — consider preemptive refresh.
    Degrading {
        /// Estimated point in time at which the session stops working,
        /// or `None` when no estimate is available.
        estimated_expiry: Option<SystemTime>,
        /// Free-form text explaining why the session is considered degrading.
        hint: String,
    },

    /// Session is dead — need to refresh or re-login.
    Expired,

    /// Server is blocking us (captcha, rate limit, anti-bot). Don't retry immediately.
    Blocked {
        /// Free-form description of the block (e.g. `"captcha"`).
        reason: String,
        /// How long to wait before trying again, when known.
        retry_after: Option<Duration>,
    },

    /// Validation itself failed (network error, etc.) — state is unknown.
    Unknown,
}
42
/// Input to [`Site::detect_auth_failure`]. Not every site uses HTTP status as
/// the failure signal — some return JSON with error codes, some redirect to a
/// login page. This struct carries the raw observations.
///
/// `Default` yields an "nothing observed" value (all fields empty / `None`).
/// `PartialEq`/`Eq` are derived so observations can be compared in tests and
/// de-duplicated by callers.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct FailureIndicators {
    /// HTTP status code of the failed response, if one was observed.
    pub status: Option<u16>,
    /// First ~1KB of the response body, trimmed.
    pub body_preview: String,
    /// URL the request ended up at, if known.
    /// NOTE(review): presumably the post-redirect URL (used to spot
    /// login-page redirects) — confirm against the code that fills it in.
    pub final_url: Option<String>,
    /// Response headers as pairs of strings.
    /// NOTE(review): presumably `(name, value)` in that order — confirm.
    pub response_headers: Vec<(String, String)>,
}
54
55/// Per-site lifecycle + identity contract.
56///
57/// # Implementation guidance
58///
59/// Required methods (no defaults): `id`, `display_name`, `cookie_domain_patterns`,
60/// `refresh_url`, `validate`.
61///
62/// Optional methods with sensible defaults: `refresh_interval_min`, `refresh`,
63/// `attempt_login`, `detect_auth_failure`.
64///
65/// Override `refresh` only if the site needs more than `navigate + wait + get_cookies`
66/// (e.g. Twitter needs a `scroll` to trigger GraphQL that emits Set-Cookie).
67#[async_trait]
68pub trait Site: Send + Sync + 'static {
69 /// Globally unique site identifier. Convention: lowercase, no spaces.
70 /// Examples: `"twitter"`, `"seekingalpha"`, `"shopee-sg"`.
71 fn id(&self) -> &'static str;
72
73 /// Human-readable name for logging / UI.
74 fn display_name(&self) -> &'static str;
75
76 /// Cookie domain glob patterns to filter `get_cookies_for_domain`.
77 /// Example: `&["*.twitter.com", "*.x.com"]` for Twitter/X.
78 fn cookie_domain_patterns(&self) -> &'static [&'static str];
79
80 /// URL to visit for server-side cookie refresh.
81 /// The default [`Self::refresh`] impl navigates here.
82 fn refresh_url(&self) -> &'static str;
83
84 /// Minimum interval between refreshes. Callers (e.g. [`crate::session::SessionManager`])
85 /// respect this to avoid hammering the site. Default: 60 seconds.
86 fn refresh_interval_min(&self) -> Duration {
87 Duration::from_secs(60)
88 }
89
90 /// Trigger server-side cookie refresh and return fresh cookies for
91 /// this site's domains.
92 ///
93 /// Default impl: navigate to [`Self::refresh_url`], read cookies filtered
94 /// by the first entry of [`Self::cookie_domain_patterns`]. Override if
95 /// the site needs custom actions (scroll to trigger GraphQL, wait for
96 /// specific URL, etc.).
97 async fn refresh(&self, session: &BrowserSession) -> Result<Vec<Value>, SiteError> {
98 session
99 .refresh_cookies(self.refresh_url())
100 .await
101 .map_err(|e| SiteError::RefreshFailed {
102 site: self.id(),
103 reason: format!("navigate failed: {e}"),
104 })?;
105
106 let pattern = self
107 .cookie_domain_patterns()
108 .first()
109 .copied()
110 .unwrap_or("*");
111
112 session
113 .get_cookies_for_domain(pattern)
114 .await
115 .map_err(|e| SiteError::RefreshFailed {
116 site: self.id(),
117 reason: format!("get_cookies_for_domain failed: {e}"),
118 })
119 }
120
121 /// Check session liveness. Typically a lightweight HTTP probe against
122 /// an authenticated endpoint.
123 async fn validate(&self, session: &BrowserSession) -> Result<SessionStatus, SiteError>;
124
125 /// Attempt automated login. Most sites return `ManualLoginRequired`
126 /// because login is too complex or involves 2FA — override only for
127 /// sites with clean API login flows.
128 async fn attempt_login(
129 &self,
130 _session: &BrowserSession,
131 _credentials: &Credentials,
132 ) -> Result<Vec<Value>, SiteError> {
133 Err(SiteError::ManualLoginRequired { site: self.id() })
134 }
135
136 /// Given observations from a failed request, classify the failure mode.
137 /// Default returns `None` — caller falls back to generic treatment.
138 fn detect_auth_failure(&self, _indicators: &FailureIndicators) -> Option<AuthFailureKind> {
139 None
140 }
141}
142
143/// Credentials for sites that support automated login.
144///
145/// Phase 1 defines this type but doesn't use it — most sites return
146/// `ManualLoginRequired` from `attempt_login`. Phase 3 fleshes out
147/// credential storage / vault resolution.
148#[derive(Clone)]
149pub enum Credentials {
150 UsernamePassword {
151 username: String,
152 password: String, // NOTE: Phase 3 will replace String with SecretString
153 },
154 OAuth {
155 refresh_token: String,
156 client_id: String,
157 },
158 /// "Don't log in — just use these cookies."
159 CookieJar(Vec<Value>),
160 /// Manual — no stored credentials; login happens out-of-band.
161 Manual,
162}
163
164impl std::fmt::Debug for Credentials {
165 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
166 // Never expose actual secrets in debug output.
167 match self {
168 Credentials::UsernamePassword { username, .. } => f
169 .debug_struct("UsernamePassword")
170 .field("username", username)
171 .field("password", &"***")
172 .finish(),
173 Credentials::OAuth { client_id, .. } => f
174 .debug_struct("OAuth")
175 .field("refresh_token", &"***")
176 .field("client_id", client_id)
177 .finish(),
178 Credentials::CookieJar(v) => f.debug_tuple("CookieJar").field(&v.len()).finish(),
179 Credentials::Manual => f.debug_struct("Manual").finish(),
180 }
181 }
182}
183
/// Unit tests for the plain data types in this module.
///
/// The `Credentials` tests double as a regression guard: every variant that
/// carries secret material must keep it out of `Debug` output.
#[cfg(test)]
mod tests {
    use super::*;

    /// `SessionStatus` debug output names the variant and shows its fields.
    #[test]
    fn session_status_debug() {
        let s = SessionStatus::Valid;
        assert!(format!("{s:?}").contains("Valid"));

        let s = SessionStatus::Blocked {
            reason: "captcha".into(),
            retry_after: Some(Duration::from_secs(30)),
        };
        let debug = format!("{s:?}");
        assert!(debug.contains("Blocked"));
        assert!(debug.contains("captcha"));
    }

    /// `FailureIndicators::default()` carries no observations at all.
    #[test]
    fn failure_indicators_default_empty() {
        let f = FailureIndicators::default();
        assert!(f.status.is_none());
        assert!(f.body_preview.is_empty());
        assert!(f.final_url.is_none());
        assert!(f.response_headers.is_empty());
    }

    /// The password is masked; the username is still visible.
    #[test]
    fn credentials_debug_hides_secrets() {
        let c = Credentials::UsernamePassword {
            username: "alice".into(),
            password: "supersecret".into(),
        };
        let debug = format!("{c:?}");
        assert!(debug.contains("alice"));
        assert!(!debug.contains("supersecret"));
        assert!(debug.contains("***"));
    }

    /// The refresh token is masked; the client id is still visible.
    #[test]
    fn credentials_oauth_debug_hides_token() {
        let c = Credentials::OAuth {
            refresh_token: "tok_xyz_123".into(),
            client_id: "client_abc".into(),
        };
        let debug = format!("{c:?}");
        assert!(debug.contains("client_abc"));
        assert!(!debug.contains("tok_xyz_123"));
        assert!(debug.contains("***"));
    }

    /// Cookie values are session secrets: only the cookie count is printed.
    #[test]
    fn credentials_cookie_jar_debug_hides_cookie_values() {
        let c = Credentials::CookieJar(vec![
            Value::String("auth_token=topsecret".into()),
            Value::String("ct0=alsosecret".into()),
        ]);
        let debug = format!("{c:?}");
        assert_eq!(debug, "CookieJar(2)");
        assert!(!debug.contains("topsecret"));
        assert!(!debug.contains("alsosecret"));
    }

    /// The credential-less variant prints just its name.
    #[test]
    fn credentials_manual_debug() {
        assert_eq!(format!("{:?}", Credentials::Manual), "Manual");
    }
}
233}