1use std::path::PathBuf;
2use chrono::{DateTime, Utc};
3use url::Url;
4
5#[derive(Debug, Clone)]
6pub struct RawHtml {
7 pub bytes: Vec<u8>,
8 pub url: Url,
9 pub content_type: String,
10 pub status: u16,
11}
12
13#[derive(Debug, Clone)]
14pub struct ExtractedContent {
15 pub url: Url,
16 pub title: String,
17 pub byline: Option<String>,
18 pub body_text: String,
19 pub body_html: String,
20 pub links: Vec<ExtractedLink>,
21 pub metadata: PageMetadata,
22}
23
24#[derive(Debug, Clone)]
25pub struct ExtractedLink {
26 pub text: String,
27 pub href: Url,
28 pub rel: Option<String>,
29}
30
31#[derive(Debug, Clone)]
32pub struct PageMetadata {
33 pub description: Option<String>,
34 pub og_title: Option<String>,
35 pub og_image: Option<String>,
36 pub canonical: Option<Url>,
37 pub published_at: Option<DateTime<Utc>>,
38}
39
40#[derive(Debug, Clone)]
41pub struct MarkdownDocument {
42 pub content: String,
43 pub source_url: Url,
44 pub extracted_at: DateTime<Utc>,
45}
46
/// Result of checking whether a page looks like a single-page application.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so values can be used as
/// `HashSet` / `HashMap` keys; every payload (`usize`, [`JsFramework`]) has
/// total equality, so `Eq` is sound.
///
/// NOTE(review): variant meanings are inferred from names only; confirm
/// against the detector that produces them.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum SpaDetection {
    /// No sign of a single-page application.
    Static,

    /// The page is suspected to be a single-page application.
    SuspectedSpa {
        /// A text length recorded with the suspicion (presumably the amount of
        /// extracted text — TODO confirm).
        text_length: usize,
    },

    /// A specific JavaScript framework was identified.
    FrameworkDetected {
        /// The framework that was identified.
        framework: JsFramework,
    },
}

/// JavaScript frameworks that detection can report.
///
/// Derives `Eq` and `Hash` alongside `PartialEq`; the only payload is a
/// `String`, which supports both.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum JsFramework {
    /// React.
    React,
    /// Vue.
    Vue,
    /// Angular.
    Angular,
    /// Presumably Next.js — TODO confirm.
    Next,
    /// Presumably Nuxt — TODO confirm.
    Nuxt,
    /// Any other framework, carried as a free-form string (presumably its name
    /// or the marker that was matched — TODO confirm).
    Unknown(String),
}
63
64#[derive(Debug, Clone, PartialEq)]
65pub enum DelegationTarget {
66 LocalCdp { port: u16 },
67 PlaywrightDaemon { socket_path: PathBuf },
68 JinaReader { api_key: Option<String> },
69 Firecrawl { base_url: Url, api_key: String },
70 Unavailable { reason: String },
71}
72
73#[derive(Debug)]
74pub enum FetchResult {
75 Static(RawHtml),
76 SpaDelegated {
77 detection: SpaDetection,
78 target: DelegationTarget,
79 },
80 DelegatedHtml(RawHtml),
81 Failed(FetchError),
82}
83
84#[derive(Debug, thiserror::Error)]
85pub enum FetchError {
86 #[error("HTTP error: {status} {url}")]
87 Http { status: u16, url: Url },
88 #[error("TLS error: {0}")]
89 Tls(String),
90 #[error("Timeout after {seconds}s")]
91 Timeout { seconds: u64 },
92 #[error("Delegation failed: {0}")]
93 DelegationFailed(String),
94 #[error("All delegation targets unavailable")]
95 NoDelegationAvailable,
96}