// stillo_core/document.rs

use std::fmt;
use std::path::PathBuf;

use chrono::{DateTime, Utc};
use url::Url;

use crate::ast::Document;

6#[derive(Debug, Clone)]
7pub struct RawHtml {
8    pub bytes: Vec<u8>,
9    pub url: Url,
10    pub content_type: String,
11    pub status: u16,
12}
13
14#[derive(Debug, Clone)]
15pub struct ExtractedContent {
16    pub url: Url,
17    pub title: String,
18    pub byline: Option<String>,
19    pub body_text: String,
20    pub body_html: String,
21    pub links: Vec<ExtractedLink>,
22    pub metadata: PageMetadata,
23}
24
25#[derive(Debug, Clone)]
26pub struct ExtractedLink {
27    pub text: String,
28    pub href: Url,
29    pub rel: Option<String>,
30}
31
32#[derive(Debug, Clone)]
33pub struct PageMetadata {
34    pub description: Option<String>,
35    pub og_title: Option<String>,
36    pub og_image: Option<String>,
37    pub canonical: Option<Url>,
38    pub published_at: Option<DateTime<Utc>>,
39}
40
41/// フォーマット非依存のブラウズ用ページ表現。
42/// HTML / RSS / Markdown など各入力から変換して TuiBrowser に渡す。
43#[derive(Debug, Clone)]
44pub struct BrowsePage {
45    pub title: String,
46    pub url: Url,
47    pub doc: Document,
48    pub links: Vec<ExtractedLink>,
49    /// TUI の 'd' キー dump 用 Markdown テキスト
50    pub markdown: String,
51}
52
53#[derive(Debug, Clone)]
54pub struct MarkdownDocument {
55    pub content: String,
56    pub source_url: Url,
57    pub extracted_at: DateTime<Utc>,
58}
59
60#[derive(Debug, Clone, PartialEq)]
61pub enum SpaDetection {
62    Static,
63    SuspectedSpa { text_length: usize },
64    FrameworkDetected { framework: JsFramework },
65}
66
/// JavaScript framework identifier.
///
/// Equality is total, so the type also derives `Eq` and `Hash` and can be used
/// as a key in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum JsFramework {
    React,
    Vue,
    Angular,
    Next,
    Nuxt,
    /// Any framework without a dedicated variant, carried as a string.
    Unknown(String),
}

77#[derive(Debug, Clone, PartialEq)]
78pub enum DelegationTarget {
79    LocalCdp { port: u16 },
80    PlaywrightDaemon { socket_path: PathBuf },
81    JinaReader { api_key: Option<String> },
82    Firecrawl { base_url: Url, api_key: String },
83    Unavailable { reason: String },
84}
85
86#[derive(Debug)]
87pub enum FetchResult {
88    Static(RawHtml),
89    SpaDelegated {
90        detection: SpaDetection,
91        target: DelegationTarget,
92    },
93    DelegatedHtml(RawHtml),
94    Failed(FetchError),
95}
96
97#[derive(Debug, thiserror::Error)]
98pub enum FetchError {
99    #[error("HTTP error: {status} {url}")]
100    Http { status: u16, url: Url },
101    #[error("TLS error: {0}")]
102    Tls(String),
103    #[error("Timeout after {seconds}s")]
104    Timeout { seconds: u64 },
105    #[error("Delegation failed: {0}")]
106    DelegationFailed(String),
107    #[error("All delegation targets unavailable")]
108    NoDelegationAvailable,
109}