Skip to main content

webfetch/
types.rs

1use serde::{Deserialize, Serialize};
2
3/// Result of fetching and converting a web page.
4#[derive(Debug, Clone, Serialize, Deserialize)]
5pub struct FetchResult {
6    pub title: String,
7    pub final_url: String,
8    pub content: String,
9    pub content_type: ContentType,
10    /// The detected source media kind: "html", "json", "text", or a raw
11    /// content-type for anything not rendered.
12    pub media: String,
13    pub token_estimate: usize,
14    pub references: Vec<UrlReference>,
15    #[serde(default)]
16    pub metadata: Metadata,
17    pub source: String,
18}
19
20/// Citation-oriented page metadata, all best-effort.
21#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
22pub struct Metadata {
23    #[serde(skip_serializing_if = "Option::is_none")]
24    pub description: Option<String>,
25    #[serde(skip_serializing_if = "Option::is_none")]
26    pub author: Option<String>,
27    #[serde(skip_serializing_if = "Option::is_none")]
28    pub published: Option<String>,
29    #[serde(skip_serializing_if = "Option::is_none")]
30    pub lang: Option<String>,
31    #[serde(skip_serializing_if = "Option::is_none")]
32    pub site_name: Option<String>,
33}
34
35/// A single preserved URL, recoverable by its `index`.
36#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
37pub struct UrlReference {
38    pub index: usize,
39    pub url: String,
40    /// The anchor text the link was attached to (best-effort).
41    pub text: String,
42}
43
44impl crate::refs::Referable for UrlReference {
45    fn index(&self) -> usize {
46        self.index
47    }
48    fn url(&self) -> &str {
49        &self.url
50    }
51}
52
53#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
54#[serde(rename_all = "lowercase")]
55pub enum ContentType {
56    Text,
57    Markdown,
58    Structured,
59}
60
61impl ContentType {
62    pub fn parse(s: &str) -> Self {
63        match s.to_ascii_lowercase().as_str() {
64            "markdown" | "md" => ContentType::Markdown,
65            "structured" | "json" => ContentType::Structured,
66            _ => ContentType::Text,
67        }
68    }
69}
70
71#[derive(Debug, Clone, Deserialize)]
72pub struct FetchOptions {
73    pub url: String,
74    pub content_type: ContentType,
75    pub max_tokens: Option<usize>,
76    pub timeout_secs: u64,
77}
78
79impl Default for FetchOptions {
80    fn default() -> Self {
81        Self {
82            url: String::new(),
83            content_type: ContentType::Text,
84            max_tokens: None,
85            timeout_secs: 10,
86        }
87    }
88}