use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::str::FromStr;
/// HTTP method that a [`FetchRequest`] may carry.
///
/// Only `Get` and `Head` are representable. Because of
/// `#[serde(rename_all = "UPPERCASE")]` the variants are (de)serialized as the
/// strings `"GET"` and `"HEAD"`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "UPPERCASE")]
pub enum HttpMethod {
/// The `GET` method; this is the `Default` variant.
#[default]
Get,
/// The `HEAD` method.
Head,
}
impl FromStr for HttpMethod {
    type Err = String;

    /// Parses a method name, ignoring ASCII letter case.
    ///
    /// `"GET"` maps to [`HttpMethod::Get`] and `"HEAD"` to
    /// [`HttpMethod::Head`]. The input is compared as-is: surrounding
    /// whitespace is not trimmed.
    ///
    /// # Errors
    ///
    /// Returns the message `"Invalid method: must be GET or HEAD"` for any
    /// other input.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Compare in place instead of building an upper-cased copy of the
        // input; both targets are pure ASCII, so an ASCII-insensitive
        // comparison is sufficient and allocation-free.
        if s.eq_ignore_ascii_case("GET") {
            Ok(HttpMethod::Get)
        } else if s.eq_ignore_ascii_case("HEAD") {
            Ok(HttpMethod::Head)
        } else {
            Err("Invalid method: must be GET or HEAD".to_string())
        }
    }
}
impl std::fmt::Display for HttpMethod {
    /// Writes the upper-case method token (`GET` or `HEAD`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the token first, then emit it with a single write.
        let token = match self {
            HttpMethod::Get => "GET",
            HttpMethod::Head => "HEAD",
        };
        f.write_str(token)
    }
}
/// Parameters for a single fetch.
///
/// Only `url` is required. Every other field is an `Option` marked
/// `#[serde(default)]`, so it may be absent from the input, and it is skipped
/// during serialization while it is `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)]
pub struct FetchRequest {
/// Target URL; the only field without a serde default.
pub url: String,
/// Requested HTTP method. `effective_method` substitutes the default
/// method (`Get`) when this is `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub method: Option<HttpMethod>,
/// Markdown flag; `wants_markdown` treats `None` as `false`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub as_markdown: Option<bool>,
/// Plain-text flag; `wants_text` treats `None` as `false`.
// NOTE(review): nothing here prevents `as_markdown` and `as_text` from both
// being set; precedence is decided by the consumer — confirm there.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub as_text: Option<bool>,
/// Optional file path.
// NOTE(review): presumably where the fetched body is written — confirm
// against the code that consumes this request.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub save_to_file: Option<String>,
/// Content focus selector. `wants_main_content` is `true` only when this is
/// `"main"` (compared ignoring ASCII case).
#[serde(default, skip_serializing_if = "Option::is_none")]
pub content_focus: Option<String>,
/// Optional entity tag value.
// NOTE(review): presumably sent as the `If-None-Match` request header —
// confirm against the fetch implementation.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub if_none_match: Option<String>,
/// Optional date, kept as an unparsed string.
// NOTE(review): presumably sent as the `If-Modified-Since` request header —
// confirm against the fetch implementation.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub if_modified_since: Option<String>,
}
impl FetchRequest {
    /// Creates a request for `url` with every optional field left unset.
    #[must_use]
    pub fn new(url: impl Into<String>) -> Self {
        Self {
            url: url.into(),
            ..Default::default()
        }
    }

    // --- Builder methods -------------------------------------------------
    // Each one consumes `self` and returns the updated request, so dropping
    // the return value discards the change; `#[must_use]` surfaces that
    // mistake at compile time.

    /// Sets the HTTP method.
    #[must_use]
    pub fn method(mut self, method: HttpMethod) -> Self {
        self.method = Some(method);
        self
    }

    /// Sets the `as_markdown` flag to `Some(true)`.
    #[must_use]
    pub fn as_markdown(mut self) -> Self {
        self.as_markdown = Some(true);
        self
    }

    /// Sets the `as_text` flag to `Some(true)`.
    #[must_use]
    pub fn as_text(mut self) -> Self {
        self.as_text = Some(true);
        self
    }

    /// Stores `path` in `save_to_file`.
    #[must_use]
    pub fn save_to_file(mut self, path: impl Into<String>) -> Self {
        self.save_to_file = Some(path.into());
        self
    }

    /// Stores `focus` in `content_focus`.
    #[must_use]
    pub fn content_focus(mut self, focus: impl Into<String>) -> Self {
        self.content_focus = Some(focus.into());
        self
    }

    /// Stores `etag` in `if_none_match`.
    #[must_use]
    pub fn if_none_match(mut self, etag: impl Into<String>) -> Self {
        self.if_none_match = Some(etag.into());
        self
    }

    /// Stores `date` in `if_modified_since`.
    #[must_use]
    pub fn if_modified_since(mut self, date: impl Into<String>) -> Self {
        self.if_modified_since = Some(date.into());
        self
    }

    // --- Accessors --------------------------------------------------------

    /// Returns the configured method, or the default method when unset.
    pub fn effective_method(&self) -> HttpMethod {
        self.method.unwrap_or_default()
    }

    /// Returns `true` only when `as_markdown` is `Some(true)`.
    pub fn wants_markdown(&self) -> bool {
        self.as_markdown.unwrap_or(false)
    }

    /// Returns `true` only when `as_text` is `Some(true)`.
    pub fn wants_text(&self) -> bool {
        self.as_text.unwrap_or(false)
    }

    /// Returns `true` when `content_focus` is set to `"main"`, compared
    /// ignoring ASCII case; `false` when it is unset or holds anything else.
    pub fn wants_main_content(&self) -> bool {
        matches!(
            self.content_focus.as_deref(),
            Some(focus) if focus.eq_ignore_ascii_case("main")
        )
    }
}
/// A `text` / `href` pair; the element type of [`PageMetadata::links`].
#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
pub struct PageLink {
/// Text associated with the link.
// NOTE(review): presumably the anchor's visible text — confirm at the
// place where links are extracted.
pub text: String,
/// Link target, stored as an unvalidated string.
// NOTE(review): unclear from this file whether relative targets are
// resolved to absolute URLs before being stored.
pub href: String,
}
/// Optional descriptive metadata about a page.
///
/// Nothing here is mandatory: `None` options and empty vectors are left out
/// of the serialized form, and the two vector fields default to empty when
/// missing from the input. `is_empty` reports whether no field carries data.
#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)]
pub struct PageMetadata {
/// Page title, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub title: Option<String>,
/// Page description, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
/// Page language, if any; stored as a free-form string.
#[serde(skip_serializing_if = "Option::is_none")]
pub language: Option<String>,
/// Canonical URL, if any; stored as an unvalidated string.
#[serde(skip_serializing_if = "Option::is_none")]
pub canonical_url: Option<String>,
/// Author, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub author: Option<String>,
/// Publication date, if any; kept as a string, no format is enforced here.
#[serde(skip_serializing_if = "Option::is_none")]
pub published_date: Option<String>,
/// Modification date, if any; kept as a string, no format is enforced here.
#[serde(skip_serializing_if = "Option::is_none")]
pub modified_date: Option<String>,
/// Links recorded for the page; omitted from output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
pub links: Vec<PageLink>,
/// Headings recorded for the page; omitted from output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
pub headings: Vec<String>,
}
impl PageMetadata {
    /// Returns `true` when no field carries data: every optional field is
    /// `None` and both `links` and `headings` are empty.
    pub fn is_empty(&self) -> bool {
        // Destructure exhaustively so every field is named exactly once.
        let Self {
            title,
            description,
            language,
            canonical_url,
            author,
            published_date,
            modified_date,
            links,
            headings,
        } = self;

        let optional_fields = [
            title,
            description,
            language,
            canonical_url,
            author,
            published_date,
            modified_date,
        ];

        optional_fields.iter().all(|field| field.is_none())
            && links.is_empty()
            && headings.is_empty()
    }
}
/// Result record for a fetch.
///
/// `url` and `status_code` are always serialized. Every `Option` field is
/// omitted from the output while it is `None`, and `redirect_chain` is omitted
/// while it is empty (and defaults to empty when missing from the input).
#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)]
pub struct FetchResponse {
/// URL this response belongs to.
// NOTE(review): unclear from this file whether this is the requested URL or
// the final URL after redirects — confirm where the response is built.
pub url: String,
/// Status code as a `u16`.
pub status_code: u16,
/// Content type, if known.
#[serde(skip_serializing_if = "Option::is_none")]
pub content_type: Option<String>,
/// Size, if known.
// NOTE(review): presumably a byte count — confirm the source of this value.
#[serde(skip_serializing_if = "Option::is_none")]
pub size: Option<u64>,
/// Last-modified value, if any; kept as an unparsed string.
#[serde(skip_serializing_if = "Option::is_none")]
pub last_modified: Option<String>,
/// Entity tag, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub etag: Option<String>,
/// File name, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub filename: Option<String>,
/// Format label for `content`, if any; free-form string.
#[serde(skip_serializing_if = "Option::is_none")]
pub format: Option<String>,
/// Textual content, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub content: Option<String>,
/// Truncation flag, if reported.
#[serde(skip_serializing_if = "Option::is_none")]
pub truncated: Option<bool>,
/// Method name as a plain string (not an `HttpMethod`), if reported.
#[serde(skip_serializing_if = "Option::is_none")]
pub method: Option<String>,
/// Error message, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
/// Path a file was saved to, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub saved_path: Option<String>,
/// Number of bytes written, if reported.
#[serde(skip_serializing_if = "Option::is_none")]
pub bytes_written: Option<u64>,
/// Page metadata, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub metadata: Option<PageMetadata>,
/// Word count, if reported.
#[serde(skip_serializing_if = "Option::is_none")]
pub word_count: Option<u64>,
/// Redirect chain as a list of strings; omitted from output when empty.
// NOTE(review): ordering and whether the final URL is included are not
// visible here — confirm where the chain is collected.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
pub redirect_chain: Vec<String>,
/// Paywall flag, if reported.
#[serde(skip_serializing_if = "Option::is_none")]
pub is_paywall: Option<bool>,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `GET`/`HEAD` parse in any letter case; other strings are rejected.
    #[test]
    fn test_http_method_from_str() {
        for raw in ["GET", "get", "Get"] {
            assert_eq!(raw.parse::<HttpMethod>().unwrap(), HttpMethod::Get);
        }
        for raw in ["HEAD", "head"] {
            assert_eq!(raw.parse::<HttpMethod>().unwrap(), HttpMethod::Head);
        }
        for raw in ["POST", "invalid"] {
            assert!(raw.parse::<HttpMethod>().is_err());
        }
    }

    /// `Display` yields the upper-case method token.
    #[test]
    fn test_http_method_display() {
        let cases = [(HttpMethod::Get, "GET"), (HttpMethod::Head, "HEAD")];
        for (method, expected) in cases {
            assert_eq!(method.to_string(), expected);
        }
    }

    /// Chained builder calls populate the corresponding fields.
    #[test]
    fn test_request_builder() {
        let built = FetchRequest::new("https://example.com")
            .method(HttpMethod::Head)
            .as_markdown();

        assert_eq!(built.url, "https://example.com");
        assert_eq!(built.method, Some(HttpMethod::Head));
        assert_eq!(built.as_markdown, Some(true));
    }

    /// The effective method is `Get` until a method is set explicitly.
    #[test]
    fn test_request_effective_method() {
        let unset = FetchRequest::new("https://example.com");
        assert_eq!(unset.effective_method(), HttpMethod::Get);

        let with_head = unset.method(HttpMethod::Head);
        assert_eq!(with_head.effective_method(), HttpMethod::Head);
    }

    /// Set fields show up in the serialized request.
    #[test]
    fn test_request_serialization() {
        let request = FetchRequest::new("https://example.com").as_markdown();
        let encoded = serde_json::to_string(&request).unwrap();

        for fragment in ["\"url\":\"https://example.com\"", "\"as_markdown\":true"] {
            assert!(encoded.contains(fragment));
        }
    }

    /// `None` fields are skipped and set fields are emitted in the response.
    #[test]
    fn test_response_serialization() {
        let response = FetchResponse {
            url: "https://example.com".to_string(),
            status_code: 200,
            content: Some("Hello".to_string()),
            ..Default::default()
        };
        let encoded = serde_json::to_string(&response).unwrap();

        assert!(!encoded.contains("content_type"));
        assert!(encoded.contains("\"content\":\"Hello\""));
    }
}