// fetchkit 0.1.2
//
// AI-friendly web content fetching and HTML-to-Markdown conversion library.
// Documentation
//! Core types for FetchKit

use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::str::FromStr;

/// HTTP method for the request
///
/// Only `GET` and `HEAD` are representable. With serde the variants are
/// written and read using their uppercase names (`"GET"` / `"HEAD"`), as
/// configured by `rename_all = "UPPERCASE"`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "UPPERCASE")]
pub enum HttpMethod {
    /// HTTP GET request (the `Default` variant)
    #[default]
    Get,
    /// HTTP HEAD request
    Head,
}

impl FromStr for HttpMethod {
    type Err = String;

    /// Parse an HTTP method name, ignoring ASCII case.
    ///
    /// Accepts any casing of `GET` or `HEAD` (e.g. `"get"`, `"Head"`). The
    /// input is compared as-is: surrounding whitespace is not trimmed.
    ///
    /// # Errors
    ///
    /// Returns a descriptive `String` when `s` is neither `GET` nor `HEAD`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Compare in place instead of building an uppercased copy of the
        // input: both accepted names are pure ASCII, so an ASCII
        // case-insensitive comparison accepts exactly the same inputs
        // without allocating.
        if s.eq_ignore_ascii_case("GET") {
            Ok(HttpMethod::Get)
        } else if s.eq_ignore_ascii_case("HEAD") {
            Ok(HttpMethod::Head)
        } else {
            Err("Invalid method: must be GET or HEAD".to_string())
        }
    }
}

impl std::fmt::Display for HttpMethod {
    /// Write the uppercase method name (`GET` or `HEAD`) to the formatter.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the literal first, then emit it with a single write.
        let name = match self {
            HttpMethod::Get => "GET",
            HttpMethod::Head => "HEAD",
        };
        f.write_str(name)
    }
}

/// Request to fetch a URL
///
/// Every field except `url` is optional. When serialized, optional fields
/// that are `None` are omitted; when deserialized, missing optional fields
/// become `None`.
///
/// # Examples
///
/// ```
/// use fetchkit::{FetchRequest, HttpMethod};
///
/// // Simple GET request
/// let req = FetchRequest::new("https://example.com");
/// assert_eq!(req.effective_method(), HttpMethod::Get);
///
/// // Request with markdown conversion
/// let req = FetchRequest::new("https://example.com").as_markdown();
/// assert!(req.wants_markdown());
///
/// // HEAD request
/// let req = FetchRequest::new("https://example.com")
///     .method(HttpMethod::Head);
/// assert_eq!(req.effective_method(), HttpMethod::Head);
/// ```
#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)]
pub struct FetchRequest {
    /// The URL to fetch (required, must be http:// or https://)
    ///
    /// NOTE(review): this type stores the string as given; the scheme check
    /// is presumably enforced by the fetching code — confirm against caller.
    pub url: String,

    /// HTTP method (optional, default GET)
    ///
    /// `None` is treated as GET by `effective_method`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub method: Option<HttpMethod>,

    /// Convert HTML to markdown (optional)
    ///
    /// `None` is treated as `false` by `wants_markdown`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub as_markdown: Option<bool>,

    /// Convert HTML to plain text (optional)
    ///
    /// `None` is treated as `false` by `wants_text`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub as_text: Option<bool>,
}

impl FetchRequest {
    /// Create a new request with the given URL
    ///
    /// All optional fields start as `None`. The URL is stored as given; no
    /// validation happens here.
    #[must_use]
    pub fn new(url: impl Into<String>) -> Self {
        Self {
            url: url.into(),
            ..Self::default()
        }
    }

    /// Set the HTTP method
    ///
    /// Consumes the request and returns the updated value, so the result
    /// must be kept (hence `#[must_use]`).
    #[must_use = "builder methods return the updated request instead of modifying it in place"]
    pub fn method(mut self, method: HttpMethod) -> Self {
        self.method = Some(method);
        self
    }

    /// Enable markdown conversion
    ///
    /// Sets `as_markdown` to `Some(true)`; `as_text` is left untouched.
    #[must_use = "builder methods return the updated request instead of modifying it in place"]
    pub fn as_markdown(mut self) -> Self {
        self.as_markdown = Some(true);
        self
    }

    /// Enable text conversion
    ///
    /// Sets `as_text` to `Some(true)`; `as_markdown` is left untouched.
    #[must_use = "builder methods return the updated request instead of modifying it in place"]
    pub fn as_text(mut self) -> Self {
        self.as_text = Some(true);
        self
    }

    /// Get the effective method (default to GET)
    ///
    /// Returns the explicitly set method, or `HttpMethod::default()` when
    /// none was set.
    pub fn effective_method(&self) -> HttpMethod {
        self.method.unwrap_or_default()
    }

    /// Check if markdown conversion is requested
    ///
    /// An unset (`None`) flag counts as `false`.
    pub fn wants_markdown(&self) -> bool {
        self.as_markdown.unwrap_or(false)
    }

    /// Check if text conversion is requested
    ///
    /// An unset (`None`) flag counts as `false`.
    pub fn wants_text(&self) -> bool {
        self.as_text.unwrap_or(false)
    }
}

/// Response from a fetch operation
///
/// Contains the fetched content along with metadata like status code,
/// content type, and size. Optional fields are omitted when not applicable:
/// every `Option` field that is `None` is skipped during serialization.
///
/// # Examples
///
/// ```
/// use fetchkit::FetchResponse;
///
/// let response = FetchResponse {
///     url: "https://example.com".to_string(),
///     status_code: 200,
///     content_type: Some("text/html".to_string()),
///     format: Some("markdown".to_string()),
///     content: Some("# Example Domain".to_string()),
///     ..Default::default()
/// };
///
/// assert_eq!(response.status_code, 200);
/// assert!(response.content.unwrap().contains("Example"));
/// ```
#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)]
pub struct FetchResponse {
    /// The fetched URL
    pub url: String,

    /// HTTP status code
    pub status_code: u16,

    /// Content-Type header value
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_type: Option<String>,

    /// Content size in bytes
    #[serde(skip_serializing_if = "Option::is_none")]
    pub size: Option<u64>,

    /// Last-Modified header value
    #[serde(skip_serializing_if = "Option::is_none")]
    pub last_modified: Option<String>,

    /// Extracted filename
    #[serde(skip_serializing_if = "Option::is_none")]
    pub filename: Option<String>,

    /// Content format: "markdown", "text", or "raw"
    ///
    /// Stored as a free-form string; this type does not restrict the value.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub format: Option<String>,

    /// The fetched/converted content
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,

    /// True if content was truncated due to timeout
    #[serde(skip_serializing_if = "Option::is_none")]
    pub truncated: Option<bool>,

    /// "HEAD" for HEAD requests
    ///
    /// Stored as a string rather than as an `HttpMethod`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub method: Option<String>,

    /// Error message (for binary content)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parsing accepts any casing of GET/HEAD and rejects everything else.
    #[test]
    fn test_http_method_from_str() {
        let accepted = [
            ("GET", HttpMethod::Get),
            ("get", HttpMethod::Get),
            ("Get", HttpMethod::Get),
            ("HEAD", HttpMethod::Head),
            ("head", HttpMethod::Head),
        ];
        for (input, expected) in accepted {
            assert_eq!(
                HttpMethod::from_str(input).unwrap(),
                expected,
                "input: {:?}",
                input
            );
        }

        for input in ["POST", "invalid"] {
            assert!(HttpMethod::from_str(input).is_err(), "input: {:?}", input);
        }
    }

    /// `Display` renders the uppercase method name.
    #[test]
    fn test_http_method_display() {
        for (method, rendered) in [(HttpMethod::Get, "GET"), (HttpMethod::Head, "HEAD")] {
            assert_eq!(method.to_string(), rendered);
        }
    }

    /// Chained builder calls populate the corresponding fields.
    #[test]
    fn test_request_builder() {
        let built = FetchRequest::new("https://example.com")
            .method(HttpMethod::Head)
            .as_markdown();

        let FetchRequest {
            url,
            method,
            as_markdown,
            ..
        } = built;
        assert_eq!(url, "https://example.com");
        assert_eq!(method, Some(HttpMethod::Head));
        assert_eq!(as_markdown, Some(true));
    }

    /// An unset method falls back to GET; an explicit one is returned as-is.
    #[test]
    fn test_request_effective_method() {
        let unset = FetchRequest::new("https://example.com");
        assert_eq!(unset.effective_method(), HttpMethod::Get);

        let with_head = unset.method(HttpMethod::Head);
        assert_eq!(with_head.effective_method(), HttpMethod::Head);
    }

    /// A request serializes its URL and any flags that were set.
    #[test]
    fn test_request_serialization() {
        let request = FetchRequest::new("https://example.com").as_markdown();
        let encoded = serde_json::to_string(&request).unwrap();

        for fragment in ["\"url\":\"https://example.com\"", "\"as_markdown\":true"] {
            assert!(encoded.contains(fragment));
        }
    }

    /// A response omits `None` fields and keeps the populated ones.
    #[test]
    fn test_response_serialization() {
        let response = FetchResponse {
            url: "https://example.com".to_string(),
            status_code: 200,
            content: Some("Hello".to_string()),
            ..Default::default()
        };
        let encoded = serde_json::to_string(&response).unwrap();

        // Optional None fields should be omitted
        assert!(!encoded.contains("content_type"));
        assert!(encoded.contains("\"content\":\"Hello\""));
    }
}