use serde::{Deserialize, Serialize};
use super::CrawlPageResult;
/// Category assigned to a link.
///
/// Serde serializes the variants with snake_case names (`internal`,
/// `external`, `anchor`, `document`), and [`Default`] yields
/// [`LinkType::Internal`]. `Debug` is intentionally absent from the derive
/// list because this file supplies a hand-written implementation.
#[derive(Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum LinkType {
/// Default variant. Presumably a link that stays on the crawled site — confirm with the classifier.
#[default]
Internal,
/// Presumably a link to a different site — confirm with the classifier.
External,
/// Presumably a same-page fragment link — confirm with the classifier.
Anchor,
/// Presumably a link to a downloadable document — confirm with the classifier.
Document,
}
/// Human-readable rendering of a [`LinkType`] as a fixed lowercase label.
impl std::fmt::Display for LinkType {
    /// Writes the variant's label verbatim.
    ///
    /// The label goes straight to the formatter, so width, fill and
    /// precision flags on the format spec have no effect on the output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Internal => "internal",
            Self::External => "external",
            Self::Anchor => "anchor",
            Self::Document => "document",
        };
        f.write_str(label)
    }
}
/// `Debug` output for [`LinkType`] is identical to its `Display` output.
impl std::fmt::Debug for LinkType {
    /// Forwards to the `Display` implementation with the same formatter, so
    /// `{:?}` and `{}` print the same text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        <Self as std::fmt::Display>::fmt(self, f)
    }
}
/// Description of a single link.
///
/// `deny_unknown_fields` makes deserialization fail if the input contains
/// any key other than the fields declared here.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct LinkInfo {
/// The link's URL (whether it is resolved/absolute is not determined in this file).
pub url: String,
/// Text associated with the link; presumably the anchor text — confirm with the extractor.
pub text: String,
/// Category assigned to the link.
pub link_type: LinkType,
/// Optional `rel` value; presumably the raw HTML attribute — confirm with the extractor.
pub rel: Option<String>,
/// NOTE(review): presumably true when `rel` contains `nofollow` — confirm with the extractor.
pub nofollow: bool,
}
/// Where an image reference was found.
///
/// Serialized names: `img` and `picture_source` come from the snake_case
/// container rule, while the two metadata variants carry explicit renames
/// (`og:image`, `twitter:image`) that override it. [`Default`] yields
/// [`ImageSource::Img`].
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ImageSource {
/// Default variant. Presumably an `<img>` element — confirm with the extractor.
#[default]
Img,
/// Presumably a `<source>` inside `<picture>` — confirm with the extractor.
PictureSource,
/// Serialized as `og:image` rather than the snake_case form.
#[serde(rename = "og:image")]
OgImage,
/// Serialized as `twitter:image` rather than the snake_case form.
#[serde(rename = "twitter:image")]
TwitterImage,
}
/// Human-readable rendering of an [`ImageSource`] as a fixed label.
impl std::fmt::Display for ImageSource {
    /// Writes the variant's label verbatim; format-spec padding is not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Img => "img",
            Self::PictureSource => "picture_source",
            Self::OgImage => "og:image",
            Self::TwitterImage => "twitter:image",
        };
        f.write_str(label)
    }
}
/// Description of a single image reference.
///
/// `deny_unknown_fields` makes deserialization fail if the input contains
/// any key other than the fields declared here.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ImageInfo {
/// The image's URL (whether it is resolved/absolute is not determined in this file).
pub url: String,
/// Optional alternative text.
pub alt: Option<String>,
/// Optional width; presumably in pixels — confirm with the extractor.
pub width: Option<u32>,
/// Optional height; presumably in pixels — confirm with the extractor.
pub height: Option<u32>,
/// Where the image reference was found.
pub source: ImageSource,
}
/// Kind of syndication feed.
///
/// Serialized with snake_case variant names (`rss`, `atom`, `json_feed`);
/// [`Default`] yields [`FeedType::Rss`].
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum FeedType {
/// Default variant.
#[default]
Rss,
Atom,
/// Serialized as `json_feed`.
JsonFeed,
}
/// Description of a single feed reference.
///
/// `deny_unknown_fields` makes deserialization fail if the input contains
/// any key other than the fields declared here.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct FeedInfo {
/// The feed's URL.
pub url: String,
/// Optional feed title.
pub title: Option<String>,
/// Kind of feed.
pub feed_type: FeedType,
}
/// One JSON-LD entry.
///
/// `deny_unknown_fields` makes deserialization fail if the input contains
/// any key other than the fields declared here.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct JsonLdEntry {
/// Schema type of the entry; presumably the JSON-LD `@type` value — confirm with the extractor.
pub schema_type: String,
/// Optional name of the entry.
pub name: Option<String>,
/// Raw form of the entry, kept as a string; presumably the unparsed JSON text — confirm with the extractor.
pub raw: String,
}
/// Description of a single cookie.
///
/// `deny_unknown_fields` makes deserialization fail if the input contains
/// any key other than the fields declared here.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CookieInfo {
/// Cookie name.
pub name: String,
/// Cookie value.
pub value: String,
/// Optional cookie domain.
pub domain: Option<String>,
/// Optional cookie path.
pub path: Option<String>,
}
/// Metadata describing a downloaded asset.
///
/// `deny_unknown_fields` makes deserialization fail if the input contains
/// any key other than the fields declared here.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct DownloadedAsset {
/// The asset's URL.
pub url: String,
/// Hash of the asset's content; the algorithm and encoding are not determined in this file.
pub content_hash: String,
/// Optional MIME type.
pub mime_type: Option<String>,
/// Asset size; presumably in bytes — confirm with the downloader.
pub size: usize,
/// Category assigned to the asset.
pub asset_category: AssetCategory,
/// Optional HTML tag; presumably the tag that referenced the asset — confirm with the downloader.
pub html_tag: Option<String>,
}
/// Category assigned to a downloaded asset.
///
/// Serialized with snake_case variant names (`document`, `image`, `audio`,
/// ...). Note that [`Default`] yields [`AssetCategory::Image`], which is not
/// the first declared variant. `Debug` is intentionally absent from the
/// derive list because this file supplies a hand-written implementation.
#[derive(Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AssetCategory {
Document,
/// Default variant.
#[default]
Image,
Audio,
Video,
Font,
Stylesheet,
Script,
Archive,
Data,
Other,
}
/// Human-readable rendering of an [`AssetCategory`] as a fixed lowercase label.
impl std::fmt::Display for AssetCategory {
    /// Writes the variant's label verbatim; format-spec padding is not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Document => "document",
            Self::Image => "image",
            Self::Audio => "audio",
            Self::Video => "video",
            Self::Font => "font",
            Self::Stylesheet => "stylesheet",
            Self::Script => "script",
            Self::Archive => "archive",
            Self::Data => "data",
            Self::Other => "other",
        };
        f.write_str(label)
    }
}
/// `Debug` output for [`AssetCategory`] is identical to its `Display` output.
impl std::fmt::Debug for AssetCategory {
    /// Forwards to the `Display` implementation with the same formatter, so
    /// `{:?}` and `{}` print the same text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        <Self as std::fmt::Display>::fmt(self, f)
    }
}
/// Event describing crawl progress: a page result, an error, or completion.
///
/// No serde renaming or tagging attribute is applied, so the variants use
/// serde's default enum representation with their Rust names (`Page`,
/// `Error`, `Complete`). `Default` is implemented by hand in this file
/// rather than derived.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CrawlEvent {
/// A page result, boxed so the payload is stored behind a pointer.
Page(Box<CrawlPageResult>),
/// A failure, carrying the URL and an error message as strings.
Error {
url: String,
error: String,
},
/// Completion, carrying a page count; presumably the total pages crawled — confirm with the emitter.
Complete {
pages_crawled: usize,
},
}
/// Default event: a completion notice reporting zero pages.
impl Default for CrawlEvent {
    /// Returns `CrawlEvent::Complete` with `pages_crawled` set to `0`.
    fn default() -> Self {
        let pages_crawled = 0;
        Self::Complete { pages_crawled }
    }
}