use serde::{Deserialize, Serialize};
/// Caller-supplied settings for a decruft run.
///
/// The code that reads these options lives outside this file, so the per-field
/// notes describing *behaviour* are inferred from the field names and are
/// marked as such. The stated default values come from this type's
/// [`Default`] implementation.
#[derive(Debug, Clone)]
#[non_exhaustive]
// The struct is a flat set of boolean toggles, which trips this pedantic lint.
#[expect(clippy::struct_excessive_bools)]
pub struct DecruftOptions {
    /// Optional URL; presumably the address of the page being processed
    /// (confirm against the consumer). Defaults to `None`.
    pub url: Option<String>,

    /// Debug switch; presumably controls whether debug information is
    /// collected (confirm). Defaults to `false`.
    pub debug: bool,

    /// Presumably enables removal of elements matched by exact selectors
    /// (confirm). Defaults to `true`.
    pub remove_exact_selectors: bool,

    /// Presumably enables removal of elements matched by partial selectors
    /// (confirm). Defaults to `true`.
    pub remove_partial_selectors: bool,

    /// Presumably strips images from the output (confirm). Defaults to `false`.
    pub remove_images: bool,

    /// Presumably drops hidden elements (confirm). Defaults to `true`.
    pub remove_hidden_elements: bool,

    /// Presumably drops low-scoring content (confirm; the scoring itself is
    /// not visible here). Defaults to `true`.
    pub remove_low_scoring: bool,

    /// Presumably drops small images (confirm; the size threshold is not
    /// visible here). Defaults to `true`.
    pub remove_small_images: bool,

    /// Presumably enables a standardization pass over the content (confirm).
    /// Defaults to `true`.
    pub standardize: bool,

    /// Presumably enables removal based on content patterns (confirm).
    /// Defaults to `true`.
    pub remove_content_patterns: bool,

    /// Optional selector; presumably overrides automatic detection of the
    /// content root (confirm). Defaults to `None`.
    pub content_selector: Option<String>,

    /// Markdown switch; exact effect on the output is decided by the consumer
    /// (confirm). Defaults to `false`.
    pub markdown: bool,

    /// Presumably requests Markdown as a separate output alongside the main
    /// content (confirm). Defaults to `false`.
    pub separate_markdown: bool,

    /// Presumably keeps replies in the extracted content (confirm).
    /// Defaults to `true`.
    pub include_replies: bool,
}
impl Default for DecruftOptions {
    /// Builds the baseline option set.
    ///
    /// Both optional strings start out as `None`; eight flags start out
    /// enabled and four start out disabled, as grouped below.
    fn default() -> Self {
        Self {
            // Optional inputs: nothing is supplied by default.
            url: None,
            content_selector: None,

            // Flags that are on by default.
            remove_exact_selectors: true,
            remove_partial_selectors: true,
            remove_hidden_elements: true,
            remove_low_scoring: true,
            remove_small_images: true,
            remove_content_patterns: true,
            standardize: true,
            include_replies: true,

            // Flags that are off by default.
            debug: false,
            remove_images: false,
            markdown: false,
            separate_markdown: false,
        }
    }
}
/// Serializable outcome of a decruft run: the extracted content plus whatever
/// metadata was found.
///
/// Except where noted, optional fields are left out of the serialized form
/// when they are `None`. The code that populates this struct is outside this
/// file, so notes about what a field *contains* are inferred from its name.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[non_exhaustive]
pub struct DecruftResult {
    /// Extracted content. The markup format is decided by the producer and is
    /// not visible here (presumably HTML — confirm).
    pub content: String,

    /// Title, if one was found.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,

    /// Description, if one was found.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Domain, if known (presumably derived from the page URL — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub domain: Option<String>,

    /// Favicon reference, if one was found.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub favicon: Option<String>,

    /// Image reference, if one was found (presumably the lead/social image —
    /// confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image: Option<String>,

    /// Language, if one was detected or declared.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub language: Option<String>,

    /// Parse time in milliseconds, per the field name. Always serialized.
    pub parse_time_ms: u64,

    /// Publication date as a string; its format is set by the producer.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub published: Option<String>,

    /// Modification date as a string; its format is set by the producer.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub modified: Option<String>,

    /// Author, if one was found.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub author: Option<String>,

    /// Site, if known (presumably the site name — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub site: Option<String>,

    /// Canonical URL, if one was found.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub canonical_url: Option<String>,

    /// Keywords; an empty list is omitted from the serialized form.
    ///
    /// `default` is the counterpart of `skip_serializing_if` here: because an
    /// empty list is not written out, a missing key has to deserialize back to
    /// an empty list instead of failing with a missing-field error.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub keywords: Vec<String>,

    /// Content type, if known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_type: Option<String>,

    /// Markdown rendering of the content, when one was produced.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_markdown: Option<String>,

    /// Word count. Which text it is measured over is decided by the producer.
    /// Always serialized.
    pub word_count: usize,

    /// Arbitrary JSON value (presumably schema.org structured data — confirm).
    ///
    /// NOTE(review): unlike the other optional fields this one carries no
    /// `skip_serializing_if`, so `None` is written out explicitly (`null` in
    /// JSON). Left unchanged to keep existing output stable — confirm that the
    /// asymmetry is intentional.
    pub schema_org_data: Option<serde_json::Value>,

    /// Collected meta tags, if any were gathered.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub meta_tags: Option<Vec<MetaTag>>,

    /// Extractor type, if one was recorded (presumably names the extractor
    /// that produced the content — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extractor_type: Option<String>,

    /// Debug details, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub debug: Option<DebugInfo>,
}
/// A single meta tag, stored as three optional strings.
///
/// Presumably mirrors the `name`, `property` and `content` attributes of an
/// HTML `<meta>` element (confirm against the code that builds it). None of
/// the fields is skipped during serialization, so an absent value is written
/// out explicitly.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetaTag {
    /// The tag's `name`, if it has one.
    pub name: Option<String>,

    /// The tag's `property`, if it has one.
    pub property: Option<String>,

    /// The tag's `content`, if it has one.
    pub content: Option<String>,
}
/// Debug payload: a content selector plus a list of recorded removals.
///
/// Both fields are always serialized.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DebugInfo {
    /// Content selector (presumably the one that was used to pick the content
    /// root — confirm against the producer).
    pub content_selector: String,

    /// Recorded removals; ordering is determined by the producer.
    pub removals: Vec<Removal>,
}
/// One recorded removal.
///
/// `step` and `text` are always serialized; `selector` and `reason` are left
/// out of the serialized form when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Removal {
    /// Step label (presumably names the processing step that performed the
    /// removal — confirm).
    pub step: String,

    /// Selector associated with the removal, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub selector: Option<String>,

    /// Reason for the removal, if one was recorded.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reason: Option<String>,

    /// Text associated with the removal (presumably text of the removed
    /// element; whether it is truncated is not visible here — confirm).
    pub text: String,
}
/// Crate-internal bundle of document metadata.
///
/// Derives `Default`, so a fresh value has every optional field set to `None`
/// and an empty keyword list. It is serialize-only (no `Deserialize`), and no
/// field is skipped during serialization.
///
/// NOTE(review): `DecruftResult` exposes a similarly named `site` field where
/// this type has `site_name`; how the two relate is not shown in this file.
#[derive(Debug, Clone, Default, Serialize)]
pub(crate) struct Metadata {
    /// Title, if one was found.
    pub title: Option<String>,

    /// Description, if one was found.
    pub description: Option<String>,

    /// Domain, if known.
    pub domain: Option<String>,

    /// Favicon reference, if one was found.
    pub favicon: Option<String>,

    /// Image reference, if one was found.
    pub image: Option<String>,

    /// Language, if one was detected or declared.
    pub language: Option<String>,

    /// Publication date as a string; its format is set by the producer.
    pub published: Option<String>,

    /// Modification date as a string; its format is set by the producer.
    pub modified: Option<String>,

    /// Author, if one was found.
    pub author: Option<String>,

    /// Site name, if one was found.
    pub site_name: Option<String>,

    /// Canonical URL, if one was found.
    pub canonical_url: Option<String>,

    /// Keywords; empty when none were found.
    pub keywords: Vec<String>,

    /// Content type, if known.
    pub content_type: Option<String>,
}