/// Focus setting for an extraction run (stored in `Options::focus`).
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
///
/// NOTE(review): the precise effect of each variant is decided by the
/// extraction code, which is not visible from this definition — confirm before
/// relying on the hedged descriptions below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[non_exhaustive]
pub enum ExtractionFocus {
    /// The default focus.
    #[default]
    Balanced,
    /// Presumably biases extraction towards keeping more content — confirm.
    FavorRecall,
    /// Presumably biases extraction towards keeping less, cleaner content — confirm.
    FavorPrecision,
}
/// Mode selector for HTML date handling (stored in `Options::html_date_mode`).
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
///
/// NOTE(review): what each mode actually does is decided by the date-extraction
/// code, which is not visible from this definition — confirm before relying on
/// the hedged descriptions below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[non_exhaustive]
pub enum HtmlDateMode {
    /// The mode used when nothing else is selected.
    #[default]
    Default,
    /// Presumably a cheaper, less thorough date search — confirm.
    Fast,
    /// Presumably a more thorough, more expensive date search — confirm.
    Extensive,
    /// Presumably turns HTML date extraction off — confirm.
    Disabled,
}
/// Optional candidate results that can be attached to `Options` through
/// `Options::fallback_candidates`.
///
/// The default value carries no candidates. Because the struct is
/// `#[non_exhaustive]`, code outside this crate cannot use a struct literal;
/// start from `FallbackCandidates::default()` and assign the public fields.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[non_exhaustive]
pub struct FallbackCandidates {
    /// Candidate HTML, if any.
    ///
    /// NOTE(review): presumably the output of a readability-style extractor
    /// run by the caller — confirm against the code that consumes it.
    pub readability_html: Option<String>,
}
/// Numeric thresholds used by the extractor.
///
/// `Config::default()` supplies the baseline values quoted on each field, and
/// the `with_*` builders override them one at a time. The struct is
/// `#[non_exhaustive]`, so code outside this crate has to start from
/// `Config::default()` rather than a struct literal.
///
/// NOTE(review): units (characters, bytes, entries) are not visible from this
/// definition — confirm against the code that reads these fields.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct Config {
    /// Cache size; `4096` by default.
    pub cache_size: usize,
    /// Default `100`. Presumably the minimum size a segment must have before
    /// it is checked for duplication — confirm.
    pub min_duplicate_check_size: usize,
    /// Default `2`. Presumably how many repetitions are tolerated before a
    /// segment counts as a duplicate — confirm.
    pub max_duplicate_count: usize,
    /// Default `250`. Presumably the minimum size of extracted main content — confirm.
    pub min_extracted_size: usize,
    /// Default `1`. Presumably the minimum size of extracted comments — confirm.
    pub min_extracted_comment_size: usize,
    /// Default `1`. Presumably the minimum size of the final output — confirm.
    pub min_output_size: usize,
    /// Default `1`. Presumably the minimum size of the final comment output — confirm.
    pub min_output_comment_size: usize,
}
impl Default for Config {
fn default() -> Self {
Self {
cache_size: 4096,
min_duplicate_check_size: 100,
max_duplicate_count: 2,
min_extracted_size: 250,
min_extracted_comment_size: 1,
min_output_size: 1,
min_output_comment_size: 1,
}
}
}
/// Consuming builder methods for [`Config`].
///
/// Each method takes `self` by value, overwrites exactly one field and returns
/// the updated value, so calls can be chained from `Config::default()`. The
/// result must be used: dropping it discards the change, hence `#[must_use]`.
impl Config {
    /// Sets `cache_size` and returns the updated configuration.
    #[must_use]
    pub fn with_cache_size(mut self, size: usize) -> Self {
        self.cache_size = size;
        self
    }

    /// Sets `min_duplicate_check_size` and returns the updated configuration.
    #[must_use]
    pub fn with_min_duplicate_check_size(mut self, size: usize) -> Self {
        self.min_duplicate_check_size = size;
        self
    }

    /// Sets `max_duplicate_count` and returns the updated configuration.
    #[must_use]
    pub fn with_max_duplicate_count(mut self, count: usize) -> Self {
        self.max_duplicate_count = count;
        self
    }

    /// Sets `min_extracted_size` and returns the updated configuration.
    #[must_use]
    pub fn with_min_extracted_size(mut self, size: usize) -> Self {
        self.min_extracted_size = size;
        self
    }

    /// Sets `min_extracted_comment_size` and returns the updated configuration.
    #[must_use]
    pub fn with_min_extracted_comment_size(mut self, size: usize) -> Self {
        self.min_extracted_comment_size = size;
        self
    }

    /// Sets `min_output_size` and returns the updated configuration.
    #[must_use]
    pub fn with_min_output_size(mut self, size: usize) -> Self {
        self.min_output_size = size;
        self
    }

    /// Sets `min_output_comment_size` and returns the updated configuration.
    #[must_use]
    pub fn with_min_output_comment_size(mut self, size: usize) -> Self {
        self.min_output_comment_size = size;
        self
    }
}
/// Settings for a single extraction run.
///
/// `Default` is derived, so a fresh value has every `bool` flag set to
/// `false`, every optional setting set to `None`, an empty `excluded_authors`
/// list, and `config`, `focus` and `html_date_mode` at their own defaults.
/// The struct is `#[non_exhaustive]`: code outside this crate starts from
/// `Options::default()` and then uses the `with_*` builders or assigns the
/// public fields.
///
/// NOTE(review): the field descriptions marked "presumably" are inferred from
/// the field names only; the code that consumes them is not visible here.
#[derive(Debug, Clone, Default)]
#[non_exhaustive]
pub struct Options {
    /// Numeric thresholds; see [`Config`].
    pub config: Config,

    /// Presumably the URL the document was fetched from — confirm.
    pub original_url: Option<url::Url>,

    /// Presumably the language the caller expects the content to be in;
    /// the expected format (e.g. language code) is not visible here — confirm.
    pub target_language: Option<String>,

    /// Presumably enables fallback extraction strategies — confirm.
    pub enable_fallback: bool,

    /// Extraction focus; see [`ExtractionFocus`].
    pub focus: ExtractionFocus,

    /// Presumably leaves comment sections out of the result — confirm.
    pub exclude_comments: bool,

    /// Presumably leaves tables out of the result — confirm.
    pub exclude_tables: bool,

    /// Presumably keeps images in the result — confirm.
    pub include_images: bool,

    /// Presumably keeps links in the result — confirm.
    pub include_links: bool,

    /// Presumably author names to drop from extracted metadata — confirm.
    pub excluded_authors: Vec<String>,

    /// Presumably enables duplicate-content removal — confirm.
    pub deduplicate: bool,

    /// Presumably requires essential metadata to be present for a result to
    /// be accepted (the matching builder names its argument `require`) — confirm.
    pub has_essential_metadata: bool,

    /// Optional upper bound on tree size; `None` leaves it unset.
    /// NOTE(review): unit (nodes vs. bytes) is not visible here — confirm.
    pub max_tree_size: Option<usize>,

    /// Presumably a selector for elements to prune before extraction;
    /// the selector syntax is not visible here — confirm.
    pub prune_selector: Option<String>,

    /// Presumably turns on logging for the run — confirm.
    pub enable_log: bool,

    /// HTML date handling mode; see [`HtmlDateMode`].
    pub html_date_mode: HtmlDateMode,

    /// Presumably a caller-supplied date used instead of a detected one — confirm.
    pub html_date_override: Option<chrono::NaiveDate>,

    /// Optional candidate results; see [`FallbackCandidates`].
    pub fallback_candidates: Option<FallbackCandidates>,
}
/// Consuming builder methods for [`Options`].
///
/// Each method takes `self` by value, overwrites exactly one field and returns
/// the updated value, so calls can be chained from `Options::default()`. The
/// result must be used: dropping it discards the change, hence `#[must_use]`.
/// Builders for `Option` fields always store `Some(..)`.
impl Options {
    /// Sets `enable_fallback` and returns the updated options.
    #[must_use]
    pub fn with_fallback(mut self, enable: bool) -> Self {
        self.enable_fallback = enable;
        self
    }

    /// Sets `include_links` and returns the updated options.
    #[must_use]
    pub fn with_links(mut self, include: bool) -> Self {
        self.include_links = include;
        self
    }

    /// Sets `include_images` and returns the updated options.
    #[must_use]
    pub fn with_images(mut self, include: bool) -> Self {
        self.include_images = include;
        self
    }

    /// Sets `focus` and returns the updated options.
    #[must_use]
    pub fn with_focus(mut self, focus: ExtractionFocus) -> Self {
        self.focus = focus;
        self
    }

    /// Sets `exclude_comments` and returns the updated options.
    #[must_use]
    pub fn with_exclude_comments(mut self, exclude: bool) -> Self {
        self.exclude_comments = exclude;
        self
    }

    /// Sets `exclude_tables` and returns the updated options.
    #[must_use]
    pub fn with_exclude_tables(mut self, exclude: bool) -> Self {
        self.exclude_tables = exclude;
        self
    }

    /// Stores `url` in `original_url` and returns the updated options.
    #[must_use]
    pub fn with_url(mut self, url: url::Url) -> Self {
        self.original_url = Some(url);
        self
    }

    /// Stores `lang` (converted into a `String`) in `target_language` and
    /// returns the updated options.
    #[must_use]
    pub fn with_target_language(mut self, lang: impl Into<String>) -> Self {
        self.target_language = Some(lang.into());
        self
    }

    /// Sets `deduplicate` and returns the updated options.
    #[must_use]
    pub fn with_deduplicate(mut self, enable: bool) -> Self {
        self.deduplicate = enable;
        self
    }

    /// Stores `selector` (converted into a `String`) in `prune_selector` and
    /// returns the updated options.
    #[must_use]
    pub fn with_prune_selector(mut self, selector: impl Into<String>) -> Self {
        self.prune_selector = Some(selector.into());
        self
    }

    /// Sets `has_essential_metadata` and returns the updated options.
    #[must_use]
    pub fn with_essential_metadata(mut self, require: bool) -> Self {
        self.has_essential_metadata = require;
        self
    }

    /// Stores `max` in `max_tree_size` and returns the updated options.
    #[must_use]
    pub fn with_max_tree_size(mut self, max: usize) -> Self {
        self.max_tree_size = Some(max);
        self
    }

    /// Replaces the whole `config` value and returns the updated options.
    #[must_use]
    pub fn with_config(mut self, config: Config) -> Self {
        self.config = config;
        self
    }

    /// Sets `html_date_mode` and returns the updated options.
    #[must_use]
    pub fn with_html_date_mode(mut self, mode: HtmlDateMode) -> Self {
        self.html_date_mode = mode;
        self
    }

    /// Stores `date` in `html_date_override` and returns the updated options.
    #[must_use]
    pub fn with_html_date_override(mut self, date: chrono::NaiveDate) -> Self {
        self.html_date_override = Some(date);
        self
    }

    /// Replaces the `excluded_authors` list (it does not append) and returns
    /// the updated options.
    #[must_use]
    pub fn with_excluded_authors(mut self, authors: Vec<String>) -> Self {
        self.excluded_authors = authors;
        self
    }

    /// Stores `candidates` in `fallback_candidates` and returns the updated options.
    #[must_use]
    pub fn with_fallback_candidates(mut self, candidates: FallbackCandidates) -> Self {
        self.fallback_candidates = Some(candidates);
        self
    }

    /// Sets `enable_log` and returns the updated options.
    #[must_use]
    pub fn with_log(mut self, enable: bool) -> Self {
        self.enable_log = enable;
        self
    }
}