use crate::markdown::MarkdownOptions;
use regex::Regex;
/// Options controlling a readability extraction pass.
///
/// Obtain a value with `ReadabilityOptions::default()` or assemble one through
/// `ReadabilityOptions::builder()`. This struct only carries the settings; how
/// each one is interpreted is up to the code that reads it, so any note below
/// that goes beyond the field name and its default is marked as an assumption.
#[derive(Debug, Clone)]
pub struct ReadabilityOptions {
/// Debug switch; defaults to `false`.
/// NOTE(review): presumably turns on diagnostic output — confirm with the consumer.
pub debug: bool,
/// Element-count limit for parsing; defaults to `0`.
/// NOTE(review): `0` presumably means "no limit" — confirm.
pub max_elems_to_parse: usize,
/// Number of top candidates; defaults to `5`.
pub nb_top_candidates: usize,
/// Character-count threshold; defaults to `500`.
/// NOTE(review): presumably the minimum accepted length of extracted text — confirm.
pub char_threshold: usize,
/// Class names to preserve; defaults to `["page"]`.
pub classes_to_preserve: Vec<String>,
/// Whether classes are kept; defaults to `false`.
/// NOTE(review): presumably keeps every class rather than only `classes_to_preserve` — confirm.
pub keep_classes: bool,
/// JSON-LD opt-out flag; defaults to `false`.
pub disable_json_ld: bool,
/// Optional regex describing allowed videos; defaults to `None`.
/// NOTE(review): presumably matched against embed URLs — confirm.
pub allowed_video_regex: Option<Regex>,
/// Link-density modifier; defaults to `0.0`.
pub link_density_modifier: f64,
/// Whether the title is removed from the content; defaults to `false`.
pub remove_title_from_content: bool,
/// Whether styles are cleaned; defaults to `true`.
pub clean_styles: bool,
/// Whether whitespace is cleaned; defaults to `true`.
pub clean_whitespace: bool,
/// Whether Markdown output is requested; defaults to `false`.
pub output_markdown: bool,
/// Optional Markdown settings; defaults to `None`.
/// NOTE(review): presumably only consulted when `output_markdown` is `true` — confirm.
pub markdown_options: Option<MarkdownOptions>,
}
impl Default for ReadabilityOptions {
fn default() -> Self {
Self {
debug: false,
max_elems_to_parse: 0,
nb_top_candidates: 5,
char_threshold: 500,
classes_to_preserve: vec!["page".to_string()],
keep_classes: false,
disable_json_ld: false,
allowed_video_regex: None,
link_density_modifier: 0.0,
remove_title_from_content: false,
clean_styles: true,
clean_whitespace: true,
output_markdown: false,
markdown_options: None,
}
}
}
impl ReadabilityOptions {
pub fn builder() -> ReadabilityOptionsBuilder {
ReadabilityOptionsBuilder::default()
}
}
/// Step-by-step constructor for [`ReadabilityOptions`].
///
/// Every field is an `Option`: `None` means "not set by the caller", in which
/// case `build` substitutes the corresponding value from
/// `ReadabilityOptions::default()`.
///
/// `Debug` and `Clone` are derived so a partially configured builder can be
/// logged and reused as a template, matching the traits already derived on
/// [`ReadabilityOptions`] for the same field types.
#[derive(Debug, Clone, Default)]
pub struct ReadabilityOptionsBuilder {
    /// Pending value for `ReadabilityOptions::debug`.
    debug: Option<bool>,
    /// Pending value for `ReadabilityOptions::max_elems_to_parse`.
    max_elems_to_parse: Option<usize>,
    /// Pending value for `ReadabilityOptions::nb_top_candidates`.
    nb_top_candidates: Option<usize>,
    /// Pending value for `ReadabilityOptions::char_threshold`.
    char_threshold: Option<usize>,
    /// Pending value for `ReadabilityOptions::classes_to_preserve`.
    classes_to_preserve: Option<Vec<String>>,
    /// Pending value for `ReadabilityOptions::keep_classes`.
    keep_classes: Option<bool>,
    /// Pending value for `ReadabilityOptions::disable_json_ld`.
    disable_json_ld: Option<bool>,
    /// Pending value for `ReadabilityOptions::allowed_video_regex`.
    allowed_video_regex: Option<Regex>,
    /// Pending value for `ReadabilityOptions::link_density_modifier`.
    link_density_modifier: Option<f64>,
    /// Pending value for `ReadabilityOptions::remove_title_from_content`.
    remove_title_from_content: Option<bool>,
    /// Pending value for `ReadabilityOptions::clean_styles`.
    clean_styles: Option<bool>,
    /// Pending value for `ReadabilityOptions::clean_whitespace`.
    clean_whitespace: Option<bool>,
    /// Pending value for `ReadabilityOptions::output_markdown`.
    output_markdown: Option<bool>,
    /// Pending value for `ReadabilityOptions::markdown_options`.
    markdown_options: Option<MarkdownOptions>,
}
impl ReadabilityOptionsBuilder {
    /// Sets the value that `build` will use for `debug`.
    pub fn debug(self, debug: bool) -> Self {
        Self {
            debug: Some(debug),
            ..self
        }
    }

    /// Sets the value that `build` will use for `max_elems_to_parse`.
    pub fn max_elems_to_parse(self, max: usize) -> Self {
        Self {
            max_elems_to_parse: Some(max),
            ..self
        }
    }

    /// Sets the value that `build` will use for `nb_top_candidates`.
    pub fn nb_top_candidates(self, nb: usize) -> Self {
        Self {
            nb_top_candidates: Some(nb),
            ..self
        }
    }

    /// Sets the value that `build` will use for `char_threshold`.
    pub fn char_threshold(self, threshold: usize) -> Self {
        Self {
            char_threshold: Some(threshold),
            ..self
        }
    }

    /// Sets the value that `build` will use for `classes_to_preserve`.
    ///
    /// The given list replaces the default list; it is not merged with it.
    pub fn classes_to_preserve(self, classes: Vec<String>) -> Self {
        Self {
            classes_to_preserve: Some(classes),
            ..self
        }
    }

    /// Sets the value that `build` will use for `keep_classes`.
    pub fn keep_classes(self, keep: bool) -> Self {
        Self {
            keep_classes: Some(keep),
            ..self
        }
    }

    /// Sets the value that `build` will use for `disable_json_ld`.
    pub fn disable_json_ld(self, disable: bool) -> Self {
        Self {
            disable_json_ld: Some(disable),
            ..self
        }
    }

    /// Sets the regex that `build` will store in `allowed_video_regex`.
    pub fn allowed_video_regex(self, regex: Regex) -> Self {
        Self {
            allowed_video_regex: Some(regex),
            ..self
        }
    }

    /// Sets the value that `build` will use for `link_density_modifier`.
    pub fn link_density_modifier(self, modifier: f64) -> Self {
        Self {
            link_density_modifier: Some(modifier),
            ..self
        }
    }

    /// Sets the value that `build` will use for `remove_title_from_content`.
    pub fn remove_title_from_content(self, remove: bool) -> Self {
        Self {
            remove_title_from_content: Some(remove),
            ..self
        }
    }

    /// Sets the value that `build` will use for `clean_styles`.
    pub fn clean_styles(self, clean: bool) -> Self {
        Self {
            clean_styles: Some(clean),
            ..self
        }
    }

    /// Sets the value that `build` will use for `clean_whitespace`.
    pub fn clean_whitespace(self, clean: bool) -> Self {
        Self {
            clean_whitespace: Some(clean),
            ..self
        }
    }

    /// Sets the value that `build` will use for `output_markdown`.
    pub fn output_markdown(self, enabled: bool) -> Self {
        Self {
            output_markdown: Some(enabled),
            ..self
        }
    }

    /// Sets the Markdown options that `build` will store in `markdown_options`.
    pub fn markdown_options(self, opts: MarkdownOptions) -> Self {
        Self {
            markdown_options: Some(opts),
            ..self
        }
    }

    /// Consumes the builder and produces the final [`ReadabilityOptions`].
    ///
    /// Starts from `ReadabilityOptions::default()` and overwrites exactly the
    /// options that were set on this builder; everything else keeps its
    /// default value.
    pub fn build(self) -> ReadabilityOptions {
        /// Writes `pending` into `slot` only when the caller supplied a value.
        fn overlay<T>(slot: &mut T, pending: Option<T>) {
            if let Some(value) = pending {
                *slot = value;
            }
        }

        // Destructure so that every builder field is visibly accounted for.
        let Self {
            debug,
            max_elems_to_parse,
            nb_top_candidates,
            char_threshold,
            classes_to_preserve,
            keep_classes,
            disable_json_ld,
            allowed_video_regex,
            link_density_modifier,
            remove_title_from_content,
            clean_styles,
            clean_whitespace,
            output_markdown,
            markdown_options,
        } = self;

        let mut options = ReadabilityOptions::default();

        overlay(&mut options.debug, debug);
        overlay(&mut options.max_elems_to_parse, max_elems_to_parse);
        overlay(&mut options.nb_top_candidates, nb_top_candidates);
        overlay(&mut options.char_threshold, char_threshold);
        overlay(&mut options.classes_to_preserve, classes_to_preserve);
        overlay(&mut options.keep_classes, keep_classes);
        overlay(&mut options.disable_json_ld, disable_json_ld);
        overlay(&mut options.link_density_modifier, link_density_modifier);
        overlay(
            &mut options.remove_title_from_content,
            remove_title_from_content,
        );
        overlay(&mut options.clean_styles, clean_styles);
        overlay(&mut options.clean_whitespace, clean_whitespace);
        overlay(&mut options.output_markdown, output_markdown);

        // The target fields are themselves `Option`s: a set builder value
        // replaces the default, an unset one leaves the default in place.
        if allowed_video_regex.is_some() {
            options.allowed_video_regex = allowed_video_regex;
        }
        if markdown_options.is_some() {
            options.markdown_options = markdown_options;
        }

        options
    }
}