/// Value that `MetadataConfig::default()` assigns to `max_structured_data_size`.
///
/// NOTE(review): the unit is not stated in this file (presumably bytes); confirm
/// against the code that enforces the limit.
pub const DEFAULT_MAX_STRUCTURED_DATA_SIZE: usize = 1_000_000;
/// Settings that select which metadata categories are extracted.
///
/// The five `extract_*` booleans are independent switches;
/// [`MetadataConfig::any_enabled`] reports whether at least one of them is set.
/// How each switch is acted upon is decided by the code consuming this config,
/// which is outside this type.
///
/// Serde `Serialize`/`Deserialize` impls are derived only when the `metadata`
/// feature is active.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "metadata", derive(serde::Serialize, serde::Deserialize))]
pub struct MetadataConfig {
    /// Switch for the "document" extraction category.
    pub extract_document: bool,
    /// Switch for the "headers" extraction category.
    pub extract_headers: bool,
    /// Switch for the "links" extraction category.
    pub extract_links: bool,
    /// Switch for the "images" extraction category.
    pub extract_images: bool,
    /// Switch for the "structured data" extraction category.
    pub extract_structured_data: bool,
    /// Size limit associated with structured data.
    ///
    /// NOTE(review): unit and enforcement point are not visible here (presumably
    /// a byte budget); confirm against the consumer.
    pub max_structured_data_size: usize,
}
/// Partial override for [`MetadataConfig`].
///
/// Every field is optional: `Some(value)` replaces the matching setting when the
/// update is applied via `MetadataConfig::apply_update`, while `None` leaves that
/// setting untouched. The derived `Default` yields an update with every field
/// `None`.
///
/// When either the `serde` or the `metadata` feature is active the type derives
/// `serde::Deserialize` with `deny_unknown_fields`, so unrecognised keys are
/// rejected instead of being ignored.
///
/// NOTE(review): each field-level `alias` below repeats the field's own name and
/// no container-level rename is present, so the aliases do not appear to add any
/// extra accepted key. Confirm whether a rename (e.g. `rename_all`) was intended.
#[derive(Debug, Clone, Default)]
#[cfg_attr(
    any(feature = "serde", feature = "metadata"),
    derive(serde::Deserialize),
    serde(deny_unknown_fields)
)]
pub struct MetadataConfigUpdate {
    /// Replacement for `MetadataConfig::extract_document`, if any.
    #[cfg_attr(any(feature = "serde", feature = "metadata"), serde(alias = "extract_document"))]
    pub extract_document: Option<bool>,

    /// Replacement for `MetadataConfig::extract_headers`, if any.
    #[cfg_attr(any(feature = "serde", feature = "metadata"), serde(alias = "extract_headers"))]
    pub extract_headers: Option<bool>,

    /// Replacement for `MetadataConfig::extract_links`, if any.
    #[cfg_attr(any(feature = "serde", feature = "metadata"), serde(alias = "extract_links"))]
    pub extract_links: Option<bool>,

    /// Replacement for `MetadataConfig::extract_images`, if any.
    #[cfg_attr(any(feature = "serde", feature = "metadata"), serde(alias = "extract_images"))]
    pub extract_images: Option<bool>,

    /// Replacement for `MetadataConfig::extract_structured_data`, if any.
    #[cfg_attr(
        any(feature = "serde", feature = "metadata"),
        serde(alias = "extract_structured_data")
    )]
    pub extract_structured_data: Option<bool>,

    /// Replacement for `MetadataConfig::max_structured_data_size`, if any.
    #[cfg_attr(
        any(feature = "serde", feature = "metadata"),
        serde(alias = "max_structured_data_size")
    )]
    pub max_structured_data_size: Option<usize>,
}
impl Default for MetadataConfig {
    /// Builds the baseline configuration: every `extract_*` switch is set and the
    /// size limit is [`DEFAULT_MAX_STRUCTURED_DATA_SIZE`].
    fn default() -> Self {
        // A single shared value keeps the "everything on" baseline in one place.
        const ENABLED: bool = true;

        Self {
            max_structured_data_size: DEFAULT_MAX_STRUCTURED_DATA_SIZE,
            extract_structured_data: ENABLED,
            extract_images: ENABLED,
            extract_links: ENABLED,
            extract_headers: ENABLED,
            extract_document: ENABLED,
        }
    }
}
impl MetadataConfig {
#[must_use]
pub const fn any_enabled(&self) -> bool {
self.extract_document
|| self.extract_headers
|| self.extract_links
|| self.extract_images
|| self.extract_structured_data
}
pub const fn apply_update(&mut self, update: MetadataConfigUpdate) {
if let Some(extract_document) = update.extract_document {
self.extract_document = extract_document;
}
if let Some(extract_headers) = update.extract_headers {
self.extract_headers = extract_headers;
}
if let Some(extract_links) = update.extract_links {
self.extract_links = extract_links;
}
if let Some(extract_images) = update.extract_images {
self.extract_images = extract_images;
}
if let Some(extract_structured_data) = update.extract_structured_data {
self.extract_structured_data = extract_structured_data;
}
if let Some(max_structured_data_size) = update.max_structured_data_size {
self.max_structured_data_size = max_structured_data_size;
}
}
#[must_use]
pub fn from_update(update: MetadataConfigUpdate) -> Self {
let mut config = Self::default();
config.apply_update(update);
config
}
}
impl From<MetadataConfigUpdate> for MetadataConfig {
    /// Converts an update into a full configuration: the default configuration
    /// with `update` applied over it (the same result as
    /// `MetadataConfig::from_update`).
    fn from(update: MetadataConfigUpdate) -> Self {
        let mut built = Self::default();
        built.apply_update(update);
        built
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration has every switch set and uses the default limit.
    #[test]
    fn test_metadata_config_default() {
        let config = MetadataConfig::default();

        // All five switches are checked so none can silently change its default.
        assert!(config.extract_document);
        assert!(config.extract_headers);
        assert!(config.extract_links);
        assert!(config.extract_images);
        assert!(config.extract_structured_data);
        assert_eq!(config.max_structured_data_size, DEFAULT_MAX_STRUCTURED_DATA_SIZE);
    }

    /// `any_enabled` is true when one or more switches are set and false otherwise.
    #[test]
    fn test_metadata_config_any_enabled() {
        let all_enabled = MetadataConfig::default();
        assert!(all_enabled.any_enabled());

        let some_enabled = MetadataConfig {
            extract_headers: true,
            extract_document: false,
            extract_links: false,
            extract_images: false,
            extract_structured_data: false,
            max_structured_data_size: DEFAULT_MAX_STRUCTURED_DATA_SIZE,
        };
        assert!(some_enabled.any_enabled());

        let none_enabled = MetadataConfig {
            extract_document: false,
            extract_headers: false,
            extract_links: false,
            extract_images: false,
            extract_structured_data: false,
            max_structured_data_size: DEFAULT_MAX_STRUCTURED_DATA_SIZE,
        };
        assert!(!none_enabled.any_enabled());
    }

    /// Only the `Some` fields of an update change the configuration.
    #[test]
    fn test_metadata_config_apply_update_partial() {
        let mut config = MetadataConfig::default();
        config.apply_update(MetadataConfigUpdate {
            extract_links: Some(false),
            max_structured_data_size: Some(42),
            ..MetadataConfigUpdate::default()
        });

        assert!(config.extract_document);
        assert!(config.extract_headers);
        assert!(!config.extract_links);
        assert!(config.extract_images);
        assert!(config.extract_structured_data);
        assert_eq!(config.max_structured_data_size, 42);
    }

    /// An update with every field `None` leaves the configuration unchanged.
    #[test]
    fn test_metadata_config_apply_update_empty_is_noop() {
        let mut config = MetadataConfig {
            extract_document: false,
            extract_headers: true,
            extract_links: false,
            extract_images: true,
            extract_structured_data: false,
            max_structured_data_size: 7,
        };
        config.apply_update(MetadataConfigUpdate::default());

        assert!(!config.extract_document);
        assert!(config.extract_headers);
        assert!(!config.extract_links);
        assert!(config.extract_images);
        assert!(!config.extract_structured_data);
        assert_eq!(config.max_structured_data_size, 7);
    }

    /// `from_update` and the `From` impl both start from defaults and agree.
    #[test]
    fn test_metadata_config_from_update() {
        let update = MetadataConfigUpdate {
            extract_document: Some(false),
            extract_images: Some(false),
            ..MetadataConfigUpdate::default()
        };

        let via_method = MetadataConfig::from_update(update.clone());
        let via_trait: MetadataConfig = update.into();

        for config in [&via_method, &via_trait] {
            assert!(!config.extract_document);
            assert!(config.extract_headers);
            assert!(config.extract_links);
            assert!(!config.extract_images);
            assert!(config.extract_structured_data);
            assert_eq!(config.max_structured_data_size, DEFAULT_MAX_STRUCTURED_DATA_SIZE);
        }
    }
}