#![allow(clippy::cast_precision_loss, clippy::cast_sign_loss, clippy::unused_self)]
pub mod collector;
pub mod config;
pub mod extraction;
pub mod types;
pub use collector::MetadataCollector;
pub use config::{DEFAULT_MAX_STRUCTURED_DATA_SIZE, MetadataConfig, MetadataConfigUpdate};
pub use types::{
DocumentMetadata, HeaderMetadata, HtmlMetadata, ImageMetadata, ImageType, LinkMetadata, LinkType, StructuredData,
StructuredDataType, TextDirection,
};
use std::cell::RefCell;
use std::rc::Rc;
/// Crate-internal shared handle to a [`MetadataCollector`].
///
/// The collector is reference-counted through [`Rc`] and wrapped in a
/// [`RefCell`], so several owners can hold the same collector and borrow it
/// mutably at runtime.
// NOTE(review): nothing in this file uses the alias, which is presumably why
// `dead_code` is allowed here — confirm it is still referenced elsewhere in the crate.
#[allow(dead_code)]
pub(crate) type MetadataCollectorHandle = Rc<RefCell<MetadataCollector>>;
#[cfg(test)]
mod tests {
    use super::*;

    /// `TextDirection::parse` maps the known keywords (including an upper-case
    /// spelling) to their variants and yields `None` for an unknown value.
    #[test]
    fn test_text_direction_parse() {
        let cases = [
            ("ltr", Some(TextDirection::LeftToRight)),
            ("rtl", Some(TextDirection::RightToLeft)),
            ("auto", Some(TextDirection::Auto)),
            ("invalid", None),
            ("LTR", Some(TextDirection::LeftToRight)),
        ];
        for (input, expected) in cases {
            assert_eq!(TextDirection::parse(input), expected, "input: {input:?}");
        }
    }

    /// Each `TextDirection` variant renders as its lower-case keyword.
    #[test]
    fn test_text_direction_display() {
        let cases = [
            (TextDirection::LeftToRight, "ltr"),
            (TextDirection::RightToLeft, "rtl"),
            (TextDirection::Auto, "auto"),
        ];
        for (direction, rendered) in cases {
            assert_eq!(direction.to_string(), rendered);
        }
    }

    /// `LinkMetadata::classify_link` sorts hrefs into anchor, email, phone,
    /// external and internal categories.
    #[test]
    fn test_link_classification() {
        let cases = [
            ("#section", LinkType::Anchor),
            ("mailto:test@example.com", LinkType::Email),
            ("tel:+1234567890", LinkType::Phone),
            ("https://example.com", LinkType::External),
            ("http://example.com", LinkType::External),
            ("/path/to/page", LinkType::Internal),
            ("../relative", LinkType::Internal),
            ("./same", LinkType::Internal),
        ];
        for (href, expected) in cases {
            assert_eq!(LinkMetadata::classify_link(href), expected, "href: {href:?}");
        }
    }

    /// `HeaderMetadata::is_valid` accepts level 3 and rejects levels 7 and 0;
    /// every other field is held constant across the cases.
    #[test]
    fn test_header_validation() {
        for (level, should_be_valid) in [(3, true), (7, false), (0, false)] {
            let header = HeaderMetadata {
                level,
                text: "Title".to_string(),
                id: None,
                depth: 2,
                html_offset: 100,
            };
            assert_eq!(header.is_valid(), should_be_valid, "level {level}");
        }
    }

    /// A default `DocumentMetadata` has no title or description and all of the
    /// checked collections are empty.
    #[test]
    fn test_document_metadata_default() {
        let doc = DocumentMetadata::default();
        let DocumentMetadata {
            title,
            description,
            keywords,
            open_graph,
            twitter_card,
            meta_tags,
            ..
        } = &doc;

        // Optional scalar fields start unset.
        assert!(title.is_none());
        assert!(description.is_none());

        // Collection fields start empty.
        assert!(keywords.is_empty());
        assert!(open_graph.is_empty());
        assert!(twitter_card.is_empty());
        assert!(meta_tags.is_empty());
    }

    /// `ImageMetadata` keeps the `ImageType` it was constructed with.
    // NOTE(review): this only reads back the `image_type` field that the test
    // itself sets; no classification routine is exercised here.
    #[test]
    fn test_image_type_classification() {
        // Builds an image record whose optional fields are all left unset.
        let image_with = |src: &str, image_type| ImageMetadata {
            src: src.to_string(),
            alt: None,
            title: None,
            dimensions: None,
            image_type,
            attributes: Default::default(),
        };

        let data_uri = image_with("data:image/png;base64,iVBORw0KG...", ImageType::DataUri);
        assert_eq!(data_uri.image_type, ImageType::DataUri);

        let external = image_with("https://example.com/image.jpg", ImageType::External);
        assert_eq!(external.image_type, ImageType::External);
    }

    /// Each `LinkType` variant renders as its lower-case name.
    #[test]
    fn test_link_type_display() {
        let cases = [
            (LinkType::Anchor, "anchor"),
            (LinkType::Internal, "internal"),
            (LinkType::External, "external"),
            (LinkType::Email, "email"),
            (LinkType::Phone, "phone"),
            (LinkType::Other, "other"),
        ];
        for (link_type, rendered) in cases {
            assert_eq!(link_type.to_string(), rendered);
        }
    }

    /// Each `StructuredDataType` variant renders as its snake-case name.
    #[test]
    fn test_structured_data_type_display() {
        let cases = [
            (StructuredDataType::JsonLd, "json_ld"),
            (StructuredDataType::Microdata, "microdata"),
            (StructuredDataType::RDFa, "rdfa"),
        ];
        for (data_type, rendered) in cases {
            assert_eq!(data_type.to_string(), rendered);
        }
    }
}