use ahash::AHashSet;
use std::sync::LazyLock;
/// Every metadata field name accepted by [`is_valid_format_field`].
///
/// Names are lowercase `snake_case` string literals and each appears once.
/// The slice is public, so the order of the entries is observable by callers;
/// append new names rather than reordering existing ones.
///
/// The section comments inside the table are informal reading aids derived
/// from the field names themselves; they carry no meaning at runtime.
pub const KNOWN_FORMATS: &[&str] = &[
    // Document properties.
    "format_type",
    "title",
    "author",
    "keywords",
    "creator",
    "producer",
    // Dates and page count.
    "creation_date",
    "modification_date",
    "page_count",
    // Sheet fields.
    "sheet_count",
    "sheet_names",
    // Email fields.
    "from_email",
    "from_name",
    "to_emails",
    "cc_emails",
    "bcc_emails",
    "message_id",
    "attachments",
    // Descriptive text and fonts.
    "description",
    "summary",
    "fonts",
    // Format name, file listing and sizes.
    "format",
    "file_count",
    "file_list",
    "total_size",
    "compressed_size",
    // Dimensions.
    "width",
    "height",
    // Element counts.
    "element_count",
    "unique_elements",
    // Line / word / character counts.
    "line_count",
    "word_count",
    "character_count",
    // Headers, links and code blocks.
    "headers",
    "links",
    "code_blocks",
    // Web page metadata: canonical/base URLs, `og_*` and `twitter_*` names.
    "canonical",
    "base_href",
    "og_title",
    "og_description",
    "og_image",
    "og_url",
    "og_type",
    "og_site_name",
    "twitter_card",
    "twitter_title",
    "twitter_description",
    "twitter_image",
    "twitter_site",
    "twitter_creator",
    // `link_*` names.
    "link_author",
    "link_license",
    "link_alternate",
    // `psm` and output format.
    "psm",
    "output_format",
    // Table fields.
    "table_count",
    "table_rows",
    "table_cols",
];
/// Hash-set view of [`KNOWN_FORMATS`] used for membership checks.
///
/// The set is populated by the closure below the first time the static is
/// dereferenced and is reused for every later lookup.
static FORMAT_FIELD_SET: LazyLock<AHashSet<&'static str>> = LazyLock::new(|| {
    // `copied()` turns the `&&'static str` items into `&'static str` keys.
    let names = KNOWN_FORMATS.iter().copied();
    names.collect::<AHashSet<_>>()
});
#[inline]
pub fn is_valid_format_field(field: &str) -> bool {
FORMAT_FIELD_SET.contains(field)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the table size so that adding or removing a name is a deliberate change.
    #[test]
    fn test_known_formats_count() {
        let total = KNOWN_FORMATS.len();
        assert_eq!(total, 58, "Expected 58 known format fields");
    }

    /// Every name in the table must be listed exactly once.
    #[test]
    fn test_known_formats_no_duplicates() {
        let mut visited = std::collections::HashSet::new();
        KNOWN_FORMATS.iter().for_each(|name| {
            // `insert` returns false when the value was already present.
            assert!(visited.insert(name), "Duplicate format field found: {}", name);
        });
    }

    /// Spot-checks a handful of names that must be accepted.
    #[test]
    fn test_is_valid_format_field_true_cases() {
        assert!(is_valid_format_field("title"));
        assert!(is_valid_format_field("author"));
        assert!(is_valid_format_field("creation_date"));
        assert!(is_valid_format_field("page_count"));
        assert!(is_valid_format_field("from_email"));
        assert!(is_valid_format_field("og_title"));
        assert!(is_valid_format_field("twitter_card"));
    }

    /// Unknown names, the empty string, wrong case and trailing whitespace are all rejected.
    #[test]
    fn test_is_valid_format_field_false_cases() {
        assert!(!is_valid_format_field("invalid_field"));
        assert!(!is_valid_format_field("unknown_metadata"));
        assert!(!is_valid_format_field(""));
        assert!(!is_valid_format_field("TITLE"));
        assert!(!is_valid_format_field("title "));
    }

    /// All document property names are accepted.
    #[test]
    fn test_all_document_property_fields() {
        let expected = ["format_type", "title", "author", "keywords", "creator", "producer"];
        expected.iter().for_each(|name| {
            assert!(is_valid_format_field(name), "Missing field: {}", name);
        });
    }

    /// All email names are accepted.
    #[test]
    fn test_all_email_fields() {
        let expected = [
            "from_email",
            "from_name",
            "to_emails",
            "cc_emails",
            "bcc_emails",
            "message_id",
            "attachments",
        ];
        expected.iter().for_each(|name| {
            assert!(is_valid_format_field(name), "Missing email field: {}", name);
        });
    }

    /// All web metadata names (`og_*`, `twitter_*`, canonical/base URLs) are accepted.
    #[test]
    fn test_all_web_meta_fields() {
        let expected = [
            "og_title",
            "og_description",
            "og_image",
            "og_url",
            "og_type",
            "og_site_name",
            "twitter_card",
            "twitter_title",
            "twitter_description",
            "twitter_image",
            "twitter_site",
            "twitter_creator",
            "canonical",
            "base_href",
        ];
        expected.iter().for_each(|name| {
            assert!(is_valid_format_field(name), "Missing web field: {}", name);
        });
    }

    /// All table names are accepted.
    #[test]
    fn test_all_table_fields() {
        let expected = ["table_count", "table_rows", "table_cols"];
        expected.iter().for_each(|name| {
            assert!(is_valid_format_field(name), "Missing table field: {}", name);
        });
    }
}