/// Coarse classification of a resource, derived either from a `Content-Type`
/// header value or from the leading bytes of the body.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ContentType {
    /// `text/html`, or a body containing an HTML doctype / `<html` tag.
    Html,
    /// `application/xhtml+xml`.
    Xhtml,
    /// `application/xml` or `text/xml`, or a body starting with `<?xml`.
    Xml,
    /// `application/json` or `text/json`, or a body starting with `{` / `[`.
    Json,
    /// `application/pdf`, or a body starting with `%PDF`.
    Pdf,
    /// `text/plain`.
    PlainText,
    /// Any `image/*` type; the payload is the lowercased subtype (e.g. `"png"`).
    Image(String),
    /// Any `video/*` type; the payload is the lowercased subtype.
    Video(String),
    /// Any `audio/*` type; the payload is the lowercased subtype.
    Audio(String),
    /// A header value starting with `application/octet-stream`.
    Binary,
    /// Missing header, unrecognised media type, or unrecognised body.
    Unknown,
}

impl ContentType {
    /// Maximum number of leading bytes inspected when sniffing textual formats.
    const SNIFF_WINDOW: usize = 1024;

    /// Classifies a `Content-Type` header value.
    ///
    /// Parameters after the first `;` (such as `charset=...`) are ignored, and
    /// the media type is trimmed and compared case-insensitively.
    ///
    /// Returns [`ContentType::Unknown`] when `content_type` is `None` or the
    /// media type is not one of the recognised values.
    pub fn from_header(content_type: Option<&str>) -> Self {
        let media_type = match content_type {
            Some(raw) => raw.split(';').next().unwrap_or("").trim().to_lowercase(),
            None => return ContentType::Unknown,
        };
        let media_type = media_type.as_str();

        match media_type {
            "text/html" => ContentType::Html,
            "application/xhtml+xml" => ContentType::Xhtml,
            "application/xml" | "text/xml" => ContentType::Xml,
            "application/json" | "text/json" => ContentType::Json,
            "application/pdf" => ContentType::Pdf,
            "text/plain" => ContentType::PlainText,
            _ => {
                // Family matches: keep whatever follows the `type/` prefix as the subtype.
                if let Some(subtype) = media_type.strip_prefix("image/") {
                    ContentType::Image(subtype.to_string())
                } else if let Some(subtype) = media_type.strip_prefix("video/") {
                    ContentType::Video(subtype.to_string())
                } else if let Some(subtype) = media_type.strip_prefix("audio/") {
                    ContentType::Audio(subtype.to_string())
                } else if media_type.starts_with("application/octet-stream") {
                    ContentType::Binary
                } else {
                    ContentType::Unknown
                }
            }
        }
    }

    /// Guesses the content type from the leading bytes of `content`.
    ///
    /// Binary signatures (PDF, PNG, JPEG, GIF, WEBP) are checked first; if none
    /// matches, the first [`Self::SNIFF_WINDOW`] bytes are decoded lossily as
    /// UTF-8 and inspected for JSON, HTML and XML markers.
    ///
    /// Returns [`ContentType::Unknown`] when nothing matches, including for
    /// empty input.
    pub fn detect_from_content(content: &[u8]) -> Self {
        // No minimum-length gate: every check below is length-safe on its own,
        // so short but valid payloads such as `{}` are still classified.
        match Self::sniff_magic(content) {
            Some(detected) => detected,
            None => Self::sniff_text(content),
        }
    }

    /// Matches well-known binary file signatures at the start of `content`.
    fn sniff_magic(content: &[u8]) -> Option<Self> {
        if content.starts_with(b"%PDF") {
            return Some(ContentType::Pdf);
        }
        if content.starts_with(b"\x89PNG") {
            return Some(ContentType::Image("png".to_string()));
        }
        if content.starts_with(b"\xFF\xD8\xFF") {
            return Some(ContentType::Image("jpeg".to_string()));
        }
        if content.starts_with(b"GIF87a") || content.starts_with(b"GIF89a") {
            return Some(ContentType::Image("gif".to_string()));
        }
        // RIFF container: the form type lives at bytes 8..12. `get` makes the
        // range check explicit, so a buffer of exactly 12 bytes is accepted.
        if content.starts_with(b"RIFF") && content.get(8..12) == Some(&b"WEBP"[..]) {
            return Some(ContentType::Image("webp".to_string()));
        }
        None
    }

    /// Inspects the leading text of `content` for JSON, HTML and XML markers.
    fn sniff_text(content: &[u8]) -> Self {
        let window = &content[..content.len().min(Self::SNIFF_WINDOW)];
        let text = String::from_utf8_lossy(window);
        // A UTF-8 BOM decodes to U+FEFF, which `trim_start` does not treat as
        // whitespace; strip it so BOM-prefixed documents are still recognised.
        let body = text.trim_start_matches('\u{feff}').trim_start();

        // JSON is checked before the HTML substring search so that a JSON
        // document carrying markup inside a string is not mistaken for HTML.
        if body.starts_with('{') || body.starts_with('[') {
            return ContentType::Json;
        }

        // The markers are pure ASCII, so ASCII case folding is sufficient.
        let lowered = text.to_ascii_lowercase();
        if lowered.contains("<!doctype html") || lowered.contains("<html") {
            return ContentType::Html;
        }

        if body.starts_with("<?xml") {
            return ContentType::Xml;
        }

        ContentType::Unknown
    }

    /// Returns `true` for [`ContentType::Html`] and [`ContentType::Xhtml`].
    pub fn is_html(&self) -> bool {
        matches!(self, ContentType::Html | ContentType::Xhtml)
    }

    /// Returns `true` for the textual variants: HTML, XHTML, XML, JSON and
    /// plain text.
    pub fn is_text(&self) -> bool {
        matches!(
            self,
            ContentType::Html
                | ContentType::Xhtml
                | ContentType::Xml
                | ContentType::Json
                | ContentType::PlainText
        )
    }
}
/// Stateless helper that combines header-based and body-based content type
/// detection.
pub struct ContentRouter;

impl ContentRouter {
    /// Determines the content type of a response.
    ///
    /// The `Content-Type` header is consulted first; the body bytes are only
    /// inspected when the header yields [`ContentType::Unknown`].
    pub fn detect(content_type_header: Option<&str>, content: &[u8]) -> ContentType {
        match ContentType::from_header(content_type_header) {
            ContentType::Unknown => ContentType::detect_from_content(content),
            declared => declared,
        }
    }

    /// Returns `true` when `content_type` is HTML, XHTML, XML or plain text,
    /// and `false` for every other variant.
    pub fn is_extractable(content_type: &ContentType) -> bool {
        match content_type {
            ContentType::Html
            | ContentType::Xhtml
            | ContentType::Xml
            | ContentType::PlainText => true,
            ContentType::Json
            | ContentType::Pdf
            | ContentType::Image(_)
            | ContentType::Video(_)
            | ContentType::Audio(_)
            | ContentType::Binary
            | ContentType::Unknown => false,
        }
    }
}