// halldyll_core/parse/router.rs

/// Coarse classification of a payload's media type.
///
/// Values are produced either from a `Content-Type` header value
/// (`ContentType::from_header`) or by sniffing the leading bytes of the
/// payload (`ContentType::detect_from_content`).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ContentType {
    /// `text/html`, or content that looks like an HTML document.
    Html,
    /// `application/xhtml+xml`.
    Xhtml,
    /// `application/xml` / `text/xml`, or content starting with an XML declaration.
    Xml,
    /// `application/json` / `text/json`, or content starting with `{` or `[`.
    Json,
    /// `application/pdf`, or content starting with the `%PDF` signature.
    Pdf,
    /// `text/plain`.
    PlainText,
    /// Any `image/*` type; the payload is the subtype (e.g. `"png"`, `"jpeg"`).
    Image(String),
    /// Any `video/*` type; the payload is the subtype after `video/`.
    Video(String),
    /// Any `audio/*` type; the payload is the subtype after `audio/`.
    Audio(String),
    /// `application/octet-stream`.
    Binary,
    /// No header was supplied, or nothing recognizable was found.
    Unknown,
}

31impl ContentType {
32 pub fn from_header(content_type: Option<&str>) -> Self {
34 let ct = match content_type {
35 Some(ct) => ct.split(';').next().unwrap_or("").trim().to_lowercase(),
36 None => return ContentType::Unknown,
37 };
38
39 match ct.as_str() {
40 "text/html" => ContentType::Html,
41 "application/xhtml+xml" => ContentType::Xhtml,
42 "application/xml" | "text/xml" => ContentType::Xml,
43 "application/json" | "text/json" => ContentType::Json,
44 "application/pdf" => ContentType::Pdf,
45 "text/plain" => ContentType::PlainText,
46 _ if ct.starts_with("image/") => {
47 ContentType::Image(ct.strip_prefix("image/").unwrap_or("").to_string())
48 }
49 _ if ct.starts_with("video/") => {
50 ContentType::Video(ct.strip_prefix("video/").unwrap_or("").to_string())
51 }
52 _ if ct.starts_with("audio/") => {
53 ContentType::Audio(ct.strip_prefix("audio/").unwrap_or("").to_string())
54 }
55 _ if ct.starts_with("application/octet-stream") => ContentType::Binary,
56 _ => ContentType::Unknown,
57 }
58 }
59
60 pub fn detect_from_content(content: &[u8]) -> Self {
62 if content.len() < 4 {
63 return ContentType::Unknown;
64 }
65
66 if content.starts_with(b"%PDF") {
68 return ContentType::Pdf;
69 }
70
71 if content.starts_with(&[0x89, 0x50, 0x4E, 0x47]) {
73 return ContentType::Image("png".to_string());
74 }
75 if content.starts_with(&[0xFF, 0xD8, 0xFF]) {
76 return ContentType::Image("jpeg".to_string());
77 }
78 if content.starts_with(b"GIF87a") || content.starts_with(b"GIF89a") {
79 return ContentType::Image("gif".to_string());
80 }
81 if content.starts_with(b"RIFF") && content.len() > 12 && &content[8..12] == b"WEBP" {
82 return ContentType::Image("webp".to_string());
83 }
84
85 let start = String::from_utf8_lossy(&content[..std::cmp::min(1024, content.len())]);
87 let start_lower = start.to_lowercase();
88 if start_lower.contains("<!doctype html") || start_lower.contains("<html") {
89 return ContentType::Html;
90 }
91
92 if start.trim_start().starts_with("<?xml") {
94 return ContentType::Xml;
95 }
96
97 let trimmed = start.trim_start();
99 if trimmed.starts_with('{') || trimmed.starts_with('[') {
100 return ContentType::Json;
101 }
102
103 ContentType::Unknown
104 }
105
106 pub fn is_html(&self) -> bool {
108 matches!(self, ContentType::Html | ContentType::Xhtml)
109 }
110
111 pub fn is_text(&self) -> bool {
113 matches!(
114 self,
115 ContentType::Html
116 | ContentType::Xhtml
117 | ContentType::Xml
118 | ContentType::Json
119 | ContentType::PlainText
120 )
121 }
122}
123
124pub struct ContentRouter;
126
127impl ContentRouter {
128 pub fn detect(content_type_header: Option<&str>, content: &[u8]) -> ContentType {
130 let from_header = ContentType::from_header(content_type_header);
132 if from_header != ContentType::Unknown {
133 return from_header;
134 }
135
136 ContentType::detect_from_content(content)
138 }
139
140 pub fn is_extractable(content_type: &ContentType) -> bool {
142 matches!(
143 content_type,
144 ContentType::Html | ContentType::Xhtml | ContentType::Xml | ContentType::PlainText
145 )
146 }
147}