docspec_http/
mime_parser.rs1use axum::http::HeaderValue;
4use docspec::{InputFormat, OutputFormat};
5
6use crate::error::HttpError;
7use crate::format::{
8 OUTPUT_MIME_ALIAS, OUTPUT_MIME_HTML_PRIMARY, OUTPUT_MIME_OXA_PRIMARY,
9 OUTPUT_MIME_PANDOC_NATIVE_PRIMARY, OUTPUT_MIME_PRIMARY,
10};
11
12#[inline]
26pub fn negotiate_accept(header_value: Option<&HeaderValue>) -> Result<OutputFormat, HttpError> {
27 let Some(header_val) = header_value else {
29 return Ok(OutputFormat::Blocknote);
30 };
31 let header_str = header_val
32 .to_str()
33 .map_err(|_err| HttpError::NotAcceptable)?;
34
35 for part in header_str.split(',') {
36 let type_part = part.trim().split(';').next().map_or("", str::trim);
37 if type_part.eq_ignore_ascii_case(OUTPUT_MIME_OXA_PRIMARY) {
38 return Ok(OutputFormat::Oxa);
39 }
40 if type_part.eq_ignore_ascii_case(OUTPUT_MIME_PANDOC_NATIVE_PRIMARY) {
41 return Ok(OutputFormat::PandocNative);
42 }
43 if type_part.eq_ignore_ascii_case(OUTPUT_MIME_HTML_PRIMARY) {
44 return Ok(OutputFormat::Html);
45 }
46 if type_part.eq_ignore_ascii_case("*/*")
47 || type_part.eq_ignore_ascii_case("application/*")
48 || type_part.eq_ignore_ascii_case(OUTPUT_MIME_PRIMARY)
49 || type_part.eq_ignore_ascii_case(OUTPUT_MIME_ALIAS)
50 {
51 return Ok(OutputFormat::Blocknote);
52 }
53 }
54 Err(HttpError::NotAcceptable)
55}
56
57#[inline]
74pub fn validate_content_type(header_value: Option<&HeaderValue>) -> Result<InputFormat, HttpError> {
75 let Some(header_val) = header_value else {
76 return Err(HttpError::UnsupportedMediaType { received: None });
77 };
78 let header_str = header_val
79 .to_str()
80 .ok()
81 .ok_or_else(|| HttpError::UnsupportedMediaType {
82 received: Some("<invalid header value>".to_owned()),
83 })?;
84 let parsed: mime::Mime =
85 header_str
86 .parse()
87 .ok()
88 .ok_or_else(|| HttpError::UnsupportedMediaType {
89 received: Some(header_str.to_owned()),
90 })?;
91 if parsed.type_() == mime::APPLICATION
93 && parsed.subtype().as_str()
94 == "vnd.openxmlformats-officedocument.wordprocessingml.document"
95 {
96 if parsed.params().next().is_some() {
97 return Err(HttpError::UnsupportedMediaType {
98 received: Some(header_str.to_owned()),
99 });
100 }
101 return Ok(InputFormat::Docx);
102 }
103 let format = match (parsed.type_(), parsed.subtype().as_str()) {
104 (mime::TEXT, "markdown") => InputFormat::Markdown,
105 (mime::TEXT, "html") => InputFormat::Html,
106 _ => {
107 return Err(HttpError::UnsupportedMediaType {
108 received: Some(header_str.to_owned()),
109 });
110 }
111 };
112 if let Some(charset) = parsed.get_param(mime::CHARSET) {
113 if !charset.as_str().eq_ignore_ascii_case("utf-8") {
114 return Err(HttpError::UnsupportedMediaType {
115 received: Some(header_str.to_owned()),
116 });
117 }
118 }
119 for (name, _) in parsed.params() {
123 if name != mime::CHARSET {
124 return Err(HttpError::UnsupportedMediaType {
125 received: Some(header_str.to_owned()),
126 });
127 }
128 }
129 Ok(format)
130}
131
132#[must_use]
147#[inline]
148pub fn bucket_input_mime(header_value: Option<&HeaderValue>) -> &'static str {
149 let Some(header_val) = header_value else {
150 return crate::metrics::INPUT_MIME_NONE;
151 };
152 let Ok(header_str) = header_val.to_str() else {
153 return crate::metrics::INPUT_MIME_UNSUPPORTED;
154 };
155 let Ok(parsed) = header_str.parse::<mime::Mime>() else {
156 return crate::metrics::INPUT_MIME_UNSUPPORTED;
157 };
158 match (parsed.type_(), parsed.subtype().as_str()) {
159 (mime::TEXT, "markdown") => crate::metrics::INPUT_MIME_MARKDOWN,
160 (mime::TEXT, "html") => crate::metrics::INPUT_MIME_HTML,
161 (mime::APPLICATION, "vnd.openxmlformats-officedocument.wordprocessingml.document") => {
162 crate::metrics::INPUT_MIME_DOCX
163 }
164 _ => crate::metrics::INPUT_MIME_UNSUPPORTED,
165 }
166}
167
168#[inline]
172#[must_use]
173pub fn bucket_output_mime(chosen_format: Option<OutputFormat>) -> &'static str {
174 match chosen_format {
175 Some(OutputFormat::Blocknote) => crate::metrics::OUTPUT_MIME_BLOCKNOTE,
176 Some(OutputFormat::Html) => crate::metrics::OUTPUT_MIME_HTML,
177 Some(OutputFormat::Oxa) => crate::metrics::OUTPUT_MIME_OXA,
178 Some(OutputFormat::PandocNative) => crate::metrics::OUTPUT_MIME_PANDOC_NATIVE,
179 None | Some(_) => crate::metrics::OUTPUT_MIME_NONE,
180 }
181}
182
#[cfg(test)]
mod bucket_tests {
    #![allow(
        clippy::tests_outside_test_module,
        clippy::unwrap_used,
        clippy::expect_used
    )]

    use super::*;
    use axum::http::HeaderValue;

    /// Runs `bucket_input_mime` on a header built from a static string.
    fn input_bucket_for(raw: &'static str) -> &'static str {
        let header = HeaderValue::from_static(raw);
        bucket_input_mime(Some(&header))
    }

    // ---- bucket_input_mime ----

    #[test]
    fn bucket_input_mime_none_when_header_absent() {
        let bucket = bucket_input_mime(None);
        assert_eq!(bucket, crate::metrics::INPUT_MIME_NONE);
    }

    #[test]
    fn bucket_input_mime_markdown_when_text_markdown() {
        let bucket = input_bucket_for("text/markdown");
        assert_eq!(bucket, crate::metrics::INPUT_MIME_MARKDOWN);
    }

    #[test]
    fn bucket_input_mime_markdown_when_text_markdown_with_charset() {
        let bucket = input_bucket_for("text/markdown; charset=utf-8");
        assert_eq!(bucket, crate::metrics::INPUT_MIME_MARKDOWN);
    }

    #[test]
    fn bucket_input_mime_markdown_case_insensitive() {
        let bucket = input_bucket_for("TEXT/MARKDOWN");
        assert_eq!(bucket, crate::metrics::INPUT_MIME_MARKDOWN);
    }

    #[test]
    fn bucket_input_mime_html_when_text_html() {
        let bucket = input_bucket_for("text/html");
        assert_eq!(bucket, crate::metrics::INPUT_MIME_HTML);
    }

    #[test]
    fn bucket_input_mime_html_when_text_html_with_charset() {
        let bucket = input_bucket_for("text/html; charset=utf-8");
        assert_eq!(bucket, crate::metrics::INPUT_MIME_HTML);
    }

    #[test]
    fn bucket_input_mime_html_case_insensitive() {
        let bucket = input_bucket_for("TEXT/HTML");
        assert_eq!(bucket, crate::metrics::INPUT_MIME_HTML);
    }

    #[test]
    fn bucket_input_mime_html_with_non_utf8_charset_still_buckets_html() {
        // Bucketing looks at type/subtype only, so the charset is irrelevant.
        let bucket = input_bucket_for("text/html; charset=iso-8859-1");
        assert_eq!(bucket, crate::metrics::INPUT_MIME_HTML);
    }

    #[test]
    fn bucket_input_mime_unsupported_when_other_format() {
        let bucket = input_bucket_for("application/pdf");
        assert_eq!(bucket, crate::metrics::INPUT_MIME_UNSUPPORTED);
    }

    #[test]
    fn bucket_input_mime_unsupported_when_malformed() {
        let bucket = input_bucket_for("not a mime type at all");
        assert_eq!(bucket, crate::metrics::INPUT_MIME_UNSUPPORTED);
    }

    #[test]
    fn bucket_input_mime_unsupported_when_non_ascii() {
        // These bytes form a valid header value but cannot be read as a string.
        let header = HeaderValue::from_bytes(&[0xFF, 0xFE]).unwrap();
        let bucket = bucket_input_mime(Some(&header));
        assert_eq!(bucket, crate::metrics::INPUT_MIME_UNSUPPORTED);
    }

    // ---- bucket_output_mime ----

    #[test]
    fn bucket_output_mime_blocknote_when_blocknote_succeeded() {
        let bucket = bucket_output_mime(Some(OutputFormat::Blocknote));
        assert_eq!(bucket, crate::metrics::OUTPUT_MIME_BLOCKNOTE);
    }

    #[test]
    fn bucket_output_mime_html_when_html_succeeded() {
        let bucket = bucket_output_mime(Some(OutputFormat::Html));
        assert_eq!(bucket, crate::metrics::OUTPUT_MIME_HTML);
    }

    #[test]
    fn bucket_output_mime_oxa_when_oxa_succeeded() {
        let bucket = bucket_output_mime(Some(OutputFormat::Oxa));
        assert_eq!(bucket, crate::metrics::OUTPUT_MIME_OXA);
    }

    #[test]
    fn bucket_output_mime_pandoc_native_when_pandoc_native_succeeded() {
        let bucket = bucket_output_mime(Some(OutputFormat::PandocNative));
        assert_eq!(bucket, crate::metrics::OUTPUT_MIME_PANDOC_NATIVE);
    }

    #[test]
    fn bucket_output_mime_none_when_no_format_chosen() {
        let bucket = bucket_output_mime(None);
        assert_eq!(bucket, crate::metrics::OUTPUT_MIME_NONE);
    }
}