Skip to main content

docspec_http/
mime_parser.rs

1//! HTTP `Accept` negotiation and `Content-Type` validation for the conversion API.
2
3use axum::http::HeaderValue;
4use docspec::{InputFormat, OutputFormat};
5
6use crate::error::HttpError;
7use crate::format::{
8    OUTPUT_MIME_ALIAS, OUTPUT_MIME_HTML_PRIMARY, OUTPUT_MIME_OXA_PRIMARY, OUTPUT_MIME_PRIMARY,
9};
10
11/// Negotiates the `Accept` header for the `/conversion` endpoint.
12///
13/// Returns [`OutputFormat::Oxa`] for `Accept: application/vnd.oxa+json`,
14/// [`OutputFormat::Html`] for `Accept: text/html`, and [`OutputFormat::Blocknote`]
15/// for the `BlockNote` MIMEs, `application/*`, `*/*`, and missing `Accept`.
16/// Wildcards default to `BlockNote` for back-compat with pre-oxa clients. When
17/// `Accept` lists multiple types, the first whose bare MIME matches a supported
18/// value wins (case-insensitive); `q=...` is stripped.
19///
20/// # Errors
21///
22/// Returns [`HttpError::NotAcceptable`] if no acceptable MIME type found.
23#[inline]
24pub fn negotiate_accept(header_value: Option<&HeaderValue>) -> Result<OutputFormat, HttpError> {
25    // Missing Accept == */* per RFC 7231 §5.3.2
26    let Some(header_val) = header_value else {
27        return Ok(OutputFormat::Blocknote);
28    };
29    let header_str = header_val
30        .to_str()
31        .map_err(|_err| HttpError::NotAcceptable)?;
32
33    for part in header_str.split(',') {
34        let type_part = part.trim().split(';').next().map_or("", str::trim);
35        if type_part.eq_ignore_ascii_case(OUTPUT_MIME_OXA_PRIMARY) {
36            return Ok(OutputFormat::Oxa);
37        }
38        if type_part.eq_ignore_ascii_case(OUTPUT_MIME_HTML_PRIMARY) {
39            return Ok(OutputFormat::Html);
40        }
41        if type_part.eq_ignore_ascii_case("*/*")
42            || type_part.eq_ignore_ascii_case("application/*")
43            || type_part.eq_ignore_ascii_case(OUTPUT_MIME_PRIMARY)
44            || type_part.eq_ignore_ascii_case(OUTPUT_MIME_ALIAS)
45        {
46            return Ok(OutputFormat::Blocknote);
47        }
48    }
49    Err(HttpError::NotAcceptable)
50}
51
52/// Validates the `Content-Type` header for the `/conversion` endpoint and
53/// resolves it to the matching reader format.
54///
55/// Accepts `text/markdown` ([`InputFormat::Markdown`]) and `text/html`
56/// ([`InputFormat::Html`]), each with no charset, or with `charset=utf-8`
57/// (case-insensitive). Any other charset is rejected — the handler always
58/// decodes the body as UTF-8, so a non-UTF-8 charset is unsupportable.
59/// Returns `Err` if the header is missing, malformed, the MIME type is
60/// neither `text/markdown` nor `text/html`, the charset is anything other
61/// than `utf-8`, or an unknown parameter is present.
62///
63/// # Errors
64///
65/// Returns [`HttpError::UnsupportedMediaType`] with the received value (or `None` if missing).
66#[inline]
67pub fn validate_content_type(header_value: Option<&HeaderValue>) -> Result<InputFormat, HttpError> {
68    let Some(header_val) = header_value else {
69        return Err(HttpError::UnsupportedMediaType { received: None });
70    };
71    let header_str = header_val
72        .to_str()
73        .ok()
74        .ok_or_else(|| HttpError::UnsupportedMediaType {
75            received: Some("<invalid header value>".to_owned()),
76        })?;
77    let parsed: mime::Mime =
78        header_str
79            .parse()
80            .ok()
81            .ok_or_else(|| HttpError::UnsupportedMediaType {
82                received: Some(header_str.to_owned()),
83            })?;
84    let format = match (parsed.type_(), parsed.subtype().as_str()) {
85        (mime::TEXT, "markdown") => InputFormat::Markdown,
86        (mime::TEXT, "html") => InputFormat::Html,
87        _ => {
88            return Err(HttpError::UnsupportedMediaType {
89                received: Some(header_str.to_owned()),
90            });
91        }
92    };
93    if let Some(charset) = parsed.get_param(mime::CHARSET) {
94        if !charset.as_str().eq_ignore_ascii_case("utf-8") {
95            return Err(HttpError::UnsupportedMediaType {
96                received: Some(header_str.to_owned()),
97            });
98        }
99    }
100    // Strict: only the optional charset parameter is allowed. Unknown params
101    // (e.g. `boundary`, `format`) cause 415 to prevent accidental acceptance
102    // of unrelated media types that happen to share the text/markdown prefix.
103    for (name, _) in parsed.params() {
104        if name != mime::CHARSET {
105            return Err(HttpError::UnsupportedMediaType {
106                received: Some(header_str.to_owned()),
107            });
108        }
109    }
110    Ok(format)
111}
112
113/// Returns the bounded `input_mime_type` label value for a `Content-Type` header.
114///
115/// This function is intentionally MORE permissive than [`validate_content_type`]:
116/// it returns the matching label for any `text/markdown` or `text/html` value
117/// regardless of charset or other parameters, because the label answers
118/// "what did the client try to send?" rather than "is it valid?".
119///
120/// # Label values
121///
122/// - [`crate::metrics::INPUT_MIME_NONE`] — header absent
123/// - [`crate::metrics::INPUT_MIME_MARKDOWN`] — `text/markdown` (any params)
124/// - [`crate::metrics::INPUT_MIME_HTML`] — `text/html` (any params)
125/// - [`crate::metrics::INPUT_MIME_UNSUPPORTED`] — anything else
126#[must_use]
127#[inline]
128pub fn bucket_input_mime(header_value: Option<&HeaderValue>) -> &'static str {
129    let Some(header_val) = header_value else {
130        return crate::metrics::INPUT_MIME_NONE;
131    };
132    let Ok(header_str) = header_val.to_str() else {
133        return crate::metrics::INPUT_MIME_UNSUPPORTED;
134    };
135    let Ok(parsed) = header_str.parse::<mime::Mime>() else {
136        return crate::metrics::INPUT_MIME_UNSUPPORTED;
137    };
138    match (parsed.type_(), parsed.subtype().as_str()) {
139        (mime::TEXT, "markdown") => crate::metrics::INPUT_MIME_MARKDOWN,
140        (mime::TEXT, "html") => crate::metrics::INPUT_MIME_HTML,
141        _ => crate::metrics::INPUT_MIME_UNSUPPORTED,
142    }
143}
144
145/// Returns the bounded `output_mime_type` label value for a conversion outcome.
146///
147/// `chosen_format` is `None` for any error path, and `Some(format)` on success.
148#[inline]
149#[must_use]
150pub fn bucket_output_mime(chosen_format: Option<OutputFormat>) -> &'static str {
151    match chosen_format {
152        None => crate::metrics::OUTPUT_MIME_NONE,
153        Some(OutputFormat::Blocknote) => crate::metrics::OUTPUT_MIME_BLOCKNOTE,
154        Some(OutputFormat::Html) => crate::metrics::OUTPUT_MIME_HTML,
155        Some(OutputFormat::Oxa) => crate::metrics::OUTPUT_MIME_OXA,
156    }
157}
158
#[cfg(test)]
mod bucket_tests {
    #![allow(
        clippy::tests_outside_test_module,
        clippy::unwrap_used,
        clippy::expect_used
    )]

    use super::*;
    use axum::http::HeaderValue;
    use crate::metrics;

    /// Runs [`bucket_input_mime`] on a header built from a static string.
    fn input_label(raw: &'static str) -> &'static str {
        let header = HeaderValue::from_static(raw);
        bucket_input_mime(Some(&header))
    }

    // ─── bucket_input_mime tests ───────────────────────────────────────────

    #[test]
    fn bucket_input_mime_none_when_header_absent() {
        let label = bucket_input_mime(None);
        assert_eq!(label, metrics::INPUT_MIME_NONE);
    }

    #[test]
    fn bucket_input_mime_markdown_when_text_markdown() {
        assert_eq!(input_label("text/markdown"), metrics::INPUT_MIME_MARKDOWN);
    }

    #[test]
    fn bucket_input_mime_markdown_when_text_markdown_with_charset() {
        assert_eq!(
            input_label("text/markdown; charset=utf-8"),
            metrics::INPUT_MIME_MARKDOWN
        );
    }

    #[test]
    fn bucket_input_mime_markdown_case_insensitive() {
        assert_eq!(input_label("TEXT/MARKDOWN"), metrics::INPUT_MIME_MARKDOWN);
    }

    #[test]
    fn bucket_input_mime_html_when_text_html() {
        assert_eq!(input_label("text/html"), metrics::INPUT_MIME_HTML);
    }

    #[test]
    fn bucket_input_mime_html_when_text_html_with_charset() {
        assert_eq!(
            input_label("text/html; charset=utf-8"),
            metrics::INPUT_MIME_HTML
        );
    }

    #[test]
    fn bucket_input_mime_html_case_insensitive() {
        assert_eq!(input_label("TEXT/HTML"), metrics::INPUT_MIME_HTML);
    }

    #[test]
    fn bucket_input_mime_html_with_non_utf8_charset_still_buckets_html() {
        // Bucketing ignores parameters, so an unsupported charset keeps the label.
        assert_eq!(
            input_label("text/html; charset=iso-8859-1"),
            metrics::INPUT_MIME_HTML
        );
    }

    #[test]
    fn bucket_input_mime_unsupported_when_other_format() {
        assert_eq!(
            input_label("application/pdf"),
            metrics::INPUT_MIME_UNSUPPORTED
        );
    }

    #[test]
    fn bucket_input_mime_unsupported_when_malformed() {
        assert_eq!(
            input_label("not a mime type at all"),
            metrics::INPUT_MIME_UNSUPPORTED
        );
    }

    #[test]
    fn bucket_input_mime_unsupported_when_non_ascii() {
        // Built from raw bytes because the value is not valid static text.
        let header = HeaderValue::from_bytes(&[0xFF, 0xFE]).unwrap();
        let label = bucket_input_mime(Some(&header));
        assert_eq!(label, metrics::INPUT_MIME_UNSUPPORTED);
    }

    // ─── bucket_output_mime tests ──────────────────────────────────────────

    #[test]
    fn bucket_output_mime_blocknote_when_blocknote_succeeded() {
        let label = bucket_output_mime(Some(OutputFormat::Blocknote));
        assert_eq!(label, metrics::OUTPUT_MIME_BLOCKNOTE);
    }

    #[test]
    fn bucket_output_mime_html_when_html_succeeded() {
        let label = bucket_output_mime(Some(OutputFormat::Html));
        assert_eq!(label, metrics::OUTPUT_MIME_HTML);
    }

    #[test]
    fn bucket_output_mime_oxa_when_oxa_succeeded() {
        let label = bucket_output_mime(Some(OutputFormat::Oxa));
        assert_eq!(label, metrics::OUTPUT_MIME_OXA);
    }

    #[test]
    fn bucket_output_mime_none_when_no_format_chosen() {
        let label = bucket_output_mime(None);
        assert_eq!(label, metrics::OUTPUT_MIME_NONE);
    }
}