Skip to main content

docspec_http/
mime_parser.rs

1//! HTTP `Accept` negotiation and `Content-Type` validation for the conversion API.
2
3use axum::http::HeaderValue;
4use docspec::{InputFormat, OutputFormat};
5
6use crate::error::HttpError;
7use crate::format::{OUTPUT_MIME_ALIAS, OUTPUT_MIME_OXA_PRIMARY, OUTPUT_MIME_PRIMARY};
8
9/// Negotiates the `Accept` header for the `/conversion` endpoint.
10///
11/// Returns [`OutputFormat::Oxa`] for `Accept: application/vnd.oxa+json`, and
12/// [`OutputFormat::Blocknote`] for the `BlockNote` MIMEs, `application/*`, `*/*`, and
13/// missing `Accept`. Wildcards default to `BlockNote` for back-compat with pre-oxa
14/// clients. When `Accept` lists multiple types, the first whose bare MIME matches a
15/// supported value wins (case-insensitive); `q=...` is stripped.
16///
17/// # Errors
18///
19/// Returns [`HttpError::NotAcceptable`] if no acceptable MIME type found.
20#[inline]
21pub fn negotiate_accept(header_value: Option<&HeaderValue>) -> Result<OutputFormat, HttpError> {
22    // Missing Accept == */* per RFC 7231 §5.3.2
23    let Some(header_val) = header_value else {
24        return Ok(OutputFormat::Blocknote);
25    };
26    let header_str = header_val
27        .to_str()
28        .map_err(|_err| HttpError::NotAcceptable)?;
29
30    for part in header_str.split(',') {
31        let type_part = part.trim().split(';').next().map_or("", str::trim);
32        if type_part.eq_ignore_ascii_case(OUTPUT_MIME_OXA_PRIMARY) {
33            return Ok(OutputFormat::Oxa);
34        }
35        if type_part.eq_ignore_ascii_case("*/*")
36            || type_part.eq_ignore_ascii_case("application/*")
37            || type_part.eq_ignore_ascii_case(OUTPUT_MIME_PRIMARY)
38            || type_part.eq_ignore_ascii_case(OUTPUT_MIME_ALIAS)
39        {
40            return Ok(OutputFormat::Blocknote);
41        }
42    }
43    Err(HttpError::NotAcceptable)
44}
45
46/// Validates the `Content-Type` header for the `/conversion` endpoint and
47/// resolves it to the matching reader format.
48///
49/// Accepts `text/markdown` ([`InputFormat::Markdown`]) and `text/html`
50/// ([`InputFormat::Html`]), each with no charset, or with `charset=utf-8`
51/// (case-insensitive). Any other charset is rejected — the handler always
52/// decodes the body as UTF-8, so a non-UTF-8 charset is unsupportable.
53/// Returns `Err` if the header is missing, malformed, the MIME type is
54/// neither `text/markdown` nor `text/html`, the charset is anything other
55/// than `utf-8`, or an unknown parameter is present.
56///
57/// # Errors
58///
59/// Returns [`HttpError::UnsupportedMediaType`] with the received value (or `None` if missing).
60#[inline]
61pub fn validate_content_type(header_value: Option<&HeaderValue>) -> Result<InputFormat, HttpError> {
62    let Some(header_val) = header_value else {
63        return Err(HttpError::UnsupportedMediaType { received: None });
64    };
65    let header_str = header_val
66        .to_str()
67        .ok()
68        .ok_or_else(|| HttpError::UnsupportedMediaType {
69            received: Some("<invalid header value>".to_owned()),
70        })?;
71    let parsed: mime::Mime =
72        header_str
73            .parse()
74            .ok()
75            .ok_or_else(|| HttpError::UnsupportedMediaType {
76                received: Some(header_str.to_owned()),
77            })?;
78    let format = match (parsed.type_(), parsed.subtype().as_str()) {
79        (mime::TEXT, "markdown") => InputFormat::Markdown,
80        (mime::TEXT, "html") => InputFormat::Html,
81        _ => {
82            return Err(HttpError::UnsupportedMediaType {
83                received: Some(header_str.to_owned()),
84            });
85        }
86    };
87    if let Some(charset) = parsed.get_param(mime::CHARSET) {
88        if !charset.as_str().eq_ignore_ascii_case("utf-8") {
89            return Err(HttpError::UnsupportedMediaType {
90                received: Some(header_str.to_owned()),
91            });
92        }
93    }
94    // Strict: only the optional charset parameter is allowed. Unknown params
95    // (e.g. `boundary`, `format`) cause 415 to prevent accidental acceptance
96    // of unrelated media types that happen to share the text/markdown prefix.
97    for (name, _) in parsed.params() {
98        if name != mime::CHARSET {
99            return Err(HttpError::UnsupportedMediaType {
100                received: Some(header_str.to_owned()),
101            });
102        }
103    }
104    Ok(format)
105}
106
107/// Returns the bounded `input_mime_type` label value for a `Content-Type` header.
108///
109/// This function is intentionally MORE permissive than [`validate_content_type`]:
110/// it returns the matching label for any `text/markdown` or `text/html` value
111/// regardless of charset or other parameters, because the label answers
112/// "what did the client try to send?" rather than "is it valid?".
113///
114/// # Label values
115///
116/// - [`crate::metrics::INPUT_MIME_NONE`] — header absent
117/// - [`crate::metrics::INPUT_MIME_MARKDOWN`] — `text/markdown` (any params)
118/// - [`crate::metrics::INPUT_MIME_HTML`] — `text/html` (any params)
119/// - [`crate::metrics::INPUT_MIME_UNSUPPORTED`] — anything else
120#[must_use]
121#[inline]
122pub fn bucket_input_mime(header_value: Option<&HeaderValue>) -> &'static str {
123    let Some(header_val) = header_value else {
124        return crate::metrics::INPUT_MIME_NONE;
125    };
126    let Ok(header_str) = header_val.to_str() else {
127        return crate::metrics::INPUT_MIME_UNSUPPORTED;
128    };
129    let Ok(parsed) = header_str.parse::<mime::Mime>() else {
130        return crate::metrics::INPUT_MIME_UNSUPPORTED;
131    };
132    match (parsed.type_(), parsed.subtype().as_str()) {
133        (mime::TEXT, "markdown") => crate::metrics::INPUT_MIME_MARKDOWN,
134        (mime::TEXT, "html") => crate::metrics::INPUT_MIME_HTML,
135        _ => crate::metrics::INPUT_MIME_UNSUPPORTED,
136    }
137}
138
139/// Returns the bounded `output_mime_type` label value for a conversion outcome.
140///
141/// `chosen_format` is `None` for any error path, and `Some(format)` on success.
142#[inline]
143#[must_use]
144pub fn bucket_output_mime(chosen_format: Option<OutputFormat>) -> &'static str {
145    match chosen_format {
146        None => crate::metrics::OUTPUT_MIME_NONE,
147        Some(OutputFormat::Blocknote) => crate::metrics::OUTPUT_MIME_BLOCKNOTE,
148        Some(OutputFormat::Oxa) => crate::metrics::OUTPUT_MIME_OXA,
149    }
150}
151
#[cfg(test)]
mod bucket_tests {
    #![allow(
        clippy::tests_outside_test_module,
        clippy::unwrap_used,
        clippy::expect_used
    )]

    use super::*;
    use crate::metrics;
    use axum::http::HeaderValue;

    /// Buckets a `Content-Type` header built from the given static text.
    fn input_bucket(raw: &'static str) -> &'static str {
        let header = HeaderValue::from_static(raw);
        bucket_input_mime(Some(&header))
    }

    // ─── bucket_input_mime tests ───────────────────────────────────────────

    #[test]
    fn bucket_input_mime_none_when_header_absent() {
        assert_eq!(bucket_input_mime(None), metrics::INPUT_MIME_NONE);
    }

    #[test]
    fn bucket_input_mime_markdown_when_text_markdown() {
        assert_eq!(input_bucket("text/markdown"), metrics::INPUT_MIME_MARKDOWN);
    }

    #[test]
    fn bucket_input_mime_markdown_when_text_markdown_with_charset() {
        assert_eq!(
            input_bucket("text/markdown; charset=utf-8"),
            metrics::INPUT_MIME_MARKDOWN
        );
    }

    #[test]
    fn bucket_input_mime_markdown_case_insensitive() {
        assert_eq!(input_bucket("TEXT/MARKDOWN"), metrics::INPUT_MIME_MARKDOWN);
    }

    #[test]
    fn bucket_input_mime_html_when_text_html() {
        assert_eq!(input_bucket("text/html"), metrics::INPUT_MIME_HTML);
    }

    #[test]
    fn bucket_input_mime_html_when_text_html_with_charset() {
        assert_eq!(
            input_bucket("text/html; charset=utf-8"),
            metrics::INPUT_MIME_HTML
        );
    }

    #[test]
    fn bucket_input_mime_html_case_insensitive() {
        assert_eq!(input_bucket("TEXT/HTML"), metrics::INPUT_MIME_HTML);
    }

    #[test]
    fn bucket_input_mime_html_with_non_utf8_charset_still_buckets_html() {
        assert_eq!(
            input_bucket("text/html; charset=iso-8859-1"),
            metrics::INPUT_MIME_HTML
        );
    }

    #[test]
    fn bucket_input_mime_unsupported_when_other_format() {
        assert_eq!(
            input_bucket("application/pdf"),
            metrics::INPUT_MIME_UNSUPPORTED
        );
    }

    #[test]
    fn bucket_input_mime_unsupported_when_malformed() {
        assert_eq!(
            input_bucket("not a mime type at all"),
            metrics::INPUT_MIME_UNSUPPORTED
        );
    }

    #[test]
    fn bucket_input_mime_unsupported_when_non_ascii() {
        // Bytes outside visible ASCII cannot be built with `from_static`.
        let header = HeaderValue::from_bytes(&[0xFF, 0xFE]).unwrap();
        assert_eq!(
            bucket_input_mime(Some(&header)),
            metrics::INPUT_MIME_UNSUPPORTED
        );
    }

    // ─── bucket_output_mime tests ──────────────────────────────────────────

    #[test]
    fn bucket_output_mime_blocknote_when_blocknote_succeeded() {
        assert_eq!(
            bucket_output_mime(Some(OutputFormat::Blocknote)),
            metrics::OUTPUT_MIME_BLOCKNOTE
        );
    }

    #[test]
    fn bucket_output_mime_oxa_when_oxa_succeeded() {
        assert_eq!(
            bucket_output_mime(Some(OutputFormat::Oxa)),
            metrics::OUTPUT_MIME_OXA
        );
    }

    #[test]
    fn bucket_output_mime_none_when_no_format_chosen() {
        assert_eq!(bucket_output_mime(None), metrics::OUTPUT_MIME_NONE);
    }
}