Skip to main content

docspec_http/
mime_parser.rs

1//! HTTP `Accept` negotiation and `Content-Type` validation for the conversion API.
2
3use axum::http::HeaderValue;
4use docspec::OutputFormat;
5
6use crate::error::HttpError;
7use crate::format::{OUTPUT_MIME_ALIAS, OUTPUT_MIME_OXA_PRIMARY, OUTPUT_MIME_PRIMARY};
8
9/// Negotiates the `Accept` header for the `/conversion` endpoint.
10///
11/// Returns [`OutputFormat::Oxa`] for `Accept: application/vnd.oxa+json`, and
12/// [`OutputFormat::Blocknote`] for the `BlockNote` MIMEs, `application/*`, `*/*`, and
13/// missing `Accept`. Wildcards default to `BlockNote` for back-compat with pre-oxa
14/// clients. When `Accept` lists multiple types, the first whose bare MIME matches a
15/// supported value wins (case-insensitive); `q=...` is stripped.
16///
17/// # Errors
18///
19/// Returns [`HttpError::NotAcceptable`] if no acceptable MIME type found.
20#[inline]
21pub fn negotiate_accept(header_value: Option<&HeaderValue>) -> Result<OutputFormat, HttpError> {
22    // Missing Accept == */* per RFC 7231 §5.3.2
23    let Some(header_val) = header_value else {
24        return Ok(OutputFormat::Blocknote);
25    };
26    let header_str = header_val
27        .to_str()
28        .map_err(|_err| HttpError::NotAcceptable)?;
29
30    for part in header_str.split(',') {
31        let type_part = part.trim().split(';').next().map_or("", str::trim);
32        if type_part.eq_ignore_ascii_case(OUTPUT_MIME_OXA_PRIMARY) {
33            return Ok(OutputFormat::Oxa);
34        }
35        if type_part.eq_ignore_ascii_case("*/*")
36            || type_part.eq_ignore_ascii_case("application/*")
37            || type_part.eq_ignore_ascii_case(OUTPUT_MIME_PRIMARY)
38            || type_part.eq_ignore_ascii_case(OUTPUT_MIME_ALIAS)
39        {
40            return Ok(OutputFormat::Blocknote);
41        }
42    }
43    Err(HttpError::NotAcceptable)
44}
45
46/// Validates the `Content-Type` header for the `/conversion` endpoint.
47///
48/// Accepts `text/markdown` with no charset, or `text/markdown; charset=utf-8`
49/// (case-insensitive). Any other charset is rejected — the handler always
50/// decodes the body as UTF-8, so a non-UTF-8 charset is unsupportable.
51/// Returns `Err` if the header is missing, malformed, the MIME type is not
52/// `text/markdown`, or the charset is anything other than `utf-8`.
53///
54/// # Errors
55///
56/// Returns [`HttpError::UnsupportedMediaType`] with the received value (or `None` if missing).
57#[inline]
58pub fn validate_content_type(header_value: Option<&HeaderValue>) -> Result<(), HttpError> {
59    let Some(header_val) = header_value else {
60        return Err(HttpError::UnsupportedMediaType { received: None });
61    };
62    let header_str = header_val
63        .to_str()
64        .ok()
65        .ok_or_else(|| HttpError::UnsupportedMediaType {
66            received: Some("<invalid header value>".to_owned()),
67        })?;
68    let parsed: mime::Mime =
69        header_str
70            .parse()
71            .ok()
72            .ok_or_else(|| HttpError::UnsupportedMediaType {
73                received: Some(header_str.to_owned()),
74            })?;
75    if parsed.type_() != mime::TEXT || parsed.subtype().as_str() != "markdown" {
76        return Err(HttpError::UnsupportedMediaType {
77            received: Some(header_str.to_owned()),
78        });
79    }
80    if let Some(charset) = parsed.get_param(mime::CHARSET) {
81        if !charset.as_str().eq_ignore_ascii_case("utf-8") {
82            return Err(HttpError::UnsupportedMediaType {
83                received: Some(header_str.to_owned()),
84            });
85        }
86    }
87    // Strict: only the optional charset parameter is allowed. Unknown params
88    // (e.g. `boundary`, `format`) cause 415 to prevent accidental acceptance
89    // of unrelated media types that happen to share the text/markdown prefix.
90    for (name, _) in parsed.params() {
91        if name != mime::CHARSET {
92            return Err(HttpError::UnsupportedMediaType {
93                received: Some(header_str.to_owned()),
94            });
95        }
96    }
97    Ok(())
98}
99
100/// Returns the bounded `input_mime_type` label value for a `Content-Type` header.
101///
102/// This function is intentionally MORE permissive than [`validate_content_type`]:
103/// it returns [`crate::metrics::INPUT_MIME_MARKDOWN`] for any `text/markdown`
104/// value regardless of charset or other parameters, because the label answers
105/// "what did the client try to send?" rather than "is it valid?".
106///
107/// # Label values
108///
109/// - [`crate::metrics::INPUT_MIME_NONE`] — header absent
110/// - [`crate::metrics::INPUT_MIME_MARKDOWN`] — `text/markdown` (any params)
111/// - [`crate::metrics::INPUT_MIME_UNSUPPORTED`] — anything else
112#[must_use]
113#[inline]
114pub fn bucket_input_mime(header_value: Option<&HeaderValue>) -> &'static str {
115    let Some(header_val) = header_value else {
116        return crate::metrics::INPUT_MIME_NONE;
117    };
118    let Ok(header_str) = header_val.to_str() else {
119        return crate::metrics::INPUT_MIME_UNSUPPORTED;
120    };
121    let Ok(parsed) = header_str.parse::<mime::Mime>() else {
122        return crate::metrics::INPUT_MIME_UNSUPPORTED;
123    };
124    if parsed.type_() == mime::TEXT && parsed.subtype().as_str() == "markdown" {
125        crate::metrics::INPUT_MIME_MARKDOWN
126    } else {
127        crate::metrics::INPUT_MIME_UNSUPPORTED
128    }
129}
130
131/// Returns the bounded `output_mime_type` label value for a conversion outcome.
132///
133/// `chosen_format` is `None` for any error path, and `Some(format)` on success.
134#[inline]
135#[must_use]
136pub fn bucket_output_mime(chosen_format: Option<OutputFormat>) -> &'static str {
137    match chosen_format {
138        None => crate::metrics::OUTPUT_MIME_NONE,
139        Some(OutputFormat::Blocknote) => crate::metrics::OUTPUT_MIME_BLOCKNOTE,
140        Some(OutputFormat::Oxa) => crate::metrics::OUTPUT_MIME_OXA,
141    }
142}
143
#[cfg(test)]
mod bucket_tests {
    #![allow(
        clippy::tests_outside_test_module,
        clippy::unwrap_used,
        clippy::expect_used
    )]

    use super::*;
    use axum::http::HeaderValue;
    use crate::metrics::{
        INPUT_MIME_MARKDOWN, INPUT_MIME_NONE, INPUT_MIME_UNSUPPORTED, OUTPUT_MIME_BLOCKNOTE,
        OUTPUT_MIME_NONE, OUTPUT_MIME_OXA,
    };

    /// Buckets `raw` as though it were the request's `Content-Type` header.
    fn input_label(raw: &'static str) -> &'static str {
        let header = HeaderValue::from_static(raw);
        bucket_input_mime(Some(&header))
    }

    // ─── bucket_input_mime tests ───────────────────────────────────────────

    #[test]
    fn bucket_input_mime_none_when_header_absent() {
        assert_eq!(bucket_input_mime(None), INPUT_MIME_NONE);
    }

    #[test]
    fn bucket_input_mime_markdown_when_text_markdown() {
        assert_eq!(input_label("text/markdown"), INPUT_MIME_MARKDOWN);
    }

    #[test]
    fn bucket_input_mime_markdown_when_text_markdown_with_charset() {
        assert_eq!(
            input_label("text/markdown; charset=utf-8"),
            INPUT_MIME_MARKDOWN
        );
    }

    #[test]
    fn bucket_input_mime_markdown_case_insensitive() {
        assert_eq!(input_label("TEXT/MARKDOWN"), INPUT_MIME_MARKDOWN);
    }

    #[test]
    fn bucket_input_mime_unsupported_when_other_format() {
        assert_eq!(input_label("application/pdf"), INPUT_MIME_UNSUPPORTED);
    }

    #[test]
    fn bucket_input_mime_unsupported_when_malformed() {
        assert_eq!(
            input_label("not a mime type at all"),
            INPUT_MIME_UNSUPPORTED
        );
    }

    #[test]
    fn bucket_input_mime_unsupported_when_non_ascii() {
        // Raw bytes, because these cannot be expressed as a static header string.
        let header = HeaderValue::from_bytes(&[0xFF, 0xFE]).unwrap();
        assert_eq!(bucket_input_mime(Some(&header)), INPUT_MIME_UNSUPPORTED);
    }

    // ─── bucket_output_mime tests ──────────────────────────────────────────

    #[test]
    fn bucket_output_mime_blocknote_when_blocknote_succeeded() {
        let label = bucket_output_mime(Some(OutputFormat::Blocknote));
        assert_eq!(label, OUTPUT_MIME_BLOCKNOTE);
    }

    #[test]
    fn bucket_output_mime_oxa_when_oxa_succeeded() {
        let label = bucket_output_mime(Some(OutputFormat::Oxa));
        assert_eq!(label, OUTPUT_MIME_OXA);
    }

    #[test]
    fn bucket_output_mime_none_when_no_format_chosen() {
        assert_eq!(bucket_output_mime(None), OUTPUT_MIME_NONE);
    }
}