Skip to main content

docspec_http/
mime_parser.rs

1//! HTTP `Accept` negotiation and `Content-Type` validation for the conversion API.
2
3use axum::http::HeaderValue;
4
5use crate::error::HttpError;
6use crate::format::{OUTPUT_MIME_ALIAS, OUTPUT_MIME_PRIMARY};
7
8/// Negotiates the `Accept` header for the `/conversion` endpoint.
9///
10/// Returns the primary output MIME type string for:
11/// - Missing `Accept` header (HTTP default is `*/*`)
12/// - `Accept: */*`
13/// - `Accept: application/*`
14/// - `Accept: application/vnd.docspec.blocknote+json`
15/// - `Accept: application/vnd.blocknote+json` (alias)
16///
17/// Returns `Err(HttpError::NotAcceptable)` for any other value.
18///
19/// Quality parameters (`q=...`) are stripped and ignored.
20///
21/// # Errors
22///
23/// Returns [`HttpError::NotAcceptable`] if no acceptable MIME type found.
24#[inline]
25pub fn negotiate_accept(header_value: Option<&HeaderValue>) -> Result<&'static str, HttpError> {
26    // Missing Accept == */* per RFC 7231 §5.3.2
27    let Some(header_val) = header_value else {
28        return Ok(OUTPUT_MIME_PRIMARY);
29    };
30    let header_str = header_val
31        .to_str()
32        .map_err(|_err| HttpError::NotAcceptable)?;
33
34    for part in header_str.split(',') {
35        let type_part = part.trim().split(';').next().map_or("", str::trim);
36        if type_part.eq_ignore_ascii_case("*/*")
37            || type_part.eq_ignore_ascii_case("application/*")
38            || type_part.eq_ignore_ascii_case(OUTPUT_MIME_PRIMARY)
39            || type_part.eq_ignore_ascii_case(OUTPUT_MIME_ALIAS)
40        {
41            return Ok(OUTPUT_MIME_PRIMARY);
42        }
43    }
44    Err(HttpError::NotAcceptable)
45}
46
47/// Validates the `Content-Type` header for the `/conversion` endpoint.
48///
49/// Accepts `text/markdown` with no charset, or `text/markdown; charset=utf-8`
50/// (case-insensitive). Any other charset is rejected — the handler always
51/// decodes the body as UTF-8, so a non-UTF-8 charset is unsupportable.
52/// Returns `Err` if the header is missing, malformed, the MIME type is not
53/// `text/markdown`, or the charset is anything other than `utf-8`.
54///
55/// # Errors
56///
57/// Returns [`HttpError::UnsupportedMediaType`] with the received value (or `None` if missing).
58#[inline]
59pub fn validate_content_type(header_value: Option<&HeaderValue>) -> Result<(), HttpError> {
60    let Some(header_val) = header_value else {
61        return Err(HttpError::UnsupportedMediaType { received: None });
62    };
63    let header_str = header_val
64        .to_str()
65        .ok()
66        .ok_or_else(|| HttpError::UnsupportedMediaType {
67            received: Some("<invalid header value>".to_owned()),
68        })?;
69    let parsed: mime::Mime =
70        header_str
71            .parse()
72            .ok()
73            .ok_or_else(|| HttpError::UnsupportedMediaType {
74                received: Some(header_str.to_owned()),
75            })?;
76    if parsed.type_() != mime::TEXT || parsed.subtype().as_str() != "markdown" {
77        return Err(HttpError::UnsupportedMediaType {
78            received: Some(header_str.to_owned()),
79        });
80    }
81    if let Some(charset) = parsed.get_param(mime::CHARSET) {
82        if !charset.as_str().eq_ignore_ascii_case("utf-8") {
83            return Err(HttpError::UnsupportedMediaType {
84                received: Some(header_str.to_owned()),
85            });
86        }
87    }
88    // Strict: only the optional charset parameter is allowed. Unknown params
89    // (e.g. `boundary`, `format`) cause 415 to prevent accidental acceptance
90    // of unrelated media types that happen to share the text/markdown prefix.
91    for (name, _) in parsed.params() {
92        if name != mime::CHARSET {
93            return Err(HttpError::UnsupportedMediaType {
94                received: Some(header_str.to_owned()),
95            });
96        }
97    }
98    Ok(())
99}
100
101/// Returns the bounded `input_mime_type` label value for a `Content-Type` header.
102///
103/// This function is intentionally MORE permissive than [`validate_content_type`]:
104/// it returns [`crate::metrics::INPUT_MIME_MARKDOWN`] for any `text/markdown`
105/// value regardless of charset or other parameters, because the label answers
106/// "what did the client try to send?" rather than "is it valid?".
107///
108/// # Label values
109///
110/// - [`crate::metrics::INPUT_MIME_NONE`] — header absent
111/// - [`crate::metrics::INPUT_MIME_MARKDOWN`] — `text/markdown` (any params)
112/// - [`crate::metrics::INPUT_MIME_UNSUPPORTED`] — anything else
113#[must_use]
114#[inline]
115pub fn bucket_input_mime(header_value: Option<&HeaderValue>) -> &'static str {
116    let Some(header_val) = header_value else {
117        return crate::metrics::INPUT_MIME_NONE;
118    };
119    let Ok(header_str) = header_val.to_str() else {
120        return crate::metrics::INPUT_MIME_UNSUPPORTED;
121    };
122    let Ok(parsed) = header_str.parse::<mime::Mime>() else {
123        return crate::metrics::INPUT_MIME_UNSUPPORTED;
124    };
125    if parsed.type_() == mime::TEXT && parsed.subtype().as_str() == "markdown" {
126        crate::metrics::INPUT_MIME_MARKDOWN
127    } else {
128        crate::metrics::INPUT_MIME_UNSUPPORTED
129    }
130}
131
132/// Returns the bounded `output_mime_type` label value for a conversion outcome.
133///
134/// When `conversion_ok` is `false`, no output was produced, so the label is
135/// always [`crate::metrics::OUTPUT_MIME_NONE`].
136///
137/// When `conversion_ok` is `true`, the output is always `BlockNote` JSON because
138/// the writer is fixed today.
139///
140/// # Label values
141///
142/// - [`crate::metrics::OUTPUT_MIME_NONE`] — conversion failed (no output)
143/// - [`crate::metrics::OUTPUT_MIME_BLOCKNOTE`] — conversion succeeded
144#[inline]
145#[must_use]
146pub fn bucket_output_mime(conversion_ok: bool) -> &'static str {
147    if conversion_ok {
148        crate::metrics::OUTPUT_MIME_BLOCKNOTE
149    } else {
150        crate::metrics::OUTPUT_MIME_NONE
151    }
152}
153
#[cfg(test)]
mod bucket_tests {
    #![allow(
        clippy::tests_outside_test_module,
        clippy::unwrap_used,
        clippy::expect_used
    )]

    use super::*;
    use axum::http::HeaderValue;

    /// Buckets a `Content-Type` header built from a static string.
    fn input_bucket_for(raw: &'static str) -> &'static str {
        let header = HeaderValue::from_static(raw);
        bucket_input_mime(Some(&header))
    }

    // ─── bucket_input_mime tests ───────────────────────────────────────────

    #[test]
    fn bucket_input_mime_none_when_header_absent() {
        let label = bucket_input_mime(None);
        assert_eq!(label, crate::metrics::INPUT_MIME_NONE);
    }

    #[test]
    fn bucket_input_mime_markdown_when_text_markdown() {
        let label = input_bucket_for("text/markdown");
        assert_eq!(label, crate::metrics::INPUT_MIME_MARKDOWN);
    }

    #[test]
    fn bucket_input_mime_markdown_when_text_markdown_with_charset() {
        let label = input_bucket_for("text/markdown; charset=utf-8");
        assert_eq!(label, crate::metrics::INPUT_MIME_MARKDOWN);
    }

    #[test]
    fn bucket_input_mime_markdown_case_insensitive() {
        let label = input_bucket_for("TEXT/MARKDOWN");
        assert_eq!(label, crate::metrics::INPUT_MIME_MARKDOWN);
    }

    #[test]
    fn bucket_input_mime_unsupported_when_other_format() {
        let label = input_bucket_for("application/pdf");
        assert_eq!(label, crate::metrics::INPUT_MIME_UNSUPPORTED);
    }

    #[test]
    fn bucket_input_mime_unsupported_when_malformed() {
        let label = input_bucket_for("not a mime type at all");
        assert_eq!(label, crate::metrics::INPUT_MIME_UNSUPPORTED);
    }

    #[test]
    fn bucket_input_mime_unsupported_when_non_ascii() {
        // Bytes that form a header value but are not readable as a string.
        let header = HeaderValue::from_bytes(&[0xFF, 0xFE]).unwrap();
        let label = bucket_input_mime(Some(&header));
        assert_eq!(label, crate::metrics::INPUT_MIME_UNSUPPORTED);
    }

    // ─── bucket_output_mime tests ──────────────────────────────────────────

    #[test]
    fn bucket_output_mime_blocknote_when_success() {
        let label = bucket_output_mime(true);
        assert_eq!(label, crate::metrics::OUTPUT_MIME_BLOCKNOTE);
    }

    #[test]
    fn bucket_output_mime_none_when_failure() {
        let label = bucket_output_mime(false);
        assert_eq!(label, crate::metrics::OUTPUT_MIME_NONE);
    }
}