Skip to main content

docspec_http/handlers/
conversion.rs

1//! Document conversion request handlers.
2
3use axum::{
4    body::{Body, Bytes},
5    http::{header, HeaderMap, HeaderValue, Response, StatusCode},
6    response::IntoResponse,
7};
8use docspec::OutputFormat;
9use docspec_core::{EventSink as _, EventSource as _};
10
11use crate::{error::HttpError, mime_parser};
12
13/// Handle `OPTIONS /conversion` — returns allowed methods.
14#[allow(clippy::unused_async)]
15// Reason: Axum handlers are async for route consistency even when no await is needed.
16#[inline]
17pub async fn options_conversion() -> impl IntoResponse {
18    (
19        StatusCode::NO_CONTENT,
20        [(header::ALLOW, HeaderValue::from_static("POST, OPTIONS"))],
21    )
22}
23
24/// Handle `POST /conversion` — convert markdown to `BlockNote` or `oxa.dev` JSON.
25///
26/// The output writer is selected by the request's `Accept` header (see
27/// [`crate::mime_parser::negotiate_accept`]).
28///
29/// The request body is buffered, then converted to completion inside
30/// `spawn_blocking`, then returned in a single response. Conversion errors
31/// surface as 422 (parse / sink errors) or 500 (finalize errors) **before**
32/// any response body is sent — no truncated `200 OK` on failure.
33///
34/// `request_id` is accepted as `Option<Extension<RequestId>>` so the handler
35/// remains usable for downstream consumers that mount it standalone without
36/// the [`tower_http::request_id::SetRequestIdLayer`]. When the extension is
37/// absent, the `request_id` field is **omitted** from the structured
38/// `conversion_completed` event rather than logged as an empty string —
39/// "no correlation id supplied" is a distinct state from "supplied empty".
40/// The same treatment applies to `trace_id`, which is only set when the
41/// upstream `X-Trace-ID` header is present.
42///
43/// Conversion outcome metrics are recorded with intentionally different scopes:
44///
45/// - `docspec_conversions_total` and `docspec_conversion_duration_seconds` are
46///   recorded for **every** request to this endpoint — including early
47///   validation failures — so that all outcomes are visible in dashboards.
48/// - `docspec_http_request_body_bytes` is recorded only after `Content-Type`
49///   and `Accept` validation pass and the body is confirmed non-empty.
50/// - `docspec_conversion_output_bytes` is recorded only on successful
51///   conversions (failed conversions produce no output).
52///
53/// # Errors
54///
55/// Returns [`HttpError`] when request headers or body are invalid, the
56/// conversion fails, or the response cannot be constructed.
57#[inline]
58pub async fn post_conversion(
59    request_id: Option<axum::extract::Extension<tower_http::request_id::RequestId>>,
60    headers: HeaderMap,
61    body: Bytes,
62) -> Result<Response<Body>, HttpError> {
63    let input_mime_label = crate::mime_parser::bucket_input_mime(headers.get(header::CONTENT_TYPE));
64    let trace_id_owned: Option<String> = headers
65        .get(axum::http::HeaderName::from_static("x-trace-id"))
66        .and_then(|header_value| header_value.to_str().ok())
67        .map(str::to_owned);
68    let body_len_for_logging = body.len();
69
70    let conversion_start = std::time::Instant::now();
71    let outcome = do_conversion(input_mime_label, headers, body).await;
72    let conversion_duration = conversion_start.elapsed();
73    let conversion_duration_secs = conversion_duration.as_secs_f64();
74    let conversion_duration_ms =
75        u64::try_from(conversion_duration.as_millis().min(u128::from(u64::MAX)))
76            .unwrap_or(u64::MAX);
77
78    let (response_or_error, output_bytes, chosen_format) = match outcome {
79        Ok((response, bytes, format)) => (Ok(response), bytes, Some(format)),
80        Err(http_error) => (Err(http_error), 0, None),
81    };
82    let conversion_ok = response_or_error.is_ok();
83    let output_mime_label = crate::mime_parser::bucket_output_mime(chosen_format);
84
85    let (result_label, error_class_label) = match &response_or_error {
86        Ok(_) => (
87            crate::metrics::RESULT_SUCCESS,
88            crate::metrics::ERROR_CLASS_NONE,
89        ),
90        Err(http_error) => (http_error.result_class(), http_error.error_class()),
91    };
92
93    metrics::counter!(
94        crate::metrics::METRIC_CONVERSIONS_TOTAL,
95        crate::metrics::LABEL_RESULT => result_label,
96        crate::metrics::LABEL_ERROR_CLASS => error_class_label,
97        crate::metrics::LABEL_INPUT_MIME_TYPE => input_mime_label,
98        crate::metrics::LABEL_OUTPUT_MIME_TYPE => output_mime_label,
99    )
100    .increment(1);
101
102    metrics::histogram!(
103        crate::metrics::METRIC_CONVERSION_DURATION_SECONDS,
104        crate::metrics::LABEL_RESULT => result_label,
105        crate::metrics::LABEL_INPUT_MIME_TYPE => input_mime_label,
106        crate::metrics::LABEL_OUTPUT_MIME_TYPE => output_mime_label,
107    )
108    .record(conversion_duration_secs);
109
110    if conversion_ok {
111        // Reason: u64 → f64 is lossy at extreme values but bounded by realistic output sizes.
112        #[allow(clippy::cast_precision_loss, clippy::as_conversions)]
113        let output_bytes_f64 = output_bytes as f64;
114        metrics::histogram!(
115            crate::metrics::METRIC_CONVERSION_OUTPUT_BYTES,
116            crate::metrics::LABEL_INPUT_MIME_TYPE => input_mime_label,
117            crate::metrics::LABEL_OUTPUT_MIME_TYPE => output_mime_label,
118        )
119        .record(output_bytes_f64);
120    }
121
122    let request_id_opt: Option<&str> = request_id
123        .as_ref()
124        .and_then(|axum::extract::Extension(req_id)| req_id.header_value().to_str().ok());
125    tracing::info!(
126        event = "conversion_completed",
127        result = result_label,
128        error_class = error_class_label,
129        input_mime_type = input_mime_label,
130        output_mime_type = output_mime_label,
131        input_bytes = body_len_for_logging,
132        output_bytes,
133        duration_ms = conversion_duration_ms,
134        request_id = request_id_opt,
135        trace_id = trace_id_owned.as_deref(),
136    );
137
138    response_or_error
139}
140
141/// Perform the actual validation and conversion without recording outcome metrics.
142///
143/// Body size is recorded here because it is only known after header validation
144/// succeeds and the body is confirmed non-empty.
145async fn do_conversion(
146    input_mime_label: &'static str,
147    headers: HeaderMap,
148    body: Bytes,
149) -> Result<(Response<Body>, u64, OutputFormat), HttpError> {
150    mime_parser::validate_content_type(headers.get(header::CONTENT_TYPE))?;
151    let output_format = mime_parser::negotiate_accept(headers.get(header::ACCEPT))?;
152
153    if body.is_empty() {
154        return Err(HttpError::EmptyBody);
155    }
156
157    // Reason: Body sizes are bounded by request memory and never approach the 2^53
158    // f64 precision limit, so the cast is exact in practice. The Prometheus histogram
159    // API requires f64; usize has no native lossless f64 conversion. Workspace
160    // clippy bans both lints below as a general policy; this is the single
161    // documented false-positive exception for bounded numeric metric recording.
162    #[allow(clippy::cast_precision_loss, clippy::as_conversions)]
163    let body_len_bytes = body.len() as f64;
164    metrics::histogram!(
165        crate::metrics::METRIC_HTTP_REQUEST_BODY_BYTES,
166        crate::metrics::LABEL_INPUT_MIME_TYPE => input_mime_label,
167    )
168    .record(body_len_bytes);
169
170    let markdown = String::from_utf8(body.into()).map_err(|error| {
171        tracing::debug!(error = %error, "request body is not valid UTF-8");
172        HttpError::BodyNotUtf8
173    })?;
174
175    let join_result = tokio::task::spawn_blocking(move || -> Result<(Vec<u8>, u64), HttpError> {
176        let mut output_buffer = Vec::new();
177        let mut reader = docspec::AnyReader::new(docspec::InputFormat::Markdown, &markdown);
178        let mut sink = docspec::AnyWriter::new(output_format, &mut output_buffer);
179
180        loop {
181            match reader.next_event() {
182                Ok(Some(event)) => sink.handle_event(event).map_err(|error| {
183                    tracing::debug!(error = %error, "conversion sink failed");
184                    HttpError::Unprocessable {
185                        detail: error.to_string(),
186                    }
187                })?,
188                Ok(None) => break,
189                Err(error) => {
190                    tracing::debug!(error = %error, "markdown reader failed");
191                    return Err(HttpError::Unprocessable {
192                        detail: error.to_string(),
193                    });
194                }
195            }
196        }
197
198        sink.finish().map_err(|error| {
199            tracing::debug!(error = %error, "conversion sink finish failed");
200            HttpError::Internal
201        })?;
202
203        // Capture byte count before output_buffer is consumed by Body::from.
204        // u64::try_from is lossless on 64-bit targets (usize ≤ u64::MAX).
205        let output_bytes =
206            u64::try_from(output_buffer.len()).map_err(|_conversion_error| HttpError::Internal)?;
207        Ok((output_buffer, output_bytes))
208    })
209    .await;
210
211    let content_type = match output_format {
212        OutputFormat::Blocknote => {
213            HeaderValue::from_static("application/vnd.docspec.blocknote+json; charset=utf-8")
214        }
215        OutputFormat::Oxa => HeaderValue::from_static("application/vnd.oxa+json; charset=utf-8"),
216    };
217
218    match join_result {
219        Ok(Ok((output, output_bytes))) => Response::builder()
220            .status(StatusCode::OK)
221            .header(header::CONTENT_TYPE, content_type)
222            .body(Body::from(output))
223            .map(|response| (response, output_bytes, output_format))
224            .map_err(|error| {
225                tracing::error!(error = %error, "failed to build conversion response");
226                HttpError::Internal
227            }),
228        Ok(Err(http_error)) => Err(http_error),
229        Err(join_error) => {
230            tracing::error!(error = %join_error, "spawn_blocking join failed");
231            Err(HttpError::Internal)
232        }
233    }
234}