Skip to main content

docspec_http/handlers/
conversion.rs

1//! Document conversion request handlers.
2
3use axum::{
4    body::{Body, Bytes},
5    http::{header, HeaderMap, HeaderValue, Response, StatusCode},
6    response::IntoResponse,
7};
8use docspec::OutputFormat;
9use docspec_core::{EventSink as _, EventSource as _};
10
11use crate::{error::HttpError, mime_parser};
12
13/// Handle `OPTIONS /conversion` — returns allowed methods.
14#[allow(clippy::unused_async)]
15// Reason: Axum handlers are async for route consistency even when no await is needed.
16#[inline]
17pub async fn options_conversion() -> impl IntoResponse {
18    (
19        StatusCode::NO_CONTENT,
20        [(header::ALLOW, HeaderValue::from_static("POST, OPTIONS"))],
21    )
22}
23
24/// Handle `POST /conversion` — convert markdown or HTML to `BlockNote` or `oxa.dev` JSON.
25///
26/// The input reader is selected by the request's `Content-Type` header (see
27/// [`crate::mime_parser::validate_content_type`]). The output writer is
28/// selected by the request's `Accept` header (see
29/// [`crate::mime_parser::negotiate_accept`]).
30///
31/// The request body is buffered, then converted to completion inside
32/// `spawn_blocking`, then returned in a single response. Conversion errors
33/// surface as 422 (parse / sink errors) or 500 (finalize errors) **before**
34/// any response body is sent — no truncated `200 OK` on failure.
35///
36/// `request_id` is accepted as `Option<Extension<RequestId>>` so the handler
37/// remains usable for downstream consumers that mount it standalone without
38/// the [`tower_http::request_id::SetRequestIdLayer`]. When the extension is
39/// absent, the `request_id` field is **omitted** from the structured
40/// `conversion_completed` event rather than logged as an empty string —
41/// "no correlation id supplied" is a distinct state from "supplied empty".
42/// The same treatment applies to `trace_id`, which is only set when the
43/// upstream `X-Trace-ID` header is present.
44///
45/// Conversion outcome metrics are recorded with intentionally different scopes:
46///
47/// - `docspec_conversions_total` and `docspec_conversion_duration_seconds` are
48///   recorded for **every** request to this endpoint — including early
49///   validation failures — so that all outcomes are visible in dashboards.
50/// - `docspec_http_request_body_bytes` is recorded only after `Content-Type`
51///   and `Accept` validation pass and the body is confirmed non-empty.
52/// - `docspec_conversion_output_bytes` is recorded only on successful
53///   conversions (failed conversions produce no output).
54///
55/// # Errors
56///
57/// Returns [`HttpError`] when request headers or body are invalid, the
58/// conversion fails, or the response cannot be constructed.
59#[inline]
60pub async fn post_conversion(
61    request_id: Option<axum::extract::Extension<tower_http::request_id::RequestId>>,
62    headers: HeaderMap,
63    body: Bytes,
64) -> Result<Response<Body>, HttpError> {
65    let input_mime_label = crate::mime_parser::bucket_input_mime(headers.get(header::CONTENT_TYPE));
66    let trace_id_owned: Option<String> = headers
67        .get(axum::http::HeaderName::from_static("x-trace-id"))
68        .and_then(|header_value| header_value.to_str().ok())
69        .map(str::to_owned);
70    let body_len_for_logging = body.len();
71
72    let conversion_start = std::time::Instant::now();
73    let outcome = do_conversion(input_mime_label, headers, body).await;
74    let conversion_duration = conversion_start.elapsed();
75    let conversion_duration_secs = conversion_duration.as_secs_f64();
76    let conversion_duration_ms =
77        u64::try_from(conversion_duration.as_millis().min(u128::from(u64::MAX)))
78            .unwrap_or(u64::MAX);
79
80    let (response_or_error, output_bytes, chosen_format) = match outcome {
81        Ok((response, bytes, format)) => (Ok(response), bytes, Some(format)),
82        Err(http_error) => (Err(http_error), 0, None),
83    };
84    let conversion_ok = response_or_error.is_ok();
85    let output_mime_label = crate::mime_parser::bucket_output_mime(chosen_format);
86
87    let (result_label, error_class_label) = match &response_or_error {
88        Ok(_) => (
89            crate::metrics::RESULT_SUCCESS,
90            crate::metrics::ERROR_CLASS_NONE,
91        ),
92        Err(http_error) => (http_error.result_class(), http_error.error_class()),
93    };
94
95    metrics::counter!(
96        crate::metrics::METRIC_CONVERSIONS_TOTAL,
97        crate::metrics::LABEL_RESULT => result_label,
98        crate::metrics::LABEL_ERROR_CLASS => error_class_label,
99        crate::metrics::LABEL_INPUT_MIME_TYPE => input_mime_label,
100        crate::metrics::LABEL_OUTPUT_MIME_TYPE => output_mime_label,
101    )
102    .increment(1);
103
104    metrics::histogram!(
105        crate::metrics::METRIC_CONVERSION_DURATION_SECONDS,
106        crate::metrics::LABEL_RESULT => result_label,
107        crate::metrics::LABEL_INPUT_MIME_TYPE => input_mime_label,
108        crate::metrics::LABEL_OUTPUT_MIME_TYPE => output_mime_label,
109    )
110    .record(conversion_duration_secs);
111
112    if conversion_ok {
113        // Reason: u64 → f64 is lossy at extreme values but bounded by realistic output sizes.
114        #[allow(clippy::cast_precision_loss, clippy::as_conversions)]
115        let output_bytes_f64 = output_bytes as f64;
116        metrics::histogram!(
117            crate::metrics::METRIC_CONVERSION_OUTPUT_BYTES,
118            crate::metrics::LABEL_INPUT_MIME_TYPE => input_mime_label,
119            crate::metrics::LABEL_OUTPUT_MIME_TYPE => output_mime_label,
120        )
121        .record(output_bytes_f64);
122    }
123
124    let request_id_opt: Option<&str> = request_id
125        .as_ref()
126        .and_then(|axum::extract::Extension(req_id)| req_id.header_value().to_str().ok());
127    tracing::info!(
128        event = "conversion_completed",
129        result = result_label,
130        error_class = error_class_label,
131        input_mime_type = input_mime_label,
132        output_mime_type = output_mime_label,
133        input_bytes = body_len_for_logging,
134        output_bytes,
135        duration_ms = conversion_duration_ms,
136        request_id = request_id_opt,
137        trace_id = trace_id_owned.as_deref(),
138    );
139
140    response_or_error
141}
142
143/// Perform the actual validation and conversion without recording outcome metrics.
144///
145/// Body size is recorded here because it is only known after header validation
146/// succeeds and the body is confirmed non-empty.
147async fn do_conversion(
148    input_mime_label: &'static str,
149    headers: HeaderMap,
150    body: Bytes,
151) -> Result<(Response<Body>, u64, OutputFormat), HttpError> {
152    let input_format = mime_parser::validate_content_type(headers.get(header::CONTENT_TYPE))?;
153    let output_format = mime_parser::negotiate_accept(headers.get(header::ACCEPT))?;
154
155    if body.is_empty() {
156        return Err(HttpError::EmptyBody);
157    }
158
159    // Reason: Body sizes are bounded by request memory and never approach the 2^53
160    // f64 precision limit, so the cast is exact in practice. The Prometheus histogram
161    // API requires f64; usize has no native lossless f64 conversion. Workspace
162    // clippy bans both lints below as a general policy; this is the single
163    // documented false-positive exception for bounded numeric metric recording.
164    #[allow(clippy::cast_precision_loss, clippy::as_conversions)]
165    let body_len_bytes = body.len() as f64;
166    metrics::histogram!(
167        crate::metrics::METRIC_HTTP_REQUEST_BODY_BYTES,
168        crate::metrics::LABEL_INPUT_MIME_TYPE => input_mime_label,
169    )
170    .record(body_len_bytes);
171
172    let input_text = String::from_utf8(body.into()).map_err(|error| {
173        tracing::debug!(error = %error, "request body is not valid UTF-8");
174        HttpError::BodyNotUtf8
175    })?;
176
177    let join_result = tokio::task::spawn_blocking(move || -> Result<(Vec<u8>, u64), HttpError> {
178        let mut output_buffer = Vec::new();
179        let mut reader = docspec::AnyReader::new(input_format, &input_text);
180        let mut sink = docspec::AnyWriter::new(output_format, &mut output_buffer);
181
182        loop {
183            match reader.next_event() {
184                Ok(Some(event)) => sink.handle_event(event).map_err(|error| {
185                    tracing::debug!(error = %error, "conversion sink failed");
186                    HttpError::Unprocessable {
187                        detail: error.to_string(),
188                    }
189                })?,
190                Ok(None) => break,
191                Err(error) => {
192                    tracing::debug!(error = %error, "reader failed");
193                    return Err(HttpError::Unprocessable {
194                        detail: error.to_string(),
195                    });
196                }
197            }
198        }
199
200        sink.finish().map_err(|error| {
201            tracing::debug!(error = %error, "conversion sink finish failed");
202            HttpError::Internal
203        })?;
204
205        // Capture byte count before output_buffer is consumed by Body::from.
206        // u64::try_from is lossless on 64-bit targets (usize ≤ u64::MAX).
207        let output_bytes =
208            u64::try_from(output_buffer.len()).map_err(|_conversion_error| HttpError::Internal)?;
209        Ok((output_buffer, output_bytes))
210    })
211    .await;
212
213    let content_type = match output_format {
214        OutputFormat::Blocknote => {
215            HeaderValue::from_static("application/vnd.docspec.blocknote+json; charset=utf-8")
216        }
217        OutputFormat::Oxa => HeaderValue::from_static("application/vnd.oxa+json; charset=utf-8"),
218    };
219
220    match join_result {
221        Ok(Ok((output, output_bytes))) => Response::builder()
222            .status(StatusCode::OK)
223            .header(header::CONTENT_TYPE, content_type)
224            .body(Body::from(output))
225            .map(|response| (response, output_bytes, output_format))
226            .map_err(|error| {
227                tracing::error!(error = %error, "failed to build conversion response");
228                HttpError::Internal
229            }),
230        Ok(Err(http_error)) => Err(http_error),
231        Err(join_error) => {
232            tracing::error!(error = %join_error, "spawn_blocking join failed");
233            Err(HttpError::Internal)
234        }
235    }
236}