Skip to main content

docspec_http/handlers/
conversion.rs

1//! Document conversion request handlers.
2
3use axum::{
4    body::{Body, Bytes},
5    http::{header, HeaderMap, HeaderValue, Response, StatusCode},
6    response::IntoResponse,
7};
8use docspec::{InputFormat, OutputFormat};
9use docspec_core::{EventSink as _, EventSource as _};
10
11use crate::{error::HttpError, mime_parser};
12
/// Decision on whether the request body must be checked for valid UTF-8
/// before it is handed to the reader.
///
/// The skip variants are kept separate so the reason for skipping stays
/// visible at the decision site.
enum Utf8Prevalidation {
    /// The body must be valid UTF-8 (chosen for Markdown and HTML input).
    Required,
    /// The input is a binary format (chosen for DOCX); the check does not apply.
    SkippedBinary,
    /// The input format has no explicit policy yet; the check is skipped and
    /// the reader is left to surface its own error.
    SkippedFutureFormat,
}
18
19/// Handle `OPTIONS /conversion` — returns allowed methods.
20#[allow(clippy::unused_async)]
21// Reason: Axum handlers are async for route consistency even when no await is needed.
22#[inline]
23pub async fn options_conversion() -> impl IntoResponse {
24    (
25        StatusCode::NO_CONTENT,
26        [(header::ALLOW, HeaderValue::from_static("POST, OPTIONS"))],
27    )
28}
29
30/// Handle `POST /conversion` — convert markdown or HTML to `BlockNote` JSON, HTML, `oxa.dev` JSON, or Pandoc native.
31///
32/// The input reader is selected by the request's `Content-Type` header (see
33/// [`crate::mime_parser::validate_content_type`]). The output writer is
34/// selected by the request's `Accept` header (see
35/// [`crate::mime_parser::negotiate_accept`]).
36///
37/// The request body is buffered, then converted to completion inside
38/// `spawn_blocking`, then returned in a single response. Conversion errors
39/// surface as 422 (parse / sink errors) or 500 (finalize errors) **before**
40/// any response body is sent — no truncated `200 OK` on failure.
41///
42/// `request_id` is accepted as `Option<Extension<RequestId>>` so the handler
43/// remains usable for downstream consumers that mount it standalone without
44/// the [`tower_http::request_id::SetRequestIdLayer`]. When the extension is
45/// absent, the `request_id` field is **omitted** from the structured
46/// `conversion_completed` event rather than logged as an empty string —
47/// "no correlation id supplied" is a distinct state from "supplied empty".
48/// The same treatment applies to `trace_id`, which is only set when the
49/// upstream `X-Trace-ID` header is present.
50///
51/// Conversion outcome metrics are recorded with intentionally different scopes:
52///
53/// - `docspec_conversions_total` and `docspec_conversion_duration_seconds` are
54///   recorded for **every** request to this endpoint — including early
55///   validation failures — so that all outcomes are visible in dashboards.
56/// - `docspec_http_request_body_bytes` is recorded only after `Content-Type`
57///   and `Accept` validation pass and the body is confirmed non-empty.
58/// - `docspec_conversion_output_bytes` is recorded only on successful
59///   conversions (failed conversions produce no output).
60///
61/// # Errors
62///
63/// Returns [`HttpError`] when request headers or body are invalid, the
64/// conversion fails, or the response cannot be constructed.
65#[inline]
66pub async fn post_conversion(
67    request_id: Option<axum::extract::Extension<tower_http::request_id::RequestId>>,
68    headers: HeaderMap,
69    body: Bytes,
70) -> Result<Response<Body>, HttpError> {
71    let input_mime_label = crate::mime_parser::bucket_input_mime(headers.get(header::CONTENT_TYPE));
72    let trace_id_owned: Option<String> = headers
73        .get(axum::http::HeaderName::from_static("x-trace-id"))
74        .and_then(|header_value| header_value.to_str().ok())
75        .map(str::to_owned);
76    let body_len_for_logging = body.len();
77
78    let conversion_start = std::time::Instant::now();
79    let outcome = do_conversion(input_mime_label, headers, body).await;
80    let conversion_duration = conversion_start.elapsed();
81    let conversion_duration_secs = conversion_duration.as_secs_f64();
82    let conversion_duration_ms =
83        u64::try_from(conversion_duration.as_millis().min(u128::from(u64::MAX)))
84            .unwrap_or(u64::MAX);
85
86    let (response_or_error, output_bytes, chosen_format) = match outcome {
87        Ok((response, bytes, format)) => (Ok(response), bytes, Some(format)),
88        Err(http_error) => (Err(http_error), 0, None),
89    };
90    let conversion_ok = response_or_error.is_ok();
91    let output_mime_label = crate::mime_parser::bucket_output_mime(chosen_format);
92
93    let (result_label, error_class_label) = match &response_or_error {
94        Ok(_) => (
95            crate::metrics::RESULT_SUCCESS,
96            crate::metrics::ERROR_CLASS_NONE,
97        ),
98        Err(http_error) => (http_error.result_class(), http_error.error_class()),
99    };
100
101    metrics::counter!(
102        crate::metrics::METRIC_CONVERSIONS_TOTAL,
103        crate::metrics::LABEL_RESULT => result_label,
104        crate::metrics::LABEL_ERROR_CLASS => error_class_label,
105        crate::metrics::LABEL_INPUT_MIME_TYPE => input_mime_label,
106        crate::metrics::LABEL_OUTPUT_MIME_TYPE => output_mime_label,
107    )
108    .increment(1);
109
110    metrics::histogram!(
111        crate::metrics::METRIC_CONVERSION_DURATION_SECONDS,
112        crate::metrics::LABEL_RESULT => result_label,
113        crate::metrics::LABEL_INPUT_MIME_TYPE => input_mime_label,
114        crate::metrics::LABEL_OUTPUT_MIME_TYPE => output_mime_label,
115    )
116    .record(conversion_duration_secs);
117
118    if conversion_ok {
119        // Reason: u64 → f64 is lossy at extreme values but bounded by realistic output sizes.
120        #[allow(clippy::cast_precision_loss, clippy::as_conversions)]
121        let output_bytes_f64 = output_bytes as f64;
122        metrics::histogram!(
123            crate::metrics::METRIC_CONVERSION_OUTPUT_BYTES,
124            crate::metrics::LABEL_INPUT_MIME_TYPE => input_mime_label,
125            crate::metrics::LABEL_OUTPUT_MIME_TYPE => output_mime_label,
126        )
127        .record(output_bytes_f64);
128    }
129
130    let request_id_opt: Option<&str> = request_id
131        .as_ref()
132        .and_then(|axum::extract::Extension(req_id)| req_id.header_value().to_str().ok());
133    tracing::info!(
134        event = "conversion_completed",
135        result = result_label,
136        error_class = error_class_label,
137        input_mime_type = input_mime_label,
138        output_mime_type = output_mime_label,
139        input_bytes = body_len_for_logging,
140        output_bytes,
141        duration_ms = conversion_duration_ms,
142        request_id = request_id_opt,
143        trace_id = trace_id_owned.as_deref(),
144    );
145
146    response_or_error
147}
148
149/// Perform the actual validation and conversion without recording outcome metrics.
150///
151/// Body size is recorded here because it is only known after header validation
152/// succeeds and the body is confirmed non-empty.
153async fn do_conversion(
154    input_mime_label: &'static str,
155    headers: HeaderMap,
156    body: Bytes,
157) -> Result<(Response<Body>, u64, OutputFormat), HttpError> {
158    let input_format = mime_parser::validate_content_type(headers.get(header::CONTENT_TYPE))?;
159    let output_format = mime_parser::negotiate_accept(headers.get(header::ACCEPT))?;
160
161    if body.is_empty() {
162        return Err(HttpError::EmptyBody);
163    }
164
165    // Reason: Body sizes are bounded by request memory and never approach the 2^53
166    // f64 precision limit, so the cast is exact in practice. The Prometheus histogram
167    // API requires f64; usize has no native lossless f64 conversion. Workspace
168    // clippy bans both lints below as a general policy; this is the single
169    // documented false-positive exception for bounded numeric metric recording.
170    #[allow(clippy::cast_precision_loss, clippy::as_conversions)]
171    let body_len_bytes = body.len() as f64;
172    metrics::histogram!(
173        crate::metrics::METRIC_HTTP_REQUEST_BODY_BYTES,
174        crate::metrics::LABEL_INPUT_MIME_TYPE => input_mime_label,
175    )
176    .record(body_len_bytes);
177
178    let utf8_prevalidation = match input_format {
179        InputFormat::Markdown | InputFormat::Html => Utf8Prevalidation::Required,
180        InputFormat::Docx => {
181            // Binary format; UTF-8 prevalidation does not apply.
182            Utf8Prevalidation::SkippedBinary
183        }
184        _ => {
185            // Future InputFormat variants. Conservative policy: skip UTF-8
186            // prevalidation and let the reader surface its own error.
187            // Update this arm deliberately when a new format is added.
188            Utf8Prevalidation::SkippedFutureFormat
189        }
190    };
191
192    if matches!(utf8_prevalidation, Utf8Prevalidation::Required) {
193        core::str::from_utf8(&body).map_err(|_error| {
194            tracing::debug!("request body is not valid UTF-8");
195            HttpError::BodyNotUtf8
196        })?;
197    }
198
199    let join_result = tokio::task::spawn_blocking(move || -> Result<(Vec<u8>, u64), HttpError> {
200        let mut output_buffer = Vec::new();
201        let mut reader = docspec::AnyReader::from_reader(input_format, std::io::Cursor::new(body))
202            .map_err(|error| {
203                tracing::debug!(error = %error, "reader construction failed");
204                HttpError::Unprocessable {
205                    detail: error.to_string(),
206                }
207            })?;
208        let mut sink = docspec::AnyWriter::new(output_format, &mut output_buffer);
209
210        loop {
211            match reader.next_event() {
212                Ok(Some(event)) => sink.handle_event(event).map_err(|error| {
213                    tracing::debug!(error = %error, "conversion sink failed");
214                    HttpError::Unprocessable {
215                        detail: error.to_string(),
216                    }
217                })?,
218                Ok(None) => break,
219                Err(error) => {
220                    tracing::debug!(error = %error, "reader failed");
221                    return Err(HttpError::Unprocessable {
222                        detail: error.to_string(),
223                    });
224                }
225            }
226        }
227
228        sink.finish().map_err(|error| {
229            tracing::debug!(error = %error, "conversion sink finish failed");
230            HttpError::Internal
231        })?;
232
233        // Capture byte count before output_buffer is consumed by Body::from.
234        // u64::try_from is lossless on 64-bit targets (usize ≤ u64::MAX).
235        let output_bytes =
236            u64::try_from(output_buffer.len()).map_err(|_conversion_error| HttpError::Internal)?;
237        Ok((output_buffer, output_bytes))
238    })
239    .await;
240
241    let content_type = match output_format {
242        OutputFormat::Blocknote => {
243            HeaderValue::from_static("application/vnd.docspec.blocknote+json; charset=utf-8")
244        }
245        OutputFormat::Html => HeaderValue::from_static("text/html; charset=utf-8"),
246        OutputFormat::Oxa => HeaderValue::from_static("application/vnd.oxa+json; charset=utf-8"),
247        OutputFormat::PandocNative => {
248            HeaderValue::from_static("application/vnd.pandoc.native; charset=utf-8")
249        }
250        _ => HeaderValue::from_static("application/vnd.docspec.blocknote+json; charset=utf-8"),
251    };
252
253    match join_result {
254        Ok(Ok((output, output_bytes))) => Response::builder()
255            .status(StatusCode::OK)
256            .header(header::CONTENT_TYPE, content_type)
257            .body(Body::from(output))
258            .map(|response| (response, output_bytes, output_format))
259            .map_err(|error| {
260                tracing::error!(error = %error, "failed to build conversion response");
261                HttpError::Internal
262            }),
263        Ok(Err(http_error)) => Err(http_error),
264        Err(join_error) => {
265            tracing::error!(error = %join_error, "spawn_blocking join failed");
266            Err(HttpError::Internal)
267        }
268    }
269}