1use axum::{
4 body::{Body, Bytes},
5 http::{header, HeaderMap, HeaderValue, Response, StatusCode},
6 response::IntoResponse,
7};
8use docspec::{InputFormat, OutputFormat};
9use docspec_core::{EventSink as _, EventSource as _};
10
11use crate::{error::HttpError, mime_parser};
12
/// Decision on whether a request body is checked for valid UTF-8 before the
/// conversion pipeline is started.
///
/// The variant is chosen from the negotiated input format in `do_conversion`;
/// only [`Utf8Prevalidation::Required`] triggers the check.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Utf8Prevalidation {
    /// The body is validated with `core::str::from_utf8` up front
    /// (selected for Markdown and HTML input).
    Required,
    /// No up-front check; selected for DOCX input.
    SkippedBinary,
    /// No up-front check; selected by the wildcard arm for any input format
    /// this module does not name explicitly.
    SkippedFutureFormat,
}
18
19#[allow(clippy::unused_async)]
21#[inline]
23pub async fn options_conversion() -> impl IntoResponse {
24 (
25 StatusCode::NO_CONTENT,
26 [(header::ALLOW, HeaderValue::from_static("POST, OPTIONS"))],
27 )
28}
29
30#[inline]
66pub async fn post_conversion(
67 request_id: Option<axum::extract::Extension<tower_http::request_id::RequestId>>,
68 headers: HeaderMap,
69 body: Bytes,
70) -> Result<Response<Body>, HttpError> {
71 let input_mime_label = crate::mime_parser::bucket_input_mime(headers.get(header::CONTENT_TYPE));
72 let trace_id_owned: Option<String> = headers
73 .get(axum::http::HeaderName::from_static("x-trace-id"))
74 .and_then(|header_value| header_value.to_str().ok())
75 .map(str::to_owned);
76 let body_len_for_logging = body.len();
77
78 let conversion_start = std::time::Instant::now();
79 let outcome = do_conversion(input_mime_label, headers, body).await;
80 let conversion_duration = conversion_start.elapsed();
81 let conversion_duration_secs = conversion_duration.as_secs_f64();
82 let conversion_duration_ms =
83 u64::try_from(conversion_duration.as_millis().min(u128::from(u64::MAX)))
84 .unwrap_or(u64::MAX);
85
86 let (response_or_error, output_bytes, chosen_format) = match outcome {
87 Ok((response, bytes, format)) => (Ok(response), bytes, Some(format)),
88 Err(http_error) => (Err(http_error), 0, None),
89 };
90 let conversion_ok = response_or_error.is_ok();
91 let output_mime_label = crate::mime_parser::bucket_output_mime(chosen_format);
92
93 let (result_label, error_class_label) = match &response_or_error {
94 Ok(_) => (
95 crate::metrics::RESULT_SUCCESS,
96 crate::metrics::ERROR_CLASS_NONE,
97 ),
98 Err(http_error) => (http_error.result_class(), http_error.error_class()),
99 };
100
101 metrics::counter!(
102 crate::metrics::METRIC_CONVERSIONS_TOTAL,
103 crate::metrics::LABEL_RESULT => result_label,
104 crate::metrics::LABEL_ERROR_CLASS => error_class_label,
105 crate::metrics::LABEL_INPUT_MIME_TYPE => input_mime_label,
106 crate::metrics::LABEL_OUTPUT_MIME_TYPE => output_mime_label,
107 )
108 .increment(1);
109
110 metrics::histogram!(
111 crate::metrics::METRIC_CONVERSION_DURATION_SECONDS,
112 crate::metrics::LABEL_RESULT => result_label,
113 crate::metrics::LABEL_INPUT_MIME_TYPE => input_mime_label,
114 crate::metrics::LABEL_OUTPUT_MIME_TYPE => output_mime_label,
115 )
116 .record(conversion_duration_secs);
117
118 if conversion_ok {
119 #[allow(clippy::cast_precision_loss, clippy::as_conversions)]
121 let output_bytes_f64 = output_bytes as f64;
122 metrics::histogram!(
123 crate::metrics::METRIC_CONVERSION_OUTPUT_BYTES,
124 crate::metrics::LABEL_INPUT_MIME_TYPE => input_mime_label,
125 crate::metrics::LABEL_OUTPUT_MIME_TYPE => output_mime_label,
126 )
127 .record(output_bytes_f64);
128 }
129
130 let request_id_opt: Option<&str> = request_id
131 .as_ref()
132 .and_then(|axum::extract::Extension(req_id)| req_id.header_value().to_str().ok());
133 tracing::info!(
134 event = "conversion_completed",
135 result = result_label,
136 error_class = error_class_label,
137 input_mime_type = input_mime_label,
138 output_mime_type = output_mime_label,
139 input_bytes = body_len_for_logging,
140 output_bytes,
141 duration_ms = conversion_duration_ms,
142 request_id = request_id_opt,
143 trace_id = trace_id_owned.as_deref(),
144 );
145
146 response_or_error
147}
148
149async fn do_conversion(
154 input_mime_label: &'static str,
155 headers: HeaderMap,
156 body: Bytes,
157) -> Result<(Response<Body>, u64, OutputFormat), HttpError> {
158 let input_format = mime_parser::validate_content_type(headers.get(header::CONTENT_TYPE))?;
159 let output_format = mime_parser::negotiate_accept(headers.get(header::ACCEPT))?;
160
161 if body.is_empty() {
162 return Err(HttpError::EmptyBody);
163 }
164
165 #[allow(clippy::cast_precision_loss, clippy::as_conversions)]
171 let body_len_bytes = body.len() as f64;
172 metrics::histogram!(
173 crate::metrics::METRIC_HTTP_REQUEST_BODY_BYTES,
174 crate::metrics::LABEL_INPUT_MIME_TYPE => input_mime_label,
175 )
176 .record(body_len_bytes);
177
178 let utf8_prevalidation = match input_format {
179 InputFormat::Markdown | InputFormat::Html => Utf8Prevalidation::Required,
180 InputFormat::Docx => {
181 Utf8Prevalidation::SkippedBinary
183 }
184 _ => {
185 Utf8Prevalidation::SkippedFutureFormat
189 }
190 };
191
192 if matches!(utf8_prevalidation, Utf8Prevalidation::Required) {
193 core::str::from_utf8(&body).map_err(|_error| {
194 tracing::debug!("request body is not valid UTF-8");
195 HttpError::BodyNotUtf8
196 })?;
197 }
198
199 let join_result = tokio::task::spawn_blocking(move || -> Result<(Vec<u8>, u64), HttpError> {
200 let mut output_buffer = Vec::new();
201 let mut reader = docspec::AnyReader::from_reader(input_format, std::io::Cursor::new(body))
202 .map_err(|error| {
203 tracing::debug!(error = %error, "reader construction failed");
204 HttpError::Unprocessable {
205 detail: error.to_string(),
206 }
207 })?;
208 let mut sink = docspec::AnyWriter::new(output_format, &mut output_buffer);
209
210 loop {
211 match reader.next_event() {
212 Ok(Some(event)) => sink.handle_event(event).map_err(|error| {
213 tracing::debug!(error = %error, "conversion sink failed");
214 HttpError::Unprocessable {
215 detail: error.to_string(),
216 }
217 })?,
218 Ok(None) => break,
219 Err(error) => {
220 tracing::debug!(error = %error, "reader failed");
221 return Err(HttpError::Unprocessable {
222 detail: error.to_string(),
223 });
224 }
225 }
226 }
227
228 sink.finish().map_err(|error| {
229 tracing::debug!(error = %error, "conversion sink finish failed");
230 HttpError::Internal
231 })?;
232
233 let output_bytes =
236 u64::try_from(output_buffer.len()).map_err(|_conversion_error| HttpError::Internal)?;
237 Ok((output_buffer, output_bytes))
238 })
239 .await;
240
241 let content_type = match output_format {
242 OutputFormat::Blocknote => {
243 HeaderValue::from_static("application/vnd.docspec.blocknote+json; charset=utf-8")
244 }
245 OutputFormat::Html => HeaderValue::from_static("text/html; charset=utf-8"),
246 OutputFormat::Oxa => HeaderValue::from_static("application/vnd.oxa+json; charset=utf-8"),
247 OutputFormat::PandocNative => {
248 HeaderValue::from_static("application/vnd.pandoc.native; charset=utf-8")
249 }
250 _ => HeaderValue::from_static("application/vnd.docspec.blocknote+json; charset=utf-8"),
251 };
252
253 match join_result {
254 Ok(Ok((output, output_bytes))) => Response::builder()
255 .status(StatusCode::OK)
256 .header(header::CONTENT_TYPE, content_type)
257 .body(Body::from(output))
258 .map(|response| (response, output_bytes, output_format))
259 .map_err(|error| {
260 tracing::error!(error = %error, "failed to build conversion response");
261 HttpError::Internal
262 }),
263 Ok(Err(http_error)) => Err(http_error),
264 Err(join_error) => {
265 tracing::error!(error = %join_error, "spawn_blocking join failed");
266 Err(HttpError::Internal)
267 }
268 }
269}