use axum::{Json, body::Bytes, extract::State, http::HeaderMap};
use tower::Service;
use crate::service::ExtractionRequest;
use super::{
error::{ApiError, MultipartApi},
types::{ApiState, DoclingCompatDocument, DoclingCompatResponse, OpenWebDocumentMetadata, OpenWebDocumentResponse},
};
#[utoipa::path(
put,
path = "/process",
tag = "openweb",
request_body(content_type = "application/octet-stream", content = Vec<u8>),
responses(
(status = 200, description = "Document extracted", body = OpenWebDocumentResponse),
(status = 400, description = "Bad request", body = crate::api::types::ErrorResponse),
(status = 500, description = "Internal server error", body = crate::api::types::ErrorResponse),
)
)]
#[cfg_attr(
feature = "otel",
tracing::instrument(name = "api.openweb_process", skip(state, headers, body))
)]
pub async fn openweb_external_handler(
State(state): State<ApiState>,
headers: HeaderMap,
body: Bytes,
) -> Result<Json<OpenWebDocumentResponse>, ApiError> {
if body.is_empty() {
return Err(ApiError::validation(crate::error::KreuzbergError::validation(
"Empty request body — upload a file as the raw request body",
)));
}
let mime_type = headers
.get(axum::http::header::CONTENT_TYPE)
.and_then(|v| v.to_str().ok())
.map(|v| v.split(';').next().unwrap_or(v).trim())
.unwrap_or("application/octet-stream")
.to_string();
let filename = headers
.get("X-Filename")
.and_then(|v| v.to_str().ok())
.map(|v| urlencoding::decode(v).unwrap_or_else(|_| v.into()).into_owned())
.unwrap_or_else(|| "unknown".to_string());
let mime_type = if mime_type == "application/octet-stream" {
crate::core::mime::detect_mime_type(&filename, false).unwrap_or(mime_type)
} else {
mime_type
};
let mut config = (*state.default_config).clone();
config.output_format = crate::core::config::OutputFormat::Markdown;
let request = ExtractionRequest::bytes(body.to_vec(), mime_type, config);
let mut svc = state
.extraction_service
.lock()
.expect("extraction service lock poisoned")
.clone();
let result = svc.call(request).await?;
Ok(Json(OpenWebDocumentResponse {
page_content: result.content,
metadata: OpenWebDocumentMetadata { source: filename },
}))
}
// Multipart conversion endpoint (documented below as `POST /v1/convert/file`).
//
// Reads multipart fields until the first one named `files` or `file`, extracts
// that upload with Markdown output, and returns the content as
// `document.md_content` with `status` set to "success". Later fields are not
// read.
//
// The part's `Content-Type` is normalised the same way the raw-body handler
// normalises its header: parameters after `;` are dropped and the value is
// trimmed. When the part has no content type, or only the generic
// `application/octet-stream`, the MIME type is guessed from the part's
// filename if one was sent.
//
// Errors: a validation `ApiError` when the multipart stream cannot be read or
// no `files`/`file` field is present; any error returned by the extraction
// service is propagated through `?`.
//
// Panics: if the extraction service mutex is poisoned.
//
// NOTE: plain `//` comments are used on purpose; `utoipa::path` copies `///`
// doc comments into the generated operation summary/description.
#[utoipa::path(
    post,
    path = "/v1/convert/file",
    tag = "openweb",
    request_body(content_type = "multipart/form-data"),
    responses(
        (status = 200, description = "Document converted", body = DoclingCompatResponse),
        (status = 400, description = "Bad request", body = crate::api::types::ErrorResponse),
        (status = 500, description = "Internal server error", body = crate::api::types::ErrorResponse),
    )
)]
#[cfg_attr(
    feature = "otel",
    tracing::instrument(name = "api.openweb_docling", skip(state, multipart))
)]
pub async fn openweb_docling_handler(
    State(state): State<ApiState>,
    MultipartApi(mut multipart): MultipartApi,
) -> Result<Json<DoclingCompatResponse>, ApiError> {
    let mut file_data: Option<(Vec<u8>, String)> = None;
    while let Some(field) = multipart
        .next_field()
        .await
        .map_err(|e| ApiError::validation(crate::error::KreuzbergError::validation(e.to_string())))?
    {
        // Only the upload field is of interest; skip everything else.
        if !matches!(field.name(), Some("files" | "file")) {
            continue;
        }

        // Copy the metadata out first: `bytes()` consumes the field.
        let file_name = field.file_name().map(|s| s.to_string());
        // Drop media-type parameters (e.g. `; charset=utf-8`) so the value is a
        // bare MIME type and the generic-type check below still matches when
        // the client appended parameters.
        let content_type = field
            .content_type()
            .map(|raw| raw.split(';').next().unwrap_or(raw).trim().to_string());
        let data = field
            .bytes()
            .await
            .map_err(|e| ApiError::validation(crate::error::KreuzbergError::validation(e.to_string())))?;

        let mut mime_type = content_type.unwrap_or_else(|| "application/octet-stream".to_string());
        // The generic binary type carries no information, so try the filename.
        if mime_type == "application/octet-stream"
            && let Some(ref name) = file_name
            && let Ok(detected) = crate::core::mime::detect_mime_type(name, false)
        {
            mime_type = detected;
        }

        file_data = Some((data.to_vec(), mime_type));
        break;
    }

    let (data, mime_type) = file_data.ok_or_else(|| {
        ApiError::validation(crate::error::KreuzbergError::validation(
            "No file provided. Upload a file with field name 'files'.",
        ))
    })?;

    // Start from the server defaults but force Markdown output.
    let mut config = (*state.default_config).clone();
    config.output_format = crate::core::config::OutputFormat::Markdown;
    let request = ExtractionRequest::bytes(data, mime_type, config);

    // The guard is a temporary of this statement, so the lock is released
    // before the service call is awaited.
    let mut svc = state
        .extraction_service
        .lock()
        .expect("extraction service lock poisoned")
        .clone();
    let result = svc.call(request).await?;

    Ok(Json(DoclingCompatResponse {
        document: DoclingCompatDocument {
            md_content: result.content,
        },
        status: "success".to_string(),
    }))
}