#[cfg(not(feature = "office"))]
use crate::KreuzbergError;
use crate::Result;
use crate::core::config::ExtractionConfig;
use crate::core::mime::{LEGACY_POWERPOINT_MIME_TYPE, LEGACY_WORD_MIME_TYPE};
use crate::types::ExtractionResult;
use super::file::extract_bytes_with_extractor;
#[cfg_attr(feature = "otel", tracing::instrument(
skip(config, content),
fields(
{ crate::telemetry::conventions::OPERATION } = crate::telemetry::conventions::operations::EXTRACT_BYTES,
{ crate::telemetry::conventions::DOCUMENT_MIME_TYPE } = mime_type,
{ crate::telemetry::conventions::DOCUMENT_SIZE_BYTES } = content.len(),
{ crate::telemetry::conventions::OTEL_STATUS_CODE } = tracing::field::Empty,
{ crate::telemetry::conventions::ERROR_TYPE } = tracing::field::Empty,
{ crate::telemetry::conventions::ERROR_MESSAGE } = tracing::field::Empty,
)
))]
pub async fn extract_bytes(content: &[u8], mime_type: &str, config: &ExtractionConfig) -> Result<ExtractionResult> {
use crate::core::mime;
let result = async {
let validated_mime = if mime_type == "application/octet-stream" {
mime::detect_mime_type_from_bytes(content)?
} else {
mime::validate_mime_type(mime_type)?
};
#[cfg(not(feature = "office"))]
match validated_mime.as_str() {
LEGACY_WORD_MIME_TYPE => {
return Err(KreuzbergError::UnsupportedFormat(
"Legacy Word extraction requires the `office` feature".to_string(),
));
}
LEGACY_POWERPOINT_MIME_TYPE => {
return Err(KreuzbergError::UnsupportedFormat(
"Legacy PowerPoint extraction requires the `office` feature".to_string(),
));
}
_ => {}
}
#[cfg(feature = "office")]
{
let _ = LEGACY_WORD_MIME_TYPE;
let _ = LEGACY_POWERPOINT_MIME_TYPE;
}
extract_bytes_with_extractor(content, &validated_mime, config).await
}
.await;
#[cfg(feature = "otel")]
if let Err(ref e) = result {
crate::telemetry::spans::record_error_on_current_span(e);
}
result
}