// Key strings under the `kreuzberg.` prefix. The notes on each constant are read off its
// identifier and literal; how callers actually populate the values is not visible here.

/// Key `kreuzberg.operation`; presumably carries one of the values in [`operations`] — confirm at call sites.
pub const OPERATION: &str = "kreuzberg.operation";

// --- document ---

/// Key `kreuzberg.document.mime_type`: by its name, the MIME type of the document.
pub const DOCUMENT_MIME_TYPE: &str = "kreuzberg.document.mime_type";
/// Key `kreuzberg.document.size_bytes`: by its name, the document size in bytes.
pub const DOCUMENT_SIZE_BYTES: &str = "kreuzberg.document.size_bytes";
/// Key `kreuzberg.document.filename`: by its name, the document's file name.
pub const DOCUMENT_FILENAME: &str = "kreuzberg.document.filename";

// --- extractor ---

/// Key `kreuzberg.extractor.name`: by its name, the name of an extractor.
pub const EXTRACTOR_NAME: &str = "kreuzberg.extractor.name";
/// Key `kreuzberg.extractor.priority`: by its name, the priority of an extractor.
pub const EXTRACTOR_PRIORITY: &str = "kreuzberg.extractor.priority";

// --- pipeline ---

/// Key `kreuzberg.pipeline.stage`; presumably carries one of the values in [`stages`] — confirm at call sites.
pub const PIPELINE_STAGE: &str = "kreuzberg.pipeline.stage";
/// Key `kreuzberg.pipeline.processor_name`: by its name, the name of a pipeline processor.
pub const PIPELINE_PROCESSOR_NAME: &str = "kreuzberg.pipeline.processor_name";

// --- cache ---

/// Key `kreuzberg.cache.hit`: by its name, whether a cache lookup hit.
pub const CACHE_HIT: &str = "kreuzberg.cache.hit";
/// Key `kreuzberg.cache.key`: by its name, the cache key involved.
pub const CACHE_KEY: &str = "kreuzberg.cache.key";

// --- batch ---

/// Key `kreuzberg.batch.size`: by its name, the number of items in a batch.
pub const BATCH_SIZE: &str = "kreuzberg.batch.size";
/// Key `kreuzberg.batch.index`: by its name, an item's position within a batch.
pub const BATCH_INDEX: &str = "kreuzberg.batch.index";

// --- OCR ---

/// Key `kreuzberg.ocr.backend`: by its name, the OCR backend in use.
pub const OCR_BACKEND: &str = "kreuzberg.ocr.backend";
/// Key `kreuzberg.ocr.language`: by its name, the OCR language setting.
pub const OCR_LANGUAGE: &str = "kreuzberg.ocr.language";

// --- model ---

/// Key `kreuzberg.model.name`: by its name, the name of a model.
pub const MODEL_NAME: &str = "kreuzberg.model.name";
/// Key `kreuzberg.model.inference_ms`: by its name, an inference duration in milliseconds.
pub const MODEL_INFERENCE_MS: &str = "kreuzberg.model.inference_ms";

// --- errors ---

/// Key `kreuzberg.error.type`: by its name, a classification of an error.
pub const ERROR_TYPE: &str = "kreuzberg.error.type";
/// Returns only the final component of `path` as a string slice.
///
/// The result borrows from `path`. The literal `"unknown"` is returned instead when
/// either of the following holds:
///
/// * `path.file_name()` yields `None` (for example an empty path, or one ending in `..`);
/// * the final component is not valid UTF-8, so `to_str()` fails.
///
/// NOTE(review): the name suggests the intent is to keep directory components out of
/// recorded values — confirm against callers.
pub fn sanitize_filename(path: &std::path::Path) -> &str {
    match path.file_name() {
        // A final component exists; fall back only if it cannot be viewed as UTF-8.
        Some(last_component) => last_component.to_str().unwrap_or("unknown"),
        None => "unknown",
    }
}
// Keys that sit outside the `kreuzberg.` prefix.

/// Key `otel.status_code`.
///
/// NOTE(review): looks like the OpenTelemetry span-status key — confirm against the
/// tracing integration in use.
pub const OTEL_STATUS_CODE: &str = "otel.status_code";

/// Key `error.message`: by its name, a human-readable error description.
pub const ERROR_MESSAGE: &str = "error.message";
/// Operation name strings.
///
/// NOTE(review): presumably the values recorded under the `kreuzberg.operation` key —
/// confirm at call sites.
pub mod operations {
    // Extraction entry points (by name).

    /// Operation name `extract_file`.
    pub const EXTRACT_FILE: &str = "extract_file";
    /// Operation name `extract_bytes`.
    pub const EXTRACT_BYTES: &str = "extract_bytes";
    /// Operation name `batch_extract`.
    pub const BATCH_EXTRACT: &str = "batch_extract";

    // Pipeline.

    /// Operation name `pipeline`.
    pub const PIPELINE: &str = "pipeline";

    // Cache access.

    /// Operation name `cache_lookup`.
    pub const CACHE_LOOKUP: &str = "cache_lookup";
    /// Operation name `cache_write`.
    pub const CACHE_WRITE: &str = "cache_write";
}
/// Pipeline stage name strings.
///
/// NOTE(review): presumably the values recorded under the `kreuzberg.pipeline.stage`
/// key — confirm at call sites.
pub mod stages {
    /// Stage name `extraction`.
    pub const EXTRACTION: &str = "extraction";

    // Post-processing sub-stages share the `post_processing.` prefix.

    /// Stage name `post_processing.early`.
    pub const POST_PROCESSING_EARLY: &str = "post_processing.early";
    /// Stage name `post_processing.middle`.
    pub const POST_PROCESSING_MIDDLE: &str = "post_processing.middle";
    /// Stage name `post_processing.late`.
    pub const POST_PROCESSING_LATE: &str = "post_processing.late";

    // Remaining stages.

    /// Stage name `validation`.
    pub const VALIDATION: &str = "validation";
    /// Stage name `chunking`.
    pub const CHUNKING: &str = "chunking";
    /// Stage name `language_detection`.
    pub const LANGUAGE_DETECTION: &str = "language_detection";
    /// Stage name `token_reduction`.
    pub const TOKEN_REDUCTION: &str = "token_reduction";
}
/// Metric name strings under the `kreuzberg.` prefix.
///
/// The instrument kind (counter, histogram, gauge, ...) behind each name is chosen where
/// the metric is registered, which is not visible in this section; the groupings below
/// follow the identifiers only.
pub mod metrics {
    // Totals and hit/miss tallies (by name).

    /// Metric name `kreuzberg.extraction.total`.
    pub const EXTRACTION_TOTAL: &str = "kreuzberg.extraction.total";
    /// Metric name `kreuzberg.extraction.cache.hits`.
    pub const CACHE_HITS: &str = "kreuzberg.extraction.cache.hits";
    /// Metric name `kreuzberg.extraction.cache.misses`.
    pub const CACHE_MISSES: &str = "kreuzberg.extraction.cache.misses";
    /// Metric name `kreuzberg.batch.total`.
    pub const BATCH_TOTAL: &str = "kreuzberg.batch.total";

    // Extraction measurements; the `_ms` / `_bytes` suffixes suggest milliseconds and bytes.

    /// Metric name `kreuzberg.extraction.duration_ms`.
    pub const EXTRACTION_DURATION_MS: &str = "kreuzberg.extraction.duration_ms";
    /// Metric name `kreuzberg.extraction.input_size_bytes`.
    pub const EXTRACTION_INPUT_BYTES: &str = "kreuzberg.extraction.input_size_bytes";
    /// Metric name `kreuzberg.extraction.output_size_bytes`.
    pub const EXTRACTION_OUTPUT_BYTES: &str = "kreuzberg.extraction.output_size_bytes";

    // Other durations.

    /// Metric name `kreuzberg.pipeline.duration_ms`.
    pub const PIPELINE_DURATION_MS: &str = "kreuzberg.pipeline.duration_ms";
    /// Metric name `kreuzberg.ocr.duration_ms`.
    pub const OCR_DURATION_MS: &str = "kreuzberg.ocr.duration_ms";
    /// Metric name `kreuzberg.batch.duration_ms`.
    pub const BATCH_DURATION_MS: &str = "kreuzberg.batch.duration_ms";

    // Concurrency.

    /// Metric name `kreuzberg.extraction.concurrent`.
    pub const CONCURRENT_EXTRACTIONS: &str = "kreuzberg.extraction.concurrent";
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    // An absolute path is reduced to its final component.
    #[test]
    fn sanitize_filename_normal_path() {
        assert_eq!(sanitize_filename(Path::new("/home/user/doc.pdf")), "doc.pdf");
    }

    // A bare file name with no directory part comes back unchanged.
    #[test]
    fn sanitize_filename_root_file() {
        assert_eq!(sanitize_filename(Path::new("doc.pdf")), "doc.pdf");
    }

    // An empty path has no final component, so the fallback literal is used.
    #[test]
    fn sanitize_filename_empty_path_returns_unknown() {
        assert_eq!(sanitize_filename(Path::new("")), "unknown");
    }

    // Unix-only: building an `OsStr` from raw bytes needs `OsStrExt`.
    #[cfg(unix)]
    #[test]
    fn sanitize_filename_non_utf8_path() {
        use std::ffi::OsStr;
        use std::os::unix::ffi::OsStrExt;

        // 0xFF and 0xFE never occur in well-formed UTF-8, so `to_str()` on this name fails
        // and the fallback literal is expected.
        let invalid_name = OsStr::from_bytes(&[0xFF, 0xFE]);
        assert_eq!(sanitize_filename(Path::new(invalid_name)), "unknown");
    }
}