use crate::Result;
use crate::plugins::registry::{
get_document_extractor_registry, get_ocr_backend_registry, get_post_processor_registry, get_validator_registry,
};
/// Snapshot of what the plugin registries report at one point in time.
///
/// Each plugin category is represented twice: as the list of entries returned
/// by the registry and as the length of that list. [`PluginHealthStatus::check`]
/// fills both from the same `Vec`, so they agree for values it produces; the
/// type itself does not enforce that for hand-built values.
#[derive(Debug, Clone)]
pub struct PluginHealthStatus {
    /// Number of entries in [`Self::ocr_backends`].
    pub ocr_backends_count: usize,
    /// Entries listed by the OCR backend registry.
    pub ocr_backends: Vec<String>,
    /// Number of entries in [`Self::extractors`].
    pub extractors_count: usize,
    /// Entries listed by the document extractor registry.
    pub extractors: Vec<String>,
    /// Number of entries in [`Self::post_processors`].
    pub post_processors_count: usize,
    /// Entries listed by the post-processor registry.
    pub post_processors: Vec<String>,
    /// Number of entries in [`Self::validators`].
    pub validators_count: usize,
    /// Entries listed by the validator registry.
    pub validators: Vec<String>,
}
impl PluginHealthStatus {
    /// Queries the four plugin registries and records what each one lists.
    ///
    /// The registries are read one after another (OCR backends, document
    /// extractors, post-processors, validators). Each read guard is a
    /// temporary that is released at the end of its own statement, so no two
    /// registries are locked at the same time by this function.
    ///
    /// Every `*_count` field is the length of the matching list.
    pub fn check() -> Self {
        let ocr_backends = get_ocr_backend_registry().read().list();
        let extractors = get_document_extractor_registry().read().list();
        let post_processors = get_post_processor_registry().read().list();
        let validators = get_validator_registry().read().list();

        // Field initialisers run in written order, so each `len()` is taken
        // before the corresponding `Vec` is moved into the struct.
        Self {
            ocr_backends_count: ocr_backends.len(),
            ocr_backends,
            extractors_count: extractors.len(),
            extractors,
            post_processors_count: post_processors.len(),
            post_processors,
            validators_count: validators.len(),
            validators,
        }
    }
}
/// Takes a plugin health snapshot and logs a summary of it.
///
/// Logging behaviour, per category:
/// - OCR backends: `warn` with troubleshooting hints when none are
///   registered, otherwise `info` with the registered entries.
/// - Document extractors: `warn` when none are registered, otherwise `info`.
/// - Post-processors and validators: `info` only when at least one exists.
///
/// Afterwards the environment diagnostics in `check_environment_variables`
/// are emitted.
///
/// # Errors
///
/// The body as written has no failing path; it always returns `Ok` with the
/// snapshot. Missing plugins are reported through logs, not as errors.
pub fn validate_plugins_at_startup() -> Result<PluginHealthStatus> {
    let status = PluginHealthStatus::check();

    match status.ocr_backends_count {
        0 => {
            tracing::warn!(
                "No OCR backends registered. OCR functionality will be unavailable. \
                 This is normal if OCR is not required. \
                 If OCR is needed, check that: \
                 1. The 'ocr' feature is enabled in Cargo.toml \
                 2. TESSDATA_PREFIX environment variable is set (e.g., /usr/share/tesseract-ocr/tessdata) \
                 3. Tessdata files exist and are readable (tessdata/*.traineddata) \
                 4. In containers, mount tessdata volume or install tesseract-ocr package. \
                 See https://docs.kreuzberg.dev/guides/docker/ for Kubernetes setup."
            );
        }
        _ => {
            tracing::info!(
                "OCR backends registered: [{}]. Ready for OCR processing.",
                status.ocr_backends.join(", ")
            );
        }
    }

    match status.extractors_count {
        0 => {
            tracing::warn!(
                "No document extractors registered. \
                 Document extraction will fail. \
                 This usually indicates a configuration issue. \
                 Ensure extractors are properly registered during initialization."
            );
        }
        _ => {
            tracing::info!("Document extractors registered: [{}]", status.extractors.join(", "));
        }
    }

    // Optional plugin kinds: stay silent when none are registered.
    if status.post_processors_count != 0 {
        tracing::info!("Post-processors registered: [{}]", status.post_processors.join(", "));
    }
    if status.validators_count != 0 {
        tracing::info!("Validators registered: [{}]", status.validators.join(", "));
    }

    check_environment_variables();

    Ok(status)
}
/// Logs diagnostics about environment settings relevant to Tesseract and logging.
///
/// Nothing is returned and nothing fails; every finding is reported through
/// `tracing`:
/// - `TESSDATA_PREFIX`: its value is logged at `debug`; a `warn` is emitted
///   when the path cannot be stat'ed or is not a directory; a `debug` note is
///   emitted when the variable is absent.
/// - `/usr/share/tesseract-ocr/tessdata`: a `debug` note when that path exists.
/// - `RUST_LOG`: echoed at `debug` when it is set to valid Unicode.
fn check_environment_variables() {
    // `var_os` instead of `var`: `std::env::var` yields `Err` for a variable
    // that is set but not valid Unicode, which previously fell into the
    // "not set" branch and skipped the directory check entirely.
    match std::env::var_os("TESSDATA_PREFIX") {
        Some(raw) => {
            let path = std::path::Path::new(&raw);
            // `display()` renders valid UTF-8 paths unchanged, so the log
            // text is the same as before for ordinary values.
            tracing::debug!("TESSDATA_PREFIX={}", path.display());

            match std::fs::metadata(path) {
                Ok(metadata) if metadata.is_dir() => {
                    tracing::debug!(
                        "TESSDATA_PREFIX directory exists and is readable. \
                         Tesseract should find trained data files."
                    );
                }
                Ok(_) => {
                    tracing::warn!(
                        "TESSDATA_PREFIX={} exists but is not a directory. \
                         Tesseract may fail to initialize.",
                        path.display()
                    );
                }
                Err(_) => {
                    tracing::warn!(
                        "TESSDATA_PREFIX={} does not exist or is not readable. \
                         Tesseract may fail to initialize. \
                         Check directory permissions in containerized environments.",
                        path.display()
                    );
                }
            }
        }
        None => {
            tracing::debug!("TESSDATA_PREFIX not set. Tesseract will use system default paths.");
        }
    }

    if std::path::Path::new("/usr/share/tesseract-ocr/tessdata").exists() {
        tracing::debug!("Found tessdata at system default: /usr/share/tesseract-ocr/tessdata");
    }

    if let Ok(log_level) = std::env::var("RUST_LOG") {
        tracing::debug!("RUST_LOG={}", log_level);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Installs a test-writer subscriber capped at `level`.
    ///
    /// The outcome of `try_init` is deliberately ignored, exactly as each
    /// test did inline before.
    fn init_tracing(level: tracing::Level) {
        let _ = tracing_subscriber::fmt()
            .with_max_level(level)
            .with_test_writer()
            .try_init();
    }

    /// Runs startup validation, asserts it succeeded, and returns the snapshot.
    #[track_caller]
    fn startup_status() -> PluginHealthStatus {
        let result = validate_plugins_at_startup();
        assert!(result.is_ok());
        result.unwrap()
    }

    /// Asserts that every `*_count` equals the length of its list.
    #[track_caller]
    fn assert_counts_match_lists(status: &PluginHealthStatus) {
        assert_eq!(status.ocr_backends.len(), status.ocr_backends_count);
        assert_eq!(status.extractors.len(), status.extractors_count);
        assert_eq!(status.post_processors.len(), status.post_processors_count);
        assert_eq!(status.validators.len(), status.validators_count);
    }

    /// Asserts that two snapshots carry the same four counts.
    #[track_caller]
    fn assert_same_counts(left: &PluginHealthStatus, right: &PluginHealthStatus) {
        assert_eq!(left.ocr_backends_count, right.ocr_backends_count);
        assert_eq!(left.extractors_count, right.extractors_count);
        assert_eq!(left.post_processors_count, right.post_processors_count);
        assert_eq!(left.validators_count, right.validators_count);
    }

    // Smoke test: `check` completes and the count fields are readable.
    #[test]
    fn test_plugin_health_status_check() {
        let status = PluginHealthStatus::check();
        let _ = status.ocr_backends_count;
        let _ = status.extractors_count;
    }

    #[test]
    fn test_validate_plugins_at_startup() {
        init_tracing(tracing::Level::DEBUG);
        let status = startup_status();
        let _ = status.ocr_backends_count;
    }

    // The four `*_empty` tests below do not require empty registries; each
    // only checks that one count agrees with the length of its list.
    #[test]
    fn test_plugin_health_status_ocr_backends_empty() {
        let status = PluginHealthStatus::check();
        assert_eq!(status.ocr_backends.len(), status.ocr_backends_count);
    }

    #[test]
    fn test_plugin_health_status_extractors_empty() {
        let status = PluginHealthStatus::check();
        assert_eq!(status.extractors.len(), status.extractors_count);
    }

    #[test]
    fn test_plugin_health_status_post_processors_empty() {
        let status = PluginHealthStatus::check();
        assert_eq!(status.post_processors.len(), status.post_processors_count);
    }

    #[test]
    fn test_plugin_health_status_validators_empty() {
        let status = PluginHealthStatus::check();
        assert_eq!(status.validators.len(), status.validators_count);
    }

    #[test]
    fn test_validate_plugins_at_startup_returns_status() {
        init_tracing(tracing::Level::DEBUG);
        let status = startup_status();
        assert_counts_match_lists(&status);
    }

    // Two back-to-back snapshots are expected to report the same counts.
    #[test]
    fn test_plugin_health_status_check_consistency() {
        let first = PluginHealthStatus::check();
        let second = PluginHealthStatus::check();
        assert_same_counts(&first, &second);
    }

    #[test]
    fn test_validate_plugins_at_startup_with_logging() {
        init_tracing(tracing::Level::INFO);
        let status = startup_status();
        assert_counts_match_lists(&status);
    }

    #[test]
    fn test_plugin_health_status_all_counts_valid() {
        let status = PluginHealthStatus::check();
        assert_counts_match_lists(&status);
    }

    #[test]
    fn test_plugin_health_status_vec_sizes_match_counts() {
        let status = PluginHealthStatus::check();
        assert_counts_match_lists(&status);
    }

    #[test]
    fn test_validate_plugins_at_startup_logs_warnings_and_info() {
        init_tracing(tracing::Level::DEBUG);
        let status = startup_status();
        assert_eq!(status.ocr_backends.len(), status.ocr_backends_count);
    }

    // Exercises the environment diagnostics indirectly through startup
    // validation; only success of the call is asserted.
    #[test]
    fn test_check_environment_variables_with_rust_log() {
        init_tracing(tracing::Level::DEBUG);
        let result = validate_plugins_at_startup();
        assert!(result.is_ok());
    }

    #[test]
    fn test_plugin_health_status_clone() {
        let original = PluginHealthStatus::check();
        let duplicate = original.clone();
        assert_same_counts(&original, &duplicate);
    }

    #[test]
    fn test_plugin_health_status_debug_format() {
        let status = PluginHealthStatus::check();
        let debug_str = format!("{:?}", status);
        assert!(!debug_str.is_empty());
        assert!(debug_str.contains("ocr_backends_count"));
    }
}