pub mod backend;
pub mod batch;
pub mod engine;
pub mod error;
pub mod extraction;
pub mod models;
pub mod ner;
pub mod reranker;
pub mod tiered;
/// Crate-wide semaphore that is created with exactly one permit.
///
/// The value is built lazily on first access and lives for the rest of the
/// process. Because there is a single permit, at most one holder can be
/// inside the guarded section at any time.
///
/// NOTE(review): judging by the name, this is intended to serialize GPU
/// inference calls; the acquiring call sites are in other modules — confirm
/// there. The `Arc` wrapper presumably exists so those call sites can clone
/// a handle (e.g. for owned permits) — verify against callers.
pub(crate) static GPU_INFERENCE_SEMAPHORE: std::sync::LazyLock<
    std::sync::Arc<tokio::sync::Semaphore>,
> = std::sync::LazyLock::new(|| {
    // One permit only: holders are mutually exclusive.
    let single_permit = tokio::sync::Semaphore::new(1);
    std::sync::Arc::new(single_permit)
});
pub use backend::{BackendKind, EmbeddingBackend};
pub use batch::TokenBudgetBatcher;
pub use engine::{EmbeddingEngine, EmbeddingEngineBuilder};
pub use error::{InferenceError, Result};
pub use extraction::{
build_provider, ExtractionOpts, ExtractionProvider, ExtractionResult, ExtractorConfig,
};
pub use models::{EmbeddingModel, ModelConfig};
pub use ner::{rule_based_extract, ExtractedEntity, GlinerEngine, NerEngine};
pub use reranker::CrossEncoderEngine;
pub use tiered::TieredEngine;
pub mod prelude {
pub use crate::engine::{EmbeddingEngine, EmbeddingEngineBuilder};
pub use crate::error::{InferenceError, Result};
pub use crate::models::{EmbeddingModel, ModelConfig};
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `ModelConfig::default()` selects `BgeLarge`, a max batch size of 32,
    /// and leaves GPU usage off.
    #[test]
    fn test_model_defaults() {
        let config = ModelConfig::default();
        assert_eq!(config.model, EmbeddingModel::BgeLarge);
        assert_eq!(config.max_batch_size, 32);
        assert!(!config.use_gpu);
    }

    /// Each embedding model reports its expected vector dimension.
    #[test]
    fn test_model_dimensions() {
        assert_eq!(EmbeddingModel::BgeLarge.dimension(), 1024);
        assert_eq!(EmbeddingModel::MiniLM.dimension(), 384);
        assert_eq!(EmbeddingModel::BgeSmall.dimension(), 384);
        assert_eq!(EmbeddingModel::E5Small.dimension(), 384);
    }

    /// The global semaphore never exposes more than one permit.
    ///
    /// Only an upper bound is checked: the semaphore is shared by every test
    /// in this process, and the harness may run tests in parallel, so the
    /// single permit can legitimately be held by another test right now.
    #[test]
    fn test_gpu_semaphore_has_one_permit() {
        let avail = GPU_INFERENCE_SEMAPHORE.available_permits();
        assert!(
            avail <= 1,
            "GPU semaphore must not have more than 1 permit; got {avail}"
        );
    }

    /// Holding the permit drives the available count to zero, and releasing
    /// it never yields more than one permit.
    #[tokio::test]
    async fn test_gpu_semaphore_acquire_and_release() {
        // Wait for the permit rather than `try_acquire`: under contention a
        // failed `try_acquire` would let the test pass without checking
        // anything.
        let permit = GPU_INFERENCE_SEMAPHORE
            .acquire()
            .await
            .expect("GPU semaphore is never closed");

        // Deterministic: we hold the only permit.
        assert_eq!(GPU_INFERENCE_SEMAPHORE.available_permits(), 0);

        drop(permit);

        // Not `== 1`: once released, a concurrently running test may take the
        // permit before this line executes, so only the upper bound is stable.
        assert!(GPU_INFERENCE_SEMAPHORE.available_permits() <= 1);
    }

    /// While one permit is outstanding, a second non-blocking acquire fails.
    #[tokio::test]
    async fn test_gpu_semaphore_second_acquire_blocks_until_first_released() {
        // Queue for the permit so the exclusivity check below always runs.
        let permit = GPU_INFERENCE_SEMAPHORE
            .acquire()
            .await
            .expect("GPU semaphore is never closed");

        let p2 = GPU_INFERENCE_SEMAPHORE.try_acquire();
        assert!(
            p2.is_err(),
            "Second try_acquire must fail while the first permit is outstanding"
        );
        // Explicitly discard the failed attempt before releasing ours.
        drop(p2);

        drop(permit);

        // Another test may grab the permit immediately after release, so an
        // exact `== 1` check here would be racy.
        assert!(GPU_INFERENCE_SEMAPHORE.available_permits() <= 1);
    }
}