#[cfg(feature = "classifiers")]
pub mod candle;
#[cfg(feature = "classifiers")]
pub mod candle_pii;
pub mod llm;
pub mod metrics;
#[cfg(feature = "classifiers")]
pub mod ner;
#[cfg(feature = "classifiers")]
pub mod three_class;
/// Upper bound, in tokens, on the size of a single chunk.
// NOTE(review): presumably chosen to fit the classifier model's input window — confirm
// against the model/tokenizer configuration used by the submodules.
#[cfg(feature = "classifiers")]
pub(crate) const MAX_CHUNK_TOKENS: usize = 448;
/// Chunk token budget with two slots subtracted from `MAX_CHUNK_TOKENS`.
// NOTE(review): the two reserved slots are presumably for special tokens added around
// each chunk (e.g. a leading and a trailing marker) — confirm at the usage sites.
#[cfg(feature = "classifiers")]
pub(crate) const MAX_CHUNK_CONTENT_TOKENS: usize = MAX_CHUNK_TOKENS - 2;
/// Overlap size, in tokens, used when chunking.
// NOTE(review): presumably the number of tokens shared by consecutive chunks — confirm
// at the usage sites.
#[cfg(feature = "classifiers")]
pub(crate) const CHUNK_OVERLAP_TOKENS: usize = 64;
use std::future::Future;
use std::pin::Pin;
use crate::error::LlmError;
/// Identifies a classifier task.
///
/// The type is a plain, field-less enum that is cheap to copy and can be used
/// as a hash-map key.
// NOTE(review): variant meanings below are inferred from their names — confirm
// against the code that dispatches on this enum.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ClassifierTask {
    /// Injection classification (presumably prompt-injection detection).
    Injection,
    /// PII classification (presumably personally identifiable information).
    Pii,
    /// Feedback classification.
    Feedback,
}
/// One PII finding: an entity type, a `start..end` position pair and a score.
#[derive(Clone, Debug)]
pub struct PiiSpan {
    /// Name of the detected entity type.
    pub entity_type: String,
    /// Start position of the span.
    // NOTE(review): unit (byte vs. char offset) is not visible here — confirm with producers.
    pub start: usize,
    /// End position of the span.
    // NOTE(review): inclusive/exclusive end is not visible here — confirm with producers.
    pub end: usize,
    /// Score attached to the finding (presumably a model confidence — confirm).
    pub score: f32,
}
/// Outcome of a PII detection run: the individual spans plus a summary flag.
#[derive(Clone, Debug)]
pub struct PiiResult {
    /// All spans reported by the detector.
    pub spans: Vec<PiiSpan>,
    /// Summary flag reported alongside the spans.
    // NOTE(review): presumably `true` exactly when `spans` is non-empty, but this type
    // does not enforce that relationship — confirm with the detector implementations.
    pub has_pii: bool,
}
/// Asynchronous PII detection interface.
///
/// Implementors must be `Send + Sync`. The detection method returns a boxed,
/// pinned future rather than being an `async fn`.
pub trait PiiDetector: Send + Sync {
    /// Analyses `text` and resolves to the detected PII, or to an [`LlmError`].
    ///
    /// The returned future is `Send` and borrows both `self` and `text` for
    /// `'a`, so neither may be dropped before the future completes.
    fn detect_pii<'a>(
        &'a self,
        text: &'a str,
    ) -> Pin<Box<dyn Future<Output = Result<PiiResult, LlmError>> + Send + 'a>>;

    /// Static name identifying the backend implementation.
    fn backend_name(&self) -> &'static str;
}
/// Checks that the file at `path` hashes to the `expected` SHA-256 digest.
///
/// The file is streamed through the hasher in 8 KiB reads, so it is never held
/// in memory in full. `expected` is a hex string; it is compared against the
/// computed hex digest ignoring ASCII case.
///
/// # Errors
///
/// Returns [`LlmError::ModelLoad`] when the file cannot be opened, when a read
/// fails with anything other than [`std::io::ErrorKind::Interrupted`], or when
/// the computed digest does not match `expected`.
#[cfg(feature = "classifiers")]
pub(super) fn verify_sha256(path: &std::path::Path, expected: &str) -> Result<(), LlmError> {
    use sha2::{Digest, Sha256};
    use std::io::{ErrorKind, Read};

    let mut hasher = Sha256::new();
    let mut file = std::fs::File::open(path)
        .map_err(|e| LlmError::ModelLoad(format!("cannot open file for hash check: {e}")))?;
    let mut buf = [0u8; 8192];
    loop {
        let n = match file.read(&mut buf) {
            // A zero-length read signals end of file.
            Ok(0) => break,
            Ok(n) => n,
            // Per the `Read::read` contract an interrupted read is non-fatal and
            // should simply be retried; failing here would misreport a healthy
            // file as unreadable.
            Err(e) if e.kind() == ErrorKind::Interrupted => continue,
            Err(e) => {
                return Err(LlmError::ModelLoad(format!(
                    "read error during hash check: {e}"
                )));
            }
        };
        hasher.update(&buf[..n]);
    }
    let computed = hex::encode(hasher.finalize());
    // Hex digits are ASCII, so an ASCII case-insensitive comparison accepts
    // upper- and lowercase digests without allocating a lowercased copy.
    if !computed.eq_ignore_ascii_case(expected) {
        return Err(LlmError::ModelLoad(format!(
            "SHA-256 mismatch for {}: expected {}, got {} \
             (file may be corrupt or tampered — do not auto-retry)",
            path.display(),
            expected,
            computed
        )));
    }
    Ok(())
}
/// One labelled span: a label, a score and a `start..end` position pair.
// NOTE(review): the name suggests these come from named-entity recognition — confirm
// with the code that produces them.
#[derive(Clone, Debug)]
pub struct NerSpan {
    /// Label assigned to the span.
    pub label: String,
    /// Score attached to the span (presumably a model confidence — confirm).
    pub score: f32,
    /// Start position of the span.
    // NOTE(review): unit (byte vs. char offset) is not visible here — confirm with producers.
    pub start: usize,
    /// End position of the span.
    // NOTE(review): inclusive/exclusive end is not visible here — confirm with producers.
    pub end: usize,
}
/// Result of classifying one piece of text.
#[derive(Clone, Debug)]
pub struct ClassificationResult {
    /// Label produced by the classifier.
    pub label: String,
    /// Score associated with `label` (presumably a model confidence — confirm).
    pub score: f32,
    /// Whether the classifier considers the result a positive.
    // NOTE(review): what counts as "positive" is decided by each backend — confirm there.
    pub is_positive: bool,
    /// Spans attached to the result; may be empty.
    pub spans: Vec<NerSpan>,
}
/// Asynchronous text classification interface.
///
/// Implementors must be `Send + Sync`. The classification method returns a
/// boxed, pinned future rather than being an `async fn`.
pub trait ClassifierBackend: Send + Sync {
    /// Classifies `text`, resolving to a [`ClassificationResult`] or an [`LlmError`].
    ///
    /// The returned future is `Send` and borrows both `self` and `text` for
    /// `'a`, so neither may be dropped before the future completes.
    fn classify<'a>(
        &'a self,
        text: &'a str,
    ) -> Pin<Box<dyn Future<Output = Result<ClassificationResult, LlmError>> + Send + 'a>>;

    /// Static name identifying the backend implementation.
    fn backend_name(&self) -> &'static str;
}
#[cfg(all(test, feature = "classifiers"))]
mod sha256_tests {
    //! Unit tests for `verify_sha256`, run against real temporary files.

    use super::verify_sha256;
    use std::io::Write;

    /// Creates a named temporary file holding exactly `contents`.
    fn temp_file_with(contents: &[u8]) -> tempfile::NamedTempFile {
        let mut file = tempfile::NamedTempFile::new().unwrap();
        file.write_all(contents).unwrap();
        file
    }

    /// Hex-encoded SHA-256 of `bytes`, computed independently of the function under test.
    fn hex_digest(bytes: &[u8]) -> String {
        use sha2::{Digest, Sha256};
        hex::encode(Sha256::digest(bytes))
    }

    #[test]
    fn verify_sha256_matching_digest_returns_ok() {
        let payload = b"hello world";
        let file = temp_file_with(payload);
        let digest = hex_digest(payload);
        let outcome = verify_sha256(file.path(), &digest);
        assert!(outcome.is_ok());
    }

    #[test]
    fn verify_sha256_uppercase_expected_accepted() {
        let payload = b"case test";
        let file = temp_file_with(payload);
        // The expected digest is supplied in uppercase on purpose.
        let digest = hex_digest(payload).to_uppercase();
        let outcome = verify_sha256(file.path(), &digest);
        assert!(outcome.is_ok());
    }

    #[test]
    fn verify_sha256_mismatch_returns_err() {
        let file = temp_file_with(b"original");
        let all_zero_digest = "0000000000000000000000000000000000000000000000000000000000000000";
        match verify_sha256(file.path(), all_zero_digest) {
            Err(err) => assert!(err.to_string().contains("SHA-256 mismatch")),
            Ok(()) => panic!("a digest mismatch must be reported as an error"),
        }
    }

    #[test]
    fn verify_sha256_missing_file_returns_err() {
        let missing = std::path::Path::new("/nonexistent/path/file.bin");
        let outcome = verify_sha256(missing, "abc");
        assert!(outcome.is_err());
    }

    #[test]
    fn verify_sha256_empty_file_ok() {
        let file = temp_file_with(b"");
        let digest = hex_digest(b"");
        let outcome = verify_sha256(file.path(), &digest);
        assert!(outcome.is_ok());
    }
}
#[cfg(test)]
pub mod mock {
    //! Test-only classifier backend that always yields one pre-built result.

    use super::{ClassificationResult, ClassifierBackend};
    use crate::error::LlmError;
    use std::future::Future;
    use std::pin::Pin;
    use std::sync::Arc;

    /// [`ClassifierBackend`] that ignores its input and returns a copy of `result`.
    pub struct MockClassifierBackend {
        /// The canned result handed out by every `classify` call.
        pub result: Arc<ClassificationResult>,
    }

    impl MockClassifierBackend {
        /// Builds a mock whose canned result carries the given `label`, `score`
        /// and `is_positive` flag, with no spans.
        #[must_use]
        pub fn new(label: &str, score: f32, is_positive: bool) -> Self {
            let canned = ClassificationResult {
                label: label.to_owned(),
                score,
                is_positive,
                spans: Vec::new(),
            };
            Self {
                result: Arc::new(canned),
            }
        }
    }

    impl ClassifierBackend for MockClassifierBackend {
        /// Resolves immediately to a clone of the canned result; the text is unused.
        fn classify<'a>(
            &'a self,
            _text: &'a str,
        ) -> Pin<Box<dyn Future<Output = Result<ClassificationResult, LlmError>> + Send + 'a>>
        {
            // Clone up front so the future owns its value and does not borrow `self`.
            let canned: ClassificationResult = (*self.result).clone();
            Box::pin(async move { Ok(canned) })
        }

        /// Always reports `"mock"`.
        fn backend_name(&self) -> &'static str {
            "mock"
        }
    }
}