use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Outcome of a text extraction: the text itself plus descriptive fields.
///
/// Instances are created with `ExtractResult::new` and refined through the
/// chainable `with_*` methods. The type is serializable via serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractResult {
/// The extracted text.
pub text: String,
/// Where the text came from (presumably a URL or path; confirm with callers).
pub source: String,
/// Content type of the source, if known; `None` until `with_content_type` is called.
pub content_type: Option<String>,
/// Length supplied through `with_original_length`; `0` until then.
/// NOTE(review): presumably the size of the raw input before extraction; confirm.
pub original_length: usize,
/// Byte length of `text` as measured by `String::len` when `new` was called.
pub text_length: usize,
/// Optional title; `None` until `with_title` is called.
pub title: Option<String>,
/// Free-form key/value pairs added through `with_metadata`.
pub metadata: HashMap<String, String>,
/// `true` once `with_sanitization` has attached a report; `false` from `new`.
pub sanitized: bool,
/// Sanitization report; only present when the `sanitize` feature is enabled.
#[cfg(feature = "sanitize")]
pub sanitization: Option<SanitizationInfo>,
}
/// Report describing what a sanitization pass found.
///
/// Only compiled when the `sanitize` feature is enabled. `ExtractResult::is_safe`
/// reads `blocked` and `injection_detected` from this report.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg(feature = "sanitize")]
pub struct SanitizationInfo {
/// Presumably the number of PII occurrences that were redacted (TODO confirm).
pub pii_redacted: usize,
/// Presumably labels of the PII categories encountered (TODO confirm).
pub pii_types: Vec<String>,
/// Whether an injection was detected; a `true` value makes `is_safe` return `false`.
pub injection_detected: bool,
/// Confidence attached to the injection verdict.
/// NOTE(review): the scale (e.g. 0.0 to 1.0) is not visible here; confirm.
pub injection_confidence: f32,
/// Whether the content was blocked; a `true` value makes `is_safe` return `false`.
pub blocked: bool,
/// Optional explanation accompanying `blocked`.
pub block_reason: Option<String>,
}
impl ExtractResult {
    /// Creates a result from the given `text` and `source`.
    ///
    /// `text_length` is recorded as the UTF-8 byte length of `text`. Every
    /// other field starts out empty: `None` for the optional fields, `0` for
    /// `original_length`, no metadata, and `sanitized == false`.
    pub fn new(text: String, source: String) -> Self {
        let text_length = text.len();
        Self {
            text,
            source,
            content_type: None,
            original_length: 0,
            text_length,
            title: None,
            metadata: HashMap::new(),
            sanitized: false,
            #[cfg(feature = "sanitize")]
            sanitization: None,
        }
    }

    /// Sets the content type and returns the updated result.
    pub fn with_content_type(mut self, content_type: impl Into<String>) -> Self {
        self.content_type = Some(content_type.into());
        self
    }

    /// Sets `original_length` and returns the updated result.
    pub fn with_original_length(mut self, length: usize) -> Self {
        self.original_length = length;
        self
    }

    /// Sets the title and returns the updated result.
    pub fn with_title(mut self, title: impl Into<String>) -> Self {
        self.title = Some(title.into());
        self
    }

    /// Inserts one metadata entry and returns the updated result.
    ///
    /// An existing entry stored under the same key is overwritten.
    pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
        self.metadata.insert(key.into(), value.into());
        self
    }

    /// Attaches a sanitization report and marks the result as sanitized.
    #[cfg(feature = "sanitize")]
    pub fn with_sanitization(mut self, info: SanitizationInfo) -> Self {
        self.sanitized = true;
        self.sanitization = Some(info);
        self
    }

    /// Reports whether the content may be considered safe.
    ///
    /// Returns `false` only when a sanitization report is attached and it
    /// flags the content as blocked or as containing a detected injection.
    /// Without the `sanitize` feature, or when no report is attached, this
    /// always returns `true`.
    pub fn is_safe(&self) -> bool {
        #[cfg(feature = "sanitize")]
        {
            if let Some(info) = &self.sanitization {
                return !info.blocked && !info.injection_detected;
            }
        }
        true
    }

    /// Returns the text limited to at most `max_chars` characters.
    ///
    /// When the text holds more than `max_chars` characters, the first
    /// `max_chars` characters are kept and `"..."` is appended (the ellipsis
    /// is not counted against the limit). Otherwise an unmodified copy of the
    /// text is returned.
    ///
    /// The limit is measured in `char`s, not bytes, so multi-byte text that
    /// already fits is never given a spurious ellipsis.
    pub fn truncate(&self, max_chars: usize) -> String {
        // The byte offset of the character at index `max_chars` is exactly
        // where the cut belongs; if no such character exists, the text fits.
        match self.text.char_indices().nth(max_chars) {
            None => self.text.clone(),
            Some((cut, _)) => {
                let mut result = String::with_capacity(cut + 3);
                // `cut` comes from `char_indices`, so it is a valid char boundary.
                result.push_str(&self.text[..cut]);
                result.push_str("...");
                result
            }
        }
    }
}
#[cfg(feature = "sanitize")]
impl Default for SanitizationInfo {
    /// Builds a report in which nothing was found: zero redactions, no PII
    /// types, no injection (confidence `0.0`), not blocked, and no block reason.
    fn default() -> Self {
        let pii_types: Vec<String> = Vec::new();
        let block_reason: Option<String> = None;
        SanitizationInfo {
            block_reason,
            blocked: false,
            injection_confidence: 0.0,
            injection_detected: false,
            pii_types,
            pii_redacted: 0,
        }
    }
}