use crate::Vector;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Document formats distinguished by the content-processing types.
///
/// The enum is fieldless and derives `Copy`, `Eq` and `Hash`, so values can be
/// passed by value, compared with `==`, and used as map or set keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum DocumentFormat {
    /// PDF document.
    Pdf,
    /// HTML document.
    Html,
    /// XML document.
    Xml,
    /// Markdown document.
    Markdown,
    /// Plain, unformatted text.
    PlainText,
    /// DOCX document.
    Docx,
    /// PPTX presentation.
    Pptx,
    /// XLSX spreadsheet.
    Xlsx,
    /// RTF document.
    Rtf,
    /// EPUB e-book.
    Epub,
    /// JSON data.
    Json,
    /// CSV data.
    Csv,
    /// Image content; individual image formats are not distinguished here.
    Image,
    /// Audio content; individual audio formats are not distinguished here.
    Audio,
    /// Video content; individual video formats are not distinguished here.
    Video,
    /// Catch-all variant; presumably used when the format cannot be identified.
    Unknown,
}
/// Settings that select what should be extracted from a document.
///
/// This type only carries the options; how each one is honoured is decided by
/// the code that consumes the configuration. Baseline values are provided by
/// the `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentExtractionConfig {
    /// Toggle for extracting textual content.
    pub extract_text: bool,
    /// Toggle for extracting document metadata.
    pub extract_metadata: bool,
    /// Toggle for extracting embedded images.
    pub extract_images: bool,
    /// Toggle for extracting tables.
    pub extract_tables: bool,
    /// Toggle for extracting links.
    pub extract_links: bool,
    /// Upper bound on the extracted content length.
    ///
    /// NOTE(review): the unit (bytes vs. characters) is not visible here;
    /// confirm with the consumer.
    pub max_content_length: usize,
    /// Toggle for preserving the document's structure.
    pub preserve_structure: bool,
    /// Toggle for extracting page information.
    pub extract_page_info: bool,
    /// Toggle for language detection.
    pub detect_language: bool,
    /// How extracted content should be split into chunks.
    pub chunking_strategy: ChunkingStrategy,
    /// Toggle for extracting multimedia features.
    pub extract_multimedia_features: bool,
    /// Toggle for generating embeddings for images.
    pub generate_image_embeddings: bool,
    /// Toggle for extracting audio features.
    pub extract_audio_features: bool,
    /// Toggle for extracting video features.
    pub extract_video_features: bool,
    /// Optional cap on image resolution.
    ///
    /// NOTE(review): presumably `(width, height)` with `None` meaning no cap;
    /// confirm with the consumer.
    pub max_image_resolution: Option<(u32, u32)>,
}
impl Default for ContentExtractionConfig {
    /// Builds the baseline configuration.
    ///
    /// Text, metadata, table and link extraction, structure preservation, page
    /// info and language detection are enabled. Image extraction and every
    /// multimedia feature/embedding option are disabled. Content length is
    /// capped at `1_000_000`, image resolution at `(1920, 1080)`, and content
    /// is chunked with `ChunkingStrategy::Paragraph`.
    fn default() -> Self {
        // Named so the limits are not bare magic numbers in the literal below.
        const MAX_CONTENT_LENGTH: usize = 1_000_000;
        const MAX_IMAGE_RESOLUTION: (u32, u32) = (1920, 1080);

        Self {
            // Enabled by default.
            extract_text: true,
            extract_metadata: true,
            extract_tables: true,
            extract_links: true,
            preserve_structure: true,
            extract_page_info: true,
            detect_language: true,

            // Disabled by default.
            extract_images: false,
            extract_multimedia_features: false,
            generate_image_embeddings: false,
            extract_audio_features: false,
            extract_video_features: false,

            // Limits and chunking.
            max_content_length: MAX_CONTENT_LENGTH,
            max_image_resolution: Some(MAX_IMAGE_RESOLUTION),
            chunking_strategy: ChunkingStrategy::Paragraph,
        }
    }
}
/// Strategy used to split extracted content into chunks.
///
/// Derives `PartialEq`, `Eq` and `Hash` (in addition to the serde and
/// `Debug`/`Clone` derives) so a configured strategy can be compared with `==`
/// or used as a map key, in line with `DocumentFormat`. `Copy` is not derived
/// because `Custom` owns a `String`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ChunkingStrategy {
    /// One chunk per paragraph.
    Paragraph,
    /// One chunk per sentence.
    Sentence,
    /// Fixed-size chunks; the payload is presumably the number of tokens per
    /// chunk (confirm with the chunker).
    FixedTokens(usize),
    /// Semantically determined chunks.
    Semantic,
    /// One chunk per page.
    Page,
    /// Custom strategy identified by a string whose interpretation is left to
    /// the consumer.
    Custom(String),
}
/// Aggregate result of extracting content from one document.
///
/// Bundles the text together with every other extracted artefact (images,
/// tables, links, audio, video), the document structure, the derived chunks,
/// and statistics about the processing run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedContent {
    /// Format of the source document.
    pub format: DocumentFormat,
    /// Extracted text.
    pub text: String,
    /// Document metadata as string key/value pairs.
    pub metadata: HashMap<String, String>,
    /// Images extracted from the document.
    pub images: Vec<ExtractedImage>,
    /// Tables extracted from the document.
    pub tables: Vec<ExtractedTable>,
    /// Links extracted from the document.
    pub links: Vec<ExtractedLink>,
    /// Structural outline (title, headings, table of contents, counts).
    pub structure: DocumentStructure,
    /// Chunks the content was split into.
    pub chunks: Vec<ContentChunk>,
    /// Language of the content, if known.
    ///
    /// NOTE(review): the code format (e.g. ISO 639) is not visible here.
    pub language: Option<String>,
    /// Counters, timings and warnings for the extraction run.
    pub processing_stats: ProcessingStats,
    /// Audio items extracted from the document.
    pub audio_content: Vec<ExtractedAudio>,
    /// Video items extracted from the document.
    pub video_content: Vec<ExtractedVideo>,
    /// Embeddings that combine several modalities.
    pub cross_modal_embeddings: Vec<CrossModalEmbedding>,
}
/// An image extracted from a document, with optional analysis results.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedImage {
    /// Image payload carried as a string.
    ///
    /// NOTE(review): the encoding (e.g. base64) is not visible here; confirm
    /// with the producer.
    pub data: String,
    /// Image format identifier (free-form string).
    pub format: String,
    /// Image width; presumably in pixels.
    pub width: u32,
    /// Image height; presumably in pixels.
    pub height: u32,
    /// Alternative text, if any.
    pub alt_text: Option<String>,
    /// Caption, if any.
    pub caption: Option<String>,
    /// Position of the image in the source document.
    pub location: ContentLocation,
    /// Computed visual features, if any.
    pub visual_features: Option<ImageFeatures>,
    /// Embedding vector for the image, if one was generated.
    pub embedding: Option<Vector>,
    /// Objects detected in the image.
    pub detected_objects: Vec<DetectedObject>,
    /// Classification labels assigned to the image.
    pub classification_labels: Vec<ClassificationLabel>,
}
/// Visual feature vectors and summary metrics computed for an image.
///
/// Each optional vector is `None` when that feature is absent. The length and
/// layout of the vectors are not defined by this type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImageFeatures {
    /// Color histogram, if present.
    pub color_histogram: Option<Vec<f32>>,
    /// Texture features, if present.
    pub texture_features: Option<Vec<f32>>,
    /// Edge features, if present.
    pub edge_features: Option<Vec<f32>>,
    /// SIFT features, if present.
    pub sift_features: Option<Vec<f32>>,
    /// CNN-derived features, if present.
    pub cnn_features: Option<Vec<f32>>,
    /// Dominant colors as `u8` triples; presumably RGB (confirm with the
    /// producer).
    pub dominant_colors: Vec<(u8, u8, u8)>,
    /// Scalar complexity metrics for the image.
    pub complexity_metrics: ImageComplexityMetrics,
}
/// An object detected in an image.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DetectedObject {
    /// Label of the detected object.
    pub label: String,
    /// Detection confidence; presumably in `[0, 1]` (not enforced here).
    pub confidence: f32,
    /// Bounding box as four `u32` values.
    ///
    /// NOTE(review): the component order (e.g. x, y, width, height) is not
    /// visible here; confirm with the detector.
    pub bbox: (u32, u32, u32, u32),
}
/// A classification label paired with its confidence.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClassificationLabel {
    /// Label text.
    pub label: String,
    /// Confidence for the label; presumably in `[0, 1]` (not enforced here).
    pub confidence: f32,
}
/// Scalar metrics summarising the complexity of an image.
///
/// The value ranges and normalisation are not defined by this type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImageComplexityMetrics {
    /// Edge density.
    pub edge_density: f32,
    /// Color diversity.
    pub color_diversity: f32,
    /// Texture complexity.
    pub texture_complexity: f32,
    /// Entropy.
    pub entropy: f32,
}
/// An audio item extracted from a document, with optional analysis results.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedAudio {
    /// Audio payload carried as a string.
    ///
    /// NOTE(review): the encoding (e.g. base64) is not visible here; confirm
    /// with the producer.
    pub data: String,
    /// Audio format identifier (free-form string).
    pub format: String,
    /// Duration; presumably in seconds.
    pub duration: f32,
    /// Sample rate; presumably in Hz.
    pub sample_rate: u32,
    /// Number of channels.
    pub channels: u16,
    /// Computed audio features, if any.
    pub audio_features: Option<AudioFeatures>,
    /// Embedding vector for the audio, if one was generated.
    pub embedding: Option<Vector>,
    /// Transcription of the audio, if available.
    pub transcription: Option<String>,
    /// Music-specific analysis, if available.
    pub music_analysis: Option<MusicAnalysis>,
    /// Speech-specific analysis, if available.
    pub speech_analysis: Option<SpeechAnalysis>,
}
/// Feature vectors and summary metrics computed for an audio item.
///
/// Each optional vector is `None` when that feature is absent. The length and
/// layout of the vectors are not defined by this type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AudioFeatures {
    /// MFCC features (presumably mel-frequency cepstral coefficients), if
    /// present.
    pub mfcc: Option<Vec<f32>>,
    /// Spectral features, if present.
    pub spectral_features: Option<Vec<f32>>,
    /// Rhythm features, if present.
    pub rhythm_features: Option<Vec<f32>>,
    /// Harmonic features, if present.
    pub harmonic_features: Option<Vec<f32>>,
    /// Zero-crossing rate.
    pub zero_crossing_rate: f32,
    /// Energy-related metrics.
    pub energy_metrics: AudioEnergyMetrics,
}
/// Energy and loudness metrics for an audio item.
///
/// Units and scales (linear vs. dB) are not defined by this type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AudioEnergyMetrics {
    /// RMS energy.
    pub rms_energy: f32,
    /// Peak amplitude.
    pub peak_amplitude: f32,
    /// Average loudness.
    pub average_loudness: f32,
    /// Dynamic range.
    pub dynamic_range: f32,
}
/// Music-specific analysis of an audio item.
///
/// Every field is optional; `None` means the property is not available.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MusicAnalysis {
    /// Tempo; presumably in beats per minute.
    pub tempo: Option<f32>,
    /// Musical key as free-form text.
    pub key: Option<String>,
    /// Time signature as free-form text.
    pub time_signature: Option<String>,
    /// Genre as free-form text.
    pub genre: Option<String>,
    /// Valence score; the scale is not defined here.
    pub valence: Option<f32>,
    /// Energy score; the scale is not defined here.
    pub energy: Option<f32>,
}
/// Speech-specific analysis of an audio item.
///
/// Every field is optional; `None` means the property is not available.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpeechAnalysis {
    /// Spoken language as free-form text; the code format is not defined here.
    pub language: Option<String>,
    /// Speaker gender as free-form text.
    pub speaker_gender: Option<String>,
    /// Emotion as free-form text.
    pub emotion: Option<String>,
    /// Speech rate; the unit is not defined here.
    pub speech_rate: Option<f32>,
    /// Pitch statistics.
    pub pitch_stats: Option<PitchStatistics>,
}
/// Summary statistics of pitch.
///
/// NOTE(review): units are not visible here (presumably Hz); confirm with the
/// producer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PitchStatistics {
    /// Mean pitch.
    pub mean_pitch: f32,
    /// Standard deviation of pitch.
    pub pitch_std: f32,
    /// Pitch range.
    pub pitch_range: f32,
}
/// A video item extracted from a document, with optional analysis results.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedVideo {
    /// Video payload carried as a string.
    ///
    /// NOTE(review): the encoding (e.g. base64) is not visible here; confirm
    /// with the producer.
    pub data: String,
    /// Video format identifier (free-form string).
    pub format: String,
    /// Duration; presumably in seconds.
    pub duration: f32,
    /// Frame rate; presumably in frames per second.
    pub frame_rate: f32,
    /// Resolution as two `u32` values; presumably `(width, height)`.
    pub resolution: (u32, u32),
    /// Keyframes extracted from the video.
    pub keyframes: Vec<VideoKeyframe>,
    /// Embedding vector for the video, if one was generated.
    pub embedding: Option<Vector>,
    /// Audio associated with the video, represented as an [`ExtractedAudio`],
    /// if available.
    pub audio_analysis: Option<ExtractedAudio>,
    /// Video-specific analysis, if available.
    pub video_analysis: Option<VideoAnalysis>,
}
/// A keyframe taken from a video.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VideoKeyframe {
    /// Position of the keyframe in the video; presumably seconds from start.
    pub timestamp: f32,
    /// The frame itself, represented as an extracted image.
    pub image: ExtractedImage,
    /// Scene-change score for this frame; the scale is not defined here.
    pub scene_change_score: f32,
}
/// One entry of a video color timeline: an `f32` paired with a list of `u8`
/// triples.
///
/// NOTE(review): presumably `(timestamp, dominant RGB colors at that time)`;
/// confirm with the producer.
pub type ColorTimelineEntry = (f32, Vec<(u8, u8, u8)>);
/// Video-specific analysis results.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VideoAnalysis {
    /// Scenes identified in the video.
    pub scenes: Vec<VideoScene>,
    /// Motion analysis, if available.
    pub motion_analysis: Option<MotionAnalysis>,
    /// Overall activity level; the scale is not defined here.
    pub activity_level: f32,
    /// Color information over time; see [`ColorTimelineEntry`].
    pub color_timeline: Vec<ColorTimelineEntry>,
}
/// A scene within a video, delimited by a start and an end time.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VideoScene {
    /// Start of the scene; presumably seconds from the start of the video.
    pub start_time: f32,
    /// End of the scene; presumably seconds from the start of the video.
    pub end_time: f32,
    /// Description of the scene, if any.
    pub description: Option<String>,
    /// A frame representing the scene, if any.
    pub representative_frame: Option<ExtractedImage>,
}
/// Motion analysis of a video.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MotionAnalysis {
    /// Average motion; the scale is not defined here.
    pub average_motion: f32,
    /// Variance of motion.
    pub motion_variance: f32,
    /// Camera motion as free-form text, if identified.
    pub camera_motion: Option<String>,
    /// Motion of individual objects.
    pub object_motion: Vec<ObjectMotion>,
}
/// Motion of a single tracked object.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ObjectMotion {
    /// Identifier of the object.
    pub object_id: String,
    /// Trajectory as a sequence of `f32` triples.
    ///
    /// NOTE(review): the meaning of the three components (e.g. x, y, time) is
    /// not visible here; confirm with the producer.
    pub trajectory: Vec<(f32, f32, f32)>,
    /// Speed of the object; the unit is not defined here.
    pub speed: f32,
}
/// A table extracted from a document, with cell contents as strings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedTable {
    /// Column headers.
    pub headers: Vec<String>,
    /// Data rows; each row is a list of cell strings. This type does not
    /// enforce that rows have the same length as `headers`.
    pub rows: Vec<Vec<String>>,
    /// Caption, if any.
    pub caption: Option<String>,
    /// Position of the table in the source document.
    pub location: ContentLocation,
}
/// A link extracted from a document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedLink {
    /// Link target, stored as a plain string (not validated by this type).
    pub url: String,
    /// Text of the link.
    pub text: String,
    /// Title of the link, if any.
    pub title: Option<String>,
    /// Position of the link in the source document.
    pub location: ContentLocation,
}
/// Structural outline of a document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentStructure {
    /// Document title, if any.
    pub title: Option<String>,
    /// Headings found in the document.
    pub headings: Vec<Heading>,
    /// Number of pages.
    pub page_count: usize,
    /// Number of sections.
    pub section_count: usize,
    /// Table-of-contents entries.
    pub table_of_contents: Vec<TocEntry>,
}
/// A heading found in a document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Heading {
    /// Heading level.
    ///
    /// NOTE(review): the numbering base (e.g. 1 for top level) is not visible
    /// here.
    pub level: usize,
    /// Heading text.
    pub text: String,
    /// Position of the heading in the source document.
    pub location: ContentLocation,
}
/// An entry in a document's table of contents.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TocEntry {
    /// Entry title.
    pub title: String,
    /// Nesting level of the entry; the numbering base is not defined here.
    pub level: usize,
    /// Page the entry refers to, if known.
    pub page: Option<usize>,
    /// Position associated with the entry in the source document.
    pub location: ContentLocation,
}
/// A chunk of extracted content, optionally carrying an embedding.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentChunk {
    /// Text of the chunk.
    pub text: String,
    /// Kind of content the chunk holds.
    pub chunk_type: ChunkType,
    /// Position of the chunk in the source document.
    pub location: ContentLocation,
    /// Additional metadata as string key/value pairs.
    pub metadata: HashMap<String, String>,
    /// Embedding vector for the chunk, if one was generated.
    pub embedding: Option<Vector>,
}
/// Kind of content held by a chunk.
///
/// The enum is fieldless, so it derives `Copy`, `PartialEq`, `Eq` and `Hash`
/// in addition to the serde and `Debug`/`Clone` derives. This lets callers
/// compare chunk types with `==` and group chunks by type in a map, in line
/// with `DocumentFormat`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ChunkType {
    /// Body paragraph.
    Paragraph,
    /// Section heading.
    Heading,
    /// Table content.
    Table,
    /// List content.
    List,
    /// Quotation.
    Quote,
    /// Code.
    Code,
    /// Caption.
    Caption,
    /// Footnote.
    Footnote,
    /// Header; presumably a running page header, as distinct from `Heading`.
    Header,
    /// Footer; presumably a running page footer.
    Footer,
}
/// Position of a piece of content within its source document.
///
/// Every coordinate is optional; `None` means that coordinate is not known or
/// does not apply.
///
/// NOTE(review): whether indices are 0- or 1-based is not visible here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentLocation {
    /// Page index.
    pub page: Option<usize>,
    /// Section index.
    pub section: Option<usize>,
    /// Character offset.
    pub char_offset: Option<usize>,
    /// Line number.
    pub line: Option<usize>,
    /// Column number.
    pub column: Option<usize>,
}
/// An embedding that combines content from several modalities.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossModalEmbedding {
    /// The combined embedding vector.
    pub embedding: Vector,
    /// Modalities associated with the embedding.
    pub modalities: Vec<Modality>,
    /// Strategy recorded for combining the modalities.
    pub fusion_strategy: FusionStrategy,
    /// Confidence for the embedding; presumably in `[0, 1]` (not enforced
    /// here).
    pub confidence: f32,
    /// Identifiers of related content items; presumably the items the
    /// embedding was built from.
    pub content_ids: Vec<String>,
}
/// A content modality.
///
/// The enum is fieldless, so it derives `Copy`, `PartialEq`, `Eq` and `Hash`
/// in addition to the serde and `Debug`/`Clone` derives. This lets callers
/// compare modalities with `==` and use them as map or set keys, in line with
/// `DocumentFormat`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Modality {
    /// Textual content.
    Text,
    /// Image content.
    Image,
    /// Audio content.
    Audio,
    /// Video content.
    Video,
}
/// Strategy for combining per-modality embeddings into one.
///
/// This type only names the strategy; the fusion itself is implemented
/// elsewhere.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FusionStrategy {
    /// Concatenation.
    Concatenation,
    /// Weighted average; the payload presumably holds one weight per modality
    /// (confirm with the fusion code).
    WeightedAverage(Vec<f32>),
    /// Attention-based fusion.
    Attention,
    /// Late fusion.
    LateFusion,
    /// MLP-based fusion.
    MlpFusion,
    /// Transformer-based fusion.
    TransformerFusion,
}
/// Counters, timings and warnings collected for one extraction run.
///
/// `Default` is derived, so a fresh value has every counter and timing at zero
/// and an empty warning list.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ProcessingStats {
    /// Total processing time, in milliseconds (per the field name).
    pub processing_time_ms: u64,
    /// Total number of characters.
    pub total_chars: usize,
    /// Total number of words.
    pub total_words: usize,
    /// Number of images.
    pub image_count: usize,
    /// Number of tables.
    pub table_count: usize,
    /// Number of links.
    pub link_count: usize,
    /// Number of chunks.
    pub chunk_count: usize,
    /// Number of audio items.
    pub audio_count: usize,
    /// Number of video items.
    pub video_count: usize,
    /// Number of cross-modal embeddings.
    pub cross_modal_embedding_count: usize,
    /// Time spent on image processing, in milliseconds (per the field name).
    pub image_processing_time_ms: u64,
    /// Time spent on audio processing, in milliseconds (per the field name).
    pub audio_processing_time_ms: u64,
    /// Time spent on video processing, in milliseconds (per the field name).
    pub video_processing_time_ms: u64,
    /// Warnings recorded during processing.
    pub warnings: Vec<String>,
}