use paladin::infrastructure::adapters::document::DocumentAdapter;
use paladin::infrastructure::adapters::document::PdfExtractor;
use paladin_ports::input::document_port::{ChunkConfig, DocumentPort, DocumentSource};
use std::path::{Path, PathBuf};
use std::sync::Arc;
/// Entry point of the document processing example.
///
/// Initializes logging, constructs a `PdfExtractor` and a `DocumentAdapter`
/// (held as `Arc<dyn DocumentPort>`), then runs the PDF walkthrough when the
/// sample PDF exists on disk and the text walkthrough otherwise.
///
/// # Errors
///
/// Propagates whatever error the selected walkthrough returns.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    env_logger::init();

    println!("📄 Sentinel Document Processing Example\n");

    println!("📋 Step 1: Creating PDF extractor...");
    let pdf_extractor = PdfExtractor::new();
    println!("✅ PDF extractor ready\n");

    println!("📋 Step 2: Creating document adapter...");
    let document_port: Arc<dyn DocumentPort> = Arc::new(DocumentAdapter::new());
    println!("✅ Document adapter ready\n");

    // Prefer the real PDF when it is available; otherwise fall back to the
    // text-only demonstration.
    let sample_pdf = PathBuf::from("./examples/assets/sample_document.pdf");
    if sample_pdf.exists() {
        demonstrate_pdf_processing(&document_port, &pdf_extractor, &sample_pdf).await?;
    } else {
        println!("⚠️ Sample PDF not found, demonstrating with text content instead\n");
        demonstrate_text_processing(&document_port).await?;
    }

    let banner = "=".repeat(80);
    println!();
    println!("{}", banner);
    println!("✅ Document processing example completed successfully!");
    println!("{}", banner);

    Ok(())
}
/// Walks through PDF extraction, ingestion, and chunking for a single file.
///
/// Extracts the document at `pdf_path` with `extractor`, prints its metadata
/// and a preview of up to the first three pages, then ingests the same path
/// through `adapter` and passes the ingested document to
/// `demonstrate_chunking`.
///
/// # Errors
///
/// Returns any error produced by extraction, ingestion, or chunking.
async fn demonstrate_pdf_processing(
    adapter: &Arc<dyn DocumentPort>,
    extractor: &PdfExtractor,
    pdf_path: &Path,
) -> Result<(), Box<dyn std::error::Error>> {
    /// Maximum number of characters shown in each page preview.
    const PREVIEW_CHARS: usize = 200;

    println!("{}", "=".repeat(80));
    println!("📄 PDF EXTRACTION");
    println!("{}", "=".repeat(80));
    println!();

    println!("🔍 Extracting text from: {:?}", pdf_path);
    let document = extractor.extract(pdf_path)?;

    println!("📊 DOCUMENT METADATA");
    println!("{}", "─".repeat(80));
    println!("Title: {:?}", document.metadata.title);
    println!("Author: {:?}", document.metadata.author);
    println!("Pages: {}", document.metadata.page_count);
    println!("Created: {:?}", document.metadata.creation_date);
    println!("Total Characters: {}", document.total_chars);
    println!("{}", "─".repeat(80));
    println!();

    println!("📑 PAGE CONTENT (First 3 pages):");
    for page in document.pages.iter().take(3) {
        // Count characters rather than bytes so the figure matches its
        // "chars" label for non-ASCII text.
        println!(
            "\n┌─ Page {} ({} chars) ─┐",
            page.number,
            page.content.chars().count()
        );

        // Cut on a character boundary: slicing at a fixed byte offset panics
        // when that offset lands inside a multi-byte UTF-8 sequence.
        // `nth(PREVIEW_CHARS)` is `Some` only when the page holds more than
        // `PREVIEW_CHARS` characters, i.e. exactly when truncation is needed.
        let preview = match page.content.char_indices().nth(PREVIEW_CHARS) {
            Some((cut, _)) => format!("{}...", &page.content[..cut]),
            None => page.content.clone(),
        };
        println!("{}", preview);
        println!("└{}", "─".repeat(79));
    }

    println!("\n📋 Step 4: Ingesting document via DocumentAdapter...");
    let ingested_doc = adapter
        .ingest(DocumentSource::File(pdf_path.to_path_buf()))
        .await?;
    println!("✅ Document ingested: {} pages\n", ingested_doc.pages.len());

    demonstrate_chunking(adapter, &ingested_doc).await?;
    Ok(())
}
async fn demonstrate_text_processing(
adapter: &Arc<dyn DocumentPort>,
) -> Result<(), Box<dyn std::error::Error>> {
println!("{}", "=".repeat(80));
println!("📝 TEXT DOCUMENT PROCESSING");
println!("{}", "=".repeat(80));
println!();
let sample_text = r#"
# Artificial Intelligence and Machine Learning
Artificial Intelligence (AI) is revolutionizing how we interact with technology.
Machine learning, a subset of AI, enables computers to learn from data without
being explicitly programmed.
## Deep Learning
Deep learning uses neural networks with multiple layers to process complex patterns.
These networks have achieved remarkable results in computer vision, natural language
processing, and reinforcement learning.
## Applications
AI is being applied across various domains:
- Healthcare: Diagnostic systems and drug discovery
- Finance: Fraud detection and algorithmic trading
- Transportation: Autonomous vehicles
- Manufacturing: Predictive maintenance
## Challenges
Despite progress, AI faces several challenges:
1. Data quality and availability
2. Ethical considerations and bias
3. Interpretability of complex models
4. Computational requirements
## Future Directions
The future of AI includes:
- More efficient architectures
- Better generalization capabilities
- Enhanced human-AI collaboration
- Responsible AI development
AI continues to evolve rapidly, promising transformative changes across industries.
"#;
let temp_path = PathBuf::from("/tmp/sample_ai_document.txt");
std::fs::write(&temp_path, sample_text)?;
println!("📄 Created sample text document");
println!("📏 Size: {} characters", sample_text.len());
println!("\n📋 Ingesting text document...");
let document = adapter
.ingest(DocumentSource::File(temp_path.clone()))
.await?;
println!("✅ Document ingested successfully");
println!(" Pages: {}", document.pages.len());
println!(" Total Characters: {}", document.total_chars);
std::fs::remove_file(&temp_path).ok();
demonstrate_chunking(adapter, &document).await?;
Ok(())
}
async fn demonstrate_chunking(
adapter: &Arc<dyn DocumentPort>,
document: &paladin::core::platform::container::document::Document,
) -> Result<(), Box<dyn std::error::Error>> {
println!();
println!("{}", "=".repeat(80));
println!("✂️ INTELLIGENT DOCUMENT CHUNKING");
println!("{}", "=".repeat(80));
println!();
println!("📐 Configuration 1: Small Chunks (RAG-optimized)");
let config_rag = ChunkConfig {
chunk_size: 500,
chunk_overlap: 100,
separator: "\n\n".to_string(),
};
let chunks_rag = adapter.chunk(document, config_rag).await?;
println!(" Chunk Size: 500 characters");
println!(" Overlap: 100 characters (20%)");
println!(" Separator: Double newline (paragraphs)");
println!(" Total Chunks: {}\n", chunks_rag.len());
display_chunks(&chunks_rag, 3);
println!("\n📐 Configuration 2: Large Chunks (Summarization)");
let config_summary = ChunkConfig {
chunk_size: 2000,
chunk_overlap: 200,
separator: "\n\n".to_string(),
};
let chunks_summary = adapter.chunk(document, config_summary).await?;
println!(" Chunk Size: 2000 characters");
println!(" Overlap: 200 characters (10%)");
println!(" Separator: Double newline (paragraphs)");
println!(" Total Chunks: {}\n", chunks_summary.len());
display_chunks(&chunks_summary, 2);
println!("\n📐 Configuration 3: Sentence-Based Chunks");
let config_sentence = ChunkConfig {
chunk_size: 300,
chunk_overlap: 50,
separator: ". ".to_string(),
};
let chunks_sentence = adapter.chunk(document, config_sentence).await?;
println!(" Chunk Size: 300 characters");
println!(" Overlap: 50 characters (16%)");
println!(" Separator: Period+space (sentences)");
println!(" Total Chunks: {}\n", chunks_sentence.len());
display_chunks(&chunks_sentence, 3);
Ok(())
}
/// Prints the index, size, and a one-line preview for up to `max_display`
/// chunks, followed by a count of the chunks that were not shown.
///
/// Sizes are reported in characters, and previews are limited to 150
/// characters with newlines replaced by spaces.
fn display_chunks(
    chunks: &[paladin_ports::input::document_port::DocumentChunk],
    max_display: usize,
) {
    /// Maximum number of characters shown in each chunk preview.
    const PREVIEW_CHARS: usize = 150;

    for (i, chunk) in chunks.iter().take(max_display).enumerate() {
        println!("┌─ Chunk {} ─┐", i + 1);
        println!("│ Index: {}", chunk.chunk_index);
        // Count characters rather than bytes so the figure matches its
        // "chars" label for non-ASCII text.
        println!("│ Size: {} chars", chunk.content.chars().count());
        println!(
            "│ Preview: {}",
            single_line_chunk_preview(&chunk.content, PREVIEW_CHARS)
        );
        println!("└{}", "─".repeat(79));
        println!();
    }

    if chunks.len() > max_display {
        println!("... and {} more chunks\n", chunks.len() - max_display);
    }
}

/// Returns `text` flattened onto one line and limited to `max_chars`
/// characters, with `...` appended when it was shortened.
///
/// The cut is made on a character boundary; slicing at a fixed byte offset
/// would panic if that offset fell inside a multi-byte UTF-8 sequence.
fn single_line_chunk_preview(text: &str, max_chars: usize) -> String {
    // `nth(max_chars)` is `Some` only when `text` has more than `max_chars`
    // characters, and then yields the byte offset at which to cut.
    match text.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}...", text[..cut].replace('\n', " ")),
        None => text.replace('\n', " "),
    }
}