Expand description
Text processing and chunking
Re-exports§
pub use analysis::TextAnalyzer;
pub use analysis::TextStats;
pub use boundary_detection::Boundary;
pub use boundary_detection::BoundaryDetectionConfig;
pub use boundary_detection::BoundaryDetector;
pub use boundary_detection::BoundaryType;
pub use chunk_enricher::ChunkEnricher;
pub use chunk_enricher::EnrichmentStatistics;
pub use chunking_strategies::BoundaryAwareChunkingStrategy;
pub use chunking_strategies::HierarchicalChunkingStrategy;
pub use chunking_strategies::SemanticChunkingStrategy;
pub use contextual_enricher::ContextualEnricher;
pub use contextual_enricher::ContextualEnricherConfig;
pub use document_structure::DocumentStructure;
pub use document_structure::Heading;
pub use document_structure::HeadingHierarchy;
pub use document_structure::Section;
pub use document_structure::SectionNumber;
pub use document_structure::SectionNumberFormat;
pub use document_structure::StructureStatistics;
pub use extractive_summarizer::ExtractiveSummarizer;
pub use keyword_extraction::TfIdfKeywordExtractor;
pub use late_chunking::JinaLateChunkingClient;
pub use late_chunking::LateChunkingConfig;
pub use late_chunking::LateChunkingStrategy;
pub use layout_parser::LayoutParser;
pub use layout_parser::LayoutParserFactory;
pub use semantic_chunking::BreakpointStrategy;
pub use semantic_chunking::SemanticChunk;
pub use semantic_chunking::SemanticChunker;
pub use semantic_chunking::SemanticChunkerConfig;
pub use semantic_coherence::CoherenceConfig;
pub use semantic_coherence::OptimalSplit;
pub use semantic_coherence::ScoredChunk;
pub use semantic_coherence::SemanticCoherenceScorer;
Modules§
- analysis - Text analysis utilities for document structure detection
- boundary_detection - Semantic boundary detection for BAR-RAG; Semantic Boundary Detection for Boundary-Aware Chunking
- chunk_enricher - Chunk enrichment pipeline
- chunking - Text chunking utilities module
- chunking_strategies - Trait-based chunking strategies; Trait-based chunking strategy implementations
- contextual_enricher - LLM-based contextual chunk enrichment (Anthropic Contextual Retrieval pattern); Contextual Chunk Enrichment via LLM (Anthropic Contextual Retrieval pattern)
- document_structure - Document structure representation for hierarchical parsing
- extractive_summarizer - Extractive summarization; Real extractive summarization with sentence ranking
- keyword_extraction - TF-IDF keyword extraction; Real TF-IDF keyword extraction
- late_chunking - Late Chunking for context-preserving embeddings (Jina AI technique); Late Chunking — context-preserving embeddings for RAG
- layout_parser - Layout parser trait and factory for document structure detection
- parsers - Document layout parsers
- semantic_chunking - Semantic chunking based on embedding similarity; Semantic Chunking for RAG
- semantic_coherence - Semantic coherence scoring for BAR-RAG; Semantic Coherence Scoring for Boundary-Aware Chunking
Structs§
- LanguageDetector - Language detection utilities
- TextProcessor - Text processing utilities for chunking and preprocessing