pub struct Chunker { /* private fields */ }
Expand description
Text chunker with configurable strategy and parameters.
§Example
use pdfvec::{Chunker, ChunkStrategy};
let chunker = Chunker::new(ChunkStrategy::Fixed)
.chunk_size(512)
.overlap(50);
let text = "Your document text here...";
let chunks: Vec<_> = chunker.chunks(text).collect();
Implementations§
Source§impl Chunker
impl Chunker
Source
pub fn new(strategy: ChunkStrategy) -> Self
pub fn new(strategy: ChunkStrategy) -> Self
Creates a new chunker with the specified strategy.
§Example
use pdfvec::{Chunker, ChunkStrategy};
let chunker = Chunker::new(ChunkStrategy::Paragraph);
Source
pub fn chunk_size(self, size: usize) -> Self
pub fn chunk_size(self, size: usize) -> Self
Sets the target chunk size in characters.
For ChunkStrategy::Fixed, this is the exact size (except for the last chunk).
For other strategies, this is the target size for merging small segments.
Default: 512
§Example
use pdfvec::{Chunker, ChunkStrategy};
let chunker = Chunker::new(ChunkStrategy::Fixed).chunk_size(1024);
Source
pub fn overlap(self, chars: usize) -> Self
pub fn overlap(self, chars: usize) -> Self
Sets the overlap between consecutive chunks in characters.
Overlap helps maintain context across chunk boundaries. Only applies
to ChunkStrategy::Fixed.
Default: 0
§Example
use pdfvec::{Chunker, ChunkStrategy};
let chunker = Chunker::new(ChunkStrategy::Fixed)
.chunk_size(100)
.overlap(20);
Source
pub fn min_chunk_size(self, size: usize) -> Self
pub fn min_chunk_size(self, size: usize) -> Self
Sets the minimum chunk size for paragraph and sentence strategies.
Chunks smaller than this will be merged with adjacent chunks.
Default: 100
§Example
use pdfvec::{Chunker, ChunkStrategy};
let chunker = Chunker::new(ChunkStrategy::Paragraph)
.min_chunk_size(50);
Source
pub fn chunks<'a>(
&self,
text: &'a str,
) -> Box<dyn Iterator<Item = Chunk<'a>> + 'a>
pub fn chunks<'a>( &self, text: &'a str, ) -> Box<dyn Iterator<Item = Chunk<'a>> + 'a>
Returns an iterator over chunks of the input text.
§Example
use pdfvec::{Chunker, ChunkStrategy};
let text = "Hello world. How are you?";
let chunker = Chunker::new(ChunkStrategy::Sentence);
for chunk in chunker.chunks(text) {
println!("Chunk {}: {}", chunk.index(), chunk.text());
}
Source
pub fn get_chunk_size(&self) -> usize
pub fn get_chunk_size(&self) -> usize
Returns the configured chunk size.
Source
pub fn get_overlap(&self) -> usize
pub fn get_overlap(&self) -> usize
Returns the configured overlap.
Source
pub fn strategy(&self) -> ChunkStrategy
pub fn strategy(&self) -> ChunkStrategy
Returns the configured strategy.
Trait Implementations§
Auto Trait Implementations§
impl Freeze for Chunker
impl RefUnwindSafe for Chunker
impl Send for Chunker
impl Sync for Chunker
impl Unpin for Chunker
impl UnwindSafe for Chunker
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T
where
    T: Clone,
impl<T> CloneToUninit for T
where
    T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more