pub struct Splitter<T: Sizer> { /* private fields */ }
A struct for splitting code into chunks.
Implementations§
impl<T> Splitter<T>
where
    T: Sizer,
pub fn new(language: Language, sizer: T) -> Result<Self>
Creates a new `Splitter` that counts the size of code chunks with the given sizer.
§Example: split by characters
use code_splitter::{CharCounter, Splitter};
let lang = tree_sitter_md::language();
let splitter = Splitter::new(lang, CharCounter).unwrap();
let chunks = splitter.split(b"hello, world!").unwrap();
§Example: split by words
use code_splitter::{Splitter, WordCounter};
let lang = tree_sitter_md::language();
let splitter = Splitter::new(lang, WordCounter).unwrap();
let chunks = splitter.split(b"hello, world!").unwrap();
§Example: split by tokens with huggingface tokenizer
use code_splitter::Splitter;
use tokenizers::Tokenizer;
let lang = tree_sitter_md::language();
let tokenizer = Tokenizer::from_pretrained("bert-base-cased", None).unwrap();
let splitter = Splitter::new(lang, tokenizer).unwrap();
let chunks = splitter.split(b"hello, world!").unwrap();
§Example: split by tokens with tiktoken core BPE
use code_splitter::Splitter;
use tiktoken_rs::cl100k_base;
let lang = tree_sitter_md::language();
let bpe = cl100k_base().unwrap();
let splitter = Splitter::new(lang, bpe).unwrap();
let chunks = splitter.split(b"hello, world!").unwrap();
pub fn with_max_size(self, max_size: usize) -> Self
Sets the maximum size of a chunk. The default is 512.
§Example: set the maximum size to 256
use code_splitter::{CharCounter, Splitter};
let lang = tree_sitter_md::language();
let splitter = Splitter::new(lang, CharCounter)
.unwrap()
.with_max_size(256);
let chunks = splitter.split(b"hello, world!").unwrap();
Auto Trait Implementations§
impl<T> Freeze for Splitter<T> where
T: Freeze,
impl<T> RefUnwindSafe for Splitter<T> where
T: RefUnwindSafe,
impl<T> Send for Splitter<T> where
T: Send,
impl<T> Sync for Splitter<T> where
T: Sync,
impl<T> Unpin for Splitter<T> where
T: Unpin,
impl<T> UnwindSafe for Splitter<T> where
T: UnwindSafe,
Blanket Implementations§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.