pub struct TokenChunk {
pub token_interval: TokenInterval,
pub document: Option<Document>,
/* private fields */
}
Expand description
A token-based chunk with sophisticated linguistic boundaries
Fields§
§token_interval: TokenInterval
Token interval of the chunk in the source document
§document: Option<Document>
Optional reference to the source document
Implementations§
Source§impl TokenChunk
impl TokenChunk
Source pub fn new(token_interval: TokenInterval, document: Option<Document>) -> Self
pub fn new(token_interval: TokenInterval, document: Option<Document>) -> Self
Create a new token chunk
Source pub fn with_char_end(
token_interval: TokenInterval,
document: Option<Document>,
char_end: usize,
) -> Self
pub fn with_char_end( token_interval: TokenInterval, document: Option<Document>, char_end: usize, ) -> Self
Create a new token chunk with custom character end position
Source pub fn document_id(&self) -> Option<&str>
pub fn document_id(&self) -> Option<&str>
Get the document ID from the source document
Source pub fn document_text(&self) -> Option<&TokenizedText>
pub fn document_text(&self) -> Option<&TokenizedText>
Get the tokenized text from the source document
Source pub fn chunk_text(&self, tokenizer: &Tokenizer) -> LangExtractResult<String>
pub fn chunk_text(&self, tokenizer: &Tokenizer) -> LangExtractResult<String>
Get the chunk text (requires a tokenizer to reconstruct it)
Source pub fn sanitized_chunk_text(
&self,
tokenizer: &Tokenizer,
) -> LangExtractResult<String>
pub fn sanitized_chunk_text( &self, tokenizer: &Tokenizer, ) -> LangExtractResult<String>
Get the sanitized chunk text (removes excess whitespace)
Source pub fn additional_context(&self) -> Option<&str>
pub fn additional_context(&self) -> Option<&str>
Get the additional context for prompting from the source document
Source pub fn char_interval(
&self,
tokenizer: &Tokenizer,
) -> LangExtractResult<CharInterval>
pub fn char_interval( &self, tokenizer: &Tokenizer, ) -> LangExtractResult<CharInterval>
Get the character interval corresponding to the token interval
Trait Implementations§
Source§impl Clone for TokenChunk
impl Clone for TokenChunk
Source§fn clone(&self) -> TokenChunk
fn clone(&self) -> TokenChunk
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from `source`. Read more
Auto Trait Implementations§
impl Freeze for TokenChunk
impl RefUnwindSafe for TokenChunk
impl Send for TokenChunk
impl Sync for TokenChunk
impl Unpin for TokenChunk
impl UnwindSafe for TokenChunk
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more