pub struct TimestampChunker { /* private fields */ }
Expand description
Timestamp-aware chunker for subtitle/transcript content.
Groups subtitle cues into chunks based on time duration rather than
character count. Each chunk carries `start_secs` and `end_secs` in
its metadata for timestamp-aware retrieval and citation.
Falls back to `RecursiveChunker` for documents without subtitle
cue metadata.
§Example
use aprender_rag::chunk::{TimestampChunker, Chunker};
use aprender_rag::Document;
use aprender_rag::media::SubtitleCue;
let cues = vec![
SubtitleCue { index: 0, start_secs: 0.0, end_secs: 30.0, text: "First segment.".into() },
SubtitleCue { index: 1, start_secs: 30.0, end_secs: 65.0, text: "Second segment.".into() },
SubtitleCue { index: 2, start_secs: 65.0, end_secs: 90.0, text: "Third segment.".into() },
];
let mut doc = Document::new("First segment. Second segment. Third segment.");
doc.metadata.insert(
"subtitle_cues".into(),
serde_json::to_value(&cues).unwrap(),
);
doc.metadata.insert("duration_secs".into(), serde_json::json!(90.0));
let chunker = TimestampChunker::new(60.0);
let chunks = chunker.chunk(&doc).unwrap();
assert!(chunks.len() >= 2);
assert!(chunks[0].metadata.custom.contains_key("start_secs"));
Implementations§
Source§impl TimestampChunker
impl TimestampChunker
Sourcepub fn new(target_duration_secs: f64) -> Self
pub fn new(target_duration_secs: f64) -> Self
Create a timestamp chunker with the given target duration.
Sourcepub fn with_min_duration(self, secs: f64) -> Self
pub fn with_min_duration(self, secs: f64) -> Self
Set minimum chunk duration.
Sourcepub fn with_max_duration(self, secs: f64) -> Self
pub fn with_max_duration(self, secs: f64) -> Self
Set maximum chunk duration.
Sourcepub fn with_overlap(self, secs: f64) -> Self
pub fn with_overlap(self, secs: f64) -> Self
Set overlap duration.
Trait Implementations§
Source§impl Chunker for TimestampChunker
impl Chunker for TimestampChunker
Source§impl Clone for TimestampChunker
impl Clone for TimestampChunker
Source§fn clone(&self) -> TimestampChunker
fn clone(&self) -> TimestampChunker
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for TimestampChunker
impl Debug for TimestampChunker
Auto Trait Implementations§
impl Freeze for TimestampChunker
impl RefUnwindSafe for TimestampChunker
impl Send for TimestampChunker
impl Sync for TimestampChunker
impl Unpin for TimestampChunker
impl UnsafeUnpin for TimestampChunker
impl UnwindSafe for TimestampChunker
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§impl<F, T> IntoSample<T> for F where
T: FromSample<F>,
impl<F, T> IntoSample<T> for F where
T: FromSample<F>,
fn into_sample(self) -> T
Source§impl<T> Pointable for T
impl<T> Pointable for T
Source§impl<T> PolicyExt for T where
T: ?Sized,
impl<T> PolicyExt for T where
T: ?Sized,
Source§impl<R, P> ReadPrimitive<R> for P
impl<R, P> ReadPrimitive<R> for P
Source§fn read_from_little_endian(read: &mut R) -> Result<Self, Error>
fn read_from_little_endian(read: &mut R) -> Result<Self, Error>
Read this value from the supplied reader. Same as
ReadEndian::read_from_little_endian().