pub struct TripletSampler<S: SplitStore + EpochStateStore + SamplerStateStore + 'static> { /* private fields */ }
Expand description
Sampler that draws anchors from a single shared epoch cursor and then selects chunks from those records. Ingestion happens on demand when sampling.
Implementations§
Source§impl<S: SplitStore + EpochStateStore + SamplerStateStore + 'static> TripletSampler<S>
impl<S: SplitStore + EpochStateStore + SamplerStateStore + 'static> TripletSampler<S>
Sourcepub fn new(config: SamplerConfig, split_store: Arc<S>) -> Self
pub fn new(config: SamplerConfig, split_store: Arc<S>) -> Self
Create a sampler from config and a split-state backend.
Sourcepub fn new_with_chunker(
config: SamplerConfig,
split_store: Arc<S>,
chunker: Arc<dyn ChunkingAlgorithm>,
) -> Self
pub fn new_with_chunker( config: SamplerConfig, split_store: Arc<S>, chunker: Arc<dyn ChunkingAlgorithm>, ) -> Self
Create a sampler from config with a custom chunking implementation.
Sourcepub fn next_pair_batch_for_split(
&self,
split: SplitLabel,
) -> Result<SampleBatch, SamplerError>
pub fn next_pair_batch_for_split( &self, split: SplitLabel, ) -> Result<SampleBatch, SamplerError>
Return an unweighted pair batch for split.
Sourcepub fn next_text_batch_for_split(
&self,
split: SplitLabel,
) -> Result<TextBatch, SamplerError>
pub fn next_text_batch_for_split( &self, split: SplitLabel, ) -> Result<TextBatch, SamplerError>
Return an unweighted text batch for split.
Sourcepub fn next_triplet_batch_for_split(
&self,
split: SplitLabel,
) -> Result<TripletBatch, SamplerError>
pub fn next_triplet_batch_for_split( &self, split: SplitLabel, ) -> Result<TripletBatch, SamplerError>
Return an unweighted triplet batch for split.
Sourcepub fn next_pair_batch_with_weights_for_split(
&self,
split: SplitLabel,
weights: &HashMap<SourceId, f32>,
) -> Result<SampleBatch, SamplerError>
pub fn next_pair_batch_with_weights_for_split( &self, split: SplitLabel, weights: &HashMap<SourceId, f32>, ) -> Result<SampleBatch, SamplerError>
Return a weighted pair batch for split using per-source weights.
Sourcepub fn next_text_batch_with_weights_for_split(
&self,
split: SplitLabel,
weights: &HashMap<SourceId, f32>,
) -> Result<TextBatch, SamplerError>
pub fn next_text_batch_with_weights_for_split( &self, split: SplitLabel, weights: &HashMap<SourceId, f32>, ) -> Result<TextBatch, SamplerError>
Return a weighted text batch for split using per-source weights.
Sourcepub fn next_triplet_batch_with_weights_for_split(
&self,
split: SplitLabel,
weights: &HashMap<SourceId, f32>,
) -> Result<TripletBatch, SamplerError>
pub fn next_triplet_batch_with_weights_for_split( &self, split: SplitLabel, weights: &HashMap<SourceId, f32>, ) -> Result<TripletBatch, SamplerError>
Return a weighted triplet batch for split using per-source weights.
Sourcepub fn prefetch_triplet_batches(
self: Arc<Self>,
split: SplitLabel,
capacity: usize,
) -> BatchPrefetcher<TripletBatch>
pub fn prefetch_triplet_batches( self: Arc<Self>, split: SplitLabel, capacity: usize, ) -> BatchPrefetcher<TripletBatch>
Spawn a background prefetcher for triplet batches.
Sourcepub fn prefetch_triplet_batches_with_weights(
self: Arc<Self>,
split: SplitLabel,
capacity: usize,
weights: HashMap<SourceId, f32>,
) -> BatchPrefetcher<TripletBatch>
pub fn prefetch_triplet_batches_with_weights( self: Arc<Self>, split: SplitLabel, capacity: usize, weights: HashMap<SourceId, f32>, ) -> BatchPrefetcher<TripletBatch>
Spawn a background prefetcher for weighted triplet batches.
Sourcepub fn prefetch_pair_batches(
self: Arc<Self>,
split: SplitLabel,
capacity: usize,
) -> BatchPrefetcher<SampleBatch>
pub fn prefetch_pair_batches( self: Arc<Self>, split: SplitLabel, capacity: usize, ) -> BatchPrefetcher<SampleBatch>
Spawn a background prefetcher for pair batches.
Sourcepub fn prefetch_pair_batches_with_weights(
self: Arc<Self>,
split: SplitLabel,
capacity: usize,
weights: HashMap<SourceId, f32>,
) -> BatchPrefetcher<SampleBatch>
pub fn prefetch_pair_batches_with_weights( self: Arc<Self>, split: SplitLabel, capacity: usize, weights: HashMap<SourceId, f32>, ) -> BatchPrefetcher<SampleBatch>
Spawn a background prefetcher for weighted pair batches.
Sourcepub fn prefetch_text_batches(
self: Arc<Self>,
split: SplitLabel,
capacity: usize,
) -> BatchPrefetcher<TextBatch>
pub fn prefetch_text_batches( self: Arc<Self>, split: SplitLabel, capacity: usize, ) -> BatchPrefetcher<TextBatch>
Spawn a background prefetcher for text batches.
Sourcepub fn prefetch_text_batches_with_weights(
self: Arc<Self>,
split: SplitLabel,
capacity: usize,
weights: HashMap<SourceId, f32>,
) -> BatchPrefetcher<TextBatch>
pub fn prefetch_text_batches_with_weights( self: Arc<Self>, split: SplitLabel, capacity: usize, weights: HashMap<SourceId, f32>, ) -> BatchPrefetcher<TextBatch>
Spawn a background prefetcher for weighted text batches.
Sourcepub fn text_recipes(&self) -> Vec<TextRecipe>
pub fn text_recipes(&self) -> Vec<TextRecipe>
Return the currently active text recipes.
Sourcepub fn register_source(
&self,
source: Box<dyn DataSource + 'static>,
) -> Result<(), SamplerError>
pub fn register_source( &self, source: Box<dyn DataSource + 'static>, ) -> Result<(), SamplerError>
Register a data source for ingestion and sampling.
Returns an error if the source’s id() matches the reserved __*__
pattern used for internal synthetic/metadata source identifiers.
Sourcepub fn set_epoch(&self, epoch: u64) -> Result<(), SamplerError>
pub fn set_epoch(&self, epoch: u64) -> Result<(), SamplerError>
Force the sampler epoch to `epoch` (advanced deterministic replay control).
Sourcepub fn save_sampler_state(
&self,
save_to: Option<&Path>,
) -> Result<(), SamplerError>
pub fn save_sampler_state( &self, save_to: Option<&Path>, ) -> Result<(), SamplerError>
Persist sampler and split runtime state for restart-resume.
When `save_to` is `Some(path)`, the current persisted runtime state is also
mirrored to `path` when supported by the split-store backend.
Trait Implementations§
Source§impl<S: SplitStore + EpochStateStore + SamplerStateStore + 'static> Sampler for TripletSampler<S>
impl<S: SplitStore + EpochStateStore + SamplerStateStore + 'static> Sampler for TripletSampler<S>
Source§fn next_pair_batch(
&self,
split: SplitLabel,
) -> Result<SampleBatch, SamplerError>
fn next_pair_batch( &self, split: SplitLabel, ) -> Result<SampleBatch, SamplerError>
Source§fn next_pair_batch_with_weights(
&self,
split: SplitLabel,
weights: &HashMap<SourceId, f32>,
) -> Result<SampleBatch, SamplerError>
fn next_pair_batch_with_weights( &self, split: SplitLabel, weights: &HashMap<SourceId, f32>, ) -> Result<SampleBatch, SamplerError>
Source§fn next_text_batch(&self, split: SplitLabel) -> Result<TextBatch, SamplerError>
fn next_text_batch(&self, split: SplitLabel) -> Result<TextBatch, SamplerError>
Source§fn next_text_batch_with_weights(
&self,
split: SplitLabel,
weights: &HashMap<SourceId, f32>,
) -> Result<TextBatch, SamplerError>
fn next_text_batch_with_weights( &self, split: SplitLabel, weights: &HashMap<SourceId, f32>, ) -> Result<TextBatch, SamplerError>
Source§fn next_triplet_batch(
&self,
split: SplitLabel,
) -> Result<TripletBatch, SamplerError>
fn next_triplet_batch( &self, split: SplitLabel, ) -> Result<TripletBatch, SamplerError>
Source§fn next_triplet_batch_with_weights(
&self,
split: SplitLabel,
weights: &HashMap<SourceId, f32>,
) -> Result<TripletBatch, SamplerError>
fn next_triplet_batch_with_weights( &self, split: SplitLabel, weights: &HashMap<SourceId, f32>, ) -> Result<TripletBatch, SamplerError>
Auto Trait Implementations§
impl<S> !Freeze for TripletSampler<S>
impl<S> RefUnwindSafe for TripletSampler<S>
impl<S> Send for TripletSampler<S>
impl<S> Sync for TripletSampler<S>
impl<S> Unpin for TripletSampler<S>
impl<S> UnsafeUnpin for TripletSampler<S>
impl<S> UnwindSafe for TripletSampler<S>
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more