Skip to main content

SamplingStrategy

Enum SamplingStrategy 

Source
/// Strategy that determines which rows are included in a sample.
pub enum SamplingStrategy {
    /// No sampling - analyze all data.
    None,
    /// Simple random sampling with fixed size.
    Random {
        size: usize,
    },
    /// Reservoir sampling for streaming data.
    Reservoir {
        size: usize,
    },
    /// Stratified sampling balanced by categories.
    Stratified {
        key_columns: Vec<String>,
        samples_per_stratum: usize,
    },
    /// Progressive sampling - stop when confidence is reached.
    Progressive {
        initial_size: usize,
        confidence_level: f64,
        max_size: usize,
    },
    /// Systematic sampling (every Nth row).
    Systematic {
        // NOTE(review): presumably the "N" in "every Nth row" — confirm against the implementation.
        interval: usize,
    },
    /// Importance sampling for anomaly detection.
    Importance {
        weight_threshold: f64,
    },
    /// Multi-stage sampling (combination of strategies).
    MultiStage {
        stages: Vec<SamplingStrategy>,
    },
}

Variants§

§

None

No sampling - analyze all data

§

Random

Simple random sampling with fixed size

Fields

§size: usize
§

Reservoir

Reservoir sampling for streaming data

Fields

§size: usize
§

Stratified

Stratified sampling balanced by categories

Fields

§key_columns: Vec<String>
§samples_per_stratum: usize
§

Progressive

Progressive sampling - stop when confidence is reached

Fields

§initial_size: usize
§confidence_level: f64
§max_size: usize
§

Systematic

Systematic sampling (every Nth row)

Fields

§interval: usize
§

Importance

Importance sampling for anomaly detection

Fields

§weight_threshold: f64
§

MultiStage

Multi-stage sampling (combination of strategies)

Fields

§stages: Vec<SamplingStrategy>

Implementations§

Source§

impl SamplingStrategy

Source

pub fn adaptive(total_rows: Option<usize>, file_size_mb: f64) -> Self

Create adaptive strategy based on data characteristics

Source

pub fn stratified(key_columns: Vec<String>, samples_per_stratum: usize) -> Self

Create stratified sampling strategy

Source

pub fn importance(weight_threshold: f64) -> Self

Create importance sampling strategy

Source

pub fn should_include(&self, row_index: usize, total_processed: usize) -> bool

Check if row should be included in sample

Source

pub fn should_include_with_state(&self, row_index: usize, total_processed: usize, state: &mut SamplingState, row_data: Option<&HashMap<String, String>>) -> bool

Check if row should be included with state tracking

Source

pub fn target_sample_size(&self) -> Option<usize>

Source

pub fn description(&self) -> String

Get description of the sampling strategy

Trait Implementations§

Source§

impl Clone for SamplingStrategy

Source§

fn clone(&self) -> SamplingStrategy

Returns a duplicate of the value. Read more
1.0.0 (const: unstable) · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for SamplingStrategy

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V