// orign 0.2.3
//
// A globally distributed container orchestrator
// Documentation
use rand::seq::SliceRandom;
use rand::{rngs::StdRng, SeedableRng};
use std::io::{self, BufRead};

/// Common interface for strategies that turn the contents of a file into a
/// list of strings.
///
/// Every implementor must be `Send + Sync`, as required by the supertrait
/// bounds. The implementations in this file all return whole lines of the file.
pub trait Sampler: Send + Sync {
    /// Returns a short, fixed name identifying the sampling strategy.
    fn name(&self) -> &str;
    /// Reads the file at `filepath` and returns the selected strings.
    ///
    /// # Errors
    ///
    /// Returns a boxed error when sampling fails; the implementations in this
    /// file propagate the I/O errors raised while opening or reading the file.
    fn sample(
        &self,
        filepath: &str,
    ) -> Result<Vec<String>, Box<dyn std::error::Error + Send + Sync>>;
}

/// Sampler that returns every line of a file from `last_index` onwards.
#[derive(Debug, Clone)]
pub struct LatestDataSampler {
    /// Number of leading lines to skip before lines start being returned.
    /// It is converted with `as usize` when sampling.
    pub last_index: i32,
}

impl Sampler for LatestDataSampler {
    /// Returns the fixed identifier of this strategy, `"Latest"`.
    fn name(&self) -> &str {
        "Latest"
    }

    /// Returns the lines of `filepath` that come after the first `last_index`
    /// lines.
    ///
    /// `last_index` is converted with `as usize`, so a negative value becomes
    /// a very large skip count and the result is empty, exactly as when
    /// `last_index` is larger than the number of lines in the file.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or if reading one of the
    /// returned lines fails.
    fn sample(
        &self,
        filepath: &str,
    ) -> Result<Vec<String>, Box<dyn std::error::Error + Send + Sync>> {
        let already_seen = self.last_index as usize;
        let reader = io::BufReader::new(std::fs::File::open(filepath)?);

        // Walk the remaining lines one by one, bailing out on the first read error.
        let mut fresh_lines = Vec::new();
        for line in reader.lines().skip(already_seen) {
            fresh_lines.push(line?);
        }

        Ok(fresh_lines)
    }
}

/// Sampler that returns the newest lines of a file together with a random
/// selection of the older lines.
#[derive(Debug, Clone)]
pub struct LatestAndRandomSampler {
    /// Line index at which the file is split: lines from this index onwards
    /// are always returned, lines before it form the pool for random sampling.
    pub last_index: i32,
    /// Maximum number of lines taken from the older portion of the file.
    pub sample_size: i32,
    /// Random generator used for shuffling. Sampling works on a clone of it,
    /// so the stored generator state is not advanced.
    pub rng: StdRng,
}

impl LatestAndRandomSampler {
    /// Creates a sampler with the given split index and sample size, seeding
    /// its random generator from `seed`.
    pub fn new(last_index: i32, sample_size: i32, seed: u64) -> Self {
        let rng = StdRng::seed_from_u64(seed);
        Self {
            last_index,
            sample_size,
            rng,
        }
    }
}

impl Sampler for LatestAndRandomSampler {
    /// Returns the fixed identifier of this strategy, `"LatestWithRandom"`.
    fn name(&self) -> &str {
        "LatestWithRandom"
    }

    /// Returns every line from `last_index` onwards, followed by up to
    /// `sample_size` lines picked at random from the lines before `last_index`.
    ///
    /// The older lines are shuffled with a clone of `self.rng`, so the stored
    /// generator is never advanced and repeated calls on the same file
    /// contents produce the same selection.
    ///
    /// A `last_index` beyond the end of the file is clamped to the number of
    /// lines: the "latest" part is then empty and the whole file is the
    /// sampling pool. A negative `last_index` is treated the same way, because
    /// the `as usize` cast turns it into a very large value. A negative
    /// `sample_size` likewise becomes a very large limit, so the entire pool
    /// is returned.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or a line cannot be read.
    fn sample(
        &self,
        filepath: &str,
    ) -> Result<Vec<String>, Box<dyn std::error::Error + Send + Sync>> {
        let file = std::fs::File::open(filepath)?;
        let reader = io::BufReader::new(file);

        // Read the whole file; after the split below this vector holds only
        // the lines that precede `last_index`.
        let mut older: Vec<String> = reader.lines().collect::<Result<Vec<_>, _>>()?;

        // Splitting past the end would panic, so clamp the index to the
        // number of lines actually present.
        let split_point = (self.last_index as usize).min(older.len());

        // Move the tail out instead of cloning it: `combined` starts as the
        // latest lines, `older` keeps everything before the split point.
        let mut combined = older.split_off(split_point);

        // Shuffle with a clone so `self` stays untouched behind `&self`.
        let mut rng = self.rng.clone();
        older.shuffle(&mut rng);
        older.truncate(self.sample_size as usize);

        combined.extend(older);
        Ok(combined)
    }
}

/// Sampler that returns a random selection of the lines of a file.
#[derive(Debug, Clone)]
pub struct RandomSampler {
    /// Maximum number of lines to return.
    pub sample_size: i32,
    /// Random generator used for shuffling. Sampling works on a clone of it,
    /// so the stored generator state is not advanced.
    pub rng: StdRng,
}

impl RandomSampler {
    /// Creates a sampler returning at most `sample_size` lines, with its
    /// random generator seeded from `seed`.
    pub fn new(sample_size: i32, seed: u64) -> Self {
        let rng = StdRng::seed_from_u64(seed);
        Self { sample_size, rng }
    }
}

impl Sampler for RandomSampler {
    /// Returns the fixed identifier of this strategy, `"Random"`.
    fn name(&self) -> &str {
        "Random"
    }

    /// Returns up to `sample_size` lines of `filepath`, chosen by shuffling
    /// all lines and keeping the leading ones.
    ///
    /// The shuffle uses a clone of `self.rng`, leaving the stored generator
    /// untouched. `sample_size` is converted with `as usize`, so a negative
    /// value becomes a very large limit and every line is returned.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or a line cannot be read.
    fn sample(
        &self,
        filepath: &str,
    ) -> Result<Vec<String>, Box<dyn std::error::Error + Send + Sync>> {
        let reader = io::BufReader::new(std::fs::File::open(filepath)?);

        // Load every line, stopping at the first read failure.
        let mut lines: Vec<String> = Vec::new();
        for line in reader.lines() {
            lines.push(line?);
        }

        // Shuffle in place with a private copy of the generator, then keep
        // only the first `sample_size` entries.
        let mut shuffler = self.rng.clone();
        lines.shuffle(&mut shuffler);
        lines.truncate(self.sample_size as usize);

        Ok(lines)
    }
}