use rand::seq::SliceRandom;
use rand::{rngs::StdRng, SeedableRng};
use std::io::{self, BufRead};
/// A strategy for picking lines out of a file.
///
/// Every implementor must be `Send + Sync`, as required by the supertrait
/// bounds.
pub trait Sampler: Send + Sync {
    /// Returns a short label identifying the sampling strategy.
    fn name(&self) -> &str;

    /// Reads the file at `filepath` and returns the lines this strategy selects.
    ///
    /// # Errors
    ///
    /// Implementations report failures as a boxed, thread-safe error.
    fn sample(
        &self,
        filepath: &str,
    ) -> Result<Vec<String>, Box<dyn std::error::Error + Send + Sync>>;
}
/// Sampler that returns only the tail of a file.
#[derive(Debug, Clone)]
pub struct LatestDataSampler {
    /// Number of leading lines that `sample` skips; only lines from this
    /// zero-based position onward are returned.
    pub last_index: i32,
}
impl Sampler for LatestDataSampler {
    /// Returns the fixed label `"Latest"`.
    fn name(&self) -> &str {
        "Latest"
    }

    /// Returns every line of `filepath` from position `last_index` onward,
    /// in file order.
    ///
    /// A negative `last_index` skips nothing, so the whole file is returned.
    /// A `last_index` at or beyond the number of lines yields an empty vector.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or if one of the
    /// returned (non-skipped) lines cannot be read, e.g. because it is not
    /// valid UTF-8.
    fn sample(
        &self,
        filepath: &str,
    ) -> Result<Vec<String>, Box<dyn std::error::Error + Send + Sync>> {
        // A bare `as usize` cast would wrap a negative index into an enormous
        // skip count and silently drop every line, so clamp to zero first.
        let skip_count = self.last_index.max(0) as usize;

        let file = std::fs::File::open(filepath)?;
        let lines = io::BufReader::new(file)
            .lines()
            .skip(skip_count)
            .collect::<Result<Vec<String>, io::Error>>()?;
        Ok(lines)
    }
}
/// Sampler that returns the tail of a file plus a random selection of the
/// lines that precede it.
#[derive(Debug, Clone)]
pub struct LatestAndRandomSampler {
    /// Zero-based line position at which the file is split: lines from here
    /// onward are returned in full, lines before it are only sampled.
    pub last_index: i32,
    /// Upper bound on how many of the lines before `last_index` are drawn.
    pub sample_size: i32,
    /// Seeded generator; `sample` shuffles with a clone of it.
    pub rng: StdRng,
}
impl LatestAndRandomSampler {
    /// Builds a sampler with the given split position and sample size, whose
    /// random generator is seeded from `seed`.
    pub fn new(last_index: i32, sample_size: i32, seed: u64) -> Self {
        let rng = StdRng::seed_from_u64(seed);
        Self {
            last_index,
            sample_size,
            rng,
        }
    }
}
impl Sampler for LatestAndRandomSampler {
    /// Returns the fixed label `"LatestWithRandom"`.
    fn name(&self) -> &str {
        "LatestWithRandom"
    }

    /// Returns all lines of `filepath` from position `last_index` onward (in
    /// file order), followed by up to `sample_size` lines chosen at random
    /// from the lines before that position.
    ///
    /// Out-of-range settings are clamped instead of panicking or wrapping:
    /// a `last_index` below zero is treated as `0`, one beyond the end of the
    /// file is treated as the line count, and a negative `sample_size` draws
    /// nothing.
    ///
    /// The stored generator is cloned rather than advanced (the method only
    /// has `&self`), so every call starts from the same RNG state.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or any line cannot be
    /// read, e.g. because it is not valid UTF-8.
    fn sample(
        &self,
        filepath: &str,
    ) -> Result<Vec<String>, Box<dyn std::error::Error + Send + Sync>> {
        let file = std::fs::File::open(filepath)?;
        let mut older: Vec<String> = io::BufReader::new(file)
            .lines()
            .collect::<Result<Vec<_>, _>>()?;

        // Clamp into `[0, len]`: `split_at`/`split_off` panic past the end,
        // and a raw `as usize` cast turns negatives into huge values.
        let split = (self.last_index.max(0) as usize).min(older.len());

        // `split_off` moves the tail out, leaving the older lines in place,
        // so no line has to be cloned.
        let mut combined = older.split_off(split);

        let mut rng = self.rng.clone();
        older.shuffle(&mut rng);
        older.truncate(self.sample_size.max(0) as usize);

        combined.extend(older);
        Ok(combined)
    }
}
/// Sampler that returns a random selection of a file's lines.
#[derive(Debug, Clone)]
pub struct RandomSampler {
    /// Upper bound on the number of lines `sample` returns.
    pub sample_size: i32,
    /// Seeded generator; `sample` shuffles with a clone of it.
    pub rng: StdRng,
}
impl RandomSampler {
    /// Builds a sampler with the given sample size, whose random generator
    /// is seeded from `seed`.
    pub fn new(sample_size: i32, seed: u64) -> Self {
        let rng = StdRng::seed_from_u64(seed);
        Self { sample_size, rng }
    }
}
impl Sampler for RandomSampler {
    /// Returns the fixed label `"Random"`.
    fn name(&self) -> &str {
        "Random"
    }

    /// Returns up to `sample_size` lines of `filepath`, chosen by shuffling
    /// all lines and keeping the first `sample_size` of them.
    ///
    /// A negative `sample_size` yields an empty vector; one larger than the
    /// file returns every line (in shuffled order).
    ///
    /// The stored generator is cloned rather than advanced (the method only
    /// has `&self`), so every call starts from the same RNG state.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or any line cannot be
    /// read, e.g. because it is not valid UTF-8.
    fn sample(
        &self,
        filepath: &str,
    ) -> Result<Vec<String>, Box<dyn std::error::Error + Send + Sync>> {
        let file = std::fs::File::open(filepath)?;
        let mut lines: Vec<String> = io::BufReader::new(file)
            .lines()
            .collect::<Result<Vec<_>, _>>()?;

        let mut rng = self.rng.clone();
        lines.shuffle(&mut rng);

        // Clamp before casting: a negative value cast with `as usize` wraps
        // to a huge count and would return the entire file.
        lines.truncate(self.sample_size.max(0) as usize);
        Ok(lines)
    }
}