Skip to main content

synth_claw/datasets/
mod.rs

1pub mod hf;
2mod local;
3
4pub use hf::{HuggingFaceSource, Split};
5pub use local::LocalSource;
6
7use crate::Result;
8use serde_json::Value;
9
10pub struct Record {
11    pub data: Value,
12    pub index: usize,
13}
14
15pub trait DataSource: Send + Sync {
16    fn info(&self) -> &DatasetInfo;
17    fn load(&mut self, sample: Option<usize>) -> Result<Vec<Record>>;
18}
19
20#[derive(Default)]
21pub struct DatasetInfo {
22    pub name: String,
23    pub description: Option<String>,
24    pub num_rows: usize,
25    pub columns: Vec<String>,
26    pub splits: Vec<Split>,
27}