// synth_claw/datasets/mod.rs
pub mod hf;
mod local;

pub use hf::{HuggingFaceSource, Split};
pub use local::LocalSource;

use crate::Result;
use serde_json::Value;

10pub struct Record {
11 pub data: Value,
12 pub index: usize,
13}
15pub trait DataSource: Send + Sync {
16 fn info(&self) -> &DatasetInfo;
17 fn load(&mut self, sample: Option<usize>) -> Result<Vec<Record>>;
18}
20#[derive(Default)]
21pub struct DatasetInfo {
22 pub name: String,
23 pub description: Option<String>,
24 pub num_rows: usize,
25 pub columns: Vec<String>,
26 pub splits: Vec<Split>,
27}