pub trait Dataset: Send + Sync {
// Required methods
fn len(&self) -> usize;
fn get(&self, index: usize) -> Option<RecordBatch>;
fn schema(&self) -> SchemaRef;
fn iter(&self) -> Box<dyn Iterator<Item = RecordBatch> + Send + '_>;
fn num_batches(&self) -> usize;
fn get_batch(&self, index: usize) -> Option<&RecordBatch>;
// Provided method
fn is_empty(&self) -> bool { ... }
}
Expand description
A dataset that can be iterated over.
Datasets provide access to tabular data stored as Arrow RecordBatches. All implementations must be thread-safe (Send + Sync).
Required Methods
fn get(&self, index: usize) -> Option<RecordBatch>
Returns the row at `index` as a single-row RecordBatch.
Returns None if the index is out of bounds.
fn iter(&self) -> Box<dyn Iterator<Item = RecordBatch> + Send + '_>
Returns an iterator over all RecordBatches in the dataset.
fn num_batches(&self) -> usize
Returns the number of batches in the dataset.
fn get_batch(&self, index: usize) -> Option<&RecordBatch>
Returns a reference to the batch at the given index.