Skip to main content

Dataset

Trait Dataset 

Source
/// Format-agnostic dataset view used by every command.
///
/// Input-format adapters (Lance today, potentially others in the future)
/// implement this trait. Commands are written against the trait only and
/// never see format-specific types.
//
// NOTE(review): the `'life0` / `'async_trait` lifetimes and the
// `Pin<Box<dyn Future<...> + Send + 'async_trait>>` return types look like
// macro-expanded async methods (presumably `async fn` in the original
// source) — confirm against the source before editing signatures by hand.
pub trait Dataset:
    Send
    + Sync
    + Debug {
    // Required methods
    /// Path or URI the dataset was opened from.
    fn origin(&self) -> &Path;
    /// Logical arrow schema of the dataset.
    fn arrow_schema(&self) -> SchemaRef;
    /// Pretty-printed format-native schema (for `schema --type physical`),
    /// optionally projected to a subset of columns.
    fn physical_schema_debug(
        &self,
        projection: Option<&[String]>,
    ) -> Result<String>;
    /// Total row count.
    fn count_rows<'life0, 'async_trait>(
        &'life0 self,
    ) -> Pin<Box<dyn Future<Output = Result<u64>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;
    /// Stream all rows, optionally projected to the given columns.
    fn scan<'life0, 'life1, 'async_trait>(
        &'life0 self,
        projection: Option<&'life1 [String]>,
    ) -> Pin<Box<dyn Future<Output = Result<BatchStream>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait;
    /// Materialise a `RecordBatch` containing only the rows at the given
    /// indices, in the order given.
    ///
    /// Every entry in `indices` must be less than `count_rows()`.
    fn take<'life0, 'life1, 'life2, 'async_trait>(
        &'life0 self,
        indices: &'life1 [u64],
        projection: Option<&'life2 [String]>,
    ) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait,
             'life2: 'async_trait;

    // Provided method
    /// Returns `Some(...)` when this dataset is backed by a format that
    /// supports Lance-specific operations (versions, branches, indices).
    /// The default `None` covers any future format that doesn't.
    fn lance(&self) -> Option<&dyn LanceCapabilities> { ... }
}
Expand description

Format-agnostic dataset view used by every command.

Input-format adapters (Lance today, potentially others in the future) implement this trait. Commands are written against the trait only — they never see format-specific types.

Required Methods§

Source

fn origin(&self) -> &Path

Path or URI the dataset was opened from.

Source

fn arrow_schema(&self) -> SchemaRef

Logical arrow schema of the dataset.

Source

fn physical_schema_debug(&self, projection: Option<&[String]>) -> Result<String>

Pretty-printed format-native schema (for `schema --type physical`), optionally projected to a subset of columns.

Source

fn count_rows<'life0, 'async_trait>( &'life0 self, ) -> Pin<Box<dyn Future<Output = Result<u64>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Total row count.

Source

fn scan<'life0, 'life1, 'async_trait>( &'life0 self, projection: Option<&'life1 [String]>, ) -> Pin<Box<dyn Future<Output = Result<BatchStream>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Stream all rows, optionally projected to the given columns.

Source

fn take<'life0, 'life1, 'life2, 'async_trait>( &'life0 self, indices: &'life1 [u64], projection: Option<&'life2 [String]>, ) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait,

Materialise a `RecordBatch` containing only the rows at the given indices, in the order given. Every entry in `indices` must be less than `count_rows()`.

Provided Methods§

Source

fn lance(&self) -> Option<&dyn LanceCapabilities>

Returns `Some(...)` when this dataset is backed by a format that supports Lance-specific operations (versions, branches, indices). The default implementation returns `None`, which covers any future format that doesn’t.

Implementors§