Struct Dataset

Source

pub struct Dataset {
    pub object_store: Arc<ObjectStore>,
    pub manifest: Arc<Manifest>,
    pub refs: Refs,
    /* private fields */
}

Expand description

Lance Dataset

Fields§

§object_store: Arc<ObjectStore>§manifest: Arc<Manifest>§refs: Refs

Implementations§

Source §

impl Dataset

Source

pub async fn open(uri: &str) -> Result<Self>

Open an existing dataset.

pub async fn checkout_version(&self, version: impl Into<Ref>) -> Result<Self>

Check out a dataset version with a ref

Source

pub fn tags(&self) -> Tags<'_>

Source

pub fn branches(&self) -> Branches<'_>

Source

pub async fn checkout_latest(&mut self) -> Result<()>

Check out the latest version of the dataset

Source

pub async fn checkout_branch(&self, branch: &str) -> Result<Self>

Check out the latest version of the branch

Source

pub async fn create_branch( &mut self, branch: &str, version: impl Into<Ref>, store_params: Option<ObjectStoreParams>, ) -> Result<Self>

This is a two-phase operation:

Create the branch dataset by shallow cloning.
Create the branch metadata (a.k.a. BranchContents).

These two phases are not atomic. We consider BranchContents as the source of truth for the branch.

The cleanup procedure should:

Clean up zombie branch datasets that have no related BranchContents.
Delete broken BranchContents entries that have no related branch dataset.

If create_branch stops at phase 1, it may leave a zombie branch dataset, which can be cleaned up later. Such a zombie dataset may cause a branch creation failure if we use the same name to create_branch. In that case, you need to call force_delete_branch to interactively clean up the zombie dataset.

Source

pub async fn delete_branch(&mut self, branch: &str) -> Result<()>

Source

pub async fn force_delete_branch(&mut self, branch: &str) -> Result<()>

Delete the branch even if the BranchContents is not found. This could be useful when we have zombie branches and want to clean them up immediately.

Source

pub async fn list_branches(&self) -> Result<HashMap<String, BranchContents>>

Source

pub async fn write( batches: impl RecordBatchReader + Send + 'static, dest: impl Into<WriteDestination<'_>>, params: Option<WriteParams>, ) -> Result<Self>

Write to or create a Dataset with a stream of RecordBatches.

dest can be a &str, object_store::path::Path or Arc<Dataset>.

Returns the newly created Dataset, or an error if the dataset already exists.

Source

pub async fn write_into_namespace( batches: impl RecordBatchReader + Send + 'static, namespace: Arc<dyn LanceNamespace>, table_id: Vec<String>, params: Option<WriteParams>, ) -> Result<Self>

Write into a namespace-managed table with automatic credential vending.

For CREATE mode, calls create_empty_table() to initialize the table. For other modes, calls describe_table() and opens dataset with namespace credentials.

§Arguments

batches - The record batches to write
namespace - The namespace to use for table management
table_id - The table identifier
params - Write parameters

Source

pub async fn append( &mut self, batches: impl RecordBatchReader + Send + 'static, params: Option<WriteParams>, ) -> Result<()>

Append to an existing Dataset with a stream of RecordBatches.

Returns an empty Ok result on success, or an error.

Source

pub fn uri(&self) -> &str

Get the fully qualified URI of this dataset.

Source

pub fn branch_location(&self) -> BranchLocation

Source

pub fn find_branch_location(&self, branch_name: &str) -> Result<BranchLocation>

Source

pub fn manifest(&self) -> &Manifest

Get the full manifest of the dataset version.

Source

pub fn manifest_location(&self) -> &ManifestLocation

Source

pub fn delta(&self) -> DatasetDeltaBuilder

Create a delta::DatasetDeltaBuilder to explore changes between dataset versions.

§Example

let delta = dataset.delta()
    .compared_against_version(5)
    .build()?;
let inserted = delta.get_inserted_rows().await?;

Source

pub async fn latest_manifest(&self) -> Result<(Arc<Manifest>, ManifestLocation)>

Source

pub async fn read_transaction(&self) -> Result<Option<Transaction>>

Read the transaction file for this version of the dataset.

If there was no transaction file written for this version of the dataset then this will return None.

Source

pub async fn read_transaction_by_version( &self, version: u64, ) -> Result<Option<Transaction>>

Read the transaction file for the given version of the dataset.

If there was no transaction file written for the given version of the dataset then this will return None.

Source

pub async fn get_transactions( &self, recent_transactions: usize, ) -> Result<Vec<Option<Transaction>>>

List transactions for the dataset, up to a maximum number.

This method iterates through dataset versions, starting from the current version, and collects the transaction for each version. It stops when either recent_transactions is reached or there are no more versions.

§Arguments

recent_transactions - Maximum number of transactions to return

§Returns

A vector of optional transactions. Each element corresponds to a version, and may be None if no transaction file exists for that version.

Source

pub async fn restore(&mut self) -> Result<()>

Restore the currently checked out version of the dataset as the latest version.

Source

pub fn cleanup_old_versions( &self, older_than: Duration, delete_unverified: Option<bool>, error_if_tagged_old_versions: Option<bool>, ) -> BoxFuture<'_, Result<RemovalStats>>

Removes old versions of the dataset from disk

This function will remove all versions of the dataset that are older than the provided duration (older_than). This function will not remove the current version of the dataset.

Once a version is removed it can no longer be checked out or restored. Any data unique to that version will be lost.

§Arguments

older_than - Versions older than this will be deleted.
delete_unverified - If false (the default) then files will only be deleted if they are listed in at least one manifest. Otherwise these files will be kept since they cannot be distinguished from an in-progress transaction. Set to true to delete these files if you are sure there are no other in-progress dataset operations.

§Returns

RemovalStats - Statistics about the removal operation

Source

pub fn cleanup_with_policy( &self, policy: CleanupPolicy, ) -> BoxFuture<'_, Result<RemovalStats>>

Removes old versions of the dataset from storage

This function will remove all versions of the dataset that satisfy the given policy. This function will not remove the current version of the dataset.

Once a version is removed it can no longer be checked out or restored. Any data unique to that version will be lost.

§Arguments

policy - CleanupPolicy determines the behaviour of cleanup.

§Returns

RemovalStats - Statistics about the removal operation

Source

pub async fn commit( dest: impl Into<WriteDestination<'_>>, operation: Operation, read_version: Option<u64>, store_params: Option<ObjectStoreParams>, commit_handler: Option<Arc<dyn CommitHandler>>, session: Arc<Session>, enable_v2_manifest_paths: bool, ) -> Result<Self>

Commit changes to the dataset

This operation is not needed if you are using append/write/delete to manipulate the dataset. It is used to commit changes to the dataset that are made externally. For example, a bulk import tool may import large amounts of new data and write the appropriate lance files directly instead of using the write function.

This method can be used to commit this change to the dataset’s manifest. This method will not verify that the provided fragments exist and are correct; that is the caller’s responsibility. Some validation can be performed using the function crate::dataset::transaction::validate_operation.

If this commit is a change to an existing dataset then it will often need to be based on an existing version of the dataset. For example, if this change is a delete operation then the caller will have read in the existing data (at some version) to determine which fragments need to be deleted. The base version that the caller used should be supplied as the read_version parameter. Some operations (e.g. Overwrite) do not depend on a previous version and read_version can be None. An error will be returned if the read_version is needed for an operation and it is not specified.

All operations except Overwrite will fail if the dataset does not already exist.

§Arguments

dest - The dataset destination to commit to (any value convertible into WriteDestination, e.g. the base URI of the dataset)
operation - A description of the change to commit
read_version - The version of the dataset that this change is based on
store_params - Parameters controlling object store access to the manifest
enable_v2_manifest_paths - If set to true, and this is a new dataset, uses the new v2 manifest paths. These allow constant-time lookups for the latest manifest on object storage. This parameter has no effect on existing datasets. To migrate an existing dataset, use the Self::migrate_manifest_paths_v2 method. WARNING: turning this on will make the dataset unreadable for older versions of Lance (prior to 0.17.0). Default is False.

Source

pub async fn commit_detached( dest: impl Into<WriteDestination<'_>>, operation: Operation, read_version: Option<u64>, store_params: Option<ObjectStoreParams>, commit_handler: Option<Arc<dyn CommitHandler>>, session: Arc<Session>, enable_v2_manifest_paths: bool, ) -> Result<Self>

Commits changes exactly the same as Self::commit but the commit will not be associated with the dataset lineage.

The commit will not show up in the dataset’s history and will never be the latest version of the dataset.

This can be used to stage changes or to handle “secondary” datasets whose lineage is tracked elsewhere.

Source

pub fn scan(&self) -> Scanner

Create a Scanner to scan the dataset.

Source

pub async fn count_rows(&self, filter: Option<String>) -> Result<usize>

Count the number of rows in the dataset.

It offers a fast path that counts rows by computing the result from metadata alone.

Source

pub async fn take( &self, row_indices: &[u64], projection: impl Into<ProjectionRequest>, ) -> Result<RecordBatch>

Take rows by indices.

Source

pub async fn take_rows( &self, row_ids: &[u64], projection: impl Into<ProjectionRequest>, ) -> Result<RecordBatch>

Take Rows by the internal ROW ids.

In Lance format, each row has a unique u64 id, which is used to identify the row globally.

let schema = dataset.schema().clone();
let row_ids = vec![0, 4, 7];
let rows = dataset.take_rows(&row_ids, schema).await.unwrap();

// We can have more fine-grained control over the projection, i.e., SQL projection.
let projection = ProjectionRequest::from_sql([("identity", "id * 2")]);
let rows = dataset.take_rows(&row_ids, projection).await.unwrap();

Source

pub fn take_builder( self: &Arc<Self>, row_ids: &[u64], projection: impl Into<ProjectionRequest>, ) -> Result<TakeBuilder>

Source

pub async fn take_blobs( self: &Arc<Self>, row_ids: &[u64], column: impl AsRef<str>, ) -> Result<Vec<BlobFile>>

Take BlobFile by row IDs (stable identifiers). If you have row addresses instead, use Self::take_blobs_by_addresses.

Source

pub async fn take_blobs_by_addresses( self: &Arc<Self>, row_addrs: &[u64], column: impl AsRef<str>, ) -> Result<Vec<BlobFile>>

Take BlobFile by row addresses.

Row addresses are u64 values encoding (fragment_id << 32) | row_offset. Use this method when you already have row addresses, for example from a scan with with_row_address(). For row IDs (stable identifiers), use Self::take_blobs. For row indices (offsets), use Self::take_blobs_by_indices.

Source

pub async fn take_blobs_by_indices( self: &Arc<Self>, row_indices: &[u64], column: impl AsRef<str>, ) -> Result<Vec<BlobFile>>

Take BlobFile by row indices (offsets in the dataset).

Source

pub fn take_scan( &self, row_ranges: Pin<Box<dyn Stream<Item = Result<Range<u64>>> + Send>>, projection: Arc<Schema>, batch_readahead: usize, ) -> DatasetRecordBatchStream

Get a stream of batches based on iterator of ranges of row numbers.

This is an experimental API. It may change at any time.

Source

pub async fn delete(&mut self, predicate: &str) -> Result<()>

Delete rows based on a predicate.

Source

pub async fn add_bases( self: &Arc<Self>, new_bases: Vec<BasePath>, transaction_properties: Option<HashMap<String, String>>, ) -> Result<Self>

Add new base paths to the dataset.

This method allows you to register additional storage locations (buckets) that can be used for future data writes. The base paths are added to the dataset’s manifest and can be referenced by name in subsequent write operations.

§Arguments

new_bases - A vector of lance_table::format::BasePath objects representing the new storage locations to add. Each base path should have a unique name and path.

§Returns

Returns a new Dataset instance with the updated manifest containing the new base paths.

Source

pub async fn count_deleted_rows(&self) -> Result<usize>

Source

pub fn object_store(&self) -> &ObjectStore

Source

pub fn storage_options(&self) -> Option<&HashMap<String, String>>

👎Deprecated since 0.25.0: Use initial_storage_options() instead

Returns the initial storage options used when opening this dataset, if any.

This returns the static initial options without triggering any refresh. For the latest refreshed options, use Self::latest_storage_options.

Source

pub fn initial_storage_options(&self) -> Option<&HashMap<String, String>>

Returns the initial storage options without triggering any refresh.

For the latest refreshed options, use Self::latest_storage_options.

Source

pub fn storage_options_provider( &self, ) -> Option<Arc<dyn StorageOptionsProvider>>

Returns the storage options provider used when opening this dataset, if any.

Source

pub fn storage_options_accessor(&self) -> Option<Arc<StorageOptionsAccessor>>

Returns the unified storage options accessor for this dataset, if any.

The accessor handles both static and dynamic storage options with automatic caching and refresh. Use StorageOptionsAccessor::get_storage_options to get the latest options.

Source

pub async fn latest_storage_options(&self) -> Result<Option<StorageOptions>>

Returns the latest (possibly refreshed) storage options.

If a dynamic storage options provider is configured, this will return the cached options if still valid, or fetch fresh options if expired.

For the initial static options without refresh, use Self::initial_storage_options.

§Returns

Ok(Some(options)) - Storage options are available (static or refreshed)
Ok(None) - No storage options were configured for this dataset
Err(...) - Error occurred while fetching/refreshing options from provider

Source

pub async fn index_cache_entry_count(&self) -> usize

Get the number of entries currently in the index cache.

Source

pub async fn index_cache_hit_rate(&self) -> f32

Get cache hit ratio.

Source

pub fn cache_size_bytes(&self) -> u64

Source

pub async fn versions(&self) -> Result<Vec<Version>>

Get all versions.

Source

pub async fn latest_version_id(&self) -> Result<u64>

Get the latest version of the dataset. This is meant to be a fast path for checking if a dataset has changed, which is why we don’t return the full version struct.

Source

pub fn count_fragments(&self) -> usize

Source

pub fn schema(&self) -> &Schema

Get the schema of the dataset

Source

pub fn empty_projection(self: &Arc<Self>) -> Projection

Similar to Self::schema, but only returns fields that are not marked as blob columns. Creates a new empty projection into the dataset schema.

Source

pub fn full_projection(self: &Arc<Self>) -> Projection

Creates a projection that includes all columns in the dataset

Source

pub fn get_fragments(&self) -> Vec<FileFragment>

Get fragments.

If filter is provided, only fragments with the given name will be returned.

Source

pub fn get_fragment(&self, fragment_id: usize) -> Option<FileFragment>

Source

pub fn fragments(&self) -> &Arc<Vec<Fragment>>

Source

pub fn get_frags_from_ordered_ids( &self, ordered_ids: &[u32], ) -> Vec<Option<FileFragment>>

Source

pub async fn num_small_files(&self, max_rows_per_group: usize) -> usize

Gets the number of files that are so small they don’t even have a full group. These are considered too small because reading many of them is much less efficient than reading a single file because the separate files split up what would otherwise be single IO requests into multiple.

Source

pub async fn validate(&self) -> Result<()>

Source

pub async fn migrate_manifest_paths_v2(&mut self) -> Result<()>

Migrate the dataset to use the new manifest path scheme.

This function will rename all V1 manifests to ManifestNamingScheme::V2. These paths provide more efficient opening of datasets with many versions on object stores.

This function is idempotent, and can be run multiple times without changing the state of the object store.

However, it should not be run while other concurrent operations are happening. And it should also run until completion before resuming other operations.

let mut dataset = Dataset::write(data, "memory://test", None).await.unwrap();
assert_eq!(dataset.manifest_location().naming_scheme, ManifestNamingScheme::V1);

dataset.migrate_manifest_paths_v2().await.unwrap();
assert_eq!(dataset.manifest_location().naming_scheme, ManifestNamingScheme::V2);

Source

pub async fn shallow_clone( &mut self, target_path: &str, version: impl Into<Ref>, store_params: Option<ObjectStoreParams>, ) -> Result<Self>

Shallow clone the target version into a new dataset at target_path. ‘target_path’: the uri string to clone the dataset into. ‘version’: the version cloned from, could be a version number or tag. ‘store_params’: the object store params to use for the new dataset.

Source

pub fn sql(&self, sql: &str) -> SqlQueryBuilder

Run a SQL query against the dataset. The underlying SQL engine is DataFusion. Please refer to the DataFusion documentation for supported SQL syntax.

Source §

impl Dataset

§Schema Evolution

Lance datasets support evolving the schema. Several operations are supported that mirror common SQL operations:

Self::add_columns(): Add new columns to the dataset, similar to ALTER TABLE ADD COLUMN.
Self::drop_columns(): Drop columns from the dataset, similar to ALTER TABLE DROP COLUMN.
Self::alter_columns(): Modify columns in the dataset, changing their name, type, or nullability. Similar to ALTER TABLE ALTER COLUMN.

In addition, one operation is unique to Lance: merge. This operation allows inserting precomputed data into the dataset.

Because these operations change the schema of the dataset, they will conflict with most other concurrent operations. Therefore, they should be performed when no other write operations are being run.

Source

pub async fn add_columns( &mut self, transforms: NewColumnTransform, read_columns: Option<Vec<String>>, batch_size: Option<u32>, ) -> Result<()>

Append new columns to the dataset.

Source

pub async fn alter_columns( &mut self, alterations: &[ColumnAlteration], ) -> Result<()>

Modify columns in the dataset, changing their name, type, or nullability.

If only changing the name or nullability of a column, this is a zero-copy operation and any indices will be preserved. If changing the type of a column, the data for that column will be rewritten and any indices will be dropped. The old column data will not be immediately deleted. To remove it, call optimize::compact_files() and then cleanup::cleanup_old_versions() on the dataset.

Source

pub async fn drop_columns(&mut self, columns: &[&str]) -> Result<()>

Remove columns from the dataset.

This is a metadata-only operation and does not remove the data from the underlying storage. In order to remove the data, you must subsequently call optimize::compact_files() to rewrite the data without the removed columns and then call cleanup::cleanup_old_versions() to remove the old files.

Source

pub async fn drop(&mut self, columns: &[&str]) -> Result<()>

👎Deprecated since 0.9.12: Please use drop_columns instead.

Drop columns from the dataset and return updated dataset. Note that this is a zero-copy operation and column is not physically removed from the dataset. Parameters:

columns: the list of column names to drop.

Source

pub async fn merge( &mut self, stream: impl RecordBatchReader + Send + 'static, left_on: &str, right_on: &str, ) -> Result<()>

Merge this dataset with another Arrow Table / Dataset, producing a new version of the dataset.

Parameters:

stream: the stream of RecordBatch to merge.
left_on: the column name to join on the left side (self).
right_on: the column name to join on the right side (stream).

Returns: a new version of dataset.

It performs a left-join on the two datasets.

Source §

impl Dataset

§Dataset metadata APIs

There are four kinds of metadata on datasets:

Schema metadata: metadata about the data itself.
Field metadata: metadata about an individual field (column) of the schema.
Dataset metadata: metadata about the dataset. For example, this could store a created_at date.
Dataset config: configuration values controlling how engines should manage the dataset. This configures things like auto-cleanup.

You can get the dataset metadata with Self::metadata() and the dataset config with Self::config().

Source

pub fn metadata(&self) -> &HashMap<String, String>

Get dataset metadata.

Source

pub fn config(&self) -> &HashMap<String, String>

Get the dataset config from manifest

Source

pub async fn delete_config_keys(&mut self, delete_keys: &[&str]) -> Result<()>

👎Deprecated: Use the new update_config(values, replace) method - pass None values to delete keys

Delete keys from the config.

Source

pub fn update_metadata( &mut self, values: impl IntoIterator<Item = impl Into<UpdateMapEntry>>, ) -> UpdateMetadataBuilder<'_>

Update table metadata.

Pass None for a value to remove that key.

Use .replace() to replace the entire metadata map instead of merging.

Returns the updated metadata map after the operation.

// Update single key
dataset.update_metadata([("key", "value")]).await?;

// Remove a key
dataset.update_metadata([("to_delete", None)]).await?;

// Clear all metadata
dataset.update_metadata([] as [UpdateMapEntry; 0]).replace().await?;

// Replace full metadata
dataset.update_metadata([("k1", "v1"), ("k2", "v2")]).replace().await?;

Source

pub fn update_config( &mut self, values: impl IntoIterator<Item = impl Into<UpdateMapEntry>>, ) -> UpdateMetadataBuilder<'_>

Update config.

Pass None for a value to remove that key.

Use .replace() to replace the entire config map instead of merging.

Returns the updated config map after the operation.

// Update single key
dataset.update_config([("key", "value")]).await?;

// Remove a key
dataset.update_config([("to_delete", None)]).await?;

// Clear all config
dataset.update_config([] as [UpdateMapEntry; 0]).replace().await?;

// Replace full config
dataset.update_config([("k1", "v1"), ("k2", "v2")]).replace().await?;

Source

pub fn update_schema_metadata( &mut self, values: impl IntoIterator<Item = impl Into<UpdateMapEntry>>, ) -> UpdateMetadataBuilder<'_>

Update schema metadata.

Pass None for a value to remove that key.

Use .replace() to replace the entire schema metadata map instead of merging.

Returns the updated schema metadata map after the operation.

// Update single key
dataset.update_schema_metadata([("key", "value")]).await?;

// Remove a key
dataset.update_schema_metadata([("to_delete", None)]).await?;

// Clear all schema metadata
dataset.update_schema_metadata([] as [UpdateMapEntry; 0]).replace().await?;

// Replace full schema metadata
dataset.update_schema_metadata([("k1", "v1"), ("k2", "v2")]).replace().await?;

Source

pub async fn replace_schema_metadata( &mut self, new_values: impl IntoIterator<Item = (String, String)>, ) -> Result<()>

👎Deprecated: Use the new update_schema_metadata(values).replace() instead

Update schema metadata

Source

pub fn update_field_metadata(&mut self) -> UpdateFieldMetadataBuilder<'_>

Update field metadata

// Update metadata by field path
dataset.update_field_metadata()
    .update("path.to_field", [("key", "value")])?
    .await?;

// Update metadata by field id
dataset.update_field_metadata()
    .update(12, [("key", "value")])?
    .await?;

// Clear field metadata
dataset.update_field_metadata()
    .replace("path.to_field", [] as [UpdateMapEntry; 0])?
    .replace(12, [] as [UpdateMapEntry; 0])?
    .await?;

// Replace field metadata
dataset.update_field_metadata()
    .replace("field_name", [("k1", "v1"), ("k2", "v2")])?
    .await?;

Source

pub async fn replace_field_metadata( &mut self, new_values: impl IntoIterator<Item = (u32, HashMap<String, String>)>, ) -> Result<()>

Update field metadata

Trait Implementations§

Source §

impl Clone for Dataset

Source §

fn clone(&self) -> Dataset

Returns a duplicate of the value. Read more

1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

Source §

impl DatasetIndexExt for Dataset

Source §

fn create_index_builder<'a>( &'a mut self, columns: &[&str], index_type: IndexType, params: &'a dyn IndexParams, ) -> CreateIndexBuilder<'a>

Create a builder for creating an index on columns.

This returns a builder that can be configured with additional options before awaiting to execute.

§Examples

Create a scalar BTREE index:

let params = ScalarIndexParams::default();
dataset
    .create_index_builder(&["id"], IndexType::BTree, &params)
    .name("id_index".to_string())
    .await?;

Create an empty index that will be populated later:

let params = ScalarIndexParams::default();
dataset
    .create_index_builder(&["category"], IndexType::Bitmap, &params)
    .train(false)  // Create empty index
    .replace(true)  // Replace if exists
    .await?;

Source §

type IndexBuilder<'a> = CreateIndexBuilder<'a>

Source §

fn create_index<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 mut self, columns: &'life1 [&'life2 str], index_type: IndexType, name: Option<String>, params: &'life3 dyn IndexParams, replace: bool, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,

Create indices on columns. Read more

Source §

fn drop_index<'life0, 'life1, 'async_trait>( &'life0 mut self, name: &'life1 str, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Drop indices by name. Read more

Source §

fn prewarm_index<'life0, 'life1, 'async_trait>( &'life0 self, name: &'life1 str, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Prewarm an index by name. Read more

Source §

fn describe_indices<'a, 'b, 'async_trait>( &'a self, criteria: Option<IndexCriteria<'b>>, ) -> Pin<Box<dyn Future<Output = Result<Vec<Arc<dyn IndexDescription>>>> + Send + 'async_trait>>
where Self: 'async_trait, 'a: 'async_trait, 'b: 'async_trait,

Describes indexes in a dataset Read more

Source §

fn load_indices<'life0, 'async_trait>( &'life0 self, ) -> Pin<Box<dyn Future<Output = Result<Arc<Vec<IndexMetadata>>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Read all indices of this Dataset version. Read more

Source §

fn commit_existing_index<'life0, 'life1, 'life2, 'async_trait>( &'life0 mut self, index_name: &'life1 str, column: &'life2 str, index_id: Uuid, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait,

Source §

fn load_scalar_index<'a, 'b, 'async_trait>( &'a self, criteria: IndexCriteria<'b>, ) -> Pin<Box<dyn Future<Output = Result<Option<IndexMetadata>>> + Send + 'async_trait>>
where Self: 'async_trait, 'a: 'async_trait, 'b: 'async_trait,

Loads the scalar index matching the given criteria, if one exists.

Source §

fn optimize_indices<'life0, 'life1, 'async_trait>( &'life0 mut self, options: &'life1 OptimizeOptions, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Optimize indices.

Source §

fn index_statistics<'life0, 'life1, 'async_trait>( &'life0 self, index_name: &'life1 str, ) -> Pin<Box<dyn Future<Output = Result<String>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Find index with a given index_name and return its serialized statistics. Read more

Source §

fn read_index_partition<'life0, 'life1, 'async_trait>( &'life0 self, index_name: &'life1 str, partition_id: usize, with_vector: bool, ) -> Pin<Box<dyn Future<Output = Result<SendableRecordBatchStream>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Source §

fn load_index<'life0, 'life1, 'async_trait>( &'life0 self, uuid: &'life1 str, ) -> Pin<Box<dyn Future<Output = Result<Option<IndexMetadata>, Error>> + Send + 'async_trait>>
where 'life0: 'async_trait, 'life1: 'async_trait, Self: Sync + 'async_trait,

Loads all the indices of a given UUID. Read more

Source §

fn load_indices_by_name<'life0, 'life1, 'async_trait>( &'life0 self, name: &'life1 str, ) -> Pin<Box<dyn Future<Output = Result<Vec<IndexMetadata>, Error>> + Send + 'async_trait>>
where 'life0: 'async_trait, 'life1: 'async_trait, Self: Sync + 'async_trait,

Loads a specific index with the given index name Read more

Source §

fn load_index_by_name<'life0, 'life1, 'async_trait>( &'life0 self, name: &'life1 str, ) -> Pin<Box<dyn Future<Output = Result<Option<IndexMetadata>, Error>> + Send + 'async_trait>>
where 'life0: 'async_trait, 'life1: 'async_trait, Self: Sync + 'async_trait,

Loads a specific index with the given index name. This function only works for indices that are unique. If there are multiple indices sharing the same name, please use load_indices_by_name instead. Read more

Source §

impl DatasetIndexInternalExt for Dataset

Source §

fn open_generic_index<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 self, column: &'life1 str, uuid: &'life2 str, metrics: &'life3 dyn MetricsCollector, ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn Index>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,

Opens an index (scalar or vector) as a generic index

Source §

fn open_scalar_index<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 self, column: &'life1 str, uuid: &'life2 str, metrics: &'life3 dyn MetricsCollector, ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ScalarIndex>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,

Opens the requested scalar index

Source §

fn open_vector_index<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 self, column: &'life1 str, uuid: &'life2 str, metrics: &'life3 dyn MetricsCollector, ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn VectorIndex>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,

Opens the requested vector index

Source §

fn open_frag_reuse_index<'life0, 'life1, 'async_trait>( &'life0 self, metrics: &'life1 dyn MetricsCollector, ) -> Pin<Box<dyn Future<Output = Result<Option<Arc<FragReuseIndex>>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Opens the fragment reuse index

Source §

fn open_mem_wal_index<'life0, 'life1, 'async_trait>( &'life0 self, metrics: &'life1 dyn MetricsCollector, ) -> Pin<Box<dyn Future<Output = Result<Option<Arc<MemWalIndex>>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Opens the MemWAL index

Source §

fn frag_reuse_index_uuid<'life0, 'async_trait>( &'life0 self, ) -> Pin<Box<dyn Future<Output = Option<Uuid>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Gets the fragment reuse index UUID from the current manifest, if it exists

Source §

fn scalar_index_info<'life0, 'async_trait>( &'life0 self, ) -> Pin<Box<dyn Future<Output = Result<ScalarIndexInfo>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Loads information about all the available scalar indices on the dataset

Source §

fn unindexed_fragments<'life0, 'life1, 'async_trait>( &'life0 self, name: &'life1 str, ) -> Pin<Box<dyn Future<Output = Result<Vec<Fragment>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Return the fragments that are not covered by any of the deltas of the index.

Source §

fn indexed_fragments<'life0, 'life1, 'async_trait>( &'life0 self, name: &'life1 str, ) -> Pin<Box<dyn Future<Output = Result<Vec<Vec<Fragment>>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Return the fragments that are covered by each of the deltas of the index.

Source §

fn initialize_index<'life0, 'life1, 'life2, 'async_trait>( &'life0 mut self, source_dataset: &'life1 Dataset, index_name: &'life2 str, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait,

Initialize a specific index on this dataset based on an index from a source dataset.

Source §

fn initialize_indices<'life0, 'life1, 'async_trait>( &'life0 mut self, source_dataset: &'life1 Dataset, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Initialize all indices on this dataset based on indices from a source dataset. This will call initialize_index for each non-system index in the source dataset.

Source §

impl DatasetStatisticsExt for Dataset

Source §

async fn calculate_data_stats(self: &Arc<Self>) -> Result<DataStatistics>

Get statistics about the data in the dataset

Source §

impl DatasetTakeRows for Dataset

Source §

fn schema(&self) -> &Schema

The schema of the dataset.

Source §

fn take_rows<'life0, 'life1, 'life2, 'async_trait>( &'life0 self, row_ids: &'life1 [u64], projection: &'life2 Schema, ) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait,

Take rows by the internal ROW ids.

Source §

impl Debug for Dataset

Source §

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Source §

impl Projectable for Dataset

Source §

fn schema(&self) -> &Schema

Source §

impl ScalarIndexLoader for Dataset

Source §

fn load_index<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 self, column: &'life1 str, index_name: &'life2 str, metrics: &'life3 dyn MetricsCollector, ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ScalarIndex>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,

Load the index with the given name

Source §

impl TableProvider for Dataset

Source §

fn as_any(&self) -> &dyn Any

Returns the table provider as Any so that it can be downcast to a specific implementation.

Source §

fn schema(&self) -> Arc<ArrowSchema>

Get a reference to the schema for this table

Source §

fn table_type(&self) -> TableType

Get the type of this table for metadata/catalog purposes.

Source §

fn get_table_definition(&self) -> Option<&str>

Get the create statement used to create this table, if available.

Source §

fn get_logical_plan(&self) -> Option<Cow<'_, LogicalPlan>>

Get the LogicalPlan of this table, if available.

Source §

fn scan<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 self, _: &'life1 dyn Session, projection: Option<&'life2 Vec<usize>>, _: &'life3 [Expr], limit: Option<usize>, ) -> Pin<Box<dyn Future<Output = DatafusionResult<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,

Create an ExecutionPlan for scanning the table with optionally specified projection, filter and limit, described below. Read more

Source §

fn constraints(&self) -> Option<&Constraints>

Get a reference to the constraints of the table. Returns: Read more

Source §

fn get_column_default(&self, _column: &str) -> Option<&Expr>

Get the default value for a column, if available.

Source §

fn supports_filters_pushdown( &self, filters: &[&Expr], ) -> Result<Vec<TableProviderFilterPushDown>, DataFusionError>

Specify if DataFusion should provide filter expressions to the TableProvider to apply during the scan. Read more

Source §

fn statistics(&self) -> Option<Statistics>

Get statistics for this table, if available. Although not presently used in mainline DataFusion, this allows implementation-specific behavior for downstream repositories, in conjunction with specialized optimizer rules to perform operations such as re-ordering of joins.

Source §

fn insert_into<'life0, 'life1, 'async_trait>( &'life0 self, _state: &'life1 dyn Session, _input: Arc<dyn ExecutionPlan>, _insert_op: InsertOp, ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>, DataFusionError>> + Send + 'async_trait>>
where 'life0: 'async_trait, 'life1: 'async_trait, Self: 'async_trait,

Return an ExecutionPlan to insert data into this table, if supported. Read more

Auto Trait Implementations§

§

impl Freeze for Dataset

§

impl !RefUnwindSafe for Dataset

§

impl Send for Dataset

§

impl Sync for Dataset

§

impl Unpin for Dataset

§

impl UnsafeUnpin for Dataset

§

impl !UnwindSafe for Dataset

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> CloneToUninit for T
where T: Clone,

Source §

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)

Performs copy-assignment from self to dest. Read more

Source §

impl<T> Conv for T

Source §

fn conv<T>(self) -> T
where Self: Into<T>,

Converts self into T using Into<T>. Read more

Source §

impl<T> Downcast for T
where T: Any,

Source §

fn into_any(self: Box<T>) -> Box<dyn Any>

Converts Box<dyn Trait> (where Trait: Downcast) to Box<dyn Any>, which can then be downcast into Box<dyn ConcreteType> where ConcreteType implements Trait.

Source §

fn into_any_rc(self: Rc<T>) -> Rc<dyn Any>

Converts Rc<Trait> (where Trait: Downcast) to Rc<Any>, which can then be further downcast into Rc<ConcreteType> where ConcreteType implements Trait.

Source §

fn as_any(&self) -> &(dyn Any + 'static)

Converts &Trait (where Trait: Downcast) to &Any. This is needed since Rust cannot generate &Any’s vtable from &Trait’s.

Source §

fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)

Converts &mut Trait (where Trait: Downcast) to &mut Any. This is needed since Rust cannot generate &mut Any’s vtable from &mut Trait’s.

Source §

impl<T> DowncastSend for T
where T: Any + Send,

Source §

fn into_any_send(self: Box<T>) -> Box<dyn Any + Send>

Converts Box<Trait> (where Trait: DowncastSend) to Box<dyn Any + Send>, which can then be downcast into Box<ConcreteType> where ConcreteType implements Trait.

Source §

impl<T> DowncastSync for T
where T: Any + Send + Sync,

Source §

fn into_any_sync(self: Box<T>) -> Box<dyn Any + Sync + Send>

Converts Box<Trait> (where Trait: DowncastSync) to Box<dyn Any + Send + Sync>, which can then be downcast into Box<ConcreteType> where ConcreteType implements Trait.

Source §

fn into_any_arc(self: Arc<T>) -> Arc<dyn Any + Sync + Send>

Converts Arc<Trait> (where Trait: DowncastSync) to Arc<Any>, which can then be downcast into Arc<ConcreteType> where ConcreteType implements Trait.

Source §

impl<T> FmtForward for T

Source §

fn fmt_binary(self) -> FmtBinary<Self>
where Self: Binary,

Causes self to use its Binary implementation when Debug-formatted.

Source §

fn fmt_display(self) -> FmtDisplay<Self>
where Self: Display,

Causes self to use its Display implementation when Debug-formatted.

Source §

fn fmt_lower_exp(self) -> FmtLowerExp<Self>
where Self: LowerExp,

Causes self to use its LowerExp implementation when Debug-formatted.

Source §

fn fmt_lower_hex(self) -> FmtLowerHex<Self>
where Self: LowerHex,

Causes self to use its LowerHex implementation when Debug-formatted.

Source §

fn fmt_octal(self) -> FmtOctal<Self>
where Self: Octal,

Causes self to use its Octal implementation when Debug-formatted.

Source §

fn fmt_pointer(self) -> FmtPointer<Self>
where Self: Pointer,

Causes self to use its Pointer implementation when Debug-formatted.

Source §

fn fmt_upper_exp(self) -> FmtUpperExp<Self>
where Self: UpperExp,

Causes self to use its UpperExp implementation when Debug-formatted.

Source §

fn fmt_upper_hex(self) -> FmtUpperHex<Self>
where Self: UpperHex,

Causes self to use its UpperHex implementation when Debug-formatted.

Source §

fn fmt_list(self) -> FmtList<Self>
where for<'a> &'a Self: IntoIterator,

Formats each item in a sequence. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T> Instrument for T

Source §

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more

Source §

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> IntoEither for T

Source §

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more

Source §

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more

Source §

impl<T> Pipe for T
where T: ?Sized,

Source §

fn pipe<R>(self, func: impl FnOnce(Self) -> R) -> R
where Self: Sized,

Pipes by value. This is generally the method you want to use. Read more

Source §

fn pipe_ref<'a, R>(&'a self, func: impl FnOnce(&'a Self) -> R) -> R
where R: 'a,

Borrows self and passes that borrow into the pipe function. Read more

Source §

fn pipe_ref_mut<'a, R>(&'a mut self, func: impl FnOnce(&'a mut Self) -> R) -> R
where R: 'a,

Mutably borrows self and passes that borrow into the pipe function. Read more

Source §

fn pipe_borrow<'a, B, R>(&'a self, func: impl FnOnce(&'a B) -> R) -> R
where Self: Borrow<B>, B: 'a + ?Sized, R: 'a,

Borrows self, then passes self.borrow() into the pipe function. Read more

Source §

fn pipe_borrow_mut<'a, B, R>( &'a mut self, func: impl FnOnce(&'a mut B) -> R, ) -> R
where Self: BorrowMut<B>, B: 'a + ?Sized, R: 'a,

Mutably borrows self, then passes self.borrow_mut() into the pipe function. Read more

Source §

fn pipe_as_ref<'a, U, R>(&'a self, func: impl FnOnce(&'a U) -> R) -> R
where Self: AsRef<U>, U: 'a + ?Sized, R: 'a,

Borrows self, then passes self.as_ref() into the pipe function.

Source §

fn pipe_as_mut<'a, U, R>(&'a mut self, func: impl FnOnce(&'a mut U) -> R) -> R
where Self: AsMut<U>, U: 'a + ?Sized, R: 'a,

Mutably borrows self, then passes self.as_mut() into the pipe function.

Source §

fn pipe_deref<'a, T, R>(&'a self, func: impl FnOnce(&'a T) -> R) -> R
where Self: Deref<Target = T>, T: 'a + ?Sized, R: 'a,

Borrows self, then passes self.deref() into the pipe function.

Source §

fn pipe_deref_mut<'a, T, R>( &'a mut self, func: impl FnOnce(&'a mut T) -> R, ) -> R
where Self: DerefMut<Target = T> + Deref, T: 'a + ?Sized, R: 'a,

Mutably borrows self, then passes self.deref_mut() into the pipe function.

Source §

impl<T> Pointable for T

Source §

const ALIGN: usize

The alignment of pointer.

Source §

type Init = T

The type for initializers.

Source §

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes an object with the given initializer. Read more

Source §

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more

Source §

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more

Source §

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more

Source §

impl<T> PolicyExt for T
where T: ?Sized,

Source §

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow only if self and other return Action::Follow. Read more

Source §

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow if either self or other returns Action::Follow. Read more

Source §

impl<T> Same for T

Source §

type Output = T

Should always be Self

Source §

impl<T> Tap for T

Source §

fn tap(self, func: impl FnOnce(&Self)) -> Self

Immutable access to a value. Read more

Source §

fn tap_mut(self, func: impl FnOnce(&mut Self)) -> Self

Mutable access to a value. Read more

Source §

fn tap_borrow<B>(self, func: impl FnOnce(&B)) -> Self
where Self: Borrow<B>, B: ?Sized,

Immutable access to the Borrow of a value. Read more

Source §

fn tap_borrow_mut<B>(self, func: impl FnOnce(&mut B)) -> Self
where Self: BorrowMut<B>, B: ?Sized,

Mutable access to the BorrowMut of a value. Read more

Source §

fn tap_ref<R>(self, func: impl FnOnce(&R)) -> Self
where Self: AsRef<R>, R: ?Sized,

Immutable access to the AsRef<R> view of a value. Read more

Source §

fn tap_ref_mut<R>(self, func: impl FnOnce(&mut R)) -> Self
where Self: AsMut<R>, R: ?Sized,

Mutable access to the AsMut<R> view of a value. Read more

Source §

fn tap_deref<T>(self, func: impl FnOnce(&T)) -> Self
where Self: Deref<Target = T>, T: ?Sized,

Immutable access to the Deref::Target of a value. Read more

Source §

fn tap_deref_mut<T>(self, func: impl FnOnce(&mut T)) -> Self
where Self: DerefMut<Target = T> + Deref, T: ?Sized,

Mutable access to the Deref::Target of a value. Read more

Source §

fn tap_dbg(self, func: impl FnOnce(&Self)) -> Self

Calls .tap() only in debug builds, and is erased in release builds.

Source §

fn tap_mut_dbg(self, func: impl FnOnce(&mut Self)) -> Self

Calls .tap_mut() only in debug builds, and is erased in release builds.

Source §

fn tap_borrow_dbg<B>(self, func: impl FnOnce(&B)) -> Self
where Self: Borrow<B>, B: ?Sized,

Calls .tap_borrow() only in debug builds, and is erased in release builds.

Source §

fn tap_borrow_mut_dbg<B>(self, func: impl FnOnce(&mut B)) -> Self
where Self: BorrowMut<B>, B: ?Sized,

Calls .tap_borrow_mut() only in debug builds, and is erased in release builds.

Source §

fn tap_ref_dbg<R>(self, func: impl FnOnce(&R)) -> Self
where Self: AsRef<R>, R: ?Sized,

Calls .tap_ref() only in debug builds, and is erased in release builds.

Source §

fn tap_ref_mut_dbg<R>(self, func: impl FnOnce(&mut R)) -> Self
where Self: AsMut<R>, R: ?Sized,

Calls .tap_ref_mut() only in debug builds, and is erased in release builds.

Source §

fn tap_deref_dbg<T>(self, func: impl FnOnce(&T)) -> Self
where Self: Deref<Target = T>, T: ?Sized,

Calls .tap_deref() only in debug builds, and is erased in release builds.

Source §

fn tap_deref_mut_dbg<T>(self, func: impl FnOnce(&mut T)) -> Self
where Self: DerefMut<Target = T> + Deref, T: ?Sized,

Calls .tap_deref_mut() only in debug builds, and is erased in release builds.

Source §

impl<T> ToOwned for T
where T: Clone,

Source §

type Owned = T

The resulting type after obtaining ownership.

Source §

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more

Source §

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more

Source §

impl<T> TryConv for T

Source §

fn try_conv<T>(self) -> Result<T, Self::Error>
where Self: TryInto<T>,

Attempts to convert self into T using TryInto<T>. Read more

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Source §

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source §

fn vzip(self) -> V

Source §

impl<T> WithSubscriber for T

Source §

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more

Source §

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more

Source §

impl<G1, G2> Within<G2> for G1
where G2: Contains<G1>,

Source §

fn is_within(&self, b: &G2) -> bool

Source §

Struct Dataset Copy item path

Fields§

Implementations§

impl Dataset

pub async fn open(uri: &str) -> Result<Self>

pub async fn checkout_version(&self, version: impl Into<Ref>) -> Result<Self>

pub fn tags(&self) -> Tags<'_>

pub fn branches(&self) -> Branches<'_>

pub async fn checkout_latest(&mut self) -> Result<()>

pub async fn checkout_branch(&self, branch: &str) -> Result<Self>

pub async fn create_branch( &mut self, branch: &str, version: impl Into<Ref>, store_params: Option<ObjectStoreParams>, ) -> Result<Self>

pub async fn delete_branch(&mut self, branch: &str) -> Result<()>

pub async fn force_delete_branch(&mut self, branch: &str) -> Result<()>

pub async fn list_branches(&self) -> Result<HashMap<String, BranchContents>>

pub async fn write( batches: impl RecordBatchReader + Send + 'static, dest: impl Into<WriteDestination<'_>>, params: Option<WriteParams>, ) -> Result<Self>

pub async fn write_into_namespace( batches: impl RecordBatchReader + Send + 'static, namespace: Arc<dyn LanceNamespace>, table_id: Vec<String>, params: Option<WriteParams>, ) -> Result<Self>

§Arguments

pub async fn append( &mut self, batches: impl RecordBatchReader + Send + 'static, params: Option<WriteParams>, ) -> Result<()>

pub fn uri(&self) -> &str

pub fn branch_location(&self) -> BranchLocation

pub fn find_branch_location(&self, branch_name: &str) -> Result<BranchLocation>

pub fn manifest(&self) -> &Manifest

pub fn manifest_location(&self) -> &ManifestLocation

pub fn delta(&self) -> DatasetDeltaBuilder

§Example

pub async fn latest_manifest(&self) -> Result<(Arc<Manifest>, ManifestLocation)>

pub async fn read_transaction(&self) -> Result<Option<Transaction>>

pub async fn read_transaction_by_version( &self, version: u64, ) -> Result<Option<Transaction>>

pub async fn get_transactions( &self, recent_transactions: usize, ) -> Result<Vec<Option<Transaction>>>

§Arguments

§Returns

pub async fn restore(&mut self) -> Result<()>

pub fn cleanup_old_versions( &self, older_than: Duration, delete_unverified: Option<bool>, error_if_tagged_old_versions: Option<bool>, ) -> BoxFuture<'_, Result<RemovalStats>>

§Arguments

§Returns

pub fn cleanup_with_policy( &self, policy: CleanupPolicy, ) -> BoxFuture<'_, Result<RemovalStats>>

§Arguments

§Returns

pub async fn commit( dest: impl Into<WriteDestination<'_>>, operation: Operation, read_version: Option<u64>, store_params: Option<ObjectStoreParams>, commit_handler: Option<Arc<dyn CommitHandler>>, session: Arc<Session>, enable_v2_manifest_paths: bool, ) -> Result<Self>

§Arguments

pub async fn commit_detached( dest: impl Into<WriteDestination<'_>>, operation: Operation, read_version: Option<u64>, store_params: Option<ObjectStoreParams>, commit_handler: Option<Arc<dyn CommitHandler>>, session: Arc<Session>, enable_v2_manifest_paths: bool, ) -> Result<Self>

pub fn scan(&self) -> Scanner

pub async fn count_rows(&self, filter: Option<String>) -> Result<usize>

pub async fn take( &self, row_indices: &[u64], projection: impl Into<ProjectionRequest>, ) -> Result<RecordBatch>

pub async fn take_rows( &self, row_ids: &[u64], projection: impl Into<ProjectionRequest>, ) -> Result<RecordBatch>

pub fn take_builder( self: &Arc<Self>, row_ids: &[u64], projection: impl Into<ProjectionRequest>, ) -> Result<TakeBuilder>

pub async fn take_blobs( self: &Arc<Self>, row_ids: &[u64], column: impl AsRef<str>, ) -> Result<Vec<BlobFile>>

pub async fn take_blobs_by_addresses( self: &Arc<Self>, row_addrs: &[u64], column: impl AsRef<str>, ) -> Result<Vec<BlobFile>>

pub async fn take_blobs_by_indices( self: &Arc<Self>, row_indices: &[u64], column: impl AsRef<str>, ) -> Result<Vec<BlobFile>>

pub fn take_scan( &self, row_ranges: Pin<Box<dyn Stream<Item = Result<Range<u64>>> + Send>>, projection: Arc<Schema>, batch_readahead: usize, ) -> DatasetRecordBatchStream

pub async fn delete(&mut self, predicate: &str) -> Result<()>

pub async fn add_bases( self: &Arc<Self>, new_bases: Vec<BasePath>, transaction_properties: Option<HashMap<String, String>>, ) -> Result<Self>

§Arguments

§Returns

pub async fn count_deleted_rows(&self) -> Result<usize>

pub fn object_store(&self) -> &ObjectStore

pub fn storage_options(&self) -> Option<&HashMap<String, String>>

pub fn initial_storage_options(&self) -> Option<&HashMap<String, String>>

pub fn storage_options_provider( &self, ) -> Option<Arc<dyn StorageOptionsProvider>>

pub fn storage_options_accessor(&self) -> Option<Arc<StorageOptionsAccessor>>

pub async fn latest_storage_options(&self) -> Result<Option<StorageOptions>>

§Returns

pub fn data_dir(&self) -> Path

pub fn indices_dir(&self) -> Path

pub fn session(&self) -> Arc<Session>

pub fn version(&self) -> Version

pub async fn index_cache_entry_count(&self) -> usize

pub async fn index_cache_hit_rate(&self) -> f32

pub fn cache_size_bytes(&self) -> u64

pub async fn versions(&self) -> Result<Vec<Version>>

pub async fn latest_version_id(&self) -> Result<u64>

pub fn count_fragments(&self) -> usize

pub fn schema(&self) -> &Schema

pub fn empty_projection(self: &Arc<Self>) -> Projection

pub fn full_projection(self: &Arc<Self>) -> Projection

pub fn get_fragments(&self) -> Vec<FileFragment>

pub fn get_fragment(&self, fragment_id: usize) -> Option<FileFragment>

pub fn fragments(&self) -> &Arc<Vec<Fragment>>

pub fn get_frags_from_ordered_ids( &self, ordered_ids: &[u32], ) -> Vec<Option<FileFragment>>

pub async fn num_small_files(&self, max_rows_per_group: usize) -> usize

Struct Dataset

fn drop_index<'life0, 'life1, 'async_trait>( &'life0 mut self, name: &'life1 str, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

fn prewarm_index<'life0, 'life1, 'async_trait>( &'life0 self, name: &'life1 str, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

fn describe_indices<'a, 'b, 'async_trait>( &'a self, criteria: Option<IndexCriteria<'b>>, ) -> Pin<Box<dyn Future<Output = Result<Vec<Arc<dyn IndexDescription>>>> + Send + 'async_trait>>
where Self: 'async_trait, 'a: 'async_trait, 'b: 'async_trait,

fn load_indices<'life0, 'async_trait>( &'life0 self, ) -> Pin<Box<dyn Future<Output = Result<Arc<Vec<IndexMetadata>>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

fn load_scalar_index<'a, 'b, 'async_trait>( &'a self, criteria: IndexCriteria<'b>, ) -> Pin<Box<dyn Future<Output = Result<Option<IndexMetadata>>> + Send + 'async_trait>>
where Self: 'async_trait, 'a: 'async_trait, 'b: 'async_trait,

fn optimize_indices<'life0, 'life1, 'async_trait>( &'life0 mut self, options: &'life1 OptimizeOptions, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

fn index_statistics<'life0, 'life1, 'async_trait>( &'life0 self, index_name: &'life1 str, ) -> Pin<Box<dyn Future<Output = Result<String>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

fn read_index_partition<'life0, 'life1, 'async_trait>( &'life0 self, index_name: &'life1 str, partition_id: usize, with_vector: bool, ) -> Pin<Box<dyn Future<Output = Result<SendableRecordBatchStream>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

fn load_index<'life0, 'life1, 'async_trait>( &'life0 self, uuid: &'life1 str, ) -> Pin<Box<dyn Future<Output = Result<Option<IndexMetadata>, Error>> + Send + 'async_trait>>
where 'life0: 'async_trait, 'life1: 'async_trait, Self: Sync + 'async_trait,

fn load_indices_by_name<'life0, 'life1, 'async_trait>( &'life0 self, name: &'life1 str, ) -> Pin<Box<dyn Future<Output = Result<Vec<IndexMetadata>, Error>> + Send + 'async_trait>>
where 'life0: 'async_trait, 'life1: 'async_trait, Self: Sync + 'async_trait,

fn load_index_by_name<'life0, 'life1, 'async_trait>( &'life0 self, name: &'life1 str, ) -> Pin<Box<dyn Future<Output = Result<Option<IndexMetadata>, Error>> + Send + 'async_trait>>
where 'life0: 'async_trait, 'life1: 'async_trait, Self: Sync + 'async_trait,

fn open_frag_reuse_index<'life0, 'life1, 'async_trait>( &'life0 self, metrics: &'life1 dyn MetricsCollector, ) -> Pin<Box<dyn Future<Output = Result<Option<Arc<FragReuseIndex>>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

fn open_mem_wal_index<'life0, 'life1, 'async_trait>( &'life0 self, metrics: &'life1 dyn MetricsCollector, ) -> Pin<Box<dyn Future<Output = Result<Option<Arc<MemWalIndex>>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

fn frag_reuse_index_uuid<'life0, 'async_trait>( &'life0 self, ) -> Pin<Box<dyn Future<Output = Option<Uuid>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

fn scalar_index_info<'life0, 'async_trait>( &'life0 self, ) -> Pin<Box<dyn Future<Output = Result<ScalarIndexInfo>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

fn unindexed_fragments<'life0, 'life1, 'async_trait>( &'life0 self, name: &'life1 str, ) -> Pin<Box<dyn Future<Output = Result<Vec<Fragment>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

fn indexed_fragments<'life0, 'life1, 'async_trait>( &'life0 self, name: &'life1 str, ) -> Pin<Box<dyn Future<Output = Result<Vec<Vec<Fragment>>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

fn initialize_index<'life0, 'life1, 'life2, 'async_trait>( &'life0 mut self, source_dataset: &'life1 Dataset, index_name: &'life2 str, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait,

fn initialize_indices<'life0, 'life1, 'async_trait>( &'life0 mut self, source_dataset: &'life1 Dataset, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

fn take_rows<'life0, 'life1, 'life2, 'async_trait>( &'life0 self, row_ids: &'life1 [u64], projection: &'life2 Schema, ) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait,