DatasetReadGuard

Struct DatasetReadGuard 

Source
pub struct DatasetReadGuard<'a> { /* private fields */ }

Methods from Deref<Target = Dataset>§

Source

pub async fn checkout_version( &self, version: impl Into<Ref>, ) -> Result<Dataset, Error>

Check out a dataset version with a ref

Source

pub fn tags(&self) -> Tags<'_>

Source

pub fn branches(&self) -> Branches<'_>

Source

pub async fn checkout_branch(&self, branch: &str) -> Result<Dataset, Error>

Check out the latest version of the branch

Source

pub async fn list_branches( &self, ) -> Result<HashMap<String, BranchContents>, Error>

Source

pub fn uri(&self) -> &str

Get the fully qualified URI of this dataset.

Source

pub fn branch_location(&self) -> BranchLocation

Source

pub fn find_branch_location( &self, branch_name: &str, ) -> Result<BranchLocation, Error>

Source

pub fn manifest(&self) -> &Manifest

Get the full manifest of the dataset version.

Source

pub fn manifest_location(&self) -> &ManifestLocation

Source

pub fn delta(&self) -> DatasetDeltaBuilder

Create a delta::DatasetDeltaBuilder to explore changes between dataset versions.

§Example
let delta = dataset.delta()
    .compared_against_version(5)
    .build()?;
let inserted = delta.get_inserted_rows().await?;
Source

pub async fn latest_manifest( &self, ) -> Result<(Arc<Manifest>, ManifestLocation), Error>

Source

pub async fn read_transaction(&self) -> Result<Option<Transaction>, Error>

Read the transaction file for this version of the dataset.

If there was no transaction file written for this version of the dataset then this will return None.

Source

pub async fn read_transaction_by_version( &self, version: u64, ) -> Result<Option<Transaction>, Error>

Read the transaction file for the given version of the dataset.

If there was no transaction file written for that version of the dataset then this will return None.

Source

pub async fn get_transactions( &self, recent_transactions: usize, ) -> Result<Vec<Option<Transaction>>, Error>

List transactions for the dataset, up to a maximum number.

This method iterates through dataset versions, starting from the current version, and collects the transaction for each version. It stops when either recent_transactions is reached or there are no more versions.

§Arguments
  • recent_transactions - Maximum number of transactions to return
§Returns

A vector of optional transactions. Each element corresponds to a version, and may be None if no transaction file exists for that version.

Source

pub fn cleanup_old_versions( &self, older_than: TimeDelta, delete_unverified: Option<bool>, error_if_tagged_old_versions: Option<bool>, ) -> Pin<Box<dyn Future<Output = Result<RemovalStats, Error>> + Send + '_>>

Removes old versions of the dataset from disk

This function will remove all versions of the dataset that are older than the provided time delta (older_than). This function will not remove the current version of the dataset.

Once a version is removed it can no longer be checked out or restored. Any data unique to that version will be lost.

§Arguments
  • older_than - Versions older than this will be deleted.
  • delete_unverified - If false (the default) then files will only be deleted if they are listed in at least one manifest. Otherwise these files will be kept since they cannot be distinguished from an in-progress transaction. Set to true to delete these files if you are sure there are no other in-progress dataset operations.
§Returns
  • RemovalStats - Statistics about the removal operation
Source

pub fn cleanup_with_policy( &self, policy: CleanupPolicy, ) -> Pin<Box<dyn Future<Output = Result<RemovalStats, Error>> + Send + '_>>

Removes old versions of the dataset from storage

This function will remove all versions of the dataset that satisfy the given policy. This function will not remove the current version of the dataset.

Once a version is removed it can no longer be checked out or restored. Any data unique to that version will be lost.

§Arguments
  • policy - CleanupPolicy determines the behaviour of cleanup.
§Returns
  • RemovalStats - Statistics about the removal operation
Source

pub fn scan(&self) -> Scanner

Create a Scanner to scan the dataset.

Source

pub async fn count_rows(&self, filter: Option<String>) -> Result<usize, Error>

Count the number of rows in the dataset.

It offers a fast path that counts rows using metadata alone.

Source

pub async fn take( &self, row_indices: &[u64], projection: impl Into<ProjectionRequest>, ) -> Result<RecordBatch, Error>

Take rows by indices.

Source

pub async fn take_rows( &self, row_ids: &[u64], projection: impl Into<ProjectionRequest>, ) -> Result<RecordBatch, Error>

Take rows by their internal row IDs.

In Lance format, each row has a unique u64 id, which is used to identify the row globally.

let schema = dataset.schema().clone();
let row_ids = vec![0, 4, 7];
let rows = dataset.take_rows(&row_ids, schema).await.unwrap();

// We can have more fine-grained control over the projection, i.e., SQL projection.
let projection = ProjectionRequest::from_sql([("identity", "id * 2")]);
let rows = dataset.take_rows(&row_ids, projection).await.unwrap();
Source

pub fn take_builder( self: &Arc<Dataset>, row_ids: &[u64], projection: impl Into<ProjectionRequest>, ) -> Result<TakeBuilder, Error>

Source

pub async fn take_blobs( self: &Arc<Dataset>, row_ids: &[u64], column: impl AsRef<str>, ) -> Result<Vec<BlobFile>, Error>

Take BlobFile by row IDs (stable identifiers). For row addresses, use Self::take_blobs_by_addresses.

Source

pub async fn take_blobs_by_addresses( self: &Arc<Dataset>, row_addrs: &[u64], column: impl AsRef<str>, ) -> Result<Vec<BlobFile>, Error>

Take BlobFile by row addresses.

Row addresses are u64 values encoding (fragment_id << 32) | row_offset. Use this method when you already have row addresses, for example from a scan with with_row_address(). For row IDs (stable identifiers), use Self::take_blobs. For row indices (offsets), use Self::take_blobs_by_indices.

Source

pub async fn take_blobs_by_indices( self: &Arc<Dataset>, row_indices: &[u64], column: impl AsRef<str>, ) -> Result<Vec<BlobFile>, Error>

Take BlobFile by row indices (offsets in the dataset).

Source

pub fn take_scan( &self, row_ranges: Pin<Box<dyn Stream<Item = Result<Range<u64>, Error>> + Send>>, projection: Arc<Schema>, batch_readahead: usize, ) -> DatasetRecordBatchStream

Get a stream of batches based on a stream of ranges of row numbers.

This is an experimental API. It may change at any time.

Source

pub async fn add_bases( self: &Arc<Dataset>, new_bases: Vec<BasePath>, transaction_properties: Option<HashMap<String, String>>, ) -> Result<Dataset, Error>

Add new base paths to the dataset.

This method allows you to register additional storage locations (buckets) that can be used for future data writes. The base paths are added to the dataset’s manifest and can be referenced by name in subsequent write operations.

§Arguments
  • new_bases - A vector of lance_table::format::BasePath objects representing the new storage locations to add. Each base path should have a unique name and path.
§Returns

Returns a new Dataset instance with the updated manifest containing the new base paths.

Source

pub async fn count_deleted_rows(&self) -> Result<usize, Error>

Source

pub fn object_store(&self) -> &ObjectStore

Source

pub fn storage_options(&self) -> Option<&HashMap<String, String>>

Returns the storage options used when opening this dataset, if any.

Source

pub fn storage_options_provider( &self, ) -> Option<Arc<dyn StorageOptionsProvider>>

Returns the storage options provider used when opening this dataset, if any.

Source

pub fn data_dir(&self) -> Path

Source

pub fn indices_dir(&self) -> Path

Source

pub fn session(&self) -> Arc<Session>

Source

pub fn version(&self) -> Version

Source

pub async fn index_cache_entry_count(&self) -> usize

Get the number of entries currently in the index cache.

Source

pub async fn index_cache_hit_rate(&self) -> f32

Get the index cache hit rate.

Source

pub fn cache_size_bytes(&self) -> u64

Source

pub async fn versions(&self) -> Result<Vec<Version>, Error>

Get all versions.

Source

pub async fn latest_version_id(&self) -> Result<u64, Error>

Get the latest version of the dataset. This is meant to be a fast path for checking if a dataset has changed, which is why the full version struct is not returned.

Source

pub fn count_fragments(&self) -> usize

Source

pub fn schema(&self) -> &Schema

Get the schema of the dataset

Source

pub fn empty_projection(self: &Arc<Dataset>) -> Projection

Similar to Self::schema, but only returns fields that are not marked as blob columns. Creates a new empty projection into the dataset schema.

Source

pub fn full_projection(self: &Arc<Dataset>) -> Projection

Creates a projection that includes all columns in the dataset

Source

pub fn get_fragments(&self) -> Vec<FileFragment>

Get fragments.

Note that this method takes no filter argument.

Source

pub fn get_fragment(&self, fragment_id: usize) -> Option<FileFragment>

Source

pub fn fragments(&self) -> &Arc<Vec<Fragment>>

Source

pub fn get_frags_from_ordered_ids( &self, ordered_ids: &[u32], ) -> Vec<Option<FileFragment>>

Source

pub async fn num_small_files(&self, max_rows_per_group: usize) -> usize

Gets the number of files that are so small they don’t even have a full group. These are considered too small because reading many of them is much less efficient than reading a single file: the separate files split what would otherwise be single IO requests into multiple requests.

Source

pub async fn validate(&self) -> Result<(), Error>

Source

pub fn sql(&self, sql: &str) -> SqlQueryBuilder

Run a SQL query against the dataset. The underlying SQL engine is DataFusion. Please refer to the DataFusion documentation for supported SQL syntax.

Source

pub fn metadata(&self) -> &HashMap<String, String>

Get dataset metadata.

Source

pub fn config(&self) -> &HashMap<String, String>

Get the dataset config from manifest

Trait Implementations§

Source§

impl Deref for DatasetReadGuard<'_>

Source§

type Target = Dataset

The resulting type after dereferencing.
Source§

fn deref(&self) -> &Self::Target

Dereferences the value.

Auto Trait Implementations§

§

impl<'a> Freeze for DatasetReadGuard<'a>

§

impl<'a> !RefUnwindSafe for DatasetReadGuard<'a>

§

impl<'a> Send for DatasetReadGuard<'a>

§

impl<'a> Sync for DatasetReadGuard<'a>

§

impl<'a> Unpin for DatasetReadGuard<'a>

§

impl<'a> !UnwindSafe for DatasetReadGuard<'a>

Blanket Implementations§

§

impl<T> Any for T
where T: 'static + ?Sized,

§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
§

impl<T> Borrow<T> for T
where T: ?Sized,

§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
§

impl<T> BorrowMut<T> for T
where T: ?Sized,

§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> Conv for T

Source§

fn conv<T>(self) -> T
where Self: Into<T>,

Converts self into T using Into<T>. Read more
Source§

impl<T> Downcast for T
where T: Any,

Source§

fn into_any(self: Box<T>) -> Box<dyn Any>

Converts Box<dyn Trait> (where Trait: Downcast) to Box<dyn Any>, which can then be downcast into Box<dyn ConcreteType> where ConcreteType implements Trait.
Source§

fn into_any_rc(self: Rc<T>) -> Rc<dyn Any>

Converts Rc<Trait> (where Trait: Downcast) to Rc<Any>, which can then be further downcast into Rc<ConcreteType> where ConcreteType implements Trait.
Source§

fn as_any(&self) -> &(dyn Any + 'static)

Converts &Trait (where Trait: Downcast) to &Any. This is needed since Rust cannot generate &Any’s vtable from &Trait’s.
Source§

fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)

Converts &mut Trait (where Trait: Downcast) to &mut Any. This is needed since Rust cannot generate &mut Any’s vtable from &mut Trait’s.
Source§

impl<T> DowncastSend for T
where T: Any + Send,

Source§

fn into_any_send(self: Box<T>) -> Box<dyn Any + Send>

Converts Box<Trait> (where Trait: DowncastSend) to Box<dyn Any + Send>, which can then be downcast into Box<ConcreteType> where ConcreteType implements Trait.
Source§

impl<T> DowncastSync for T
where T: Any + Send + Sync,

Source§

fn into_any_sync(self: Box<T>) -> Box<dyn Any + Sync + Send>

Converts Box<Trait> (where Trait: DowncastSync) to Box<dyn Any + Send + Sync>, which can then be downcast into Box<ConcreteType> where ConcreteType implements Trait.
Source§

fn into_any_arc(self: Arc<T>) -> Arc<dyn Any + Sync + Send>

Converts Arc<Trait> (where Trait: DowncastSync) to Arc<Any>, which can then be downcast into Arc<ConcreteType> where ConcreteType implements Trait.
Source§

impl<T> FmtForward for T

Source§

fn fmt_binary(self) -> FmtBinary<Self>
where Self: Binary,

Causes self to use its Binary implementation when Debug-formatted.
Source§

fn fmt_display(self) -> FmtDisplay<Self>
where Self: Display,

Causes self to use its Display implementation when Debug-formatted.
Source§

fn fmt_lower_exp(self) -> FmtLowerExp<Self>
where Self: LowerExp,

Causes self to use its LowerExp implementation when Debug-formatted.
Source§

fn fmt_lower_hex(self) -> FmtLowerHex<Self>
where Self: LowerHex,

Causes self to use its LowerHex implementation when Debug-formatted.
Source§

fn fmt_octal(self) -> FmtOctal<Self>
where Self: Octal,

Causes self to use its Octal implementation when Debug-formatted.
Source§

fn fmt_pointer(self) -> FmtPointer<Self>
where Self: Pointer,

Causes self to use its Pointer implementation when Debug-formatted.
Source§

fn fmt_upper_exp(self) -> FmtUpperExp<Self>
where Self: UpperExp,

Causes self to use its UpperExp implementation when Debug-formatted.
Source§

fn fmt_upper_hex(self) -> FmtUpperHex<Self>
where Self: UpperHex,

Causes self to use its UpperHex implementation when Debug-formatted.
Source§

fn fmt_list(self) -> FmtList<Self>
where &'a Self: for<'a> IntoIterator,

Formats each item in a sequence. Read more
§

impl<T> From<T> for T

§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
§

impl<T, U> Into<U> for T
where U: From<T>,

§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<Unshared, Shared> IntoShared<Shared> for Unshared
where Shared: FromUnshared<Unshared>,

Source§

fn into_shared(self) -> Shared

Creates a shared type from an unshared type.
Source§

impl<T> Pipe for T
where T: ?Sized,

Source§

fn pipe<R>(self, func: impl FnOnce(Self) -> R) -> R
where Self: Sized,

Pipes by value. This is generally the method you want to use. Read more
Source§

fn pipe_ref<'a, R>(&'a self, func: impl FnOnce(&'a Self) -> R) -> R
where R: 'a,

Borrows self and passes that borrow into the pipe function. Read more
Source§

fn pipe_ref_mut<'a, R>(&'a mut self, func: impl FnOnce(&'a mut Self) -> R) -> R
where R: 'a,

Mutably borrows self and passes that borrow into the pipe function. Read more
Source§

fn pipe_borrow<'a, B, R>(&'a self, func: impl FnOnce(&'a B) -> R) -> R
where Self: Borrow<B>, B: 'a + ?Sized, R: 'a,

Borrows self, then passes self.borrow() into the pipe function. Read more
Source§

fn pipe_borrow_mut<'a, B, R>( &'a mut self, func: impl FnOnce(&'a mut B) -> R, ) -> R
where Self: BorrowMut<B>, B: 'a + ?Sized, R: 'a,

Mutably borrows self, then passes self.borrow_mut() into the pipe function. Read more
Source§

fn pipe_as_ref<'a, U, R>(&'a self, func: impl FnOnce(&'a U) -> R) -> R
where Self: AsRef<U>, U: 'a + ?Sized, R: 'a,

Borrows self, then passes self.as_ref() into the pipe function.
Source§

fn pipe_as_mut<'a, U, R>(&'a mut self, func: impl FnOnce(&'a mut U) -> R) -> R
where Self: AsMut<U>, U: 'a + ?Sized, R: 'a,

Mutably borrows self, then passes self.as_mut() into the pipe function.
Source§

fn pipe_deref<'a, T, R>(&'a self, func: impl FnOnce(&'a T) -> R) -> R
where Self: Deref<Target = T>, T: 'a + ?Sized, R: 'a,

Borrows self, then passes self.deref() into the pipe function.
Source§

fn pipe_deref_mut<'a, T, R>( &'a mut self, func: impl FnOnce(&'a mut T) -> R, ) -> R
where Self: DerefMut<Target = T> + Deref, T: 'a + ?Sized, R: 'a,

Mutably borrows self, then passes self.deref_mut() into the pipe function.
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a value with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> PolicyExt for T
where T: ?Sized,

Source§

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow only if self and other return Action::Follow. Read more
Source§

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow if either self or other returns Action::Follow. Read more
§

impl<P, T> Receiver for P
where P: Deref<Target = T> + ?Sized, T: ?Sized,

§

type Target = T

🔬This is a nightly-only experimental API. (arbitrary_self_types)
The target type on which the method may be called.
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<T> Tap for T

Source§

fn tap(self, func: impl FnOnce(&Self)) -> Self

Immutable access to a value. Read more
Source§

fn tap_mut(self, func: impl FnOnce(&mut Self)) -> Self

Mutable access to a value. Read more
Source§

fn tap_borrow<B>(self, func: impl FnOnce(&B)) -> Self
where Self: Borrow<B>, B: ?Sized,

Immutable access to the Borrow<B> of a value. Read more
Source§

fn tap_borrow_mut<B>(self, func: impl FnOnce(&mut B)) -> Self
where Self: BorrowMut<B>, B: ?Sized,

Mutable access to the BorrowMut<B> of a value. Read more
Source§

fn tap_ref<R>(self, func: impl FnOnce(&R)) -> Self
where Self: AsRef<R>, R: ?Sized,

Immutable access to the AsRef<R> view of a value. Read more
Source§

fn tap_ref_mut<R>(self, func: impl FnOnce(&mut R)) -> Self
where Self: AsMut<R>, R: ?Sized,

Mutable access to the AsMut<R> view of a value. Read more
Source§

fn tap_deref<T>(self, func: impl FnOnce(&T)) -> Self
where Self: Deref<Target = T>, T: ?Sized,

Immutable access to the Deref::Target of a value. Read more
Source§

fn tap_deref_mut<T>(self, func: impl FnOnce(&mut T)) -> Self
where Self: DerefMut<Target = T> + Deref, T: ?Sized,

Mutable access to the Deref::Target of a value. Read more
Source§

fn tap_dbg(self, func: impl FnOnce(&Self)) -> Self

Calls .tap() only in debug builds, and is erased in release builds.
Source§

fn tap_mut_dbg(self, func: impl FnOnce(&mut Self)) -> Self

Calls .tap_mut() only in debug builds, and is erased in release builds.
Source§

fn tap_borrow_dbg<B>(self, func: impl FnOnce(&B)) -> Self
where Self: Borrow<B>, B: ?Sized,

Calls .tap_borrow() only in debug builds, and is erased in release builds.
Source§

fn tap_borrow_mut_dbg<B>(self, func: impl FnOnce(&mut B)) -> Self
where Self: BorrowMut<B>, B: ?Sized,

Calls .tap_borrow_mut() only in debug builds, and is erased in release builds.
Source§

fn tap_ref_dbg<R>(self, func: impl FnOnce(&R)) -> Self
where Self: AsRef<R>, R: ?Sized,

Calls .tap_ref() only in debug builds, and is erased in release builds.
Source§

fn tap_ref_mut_dbg<R>(self, func: impl FnOnce(&mut R)) -> Self
where Self: AsMut<R>, R: ?Sized,

Calls .tap_ref_mut() only in debug builds, and is erased in release builds.
Source§

fn tap_deref_dbg<T>(self, func: impl FnOnce(&T)) -> Self
where Self: Deref<Target = T>, T: ?Sized,

Calls .tap_deref() only in debug builds, and is erased in release builds.
Source§

fn tap_deref_mut_dbg<T>(self, func: impl FnOnce(&mut T)) -> Self
where Self: DerefMut<Target = T> + Deref, T: ?Sized,

Calls .tap_deref_mut() only in debug builds, and is erased in release builds.
Source§

impl<T> TryConv for T

Source§

fn try_conv<T>(self) -> Result<T, Self::Error>
where Self: TryInto<T>,

Attempts to convert self into T using TryInto<T>. Read more
§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

impl<G1, G2> Within<G2> for G1
where G2: Contains<G1>,

Source§

fn is_within(&self, b: &G2) -> bool

Source§

impl<T> Fruit for T
where T: Send + Downcast,

Source§

impl<T> MaybeSend for T
where T: Send,