pub struct DatasetBuilder { /* private fields */ }
Expand description
Builder for loading a Dataset.
Implementations§
Source§impl DatasetBuilder
impl DatasetBuilder
pub fn from_uri<T: AsRef<str>>(table_uri: T) -> Self
Sourcepub async fn from_namespace(
namespace: Arc<dyn LanceNamespace>,
table_id: Vec<String>,
ignore_namespace_table_storage_options: bool,
) -> Result<Self>
pub async fn from_namespace( namespace: Arc<dyn LanceNamespace>, table_id: Vec<String>, ignore_namespace_table_storage_options: bool, ) -> Result<Self>
Create a DatasetBuilder from a LanceNamespace
This will automatically fetch the table location and storage options from the namespace
via describe_table().
Storage options from the namespace will override any user-provided storage options
set via .with_storage_options(). This ensures the namespace is always the source
of truth for storage options.
§Arguments
- namespace - The namespace implementation to fetch table info from.
- table_id - The table identifier (e.g., vec!["my_table"]).
- ignore_namespace_table_storage_options - If true, storage options returned from the namespace’s describe_table() will be ignored (treated as None). Pass false to use the storage options from the namespace.
§Example
use lance_namespace_impls::ConnectBuilder;
use lance::dataset::DatasetBuilder;
// Connect to a REST namespace
let namespace = ConnectBuilder::new("rest")
.property("uri", "http://localhost:8080")
.connect()
.await?;
// Load a dataset using storage options from namespace
let dataset = DatasetBuilder::from_namespace(
namespace.clone(),
vec!["my_table".to_string()],
false,
)
.await?
.load()
.await?;
// Load a dataset ignoring namespace storage options
let dataset = DatasetBuilder::from_namespace(
namespace,
vec!["my_table".to_string()],
true,
)
.await?
.load()
.await?;
Source§impl DatasetBuilder
impl DatasetBuilder
Sourcepub fn with_index_cache_size_bytes(self, cache_size: usize) -> Self
pub fn with_index_cache_size_bytes(self, cache_size: usize) -> Self
Set the cache size for indices. Set to zero to disable the cache.
Sourcepub fn with_index_cache_size(self, cache_size: usize) -> Self
👎Deprecated since 0.30.0: Use with_index_cache_size_bytes instead
pub fn with_index_cache_size(self, cache_size: usize) -> Self
👎Deprecated since 0.30.0: Use with_index_cache_size_bytes instead
Set the cache size for indices. Set to zero to disable the cache.
Sourcepub fn with_metadata_cache_size_bytes(self, cache_size: usize) -> Self
pub fn with_metadata_cache_size_bytes(self, cache_size: usize) -> Self
Size of the metadata cache in bytes. This cache stores metadata in memory for faster open table and scans. The default is 1 GiB.
Sourcepub fn with_metadata_cache_size(self, cache_size: usize) -> Self
👎Deprecated since 0.30.0: Use with_metadata_cache_size_bytes instead
pub fn with_metadata_cache_size(self, cache_size: usize) -> Self
👎Deprecated since 0.30.0: Use with_metadata_cache_size_bytes instead
Set the cache size for the file metadata. Set to zero to disable this cache.
Sourcepub fn with_block_size(self, block_size: usize) -> Self
pub fn with_block_size(self, block_size: usize) -> Self
The block size passed to the underlying Object Store reader.
This is used to control the minimum request size. Defaults to 4KB for local files and 64KB for others.
Sourcepub fn with_version(self, version: u64) -> Self
pub fn with_version(self, version: u64) -> Self
Sets version for the builder using a version number
Sourcepub fn with_branch(self, branch: &str, version_number: Option<u64>) -> Self
pub fn with_branch(self, branch: &str, version_number: Option<u64>) -> Self
Sets version for the builder using a branch and an optional version number.
If version_number is None, the latest version of the branch is checked out.
pub fn with_commit_handler(self, commit_handler: Arc<dyn CommitHandler>) -> Self
Sourcepub fn with_s3_credentials_refresh_offset(self, offset: Duration) -> Self
pub fn with_s3_credentials_refresh_offset(self, offset: Duration) -> Self
Sets the S3 credentials refresh offset. This only applies to S3 storage.
Sourcepub fn with_object_store(
self,
object_store: Arc<DynObjectStore>,
location: Url,
commit_handler: Arc<dyn CommitHandler>,
) -> Self
👎Deprecated: Implement an ObjectStoreProvider instead
pub fn with_object_store( self, object_store: Arc<DynObjectStore>, location: Url, commit_handler: Arc<dyn CommitHandler>, ) -> Self
Directly set the object store to use.
Sourcepub fn with_serialized_manifest(self, manifest: &[u8]) -> Result<Self>
pub fn with_serialized_manifest(self, manifest: &[u8]) -> Result<Self>
Use a serialized manifest instead of loading it from the object store.
This is common when transferring a dataset across IPC boundaries.
Sourcepub fn with_storage_options(
self,
storage_options: HashMap<String, String>,
) -> Self
pub fn with_storage_options( self, storage_options: HashMap<String, String>, ) -> Self
Set options used to initialize storage backend
Options may be passed in the HashMap or set as environment variables. See documentation of underlying object store implementation for details.
Sourcepub fn with_storage_option(
self,
key: impl AsRef<str>,
value: impl AsRef<str>,
) -> Self
pub fn with_storage_option( self, key: impl AsRef<str>, value: impl AsRef<str>, ) -> Self
Set a single option used to initialize the storage backend. For example, to set the region for S3, you can use:
let builder = DatasetBuilder::from_uri("s3://bucket/path")
.with_storage_option("region", "us-east-1");
Sourcepub fn with_storage_options_provider(
self,
provider: Arc<dyn StorageOptionsProvider>,
) -> Self
pub fn with_storage_options_provider( self, provider: Arc<dyn StorageOptionsProvider>, ) -> Self
Enable credential vending from a LanceNamespace
Credentials will be automatically refreshed from the namespace
before they expire. The namespace should return expires_at_millis
in the storage_options from describe_table().
Use with_s3_credentials_refresh_offset() to configure how early
credentials should be refreshed before they expire (default is 5 minutes).
§Arguments
provider- The storage options provider to fetch credentials from
§Example
use std::sync::Arc;
use std::time::Duration;
use lance_namespace_impls::ConnectBuilder;
use lance_io::object_store::{StorageOptionsProvider, LanceNamespaceStorageOptionsProvider};
// Connect to a REST namespace
let namespace = ConnectBuilder::new("rest")
.property("uri", "http://localhost:8080")
.connect()
.await?;
// Create a storage options provider from namespace
let provider = Arc::new(LanceNamespaceStorageOptionsProvider::new(
namespace,
vec!["my_table".to_string()],
));
// With default settings (5 minute refresh offset)
let dataset = DatasetBuilder::from_uri("s3://bucket/table.lance")
.with_storage_options_provider(provider.clone())
.load()
.await?;
// With custom refresh offset (refresh 10 minutes before expiration)
let dataset = DatasetBuilder::from_uri("s3://bucket/table.lance")
.with_storage_options_provider(provider)
.with_s3_credentials_refresh_offset(Duration::from_secs(600))
.load()
.await?;
Sourcepub fn with_read_params(self, read_params: ReadParams) -> Self
pub fn with_read_params(self, read_params: ReadParams) -> Self
Set options based on ReadParams.
Sourcepub fn with_write_params(self, write_params: WriteParams) -> Self
pub fn with_write_params(self, write_params: WriteParams) -> Self
Set options based on WriteParams.
Sourcepub fn with_session(self, session: Arc<Session>) -> Self
pub fn with_session(self, session: Arc<Session>) -> Self
Re-use an existing session.
The session holds caches for index and metadata.
If this is set, then with_index_cache_size and with_metadata_cache_size are ignored.
Sourcepub async fn build_object_store(
self,
) -> Result<(Arc<ObjectStore>, Path, Arc<dyn CommitHandler>)>
pub async fn build_object_store( self, ) -> Result<(Arc<ObjectStore>, Path, Arc<dyn CommitHandler>)>
Build a lance object store for the given config
pub async fn load(self) -> Result<Dataset>
pub async fn load_by_uri( session: Arc<Session>, manifest: Option<Manifest>, file_reader_options: Option<FileReaderOptions>, table_uri: String, version_number: Option<u64>, object_store: Arc<ObjectStore>, base_path: Path, commit_handler: Arc<dyn CommitHandler>, store_params: Option<ObjectStoreParams>, ) -> Result<Dataset>
Trait Implementations§
Source§impl Clone for DatasetBuilder
impl Clone for DatasetBuilder
Source§fn clone(&self) -> DatasetBuilder
fn clone(&self) -> DatasetBuilder
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Auto Trait Implementations§
impl Freeze for DatasetBuilder
impl !RefUnwindSafe for DatasetBuilder
impl Send for DatasetBuilder
impl Sync for DatasetBuilder
impl Unpin for DatasetBuilder
impl !UnwindSafe for DatasetBuilder
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> Downcast for T where
T: Any,
impl<T> Downcast for T where
T: Any,
Source§fn into_any(self: Box<T>) -> Box<dyn Any>
fn into_any(self: Box<T>) -> Box<dyn Any>
Converts Box<dyn Trait> (where Trait: Downcast) to Box<dyn Any>, which can then be
downcast into Box<dyn ConcreteType> where ConcreteType implements Trait.
Source§fn into_any_rc(self: Rc<T>) -> Rc<dyn Any>
fn into_any_rc(self: Rc<T>) -> Rc<dyn Any>
Converts Rc<Trait> (where Trait: Downcast) to Rc<Any>, which can then be further
downcast into Rc<ConcreteType> where ConcreteType implements Trait.
Source§fn as_any(&self) -> &(dyn Any + 'static)
fn as_any(&self) -> &(dyn Any + 'static)
Converts &Trait (where Trait: Downcast) to &Any. This is needed since Rust cannot
generate &Any’s vtable from &Trait’s.
Source§fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)
fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)
Converts &mut Trait (where Trait: Downcast) to &mut Any. This is needed since Rust cannot
generate &mut Any’s vtable from &mut Trait’s.
Source§impl<T> DowncastSend for T
impl<T> DowncastSend for T
Source§impl<T> DowncastSync for T
impl<T> DowncastSync for T
Source§impl<T> FmtForward for T
impl<T> FmtForward for T
Source§fn fmt_binary(self) -> FmtBinary<Self>where
Self: Binary,
fn fmt_binary(self) -> FmtBinary<Self>where
Self: Binary,
Causes self to use its Binary implementation when Debug-formatted.
Source§fn fmt_display(self) -> FmtDisplay<Self> where
Self: Display,
fn fmt_display(self) -> FmtDisplay<Self>where
Self: Display,
Causes self to use its Display implementation when
Debug-formatted.
Source§fn fmt_lower_exp(self) -> FmtLowerExp<Self> where
Self: LowerExp,
fn fmt_lower_exp(self) -> FmtLowerExp<Self>where
Self: LowerExp,
Causes self to use its LowerExp implementation when
Debug-formatted.
Source§fn fmt_lower_hex(self) -> FmtLowerHex<Self> where
Self: LowerHex,
fn fmt_lower_hex(self) -> FmtLowerHex<Self>where
Self: LowerHex,
Causes self to use its LowerHex implementation when
Debug-formatted.
Source§fn fmt_octal(self) -> FmtOctal<Self> where
Self: Octal,
fn fmt_octal(self) -> FmtOctal<Self>where
Self: Octal,
Causes self to use its Octal implementation when Debug-formatted.
Source§fn fmt_pointer(self) -> FmtPointer<Self> where
Self: Pointer,
fn fmt_pointer(self) -> FmtPointer<Self>where
Self: Pointer,
Causes self to use its Pointer implementation when
Debug-formatted.
Source§fn fmt_upper_exp(self) -> FmtUpperExp<Self> where
Self: UpperExp,
fn fmt_upper_exp(self) -> FmtUpperExp<Self>where
Self: UpperExp,
Causes self to use its UpperExp implementation when
Debug-formatted.
Source§fn fmt_upper_hex(self) -> FmtUpperHex<Self> where
Self: UpperHex,
fn fmt_upper_hex(self) -> FmtUpperHex<Self>where
Self: UpperHex,
Causes self to use its UpperHex implementation when
Debug-formatted.
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§impl<T> Pipe for T where
T: ?Sized,
impl<T> Pipe for Twhere
T: ?Sized,
Source§fn pipe<R>(self, func: impl FnOnce(Self) -> R) -> Rwhere
Self: Sized,
fn pipe<R>(self, func: impl FnOnce(Self) -> R) -> Rwhere
Self: Sized,
Source§fn pipe_ref<'a, R>(&'a self, func: impl FnOnce(&'a Self) -> R) -> Rwhere
R: 'a,
fn pipe_ref<'a, R>(&'a self, func: impl FnOnce(&'a Self) -> R) -> Rwhere
R: 'a,
Borrows self and passes that borrow into the pipe function. Read more
Source§fn pipe_ref_mut<'a, R>(&'a mut self, func: impl FnOnce(&'a mut Self) -> R) -> R where
R: 'a,
fn pipe_ref_mut<'a, R>(&'a mut self, func: impl FnOnce(&'a mut Self) -> R) -> Rwhere
R: 'a,
Mutably borrows self and passes that borrow into the pipe function. Read more
Source§fn pipe_borrow<'a, B, R>(&'a self, func: impl FnOnce(&'a B) -> R) -> R
fn pipe_borrow<'a, B, R>(&'a self, func: impl FnOnce(&'a B) -> R) -> R
Source§fn pipe_borrow_mut<'a, B, R>(
&'a mut self,
func: impl FnOnce(&'a mut B) -> R,
) -> R
fn pipe_borrow_mut<'a, B, R>( &'a mut self, func: impl FnOnce(&'a mut B) -> R, ) -> R
Source§fn pipe_as_ref<'a, U, R>(&'a self, func: impl FnOnce(&'a U) -> R) -> R
fn pipe_as_ref<'a, U, R>(&'a self, func: impl FnOnce(&'a U) -> R) -> R
Borrows self, then passes self.as_ref() into the pipe function.
Source§fn pipe_as_mut<'a, U, R>(&'a mut self, func: impl FnOnce(&'a mut U) -> R) -> R
fn pipe_as_mut<'a, U, R>(&'a mut self, func: impl FnOnce(&'a mut U) -> R) -> R
Mutably borrows self, then passes self.as_mut() into the pipe
function.
Source§fn pipe_deref<'a, T, R>(&'a self, func: impl FnOnce(&'a T) -> R) -> R
fn pipe_deref<'a, T, R>(&'a self, func: impl FnOnce(&'a T) -> R) -> R
Borrows self, then passes self.deref() into the pipe function.
Source§impl<T> Pointable for T
impl<T> Pointable for T
Source§impl<T> PolicyExt for Twhere
T: ?Sized,
impl<T> PolicyExt for Twhere
T: ?Sized,
Source§impl<T> Tap for T
impl<T> Tap for T
Source§fn tap_borrow<B>(self, func: impl FnOnce(&B)) -> Self
fn tap_borrow<B>(self, func: impl FnOnce(&B)) -> Self
Immutable access to the Borrow<B> of a value. Read more
Source§fn tap_borrow_mut<B>(self, func: impl FnOnce(&mut B)) -> Self
fn tap_borrow_mut<B>(self, func: impl FnOnce(&mut B)) -> Self
Mutable access to the BorrowMut<B> of a value. Read more
Source§fn tap_ref<R>(self, func: impl FnOnce(&R)) -> Self
fn tap_ref<R>(self, func: impl FnOnce(&R)) -> Self
Immutable access to the AsRef<R> view of a value. Read more
Source§fn tap_ref_mut<R>(self, func: impl FnOnce(&mut R)) -> Self
fn tap_ref_mut<R>(self, func: impl FnOnce(&mut R)) -> Self
Mutable access to the AsMut<R> view of a value. Read more
Source§fn tap_deref<T>(self, func: impl FnOnce(&T)) -> Self
fn tap_deref<T>(self, func: impl FnOnce(&T)) -> Self
Immutable access to the Deref::Target of a value. Read more
Source§fn tap_deref_mut<T>(self, func: impl FnOnce(&mut T)) -> Self
fn tap_deref_mut<T>(self, func: impl FnOnce(&mut T)) -> Self
Mutable access to the Deref::Target of a value. Read more
Source§fn tap_dbg(self, func: impl FnOnce(&Self)) -> Self
fn tap_dbg(self, func: impl FnOnce(&Self)) -> Self
Calls .tap() only in debug builds, and is erased in release builds.
Source§fn tap_mut_dbg(self, func: impl FnOnce(&mut Self)) -> Self
fn tap_mut_dbg(self, func: impl FnOnce(&mut Self)) -> Self
Calls .tap_mut() only in debug builds, and is erased in release
builds.
Source§fn tap_borrow_dbg<B>(self, func: impl FnOnce(&B)) -> Self
fn tap_borrow_dbg<B>(self, func: impl FnOnce(&B)) -> Self
Calls .tap_borrow() only in debug builds, and is erased in release
builds.
Source§fn tap_borrow_mut_dbg<B>(self, func: impl FnOnce(&mut B)) -> Self
fn tap_borrow_mut_dbg<B>(self, func: impl FnOnce(&mut B)) -> Self
Calls .tap_borrow_mut() only in debug builds, and is erased in release
builds.
Source§fn tap_ref_dbg<R>(self, func: impl FnOnce(&R)) -> Self
fn tap_ref_dbg<R>(self, func: impl FnOnce(&R)) -> Self
Calls .tap_ref() only in debug builds, and is erased in release
builds.
Source§fn tap_ref_mut_dbg<R>(self, func: impl FnOnce(&mut R)) -> Self
fn tap_ref_mut_dbg<R>(self, func: impl FnOnce(&mut R)) -> Self
Calls .tap_ref_mut() only in debug builds, and is erased in release
builds.
Source§fn tap_deref_dbg<T>(self, func: impl FnOnce(&T)) -> Self
fn tap_deref_dbg<T>(self, func: impl FnOnce(&T)) -> Self
Calls .tap_deref() only in debug builds, and is erased in release
builds.