1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors
//! Lance Columnar Data Format
//!
//! Lance columnar data format is an alternative to Parquet. It provides 100x faster random access,
//! automatic versioning, and is optimized for computer vision, bioinformatics, spatial and ML data.
//! [Apache Arrow](https://arrow.apache.org/) and DuckDB compatible.
//!
//!
//! # Create a Dataset
//!
//! ```rust
//! # use std::sync::Arc;
//! # use tokio::runtime::Runtime;
//! # use arrow_array::{RecordBatch, RecordBatchIterator};
//! # use arrow_schema::{Schema, Field, DataType};
//! use lance::{dataset::WriteParams, Dataset};
//!
//! # let mut rt = Runtime::new().unwrap();
//! # rt.block_on(async {
//! #
//! # let test_dir = tempfile::tempdir().unwrap();
//! # let uri = test_dir.path().to_str().unwrap().to_string();
//! let schema = Arc::new(Schema::new(vec![Field::new("test", DataType::Int64, false)]));
//! let batches = vec![RecordBatch::new_empty(schema.clone())];
//! let reader = RecordBatchIterator::new(
//! batches.into_iter().map(Ok), schema
//! );
//!
//! let write_params = WriteParams::default();
//! Dataset::write(reader, &uri, Some(write_params)).await.unwrap();
//! # })
//! ```
//!
//! # Scan a Dataset
//!
//! ```rust
//! # use std::sync::Arc;
//! # use arrow_array::{RecordBatch, Int32Array, RecordBatchIterator, ArrayRef};
//! # use tokio::runtime::Runtime;
//! use futures::StreamExt;
//! use lance::Dataset;
//! # use lance::dataset::WriteParams;
//!
//! # let array: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
//! # let batches = vec![RecordBatch::try_from_iter(vec![("test", array)]).unwrap()];
//! # let test_dir = tempfile::tempdir().unwrap();
//! # let path = test_dir.path().to_str().unwrap().to_string();
//! # let schema = batches[0].schema();
//! # let mut rt = Runtime::new().unwrap();
//! # rt.block_on(async {
//! # let write_params = WriteParams::default();
//! # let reader = RecordBatchIterator::new(
//! # batches.into_iter().map(Ok), schema
//! # );
//! # Dataset::write(reader, &path, Some(write_params)).await.unwrap();
//! let dataset = Dataset::open(&path).await.unwrap();
//! let mut scanner = dataset.scan();
//! let batches: Vec<RecordBatch> = scanner
//! .try_into_stream()
//! .await
//! .unwrap()
//! .map(|b| b.unwrap())
//! .collect::<Vec<RecordBatch>>()
//! .await;
//! # })
//!
//! ```
//!
use DataType;
use DatasetBuilder;
pub use datatypes;
pub use ;
use LazyLock;
pub use ;
pub use Dataset;
use DIST_COL;
/// Creates and loads a [`Dataset`] from the given path.
/// Infers the storage backend to use from the scheme in the given table path.
///
/// For more advanced configurations use [`DatasetBuilder`].
// NOTE(review): this item is truncated — only `pub async` survives in this
// extraction. The doc comment above suggests it was an async loader function
// (presumably taking a table URI and delegating to `DatasetBuilder`), but the
// signature and body are missing from this view. Recover the full declaration
// from version control before making any edits here; do not attempt to
// reconstruct it from the fragment alone.
pub async
// NOTE(review): this declaration is garbled — the type annotation after `:`
// and the initializer expression are missing (only `new;` remains). Given the
// `use LazyLock;` fragment earlier in the file, this was presumably a
// `LazyLock<...>` static initialized with `LazyLock::new(...)` describing a
// distance field (see the `DIST_COL` import above) — TODO confirm against the
// original source before changing.
pub static DIST_FIELD: =
new;
/// Re-exports of 3rd party dependencies used in lance public APIs
///
/// Users that only use these dependencies for the sake of communicating with
/// Lance APIs can use these re-exports to ensure they are always pinned to the
/// same version that lance is using.