1pub mod manifest;
10
11use arrow::array::Float32Array;
12use arrow::record_batch::RecordBatchIterator;
13use arrow_ipc::reader::StreamReader;
14use async_trait::async_trait;
15use bytes::Bytes;
16use futures::{StreamExt, TryStreamExt};
17use lance::dataset::builder::DatasetBuilder;
18use lance::dataset::scanner::Scanner;
19use lance::dataset::statistics::DatasetStatisticsExt;
20use lance::dataset::transaction::{Operation, Transaction};
21use lance::dataset::{
22 Dataset, MergeInsertBuilder, WhenMatched, WhenNotMatched, WhenNotMatchedBySource, WriteMode,
23 WriteParams,
24};
25use lance::index::{DatasetIndexExt, IndexParams, vector::VectorIndexParams};
26use lance::session::Session;
27use lance_index::scalar::{
28 BuiltinIndexType, FullTextSearchQuery, InvertedIndexParams, ScalarIndexParams,
29};
30use lance_index::vector::{
31 bq::RQBuildParams, hnsw::builder::HnswBuildParams, ivf::IvfBuildParams, pq::PQBuildParams,
32 sq::builder::SQBuildParams,
33};
34use lance_index::{IndexType, is_system_index};
35use lance_io::object_store::{ObjectStore, ObjectStoreParams, ObjectStoreRegistry};
36use lance_linalg::distance::MetricType;
37use lance_table::io::commit::{ManifestNamingScheme, VERSIONS_DIR};
38use object_store::ObjectStoreExt;
39use object_store::path::Path;
40use object_store::{Error as ObjectStoreError, ObjectStore as OSObjectStore, PutMode, PutOptions};
41use std::collections::HashMap;
42use std::io::Cursor;
43use std::sync::{Arc, Mutex};
44
45use crate::context::DynamicContextProvider;
46use lance_namespace::models::{
47 AnalyzeTableQueryPlanRequest, BatchDeleteTableVersionsRequest,
48 BatchDeleteTableVersionsResponse, CountTableRowsRequest, CreateNamespaceRequest,
49 CreateNamespaceResponse, CreateTableIndexRequest, CreateTableIndexResponse, CreateTableRequest,
50 CreateTableResponse, CreateTableScalarIndexResponse, CreateTableVersionRequest,
51 CreateTableVersionResponse, DeclareTableRequest, DeclareTableResponse,
52 DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableIndexStatsRequest,
53 DescribeTableIndexStatsResponse, DescribeTableRequest, DescribeTableResponse,
54 DescribeTableVersionRequest, DescribeTableVersionResponse, DescribeTransactionRequest,
55 DescribeTransactionResponse, DropNamespaceRequest, DropNamespaceResponse,
56 DropTableIndexRequest, DropTableIndexResponse, DropTableRequest, DropTableResponse,
57 ExplainTableQueryPlanRequest, FragmentStats, FragmentSummary, GetTableStatsRequest,
58 GetTableStatsResponse, Identity, IndexContent, InsertIntoTableRequest, InsertIntoTableResponse,
59 ListNamespacesRequest, ListNamespacesResponse, ListTableIndicesRequest,
60 ListTableIndicesResponse, ListTableVersionsRequest, ListTableVersionsResponse,
61 ListTablesRequest, ListTablesResponse, MergeInsertIntoTableRequest,
62 MergeInsertIntoTableResponse, NamespaceExistsRequest, QueryTableRequest,
63 QueryTableRequestColumns, QueryTableRequestVector, RestoreTableRequest, RestoreTableResponse,
64 TableExistsRequest, TableVersion, UpdateTableSchemaMetadataRequest,
65 UpdateTableSchemaMetadataResponse,
66};
67
68use lance_core::{Error, Result};
69use lance_namespace::LanceNamespace;
70use lance_namespace::error::NamespaceError;
71use lance_namespace::schema::arrow_schema_to_json;
72
73use crate::credentials::{
74 CredentialVendor, create_credential_vendor_for_location, has_credential_vendor_config,
75};
76
/// Thread-safe call counters keyed by operation name.
///
/// All methods are best-effort: if the internal mutex is poisoned, writes are
/// skipped and reads return an empty map instead of panicking.
#[derive(Debug, Default)]
pub struct OpsMetrics {
    /// Number of recorded calls per operation name.
    counters: Mutex<HashMap<String, u64>>,
}

impl OpsMetrics {
    /// Adds one to the counter for `operation`, creating it at 1 on first use.
    pub fn increment(&self, operation: &str) {
        if let Ok(mut counters) = self.counters.lock() {
            // Look up by `&str` first so the common case (counter already
            // exists) does not allocate a new `String` key on every call.
            match counters.get_mut(operation) {
                Some(count) => *count += 1,
                None => {
                    counters.insert(operation.to_string(), 1);
                }
            }
        }
    }

    /// Returns a snapshot copy of all counters (empty if the lock is poisoned).
    pub fn retrieve(&self) -> HashMap<String, u64> {
        self.counters.lock().map(|c| c.clone()).unwrap_or_default()
    }

    /// Removes every counter.
    pub fn reset(&self) {
        if let Ok(mut counters) = self.counters.lock() {
            counters.clear();
        }
    }
}
106
/// Result of probing a table directory.
///
/// Derives `Debug`/`Clone`/`PartialEq`/`Eq` so a status can be logged and
/// compared in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct TableStatus {
    /// Whether the table was found; describe reports "not found" when false.
    pub(crate) exists: bool,
    /// Whether the table is marked deregistered; such tables are hidden from
    /// directory listings and describe calls.
    pub(crate) is_deregistered: bool,
    /// Whether a reserved marker file was seen; when set, describe checks for
    /// real manifests to decide if the table is only declared.
    pub(crate) has_reserved_file: bool,
}
119
120enum DirectoryIndexParams {
121 Scalar {
122 index_type: IndexType,
123 params: ScalarIndexParams,
124 },
125 Inverted(InvertedIndexParams),
126 Vector {
127 index_type: IndexType,
128 params: VectorIndexParams,
129 },
130}
131
132impl DirectoryIndexParams {
133 fn index_type(&self) -> IndexType {
134 match self {
135 Self::Scalar { index_type, .. } | Self::Vector { index_type, .. } => *index_type,
136 Self::Inverted(_) => IndexType::Inverted,
137 }
138 }
139
140 fn params(&self) -> &dyn IndexParams {
141 match self {
142 Self::Scalar { params, .. } => params,
143 Self::Inverted(params) => params,
144 Self::Vector { params, .. } => params,
145 }
146 }
147}
148
149#[derive(Clone)]
184pub struct DirectoryNamespaceBuilder {
185 root: String,
186 storage_options: Option<HashMap<String, String>>,
187 session: Option<Arc<Session>>,
188 manifest_enabled: bool,
189 dir_listing_enabled: bool,
190 inline_optimization_enabled: bool,
191 table_version_tracking_enabled: bool,
192 table_version_storage_enabled: bool,
195 dir_listing_to_manifest_migration_enabled: bool,
200 credential_vendor_properties: HashMap<String, String>,
201 context_provider: Option<Arc<dyn DynamicContextProvider>>,
202 commit_retries: Option<u32>,
203 vend_input_storage_options: bool,
206 vend_input_storage_options_refresh_interval_millis: Option<u64>,
211 ops_metrics_enabled: bool,
213}
214
215impl std::fmt::Debug for DirectoryNamespaceBuilder {
216 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
217 f.debug_struct("DirectoryNamespaceBuilder")
218 .field("root", &self.root)
219 .field("storage_options", &self.storage_options)
220 .field("manifest_enabled", &self.manifest_enabled)
221 .field("dir_listing_enabled", &self.dir_listing_enabled)
222 .field(
223 "inline_optimization_enabled",
224 &self.inline_optimization_enabled,
225 )
226 .field(
227 "table_version_tracking_enabled",
228 &self.table_version_tracking_enabled,
229 )
230 .field(
231 "table_version_storage_enabled",
232 &self.table_version_storage_enabled,
233 )
234 .field(
235 "dir_listing_to_manifest_migration_enabled",
236 &self.dir_listing_to_manifest_migration_enabled,
237 )
238 .field(
239 "context_provider",
240 &self.context_provider.as_ref().map(|_| "Some(...)"),
241 )
242 .field(
243 "vend_input_storage_options",
244 &self.vend_input_storage_options,
245 )
246 .field(
247 "vend_input_storage_options_refresh_interval_millis",
248 &self.vend_input_storage_options_refresh_interval_millis,
249 )
250 .field("ops_metrics_enabled", &self.ops_metrics_enabled)
251 .finish()
252 }
253}
254
255impl DirectoryNamespaceBuilder {
256 pub fn new(root: impl Into<String>) -> Self {
262 Self {
263 root: root.into().trim_end_matches('/').to_string(),
264 storage_options: None,
265 session: None,
266 manifest_enabled: true,
267 dir_listing_enabled: true, inline_optimization_enabled: true,
269 table_version_tracking_enabled: false, table_version_storage_enabled: false, dir_listing_to_manifest_migration_enabled: false, credential_vendor_properties: HashMap::new(),
273 context_provider: None,
274 commit_retries: None,
275 vend_input_storage_options: false,
276 vend_input_storage_options_refresh_interval_millis: None,
277 ops_metrics_enabled: false,
278 }
279 }
280
281 pub fn manifest_enabled(mut self, enabled: bool) -> Self {
286 self.manifest_enabled = enabled;
287 self
288 }
289
290 pub fn dir_listing_enabled(mut self, enabled: bool) -> Self {
295 self.dir_listing_enabled = enabled;
296 self
297 }
298
299 pub fn dir_listing_to_manifest_migration_enabled(mut self, enabled: bool) -> Self {
306 self.dir_listing_to_manifest_migration_enabled = enabled;
307 self
308 }
309
310 pub fn inline_optimization_enabled(mut self, enabled: bool) -> Self {
316 self.inline_optimization_enabled = enabled;
317 self
318 }
319
320 pub fn table_version_tracking_enabled(mut self, enabled: bool) -> Self {
328 self.table_version_tracking_enabled = enabled;
329 self
330 }
331
332 pub fn table_version_storage_enabled(mut self, enabled: bool) -> Self {
341 self.table_version_storage_enabled = enabled;
342 self
343 }
344
345 pub fn from_properties(
413 properties: HashMap<String, String>,
414 session: Option<Arc<Session>>,
415 ) -> Result<Self> {
416 let root = properties.get("root").cloned().ok_or_else(|| {
418 lance_core::Error::from(NamespaceError::InvalidInput {
419 message: "Missing required property 'root' for directory namespace".to_string(),
420 })
421 })?;
422
423 let storage_options: HashMap<String, String> = properties
425 .iter()
426 .filter_map(|(k, v)| {
427 k.strip_prefix("storage.")
428 .map(|key| (key.to_string(), v.clone()))
429 })
430 .collect();
431
432 let storage_options = if storage_options.is_empty() {
433 None
434 } else {
435 Some(storage_options)
436 };
437
438 let manifest_enabled = properties
440 .get("manifest_enabled")
441 .and_then(|v| v.parse::<bool>().ok())
442 .unwrap_or(true);
443
444 let dir_listing_enabled = properties
446 .get("dir_listing_enabled")
447 .and_then(|v| v.parse::<bool>().ok())
448 .unwrap_or(true);
449
450 let inline_optimization_enabled = properties
452 .get("inline_optimization_enabled")
453 .and_then(|v| v.parse::<bool>().ok())
454 .unwrap_or(true);
455
456 let table_version_tracking_enabled = properties
458 .get("table_version_tracking_enabled")
459 .and_then(|v| v.parse::<bool>().ok())
460 .unwrap_or(false);
461
462 let table_version_storage_enabled = properties
464 .get("table_version_storage_enabled")
465 .and_then(|v| v.parse::<bool>().ok())
466 .unwrap_or(false);
467
468 let dir_listing_to_manifest_migration_enabled = properties
470 .get("dir_listing_to_manifest_migration_enabled")
471 .and_then(|v| v.parse::<bool>().ok())
472 .unwrap_or(false);
473
474 let credential_vendor_properties: HashMap<String, String> = properties
478 .iter()
479 .filter_map(|(k, v)| {
480 k.strip_prefix("credential_vendor.")
481 .map(|key| (key.to_string(), v.clone()))
482 })
483 .collect();
484
485 let commit_retries = properties
486 .get("commit_retries")
487 .and_then(|v| v.parse::<u32>().ok());
488
489 let vend_input_storage_options = properties
491 .get("vend_input_storage_options")
492 .and_then(|v| v.parse::<bool>().ok())
493 .unwrap_or(false);
494
495 let vend_input_storage_options_refresh_interval_millis = properties
497 .get("vend_input_storage_options_refresh_interval_millis")
498 .and_then(|v| v.parse::<u64>().ok());
499
500 let ops_metrics_enabled = properties
502 .get("ops_metrics_enabled")
503 .and_then(|v| v.parse::<bool>().ok())
504 .unwrap_or(false);
505
506 Ok(Self {
507 root: root.trim_end_matches('/').to_string(),
508 storage_options,
509 session,
510 manifest_enabled,
511 dir_listing_enabled,
512 inline_optimization_enabled,
513 table_version_tracking_enabled,
514 table_version_storage_enabled,
515 dir_listing_to_manifest_migration_enabled,
516 credential_vendor_properties,
517 context_provider: None,
518 commit_retries,
519 vend_input_storage_options,
520 vend_input_storage_options_refresh_interval_millis,
521 ops_metrics_enabled,
522 })
523 }
524
525 pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
532 self.storage_options
533 .get_or_insert_with(HashMap::new)
534 .insert(key.into(), value.into());
535 self
536 }
537
538 pub fn storage_options(mut self, options: HashMap<String, String>) -> Self {
544 self.storage_options
545 .get_or_insert_with(HashMap::new)
546 .extend(options);
547 self
548 }
549
550 pub fn session(mut self, session: Arc<Session>) -> Self {
560 self.session = Some(session);
561 self
562 }
563
564 pub fn commit_retries(mut self, retries: u32) -> Self {
567 self.commit_retries = Some(retries);
568 self
569 }
570
571 pub fn credential_vendor_property(
599 mut self,
600 key: impl Into<String>,
601 value: impl Into<String>,
602 ) -> Self {
603 self.credential_vendor_properties
604 .insert(key.into(), value.into());
605 self
606 }
607
608 pub fn credential_vendor_properties(mut self, properties: HashMap<String, String>) -> Self {
616 self.credential_vendor_properties.extend(properties);
617 self
618 }
619
620 pub fn context_provider(mut self, provider: Arc<dyn DynamicContextProvider>) -> Self {
630 self.context_provider = Some(provider);
631 self
632 }
633
634 pub fn vend_input_storage_options(mut self, enabled: bool) -> Self {
643 self.vend_input_storage_options = enabled;
644 self
645 }
646
647 pub fn vend_input_storage_options_refresh_interval_millis(
659 mut self,
660 interval_millis: u64,
661 ) -> Self {
662 self.vend_input_storage_options_refresh_interval_millis = Some(interval_millis);
663 self
664 }
665
666 pub fn ops_metrics_enabled(mut self, enabled: bool) -> Self {
674 self.ops_metrics_enabled = enabled;
675 self
676 }
677
678 pub async fn build(self) -> Result<DirectoryNamespace> {
691 if self.table_version_storage_enabled && !self.manifest_enabled {
693 return Err(NamespaceError::InvalidInput {
694 message: "table_version_storage_enabled requires manifest_enabled=true".to_string(),
695 }
696 .into());
697 }
698
699 let (object_store, base_path) =
700 Self::initialize_object_store(&self.root, &self.storage_options, &self.session).await?;
701
702 let manifest_ns = if self.manifest_enabled {
703 match manifest::ManifestNamespace::from_directory(
704 self.root.clone(),
705 self.storage_options.clone(),
706 self.session.clone(),
707 object_store.clone(),
708 base_path.clone(),
709 self.dir_listing_enabled,
710 self.inline_optimization_enabled,
711 self.commit_retries,
712 self.table_version_storage_enabled,
713 )
714 .await
715 {
716 Ok(ns) => Some(Arc::new(ns)),
717 Err(e) => {
718 log::warn!(
720 "Failed to initialize manifest namespace, falling back to directory listing only: {}",
721 e
722 );
723 None
724 }
725 }
726 } else {
727 None
728 };
729
730 let credential_vendor = if has_credential_vendor_config(&self.credential_vendor_properties)
732 {
733 create_credential_vendor_for_location(&self.root, &self.credential_vendor_properties)
734 .await?
735 .map(Arc::from)
736 } else {
737 None
738 };
739
740 let ops_metrics = if self.ops_metrics_enabled {
741 Some(Arc::new(OpsMetrics::default()))
742 } else {
743 None
744 };
745
746 Ok(DirectoryNamespace {
747 root: self.root,
748 storage_options: self.storage_options,
749 session: self.session,
750 object_store,
751 base_path,
752 manifest_ns,
753 dir_listing_enabled: self.dir_listing_enabled,
754 dir_listing_to_manifest_migration_enabled: self
755 .dir_listing_to_manifest_migration_enabled,
756 table_version_tracking_enabled: self.table_version_tracking_enabled,
757 table_version_storage_enabled: self.table_version_storage_enabled,
758 credential_vendor,
759 context_provider: self.context_provider,
760 vend_input_storage_options: self.vend_input_storage_options,
761 vend_input_storage_options_refresh_interval_millis: self
762 .vend_input_storage_options_refresh_interval_millis,
763 ops_metrics,
764 })
765 }
766
767 async fn initialize_object_store(
769 root: &str,
770 storage_options: &Option<HashMap<String, String>>,
771 session: &Option<Arc<Session>>,
772 ) -> Result<(Arc<ObjectStore>, Path)> {
773 let accessor = storage_options.clone().map(|opts| {
775 Arc::new(lance_io::object_store::StorageOptionsAccessor::with_static_options(opts))
776 });
777 let params = ObjectStoreParams {
778 storage_options_accessor: accessor,
779 ..Default::default()
780 };
781
782 let registry = if let Some(session) = session {
784 session.store_registry()
785 } else {
786 Arc::new(ObjectStoreRegistry::default())
787 };
788
789 let (object_store, base_path) = ObjectStore::from_uri_and_params(registry, root, ¶ms)
791 .await
792 .map_err(|e| {
793 lance_core::Error::from(NamespaceError::Internal {
794 message: format!("Failed to create object store: {:?}", e),
795 })
796 })?;
797
798 Ok((object_store, base_path))
799 }
800}
801
802pub struct DirectoryNamespace {
826 root: String,
827 storage_options: Option<HashMap<String, String>>,
828 session: Option<Arc<Session>>,
829 object_store: Arc<ObjectStore>,
830 base_path: Path,
831 manifest_ns: Option<Arc<manifest::ManifestNamespace>>,
832 dir_listing_enabled: bool,
833 dir_listing_to_manifest_migration_enabled: bool,
837 table_version_tracking_enabled: bool,
840 table_version_storage_enabled: bool,
842 credential_vendor: Option<Arc<dyn CredentialVendor>>,
845 #[allow(dead_code)]
848 context_provider: Option<Arc<dyn DynamicContextProvider>>,
849 vend_input_storage_options: bool,
851 vend_input_storage_options_refresh_interval_millis: Option<u64>,
854 ops_metrics: Option<Arc<OpsMetrics>>,
856}
857
858impl std::fmt::Debug for DirectoryNamespace {
859 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
860 write!(f, "{}", self.namespace_id())
861 }
862}
863
864impl std::fmt::Display for DirectoryNamespace {
865 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
866 write!(f, "{}", self.namespace_id())
867 }
868}
869
/// One table plus the `(i64, i64)` ranges associated with it.
///
/// Derives `Debug`/`Clone`/`PartialEq`/`Eq` so entries can be logged and
/// compared in assertions.
// NOTE(review): presumably the ranges are version ranges to delete for the
// table — confirm against the batch-delete code that consumes this type.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TableDeleteEntry {
    /// Identifier of the table, if known.
    table_id: Option<Vec<String>>,
    /// Pairs of range bounds for this table.
    ranges: Vec<(i64, i64)>,
}
876
877impl DirectoryNamespace {
878 fn apply_pagination(
891 names: &mut Vec<String>,
892 page_token: Option<String>,
893 limit: Option<i32>,
894 ) -> Option<String> {
895 names.sort();
897
898 if let Some(start_after) = page_token {
900 if let Some(index) = names
901 .iter()
902 .position(|name| name.as_str() > start_after.as_str())
903 {
904 names.drain(0..index);
905 } else {
906 names.clear();
907 }
908 }
909
910 if let Some(limit) = limit
912 && limit >= 0
913 {
914 let limit = limit as usize;
915 if names.len() > limit {
916 let next_page_token = if limit > 0 {
917 Some(names[limit - 1].clone())
918 } else {
919 None
920 };
921 names.truncate(limit);
922 return next_page_token;
923 }
924 }
925
926 None
927 }
928
929 async fn list_directory_tables(&self) -> Result<Vec<String>> {
931 let mut tables = Vec::new();
932 let entries = self
933 .object_store
934 .read_dir(self.base_path.clone())
935 .await
936 .map_err(|e| {
937 lance_core::Error::from(NamespaceError::Internal {
938 message: format!("Failed to list directory: {:?}", e),
939 })
940 })?;
941
942 for entry in entries {
943 let path = entry.trim_end_matches('/');
944 if !path.ends_with(".lance") {
945 continue;
946 }
947
948 let table_name = &path[..path.len() - 6];
949
950 let status = self.check_table_status(table_name).await;
952 if status.is_deregistered {
953 continue;
954 }
955
956 tables.push(table_name.to_string());
957 }
958
959 Ok(tables)
960 }
961
962 fn validate_root_namespace_id(id: &Option<Vec<String>>) -> Result<()> {
964 if let Some(id) = id
965 && !id.is_empty()
966 {
967 return Err(NamespaceError::Unsupported {
968 message: format!(
969 "Directory namespace only supports root namespace operations, but got namespace ID: {:?}. Expected empty ID.",
970 id
971 ),
972 }
973 .into());
974 }
975 Ok(())
976 }
977
978 fn table_name_from_id(id: &Option<Vec<String>>) -> Result<String> {
980 let id = id.as_ref().ok_or_else(|| {
981 lance_core::Error::from(NamespaceError::InvalidInput {
982 message: "Directory namespace table ID cannot be empty".to_string(),
983 })
984 })?;
985
986 if id.len() != 1 {
987 return Err(NamespaceError::Unsupported {
988 message: format!(
989 "Multi-level table IDs are only supported when manifest mode is enabled, but got: {:?}",
990 id
991 ),
992 }
993 .into());
994 }
995
996 Ok(id[0].clone())
997 }
998
999 fn format_table_id(table_id: &[String]) -> String {
1000 format!(
1001 "table id '{}'",
1002 manifest::ManifestNamespace::str_object_id(table_id)
1003 )
1004 }
1005
1006 fn format_table_id_from_request(id: &Option<Vec<String>>) -> String {
1007 id.as_ref()
1008 .map(|table_id| Self::format_table_id(table_id))
1009 .unwrap_or_else(|| "table id '<unknown>'".to_string())
1010 }
1011
1012 async fn resolve_table_location(&self, id: &Option<Vec<String>>) -> Result<String> {
1013 let mut describe_req = DescribeTableRequest::new();
1014 describe_req.id = id.clone();
1015 describe_req.load_detailed_metadata = Some(false);
1016
1017 let describe_resp = self.describe_table_impl(describe_req).await?;
1019
1020 describe_resp.location.ok_or_else(|| {
1021 lance_core::Error::from(NamespaceError::TableNotFound {
1022 message: format!("Table location not found for: {:?}", id),
1023 })
1024 })
1025 }
1026
1027 async fn table_has_actual_manifests(&self, table_name: &str) -> Result<bool> {
1028 manifest::ManifestNamespace::path_has_actual_manifests(
1029 &self.object_store,
1030 &self.table_path(table_name),
1031 )
1032 .await
1033 }
1034
1035 async fn filter_declared_tables(
1036 &self,
1037 tables: Vec<String>,
1038 include_declared: bool,
1039 ) -> Result<Vec<String>> {
1040 if include_declared {
1041 return Ok(tables);
1042 }
1043
1044 let mut stream = futures::stream::iter(tables.into_iter().map(|table_name| async move {
1045 if self.table_has_actual_manifests(&table_name).await? {
1049 Ok::<Option<String>, Error>(Some(table_name))
1050 } else {
1051 Ok::<Option<String>, Error>(None)
1052 }
1053 }))
1054 .buffered(manifest::DECLARED_FILTER_CONCURRENCY);
1055
1056 let mut filtered = Vec::new();
1057 while let Some(result) = stream.next().await {
1058 if let Some(table_name) = result? {
1059 filtered.push(table_name);
1060 }
1061 }
1062 Ok(filtered)
1063 }
1064
1065 fn ipc_reader_from_request_data(
1066 request_data: &Bytes,
1067 operation: &str,
1068 ) -> Result<(
1069 Box<dyn arrow::record_batch::RecordBatchReader + Send>,
1070 usize,
1071 )> {
1072 if request_data.is_empty() {
1073 return Err(NamespaceError::InvalidInput {
1074 message: format!(
1075 "Request data (Arrow IPC stream) is required for {}",
1076 operation
1077 ),
1078 }
1079 .into());
1080 }
1081
1082 let cursor = Cursor::new(request_data.as_ref());
1083 let stream_reader =
1084 StreamReader::try_new(cursor, None).map_err(|e| NamespaceError::InvalidInput {
1085 message: format!("Invalid Arrow IPC stream: {}", e),
1086 })?;
1087 let arrow_schema = stream_reader.schema();
1088
1089 let mut num_rows = 0usize;
1090 let mut batches = Vec::new();
1091 for batch_result in stream_reader {
1092 let batch = batch_result.map_err(|e| NamespaceError::Internal {
1093 message: format!("Failed to read batch from IPC stream: {}", e),
1094 })?;
1095 num_rows += batch.num_rows();
1096 batches.push(batch);
1097 }
1098
1099 let reader: Box<dyn arrow::record_batch::RecordBatchReader + Send> = if batches.is_empty() {
1100 let batch = arrow::record_batch::RecordBatch::new_empty(arrow_schema.clone());
1101 Box::new(RecordBatchIterator::new(vec![Ok(batch)], arrow_schema))
1102 } else {
1103 let batch_results: Vec<_> = batches.into_iter().map(Ok).collect();
1104 Box::new(RecordBatchIterator::new(batch_results, arrow_schema))
1105 };
1106
1107 Ok((reader, num_rows))
1108 }
1109
1110 async fn table_uri_has_actual_manifests(&self, table_uri: &str) -> Result<bool> {
1111 let table_path = self.object_store_path_from_uri(table_uri)?;
1112 manifest::ManifestNamespace::path_has_actual_manifests(&self.object_store, &table_path)
1113 .await
1114 }
1115
1116 fn object_store_path_from_uri(&self, uri: &str) -> Result<Path> {
1117 let registry = self
1118 .session
1119 .as_ref()
1120 .map(|session| session.store_registry())
1121 .unwrap_or_else(|| Arc::new(ObjectStoreRegistry::default()));
1122 ObjectStore::extract_path_from_uri(registry, uri)
1123 }
1124
1125 fn validate_dir_only_properties(
1126 properties: Option<&HashMap<String, String>>,
1127 operation: &str,
1128 ) -> Result<()> {
1129 if properties.is_some_and(|properties| !properties.is_empty()) {
1133 return Err(NamespaceError::Unsupported {
1134 message: format!(
1135 "{} with non-empty table properties requires manifest_enabled=true",
1136 operation
1137 ),
1138 }
1139 .into());
1140 }
1141 Ok(())
1142 }
1143
1144 async fn write_reader_to_table(
1145 &self,
1146 table_uri: &str,
1147 reader: Box<dyn arrow::record_batch::RecordBatchReader + Send>,
1148 mode: WriteMode,
1149 extra_storage_options: Option<HashMap<String, String>>,
1150 ) -> Result<Dataset> {
1151 let mut merged_storage_options = self.storage_options.clone().unwrap_or_default();
1154 if let Some(extra_storage_options) = extra_storage_options {
1155 merged_storage_options.extend(extra_storage_options);
1156 }
1157 let store_params = (!merged_storage_options.is_empty()).then(|| ObjectStoreParams {
1158 storage_options_accessor: Some(Arc::new(
1159 lance_io::object_store::StorageOptionsAccessor::with_static_options(
1160 merged_storage_options,
1161 ),
1162 )),
1163 ..Default::default()
1164 });
1165
1166 let write_params = WriteParams {
1167 mode,
1168 store_params,
1169 session: self.session.clone(),
1170 ..Default::default()
1171 };
1172
1173 let dataset = Dataset::write(reader, table_uri, Some(write_params))
1174 .await
1175 .map_err(|e| NamespaceError::Internal {
1176 message: format!("Failed to write table at '{}': {}", table_uri, e),
1177 })?;
1178
1179 Ok(dataset)
1180 }
1181
1182 async fn list_table_versions_from_storage(
1183 &self,
1184 table_uri: &str,
1185 descending: bool,
1186 limit: Option<i32>,
1187 ) -> Result<Vec<TableVersion>> {
1188 let table_path = self.object_store_path_from_uri(table_uri)?;
1189 let versions_dir = table_path.clone().join(VERSIONS_DIR);
1190 let manifest_metas: Vec<_> = self
1191 .object_store
1192 .read_dir_all(&versions_dir, None)
1193 .try_collect()
1194 .await
1195 .map_err(|e| {
1196 lance_core::Error::from(NamespaceError::Internal {
1197 message: format!(
1198 "Failed to list manifest files for table at '{}': {}",
1199 table_uri, e
1200 ),
1201 })
1202 })?;
1203
1204 let is_v2_naming = manifest_metas
1205 .first()
1206 .is_some_and(|meta| meta.location.filename().is_some_and(|f| f.len() == 29));
1207
1208 let mut table_versions: Vec<TableVersion> = manifest_metas
1209 .into_iter()
1210 .filter_map(|meta| {
1211 let filename = meta.location.filename()?;
1212 let version_str = filename.strip_suffix(".manifest")?;
1213 if version_str.starts_with('d') {
1214 return None;
1215 }
1216 let file_version: u64 = version_str.parse().ok()?;
1217
1218 let actual_version = if file_version > u64::MAX / 2 {
1219 u64::MAX - file_version
1220 } else {
1221 file_version
1222 };
1223
1224 Some(TableVersion {
1225 version: actual_version as i64,
1226 manifest_path: meta.location.to_string(),
1227 manifest_size: Some(meta.size as i64),
1228 e_tag: meta.e_tag,
1229 timestamp_millis: Some(meta.last_modified.timestamp_millis()),
1230 metadata: None,
1231 })
1232 })
1233 .collect();
1234
1235 let list_is_ordered = self.object_store.list_is_lexically_ordered;
1236
1237 let needs_sort = if list_is_ordered {
1238 if is_v2_naming {
1239 !descending
1240 } else {
1241 descending
1242 }
1243 } else {
1244 true
1245 };
1246
1247 if needs_sort {
1248 if descending {
1249 table_versions.sort_by(|a, b| b.version.cmp(&a.version));
1250 } else {
1251 table_versions.sort_by(|a, b| a.version.cmp(&b.version));
1252 }
1253 }
1254
1255 if let Some(limit) = limit {
1256 table_versions.truncate(limit as usize);
1257 }
1258
1259 Ok(table_versions)
1260 }
1261
1262 async fn describe_table_impl(
1266 &self,
1267 request: DescribeTableRequest,
1268 ) -> Result<DescribeTableResponse> {
1269 let is_root_level = request.id.as_ref().is_some_and(|id| id.len() == 1);
1270 let skip_manifest_for_root = self.dir_listing_enabled
1271 && is_root_level
1272 && !self.dir_listing_to_manifest_migration_enabled;
1273 if let Some(ref manifest_ns) = self.manifest_ns
1274 && !skip_manifest_for_root
1275 {
1276 match manifest_ns.describe_table(request.clone()).await {
1277 Ok(mut response) => {
1278 if let Some(ref table_uri) = response.table_uri {
1279 let vend = request.vend_credentials.unwrap_or(true);
1281 let identity = request.identity.as_deref();
1282 response.storage_options = self
1283 .get_storage_options_for_table(table_uri, vend, identity)
1284 .await?;
1285 }
1286 if self.table_version_tracking_enabled {
1288 response.managed_versioning = Some(true);
1289 }
1290 return Ok(response);
1291 }
1292 Err(_) if self.dir_listing_enabled && is_root_level => {
1293 }
1295 Err(e) => return Err(e),
1296 }
1297 }
1298
1299 let table_name = Self::table_name_from_id(&request.id)?;
1300 let table_id = Self::format_table_id_from_request(&request.id);
1301 let table_uri = self.table_full_uri(&table_name);
1302
1303 let status = self.check_table_status(&table_name).await;
1305
1306 if !status.exists {
1307 return Err(NamespaceError::TableNotFound {
1308 message: table_id.clone(),
1309 }
1310 .into());
1311 }
1312
1313 if status.is_deregistered {
1314 return Err(NamespaceError::TableNotFound {
1315 message: format!("Table is deregistered: {}", table_id),
1316 }
1317 .into());
1318 }
1319
1320 let load_detailed_metadata = request.load_detailed_metadata.unwrap_or(false);
1321 let should_check_declared =
1322 load_detailed_metadata || request.check_declared.unwrap_or(false);
1323 let vend_credentials = request.vend_credentials.unwrap_or(true);
1325 let identity = request.identity.as_deref();
1326 let is_only_declared = if should_check_declared {
1327 if status.has_reserved_file {
1328 Some(!self.table_has_actual_manifests(&table_name).await?)
1329 } else {
1330 Some(false)
1331 }
1332 } else {
1333 None
1334 };
1335
1336 if !load_detailed_metadata {
1337 let storage_options = self
1338 .get_storage_options_for_table(&table_uri, vend_credentials, identity)
1339 .await?;
1340 return Ok(DescribeTableResponse {
1341 table: Some(table_name),
1342 namespace: request.id.as_ref().map(|id| {
1343 if id.len() > 1 {
1344 id[..id.len() - 1].to_vec()
1345 } else {
1346 vec![]
1347 }
1348 }),
1349 location: Some(table_uri.clone()),
1350 table_uri: Some(table_uri),
1351 storage_options,
1352 is_only_declared,
1353 managed_versioning: if self.table_version_tracking_enabled {
1354 Some(true)
1355 } else {
1356 None
1357 },
1358 ..Default::default()
1359 });
1360 }
1361
1362 if is_only_declared == Some(true) {
1363 let storage_options = self
1364 .get_storage_options_for_table(&table_uri, vend_credentials, identity)
1365 .await?;
1366 return Ok(DescribeTableResponse {
1367 table: Some(table_name),
1368 namespace: request.id.as_ref().map(|id| {
1369 if id.len() > 1 {
1370 id[..id.len() - 1].to_vec()
1371 } else {
1372 vec![]
1373 }
1374 }),
1375 location: Some(table_uri.clone()),
1376 table_uri: Some(table_uri),
1377 storage_options,
1378 is_only_declared,
1379 managed_versioning: if self.table_version_tracking_enabled {
1380 Some(true)
1381 } else {
1382 None
1383 },
1384 ..Default::default()
1385 });
1386 }
1387
1388 let mut builder = DatasetBuilder::from_uri(&table_uri);
1391 if let Some(opts) = &self.storage_options {
1392 builder = builder.with_storage_options(opts.clone());
1393 }
1394 if let Some(sess) = &self.session {
1395 builder = builder.with_session(sess.clone());
1396 }
1397 match builder.load().await {
1398 Ok(mut dataset) => {
1399 if let Some(requested_version) = request.version {
1401 dataset = dataset
1402 .checkout_version(requested_version as u64)
1403 .await
1404 .map_err(|e| {
1405 lance_core::Error::from(NamespaceError::TableVersionNotFound {
1406 message: format!(
1407 "Version {} not found for table '{}': {}",
1408 requested_version, table_name, e
1409 ),
1410 })
1411 })?;
1412 }
1413
1414 let version_info = dataset.version();
1415 let lance_schema = dataset.schema();
1416 let arrow_schema: arrow_schema::Schema = lance_schema.into();
1417 let json_schema = arrow_schema_to_json(&arrow_schema)?;
1418 let storage_options = self
1419 .get_storage_options_for_table(&table_uri, vend_credentials, identity)
1420 .await?;
1421
1422 let metadata: std::collections::HashMap<String, String> =
1424 version_info.metadata.into_iter().collect();
1425
1426 Ok(DescribeTableResponse {
1427 table: Some(table_name),
1428 namespace: request.id.as_ref().map(|id| {
1429 if id.len() > 1 {
1430 id[..id.len() - 1].to_vec()
1431 } else {
1432 vec![]
1433 }
1434 }),
1435 version: Some(version_info.version as i64),
1436 location: Some(table_uri.clone()),
1437 table_uri: Some(table_uri),
1438 schema: Some(Box::new(json_schema)),
1439 storage_options,
1440 metadata: Some(metadata),
1441 is_only_declared,
1442 managed_versioning: if self.table_version_tracking_enabled {
1443 Some(true)
1444 } else {
1445 None
1446 },
1447 ..Default::default()
1448 })
1449 }
1450 Err(err) => {
1451 if manifest::ManifestNamespace::is_not_found_load_error(&err)
1452 && is_only_declared == Some(true)
1453 {
1454 let storage_options = self
1455 .get_storage_options_for_table(&table_uri, vend_credentials, identity)
1456 .await?;
1457 Ok(DescribeTableResponse {
1458 table: Some(table_name),
1459 namespace: request.id.as_ref().map(|id| {
1460 if id.len() > 1 {
1461 id[..id.len() - 1].to_vec()
1462 } else {
1463 vec![]
1464 }
1465 }),
1466 location: Some(table_uri.clone()),
1467 table_uri: Some(table_uri),
1468 storage_options,
1469 is_only_declared,
1470 managed_versioning: if self.table_version_tracking_enabled {
1471 Some(true)
1472 } else {
1473 None
1474 },
1475 ..Default::default()
1476 })
1477 } else {
1478 Err(NamespaceError::Internal {
1479 message: format!(
1480 "Table directory exists but cannot load dataset {}: {:?}",
1481 table_name, err
1482 ),
1483 }
1484 .into())
1485 }
1486 }
1487 }
1488 }
1489
1490 async fn load_dataset(
1491 &self,
1492 table_uri: &str,
1493 version: Option<i64>,
1494 operation: &str,
1495 ) -> Result<Dataset> {
1496 if let Some(version) = version
1497 && version < 0
1498 {
1499 return Err(NamespaceError::InvalidInput {
1500 message: format!(
1501 "Table version for {} must be non-negative, got {}",
1502 operation, version
1503 ),
1504 }
1505 .into());
1506 }
1507
1508 let mut builder = DatasetBuilder::from_uri(table_uri);
1509 if let Some(opts) = &self.storage_options {
1510 builder = builder.with_storage_options(opts.clone());
1511 }
1512 if let Some(sess) = &self.session {
1513 builder = builder.with_session(sess.clone());
1514 }
1515
1516 let dataset = builder.load().await.map_err(|e| {
1517 lance_core::Error::from(NamespaceError::TableNotFound {
1518 message: format!(
1519 "Failed to open table at '{}' for {}: {}",
1520 table_uri, operation, e
1521 ),
1522 })
1523 })?;
1524
1525 if let Some(version) = version {
1526 return dataset.checkout_version(version as u64).await.map_err(|e| {
1527 lance_core::Error::from(NamespaceError::TableVersionNotFound {
1528 message: format!(
1529 "Failed to checkout version {} for table at '{}' during {}: {}",
1530 version, table_uri, operation, e
1531 ),
1532 })
1533 });
1534 }
1535
1536 Ok(dataset)
1537 }
1538
1539 fn parse_index_type(index_type: &str) -> Result<IndexType> {
1540 match index_type.trim().to_ascii_uppercase().as_str() {
1541 "SCALAR" | "BTREE" => Ok(IndexType::BTree),
1542 "BITMAP" => Ok(IndexType::Bitmap),
1543 "LABEL_LIST" | "LABELLIST" => Ok(IndexType::LabelList),
1544 "INVERTED" | "FTS" => Ok(IndexType::Inverted),
1545 "NGRAM" => Ok(IndexType::NGram),
1546 "ZONEMAP" | "ZONE_MAP" => Ok(IndexType::ZoneMap),
1547 "BLOOMFILTER" | "BLOOM_FILTER" => Ok(IndexType::BloomFilter),
1548 "RTREE" | "R_TREE" => Ok(IndexType::RTree),
1549 "VECTOR" | "IVF_PQ" => Ok(IndexType::IvfPq),
1550 "IVF_FLAT" => Ok(IndexType::IvfFlat),
1551 "IVF_SQ" => Ok(IndexType::IvfSq),
1552 "IVF_RQ" => Ok(IndexType::IvfRq),
1553 "IVF_HNSW_FLAT" => Ok(IndexType::IvfHnswFlat),
1554 "IVF_HNSW_SQ" => Ok(IndexType::IvfHnswSq),
1555 "IVF_HNSW_PQ" => Ok(IndexType::IvfHnswPq),
1556 other => Err(NamespaceError::InvalidInput {
1557 message: format!("Unsupported index_type '{}'", other),
1558 }
1559 .into()),
1560 }
1561 }
1562
1563 fn parse_metric_type(distance_type: Option<&str>) -> Result<MetricType> {
1564 let distance_type = distance_type.unwrap_or("l2");
1565 MetricType::try_from(distance_type).map_err(|e| {
1566 lance_core::Error::from(NamespaceError::InvalidInput {
1567 message: format!(
1568 "Unsupported distance_type '{}' for vector index: {}",
1569 distance_type, e
1570 ),
1571 })
1572 })
1573 }
1574
1575 fn build_index_params(request: &CreateTableIndexRequest) -> Result<DirectoryIndexParams> {
1576 let index_type = Self::parse_index_type(&request.index_type)?;
1577 Ok(match index_type {
1578 IndexType::BTree => DirectoryIndexParams::Scalar {
1579 index_type,
1580 params: ScalarIndexParams::for_builtin(BuiltinIndexType::BTree),
1581 },
1582 IndexType::Bitmap => DirectoryIndexParams::Scalar {
1583 index_type,
1584 params: ScalarIndexParams::for_builtin(BuiltinIndexType::Bitmap),
1585 },
1586 IndexType::LabelList => DirectoryIndexParams::Scalar {
1587 index_type,
1588 params: ScalarIndexParams::for_builtin(BuiltinIndexType::LabelList),
1589 },
1590 IndexType::NGram => DirectoryIndexParams::Scalar {
1591 index_type,
1592 params: ScalarIndexParams::for_builtin(BuiltinIndexType::NGram),
1593 },
1594 IndexType::ZoneMap => DirectoryIndexParams::Scalar {
1595 index_type,
1596 params: ScalarIndexParams::for_builtin(BuiltinIndexType::ZoneMap),
1597 },
1598 IndexType::BloomFilter => DirectoryIndexParams::Scalar {
1599 index_type,
1600 params: ScalarIndexParams::for_builtin(BuiltinIndexType::BloomFilter),
1601 },
1602 IndexType::RTree => DirectoryIndexParams::Scalar {
1603 index_type,
1604 params: ScalarIndexParams::for_builtin(BuiltinIndexType::RTree),
1605 },
1606 IndexType::Inverted => {
1607 let mut params = InvertedIndexParams::default();
1608 if let Some(with_position) = request.with_position {
1609 params = params.with_position(with_position);
1610 }
1611 if let Some(base_tokenizer) = &request.base_tokenizer {
1612 params = params.base_tokenizer(base_tokenizer.clone());
1613 }
1614 if let Some(language) = &request.language {
1615 params = params.language(language)?;
1616 }
1617 if let Some(max_token_length) = request.max_token_length {
1618 if max_token_length < 0 {
1619 return Err(NamespaceError::InvalidInput {
1620 message: format!(
1621 "FTS max_token_length must be non-negative, got {}",
1622 max_token_length
1623 ),
1624 }
1625 .into());
1626 }
1627 params = params.max_token_length(Some(max_token_length as usize));
1628 }
1629 if let Some(lower_case) = request.lower_case {
1630 params = params.lower_case(lower_case);
1631 }
1632 if let Some(stem) = request.stem {
1633 params = params.stem(stem);
1634 }
1635 if let Some(remove_stop_words) = request.remove_stop_words {
1636 params = params.remove_stop_words(remove_stop_words);
1637 }
1638 if let Some(ascii_folding) = request.ascii_folding {
1639 params = params.ascii_folding(ascii_folding);
1640 }
1641 DirectoryIndexParams::Inverted(params)
1642 }
1643 IndexType::IvfFlat => DirectoryIndexParams::Vector {
1644 index_type,
1645 params: VectorIndexParams::with_ivf_flat_params(
1646 Self::parse_metric_type(request.distance_type.as_deref())?,
1647 IvfBuildParams::default(),
1648 ),
1649 },
1650 IndexType::IvfPq => DirectoryIndexParams::Vector {
1651 index_type,
1652 params: VectorIndexParams::with_ivf_pq_params(
1653 Self::parse_metric_type(request.distance_type.as_deref())?,
1654 IvfBuildParams::default(),
1655 PQBuildParams::default(),
1656 ),
1657 },
1658 IndexType::IvfSq => DirectoryIndexParams::Vector {
1659 index_type,
1660 params: VectorIndexParams::with_ivf_sq_params(
1661 Self::parse_metric_type(request.distance_type.as_deref())?,
1662 IvfBuildParams::default(),
1663 SQBuildParams::default(),
1664 ),
1665 },
1666 IndexType::IvfRq => DirectoryIndexParams::Vector {
1667 index_type,
1668 params: VectorIndexParams::with_ivf_rq_params(
1669 Self::parse_metric_type(request.distance_type.as_deref())?,
1670 IvfBuildParams::default(),
1671 RQBuildParams::default(),
1672 ),
1673 },
1674 IndexType::IvfHnswFlat => DirectoryIndexParams::Vector {
1675 index_type,
1676 params: VectorIndexParams::ivf_hnsw(
1677 Self::parse_metric_type(request.distance_type.as_deref())?,
1678 IvfBuildParams::default(),
1679 HnswBuildParams::default(),
1680 ),
1681 },
1682 IndexType::IvfHnswSq => DirectoryIndexParams::Vector {
1683 index_type,
1684 params: VectorIndexParams::with_ivf_hnsw_sq_params(
1685 Self::parse_metric_type(request.distance_type.as_deref())?,
1686 IvfBuildParams::default(),
1687 HnswBuildParams::default(),
1688 SQBuildParams::default(),
1689 ),
1690 },
1691 IndexType::IvfHnswPq => DirectoryIndexParams::Vector {
1692 index_type,
1693 params: VectorIndexParams::with_ivf_hnsw_pq_params(
1694 Self::parse_metric_type(request.distance_type.as_deref())?,
1695 IvfBuildParams::default(),
1696 HnswBuildParams::default(),
1697 PQBuildParams::default(),
1698 ),
1699 },
1700 other => {
1701 return Err(NamespaceError::InvalidInput {
1702 message: format!("Unsupported index type for namespace API: {}", other),
1703 }
1704 .into());
1705 }
1706 })
1707 }
1708
1709 fn paginate_indices(
1710 indices: &mut Vec<IndexContent>,
1711 page_token: Option<String>,
1712 limit: Option<i32>,
1713 ) -> Option<String> {
1714 indices.sort_by(|a, b| a.index_name.cmp(&b.index_name));
1715
1716 if let Some(start_after) = page_token {
1717 if let Some(index) = indices
1718 .iter()
1719 .position(|index| index.index_name.as_str() > start_after.as_str())
1720 {
1721 indices.drain(0..index);
1722 } else {
1723 indices.clear();
1724 }
1725 }
1726
1727 let mut next_page_token = None;
1728 if let Some(limit) = limit
1729 && limit >= 0
1730 {
1731 let limit = limit as usize;
1732 if limit > 0 && indices.len() > limit {
1733 next_page_token = Some(indices[limit - 1].index_name.clone());
1734 }
1735 indices.truncate(limit);
1736 }
1737 if indices.is_empty() {
1738 None
1739 } else {
1740 next_page_token
1741 }
1742 }
1743
1744 fn transaction_operation_name(transaction: &Transaction) -> String {
1745 match &transaction.operation {
1746 Operation::CreateIndex {
1747 new_indices,
1748 removed_indices,
1749 } if new_indices.is_empty() && !removed_indices.is_empty() => "DropIndex".to_string(),
1750 _ => transaction.operation.to_string(),
1751 }
1752 }
1753
1754 fn transaction_response(
1755 version: u64,
1756 transaction: &Transaction,
1757 ) -> DescribeTransactionResponse {
1758 let mut properties = transaction
1759 .transaction_properties
1760 .as_ref()
1761 .map(|properties| (**properties).clone())
1762 .unwrap_or_default();
1763 properties.insert("uuid".to_string(), transaction.uuid.clone());
1764 properties.insert("version".to_string(), version.to_string());
1765 properties.insert(
1766 "read_version".to_string(),
1767 transaction.read_version.to_string(),
1768 );
1769 properties.insert(
1770 "operation".to_string(),
1771 Self::transaction_operation_name(transaction),
1772 );
1773 if let Some(tag) = &transaction.tag {
1774 properties.insert("tag".to_string(), tag.clone());
1775 }
1776
1777 DescribeTransactionResponse {
1778 status: "SUCCEEDED".to_string(),
1779 properties: Some(properties),
1780 }
1781 }
1782
1783 fn describe_table_index_stats_response(
1784 stats: &serde_json::Value,
1785 ) -> DescribeTableIndexStatsResponse {
1786 let get_i64 = |key: &str| {
1787 stats.get(key).and_then(|value| {
1788 value
1789 .as_i64()
1790 .or_else(|| value.as_u64().and_then(|v| i64::try_from(v).ok()))
1791 })
1792 };
1793
1794 DescribeTableIndexStatsResponse {
1795 distance_type: stats
1796 .get("distance_type")
1797 .and_then(|value| value.as_str())
1798 .map(str::to_string),
1799 index_type: stats
1800 .get("index_type")
1801 .and_then(|value| value.as_str())
1802 .map(str::to_string),
1803 num_indexed_rows: get_i64("num_indexed_rows"),
1804 num_unindexed_rows: get_i64("num_unindexed_rows"),
1805 num_indices: get_i64("num_indices").and_then(|value| i32::try_from(value).ok()),
1806 }
1807 }
1808
1809 async fn find_transaction(&self, dataset: &Dataset, id: &str) -> Result<(u64, Transaction)> {
1814 if let Ok(version) = id.parse::<u64>() {
1815 let transaction = dataset
1816 .read_transaction_by_version(version)
1817 .await
1818 .map_err(|e| {
1819 lance_core::Error::from(NamespaceError::TransactionNotFound {
1820 message: format!(
1821 "Failed to read transaction for version {}: {}",
1822 version, e
1823 ),
1824 })
1825 })?
1826 .ok_or_else(|| {
1827 lance_core::Error::from(NamespaceError::TransactionNotFound {
1828 message: format!("version {}", version),
1829 })
1830 })?;
1831 return Ok((version, transaction));
1832 }
1833
1834 let versions = dataset.versions().await.map_err(|e| {
1835 lance_core::Error::from(NamespaceError::Internal {
1836 message: format!(
1837 "Failed to list table versions while resolving transaction '{}': {}",
1838 id, e
1839 ),
1840 })
1841 })?;
1842
1843 for version in versions.into_iter().rev() {
1844 if let Some(transaction) = dataset
1845 .read_transaction_by_version(version.version)
1846 .await
1847 .map_err(|e| {
1848 lance_core::Error::from(NamespaceError::Internal {
1849 message: format!(
1850 "Failed to read transaction for version {} while resolving '{}': {}",
1851 version.version, id, e
1852 ),
1853 })
1854 })?
1855 && transaction.uuid == id
1856 {
1857 return Ok((version.version, transaction));
1858 }
1859 }
1860
1861 Err(NamespaceError::TransactionNotFound {
1862 message: id.to_string(),
1863 }
1864 .into())
1865 }
1866
1867 fn table_full_uri(&self, table_name: &str) -> String {
1868 format!("{}/{}.lance", &self.root, table_name)
1869 }
1870
1871 fn table_path(&self, table_name: &str) -> Path {
1873 self.base_path
1874 .clone()
1875 .join(format!("{}.lance", table_name).as_str())
1876 }
1877
1878 fn table_reserved_file_path(&self, table_name: &str) -> Path {
1880 self.base_path
1881 .clone()
1882 .join(format!("{}.lance", table_name).as_str())
1883 .join(".lance-reserved")
1884 }
1885
1886 fn table_deregistered_file_path(&self, table_name: &str) -> Path {
1888 self.base_path
1889 .clone()
1890 .join(format!("{}.lance", table_name).as_str())
1891 .join(".lance-deregistered")
1892 }
1893
1894 pub(crate) async fn check_table_status(&self, table_name: &str) -> TableStatus {
1900 let table_path = self.table_path(table_name);
1901 match self.object_store.read_dir(table_path).await {
1902 Ok(entries) => {
1903 let exists = !entries.is_empty();
1904 let is_deregistered = entries.iter().any(|e| e.ends_with(".lance-deregistered"));
1905 let has_reserved_file = entries.iter().any(|e| e.ends_with(".lance-reserved"));
1906 TableStatus {
1907 exists,
1908 is_deregistered,
1909 has_reserved_file,
1910 }
1911 }
1912 Err(_) => TableStatus {
1913 exists: false,
1914 is_deregistered: false,
1915 has_reserved_file: false,
1916 },
1917 }
1918 }
1919
1920 async fn put_marker_file_atomic(
1921 &self,
1922 path: &Path,
1923 file_description: &str,
1924 ) -> std::result::Result<(), String> {
1925 let put_opts = PutOptions {
1926 mode: PutMode::Create,
1927 ..Default::default()
1928 };
1929
1930 match self
1931 .object_store
1932 .inner
1933 .put_opts(path, bytes::Bytes::new().into(), put_opts)
1934 .await
1935 {
1936 Ok(_) => Ok(()),
1937 Err(ObjectStoreError::AlreadyExists { .. })
1938 | Err(ObjectStoreError::Precondition { .. }) => {
1939 Err(format!("{} already exists", file_description))
1940 }
1941 Err(e) => Err(format!("Failed to create {}: {:?}", file_description, e)),
1942 }
1943 }
1944
1945 async fn get_storage_options_for_table(
1965 &self,
1966 table_uri: &str,
1967 vend_credentials: bool,
1968 identity: Option<&Identity>,
1969 ) -> Result<Option<HashMap<String, String>>> {
1970 if vend_credentials && let Some(ref vendor) = self.credential_vendor {
1971 let vended = vendor.vend_credentials(table_uri, identity).await?;
1972 return Ok(Some(vended.storage_options));
1973 }
1974 if self.vend_input_storage_options {
1977 let mut options = self.storage_options.clone().unwrap_or_default();
1978 if let Some(refresh_interval_millis) =
1980 self.vend_input_storage_options_refresh_interval_millis
1981 {
1982 let now_millis = std::time::SystemTime::now()
1983 .duration_since(std::time::UNIX_EPOCH)
1984 .unwrap()
1985 .as_millis() as u64;
1986 let expires_at_millis = now_millis + refresh_interval_millis;
1987 options.insert(
1988 "expires_at_millis".to_string(),
1989 expires_at_millis.to_string(),
1990 );
1991 }
1992 return Ok(Some(options));
1993 }
1994 Ok(None)
1997 }
1998
1999 pub async fn migrate(&self) -> Result<usize> {
2052 let Some(ref manifest_ns) = self.manifest_ns else {
2054 return Ok(0); };
2056
2057 let manifest_locations = manifest_ns.list_manifest_table_locations().await?;
2059
2060 let dir_tables = self
2063 .filter_declared_tables(self.list_directory_tables().await?, false)
2064 .await?;
2065
2066 let mut migrated_count = 0;
2071 for table_name in dir_tables {
2072 let dir_name = format!("{}.lance", table_name);
2074 if !manifest_locations.contains(&dir_name) {
2075 manifest_ns.register_table(&table_name, dir_name).await?;
2076 migrated_count += 1;
2077 }
2078 }
2079
2080 Ok(migrated_count)
2081 }
2082
2083 async fn delete_physical_version_files(
2092 &self,
2093 table_entries: &[TableDeleteEntry],
2094 best_effort: bool,
2095 ) -> Result<i64> {
2096 let mut deleted_count = 0i64;
2097 for te in table_entries {
2098 let table_uri = self.resolve_table_location(&te.table_id).await?;
2099 let table_path = self.object_store_path_from_uri(&table_uri)?;
2100 let versions_dir_path = table_path.clone().join(VERSIONS_DIR);
2101
2102 for (start, end) in &te.ranges {
2103 for version in *start..=*end {
2104 let version_path = versions_dir_path
2105 .clone()
2106 .join(format!("{}.manifest", version as u64));
2107 match self.object_store.inner.delete(&version_path).await {
2108 Ok(_) => {
2109 deleted_count += 1;
2110 }
2111 Err(object_store::Error::NotFound { .. }) => {}
2112 Err(e) => {
2113 if best_effort {
2114 log::warn!(
2115 "Failed to delete manifest file for version {} of table {:?}: {:?}",
2116 version,
2117 te.table_id,
2118 e
2119 );
2120 } else {
2121 return Err(NamespaceError::Internal {
2122 message: format!(
2123 "Failed to delete version {} for table at '{}': {}",
2124 version, table_uri, e
2125 ),
2126 }
2127 .into());
2128 }
2129 }
2130 }
2131 }
2132 }
2133 }
2134 Ok(deleted_count)
2135 }
2136
2137 #[allow(clippy::too_many_arguments)]
2142 fn apply_query_params_to_scanner(
2143 scanner: &mut Scanner,
2144 filter: Option<&str>,
2145 columns: Option<&QueryTableRequestColumns>,
2146 vector_column: Option<&str>,
2147 vector: &QueryTableRequestVector,
2148 k: i32,
2149 offset: Option<i32>,
2150 prefilter: Option<bool>,
2151 bypass_vector_index: Option<bool>,
2152 nprobes: Option<i32>,
2153 ef: Option<i32>,
2154 refine_factor: Option<i32>,
2155 distance_type: Option<&str>,
2156 fast_search_flag: Option<bool>,
2157 with_row_id: Option<bool>,
2158 lower_bound: Option<f32>,
2159 upper_bound: Option<f32>,
2160 operation: &str,
2161 ) -> Result<()> {
2162 if let Some(pf) = prefilter {
2164 scanner.prefilter(pf);
2165 }
2166
2167 if let Some(filter) = filter {
2168 scanner.filter(filter).map_err(|e| {
2169 Error::invalid_input_source(
2170 format!("Invalid filter expression for {}: {}", operation, e).into(),
2171 )
2172 })?;
2173 }
2174
2175 if let Some(cols) = columns {
2176 if let Some(ref names) = cols.column_names {
2177 scanner.project(names.as_slice()).map_err(|e| {
2178 Error::invalid_input_source(
2179 format!("Invalid column projection for {}: {}", operation, e).into(),
2180 )
2181 })?;
2182 } else if let Some(ref aliases) = cols.column_aliases {
2183 let pairs: Vec<(&str, &str)> = aliases
2185 .iter()
2186 .map(|(alias, src)| (alias.as_str(), src.as_str()))
2187 .collect();
2188 scanner.project_with_transform(&pairs).map_err(|e| {
2189 Error::invalid_input_source(
2190 format!("Invalid column aliases for {}: {}", operation, e).into(),
2191 )
2192 })?;
2193 }
2194 }
2195
2196 let query_vec: Option<Vec<f32>> = vector
2198 .single_vector
2199 .as_ref()
2200 .filter(|v| !v.is_empty())
2201 .cloned()
2202 .or_else(|| {
2203 vector
2204 .multi_vector
2205 .as_ref()
2206 .and_then(|mv| mv.first())
2207 .filter(|v| !v.is_empty())
2208 .cloned()
2209 });
2210
2211 if let Some(q_vec) = query_vec {
2212 let col = vector_column.unwrap_or("vector");
2213 let q = Arc::new(Float32Array::from(q_vec));
2214 scanner
2215 .nearest(col, q.as_ref(), k.max(1) as usize)
2216 .map_err(|e| {
2217 Error::invalid_input_source(
2218 format!("Invalid vector query for {}: {}", operation, e).into(),
2219 )
2220 })?;
2221
2222 if let Some(n) = nprobes {
2224 scanner.nprobes(n.max(1) as usize);
2225 }
2226 if let Some(e) = ef {
2227 scanner.ef(e.max(1) as usize);
2228 }
2229 if let Some(rf) = refine_factor {
2230 scanner.refine(rf.max(0) as u32);
2231 }
2232 if let Some(true) = bypass_vector_index {
2234 scanner.use_index(false);
2235 }
2236 if let Some(true) = fast_search_flag {
2237 scanner.fast_search();
2238 }
2239 if lower_bound.is_some() || upper_bound.is_some() {
2240 scanner.distance_range(lower_bound, upper_bound);
2241 }
2242 if let Some(dt) = distance_type {
2243 let metric = Self::parse_metric_type(Some(dt))?;
2244 scanner.distance_metric(metric);
2245 }
2246 if let Some(off) = offset.filter(|&o| o > 0) {
2248 scanner.limit(None, Some(off as i64)).map_err(|e| {
2249 Error::invalid_input_source(
2250 format!("Invalid offset for {}: {}", operation, e).into(),
2251 )
2252 })?;
2253 }
2254 } else {
2255 let limit = if k > 0 { Some(k as i64) } else { None };
2257 scanner
2258 .limit(limit, offset.map(|o| o as i64))
2259 .map_err(|e| {
2260 Error::invalid_input_source(
2261 format!("Invalid limit/offset for {}: {}", operation, e).into(),
2262 )
2263 })?;
2264 }
2265
2266 if let Some(true) = with_row_id {
2267 scanner.with_row_id();
2268 }
2269
2270 Ok(())
2271 }
2272
2273 pub fn retrieve_ops_metrics(&self) -> HashMap<String, u64> {
2280 self.ops_metrics
2281 .as_ref()
2282 .map(|m| m.retrieve())
2283 .unwrap_or_default()
2284 }
2285
2286 pub fn reset_ops_metrics(&self) {
2290 if let Some(ref metrics) = self.ops_metrics {
2291 metrics.reset();
2292 }
2293 }
2294
2295 fn record_op(&self, operation: &str) {
2297 if let Some(ref metrics) = self.ops_metrics {
2298 metrics.increment(operation);
2299 }
2300 }
2301}
2302
2303#[async_trait]
2304impl LanceNamespace for DirectoryNamespace {
2305 async fn list_namespaces(
2306 &self,
2307 request: ListNamespacesRequest,
2308 ) -> Result<ListNamespacesResponse> {
2309 self.record_op("list_namespaces");
2310 if let Some(ref manifest_ns) = self.manifest_ns {
2311 return manifest_ns.list_namespaces(request).await;
2312 }
2313
2314 Self::validate_root_namespace_id(&request.id)?;
2315 Ok(ListNamespacesResponse::new(vec![]))
2316 }
2317
2318 async fn describe_namespace(
2319 &self,
2320 request: DescribeNamespaceRequest,
2321 ) -> Result<DescribeNamespaceResponse> {
2322 self.record_op("describe_namespace");
2323 if let Some(ref manifest_ns) = self.manifest_ns {
2324 return manifest_ns.describe_namespace(request).await;
2325 }
2326
2327 Self::validate_root_namespace_id(&request.id)?;
2328 #[allow(clippy::needless_update)]
2329 Ok(DescribeNamespaceResponse {
2330 properties: Some(HashMap::new()),
2331 ..Default::default()
2332 })
2333 }
2334
2335 async fn create_namespace(
2336 &self,
2337 request: CreateNamespaceRequest,
2338 ) -> Result<CreateNamespaceResponse> {
2339 self.record_op("create_namespace");
2340 if let Some(ref manifest_ns) = self.manifest_ns {
2341 return manifest_ns.create_namespace(request).await;
2342 }
2343
2344 if request.id.is_none() || request.id.as_ref().unwrap().is_empty() {
2345 return Err(NamespaceError::NamespaceAlreadyExists {
2346 message: "root namespace".to_string(),
2347 }
2348 .into());
2349 }
2350
2351 Err(NamespaceError::Unsupported {
2352 message: "Child namespaces are only supported when manifest mode is enabled"
2353 .to_string(),
2354 }
2355 .into())
2356 }
2357
2358 async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<DropNamespaceResponse> {
2359 self.record_op("drop_namespace");
2360 if let Some(ref manifest_ns) = self.manifest_ns {
2361 return manifest_ns.drop_namespace(request).await;
2362 }
2363
2364 if request.id.is_none() || request.id.as_ref().unwrap().is_empty() {
2365 return Err(NamespaceError::InvalidInput {
2366 message: "Root namespace cannot be dropped".to_string(),
2367 }
2368 .into());
2369 }
2370
2371 Err(NamespaceError::Unsupported {
2372 message: "Child namespaces are only supported when manifest mode is enabled"
2373 .to_string(),
2374 }
2375 .into())
2376 }
2377
2378 async fn namespace_exists(&self, request: NamespaceExistsRequest) -> Result<()> {
2379 self.record_op("namespace_exists");
2380 if let Some(ref manifest_ns) = self.manifest_ns {
2381 return manifest_ns.namespace_exists(request).await;
2382 }
2383
2384 if request.id.is_none() || request.id.as_ref().unwrap().is_empty() {
2385 return Ok(());
2386 }
2387
2388 Err(NamespaceError::NamespaceNotFound {
2389 message: "Child namespaces are only supported when manifest mode is enabled"
2390 .to_string(),
2391 }
2392 .into())
2393 }
2394
2395 async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
2396 self.record_op("list_tables");
2397 let namespace_id = request.id.as_ref().ok_or_else(|| {
2399 lance_core::Error::from(NamespaceError::InvalidInput {
2400 message: "Namespace ID is required".to_string(),
2401 })
2402 })?;
2403
2404 if !namespace_id.is_empty() {
2406 if let Some(ref manifest_ns) = self.manifest_ns {
2407 return manifest_ns.list_tables(request).await;
2408 }
2409 return Err(NamespaceError::Unsupported {
2410 message: "Child namespaces are only supported when manifest mode is enabled"
2411 .to_string(),
2412 }
2413 .into());
2414 }
2415
2416 if let Some(ref manifest_ns) = self.manifest_ns
2418 && !self.dir_listing_enabled
2419 {
2420 return manifest_ns.list_tables(request).await;
2421 }
2422
2423 let mut tables = if self.manifest_ns.is_some()
2426 && self.dir_listing_enabled
2427 && self.dir_listing_to_manifest_migration_enabled
2428 {
2429 let manifest_locations = if let Some(ref manifest_ns) = self.manifest_ns {
2431 manifest_ns.list_manifest_table_locations().await?
2432 } else {
2433 std::collections::HashSet::new()
2434 };
2435
2436 let mut manifest_request = request.clone();
2438 manifest_request.limit = None;
2439 manifest_request.page_token = None;
2440 let manifest_tables = if let Some(ref manifest_ns) = self.manifest_ns {
2441 let manifest_response = manifest_ns.list_tables(manifest_request).await?;
2442 manifest_response.tables
2443 } else {
2444 vec![]
2445 };
2446
2447 let mut all_tables: Vec<String> = manifest_tables;
2450 let dir_tables = self.list_directory_tables().await?;
2451 for table_name in dir_tables {
2452 let full_location = format!("{}/{}.lance", self.root, table_name);
2455 let relative_location = format!("{}.lance", table_name);
2456 if !manifest_locations.contains(&full_location)
2457 && !manifest_locations.contains(&relative_location)
2458 {
2459 all_tables.push(table_name);
2460 }
2461 }
2462
2463 all_tables
2464 } else {
2465 self.list_directory_tables().await?
2466 };
2467
2468 tables = self
2469 .filter_declared_tables(tables, request.include_declared.unwrap_or(true))
2470 .await?;
2471
2472 let next_page_token =
2474 Self::apply_pagination(&mut tables, request.page_token, request.limit);
2475 let mut response = ListTablesResponse::new(tables);
2476 response.page_token = next_page_token;
2477 Ok(response)
2478 }
2479
2480 async fn describe_table(&self, request: DescribeTableRequest) -> Result<DescribeTableResponse> {
2481 self.record_op("describe_table");
2482 self.describe_table_impl(request).await
2483 }
2484
2485 async fn table_exists(&self, request: TableExistsRequest) -> Result<()> {
2486 self.record_op("table_exists");
2487 let is_root_level = request.id.as_ref().is_some_and(|id| id.len() == 1);
2488 let skip_manifest_for_root = self.dir_listing_enabled
2489 && is_root_level
2490 && !self.dir_listing_to_manifest_migration_enabled;
2491 if let Some(ref manifest_ns) = self.manifest_ns
2492 && !skip_manifest_for_root
2493 {
2494 match manifest_ns.table_exists(request.clone()).await {
2495 Ok(()) => return Ok(()),
2496 Err(_) if self.dir_listing_enabled && is_root_level => {
2497 }
2499 Err(e) => return Err(e),
2500 }
2501 }
2502
2503 let table_name = Self::table_name_from_id(&request.id)?;
2504 let table_id = Self::format_table_id_from_request(&request.id);
2505
2506 let status = self.check_table_status(&table_name).await;
2508
2509 if !status.exists {
2510 return Err(NamespaceError::TableNotFound {
2511 message: table_id.clone(),
2512 }
2513 .into());
2514 }
2515
2516 if status.is_deregistered {
2517 return Err(NamespaceError::TableNotFound {
2518 message: format!("Table is deregistered: {}", table_id),
2519 }
2520 .into());
2521 }
2522
2523 Ok(())
2524 }
2525
2526 async fn drop_table(&self, request: DropTableRequest) -> Result<DropTableResponse> {
2527 self.record_op("drop_table");
2528 if let Some(ref manifest_ns) = self.manifest_ns {
2529 return manifest_ns.drop_table(request).await;
2530 }
2531
2532 let table_name = Self::table_name_from_id(&request.id)?;
2533 let table_uri = self.table_full_uri(&table_name);
2534 let table_path = self.table_path(&table_name);
2535
2536 self.object_store
2537 .remove_dir_all(table_path)
2538 .await
2539 .map_err(|e| {
2540 lance_core::Error::from(NamespaceError::Internal {
2541 message: format!("Failed to drop table {}: {:?}", table_name, e),
2542 })
2543 })?;
2544
2545 Ok(DropTableResponse {
2546 id: request.id,
2547 location: Some(table_uri),
2548 ..Default::default()
2549 })
2550 }
2551
2552 async fn create_table(
2553 &self,
2554 request: CreateTableRequest,
2555 request_data: Bytes,
2556 ) -> Result<CreateTableResponse> {
2557 self.record_op("create_table");
2558 if let Some(ref manifest_ns) = self.manifest_ns {
2559 return manifest_ns.create_table(request, request_data).await;
2560 }
2561
2562 Self::validate_dir_only_properties(request.properties.as_ref(), "create_table")?;
2563
2564 let table_name = Self::table_name_from_id(&request.id)?;
2565 let table_uri = self.table_full_uri(&table_name);
2566 let status = self.check_table_status(&table_name).await;
2567 let (reader, _num_rows) =
2568 Self::ipc_reader_from_request_data(&request_data, "create_table")?;
2569
2570 if status.exists && self.table_has_actual_manifests(&table_name).await? {
2571 return Err(NamespaceError::TableAlreadyExists {
2572 message: table_name,
2573 }
2574 .into());
2575 }
2576
2577 let write_result = self
2578 .write_reader_to_table(
2579 &table_uri,
2580 reader,
2581 WriteMode::Create,
2582 request.storage_options.clone(),
2583 )
2584 .await;
2585 if let Err(err) = write_result {
2586 if self.table_uri_has_actual_manifests(&table_uri).await? {
2587 return Err(NamespaceError::TableAlreadyExists {
2588 message: table_name,
2589 }
2590 .into());
2591 }
2592 return Err(err);
2593 }
2594 Ok(CreateTableResponse {
2595 version: Some(1),
2596 location: Some(table_uri),
2597 storage_options: self.storage_options.clone(),
2598 properties: request.properties,
2599 ..Default::default()
2600 })
2601 }
2602
2603 async fn declare_table(&self, request: DeclareTableRequest) -> Result<DeclareTableResponse> {
2604 self.record_op("declare_table");
2605 if let Some(ref manifest_ns) = self.manifest_ns {
2606 let mut response = manifest_ns.declare_table(request.clone()).await?;
2607 if let Some(ref location) = response.location {
2608 let vend = request.vend_credentials.unwrap_or(true);
2610 let identity = request.identity.as_deref();
2611 response.storage_options = self
2612 .get_storage_options_for_table(location, vend, identity)
2613 .await?;
2614 }
2615 if self.table_version_tracking_enabled {
2617 response.managed_versioning = Some(true);
2618 }
2619 return Ok(response);
2620 }
2621
2622 Self::validate_dir_only_properties(request.properties.as_ref(), "declare_table")?;
2623
2624 let table_name = Self::table_name_from_id(&request.id)?;
2625 let table_uri = self.table_full_uri(&table_name);
2626
2627 if let Some(location) = &request.location {
2629 let location = location.trim_end_matches('/');
2630 if location != table_uri {
2631 return Err(NamespaceError::InvalidInput {
2632 message: format!(
2633 "Cannot declare table {} at location {}, must be at location {}",
2634 table_name, location, table_uri
2635 ),
2636 }
2637 .into());
2638 }
2639 }
2640
2641 let status = self.check_table_status(&table_name).await;
2645 if status.exists && !status.has_reserved_file {
2646 return Err(NamespaceError::TableAlreadyExists {
2648 message: table_name.to_string(),
2649 }
2650 .into());
2651 }
2652
2653 let reserved_file_path = self.table_reserved_file_path(&table_name);
2657
2658 self.put_marker_file_atomic(&reserved_file_path, &format!("table {}", table_name))
2659 .await
2660 .map_err(|e| {
2661 if e.contains("already exists") {
2662 lance_core::Error::from(NamespaceError::TableAlreadyExists {
2663 message: table_name.to_string(),
2664 })
2665 } else {
2666 lance_core::Error::from(NamespaceError::Internal { message: e })
2667 }
2668 })?;
2669
2670 let vend_credentials = request.vend_credentials.unwrap_or(true);
2672 let identity = request.identity.as_deref();
2673 let storage_options = self
2674 .get_storage_options_for_table(&table_uri, vend_credentials, identity)
2675 .await?;
2676
2677 Ok(DeclareTableResponse {
2678 location: Some(table_uri),
2679 storage_options,
2680 properties: request.properties,
2681 managed_versioning: if self.table_version_tracking_enabled {
2682 Some(true)
2683 } else {
2684 None
2685 },
2686 ..Default::default()
2687 })
2688 }
2689
2690 async fn register_table(
2691 &self,
2692 request: lance_namespace::models::RegisterTableRequest,
2693 ) -> Result<lance_namespace::models::RegisterTableResponse> {
2694 self.record_op("register_table");
2695 if let Some(ref manifest_ns) = self.manifest_ns {
2697 return LanceNamespace::register_table(manifest_ns.as_ref(), request).await;
2698 }
2699
2700 Err(NamespaceError::Unsupported {
2702 message: "register_table is only supported when manifest mode is enabled".to_string(),
2703 }
2704 .into())
2705 }
2706
2707 async fn deregister_table(
2708 &self,
2709 request: lance_namespace::models::DeregisterTableRequest,
2710 ) -> Result<lance_namespace::models::DeregisterTableResponse> {
2711 self.record_op("deregister_table");
2712 if let Some(ref manifest_ns) = self.manifest_ns {
2714 return LanceNamespace::deregister_table(manifest_ns.as_ref(), request).await;
2715 }
2716
2717 let table_name = Self::table_name_from_id(&request.id)?;
2719 let table_uri = self.table_full_uri(&table_name);
2720
2721 let status = self.check_table_status(&table_name).await;
2724
2725 if !status.exists {
2726 return Err(NamespaceError::TableNotFound {
2727 message: table_name.to_string(),
2728 }
2729 .into());
2730 }
2731
2732 if status.is_deregistered {
2733 return Err(NamespaceError::TableNotFound {
2734 message: format!("Table is already deregistered: {}", table_name),
2735 }
2736 .into());
2737 }
2738
2739 let deregistered_path = self.table_deregistered_file_path(&table_name);
2745 self.put_marker_file_atomic(
2746 &deregistered_path,
2747 &format!("deregistration marker for table {}", table_name),
2748 )
2749 .await
2750 .map_err(|e| {
2751 if e.contains("already exists") {
2752 lance_core::Error::from(NamespaceError::InvalidTableState {
2753 message: format!("Table is already deregistered: {}", table_name),
2754 })
2755 } else {
2756 lance_core::Error::from(NamespaceError::Internal { message: e })
2757 }
2758 })?;
2759
2760 Ok(lance_namespace::models::DeregisterTableResponse {
2761 id: request.id,
2762 location: Some(table_uri),
2763 ..Default::default()
2764 })
2765 }
2766
2767 async fn list_table_versions(
2768 &self,
2769 request: ListTableVersionsRequest,
2770 ) -> Result<ListTableVersionsResponse> {
2771 self.record_op("list_table_versions");
2772 if self.table_version_storage_enabled
2774 && let Some(ref manifest_ns) = self.manifest_ns
2775 {
2776 let table_id = request.id.clone().unwrap_or_default();
2777 let want_descending = request.descending == Some(true);
2778 return manifest_ns
2779 .list_table_versions(&table_id, want_descending, request.limit)
2780 .await;
2781 }
2782
2783 let table_uri = self.resolve_table_location(&request.id).await?;
2785 let want_descending = request.descending == Some(true);
2786 let table_versions = self
2787 .list_table_versions_from_storage(&table_uri, want_descending, request.limit)
2788 .await?;
2789
2790 Ok(ListTableVersionsResponse {
2791 versions: table_versions,
2792 page_token: None,
2793 })
2794 }
2795
2796 async fn create_table_version(
2797 &self,
2798 request: CreateTableVersionRequest,
2799 ) -> Result<CreateTableVersionResponse> {
2800 self.record_op("create_table_version");
2801 let table_uri = self.resolve_table_location(&request.id).await?;
2802
2803 let staging_manifest_path = &request.manifest_path;
2804 let version = request.version as u64;
2805
2806 let table_path = self.object_store_path_from_uri(&table_uri)?;
2807
2808 let naming_scheme = match request.naming_scheme.as_deref() {
2810 Some("V1") => ManifestNamingScheme::V1,
2811 _ => ManifestNamingScheme::V2,
2812 };
2813
2814 let final_path = naming_scheme.manifest_path(&table_path, version);
2816
2817 let staging_path = Path::parse(staging_manifest_path).map_err(|e| {
2818 lance_core::Error::from(NamespaceError::InvalidInput {
2819 message: format!(
2820 "Invalid staging manifest path '{}': {}",
2821 staging_manifest_path, e
2822 ),
2823 })
2824 })?;
2825
2826 let copy_result = match self
2827 .object_store
2828 .inner
2829 .copy_if_not_exists(&staging_path, &final_path)
2830 .await
2831 {
2832 Ok(()) => Ok(()),
2833 Err(ObjectStoreError::NotImplemented { .. })
2834 | Err(ObjectStoreError::NotSupported { .. }) => {
2835 let manifest_data = self
2836 .object_store
2837 .inner
2838 .get(&staging_path)
2839 .await
2840 .map_err(|e| {
2841 lance_core::Error::from(NamespaceError::Internal {
2842 message: format!(
2843 "Failed to read staging manifest at '{}': {}",
2844 staging_manifest_path, e
2845 ),
2846 })
2847 })?
2848 .bytes()
2849 .await
2850 .map_err(|e| {
2851 lance_core::Error::from(NamespaceError::Internal {
2852 message: format!(
2853 "Failed to read staging manifest bytes at '{}': {}",
2854 staging_manifest_path, e
2855 ),
2856 })
2857 })?;
2858 self.object_store
2859 .inner
2860 .put_opts(
2861 &final_path,
2862 manifest_data.into(),
2863 PutOptions {
2864 mode: PutMode::Create,
2865 ..Default::default()
2866 },
2867 )
2868 .await
2869 .map(|_| ())
2870 }
2871 Err(e) => Err(e),
2872 };
2873
2874 match copy_result {
2875 Ok(()) => {}
2876 Err(ObjectStoreError::AlreadyExists { .. })
2877 | Err(ObjectStoreError::Precondition { .. }) => {
2878 return Err(lance_core::Error::from(
2879 NamespaceError::ConcurrentModification {
2880 message: format!(
2881 "Version {} already exists for table at '{}'",
2882 version, table_uri
2883 ),
2884 },
2885 ));
2886 }
2887 Err(e) => {
2888 return Err(lance_core::Error::from(NamespaceError::Internal {
2889 message: format!(
2890 "Failed to create version {} for table at '{}': {}",
2891 version, table_uri, e
2892 ),
2893 }));
2894 }
2895 }
2896
2897 let final_meta = self
2898 .object_store
2899 .inner
2900 .head(&final_path)
2901 .await
2902 .map_err(|e| {
2903 lance_core::Error::from(NamespaceError::Internal {
2904 message: format!(
2905 "Failed to stat created version {} for table at '{}': {}",
2906 version, table_uri, e
2907 ),
2908 })
2909 })?;
2910 let manifest_size = final_meta.size as i64;
2911
2912 if let Err(e) = self.object_store.inner.delete(&staging_path).await {
2914 log::warn!(
2915 "Failed to delete staging manifest at '{}': {:?}",
2916 staging_path,
2917 e
2918 );
2919 }
2920
2921 if self.table_version_storage_enabled
2923 && let Some(ref manifest_ns) = self.manifest_ns
2924 {
2925 let table_id_str =
2926 manifest::ManifestNamespace::str_object_id(&request.id.clone().unwrap_or_default());
2927 let object_id =
2928 manifest::ManifestNamespace::build_version_object_id(&table_id_str, version as i64);
2929 let metadata_json = serde_json::json!({
2930 "manifest_path": final_path.to_string(),
2931 "manifest_size": manifest_size,
2932 "e_tag": final_meta.e_tag,
2933 "naming_scheme": request.naming_scheme.as_deref().unwrap_or("V2"),
2934 })
2935 .to_string();
2936
2937 if let Err(e) = manifest_ns
2938 .insert_into_manifest_with_metadata(
2939 vec![manifest::ManifestEntry {
2940 object_id,
2941 object_type: manifest::ObjectType::TableVersion,
2942 location: None,
2943 metadata: Some(metadata_json),
2944 }],
2945 None,
2946 )
2947 .await
2948 {
2949 log::warn!(
2950 "Failed to record table version in __manifest (best-effort): {:?}",
2951 e
2952 );
2953 }
2954 }
2955
2956 Ok(CreateTableVersionResponse {
2957 transaction_id: None,
2958 version: Some(Box::new(TableVersion {
2959 version: version as i64,
2960 manifest_path: final_path.to_string(),
2961 manifest_size: Some(manifest_size),
2962 e_tag: final_meta.e_tag,
2963 timestamp_millis: None,
2964 metadata: None,
2965 })),
2966 })
2967 }
2968
2969 async fn describe_table_version(
2970 &self,
2971 request: DescribeTableVersionRequest,
2972 ) -> Result<DescribeTableVersionResponse> {
2973 self.record_op("describe_table_version");
2974 if self.table_version_storage_enabled
2977 && let (Some(manifest_ns), Some(version)) = (&self.manifest_ns, request.version)
2978 {
2979 let table_id = request.id.clone().unwrap_or_default();
2980 return manifest_ns.describe_table_version(&table_id, version).await;
2981 }
2982
2983 let table_uri = self.resolve_table_location(&request.id).await?;
2985 let versions = self
2986 .list_table_versions_from_storage(&table_uri, true, None)
2987 .await?;
2988 let table_version = if let Some(requested_version) = request.version {
2989 versions
2990 .into_iter()
2991 .find(|version| version.version == requested_version)
2992 .ok_or_else(|| {
2993 lance_core::Error::from(NamespaceError::TableVersionNotFound {
2994 message: format!(
2995 "version {} for table {}",
2996 requested_version,
2997 Self::format_table_id_from_request(&request.id)
2998 ),
2999 })
3000 })?
3001 } else {
3002 versions.into_iter().next().ok_or_else(|| {
3003 lance_core::Error::from(NamespaceError::TableVersionNotFound {
3004 message: format!(
3005 "latest version for table {}",
3006 Self::format_table_id_from_request(&request.id)
3007 ),
3008 })
3009 })?
3010 };
3011
3012 Ok(DescribeTableVersionResponse {
3013 version: Box::new(table_version),
3014 })
3015 }
3016
3017 async fn batch_delete_table_versions(
3018 &self,
3019 request: BatchDeleteTableVersionsRequest,
3020 ) -> Result<BatchDeleteTableVersionsResponse> {
3021 self.record_op("batch_delete_table_versions");
3022 let ranges: Vec<(i64, i64)> = request
3025 .ranges
3026 .iter()
3027 .map(|r| {
3028 let start = r.start_version;
3029 let end = if r.end_version > 0 {
3030 r.end_version
3031 } else {
3032 start
3033 };
3034 (start, end)
3035 })
3036 .collect();
3037 let table_entries = vec![TableDeleteEntry {
3038 table_id: request.id.clone(),
3039 ranges,
3040 }];
3041
3042 let mut total_deleted_count = 0i64;
3043
3044 if self.table_version_storage_enabled
3045 && let Some(ref manifest_ns) = self.manifest_ns
3046 {
3047 let mut all_object_ids: Vec<String> = Vec::new();
3054 for te in &table_entries {
3055 let table_id_str = manifest::ManifestNamespace::str_object_id(
3056 &te.table_id.clone().unwrap_or_default(),
3057 );
3058 for (start, end) in &te.ranges {
3059 for version in *start..=*end {
3060 let object_id = manifest::ManifestNamespace::build_version_object_id(
3061 &table_id_str,
3062 version,
3063 );
3064 all_object_ids.push(object_id);
3065 }
3066 }
3067 }
3068
3069 if !all_object_ids.is_empty() {
3070 total_deleted_count = manifest_ns
3071 .batch_delete_table_versions_by_object_ids(&all_object_ids)
3072 .await?;
3073 }
3074
3075 let _ = self
3080 .delete_physical_version_files(&table_entries, true)
3081 .await;
3082
3083 return Ok(BatchDeleteTableVersionsResponse {
3084 deleted_count: Some(total_deleted_count),
3085 transaction_id: None,
3086 });
3087 }
3088
3089 total_deleted_count = self
3091 .delete_physical_version_files(&table_entries, false)
3092 .await?;
3093
3094 Ok(BatchDeleteTableVersionsResponse {
3095 deleted_count: Some(total_deleted_count),
3096 transaction_id: None,
3097 })
3098 }
3099
3100 async fn create_table_index(
3101 &self,
3102 request: CreateTableIndexRequest,
3103 ) -> Result<CreateTableIndexResponse> {
3104 self.record_op("create_table_index");
3105 let table_uri = self.resolve_table_location(&request.id).await?;
3106 let mut dataset = self
3107 .load_dataset(&table_uri, None, "create_table_index")
3108 .await?;
3109 let index_request = Self::build_index_params(&request)?;
3110
3111 dataset
3112 .create_index(
3113 &[request.column.as_str()],
3114 index_request.index_type(),
3115 request.name.clone(),
3116 index_request.params(),
3117 false,
3118 )
3119 .await
3120 .map_err(|e| {
3121 let err_msg = format!("{}", e);
3122 let ns_err = if err_msg.contains("already exists") {
3123 NamespaceError::TableIndexAlreadyExists {
3124 message: format!(
3125 "Index '{}' already exists on table '{}': {:?}",
3126 request.name.as_deref().unwrap_or("<auto-generated>"),
3127 table_uri,
3128 e
3129 ),
3130 }
3131 } else if err_msg.contains("not found") || err_msg.contains("does not exist") {
3132 NamespaceError::TableColumnNotFound {
3133 message: format!(
3134 "Column '{}' not found for table '{}': {:?}",
3135 request.column, table_uri, e
3136 ),
3137 }
3138 } else {
3139 NamespaceError::Internal {
3140 message: format!(
3141 "Failed to create {} index '{}' on column '{}' for table '{}': {:?}",
3142 request.index_type,
3143 request.name.as_deref().unwrap_or("<auto-generated>"),
3144 request.column,
3145 table_uri,
3146 e
3147 ),
3148 }
3149 };
3150 lance_core::Error::from(ns_err)
3151 })?;
3152
3153 let transaction_id = dataset
3154 .read_transaction()
3155 .await
3156 .map_err(|e| {
3157 lance_core::Error::from(NamespaceError::Internal {
3158 message: format!(
3159 "Failed to read committed transaction after creating index on '{}': {}",
3160 table_uri, e
3161 ),
3162 })
3163 })?
3164 .map(|transaction| transaction.uuid);
3165
3166 Ok(CreateTableIndexResponse { transaction_id })
3167 }
3168
3169 async fn list_table_indices(
3170 &self,
3171 request: ListTableIndicesRequest,
3172 ) -> Result<ListTableIndicesResponse> {
3173 self.record_op("list_table_indices");
3174 let table_uri = self.resolve_table_location(&request.id).await?;
3175 let dataset = self
3176 .load_dataset(&table_uri, request.version, "list_table_indices")
3177 .await?;
3178 let mut indices = dataset
3179 .describe_indices(None)
3180 .await
3181 .map_err(|e| {
3182 lance_core::Error::from(NamespaceError::Internal {
3183 message: format!("Failed to describe table indices for '{}': {:?}", table_uri, e),
3184 })
3185 })?
3186 .into_iter()
3187 .filter(|description| {
3188 description
3189 .metadata()
3190 .first()
3191 .map(|metadata| !is_system_index(metadata))
3192 .unwrap_or(false)
3193 })
3194 .map(|description| {
3195 let columns = description
3196 .field_ids()
3197 .iter()
3198 .map(|field_id| {
3199 dataset
3200 .schema()
3201 .field_path(i32::try_from(*field_id).map_err(|e| {
3202 lance_core::Error::from(NamespaceError::Internal {
3203 message: format!(
3204 "Field id {} does not fit in i32 for table '{}': {}",
3205 field_id, table_uri, e
3206 ),
3207 })
3208 })?)
3209 .map_err(|e| {
3210 lance_core::Error::from(NamespaceError::Internal {
3211 message: format!(
3212 "Failed to resolve field path for field_id {} in table '{}': {}",
3213 field_id, table_uri, e
3214 ),
3215 })
3216 })
3217 })
3218 .collect::<Result<Vec<_>>>()?;
3219
3220 Ok(IndexContent {
3221 index_name: description.name().to_string(),
3222 index_uuid: description.metadata()[0].uuid.to_string(),
3223 columns,
3224 status: "SUCCEEDED".to_string(),
3225 })
3226 })
3227 .collect::<Result<Vec<_>>>()?;
3228
3229 let page_token = Self::paginate_indices(&mut indices, request.page_token, request.limit);
3230 Ok(ListTableIndicesResponse {
3231 indexes: indices,
3232 page_token,
3233 })
3234 }
3235
3236 async fn describe_table_index_stats(
3237 &self,
3238 request: DescribeTableIndexStatsRequest,
3239 ) -> Result<DescribeTableIndexStatsResponse> {
3240 self.record_op("describe_table_index_stats");
3241 let table_uri = self.resolve_table_location(&request.id).await?;
3242 let dataset = self
3243 .load_dataset(&table_uri, request.version, "describe_table_index_stats")
3244 .await?;
3245 let index_name = request.index_name.as_deref().ok_or_else(|| {
3246 lance_core::Error::from(NamespaceError::InvalidInput {
3247 message: "Index name is required for describe_table_index_stats".to_string(),
3248 })
3249 })?;
3250 let metadatas = dataset
3251 .load_indices_by_name(index_name)
3252 .await
3253 .map_err(|e| {
3254 lance_core::Error::from(NamespaceError::TableIndexNotFound {
3255 message: format!(
3256 "Failed to load index '{}' metadata for table '{}': {}",
3257 index_name, table_uri, e
3258 ),
3259 })
3260 })?;
3261 if metadatas.first().is_some_and(is_system_index) {
3262 return Err(NamespaceError::Unsupported {
3263 message: format!("System index '{}' is not exposed by this API", index_name),
3264 }
3265 .into());
3266 }
3267
3268 let stats = <Dataset as DatasetIndexExt>::index_statistics(&dataset, index_name)
3269 .await
3270 .map_err(|e| {
3271 lance_core::Error::from(NamespaceError::TableIndexNotFound {
3272 message: format!(
3273 "Failed to describe index statistics for '{}' on table '{}': {}",
3274 index_name, table_uri, e
3275 ),
3276 })
3277 })?;
3278 let stats: serde_json::Value = serde_json::from_str(&stats).map_err(|e| {
3279 lance_core::Error::from(NamespaceError::Internal {
3280 message: format!(
3281 "Failed to parse index statistics for '{}' on table '{}': {}",
3282 index_name, table_uri, e
3283 ),
3284 })
3285 })?;
3286
3287 Ok(Self::describe_table_index_stats_response(&stats))
3288 }
3289
3290 async fn describe_transaction(
3291 &self,
3292 request: DescribeTransactionRequest,
3293 ) -> Result<DescribeTransactionResponse> {
3294 self.record_op("describe_transaction");
3295 let mut request_id = request.id.ok_or_else(|| {
3296 lance_core::Error::from(NamespaceError::InvalidInput {
3297 message: "Transaction id must include table id and transaction identifier"
3298 .to_string(),
3299 })
3300 })?;
3301 if request_id.len() < 2 {
3302 return Err(NamespaceError::InvalidInput {
3303 message: format!(
3304 "Transaction request id must include table id and transaction identifier, got {:?}",
3305 request_id
3306 ),
3307 }
3308 .into());
3309 }
3310
3311 let id = request_id.pop().expect("request_id len checked above");
3312 let table_id = Some(request_id);
3313 let table_uri = self.resolve_table_location(&table_id).await?;
3314 let dataset = self
3315 .load_dataset(&table_uri, None, "describe_transaction")
3316 .await?;
3317 let (version, transaction) = self.find_transaction(&dataset, &id).await?;
3318
3319 Ok(Self::transaction_response(version, &transaction))
3320 }
3321
3322 async fn create_table_scalar_index(
3323 &self,
3324 request: CreateTableIndexRequest,
3325 ) -> Result<CreateTableScalarIndexResponse> {
3326 self.record_op("create_table_scalar_index");
3327 let index_type = Self::parse_index_type(&request.index_type)?;
3328 if !index_type.is_scalar() {
3329 return Err(NamespaceError::InvalidInput {
3330 message: format!(
3331 "create_table_scalar_index only supports scalar index types, got {}",
3332 request.index_type
3333 ),
3334 }
3335 .into());
3336 }
3337
3338 let response = self.create_table_index(request).await?;
3339 Ok(CreateTableScalarIndexResponse {
3340 transaction_id: response.transaction_id,
3341 })
3342 }
3343
3344 async fn drop_table_index(
3345 &self,
3346 request: DropTableIndexRequest,
3347 ) -> Result<DropTableIndexResponse> {
3348 self.record_op("drop_table_index");
3349 let table_uri = self.resolve_table_location(&request.id).await?;
3350 let index_name = request.index_name.as_deref().ok_or_else(|| {
3351 lance_core::Error::from(NamespaceError::InvalidInput {
3352 message: "Index name is required for drop_table_index".to_string(),
3353 })
3354 })?;
3355 let mut dataset = self
3356 .load_dataset(&table_uri, None, "drop_table_index")
3357 .await?;
3358 let metadatas = dataset
3359 .load_indices_by_name(index_name)
3360 .await
3361 .map_err(|e| {
3362 lance_core::Error::from(NamespaceError::TableIndexNotFound {
3363 message: format!(
3364 "Failed to load index '{}' before dropping it from table '{}': {}",
3365 index_name, table_uri, e
3366 ),
3367 })
3368 })?;
3369 if metadatas.first().is_some_and(is_system_index) {
3370 return Err(NamespaceError::Unsupported {
3371 message: format!(
3372 "System index '{}' cannot be dropped via this API",
3373 index_name
3374 ),
3375 }
3376 .into());
3377 }
3378
3379 dataset.drop_index(index_name).await.map_err(|e| {
3380 lance_core::Error::from(NamespaceError::TableIndexNotFound {
3381 message: format!(
3382 "Failed to drop index '{}' from table '{}': {}",
3383 index_name, table_uri, e
3384 ),
3385 })
3386 })?;
3387
3388 let transaction_id = dataset
3389 .read_transaction()
3390 .await
3391 .map_err(|e| {
3392 lance_core::Error::from(NamespaceError::Internal {
3393 message: format!(
3394 "Failed to read committed transaction after dropping index '{}' from '{}': {}",
3395 index_name, table_uri, e
3396 ),
3397 })
3398 })?
3399 .map(|transaction| transaction.uuid);
3400
3401 Ok(DropTableIndexResponse { transaction_id })
3402 }
3403
3404 async fn list_all_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
3405 let mut tables = self.list_directory_tables().await?;
3408 tables = self
3409 .filter_declared_tables(tables, request.include_declared.unwrap_or(true))
3410 .await?;
3411 Self::apply_pagination(&mut tables, request.page_token, request.limit);
3412 Ok(ListTablesResponse::new(tables))
3413 }
3414
3415 async fn restore_table(&self, request: RestoreTableRequest) -> Result<RestoreTableResponse> {
3416 let version = request.version;
3417 if version < 0 {
3418 return Err(Error::invalid_input_source(
3419 format!(
3420 "Table version for restore_table must be non-negative, got {}",
3421 version
3422 )
3423 .into(),
3424 ));
3425 }
3426
3427 let table_uri = self.resolve_table_location(&request.id).await?;
3428 let mut dataset = self.load_dataset(&table_uri, None, "restore_table").await?;
3429
3430 dataset = dataset
3431 .checkout_version(version as u64)
3432 .await
3433 .map_err(|e| {
3434 Error::namespace_source(
3435 format!(
3436 "Failed to checkout version {} for restore at '{}': {}",
3437 version, table_uri, e
3438 )
3439 .into(),
3440 )
3441 })?;
3442
3443 dataset.restore().await.map_err(|e| {
3444 Error::namespace_source(
3445 format!(
3446 "Failed to restore table at '{}' to version {}: {}",
3447 table_uri, version, e
3448 )
3449 .into(),
3450 )
3451 })?;
3452
3453 let transaction_id = dataset
3454 .read_transaction()
3455 .await
3456 .map_err(|e| {
3457 Error::namespace_source(
3458 format!(
3459 "Failed to read transaction after restoring '{}': {}",
3460 table_uri, e
3461 )
3462 .into(),
3463 )
3464 })?
3465 .map(|t| t.uuid);
3466
3467 Ok(RestoreTableResponse { transaction_id })
3468 }
3469
3470 async fn update_table_schema_metadata(
3471 &self,
3472 request: UpdateTableSchemaMetadataRequest,
3473 ) -> Result<UpdateTableSchemaMetadataResponse> {
3474 let table_uri = self.resolve_table_location(&request.id).await?;
3475 let mut dataset = self
3476 .load_dataset(&table_uri, None, "update_table_schema_metadata")
3477 .await?;
3478
3479 let new_metadata = request.metadata.unwrap_or_default();
3480 let updated_metadata = dataset
3481 .update_schema_metadata(new_metadata.iter().map(|(k, v)| (k.as_str(), v.as_str())))
3482 .await
3483 .map_err(|e| {
3484 Error::namespace_source(
3485 format!(
3486 "Failed to update schema metadata for table at '{}': {}",
3487 table_uri, e
3488 )
3489 .into(),
3490 )
3491 })?;
3492
3493 let transaction_id = dataset
3494 .read_transaction()
3495 .await
3496 .map_err(|e| {
3497 Error::namespace_source(
3498 format!(
3499 "Failed to read transaction after updating metadata for '{}': {}",
3500 table_uri, e
3501 )
3502 .into(),
3503 )
3504 })?
3505 .map(|t| t.uuid);
3506
3507 Ok(UpdateTableSchemaMetadataResponse {
3508 metadata: Some(updated_metadata),
3509 transaction_id,
3510 })
3511 }
3512
3513 async fn get_table_stats(
3514 &self,
3515 request: GetTableStatsRequest,
3516 ) -> Result<GetTableStatsResponse> {
3517 let table_uri = self.resolve_table_location(&request.id).await?;
3518 let dataset = Arc::new(
3519 self.load_dataset(&table_uri, None, "get_table_stats")
3520 .await?,
3521 );
3522
3523 let data_stats = dataset.calculate_data_stats().await.map_err(|e| {
3525 Error::namespace_source(
3526 format!(
3527 "Failed to calculate data statistics for table at '{}': {}",
3528 table_uri, e
3529 )
3530 .into(),
3531 )
3532 })?;
3533 let total_bytes: i64 = data_stats
3534 .fields
3535 .iter()
3536 .map(|f| f.bytes_on_disk as i64)
3537 .sum();
3538
3539 let fragment_row_futures: Vec<_> = dataset
3541 .get_fragments()
3542 .into_iter()
3543 .map(|f| async move { f.physical_rows().await })
3544 .collect();
3545 let fragment_row_results = futures::future::join_all(fragment_row_futures).await;
3546 let mut fragment_row_counts: Vec<i64> = fragment_row_results
3547 .into_iter()
3548 .filter_map(|r| r.ok())
3549 .map(|r| r as i64)
3550 .collect();
3551
3552 let num_fragments = fragment_row_counts.len() as i64;
3553 let num_rows: i64 = fragment_row_counts.iter().sum();
3554
3555 const SMALL_FRAGMENT_THRESHOLD: i64 = 1024 * 1024;
3558 let num_small_fragments = fragment_row_counts
3559 .iter()
3560 .filter(|&&r| r < SMALL_FRAGMENT_THRESHOLD)
3561 .count() as i64;
3562
3563 fragment_row_counts.sort_unstable();
3565 let lengths = if fragment_row_counts.is_empty() {
3566 FragmentSummary::new(0, 0, 0, 0, 0, 0, 0)
3567 } else {
3568 let len = fragment_row_counts.len();
3569 let min = fragment_row_counts[0];
3570 let max = fragment_row_counts[len - 1];
3571 let mean = num_rows / num_fragments;
3572 let pct = |p: f64| fragment_row_counts[((len - 1) as f64 * p) as usize];
3573 FragmentSummary::new(min, max, mean, pct(0.25), pct(0.50), pct(0.75), pct(0.99))
3574 };
3575
3576 let indices = dataset.load_indices().await.map_err(|e| {
3578 Error::namespace_source(
3579 format!("Failed to load indices for table at '{}': {}", table_uri, e).into(),
3580 )
3581 })?;
3582 let num_indices = indices.iter().filter(|m| !is_system_index(m)).count() as i64;
3583
3584 let fragment_stats = FragmentStats::new(num_fragments, num_small_fragments, lengths);
3585 Ok(GetTableStatsResponse::new(
3586 total_bytes,
3587 num_rows,
3588 num_indices,
3589 fragment_stats,
3590 ))
3591 }
3592
3593 async fn explain_table_query_plan(
3594 &self,
3595 request: ExplainTableQueryPlanRequest,
3596 ) -> Result<String> {
3597 let table_uri = self.resolve_table_location(&request.id).await?;
3598 let dataset = self
3599 .load_dataset(
3600 &table_uri,
3601 request.query.version,
3602 "explain_table_query_plan",
3603 )
3604 .await?;
3605 let verbose = request.verbose.unwrap_or(false);
3606
3607 let mut scanner = dataset.scan();
3608 Self::apply_query_params_to_scanner(
3609 &mut scanner,
3610 request.query.filter.as_deref(),
3611 request.query.columns.as_deref(),
3612 request.query.vector_column.as_deref(),
3613 &request.query.vector,
3614 request.query.k,
3615 request.query.offset,
3616 request.query.prefilter,
3617 request.query.bypass_vector_index,
3618 request.query.nprobes,
3619 request.query.ef,
3620 request.query.refine_factor,
3621 request.query.distance_type.as_deref(),
3622 request.query.fast_search,
3623 request.query.with_row_id,
3624 request.query.lower_bound,
3625 request.query.upper_bound,
3626 "explain_table_query_plan",
3627 )?;
3628
3629 scanner.explain_plan(verbose).await.map_err(|e| {
3630 Error::namespace_source(
3631 format!(
3632 "Failed to explain query plan for table at '{}': {}",
3633 table_uri, e
3634 )
3635 .into(),
3636 )
3637 })
3638 }
3639
3640 async fn analyze_table_query_plan(
3641 &self,
3642 request: AnalyzeTableQueryPlanRequest,
3643 ) -> Result<String> {
3644 let table_uri = self.resolve_table_location(&request.id).await?;
3645 let dataset = self
3646 .load_dataset(&table_uri, request.version, "analyze_table_query_plan")
3647 .await?;
3648
3649 let mut scanner = dataset.scan();
3650 Self::apply_query_params_to_scanner(
3651 &mut scanner,
3652 request.filter.as_deref(),
3653 request.columns.as_deref(),
3654 request.vector_column.as_deref(),
3655 &request.vector,
3656 request.k,
3657 request.offset,
3658 request.prefilter,
3659 request.bypass_vector_index,
3660 request.nprobes,
3661 request.ef,
3662 request.refine_factor,
3663 request.distance_type.as_deref(),
3664 request.fast_search,
3665 request.with_row_id,
3666 request.lower_bound,
3667 request.upper_bound,
3668 "analyze_table_query_plan",
3669 )?;
3670
3671 scanner.analyze_plan().await.map_err(|e| {
3672 Error::namespace_source(
3673 format!(
3674 "Failed to analyze query plan for table at '{}': {}",
3675 table_uri, e
3676 )
3677 .into(),
3678 )
3679 })
3680 }
3681
3682 async fn count_table_rows(&self, request: CountTableRowsRequest) -> Result<i64> {
3683 self.record_op("count_table_rows");
3684 let table_uri = self.resolve_table_location(&request.id).await?;
3685 let dataset = self
3686 .load_dataset(&table_uri, request.version, "count_table_rows")
3687 .await?;
3688
3689 let count =
3690 dataset
3691 .count_rows(request.predicate)
3692 .await
3693 .map_err(|e| NamespaceError::Internal {
3694 message: format!("Failed to count rows for table at '{}': {:?}", table_uri, e),
3695 })?;
3696
3697 Ok(count as i64)
3698 }
3699
3700 async fn insert_into_table(
3701 &self,
3702 request: InsertIntoTableRequest,
3703 request_data: Bytes,
3704 ) -> Result<InsertIntoTableResponse> {
3705 self.record_op("insert_into_table");
3706 let table_uri = self.resolve_table_location(&request.id).await?;
3707 let (reader, _num_rows) =
3708 Self::ipc_reader_from_request_data(&request_data, "insert_into_table")?;
3709
3710 let mode = match request.mode.as_deref() {
3711 Some(m) if m.eq_ignore_ascii_case("overwrite") => WriteMode::Overwrite,
3712 Some(m) if m.eq_ignore_ascii_case("append") => WriteMode::Append,
3713 None => WriteMode::Append,
3714 Some(m) => {
3715 return Err(lance_namespace::error::NamespaceError::InvalidInput {
3716 message: format!(
3717 "Unsupported write mode '{}'. Supported modes are: 'append', 'overwrite'",
3718 m
3719 ),
3720 }
3721 .into());
3722 }
3723 };
3724
3725 if !self.table_uri_has_actual_manifests(&table_uri).await? {
3726 self.write_reader_to_table(&table_uri, reader, WriteMode::Create, None)
3727 .await?;
3728 } else {
3729 self.write_reader_to_table(&table_uri, reader, mode, None)
3730 .await?;
3731 }
3732
3733 Ok(InsertIntoTableResponse {
3734 transaction_id: None,
3735 })
3736 }
3737
3738 async fn merge_insert_into_table(
3739 &self,
3740 request: MergeInsertIntoTableRequest,
3741 request_data: Bytes,
3742 ) -> Result<MergeInsertIntoTableResponse> {
3743 self.record_op("merge_insert_into_table");
3744 let table_uri = self.resolve_table_location(&request.id).await?;
3745 let on = request.on.as_ref().ok_or_else(|| {
3746 lance_core::Error::from(NamespaceError::InvalidInput {
3747 message: "'on' field is required for merge_insert_into_table".to_string(),
3748 })
3749 })?;
3750
3751 let table_has_manifests = self.table_uri_has_actual_manifests(&table_uri).await?;
3752 let (reader, num_rows) =
3753 Self::ipc_reader_from_request_data(&request_data, "merge_insert_into_table")?;
3754
3755 if !table_has_manifests {
3756 let dataset = self
3757 .write_reader_to_table(&table_uri, reader, WriteMode::Create, None)
3758 .await?;
3759 let version = dataset.version().version as i64;
3760 return Ok(MergeInsertIntoTableResponse {
3761 transaction_id: None,
3762 num_updated_rows: Some(0),
3763 num_inserted_rows: Some(num_rows as i64),
3764 num_deleted_rows: Some(0),
3765 version: Some(version),
3766 });
3767 }
3768
3769 let dataset = Arc::new(
3770 self.load_dataset(&table_uri, None, "merge_insert_into_table")
3771 .await?,
3772 );
3773
3774 let mut merge_builder = MergeInsertBuilder::try_new(dataset.clone(), vec![on.clone()])
3775 .map_err(|e| {
3776 lance_core::Error::from(NamespaceError::InvalidInput {
3777 message: format!("Failed to create merge_insert_into_table builder: {}", e),
3778 })
3779 })?;
3780
3781 if let Some(filter) = request.when_matched_update_all_filt.as_deref() {
3782 let behavior = WhenMatched::update_if(dataset.as_ref(), filter).map_err(|e| {
3783 lance_core::Error::from(NamespaceError::InvalidInput {
3784 message: format!(
3785 "Invalid when_matched_update_all_filt for merge_insert_into_table: {}",
3786 e
3787 ),
3788 })
3789 })?;
3790 merge_builder.when_matched(behavior);
3791 } else if request.when_matched_update_all.unwrap_or(false) {
3792 merge_builder.when_matched(WhenMatched::UpdateAll);
3793 }
3794
3795 if matches!(request.when_not_matched_insert_all, Some(false)) {
3796 merge_builder.when_not_matched(WhenNotMatched::DoNothing);
3797 } else {
3798 merge_builder.when_not_matched(WhenNotMatched::InsertAll);
3799 }
3800
3801 if let Some(filter) = request.when_not_matched_by_source_delete_filt.as_deref() {
3802 let behavior = WhenNotMatchedBySource::delete_if(dataset.as_ref(), filter).map_err(|e| {
3803 lance_core::Error::from(NamespaceError::InvalidInput {
3804 message: format!(
3805 "Invalid when_not_matched_by_source_delete_filt for merge_insert_into_table: {}",
3806 e
3807 ),
3808 })
3809 })?;
3810 merge_builder.when_not_matched_by_source(behavior);
3811 } else if request.when_not_matched_by_source_delete.unwrap_or(false) {
3812 merge_builder.when_not_matched_by_source(WhenNotMatchedBySource::Delete);
3813 }
3814
3815 if let Some(use_index) = request.use_index {
3816 merge_builder.use_index(use_index);
3817 }
3818
3819 let (dataset, stats) = merge_builder
3820 .try_build()
3821 .map_err(|e| {
3822 lance_core::Error::from(NamespaceError::InvalidInput {
3823 message: format!("Failed to build merge_insert_into_table job: {}", e),
3824 })
3825 })?
3826 .execute_reader(reader)
3827 .await
3828 .map_err(|e| NamespaceError::Internal {
3829 message: format!(
3830 "Failed to merge_insert_into_table at '{}': {}",
3831 table_uri, e
3832 ),
3833 })?;
3834
3835 Ok(MergeInsertIntoTableResponse {
3836 transaction_id: None,
3837 num_updated_rows: Some(stats.num_updated_rows as i64),
3838 num_inserted_rows: Some(stats.num_inserted_rows as i64),
3839 num_deleted_rows: Some(stats.num_deleted_rows as i64),
3840 version: Some(dataset.version().version as i64),
3841 })
3842 }
3843
3844 async fn query_table(&self, request: QueryTableRequest) -> Result<Bytes> {
3845 use arrow::ipc::writer::FileWriter;
3846
3847 self.record_op("query_table");
3848 let table_uri = self.resolve_table_location(&request.id).await?;
3849 let dataset = self
3850 .load_dataset(&table_uri, request.version, "query_table")
3851 .await?;
3852
3853 let mut scanner = dataset.scan();
3855
3856 let has_vector_query = request
3859 .vector
3860 .single_vector
3861 .as_ref()
3862 .map(|sv| !sv.is_empty())
3863 .unwrap_or(false)
3864 || request
3865 .vector
3866 .multi_vector
3867 .as_ref()
3868 .map(|mv| !mv.is_empty())
3869 .unwrap_or(false);
3870
3871 if let Some(prefilter) = request.prefilter {
3873 scanner.prefilter(prefilter);
3874 }
3875
3876 if has_vector_query {
3878 let vector_column = request.vector_column.as_deref().unwrap_or("vector");
3879
3880 let query_vector: Vec<f32> = request
3882 .vector
3883 .single_vector
3884 .clone()
3885 .or_else(|| {
3886 request
3887 .vector
3888 .multi_vector
3889 .as_ref()
3890 .and_then(|mv| mv.first().cloned())
3891 })
3892 .unwrap_or_default();
3893
3894 if !query_vector.is_empty() {
3895 let k = if request.k > 0 {
3896 request.k as usize
3897 } else {
3898 10
3899 };
3900 let query_array = Float32Array::from(query_vector);
3901 scanner
3902 .nearest(vector_column, &query_array, k)
3903 .map_err(|e| NamespaceError::InvalidInput {
3904 message: format!("Invalid vector search: {:?}", e),
3905 })?;
3906
3907 if let Some(ref distance_type) = request.distance_type {
3909 let metric = match distance_type.to_lowercase().as_str() {
3910 "l2" | "euclidean" => MetricType::L2,
3911 "cosine" => MetricType::Cosine,
3912 "dot" | "inner_product" => MetricType::Dot,
3913 "hamming" => MetricType::Hamming,
3914 _ => {
3915 return Err(NamespaceError::InvalidInput {
3916 message: format!("Unknown distance type: {}", distance_type),
3917 }
3918 .into());
3919 }
3920 };
3921 scanner.distance_metric(metric);
3922 }
3923
3924 if let Some(nprobes) = request.nprobes {
3926 scanner.minimum_nprobes(nprobes as usize);
3927 }
3928
3929 if let Some(ef) = request.ef {
3931 scanner.ef(ef as usize);
3932 }
3933
3934 if let Some(refine_factor) = request.refine_factor {
3936 scanner.refine(refine_factor as u32);
3937 }
3938
3939 if request.lower_bound.is_some() || request.upper_bound.is_some() {
3941 scanner.distance_range(request.lower_bound, request.upper_bound);
3942 }
3943
3944 if let Some(bypass) = request.bypass_vector_index {
3946 scanner.use_index(!bypass);
3947 }
3948
3949 if request.fast_search == Some(true) {
3951 scanner.fast_search();
3952 }
3953 }
3954 }
3955
3956 if let Some(ref fts_query) = request.full_text_query {
3958 if let Some(ref string_query) = fts_query.string_query {
3960 let mut fts = FullTextSearchQuery::new(string_query.query.clone());
3961
3962 if let Some(ref columns) = string_query.columns
3964 && !columns.is_empty()
3965 {
3966 fts = fts
3967 .with_columns(columns)
3968 .map_err(|e| NamespaceError::InvalidInput {
3969 message: format!("Invalid FTS columns: {:?}", e),
3970 })?;
3971 }
3972
3973 scanner
3974 .full_text_search(fts)
3975 .map_err(|e| NamespaceError::InvalidInput {
3976 message: format!("Invalid full text search: {:?}", e),
3977 })?;
3978 }
3979 }
3982
3983 if let Some(ref columns) = request.columns {
3985 if let Some(ref column_names) = columns.column_names
3986 && !column_names.is_empty()
3987 {
3988 scanner
3989 .project(column_names)
3990 .map_err(|e| NamespaceError::InvalidInput {
3991 message: format!("Invalid column projection: {:?}", e),
3992 })?;
3993 } else if let Some(ref column_aliases) = columns.column_aliases
3994 && !column_aliases.is_empty()
3995 {
3996 let transform_pairs: Vec<(String, String)> = column_aliases
3998 .iter()
3999 .map(|(alias, sql)| (alias.clone(), sql.clone()))
4000 .collect();
4001 scanner
4002 .project_with_transform(
4003 &transform_pairs
4004 .iter()
4005 .map(|(a, s)| (a.as_str(), s.as_str()))
4006 .collect::<Vec<_>>(),
4007 )
4008 .map_err(|e| NamespaceError::InvalidInput {
4009 message: format!("Invalid column alias expression: {:?}", e),
4010 })?;
4011 }
4012 }
4013
4014 if let Some(ref filter) = request.filter
4016 && !filter.is_empty()
4017 {
4018 scanner
4019 .filter(filter)
4020 .map_err(|e| NamespaceError::InvalidInput {
4021 message: format!("Invalid filter expression: {:?}", e),
4022 })?;
4023 }
4024
4025 if request.with_row_id == Some(true) {
4027 scanner.with_row_id();
4028 }
4029
4030 if !has_vector_query && request.k > 0 {
4034 let offset = request.offset.map(|o| o as i64);
4035 scanner.limit(Some(request.k as i64), offset).map_err(|e| {
4036 NamespaceError::InvalidInput {
4037 message: format!("Invalid limit/offset: {:?}", e),
4038 }
4039 })?;
4040 } else if has_vector_query && request.offset.is_some() {
4041 let offset = request.offset.map(|o| o as i64);
4043 scanner
4044 .limit(None, offset)
4045 .map_err(|e| NamespaceError::InvalidInput {
4046 message: format!("Invalid offset: {:?}", e),
4047 })?;
4048 }
4049
4050 let batch = scanner
4052 .try_into_batch()
4053 .await
4054 .map_err(|e| NamespaceError::Internal {
4055 message: format!("Failed to execute query: {:?}", e),
4056 })?;
4057
4058 let schema = batch.schema();
4060 let mut buffer = Vec::new();
4061 {
4062 let mut writer = FileWriter::try_new(&mut buffer, &schema).map_err(|e| {
4063 NamespaceError::Internal {
4064 message: format!("Failed to create IPC writer: {:?}", e),
4065 }
4066 })?;
4067 writer.write(&batch).map_err(|e| NamespaceError::Internal {
4068 message: format!("Failed to write batch to IPC: {:?}", e),
4069 })?;
4070 writer.finish().map_err(|e| NamespaceError::Internal {
4071 message: format!("Failed to finish IPC writer: {:?}", e),
4072 })?;
4073 }
4074
4075 Ok(Bytes::from(buffer))
4076 }
4077
4078 fn namespace_id(&self) -> String {
4079 format!("DirectoryNamespace {{ root: {:?} }}", self.root)
4080 }
4081}
4082
4083#[cfg(test)]
4084mod tests {
4085 use super::*;
4086 use arrow_ipc::reader::{FileReader, StreamReader};
4087 use lance::dataset::Dataset;
4088 use lance::index::DatasetIndexExt;
4089 use lance_core::utils::tempfile::{TempStdDir, TempStrDir};
4090 use lance_core::utils::testing::CountingObjectStore;
4091 use lance_io::object_store::{providers::local::FileStoreProvider, uri_to_url};
4092 use lance_namespace::models::{
4093 CreateTableRequest, JsonArrowDataType, JsonArrowField, JsonArrowSchema, ListTablesRequest,
4094 QueryTableRequestColumns,
4095 };
4096 use lance_namespace::schema::convert_json_arrow_schema;
4097 use std::io::Cursor;
4098 use std::sync::{
4099 Arc,
4100 atomic::{AtomicUsize, Ordering},
4101 };
4102 use url::Url;
4103
/// Asserts that `plan` contains every string in `expected_fragments`.
///
/// Panics on the first missing fragment with a message that includes
/// `context`, the missing fragment, and the full plan text.
fn assert_plan_contains_all(plan: &str, expected_fragments: &[&str], context: &str) {
    expected_fragments.iter().for_each(|fragment| {
        assert!(
            plan.contains(*fragment),
            "{}. Missing fragment: '{}'. Plan:\n{}",
            context,
            fragment,
            plan
        );
    });
}
4115
4116 async fn create_test_namespace() -> (DirectoryNamespace, TempStdDir) {
4118 let temp_dir = TempStdDir::default();
4119
4120 let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
4121 .build()
4122 .await
4123 .unwrap();
4124 (namespace, temp_dir)
4125 }
4126
4127 #[derive(Debug)]
4128 struct CountingFileStoreProvider {
4129 listing_count: Arc<AtomicUsize>,
4130 }
4131
4132 #[async_trait]
4133 impl lance_io::object_store::ObjectStoreProvider for CountingFileStoreProvider {
4134 async fn new_store(
4135 &self,
4136 base_path: Url,
4137 params: &ObjectStoreParams,
4138 ) -> Result<ObjectStore> {
4139 let provider = FileStoreProvider;
4140 let mut store = provider.new_store(base_path, params).await?;
4141 store.inner = Arc::new(CountingObjectStore::new(
4142 store.inner.clone(),
4143 self.listing_count.clone(),
4144 ));
4145 Ok(store)
4146 }
4147
4148 fn extract_path(&self, url: &Url) -> Result<Path> {
4149 let provider = FileStoreProvider;
4150 provider.extract_path(url)
4151 }
4152
4153 fn calculate_object_store_prefix(
4154 &self,
4155 url: &Url,
4156 storage_options: Option<&HashMap<String, String>>,
4157 ) -> Result<String> {
4158 let provider = FileStoreProvider;
4159 provider.calculate_object_store_prefix(url, storage_options)
4160 }
4161 }
4162
4163 fn file_object_store_uri(path: &str) -> String {
4164 let file_url = uri_to_url(path).unwrap();
4165 let mut url = Url::parse("file-object-store:///").unwrap();
4166 url.set_path(file_url.path());
4167 url.to_string()
4168 }
4169
4170 fn build_listing_counting_session(listing_count: Arc<AtomicUsize>) -> Arc<Session> {
4171 let registry = Arc::new(ObjectStoreRegistry::default());
4172 registry.insert(
4173 "file-object-store",
4174 Arc::new(CountingFileStoreProvider { listing_count }),
4175 );
4176 Arc::new(Session::new(0, 0, registry))
4177 }
4178
4179 fn create_test_ipc_data(schema: &JsonArrowSchema) -> Vec<u8> {
4181 use arrow::ipc::writer::StreamWriter;
4182
4183 let arrow_schema = convert_json_arrow_schema(schema).unwrap();
4184 let arrow_schema = Arc::new(arrow_schema);
4185 let batch = arrow::record_batch::RecordBatch::new_empty(arrow_schema.clone());
4186 let mut buffer = Vec::new();
4187 {
4188 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
4189 writer.write(&batch).unwrap();
4190 writer.finish().unwrap();
4191 }
4192 buffer
4193 }
4194
4195 fn create_ipc_data_from_batches(
4196 schema: Arc<arrow_schema::Schema>,
4197 batches: Vec<arrow::record_batch::RecordBatch>,
4198 ) -> Vec<u8> {
4199 use arrow::ipc::writer::StreamWriter;
4200
4201 let mut buffer = Vec::new();
4202 {
4203 let mut writer = StreamWriter::try_new(&mut buffer, &schema).unwrap();
4204 for batch in &batches {
4205 writer.write(batch).unwrap();
4206 }
4207 writer.finish().unwrap();
4208 }
4209 buffer
4210 }
4211
4212 fn create_non_empty_test_ipc_data() -> Vec<u8> {
4213 use arrow::array::{Int32Array, StringArray};
4214 use arrow::record_batch::RecordBatch;
4215
4216 let schema = Arc::new(convert_json_arrow_schema(&create_test_schema()).unwrap());
4217 let batch = RecordBatch::try_new(
4218 schema.clone(),
4219 vec![
4220 Arc::new(Int32Array::from(vec![1, 2])),
4221 Arc::new(StringArray::from(vec![Some("alice"), Some("bob")])),
4222 ],
4223 )
4224 .unwrap();
4225 create_ipc_data_from_batches(schema, vec![batch])
4226 }
4227
4228 fn create_single_row_test_ipc_data() -> Vec<u8> {
4229 use arrow::array::{Int32Array, StringArray};
4230 use arrow::record_batch::RecordBatch;
4231
4232 let schema = Arc::new(convert_json_arrow_schema(&create_test_schema()).unwrap());
4233 let batch = RecordBatch::try_new(
4234 schema.clone(),
4235 vec![
4236 Arc::new(Int32Array::from(vec![10])),
4237 Arc::new(StringArray::from(vec![Some("carol")])),
4238 ],
4239 )
4240 .unwrap();
4241 create_ipc_data_from_batches(schema, vec![batch])
4242 }
4243
4244 fn create_test_schema() -> JsonArrowSchema {
4246 let int_type = JsonArrowDataType::new("int32".to_string());
4247 let string_type = JsonArrowDataType::new("utf8".to_string());
4248
4249 let id_field = JsonArrowField {
4250 name: "id".to_string(),
4251 r#type: Box::new(int_type),
4252 nullable: false,
4253 metadata: None,
4254 };
4255
4256 let name_field = JsonArrowField {
4257 name: "name".to_string(),
4258 r#type: Box::new(string_type),
4259 nullable: true,
4260 metadata: None,
4261 };
4262
4263 JsonArrowSchema {
4264 fields: vec![id_field, name_field],
4265 metadata: None,
4266 }
4267 }
4268
4269 fn create_scalar_table_ipc_data() -> Vec<u8> {
4270 use arrow::array::{Int32Array, StringArray};
4271 use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
4272
4273 let schema = Arc::new(ArrowSchema::new(vec![
4274 Field::new("id", DataType::Int32, false),
4275 Field::new("name", DataType::Utf8, true),
4276 ]));
4277 let batch = arrow::record_batch::RecordBatch::try_new(
4278 schema.clone(),
4279 vec![
4280 Arc::new(Int32Array::from(vec![1, 2, 3])),
4281 Arc::new(StringArray::from(vec!["alice", "bob", "cory"])),
4282 ],
4283 )
4284 .unwrap();
4285 create_ipc_data_from_batches(schema, vec![batch])
4286 }
4287
4288 fn create_vector_table_ipc_data() -> Vec<u8> {
4289 use arrow::array::{FixedSizeListArray, Float32Array, Int32Array};
4290 use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
4291
4292 let schema = Arc::new(ArrowSchema::new(vec![
4293 Field::new("id", DataType::Int32, false),
4294 Field::new(
4295 "vector",
4296 DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), 2),
4297 true,
4298 ),
4299 ]));
4300 let vector_field = Arc::new(Field::new("item", DataType::Float32, true));
4301 let vectors = FixedSizeListArray::try_new(
4302 vector_field,
4303 2,
4304 Arc::new(Float32Array::from(vec![0.1, 0.2, 0.3, 0.4, 0.5, 0.6])),
4305 None,
4306 )
4307 .unwrap();
4308 let batch = arrow::record_batch::RecordBatch::try_new(
4309 schema.clone(),
4310 vec![Arc::new(Int32Array::from(vec![1, 2, 3])), Arc::new(vectors)],
4311 )
4312 .unwrap();
4313 create_ipc_data_from_batches(schema, vec![batch])
4314 }
4315
4316 async fn create_scalar_table(namespace: &DirectoryNamespace, table_name: &str) {
4317 let mut create_table_request = CreateTableRequest::new();
4318 create_table_request.id = Some(vec![table_name.to_string()]);
4319 namespace
4320 .create_table(
4321 create_table_request,
4322 Bytes::from(create_scalar_table_ipc_data()),
4323 )
4324 .await
4325 .unwrap();
4326 }
4327
4328 async fn create_vector_table(namespace: &DirectoryNamespace, table_name: &str) {
4329 let mut create_table_request = CreateTableRequest::new();
4330 create_table_request.id = Some(vec![table_name.to_string()]);
4331 namespace
4332 .create_table(
4333 create_table_request,
4334 Bytes::from(create_vector_table_ipc_data()),
4335 )
4336 .await
4337 .unwrap();
4338 }
4339
4340 async fn open_dataset(namespace: &DirectoryNamespace, table_name: &str) -> Dataset {
4341 let mut describe_request = DescribeTableRequest::new();
4342 describe_request.id = Some(vec![table_name.to_string()]);
4343 let table_uri = namespace
4344 .describe_table(describe_request)
4345 .await
4346 .unwrap()
4347 .location
4348 .expect("table location should exist");
4349 Dataset::open(&table_uri).await.unwrap()
4350 }
4351
4352 async fn create_scalar_index(
4353 namespace: &DirectoryNamespace,
4354 table_name: &str,
4355 index_name: &str,
4356 ) -> Option<String> {
4357 use lance_namespace::models::CreateTableIndexRequest;
4358
4359 let mut create_index_request =
4360 CreateTableIndexRequest::new("id".to_string(), "BTREE".to_string());
4361 create_index_request.id = Some(vec![table_name.to_string()]);
4362 create_index_request.name = Some(index_name.to_string());
4363 namespace
4364 .create_table_scalar_index(create_index_request)
4365 .await
4366 .unwrap()
4367 .transaction_id
4368 }
4369
4370 #[tokio::test]
4371 async fn test_create_table() {
4372 let (namespace, _temp_dir) = create_test_namespace().await;
4373
4374 let schema = create_test_schema();
4376 let ipc_data = create_test_ipc_data(&schema);
4377
4378 let mut request = CreateTableRequest::new();
4379 request.id = Some(vec!["test_table".to_string()]);
4380
4381 let response = namespace
4382 .create_table(request, bytes::Bytes::from(ipc_data))
4383 .await
4384 .unwrap();
4385
4386 assert!(response.location.is_some());
4387 assert!(response.location.unwrap().ends_with("test_table.lance"));
4388 assert_eq!(response.version, Some(1));
4389 }
4390
4391 #[tokio::test]
4392 async fn test_create_table_without_data() {
4393 let (namespace, _temp_dir) = create_test_namespace().await;
4394
4395 let mut request = CreateTableRequest::new();
4396 request.id = Some(vec!["test_table".to_string()]);
4397
4398 let result = namespace.create_table(request, bytes::Bytes::new()).await;
4399 assert!(result.is_err());
4400 assert!(
4401 result
4402 .unwrap_err()
4403 .to_string()
4404 .contains("Arrow IPC stream) is required")
4405 );
4406 }
4407
4408 #[tokio::test]
4409 async fn test_create_table_with_invalid_id() {
4410 let (namespace, _temp_dir) = create_test_namespace().await;
4411
4412 let schema = create_test_schema();
4414 let ipc_data = create_test_ipc_data(&schema);
4415
4416 let mut request = CreateTableRequest::new();
4418 request.id = Some(vec![]);
4419
4420 let result = namespace
4421 .create_table(request, bytes::Bytes::from(ipc_data.clone()))
4422 .await;
4423 assert!(result.is_err());
4424
4425 let mut create_ns_req = CreateNamespaceRequest::new();
4428 create_ns_req.id = Some(vec!["test_namespace".to_string()]);
4429 namespace.create_namespace(create_ns_req).await.unwrap();
4430
4431 let mut request = CreateTableRequest::new();
4433 request.id = Some(vec!["test_namespace".to_string(), "table".to_string()]);
4434
4435 let result = namespace
4436 .create_table(request, bytes::Bytes::from(ipc_data))
4437 .await;
4438 assert!(
4440 result.is_ok(),
4441 "Multi-level table IDs should work with manifest enabled"
4442 );
4443 }
4444
4445 #[tokio::test]
4446 async fn test_list_tables() {
4447 let (namespace, _temp_dir) = create_test_namespace().await;
4448
4449 let mut request = ListTablesRequest::new();
4451 request.id = Some(vec![]);
4452 let response = namespace.list_tables(request).await.unwrap();
4453 assert_eq!(response.tables.len(), 0);
4454
4455 let schema = create_test_schema();
4457 let ipc_data = create_test_ipc_data(&schema);
4458
4459 let mut create_request = CreateTableRequest::new();
4461 create_request.id = Some(vec!["table1".to_string()]);
4462 namespace
4463 .create_table(create_request, bytes::Bytes::from(ipc_data.clone()))
4464 .await
4465 .unwrap();
4466
4467 let mut create_request = CreateTableRequest::new();
4469 create_request.id = Some(vec!["table2".to_string()]);
4470 namespace
4471 .create_table(create_request, bytes::Bytes::from(ipc_data))
4472 .await
4473 .unwrap();
4474
4475 let mut request = ListTablesRequest::new();
4477 request.id = Some(vec![]);
4478 let response = namespace.list_tables(request).await.unwrap();
4479 let tables = response.tables;
4480 assert_eq!(tables.len(), 2);
4481 assert!(tables.contains(&"table1".to_string()));
4482 assert!(tables.contains(&"table2".to_string()));
4483 }
4484
4485 #[tokio::test]
4486 async fn test_list_tables_pagination() {
4487 let (namespace, _temp_dir) = create_test_namespace().await;
4488
4489 let schema = create_test_schema();
4490 let ipc_data = create_test_ipc_data(&schema);
4491
4492 for name in ["alpha", "bravo", "charlie"] {
4493 let mut req = CreateTableRequest::new();
4494 req.id = Some(vec![name.to_string()]);
4495 namespace
4496 .create_table(req, bytes::Bytes::from(ipc_data.clone()))
4497 .await
4498 .unwrap();
4499 }
4500
4501 let first_page = namespace
4503 .list_tables(ListTablesRequest {
4504 id: Some(vec![]),
4505 limit: Some(2),
4506 ..Default::default()
4507 })
4508 .await
4509 .unwrap();
4510
4511 assert_eq!(first_page.tables, vec!["alpha", "bravo"]);
4512 assert_eq!(first_page.page_token.as_deref(), Some("bravo"));
4513
4514 let second_page = namespace
4516 .list_tables(ListTablesRequest {
4517 id: Some(vec![]),
4518 limit: Some(2),
4519 page_token: first_page.page_token.clone(),
4520 ..Default::default()
4521 })
4522 .await
4523 .unwrap();
4524
4525 assert_eq!(second_page.tables, vec!["charlie"]);
4526 assert!(second_page.page_token.is_none());
4527 }
4528
4529 #[tokio::test]
4530 async fn test_list_tables_pagination_limit_zero() {
4531 let (namespace, _temp_dir) = create_test_namespace().await;
4532
4533 let schema = create_test_schema();
4534 let ipc_data = create_test_ipc_data(&schema);
4535
4536 let mut req = CreateTableRequest::new();
4537 req.id = Some(vec!["alpha".to_string()]);
4538 namespace
4539 .create_table(req, bytes::Bytes::from(ipc_data))
4540 .await
4541 .unwrap();
4542
4543 let response = namespace
4544 .list_tables(ListTablesRequest {
4545 id: Some(vec![]),
4546 limit: Some(0),
4547 ..Default::default()
4548 })
4549 .await
4550 .unwrap();
4551
4552 assert!(response.tables.is_empty());
4553 assert!(response.page_token.is_none());
4554 }
4555
4556 #[tokio::test]
4557 async fn test_list_tables_with_namespace_id() {
4558 let (namespace, _temp_dir) = create_test_namespace().await;
4559
4560 let mut create_ns_req = CreateNamespaceRequest::new();
4562 create_ns_req.id = Some(vec!["test_namespace".to_string()]);
4563 namespace.create_namespace(create_ns_req).await.unwrap();
4564
4565 let mut request = ListTablesRequest::new();
4567 request.id = Some(vec!["test_namespace".to_string()]);
4568
4569 let result = namespace.list_tables(request).await;
4570 assert!(
4572 result.is_ok(),
4573 "list_tables should work with child namespace when manifest is enabled"
4574 );
4575 let response = result.unwrap();
4576 assert_eq!(
4577 response.tables.len(),
4578 0,
4579 "Namespace should have no tables yet"
4580 );
4581 }
4582
4583 #[tokio::test]
4584 async fn test_create_scalar_index() {
4585 let (namespace, _temp_dir) = create_test_namespace().await;
4586 create_scalar_table(&namespace, "users").await;
4587
4588 let transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4589 let dataset = open_dataset(&namespace, "users").await;
4590 let expected_transaction_id = dataset
4591 .read_transaction()
4592 .await
4593 .unwrap()
4594 .map(|transaction| transaction.uuid);
4595 assert_eq!(transaction_id, expected_transaction_id);
4596 let indices = dataset.load_indices().await.unwrap();
4597 assert!(indices.iter().any(|index| index.name == "users_id_idx"));
4598 }
4599
4600 #[tokio::test]
4601 async fn test_create_vector_index() {
4602 use lance_namespace::models::CreateTableIndexRequest;
4603
4604 let (namespace, _temp_dir) = create_test_namespace().await;
4605 create_vector_table(&namespace, "vectors").await;
4606
4607 let mut create_index_request =
4608 CreateTableIndexRequest::new("vector".to_string(), "IVF_FLAT".to_string());
4609 create_index_request.id = Some(vec!["vectors".to_string()]);
4610 create_index_request.name = Some("vector_idx".to_string());
4611 create_index_request.distance_type = Some("l2".to_string());
4612 let transaction_id = namespace
4613 .create_table_index(create_index_request)
4614 .await
4615 .unwrap()
4616 .transaction_id;
4617
4618 let dataset = open_dataset(&namespace, "vectors").await;
4619 let expected_transaction_id = dataset
4620 .read_transaction()
4621 .await
4622 .unwrap()
4623 .map(|transaction| transaction.uuid);
4624 assert_eq!(transaction_id, expected_transaction_id);
4625 let indices = dataset.load_indices().await.unwrap();
4626 assert!(indices.iter().any(|index| index.name == "vector_idx"));
4627 }
4628
4629 #[tokio::test]
4630 async fn test_list_table_indices() {
4631 use lance_namespace::models::ListTableIndicesRequest;
4632
4633 let (namespace, _temp_dir) = create_test_namespace().await;
4634 create_scalar_table(&namespace, "users").await;
4635 create_scalar_index(&namespace, "users", "a_idx").await;
4636 create_scalar_index(&namespace, "users", "b_idx").await;
4637 let transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4638
4639 let response = namespace
4640 .list_table_indices(ListTableIndicesRequest {
4641 id: Some(vec!["users".to_string()]),
4642 ..Default::default()
4643 })
4644 .await
4645 .unwrap();
4646
4647 assert_eq!(response.indexes.len(), 3);
4648 assert_eq!(response.indexes[0].index_name, "a_idx");
4649 assert_eq!(response.indexes[1].index_name, "b_idx");
4650 assert_eq!(response.indexes[2].index_name, "users_id_idx");
4651 assert!(response.page_token.is_none());
4652 let users_id_idx = response
4653 .indexes
4654 .iter()
4655 .find(|index| index.index_name == "users_id_idx")
4656 .unwrap();
4657 assert_eq!(users_id_idx.columns, vec!["id"]);
4658 assert_eq!(users_id_idx.status, "SUCCEEDED");
4659
4660 let dataset = open_dataset(&namespace, "users").await;
4661 let expected_transaction_id = dataset
4662 .read_transaction()
4663 .await
4664 .unwrap()
4665 .map(|transaction| transaction.uuid);
4666 assert_eq!(transaction_id, expected_transaction_id);
4667 let indices = dataset.load_indices().await.unwrap();
4668 assert_eq!(
4669 indices
4670 .iter()
4671 .filter(|index| index.name == "users_id_idx")
4672 .count(),
4673 1
4674 );
4675
4676 let first_page = namespace
4677 .list_table_indices(ListTableIndicesRequest {
4678 id: Some(vec!["users".to_string()]),
4679 limit: Some(2),
4680 ..Default::default()
4681 })
4682 .await
4683 .unwrap();
4684
4685 assert_eq!(first_page.indexes.len(), 2);
4686 assert_eq!(first_page.indexes[0].index_name, "a_idx");
4687 assert_eq!(first_page.indexes[1].index_name, "b_idx");
4688 assert_eq!(first_page.page_token.as_deref(), Some("b_idx"));
4689
4690 let second_page = namespace
4691 .list_table_indices(ListTableIndicesRequest {
4692 id: Some(vec!["users".to_string()]),
4693 page_token: first_page.page_token.clone(),
4694 limit: Some(2),
4695 ..Default::default()
4696 })
4697 .await
4698 .unwrap();
4699
4700 assert_eq!(second_page.indexes.len(), 1);
4701 assert_eq!(second_page.indexes[0].index_name, "users_id_idx");
4702 assert!(second_page.page_token.is_none());
4703 }
4704
4705 #[tokio::test]
4706 async fn test_describe_table_index_stats() {
4707 use lance_namespace::models::DescribeTableIndexStatsRequest;
4708
4709 let (namespace, _temp_dir) = create_test_namespace().await;
4710 create_scalar_table(&namespace, "users").await;
4711 let transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4712
4713 let response = namespace
4714 .describe_table_index_stats(DescribeTableIndexStatsRequest {
4715 id: Some(vec!["users".to_string()]),
4716 index_name: Some("users_id_idx".to_string()),
4717 ..Default::default()
4718 })
4719 .await
4720 .unwrap();
4721 assert_eq!(response.index_type, Some("BTree".to_string()));
4722 assert_eq!(response.num_indices, Some(1));
4723 assert_eq!(response.num_indexed_rows, Some(3));
4724 assert_eq!(response.num_unindexed_rows, Some(0));
4725
4726 let dataset = open_dataset(&namespace, "users").await;
4727 let expected_transaction_id = dataset
4728 .read_transaction()
4729 .await
4730 .unwrap()
4731 .map(|transaction| transaction.uuid);
4732 assert_eq!(transaction_id, expected_transaction_id);
4733 let stats: serde_json::Value =
4734 serde_json::from_str(&dataset.index_statistics("users_id_idx").await.unwrap()).unwrap();
4735 assert_eq!(stats["index_type"], "BTree");
4736 assert_eq!(stats["num_indices"], 1);
4737 assert_eq!(stats["num_indexed_rows"], 3);
4738 assert_eq!(stats["num_unindexed_rows"], 0);
4739 }
4740
4741 #[tokio::test]
4742 async fn test_describe_transaction() {
4743 use lance_namespace::models::DescribeTransactionRequest;
4744
4745 let (namespace, _temp_dir) = create_test_namespace().await;
4746 create_scalar_table(&namespace, "users").await;
4747 let transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4748 let dataset = open_dataset(&namespace, "users").await;
4749 let latest_transaction = dataset.read_transaction().await.unwrap();
4750 assert_eq!(
4751 transaction_id,
4752 latest_transaction
4753 .as_ref()
4754 .map(|transaction| transaction.uuid.clone())
4755 );
4756
4757 if let Some(transaction_id) = transaction_id {
4758 let response = namespace
4759 .describe_transaction(DescribeTransactionRequest {
4760 id: Some(vec!["users".to_string(), transaction_id.clone()]),
4761 ..Default::default()
4762 })
4763 .await
4764 .unwrap();
4765 assert_eq!(response.status, "SUCCEEDED");
4766 assert_eq!(
4767 response
4768 .properties
4769 .as_ref()
4770 .and_then(|props| props.get("operation")),
4771 Some(&"CreateIndex".to_string())
4772 );
4773 assert_eq!(
4774 response
4775 .properties
4776 .as_ref()
4777 .and_then(|props| props.get("uuid")),
4778 Some(&transaction_id)
4779 );
4780 } else {
4781 assert!(latest_transaction.is_none());
4782 }
4783 }
4784
4785 #[tokio::test]
4786 async fn test_drop_table_index() {
4787 use lance_namespace::models::{DropTableIndexRequest, ListTableIndicesRequest};
4788
4789 let (namespace, _temp_dir) = create_test_namespace().await;
4790 create_scalar_table(&namespace, "users").await;
4791 let create_transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4792
4793 let drop_transaction_id = namespace
4794 .drop_table_index(DropTableIndexRequest {
4795 id: Some(vec!["users".to_string()]),
4796 index_name: Some("users_id_idx".to_string()),
4797 ..Default::default()
4798 })
4799 .await
4800 .unwrap()
4801 .transaction_id;
4802
4803 let dataset = open_dataset(&namespace, "users").await;
4804 let previous_dataset = dataset
4805 .checkout_version(dataset.version().version - 1)
4806 .await
4807 .unwrap();
4808 let previous_transaction_id = previous_dataset
4809 .read_transaction()
4810 .await
4811 .unwrap()
4812 .map(|transaction| transaction.uuid);
4813 assert_eq!(create_transaction_id, previous_transaction_id);
4814 let expected_drop_transaction_id = dataset
4815 .read_transaction()
4816 .await
4817 .unwrap()
4818 .map(|transaction| transaction.uuid);
4819 assert_eq!(drop_transaction_id, expected_drop_transaction_id);
4820 let indices = dataset.load_indices().await.unwrap();
4821 assert!(!indices.iter().any(|index| index.name == "users_id_idx"));
4822
4823 let list_response = namespace
4824 .list_table_indices(ListTableIndicesRequest {
4825 id: Some(vec!["users".to_string()]),
4826 ..Default::default()
4827 })
4828 .await
4829 .unwrap();
4830 assert!(list_response.indexes.is_empty());
4831 }
4832
4833 #[tokio::test]
4834 async fn test_describe_table() {
4835 let (namespace, _temp_dir) = create_test_namespace().await;
4836
4837 let schema = create_test_schema();
4839 let ipc_data = create_test_ipc_data(&schema);
4840
4841 let mut create_request = CreateTableRequest::new();
4842 create_request.id = Some(vec!["test_table".to_string()]);
4843 namespace
4844 .create_table(create_request, bytes::Bytes::from(ipc_data))
4845 .await
4846 .unwrap();
4847
4848 let mut request = DescribeTableRequest::new();
4850 request.id = Some(vec!["test_table".to_string()]);
4851 let response = namespace.describe_table(request).await.unwrap();
4852
4853 assert!(response.location.is_some());
4854 assert!(response.location.unwrap().ends_with("test_table.lance"));
4855 }
4856
4857 #[tokio::test]
4858 async fn test_describe_nonexistent_table() {
4859 let (namespace, _temp_dir) = create_test_namespace().await;
4860
4861 let mut request = DescribeTableRequest::new();
4862 request.id = Some(vec!["nonexistent".to_string()]);
4863
4864 let result = namespace.describe_table(request).await;
4865 assert!(result.is_err());
4866 assert!(result.unwrap_err().to_string().contains("Table not found"));
4867 }
4868
4869 #[tokio::test]
4870 async fn test_table_exists() {
4871 let (namespace, _temp_dir) = create_test_namespace().await;
4872
4873 let schema = create_test_schema();
4875 let ipc_data = create_test_ipc_data(&schema);
4876
4877 let mut create_request = CreateTableRequest::new();
4878 create_request.id = Some(vec!["existing_table".to_string()]);
4879 namespace
4880 .create_table(create_request, bytes::Bytes::from(ipc_data))
4881 .await
4882 .unwrap();
4883
4884 let mut request = TableExistsRequest::new();
4886 request.id = Some(vec!["existing_table".to_string()]);
4887 let result = namespace.table_exists(request).await;
4888 assert!(result.is_ok());
4889
4890 let mut request = TableExistsRequest::new();
4892 request.id = Some(vec!["nonexistent".to_string()]);
4893 let result = namespace.table_exists(request).await;
4894 assert!(result.is_err());
4895 assert!(result.unwrap_err().to_string().contains("Table not found"));
4896 }
4897
4898 #[tokio::test]
4899 async fn test_drop_table() {
4900 let (namespace, _temp_dir) = create_test_namespace().await;
4901
4902 let schema = create_test_schema();
4904 let ipc_data = create_test_ipc_data(&schema);
4905
4906 let mut create_request = CreateTableRequest::new();
4907 create_request.id = Some(vec!["table_to_drop".to_string()]);
4908 namespace
4909 .create_table(create_request, bytes::Bytes::from(ipc_data))
4910 .await
4911 .unwrap();
4912
4913 let mut exists_request = TableExistsRequest::new();
4915 exists_request.id = Some(vec!["table_to_drop".to_string()]);
4916 assert!(namespace.table_exists(exists_request.clone()).await.is_ok());
4917
4918 let mut drop_request = DropTableRequest::new();
4920 drop_request.id = Some(vec!["table_to_drop".to_string()]);
4921 let response = namespace.drop_table(drop_request).await.unwrap();
4922 assert!(response.location.is_some());
4923
4924 assert!(namespace.table_exists(exists_request).await.is_err());
4926 }
4927
4928 #[tokio::test]
4929 async fn test_drop_nonexistent_table() {
4930 let (namespace, _temp_dir) = create_test_namespace().await;
4931
4932 let mut request = DropTableRequest::new();
4933 request.id = Some(vec!["nonexistent".to_string()]);
4934
4935 let result = namespace.drop_table(request).await;
4937 let _ = result;
4940 }
4941
4942 #[tokio::test]
4943 async fn test_root_namespace_operations() {
4944 let (namespace, _temp_dir) = create_test_namespace().await;
4945
4946 let mut request = ListNamespacesRequest::new();
4948 request.id = Some(vec![]);
4949 let result = namespace.list_namespaces(request).await;
4950 assert!(result.is_ok());
4951 assert_eq!(result.unwrap().namespaces.len(), 0);
4952
4953 let mut request = DescribeNamespaceRequest::new();
4955 request.id = Some(vec![]);
4956 let result = namespace.describe_namespace(request).await;
4957 assert!(result.is_ok());
4958
4959 let mut request = NamespaceExistsRequest::new();
4961 request.id = Some(vec![]);
4962 let result = namespace.namespace_exists(request).await;
4963 assert!(result.is_ok());
4964
4965 let mut request = CreateNamespaceRequest::new();
4967 request.id = Some(vec![]);
4968 let result = namespace.create_namespace(request).await;
4969 assert!(result.is_err());
4970 assert!(result.unwrap_err().to_string().contains("already exists"));
4971
4972 let mut request = DropNamespaceRequest::new();
4974 request.id = Some(vec![]);
4975 let result = namespace.drop_namespace(request).await;
4976 assert!(result.is_err());
4977 assert!(
4978 result
4979 .unwrap_err()
4980 .to_string()
4981 .contains("cannot be dropped")
4982 );
4983 }
4984
4985 #[tokio::test]
4986 async fn test_non_root_namespace_operations() {
4987 let (namespace, _temp_dir) = create_test_namespace().await;
4988
4989 let mut request = CreateNamespaceRequest::new();
4992 request.id = Some(vec!["child".to_string()]);
4993 let result = namespace.create_namespace(request).await;
4994 assert!(
4995 result.is_ok(),
4996 "Child namespace creation should succeed with manifest enabled"
4997 );
4998
4999 let mut request = NamespaceExistsRequest::new();
5001 request.id = Some(vec!["child".to_string()]);
5002 let result = namespace.namespace_exists(request).await;
5003 assert!(
5004 result.is_ok(),
5005 "Child namespace should exist after creation"
5006 );
5007
5008 let mut request = DropNamespaceRequest::new();
5010 request.id = Some(vec!["child".to_string()]);
5011 let result = namespace.drop_namespace(request).await;
5012 assert!(
5013 result.is_ok(),
5014 "Child namespace drop should succeed with manifest enabled"
5015 );
5016
5017 let mut request = NamespaceExistsRequest::new();
5019 request.id = Some(vec!["child".to_string()]);
5020 let result = namespace.namespace_exists(request).await;
5021 assert!(
5022 result.is_err(),
5023 "Child namespace should not exist after drop"
5024 );
5025 }
5026
5027 #[tokio::test]
5028 async fn test_config_custom_root() {
5029 let temp_dir = TempStdDir::default();
5030 let custom_path = temp_dir.join("custom");
5031 std::fs::create_dir(&custom_path).unwrap();
5032
5033 let namespace = DirectoryNamespaceBuilder::new(custom_path.to_string_lossy().to_string())
5034 .build()
5035 .await
5036 .unwrap();
5037
5038 let schema = create_test_schema();
5040 let ipc_data = create_test_ipc_data(&schema);
5041
5042 let mut request = CreateTableRequest::new();
5044 request.id = Some(vec!["test_table".to_string()]);
5045
5046 let response = namespace
5047 .create_table(request, bytes::Bytes::from(ipc_data))
5048 .await
5049 .unwrap();
5050
5051 assert!(response.location.unwrap().contains("custom"));
5052 }
5053
5054 #[tokio::test]
5055 async fn test_config_storage_options() {
5056 let temp_dir = TempStdDir::default();
5057
5058 let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
5059 .storage_option("option1", "value1")
5060 .storage_option("option2", "value2")
5061 .build()
5062 .await
5063 .unwrap();
5064
5065 let schema = create_test_schema();
5067 let ipc_data = create_test_ipc_data(&schema);
5068
5069 let mut request = CreateTableRequest::new();
5071 request.id = Some(vec!["test_table".to_string()]);
5072
5073 let response = namespace
5074 .create_table(request, bytes::Bytes::from(ipc_data))
5075 .await
5076 .unwrap();
5077
5078 let storage_options = response.storage_options.unwrap();
5079 assert_eq!(storage_options.get("option1"), Some(&"value1".to_string()));
5080 assert_eq!(storage_options.get("option2"), Some(&"value2".to_string()));
5081 }
5082
5083 #[tokio::test]
5087 async fn test_no_storage_options_without_vendor() {
5088 use lance_namespace::models::DeclareTableRequest;
5089
5090 let temp_dir = TempStdDir::default();
5091
5092 let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
5094 .manifest_enabled(false)
5095 .storage_option("aws_access_key_id", "AKID")
5096 .storage_option("aws_secret_access_key", "SECRET")
5097 .storage_option("region", "us-east-1")
5098 .build()
5099 .await
5100 .unwrap();
5101
5102 let schema = create_test_schema();
5103 let ipc_data = create_test_ipc_data(&schema);
5104
5105 let mut create_req = CreateTableRequest::new();
5107 create_req.id = Some(vec!["t1".to_string()]);
5108 namespace
5109 .create_table(create_req, bytes::Bytes::from(ipc_data))
5110 .await
5111 .unwrap();
5112
5113 let mut desc_req = DescribeTableRequest::new();
5115 desc_req.id = Some(vec!["t1".to_string()]);
5116 let resp = namespace.describe_table(desc_req).await.unwrap();
5117 assert!(resp.storage_options.is_none());
5118
5119 let mut decl_req = DeclareTableRequest::new();
5121 decl_req.id = Some(vec!["t2".to_string()]);
5122 let resp = namespace.declare_table(decl_req).await.unwrap();
5123 assert!(resp.storage_options.is_none());
5124 }
5125
5126 #[tokio::test]
5128 async fn test_no_storage_options_without_vendor_manifest() {
5129 let temp_dir = TempStdDir::default();
5130
5131 let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
5132 .storage_option("aws_access_key_id", "AKID")
5133 .storage_option("aws_secret_access_key", "SECRET")
5134 .storage_option("region", "us-east-1")
5135 .build()
5136 .await
5137 .unwrap();
5138
5139 let schema = create_test_schema();
5140 let ipc_data = create_test_ipc_data(&schema);
5141
5142 let mut create_req = CreateTableRequest::new();
5143 create_req.id = Some(vec!["t1".to_string()]);
5144 namespace
5145 .create_table(create_req, bytes::Bytes::from(ipc_data))
5146 .await
5147 .unwrap();
5148
5149 let mut desc_req = DescribeTableRequest::new();
5151 desc_req.id = Some(vec!["t1".to_string()]);
5152 let resp = namespace.describe_table(desc_req).await.unwrap();
5153 assert!(resp.storage_options.is_none());
5154 }
5155
5156 #[tokio::test]
5157 async fn test_from_properties_manifest_enabled() {
5158 let temp_dir = TempStdDir::default();
5159
5160 let mut properties = HashMap::new();
5161 properties.insert("root".to_string(), temp_dir.to_str().unwrap().to_string());
5162 properties.insert("manifest_enabled".to_string(), "true".to_string());
5163 properties.insert("dir_listing_enabled".to_string(), "false".to_string());
5164
5165 let builder = DirectoryNamespaceBuilder::from_properties(properties, None).unwrap();
5166 assert!(builder.manifest_enabled);
5167 assert!(!builder.dir_listing_enabled);
5168
5169 let namespace = builder.build().await.unwrap();
5170
5171 let schema = create_test_schema();
5173 let ipc_data = create_test_ipc_data(&schema);
5174
5175 let mut request = CreateTableRequest::new();
5177 request.id = Some(vec!["test_table".to_string()]);
5178
5179 let response = namespace
5180 .create_table(request, bytes::Bytes::from(ipc_data))
5181 .await
5182 .unwrap();
5183
5184 assert!(response.location.is_some());
5185 }
5186
5187 #[tokio::test]
5188 async fn test_from_properties_dir_listing_enabled() {
5189 let temp_dir = TempStdDir::default();
5190
5191 let mut properties = HashMap::new();
5192 properties.insert("root".to_string(), temp_dir.to_str().unwrap().to_string());
5193 properties.insert("manifest_enabled".to_string(), "false".to_string());
5194 properties.insert("dir_listing_enabled".to_string(), "true".to_string());
5195
5196 let builder = DirectoryNamespaceBuilder::from_properties(properties, None).unwrap();
5197 assert!(!builder.manifest_enabled);
5198 assert!(builder.dir_listing_enabled);
5199
5200 let namespace = builder.build().await.unwrap();
5201
5202 let schema = create_test_schema();
5204 let ipc_data = create_test_ipc_data(&schema);
5205
5206 let mut request = CreateTableRequest::new();
5208 request.id = Some(vec!["test_table".to_string()]);
5209
5210 let response = namespace
5211 .create_table(request, bytes::Bytes::from(ipc_data))
5212 .await
5213 .unwrap();
5214
5215 assert!(response.location.is_some());
5216 }
5217
5218 #[tokio::test]
5219 async fn test_from_properties_defaults() {
5220 let temp_dir = TempStdDir::default();
5221
5222 let mut properties = HashMap::new();
5223 properties.insert("root".to_string(), temp_dir.to_str().unwrap().to_string());
5224
5225 let builder = DirectoryNamespaceBuilder::from_properties(properties, None).unwrap();
5226 assert!(builder.manifest_enabled);
5228 assert!(builder.dir_listing_enabled);
5229 }
5230
5231 #[tokio::test]
5232 async fn test_from_properties_with_storage_options() {
5233 let temp_dir = TempStdDir::default();
5234
5235 let mut properties = HashMap::new();
5236 properties.insert("root".to_string(), temp_dir.to_str().unwrap().to_string());
5237 properties.insert("manifest_enabled".to_string(), "true".to_string());
5238 properties.insert("storage.region".to_string(), "us-west-2".to_string());
5239 properties.insert("storage.bucket".to_string(), "my-bucket".to_string());
5240
5241 let builder = DirectoryNamespaceBuilder::from_properties(properties, None).unwrap();
5242 assert!(builder.manifest_enabled);
5243 assert!(builder.storage_options.is_some());
5244
5245 let storage_options = builder.storage_options.unwrap();
5246 assert_eq!(
5247 storage_options.get("region"),
5248 Some(&"us-west-2".to_string())
5249 );
5250 assert_eq!(
5251 storage_options.get("bucket"),
5252 Some(&"my-bucket".to_string())
5253 );
5254 }
5255
5256 #[tokio::test]
5257 async fn test_various_arrow_types() {
5258 let (namespace, _temp_dir) = create_test_namespace().await;
5259
5260 let fields = vec![
5262 JsonArrowField {
5263 name: "bool_col".to_string(),
5264 r#type: Box::new(JsonArrowDataType::new("bool".to_string())),
5265 nullable: true,
5266 metadata: None,
5267 },
5268 JsonArrowField {
5269 name: "int8_col".to_string(),
5270 r#type: Box::new(JsonArrowDataType::new("int8".to_string())),
5271 nullable: true,
5272 metadata: None,
5273 },
5274 JsonArrowField {
5275 name: "float64_col".to_string(),
5276 r#type: Box::new(JsonArrowDataType::new("float64".to_string())),
5277 nullable: true,
5278 metadata: None,
5279 },
5280 JsonArrowField {
5281 name: "binary_col".to_string(),
5282 r#type: Box::new(JsonArrowDataType::new("binary".to_string())),
5283 nullable: true,
5284 metadata: None,
5285 },
5286 ];
5287
5288 let schema = JsonArrowSchema {
5289 fields,
5290 metadata: None,
5291 };
5292
5293 let ipc_data = create_test_ipc_data(&schema);
5295
5296 let mut request = CreateTableRequest::new();
5297 request.id = Some(vec!["complex_table".to_string()]);
5298
5299 let response = namespace
5300 .create_table(request, bytes::Bytes::from(ipc_data))
5301 .await
5302 .unwrap();
5303
5304 assert!(response.location.is_some());
5305 }
5306
5307 #[tokio::test]
5308 async fn test_connect_dir() {
5309 let temp_dir = TempStdDir::default();
5310
5311 let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
5312 .build()
5313 .await
5314 .unwrap();
5315
5316 let mut request = ListTablesRequest::new();
5318 request.id = Some(vec![]);
5319 let response = namespace.list_tables(request).await.unwrap();
5320 assert_eq!(response.tables.len(), 0);
5321 }
5322
5323 #[tokio::test]
5324 async fn test_create_table_with_ipc_data() {
5325 use arrow::array::{Int32Array, StringArray};
5326 use arrow::ipc::writer::StreamWriter;
5327
5328 let (namespace, _temp_dir) = create_test_namespace().await;
5329
5330 let schema = create_test_schema();
5332
5333 let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
5335 let arrow_schema = Arc::new(arrow_schema);
5336
5337 let id_array = Int32Array::from(vec![1, 2, 3]);
5339 let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie"]);
5340 let batch = arrow::record_batch::RecordBatch::try_new(
5341 arrow_schema.clone(),
5342 vec![Arc::new(id_array), Arc::new(name_array)],
5343 )
5344 .unwrap();
5345
5346 let mut buffer = Vec::new();
5348 {
5349 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
5350 writer.write(&batch).unwrap();
5351 writer.finish().unwrap();
5352 }
5353
5354 let mut request = CreateTableRequest::new();
5356 request.id = Some(vec!["test_table_with_data".to_string()]);
5357
5358 let response = namespace
5359 .create_table(request, Bytes::from(buffer))
5360 .await
5361 .unwrap();
5362
5363 assert_eq!(response.version, Some(1));
5364 assert!(
5365 response
5366 .location
5367 .unwrap()
5368 .contains("test_table_with_data.lance")
5369 );
5370
5371 let mut exists_request = TableExistsRequest::new();
5373 exists_request.id = Some(vec!["test_table_with_data".to_string()]);
5374 namespace.table_exists(exists_request).await.unwrap();
5375 }
5376
5377 #[tokio::test]
5378 async fn test_child_namespace_create_and_list() {
5379 let (namespace, _temp_dir) = create_test_namespace().await;
5380
5381 for i in 1..=3 {
5383 let mut create_req = CreateNamespaceRequest::new();
5384 create_req.id = Some(vec![format!("ns{}", i)]);
5385 let result = namespace.create_namespace(create_req).await;
5386 assert!(result.is_ok(), "Failed to create child namespace ns{}", i);
5387 }
5388
5389 let list_req = ListNamespacesRequest {
5391 id: Some(vec![]),
5392 ..Default::default()
5393 };
5394 let result = namespace.list_namespaces(list_req).await;
5395 assert!(result.is_ok());
5396 let namespaces = result.unwrap().namespaces;
5397 assert_eq!(namespaces.len(), 3);
5398 assert!(namespaces.contains(&"ns1".to_string()));
5399 assert!(namespaces.contains(&"ns2".to_string()));
5400 assert!(namespaces.contains(&"ns3".to_string()));
5401 }
5402
5403 #[tokio::test]
5404 async fn test_nested_namespace_hierarchy() {
5405 let (namespace, _temp_dir) = create_test_namespace().await;
5406
5407 let mut create_req = CreateNamespaceRequest::new();
5409 create_req.id = Some(vec!["parent".to_string()]);
5410 namespace.create_namespace(create_req).await.unwrap();
5411
5412 let mut create_req = CreateNamespaceRequest::new();
5414 create_req.id = Some(vec!["parent".to_string(), "child1".to_string()]);
5415 namespace.create_namespace(create_req).await.unwrap();
5416
5417 let mut create_req = CreateNamespaceRequest::new();
5418 create_req.id = Some(vec!["parent".to_string(), "child2".to_string()]);
5419 namespace.create_namespace(create_req).await.unwrap();
5420
5421 let list_req = ListNamespacesRequest {
5423 id: Some(vec!["parent".to_string()]),
5424 ..Default::default()
5425 };
5426 let result = namespace.list_namespaces(list_req).await;
5427 assert!(result.is_ok());
5428 let children = result.unwrap().namespaces;
5429 assert_eq!(children.len(), 2);
5430 assert!(children.contains(&"child1".to_string()));
5431 assert!(children.contains(&"child2".to_string()));
5432
5433 let list_req = ListNamespacesRequest {
5435 id: Some(vec![]),
5436 ..Default::default()
5437 };
5438 let result = namespace.list_namespaces(list_req).await;
5439 assert!(result.is_ok());
5440 let root_namespaces = result.unwrap().namespaces;
5441 assert_eq!(root_namespaces.len(), 1);
5442 assert_eq!(root_namespaces[0], "parent");
5443 }
5444
5445 #[tokio::test]
5446 async fn test_table_in_child_namespace() {
5447 let (namespace, _temp_dir) = create_test_namespace().await;
5448
5449 let mut create_ns_req = CreateNamespaceRequest::new();
5451 create_ns_req.id = Some(vec!["test_ns".to_string()]);
5452 namespace.create_namespace(create_ns_req).await.unwrap();
5453
5454 let schema = create_test_schema();
5456 let ipc_data = create_test_ipc_data(&schema);
5457 let mut create_table_req = CreateTableRequest::new();
5458 create_table_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5459 let result = namespace
5460 .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5461 .await;
5462 assert!(result.is_ok(), "Failed to create table in child namespace");
5463
5464 let list_req = ListTablesRequest {
5466 id: Some(vec!["test_ns".to_string()]),
5467 ..Default::default()
5468 };
5469 let result = namespace.list_tables(list_req).await;
5470 assert!(result.is_ok());
5471 let tables = result.unwrap().tables;
5472 assert_eq!(tables.len(), 1);
5473 assert_eq!(tables[0], "table1");
5474
5475 let mut exists_req = TableExistsRequest::new();
5477 exists_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5478 let result = namespace.table_exists(exists_req).await;
5479 assert!(result.is_ok());
5480
5481 let mut describe_req = DescribeTableRequest::new();
5483 describe_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5484 let result = namespace.describe_table(describe_req).await;
5485 assert!(result.is_ok());
5486 let response = result.unwrap();
5487 assert!(response.location.is_some());
5488 }
5489
5490 #[tokio::test]
5491 async fn test_multiple_tables_in_child_namespace() {
5492 let (namespace, _temp_dir) = create_test_namespace().await;
5493
5494 let mut create_ns_req = CreateNamespaceRequest::new();
5496 create_ns_req.id = Some(vec!["test_ns".to_string()]);
5497 namespace.create_namespace(create_ns_req).await.unwrap();
5498
5499 let schema = create_test_schema();
5501 let ipc_data = create_test_ipc_data(&schema);
5502 for i in 1..=3 {
5503 let mut create_table_req = CreateTableRequest::new();
5504 create_table_req.id = Some(vec!["test_ns".to_string(), format!("table{}", i)]);
5505 namespace
5506 .create_table(create_table_req, bytes::Bytes::from(ipc_data.clone()))
5507 .await
5508 .unwrap();
5509 }
5510
5511 let list_req = ListTablesRequest {
5513 id: Some(vec!["test_ns".to_string()]),
5514 ..Default::default()
5515 };
5516 let result = namespace.list_tables(list_req).await;
5517 assert!(result.is_ok());
5518 let tables = result.unwrap().tables;
5519 assert_eq!(tables.len(), 3);
5520 assert!(tables.contains(&"table1".to_string()));
5521 assert!(tables.contains(&"table2".to_string()));
5522 assert!(tables.contains(&"table3".to_string()));
5523 }
5524
5525 #[tokio::test]
5526 async fn test_drop_table_in_child_namespace() {
5527 let (namespace, _temp_dir) = create_test_namespace().await;
5528
5529 let mut create_ns_req = CreateNamespaceRequest::new();
5531 create_ns_req.id = Some(vec!["test_ns".to_string()]);
5532 namespace.create_namespace(create_ns_req).await.unwrap();
5533
5534 let schema = create_test_schema();
5536 let ipc_data = create_test_ipc_data(&schema);
5537 let mut create_table_req = CreateTableRequest::new();
5538 create_table_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5539 namespace
5540 .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5541 .await
5542 .unwrap();
5543
5544 let mut drop_req = DropTableRequest::new();
5546 drop_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5547 let result = namespace.drop_table(drop_req).await;
5548 assert!(result.is_ok(), "Failed to drop table in child namespace");
5549
5550 let mut exists_req = TableExistsRequest::new();
5552 exists_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5553 let result = namespace.table_exists(exists_req).await;
5554 assert!(result.is_err());
5555 }
5556
5557 #[tokio::test]
5558 async fn test_deeply_nested_namespace() {
5559 let (namespace, _temp_dir) = create_test_namespace().await;
5560
5561 let mut create_req = CreateNamespaceRequest::new();
5563 create_req.id = Some(vec!["level1".to_string()]);
5564 namespace.create_namespace(create_req).await.unwrap();
5565
5566 let mut create_req = CreateNamespaceRequest::new();
5567 create_req.id = Some(vec!["level1".to_string(), "level2".to_string()]);
5568 namespace.create_namespace(create_req).await.unwrap();
5569
5570 let mut create_req = CreateNamespaceRequest::new();
5571 create_req.id = Some(vec![
5572 "level1".to_string(),
5573 "level2".to_string(),
5574 "level3".to_string(),
5575 ]);
5576 namespace.create_namespace(create_req).await.unwrap();
5577
5578 let schema = create_test_schema();
5580 let ipc_data = create_test_ipc_data(&schema);
5581 let mut create_table_req = CreateTableRequest::new();
5582 create_table_req.id = Some(vec![
5583 "level1".to_string(),
5584 "level2".to_string(),
5585 "level3".to_string(),
5586 "table1".to_string(),
5587 ]);
5588 let result = namespace
5589 .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5590 .await;
5591 assert!(
5592 result.is_ok(),
5593 "Failed to create table in deeply nested namespace"
5594 );
5595
5596 let mut exists_req = TableExistsRequest::new();
5598 exists_req.id = Some(vec![
5599 "level1".to_string(),
5600 "level2".to_string(),
5601 "level3".to_string(),
5602 "table1".to_string(),
5603 ]);
5604 let result = namespace.table_exists(exists_req).await;
5605 assert!(result.is_ok());
5606 }
5607
5608 #[tokio::test]
5609 async fn test_namespace_with_properties() {
5610 let (namespace, _temp_dir) = create_test_namespace().await;
5611
5612 let mut properties = HashMap::new();
5614 properties.insert("owner".to_string(), "test_user".to_string());
5615 properties.insert("description".to_string(), "Test namespace".to_string());
5616
5617 let mut create_req = CreateNamespaceRequest::new();
5618 create_req.id = Some(vec!["test_ns".to_string()]);
5619 create_req.properties = Some(properties.clone());
5620 namespace.create_namespace(create_req).await.unwrap();
5621
5622 let describe_req = DescribeNamespaceRequest {
5624 id: Some(vec!["test_ns".to_string()]),
5625 ..Default::default()
5626 };
5627 let result = namespace.describe_namespace(describe_req).await;
5628 assert!(result.is_ok());
5629 let response = result.unwrap();
5630 assert!(response.properties.is_some());
5631 let props = response.properties.unwrap();
5632 assert_eq!(props.get("owner"), Some(&"test_user".to_string()));
5633 assert_eq!(
5634 props.get("description"),
5635 Some(&"Test namespace".to_string())
5636 );
5637 }
5638
5639 #[tokio::test]
5640 async fn test_cannot_drop_namespace_with_tables() {
5641 let (namespace, _temp_dir) = create_test_namespace().await;
5642
5643 let mut create_ns_req = CreateNamespaceRequest::new();
5645 create_ns_req.id = Some(vec!["test_ns".to_string()]);
5646 namespace.create_namespace(create_ns_req).await.unwrap();
5647
5648 let schema = create_test_schema();
5650 let ipc_data = create_test_ipc_data(&schema);
5651 let mut create_table_req = CreateTableRequest::new();
5652 create_table_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5653 namespace
5654 .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5655 .await
5656 .unwrap();
5657
5658 let mut drop_req = DropNamespaceRequest::new();
5660 drop_req.id = Some(vec!["test_ns".to_string()]);
5661 let result = namespace.drop_namespace(drop_req).await;
5662 assert!(
5663 result.is_err(),
5664 "Should not be able to drop namespace with tables"
5665 );
5666 }
5667
5668 #[tokio::test]
5669 async fn test_isolation_between_namespaces() {
5670 let (namespace, _temp_dir) = create_test_namespace().await;
5671
5672 let mut create_req = CreateNamespaceRequest::new();
5674 create_req.id = Some(vec!["ns1".to_string()]);
5675 namespace.create_namespace(create_req).await.unwrap();
5676
5677 let mut create_req = CreateNamespaceRequest::new();
5678 create_req.id = Some(vec!["ns2".to_string()]);
5679 namespace.create_namespace(create_req).await.unwrap();
5680
5681 let schema = create_test_schema();
5683 let ipc_data = create_test_ipc_data(&schema);
5684
5685 let mut create_table_req = CreateTableRequest::new();
5686 create_table_req.id = Some(vec!["ns1".to_string(), "table1".to_string()]);
5687 namespace
5688 .create_table(create_table_req, bytes::Bytes::from(ipc_data.clone()))
5689 .await
5690 .unwrap();
5691
5692 let mut create_table_req = CreateTableRequest::new();
5693 create_table_req.id = Some(vec!["ns2".to_string(), "table1".to_string()]);
5694 namespace
5695 .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5696 .await
5697 .unwrap();
5698
5699 let list_req = ListTablesRequest {
5701 id: Some(vec!["ns1".to_string()]),
5702 page_token: None,
5703 limit: None,
5704 ..Default::default()
5705 };
5706 let result = namespace.list_tables(list_req).await.unwrap();
5707 assert_eq!(result.tables.len(), 1);
5708 assert_eq!(result.tables[0], "table1");
5709
5710 let list_req = ListTablesRequest {
5711 id: Some(vec!["ns2".to_string()]),
5712 page_token: None,
5713 limit: None,
5714 ..Default::default()
5715 };
5716 let result = namespace.list_tables(list_req).await.unwrap();
5717 assert_eq!(result.tables.len(), 1);
5718 assert_eq!(result.tables[0], "table1");
5719
5720 let mut drop_req = DropTableRequest::new();
5722 drop_req.id = Some(vec!["ns1".to_string(), "table1".to_string()]);
5723 namespace.drop_table(drop_req).await.unwrap();
5724
5725 let mut exists_req = TableExistsRequest::new();
5727 exists_req.id = Some(vec!["ns1".to_string(), "table1".to_string()]);
5728 assert!(namespace.table_exists(exists_req).await.is_err());
5729
5730 let mut exists_req = TableExistsRequest::new();
5731 exists_req.id = Some(vec!["ns2".to_string(), "table1".to_string()]);
5732 assert!(namespace.table_exists(exists_req).await.is_ok());
5733 }
5734
5735 #[tokio::test]
5736 async fn test_migrate_directory_tables() {
5737 let temp_dir = TempStdDir::default();
5738 let temp_path = temp_dir.to_str().unwrap();
5739
5740 let dir_only_ns = DirectoryNamespaceBuilder::new(temp_path)
5742 .manifest_enabled(false)
5743 .dir_listing_enabled(true)
5744 .build()
5745 .await
5746 .unwrap();
5747
5748 let schema = create_test_schema();
5750 let ipc_data = create_test_ipc_data(&schema);
5751
5752 for i in 1..=3 {
5753 let mut create_req = CreateTableRequest::new();
5754 create_req.id = Some(vec![format!("table{}", i)]);
5755 dir_only_ns
5756 .create_table(create_req, bytes::Bytes::from(ipc_data.clone()))
5757 .await
5758 .unwrap();
5759 }
5760
5761 drop(dir_only_ns);
5762
5763 let dual_mode_ns = DirectoryNamespaceBuilder::new(temp_path)
5765 .manifest_enabled(true)
5766 .dir_listing_enabled(true)
5767 .build()
5768 .await
5769 .unwrap();
5770
5771 let mut list_req = ListTablesRequest::new();
5773 list_req.id = Some(vec![]);
5774 let tables = dual_mode_ns.list_tables(list_req).await.unwrap().tables;
5775 assert_eq!(tables.len(), 3);
5776
5777 let migrated_count = dual_mode_ns.migrate().await.unwrap();
5779 assert_eq!(migrated_count, 3, "Should migrate all 3 tables");
5780
5781 let mut list_req = ListTablesRequest::new();
5783 list_req.id = Some(vec![]);
5784 let tables = dual_mode_ns.list_tables(list_req).await.unwrap().tables;
5785 assert_eq!(tables.len(), 3);
5786
5787 let migrated_count = dual_mode_ns.migrate().await.unwrap();
5789 assert_eq!(
5790 migrated_count, 0,
5791 "Should not migrate already-migrated tables"
5792 );
5793
5794 drop(dual_mode_ns);
5795
5796 let manifest_only_ns = DirectoryNamespaceBuilder::new(temp_path)
5798 .manifest_enabled(true)
5799 .dir_listing_enabled(false)
5800 .build()
5801 .await
5802 .unwrap();
5803
5804 let mut list_req = ListTablesRequest::new();
5806 list_req.id = Some(vec![]);
5807 let tables = manifest_only_ns.list_tables(list_req).await.unwrap().tables;
5808 assert_eq!(tables.len(), 3);
5809 assert!(tables.contains(&"table1".to_string()));
5810 assert!(tables.contains(&"table2".to_string()));
5811 assert!(tables.contains(&"table3".to_string()));
5812 }
5813
5814 #[tokio::test]
5815 async fn test_migrate_without_manifest() {
5816 let temp_dir = TempStdDir::default();
5817 let temp_path = temp_dir.to_str().unwrap();
5818
5819 let namespace = DirectoryNamespaceBuilder::new(temp_path)
5821 .manifest_enabled(false)
5822 .dir_listing_enabled(true)
5823 .build()
5824 .await
5825 .unwrap();
5826
5827 let migrated_count = namespace.migrate().await.unwrap();
5829 assert_eq!(migrated_count, 0);
5830 }
5831
5832 #[tokio::test]
5833 async fn test_register_table() {
5834 use lance_namespace::models::{RegisterTableRequest, TableExistsRequest};
5835
5836 let temp_dir = TempStdDir::default();
5837 let temp_path = temp_dir.to_str().unwrap();
5838
5839 let namespace = DirectoryNamespaceBuilder::new(temp_path)
5840 .dir_listing_to_manifest_migration_enabled(true)
5841 .build()
5842 .await
5843 .unwrap();
5844
5845 let schema = create_test_schema();
5847 let ipc_data = create_test_ipc_data(&schema);
5848
5849 let table_uri = format!("{}/external_table.lance", temp_path);
5850 let cursor = Cursor::new(ipc_data);
5851 let stream_reader = StreamReader::try_new(cursor, None).unwrap();
5852 let batches: Vec<_> = stream_reader
5853 .collect::<std::result::Result<Vec<_>, _>>()
5854 .unwrap();
5855 let schema = batches[0].schema();
5856 let batch_results: Vec<_> = batches.into_iter().map(Ok).collect();
5857 let reader = RecordBatchIterator::new(batch_results, schema);
5858 Dataset::write(Box::new(reader), &table_uri, None)
5859 .await
5860 .unwrap();
5861
5862 let mut register_req = RegisterTableRequest::new("external_table.lance".to_string());
5864 register_req.id = Some(vec!["registered_table".to_string()]);
5865
5866 let response = namespace.register_table(register_req).await.unwrap();
5867 assert_eq!(response.location, Some("external_table.lance".to_string()));
5868
5869 let mut exists_req = TableExistsRequest::new();
5871 exists_req.id = Some(vec!["registered_table".to_string()]);
5872 assert!(namespace.table_exists(exists_req).await.is_ok());
5873
5874 let mut list_req = ListTablesRequest::new();
5876 list_req.id = Some(vec![]);
5877 let tables = namespace.list_tables(list_req).await.unwrap();
5878 assert!(tables.tables.contains(&"registered_table".to_string()));
5879 }
5880
5881 #[tokio::test]
5882 async fn test_register_table_duplicate_fails() {
5883 use lance_namespace::models::RegisterTableRequest;
5884
5885 let temp_dir = TempStdDir::default();
5886 let temp_path = temp_dir.to_str().unwrap();
5887
5888 let namespace = DirectoryNamespaceBuilder::new(temp_path)
5889 .build()
5890 .await
5891 .unwrap();
5892
5893 let mut register_req = RegisterTableRequest::new("test_table.lance".to_string());
5895 register_req.id = Some(vec!["test_table".to_string()]);
5896
5897 namespace
5898 .register_table(register_req.clone())
5899 .await
5900 .unwrap();
5901
5902 let result = namespace.register_table(register_req).await;
5904 assert!(result.is_err());
5905 assert!(result.unwrap_err().to_string().contains("already exists"));
5906 }
5907
5908 #[tokio::test]
5909 async fn test_deregister_table() {
5910 use lance_namespace::models::{DeregisterTableRequest, TableExistsRequest};
5911
5912 let temp_dir = TempStdDir::default();
5913 let temp_path = temp_dir.to_str().unwrap();
5914
5915 let namespace = DirectoryNamespaceBuilder::new(temp_path)
5918 .manifest_enabled(true)
5919 .dir_listing_enabled(false)
5920 .build()
5921 .await
5922 .unwrap();
5923
5924 let schema = create_test_schema();
5926 let ipc_data = create_test_ipc_data(&schema);
5927
5928 let mut create_req = CreateTableRequest::new();
5929 create_req.id = Some(vec!["test_table".to_string()]);
5930 namespace
5931 .create_table(create_req, bytes::Bytes::from(ipc_data))
5932 .await
5933 .unwrap();
5934
5935 let mut exists_req = TableExistsRequest::new();
5937 exists_req.id = Some(vec!["test_table".to_string()]);
5938 assert!(namespace.table_exists(exists_req.clone()).await.is_ok());
5939
5940 let mut deregister_req = DeregisterTableRequest::new();
5942 deregister_req.id = Some(vec!["test_table".to_string()]);
5943 let response = namespace.deregister_table(deregister_req).await.unwrap();
5944
5945 assert!(
5947 response.location.is_some(),
5948 "Deregister should return location"
5949 );
5950 let location = response.location.as_ref().unwrap();
5951 let expected_url = lance_io::object_store::uri_to_url(temp_path)
5954 .expect("Failed to convert temp path to URL");
5955 let expected_prefix = expected_url.to_string();
5956 assert!(
5957 location.starts_with(&expected_prefix),
5958 "Location should start with '{}', got: {}",
5959 expected_prefix,
5960 location
5961 );
5962 assert!(
5963 location.contains("test_table"),
5964 "Location should contain table name: {}",
5965 location
5966 );
5967 assert_eq!(response.id, Some(vec!["test_table".to_string()]));
5968
5969 assert!(namespace.table_exists(exists_req).await.is_err());
5971
5972 let dataset = Dataset::open(location).await;
5974 assert!(
5975 dataset.is_ok(),
5976 "Physical table data should still exist at {}",
5977 location
5978 );
5979 }
5980
5981 #[tokio::test]
5982 async fn test_deregister_table_in_child_namespace() {
5983 use lance_namespace::models::{
5984 CreateNamespaceRequest, DeregisterTableRequest, TableExistsRequest,
5985 };
5986
5987 let temp_dir = TempStdDir::default();
5988 let temp_path = temp_dir.to_str().unwrap();
5989
5990 let namespace = DirectoryNamespaceBuilder::new(temp_path)
5991 .build()
5992 .await
5993 .unwrap();
5994
5995 let mut create_ns_req = CreateNamespaceRequest::new();
5997 create_ns_req.id = Some(vec!["test_ns".to_string()]);
5998 namespace.create_namespace(create_ns_req).await.unwrap();
5999
6000 let schema = create_test_schema();
6002 let ipc_data = create_test_ipc_data(&schema);
6003
6004 let mut create_req = CreateTableRequest::new();
6005 create_req.id = Some(vec!["test_ns".to_string(), "test_table".to_string()]);
6006 namespace
6007 .create_table(create_req, bytes::Bytes::from(ipc_data))
6008 .await
6009 .unwrap();
6010
6011 let mut deregister_req = DeregisterTableRequest::new();
6013 deregister_req.id = Some(vec!["test_ns".to_string(), "test_table".to_string()]);
6014 let response = namespace.deregister_table(deregister_req).await.unwrap();
6015
6016 assert!(
6018 response.location.is_some(),
6019 "Deregister should return location"
6020 );
6021 let location = response.location.as_ref().unwrap();
6022 let expected_url = lance_io::object_store::uri_to_url(temp_path)
6025 .expect("Failed to convert temp path to URL");
6026 let expected_prefix = expected_url.to_string();
6027 assert!(
6028 location.starts_with(&expected_prefix),
6029 "Location should start with '{}', got: {}",
6030 expected_prefix,
6031 location
6032 );
6033 assert!(
6034 location.contains("test_ns") && location.contains("test_table"),
6035 "Location should contain namespace and table name: {}",
6036 location
6037 );
6038 assert_eq!(
6039 response.id,
6040 Some(vec!["test_ns".to_string(), "test_table".to_string()])
6041 );
6042
6043 let mut exists_req = TableExistsRequest::new();
6045 exists_req.id = Some(vec!["test_ns".to_string(), "test_table".to_string()]);
6046 assert!(namespace.table_exists(exists_req).await.is_err());
6047 }
6048
6049 #[tokio::test]
6050 async fn test_register_without_manifest_fails() {
6051 use lance_namespace::models::RegisterTableRequest;
6052
6053 let temp_dir = TempStdDir::default();
6054 let temp_path = temp_dir.to_str().unwrap();
6055
6056 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6058 .manifest_enabled(false)
6059 .build()
6060 .await
6061 .unwrap();
6062
6063 let mut register_req = RegisterTableRequest::new("test_table.lance".to_string());
6065 register_req.id = Some(vec!["test_table".to_string()]);
6066 let result = namespace.register_table(register_req).await;
6067 assert!(result.is_err());
6068 assert!(
6069 result
6070 .unwrap_err()
6071 .to_string()
6072 .contains("manifest mode is enabled")
6073 );
6074
6075 }
6078
6079 #[tokio::test]
6080 async fn test_register_table_rejects_absolute_uri() {
6081 use lance_namespace::models::RegisterTableRequest;
6082
6083 let temp_dir = TempStdDir::default();
6084 let temp_path = temp_dir.to_str().unwrap();
6085
6086 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6087 .build()
6088 .await
6089 .unwrap();
6090
6091 let mut register_req = RegisterTableRequest::new("s3://bucket/table.lance".to_string());
6093 register_req.id = Some(vec!["test_table".to_string()]);
6094 let result = namespace.register_table(register_req).await;
6095 assert!(result.is_err());
6096 let err_msg = result.unwrap_err().to_string();
6097 assert!(err_msg.contains("Absolute URIs are not allowed"));
6098 }
6099
6100 #[tokio::test]
6101 async fn test_register_table_rejects_absolute_path() {
6102 use lance_namespace::models::RegisterTableRequest;
6103
6104 let temp_dir = TempStdDir::default();
6105 let temp_path = temp_dir.to_str().unwrap();
6106
6107 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6108 .build()
6109 .await
6110 .unwrap();
6111
6112 let mut register_req = RegisterTableRequest::new("/tmp/table.lance".to_string());
6114 register_req.id = Some(vec!["test_table".to_string()]);
6115 let result = namespace.register_table(register_req).await;
6116 assert!(result.is_err());
6117 let err_msg = result.unwrap_err().to_string();
6118 assert!(err_msg.contains("Absolute paths are not allowed"));
6119 }
6120
6121 #[tokio::test]
6122 async fn test_register_table_rejects_path_traversal() {
6123 use lance_namespace::models::RegisterTableRequest;
6124
6125 let temp_dir = TempStdDir::default();
6126 let temp_path = temp_dir.to_str().unwrap();
6127
6128 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6129 .build()
6130 .await
6131 .unwrap();
6132
6133 let mut register_req = RegisterTableRequest::new("../outside/table.lance".to_string());
6135 register_req.id = Some(vec!["test_table".to_string()]);
6136 let result = namespace.register_table(register_req).await;
6137 assert!(result.is_err());
6138 let err_msg = result.unwrap_err().to_string();
6139 assert!(err_msg.contains("Path traversal is not allowed"));
6140 }
6141
6142 #[tokio::test]
6143 async fn test_namespace_write() {
6144 use arrow::array::Int32Array;
6145 use arrow::datatypes::{DataType, Field as ArrowField, Schema as ArrowSchema};
6146 use arrow::record_batch::{RecordBatch, RecordBatchIterator};
6147 use lance::dataset::{Dataset, WriteMode, WriteParams};
6148 use lance_namespace::LanceNamespace;
6149
6150 let (namespace, _temp_dir) = create_test_namespace().await;
6151 let namespace = Arc::new(namespace) as Arc<dyn LanceNamespace>;
6152
6153 let table_id = vec!["test_ns".to_string(), "test_table".to_string()];
6155 let schema = Arc::new(ArrowSchema::new(vec![
6156 ArrowField::new("a", DataType::Int32, false),
6157 ArrowField::new("b", DataType::Int32, false),
6158 ]));
6159
6160 let data1 = RecordBatch::try_new(
6162 schema.clone(),
6163 vec![
6164 Arc::new(Int32Array::from(vec![1, 2, 3])),
6165 Arc::new(Int32Array::from(vec![10, 20, 30])),
6166 ],
6167 )
6168 .unwrap();
6169
6170 let reader1 = RecordBatchIterator::new(vec![data1].into_iter().map(Ok), schema.clone());
6171 let dataset =
6172 Dataset::write_into_namespace(reader1, namespace.clone(), table_id.clone(), None)
6173 .await
6174 .unwrap();
6175
6176 assert_eq!(dataset.count_rows(None).await.unwrap(), 3);
6177 assert_eq!(dataset.version().version, 1);
6178
6179 let data2 = RecordBatch::try_new(
6181 schema.clone(),
6182 vec![
6183 Arc::new(Int32Array::from(vec![4, 5])),
6184 Arc::new(Int32Array::from(vec![40, 50])),
6185 ],
6186 )
6187 .unwrap();
6188
6189 let params_append = WriteParams {
6190 mode: WriteMode::Append,
6191 ..Default::default()
6192 };
6193
6194 let reader2 = RecordBatchIterator::new(vec![data2].into_iter().map(Ok), schema.clone());
6195 let dataset = Dataset::write_into_namespace(
6196 reader2,
6197 namespace.clone(),
6198 table_id.clone(),
6199 Some(params_append),
6200 )
6201 .await
6202 .unwrap();
6203
6204 assert_eq!(dataset.count_rows(None).await.unwrap(), 5);
6205 assert_eq!(dataset.version().version, 2);
6206
6207 let data3 = RecordBatch::try_new(
6209 schema.clone(),
6210 vec![
6211 Arc::new(Int32Array::from(vec![100, 200])),
6212 Arc::new(Int32Array::from(vec![1000, 2000])),
6213 ],
6214 )
6215 .unwrap();
6216
6217 let params_overwrite = WriteParams {
6218 mode: WriteMode::Overwrite,
6219 ..Default::default()
6220 };
6221
6222 let reader3 = RecordBatchIterator::new(vec![data3].into_iter().map(Ok), schema.clone());
6223 let dataset = Dataset::write_into_namespace(
6224 reader3,
6225 namespace.clone(),
6226 table_id.clone(),
6227 Some(params_overwrite),
6228 )
6229 .await
6230 .unwrap();
6231
6232 assert_eq!(dataset.count_rows(None).await.unwrap(), 2);
6233 assert_eq!(dataset.version().version, 3);
6234
6235 let result = dataset.scan().try_into_batch().await.unwrap();
6237 let a_col = result
6238 .column_by_name("a")
6239 .unwrap()
6240 .as_any()
6241 .downcast_ref::<Int32Array>()
6242 .unwrap();
6243 assert_eq!(a_col.values(), &[100, 200]);
6244 }
6245
6246 #[tokio::test]
6251 async fn test_declare_table_v1_mode() {
6252 use lance_namespace::models::{
6253 DeclareTableRequest, DescribeTableRequest, ListTablesRequest, TableExistsRequest,
6254 };
6255
6256 let temp_dir = TempStdDir::default();
6257 let temp_path = temp_dir.to_str().unwrap();
6258
6259 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6261 .manifest_enabled(false)
6262 .build()
6263 .await
6264 .unwrap();
6265
6266 let mut declare_req = DeclareTableRequest::new();
6268 declare_req.id = Some(vec!["test_table".to_string()]);
6269 let response = namespace.declare_table(declare_req).await.unwrap();
6270
6271 assert!(response.location.is_some());
6273 let location = response.location.as_ref().unwrap();
6274 assert!(location.ends_with("test_table.lance"));
6275
6276 let mut exists_req = TableExistsRequest::new();
6278 exists_req.id = Some(vec!["test_table".to_string()]);
6279 assert!(namespace.table_exists(exists_req).await.is_ok());
6280
6281 let mut describe_req = DescribeTableRequest::new();
6283 describe_req.id = Some(vec!["test_table".to_string()]);
6284 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6285 assert!(describe_response.location.is_some());
6286 assert!(describe_response.version.is_none()); assert!(describe_response.schema.is_none()); assert_eq!(describe_response.is_only_declared, None);
6289
6290 let mut describe_req = DescribeTableRequest::new();
6291 describe_req.id = Some(vec!["test_table".to_string()]);
6292 describe_req.check_declared = Some(true);
6293 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6294 assert_eq!(describe_response.is_only_declared, Some(true));
6295
6296 let mut list_req = ListTablesRequest::new();
6297 list_req.id = Some(vec![]);
6298 let list_response = namespace.list_tables(list_req.clone()).await.unwrap();
6299 assert_eq!(list_response.tables, vec!["test_table".to_string()]);
6300
6301 list_req.include_declared = Some(false);
6302 let list_response = namespace.list_tables(list_req).await.unwrap();
6303 assert!(list_response.tables.is_empty());
6304 }
6305
6306 #[tokio::test]
6307 async fn test_insert_into_declared_table_promotes_it_from_declared_state() {
6308 use lance_namespace::models::{
6309 DeclareTableRequest, DescribeTableRequest, InsertIntoTableRequest,
6310 };
6311
6312 let temp_dir = TempStdDir::default();
6313 let temp_path = temp_dir.to_str().unwrap();
6314
6315 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6316 .manifest_enabled(false)
6317 .build()
6318 .await
6319 .unwrap();
6320
6321 let mut declare_req = DeclareTableRequest::new();
6322 declare_req.id = Some(vec!["test_table".to_string()]);
6323 namespace.declare_table(declare_req).await.unwrap();
6324
6325 let schema = create_test_schema();
6326 let ipc_data = create_test_ipc_data(&schema);
6327 let mut insert_req = InsertIntoTableRequest::new();
6328 insert_req.id = Some(vec!["test_table".to_string()]);
6329 namespace
6330 .insert_into_table(insert_req, bytes::Bytes::from(ipc_data))
6331 .await
6332 .unwrap();
6333
6334 let mut describe_req = DescribeTableRequest::new();
6335 describe_req.id = Some(vec!["test_table".to_string()]);
6336 describe_req.load_detailed_metadata = Some(true);
6337 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6338
6339 assert_eq!(describe_response.is_only_declared, Some(false));
6340 assert_eq!(describe_response.version, Some(1));
6341 assert!(describe_response.schema.is_some());
6342
6343 let mut list_req = ListTablesRequest::new();
6344 list_req.id = Some(vec![]);
6345 list_req.include_declared = Some(false);
6346 assert_eq!(
6347 namespace.list_tables(list_req).await.unwrap().tables,
6348 vec!["test_table".to_string()]
6349 );
6350 }
6351
6352 #[tokio::test]
6353 async fn test_create_table_after_declare_table_v1_mode_creates_table() {
6354 use lance_namespace::models::{
6355 DeclareTableRequest, DescribeTableRequest, ListTablesRequest,
6356 };
6357
6358 let temp_dir = TempStdDir::default();
6359 let temp_path = temp_dir.to_str().unwrap();
6360
6361 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6362 .manifest_enabled(false)
6363 .build()
6364 .await
6365 .unwrap();
6366
6367 let mut declare_req = DeclareTableRequest::new();
6368 declare_req.id = Some(vec!["test_table".to_string()]);
6369 namespace.declare_table(declare_req).await.unwrap();
6370
6371 let mut create_req = CreateTableRequest::new();
6372 create_req.id = Some(vec!["test_table".to_string()]);
6373 let response = namespace
6374 .create_table(
6375 create_req,
6376 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6377 )
6378 .await
6379 .unwrap();
6380
6381 assert_eq!(response.version, Some(1));
6382
6383 let mut describe_req = DescribeTableRequest::new();
6384 describe_req.id = Some(vec!["test_table".to_string()]);
6385 describe_req.load_detailed_metadata = Some(true);
6386 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6387 assert_eq!(describe_response.is_only_declared, Some(false));
6388 assert_eq!(describe_response.version, Some(1));
6389
6390 let mut list_req = ListTablesRequest::new();
6391 list_req.id = Some(vec![]);
6392 list_req.include_declared = Some(false);
6393 assert_eq!(
6394 namespace.list_tables(list_req).await.unwrap().tables,
6395 vec!["test_table".to_string()]
6396 );
6397 }
6398
6399 #[tokio::test]
6400 async fn test_insert_into_declared_table_with_manifest_promotes_it() {
6401 use lance_namespace::models::{
6402 DeclareTableRequest, DescribeTableRequest, InsertIntoTableRequest, ListTablesRequest,
6403 };
6404
6405 let temp_dir = TempStdDir::default();
6406 let temp_path = temp_dir.to_str().unwrap();
6407
6408 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6409 .manifest_enabled(true)
6410 .dir_listing_enabled(false)
6411 .build()
6412 .await
6413 .unwrap();
6414
6415 let mut declare_req = DeclareTableRequest::new();
6416 declare_req.id = Some(vec!["test_table".to_string()]);
6417 namespace.declare_table(declare_req).await.unwrap();
6418
6419 let mut insert_req = InsertIntoTableRequest::new();
6420 insert_req.id = Some(vec!["test_table".to_string()]);
6421 namespace
6422 .insert_into_table(
6423 insert_req,
6424 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6425 )
6426 .await
6427 .unwrap();
6428
6429 let mut describe_req = DescribeTableRequest::new();
6430 describe_req.id = Some(vec!["test_table".to_string()]);
6431 describe_req.load_detailed_metadata = Some(true);
6432 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6433 assert_eq!(describe_response.is_only_declared, Some(false));
6434 assert_eq!(describe_response.version, Some(1));
6435
6436 let mut list_req = ListTablesRequest::new();
6437 list_req.id = Some(vec![]);
6438 list_req.include_declared = Some(false);
6439 assert_eq!(
6440 namespace.list_tables(list_req).await.unwrap().tables,
6441 vec!["test_table".to_string()]
6442 );
6443 }
6444
6445 #[tokio::test]
6446 async fn test_create_table_after_declare_table_with_manifest_creates_table() {
6447 use lance_namespace::models::{
6448 CreateTableRequest, DeclareTableRequest, DescribeTableRequest, ListTablesRequest,
6449 };
6450
6451 let temp_dir = TempStdDir::default();
6452 let temp_path = temp_dir.to_str().unwrap();
6453
6454 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6455 .manifest_enabled(true)
6456 .dir_listing_enabled(false)
6457 .build()
6458 .await
6459 .unwrap();
6460
6461 let mut declare_req = DeclareTableRequest::new();
6462 declare_req.id = Some(vec!["test_table".to_string()]);
6463 declare_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6464 namespace.declare_table(declare_req).await.unwrap();
6465
6466 let mut create_req = CreateTableRequest::new();
6467 create_req.id = Some(vec!["test_table".to_string()]);
6468 create_req.mode = Some("Overwrite".to_string());
6469 let response = namespace
6470 .create_table(
6471 create_req,
6472 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6473 )
6474 .await
6475 .unwrap();
6476
6477 assert_eq!(response.version, Some(1));
6478 assert_eq!(
6479 response
6480 .properties
6481 .as_ref()
6482 .and_then(|properties| properties.get("owner")),
6483 Some(&"alice".to_string())
6484 );
6485
6486 let mut describe_req = DescribeTableRequest::new();
6487 describe_req.id = Some(vec!["test_table".to_string()]);
6488 describe_req.load_detailed_metadata = Some(true);
6489 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6490 assert_eq!(describe_response.is_only_declared, Some(false));
6491 assert_eq!(describe_response.version, Some(1));
6492 assert_eq!(
6493 describe_response
6494 .properties
6495 .as_ref()
6496 .and_then(|properties| properties.get("owner")),
6497 Some(&"alice".to_string())
6498 );
6499
6500 let mut list_req = ListTablesRequest::new();
6501 list_req.id = Some(vec![]);
6502 list_req.include_declared = Some(false);
6503 assert_eq!(
6504 namespace.list_tables(list_req).await.unwrap().tables,
6505 vec!["test_table".to_string()]
6506 );
6507 }
6508
6509 #[tokio::test]
6510 async fn test_create_table_after_declare_table_with_manifest_rejects_new_properties() {
6511 use lance_namespace::models::{CreateTableRequest, DeclareTableRequest};
6512
6513 let temp_dir = TempStdDir::default();
6514 let temp_path = temp_dir.to_str().unwrap();
6515
6516 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6517 .manifest_enabled(true)
6518 .dir_listing_enabled(false)
6519 .build()
6520 .await
6521 .unwrap();
6522
6523 let mut declare_req = DeclareTableRequest::new();
6524 declare_req.id = Some(vec!["test_table".to_string()]);
6525 declare_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6526 namespace.declare_table(declare_req).await.unwrap();
6527
6528 let mut create_req = CreateTableRequest::new();
6529 create_req.id = Some(vec!["test_table".to_string()]);
6530 create_req.properties = Some(HashMap::from([("owner".to_string(), "bob".to_string())]));
6531
6532 let result = namespace
6533 .create_table(
6534 create_req,
6535 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6536 )
6537 .await;
6538
6539 assert!(result.is_err());
6540 assert!(
6541 result
6542 .unwrap_err()
6543 .to_string()
6544 .contains("cannot set properties for already declared table")
6545 );
6546 }
6547
6548 #[tokio::test]
6549 async fn test_create_table_with_manifest_exist_ok_keeps_existing_table() {
6550 use lance_namespace::models::{CreateTableRequest, DescribeTableRequest};
6551
6552 let temp_dir = TempStdDir::default();
6553 let temp_path = temp_dir.to_str().unwrap();
6554
6555 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6556 .manifest_enabled(true)
6557 .dir_listing_enabled(false)
6558 .build()
6559 .await
6560 .unwrap();
6561
6562 let mut create_req = CreateTableRequest::new();
6563 create_req.id = Some(vec!["test_table".to_string()]);
6564 create_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6565 namespace
6566 .create_table(
6567 create_req,
6568 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6569 )
6570 .await
6571 .unwrap();
6572
6573 let mut create_req = CreateTableRequest::new();
6574 create_req.id = Some(vec!["test_table".to_string()]);
6575 create_req.mode = Some("ExistOk".to_string());
6576 create_req.properties = Some(HashMap::from([("owner".to_string(), "bob".to_string())]));
6577 let response = namespace
6578 .create_table(
6579 create_req,
6580 bytes::Bytes::from(create_single_row_test_ipc_data()),
6581 )
6582 .await
6583 .unwrap();
6584
6585 assert_eq!(
6586 response
6587 .properties
6588 .as_ref()
6589 .and_then(|properties| properties.get("owner")),
6590 Some(&"alice".to_string())
6591 );
6592 assert_eq!(
6593 open_dataset(&namespace, "test_table")
6594 .await
6595 .count_rows(None)
6596 .await
6597 .unwrap(),
6598 2
6599 );
6600
6601 let mut describe_req = DescribeTableRequest::new();
6602 describe_req.id = Some(vec!["test_table".to_string()]);
6603 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6604 assert_eq!(
6605 describe_response
6606 .properties
6607 .as_ref()
6608 .and_then(|properties| properties.get("owner")),
6609 Some(&"alice".to_string())
6610 );
6611 }
6612
6613 #[tokio::test]
6614 async fn test_create_table_with_manifest_overwrite_replaces_existing_table() {
6615 use lance_namespace::models::{CreateTableRequest, DescribeTableRequest};
6616
6617 let temp_dir = TempStdDir::default();
6618 let temp_path = temp_dir.to_str().unwrap();
6619
6620 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6621 .manifest_enabled(true)
6622 .dir_listing_enabled(false)
6623 .build()
6624 .await
6625 .unwrap();
6626
6627 let mut create_req = CreateTableRequest::new();
6628 create_req.id = Some(vec!["test_table".to_string()]);
6629 create_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6630 namespace
6631 .create_table(
6632 create_req,
6633 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6634 )
6635 .await
6636 .unwrap();
6637
6638 let mut create_req = CreateTableRequest::new();
6639 create_req.id = Some(vec!["test_table".to_string()]);
6640 create_req.mode = Some("overwrite".to_string());
6641 create_req.properties = Some(HashMap::from([("owner".to_string(), "bob".to_string())]));
6642 let response = namespace
6643 .create_table(
6644 create_req,
6645 bytes::Bytes::from(create_single_row_test_ipc_data()),
6646 )
6647 .await
6648 .unwrap();
6649
6650 assert_eq!(response.version, Some(2));
6651 assert_eq!(
6652 response
6653 .properties
6654 .as_ref()
6655 .and_then(|properties| properties.get("owner")),
6656 Some(&"bob".to_string())
6657 );
6658 assert_eq!(
6659 open_dataset(&namespace, "test_table")
6660 .await
6661 .count_rows(None)
6662 .await
6663 .unwrap(),
6664 1
6665 );
6666
6667 let mut describe_req = DescribeTableRequest::new();
6668 describe_req.id = Some(vec!["test_table".to_string()]);
6669 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6670 assert_eq!(
6671 describe_response
6672 .properties
6673 .as_ref()
6674 .and_then(|properties| properties.get("owner")),
6675 Some(&"bob".to_string())
6676 );
6677 }
6678
6679 #[tokio::test]
6680 async fn test_create_table_with_manifest_invalid_mode_rejected() {
6681 use lance_namespace::models::CreateTableRequest;
6682
6683 let temp_dir = TempStdDir::default();
6684 let temp_path = temp_dir.to_str().unwrap();
6685
6686 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6687 .manifest_enabled(true)
6688 .dir_listing_enabled(false)
6689 .build()
6690 .await
6691 .unwrap();
6692
6693 let mut create_req = CreateTableRequest::new();
6694 create_req.id = Some(vec!["test_table".to_string()]);
6695 create_req.mode = Some("append".to_string());
6696 let result = namespace
6697 .create_table(
6698 create_req,
6699 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6700 )
6701 .await;
6702
6703 assert!(result.is_err());
6704 assert!(
6705 result
6706 .unwrap_err()
6707 .to_string()
6708 .contains("Unsupported create_table mode")
6709 );
6710 }
6711
6712 #[tokio::test]
6713 async fn test_merge_insert_into_declared_table_v1_mode_creates_table() {
6714 use lance_namespace::models::{
6715 DeclareTableRequest, DescribeTableRequest, ListTablesRequest,
6716 MergeInsertIntoTableRequest,
6717 };
6718
6719 let temp_dir = TempStdDir::default();
6720 let temp_path = temp_dir.to_str().unwrap();
6721
6722 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6723 .manifest_enabled(false)
6724 .build()
6725 .await
6726 .unwrap();
6727
6728 let mut declare_req = DeclareTableRequest::new();
6729 declare_req.id = Some(vec!["test_table".to_string()]);
6730 namespace.declare_table(declare_req).await.unwrap();
6731
6732 let mut merge_req = MergeInsertIntoTableRequest::new();
6733 merge_req.id = Some(vec!["test_table".to_string()]);
6734 merge_req.on = Some("id".to_string());
6735 let response = namespace
6736 .merge_insert_into_table(
6737 merge_req,
6738 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6739 )
6740 .await
6741 .unwrap();
6742
6743 assert_eq!(response.num_inserted_rows, Some(2));
6744 assert_eq!(response.num_updated_rows, Some(0));
6745
6746 let mut describe_req = DescribeTableRequest::new();
6747 describe_req.id = Some(vec!["test_table".to_string()]);
6748 describe_req.load_detailed_metadata = Some(true);
6749 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6750 assert_eq!(describe_response.is_only_declared, Some(false));
6751 assert_eq!(describe_response.version, Some(1));
6752
6753 let mut list_req = ListTablesRequest::new();
6754 list_req.id = Some(vec![]);
6755 list_req.include_declared = Some(false);
6756 assert_eq!(
6757 namespace.list_tables(list_req).await.unwrap().tables,
6758 vec!["test_table".to_string()]
6759 );
6760 }
6761
6762 #[tokio::test]
6763 async fn test_merge_insert_into_declared_table_with_manifest_creates_table() {
6764 use lance_namespace::models::{
6765 DeclareTableRequest, DescribeTableRequest, ListTablesRequest,
6766 MergeInsertIntoTableRequest,
6767 };
6768
6769 let temp_dir = TempStdDir::default();
6770 let temp_path = temp_dir.to_str().unwrap();
6771
6772 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6773 .manifest_enabled(true)
6774 .dir_listing_enabled(false)
6775 .build()
6776 .await
6777 .unwrap();
6778
6779 let mut declare_req = DeclareTableRequest::new();
6780 declare_req.id = Some(vec!["test_table".to_string()]);
6781 namespace.declare_table(declare_req).await.unwrap();
6782
6783 let mut merge_req = MergeInsertIntoTableRequest::new();
6784 merge_req.id = Some(vec!["test_table".to_string()]);
6785 merge_req.on = Some("id".to_string());
6786 let response = namespace
6787 .merge_insert_into_table(
6788 merge_req,
6789 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6790 )
6791 .await
6792 .unwrap();
6793
6794 assert_eq!(response.num_inserted_rows, Some(2));
6795 assert_eq!(response.num_updated_rows, Some(0));
6796
6797 let mut describe_req = DescribeTableRequest::new();
6798 describe_req.id = Some(vec!["test_table".to_string()]);
6799 describe_req.load_detailed_metadata = Some(true);
6800 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6801 assert_eq!(describe_response.is_only_declared, Some(false));
6802 assert_eq!(describe_response.version, Some(1));
6803
6804 let mut list_req = ListTablesRequest::new();
6805 list_req.id = Some(vec![]);
6806 list_req.include_declared = Some(false);
6807 assert_eq!(
6808 namespace.list_tables(list_req).await.unwrap().tables,
6809 vec!["test_table".to_string()]
6810 );
6811 }
6812
6813 #[tokio::test]
6814 async fn test_declare_table_with_manifest() {
6815 use lance_namespace::models::{
6816 DeclareTableRequest, DescribeTableRequest, ListTablesRequest, TableExistsRequest,
6817 };
6818
6819 let temp_dir = TempStdDir::default();
6820 let temp_path = temp_dir.to_str().unwrap();
6821
6822 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6824 .manifest_enabled(true)
6825 .dir_listing_enabled(false)
6826 .build()
6827 .await
6828 .unwrap();
6829
6830 let mut declare_req = DeclareTableRequest::new();
6832 declare_req.id = Some(vec!["test_table".to_string()]);
6833 declare_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6834 let response = namespace.declare_table(declare_req).await.unwrap();
6835
6836 assert!(response.location.is_some());
6838 assert_eq!(
6839 response
6840 .properties
6841 .as_ref()
6842 .and_then(|properties| properties.get("owner")),
6843 Some(&"alice".to_string())
6844 );
6845
6846 let mut exists_req = TableExistsRequest::new();
6848 exists_req.id = Some(vec!["test_table".to_string()]);
6849 assert!(namespace.table_exists(exists_req).await.is_ok());
6850
6851 let mut describe_req = DescribeTableRequest::new();
6852 describe_req.id = Some(vec!["test_table".to_string()]);
6853 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6854 assert_eq!(describe_response.is_only_declared, None);
6855
6856 let mut describe_req = DescribeTableRequest::new();
6857 describe_req.id = Some(vec!["test_table".to_string()]);
6858 describe_req.check_declared = Some(true);
6859 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6860 assert_eq!(describe_response.is_only_declared, Some(true));
6861 assert_eq!(
6862 describe_response
6863 .properties
6864 .as_ref()
6865 .and_then(|properties| properties.get("owner")),
6866 Some(&"alice".to_string())
6867 );
6868
6869 let mut list_req = ListTablesRequest::new();
6870 list_req.id = Some(vec![]);
6871 assert_eq!(
6872 namespace
6873 .list_tables(list_req.clone())
6874 .await
6875 .unwrap()
6876 .tables,
6877 vec!["test_table".to_string()]
6878 );
6879 list_req.include_declared = Some(false);
6880 assert!(
6881 namespace
6882 .list_tables(list_req)
6883 .await
6884 .unwrap()
6885 .tables
6886 .is_empty()
6887 );
6888 }
6889
6890 #[tokio::test]
6891 async fn test_declare_table_when_table_exists() {
6892 use lance_namespace::models::DeclareTableRequest;
6893
6894 let temp_dir = TempStdDir::default();
6895 let temp_path = temp_dir.to_str().unwrap();
6896
6897 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6898 .manifest_enabled(false)
6899 .build()
6900 .await
6901 .unwrap();
6902
6903 let schema = create_test_schema();
6905 let ipc_data = create_test_ipc_data(&schema);
6906 let mut create_req = CreateTableRequest::new();
6907 create_req.id = Some(vec!["test_table".to_string()]);
6908 namespace
6909 .create_table(create_req, bytes::Bytes::from(ipc_data))
6910 .await
6911 .unwrap();
6912
6913 let mut declare_req = DeclareTableRequest::new();
6915 declare_req.id = Some(vec!["test_table".to_string()]);
6916 let result = namespace.declare_table(declare_req).await;
6917 assert!(result.is_err());
6918 }
6919
6920 #[tokio::test]
6925 async fn test_deregister_table_v1_mode() {
6926 use lance_namespace::models::{DeregisterTableRequest, TableExistsRequest};
6927
6928 let temp_dir = TempStdDir::default();
6929 let temp_path = temp_dir.to_str().unwrap();
6930
6931 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6933 .manifest_enabled(false)
6934 .dir_listing_enabled(true)
6935 .build()
6936 .await
6937 .unwrap();
6938
6939 let schema = create_test_schema();
6941 let ipc_data = create_test_ipc_data(&schema);
6942 let mut create_req = CreateTableRequest::new();
6943 create_req.id = Some(vec!["test_table".to_string()]);
6944 namespace
6945 .create_table(create_req, bytes::Bytes::from(ipc_data))
6946 .await
6947 .unwrap();
6948
6949 let mut exists_req = TableExistsRequest::new();
6951 exists_req.id = Some(vec!["test_table".to_string()]);
6952 assert!(namespace.table_exists(exists_req.clone()).await.is_ok());
6953
6954 let mut deregister_req = DeregisterTableRequest::new();
6956 deregister_req.id = Some(vec!["test_table".to_string()]);
6957 let response = namespace.deregister_table(deregister_req).await.unwrap();
6958
6959 assert!(response.location.is_some());
6961 let location = response.location.as_ref().unwrap();
6962 assert!(location.contains("test_table"));
6963
6964 let result = namespace.table_exists(exists_req).await;
6966 assert!(result.is_err());
6967 assert!(result.unwrap_err().to_string().contains("deregistered"));
6968
6969 let dataset = Dataset::open(location).await;
6971 assert!(dataset.is_ok(), "Physical table data should still exist");
6972 }
6973
6974 #[tokio::test]
6975 async fn test_deregister_table_v1_already_deregistered() {
6976 use lance_namespace::models::DeregisterTableRequest;
6977
6978 let temp_dir = TempStdDir::default();
6979 let temp_path = temp_dir.to_str().unwrap();
6980
6981 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6982 .manifest_enabled(false)
6983 .dir_listing_enabled(true)
6984 .build()
6985 .await
6986 .unwrap();
6987
6988 let schema = create_test_schema();
6990 let ipc_data = create_test_ipc_data(&schema);
6991 let mut create_req = CreateTableRequest::new();
6992 create_req.id = Some(vec!["test_table".to_string()]);
6993 namespace
6994 .create_table(create_req, bytes::Bytes::from(ipc_data))
6995 .await
6996 .unwrap();
6997
6998 let mut deregister_req = DeregisterTableRequest::new();
7000 deregister_req.id = Some(vec!["test_table".to_string()]);
7001 namespace
7002 .deregister_table(deregister_req.clone())
7003 .await
7004 .unwrap();
7005
7006 let result = namespace.deregister_table(deregister_req).await;
7008 assert!(result.is_err());
7009 assert!(
7010 result
7011 .unwrap_err()
7012 .to_string()
7013 .contains("already deregistered")
7014 );
7015 }
7016
7017 #[tokio::test]
7022 async fn test_list_tables_skips_deregistered_v1() {
7023 use lance_namespace::models::DeregisterTableRequest;
7024
7025 let temp_dir = TempStdDir::default();
7026 let temp_path = temp_dir.to_str().unwrap();
7027
7028 let namespace = DirectoryNamespaceBuilder::new(temp_path)
7029 .manifest_enabled(false)
7030 .dir_listing_enabled(true)
7031 .build()
7032 .await
7033 .unwrap();
7034
7035 let schema = create_test_schema();
7037 let ipc_data = create_test_ipc_data(&schema);
7038
7039 let mut create_req1 = CreateTableRequest::new();
7040 create_req1.id = Some(vec!["table1".to_string()]);
7041 namespace
7042 .create_table(create_req1, bytes::Bytes::from(ipc_data.clone()))
7043 .await
7044 .unwrap();
7045
7046 let mut create_req2 = CreateTableRequest::new();
7047 create_req2.id = Some(vec!["table2".to_string()]);
7048 namespace
7049 .create_table(create_req2, bytes::Bytes::from(ipc_data))
7050 .await
7051 .unwrap();
7052
7053 let mut list_req = ListTablesRequest::new();
7055 list_req.id = Some(vec![]);
7056 let list_response = namespace.list_tables(list_req.clone()).await.unwrap();
7057 assert_eq!(list_response.tables.len(), 2);
7058
7059 let mut deregister_req = DeregisterTableRequest::new();
7061 deregister_req.id = Some(vec!["table1".to_string()]);
7062 namespace.deregister_table(deregister_req).await.unwrap();
7063
7064 let list_response = namespace.list_tables(list_req).await.unwrap();
7066 assert_eq!(list_response.tables.len(), 1);
7067 assert!(list_response.tables.contains(&"table2".to_string()));
7068 assert!(!list_response.tables.contains(&"table1".to_string()));
7069 }
7070
7071 #[tokio::test]
7076 async fn test_describe_table_fails_for_deregistered_v1() {
7077 use lance_namespace::models::{DeregisterTableRequest, DescribeTableRequest};
7078
7079 let temp_dir = TempStdDir::default();
7080 let temp_path = temp_dir.to_str().unwrap();
7081
7082 let namespace = DirectoryNamespaceBuilder::new(temp_path)
7083 .manifest_enabled(false)
7084 .dir_listing_enabled(true)
7085 .build()
7086 .await
7087 .unwrap();
7088
7089 let schema = create_test_schema();
7091 let ipc_data = create_test_ipc_data(&schema);
7092 let mut create_req = CreateTableRequest::new();
7093 create_req.id = Some(vec!["test_table".to_string()]);
7094 namespace
7095 .create_table(create_req, bytes::Bytes::from(ipc_data))
7096 .await
7097 .unwrap();
7098
7099 let mut describe_req = DescribeTableRequest::new();
7101 describe_req.id = Some(vec!["test_table".to_string()]);
7102 assert!(namespace.describe_table(describe_req.clone()).await.is_ok());
7103
7104 let mut deregister_req = DeregisterTableRequest::new();
7106 deregister_req.id = Some(vec!["test_table".to_string()]);
7107 namespace.deregister_table(deregister_req).await.unwrap();
7108
7109 let result = namespace.describe_table(describe_req).await;
7111 assert!(result.is_err());
7112 let err = result.unwrap_err();
7113 assert!(matches!(err, Error::Namespace { .. }));
7114 let err_msg = err.to_string();
7115 assert!(err_msg.contains("deregistered"));
7116 assert!(err_msg.contains("table id 'test_table'"));
7117 }
7118
7119 #[tokio::test]
7120 async fn test_table_exists_fails_for_deregistered_v1() {
7121 use lance_namespace::models::{DeregisterTableRequest, TableExistsRequest};
7122
7123 let temp_dir = TempStdDir::default();
7124 let temp_path = temp_dir.to_str().unwrap();
7125
7126 let namespace = DirectoryNamespaceBuilder::new(temp_path)
7127 .manifest_enabled(false)
7128 .dir_listing_enabled(true)
7129 .build()
7130 .await
7131 .unwrap();
7132
7133 let schema = create_test_schema();
7135 let ipc_data = create_test_ipc_data(&schema);
7136 let mut create_req = CreateTableRequest::new();
7137 create_req.id = Some(vec!["test_table".to_string()]);
7138 namespace
7139 .create_table(create_req, bytes::Bytes::from(ipc_data))
7140 .await
7141 .unwrap();
7142
7143 let mut exists_req = TableExistsRequest::new();
7145 exists_req.id = Some(vec!["test_table".to_string()]);
7146 assert!(namespace.table_exists(exists_req.clone()).await.is_ok());
7147
7148 let mut deregister_req = DeregisterTableRequest::new();
7150 deregister_req.id = Some(vec!["test_table".to_string()]);
7151 namespace.deregister_table(deregister_req).await.unwrap();
7152
7153 let result = namespace.table_exists(exists_req).await;
7155 assert!(result.is_err());
7156 let err = result.unwrap_err();
7157 assert!(matches!(err, Error::Namespace { .. }));
7158 let err_msg = err.to_string();
7159 assert!(err_msg.contains("deregistered"));
7160 assert!(err_msg.contains("table id 'test_table'"));
7161 }
7162
7163 #[tokio::test]
7164 async fn test_atomic_table_status_check() {
7165 let temp_dir = TempStdDir::default();
7169 let temp_path = temp_dir.to_str().unwrap();
7170
7171 let namespace = DirectoryNamespaceBuilder::new(temp_path)
7172 .manifest_enabled(false)
7173 .dir_listing_enabled(true)
7174 .build()
7175 .await
7176 .unwrap();
7177
7178 let schema = create_test_schema();
7180 let ipc_data = create_test_ipc_data(&schema);
7181 let mut create_req = CreateTableRequest::new();
7182 create_req.id = Some(vec!["test_table".to_string()]);
7183 namespace
7184 .create_table(create_req, bytes::Bytes::from(ipc_data))
7185 .await
7186 .unwrap();
7187
7188 let status = namespace.check_table_status("test_table").await;
7190 assert!(status.exists);
7191 assert!(!status.is_deregistered);
7192 assert!(!status.has_reserved_file);
7193 }
7194
7195 #[tokio::test]
7196 async fn test_table_version_tracking_enabled_managed_versioning() {
7197 use lance_namespace::models::DescribeTableRequest;
7198
7199 let temp_dir = TempStdDir::default();
7200 let temp_path = temp_dir.to_str().unwrap();
7201
7202 let namespace = DirectoryNamespaceBuilder::new(temp_path)
7204 .table_version_tracking_enabled(true)
7205 .build()
7206 .await
7207 .unwrap();
7208
7209 let schema = create_test_schema();
7211 let ipc_data = create_test_ipc_data(&schema);
7212 let mut create_req = CreateTableRequest::new();
7213 create_req.id = Some(vec!["test_table".to_string()]);
7214 namespace
7215 .create_table(create_req, bytes::Bytes::from(ipc_data))
7216 .await
7217 .unwrap();
7218
7219 let mut describe_req = DescribeTableRequest::new();
7221 describe_req.id = Some(vec!["test_table".to_string()]);
7222 let describe_resp = namespace.describe_table(describe_req).await.unwrap();
7223
7224 assert_eq!(
7226 describe_resp.managed_versioning,
7227 Some(true),
7228 "managed_versioning should be true when table_version_tracking_enabled=true"
7229 );
7230 }
7231
7232 #[tokio::test]
7233 async fn test_table_version_tracking_disabled_no_managed_versioning() {
7234 use lance_namespace::models::DescribeTableRequest;
7235
7236 let temp_dir = TempStdDir::default();
7237 let temp_path = temp_dir.to_str().unwrap();
7238
7239 let namespace = DirectoryNamespaceBuilder::new(temp_path)
7241 .table_version_tracking_enabled(false)
7242 .build()
7243 .await
7244 .unwrap();
7245
7246 let schema = create_test_schema();
7248 let ipc_data = create_test_ipc_data(&schema);
7249 let mut create_req = CreateTableRequest::new();
7250 create_req.id = Some(vec!["test_table".to_string()]);
7251 namespace
7252 .create_table(create_req, bytes::Bytes::from(ipc_data))
7253 .await
7254 .unwrap();
7255
7256 let mut describe_req = DescribeTableRequest::new();
7258 describe_req.id = Some(vec!["test_table".to_string()]);
7259 let describe_resp = namespace.describe_table(describe_req).await.unwrap();
7260
7261 assert!(
7263 describe_resp.managed_versioning.is_none(),
7264 "managed_versioning should be None when table_version_tracking_enabled=false, got: {:?}",
7265 describe_resp.managed_versioning
7266 );
7267 }
7268
7269 #[tokio::test]
7270 async fn test_list_table_versions() {
7271 use arrow::array::{Int32Array, RecordBatchIterator};
7272 use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
7273 use arrow::record_batch::RecordBatch;
7274 use lance::dataset::{Dataset, WriteMode, WriteParams};
7275 use lance_namespace::models::{CreateNamespaceRequest, ListTableVersionsRequest};
7276
7277 let temp_dir = TempStrDir::default();
7278 let temp_path: &str = &temp_dir;
7279
7280 let namespace: Arc<dyn LanceNamespace> = Arc::new(
7281 DirectoryNamespaceBuilder::new(temp_path)
7282 .table_version_tracking_enabled(true)
7283 .build()
7284 .await
7285 .unwrap(),
7286 );
7287
7288 let mut create_ns_req = CreateNamespaceRequest::new();
7290 create_ns_req.id = Some(vec!["workspace".to_string()]);
7291 namespace.create_namespace(create_ns_req).await.unwrap();
7292
7293 let table_id = vec!["workspace".to_string(), "test_table".to_string()];
7295 let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
7296 "id",
7297 DataType::Int32,
7298 false,
7299 )]));
7300 let batch = RecordBatch::try_new(
7301 arrow_schema.clone(),
7302 vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
7303 )
7304 .unwrap();
7305 let batches = RecordBatchIterator::new(vec![Ok(batch.clone())], arrow_schema.clone());
7306 let write_params = WriteParams {
7307 mode: WriteMode::Create,
7308 ..Default::default()
7309 };
7310 let mut dataset = Dataset::write_into_namespace(
7311 batches,
7312 namespace.clone(),
7313 table_id.clone(),
7314 Some(write_params),
7315 )
7316 .await
7317 .unwrap();
7318
7319 let batch2 = RecordBatch::try_new(
7321 arrow_schema.clone(),
7322 vec![Arc::new(Int32Array::from(vec![100, 200]))],
7323 )
7324 .unwrap();
7325 let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema.clone());
7326 dataset.append(batches, None).await.unwrap();
7327
7328 let batch3 = RecordBatch::try_new(
7330 arrow_schema.clone(),
7331 vec![Arc::new(Int32Array::from(vec![300, 400]))],
7332 )
7333 .unwrap();
7334 let batches = RecordBatchIterator::new(vec![Ok(batch3)], arrow_schema);
7335 dataset.append(batches, None).await.unwrap();
7336
7337 let mut list_req = ListTableVersionsRequest::new();
7339 list_req.id = Some(table_id.clone());
7340 let list_resp = namespace.list_table_versions(list_req).await.unwrap();
7341
7342 assert_eq!(
7343 list_resp.versions.len(),
7344 3,
7345 "Should have 3 versions, got: {:?}",
7346 list_resp.versions
7347 );
7348
7349 for expected_version in 1..=3 {
7351 let version = list_resp
7352 .versions
7353 .iter()
7354 .find(|v| v.version == expected_version)
7355 .unwrap_or_else(|| panic!("Expected version {}", expected_version));
7356
7357 assert!(
7358 !version.manifest_path.is_empty(),
7359 "manifest_path should be set for version {}",
7360 expected_version
7361 );
7362 assert!(
7363 version.manifest_path.contains(".manifest"),
7364 "manifest_path should contain .manifest for version {}",
7365 expected_version
7366 );
7367 assert!(
7368 version.manifest_size.is_some(),
7369 "manifest_size should be set for version {}",
7370 expected_version
7371 );
7372 assert!(
7373 version.manifest_size.unwrap() > 0,
7374 "manifest_size should be > 0 for version {}",
7375 expected_version
7376 );
7377 assert!(
7378 version.timestamp_millis.is_some(),
7379 "timestamp_millis should be set for version {}",
7380 expected_version
7381 );
7382 }
7383 }
7384
7385 #[tokio::test]
7386 async fn test_describe_table_version() {
7387 use arrow::array::{Int32Array, RecordBatchIterator};
7388 use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
7389 use arrow::record_batch::RecordBatch;
7390 use lance::dataset::{Dataset, WriteMode, WriteParams};
7391 use lance_namespace::models::{CreateNamespaceRequest, DescribeTableVersionRequest};
7392
7393 let temp_dir = TempStrDir::default();
7394 let temp_path: &str = &temp_dir;
7395
7396 let namespace: Arc<dyn LanceNamespace> = Arc::new(
7397 DirectoryNamespaceBuilder::new(temp_path)
7398 .table_version_tracking_enabled(true)
7399 .build()
7400 .await
7401 .unwrap(),
7402 );
7403
7404 let mut create_ns_req = CreateNamespaceRequest::new();
7406 create_ns_req.id = Some(vec!["workspace".to_string()]);
7407 namespace.create_namespace(create_ns_req).await.unwrap();
7408
7409 let table_id = vec!["workspace".to_string(), "test_table".to_string()];
7411 let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
7412 "id",
7413 DataType::Int32,
7414 false,
7415 )]));
7416 let batch = RecordBatch::try_new(
7417 arrow_schema.clone(),
7418 vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
7419 )
7420 .unwrap();
7421 let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema.clone());
7422 let write_params = WriteParams {
7423 mode: WriteMode::Create,
7424 ..Default::default()
7425 };
7426 let mut dataset = Dataset::write_into_namespace(
7427 batches,
7428 namespace.clone(),
7429 table_id.clone(),
7430 Some(write_params),
7431 )
7432 .await
7433 .unwrap();
7434
7435 let batch2 = RecordBatch::try_new(
7437 arrow_schema.clone(),
7438 vec![Arc::new(Int32Array::from(vec![100, 200]))],
7439 )
7440 .unwrap();
7441 let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema);
7442 dataset.append(batches, None).await.unwrap();
7443
7444 let mut describe_req = DescribeTableVersionRequest::new();
7446 describe_req.id = Some(table_id.clone());
7447 describe_req.version = Some(1);
7448 let describe_resp = namespace
7449 .describe_table_version(describe_req)
7450 .await
7451 .unwrap();
7452
7453 let version = &describe_resp.version;
7454 assert_eq!(version.version, 1);
7455 assert!(version.timestamp_millis.is_some());
7456 assert!(
7457 !version.manifest_path.is_empty(),
7458 "manifest_path should be set"
7459 );
7460 assert!(
7461 version.manifest_path.contains(".manifest"),
7462 "manifest_path should contain .manifest"
7463 );
7464 assert!(
7465 version.manifest_size.is_some(),
7466 "manifest_size should be set"
7467 );
7468 assert!(
7469 version.manifest_size.unwrap() > 0,
7470 "manifest_size should be > 0"
7471 );
7472
7473 let mut describe_req = DescribeTableVersionRequest::new();
7475 describe_req.id = Some(table_id.clone());
7476 describe_req.version = Some(2);
7477 let describe_resp = namespace
7478 .describe_table_version(describe_req)
7479 .await
7480 .unwrap();
7481
7482 let version = &describe_resp.version;
7483 assert_eq!(version.version, 2);
7484 assert!(version.timestamp_millis.is_some());
7485 assert!(
7486 !version.manifest_path.is_empty(),
7487 "manifest_path should be set"
7488 );
7489 assert!(
7490 version.manifest_size.is_some(),
7491 "manifest_size should be set"
7492 );
7493 assert!(
7494 version.manifest_size.unwrap() > 0,
7495 "manifest_size should be > 0"
7496 );
7497 }
7498
7499 #[tokio::test]
7500 async fn test_describe_table_version_latest() {
7501 use arrow::array::{Int32Array, RecordBatchIterator};
7502 use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
7503 use arrow::record_batch::RecordBatch;
7504 use lance::dataset::{Dataset, WriteMode, WriteParams};
7505 use lance_namespace::models::{CreateNamespaceRequest, DescribeTableVersionRequest};
7506
7507 let temp_dir = TempStrDir::default();
7508 let temp_path: &str = &temp_dir;
7509
7510 let namespace: Arc<dyn LanceNamespace> = Arc::new(
7511 DirectoryNamespaceBuilder::new(temp_path)
7512 .table_version_tracking_enabled(true)
7513 .build()
7514 .await
7515 .unwrap(),
7516 );
7517
7518 let mut create_ns_req = CreateNamespaceRequest::new();
7520 create_ns_req.id = Some(vec!["workspace".to_string()]);
7521 namespace.create_namespace(create_ns_req).await.unwrap();
7522
7523 let table_id = vec!["workspace".to_string(), "test_table".to_string()];
7525 let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
7526 "id",
7527 DataType::Int32,
7528 false,
7529 )]));
7530 let batch = RecordBatch::try_new(
7531 arrow_schema.clone(),
7532 vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
7533 )
7534 .unwrap();
7535 let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema.clone());
7536 let write_params = WriteParams {
7537 mode: WriteMode::Create,
7538 ..Default::default()
7539 };
7540 let mut dataset = Dataset::write_into_namespace(
7541 batches,
7542 namespace.clone(),
7543 table_id.clone(),
7544 Some(write_params),
7545 )
7546 .await
7547 .unwrap();
7548
7549 let batch2 = RecordBatch::try_new(
7551 arrow_schema.clone(),
7552 vec![Arc::new(Int32Array::from(vec![100, 200]))],
7553 )
7554 .unwrap();
7555 let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema.clone());
7556 dataset.append(batches, None).await.unwrap();
7557
7558 let batch3 = RecordBatch::try_new(
7560 arrow_schema.clone(),
7561 vec![Arc::new(Int32Array::from(vec![300, 400]))],
7562 )
7563 .unwrap();
7564 let batches = RecordBatchIterator::new(vec![Ok(batch3)], arrow_schema);
7565 dataset.append(batches, None).await.unwrap();
7566
7567 let mut describe_req = DescribeTableVersionRequest::new();
7569 describe_req.id = Some(table_id.clone());
7570 describe_req.version = None;
7571 let describe_resp = namespace
7572 .describe_table_version(describe_req)
7573 .await
7574 .unwrap();
7575
7576 assert_eq!(describe_resp.version.version, 3);
7578 }
7579
7580 #[tokio::test]
7581 async fn test_create_table_version() {
7582 use futures::TryStreamExt;
7583 use lance::dataset::builder::DatasetBuilder;
7584 use lance_namespace::models::CreateTableVersionRequest;
7585
7586 let temp_dir = TempStrDir::default();
7587 let temp_path: &str = &temp_dir;
7588
7589 let namespace: Arc<dyn LanceNamespace> = Arc::new(
7590 DirectoryNamespaceBuilder::new(temp_path)
7591 .table_version_tracking_enabled(true)
7592 .build()
7593 .await
7594 .unwrap(),
7595 );
7596
7597 let schema = create_test_schema();
7599 let ipc_data = create_test_ipc_data(&schema);
7600 let mut create_req = CreateTableRequest::new();
7601 create_req.id = Some(vec!["test_table".to_string()]);
7602 namespace
7603 .create_table(create_req, bytes::Bytes::from(ipc_data))
7604 .await
7605 .unwrap();
7606
7607 let table_id = vec!["test_table".to_string()];
7609 let dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone())
7610 .await
7611 .unwrap()
7612 .load()
7613 .await
7614 .unwrap();
7615
7616 let versions_path = dataset.versions_dir();
7618 let manifest_metas: Vec<_> = dataset
7619 .object_store(None)
7620 .await
7621 .unwrap()
7622 .inner
7623 .list(Some(&versions_path))
7624 .try_collect()
7625 .await
7626 .unwrap();
7627
7628 let manifest_meta = manifest_metas
7629 .iter()
7630 .find(|m| {
7631 m.location
7632 .filename()
7633 .map(|f| f.ends_with(".manifest"))
7634 .unwrap_or(false)
7635 })
7636 .expect("No manifest file found");
7637
7638 let manifest_data = dataset
7640 .object_store(None)
7641 .await
7642 .unwrap()
7643 .inner
7644 .get(&manifest_meta.location)
7645 .await
7646 .unwrap()
7647 .bytes()
7648 .await
7649 .unwrap();
7650
7651 let staging_path = dataset.versions_dir().join("staging_manifest");
7653 dataset
7654 .object_store(None)
7655 .await
7656 .unwrap()
7657 .inner
7658 .put(&staging_path, manifest_data.into())
7659 .await
7660 .unwrap();
7661
7662 let mut create_version_req = CreateTableVersionRequest::new(2, staging_path.to_string());
7665 create_version_req.id = Some(table_id.clone());
7666 create_version_req.naming_scheme = Some("V2".to_string());
7667
7668 let result = namespace.create_table_version(create_version_req).await;
7669 assert!(
7670 result.is_ok(),
7671 "create_table_version should succeed: {:?}",
7672 result
7673 );
7674
7675 let response = result.unwrap();
7677 let version_info = response
7678 .version
7679 .expect("response should contain version info");
7680 let version_2_path = Path::parse(&version_info.manifest_path).unwrap();
7681 let head_result = dataset
7682 .object_store(None)
7683 .await
7684 .unwrap()
7685 .inner
7686 .head(&version_2_path)
7687 .await;
7688 assert!(
7689 head_result.is_ok(),
7690 "Version 2 manifest should exist at {}",
7691 version_2_path
7692 );
7693
7694 let staging_head_result = dataset
7696 .object_store(None)
7697 .await
7698 .unwrap()
7699 .inner
7700 .head(&staging_path)
7701 .await;
7702 assert!(
7703 staging_head_result.is_err(),
7704 "Staging manifest should have been deleted after create_table_version"
7705 );
7706 }
7707
7708 #[tokio::test]
7709 async fn test_create_table_version_conflict() {
7710 use futures::TryStreamExt;
7713 use lance::dataset::builder::DatasetBuilder;
7714 use lance_namespace::models::CreateTableVersionRequest;
7715
7716 let temp_dir = TempStrDir::default();
7717 let temp_path: &str = &temp_dir;
7718
7719 let namespace: Arc<dyn LanceNamespace> = Arc::new(
7720 DirectoryNamespaceBuilder::new(temp_path)
7721 .table_version_tracking_enabled(true)
7722 .build()
7723 .await
7724 .unwrap(),
7725 );
7726
7727 let schema = create_test_schema();
7729 let ipc_data = create_test_ipc_data(&schema);
7730 let mut create_req = CreateTableRequest::new();
7731 create_req.id = Some(vec!["test_table".to_string()]);
7732 namespace
7733 .create_table(create_req, bytes::Bytes::from(ipc_data))
7734 .await
7735 .unwrap();
7736
7737 let table_id = vec!["test_table".to_string()];
7739 let dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone())
7740 .await
7741 .unwrap()
7742 .load()
7743 .await
7744 .unwrap();
7745
7746 let versions_path = dataset.versions_dir();
7748 let manifest_metas: Vec<_> = dataset
7749 .object_store(None)
7750 .await
7751 .unwrap()
7752 .inner
7753 .list(Some(&versions_path))
7754 .try_collect()
7755 .await
7756 .unwrap();
7757
7758 let manifest_meta = manifest_metas
7759 .iter()
7760 .find(|m| {
7761 m.location
7762 .filename()
7763 .map(|f| f.ends_with(".manifest"))
7764 .unwrap_or(false)
7765 })
7766 .expect("No manifest file found");
7767
7768 let manifest_data = dataset
7770 .object_store(None)
7771 .await
7772 .unwrap()
7773 .inner
7774 .get(&manifest_meta.location)
7775 .await
7776 .unwrap()
7777 .bytes()
7778 .await
7779 .unwrap();
7780
7781 let staging_path = dataset.versions_dir().join("staging_manifest");
7783 dataset
7784 .object_store(None)
7785 .await
7786 .unwrap()
7787 .inner
7788 .put(&staging_path, manifest_data.into())
7789 .await
7790 .unwrap();
7791
7792 let mut create_version_req = CreateTableVersionRequest::new(2, staging_path.to_string());
7794 create_version_req.id = Some(table_id.clone());
7795 create_version_req.naming_scheme = Some("V2".to_string());
7796 let first_result = namespace.create_table_version(create_version_req).await;
7797 assert!(
7798 first_result.is_ok(),
7799 "First create_table_version for version 2 should succeed: {:?}",
7800 first_result
7801 );
7802
7803 let version_2_path = Path::parse(
7805 &first_result
7806 .unwrap()
7807 .version
7808 .expect("response should contain version info")
7809 .manifest_path,
7810 )
7811 .unwrap();
7812
7813 let mut create_version_req = CreateTableVersionRequest::new(2, staging_path.to_string());
7815 create_version_req.id = Some(table_id.clone());
7816 create_version_req.naming_scheme = Some("V2".to_string());
7817
7818 let result = namespace.create_table_version(create_version_req).await;
7819 assert!(
7820 result.is_err(),
7821 "create_table_version should fail for existing version"
7822 );
7823
7824 let head_result = dataset
7826 .object_store(None)
7827 .await
7828 .unwrap()
7829 .inner
7830 .head(&version_2_path)
7831 .await;
7832 assert!(
7833 head_result.is_ok(),
7834 "Version 2 manifest should still exist at {}",
7835 version_2_path
7836 );
7837 }
7838
7839 #[tokio::test]
7840 async fn test_create_table_version_table_not_found() {
7841 use lance_namespace::models::CreateTableVersionRequest;
7842
7843 let temp_dir = TempStdDir::default();
7844 let temp_path = temp_dir.to_str().unwrap();
7845
7846 let namespace = DirectoryNamespaceBuilder::new(temp_path)
7847 .table_version_tracking_enabled(true)
7848 .build()
7849 .await
7850 .unwrap();
7851
7852 let mut create_version_req =
7854 CreateTableVersionRequest::new(1, "/some/staging/path".to_string());
7855 create_version_req.id = Some(vec!["non_existent_table".to_string()]);
7856
7857 let result = namespace.create_table_version(create_version_req).await;
7858 assert!(
7859 result.is_err(),
7860 "create_table_version should fail for non-existent table"
7861 );
7862 let err_msg = result.unwrap_err().to_string();
7863 assert!(
7864 err_msg.contains("Table not found"),
7865 "Error should mention table not found, got: {}",
7866 err_msg
7867 );
7868 }
7869
7870 mod e2e_table_version_tracking {
7872 use super::*;
7873 use std::sync::atomic::{AtomicUsize, Ordering};
7874
/// Wrapper around a [`DirectoryNamespace`] that counts how many times the
/// table-version operations are invoked through it.
struct TrackingNamespace {
    /// Namespace that every call is forwarded to.
    inner: DirectoryNamespace,
    /// Number of `create_table_version` calls made through this wrapper.
    create_table_version_count: AtomicUsize,
    /// Number of `describe_table_version` calls made through this wrapper.
    describe_table_version_count: AtomicUsize,
    /// Number of `list_table_versions` calls made through this wrapper.
    list_table_versions_count: AtomicUsize,
}
7882
7883 impl TrackingNamespace {
7884 fn new(inner: DirectoryNamespace) -> Self {
7885 Self {
7886 inner,
7887 create_table_version_count: AtomicUsize::new(0),
7888 describe_table_version_count: AtomicUsize::new(0),
7889 list_table_versions_count: AtomicUsize::new(0),
7890 }
7891 }
7892
7893 fn create_table_version_calls(&self) -> usize {
7894 self.create_table_version_count.load(Ordering::SeqCst)
7895 }
7896
7897 fn describe_table_version_calls(&self) -> usize {
7898 self.describe_table_version_count.load(Ordering::SeqCst)
7899 }
7900
7901 fn list_table_versions_calls(&self) -> usize {
7902 self.list_table_versions_count.load(Ordering::SeqCst)
7903 }
7904 }
7905
7906 impl std::fmt::Debug for TrackingNamespace {
7907 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
7908 f.debug_struct("TrackingNamespace")
7909 .field(
7910 "create_table_version_calls",
7911 &self.create_table_version_calls(),
7912 )
7913 .finish()
7914 }
7915 }
7916
7917 #[async_trait]
7918 impl LanceNamespace for TrackingNamespace {
7919 async fn create_namespace(
7920 &self,
7921 request: CreateNamespaceRequest,
7922 ) -> Result<CreateNamespaceResponse> {
7923 self.inner.create_namespace(request).await
7924 }
7925
7926 async fn describe_namespace(
7927 &self,
7928 request: DescribeNamespaceRequest,
7929 ) -> Result<DescribeNamespaceResponse> {
7930 self.inner.describe_namespace(request).await
7931 }
7932
7933 async fn namespace_exists(&self, request: NamespaceExistsRequest) -> Result<()> {
7934 self.inner.namespace_exists(request).await
7935 }
7936
7937 async fn list_namespaces(
7938 &self,
7939 request: ListNamespacesRequest,
7940 ) -> Result<ListNamespacesResponse> {
7941 self.inner.list_namespaces(request).await
7942 }
7943
7944 async fn drop_namespace(
7945 &self,
7946 request: DropNamespaceRequest,
7947 ) -> Result<DropNamespaceResponse> {
7948 self.inner.drop_namespace(request).await
7949 }
7950
7951 async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
7952 self.inner.list_tables(request).await
7953 }
7954
7955 async fn describe_table(
7956 &self,
7957 request: DescribeTableRequest,
7958 ) -> Result<DescribeTableResponse> {
7959 self.inner.describe_table(request).await
7960 }
7961
7962 async fn table_exists(&self, request: TableExistsRequest) -> Result<()> {
7963 self.inner.table_exists(request).await
7964 }
7965
7966 async fn drop_table(&self, request: DropTableRequest) -> Result<DropTableResponse> {
7967 self.inner.drop_table(request).await
7968 }
7969
7970 async fn create_table(
7971 &self,
7972 request: CreateTableRequest,
7973 request_data: Bytes,
7974 ) -> Result<CreateTableResponse> {
7975 self.inner.create_table(request, request_data).await
7976 }
7977
7978 async fn declare_table(
7979 &self,
7980 request: DeclareTableRequest,
7981 ) -> Result<DeclareTableResponse> {
7982 self.inner.declare_table(request).await
7983 }
7984
7985 async fn list_table_versions(
7986 &self,
7987 request: ListTableVersionsRequest,
7988 ) -> Result<ListTableVersionsResponse> {
7989 self.list_table_versions_count
7990 .fetch_add(1, Ordering::SeqCst);
7991 self.inner.list_table_versions(request).await
7992 }
7993
7994 async fn create_table_version(
7995 &self,
7996 request: CreateTableVersionRequest,
7997 ) -> Result<CreateTableVersionResponse> {
7998 self.create_table_version_count
7999 .fetch_add(1, Ordering::SeqCst);
8000 self.inner.create_table_version(request).await
8001 }
8002
8003 async fn describe_table_version(
8004 &self,
8005 request: DescribeTableVersionRequest,
8006 ) -> Result<DescribeTableVersionResponse> {
8007 self.describe_table_version_count
8008 .fetch_add(1, Ordering::SeqCst);
8009 self.inner.describe_table_version(request).await
8010 }
8011
8012 async fn batch_delete_table_versions(
8013 &self,
8014 request: BatchDeleteTableVersionsRequest,
8015 ) -> Result<BatchDeleteTableVersionsResponse> {
8016 self.inner.batch_delete_table_versions(request).await
8017 }
8018
8019 fn namespace_id(&self) -> String {
8020 self.inner.namespace_id()
8021 }
8022 }
8023
8024 #[tokio::test]
8025 async fn test_describe_table_returns_managed_versioning() {
8026 use lance_namespace::models::{CreateNamespaceRequest, DescribeTableRequest};
8027
8028 let temp_dir = TempStdDir::default();
8029 let temp_path = temp_dir.to_str().unwrap();
8030
8031 let ns = DirectoryNamespaceBuilder::new(temp_path)
8033 .table_version_tracking_enabled(true)
8034 .manifest_enabled(true)
8035 .build()
8036 .await
8037 .unwrap();
8038
8039 let mut create_ns_req = CreateNamespaceRequest::new();
8041 create_ns_req.id = Some(vec!["workspace".to_string()]);
8042 ns.create_namespace(create_ns_req).await.unwrap();
8043
8044 let schema = create_test_schema();
8046 let ipc_data = create_test_ipc_data(&schema);
8047 let mut create_req = CreateTableRequest::new();
8048 create_req.id = Some(vec!["workspace".to_string(), "test_table".to_string()]);
8049 ns.create_table(create_req, bytes::Bytes::from(ipc_data))
8050 .await
8051 .unwrap();
8052
8053 let mut describe_req = DescribeTableRequest::new();
8055 describe_req.id = Some(vec!["workspace".to_string(), "test_table".to_string()]);
8056 let describe_resp = ns.describe_table(describe_req).await.unwrap();
8057
8058 assert_eq!(
8060 describe_resp.managed_versioning,
8061 Some(true),
8062 "managed_versioning should be true when table_version_tracking_enabled=true"
8063 );
8064 }
8065
8066 #[tokio::test]
8067 async fn test_external_manifest_store_invokes_namespace_apis() {
8068 use arrow::array::{Int32Array, StringArray};
8069 use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
8070 use arrow::record_batch::RecordBatch;
8071 use lance::Dataset;
8072 use lance::dataset::builder::DatasetBuilder;
8073 use lance::dataset::{WriteMode, WriteParams};
8074 use lance_namespace::models::CreateNamespaceRequest;
8075
8076 let temp_dir = TempStdDir::default();
8077 let temp_path = temp_dir.to_str().unwrap();
8078
8079 let inner_ns = DirectoryNamespaceBuilder::new(temp_path)
8081 .table_version_tracking_enabled(true)
8082 .manifest_enabled(true)
8083 .build()
8084 .await
8085 .unwrap();
8086
8087 let tracking_ns = Arc::new(TrackingNamespace::new(inner_ns));
8088 let ns: Arc<dyn LanceNamespace> = tracking_ns.clone();
8089
8090 let mut create_ns_req = CreateNamespaceRequest::new();
8092 create_ns_req.id = Some(vec!["workspace".to_string()]);
8093 ns.create_namespace(create_ns_req).await.unwrap();
8094
8095 let table_id = vec!["workspace".to_string(), "test_table".to_string()];
8097
8098 let arrow_schema = Arc::new(ArrowSchema::new(vec![
8100 Field::new("id", DataType::Int32, false),
8101 Field::new("name", DataType::Utf8, true),
8102 ]));
8103 let batch = RecordBatch::try_new(
8104 arrow_schema.clone(),
8105 vec![
8106 Arc::new(Int32Array::from(vec![1, 2, 3])),
8107 Arc::new(StringArray::from(vec!["a", "b", "c"])),
8108 ],
8109 )
8110 .unwrap();
8111
8112 let batches = RecordBatchIterator::new(vec![Ok(batch.clone())], arrow_schema.clone());
8114 let write_params = WriteParams {
8115 mode: WriteMode::Create,
8116 ..Default::default()
8117 };
8118 let mut dataset = Dataset::write_into_namespace(
8119 batches,
8120 ns.clone(),
8121 table_id.clone(),
8122 Some(write_params),
8123 )
8124 .await
8125 .unwrap();
8126 assert_eq!(dataset.version().version, 1);
8127
8128 assert_eq!(
8130 tracking_ns.create_table_version_calls(),
8131 1,
8132 "create_table_version should have been called once during initial write_into_namespace"
8133 );
8134
8135 let append_batch = RecordBatch::try_new(
8137 arrow_schema.clone(),
8138 vec![
8139 Arc::new(Int32Array::from(vec![4, 5, 6])),
8140 Arc::new(StringArray::from(vec!["d", "e", "f"])),
8141 ],
8142 )
8143 .unwrap();
8144 let append_batches = RecordBatchIterator::new(vec![Ok(append_batch)], arrow_schema);
8145 dataset.append(append_batches, None).await.unwrap();
8146
8147 assert_eq!(
8148 tracking_ns.create_table_version_calls(),
8149 2,
8150 "create_table_version should have been called twice (once for create, once for append)"
8151 );
8152
8153 let initial_list_calls = tracking_ns.list_table_versions_calls();
8155 let latest_dataset = DatasetBuilder::from_namespace(ns.clone(), table_id.clone())
8156 .await
8157 .unwrap()
8158 .load()
8159 .await
8160 .unwrap();
8161 assert_eq!(latest_dataset.version().version, 2);
8162 assert_eq!(
8163 tracking_ns.list_table_versions_calls(),
8164 initial_list_calls + 1,
8165 "list_table_versions should have been called exactly once during checkout_latest"
8166 );
8167
8168 let initial_describe_calls = tracking_ns.describe_table_version_calls();
8170 let v1_dataset = DatasetBuilder::from_namespace(ns.clone(), table_id.clone())
8171 .await
8172 .unwrap()
8173 .with_version(1)
8174 .load()
8175 .await
8176 .unwrap();
8177 assert_eq!(v1_dataset.version().version, 1);
8178 assert_eq!(
8179 tracking_ns.describe_table_version_calls(),
8180 initial_describe_calls + 1,
8181 "describe_table_version should have been called exactly once during checkout to version 1"
8182 );
8183 }
8184
8185 #[tokio::test]
8186 async fn test_dataset_commit_with_external_manifest_store() {
8187 use arrow::array::{Int32Array, StringArray};
8188 use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
8189 use arrow::record_batch::RecordBatch;
8190 use futures::TryStreamExt;
8191 use lance::dataset::{Dataset, WriteMode, WriteParams};
8192 use lance_namespace::models::CreateNamespaceRequest;
8193 use lance_table::io::commit::ManifestNamingScheme;
8194
8195 let temp_dir = TempStdDir::default();
8196 let temp_path = temp_dir.to_str().unwrap();
8197
8198 let inner_ns = DirectoryNamespaceBuilder::new(temp_path)
8200 .table_version_tracking_enabled(true)
8201 .manifest_enabled(true)
8202 .build()
8203 .await
8204 .unwrap();
8205
8206 let tracking_ns: Arc<dyn LanceNamespace> = Arc::new(TrackingNamespace::new(inner_ns));
8207
8208 let mut create_ns_req = CreateNamespaceRequest::new();
8210 create_ns_req.id = Some(vec!["workspace".to_string()]);
8211 tracking_ns.create_namespace(create_ns_req).await.unwrap();
8212
8213 let table_id = vec!["workspace".to_string(), "test_table".to_string()];
8215 let arrow_schema = Arc::new(ArrowSchema::new(vec![
8216 Field::new("id", DataType::Int32, false),
8217 Field::new("name", DataType::Utf8, true),
8218 ]));
8219 let batch = RecordBatch::try_new(
8220 arrow_schema.clone(),
8221 vec![
8222 Arc::new(Int32Array::from(vec![1, 2, 3])),
8223 Arc::new(StringArray::from(vec!["a", "b", "c"])),
8224 ],
8225 )
8226 .unwrap();
8227 let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema.clone());
8228 let write_params = WriteParams {
8229 mode: WriteMode::Create,
8230 ..Default::default()
8231 };
8232 let dataset = Dataset::write_into_namespace(
8233 batches,
8234 tracking_ns.clone(),
8235 table_id.clone(),
8236 Some(write_params),
8237 )
8238 .await
8239 .unwrap();
8240 assert_eq!(dataset.version().version, 1);
8241
8242 let batch2 = RecordBatch::try_new(
8244 arrow_schema.clone(),
8245 vec![
8246 Arc::new(Int32Array::from(vec![4, 5, 6])),
8247 Arc::new(StringArray::from(vec!["d", "e", "f"])),
8248 ],
8249 )
8250 .unwrap();
8251 let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema);
8252 let write_params = WriteParams {
8253 mode: WriteMode::Append,
8254 ..Default::default()
8255 };
8256 Dataset::write_into_namespace(
8257 batches,
8258 tracking_ns.clone(),
8259 table_id.clone(),
8260 Some(write_params),
8261 )
8262 .await
8263 .unwrap();
8264
8265 let manifest_metas: Vec<_> = dataset
8268 .object_store(None)
8269 .await
8270 .unwrap()
8271 .inner
8272 .list(Some(&dataset.versions_dir()))
8273 .try_collect()
8274 .await
8275 .unwrap();
8276 let version_2_found = manifest_metas.iter().any(|m| {
8277 m.location
8278 .filename()
8279 .map(|f| {
8280 f.ends_with(".manifest")
8281 && ManifestNamingScheme::V2.parse_version(f) == Some(2)
8282 })
8283 .unwrap_or(false)
8284 });
8285 assert!(
8286 version_2_found,
8287 "Version 2 manifest should exist in versions directory"
8288 );
8289 }
8290
8291 async fn create_ns_with_table() -> (DirectoryNamespace, TempStdDir, Vec<String>) {
8293 use arrow::array::{Int32Array, StringArray};
8294 use arrow::ipc::writer::StreamWriter;
8295
8296 let (namespace, temp_dir) = create_test_namespace().await;
8297
8298 let schema = create_test_schema();
8299 let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8300 let arrow_schema = Arc::new(arrow_schema);
8301
8302 let id_array = Int32Array::from(vec![1, 2, 3]);
8303 let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie"]);
8304 let batch = arrow::record_batch::RecordBatch::try_new(
8305 arrow_schema.clone(),
8306 vec![Arc::new(id_array), Arc::new(name_array)],
8307 )
8308 .unwrap();
8309
8310 let mut buffer = Vec::new();
8311 {
8312 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8313 writer.write(&batch).unwrap();
8314 writer.finish().unwrap();
8315 }
8316
8317 let mut request = CreateTableRequest::new();
8318 let table_id = vec!["test_ops_table".to_string()];
8319 request.id = Some(table_id.clone());
8320
8321 namespace
8322 .create_table(request, Bytes::from(buffer))
8323 .await
8324 .unwrap();
8325
8326 (namespace, temp_dir, table_id)
8327 }
8328
8329 #[tokio::test]
8330 async fn test_count_table_rows_basic() {
8331 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8332
8333 let request = CountTableRowsRequest {
8334 id: Some(table_id),
8335 version: None,
8336 predicate: None,
8337 ..Default::default()
8338 };
8339
8340 let count = namespace.count_table_rows(request).await.unwrap();
8341 assert_eq!(count, 3);
8342 }
8343
8344 #[tokio::test]
8345 async fn test_count_table_rows_with_predicate() {
8346 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8347
8348 let request = CountTableRowsRequest {
8349 id: Some(table_id),
8350 version: None,
8351 predicate: Some("id > 1".to_string()),
8352 ..Default::default()
8353 };
8354
8355 let count = namespace.count_table_rows(request).await.unwrap();
8356 assert_eq!(count, 2);
8357 }
8358
8359 #[tokio::test]
8360 async fn test_query_table_invalid_distance_type() {
8361 let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8362
8363 let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8364 single_vector: Some(vec![1.0, 0.0, 0.0, 0.0]),
8365 multi_vector: None,
8366 });
8367
8368 let request = QueryTableRequest {
8369 id: Some(table_id),
8370 k: 2,
8371 vector,
8372 vector_column: Some("vector".to_string()),
8373 distance_type: Some("invalid_metric".to_string()),
8374 filter: None,
8375 offset: None,
8376 version: None,
8377 ..Default::default()
8378 };
8379
8380 let result = namespace.query_table(request).await;
8381 assert!(result.is_err());
8382 let err_msg = result.unwrap_err().to_string();
8383 assert!(
8384 err_msg.contains("Unknown distance type"),
8385 "Expected error about unknown distance type, got: {}",
8386 err_msg
8387 );
8388 }
8389
8390 #[tokio::test]
8391 async fn test_insert_into_table_append() {
8392 use arrow::array::{Int32Array, StringArray};
8393 use arrow::ipc::writer::StreamWriter;
8394
8395 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8396
8397 let schema = create_test_schema();
8399 let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8400 let arrow_schema = Arc::new(arrow_schema);
8401
8402 let id_array = Int32Array::from(vec![4, 5]);
8403 let name_array = StringArray::from(vec!["Dave", "Eve"]);
8404 let batch = arrow::record_batch::RecordBatch::try_new(
8405 arrow_schema.clone(),
8406 vec![Arc::new(id_array), Arc::new(name_array)],
8407 )
8408 .unwrap();
8409
8410 let mut buffer = Vec::new();
8411 {
8412 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8413 writer.write(&batch).unwrap();
8414 writer.finish().unwrap();
8415 }
8416
8417 let request = InsertIntoTableRequest {
8418 id: Some(table_id.clone()),
8419 mode: Some("append".to_string()),
8420 ..Default::default()
8421 };
8422
8423 let response = namespace
8424 .insert_into_table(request, Bytes::from(buffer))
8425 .await
8426 .unwrap();
8427 assert!(response.transaction_id.is_none());
8428
8429 let count_req = CountTableRowsRequest {
8431 id: Some(table_id),
8432 version: None,
8433 predicate: None,
8434 ..Default::default()
8435 };
8436 let count = namespace.count_table_rows(count_req).await.unwrap();
8437 assert_eq!(count, 5);
8438 }
8439
8440 #[tokio::test]
8441 async fn test_insert_into_table_overwrite() {
8442 use arrow::array::{Int32Array, StringArray};
8443 use arrow::ipc::writer::StreamWriter;
8444
8445 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8446
8447 let schema = create_test_schema();
8448 let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8449 let arrow_schema = Arc::new(arrow_schema);
8450
8451 let id_array = Int32Array::from(vec![10, 20]);
8452 let name_array = StringArray::from(vec!["X", "Y"]);
8453 let batch = arrow::record_batch::RecordBatch::try_new(
8454 arrow_schema.clone(),
8455 vec![Arc::new(id_array), Arc::new(name_array)],
8456 )
8457 .unwrap();
8458
8459 let mut buffer = Vec::new();
8460 {
8461 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8462 writer.write(&batch).unwrap();
8463 writer.finish().unwrap();
8464 }
8465
8466 let request = InsertIntoTableRequest {
8467 id: Some(table_id.clone()),
8468 mode: Some("overwrite".to_string()),
8469 ..Default::default()
8470 };
8471
8472 namespace
8473 .insert_into_table(request, Bytes::from(buffer))
8474 .await
8475 .unwrap();
8476
8477 let count_req = CountTableRowsRequest {
8479 id: Some(table_id),
8480 version: None,
8481 predicate: None,
8482 ..Default::default()
8483 };
8484 let count = namespace.count_table_rows(count_req).await.unwrap();
8485 assert_eq!(count, 2);
8486 }
8487
8488 #[tokio::test]
8489 async fn test_insert_into_table_empty_data() {
8490 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8491
8492 let request = InsertIntoTableRequest {
8493 id: Some(table_id),
8494 mode: None,
8495 ..Default::default()
8496 };
8497
8498 let result = namespace.insert_into_table(request, Bytes::new()).await;
8499 assert!(result.is_err());
8500 assert!(
8501 result
8502 .unwrap_err()
8503 .to_string()
8504 .contains("Arrow IPC stream) is required")
8505 );
8506 }
8507
8508 #[tokio::test]
8509 async fn test_insert_into_table_with_storage_options() {
8510 use arrow::array::{Int32Array, StringArray};
8511 use arrow::ipc::writer::StreamWriter;
8512
8513 let temp_dir = TempStdDir::default();
8514
8515 let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
8517 .storage_option("allow_http", "true")
8518 .build()
8519 .await
8520 .unwrap();
8521
8522 let schema = create_test_schema();
8524 let ipc_data = create_test_ipc_data(&schema);
8525 let mut create_req = CreateTableRequest::new();
8526 let table_id = vec!["so_table".to_string()];
8527 create_req.id = Some(table_id.clone());
8528 namespace
8529 .create_table(create_req, Bytes::from(ipc_data))
8530 .await
8531 .unwrap();
8532
8533 let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8535 let arrow_schema = Arc::new(arrow_schema);
8536
8537 let id_array = Int32Array::from(vec![10, 20]);
8538 let name_array = StringArray::from(vec!["X", "Y"]);
8539 let batch = arrow::record_batch::RecordBatch::try_new(
8540 arrow_schema.clone(),
8541 vec![Arc::new(id_array), Arc::new(name_array)],
8542 )
8543 .unwrap();
8544
8545 let mut buffer = Vec::new();
8546 {
8547 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8548 writer.write(&batch).unwrap();
8549 writer.finish().unwrap();
8550 }
8551
8552 let request = InsertIntoTableRequest {
8553 id: Some(table_id.clone()),
8554 mode: Some("append".to_string()),
8555 ..Default::default()
8556 };
8557
8558 let response = namespace
8559 .insert_into_table(request, Bytes::from(buffer))
8560 .await
8561 .unwrap();
8562 assert!(response.transaction_id.is_none());
8563
8564 let count_req = CountTableRowsRequest {
8566 id: Some(table_id),
8567 version: None,
8568 predicate: None,
8569 ..Default::default()
8570 };
8571 let count = namespace.count_table_rows(count_req).await.unwrap();
8572 assert_eq!(count, 2);
8573 }
8574
8575 #[tokio::test]
8576 async fn test_query_table_basic() {
8577 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8578
8579 let request = QueryTableRequest {
8580 id: Some(table_id),
8581 k: 10,
8582 filter: None,
8583 offset: None,
8584 version: None,
8585 ..Default::default()
8586 };
8587
8588 let bytes = namespace.query_table(request).await.unwrap();
8589
8590 let cursor = Cursor::new(bytes.to_vec());
8592 let reader = FileReader::try_new(cursor, None).unwrap();
8593 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8594 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8595 assert_eq!(total_rows, 3);
8596 }
8597
8598 #[tokio::test]
8599 async fn test_query_table_with_filter() {
8600 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8601
8602 let request = QueryTableRequest {
8603 id: Some(table_id),
8604 k: 10,
8605 filter: Some("id <= 2".to_string()),
8606 offset: None,
8607 version: None,
8608 ..Default::default()
8609 };
8610
8611 let bytes = namespace.query_table(request).await.unwrap();
8612
8613 let cursor = Cursor::new(bytes.to_vec());
8614 let reader = FileReader::try_new(cursor, None).unwrap();
8615 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8616 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8617 assert_eq!(total_rows, 2);
8618 }
8619
8620 #[tokio::test]
8621 async fn test_query_table_with_limit_and_offset() {
8622 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8623
8624 let request = QueryTableRequest {
8625 id: Some(table_id),
8626 k: 2,
8627 filter: None,
8628 offset: Some(1),
8629 version: None,
8630 ..Default::default()
8631 };
8632
8633 let bytes = namespace.query_table(request).await.unwrap();
8634
8635 let cursor = Cursor::new(bytes.to_vec());
8636 let reader = FileReader::try_new(cursor, None).unwrap();
8637 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8638 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8639 assert_eq!(total_rows, 2);
8640 }
8641
8642 #[tokio::test]
8643 async fn test_query_table_no_limit() {
8644 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8645
8646 let request = QueryTableRequest {
8648 id: Some(table_id),
8649 k: 0,
8650 filter: None,
8651 offset: None,
8652 version: None,
8653 ..Default::default()
8654 };
8655
8656 let bytes = namespace.query_table(request).await.unwrap();
8657
8658 let cursor = Cursor::new(bytes.to_vec());
8659 let reader = FileReader::try_new(cursor, None).unwrap();
8660 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8661 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8662 assert_eq!(total_rows, 3);
8663 }
8664
8665 #[tokio::test]
8666 async fn test_query_table_with_columns() {
8667 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8668
8669 let columns = Box::new(lance_namespace::models::QueryTableRequestColumns {
8670 column_names: Some(vec!["id".to_string()]),
8671 column_aliases: None,
8672 });
8673
8674 let request = QueryTableRequest {
8675 id: Some(table_id),
8676 k: 10,
8677 filter: None,
8678 offset: None,
8679 version: None,
8680 columns: Some(columns),
8681 ..Default::default()
8682 };
8683
8684 let bytes = namespace.query_table(request).await.unwrap();
8685
8686 let cursor = Cursor::new(bytes.to_vec());
8687 let reader = FileReader::try_new(cursor, None).unwrap();
8688 let schema = reader.schema();
8689 assert_eq!(schema.fields().len(), 1);
8690 assert_eq!(schema.field(0).name(), "id");
8691 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8692 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8693 assert_eq!(total_rows, 3);
8694 }
8695
8696 #[tokio::test]
8697 async fn test_count_table_rows_with_version() {
8698 use arrow::array::{Int32Array, StringArray};
8699 use arrow::ipc::writer::StreamWriter;
8700
8701 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8702
8703 let schema = create_test_schema();
8705 let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8706 let arrow_schema = Arc::new(arrow_schema);
8707
8708 let id_array = Int32Array::from(vec![4, 5]);
8709 let name_array = StringArray::from(vec!["Dave", "Eve"]);
8710 let batch = arrow::record_batch::RecordBatch::try_new(
8711 arrow_schema.clone(),
8712 vec![Arc::new(id_array), Arc::new(name_array)],
8713 )
8714 .unwrap();
8715
8716 let mut buffer = Vec::new();
8717 {
8718 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8719 writer.write(&batch).unwrap();
8720 writer.finish().unwrap();
8721 }
8722
8723 let request = InsertIntoTableRequest {
8724 id: Some(table_id.clone()),
8725 mode: None,
8726 ..Default::default()
8727 };
8728 namespace
8729 .insert_into_table(request, Bytes::from(buffer))
8730 .await
8731 .unwrap();
8732
8733 let count_req = CountTableRowsRequest {
8735 id: Some(table_id.clone()),
8736 version: Some(1),
8737 predicate: None,
8738 ..Default::default()
8739 };
8740 let count = namespace.count_table_rows(count_req).await.unwrap();
8741 assert_eq!(count, 3);
8742
8743 let count_req = CountTableRowsRequest {
8745 id: Some(table_id),
8746 version: None,
8747 predicate: None,
8748 ..Default::default()
8749 };
8750 let count = namespace.count_table_rows(count_req).await.unwrap();
8751 assert_eq!(count, 5);
8752 }
8753
8754 #[tokio::test]
8755 async fn test_query_table_with_version() {
8756 use arrow::array::{Int32Array, StringArray};
8757 use arrow::ipc::writer::StreamWriter;
8758
8759 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8760
8761 let schema = create_test_schema();
8763 let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8764 let arrow_schema = Arc::new(arrow_schema);
8765
8766 let id_array = Int32Array::from(vec![4, 5]);
8767 let name_array = StringArray::from(vec!["Dave", "Eve"]);
8768 let batch = arrow::record_batch::RecordBatch::try_new(
8769 arrow_schema.clone(),
8770 vec![Arc::new(id_array), Arc::new(name_array)],
8771 )
8772 .unwrap();
8773
8774 let mut buffer = Vec::new();
8775 {
8776 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8777 writer.write(&batch).unwrap();
8778 writer.finish().unwrap();
8779 }
8780
8781 let request = InsertIntoTableRequest {
8782 id: Some(table_id.clone()),
8783 mode: None,
8784 ..Default::default()
8785 };
8786 namespace
8787 .insert_into_table(request, Bytes::from(buffer))
8788 .await
8789 .unwrap();
8790
8791 let request = QueryTableRequest {
8793 id: Some(table_id.clone()),
8794 k: 100,
8795 filter: None,
8796 offset: None,
8797 version: Some(1),
8798 ..Default::default()
8799 };
8800
8801 let bytes = namespace.query_table(request).await.unwrap();
8802 let cursor = Cursor::new(bytes.to_vec());
8803 let reader = FileReader::try_new(cursor, None).unwrap();
8804 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8805 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8806 assert_eq!(total_rows, 3);
8807
8808 let request = QueryTableRequest {
8810 id: Some(table_id),
8811 k: 100,
8812 filter: None,
8813 offset: None,
8814 version: None,
8815 ..Default::default()
8816 };
8817
8818 let bytes = namespace.query_table(request).await.unwrap();
8819 let cursor = Cursor::new(bytes.to_vec());
8820 let reader = FileReader::try_new(cursor, None).unwrap();
8821 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8822 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8823 assert_eq!(total_rows, 5);
8824 }
8825
8826 async fn create_ns_with_vector_table() -> (DirectoryNamespace, TempStdDir, Vec<String>) {
8829 use arrow::array::{FixedSizeListArray, Float32Array, Int32Array};
8830 use arrow::ipc::writer::StreamWriter;
8831
8832 let (namespace, temp_dir) = create_test_namespace().await;
8833
8834 let arrow_schema = Arc::new(arrow::datatypes::Schema::new(vec![
8836 arrow::datatypes::Field::new("id", arrow::datatypes::DataType::Int32, false),
8837 arrow::datatypes::Field::new(
8838 "vector",
8839 arrow::datatypes::DataType::FixedSizeList(
8840 Arc::new(arrow::datatypes::Field::new(
8841 "item",
8842 arrow::datatypes::DataType::Float32,
8843 true,
8844 )),
8845 4,
8846 ),
8847 true,
8848 ),
8849 ]));
8850
8851 let id_array = Int32Array::from(vec![1, 2, 3]);
8852 let values = Float32Array::from(vec![
8853 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ]);
8857 let vector_array = FixedSizeListArray::try_new(
8858 Arc::new(arrow::datatypes::Field::new(
8859 "item",
8860 arrow::datatypes::DataType::Float32,
8861 true,
8862 )),
8863 4,
8864 Arc::new(values),
8865 None,
8866 )
8867 .unwrap();
8868
8869 let batch = arrow::record_batch::RecordBatch::try_new(
8870 arrow_schema.clone(),
8871 vec![Arc::new(id_array), Arc::new(vector_array)],
8872 )
8873 .unwrap();
8874
8875 let mut buffer = Vec::new();
8876 {
8877 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8878 writer.write(&batch).unwrap();
8879 writer.finish().unwrap();
8880 }
8881
8882 let table_name = "vector_table";
8884 let table_uri = format!("{}/{}.lance", temp_dir.to_str().unwrap(), table_name);
8885 let reader = arrow::record_batch::RecordBatchIterator::new(
8886 vec![Ok(batch)],
8887 arrow_schema.clone(),
8888 );
8889 Dataset::write(reader, &table_uri, None).await.unwrap();
8890
8891 let table_id = vec![table_name.to_string()];
8892 (namespace, temp_dir, table_id)
8893 }
8894
8895 #[tokio::test]
8896 async fn test_query_table_vector_search() {
8897 let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8898
8899 let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8900 single_vector: Some(vec![1.0, 0.0, 0.0, 0.0]),
8901 multi_vector: None,
8902 });
8903
8904 let request = QueryTableRequest {
8905 id: Some(table_id),
8906 k: 2,
8907 vector,
8908 filter: None,
8909 offset: None,
8910 version: None,
8911 ..Default::default()
8912 };
8913
8914 let bytes = namespace.query_table(request).await.unwrap();
8915
8916 let cursor = Cursor::new(bytes.to_vec());
8917 let reader = FileReader::try_new(cursor, None).unwrap();
8918 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8919 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8920 assert_eq!(total_rows, 2);
8921 }
8922
8923 #[tokio::test]
8924 async fn test_query_table_vector_search_with_distance_type() {
8925 let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8926
8927 let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8928 single_vector: Some(vec![1.0, 0.0, 0.0, 0.0]),
8929 multi_vector: None,
8930 });
8931
8932 let request = QueryTableRequest {
8933 id: Some(table_id),
8934 k: 3,
8935 vector,
8936 filter: None,
8937 offset: None,
8938 version: None,
8939 distance_type: Some("cosine".to_string()),
8940 ..Default::default()
8941 };
8942
8943 let bytes = namespace.query_table(request).await.unwrap();
8944
8945 let cursor = Cursor::new(bytes.to_vec());
8946 let reader = FileReader::try_new(cursor, None).unwrap();
8947 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8948 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8949 assert_eq!(total_rows, 3);
8950 }
8951
8952 #[tokio::test]
8953 async fn test_query_table_vector_search_with_filter() {
8954 let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8955
8956 let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8957 single_vector: Some(vec![1.0, 0.0, 0.0, 0.0]),
8958 multi_vector: None,
8959 });
8960
8961 let request = QueryTableRequest {
8962 id: Some(table_id),
8963 k: 10,
8964 vector,
8965 filter: Some("id <= 2".to_string()),
8966 offset: None,
8967 version: None,
8968 ..Default::default()
8969 };
8970
8971 let bytes = namespace.query_table(request).await.unwrap();
8972
8973 let cursor = Cursor::new(bytes.to_vec());
8974 let reader = FileReader::try_new(cursor, None).unwrap();
8975 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8976 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8977 assert!(total_rows <= 2);
8978 }
8979
8980 #[tokio::test]
8981 async fn test_query_table_vector_search_with_nprobes_and_refine() {
8982 let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8983
8984 let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8985 single_vector: Some(vec![0.0, 1.0, 0.0, 0.0]),
8986 multi_vector: None,
8987 });
8988
8989 let request = QueryTableRequest {
8990 id: Some(table_id),
8991 k: 2,
8992 vector,
8993 filter: None,
8994 offset: None,
8995 version: None,
8996 nprobes: Some(1),
8997 refine_factor: Some(1),
8998 prefilter: Some(true),
8999 ..Default::default()
9000 };
9001
9002 let bytes = namespace.query_table(request).await.unwrap();
9003
9004 let cursor = Cursor::new(bytes.to_vec());
9005 let reader = FileReader::try_new(cursor, None).unwrap();
9006 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
9007 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
9008 assert_eq!(total_rows, 2);
9009 }
9010
9011 #[tokio::test]
9012 async fn test_namespace_id() {
9013 let (namespace, _temp_dir) = create_test_namespace().await;
9014 let id = namespace.namespace_id();
9015 assert!(id.contains("DirectoryNamespace"));
9016 assert!(id.contains("root"));
9017 }
9018
9019 #[tokio::test]
9020 async fn test_query_table_empty_table() {
9021 let (namespace, _temp_dir) = create_test_namespace().await;
9022
9023 let schema = create_test_schema();
9025 let ipc_data = create_test_ipc_data(&schema);
9026 let mut create_request = CreateTableRequest::new();
9027 create_request.id = Some(vec!["empty_table".to_string()]);
9028 namespace
9029 .create_table(create_request, bytes::Bytes::from(ipc_data))
9030 .await
9031 .unwrap();
9032
9033 let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
9035 single_vector: None,
9036 multi_vector: None,
9037 });
9038 let request = QueryTableRequest {
9039 id: Some(vec!["empty_table".to_string()]),
9040 k: 10,
9041 vector,
9042 ..Default::default()
9043 };
9044 let bytes = namespace.query_table(request).await.unwrap();
9045
9046 let cursor = Cursor::new(bytes.to_vec());
9047 let reader = FileReader::try_new(cursor, None).unwrap();
9048 let batches: Vec<_> = reader.collect::<std::result::Result<Vec<_>, _>>().unwrap();
9049 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
9050 assert_eq!(total_rows, 0, "empty table should yield no rows");
9051 }
9052
9053 #[tokio::test]
9054 async fn test_query_table_with_plain_filter_no_vector() {
9055 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
9056
9057 let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
9059 single_vector: None,
9060 multi_vector: None,
9061 });
9062 let request = QueryTableRequest {
9063 id: Some(table_id),
9064 k: 0,
9065 vector,
9066 filter: Some("id > 1".to_string()),
9067 ..Default::default()
9068 };
9069 let bytes = namespace.query_table(request).await.unwrap();
9070
9071 let cursor = Cursor::new(bytes.to_vec());
9072 let reader = FileReader::try_new(cursor, None).unwrap();
9073 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
9074 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
9075 assert!(total_rows > 0);
9076 assert!(total_rows < 3);
9077 }
9078 }
9079
9080 mod multi_table_transactions {
9082 use super::*;
9083 use futures::TryStreamExt;
9084 use lance::dataset::builder::DatasetBuilder;
9085 use lance_namespace::models::CreateTableVersionRequest;
9086
9087 async fn create_managed_namespace(temp_path: &str) -> Arc<DirectoryNamespace> {
9089 Arc::new(
9090 DirectoryNamespaceBuilder::new(temp_path)
9091 .table_version_tracking_enabled(true)
9092 .table_version_storage_enabled(true)
9093 .manifest_enabled(true)
9094 .build()
9095 .await
9096 .unwrap(),
9097 )
9098 }
9099
9100 async fn create_table_and_get_staging(
9102 namespace: Arc<dyn LanceNamespace>,
9103 table_name: &str,
9104 ) -> (Vec<String>, object_store::path::Path) {
9105 let schema = create_test_schema();
9106 let ipc_data = create_test_ipc_data(&schema);
9107 let mut create_req = CreateTableRequest::new();
9108 create_req.id = Some(vec![table_name.to_string()]);
9109 namespace
9110 .create_table(create_req, bytes::Bytes::from(ipc_data))
9111 .await
9112 .unwrap();
9113
9114 let table_id = vec![table_name.to_string()];
9115 let dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone())
9116 .await
9117 .unwrap()
9118 .load()
9119 .await
9120 .unwrap();
9121
9122 let versions_path = dataset.versions_dir();
9124 let manifest_metas: Vec<_> = dataset
9125 .object_store(None)
9126 .await
9127 .unwrap()
9128 .inner
9129 .list(Some(&versions_path))
9130 .try_collect()
9131 .await
9132 .unwrap();
9133
9134 let manifest_meta = manifest_metas
9135 .iter()
9136 .find(|m| {
9137 m.location
9138 .filename()
9139 .map(|f| f.ends_with(".manifest"))
9140 .unwrap_or(false)
9141 })
9142 .expect("No manifest file found");
9143
9144 let manifest_data = dataset
9145 .object_store(None)
9146 .await
9147 .unwrap()
9148 .inner
9149 .get(&manifest_meta.location)
9150 .await
9151 .unwrap()
9152 .bytes()
9153 .await
9154 .unwrap();
9155
9156 let staging_path = dataset
9157 .versions_dir()
9158 .join(format!("staging_{}", table_name));
9159 dataset
9160 .object_store(None)
9161 .await
9162 .unwrap()
9163 .inner
9164 .put(&staging_path, manifest_data.into())
9165 .await
9166 .unwrap();
9167
9168 (table_id, staging_path)
9169 }
9170
9171 #[tokio::test]
9172 async fn test_table_version_storage_enabled_requires_manifest() {
9173 let temp_dir = TempStdDir::default();
9175 let temp_path = temp_dir.to_str().unwrap();
9176
9177 let result = DirectoryNamespaceBuilder::new(temp_path)
9178 .table_version_storage_enabled(true)
9179 .manifest_enabled(false)
9180 .build()
9181 .await;
9182
9183 assert!(
9184 result.is_err(),
9185 "Should fail when table_version_storage_enabled=true but manifest_enabled=false"
9186 );
9187 }
9188
9189 #[tokio::test]
9190 async fn test_create_table_version_records_in_manifest() {
9191 let temp_dir = TempStrDir::default();
9194 let temp_path: &str = &temp_dir;
9195
9196 let namespace = create_managed_namespace(temp_path).await;
9197 let ns: Arc<dyn LanceNamespace> = namespace.clone();
9198
9199 let (table_id, staging_path) =
9200 create_table_and_get_staging(ns.clone(), "table_managed").await;
9201
9202 let mut create_req = CreateTableVersionRequest::new(2, staging_path.to_string());
9204 create_req.id = Some(table_id.clone());
9205 create_req.naming_scheme = Some("V2".to_string());
9206 let response = namespace.create_table_version(create_req).await.unwrap();
9207
9208 assert!(response.version.is_some());
9209 let version = response.version.unwrap();
9210 assert_eq!(version.version, 2);
9211
9212 let manifest_ns = namespace.manifest_ns.as_ref().unwrap();
9214 let table_id_str = manifest::ManifestNamespace::str_object_id(&table_id);
9215 let versions = manifest_ns
9216 .query_table_versions(&table_id_str, false, None)
9217 .await
9218 .unwrap();
9219
9220 assert!(
9221 !versions.is_empty(),
9222 "Version should be recorded in __manifest"
9223 );
9224 let (ver, _path) = &versions[0];
9225 assert_eq!(*ver, 2, "Recorded version should be 2");
9226 }
9227 }
9228
9229 #[tokio::test]
9230 async fn test_list_all_tables() {
9231 use lance_namespace::models::ListTablesRequest;
9232
9233 let (namespace, _temp_dir) = create_test_namespace().await;
9234 create_scalar_table(&namespace, "alpha").await;
9235 create_scalar_table(&namespace, "beta").await;
9236
9237 let request = ListTablesRequest {
9238 id: Some(vec![]),
9239 page_token: None,
9240 limit: None,
9241 ..Default::default()
9242 };
9243 let response = namespace.list_all_tables(request).await.unwrap();
9244 let mut tables = response.tables;
9245 tables.sort();
9246 assert_eq!(tables, vec!["alpha", "beta"]);
9247 }
9248
9249 #[tokio::test]
9250 async fn test_restore_table() {
9251 use lance_namespace::models::RestoreTableRequest;
9252
9253 let (namespace, _temp_dir) = create_test_namespace().await;
9254 create_scalar_table(&namespace, "users").await;
9255
9256 create_scalar_index(&namespace, "users", "users_id_idx").await;
9258
9259 let dataset = open_dataset(&namespace, "users").await;
9260 let current_version = dataset.version().version;
9261 assert!(current_version >= 2, "Should have at least 2 versions");
9262
9263 let mut restore_req = RestoreTableRequest::new(1);
9265 restore_req.id = Some(vec!["users".to_string()]);
9266 let response = namespace.restore_table(restore_req).await.unwrap();
9267
9268 assert!(
9270 response.transaction_id.is_some(),
9271 "restore_table should return a transaction_id"
9272 );
9273
9274 let dataset_after = open_dataset(&namespace, "users").await;
9276 assert!(
9277 dataset_after.version().version > current_version,
9278 "Restore should create a new version"
9279 );
9280 }
9281
9282 #[tokio::test]
9283 async fn test_update_table_schema_metadata() {
9284 use lance_namespace::models::UpdateTableSchemaMetadataRequest;
9285
9286 let (namespace, _temp_dir) = create_test_namespace().await;
9287 create_scalar_table(&namespace, "products").await;
9288
9289 let mut metadata = HashMap::new();
9290 metadata.insert("owner".to_string(), "team_a".to_string());
9291 metadata.insert("version".to_string(), "1.0".to_string());
9292
9293 let mut req = UpdateTableSchemaMetadataRequest::new();
9294 req.id = Some(vec!["products".to_string()]);
9295 req.metadata = Some(metadata.clone());
9296
9297 let response = namespace.update_table_schema_metadata(req).await.unwrap();
9298
9299 assert!(response.metadata.is_some());
9300 let returned = response.metadata.unwrap();
9301 assert_eq!(returned.get("owner"), Some(&"team_a".to_string()));
9302 assert_eq!(returned.get("version"), Some(&"1.0".to_string()));
9303 assert!(
9304 response.transaction_id.is_some(),
9305 "update_table_schema_metadata should return a transaction_id"
9306 );
9307 }
9308
9309 #[tokio::test]
9310 async fn test_get_table_stats() {
9311 use lance_namespace::models::GetTableStatsRequest;
9312
9313 let (namespace, _temp_dir) = create_test_namespace().await;
9314 create_scalar_table(&namespace, "items").await;
9315 create_scalar_index(&namespace, "items", "items_id_idx").await;
9316
9317 let mut req = GetTableStatsRequest::new();
9318 req.id = Some(vec!["items".to_string()]);
9319
9320 let response = namespace.get_table_stats(req).await.unwrap();
9321 assert_eq!(response.num_rows, 3);
9322 assert_eq!(response.num_indices, 1);
9323 }
9324
9325 #[tokio::test]
9326 async fn test_explain_table_query_plan() {
9327 use lance_namespace::models::QueryTableRequestVector;
9328 use lance_namespace::models::{ExplainTableQueryPlanRequest, QueryTableRequest};
9329
9330 let (namespace, _temp_dir) = create_test_namespace().await;
9331 create_scalar_table(&namespace, "catalog").await;
9332
9333 let mut query = QueryTableRequest::new(1, QueryTableRequestVector::new());
9334 query.filter = Some("id > 1".to_string());
9335 query.columns = Some(Box::new(QueryTableRequestColumns {
9336 column_names: Some(vec!["id".to_string(), "name".to_string()]),
9337 column_aliases: None,
9338 }));
9339 query.with_row_id = Some(true);
9340
9341 let mut req = ExplainTableQueryPlanRequest::new(query);
9342 req.id = Some(vec!["catalog".to_string()]);
9343
9344 let plan_str = namespace.explain_table_query_plan(req).await.unwrap();
9345 assert_plan_contains_all(
9346 &plan_str,
9347 &[
9348 "ProjectionExec: expr=[id@0 as id, name@2 as name",
9349 "Take: columns=\"id, _rowid, (name)\"",
9350 "LanceRead: uri=",
9351 "projection=[id]",
9352 "row_id=true, row_addr=false",
9353 "full_filter=id > Int32(1)",
9354 "refine_filter=id > Int32(1)",
9355 ],
9356 "Filtered explain plan should preserve late materialization and filter pushdown",
9357 );
9358 }
9359
9360 #[tokio::test]
9361 async fn test_analyze_table_query_plan() {
9362 use lance_namespace::models::AnalyzeTableQueryPlanRequest;
9363 use lance_namespace::models::QueryTableRequestVector;
9364
9365 let (namespace, _temp_dir) = create_test_namespace().await;
9366 create_scalar_table(&namespace, "catalog").await;
9367
9368 let mut req = AnalyzeTableQueryPlanRequest::new(1, QueryTableRequestVector::new());
9369 req.id = Some(vec!["catalog".to_string()]);
9370 req.filter = Some("id > 0".to_string());
9371 req.columns = Some(Box::new(QueryTableRequestColumns {
9372 column_names: Some(vec!["id".to_string(), "name".to_string()]),
9373 column_aliases: None,
9374 }));
9375 req.with_row_id = Some(true);
9376
9377 let analysis_str = namespace.analyze_table_query_plan(req).await.unwrap();
9378 assert_plan_contains_all(
9379 &analysis_str,
9380 &[
9381 "AnalyzeExec verbose=true",
9382 "ProjectionExec: elapsed=",
9383 "expr=[id@0 as id, name@2 as name",
9384 "Take: elapsed=",
9385 "columns=\"id, _rowid, (name)\"",
9386 "CoalesceBatchesExec: elapsed=",
9387 "LanceRead: elapsed=",
9388 "projection=[id]",
9389 "row_id=true, row_addr=false",
9390 "full_filter=id > Int32(0)",
9391 "refine_filter=id > Int32(0)",
9392 "metrics=[output_rows=",
9393 ],
9394 "Filtered analyze plan should preserve late materialization and filter pushdown",
9395 );
9396 }
9397
9398 #[tokio::test]
9399 async fn test_dir_listing_no_extra_calls_without_migration() {
9400 let temp_dir = TempStdDir::default();
9401 let temp_path = temp_dir.to_str().unwrap();
9402 let root_uri = file_object_store_uri(temp_path);
9403 let listing_count = Arc::new(AtomicUsize::new(0));
9404 let session = build_listing_counting_session(listing_count.clone());
9405
9406 let dir_only_ns = DirectoryNamespaceBuilder::new(root_uri.clone())
9408 .session(session.clone())
9409 .manifest_enabled(false)
9410 .dir_listing_enabled(true)
9411 .build()
9412 .await
9413 .unwrap();
9414
9415 let schema = create_test_schema();
9416 let ipc_data = create_test_ipc_data(&schema);
9417 let mut create_req = CreateTableRequest::new();
9418 create_req.id = Some(vec!["test_table".to_string()]);
9419 dir_only_ns
9420 .create_table(create_req, Bytes::from(ipc_data))
9421 .await
9422 .unwrap();
9423
9424 let hybrid_ns = DirectoryNamespaceBuilder::new(root_uri)
9426 .session(session)
9427 .manifest_enabled(true)
9428 .dir_listing_enabled(true)
9429 .dir_listing_to_manifest_migration_enabled(false)
9430 .build()
9431 .await
9432 .unwrap();
9433
9434 listing_count.store(0, Ordering::SeqCst);
9436
9437 let mut exists_req = TableExistsRequest::new();
9439 exists_req.id = Some(vec!["test_table".to_string()]);
9440 hybrid_ns.table_exists(exists_req).await.unwrap();
9441
9442 let count = listing_count.load(Ordering::SeqCst);
9443 assert_eq!(
9444 count, 1,
9445 "Expected exactly 1 listing call for table_exists \
9446 without migration mode, but got {}",
9447 count
9448 );
9449
9450 listing_count.store(0, Ordering::SeqCst);
9452
9453 let mut describe_req = DescribeTableRequest::new();
9454 describe_req.id = Some(vec!["test_table".to_string()]);
9455 hybrid_ns.describe_table(describe_req).await.unwrap();
9456
9457 let count = listing_count.load(Ordering::SeqCst);
9458 assert_eq!(
9459 count, 1,
9460 "Expected exactly 1 listing call for describe_table \
9461 without migration mode, but got {}",
9462 count
9463 );
9464 }
9465
9466 #[tokio::test]
9467 async fn test_describe_declared_table_checks_versions_only_when_requested() {
9468 let temp_dir = TempStdDir::default();
9469 let temp_path = temp_dir.to_str().unwrap();
9470 let root_uri = file_object_store_uri(temp_path);
9471 let listing_count = Arc::new(AtomicUsize::new(0));
9472 let session = build_listing_counting_session(listing_count.clone());
9473
9474 let namespace = DirectoryNamespaceBuilder::new(root_uri)
9475 .session(session)
9476 .manifest_enabled(false)
9477 .dir_listing_enabled(true)
9478 .build()
9479 .await
9480 .unwrap();
9481
9482 let mut declare_req = DeclareTableRequest::new();
9483 declare_req.id = Some(vec!["test_table".to_string()]);
9484 namespace.declare_table(declare_req).await.unwrap();
9485
9486 listing_count.store(0, Ordering::SeqCst);
9487
9488 let mut describe_req = DescribeTableRequest::new();
9489 describe_req.id = Some(vec!["test_table".to_string()]);
9490 let describe_response = namespace.describe_table(describe_req).await.unwrap();
9491
9492 assert_eq!(describe_response.is_only_declared, None);
9493 assert_eq!(
9494 listing_count.load(Ordering::SeqCst),
9495 1,
9496 "Default describe_table should only list the table directory"
9497 );
9498
9499 listing_count.store(0, Ordering::SeqCst);
9500
9501 let mut describe_req = DescribeTableRequest::new();
9502 describe_req.id = Some(vec!["test_table".to_string()]);
9503 describe_req.check_declared = Some(true);
9504 let describe_response = namespace.describe_table(describe_req).await.unwrap();
9505
9506 assert_eq!(describe_response.is_only_declared, Some(true));
9507 assert_eq!(
9508 listing_count.load(Ordering::SeqCst),
9509 2,
9510 "check_declared describe_table should list the table directory and _versions"
9511 );
9512 }
9513
9514 #[tokio::test]
9515 async fn test_dir_listing_extra_calls_with_migration() {
9516 let temp_dir = TempStdDir::default();
9517 let temp_path = temp_dir.to_str().unwrap();
9518 let root_uri = file_object_store_uri(temp_path);
9519 let listing_count = Arc::new(AtomicUsize::new(0));
9520 let session = build_listing_counting_session(listing_count.clone());
9521
9522 let dir_only_ns = DirectoryNamespaceBuilder::new(root_uri.clone())
9524 .session(session.clone())
9525 .manifest_enabled(false)
9526 .dir_listing_enabled(true)
9527 .build()
9528 .await
9529 .unwrap();
9530
9531 let schema = create_test_schema();
9532 let ipc_data = create_test_ipc_data(&schema);
9533 let mut create_req = CreateTableRequest::new();
9534 create_req.id = Some(vec!["test_table".to_string()]);
9535 dir_only_ns
9536 .create_table(create_req, Bytes::from(ipc_data))
9537 .await
9538 .unwrap();
9539
9540 let hybrid_ns = DirectoryNamespaceBuilder::new(root_uri)
9541 .session(session)
9542 .manifest_enabled(true)
9543 .dir_listing_enabled(true)
9544 .dir_listing_to_manifest_migration_enabled(true)
9545 .build()
9546 .await
9547 .unwrap();
9548
9549 listing_count.store(0, Ordering::SeqCst);
9553
9554 let mut exists_req = TableExistsRequest::new();
9555 exists_req.id = Some(vec!["test_table".to_string()]);
9556 hybrid_ns.table_exists(exists_req).await.unwrap();
9557
9558 let count = listing_count.load(Ordering::SeqCst);
9559 assert_eq!(
9560 count, 1,
9561 "Expected exactly 1 listing call for table_exists with migration mode \
9562 (table directory fallback; manifest reload uses the version hint), but got {}",
9563 count
9564 );
9565
9566 listing_count.store(0, Ordering::SeqCst);
9568
9569 let mut describe_req = DescribeTableRequest::new();
9570 describe_req.id = Some(vec!["test_table".to_string()]);
9571 hybrid_ns.describe_table(describe_req).await.unwrap();
9572
9573 let count = listing_count.load(Ordering::SeqCst);
9574 assert_eq!(
9575 count, 1,
9576 "Expected exactly 1 listing call for describe_table with migration mode \
9577 (table directory fallback; manifest reload uses the version hint), but got {}",
9578 count
9579 );
9580 }
9581
9582 #[tokio::test]
9583 async fn test_migration_not_found_errors_include_table_id() {
9584 let temp_dir = TempStdDir::default();
9585 let temp_path = temp_dir.to_str().unwrap();
9586
9587 let namespace = DirectoryNamespaceBuilder::new(temp_path)
9588 .manifest_enabled(true)
9589 .dir_listing_enabled(true)
9590 .dir_listing_to_manifest_migration_enabled(true)
9591 .build()
9592 .await
9593 .unwrap();
9594
9595 let mut exists_req = TableExistsRequest::new();
9596 exists_req.id = Some(vec!["missing_table".to_string()]);
9597 let err = namespace.table_exists(exists_req).await.unwrap_err();
9598 assert!(matches!(err, Error::Namespace { .. }));
9599 let err_msg = err.to_string();
9600 assert!(err_msg.contains("Table not found"));
9601 assert!(err_msg.contains("table id 'missing_table'"));
9602
9603 let mut describe_req = DescribeTableRequest::new();
9604 describe_req.id = Some(vec!["missing_table".to_string()]);
9605 let err = namespace.describe_table(describe_req).await.unwrap_err();
9606 assert!(matches!(err, Error::Namespace { .. }));
9607 let err_msg = err.to_string();
9608 assert!(err_msg.contains("Table not found"));
9609 assert!(err_msg.contains("table id 'missing_table'"));
9610 }
9611
9612 #[tokio::test]
9613 async fn test_manifest_not_found_errors_include_full_table_id() {
9614 use lance_namespace::models::CreateNamespaceRequest;
9615
9616 let temp_dir = TempStdDir::default();
9617 let temp_path = temp_dir.to_str().unwrap();
9618
9619 let namespace = DirectoryNamespaceBuilder::new(temp_path)
9620 .manifest_enabled(true)
9621 .dir_listing_enabled(true)
9622 .build()
9623 .await
9624 .unwrap();
9625
9626 let mut create_ns_req = CreateNamespaceRequest::new();
9627 create_ns_req.id = Some(vec!["workspace".to_string()]);
9628 namespace.create_namespace(create_ns_req).await.unwrap();
9629
9630 let missing_table_id = vec!["workspace".to_string(), "missing_table".to_string()];
9631
9632 let mut exists_req = TableExistsRequest::new();
9633 exists_req.id = Some(missing_table_id.clone());
9634 let err = namespace.table_exists(exists_req).await.unwrap_err();
9635 assert!(matches!(err, Error::Namespace { .. }));
9636 let err_msg = err.to_string();
9637 assert!(err_msg.contains("Table not found"));
9638 assert!(err_msg.contains("table id 'workspace$missing_table'"));
9639
9640 let mut describe_req = DescribeTableRequest::new();
9641 describe_req.id = Some(missing_table_id);
9642 let err = namespace.describe_table(describe_req).await.unwrap_err();
9643 assert!(matches!(err, Error::Namespace { .. }));
9644 let err_msg = err.to_string();
9645 assert!(err_msg.contains("Table not found"));
9646 assert!(err_msg.contains("table id 'workspace$missing_table'"));
9647 }
9648}