1pub mod manifest;
10
11use arrow::array::Float32Array;
12use arrow::record_batch::RecordBatchIterator;
13use arrow_ipc::reader::StreamReader;
14use async_trait::async_trait;
15use bytes::Bytes;
16use futures::{StreamExt, TryStreamExt};
17use lance::dataset::builder::DatasetBuilder;
18use lance::dataset::scanner::Scanner;
19use lance::dataset::statistics::DatasetStatisticsExt;
20use lance::dataset::transaction::{Operation, Transaction};
21use lance::dataset::{
22 Dataset, MergeInsertBuilder, WhenMatched, WhenNotMatched, WhenNotMatchedBySource, WriteMode,
23 WriteParams,
24};
25use lance::index::{DatasetIndexExt, IndexParams, vector::VectorIndexParams};
26use lance::session::Session;
27use lance_index::scalar::{
28 BuiltinIndexType, FullTextSearchQuery, InvertedIndexParams, ScalarIndexParams,
29};
30use lance_index::vector::{
31 bq::RQBuildParams, hnsw::builder::HnswBuildParams, ivf::IvfBuildParams, pq::PQBuildParams,
32 sq::builder::SQBuildParams,
33};
34use lance_index::{IndexType, is_system_index};
35use lance_io::object_store::{ObjectStore, ObjectStoreParams, ObjectStoreRegistry};
36use lance_linalg::distance::MetricType;
37use lance_table::io::commit::{ManifestNamingScheme, VERSIONS_DIR};
38use object_store::path::Path;
39use object_store::{Error as ObjectStoreError, ObjectStore as OSObjectStore, PutMode, PutOptions};
40use std::collections::HashMap;
41use std::io::Cursor;
42use std::sync::{Arc, Mutex};
43
44use crate::context::DynamicContextProvider;
45use lance_namespace::models::{
46 AnalyzeTableQueryPlanRequest, BatchDeleteTableVersionsRequest,
47 BatchDeleteTableVersionsResponse, CountTableRowsRequest, CreateNamespaceRequest,
48 CreateNamespaceResponse, CreateTableIndexRequest, CreateTableIndexResponse, CreateTableRequest,
49 CreateTableResponse, CreateTableScalarIndexResponse, CreateTableVersionRequest,
50 CreateTableVersionResponse, DeclareTableRequest, DeclareTableResponse,
51 DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableIndexStatsRequest,
52 DescribeTableIndexStatsResponse, DescribeTableRequest, DescribeTableResponse,
53 DescribeTableVersionRequest, DescribeTableVersionResponse, DescribeTransactionRequest,
54 DescribeTransactionResponse, DropNamespaceRequest, DropNamespaceResponse,
55 DropTableIndexRequest, DropTableIndexResponse, DropTableRequest, DropTableResponse,
56 ExplainTableQueryPlanRequest, FragmentStats, FragmentSummary, GetTableStatsRequest,
57 GetTableStatsResponse, Identity, IndexContent, InsertIntoTableRequest, InsertIntoTableResponse,
58 ListNamespacesRequest, ListNamespacesResponse, ListTableIndicesRequest,
59 ListTableIndicesResponse, ListTableVersionsRequest, ListTableVersionsResponse,
60 ListTablesRequest, ListTablesResponse, MergeInsertIntoTableRequest,
61 MergeInsertIntoTableResponse, NamespaceExistsRequest, QueryTableRequest,
62 QueryTableRequestColumns, QueryTableRequestVector, RestoreTableRequest, RestoreTableResponse,
63 TableExistsRequest, TableVersion, UpdateTableSchemaMetadataRequest,
64 UpdateTableSchemaMetadataResponse,
65};
66
67use lance_core::{Error, Result};
68use lance_namespace::LanceNamespace;
69use lance_namespace::error::NamespaceError;
70use lance_namespace::schema::arrow_schema_to_json;
71
72use crate::credentials::{
73 CredentialVendor, create_credential_vendor_for_location, has_credential_vendor_config,
74};
75
/// Thread-safe counters keyed by operation name.
///
/// All methods take `&self`; the map is guarded by a [`Mutex`] so the value can be
/// shared (for example behind an `Arc`) between callers.
#[derive(Debug, Default)]
pub struct OpsMetrics {
    counters: Mutex<HashMap<String, u64>>,
}

impl OpsMetrics {
    /// Acquires the counter map, recovering the guard if the mutex was poisoned.
    ///
    /// The map only holds plain integers, so its contents stay meaningful even when
    /// another thread panicked while holding the lock. Recovering keeps metrics
    /// working instead of silently dropping every later update.
    fn lock_counters(&self) -> std::sync::MutexGuard<'_, HashMap<String, u64>> {
        self.counters
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// Adds one to the counter for `operation`, creating it at 1 if absent.
    pub fn increment(&self, operation: &str) {
        let mut counters = self.lock_counters();
        // Look up by `&str` first so the key is only allocated on first use.
        match counters.get_mut(operation) {
            Some(count) => *count += 1,
            None => {
                counters.insert(operation.to_owned(), 1);
            }
        }
    }

    /// Returns a snapshot (clone) of all counters.
    pub fn retrieve(&self) -> HashMap<String, u64> {
        self.lock_counters().clone()
    }

    /// Removes every counter.
    pub fn reset(&self) {
        self.lock_counters().clear();
    }
}
105
/// Flags describing what was found for a table during a status check.
///
/// `describe_table_impl` reports "table not found" when `exists` is false or
/// `is_deregistered` is true, and only probes for real manifests when
/// `has_reserved_file` is set.
pub(crate) struct TableStatus {
    /// Whether the table was found at all.
    pub(crate) exists: bool,
    /// Whether the table is marked as deregistered; such tables are skipped by
    /// `list_directory_tables`.
    pub(crate) is_deregistered: bool,
    /// Whether a reserved marker file is present. Presumably written when a table is
    /// declared before any data exists — TODO confirm against `check_table_status`.
    pub(crate) has_reserved_file: bool,
}
118
/// Index build parameters, grouped by the family of index being created.
///
/// Each variant wraps the concrete Lance parameter type so the caller can later
/// obtain both the `IndexType` and a `&dyn IndexParams` from one value.
enum DirectoryIndexParams {
    /// A scalar index; `index_type` records which scalar kind was requested.
    Scalar {
        index_type: IndexType,
        params: ScalarIndexParams,
    },
    /// An inverted index; its type is always `IndexType::Inverted`.
    Inverted(InvertedIndexParams),
    /// A vector index; `index_type` records which vector kind was requested.
    Vector {
        index_type: IndexType,
        params: VectorIndexParams,
    },
}
130
131impl DirectoryIndexParams {
132 fn index_type(&self) -> IndexType {
133 match self {
134 Self::Scalar { index_type, .. } | Self::Vector { index_type, .. } => *index_type,
135 Self::Inverted(_) => IndexType::Inverted,
136 }
137 }
138
139 fn params(&self) -> &dyn IndexParams {
140 match self {
141 Self::Scalar { params, .. } => params,
142 Self::Inverted(params) => params,
143 Self::Vector { params, .. } => params,
144 }
145 }
146}
147
/// Builder for [`DirectoryNamespace`].
///
/// Create one with `new` or `from_properties`, adjust it with the chained setter
/// methods, then call `build`.
#[derive(Clone)]
pub struct DirectoryNamespaceBuilder {
    /// Namespace root URI; the constructors strip trailing `/` characters.
    root: String,
    /// Options used when creating the object store; `None` when none were supplied.
    storage_options: Option<HashMap<String, String>>,
    /// Optional Lance session; when present its store registry is reused.
    session: Option<Arc<Session>>,
    /// When true (the default), `build` tries to initialize a manifest namespace.
    manifest_enabled: bool,
    /// Directory-listing flag (default true); forwarded to the manifest namespace
    /// and stored on the built namespace.
    dir_listing_enabled: bool,
    /// Default true; forwarded to the manifest namespace at build time.
    inline_optimization_enabled: bool,
    /// Default false; when set, describe responses report `managed_versioning`.
    table_version_tracking_enabled: bool,
    /// Default false; `build` rejects it unless `manifest_enabled` is also true.
    table_version_storage_enabled: bool,
    /// Default false; when set, root-level describes consult the manifest even if
    /// directory listing is enabled.
    dir_listing_to_manifest_migration_enabled: bool,
    /// Settings handed to the credential vendor factory at build time.
    credential_vendor_properties: HashMap<String, String>,
    /// Optional dynamic context provider, stored on the built namespace.
    context_provider: Option<Arc<dyn DynamicContextProvider>>,
    /// Optional retry count forwarded to the manifest namespace.
    commit_retries: Option<u32>,
    /// Default false; copied onto the built namespace. NOTE(review): its exact
    /// effect is implemented outside this section.
    vend_input_storage_options: bool,
    /// Optional refresh interval in milliseconds, copied onto the built namespace.
    vend_input_storage_options_refresh_interval_millis: Option<u64>,
    /// Default false; when true, `build` creates an `OpsMetrics` instance.
    ops_metrics_enabled: bool,
}
213
214impl std::fmt::Debug for DirectoryNamespaceBuilder {
215 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
216 f.debug_struct("DirectoryNamespaceBuilder")
217 .field("root", &self.root)
218 .field("storage_options", &self.storage_options)
219 .field("manifest_enabled", &self.manifest_enabled)
220 .field("dir_listing_enabled", &self.dir_listing_enabled)
221 .field(
222 "inline_optimization_enabled",
223 &self.inline_optimization_enabled,
224 )
225 .field(
226 "table_version_tracking_enabled",
227 &self.table_version_tracking_enabled,
228 )
229 .field(
230 "table_version_storage_enabled",
231 &self.table_version_storage_enabled,
232 )
233 .field(
234 "dir_listing_to_manifest_migration_enabled",
235 &self.dir_listing_to_manifest_migration_enabled,
236 )
237 .field(
238 "context_provider",
239 &self.context_provider.as_ref().map(|_| "Some(...)"),
240 )
241 .field(
242 "vend_input_storage_options",
243 &self.vend_input_storage_options,
244 )
245 .field(
246 "vend_input_storage_options_refresh_interval_millis",
247 &self.vend_input_storage_options_refresh_interval_millis,
248 )
249 .field("ops_metrics_enabled", &self.ops_metrics_enabled)
250 .finish()
251 }
252}
253
254impl DirectoryNamespaceBuilder {
255 pub fn new(root: impl Into<String>) -> Self {
261 Self {
262 root: root.into().trim_end_matches('/').to_string(),
263 storage_options: None,
264 session: None,
265 manifest_enabled: true,
266 dir_listing_enabled: true, inline_optimization_enabled: true,
268 table_version_tracking_enabled: false, table_version_storage_enabled: false, dir_listing_to_manifest_migration_enabled: false, credential_vendor_properties: HashMap::new(),
272 context_provider: None,
273 commit_retries: None,
274 vend_input_storage_options: false,
275 vend_input_storage_options_refresh_interval_millis: None,
276 ops_metrics_enabled: false,
277 }
278 }
279
280 pub fn manifest_enabled(mut self, enabled: bool) -> Self {
285 self.manifest_enabled = enabled;
286 self
287 }
288
289 pub fn dir_listing_enabled(mut self, enabled: bool) -> Self {
294 self.dir_listing_enabled = enabled;
295 self
296 }
297
298 pub fn dir_listing_to_manifest_migration_enabled(mut self, enabled: bool) -> Self {
305 self.dir_listing_to_manifest_migration_enabled = enabled;
306 self
307 }
308
309 pub fn inline_optimization_enabled(mut self, enabled: bool) -> Self {
315 self.inline_optimization_enabled = enabled;
316 self
317 }
318
319 pub fn table_version_tracking_enabled(mut self, enabled: bool) -> Self {
327 self.table_version_tracking_enabled = enabled;
328 self
329 }
330
331 pub fn table_version_storage_enabled(mut self, enabled: bool) -> Self {
340 self.table_version_storage_enabled = enabled;
341 self
342 }
343
344 pub fn from_properties(
412 properties: HashMap<String, String>,
413 session: Option<Arc<Session>>,
414 ) -> Result<Self> {
415 let root = properties.get("root").cloned().ok_or_else(|| {
417 lance_core::Error::from(NamespaceError::InvalidInput {
418 message: "Missing required property 'root' for directory namespace".to_string(),
419 })
420 })?;
421
422 let storage_options: HashMap<String, String> = properties
424 .iter()
425 .filter_map(|(k, v)| {
426 k.strip_prefix("storage.")
427 .map(|key| (key.to_string(), v.clone()))
428 })
429 .collect();
430
431 let storage_options = if storage_options.is_empty() {
432 None
433 } else {
434 Some(storage_options)
435 };
436
437 let manifest_enabled = properties
439 .get("manifest_enabled")
440 .and_then(|v| v.parse::<bool>().ok())
441 .unwrap_or(true);
442
443 let dir_listing_enabled = properties
445 .get("dir_listing_enabled")
446 .and_then(|v| v.parse::<bool>().ok())
447 .unwrap_or(true);
448
449 let inline_optimization_enabled = properties
451 .get("inline_optimization_enabled")
452 .and_then(|v| v.parse::<bool>().ok())
453 .unwrap_or(true);
454
455 let table_version_tracking_enabled = properties
457 .get("table_version_tracking_enabled")
458 .and_then(|v| v.parse::<bool>().ok())
459 .unwrap_or(false);
460
461 let table_version_storage_enabled = properties
463 .get("table_version_storage_enabled")
464 .and_then(|v| v.parse::<bool>().ok())
465 .unwrap_or(false);
466
467 let dir_listing_to_manifest_migration_enabled = properties
469 .get("dir_listing_to_manifest_migration_enabled")
470 .and_then(|v| v.parse::<bool>().ok())
471 .unwrap_or(false);
472
473 let credential_vendor_properties: HashMap<String, String> = properties
477 .iter()
478 .filter_map(|(k, v)| {
479 k.strip_prefix("credential_vendor.")
480 .map(|key| (key.to_string(), v.clone()))
481 })
482 .collect();
483
484 let commit_retries = properties
485 .get("commit_retries")
486 .and_then(|v| v.parse::<u32>().ok());
487
488 let vend_input_storage_options = properties
490 .get("vend_input_storage_options")
491 .and_then(|v| v.parse::<bool>().ok())
492 .unwrap_or(false);
493
494 let vend_input_storage_options_refresh_interval_millis = properties
496 .get("vend_input_storage_options_refresh_interval_millis")
497 .and_then(|v| v.parse::<u64>().ok());
498
499 let ops_metrics_enabled = properties
501 .get("ops_metrics_enabled")
502 .and_then(|v| v.parse::<bool>().ok())
503 .unwrap_or(false);
504
505 Ok(Self {
506 root: root.trim_end_matches('/').to_string(),
507 storage_options,
508 session,
509 manifest_enabled,
510 dir_listing_enabled,
511 inline_optimization_enabled,
512 table_version_tracking_enabled,
513 table_version_storage_enabled,
514 dir_listing_to_manifest_migration_enabled,
515 credential_vendor_properties,
516 context_provider: None,
517 commit_retries,
518 vend_input_storage_options,
519 vend_input_storage_options_refresh_interval_millis,
520 ops_metrics_enabled,
521 })
522 }
523
524 pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
531 self.storage_options
532 .get_or_insert_with(HashMap::new)
533 .insert(key.into(), value.into());
534 self
535 }
536
537 pub fn storage_options(mut self, options: HashMap<String, String>) -> Self {
543 self.storage_options
544 .get_or_insert_with(HashMap::new)
545 .extend(options);
546 self
547 }
548
549 pub fn session(mut self, session: Arc<Session>) -> Self {
559 self.session = Some(session);
560 self
561 }
562
563 pub fn commit_retries(mut self, retries: u32) -> Self {
566 self.commit_retries = Some(retries);
567 self
568 }
569
570 pub fn credential_vendor_property(
598 mut self,
599 key: impl Into<String>,
600 value: impl Into<String>,
601 ) -> Self {
602 self.credential_vendor_properties
603 .insert(key.into(), value.into());
604 self
605 }
606
607 pub fn credential_vendor_properties(mut self, properties: HashMap<String, String>) -> Self {
615 self.credential_vendor_properties.extend(properties);
616 self
617 }
618
619 pub fn context_provider(mut self, provider: Arc<dyn DynamicContextProvider>) -> Self {
629 self.context_provider = Some(provider);
630 self
631 }
632
633 pub fn vend_input_storage_options(mut self, enabled: bool) -> Self {
642 self.vend_input_storage_options = enabled;
643 self
644 }
645
646 pub fn vend_input_storage_options_refresh_interval_millis(
658 mut self,
659 interval_millis: u64,
660 ) -> Self {
661 self.vend_input_storage_options_refresh_interval_millis = Some(interval_millis);
662 self
663 }
664
665 pub fn ops_metrics_enabled(mut self, enabled: bool) -> Self {
673 self.ops_metrics_enabled = enabled;
674 self
675 }
676
677 pub async fn build(self) -> Result<DirectoryNamespace> {
690 if self.table_version_storage_enabled && !self.manifest_enabled {
692 return Err(NamespaceError::InvalidInput {
693 message: "table_version_storage_enabled requires manifest_enabled=true".to_string(),
694 }
695 .into());
696 }
697
698 let (object_store, base_path) =
699 Self::initialize_object_store(&self.root, &self.storage_options, &self.session).await?;
700
701 let manifest_ns = if self.manifest_enabled {
702 match manifest::ManifestNamespace::from_directory(
703 self.root.clone(),
704 self.storage_options.clone(),
705 self.session.clone(),
706 object_store.clone(),
707 base_path.clone(),
708 self.dir_listing_enabled,
709 self.inline_optimization_enabled,
710 self.commit_retries,
711 self.table_version_storage_enabled,
712 )
713 .await
714 {
715 Ok(ns) => Some(Arc::new(ns)),
716 Err(e) => {
717 log::warn!(
719 "Failed to initialize manifest namespace, falling back to directory listing only: {}",
720 e
721 );
722 None
723 }
724 }
725 } else {
726 None
727 };
728
729 let credential_vendor = if has_credential_vendor_config(&self.credential_vendor_properties)
731 {
732 create_credential_vendor_for_location(&self.root, &self.credential_vendor_properties)
733 .await?
734 .map(Arc::from)
735 } else {
736 None
737 };
738
739 let ops_metrics = if self.ops_metrics_enabled {
740 Some(Arc::new(OpsMetrics::default()))
741 } else {
742 None
743 };
744
745 Ok(DirectoryNamespace {
746 root: self.root,
747 storage_options: self.storage_options,
748 session: self.session,
749 object_store,
750 base_path,
751 manifest_ns,
752 dir_listing_enabled: self.dir_listing_enabled,
753 dir_listing_to_manifest_migration_enabled: self
754 .dir_listing_to_manifest_migration_enabled,
755 table_version_tracking_enabled: self.table_version_tracking_enabled,
756 table_version_storage_enabled: self.table_version_storage_enabled,
757 credential_vendor,
758 context_provider: self.context_provider,
759 vend_input_storage_options: self.vend_input_storage_options,
760 vend_input_storage_options_refresh_interval_millis: self
761 .vend_input_storage_options_refresh_interval_millis,
762 ops_metrics,
763 })
764 }
765
766 async fn initialize_object_store(
768 root: &str,
769 storage_options: &Option<HashMap<String, String>>,
770 session: &Option<Arc<Session>>,
771 ) -> Result<(Arc<ObjectStore>, Path)> {
772 let accessor = storage_options.clone().map(|opts| {
774 Arc::new(lance_io::object_store::StorageOptionsAccessor::with_static_options(opts))
775 });
776 let params = ObjectStoreParams {
777 storage_options_accessor: accessor,
778 ..Default::default()
779 };
780
781 let registry = if let Some(session) = session {
783 session.store_registry()
784 } else {
785 Arc::new(ObjectStoreRegistry::default())
786 };
787
788 let (object_store, base_path) = ObjectStore::from_uri_and_params(registry, root, ¶ms)
790 .await
791 .map_err(|e| {
792 lance_core::Error::from(NamespaceError::Internal {
793 message: format!("Failed to create object store: {:?}", e),
794 })
795 })?;
796
797 Ok((object_store, base_path))
798 }
799}
800
/// A namespace rooted at a directory in an object store.
///
/// Instances are produced by `DirectoryNamespaceBuilder::build`.
pub struct DirectoryNamespace {
    /// Namespace root URI, without trailing `/`.
    root: String,
    /// Base storage options applied when opening or writing datasets.
    storage_options: Option<HashMap<String, String>>,
    /// Optional Lance session shared with datasets opened by this namespace.
    session: Option<Arc<Session>>,
    /// Object store created for `root`.
    object_store: Arc<ObjectStore>,
    /// Path of the root inside `object_store`.
    base_path: Path,
    /// Manifest namespace; `None` when manifest mode is disabled or its
    /// initialization failed at build time.
    manifest_ns: Option<Arc<manifest::ManifestNamespace>>,
    /// When true, root-level describes may fall back to the directory path.
    dir_listing_enabled: bool,
    /// When true, root-level describes consult the manifest even though directory
    /// listing is enabled.
    dir_listing_to_manifest_migration_enabled: bool,
    /// When true, describe responses report `managed_versioning = Some(true)`.
    table_version_tracking_enabled: bool,
    /// Table version storage flag copied from the builder.
    table_version_storage_enabled: bool,
    /// Optional credential vendor created from the builder's vendor properties.
    credential_vendor: Option<Arc<dyn CredentialVendor>>,
    /// Optional dynamic context provider copied from the builder.
    #[allow(dead_code)]
    context_provider: Option<Arc<dyn DynamicContextProvider>>,
    /// Copied from the builder. NOTE(review): consumed by code outside this section.
    vend_input_storage_options: bool,
    /// Optional refresh interval in milliseconds, copied from the builder.
    vend_input_storage_options_refresh_interval_millis: Option<u64>,
    /// Operation counters; `Some` only when metrics were enabled on the builder.
    ops_metrics: Option<Arc<OpsMetrics>>,
}
856
857impl std::fmt::Debug for DirectoryNamespace {
858 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
859 write!(f, "{}", self.namespace_id())
860 }
861}
862
863impl std::fmt::Display for DirectoryNamespace {
864 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
865 write!(f, "{}", self.namespace_id())
866 }
867}
868
/// One table's worth of deletion work.
///
/// NOTE(review): presumably used by the batch table-version delete path, which is
/// outside this section — confirm against the caller.
struct TableDeleteEntry {
    /// Identifier of the table, if one was supplied.
    table_id: Option<Vec<String>>,
    /// Pairs of `i64` values; presumably version ranges — TODO confirm whether the
    /// bounds are inclusive.
    ranges: Vec<(i64, i64)>,
}
875
876impl DirectoryNamespace {
877 fn apply_pagination(
890 names: &mut Vec<String>,
891 page_token: Option<String>,
892 limit: Option<i32>,
893 ) -> Option<String> {
894 names.sort();
896
897 if let Some(start_after) = page_token {
899 if let Some(index) = names
900 .iter()
901 .position(|name| name.as_str() > start_after.as_str())
902 {
903 names.drain(0..index);
904 } else {
905 names.clear();
906 }
907 }
908
909 if let Some(limit) = limit
911 && limit >= 0
912 {
913 let limit = limit as usize;
914 if names.len() > limit {
915 let next_page_token = if limit > 0 {
916 Some(names[limit - 1].clone())
917 } else {
918 None
919 };
920 names.truncate(limit);
921 return next_page_token;
922 }
923 }
924
925 None
926 }
927
928 async fn list_directory_tables(&self) -> Result<Vec<String>> {
930 let mut tables = Vec::new();
931 let entries = self
932 .object_store
933 .read_dir(self.base_path.clone())
934 .await
935 .map_err(|e| {
936 lance_core::Error::from(NamespaceError::Internal {
937 message: format!("Failed to list directory: {:?}", e),
938 })
939 })?;
940
941 for entry in entries {
942 let path = entry.trim_end_matches('/');
943 if !path.ends_with(".lance") {
944 continue;
945 }
946
947 let table_name = &path[..path.len() - 6];
948
949 let status = self.check_table_status(table_name).await;
951 if status.is_deregistered {
952 continue;
953 }
954
955 tables.push(table_name.to_string());
956 }
957
958 Ok(tables)
959 }
960
961 fn validate_root_namespace_id(id: &Option<Vec<String>>) -> Result<()> {
963 if let Some(id) = id
964 && !id.is_empty()
965 {
966 return Err(NamespaceError::Unsupported {
967 message: format!(
968 "Directory namespace only supports root namespace operations, but got namespace ID: {:?}. Expected empty ID.",
969 id
970 ),
971 }
972 .into());
973 }
974 Ok(())
975 }
976
977 fn table_name_from_id(id: &Option<Vec<String>>) -> Result<String> {
979 let id = id.as_ref().ok_or_else(|| {
980 lance_core::Error::from(NamespaceError::InvalidInput {
981 message: "Directory namespace table ID cannot be empty".to_string(),
982 })
983 })?;
984
985 if id.len() != 1 {
986 return Err(NamespaceError::Unsupported {
987 message: format!(
988 "Multi-level table IDs are only supported when manifest mode is enabled, but got: {:?}",
989 id
990 ),
991 }
992 .into());
993 }
994
995 Ok(id[0].clone())
996 }
997
998 fn format_table_id(table_id: &[String]) -> String {
999 format!(
1000 "table id '{}'",
1001 manifest::ManifestNamespace::str_object_id(table_id)
1002 )
1003 }
1004
1005 fn format_table_id_from_request(id: &Option<Vec<String>>) -> String {
1006 id.as_ref()
1007 .map(|table_id| Self::format_table_id(table_id))
1008 .unwrap_or_else(|| "table id '<unknown>'".to_string())
1009 }
1010
1011 async fn resolve_table_location(&self, id: &Option<Vec<String>>) -> Result<String> {
1012 let mut describe_req = DescribeTableRequest::new();
1013 describe_req.id = id.clone();
1014 describe_req.load_detailed_metadata = Some(false);
1015
1016 let describe_resp = self.describe_table_impl(describe_req).await?;
1018
1019 describe_resp.location.ok_or_else(|| {
1020 lance_core::Error::from(NamespaceError::TableNotFound {
1021 message: format!("Table location not found for: {:?}", id),
1022 })
1023 })
1024 }
1025
1026 async fn table_has_actual_manifests(&self, table_name: &str) -> Result<bool> {
1027 manifest::ManifestNamespace::path_has_actual_manifests(
1028 &self.object_store,
1029 &self.table_path(table_name),
1030 )
1031 .await
1032 }
1033
1034 async fn filter_declared_tables(
1035 &self,
1036 tables: Vec<String>,
1037 include_declared: bool,
1038 ) -> Result<Vec<String>> {
1039 if include_declared {
1040 return Ok(tables);
1041 }
1042
1043 let mut stream = futures::stream::iter(tables.into_iter().map(|table_name| async move {
1044 if self.table_has_actual_manifests(&table_name).await? {
1048 Ok::<Option<String>, Error>(Some(table_name))
1049 } else {
1050 Ok::<Option<String>, Error>(None)
1051 }
1052 }))
1053 .buffered(manifest::DECLARED_FILTER_CONCURRENCY);
1054
1055 let mut filtered = Vec::new();
1056 while let Some(result) = stream.next().await {
1057 if let Some(table_name) = result? {
1058 filtered.push(table_name);
1059 }
1060 }
1061 Ok(filtered)
1062 }
1063
1064 fn ipc_reader_from_request_data(
1065 request_data: &Bytes,
1066 operation: &str,
1067 ) -> Result<(
1068 Box<dyn arrow::record_batch::RecordBatchReader + Send>,
1069 usize,
1070 )> {
1071 if request_data.is_empty() {
1072 return Err(NamespaceError::InvalidInput {
1073 message: format!(
1074 "Request data (Arrow IPC stream) is required for {}",
1075 operation
1076 ),
1077 }
1078 .into());
1079 }
1080
1081 let cursor = Cursor::new(request_data.as_ref());
1082 let stream_reader =
1083 StreamReader::try_new(cursor, None).map_err(|e| NamespaceError::InvalidInput {
1084 message: format!("Invalid Arrow IPC stream: {}", e),
1085 })?;
1086 let arrow_schema = stream_reader.schema();
1087
1088 let mut num_rows = 0usize;
1089 let mut batches = Vec::new();
1090 for batch_result in stream_reader {
1091 let batch = batch_result.map_err(|e| NamespaceError::Internal {
1092 message: format!("Failed to read batch from IPC stream: {}", e),
1093 })?;
1094 num_rows += batch.num_rows();
1095 batches.push(batch);
1096 }
1097
1098 let reader: Box<dyn arrow::record_batch::RecordBatchReader + Send> = if batches.is_empty() {
1099 let batch = arrow::record_batch::RecordBatch::new_empty(arrow_schema.clone());
1100 Box::new(RecordBatchIterator::new(vec![Ok(batch)], arrow_schema))
1101 } else {
1102 let batch_results: Vec<_> = batches.into_iter().map(Ok).collect();
1103 Box::new(RecordBatchIterator::new(batch_results, arrow_schema))
1104 };
1105
1106 Ok((reader, num_rows))
1107 }
1108
1109 async fn table_uri_has_actual_manifests(&self, table_uri: &str) -> Result<bool> {
1110 let table_path = self.object_store_path_from_uri(table_uri)?;
1111 manifest::ManifestNamespace::path_has_actual_manifests(&self.object_store, &table_path)
1112 .await
1113 }
1114
1115 fn object_store_path_from_uri(&self, uri: &str) -> Result<Path> {
1116 let registry = self
1117 .session
1118 .as_ref()
1119 .map(|session| session.store_registry())
1120 .unwrap_or_else(|| Arc::new(ObjectStoreRegistry::default()));
1121 ObjectStore::extract_path_from_uri(registry, uri)
1122 }
1123
1124 fn validate_dir_only_properties(
1125 properties: Option<&HashMap<String, String>>,
1126 operation: &str,
1127 ) -> Result<()> {
1128 if properties.is_some_and(|properties| !properties.is_empty()) {
1132 return Err(NamespaceError::Unsupported {
1133 message: format!(
1134 "{} with non-empty table properties requires manifest_enabled=true",
1135 operation
1136 ),
1137 }
1138 .into());
1139 }
1140 Ok(())
1141 }
1142
1143 async fn write_reader_to_table(
1144 &self,
1145 table_uri: &str,
1146 reader: Box<dyn arrow::record_batch::RecordBatchReader + Send>,
1147 mode: WriteMode,
1148 extra_storage_options: Option<HashMap<String, String>>,
1149 ) -> Result<Dataset> {
1150 let mut merged_storage_options = self.storage_options.clone().unwrap_or_default();
1153 if let Some(extra_storage_options) = extra_storage_options {
1154 merged_storage_options.extend(extra_storage_options);
1155 }
1156 let store_params = (!merged_storage_options.is_empty()).then(|| ObjectStoreParams {
1157 storage_options_accessor: Some(Arc::new(
1158 lance_io::object_store::StorageOptionsAccessor::with_static_options(
1159 merged_storage_options,
1160 ),
1161 )),
1162 ..Default::default()
1163 });
1164
1165 let write_params = WriteParams {
1166 mode,
1167 store_params,
1168 session: self.session.clone(),
1169 ..Default::default()
1170 };
1171
1172 let dataset = Dataset::write(reader, table_uri, Some(write_params))
1173 .await
1174 .map_err(|e| NamespaceError::Internal {
1175 message: format!("Failed to write table at '{}': {}", table_uri, e),
1176 })?;
1177
1178 Ok(dataset)
1179 }
1180
1181 async fn list_table_versions_from_storage(
1182 &self,
1183 table_uri: &str,
1184 descending: bool,
1185 limit: Option<i32>,
1186 ) -> Result<Vec<TableVersion>> {
1187 let table_path = self.object_store_path_from_uri(table_uri)?;
1188 let versions_dir = table_path.child(VERSIONS_DIR);
1189 let manifest_metas: Vec<_> = self
1190 .object_store
1191 .read_dir_all(&versions_dir, None)
1192 .try_collect()
1193 .await
1194 .map_err(|e| {
1195 lance_core::Error::from(NamespaceError::Internal {
1196 message: format!(
1197 "Failed to list manifest files for table at '{}': {}",
1198 table_uri, e
1199 ),
1200 })
1201 })?;
1202
1203 let is_v2_naming = manifest_metas
1204 .first()
1205 .is_some_and(|meta| meta.location.filename().is_some_and(|f| f.len() == 29));
1206
1207 let mut table_versions: Vec<TableVersion> = manifest_metas
1208 .into_iter()
1209 .filter_map(|meta| {
1210 let filename = meta.location.filename()?;
1211 let version_str = filename.strip_suffix(".manifest")?;
1212 if version_str.starts_with('d') {
1213 return None;
1214 }
1215 let file_version: u64 = version_str.parse().ok()?;
1216
1217 let actual_version = if file_version > u64::MAX / 2 {
1218 u64::MAX - file_version
1219 } else {
1220 file_version
1221 };
1222
1223 Some(TableVersion {
1224 version: actual_version as i64,
1225 manifest_path: meta.location.to_string(),
1226 manifest_size: Some(meta.size as i64),
1227 e_tag: meta.e_tag,
1228 timestamp_millis: Some(meta.last_modified.timestamp_millis()),
1229 metadata: None,
1230 })
1231 })
1232 .collect();
1233
1234 let list_is_ordered = self.object_store.list_is_lexically_ordered;
1235
1236 let needs_sort = if list_is_ordered {
1237 if is_v2_naming {
1238 !descending
1239 } else {
1240 descending
1241 }
1242 } else {
1243 true
1244 };
1245
1246 if needs_sort {
1247 if descending {
1248 table_versions.sort_by(|a, b| b.version.cmp(&a.version));
1249 } else {
1250 table_versions.sort_by(|a, b| a.version.cmp(&b.version));
1251 }
1252 }
1253
1254 if let Some(limit) = limit {
1255 table_versions.truncate(limit as usize);
1256 }
1257
1258 Ok(table_versions)
1259 }
1260
1261 async fn describe_table_impl(
1265 &self,
1266 request: DescribeTableRequest,
1267 ) -> Result<DescribeTableResponse> {
1268 let is_root_level = request.id.as_ref().is_some_and(|id| id.len() == 1);
1269 let skip_manifest_for_root = self.dir_listing_enabled
1270 && is_root_level
1271 && !self.dir_listing_to_manifest_migration_enabled;
1272 if let Some(ref manifest_ns) = self.manifest_ns
1273 && !skip_manifest_for_root
1274 {
1275 match manifest_ns.describe_table(request.clone()).await {
1276 Ok(mut response) => {
1277 if let Some(ref table_uri) = response.table_uri {
1278 let vend = request.vend_credentials.unwrap_or(true);
1280 let identity = request.identity.as_deref();
1281 response.storage_options = self
1282 .get_storage_options_for_table(table_uri, vend, identity)
1283 .await?;
1284 }
1285 if self.table_version_tracking_enabled {
1287 response.managed_versioning = Some(true);
1288 }
1289 return Ok(response);
1290 }
1291 Err(_) if self.dir_listing_enabled && is_root_level => {
1292 }
1294 Err(e) => return Err(e),
1295 }
1296 }
1297
1298 let table_name = Self::table_name_from_id(&request.id)?;
1299 let table_id = Self::format_table_id_from_request(&request.id);
1300 let table_uri = self.table_full_uri(&table_name);
1301
1302 let status = self.check_table_status(&table_name).await;
1304
1305 if !status.exists {
1306 return Err(NamespaceError::TableNotFound {
1307 message: table_id.clone(),
1308 }
1309 .into());
1310 }
1311
1312 if status.is_deregistered {
1313 return Err(NamespaceError::TableNotFound {
1314 message: format!("Table is deregistered: {}", table_id),
1315 }
1316 .into());
1317 }
1318
1319 let load_detailed_metadata = request.load_detailed_metadata.unwrap_or(false);
1320 let should_check_declared =
1321 load_detailed_metadata || request.check_declared.unwrap_or(false);
1322 let vend_credentials = request.vend_credentials.unwrap_or(true);
1324 let identity = request.identity.as_deref();
1325 let is_only_declared = if should_check_declared {
1326 if status.has_reserved_file {
1327 Some(!self.table_has_actual_manifests(&table_name).await?)
1328 } else {
1329 Some(false)
1330 }
1331 } else {
1332 None
1333 };
1334
1335 if !load_detailed_metadata {
1336 let storage_options = self
1337 .get_storage_options_for_table(&table_uri, vend_credentials, identity)
1338 .await?;
1339 return Ok(DescribeTableResponse {
1340 table: Some(table_name),
1341 namespace: request.id.as_ref().map(|id| {
1342 if id.len() > 1 {
1343 id[..id.len() - 1].to_vec()
1344 } else {
1345 vec![]
1346 }
1347 }),
1348 location: Some(table_uri.clone()),
1349 table_uri: Some(table_uri),
1350 storage_options,
1351 is_only_declared,
1352 managed_versioning: if self.table_version_tracking_enabled {
1353 Some(true)
1354 } else {
1355 None
1356 },
1357 ..Default::default()
1358 });
1359 }
1360
1361 if is_only_declared == Some(true) {
1362 let storage_options = self
1363 .get_storage_options_for_table(&table_uri, vend_credentials, identity)
1364 .await?;
1365 return Ok(DescribeTableResponse {
1366 table: Some(table_name),
1367 namespace: request.id.as_ref().map(|id| {
1368 if id.len() > 1 {
1369 id[..id.len() - 1].to_vec()
1370 } else {
1371 vec![]
1372 }
1373 }),
1374 location: Some(table_uri.clone()),
1375 table_uri: Some(table_uri),
1376 storage_options,
1377 is_only_declared,
1378 managed_versioning: if self.table_version_tracking_enabled {
1379 Some(true)
1380 } else {
1381 None
1382 },
1383 ..Default::default()
1384 });
1385 }
1386
1387 let mut builder = DatasetBuilder::from_uri(&table_uri);
1390 if let Some(opts) = &self.storage_options {
1391 builder = builder.with_storage_options(opts.clone());
1392 }
1393 if let Some(sess) = &self.session {
1394 builder = builder.with_session(sess.clone());
1395 }
1396 match builder.load().await {
1397 Ok(mut dataset) => {
1398 if let Some(requested_version) = request.version {
1400 dataset = dataset
1401 .checkout_version(requested_version as u64)
1402 .await
1403 .map_err(|e| {
1404 lance_core::Error::from(NamespaceError::TableVersionNotFound {
1405 message: format!(
1406 "Version {} not found for table '{}': {}",
1407 requested_version, table_name, e
1408 ),
1409 })
1410 })?;
1411 }
1412
1413 let version_info = dataset.version();
1414 let lance_schema = dataset.schema();
1415 let arrow_schema: arrow_schema::Schema = lance_schema.into();
1416 let json_schema = arrow_schema_to_json(&arrow_schema)?;
1417 let storage_options = self
1418 .get_storage_options_for_table(&table_uri, vend_credentials, identity)
1419 .await?;
1420
1421 let metadata: std::collections::HashMap<String, String> =
1423 version_info.metadata.into_iter().collect();
1424
1425 Ok(DescribeTableResponse {
1426 table: Some(table_name),
1427 namespace: request.id.as_ref().map(|id| {
1428 if id.len() > 1 {
1429 id[..id.len() - 1].to_vec()
1430 } else {
1431 vec![]
1432 }
1433 }),
1434 version: Some(version_info.version as i64),
1435 location: Some(table_uri.clone()),
1436 table_uri: Some(table_uri),
1437 schema: Some(Box::new(json_schema)),
1438 storage_options,
1439 metadata: Some(metadata),
1440 is_only_declared,
1441 managed_versioning: if self.table_version_tracking_enabled {
1442 Some(true)
1443 } else {
1444 None
1445 },
1446 ..Default::default()
1447 })
1448 }
1449 Err(err) => {
1450 if manifest::ManifestNamespace::is_not_found_load_error(&err)
1451 && is_only_declared == Some(true)
1452 {
1453 let storage_options = self
1454 .get_storage_options_for_table(&table_uri, vend_credentials, identity)
1455 .await?;
1456 Ok(DescribeTableResponse {
1457 table: Some(table_name),
1458 namespace: request.id.as_ref().map(|id| {
1459 if id.len() > 1 {
1460 id[..id.len() - 1].to_vec()
1461 } else {
1462 vec![]
1463 }
1464 }),
1465 location: Some(table_uri.clone()),
1466 table_uri: Some(table_uri),
1467 storage_options,
1468 is_only_declared,
1469 managed_versioning: if self.table_version_tracking_enabled {
1470 Some(true)
1471 } else {
1472 None
1473 },
1474 ..Default::default()
1475 })
1476 } else {
1477 Err(NamespaceError::Internal {
1478 message: format!(
1479 "Table directory exists but cannot load dataset {}: {:?}",
1480 table_name, err
1481 ),
1482 }
1483 .into())
1484 }
1485 }
1486 }
1487 }
1488
1489 async fn load_dataset(
1490 &self,
1491 table_uri: &str,
1492 version: Option<i64>,
1493 operation: &str,
1494 ) -> Result<Dataset> {
1495 if let Some(version) = version
1496 && version < 0
1497 {
1498 return Err(NamespaceError::InvalidInput {
1499 message: format!(
1500 "Table version for {} must be non-negative, got {}",
1501 operation, version
1502 ),
1503 }
1504 .into());
1505 }
1506
1507 let mut builder = DatasetBuilder::from_uri(table_uri);
1508 if let Some(opts) = &self.storage_options {
1509 builder = builder.with_storage_options(opts.clone());
1510 }
1511 if let Some(sess) = &self.session {
1512 builder = builder.with_session(sess.clone());
1513 }
1514
1515 let dataset = builder.load().await.map_err(|e| {
1516 lance_core::Error::from(NamespaceError::TableNotFound {
1517 message: format!(
1518 "Failed to open table at '{}' for {}: {}",
1519 table_uri, operation, e
1520 ),
1521 })
1522 })?;
1523
1524 if let Some(version) = version {
1525 return dataset.checkout_version(version as u64).await.map_err(|e| {
1526 lance_core::Error::from(NamespaceError::TableVersionNotFound {
1527 message: format!(
1528 "Failed to checkout version {} for table at '{}' during {}: {}",
1529 version, table_uri, operation, e
1530 ),
1531 })
1532 });
1533 }
1534
1535 Ok(dataset)
1536 }
1537
1538 fn parse_index_type(index_type: &str) -> Result<IndexType> {
1539 match index_type.trim().to_ascii_uppercase().as_str() {
1540 "SCALAR" | "BTREE" => Ok(IndexType::BTree),
1541 "BITMAP" => Ok(IndexType::Bitmap),
1542 "LABEL_LIST" | "LABELLIST" => Ok(IndexType::LabelList),
1543 "INVERTED" | "FTS" => Ok(IndexType::Inverted),
1544 "NGRAM" => Ok(IndexType::NGram),
1545 "ZONEMAP" | "ZONE_MAP" => Ok(IndexType::ZoneMap),
1546 "BLOOMFILTER" | "BLOOM_FILTER" => Ok(IndexType::BloomFilter),
1547 "RTREE" | "R_TREE" => Ok(IndexType::RTree),
1548 "VECTOR" | "IVF_PQ" => Ok(IndexType::IvfPq),
1549 "IVF_FLAT" => Ok(IndexType::IvfFlat),
1550 "IVF_SQ" => Ok(IndexType::IvfSq),
1551 "IVF_RQ" => Ok(IndexType::IvfRq),
1552 "IVF_HNSW_FLAT" => Ok(IndexType::IvfHnswFlat),
1553 "IVF_HNSW_SQ" => Ok(IndexType::IvfHnswSq),
1554 "IVF_HNSW_PQ" => Ok(IndexType::IvfHnswPq),
1555 other => Err(NamespaceError::InvalidInput {
1556 message: format!("Unsupported index_type '{}'", other),
1557 }
1558 .into()),
1559 }
1560 }
1561
1562 fn parse_metric_type(distance_type: Option<&str>) -> Result<MetricType> {
1563 let distance_type = distance_type.unwrap_or("l2");
1564 MetricType::try_from(distance_type).map_err(|e| {
1565 lance_core::Error::from(NamespaceError::InvalidInput {
1566 message: format!(
1567 "Unsupported distance_type '{}' for vector index: {}",
1568 distance_type, e
1569 ),
1570 })
1571 })
1572 }
1573
1574 fn build_index_params(request: &CreateTableIndexRequest) -> Result<DirectoryIndexParams> {
1575 let index_type = Self::parse_index_type(&request.index_type)?;
1576 Ok(match index_type {
1577 IndexType::BTree => DirectoryIndexParams::Scalar {
1578 index_type,
1579 params: ScalarIndexParams::for_builtin(BuiltinIndexType::BTree),
1580 },
1581 IndexType::Bitmap => DirectoryIndexParams::Scalar {
1582 index_type,
1583 params: ScalarIndexParams::for_builtin(BuiltinIndexType::Bitmap),
1584 },
1585 IndexType::LabelList => DirectoryIndexParams::Scalar {
1586 index_type,
1587 params: ScalarIndexParams::for_builtin(BuiltinIndexType::LabelList),
1588 },
1589 IndexType::NGram => DirectoryIndexParams::Scalar {
1590 index_type,
1591 params: ScalarIndexParams::for_builtin(BuiltinIndexType::NGram),
1592 },
1593 IndexType::ZoneMap => DirectoryIndexParams::Scalar {
1594 index_type,
1595 params: ScalarIndexParams::for_builtin(BuiltinIndexType::ZoneMap),
1596 },
1597 IndexType::BloomFilter => DirectoryIndexParams::Scalar {
1598 index_type,
1599 params: ScalarIndexParams::for_builtin(BuiltinIndexType::BloomFilter),
1600 },
1601 IndexType::RTree => DirectoryIndexParams::Scalar {
1602 index_type,
1603 params: ScalarIndexParams::for_builtin(BuiltinIndexType::RTree),
1604 },
1605 IndexType::Inverted => {
1606 let mut params = InvertedIndexParams::default();
1607 if let Some(with_position) = request.with_position {
1608 params = params.with_position(with_position);
1609 }
1610 if let Some(base_tokenizer) = &request.base_tokenizer {
1611 params = params.base_tokenizer(base_tokenizer.clone());
1612 }
1613 if let Some(language) = &request.language {
1614 params = params.language(language)?;
1615 }
1616 if let Some(max_token_length) = request.max_token_length {
1617 if max_token_length < 0 {
1618 return Err(NamespaceError::InvalidInput {
1619 message: format!(
1620 "FTS max_token_length must be non-negative, got {}",
1621 max_token_length
1622 ),
1623 }
1624 .into());
1625 }
1626 params = params.max_token_length(Some(max_token_length as usize));
1627 }
1628 if let Some(lower_case) = request.lower_case {
1629 params = params.lower_case(lower_case);
1630 }
1631 if let Some(stem) = request.stem {
1632 params = params.stem(stem);
1633 }
1634 if let Some(remove_stop_words) = request.remove_stop_words {
1635 params = params.remove_stop_words(remove_stop_words);
1636 }
1637 if let Some(ascii_folding) = request.ascii_folding {
1638 params = params.ascii_folding(ascii_folding);
1639 }
1640 DirectoryIndexParams::Inverted(params)
1641 }
1642 IndexType::IvfFlat => DirectoryIndexParams::Vector {
1643 index_type,
1644 params: VectorIndexParams::with_ivf_flat_params(
1645 Self::parse_metric_type(request.distance_type.as_deref())?,
1646 IvfBuildParams::default(),
1647 ),
1648 },
1649 IndexType::IvfPq => DirectoryIndexParams::Vector {
1650 index_type,
1651 params: VectorIndexParams::with_ivf_pq_params(
1652 Self::parse_metric_type(request.distance_type.as_deref())?,
1653 IvfBuildParams::default(),
1654 PQBuildParams::default(),
1655 ),
1656 },
1657 IndexType::IvfSq => DirectoryIndexParams::Vector {
1658 index_type,
1659 params: VectorIndexParams::with_ivf_sq_params(
1660 Self::parse_metric_type(request.distance_type.as_deref())?,
1661 IvfBuildParams::default(),
1662 SQBuildParams::default(),
1663 ),
1664 },
1665 IndexType::IvfRq => DirectoryIndexParams::Vector {
1666 index_type,
1667 params: VectorIndexParams::with_ivf_rq_params(
1668 Self::parse_metric_type(request.distance_type.as_deref())?,
1669 IvfBuildParams::default(),
1670 RQBuildParams::default(),
1671 ),
1672 },
1673 IndexType::IvfHnswFlat => DirectoryIndexParams::Vector {
1674 index_type,
1675 params: VectorIndexParams::ivf_hnsw(
1676 Self::parse_metric_type(request.distance_type.as_deref())?,
1677 IvfBuildParams::default(),
1678 HnswBuildParams::default(),
1679 ),
1680 },
1681 IndexType::IvfHnswSq => DirectoryIndexParams::Vector {
1682 index_type,
1683 params: VectorIndexParams::with_ivf_hnsw_sq_params(
1684 Self::parse_metric_type(request.distance_type.as_deref())?,
1685 IvfBuildParams::default(),
1686 HnswBuildParams::default(),
1687 SQBuildParams::default(),
1688 ),
1689 },
1690 IndexType::IvfHnswPq => DirectoryIndexParams::Vector {
1691 index_type,
1692 params: VectorIndexParams::with_ivf_hnsw_pq_params(
1693 Self::parse_metric_type(request.distance_type.as_deref())?,
1694 IvfBuildParams::default(),
1695 HnswBuildParams::default(),
1696 PQBuildParams::default(),
1697 ),
1698 },
1699 other => {
1700 return Err(NamespaceError::InvalidInput {
1701 message: format!("Unsupported index type for namespace API: {}", other),
1702 }
1703 .into());
1704 }
1705 })
1706 }
1707
1708 fn paginate_indices(
1709 indices: &mut Vec<IndexContent>,
1710 page_token: Option<String>,
1711 limit: Option<i32>,
1712 ) -> Option<String> {
1713 indices.sort_by(|a, b| a.index_name.cmp(&b.index_name));
1714
1715 if let Some(start_after) = page_token {
1716 if let Some(index) = indices
1717 .iter()
1718 .position(|index| index.index_name.as_str() > start_after.as_str())
1719 {
1720 indices.drain(0..index);
1721 } else {
1722 indices.clear();
1723 }
1724 }
1725
1726 let mut next_page_token = None;
1727 if let Some(limit) = limit
1728 && limit >= 0
1729 {
1730 let limit = limit as usize;
1731 if limit > 0 && indices.len() > limit {
1732 next_page_token = Some(indices[limit - 1].index_name.clone());
1733 }
1734 indices.truncate(limit);
1735 }
1736 if indices.is_empty() {
1737 None
1738 } else {
1739 next_page_token
1740 }
1741 }
1742
1743 fn transaction_operation_name(transaction: &Transaction) -> String {
1744 match &transaction.operation {
1745 Operation::CreateIndex {
1746 new_indices,
1747 removed_indices,
1748 } if new_indices.is_empty() && !removed_indices.is_empty() => "DropIndex".to_string(),
1749 _ => transaction.operation.to_string(),
1750 }
1751 }
1752
1753 fn transaction_response(
1754 version: u64,
1755 transaction: &Transaction,
1756 ) -> DescribeTransactionResponse {
1757 let mut properties = transaction
1758 .transaction_properties
1759 .as_ref()
1760 .map(|properties| (**properties).clone())
1761 .unwrap_or_default();
1762 properties.insert("uuid".to_string(), transaction.uuid.clone());
1763 properties.insert("version".to_string(), version.to_string());
1764 properties.insert(
1765 "read_version".to_string(),
1766 transaction.read_version.to_string(),
1767 );
1768 properties.insert(
1769 "operation".to_string(),
1770 Self::transaction_operation_name(transaction),
1771 );
1772 if let Some(tag) = &transaction.tag {
1773 properties.insert("tag".to_string(), tag.clone());
1774 }
1775
1776 DescribeTransactionResponse {
1777 status: "SUCCEEDED".to_string(),
1778 properties: Some(properties),
1779 }
1780 }
1781
1782 fn describe_table_index_stats_response(
1783 stats: &serde_json::Value,
1784 ) -> DescribeTableIndexStatsResponse {
1785 let get_i64 = |key: &str| {
1786 stats.get(key).and_then(|value| {
1787 value
1788 .as_i64()
1789 .or_else(|| value.as_u64().and_then(|v| i64::try_from(v).ok()))
1790 })
1791 };
1792
1793 DescribeTableIndexStatsResponse {
1794 distance_type: stats
1795 .get("distance_type")
1796 .and_then(|value| value.as_str())
1797 .map(str::to_string),
1798 index_type: stats
1799 .get("index_type")
1800 .and_then(|value| value.as_str())
1801 .map(str::to_string),
1802 num_indexed_rows: get_i64("num_indexed_rows"),
1803 num_unindexed_rows: get_i64("num_unindexed_rows"),
1804 num_indices: get_i64("num_indices").and_then(|value| i32::try_from(value).ok()),
1805 }
1806 }
1807
1808 async fn find_transaction(&self, dataset: &Dataset, id: &str) -> Result<(u64, Transaction)> {
1813 if let Ok(version) = id.parse::<u64>() {
1814 let transaction = dataset
1815 .read_transaction_by_version(version)
1816 .await
1817 .map_err(|e| {
1818 lance_core::Error::from(NamespaceError::TransactionNotFound {
1819 message: format!(
1820 "Failed to read transaction for version {}: {}",
1821 version, e
1822 ),
1823 })
1824 })?
1825 .ok_or_else(|| {
1826 lance_core::Error::from(NamespaceError::TransactionNotFound {
1827 message: format!("version {}", version),
1828 })
1829 })?;
1830 return Ok((version, transaction));
1831 }
1832
1833 let versions = dataset.versions().await.map_err(|e| {
1834 lance_core::Error::from(NamespaceError::Internal {
1835 message: format!(
1836 "Failed to list table versions while resolving transaction '{}': {}",
1837 id, e
1838 ),
1839 })
1840 })?;
1841
1842 for version in versions.into_iter().rev() {
1843 if let Some(transaction) = dataset
1844 .read_transaction_by_version(version.version)
1845 .await
1846 .map_err(|e| {
1847 lance_core::Error::from(NamespaceError::Internal {
1848 message: format!(
1849 "Failed to read transaction for version {} while resolving '{}': {}",
1850 version.version, id, e
1851 ),
1852 })
1853 })?
1854 && transaction.uuid == id
1855 {
1856 return Ok((version.version, transaction));
1857 }
1858 }
1859
1860 Err(NamespaceError::TransactionNotFound {
1861 message: id.to_string(),
1862 }
1863 .into())
1864 }
1865
1866 fn table_full_uri(&self, table_name: &str) -> String {
1867 format!("{}/{}.lance", &self.root, table_name)
1868 }
1869
1870 fn table_path(&self, table_name: &str) -> Path {
1872 self.base_path
1873 .child(format!("{}.lance", table_name).as_str())
1874 }
1875
1876 fn table_reserved_file_path(&self, table_name: &str) -> Path {
1878 self.base_path
1879 .child(format!("{}.lance", table_name).as_str())
1880 .child(".lance-reserved")
1881 }
1882
1883 fn table_deregistered_file_path(&self, table_name: &str) -> Path {
1885 self.base_path
1886 .child(format!("{}.lance", table_name).as_str())
1887 .child(".lance-deregistered")
1888 }
1889
1890 pub(crate) async fn check_table_status(&self, table_name: &str) -> TableStatus {
1896 let table_path = self.table_path(table_name);
1897 match self.object_store.read_dir(table_path).await {
1898 Ok(entries) => {
1899 let exists = !entries.is_empty();
1900 let is_deregistered = entries.iter().any(|e| e.ends_with(".lance-deregistered"));
1901 let has_reserved_file = entries.iter().any(|e| e.ends_with(".lance-reserved"));
1902 TableStatus {
1903 exists,
1904 is_deregistered,
1905 has_reserved_file,
1906 }
1907 }
1908 Err(_) => TableStatus {
1909 exists: false,
1910 is_deregistered: false,
1911 has_reserved_file: false,
1912 },
1913 }
1914 }
1915
1916 async fn put_marker_file_atomic(
1917 &self,
1918 path: &Path,
1919 file_description: &str,
1920 ) -> std::result::Result<(), String> {
1921 let put_opts = PutOptions {
1922 mode: PutMode::Create,
1923 ..Default::default()
1924 };
1925
1926 match self
1927 .object_store
1928 .inner
1929 .put_opts(path, bytes::Bytes::new().into(), put_opts)
1930 .await
1931 {
1932 Ok(_) => Ok(()),
1933 Err(ObjectStoreError::AlreadyExists { .. })
1934 | Err(ObjectStoreError::Precondition { .. }) => {
1935 Err(format!("{} already exists", file_description))
1936 }
1937 Err(e) => Err(format!("Failed to create {}: {:?}", file_description, e)),
1938 }
1939 }
1940
1941 async fn get_storage_options_for_table(
1961 &self,
1962 table_uri: &str,
1963 vend_credentials: bool,
1964 identity: Option<&Identity>,
1965 ) -> Result<Option<HashMap<String, String>>> {
1966 if vend_credentials && let Some(ref vendor) = self.credential_vendor {
1967 let vended = vendor.vend_credentials(table_uri, identity).await?;
1968 return Ok(Some(vended.storage_options));
1969 }
1970 if self.vend_input_storage_options {
1973 let mut options = self.storage_options.clone().unwrap_or_default();
1974 if let Some(refresh_interval_millis) =
1976 self.vend_input_storage_options_refresh_interval_millis
1977 {
1978 let now_millis = std::time::SystemTime::now()
1979 .duration_since(std::time::UNIX_EPOCH)
1980 .unwrap()
1981 .as_millis() as u64;
1982 let expires_at_millis = now_millis + refresh_interval_millis;
1983 options.insert(
1984 "expires_at_millis".to_string(),
1985 expires_at_millis.to_string(),
1986 );
1987 }
1988 return Ok(Some(options));
1989 }
1990 Ok(None)
1993 }
1994
1995 pub async fn migrate(&self) -> Result<usize> {
2048 let Some(ref manifest_ns) = self.manifest_ns else {
2050 return Ok(0); };
2052
2053 let manifest_locations = manifest_ns.list_manifest_table_locations().await?;
2055
2056 let dir_tables = self
2059 .filter_declared_tables(self.list_directory_tables().await?, false)
2060 .await?;
2061
2062 let mut migrated_count = 0;
2067 for table_name in dir_tables {
2068 let dir_name = format!("{}.lance", table_name);
2070 if !manifest_locations.contains(&dir_name) {
2071 manifest_ns.register_table(&table_name, dir_name).await?;
2072 migrated_count += 1;
2073 }
2074 }
2075
2076 Ok(migrated_count)
2077 }
2078
2079 async fn delete_physical_version_files(
2088 &self,
2089 table_entries: &[TableDeleteEntry],
2090 best_effort: bool,
2091 ) -> Result<i64> {
2092 let mut deleted_count = 0i64;
2093 for te in table_entries {
2094 let table_uri = self.resolve_table_location(&te.table_id).await?;
2095 let table_path = self.object_store_path_from_uri(&table_uri)?;
2096 let versions_dir_path = table_path.child(VERSIONS_DIR);
2097
2098 for (start, end) in &te.ranges {
2099 for version in *start..=*end {
2100 let version_path =
2101 versions_dir_path.child(format!("{}.manifest", version as u64));
2102 match self.object_store.inner.delete(&version_path).await {
2103 Ok(_) => {
2104 deleted_count += 1;
2105 }
2106 Err(object_store::Error::NotFound { .. }) => {}
2107 Err(e) => {
2108 if best_effort {
2109 log::warn!(
2110 "Failed to delete manifest file for version {} of table {:?}: {:?}",
2111 version,
2112 te.table_id,
2113 e
2114 );
2115 } else {
2116 return Err(NamespaceError::Internal {
2117 message: format!(
2118 "Failed to delete version {} for table at '{}': {}",
2119 version, table_uri, e
2120 ),
2121 }
2122 .into());
2123 }
2124 }
2125 }
2126 }
2127 }
2128 }
2129 Ok(deleted_count)
2130 }
2131
2132 #[allow(clippy::too_many_arguments)]
2137 fn apply_query_params_to_scanner(
2138 scanner: &mut Scanner,
2139 filter: Option<&str>,
2140 columns: Option<&QueryTableRequestColumns>,
2141 vector_column: Option<&str>,
2142 vector: &QueryTableRequestVector,
2143 k: i32,
2144 offset: Option<i32>,
2145 prefilter: Option<bool>,
2146 bypass_vector_index: Option<bool>,
2147 nprobes: Option<i32>,
2148 ef: Option<i32>,
2149 refine_factor: Option<i32>,
2150 distance_type: Option<&str>,
2151 fast_search_flag: Option<bool>,
2152 with_row_id: Option<bool>,
2153 lower_bound: Option<f32>,
2154 upper_bound: Option<f32>,
2155 operation: &str,
2156 ) -> Result<()> {
2157 if let Some(pf) = prefilter {
2159 scanner.prefilter(pf);
2160 }
2161
2162 if let Some(filter) = filter {
2163 scanner.filter(filter).map_err(|e| {
2164 Error::invalid_input_source(
2165 format!("Invalid filter expression for {}: {}", operation, e).into(),
2166 )
2167 })?;
2168 }
2169
2170 if let Some(cols) = columns {
2171 if let Some(ref names) = cols.column_names {
2172 scanner.project(names.as_slice()).map_err(|e| {
2173 Error::invalid_input_source(
2174 format!("Invalid column projection for {}: {}", operation, e).into(),
2175 )
2176 })?;
2177 } else if let Some(ref aliases) = cols.column_aliases {
2178 let pairs: Vec<(&str, &str)> = aliases
2180 .iter()
2181 .map(|(alias, src)| (alias.as_str(), src.as_str()))
2182 .collect();
2183 scanner.project_with_transform(&pairs).map_err(|e| {
2184 Error::invalid_input_source(
2185 format!("Invalid column aliases for {}: {}", operation, e).into(),
2186 )
2187 })?;
2188 }
2189 }
2190
2191 let query_vec: Option<Vec<f32>> = vector
2193 .single_vector
2194 .as_ref()
2195 .filter(|v| !v.is_empty())
2196 .cloned()
2197 .or_else(|| {
2198 vector
2199 .multi_vector
2200 .as_ref()
2201 .and_then(|mv| mv.first())
2202 .filter(|v| !v.is_empty())
2203 .cloned()
2204 });
2205
2206 if let Some(q_vec) = query_vec {
2207 let col = vector_column.unwrap_or("vector");
2208 let q = Arc::new(Float32Array::from(q_vec));
2209 scanner
2210 .nearest(col, q.as_ref(), k.max(1) as usize)
2211 .map_err(|e| {
2212 Error::invalid_input_source(
2213 format!("Invalid vector query for {}: {}", operation, e).into(),
2214 )
2215 })?;
2216
2217 if let Some(n) = nprobes {
2219 scanner.nprobes(n.max(1) as usize);
2220 }
2221 if let Some(e) = ef {
2222 scanner.ef(e.max(1) as usize);
2223 }
2224 if let Some(rf) = refine_factor {
2225 scanner.refine(rf.max(0) as u32);
2226 }
2227 if let Some(true) = bypass_vector_index {
2229 scanner.use_index(false);
2230 }
2231 if let Some(true) = fast_search_flag {
2232 scanner.fast_search();
2233 }
2234 if lower_bound.is_some() || upper_bound.is_some() {
2235 scanner.distance_range(lower_bound, upper_bound);
2236 }
2237 if let Some(dt) = distance_type {
2238 let metric = Self::parse_metric_type(Some(dt))?;
2239 scanner.distance_metric(metric);
2240 }
2241 if let Some(off) = offset.filter(|&o| o > 0) {
2243 scanner.limit(None, Some(off as i64)).map_err(|e| {
2244 Error::invalid_input_source(
2245 format!("Invalid offset for {}: {}", operation, e).into(),
2246 )
2247 })?;
2248 }
2249 } else {
2250 let limit = if k > 0 { Some(k as i64) } else { None };
2252 scanner
2253 .limit(limit, offset.map(|o| o as i64))
2254 .map_err(|e| {
2255 Error::invalid_input_source(
2256 format!("Invalid limit/offset for {}: {}", operation, e).into(),
2257 )
2258 })?;
2259 }
2260
2261 if let Some(true) = with_row_id {
2262 scanner.with_row_id();
2263 }
2264
2265 Ok(())
2266 }
2267
2268 pub fn retrieve_ops_metrics(&self) -> HashMap<String, u64> {
2275 self.ops_metrics
2276 .as_ref()
2277 .map(|m| m.retrieve())
2278 .unwrap_or_default()
2279 }
2280
2281 pub fn reset_ops_metrics(&self) {
2285 if let Some(ref metrics) = self.ops_metrics {
2286 metrics.reset();
2287 }
2288 }
2289
2290 fn record_op(&self, operation: &str) {
2292 if let Some(ref metrics) = self.ops_metrics {
2293 metrics.increment(operation);
2294 }
2295 }
2296}
2297
2298#[async_trait]
2299impl LanceNamespace for DirectoryNamespace {
2300 async fn list_namespaces(
2301 &self,
2302 request: ListNamespacesRequest,
2303 ) -> Result<ListNamespacesResponse> {
2304 self.record_op("list_namespaces");
2305 if let Some(ref manifest_ns) = self.manifest_ns {
2306 return manifest_ns.list_namespaces(request).await;
2307 }
2308
2309 Self::validate_root_namespace_id(&request.id)?;
2310 Ok(ListNamespacesResponse::new(vec![]))
2311 }
2312
2313 async fn describe_namespace(
2314 &self,
2315 request: DescribeNamespaceRequest,
2316 ) -> Result<DescribeNamespaceResponse> {
2317 self.record_op("describe_namespace");
2318 if let Some(ref manifest_ns) = self.manifest_ns {
2319 return manifest_ns.describe_namespace(request).await;
2320 }
2321
2322 Self::validate_root_namespace_id(&request.id)?;
2323 #[allow(clippy::needless_update)]
2324 Ok(DescribeNamespaceResponse {
2325 properties: Some(HashMap::new()),
2326 ..Default::default()
2327 })
2328 }
2329
2330 async fn create_namespace(
2331 &self,
2332 request: CreateNamespaceRequest,
2333 ) -> Result<CreateNamespaceResponse> {
2334 self.record_op("create_namespace");
2335 if let Some(ref manifest_ns) = self.manifest_ns {
2336 return manifest_ns.create_namespace(request).await;
2337 }
2338
2339 if request.id.is_none() || request.id.as_ref().unwrap().is_empty() {
2340 return Err(NamespaceError::NamespaceAlreadyExists {
2341 message: "root namespace".to_string(),
2342 }
2343 .into());
2344 }
2345
2346 Err(NamespaceError::Unsupported {
2347 message: "Child namespaces are only supported when manifest mode is enabled"
2348 .to_string(),
2349 }
2350 .into())
2351 }
2352
2353 async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<DropNamespaceResponse> {
2354 self.record_op("drop_namespace");
2355 if let Some(ref manifest_ns) = self.manifest_ns {
2356 return manifest_ns.drop_namespace(request).await;
2357 }
2358
2359 if request.id.is_none() || request.id.as_ref().unwrap().is_empty() {
2360 return Err(NamespaceError::InvalidInput {
2361 message: "Root namespace cannot be dropped".to_string(),
2362 }
2363 .into());
2364 }
2365
2366 Err(NamespaceError::Unsupported {
2367 message: "Child namespaces are only supported when manifest mode is enabled"
2368 .to_string(),
2369 }
2370 .into())
2371 }
2372
2373 async fn namespace_exists(&self, request: NamespaceExistsRequest) -> Result<()> {
2374 self.record_op("namespace_exists");
2375 if let Some(ref manifest_ns) = self.manifest_ns {
2376 return manifest_ns.namespace_exists(request).await;
2377 }
2378
2379 if request.id.is_none() || request.id.as_ref().unwrap().is_empty() {
2380 return Ok(());
2381 }
2382
2383 Err(NamespaceError::NamespaceNotFound {
2384 message: "Child namespaces are only supported when manifest mode is enabled"
2385 .to_string(),
2386 }
2387 .into())
2388 }
2389
2390 async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
2391 self.record_op("list_tables");
2392 let namespace_id = request.id.as_ref().ok_or_else(|| {
2394 lance_core::Error::from(NamespaceError::InvalidInput {
2395 message: "Namespace ID is required".to_string(),
2396 })
2397 })?;
2398
2399 if !namespace_id.is_empty() {
2401 if let Some(ref manifest_ns) = self.manifest_ns {
2402 return manifest_ns.list_tables(request).await;
2403 }
2404 return Err(NamespaceError::Unsupported {
2405 message: "Child namespaces are only supported when manifest mode is enabled"
2406 .to_string(),
2407 }
2408 .into());
2409 }
2410
2411 if let Some(ref manifest_ns) = self.manifest_ns
2413 && !self.dir_listing_enabled
2414 {
2415 return manifest_ns.list_tables(request).await;
2416 }
2417
2418 let mut tables = if self.manifest_ns.is_some()
2421 && self.dir_listing_enabled
2422 && self.dir_listing_to_manifest_migration_enabled
2423 {
2424 let manifest_locations = if let Some(ref manifest_ns) = self.manifest_ns {
2426 manifest_ns.list_manifest_table_locations().await?
2427 } else {
2428 std::collections::HashSet::new()
2429 };
2430
2431 let mut manifest_request = request.clone();
2433 manifest_request.limit = None;
2434 manifest_request.page_token = None;
2435 let manifest_tables = if let Some(ref manifest_ns) = self.manifest_ns {
2436 let manifest_response = manifest_ns.list_tables(manifest_request).await?;
2437 manifest_response.tables
2438 } else {
2439 vec![]
2440 };
2441
2442 let mut all_tables: Vec<String> = manifest_tables;
2445 let dir_tables = self.list_directory_tables().await?;
2446 for table_name in dir_tables {
2447 let full_location = format!("{}/{}.lance", self.root, table_name);
2450 let relative_location = format!("{}.lance", table_name);
2451 if !manifest_locations.contains(&full_location)
2452 && !manifest_locations.contains(&relative_location)
2453 {
2454 all_tables.push(table_name);
2455 }
2456 }
2457
2458 all_tables
2459 } else {
2460 self.list_directory_tables().await?
2461 };
2462
2463 tables = self
2464 .filter_declared_tables(tables, request.include_declared.unwrap_or(true))
2465 .await?;
2466
2467 let next_page_token =
2469 Self::apply_pagination(&mut tables, request.page_token, request.limit);
2470 let mut response = ListTablesResponse::new(tables);
2471 response.page_token = next_page_token;
2472 Ok(response)
2473 }
2474
2475 async fn describe_table(&self, request: DescribeTableRequest) -> Result<DescribeTableResponse> {
2476 self.record_op("describe_table");
2477 self.describe_table_impl(request).await
2478 }
2479
2480 async fn table_exists(&self, request: TableExistsRequest) -> Result<()> {
2481 self.record_op("table_exists");
2482 let is_root_level = request.id.as_ref().is_some_and(|id| id.len() == 1);
2483 let skip_manifest_for_root = self.dir_listing_enabled
2484 && is_root_level
2485 && !self.dir_listing_to_manifest_migration_enabled;
2486 if let Some(ref manifest_ns) = self.manifest_ns
2487 && !skip_manifest_for_root
2488 {
2489 match manifest_ns.table_exists(request.clone()).await {
2490 Ok(()) => return Ok(()),
2491 Err(_) if self.dir_listing_enabled && is_root_level => {
2492 }
2494 Err(e) => return Err(e),
2495 }
2496 }
2497
2498 let table_name = Self::table_name_from_id(&request.id)?;
2499 let table_id = Self::format_table_id_from_request(&request.id);
2500
2501 let status = self.check_table_status(&table_name).await;
2503
2504 if !status.exists {
2505 return Err(NamespaceError::TableNotFound {
2506 message: table_id.clone(),
2507 }
2508 .into());
2509 }
2510
2511 if status.is_deregistered {
2512 return Err(NamespaceError::TableNotFound {
2513 message: format!("Table is deregistered: {}", table_id),
2514 }
2515 .into());
2516 }
2517
2518 Ok(())
2519 }
2520
2521 async fn drop_table(&self, request: DropTableRequest) -> Result<DropTableResponse> {
2522 self.record_op("drop_table");
2523 if let Some(ref manifest_ns) = self.manifest_ns {
2524 return manifest_ns.drop_table(request).await;
2525 }
2526
2527 let table_name = Self::table_name_from_id(&request.id)?;
2528 let table_uri = self.table_full_uri(&table_name);
2529 let table_path = self.table_path(&table_name);
2530
2531 self.object_store
2532 .remove_dir_all(table_path)
2533 .await
2534 .map_err(|e| {
2535 lance_core::Error::from(NamespaceError::Internal {
2536 message: format!("Failed to drop table {}: {:?}", table_name, e),
2537 })
2538 })?;
2539
2540 Ok(DropTableResponse {
2541 id: request.id,
2542 location: Some(table_uri),
2543 ..Default::default()
2544 })
2545 }
2546
2547 async fn create_table(
2548 &self,
2549 request: CreateTableRequest,
2550 request_data: Bytes,
2551 ) -> Result<CreateTableResponse> {
2552 self.record_op("create_table");
2553 if let Some(ref manifest_ns) = self.manifest_ns {
2554 return manifest_ns.create_table(request, request_data).await;
2555 }
2556
2557 Self::validate_dir_only_properties(request.properties.as_ref(), "create_table")?;
2558
2559 let table_name = Self::table_name_from_id(&request.id)?;
2560 let table_uri = self.table_full_uri(&table_name);
2561 let status = self.check_table_status(&table_name).await;
2562 let (reader, _num_rows) =
2563 Self::ipc_reader_from_request_data(&request_data, "create_table")?;
2564
2565 if status.exists && self.table_has_actual_manifests(&table_name).await? {
2566 return Err(NamespaceError::TableAlreadyExists {
2567 message: table_name,
2568 }
2569 .into());
2570 }
2571
2572 let write_result = self
2573 .write_reader_to_table(
2574 &table_uri,
2575 reader,
2576 WriteMode::Create,
2577 request.storage_options.clone(),
2578 )
2579 .await;
2580 if let Err(err) = write_result {
2581 if self.table_uri_has_actual_manifests(&table_uri).await? {
2582 return Err(NamespaceError::TableAlreadyExists {
2583 message: table_name,
2584 }
2585 .into());
2586 }
2587 return Err(err);
2588 }
2589 Ok(CreateTableResponse {
2590 version: Some(1),
2591 location: Some(table_uri),
2592 storage_options: self.storage_options.clone(),
2593 properties: request.properties,
2594 ..Default::default()
2595 })
2596 }
2597
2598 async fn declare_table(&self, request: DeclareTableRequest) -> Result<DeclareTableResponse> {
2599 self.record_op("declare_table");
2600 if let Some(ref manifest_ns) = self.manifest_ns {
2601 let mut response = manifest_ns.declare_table(request.clone()).await?;
2602 if let Some(ref location) = response.location {
2603 let vend = request.vend_credentials.unwrap_or(true);
2605 let identity = request.identity.as_deref();
2606 response.storage_options = self
2607 .get_storage_options_for_table(location, vend, identity)
2608 .await?;
2609 }
2610 if self.table_version_tracking_enabled {
2612 response.managed_versioning = Some(true);
2613 }
2614 return Ok(response);
2615 }
2616
2617 Self::validate_dir_only_properties(request.properties.as_ref(), "declare_table")?;
2618
2619 let table_name = Self::table_name_from_id(&request.id)?;
2620 let table_uri = self.table_full_uri(&table_name);
2621
2622 if let Some(location) = &request.location {
2624 let location = location.trim_end_matches('/');
2625 if location != table_uri {
2626 return Err(NamespaceError::InvalidInput {
2627 message: format!(
2628 "Cannot declare table {} at location {}, must be at location {}",
2629 table_name, location, table_uri
2630 ),
2631 }
2632 .into());
2633 }
2634 }
2635
2636 let status = self.check_table_status(&table_name).await;
2640 if status.exists && !status.has_reserved_file {
2641 return Err(NamespaceError::TableAlreadyExists {
2643 message: table_name.to_string(),
2644 }
2645 .into());
2646 }
2647
2648 let reserved_file_path = self.table_reserved_file_path(&table_name);
2652
2653 self.put_marker_file_atomic(&reserved_file_path, &format!("table {}", table_name))
2654 .await
2655 .map_err(|e| {
2656 if e.contains("already exists") {
2657 lance_core::Error::from(NamespaceError::TableAlreadyExists {
2658 message: table_name.to_string(),
2659 })
2660 } else {
2661 lance_core::Error::from(NamespaceError::Internal { message: e })
2662 }
2663 })?;
2664
2665 let vend_credentials = request.vend_credentials.unwrap_or(true);
2667 let identity = request.identity.as_deref();
2668 let storage_options = self
2669 .get_storage_options_for_table(&table_uri, vend_credentials, identity)
2670 .await?;
2671
2672 Ok(DeclareTableResponse {
2673 location: Some(table_uri),
2674 storage_options,
2675 properties: request.properties,
2676 managed_versioning: if self.table_version_tracking_enabled {
2677 Some(true)
2678 } else {
2679 None
2680 },
2681 ..Default::default()
2682 })
2683 }
2684
2685 async fn register_table(
2686 &self,
2687 request: lance_namespace::models::RegisterTableRequest,
2688 ) -> Result<lance_namespace::models::RegisterTableResponse> {
2689 self.record_op("register_table");
2690 if let Some(ref manifest_ns) = self.manifest_ns {
2692 return LanceNamespace::register_table(manifest_ns.as_ref(), request).await;
2693 }
2694
2695 Err(NamespaceError::Unsupported {
2697 message: "register_table is only supported when manifest mode is enabled".to_string(),
2698 }
2699 .into())
2700 }
2701
2702 async fn deregister_table(
2703 &self,
2704 request: lance_namespace::models::DeregisterTableRequest,
2705 ) -> Result<lance_namespace::models::DeregisterTableResponse> {
2706 self.record_op("deregister_table");
2707 if let Some(ref manifest_ns) = self.manifest_ns {
2709 return LanceNamespace::deregister_table(manifest_ns.as_ref(), request).await;
2710 }
2711
2712 let table_name = Self::table_name_from_id(&request.id)?;
2714 let table_uri = self.table_full_uri(&table_name);
2715
2716 let status = self.check_table_status(&table_name).await;
2719
2720 if !status.exists {
2721 return Err(NamespaceError::TableNotFound {
2722 message: table_name.to_string(),
2723 }
2724 .into());
2725 }
2726
2727 if status.is_deregistered {
2728 return Err(NamespaceError::TableNotFound {
2729 message: format!("Table is already deregistered: {}", table_name),
2730 }
2731 .into());
2732 }
2733
2734 let deregistered_path = self.table_deregistered_file_path(&table_name);
2740 self.put_marker_file_atomic(
2741 &deregistered_path,
2742 &format!("deregistration marker for table {}", table_name),
2743 )
2744 .await
2745 .map_err(|e| {
2746 if e.contains("already exists") {
2747 lance_core::Error::from(NamespaceError::InvalidTableState {
2748 message: format!("Table is already deregistered: {}", table_name),
2749 })
2750 } else {
2751 lance_core::Error::from(NamespaceError::Internal { message: e })
2752 }
2753 })?;
2754
2755 Ok(lance_namespace::models::DeregisterTableResponse {
2756 id: request.id,
2757 location: Some(table_uri),
2758 ..Default::default()
2759 })
2760 }
2761
2762 async fn list_table_versions(
2763 &self,
2764 request: ListTableVersionsRequest,
2765 ) -> Result<ListTableVersionsResponse> {
2766 self.record_op("list_table_versions");
2767 if self.table_version_storage_enabled
2769 && let Some(ref manifest_ns) = self.manifest_ns
2770 {
2771 let table_id = request.id.clone().unwrap_or_default();
2772 let want_descending = request.descending == Some(true);
2773 return manifest_ns
2774 .list_table_versions(&table_id, want_descending, request.limit)
2775 .await;
2776 }
2777
2778 let table_uri = self.resolve_table_location(&request.id).await?;
2780 let want_descending = request.descending == Some(true);
2781 let table_versions = self
2782 .list_table_versions_from_storage(&table_uri, want_descending, request.limit)
2783 .await?;
2784
2785 Ok(ListTableVersionsResponse {
2786 versions: table_versions,
2787 page_token: None,
2788 })
2789 }
2790
2791 async fn create_table_version(
2792 &self,
2793 request: CreateTableVersionRequest,
2794 ) -> Result<CreateTableVersionResponse> {
2795 self.record_op("create_table_version");
2796 let table_uri = self.resolve_table_location(&request.id).await?;
2797
2798 let staging_manifest_path = &request.manifest_path;
2799 let version = request.version as u64;
2800
2801 let table_path = self.object_store_path_from_uri(&table_uri)?;
2802
2803 let naming_scheme = match request.naming_scheme.as_deref() {
2805 Some("V1") => ManifestNamingScheme::V1,
2806 _ => ManifestNamingScheme::V2,
2807 };
2808
2809 let final_path = naming_scheme.manifest_path(&table_path, version);
2811
2812 let staging_path = Path::parse(staging_manifest_path).map_err(|e| {
2813 lance_core::Error::from(NamespaceError::InvalidInput {
2814 message: format!(
2815 "Invalid staging manifest path '{}': {}",
2816 staging_manifest_path, e
2817 ),
2818 })
2819 })?;
2820
2821 let copy_result = match self
2822 .object_store
2823 .inner
2824 .copy_if_not_exists(&staging_path, &final_path)
2825 .await
2826 {
2827 Ok(()) => Ok(()),
2828 Err(ObjectStoreError::NotImplemented) | Err(ObjectStoreError::NotSupported { .. }) => {
2829 let manifest_data = self
2830 .object_store
2831 .inner
2832 .get(&staging_path)
2833 .await
2834 .map_err(|e| {
2835 lance_core::Error::from(NamespaceError::Internal {
2836 message: format!(
2837 "Failed to read staging manifest at '{}': {}",
2838 staging_manifest_path, e
2839 ),
2840 })
2841 })?
2842 .bytes()
2843 .await
2844 .map_err(|e| {
2845 lance_core::Error::from(NamespaceError::Internal {
2846 message: format!(
2847 "Failed to read staging manifest bytes at '{}': {}",
2848 staging_manifest_path, e
2849 ),
2850 })
2851 })?;
2852 self.object_store
2853 .inner
2854 .put_opts(
2855 &final_path,
2856 manifest_data.into(),
2857 PutOptions {
2858 mode: PutMode::Create,
2859 ..Default::default()
2860 },
2861 )
2862 .await
2863 .map(|_| ())
2864 }
2865 Err(e) => Err(e),
2866 };
2867
2868 match copy_result {
2869 Ok(()) => {}
2870 Err(ObjectStoreError::AlreadyExists { .. })
2871 | Err(ObjectStoreError::Precondition { .. }) => {
2872 return Err(lance_core::Error::from(
2873 NamespaceError::ConcurrentModification {
2874 message: format!(
2875 "Version {} already exists for table at '{}'",
2876 version, table_uri
2877 ),
2878 },
2879 ));
2880 }
2881 Err(e) => {
2882 return Err(lance_core::Error::from(NamespaceError::Internal {
2883 message: format!(
2884 "Failed to create version {} for table at '{}': {}",
2885 version, table_uri, e
2886 ),
2887 }));
2888 }
2889 }
2890
2891 let final_meta = self
2892 .object_store
2893 .inner
2894 .head(&final_path)
2895 .await
2896 .map_err(|e| {
2897 lance_core::Error::from(NamespaceError::Internal {
2898 message: format!(
2899 "Failed to stat created version {} for table at '{}': {}",
2900 version, table_uri, e
2901 ),
2902 })
2903 })?;
2904 let manifest_size = final_meta.size as i64;
2905
2906 if let Err(e) = self.object_store.inner.delete(&staging_path).await {
2908 log::warn!(
2909 "Failed to delete staging manifest at '{}': {:?}",
2910 staging_path,
2911 e
2912 );
2913 }
2914
2915 if self.table_version_storage_enabled
2917 && let Some(ref manifest_ns) = self.manifest_ns
2918 {
2919 let table_id_str =
2920 manifest::ManifestNamespace::str_object_id(&request.id.clone().unwrap_or_default());
2921 let object_id =
2922 manifest::ManifestNamespace::build_version_object_id(&table_id_str, version as i64);
2923 let metadata_json = serde_json::json!({
2924 "manifest_path": final_path.to_string(),
2925 "manifest_size": manifest_size,
2926 "e_tag": final_meta.e_tag,
2927 "naming_scheme": request.naming_scheme.as_deref().unwrap_or("V2"),
2928 })
2929 .to_string();
2930
2931 if let Err(e) = manifest_ns
2932 .insert_into_manifest_with_metadata(
2933 vec![manifest::ManifestEntry {
2934 object_id,
2935 object_type: manifest::ObjectType::TableVersion,
2936 location: None,
2937 metadata: Some(metadata_json),
2938 }],
2939 None,
2940 )
2941 .await
2942 {
2943 log::warn!(
2944 "Failed to record table version in __manifest (best-effort): {:?}",
2945 e
2946 );
2947 }
2948 }
2949
2950 Ok(CreateTableVersionResponse {
2951 transaction_id: None,
2952 version: Some(Box::new(TableVersion {
2953 version: version as i64,
2954 manifest_path: final_path.to_string(),
2955 manifest_size: Some(manifest_size),
2956 e_tag: final_meta.e_tag,
2957 timestamp_millis: None,
2958 metadata: None,
2959 })),
2960 })
2961 }
2962
2963 async fn describe_table_version(
2964 &self,
2965 request: DescribeTableVersionRequest,
2966 ) -> Result<DescribeTableVersionResponse> {
2967 self.record_op("describe_table_version");
2968 if self.table_version_storage_enabled
2971 && let (Some(manifest_ns), Some(version)) = (&self.manifest_ns, request.version)
2972 {
2973 let table_id = request.id.clone().unwrap_or_default();
2974 return manifest_ns.describe_table_version(&table_id, version).await;
2975 }
2976
2977 let table_uri = self.resolve_table_location(&request.id).await?;
2979 let versions = self
2980 .list_table_versions_from_storage(&table_uri, true, None)
2981 .await?;
2982 let table_version = if let Some(requested_version) = request.version {
2983 versions
2984 .into_iter()
2985 .find(|version| version.version == requested_version)
2986 .ok_or_else(|| {
2987 lance_core::Error::from(NamespaceError::TableVersionNotFound {
2988 message: format!(
2989 "version {} for table {}",
2990 requested_version,
2991 Self::format_table_id_from_request(&request.id)
2992 ),
2993 })
2994 })?
2995 } else {
2996 versions.into_iter().next().ok_or_else(|| {
2997 lance_core::Error::from(NamespaceError::TableVersionNotFound {
2998 message: format!(
2999 "latest version for table {}",
3000 Self::format_table_id_from_request(&request.id)
3001 ),
3002 })
3003 })?
3004 };
3005
3006 Ok(DescribeTableVersionResponse {
3007 version: Box::new(table_version),
3008 })
3009 }
3010
3011 async fn batch_delete_table_versions(
3012 &self,
3013 request: BatchDeleteTableVersionsRequest,
3014 ) -> Result<BatchDeleteTableVersionsResponse> {
3015 self.record_op("batch_delete_table_versions");
3016 let ranges: Vec<(i64, i64)> = request
3019 .ranges
3020 .iter()
3021 .map(|r| {
3022 let start = r.start_version;
3023 let end = if r.end_version > 0 {
3024 r.end_version
3025 } else {
3026 start
3027 };
3028 (start, end)
3029 })
3030 .collect();
3031 let table_entries = vec![TableDeleteEntry {
3032 table_id: request.id.clone(),
3033 ranges,
3034 }];
3035
3036 let mut total_deleted_count = 0i64;
3037
3038 if self.table_version_storage_enabled
3039 && let Some(ref manifest_ns) = self.manifest_ns
3040 {
3041 let mut all_object_ids: Vec<String> = Vec::new();
3048 for te in &table_entries {
3049 let table_id_str = manifest::ManifestNamespace::str_object_id(
3050 &te.table_id.clone().unwrap_or_default(),
3051 );
3052 for (start, end) in &te.ranges {
3053 for version in *start..=*end {
3054 let object_id = manifest::ManifestNamespace::build_version_object_id(
3055 &table_id_str,
3056 version,
3057 );
3058 all_object_ids.push(object_id);
3059 }
3060 }
3061 }
3062
3063 if !all_object_ids.is_empty() {
3064 total_deleted_count = manifest_ns
3065 .batch_delete_table_versions_by_object_ids(&all_object_ids)
3066 .await?;
3067 }
3068
3069 let _ = self
3074 .delete_physical_version_files(&table_entries, true)
3075 .await;
3076
3077 return Ok(BatchDeleteTableVersionsResponse {
3078 deleted_count: Some(total_deleted_count),
3079 transaction_id: None,
3080 });
3081 }
3082
3083 total_deleted_count = self
3085 .delete_physical_version_files(&table_entries, false)
3086 .await?;
3087
3088 Ok(BatchDeleteTableVersionsResponse {
3089 deleted_count: Some(total_deleted_count),
3090 transaction_id: None,
3091 })
3092 }
3093
3094 async fn create_table_index(
3095 &self,
3096 request: CreateTableIndexRequest,
3097 ) -> Result<CreateTableIndexResponse> {
3098 self.record_op("create_table_index");
3099 let table_uri = self.resolve_table_location(&request.id).await?;
3100 let mut dataset = self
3101 .load_dataset(&table_uri, None, "create_table_index")
3102 .await?;
3103 let index_request = Self::build_index_params(&request)?;
3104
3105 dataset
3106 .create_index(
3107 &[request.column.as_str()],
3108 index_request.index_type(),
3109 request.name.clone(),
3110 index_request.params(),
3111 false,
3112 )
3113 .await
3114 .map_err(|e| {
3115 let err_msg = format!("{}", e);
3116 let ns_err = if err_msg.contains("already exists") {
3117 NamespaceError::TableIndexAlreadyExists {
3118 message: format!(
3119 "Index '{}' already exists on table '{}': {:?}",
3120 request.name.as_deref().unwrap_or("<auto-generated>"),
3121 table_uri,
3122 e
3123 ),
3124 }
3125 } else if err_msg.contains("not found") || err_msg.contains("does not exist") {
3126 NamespaceError::TableColumnNotFound {
3127 message: format!(
3128 "Column '{}' not found for table '{}': {:?}",
3129 request.column, table_uri, e
3130 ),
3131 }
3132 } else {
3133 NamespaceError::Internal {
3134 message: format!(
3135 "Failed to create {} index '{}' on column '{}' for table '{}': {:?}",
3136 request.index_type,
3137 request.name.as_deref().unwrap_or("<auto-generated>"),
3138 request.column,
3139 table_uri,
3140 e
3141 ),
3142 }
3143 };
3144 lance_core::Error::from(ns_err)
3145 })?;
3146
3147 let transaction_id = dataset
3148 .read_transaction()
3149 .await
3150 .map_err(|e| {
3151 lance_core::Error::from(NamespaceError::Internal {
3152 message: format!(
3153 "Failed to read committed transaction after creating index on '{}': {}",
3154 table_uri, e
3155 ),
3156 })
3157 })?
3158 .map(|transaction| transaction.uuid);
3159
3160 Ok(CreateTableIndexResponse { transaction_id })
3161 }
3162
3163 async fn list_table_indices(
3164 &self,
3165 request: ListTableIndicesRequest,
3166 ) -> Result<ListTableIndicesResponse> {
3167 self.record_op("list_table_indices");
3168 let table_uri = self.resolve_table_location(&request.id).await?;
3169 let dataset = self
3170 .load_dataset(&table_uri, request.version, "list_table_indices")
3171 .await?;
3172 let mut indices = dataset
3173 .describe_indices(None)
3174 .await
3175 .map_err(|e| {
3176 lance_core::Error::from(NamespaceError::Internal {
3177 message: format!("Failed to describe table indices for '{}': {:?}", table_uri, e),
3178 })
3179 })?
3180 .into_iter()
3181 .filter(|description| {
3182 description
3183 .metadata()
3184 .first()
3185 .map(|metadata| !is_system_index(metadata))
3186 .unwrap_or(false)
3187 })
3188 .map(|description| {
3189 let columns = description
3190 .field_ids()
3191 .iter()
3192 .map(|field_id| {
3193 dataset
3194 .schema()
3195 .field_path(i32::try_from(*field_id).map_err(|e| {
3196 lance_core::Error::from(NamespaceError::Internal {
3197 message: format!(
3198 "Field id {} does not fit in i32 for table '{}': {}",
3199 field_id, table_uri, e
3200 ),
3201 })
3202 })?)
3203 .map_err(|e| {
3204 lance_core::Error::from(NamespaceError::Internal {
3205 message: format!(
3206 "Failed to resolve field path for field_id {} in table '{}': {}",
3207 field_id, table_uri, e
3208 ),
3209 })
3210 })
3211 })
3212 .collect::<Result<Vec<_>>>()?;
3213
3214 Ok(IndexContent {
3215 index_name: description.name().to_string(),
3216 index_uuid: description.metadata()[0].uuid.to_string(),
3217 columns,
3218 status: "SUCCEEDED".to_string(),
3219 })
3220 })
3221 .collect::<Result<Vec<_>>>()?;
3222
3223 let page_token = Self::paginate_indices(&mut indices, request.page_token, request.limit);
3224 Ok(ListTableIndicesResponse {
3225 indexes: indices,
3226 page_token,
3227 })
3228 }
3229
3230 async fn describe_table_index_stats(
3231 &self,
3232 request: DescribeTableIndexStatsRequest,
3233 ) -> Result<DescribeTableIndexStatsResponse> {
3234 self.record_op("describe_table_index_stats");
3235 let table_uri = self.resolve_table_location(&request.id).await?;
3236 let dataset = self
3237 .load_dataset(&table_uri, request.version, "describe_table_index_stats")
3238 .await?;
3239 let index_name = request.index_name.as_deref().ok_or_else(|| {
3240 lance_core::Error::from(NamespaceError::InvalidInput {
3241 message: "Index name is required for describe_table_index_stats".to_string(),
3242 })
3243 })?;
3244 let metadatas = dataset
3245 .load_indices_by_name(index_name)
3246 .await
3247 .map_err(|e| {
3248 lance_core::Error::from(NamespaceError::TableIndexNotFound {
3249 message: format!(
3250 "Failed to load index '{}' metadata for table '{}': {}",
3251 index_name, table_uri, e
3252 ),
3253 })
3254 })?;
3255 if metadatas.first().is_some_and(is_system_index) {
3256 return Err(NamespaceError::Unsupported {
3257 message: format!("System index '{}' is not exposed by this API", index_name),
3258 }
3259 .into());
3260 }
3261
3262 let stats = <Dataset as DatasetIndexExt>::index_statistics(&dataset, index_name)
3263 .await
3264 .map_err(|e| {
3265 lance_core::Error::from(NamespaceError::TableIndexNotFound {
3266 message: format!(
3267 "Failed to describe index statistics for '{}' on table '{}': {}",
3268 index_name, table_uri, e
3269 ),
3270 })
3271 })?;
3272 let stats: serde_json::Value = serde_json::from_str(&stats).map_err(|e| {
3273 lance_core::Error::from(NamespaceError::Internal {
3274 message: format!(
3275 "Failed to parse index statistics for '{}' on table '{}': {}",
3276 index_name, table_uri, e
3277 ),
3278 })
3279 })?;
3280
3281 Ok(Self::describe_table_index_stats_response(&stats))
3282 }
3283
3284 async fn describe_transaction(
3285 &self,
3286 request: DescribeTransactionRequest,
3287 ) -> Result<DescribeTransactionResponse> {
3288 self.record_op("describe_transaction");
3289 let mut request_id = request.id.ok_or_else(|| {
3290 lance_core::Error::from(NamespaceError::InvalidInput {
3291 message: "Transaction id must include table id and transaction identifier"
3292 .to_string(),
3293 })
3294 })?;
3295 if request_id.len() < 2 {
3296 return Err(NamespaceError::InvalidInput {
3297 message: format!(
3298 "Transaction request id must include table id and transaction identifier, got {:?}",
3299 request_id
3300 ),
3301 }
3302 .into());
3303 }
3304
3305 let id = request_id.pop().expect("request_id len checked above");
3306 let table_id = Some(request_id);
3307 let table_uri = self.resolve_table_location(&table_id).await?;
3308 let dataset = self
3309 .load_dataset(&table_uri, None, "describe_transaction")
3310 .await?;
3311 let (version, transaction) = self.find_transaction(&dataset, &id).await?;
3312
3313 Ok(Self::transaction_response(version, &transaction))
3314 }
3315
3316 async fn create_table_scalar_index(
3317 &self,
3318 request: CreateTableIndexRequest,
3319 ) -> Result<CreateTableScalarIndexResponse> {
3320 self.record_op("create_table_scalar_index");
3321 let index_type = Self::parse_index_type(&request.index_type)?;
3322 if !index_type.is_scalar() {
3323 return Err(NamespaceError::InvalidInput {
3324 message: format!(
3325 "create_table_scalar_index only supports scalar index types, got {}",
3326 request.index_type
3327 ),
3328 }
3329 .into());
3330 }
3331
3332 let response = self.create_table_index(request).await?;
3333 Ok(CreateTableScalarIndexResponse {
3334 transaction_id: response.transaction_id,
3335 })
3336 }
3337
3338 async fn drop_table_index(
3339 &self,
3340 request: DropTableIndexRequest,
3341 ) -> Result<DropTableIndexResponse> {
3342 self.record_op("drop_table_index");
3343 let table_uri = self.resolve_table_location(&request.id).await?;
3344 let index_name = request.index_name.as_deref().ok_or_else(|| {
3345 lance_core::Error::from(NamespaceError::InvalidInput {
3346 message: "Index name is required for drop_table_index".to_string(),
3347 })
3348 })?;
3349 let mut dataset = self
3350 .load_dataset(&table_uri, None, "drop_table_index")
3351 .await?;
3352 let metadatas = dataset
3353 .load_indices_by_name(index_name)
3354 .await
3355 .map_err(|e| {
3356 lance_core::Error::from(NamespaceError::TableIndexNotFound {
3357 message: format!(
3358 "Failed to load index '{}' before dropping it from table '{}': {}",
3359 index_name, table_uri, e
3360 ),
3361 })
3362 })?;
3363 if metadatas.first().is_some_and(is_system_index) {
3364 return Err(NamespaceError::Unsupported {
3365 message: format!(
3366 "System index '{}' cannot be dropped via this API",
3367 index_name
3368 ),
3369 }
3370 .into());
3371 }
3372
3373 dataset.drop_index(index_name).await.map_err(|e| {
3374 lance_core::Error::from(NamespaceError::TableIndexNotFound {
3375 message: format!(
3376 "Failed to drop index '{}' from table '{}': {}",
3377 index_name, table_uri, e
3378 ),
3379 })
3380 })?;
3381
3382 let transaction_id = dataset
3383 .read_transaction()
3384 .await
3385 .map_err(|e| {
3386 lance_core::Error::from(NamespaceError::Internal {
3387 message: format!(
3388 "Failed to read committed transaction after dropping index '{}' from '{}': {}",
3389 index_name, table_uri, e
3390 ),
3391 })
3392 })?
3393 .map(|transaction| transaction.uuid);
3394
3395 Ok(DropTableIndexResponse { transaction_id })
3396 }
3397
3398 async fn list_all_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
3399 let mut tables = self.list_directory_tables().await?;
3402 tables = self
3403 .filter_declared_tables(tables, request.include_declared.unwrap_or(true))
3404 .await?;
3405 Self::apply_pagination(&mut tables, request.page_token, request.limit);
3406 Ok(ListTablesResponse::new(tables))
3407 }
3408
3409 async fn restore_table(&self, request: RestoreTableRequest) -> Result<RestoreTableResponse> {
3410 let version = request.version;
3411 if version < 0 {
3412 return Err(Error::invalid_input_source(
3413 format!(
3414 "Table version for restore_table must be non-negative, got {}",
3415 version
3416 )
3417 .into(),
3418 ));
3419 }
3420
3421 let table_uri = self.resolve_table_location(&request.id).await?;
3422 let mut dataset = self.load_dataset(&table_uri, None, "restore_table").await?;
3423
3424 dataset = dataset
3425 .checkout_version(version as u64)
3426 .await
3427 .map_err(|e| {
3428 Error::namespace_source(
3429 format!(
3430 "Failed to checkout version {} for restore at '{}': {}",
3431 version, table_uri, e
3432 )
3433 .into(),
3434 )
3435 })?;
3436
3437 dataset.restore().await.map_err(|e| {
3438 Error::namespace_source(
3439 format!(
3440 "Failed to restore table at '{}' to version {}: {}",
3441 table_uri, version, e
3442 )
3443 .into(),
3444 )
3445 })?;
3446
3447 let transaction_id = dataset
3448 .read_transaction()
3449 .await
3450 .map_err(|e| {
3451 Error::namespace_source(
3452 format!(
3453 "Failed to read transaction after restoring '{}': {}",
3454 table_uri, e
3455 )
3456 .into(),
3457 )
3458 })?
3459 .map(|t| t.uuid);
3460
3461 Ok(RestoreTableResponse { transaction_id })
3462 }
3463
3464 async fn update_table_schema_metadata(
3465 &self,
3466 request: UpdateTableSchemaMetadataRequest,
3467 ) -> Result<UpdateTableSchemaMetadataResponse> {
3468 let table_uri = self.resolve_table_location(&request.id).await?;
3469 let mut dataset = self
3470 .load_dataset(&table_uri, None, "update_table_schema_metadata")
3471 .await?;
3472
3473 let new_metadata = request.metadata.unwrap_or_default();
3474 let updated_metadata = dataset
3475 .update_schema_metadata(new_metadata.iter().map(|(k, v)| (k.as_str(), v.as_str())))
3476 .await
3477 .map_err(|e| {
3478 Error::namespace_source(
3479 format!(
3480 "Failed to update schema metadata for table at '{}': {}",
3481 table_uri, e
3482 )
3483 .into(),
3484 )
3485 })?;
3486
3487 let transaction_id = dataset
3488 .read_transaction()
3489 .await
3490 .map_err(|e| {
3491 Error::namespace_source(
3492 format!(
3493 "Failed to read transaction after updating metadata for '{}': {}",
3494 table_uri, e
3495 )
3496 .into(),
3497 )
3498 })?
3499 .map(|t| t.uuid);
3500
3501 Ok(UpdateTableSchemaMetadataResponse {
3502 metadata: Some(updated_metadata),
3503 transaction_id,
3504 })
3505 }
3506
3507 async fn get_table_stats(
3508 &self,
3509 request: GetTableStatsRequest,
3510 ) -> Result<GetTableStatsResponse> {
3511 let table_uri = self.resolve_table_location(&request.id).await?;
3512 let dataset = Arc::new(
3513 self.load_dataset(&table_uri, None, "get_table_stats")
3514 .await?,
3515 );
3516
3517 let data_stats = dataset.calculate_data_stats().await.map_err(|e| {
3519 Error::namespace_source(
3520 format!(
3521 "Failed to calculate data statistics for table at '{}': {}",
3522 table_uri, e
3523 )
3524 .into(),
3525 )
3526 })?;
3527 let total_bytes: i64 = data_stats
3528 .fields
3529 .iter()
3530 .map(|f| f.bytes_on_disk as i64)
3531 .sum();
3532
3533 let fragment_row_futures: Vec<_> = dataset
3535 .get_fragments()
3536 .into_iter()
3537 .map(|f| async move { f.physical_rows().await })
3538 .collect();
3539 let fragment_row_results = futures::future::join_all(fragment_row_futures).await;
3540 let mut fragment_row_counts: Vec<i64> = fragment_row_results
3541 .into_iter()
3542 .filter_map(|r| r.ok())
3543 .map(|r| r as i64)
3544 .collect();
3545
3546 let num_fragments = fragment_row_counts.len() as i64;
3547 let num_rows: i64 = fragment_row_counts.iter().sum();
3548
3549 const SMALL_FRAGMENT_THRESHOLD: i64 = 1024 * 1024;
3552 let num_small_fragments = fragment_row_counts
3553 .iter()
3554 .filter(|&&r| r < SMALL_FRAGMENT_THRESHOLD)
3555 .count() as i64;
3556
3557 fragment_row_counts.sort_unstable();
3559 let lengths = if fragment_row_counts.is_empty() {
3560 FragmentSummary::new(0, 0, 0, 0, 0, 0, 0)
3561 } else {
3562 let len = fragment_row_counts.len();
3563 let min = fragment_row_counts[0];
3564 let max = fragment_row_counts[len - 1];
3565 let mean = num_rows / num_fragments;
3566 let pct = |p: f64| fragment_row_counts[((len - 1) as f64 * p) as usize];
3567 FragmentSummary::new(min, max, mean, pct(0.25), pct(0.50), pct(0.75), pct(0.99))
3568 };
3569
3570 let indices = dataset.load_indices().await.map_err(|e| {
3572 Error::namespace_source(
3573 format!("Failed to load indices for table at '{}': {}", table_uri, e).into(),
3574 )
3575 })?;
3576 let num_indices = indices.iter().filter(|m| !is_system_index(m)).count() as i64;
3577
3578 let fragment_stats = FragmentStats::new(num_fragments, num_small_fragments, lengths);
3579 Ok(GetTableStatsResponse::new(
3580 total_bytes,
3581 num_rows,
3582 num_indices,
3583 fragment_stats,
3584 ))
3585 }
3586
3587 async fn explain_table_query_plan(
3588 &self,
3589 request: ExplainTableQueryPlanRequest,
3590 ) -> Result<String> {
3591 let table_uri = self.resolve_table_location(&request.id).await?;
3592 let dataset = self
3593 .load_dataset(
3594 &table_uri,
3595 request.query.version,
3596 "explain_table_query_plan",
3597 )
3598 .await?;
3599 let verbose = request.verbose.unwrap_or(false);
3600
3601 let mut scanner = dataset.scan();
3602 Self::apply_query_params_to_scanner(
3603 &mut scanner,
3604 request.query.filter.as_deref(),
3605 request.query.columns.as_deref(),
3606 request.query.vector_column.as_deref(),
3607 &request.query.vector,
3608 request.query.k,
3609 request.query.offset,
3610 request.query.prefilter,
3611 request.query.bypass_vector_index,
3612 request.query.nprobes,
3613 request.query.ef,
3614 request.query.refine_factor,
3615 request.query.distance_type.as_deref(),
3616 request.query.fast_search,
3617 request.query.with_row_id,
3618 request.query.lower_bound,
3619 request.query.upper_bound,
3620 "explain_table_query_plan",
3621 )?;
3622
3623 scanner.explain_plan(verbose).await.map_err(|e| {
3624 Error::namespace_source(
3625 format!(
3626 "Failed to explain query plan for table at '{}': {}",
3627 table_uri, e
3628 )
3629 .into(),
3630 )
3631 })
3632 }
3633
3634 async fn analyze_table_query_plan(
3635 &self,
3636 request: AnalyzeTableQueryPlanRequest,
3637 ) -> Result<String> {
3638 let table_uri = self.resolve_table_location(&request.id).await?;
3639 let dataset = self
3640 .load_dataset(&table_uri, request.version, "analyze_table_query_plan")
3641 .await?;
3642
3643 let mut scanner = dataset.scan();
3644 Self::apply_query_params_to_scanner(
3645 &mut scanner,
3646 request.filter.as_deref(),
3647 request.columns.as_deref(),
3648 request.vector_column.as_deref(),
3649 &request.vector,
3650 request.k,
3651 request.offset,
3652 request.prefilter,
3653 request.bypass_vector_index,
3654 request.nprobes,
3655 request.ef,
3656 request.refine_factor,
3657 request.distance_type.as_deref(),
3658 request.fast_search,
3659 request.with_row_id,
3660 request.lower_bound,
3661 request.upper_bound,
3662 "analyze_table_query_plan",
3663 )?;
3664
3665 scanner.analyze_plan().await.map_err(|e| {
3666 Error::namespace_source(
3667 format!(
3668 "Failed to analyze query plan for table at '{}': {}",
3669 table_uri, e
3670 )
3671 .into(),
3672 )
3673 })
3674 }
3675
3676 async fn count_table_rows(&self, request: CountTableRowsRequest) -> Result<i64> {
3677 self.record_op("count_table_rows");
3678 let table_uri = self.resolve_table_location(&request.id).await?;
3679 let dataset = self
3680 .load_dataset(&table_uri, request.version, "count_table_rows")
3681 .await?;
3682
3683 let count =
3684 dataset
3685 .count_rows(request.predicate)
3686 .await
3687 .map_err(|e| NamespaceError::Internal {
3688 message: format!("Failed to count rows for table at '{}': {:?}", table_uri, e),
3689 })?;
3690
3691 Ok(count as i64)
3692 }
3693
3694 async fn insert_into_table(
3695 &self,
3696 request: InsertIntoTableRequest,
3697 request_data: Bytes,
3698 ) -> Result<InsertIntoTableResponse> {
3699 self.record_op("insert_into_table");
3700 let table_uri = self.resolve_table_location(&request.id).await?;
3701 let (reader, _num_rows) =
3702 Self::ipc_reader_from_request_data(&request_data, "insert_into_table")?;
3703
3704 let mode = match request.mode.as_deref() {
3705 Some(m) if m.eq_ignore_ascii_case("overwrite") => WriteMode::Overwrite,
3706 Some(m) if m.eq_ignore_ascii_case("append") => WriteMode::Append,
3707 None => WriteMode::Append,
3708 Some(m) => {
3709 return Err(lance_namespace::error::NamespaceError::InvalidInput {
3710 message: format!(
3711 "Unsupported write mode '{}'. Supported modes are: 'append', 'overwrite'",
3712 m
3713 ),
3714 }
3715 .into());
3716 }
3717 };
3718
3719 if !self.table_uri_has_actual_manifests(&table_uri).await? {
3720 self.write_reader_to_table(&table_uri, reader, WriteMode::Create, None)
3721 .await?;
3722 } else {
3723 self.write_reader_to_table(&table_uri, reader, mode, None)
3724 .await?;
3725 }
3726
3727 Ok(InsertIntoTableResponse {
3728 transaction_id: None,
3729 })
3730 }
3731
3732 async fn merge_insert_into_table(
3733 &self,
3734 request: MergeInsertIntoTableRequest,
3735 request_data: Bytes,
3736 ) -> Result<MergeInsertIntoTableResponse> {
3737 self.record_op("merge_insert_into_table");
3738 let table_uri = self.resolve_table_location(&request.id).await?;
3739 let on = request.on.as_ref().ok_or_else(|| {
3740 lance_core::Error::from(NamespaceError::InvalidInput {
3741 message: "'on' field is required for merge_insert_into_table".to_string(),
3742 })
3743 })?;
3744
3745 let table_has_manifests = self.table_uri_has_actual_manifests(&table_uri).await?;
3746 let (reader, num_rows) =
3747 Self::ipc_reader_from_request_data(&request_data, "merge_insert_into_table")?;
3748
3749 if !table_has_manifests {
3750 let dataset = self
3751 .write_reader_to_table(&table_uri, reader, WriteMode::Create, None)
3752 .await?;
3753 let version = dataset.version().version as i64;
3754 return Ok(MergeInsertIntoTableResponse {
3755 transaction_id: None,
3756 num_updated_rows: Some(0),
3757 num_inserted_rows: Some(num_rows as i64),
3758 num_deleted_rows: Some(0),
3759 version: Some(version),
3760 });
3761 }
3762
3763 let dataset = Arc::new(
3764 self.load_dataset(&table_uri, None, "merge_insert_into_table")
3765 .await?,
3766 );
3767
3768 let mut merge_builder = MergeInsertBuilder::try_new(dataset.clone(), vec![on.clone()])
3769 .map_err(|e| {
3770 lance_core::Error::from(NamespaceError::InvalidInput {
3771 message: format!("Failed to create merge_insert_into_table builder: {}", e),
3772 })
3773 })?;
3774
3775 if let Some(filter) = request.when_matched_update_all_filt.as_deref() {
3776 let behavior = WhenMatched::update_if(dataset.as_ref(), filter).map_err(|e| {
3777 lance_core::Error::from(NamespaceError::InvalidInput {
3778 message: format!(
3779 "Invalid when_matched_update_all_filt for merge_insert_into_table: {}",
3780 e
3781 ),
3782 })
3783 })?;
3784 merge_builder.when_matched(behavior);
3785 } else if request.when_matched_update_all.unwrap_or(false) {
3786 merge_builder.when_matched(WhenMatched::UpdateAll);
3787 }
3788
3789 if matches!(request.when_not_matched_insert_all, Some(false)) {
3790 merge_builder.when_not_matched(WhenNotMatched::DoNothing);
3791 } else {
3792 merge_builder.when_not_matched(WhenNotMatched::InsertAll);
3793 }
3794
3795 if let Some(filter) = request.when_not_matched_by_source_delete_filt.as_deref() {
3796 let behavior = WhenNotMatchedBySource::delete_if(dataset.as_ref(), filter).map_err(|e| {
3797 lance_core::Error::from(NamespaceError::InvalidInput {
3798 message: format!(
3799 "Invalid when_not_matched_by_source_delete_filt for merge_insert_into_table: {}",
3800 e
3801 ),
3802 })
3803 })?;
3804 merge_builder.when_not_matched_by_source(behavior);
3805 } else if request.when_not_matched_by_source_delete.unwrap_or(false) {
3806 merge_builder.when_not_matched_by_source(WhenNotMatchedBySource::Delete);
3807 }
3808
3809 if let Some(use_index) = request.use_index {
3810 merge_builder.use_index(use_index);
3811 }
3812
3813 let (dataset, stats) = merge_builder
3814 .try_build()
3815 .map_err(|e| {
3816 lance_core::Error::from(NamespaceError::InvalidInput {
3817 message: format!("Failed to build merge_insert_into_table job: {}", e),
3818 })
3819 })?
3820 .execute_reader(reader)
3821 .await
3822 .map_err(|e| NamespaceError::Internal {
3823 message: format!(
3824 "Failed to merge_insert_into_table at '{}': {}",
3825 table_uri, e
3826 ),
3827 })?;
3828
3829 Ok(MergeInsertIntoTableResponse {
3830 transaction_id: None,
3831 num_updated_rows: Some(stats.num_updated_rows as i64),
3832 num_inserted_rows: Some(stats.num_inserted_rows as i64),
3833 num_deleted_rows: Some(stats.num_deleted_rows as i64),
3834 version: Some(dataset.version().version as i64),
3835 })
3836 }
3837
3838 async fn query_table(&self, request: QueryTableRequest) -> Result<Bytes> {
3839 use arrow::ipc::writer::FileWriter;
3840
3841 self.record_op("query_table");
3842 let table_uri = self.resolve_table_location(&request.id).await?;
3843 let dataset = self
3844 .load_dataset(&table_uri, request.version, "query_table")
3845 .await?;
3846
3847 let mut scanner = dataset.scan();
3849
3850 let has_vector_query = request
3853 .vector
3854 .single_vector
3855 .as_ref()
3856 .map(|sv| !sv.is_empty())
3857 .unwrap_or(false)
3858 || request
3859 .vector
3860 .multi_vector
3861 .as_ref()
3862 .map(|mv| !mv.is_empty())
3863 .unwrap_or(false);
3864
3865 if let Some(prefilter) = request.prefilter {
3867 scanner.prefilter(prefilter);
3868 }
3869
3870 if has_vector_query {
3872 let vector_column = request.vector_column.as_deref().unwrap_or("vector");
3873
3874 let query_vector: Vec<f32> = request
3876 .vector
3877 .single_vector
3878 .clone()
3879 .or_else(|| {
3880 request
3881 .vector
3882 .multi_vector
3883 .as_ref()
3884 .and_then(|mv| mv.first().cloned())
3885 })
3886 .unwrap_or_default();
3887
3888 if !query_vector.is_empty() {
3889 let k = if request.k > 0 {
3890 request.k as usize
3891 } else {
3892 10
3893 };
3894 let query_array = Float32Array::from(query_vector);
3895 scanner
3896 .nearest(vector_column, &query_array, k)
3897 .map_err(|e| NamespaceError::InvalidInput {
3898 message: format!("Invalid vector search: {:?}", e),
3899 })?;
3900
3901 if let Some(ref distance_type) = request.distance_type {
3903 let metric = match distance_type.to_lowercase().as_str() {
3904 "l2" | "euclidean" => MetricType::L2,
3905 "cosine" => MetricType::Cosine,
3906 "dot" | "inner_product" => MetricType::Dot,
3907 "hamming" => MetricType::Hamming,
3908 _ => {
3909 return Err(NamespaceError::InvalidInput {
3910 message: format!("Unknown distance type: {}", distance_type),
3911 }
3912 .into());
3913 }
3914 };
3915 scanner.distance_metric(metric);
3916 }
3917
3918 if let Some(nprobes) = request.nprobes {
3920 scanner.minimum_nprobes(nprobes as usize);
3921 }
3922
3923 if let Some(ef) = request.ef {
3925 scanner.ef(ef as usize);
3926 }
3927
3928 if let Some(refine_factor) = request.refine_factor {
3930 scanner.refine(refine_factor as u32);
3931 }
3932
3933 if request.lower_bound.is_some() || request.upper_bound.is_some() {
3935 scanner.distance_range(request.lower_bound, request.upper_bound);
3936 }
3937
3938 if let Some(bypass) = request.bypass_vector_index {
3940 scanner.use_index(!bypass);
3941 }
3942
3943 if request.fast_search == Some(true) {
3945 scanner.fast_search();
3946 }
3947 }
3948 }
3949
3950 if let Some(ref fts_query) = request.full_text_query {
3952 if let Some(ref string_query) = fts_query.string_query {
3954 let mut fts = FullTextSearchQuery::new(string_query.query.clone());
3955
3956 if let Some(ref columns) = string_query.columns
3958 && !columns.is_empty()
3959 {
3960 fts = fts
3961 .with_columns(columns)
3962 .map_err(|e| NamespaceError::InvalidInput {
3963 message: format!("Invalid FTS columns: {:?}", e),
3964 })?;
3965 }
3966
3967 scanner
3968 .full_text_search(fts)
3969 .map_err(|e| NamespaceError::InvalidInput {
3970 message: format!("Invalid full text search: {:?}", e),
3971 })?;
3972 }
3973 }
3976
3977 if let Some(ref columns) = request.columns {
3979 if let Some(ref column_names) = columns.column_names
3980 && !column_names.is_empty()
3981 {
3982 scanner
3983 .project(column_names)
3984 .map_err(|e| NamespaceError::InvalidInput {
3985 message: format!("Invalid column projection: {:?}", e),
3986 })?;
3987 } else if let Some(ref column_aliases) = columns.column_aliases
3988 && !column_aliases.is_empty()
3989 {
3990 let transform_pairs: Vec<(String, String)> = column_aliases
3992 .iter()
3993 .map(|(alias, sql)| (alias.clone(), sql.clone()))
3994 .collect();
3995 scanner
3996 .project_with_transform(
3997 &transform_pairs
3998 .iter()
3999 .map(|(a, s)| (a.as_str(), s.as_str()))
4000 .collect::<Vec<_>>(),
4001 )
4002 .map_err(|e| NamespaceError::InvalidInput {
4003 message: format!("Invalid column alias expression: {:?}", e),
4004 })?;
4005 }
4006 }
4007
4008 if let Some(ref filter) = request.filter
4010 && !filter.is_empty()
4011 {
4012 scanner
4013 .filter(filter)
4014 .map_err(|e| NamespaceError::InvalidInput {
4015 message: format!("Invalid filter expression: {:?}", e),
4016 })?;
4017 }
4018
4019 if request.with_row_id == Some(true) {
4021 scanner.with_row_id();
4022 }
4023
4024 if !has_vector_query && request.k > 0 {
4028 let offset = request.offset.map(|o| o as i64);
4029 scanner.limit(Some(request.k as i64), offset).map_err(|e| {
4030 NamespaceError::InvalidInput {
4031 message: format!("Invalid limit/offset: {:?}", e),
4032 }
4033 })?;
4034 } else if has_vector_query && request.offset.is_some() {
4035 let offset = request.offset.map(|o| o as i64);
4037 scanner
4038 .limit(None, offset)
4039 .map_err(|e| NamespaceError::InvalidInput {
4040 message: format!("Invalid offset: {:?}", e),
4041 })?;
4042 }
4043
4044 let batch = scanner
4046 .try_into_batch()
4047 .await
4048 .map_err(|e| NamespaceError::Internal {
4049 message: format!("Failed to execute query: {:?}", e),
4050 })?;
4051
4052 let schema = batch.schema();
4054 let mut buffer = Vec::new();
4055 {
4056 let mut writer = FileWriter::try_new(&mut buffer, &schema).map_err(|e| {
4057 NamespaceError::Internal {
4058 message: format!("Failed to create IPC writer: {:?}", e),
4059 }
4060 })?;
4061 writer.write(&batch).map_err(|e| NamespaceError::Internal {
4062 message: format!("Failed to write batch to IPC: {:?}", e),
4063 })?;
4064 writer.finish().map_err(|e| NamespaceError::Internal {
4065 message: format!("Failed to finish IPC writer: {:?}", e),
4066 })?;
4067 }
4068
4069 Ok(Bytes::from(buffer))
4070 }
4071
4072 fn namespace_id(&self) -> String {
4073 format!("DirectoryNamespace {{ root: {:?} }}", self.root)
4074 }
4075}
4076
4077#[cfg(test)]
4078mod tests {
4079 use super::*;
4080 use arrow_ipc::reader::{FileReader, StreamReader};
4081 use lance::dataset::Dataset;
4082 use lance::index::DatasetIndexExt;
4083 use lance_core::utils::tempfile::{TempStdDir, TempStrDir};
4084 use lance_core::utils::testing::CountingObjectStore;
4085 use lance_io::object_store::{providers::local::FileStoreProvider, uri_to_url};
4086 use lance_namespace::models::{
4087 CreateTableRequest, JsonArrowDataType, JsonArrowField, JsonArrowSchema, ListTablesRequest,
4088 QueryTableRequestColumns,
4089 };
4090 use lance_namespace::schema::convert_json_arrow_schema;
4091 use std::io::Cursor;
4092 use std::sync::{
4093 Arc,
4094 atomic::{AtomicUsize, Ordering},
4095 };
4096 use url::Url;
4097
/// Panics unless `plan` contains every string in `expected_fragments`.
///
/// Fragments are checked in order and the panic reports the first missing one, prefixed
/// with `context` and followed by the full plan text.
fn assert_plan_contains_all(plan: &str, expected_fragments: &[&str], context: &str) {
    let first_missing = expected_fragments
        .iter()
        .find(|fragment| !plan.contains(**fragment));

    if let Some(missing) = first_missing {
        panic!(
            "{}. Missing fragment: '{}'. Plan:\n{}",
            context, missing, plan
        );
    }
}
4109
4110 async fn create_test_namespace() -> (DirectoryNamespace, TempStdDir) {
4112 let temp_dir = TempStdDir::default();
4113
4114 let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
4115 .build()
4116 .await
4117 .unwrap();
4118 (namespace, temp_dir)
4119 }
4120
4121 #[derive(Debug)]
4122 struct CountingFileStoreProvider {
4123 listing_count: Arc<AtomicUsize>,
4124 }
4125
4126 #[async_trait]
4127 impl lance_io::object_store::ObjectStoreProvider for CountingFileStoreProvider {
4128 async fn new_store(
4129 &self,
4130 base_path: Url,
4131 params: &ObjectStoreParams,
4132 ) -> Result<ObjectStore> {
4133 let provider = FileStoreProvider;
4134 let mut store = provider.new_store(base_path, params).await?;
4135 store.inner = Arc::new(CountingObjectStore::new(
4136 store.inner.clone(),
4137 self.listing_count.clone(),
4138 ));
4139 Ok(store)
4140 }
4141
4142 fn extract_path(&self, url: &Url) -> Result<Path> {
4143 let provider = FileStoreProvider;
4144 provider.extract_path(url)
4145 }
4146
4147 fn calculate_object_store_prefix(
4148 &self,
4149 url: &Url,
4150 storage_options: Option<&HashMap<String, String>>,
4151 ) -> Result<String> {
4152 let provider = FileStoreProvider;
4153 provider.calculate_object_store_prefix(url, storage_options)
4154 }
4155 }
4156
4157 fn file_object_store_uri(path: &str) -> String {
4158 let file_url = uri_to_url(path).unwrap();
4159 let mut url = Url::parse("file-object-store:///").unwrap();
4160 url.set_path(file_url.path());
4161 url.to_string()
4162 }
4163
4164 fn build_listing_counting_session(listing_count: Arc<AtomicUsize>) -> Arc<Session> {
4165 let registry = Arc::new(ObjectStoreRegistry::default());
4166 registry.insert(
4167 "file-object-store",
4168 Arc::new(CountingFileStoreProvider { listing_count }),
4169 );
4170 Arc::new(Session::new(0, 0, registry))
4171 }
4172
4173 fn create_test_ipc_data(schema: &JsonArrowSchema) -> Vec<u8> {
4175 use arrow::ipc::writer::StreamWriter;
4176
4177 let arrow_schema = convert_json_arrow_schema(schema).unwrap();
4178 let arrow_schema = Arc::new(arrow_schema);
4179 let batch = arrow::record_batch::RecordBatch::new_empty(arrow_schema.clone());
4180 let mut buffer = Vec::new();
4181 {
4182 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
4183 writer.write(&batch).unwrap();
4184 writer.finish().unwrap();
4185 }
4186 buffer
4187 }
4188
4189 fn create_ipc_data_from_batches(
4190 schema: Arc<arrow_schema::Schema>,
4191 batches: Vec<arrow::record_batch::RecordBatch>,
4192 ) -> Vec<u8> {
4193 use arrow::ipc::writer::StreamWriter;
4194
4195 let mut buffer = Vec::new();
4196 {
4197 let mut writer = StreamWriter::try_new(&mut buffer, &schema).unwrap();
4198 for batch in &batches {
4199 writer.write(batch).unwrap();
4200 }
4201 writer.finish().unwrap();
4202 }
4203 buffer
4204 }
4205
4206 fn create_non_empty_test_ipc_data() -> Vec<u8> {
4207 use arrow::array::{Int32Array, StringArray};
4208 use arrow::record_batch::RecordBatch;
4209
4210 let schema = Arc::new(convert_json_arrow_schema(&create_test_schema()).unwrap());
4211 let batch = RecordBatch::try_new(
4212 schema.clone(),
4213 vec![
4214 Arc::new(Int32Array::from(vec![1, 2])),
4215 Arc::new(StringArray::from(vec![Some("alice"), Some("bob")])),
4216 ],
4217 )
4218 .unwrap();
4219 create_ipc_data_from_batches(schema, vec![batch])
4220 }
4221
4222 fn create_single_row_test_ipc_data() -> Vec<u8> {
4223 use arrow::array::{Int32Array, StringArray};
4224 use arrow::record_batch::RecordBatch;
4225
4226 let schema = Arc::new(convert_json_arrow_schema(&create_test_schema()).unwrap());
4227 let batch = RecordBatch::try_new(
4228 schema.clone(),
4229 vec![
4230 Arc::new(Int32Array::from(vec![10])),
4231 Arc::new(StringArray::from(vec![Some("carol")])),
4232 ],
4233 )
4234 .unwrap();
4235 create_ipc_data_from_batches(schema, vec![batch])
4236 }
4237
4238 fn create_test_schema() -> JsonArrowSchema {
4240 let int_type = JsonArrowDataType::new("int32".to_string());
4241 let string_type = JsonArrowDataType::new("utf8".to_string());
4242
4243 let id_field = JsonArrowField {
4244 name: "id".to_string(),
4245 r#type: Box::new(int_type),
4246 nullable: false,
4247 metadata: None,
4248 };
4249
4250 let name_field = JsonArrowField {
4251 name: "name".to_string(),
4252 r#type: Box::new(string_type),
4253 nullable: true,
4254 metadata: None,
4255 };
4256
4257 JsonArrowSchema {
4258 fields: vec![id_field, name_field],
4259 metadata: None,
4260 }
4261 }
4262
4263 fn create_scalar_table_ipc_data() -> Vec<u8> {
4264 use arrow::array::{Int32Array, StringArray};
4265 use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
4266
4267 let schema = Arc::new(ArrowSchema::new(vec![
4268 Field::new("id", DataType::Int32, false),
4269 Field::new("name", DataType::Utf8, true),
4270 ]));
4271 let batch = arrow::record_batch::RecordBatch::try_new(
4272 schema.clone(),
4273 vec![
4274 Arc::new(Int32Array::from(vec![1, 2, 3])),
4275 Arc::new(StringArray::from(vec!["alice", "bob", "cory"])),
4276 ],
4277 )
4278 .unwrap();
4279 create_ipc_data_from_batches(schema, vec![batch])
4280 }
4281
4282 fn create_vector_table_ipc_data() -> Vec<u8> {
4283 use arrow::array::{FixedSizeListArray, Float32Array, Int32Array};
4284 use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
4285
4286 let schema = Arc::new(ArrowSchema::new(vec![
4287 Field::new("id", DataType::Int32, false),
4288 Field::new(
4289 "vector",
4290 DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), 2),
4291 true,
4292 ),
4293 ]));
4294 let vector_field = Arc::new(Field::new("item", DataType::Float32, true));
4295 let vectors = FixedSizeListArray::try_new(
4296 vector_field,
4297 2,
4298 Arc::new(Float32Array::from(vec![0.1, 0.2, 0.3, 0.4, 0.5, 0.6])),
4299 None,
4300 )
4301 .unwrap();
4302 let batch = arrow::record_batch::RecordBatch::try_new(
4303 schema.clone(),
4304 vec![Arc::new(Int32Array::from(vec![1, 2, 3])), Arc::new(vectors)],
4305 )
4306 .unwrap();
4307 create_ipc_data_from_batches(schema, vec![batch])
4308 }
4309
4310 async fn create_scalar_table(namespace: &DirectoryNamespace, table_name: &str) {
4311 let mut create_table_request = CreateTableRequest::new();
4312 create_table_request.id = Some(vec![table_name.to_string()]);
4313 namespace
4314 .create_table(
4315 create_table_request,
4316 Bytes::from(create_scalar_table_ipc_data()),
4317 )
4318 .await
4319 .unwrap();
4320 }
4321
4322 async fn create_vector_table(namespace: &DirectoryNamespace, table_name: &str) {
4323 let mut create_table_request = CreateTableRequest::new();
4324 create_table_request.id = Some(vec![table_name.to_string()]);
4325 namespace
4326 .create_table(
4327 create_table_request,
4328 Bytes::from(create_vector_table_ipc_data()),
4329 )
4330 .await
4331 .unwrap();
4332 }
4333
4334 async fn open_dataset(namespace: &DirectoryNamespace, table_name: &str) -> Dataset {
4335 let mut describe_request = DescribeTableRequest::new();
4336 describe_request.id = Some(vec![table_name.to_string()]);
4337 let table_uri = namespace
4338 .describe_table(describe_request)
4339 .await
4340 .unwrap()
4341 .location
4342 .expect("table location should exist");
4343 Dataset::open(&table_uri).await.unwrap()
4344 }
4345
4346 async fn create_scalar_index(
4347 namespace: &DirectoryNamespace,
4348 table_name: &str,
4349 index_name: &str,
4350 ) -> Option<String> {
4351 use lance_namespace::models::CreateTableIndexRequest;
4352
4353 let mut create_index_request =
4354 CreateTableIndexRequest::new("id".to_string(), "BTREE".to_string());
4355 create_index_request.id = Some(vec![table_name.to_string()]);
4356 create_index_request.name = Some(index_name.to_string());
4357 namespace
4358 .create_table_scalar_index(create_index_request)
4359 .await
4360 .unwrap()
4361 .transaction_id
4362 }
4363
4364 #[tokio::test]
4365 async fn test_create_table() {
4366 let (namespace, _temp_dir) = create_test_namespace().await;
4367
4368 let schema = create_test_schema();
4370 let ipc_data = create_test_ipc_data(&schema);
4371
4372 let mut request = CreateTableRequest::new();
4373 request.id = Some(vec!["test_table".to_string()]);
4374
4375 let response = namespace
4376 .create_table(request, bytes::Bytes::from(ipc_data))
4377 .await
4378 .unwrap();
4379
4380 assert!(response.location.is_some());
4381 assert!(response.location.unwrap().ends_with("test_table.lance"));
4382 assert_eq!(response.version, Some(1));
4383 }
4384
4385 #[tokio::test]
4386 async fn test_create_table_without_data() {
4387 let (namespace, _temp_dir) = create_test_namespace().await;
4388
4389 let mut request = CreateTableRequest::new();
4390 request.id = Some(vec!["test_table".to_string()]);
4391
4392 let result = namespace.create_table(request, bytes::Bytes::new()).await;
4393 assert!(result.is_err());
4394 assert!(
4395 result
4396 .unwrap_err()
4397 .to_string()
4398 .contains("Arrow IPC stream) is required")
4399 );
4400 }
4401
4402 #[tokio::test]
4403 async fn test_create_table_with_invalid_id() {
4404 let (namespace, _temp_dir) = create_test_namespace().await;
4405
4406 let schema = create_test_schema();
4408 let ipc_data = create_test_ipc_data(&schema);
4409
4410 let mut request = CreateTableRequest::new();
4412 request.id = Some(vec![]);
4413
4414 let result = namespace
4415 .create_table(request, bytes::Bytes::from(ipc_data.clone()))
4416 .await;
4417 assert!(result.is_err());
4418
4419 let mut create_ns_req = CreateNamespaceRequest::new();
4422 create_ns_req.id = Some(vec!["test_namespace".to_string()]);
4423 namespace.create_namespace(create_ns_req).await.unwrap();
4424
4425 let mut request = CreateTableRequest::new();
4427 request.id = Some(vec!["test_namespace".to_string(), "table".to_string()]);
4428
4429 let result = namespace
4430 .create_table(request, bytes::Bytes::from(ipc_data))
4431 .await;
4432 assert!(
4434 result.is_ok(),
4435 "Multi-level table IDs should work with manifest enabled"
4436 );
4437 }
4438
4439 #[tokio::test]
4440 async fn test_list_tables() {
4441 let (namespace, _temp_dir) = create_test_namespace().await;
4442
4443 let mut request = ListTablesRequest::new();
4445 request.id = Some(vec![]);
4446 let response = namespace.list_tables(request).await.unwrap();
4447 assert_eq!(response.tables.len(), 0);
4448
4449 let schema = create_test_schema();
4451 let ipc_data = create_test_ipc_data(&schema);
4452
4453 let mut create_request = CreateTableRequest::new();
4455 create_request.id = Some(vec!["table1".to_string()]);
4456 namespace
4457 .create_table(create_request, bytes::Bytes::from(ipc_data.clone()))
4458 .await
4459 .unwrap();
4460
4461 let mut create_request = CreateTableRequest::new();
4463 create_request.id = Some(vec!["table2".to_string()]);
4464 namespace
4465 .create_table(create_request, bytes::Bytes::from(ipc_data))
4466 .await
4467 .unwrap();
4468
4469 let mut request = ListTablesRequest::new();
4471 request.id = Some(vec![]);
4472 let response = namespace.list_tables(request).await.unwrap();
4473 let tables = response.tables;
4474 assert_eq!(tables.len(), 2);
4475 assert!(tables.contains(&"table1".to_string()));
4476 assert!(tables.contains(&"table2".to_string()));
4477 }
4478
4479 #[tokio::test]
4480 async fn test_list_tables_pagination() {
4481 let (namespace, _temp_dir) = create_test_namespace().await;
4482
4483 let schema = create_test_schema();
4484 let ipc_data = create_test_ipc_data(&schema);
4485
4486 for name in ["alpha", "bravo", "charlie"] {
4487 let mut req = CreateTableRequest::new();
4488 req.id = Some(vec![name.to_string()]);
4489 namespace
4490 .create_table(req, bytes::Bytes::from(ipc_data.clone()))
4491 .await
4492 .unwrap();
4493 }
4494
4495 let first_page = namespace
4497 .list_tables(ListTablesRequest {
4498 id: Some(vec![]),
4499 limit: Some(2),
4500 ..Default::default()
4501 })
4502 .await
4503 .unwrap();
4504
4505 assert_eq!(first_page.tables, vec!["alpha", "bravo"]);
4506 assert_eq!(first_page.page_token.as_deref(), Some("bravo"));
4507
4508 let second_page = namespace
4510 .list_tables(ListTablesRequest {
4511 id: Some(vec![]),
4512 limit: Some(2),
4513 page_token: first_page.page_token.clone(),
4514 ..Default::default()
4515 })
4516 .await
4517 .unwrap();
4518
4519 assert_eq!(second_page.tables, vec!["charlie"]);
4520 assert!(second_page.page_token.is_none());
4521 }
4522
4523 #[tokio::test]
4524 async fn test_list_tables_pagination_limit_zero() {
4525 let (namespace, _temp_dir) = create_test_namespace().await;
4526
4527 let schema = create_test_schema();
4528 let ipc_data = create_test_ipc_data(&schema);
4529
4530 let mut req = CreateTableRequest::new();
4531 req.id = Some(vec!["alpha".to_string()]);
4532 namespace
4533 .create_table(req, bytes::Bytes::from(ipc_data))
4534 .await
4535 .unwrap();
4536
4537 let response = namespace
4538 .list_tables(ListTablesRequest {
4539 id: Some(vec![]),
4540 limit: Some(0),
4541 ..Default::default()
4542 })
4543 .await
4544 .unwrap();
4545
4546 assert!(response.tables.is_empty());
4547 assert!(response.page_token.is_none());
4548 }
4549
4550 #[tokio::test]
4551 async fn test_list_tables_with_namespace_id() {
4552 let (namespace, _temp_dir) = create_test_namespace().await;
4553
4554 let mut create_ns_req = CreateNamespaceRequest::new();
4556 create_ns_req.id = Some(vec!["test_namespace".to_string()]);
4557 namespace.create_namespace(create_ns_req).await.unwrap();
4558
4559 let mut request = ListTablesRequest::new();
4561 request.id = Some(vec!["test_namespace".to_string()]);
4562
4563 let result = namespace.list_tables(request).await;
4564 assert!(
4566 result.is_ok(),
4567 "list_tables should work with child namespace when manifest is enabled"
4568 );
4569 let response = result.unwrap();
4570 assert_eq!(
4571 response.tables.len(),
4572 0,
4573 "Namespace should have no tables yet"
4574 );
4575 }
4576
4577 #[tokio::test]
4578 async fn test_create_scalar_index() {
4579 let (namespace, _temp_dir) = create_test_namespace().await;
4580 create_scalar_table(&namespace, "users").await;
4581
4582 let transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4583 let dataset = open_dataset(&namespace, "users").await;
4584 let expected_transaction_id = dataset
4585 .read_transaction()
4586 .await
4587 .unwrap()
4588 .map(|transaction| transaction.uuid);
4589 assert_eq!(transaction_id, expected_transaction_id);
4590 let indices = dataset.load_indices().await.unwrap();
4591 assert!(indices.iter().any(|index| index.name == "users_id_idx"));
4592 }
4593
4594 #[tokio::test]
4595 async fn test_create_vector_index() {
4596 use lance_namespace::models::CreateTableIndexRequest;
4597
4598 let (namespace, _temp_dir) = create_test_namespace().await;
4599 create_vector_table(&namespace, "vectors").await;
4600
4601 let mut create_index_request =
4602 CreateTableIndexRequest::new("vector".to_string(), "IVF_FLAT".to_string());
4603 create_index_request.id = Some(vec!["vectors".to_string()]);
4604 create_index_request.name = Some("vector_idx".to_string());
4605 create_index_request.distance_type = Some("l2".to_string());
4606 let transaction_id = namespace
4607 .create_table_index(create_index_request)
4608 .await
4609 .unwrap()
4610 .transaction_id;
4611
4612 let dataset = open_dataset(&namespace, "vectors").await;
4613 let expected_transaction_id = dataset
4614 .read_transaction()
4615 .await
4616 .unwrap()
4617 .map(|transaction| transaction.uuid);
4618 assert_eq!(transaction_id, expected_transaction_id);
4619 let indices = dataset.load_indices().await.unwrap();
4620 assert!(indices.iter().any(|index| index.name == "vector_idx"));
4621 }
4622
4623 #[tokio::test]
4624 async fn test_list_table_indices() {
4625 use lance_namespace::models::ListTableIndicesRequest;
4626
4627 let (namespace, _temp_dir) = create_test_namespace().await;
4628 create_scalar_table(&namespace, "users").await;
4629 create_scalar_index(&namespace, "users", "a_idx").await;
4630 create_scalar_index(&namespace, "users", "b_idx").await;
4631 let transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4632
4633 let response = namespace
4634 .list_table_indices(ListTableIndicesRequest {
4635 id: Some(vec!["users".to_string()]),
4636 ..Default::default()
4637 })
4638 .await
4639 .unwrap();
4640
4641 assert_eq!(response.indexes.len(), 3);
4642 assert_eq!(response.indexes[0].index_name, "a_idx");
4643 assert_eq!(response.indexes[1].index_name, "b_idx");
4644 assert_eq!(response.indexes[2].index_name, "users_id_idx");
4645 assert!(response.page_token.is_none());
4646 let users_id_idx = response
4647 .indexes
4648 .iter()
4649 .find(|index| index.index_name == "users_id_idx")
4650 .unwrap();
4651 assert_eq!(users_id_idx.columns, vec!["id"]);
4652 assert_eq!(users_id_idx.status, "SUCCEEDED");
4653
4654 let dataset = open_dataset(&namespace, "users").await;
4655 let expected_transaction_id = dataset
4656 .read_transaction()
4657 .await
4658 .unwrap()
4659 .map(|transaction| transaction.uuid);
4660 assert_eq!(transaction_id, expected_transaction_id);
4661 let indices = dataset.load_indices().await.unwrap();
4662 assert_eq!(
4663 indices
4664 .iter()
4665 .filter(|index| index.name == "users_id_idx")
4666 .count(),
4667 1
4668 );
4669
4670 let first_page = namespace
4671 .list_table_indices(ListTableIndicesRequest {
4672 id: Some(vec!["users".to_string()]),
4673 limit: Some(2),
4674 ..Default::default()
4675 })
4676 .await
4677 .unwrap();
4678
4679 assert_eq!(first_page.indexes.len(), 2);
4680 assert_eq!(first_page.indexes[0].index_name, "a_idx");
4681 assert_eq!(first_page.indexes[1].index_name, "b_idx");
4682 assert_eq!(first_page.page_token.as_deref(), Some("b_idx"));
4683
4684 let second_page = namespace
4685 .list_table_indices(ListTableIndicesRequest {
4686 id: Some(vec!["users".to_string()]),
4687 page_token: first_page.page_token.clone(),
4688 limit: Some(2),
4689 ..Default::default()
4690 })
4691 .await
4692 .unwrap();
4693
4694 assert_eq!(second_page.indexes.len(), 1);
4695 assert_eq!(second_page.indexes[0].index_name, "users_id_idx");
4696 assert!(second_page.page_token.is_none());
4697 }
4698
4699 #[tokio::test]
4700 async fn test_describe_table_index_stats() {
4701 use lance_namespace::models::DescribeTableIndexStatsRequest;
4702
4703 let (namespace, _temp_dir) = create_test_namespace().await;
4704 create_scalar_table(&namespace, "users").await;
4705 let transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4706
4707 let response = namespace
4708 .describe_table_index_stats(DescribeTableIndexStatsRequest {
4709 id: Some(vec!["users".to_string()]),
4710 index_name: Some("users_id_idx".to_string()),
4711 ..Default::default()
4712 })
4713 .await
4714 .unwrap();
4715 assert_eq!(response.index_type, Some("BTree".to_string()));
4716 assert_eq!(response.num_indices, Some(1));
4717 assert_eq!(response.num_indexed_rows, Some(3));
4718 assert_eq!(response.num_unindexed_rows, Some(0));
4719
4720 let dataset = open_dataset(&namespace, "users").await;
4721 let expected_transaction_id = dataset
4722 .read_transaction()
4723 .await
4724 .unwrap()
4725 .map(|transaction| transaction.uuid);
4726 assert_eq!(transaction_id, expected_transaction_id);
4727 let stats: serde_json::Value =
4728 serde_json::from_str(&dataset.index_statistics("users_id_idx").await.unwrap()).unwrap();
4729 assert_eq!(stats["index_type"], "BTree");
4730 assert_eq!(stats["num_indices"], 1);
4731 assert_eq!(stats["num_indexed_rows"], 3);
4732 assert_eq!(stats["num_unindexed_rows"], 0);
4733 }
4734
4735 #[tokio::test]
4736 async fn test_describe_transaction() {
4737 use lance_namespace::models::DescribeTransactionRequest;
4738
4739 let (namespace, _temp_dir) = create_test_namespace().await;
4740 create_scalar_table(&namespace, "users").await;
4741 let transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4742 let dataset = open_dataset(&namespace, "users").await;
4743 let latest_transaction = dataset.read_transaction().await.unwrap();
4744 assert_eq!(
4745 transaction_id,
4746 latest_transaction
4747 .as_ref()
4748 .map(|transaction| transaction.uuid.clone())
4749 );
4750
4751 if let Some(transaction_id) = transaction_id {
4752 let response = namespace
4753 .describe_transaction(DescribeTransactionRequest {
4754 id: Some(vec!["users".to_string(), transaction_id.clone()]),
4755 ..Default::default()
4756 })
4757 .await
4758 .unwrap();
4759 assert_eq!(response.status, "SUCCEEDED");
4760 assert_eq!(
4761 response
4762 .properties
4763 .as_ref()
4764 .and_then(|props| props.get("operation")),
4765 Some(&"CreateIndex".to_string())
4766 );
4767 assert_eq!(
4768 response
4769 .properties
4770 .as_ref()
4771 .and_then(|props| props.get("uuid")),
4772 Some(&transaction_id)
4773 );
4774 } else {
4775 assert!(latest_transaction.is_none());
4776 }
4777 }
4778
4779 #[tokio::test]
4780 async fn test_drop_table_index() {
4781 use lance_namespace::models::{DropTableIndexRequest, ListTableIndicesRequest};
4782
4783 let (namespace, _temp_dir) = create_test_namespace().await;
4784 create_scalar_table(&namespace, "users").await;
4785 let create_transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4786
4787 let drop_transaction_id = namespace
4788 .drop_table_index(DropTableIndexRequest {
4789 id: Some(vec!["users".to_string()]),
4790 index_name: Some("users_id_idx".to_string()),
4791 ..Default::default()
4792 })
4793 .await
4794 .unwrap()
4795 .transaction_id;
4796
4797 let dataset = open_dataset(&namespace, "users").await;
4798 let previous_dataset = dataset
4799 .checkout_version(dataset.version().version - 1)
4800 .await
4801 .unwrap();
4802 let previous_transaction_id = previous_dataset
4803 .read_transaction()
4804 .await
4805 .unwrap()
4806 .map(|transaction| transaction.uuid);
4807 assert_eq!(create_transaction_id, previous_transaction_id);
4808 let expected_drop_transaction_id = dataset
4809 .read_transaction()
4810 .await
4811 .unwrap()
4812 .map(|transaction| transaction.uuid);
4813 assert_eq!(drop_transaction_id, expected_drop_transaction_id);
4814 let indices = dataset.load_indices().await.unwrap();
4815 assert!(!indices.iter().any(|index| index.name == "users_id_idx"));
4816
4817 let list_response = namespace
4818 .list_table_indices(ListTableIndicesRequest {
4819 id: Some(vec!["users".to_string()]),
4820 ..Default::default()
4821 })
4822 .await
4823 .unwrap();
4824 assert!(list_response.indexes.is_empty());
4825 }
4826
4827 #[tokio::test]
4828 async fn test_describe_table() {
4829 let (namespace, _temp_dir) = create_test_namespace().await;
4830
4831 let schema = create_test_schema();
4833 let ipc_data = create_test_ipc_data(&schema);
4834
4835 let mut create_request = CreateTableRequest::new();
4836 create_request.id = Some(vec!["test_table".to_string()]);
4837 namespace
4838 .create_table(create_request, bytes::Bytes::from(ipc_data))
4839 .await
4840 .unwrap();
4841
4842 let mut request = DescribeTableRequest::new();
4844 request.id = Some(vec!["test_table".to_string()]);
4845 let response = namespace.describe_table(request).await.unwrap();
4846
4847 assert!(response.location.is_some());
4848 assert!(response.location.unwrap().ends_with("test_table.lance"));
4849 }
4850
4851 #[tokio::test]
4852 async fn test_describe_nonexistent_table() {
4853 let (namespace, _temp_dir) = create_test_namespace().await;
4854
4855 let mut request = DescribeTableRequest::new();
4856 request.id = Some(vec!["nonexistent".to_string()]);
4857
4858 let result = namespace.describe_table(request).await;
4859 assert!(result.is_err());
4860 assert!(result.unwrap_err().to_string().contains("Table not found"));
4861 }
4862
4863 #[tokio::test]
4864 async fn test_table_exists() {
4865 let (namespace, _temp_dir) = create_test_namespace().await;
4866
4867 let schema = create_test_schema();
4869 let ipc_data = create_test_ipc_data(&schema);
4870
4871 let mut create_request = CreateTableRequest::new();
4872 create_request.id = Some(vec!["existing_table".to_string()]);
4873 namespace
4874 .create_table(create_request, bytes::Bytes::from(ipc_data))
4875 .await
4876 .unwrap();
4877
4878 let mut request = TableExistsRequest::new();
4880 request.id = Some(vec!["existing_table".to_string()]);
4881 let result = namespace.table_exists(request).await;
4882 assert!(result.is_ok());
4883
4884 let mut request = TableExistsRequest::new();
4886 request.id = Some(vec!["nonexistent".to_string()]);
4887 let result = namespace.table_exists(request).await;
4888 assert!(result.is_err());
4889 assert!(result.unwrap_err().to_string().contains("Table not found"));
4890 }
4891
4892 #[tokio::test]
4893 async fn test_drop_table() {
4894 let (namespace, _temp_dir) = create_test_namespace().await;
4895
4896 let schema = create_test_schema();
4898 let ipc_data = create_test_ipc_data(&schema);
4899
4900 let mut create_request = CreateTableRequest::new();
4901 create_request.id = Some(vec!["table_to_drop".to_string()]);
4902 namespace
4903 .create_table(create_request, bytes::Bytes::from(ipc_data))
4904 .await
4905 .unwrap();
4906
4907 let mut exists_request = TableExistsRequest::new();
4909 exists_request.id = Some(vec!["table_to_drop".to_string()]);
4910 assert!(namespace.table_exists(exists_request.clone()).await.is_ok());
4911
4912 let mut drop_request = DropTableRequest::new();
4914 drop_request.id = Some(vec!["table_to_drop".to_string()]);
4915 let response = namespace.drop_table(drop_request).await.unwrap();
4916 assert!(response.location.is_some());
4917
4918 assert!(namespace.table_exists(exists_request).await.is_err());
4920 }
4921
4922 #[tokio::test]
4923 async fn test_drop_nonexistent_table() {
4924 let (namespace, _temp_dir) = create_test_namespace().await;
4925
4926 let mut request = DropTableRequest::new();
4927 request.id = Some(vec!["nonexistent".to_string()]);
4928
4929 let result = namespace.drop_table(request).await;
4931 let _ = result;
4934 }
4935
4936 #[tokio::test]
4937 async fn test_root_namespace_operations() {
4938 let (namespace, _temp_dir) = create_test_namespace().await;
4939
4940 let mut request = ListNamespacesRequest::new();
4942 request.id = Some(vec![]);
4943 let result = namespace.list_namespaces(request).await;
4944 assert!(result.is_ok());
4945 assert_eq!(result.unwrap().namespaces.len(), 0);
4946
4947 let mut request = DescribeNamespaceRequest::new();
4949 request.id = Some(vec![]);
4950 let result = namespace.describe_namespace(request).await;
4951 assert!(result.is_ok());
4952
4953 let mut request = NamespaceExistsRequest::new();
4955 request.id = Some(vec![]);
4956 let result = namespace.namespace_exists(request).await;
4957 assert!(result.is_ok());
4958
4959 let mut request = CreateNamespaceRequest::new();
4961 request.id = Some(vec![]);
4962 let result = namespace.create_namespace(request).await;
4963 assert!(result.is_err());
4964 assert!(result.unwrap_err().to_string().contains("already exists"));
4965
4966 let mut request = DropNamespaceRequest::new();
4968 request.id = Some(vec![]);
4969 let result = namespace.drop_namespace(request).await;
4970 assert!(result.is_err());
4971 assert!(
4972 result
4973 .unwrap_err()
4974 .to_string()
4975 .contains("cannot be dropped")
4976 );
4977 }
4978
4979 #[tokio::test]
4980 async fn test_non_root_namespace_operations() {
4981 let (namespace, _temp_dir) = create_test_namespace().await;
4982
4983 let mut request = CreateNamespaceRequest::new();
4986 request.id = Some(vec!["child".to_string()]);
4987 let result = namespace.create_namespace(request).await;
4988 assert!(
4989 result.is_ok(),
4990 "Child namespace creation should succeed with manifest enabled"
4991 );
4992
4993 let mut request = NamespaceExistsRequest::new();
4995 request.id = Some(vec!["child".to_string()]);
4996 let result = namespace.namespace_exists(request).await;
4997 assert!(
4998 result.is_ok(),
4999 "Child namespace should exist after creation"
5000 );
5001
5002 let mut request = DropNamespaceRequest::new();
5004 request.id = Some(vec!["child".to_string()]);
5005 let result = namespace.drop_namespace(request).await;
5006 assert!(
5007 result.is_ok(),
5008 "Child namespace drop should succeed with manifest enabled"
5009 );
5010
5011 let mut request = NamespaceExistsRequest::new();
5013 request.id = Some(vec!["child".to_string()]);
5014 let result = namespace.namespace_exists(request).await;
5015 assert!(
5016 result.is_err(),
5017 "Child namespace should not exist after drop"
5018 );
5019 }
5020
5021 #[tokio::test]
5022 async fn test_config_custom_root() {
5023 let temp_dir = TempStdDir::default();
5024 let custom_path = temp_dir.join("custom");
5025 std::fs::create_dir(&custom_path).unwrap();
5026
5027 let namespace = DirectoryNamespaceBuilder::new(custom_path.to_string_lossy().to_string())
5028 .build()
5029 .await
5030 .unwrap();
5031
5032 let schema = create_test_schema();
5034 let ipc_data = create_test_ipc_data(&schema);
5035
5036 let mut request = CreateTableRequest::new();
5038 request.id = Some(vec!["test_table".to_string()]);
5039
5040 let response = namespace
5041 .create_table(request, bytes::Bytes::from(ipc_data))
5042 .await
5043 .unwrap();
5044
5045 assert!(response.location.unwrap().contains("custom"));
5046 }
5047
5048 #[tokio::test]
5049 async fn test_config_storage_options() {
5050 let temp_dir = TempStdDir::default();
5051
5052 let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
5053 .storage_option("option1", "value1")
5054 .storage_option("option2", "value2")
5055 .build()
5056 .await
5057 .unwrap();
5058
5059 let schema = create_test_schema();
5061 let ipc_data = create_test_ipc_data(&schema);
5062
5063 let mut request = CreateTableRequest::new();
5065 request.id = Some(vec!["test_table".to_string()]);
5066
5067 let response = namespace
5068 .create_table(request, bytes::Bytes::from(ipc_data))
5069 .await
5070 .unwrap();
5071
5072 let storage_options = response.storage_options.unwrap();
5073 assert_eq!(storage_options.get("option1"), Some(&"value1".to_string()));
5074 assert_eq!(storage_options.get("option2"), Some(&"value2".to_string()));
5075 }
5076
5077 #[tokio::test]
5081 async fn test_no_storage_options_without_vendor() {
5082 use lance_namespace::models::DeclareTableRequest;
5083
5084 let temp_dir = TempStdDir::default();
5085
5086 let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
5088 .manifest_enabled(false)
5089 .storage_option("aws_access_key_id", "AKID")
5090 .storage_option("aws_secret_access_key", "SECRET")
5091 .storage_option("region", "us-east-1")
5092 .build()
5093 .await
5094 .unwrap();
5095
5096 let schema = create_test_schema();
5097 let ipc_data = create_test_ipc_data(&schema);
5098
5099 let mut create_req = CreateTableRequest::new();
5101 create_req.id = Some(vec!["t1".to_string()]);
5102 namespace
5103 .create_table(create_req, bytes::Bytes::from(ipc_data))
5104 .await
5105 .unwrap();
5106
5107 let mut desc_req = DescribeTableRequest::new();
5109 desc_req.id = Some(vec!["t1".to_string()]);
5110 let resp = namespace.describe_table(desc_req).await.unwrap();
5111 assert!(resp.storage_options.is_none());
5112
5113 let mut decl_req = DeclareTableRequest::new();
5115 decl_req.id = Some(vec!["t2".to_string()]);
5116 let resp = namespace.declare_table(decl_req).await.unwrap();
5117 assert!(resp.storage_options.is_none());
5118 }
5119
5120 #[tokio::test]
5122 async fn test_no_storage_options_without_vendor_manifest() {
5123 let temp_dir = TempStdDir::default();
5124
5125 let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
5126 .storage_option("aws_access_key_id", "AKID")
5127 .storage_option("aws_secret_access_key", "SECRET")
5128 .storage_option("region", "us-east-1")
5129 .build()
5130 .await
5131 .unwrap();
5132
5133 let schema = create_test_schema();
5134 let ipc_data = create_test_ipc_data(&schema);
5135
5136 let mut create_req = CreateTableRequest::new();
5137 create_req.id = Some(vec!["t1".to_string()]);
5138 namespace
5139 .create_table(create_req, bytes::Bytes::from(ipc_data))
5140 .await
5141 .unwrap();
5142
5143 let mut desc_req = DescribeTableRequest::new();
5145 desc_req.id = Some(vec!["t1".to_string()]);
5146 let resp = namespace.describe_table(desc_req).await.unwrap();
5147 assert!(resp.storage_options.is_none());
5148 }
5149
5150 #[tokio::test]
5151 async fn test_from_properties_manifest_enabled() {
5152 let temp_dir = TempStdDir::default();
5153
5154 let mut properties = HashMap::new();
5155 properties.insert("root".to_string(), temp_dir.to_str().unwrap().to_string());
5156 properties.insert("manifest_enabled".to_string(), "true".to_string());
5157 properties.insert("dir_listing_enabled".to_string(), "false".to_string());
5158
5159 let builder = DirectoryNamespaceBuilder::from_properties(properties, None).unwrap();
5160 assert!(builder.manifest_enabled);
5161 assert!(!builder.dir_listing_enabled);
5162
5163 let namespace = builder.build().await.unwrap();
5164
5165 let schema = create_test_schema();
5167 let ipc_data = create_test_ipc_data(&schema);
5168
5169 let mut request = CreateTableRequest::new();
5171 request.id = Some(vec!["test_table".to_string()]);
5172
5173 let response = namespace
5174 .create_table(request, bytes::Bytes::from(ipc_data))
5175 .await
5176 .unwrap();
5177
5178 assert!(response.location.is_some());
5179 }
5180
5181 #[tokio::test]
5182 async fn test_from_properties_dir_listing_enabled() {
5183 let temp_dir = TempStdDir::default();
5184
5185 let mut properties = HashMap::new();
5186 properties.insert("root".to_string(), temp_dir.to_str().unwrap().to_string());
5187 properties.insert("manifest_enabled".to_string(), "false".to_string());
5188 properties.insert("dir_listing_enabled".to_string(), "true".to_string());
5189
5190 let builder = DirectoryNamespaceBuilder::from_properties(properties, None).unwrap();
5191 assert!(!builder.manifest_enabled);
5192 assert!(builder.dir_listing_enabled);
5193
5194 let namespace = builder.build().await.unwrap();
5195
5196 let schema = create_test_schema();
5198 let ipc_data = create_test_ipc_data(&schema);
5199
5200 let mut request = CreateTableRequest::new();
5202 request.id = Some(vec!["test_table".to_string()]);
5203
5204 let response = namespace
5205 .create_table(request, bytes::Bytes::from(ipc_data))
5206 .await
5207 .unwrap();
5208
5209 assert!(response.location.is_some());
5210 }
5211
5212 #[tokio::test]
5213 async fn test_from_properties_defaults() {
5214 let temp_dir = TempStdDir::default();
5215
5216 let mut properties = HashMap::new();
5217 properties.insert("root".to_string(), temp_dir.to_str().unwrap().to_string());
5218
5219 let builder = DirectoryNamespaceBuilder::from_properties(properties, None).unwrap();
5220 assert!(builder.manifest_enabled);
5222 assert!(builder.dir_listing_enabled);
5223 }
5224
5225 #[tokio::test]
5226 async fn test_from_properties_with_storage_options() {
5227 let temp_dir = TempStdDir::default();
5228
5229 let mut properties = HashMap::new();
5230 properties.insert("root".to_string(), temp_dir.to_str().unwrap().to_string());
5231 properties.insert("manifest_enabled".to_string(), "true".to_string());
5232 properties.insert("storage.region".to_string(), "us-west-2".to_string());
5233 properties.insert("storage.bucket".to_string(), "my-bucket".to_string());
5234
5235 let builder = DirectoryNamespaceBuilder::from_properties(properties, None).unwrap();
5236 assert!(builder.manifest_enabled);
5237 assert!(builder.storage_options.is_some());
5238
5239 let storage_options = builder.storage_options.unwrap();
5240 assert_eq!(
5241 storage_options.get("region"),
5242 Some(&"us-west-2".to_string())
5243 );
5244 assert_eq!(
5245 storage_options.get("bucket"),
5246 Some(&"my-bucket".to_string())
5247 );
5248 }
5249
5250 #[tokio::test]
5251 async fn test_various_arrow_types() {
5252 let (namespace, _temp_dir) = create_test_namespace().await;
5253
5254 let fields = vec![
5256 JsonArrowField {
5257 name: "bool_col".to_string(),
5258 r#type: Box::new(JsonArrowDataType::new("bool".to_string())),
5259 nullable: true,
5260 metadata: None,
5261 },
5262 JsonArrowField {
5263 name: "int8_col".to_string(),
5264 r#type: Box::new(JsonArrowDataType::new("int8".to_string())),
5265 nullable: true,
5266 metadata: None,
5267 },
5268 JsonArrowField {
5269 name: "float64_col".to_string(),
5270 r#type: Box::new(JsonArrowDataType::new("float64".to_string())),
5271 nullable: true,
5272 metadata: None,
5273 },
5274 JsonArrowField {
5275 name: "binary_col".to_string(),
5276 r#type: Box::new(JsonArrowDataType::new("binary".to_string())),
5277 nullable: true,
5278 metadata: None,
5279 },
5280 ];
5281
5282 let schema = JsonArrowSchema {
5283 fields,
5284 metadata: None,
5285 };
5286
5287 let ipc_data = create_test_ipc_data(&schema);
5289
5290 let mut request = CreateTableRequest::new();
5291 request.id = Some(vec!["complex_table".to_string()]);
5292
5293 let response = namespace
5294 .create_table(request, bytes::Bytes::from(ipc_data))
5295 .await
5296 .unwrap();
5297
5298 assert!(response.location.is_some());
5299 }
5300
5301 #[tokio::test]
5302 async fn test_connect_dir() {
5303 let temp_dir = TempStdDir::default();
5304
5305 let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
5306 .build()
5307 .await
5308 .unwrap();
5309
5310 let mut request = ListTablesRequest::new();
5312 request.id = Some(vec![]);
5313 let response = namespace.list_tables(request).await.unwrap();
5314 assert_eq!(response.tables.len(), 0);
5315 }
5316
5317 #[tokio::test]
5318 async fn test_create_table_with_ipc_data() {
5319 use arrow::array::{Int32Array, StringArray};
5320 use arrow::ipc::writer::StreamWriter;
5321
5322 let (namespace, _temp_dir) = create_test_namespace().await;
5323
5324 let schema = create_test_schema();
5326
5327 let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
5329 let arrow_schema = Arc::new(arrow_schema);
5330
5331 let id_array = Int32Array::from(vec![1, 2, 3]);
5333 let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie"]);
5334 let batch = arrow::record_batch::RecordBatch::try_new(
5335 arrow_schema.clone(),
5336 vec![Arc::new(id_array), Arc::new(name_array)],
5337 )
5338 .unwrap();
5339
5340 let mut buffer = Vec::new();
5342 {
5343 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
5344 writer.write(&batch).unwrap();
5345 writer.finish().unwrap();
5346 }
5347
5348 let mut request = CreateTableRequest::new();
5350 request.id = Some(vec!["test_table_with_data".to_string()]);
5351
5352 let response = namespace
5353 .create_table(request, Bytes::from(buffer))
5354 .await
5355 .unwrap();
5356
5357 assert_eq!(response.version, Some(1));
5358 assert!(
5359 response
5360 .location
5361 .unwrap()
5362 .contains("test_table_with_data.lance")
5363 );
5364
5365 let mut exists_request = TableExistsRequest::new();
5367 exists_request.id = Some(vec!["test_table_with_data".to_string()]);
5368 namespace.table_exists(exists_request).await.unwrap();
5369 }
5370
5371 #[tokio::test]
5372 async fn test_child_namespace_create_and_list() {
5373 let (namespace, _temp_dir) = create_test_namespace().await;
5374
5375 for i in 1..=3 {
5377 let mut create_req = CreateNamespaceRequest::new();
5378 create_req.id = Some(vec![format!("ns{}", i)]);
5379 let result = namespace.create_namespace(create_req).await;
5380 assert!(result.is_ok(), "Failed to create child namespace ns{}", i);
5381 }
5382
5383 let list_req = ListNamespacesRequest {
5385 id: Some(vec![]),
5386 ..Default::default()
5387 };
5388 let result = namespace.list_namespaces(list_req).await;
5389 assert!(result.is_ok());
5390 let namespaces = result.unwrap().namespaces;
5391 assert_eq!(namespaces.len(), 3);
5392 assert!(namespaces.contains(&"ns1".to_string()));
5393 assert!(namespaces.contains(&"ns2".to_string()));
5394 assert!(namespaces.contains(&"ns3".to_string()));
5395 }
5396
5397 #[tokio::test]
5398 async fn test_nested_namespace_hierarchy() {
5399 let (namespace, _temp_dir) = create_test_namespace().await;
5400
5401 let mut create_req = CreateNamespaceRequest::new();
5403 create_req.id = Some(vec!["parent".to_string()]);
5404 namespace.create_namespace(create_req).await.unwrap();
5405
5406 let mut create_req = CreateNamespaceRequest::new();
5408 create_req.id = Some(vec!["parent".to_string(), "child1".to_string()]);
5409 namespace.create_namespace(create_req).await.unwrap();
5410
5411 let mut create_req = CreateNamespaceRequest::new();
5412 create_req.id = Some(vec!["parent".to_string(), "child2".to_string()]);
5413 namespace.create_namespace(create_req).await.unwrap();
5414
5415 let list_req = ListNamespacesRequest {
5417 id: Some(vec!["parent".to_string()]),
5418 ..Default::default()
5419 };
5420 let result = namespace.list_namespaces(list_req).await;
5421 assert!(result.is_ok());
5422 let children = result.unwrap().namespaces;
5423 assert_eq!(children.len(), 2);
5424 assert!(children.contains(&"child1".to_string()));
5425 assert!(children.contains(&"child2".to_string()));
5426
5427 let list_req = ListNamespacesRequest {
5429 id: Some(vec![]),
5430 ..Default::default()
5431 };
5432 let result = namespace.list_namespaces(list_req).await;
5433 assert!(result.is_ok());
5434 let root_namespaces = result.unwrap().namespaces;
5435 assert_eq!(root_namespaces.len(), 1);
5436 assert_eq!(root_namespaces[0], "parent");
5437 }
5438
5439 #[tokio::test]
5440 async fn test_table_in_child_namespace() {
5441 let (namespace, _temp_dir) = create_test_namespace().await;
5442
5443 let mut create_ns_req = CreateNamespaceRequest::new();
5445 create_ns_req.id = Some(vec!["test_ns".to_string()]);
5446 namespace.create_namespace(create_ns_req).await.unwrap();
5447
5448 let schema = create_test_schema();
5450 let ipc_data = create_test_ipc_data(&schema);
5451 let mut create_table_req = CreateTableRequest::new();
5452 create_table_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5453 let result = namespace
5454 .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5455 .await;
5456 assert!(result.is_ok(), "Failed to create table in child namespace");
5457
5458 let list_req = ListTablesRequest {
5460 id: Some(vec!["test_ns".to_string()]),
5461 ..Default::default()
5462 };
5463 let result = namespace.list_tables(list_req).await;
5464 assert!(result.is_ok());
5465 let tables = result.unwrap().tables;
5466 assert_eq!(tables.len(), 1);
5467 assert_eq!(tables[0], "table1");
5468
5469 let mut exists_req = TableExistsRequest::new();
5471 exists_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5472 let result = namespace.table_exists(exists_req).await;
5473 assert!(result.is_ok());
5474
5475 let mut describe_req = DescribeTableRequest::new();
5477 describe_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5478 let result = namespace.describe_table(describe_req).await;
5479 assert!(result.is_ok());
5480 let response = result.unwrap();
5481 assert!(response.location.is_some());
5482 }
5483
5484 #[tokio::test]
5485 async fn test_multiple_tables_in_child_namespace() {
5486 let (namespace, _temp_dir) = create_test_namespace().await;
5487
5488 let mut create_ns_req = CreateNamespaceRequest::new();
5490 create_ns_req.id = Some(vec!["test_ns".to_string()]);
5491 namespace.create_namespace(create_ns_req).await.unwrap();
5492
5493 let schema = create_test_schema();
5495 let ipc_data = create_test_ipc_data(&schema);
5496 for i in 1..=3 {
5497 let mut create_table_req = CreateTableRequest::new();
5498 create_table_req.id = Some(vec!["test_ns".to_string(), format!("table{}", i)]);
5499 namespace
5500 .create_table(create_table_req, bytes::Bytes::from(ipc_data.clone()))
5501 .await
5502 .unwrap();
5503 }
5504
5505 let list_req = ListTablesRequest {
5507 id: Some(vec!["test_ns".to_string()]),
5508 ..Default::default()
5509 };
5510 let result = namespace.list_tables(list_req).await;
5511 assert!(result.is_ok());
5512 let tables = result.unwrap().tables;
5513 assert_eq!(tables.len(), 3);
5514 assert!(tables.contains(&"table1".to_string()));
5515 assert!(tables.contains(&"table2".to_string()));
5516 assert!(tables.contains(&"table3".to_string()));
5517 }
5518
5519 #[tokio::test]
5520 async fn test_drop_table_in_child_namespace() {
5521 let (namespace, _temp_dir) = create_test_namespace().await;
5522
5523 let mut create_ns_req = CreateNamespaceRequest::new();
5525 create_ns_req.id = Some(vec!["test_ns".to_string()]);
5526 namespace.create_namespace(create_ns_req).await.unwrap();
5527
5528 let schema = create_test_schema();
5530 let ipc_data = create_test_ipc_data(&schema);
5531 let mut create_table_req = CreateTableRequest::new();
5532 create_table_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5533 namespace
5534 .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5535 .await
5536 .unwrap();
5537
5538 let mut drop_req = DropTableRequest::new();
5540 drop_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5541 let result = namespace.drop_table(drop_req).await;
5542 assert!(result.is_ok(), "Failed to drop table in child namespace");
5543
5544 let mut exists_req = TableExistsRequest::new();
5546 exists_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5547 let result = namespace.table_exists(exists_req).await;
5548 assert!(result.is_err());
5549 }
5550
5551 #[tokio::test]
5552 async fn test_deeply_nested_namespace() {
5553 let (namespace, _temp_dir) = create_test_namespace().await;
5554
5555 let mut create_req = CreateNamespaceRequest::new();
5557 create_req.id = Some(vec!["level1".to_string()]);
5558 namespace.create_namespace(create_req).await.unwrap();
5559
5560 let mut create_req = CreateNamespaceRequest::new();
5561 create_req.id = Some(vec!["level1".to_string(), "level2".to_string()]);
5562 namespace.create_namespace(create_req).await.unwrap();
5563
5564 let mut create_req = CreateNamespaceRequest::new();
5565 create_req.id = Some(vec![
5566 "level1".to_string(),
5567 "level2".to_string(),
5568 "level3".to_string(),
5569 ]);
5570 namespace.create_namespace(create_req).await.unwrap();
5571
5572 let schema = create_test_schema();
5574 let ipc_data = create_test_ipc_data(&schema);
5575 let mut create_table_req = CreateTableRequest::new();
5576 create_table_req.id = Some(vec![
5577 "level1".to_string(),
5578 "level2".to_string(),
5579 "level3".to_string(),
5580 "table1".to_string(),
5581 ]);
5582 let result = namespace
5583 .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5584 .await;
5585 assert!(
5586 result.is_ok(),
5587 "Failed to create table in deeply nested namespace"
5588 );
5589
5590 let mut exists_req = TableExistsRequest::new();
5592 exists_req.id = Some(vec![
5593 "level1".to_string(),
5594 "level2".to_string(),
5595 "level3".to_string(),
5596 "table1".to_string(),
5597 ]);
5598 let result = namespace.table_exists(exists_req).await;
5599 assert!(result.is_ok());
5600 }
5601
5602 #[tokio::test]
5603 async fn test_namespace_with_properties() {
5604 let (namespace, _temp_dir) = create_test_namespace().await;
5605
5606 let mut properties = HashMap::new();
5608 properties.insert("owner".to_string(), "test_user".to_string());
5609 properties.insert("description".to_string(), "Test namespace".to_string());
5610
5611 let mut create_req = CreateNamespaceRequest::new();
5612 create_req.id = Some(vec!["test_ns".to_string()]);
5613 create_req.properties = Some(properties.clone());
5614 namespace.create_namespace(create_req).await.unwrap();
5615
5616 let describe_req = DescribeNamespaceRequest {
5618 id: Some(vec!["test_ns".to_string()]),
5619 ..Default::default()
5620 };
5621 let result = namespace.describe_namespace(describe_req).await;
5622 assert!(result.is_ok());
5623 let response = result.unwrap();
5624 assert!(response.properties.is_some());
5625 let props = response.properties.unwrap();
5626 assert_eq!(props.get("owner"), Some(&"test_user".to_string()));
5627 assert_eq!(
5628 props.get("description"),
5629 Some(&"Test namespace".to_string())
5630 );
5631 }
5632
5633 #[tokio::test]
5634 async fn test_cannot_drop_namespace_with_tables() {
5635 let (namespace, _temp_dir) = create_test_namespace().await;
5636
5637 let mut create_ns_req = CreateNamespaceRequest::new();
5639 create_ns_req.id = Some(vec!["test_ns".to_string()]);
5640 namespace.create_namespace(create_ns_req).await.unwrap();
5641
5642 let schema = create_test_schema();
5644 let ipc_data = create_test_ipc_data(&schema);
5645 let mut create_table_req = CreateTableRequest::new();
5646 create_table_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5647 namespace
5648 .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5649 .await
5650 .unwrap();
5651
5652 let mut drop_req = DropNamespaceRequest::new();
5654 drop_req.id = Some(vec!["test_ns".to_string()]);
5655 let result = namespace.drop_namespace(drop_req).await;
5656 assert!(
5657 result.is_err(),
5658 "Should not be able to drop namespace with tables"
5659 );
5660 }
5661
5662 #[tokio::test]
5663 async fn test_isolation_between_namespaces() {
5664 let (namespace, _temp_dir) = create_test_namespace().await;
5665
5666 let mut create_req = CreateNamespaceRequest::new();
5668 create_req.id = Some(vec!["ns1".to_string()]);
5669 namespace.create_namespace(create_req).await.unwrap();
5670
5671 let mut create_req = CreateNamespaceRequest::new();
5672 create_req.id = Some(vec!["ns2".to_string()]);
5673 namespace.create_namespace(create_req).await.unwrap();
5674
5675 let schema = create_test_schema();
5677 let ipc_data = create_test_ipc_data(&schema);
5678
5679 let mut create_table_req = CreateTableRequest::new();
5680 create_table_req.id = Some(vec!["ns1".to_string(), "table1".to_string()]);
5681 namespace
5682 .create_table(create_table_req, bytes::Bytes::from(ipc_data.clone()))
5683 .await
5684 .unwrap();
5685
5686 let mut create_table_req = CreateTableRequest::new();
5687 create_table_req.id = Some(vec!["ns2".to_string(), "table1".to_string()]);
5688 namespace
5689 .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5690 .await
5691 .unwrap();
5692
5693 let list_req = ListTablesRequest {
5695 id: Some(vec!["ns1".to_string()]),
5696 page_token: None,
5697 limit: None,
5698 ..Default::default()
5699 };
5700 let result = namespace.list_tables(list_req).await.unwrap();
5701 assert_eq!(result.tables.len(), 1);
5702 assert_eq!(result.tables[0], "table1");
5703
5704 let list_req = ListTablesRequest {
5705 id: Some(vec!["ns2".to_string()]),
5706 page_token: None,
5707 limit: None,
5708 ..Default::default()
5709 };
5710 let result = namespace.list_tables(list_req).await.unwrap();
5711 assert_eq!(result.tables.len(), 1);
5712 assert_eq!(result.tables[0], "table1");
5713
5714 let mut drop_req = DropTableRequest::new();
5716 drop_req.id = Some(vec!["ns1".to_string(), "table1".to_string()]);
5717 namespace.drop_table(drop_req).await.unwrap();
5718
5719 let mut exists_req = TableExistsRequest::new();
5721 exists_req.id = Some(vec!["ns1".to_string(), "table1".to_string()]);
5722 assert!(namespace.table_exists(exists_req).await.is_err());
5723
5724 let mut exists_req = TableExistsRequest::new();
5725 exists_req.id = Some(vec!["ns2".to_string(), "table1".to_string()]);
5726 assert!(namespace.table_exists(exists_req).await.is_ok());
5727 }
5728
5729 #[tokio::test]
5730 async fn test_migrate_directory_tables() {
5731 let temp_dir = TempStdDir::default();
5732 let temp_path = temp_dir.to_str().unwrap();
5733
5734 let dir_only_ns = DirectoryNamespaceBuilder::new(temp_path)
5736 .manifest_enabled(false)
5737 .dir_listing_enabled(true)
5738 .build()
5739 .await
5740 .unwrap();
5741
5742 let schema = create_test_schema();
5744 let ipc_data = create_test_ipc_data(&schema);
5745
5746 for i in 1..=3 {
5747 let mut create_req = CreateTableRequest::new();
5748 create_req.id = Some(vec![format!("table{}", i)]);
5749 dir_only_ns
5750 .create_table(create_req, bytes::Bytes::from(ipc_data.clone()))
5751 .await
5752 .unwrap();
5753 }
5754
5755 drop(dir_only_ns);
5756
5757 let dual_mode_ns = DirectoryNamespaceBuilder::new(temp_path)
5759 .manifest_enabled(true)
5760 .dir_listing_enabled(true)
5761 .build()
5762 .await
5763 .unwrap();
5764
5765 let mut list_req = ListTablesRequest::new();
5767 list_req.id = Some(vec![]);
5768 let tables = dual_mode_ns.list_tables(list_req).await.unwrap().tables;
5769 assert_eq!(tables.len(), 3);
5770
5771 let migrated_count = dual_mode_ns.migrate().await.unwrap();
5773 assert_eq!(migrated_count, 3, "Should migrate all 3 tables");
5774
5775 let mut list_req = ListTablesRequest::new();
5777 list_req.id = Some(vec![]);
5778 let tables = dual_mode_ns.list_tables(list_req).await.unwrap().tables;
5779 assert_eq!(tables.len(), 3);
5780
5781 let migrated_count = dual_mode_ns.migrate().await.unwrap();
5783 assert_eq!(
5784 migrated_count, 0,
5785 "Should not migrate already-migrated tables"
5786 );
5787
5788 drop(dual_mode_ns);
5789
5790 let manifest_only_ns = DirectoryNamespaceBuilder::new(temp_path)
5792 .manifest_enabled(true)
5793 .dir_listing_enabled(false)
5794 .build()
5795 .await
5796 .unwrap();
5797
5798 let mut list_req = ListTablesRequest::new();
5800 list_req.id = Some(vec![]);
5801 let tables = manifest_only_ns.list_tables(list_req).await.unwrap().tables;
5802 assert_eq!(tables.len(), 3);
5803 assert!(tables.contains(&"table1".to_string()));
5804 assert!(tables.contains(&"table2".to_string()));
5805 assert!(tables.contains(&"table3".to_string()));
5806 }
5807
5808 #[tokio::test]
5809 async fn test_migrate_without_manifest() {
5810 let temp_dir = TempStdDir::default();
5811 let temp_path = temp_dir.to_str().unwrap();
5812
5813 let namespace = DirectoryNamespaceBuilder::new(temp_path)
5815 .manifest_enabled(false)
5816 .dir_listing_enabled(true)
5817 .build()
5818 .await
5819 .unwrap();
5820
5821 let migrated_count = namespace.migrate().await.unwrap();
5823 assert_eq!(migrated_count, 0);
5824 }
5825
5826 #[tokio::test]
5827 async fn test_register_table() {
5828 use lance_namespace::models::{RegisterTableRequest, TableExistsRequest};
5829
5830 let temp_dir = TempStdDir::default();
5831 let temp_path = temp_dir.to_str().unwrap();
5832
5833 let namespace = DirectoryNamespaceBuilder::new(temp_path)
5834 .dir_listing_to_manifest_migration_enabled(true)
5835 .build()
5836 .await
5837 .unwrap();
5838
5839 let schema = create_test_schema();
5841 let ipc_data = create_test_ipc_data(&schema);
5842
5843 let table_uri = format!("{}/external_table.lance", temp_path);
5844 let cursor = Cursor::new(ipc_data);
5845 let stream_reader = StreamReader::try_new(cursor, None).unwrap();
5846 let batches: Vec<_> = stream_reader
5847 .collect::<std::result::Result<Vec<_>, _>>()
5848 .unwrap();
5849 let schema = batches[0].schema();
5850 let batch_results: Vec<_> = batches.into_iter().map(Ok).collect();
5851 let reader = RecordBatchIterator::new(batch_results, schema);
5852 Dataset::write(Box::new(reader), &table_uri, None)
5853 .await
5854 .unwrap();
5855
5856 let mut register_req = RegisterTableRequest::new("external_table.lance".to_string());
5858 register_req.id = Some(vec!["registered_table".to_string()]);
5859
5860 let response = namespace.register_table(register_req).await.unwrap();
5861 assert_eq!(response.location, Some("external_table.lance".to_string()));
5862
5863 let mut exists_req = TableExistsRequest::new();
5865 exists_req.id = Some(vec!["registered_table".to_string()]);
5866 assert!(namespace.table_exists(exists_req).await.is_ok());
5867
5868 let mut list_req = ListTablesRequest::new();
5870 list_req.id = Some(vec![]);
5871 let tables = namespace.list_tables(list_req).await.unwrap();
5872 assert!(tables.tables.contains(&"registered_table".to_string()));
5873 }
5874
5875 #[tokio::test]
5876 async fn test_register_table_duplicate_fails() {
5877 use lance_namespace::models::RegisterTableRequest;
5878
5879 let temp_dir = TempStdDir::default();
5880 let temp_path = temp_dir.to_str().unwrap();
5881
5882 let namespace = DirectoryNamespaceBuilder::new(temp_path)
5883 .build()
5884 .await
5885 .unwrap();
5886
5887 let mut register_req = RegisterTableRequest::new("test_table.lance".to_string());
5889 register_req.id = Some(vec!["test_table".to_string()]);
5890
5891 namespace
5892 .register_table(register_req.clone())
5893 .await
5894 .unwrap();
5895
5896 let result = namespace.register_table(register_req).await;
5898 assert!(result.is_err());
5899 assert!(result.unwrap_err().to_string().contains("already exists"));
5900 }
5901
5902 #[tokio::test]
5903 async fn test_deregister_table() {
5904 use lance_namespace::models::{DeregisterTableRequest, TableExistsRequest};
5905
5906 let temp_dir = TempStdDir::default();
5907 let temp_path = temp_dir.to_str().unwrap();
5908
5909 let namespace = DirectoryNamespaceBuilder::new(temp_path)
5912 .manifest_enabled(true)
5913 .dir_listing_enabled(false)
5914 .build()
5915 .await
5916 .unwrap();
5917
5918 let schema = create_test_schema();
5920 let ipc_data = create_test_ipc_data(&schema);
5921
5922 let mut create_req = CreateTableRequest::new();
5923 create_req.id = Some(vec!["test_table".to_string()]);
5924 namespace
5925 .create_table(create_req, bytes::Bytes::from(ipc_data))
5926 .await
5927 .unwrap();
5928
5929 let mut exists_req = TableExistsRequest::new();
5931 exists_req.id = Some(vec!["test_table".to_string()]);
5932 assert!(namespace.table_exists(exists_req.clone()).await.is_ok());
5933
5934 let mut deregister_req = DeregisterTableRequest::new();
5936 deregister_req.id = Some(vec!["test_table".to_string()]);
5937 let response = namespace.deregister_table(deregister_req).await.unwrap();
5938
5939 assert!(
5941 response.location.is_some(),
5942 "Deregister should return location"
5943 );
5944 let location = response.location.as_ref().unwrap();
5945 let expected_url = lance_io::object_store::uri_to_url(temp_path)
5948 .expect("Failed to convert temp path to URL");
5949 let expected_prefix = expected_url.to_string();
5950 assert!(
5951 location.starts_with(&expected_prefix),
5952 "Location should start with '{}', got: {}",
5953 expected_prefix,
5954 location
5955 );
5956 assert!(
5957 location.contains("test_table"),
5958 "Location should contain table name: {}",
5959 location
5960 );
5961 assert_eq!(response.id, Some(vec!["test_table".to_string()]));
5962
5963 assert!(namespace.table_exists(exists_req).await.is_err());
5965
5966 let dataset = Dataset::open(location).await;
5968 assert!(
5969 dataset.is_ok(),
5970 "Physical table data should still exist at {}",
5971 location
5972 );
5973 }
5974
5975 #[tokio::test]
5976 async fn test_deregister_table_in_child_namespace() {
5977 use lance_namespace::models::{
5978 CreateNamespaceRequest, DeregisterTableRequest, TableExistsRequest,
5979 };
5980
5981 let temp_dir = TempStdDir::default();
5982 let temp_path = temp_dir.to_str().unwrap();
5983
5984 let namespace = DirectoryNamespaceBuilder::new(temp_path)
5985 .build()
5986 .await
5987 .unwrap();
5988
5989 let mut create_ns_req = CreateNamespaceRequest::new();
5991 create_ns_req.id = Some(vec!["test_ns".to_string()]);
5992 namespace.create_namespace(create_ns_req).await.unwrap();
5993
5994 let schema = create_test_schema();
5996 let ipc_data = create_test_ipc_data(&schema);
5997
5998 let mut create_req = CreateTableRequest::new();
5999 create_req.id = Some(vec!["test_ns".to_string(), "test_table".to_string()]);
6000 namespace
6001 .create_table(create_req, bytes::Bytes::from(ipc_data))
6002 .await
6003 .unwrap();
6004
6005 let mut deregister_req = DeregisterTableRequest::new();
6007 deregister_req.id = Some(vec!["test_ns".to_string(), "test_table".to_string()]);
6008 let response = namespace.deregister_table(deregister_req).await.unwrap();
6009
6010 assert!(
6012 response.location.is_some(),
6013 "Deregister should return location"
6014 );
6015 let location = response.location.as_ref().unwrap();
6016 let expected_url = lance_io::object_store::uri_to_url(temp_path)
6019 .expect("Failed to convert temp path to URL");
6020 let expected_prefix = expected_url.to_string();
6021 assert!(
6022 location.starts_with(&expected_prefix),
6023 "Location should start with '{}', got: {}",
6024 expected_prefix,
6025 location
6026 );
6027 assert!(
6028 location.contains("test_ns") && location.contains("test_table"),
6029 "Location should contain namespace and table name: {}",
6030 location
6031 );
6032 assert_eq!(
6033 response.id,
6034 Some(vec!["test_ns".to_string(), "test_table".to_string()])
6035 );
6036
6037 let mut exists_req = TableExistsRequest::new();
6039 exists_req.id = Some(vec!["test_ns".to_string(), "test_table".to_string()]);
6040 assert!(namespace.table_exists(exists_req).await.is_err());
6041 }
6042
6043 #[tokio::test]
6044 async fn test_register_without_manifest_fails() {
6045 use lance_namespace::models::RegisterTableRequest;
6046
6047 let temp_dir = TempStdDir::default();
6048 let temp_path = temp_dir.to_str().unwrap();
6049
6050 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6052 .manifest_enabled(false)
6053 .build()
6054 .await
6055 .unwrap();
6056
6057 let mut register_req = RegisterTableRequest::new("test_table.lance".to_string());
6059 register_req.id = Some(vec!["test_table".to_string()]);
6060 let result = namespace.register_table(register_req).await;
6061 assert!(result.is_err());
6062 assert!(
6063 result
6064 .unwrap_err()
6065 .to_string()
6066 .contains("manifest mode is enabled")
6067 );
6068
6069 }
6072
6073 #[tokio::test]
6074 async fn test_register_table_rejects_absolute_uri() {
6075 use lance_namespace::models::RegisterTableRequest;
6076
6077 let temp_dir = TempStdDir::default();
6078 let temp_path = temp_dir.to_str().unwrap();
6079
6080 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6081 .build()
6082 .await
6083 .unwrap();
6084
6085 let mut register_req = RegisterTableRequest::new("s3://bucket/table.lance".to_string());
6087 register_req.id = Some(vec!["test_table".to_string()]);
6088 let result = namespace.register_table(register_req).await;
6089 assert!(result.is_err());
6090 let err_msg = result.unwrap_err().to_string();
6091 assert!(err_msg.contains("Absolute URIs are not allowed"));
6092 }
6093
6094 #[tokio::test]
6095 async fn test_register_table_rejects_absolute_path() {
6096 use lance_namespace::models::RegisterTableRequest;
6097
6098 let temp_dir = TempStdDir::default();
6099 let temp_path = temp_dir.to_str().unwrap();
6100
6101 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6102 .build()
6103 .await
6104 .unwrap();
6105
6106 let mut register_req = RegisterTableRequest::new("/tmp/table.lance".to_string());
6108 register_req.id = Some(vec!["test_table".to_string()]);
6109 let result = namespace.register_table(register_req).await;
6110 assert!(result.is_err());
6111 let err_msg = result.unwrap_err().to_string();
6112 assert!(err_msg.contains("Absolute paths are not allowed"));
6113 }
6114
6115 #[tokio::test]
6116 async fn test_register_table_rejects_path_traversal() {
6117 use lance_namespace::models::RegisterTableRequest;
6118
6119 let temp_dir = TempStdDir::default();
6120 let temp_path = temp_dir.to_str().unwrap();
6121
6122 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6123 .build()
6124 .await
6125 .unwrap();
6126
6127 let mut register_req = RegisterTableRequest::new("../outside/table.lance".to_string());
6129 register_req.id = Some(vec!["test_table".to_string()]);
6130 let result = namespace.register_table(register_req).await;
6131 assert!(result.is_err());
6132 let err_msg = result.unwrap_err().to_string();
6133 assert!(err_msg.contains("Path traversal is not allowed"));
6134 }
6135
6136 #[tokio::test]
6137 async fn test_namespace_write() {
6138 use arrow::array::Int32Array;
6139 use arrow::datatypes::{DataType, Field as ArrowField, Schema as ArrowSchema};
6140 use arrow::record_batch::{RecordBatch, RecordBatchIterator};
6141 use lance::dataset::{Dataset, WriteMode, WriteParams};
6142 use lance_namespace::LanceNamespace;
6143
6144 let (namespace, _temp_dir) = create_test_namespace().await;
6145 let namespace = Arc::new(namespace) as Arc<dyn LanceNamespace>;
6146
6147 let table_id = vec!["test_ns".to_string(), "test_table".to_string()];
6149 let schema = Arc::new(ArrowSchema::new(vec![
6150 ArrowField::new("a", DataType::Int32, false),
6151 ArrowField::new("b", DataType::Int32, false),
6152 ]));
6153
6154 let data1 = RecordBatch::try_new(
6156 schema.clone(),
6157 vec![
6158 Arc::new(Int32Array::from(vec![1, 2, 3])),
6159 Arc::new(Int32Array::from(vec![10, 20, 30])),
6160 ],
6161 )
6162 .unwrap();
6163
6164 let reader1 = RecordBatchIterator::new(vec![data1].into_iter().map(Ok), schema.clone());
6165 let dataset =
6166 Dataset::write_into_namespace(reader1, namespace.clone(), table_id.clone(), None)
6167 .await
6168 .unwrap();
6169
6170 assert_eq!(dataset.count_rows(None).await.unwrap(), 3);
6171 assert_eq!(dataset.version().version, 1);
6172
6173 let data2 = RecordBatch::try_new(
6175 schema.clone(),
6176 vec![
6177 Arc::new(Int32Array::from(vec![4, 5])),
6178 Arc::new(Int32Array::from(vec![40, 50])),
6179 ],
6180 )
6181 .unwrap();
6182
6183 let params_append = WriteParams {
6184 mode: WriteMode::Append,
6185 ..Default::default()
6186 };
6187
6188 let reader2 = RecordBatchIterator::new(vec![data2].into_iter().map(Ok), schema.clone());
6189 let dataset = Dataset::write_into_namespace(
6190 reader2,
6191 namespace.clone(),
6192 table_id.clone(),
6193 Some(params_append),
6194 )
6195 .await
6196 .unwrap();
6197
6198 assert_eq!(dataset.count_rows(None).await.unwrap(), 5);
6199 assert_eq!(dataset.version().version, 2);
6200
6201 let data3 = RecordBatch::try_new(
6203 schema.clone(),
6204 vec![
6205 Arc::new(Int32Array::from(vec![100, 200])),
6206 Arc::new(Int32Array::from(vec![1000, 2000])),
6207 ],
6208 )
6209 .unwrap();
6210
6211 let params_overwrite = WriteParams {
6212 mode: WriteMode::Overwrite,
6213 ..Default::default()
6214 };
6215
6216 let reader3 = RecordBatchIterator::new(vec![data3].into_iter().map(Ok), schema.clone());
6217 let dataset = Dataset::write_into_namespace(
6218 reader3,
6219 namespace.clone(),
6220 table_id.clone(),
6221 Some(params_overwrite),
6222 )
6223 .await
6224 .unwrap();
6225
6226 assert_eq!(dataset.count_rows(None).await.unwrap(), 2);
6227 assert_eq!(dataset.version().version, 3);
6228
6229 let result = dataset.scan().try_into_batch().await.unwrap();
6231 let a_col = result
6232 .column_by_name("a")
6233 .unwrap()
6234 .as_any()
6235 .downcast_ref::<Int32Array>()
6236 .unwrap();
6237 assert_eq!(a_col.values(), &[100, 200]);
6238 }
6239
6240 #[tokio::test]
6245 async fn test_declare_table_v1_mode() {
6246 use lance_namespace::models::{
6247 DeclareTableRequest, DescribeTableRequest, ListTablesRequest, TableExistsRequest,
6248 };
6249
6250 let temp_dir = TempStdDir::default();
6251 let temp_path = temp_dir.to_str().unwrap();
6252
6253 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6255 .manifest_enabled(false)
6256 .build()
6257 .await
6258 .unwrap();
6259
6260 let mut declare_req = DeclareTableRequest::new();
6262 declare_req.id = Some(vec!["test_table".to_string()]);
6263 let response = namespace.declare_table(declare_req).await.unwrap();
6264
6265 assert!(response.location.is_some());
6267 let location = response.location.as_ref().unwrap();
6268 assert!(location.ends_with("test_table.lance"));
6269
6270 let mut exists_req = TableExistsRequest::new();
6272 exists_req.id = Some(vec!["test_table".to_string()]);
6273 assert!(namespace.table_exists(exists_req).await.is_ok());
6274
6275 let mut describe_req = DescribeTableRequest::new();
6277 describe_req.id = Some(vec!["test_table".to_string()]);
6278 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6279 assert!(describe_response.location.is_some());
6280 assert!(describe_response.version.is_none()); assert!(describe_response.schema.is_none()); assert_eq!(describe_response.is_only_declared, None);
6283
6284 let mut describe_req = DescribeTableRequest::new();
6285 describe_req.id = Some(vec!["test_table".to_string()]);
6286 describe_req.check_declared = Some(true);
6287 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6288 assert_eq!(describe_response.is_only_declared, Some(true));
6289
6290 let mut list_req = ListTablesRequest::new();
6291 list_req.id = Some(vec![]);
6292 let list_response = namespace.list_tables(list_req.clone()).await.unwrap();
6293 assert_eq!(list_response.tables, vec!["test_table".to_string()]);
6294
6295 list_req.include_declared = Some(false);
6296 let list_response = namespace.list_tables(list_req).await.unwrap();
6297 assert!(list_response.tables.is_empty());
6298 }
6299
6300 #[tokio::test]
6301 async fn test_insert_into_declared_table_promotes_it_from_declared_state() {
6302 use lance_namespace::models::{
6303 DeclareTableRequest, DescribeTableRequest, InsertIntoTableRequest,
6304 };
6305
6306 let temp_dir = TempStdDir::default();
6307 let temp_path = temp_dir.to_str().unwrap();
6308
6309 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6310 .manifest_enabled(false)
6311 .build()
6312 .await
6313 .unwrap();
6314
6315 let mut declare_req = DeclareTableRequest::new();
6316 declare_req.id = Some(vec!["test_table".to_string()]);
6317 namespace.declare_table(declare_req).await.unwrap();
6318
6319 let schema = create_test_schema();
6320 let ipc_data = create_test_ipc_data(&schema);
6321 let mut insert_req = InsertIntoTableRequest::new();
6322 insert_req.id = Some(vec!["test_table".to_string()]);
6323 namespace
6324 .insert_into_table(insert_req, bytes::Bytes::from(ipc_data))
6325 .await
6326 .unwrap();
6327
6328 let mut describe_req = DescribeTableRequest::new();
6329 describe_req.id = Some(vec!["test_table".to_string()]);
6330 describe_req.load_detailed_metadata = Some(true);
6331 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6332
6333 assert_eq!(describe_response.is_only_declared, Some(false));
6334 assert_eq!(describe_response.version, Some(1));
6335 assert!(describe_response.schema.is_some());
6336
6337 let mut list_req = ListTablesRequest::new();
6338 list_req.id = Some(vec![]);
6339 list_req.include_declared = Some(false);
6340 assert_eq!(
6341 namespace.list_tables(list_req).await.unwrap().tables,
6342 vec!["test_table".to_string()]
6343 );
6344 }
6345
6346 #[tokio::test]
6347 async fn test_create_table_after_declare_table_v1_mode_creates_table() {
6348 use lance_namespace::models::{
6349 DeclareTableRequest, DescribeTableRequest, ListTablesRequest,
6350 };
6351
6352 let temp_dir = TempStdDir::default();
6353 let temp_path = temp_dir.to_str().unwrap();
6354
6355 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6356 .manifest_enabled(false)
6357 .build()
6358 .await
6359 .unwrap();
6360
6361 let mut declare_req = DeclareTableRequest::new();
6362 declare_req.id = Some(vec!["test_table".to_string()]);
6363 namespace.declare_table(declare_req).await.unwrap();
6364
6365 let mut create_req = CreateTableRequest::new();
6366 create_req.id = Some(vec!["test_table".to_string()]);
6367 let response = namespace
6368 .create_table(
6369 create_req,
6370 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6371 )
6372 .await
6373 .unwrap();
6374
6375 assert_eq!(response.version, Some(1));
6376
6377 let mut describe_req = DescribeTableRequest::new();
6378 describe_req.id = Some(vec!["test_table".to_string()]);
6379 describe_req.load_detailed_metadata = Some(true);
6380 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6381 assert_eq!(describe_response.is_only_declared, Some(false));
6382 assert_eq!(describe_response.version, Some(1));
6383
6384 let mut list_req = ListTablesRequest::new();
6385 list_req.id = Some(vec![]);
6386 list_req.include_declared = Some(false);
6387 assert_eq!(
6388 namespace.list_tables(list_req).await.unwrap().tables,
6389 vec!["test_table".to_string()]
6390 );
6391 }
6392
6393 #[tokio::test]
6394 async fn test_insert_into_declared_table_with_manifest_promotes_it() {
6395 use lance_namespace::models::{
6396 DeclareTableRequest, DescribeTableRequest, InsertIntoTableRequest, ListTablesRequest,
6397 };
6398
6399 let temp_dir = TempStdDir::default();
6400 let temp_path = temp_dir.to_str().unwrap();
6401
6402 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6403 .manifest_enabled(true)
6404 .dir_listing_enabled(false)
6405 .build()
6406 .await
6407 .unwrap();
6408
6409 let mut declare_req = DeclareTableRequest::new();
6410 declare_req.id = Some(vec!["test_table".to_string()]);
6411 namespace.declare_table(declare_req).await.unwrap();
6412
6413 let mut insert_req = InsertIntoTableRequest::new();
6414 insert_req.id = Some(vec!["test_table".to_string()]);
6415 namespace
6416 .insert_into_table(
6417 insert_req,
6418 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6419 )
6420 .await
6421 .unwrap();
6422
6423 let mut describe_req = DescribeTableRequest::new();
6424 describe_req.id = Some(vec!["test_table".to_string()]);
6425 describe_req.load_detailed_metadata = Some(true);
6426 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6427 assert_eq!(describe_response.is_only_declared, Some(false));
6428 assert_eq!(describe_response.version, Some(1));
6429
6430 let mut list_req = ListTablesRequest::new();
6431 list_req.id = Some(vec![]);
6432 list_req.include_declared = Some(false);
6433 assert_eq!(
6434 namespace.list_tables(list_req).await.unwrap().tables,
6435 vec!["test_table".to_string()]
6436 );
6437 }
6438
6439 #[tokio::test]
6440 async fn test_create_table_after_declare_table_with_manifest_creates_table() {
6441 use lance_namespace::models::{
6442 CreateTableRequest, DeclareTableRequest, DescribeTableRequest, ListTablesRequest,
6443 };
6444
6445 let temp_dir = TempStdDir::default();
6446 let temp_path = temp_dir.to_str().unwrap();
6447
6448 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6449 .manifest_enabled(true)
6450 .dir_listing_enabled(false)
6451 .build()
6452 .await
6453 .unwrap();
6454
6455 let mut declare_req = DeclareTableRequest::new();
6456 declare_req.id = Some(vec!["test_table".to_string()]);
6457 declare_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6458 namespace.declare_table(declare_req).await.unwrap();
6459
6460 let mut create_req = CreateTableRequest::new();
6461 create_req.id = Some(vec!["test_table".to_string()]);
6462 create_req.mode = Some("Overwrite".to_string());
6463 let response = namespace
6464 .create_table(
6465 create_req,
6466 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6467 )
6468 .await
6469 .unwrap();
6470
6471 assert_eq!(response.version, Some(1));
6472 assert_eq!(
6473 response
6474 .properties
6475 .as_ref()
6476 .and_then(|properties| properties.get("owner")),
6477 Some(&"alice".to_string())
6478 );
6479
6480 let mut describe_req = DescribeTableRequest::new();
6481 describe_req.id = Some(vec!["test_table".to_string()]);
6482 describe_req.load_detailed_metadata = Some(true);
6483 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6484 assert_eq!(describe_response.is_only_declared, Some(false));
6485 assert_eq!(describe_response.version, Some(1));
6486 assert_eq!(
6487 describe_response
6488 .properties
6489 .as_ref()
6490 .and_then(|properties| properties.get("owner")),
6491 Some(&"alice".to_string())
6492 );
6493
6494 let mut list_req = ListTablesRequest::new();
6495 list_req.id = Some(vec![]);
6496 list_req.include_declared = Some(false);
6497 assert_eq!(
6498 namespace.list_tables(list_req).await.unwrap().tables,
6499 vec!["test_table".to_string()]
6500 );
6501 }
6502
6503 #[tokio::test]
6504 async fn test_create_table_after_declare_table_with_manifest_rejects_new_properties() {
6505 use lance_namespace::models::{CreateTableRequest, DeclareTableRequest};
6506
6507 let temp_dir = TempStdDir::default();
6508 let temp_path = temp_dir.to_str().unwrap();
6509
6510 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6511 .manifest_enabled(true)
6512 .dir_listing_enabled(false)
6513 .build()
6514 .await
6515 .unwrap();
6516
6517 let mut declare_req = DeclareTableRequest::new();
6518 declare_req.id = Some(vec!["test_table".to_string()]);
6519 declare_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6520 namespace.declare_table(declare_req).await.unwrap();
6521
6522 let mut create_req = CreateTableRequest::new();
6523 create_req.id = Some(vec!["test_table".to_string()]);
6524 create_req.properties = Some(HashMap::from([("owner".to_string(), "bob".to_string())]));
6525
6526 let result = namespace
6527 .create_table(
6528 create_req,
6529 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6530 )
6531 .await;
6532
6533 assert!(result.is_err());
6534 assert!(
6535 result
6536 .unwrap_err()
6537 .to_string()
6538 .contains("cannot set properties for already declared table")
6539 );
6540 }
6541
6542 #[tokio::test]
6543 async fn test_create_table_with_manifest_exist_ok_keeps_existing_table() {
6544 use lance_namespace::models::{CreateTableRequest, DescribeTableRequest};
6545
6546 let temp_dir = TempStdDir::default();
6547 let temp_path = temp_dir.to_str().unwrap();
6548
6549 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6550 .manifest_enabled(true)
6551 .dir_listing_enabled(false)
6552 .build()
6553 .await
6554 .unwrap();
6555
6556 let mut create_req = CreateTableRequest::new();
6557 create_req.id = Some(vec!["test_table".to_string()]);
6558 create_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6559 namespace
6560 .create_table(
6561 create_req,
6562 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6563 )
6564 .await
6565 .unwrap();
6566
6567 let mut create_req = CreateTableRequest::new();
6568 create_req.id = Some(vec!["test_table".to_string()]);
6569 create_req.mode = Some("ExistOk".to_string());
6570 create_req.properties = Some(HashMap::from([("owner".to_string(), "bob".to_string())]));
6571 let response = namespace
6572 .create_table(
6573 create_req,
6574 bytes::Bytes::from(create_single_row_test_ipc_data()),
6575 )
6576 .await
6577 .unwrap();
6578
6579 assert_eq!(
6580 response
6581 .properties
6582 .as_ref()
6583 .and_then(|properties| properties.get("owner")),
6584 Some(&"alice".to_string())
6585 );
6586 assert_eq!(
6587 open_dataset(&namespace, "test_table")
6588 .await
6589 .count_rows(None)
6590 .await
6591 .unwrap(),
6592 2
6593 );
6594
6595 let mut describe_req = DescribeTableRequest::new();
6596 describe_req.id = Some(vec!["test_table".to_string()]);
6597 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6598 assert_eq!(
6599 describe_response
6600 .properties
6601 .as_ref()
6602 .and_then(|properties| properties.get("owner")),
6603 Some(&"alice".to_string())
6604 );
6605 }
6606
6607 #[tokio::test]
6608 async fn test_create_table_with_manifest_overwrite_replaces_existing_table() {
6609 use lance_namespace::models::{CreateTableRequest, DescribeTableRequest};
6610
6611 let temp_dir = TempStdDir::default();
6612 let temp_path = temp_dir.to_str().unwrap();
6613
6614 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6615 .manifest_enabled(true)
6616 .dir_listing_enabled(false)
6617 .build()
6618 .await
6619 .unwrap();
6620
6621 let mut create_req = CreateTableRequest::new();
6622 create_req.id = Some(vec!["test_table".to_string()]);
6623 create_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6624 namespace
6625 .create_table(
6626 create_req,
6627 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6628 )
6629 .await
6630 .unwrap();
6631
6632 let mut create_req = CreateTableRequest::new();
6633 create_req.id = Some(vec!["test_table".to_string()]);
6634 create_req.mode = Some("overwrite".to_string());
6635 create_req.properties = Some(HashMap::from([("owner".to_string(), "bob".to_string())]));
6636 let response = namespace
6637 .create_table(
6638 create_req,
6639 bytes::Bytes::from(create_single_row_test_ipc_data()),
6640 )
6641 .await
6642 .unwrap();
6643
6644 assert_eq!(response.version, Some(2));
6645 assert_eq!(
6646 response
6647 .properties
6648 .as_ref()
6649 .and_then(|properties| properties.get("owner")),
6650 Some(&"bob".to_string())
6651 );
6652 assert_eq!(
6653 open_dataset(&namespace, "test_table")
6654 .await
6655 .count_rows(None)
6656 .await
6657 .unwrap(),
6658 1
6659 );
6660
6661 let mut describe_req = DescribeTableRequest::new();
6662 describe_req.id = Some(vec!["test_table".to_string()]);
6663 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6664 assert_eq!(
6665 describe_response
6666 .properties
6667 .as_ref()
6668 .and_then(|properties| properties.get("owner")),
6669 Some(&"bob".to_string())
6670 );
6671 }
6672
6673 #[tokio::test]
6674 async fn test_create_table_with_manifest_invalid_mode_rejected() {
6675 use lance_namespace::models::CreateTableRequest;
6676
6677 let temp_dir = TempStdDir::default();
6678 let temp_path = temp_dir.to_str().unwrap();
6679
6680 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6681 .manifest_enabled(true)
6682 .dir_listing_enabled(false)
6683 .build()
6684 .await
6685 .unwrap();
6686
6687 let mut create_req = CreateTableRequest::new();
6688 create_req.id = Some(vec!["test_table".to_string()]);
6689 create_req.mode = Some("append".to_string());
6690 let result = namespace
6691 .create_table(
6692 create_req,
6693 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6694 )
6695 .await;
6696
6697 assert!(result.is_err());
6698 assert!(
6699 result
6700 .unwrap_err()
6701 .to_string()
6702 .contains("Unsupported create_table mode")
6703 );
6704 }
6705
6706 #[tokio::test]
6707 async fn test_merge_insert_into_declared_table_v1_mode_creates_table() {
6708 use lance_namespace::models::{
6709 DeclareTableRequest, DescribeTableRequest, ListTablesRequest,
6710 MergeInsertIntoTableRequest,
6711 };
6712
6713 let temp_dir = TempStdDir::default();
6714 let temp_path = temp_dir.to_str().unwrap();
6715
6716 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6717 .manifest_enabled(false)
6718 .build()
6719 .await
6720 .unwrap();
6721
6722 let mut declare_req = DeclareTableRequest::new();
6723 declare_req.id = Some(vec!["test_table".to_string()]);
6724 namespace.declare_table(declare_req).await.unwrap();
6725
6726 let mut merge_req = MergeInsertIntoTableRequest::new();
6727 merge_req.id = Some(vec!["test_table".to_string()]);
6728 merge_req.on = Some("id".to_string());
6729 let response = namespace
6730 .merge_insert_into_table(
6731 merge_req,
6732 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6733 )
6734 .await
6735 .unwrap();
6736
6737 assert_eq!(response.num_inserted_rows, Some(2));
6738 assert_eq!(response.num_updated_rows, Some(0));
6739
6740 let mut describe_req = DescribeTableRequest::new();
6741 describe_req.id = Some(vec!["test_table".to_string()]);
6742 describe_req.load_detailed_metadata = Some(true);
6743 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6744 assert_eq!(describe_response.is_only_declared, Some(false));
6745 assert_eq!(describe_response.version, Some(1));
6746
6747 let mut list_req = ListTablesRequest::new();
6748 list_req.id = Some(vec![]);
6749 list_req.include_declared = Some(false);
6750 assert_eq!(
6751 namespace.list_tables(list_req).await.unwrap().tables,
6752 vec!["test_table".to_string()]
6753 );
6754 }
6755
6756 #[tokio::test]
6757 async fn test_merge_insert_into_declared_table_with_manifest_creates_table() {
6758 use lance_namespace::models::{
6759 DeclareTableRequest, DescribeTableRequest, ListTablesRequest,
6760 MergeInsertIntoTableRequest,
6761 };
6762
6763 let temp_dir = TempStdDir::default();
6764 let temp_path = temp_dir.to_str().unwrap();
6765
6766 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6767 .manifest_enabled(true)
6768 .dir_listing_enabled(false)
6769 .build()
6770 .await
6771 .unwrap();
6772
6773 let mut declare_req = DeclareTableRequest::new();
6774 declare_req.id = Some(vec!["test_table".to_string()]);
6775 namespace.declare_table(declare_req).await.unwrap();
6776
6777 let mut merge_req = MergeInsertIntoTableRequest::new();
6778 merge_req.id = Some(vec!["test_table".to_string()]);
6779 merge_req.on = Some("id".to_string());
6780 let response = namespace
6781 .merge_insert_into_table(
6782 merge_req,
6783 bytes::Bytes::from(create_non_empty_test_ipc_data()),
6784 )
6785 .await
6786 .unwrap();
6787
6788 assert_eq!(response.num_inserted_rows, Some(2));
6789 assert_eq!(response.num_updated_rows, Some(0));
6790
6791 let mut describe_req = DescribeTableRequest::new();
6792 describe_req.id = Some(vec!["test_table".to_string()]);
6793 describe_req.load_detailed_metadata = Some(true);
6794 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6795 assert_eq!(describe_response.is_only_declared, Some(false));
6796 assert_eq!(describe_response.version, Some(1));
6797
6798 let mut list_req = ListTablesRequest::new();
6799 list_req.id = Some(vec![]);
6800 list_req.include_declared = Some(false);
6801 assert_eq!(
6802 namespace.list_tables(list_req).await.unwrap().tables,
6803 vec!["test_table".to_string()]
6804 );
6805 }
6806
6807 #[tokio::test]
6808 async fn test_declare_table_with_manifest() {
6809 use lance_namespace::models::{
6810 DeclareTableRequest, DescribeTableRequest, ListTablesRequest, TableExistsRequest,
6811 };
6812
6813 let temp_dir = TempStdDir::default();
6814 let temp_path = temp_dir.to_str().unwrap();
6815
6816 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6818 .manifest_enabled(true)
6819 .dir_listing_enabled(false)
6820 .build()
6821 .await
6822 .unwrap();
6823
6824 let mut declare_req = DeclareTableRequest::new();
6826 declare_req.id = Some(vec!["test_table".to_string()]);
6827 declare_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6828 let response = namespace.declare_table(declare_req).await.unwrap();
6829
6830 assert!(response.location.is_some());
6832 assert_eq!(
6833 response
6834 .properties
6835 .as_ref()
6836 .and_then(|properties| properties.get("owner")),
6837 Some(&"alice".to_string())
6838 );
6839
6840 let mut exists_req = TableExistsRequest::new();
6842 exists_req.id = Some(vec!["test_table".to_string()]);
6843 assert!(namespace.table_exists(exists_req).await.is_ok());
6844
6845 let mut describe_req = DescribeTableRequest::new();
6846 describe_req.id = Some(vec!["test_table".to_string()]);
6847 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6848 assert_eq!(describe_response.is_only_declared, None);
6849
6850 let mut describe_req = DescribeTableRequest::new();
6851 describe_req.id = Some(vec!["test_table".to_string()]);
6852 describe_req.check_declared = Some(true);
6853 let describe_response = namespace.describe_table(describe_req).await.unwrap();
6854 assert_eq!(describe_response.is_only_declared, Some(true));
6855 assert_eq!(
6856 describe_response
6857 .properties
6858 .as_ref()
6859 .and_then(|properties| properties.get("owner")),
6860 Some(&"alice".to_string())
6861 );
6862
6863 let mut list_req = ListTablesRequest::new();
6864 list_req.id = Some(vec![]);
6865 assert_eq!(
6866 namespace
6867 .list_tables(list_req.clone())
6868 .await
6869 .unwrap()
6870 .tables,
6871 vec!["test_table".to_string()]
6872 );
6873 list_req.include_declared = Some(false);
6874 assert!(
6875 namespace
6876 .list_tables(list_req)
6877 .await
6878 .unwrap()
6879 .tables
6880 .is_empty()
6881 );
6882 }
6883
6884 #[tokio::test]
6885 async fn test_declare_table_when_table_exists() {
6886 use lance_namespace::models::DeclareTableRequest;
6887
6888 let temp_dir = TempStdDir::default();
6889 let temp_path = temp_dir.to_str().unwrap();
6890
6891 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6892 .manifest_enabled(false)
6893 .build()
6894 .await
6895 .unwrap();
6896
6897 let schema = create_test_schema();
6899 let ipc_data = create_test_ipc_data(&schema);
6900 let mut create_req = CreateTableRequest::new();
6901 create_req.id = Some(vec!["test_table".to_string()]);
6902 namespace
6903 .create_table(create_req, bytes::Bytes::from(ipc_data))
6904 .await
6905 .unwrap();
6906
6907 let mut declare_req = DeclareTableRequest::new();
6909 declare_req.id = Some(vec!["test_table".to_string()]);
6910 let result = namespace.declare_table(declare_req).await;
6911 assert!(result.is_err());
6912 }
6913
6914 #[tokio::test]
6919 async fn test_deregister_table_v1_mode() {
6920 use lance_namespace::models::{DeregisterTableRequest, TableExistsRequest};
6921
6922 let temp_dir = TempStdDir::default();
6923 let temp_path = temp_dir.to_str().unwrap();
6924
6925 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6927 .manifest_enabled(false)
6928 .dir_listing_enabled(true)
6929 .build()
6930 .await
6931 .unwrap();
6932
6933 let schema = create_test_schema();
6935 let ipc_data = create_test_ipc_data(&schema);
6936 let mut create_req = CreateTableRequest::new();
6937 create_req.id = Some(vec!["test_table".to_string()]);
6938 namespace
6939 .create_table(create_req, bytes::Bytes::from(ipc_data))
6940 .await
6941 .unwrap();
6942
6943 let mut exists_req = TableExistsRequest::new();
6945 exists_req.id = Some(vec!["test_table".to_string()]);
6946 assert!(namespace.table_exists(exists_req.clone()).await.is_ok());
6947
6948 let mut deregister_req = DeregisterTableRequest::new();
6950 deregister_req.id = Some(vec!["test_table".to_string()]);
6951 let response = namespace.deregister_table(deregister_req).await.unwrap();
6952
6953 assert!(response.location.is_some());
6955 let location = response.location.as_ref().unwrap();
6956 assert!(location.contains("test_table"));
6957
6958 let result = namespace.table_exists(exists_req).await;
6960 assert!(result.is_err());
6961 assert!(result.unwrap_err().to_string().contains("deregistered"));
6962
6963 let dataset = Dataset::open(location).await;
6965 assert!(dataset.is_ok(), "Physical table data should still exist");
6966 }
6967
6968 #[tokio::test]
6969 async fn test_deregister_table_v1_already_deregistered() {
6970 use lance_namespace::models::DeregisterTableRequest;
6971
6972 let temp_dir = TempStdDir::default();
6973 let temp_path = temp_dir.to_str().unwrap();
6974
6975 let namespace = DirectoryNamespaceBuilder::new(temp_path)
6976 .manifest_enabled(false)
6977 .dir_listing_enabled(true)
6978 .build()
6979 .await
6980 .unwrap();
6981
6982 let schema = create_test_schema();
6984 let ipc_data = create_test_ipc_data(&schema);
6985 let mut create_req = CreateTableRequest::new();
6986 create_req.id = Some(vec!["test_table".to_string()]);
6987 namespace
6988 .create_table(create_req, bytes::Bytes::from(ipc_data))
6989 .await
6990 .unwrap();
6991
6992 let mut deregister_req = DeregisterTableRequest::new();
6994 deregister_req.id = Some(vec!["test_table".to_string()]);
6995 namespace
6996 .deregister_table(deregister_req.clone())
6997 .await
6998 .unwrap();
6999
7000 let result = namespace.deregister_table(deregister_req).await;
7002 assert!(result.is_err());
7003 assert!(
7004 result
7005 .unwrap_err()
7006 .to_string()
7007 .contains("already deregistered")
7008 );
7009 }
7010
7011 #[tokio::test]
7016 async fn test_list_tables_skips_deregistered_v1() {
7017 use lance_namespace::models::DeregisterTableRequest;
7018
7019 let temp_dir = TempStdDir::default();
7020 let temp_path = temp_dir.to_str().unwrap();
7021
7022 let namespace = DirectoryNamespaceBuilder::new(temp_path)
7023 .manifest_enabled(false)
7024 .dir_listing_enabled(true)
7025 .build()
7026 .await
7027 .unwrap();
7028
7029 let schema = create_test_schema();
7031 let ipc_data = create_test_ipc_data(&schema);
7032
7033 let mut create_req1 = CreateTableRequest::new();
7034 create_req1.id = Some(vec!["table1".to_string()]);
7035 namespace
7036 .create_table(create_req1, bytes::Bytes::from(ipc_data.clone()))
7037 .await
7038 .unwrap();
7039
7040 let mut create_req2 = CreateTableRequest::new();
7041 create_req2.id = Some(vec!["table2".to_string()]);
7042 namespace
7043 .create_table(create_req2, bytes::Bytes::from(ipc_data))
7044 .await
7045 .unwrap();
7046
7047 let mut list_req = ListTablesRequest::new();
7049 list_req.id = Some(vec![]);
7050 let list_response = namespace.list_tables(list_req.clone()).await.unwrap();
7051 assert_eq!(list_response.tables.len(), 2);
7052
7053 let mut deregister_req = DeregisterTableRequest::new();
7055 deregister_req.id = Some(vec!["table1".to_string()]);
7056 namespace.deregister_table(deregister_req).await.unwrap();
7057
7058 let list_response = namespace.list_tables(list_req).await.unwrap();
7060 assert_eq!(list_response.tables.len(), 1);
7061 assert!(list_response.tables.contains(&"table2".to_string()));
7062 assert!(!list_response.tables.contains(&"table1".to_string()));
7063 }
7064
7065 #[tokio::test]
7070 async fn test_describe_table_fails_for_deregistered_v1() {
7071 use lance_namespace::models::{DeregisterTableRequest, DescribeTableRequest};
7072
7073 let temp_dir = TempStdDir::default();
7074 let temp_path = temp_dir.to_str().unwrap();
7075
7076 let namespace = DirectoryNamespaceBuilder::new(temp_path)
7077 .manifest_enabled(false)
7078 .dir_listing_enabled(true)
7079 .build()
7080 .await
7081 .unwrap();
7082
7083 let schema = create_test_schema();
7085 let ipc_data = create_test_ipc_data(&schema);
7086 let mut create_req = CreateTableRequest::new();
7087 create_req.id = Some(vec!["test_table".to_string()]);
7088 namespace
7089 .create_table(create_req, bytes::Bytes::from(ipc_data))
7090 .await
7091 .unwrap();
7092
7093 let mut describe_req = DescribeTableRequest::new();
7095 describe_req.id = Some(vec!["test_table".to_string()]);
7096 assert!(namespace.describe_table(describe_req.clone()).await.is_ok());
7097
7098 let mut deregister_req = DeregisterTableRequest::new();
7100 deregister_req.id = Some(vec!["test_table".to_string()]);
7101 namespace.deregister_table(deregister_req).await.unwrap();
7102
7103 let result = namespace.describe_table(describe_req).await;
7105 assert!(result.is_err());
7106 let err = result.unwrap_err();
7107 assert!(matches!(err, Error::Namespace { .. }));
7108 let err_msg = err.to_string();
7109 assert!(err_msg.contains("deregistered"));
7110 assert!(err_msg.contains("table id 'test_table'"));
7111 }
7112
7113 #[tokio::test]
7114 async fn test_table_exists_fails_for_deregistered_v1() {
7115 use lance_namespace::models::{DeregisterTableRequest, TableExistsRequest};
7116
7117 let temp_dir = TempStdDir::default();
7118 let temp_path = temp_dir.to_str().unwrap();
7119
7120 let namespace = DirectoryNamespaceBuilder::new(temp_path)
7121 .manifest_enabled(false)
7122 .dir_listing_enabled(true)
7123 .build()
7124 .await
7125 .unwrap();
7126
7127 let schema = create_test_schema();
7129 let ipc_data = create_test_ipc_data(&schema);
7130 let mut create_req = CreateTableRequest::new();
7131 create_req.id = Some(vec!["test_table".to_string()]);
7132 namespace
7133 .create_table(create_req, bytes::Bytes::from(ipc_data))
7134 .await
7135 .unwrap();
7136
7137 let mut exists_req = TableExistsRequest::new();
7139 exists_req.id = Some(vec!["test_table".to_string()]);
7140 assert!(namespace.table_exists(exists_req.clone()).await.is_ok());
7141
7142 let mut deregister_req = DeregisterTableRequest::new();
7144 deregister_req.id = Some(vec!["test_table".to_string()]);
7145 namespace.deregister_table(deregister_req).await.unwrap();
7146
7147 let result = namespace.table_exists(exists_req).await;
7149 assert!(result.is_err());
7150 let err = result.unwrap_err();
7151 assert!(matches!(err, Error::Namespace { .. }));
7152 let err_msg = err.to_string();
7153 assert!(err_msg.contains("deregistered"));
7154 assert!(err_msg.contains("table id 'test_table'"));
7155 }
7156
7157 #[tokio::test]
7158 async fn test_atomic_table_status_check() {
7159 let temp_dir = TempStdDir::default();
7163 let temp_path = temp_dir.to_str().unwrap();
7164
7165 let namespace = DirectoryNamespaceBuilder::new(temp_path)
7166 .manifest_enabled(false)
7167 .dir_listing_enabled(true)
7168 .build()
7169 .await
7170 .unwrap();
7171
7172 let schema = create_test_schema();
7174 let ipc_data = create_test_ipc_data(&schema);
7175 let mut create_req = CreateTableRequest::new();
7176 create_req.id = Some(vec!["test_table".to_string()]);
7177 namespace
7178 .create_table(create_req, bytes::Bytes::from(ipc_data))
7179 .await
7180 .unwrap();
7181
7182 let status = namespace.check_table_status("test_table").await;
7184 assert!(status.exists);
7185 assert!(!status.is_deregistered);
7186 assert!(!status.has_reserved_file);
7187 }
7188
7189 #[tokio::test]
7190 async fn test_table_version_tracking_enabled_managed_versioning() {
7191 use lance_namespace::models::DescribeTableRequest;
7192
7193 let temp_dir = TempStdDir::default();
7194 let temp_path = temp_dir.to_str().unwrap();
7195
7196 let namespace = DirectoryNamespaceBuilder::new(temp_path)
7198 .table_version_tracking_enabled(true)
7199 .build()
7200 .await
7201 .unwrap();
7202
7203 let schema = create_test_schema();
7205 let ipc_data = create_test_ipc_data(&schema);
7206 let mut create_req = CreateTableRequest::new();
7207 create_req.id = Some(vec!["test_table".to_string()]);
7208 namespace
7209 .create_table(create_req, bytes::Bytes::from(ipc_data))
7210 .await
7211 .unwrap();
7212
7213 let mut describe_req = DescribeTableRequest::new();
7215 describe_req.id = Some(vec!["test_table".to_string()]);
7216 let describe_resp = namespace.describe_table(describe_req).await.unwrap();
7217
7218 assert_eq!(
7220 describe_resp.managed_versioning,
7221 Some(true),
7222 "managed_versioning should be true when table_version_tracking_enabled=true"
7223 );
7224 }
7225
7226 #[tokio::test]
7227 async fn test_table_version_tracking_disabled_no_managed_versioning() {
7228 use lance_namespace::models::DescribeTableRequest;
7229
7230 let temp_dir = TempStdDir::default();
7231 let temp_path = temp_dir.to_str().unwrap();
7232
7233 let namespace = DirectoryNamespaceBuilder::new(temp_path)
7235 .table_version_tracking_enabled(false)
7236 .build()
7237 .await
7238 .unwrap();
7239
7240 let schema = create_test_schema();
7242 let ipc_data = create_test_ipc_data(&schema);
7243 let mut create_req = CreateTableRequest::new();
7244 create_req.id = Some(vec!["test_table".to_string()]);
7245 namespace
7246 .create_table(create_req, bytes::Bytes::from(ipc_data))
7247 .await
7248 .unwrap();
7249
7250 let mut describe_req = DescribeTableRequest::new();
7252 describe_req.id = Some(vec!["test_table".to_string()]);
7253 let describe_resp = namespace.describe_table(describe_req).await.unwrap();
7254
7255 assert!(
7257 describe_resp.managed_versioning.is_none(),
7258 "managed_versioning should be None when table_version_tracking_enabled=false, got: {:?}",
7259 describe_resp.managed_versioning
7260 );
7261 }
7262
7263 #[tokio::test]
7264 async fn test_list_table_versions() {
7265 use arrow::array::{Int32Array, RecordBatchIterator};
7266 use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
7267 use arrow::record_batch::RecordBatch;
7268 use lance::dataset::{Dataset, WriteMode, WriteParams};
7269 use lance_namespace::models::{CreateNamespaceRequest, ListTableVersionsRequest};
7270
7271 let temp_dir = TempStrDir::default();
7272 let temp_path: &str = &temp_dir;
7273
7274 let namespace: Arc<dyn LanceNamespace> = Arc::new(
7275 DirectoryNamespaceBuilder::new(temp_path)
7276 .table_version_tracking_enabled(true)
7277 .build()
7278 .await
7279 .unwrap(),
7280 );
7281
7282 let mut create_ns_req = CreateNamespaceRequest::new();
7284 create_ns_req.id = Some(vec!["workspace".to_string()]);
7285 namespace.create_namespace(create_ns_req).await.unwrap();
7286
7287 let table_id = vec!["workspace".to_string(), "test_table".to_string()];
7289 let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
7290 "id",
7291 DataType::Int32,
7292 false,
7293 )]));
7294 let batch = RecordBatch::try_new(
7295 arrow_schema.clone(),
7296 vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
7297 )
7298 .unwrap();
7299 let batches = RecordBatchIterator::new(vec![Ok(batch.clone())], arrow_schema.clone());
7300 let write_params = WriteParams {
7301 mode: WriteMode::Create,
7302 ..Default::default()
7303 };
7304 let mut dataset = Dataset::write_into_namespace(
7305 batches,
7306 namespace.clone(),
7307 table_id.clone(),
7308 Some(write_params),
7309 )
7310 .await
7311 .unwrap();
7312
7313 let batch2 = RecordBatch::try_new(
7315 arrow_schema.clone(),
7316 vec![Arc::new(Int32Array::from(vec![100, 200]))],
7317 )
7318 .unwrap();
7319 let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema.clone());
7320 dataset.append(batches, None).await.unwrap();
7321
7322 let batch3 = RecordBatch::try_new(
7324 arrow_schema.clone(),
7325 vec![Arc::new(Int32Array::from(vec![300, 400]))],
7326 )
7327 .unwrap();
7328 let batches = RecordBatchIterator::new(vec![Ok(batch3)], arrow_schema);
7329 dataset.append(batches, None).await.unwrap();
7330
7331 let mut list_req = ListTableVersionsRequest::new();
7333 list_req.id = Some(table_id.clone());
7334 let list_resp = namespace.list_table_versions(list_req).await.unwrap();
7335
7336 assert_eq!(
7337 list_resp.versions.len(),
7338 3,
7339 "Should have 3 versions, got: {:?}",
7340 list_resp.versions
7341 );
7342
7343 for expected_version in 1..=3 {
7345 let version = list_resp
7346 .versions
7347 .iter()
7348 .find(|v| v.version == expected_version)
7349 .unwrap_or_else(|| panic!("Expected version {}", expected_version));
7350
7351 assert!(
7352 !version.manifest_path.is_empty(),
7353 "manifest_path should be set for version {}",
7354 expected_version
7355 );
7356 assert!(
7357 version.manifest_path.contains(".manifest"),
7358 "manifest_path should contain .manifest for version {}",
7359 expected_version
7360 );
7361 assert!(
7362 version.manifest_size.is_some(),
7363 "manifest_size should be set for version {}",
7364 expected_version
7365 );
7366 assert!(
7367 version.manifest_size.unwrap() > 0,
7368 "manifest_size should be > 0 for version {}",
7369 expected_version
7370 );
7371 assert!(
7372 version.timestamp_millis.is_some(),
7373 "timestamp_millis should be set for version {}",
7374 expected_version
7375 );
7376 }
7377 }
7378
7379 #[tokio::test]
7380 async fn test_describe_table_version() {
7381 use arrow::array::{Int32Array, RecordBatchIterator};
7382 use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
7383 use arrow::record_batch::RecordBatch;
7384 use lance::dataset::{Dataset, WriteMode, WriteParams};
7385 use lance_namespace::models::{CreateNamespaceRequest, DescribeTableVersionRequest};
7386
7387 let temp_dir = TempStrDir::default();
7388 let temp_path: &str = &temp_dir;
7389
7390 let namespace: Arc<dyn LanceNamespace> = Arc::new(
7391 DirectoryNamespaceBuilder::new(temp_path)
7392 .table_version_tracking_enabled(true)
7393 .build()
7394 .await
7395 .unwrap(),
7396 );
7397
7398 let mut create_ns_req = CreateNamespaceRequest::new();
7400 create_ns_req.id = Some(vec!["workspace".to_string()]);
7401 namespace.create_namespace(create_ns_req).await.unwrap();
7402
7403 let table_id = vec!["workspace".to_string(), "test_table".to_string()];
7405 let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
7406 "id",
7407 DataType::Int32,
7408 false,
7409 )]));
7410 let batch = RecordBatch::try_new(
7411 arrow_schema.clone(),
7412 vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
7413 )
7414 .unwrap();
7415 let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema.clone());
7416 let write_params = WriteParams {
7417 mode: WriteMode::Create,
7418 ..Default::default()
7419 };
7420 let mut dataset = Dataset::write_into_namespace(
7421 batches,
7422 namespace.clone(),
7423 table_id.clone(),
7424 Some(write_params),
7425 )
7426 .await
7427 .unwrap();
7428
7429 let batch2 = RecordBatch::try_new(
7431 arrow_schema.clone(),
7432 vec![Arc::new(Int32Array::from(vec![100, 200]))],
7433 )
7434 .unwrap();
7435 let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema);
7436 dataset.append(batches, None).await.unwrap();
7437
7438 let mut describe_req = DescribeTableVersionRequest::new();
7440 describe_req.id = Some(table_id.clone());
7441 describe_req.version = Some(1);
7442 let describe_resp = namespace
7443 .describe_table_version(describe_req)
7444 .await
7445 .unwrap();
7446
7447 let version = &describe_resp.version;
7448 assert_eq!(version.version, 1);
7449 assert!(version.timestamp_millis.is_some());
7450 assert!(
7451 !version.manifest_path.is_empty(),
7452 "manifest_path should be set"
7453 );
7454 assert!(
7455 version.manifest_path.contains(".manifest"),
7456 "manifest_path should contain .manifest"
7457 );
7458 assert!(
7459 version.manifest_size.is_some(),
7460 "manifest_size should be set"
7461 );
7462 assert!(
7463 version.manifest_size.unwrap() > 0,
7464 "manifest_size should be > 0"
7465 );
7466
7467 let mut describe_req = DescribeTableVersionRequest::new();
7469 describe_req.id = Some(table_id.clone());
7470 describe_req.version = Some(2);
7471 let describe_resp = namespace
7472 .describe_table_version(describe_req)
7473 .await
7474 .unwrap();
7475
7476 let version = &describe_resp.version;
7477 assert_eq!(version.version, 2);
7478 assert!(version.timestamp_millis.is_some());
7479 assert!(
7480 !version.manifest_path.is_empty(),
7481 "manifest_path should be set"
7482 );
7483 assert!(
7484 version.manifest_size.is_some(),
7485 "manifest_size should be set"
7486 );
7487 assert!(
7488 version.manifest_size.unwrap() > 0,
7489 "manifest_size should be > 0"
7490 );
7491 }
7492
7493 #[tokio::test]
7494 async fn test_describe_table_version_latest() {
7495 use arrow::array::{Int32Array, RecordBatchIterator};
7496 use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
7497 use arrow::record_batch::RecordBatch;
7498 use lance::dataset::{Dataset, WriteMode, WriteParams};
7499 use lance_namespace::models::{CreateNamespaceRequest, DescribeTableVersionRequest};
7500
7501 let temp_dir = TempStrDir::default();
7502 let temp_path: &str = &temp_dir;
7503
7504 let namespace: Arc<dyn LanceNamespace> = Arc::new(
7505 DirectoryNamespaceBuilder::new(temp_path)
7506 .table_version_tracking_enabled(true)
7507 .build()
7508 .await
7509 .unwrap(),
7510 );
7511
7512 let mut create_ns_req = CreateNamespaceRequest::new();
7514 create_ns_req.id = Some(vec!["workspace".to_string()]);
7515 namespace.create_namespace(create_ns_req).await.unwrap();
7516
7517 let table_id = vec!["workspace".to_string(), "test_table".to_string()];
7519 let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
7520 "id",
7521 DataType::Int32,
7522 false,
7523 )]));
7524 let batch = RecordBatch::try_new(
7525 arrow_schema.clone(),
7526 vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
7527 )
7528 .unwrap();
7529 let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema.clone());
7530 let write_params = WriteParams {
7531 mode: WriteMode::Create,
7532 ..Default::default()
7533 };
7534 let mut dataset = Dataset::write_into_namespace(
7535 batches,
7536 namespace.clone(),
7537 table_id.clone(),
7538 Some(write_params),
7539 )
7540 .await
7541 .unwrap();
7542
7543 let batch2 = RecordBatch::try_new(
7545 arrow_schema.clone(),
7546 vec![Arc::new(Int32Array::from(vec![100, 200]))],
7547 )
7548 .unwrap();
7549 let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema.clone());
7550 dataset.append(batches, None).await.unwrap();
7551
7552 let batch3 = RecordBatch::try_new(
7554 arrow_schema.clone(),
7555 vec![Arc::new(Int32Array::from(vec![300, 400]))],
7556 )
7557 .unwrap();
7558 let batches = RecordBatchIterator::new(vec![Ok(batch3)], arrow_schema);
7559 dataset.append(batches, None).await.unwrap();
7560
7561 let mut describe_req = DescribeTableVersionRequest::new();
7563 describe_req.id = Some(table_id.clone());
7564 describe_req.version = None;
7565 let describe_resp = namespace
7566 .describe_table_version(describe_req)
7567 .await
7568 .unwrap();
7569
7570 assert_eq!(describe_resp.version.version, 3);
7572 }
7573
7574 #[tokio::test]
7575 async fn test_create_table_version() {
7576 use futures::TryStreamExt;
7577 use lance::dataset::builder::DatasetBuilder;
7578 use lance_namespace::models::CreateTableVersionRequest;
7579
7580 let temp_dir = TempStrDir::default();
7581 let temp_path: &str = &temp_dir;
7582
7583 let namespace: Arc<dyn LanceNamespace> = Arc::new(
7584 DirectoryNamespaceBuilder::new(temp_path)
7585 .table_version_tracking_enabled(true)
7586 .build()
7587 .await
7588 .unwrap(),
7589 );
7590
7591 let schema = create_test_schema();
7593 let ipc_data = create_test_ipc_data(&schema);
7594 let mut create_req = CreateTableRequest::new();
7595 create_req.id = Some(vec!["test_table".to_string()]);
7596 namespace
7597 .create_table(create_req, bytes::Bytes::from(ipc_data))
7598 .await
7599 .unwrap();
7600
7601 let table_id = vec!["test_table".to_string()];
7603 let dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone())
7604 .await
7605 .unwrap()
7606 .load()
7607 .await
7608 .unwrap();
7609
7610 let versions_path = dataset.versions_dir();
7612 let manifest_metas: Vec<_> = dataset
7613 .object_store()
7614 .inner
7615 .list(Some(&versions_path))
7616 .try_collect()
7617 .await
7618 .unwrap();
7619
7620 let manifest_meta = manifest_metas
7621 .iter()
7622 .find(|m| {
7623 m.location
7624 .filename()
7625 .map(|f| f.ends_with(".manifest"))
7626 .unwrap_or(false)
7627 })
7628 .expect("No manifest file found");
7629
7630 let manifest_data = dataset
7632 .object_store()
7633 .inner
7634 .get(&manifest_meta.location)
7635 .await
7636 .unwrap()
7637 .bytes()
7638 .await
7639 .unwrap();
7640
7641 let staging_path = dataset.versions_dir().child("staging_manifest");
7643 dataset
7644 .object_store()
7645 .inner
7646 .put(&staging_path, manifest_data.into())
7647 .await
7648 .unwrap();
7649
7650 let mut create_version_req = CreateTableVersionRequest::new(2, staging_path.to_string());
7653 create_version_req.id = Some(table_id.clone());
7654 create_version_req.naming_scheme = Some("V2".to_string());
7655
7656 let result = namespace.create_table_version(create_version_req).await;
7657 assert!(
7658 result.is_ok(),
7659 "create_table_version should succeed: {:?}",
7660 result
7661 );
7662
7663 let response = result.unwrap();
7665 let version_info = response
7666 .version
7667 .expect("response should contain version info");
7668 let version_2_path = Path::parse(&version_info.manifest_path).unwrap();
7669 let head_result = dataset.object_store().inner.head(&version_2_path).await;
7670 assert!(
7671 head_result.is_ok(),
7672 "Version 2 manifest should exist at {}",
7673 version_2_path
7674 );
7675
7676 let staging_head_result = dataset.object_store().inner.head(&staging_path).await;
7678 assert!(
7679 staging_head_result.is_err(),
7680 "Staging manifest should have been deleted after create_table_version"
7681 );
7682 }
7683
7684 #[tokio::test]
7685 async fn test_create_table_version_conflict() {
7686 use futures::TryStreamExt;
7689 use lance::dataset::builder::DatasetBuilder;
7690 use lance_namespace::models::CreateTableVersionRequest;
7691
7692 let temp_dir = TempStrDir::default();
7693 let temp_path: &str = &temp_dir;
7694
7695 let namespace: Arc<dyn LanceNamespace> = Arc::new(
7696 DirectoryNamespaceBuilder::new(temp_path)
7697 .table_version_tracking_enabled(true)
7698 .build()
7699 .await
7700 .unwrap(),
7701 );
7702
7703 let schema = create_test_schema();
7705 let ipc_data = create_test_ipc_data(&schema);
7706 let mut create_req = CreateTableRequest::new();
7707 create_req.id = Some(vec!["test_table".to_string()]);
7708 namespace
7709 .create_table(create_req, bytes::Bytes::from(ipc_data))
7710 .await
7711 .unwrap();
7712
7713 let table_id = vec!["test_table".to_string()];
7715 let dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone())
7716 .await
7717 .unwrap()
7718 .load()
7719 .await
7720 .unwrap();
7721
7722 let versions_path = dataset.versions_dir();
7724 let manifest_metas: Vec<_> = dataset
7725 .object_store()
7726 .inner
7727 .list(Some(&versions_path))
7728 .try_collect()
7729 .await
7730 .unwrap();
7731
7732 let manifest_meta = manifest_metas
7733 .iter()
7734 .find(|m| {
7735 m.location
7736 .filename()
7737 .map(|f| f.ends_with(".manifest"))
7738 .unwrap_or(false)
7739 })
7740 .expect("No manifest file found");
7741
7742 let manifest_data = dataset
7744 .object_store()
7745 .inner
7746 .get(&manifest_meta.location)
7747 .await
7748 .unwrap()
7749 .bytes()
7750 .await
7751 .unwrap();
7752
7753 let staging_path = dataset.versions_dir().child("staging_manifest");
7755 dataset
7756 .object_store()
7757 .inner
7758 .put(&staging_path, manifest_data.into())
7759 .await
7760 .unwrap();
7761
7762 let mut create_version_req = CreateTableVersionRequest::new(2, staging_path.to_string());
7764 create_version_req.id = Some(table_id.clone());
7765 create_version_req.naming_scheme = Some("V2".to_string());
7766 let first_result = namespace.create_table_version(create_version_req).await;
7767 assert!(
7768 first_result.is_ok(),
7769 "First create_table_version for version 2 should succeed: {:?}",
7770 first_result
7771 );
7772
7773 let version_2_path = Path::parse(
7775 &first_result
7776 .unwrap()
7777 .version
7778 .expect("response should contain version info")
7779 .manifest_path,
7780 )
7781 .unwrap();
7782
7783 let mut create_version_req = CreateTableVersionRequest::new(2, staging_path.to_string());
7785 create_version_req.id = Some(table_id.clone());
7786 create_version_req.naming_scheme = Some("V2".to_string());
7787
7788 let result = namespace.create_table_version(create_version_req).await;
7789 assert!(
7790 result.is_err(),
7791 "create_table_version should fail for existing version"
7792 );
7793
7794 let head_result = dataset.object_store().inner.head(&version_2_path).await;
7796 assert!(
7797 head_result.is_ok(),
7798 "Version 2 manifest should still exist at {}",
7799 version_2_path
7800 );
7801 }
7802
7803 #[tokio::test]
7804 async fn test_create_table_version_table_not_found() {
7805 use lance_namespace::models::CreateTableVersionRequest;
7806
7807 let temp_dir = TempStdDir::default();
7808 let temp_path = temp_dir.to_str().unwrap();
7809
7810 let namespace = DirectoryNamespaceBuilder::new(temp_path)
7811 .table_version_tracking_enabled(true)
7812 .build()
7813 .await
7814 .unwrap();
7815
7816 let mut create_version_req =
7818 CreateTableVersionRequest::new(1, "/some/staging/path".to_string());
7819 create_version_req.id = Some(vec!["non_existent_table".to_string()]);
7820
7821 let result = namespace.create_table_version(create_version_req).await;
7822 assert!(
7823 result.is_err(),
7824 "create_table_version should fail for non-existent table"
7825 );
7826 let err_msg = result.unwrap_err().to_string();
7827 assert!(
7828 err_msg.contains("Table not found"),
7829 "Error should mention table not found, got: {}",
7830 err_msg
7831 );
7832 }
7833
7834 mod e2e_table_version_tracking {
7836 use super::*;
7837 use std::sync::atomic::{AtomicUsize, Ordering};
7838
/// Test wrapper around a [`DirectoryNamespace`] that forwards every call to
/// the wrapped namespace and counts invocations of the table-version APIs.
struct TrackingNamespace {
    /// Namespace that actually serves the requests.
    inner: DirectoryNamespace,
    /// Number of `create_table_version` calls made through this wrapper.
    create_table_version_count: AtomicUsize,
    /// Number of `describe_table_version` calls made through this wrapper.
    describe_table_version_count: AtomicUsize,
    /// Number of `list_table_versions` calls made through this wrapper.
    list_table_versions_count: AtomicUsize,
}
7846
7847 impl TrackingNamespace {
7848 fn new(inner: DirectoryNamespace) -> Self {
7849 Self {
7850 inner,
7851 create_table_version_count: AtomicUsize::new(0),
7852 describe_table_version_count: AtomicUsize::new(0),
7853 list_table_versions_count: AtomicUsize::new(0),
7854 }
7855 }
7856
7857 fn create_table_version_calls(&self) -> usize {
7858 self.create_table_version_count.load(Ordering::SeqCst)
7859 }
7860
7861 fn describe_table_version_calls(&self) -> usize {
7862 self.describe_table_version_count.load(Ordering::SeqCst)
7863 }
7864
7865 fn list_table_versions_calls(&self) -> usize {
7866 self.list_table_versions_count.load(Ordering::SeqCst)
7867 }
7868 }
7869
7870 impl std::fmt::Debug for TrackingNamespace {
7871 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
7872 f.debug_struct("TrackingNamespace")
7873 .field(
7874 "create_table_version_calls",
7875 &self.create_table_version_calls(),
7876 )
7877 .finish()
7878 }
7879 }
7880
7881 #[async_trait]
7882 impl LanceNamespace for TrackingNamespace {
7883 async fn create_namespace(
7884 &self,
7885 request: CreateNamespaceRequest,
7886 ) -> Result<CreateNamespaceResponse> {
7887 self.inner.create_namespace(request).await
7888 }
7889
7890 async fn describe_namespace(
7891 &self,
7892 request: DescribeNamespaceRequest,
7893 ) -> Result<DescribeNamespaceResponse> {
7894 self.inner.describe_namespace(request).await
7895 }
7896
7897 async fn namespace_exists(&self, request: NamespaceExistsRequest) -> Result<()> {
7898 self.inner.namespace_exists(request).await
7899 }
7900
7901 async fn list_namespaces(
7902 &self,
7903 request: ListNamespacesRequest,
7904 ) -> Result<ListNamespacesResponse> {
7905 self.inner.list_namespaces(request).await
7906 }
7907
7908 async fn drop_namespace(
7909 &self,
7910 request: DropNamespaceRequest,
7911 ) -> Result<DropNamespaceResponse> {
7912 self.inner.drop_namespace(request).await
7913 }
7914
7915 async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
7916 self.inner.list_tables(request).await
7917 }
7918
7919 async fn describe_table(
7920 &self,
7921 request: DescribeTableRequest,
7922 ) -> Result<DescribeTableResponse> {
7923 self.inner.describe_table(request).await
7924 }
7925
7926 async fn table_exists(&self, request: TableExistsRequest) -> Result<()> {
7927 self.inner.table_exists(request).await
7928 }
7929
7930 async fn drop_table(&self, request: DropTableRequest) -> Result<DropTableResponse> {
7931 self.inner.drop_table(request).await
7932 }
7933
7934 async fn create_table(
7935 &self,
7936 request: CreateTableRequest,
7937 request_data: Bytes,
7938 ) -> Result<CreateTableResponse> {
7939 self.inner.create_table(request, request_data).await
7940 }
7941
7942 async fn declare_table(
7943 &self,
7944 request: DeclareTableRequest,
7945 ) -> Result<DeclareTableResponse> {
7946 self.inner.declare_table(request).await
7947 }
7948
7949 async fn list_table_versions(
7950 &self,
7951 request: ListTableVersionsRequest,
7952 ) -> Result<ListTableVersionsResponse> {
7953 self.list_table_versions_count
7954 .fetch_add(1, Ordering::SeqCst);
7955 self.inner.list_table_versions(request).await
7956 }
7957
7958 async fn create_table_version(
7959 &self,
7960 request: CreateTableVersionRequest,
7961 ) -> Result<CreateTableVersionResponse> {
7962 self.create_table_version_count
7963 .fetch_add(1, Ordering::SeqCst);
7964 self.inner.create_table_version(request).await
7965 }
7966
7967 async fn describe_table_version(
7968 &self,
7969 request: DescribeTableVersionRequest,
7970 ) -> Result<DescribeTableVersionResponse> {
7971 self.describe_table_version_count
7972 .fetch_add(1, Ordering::SeqCst);
7973 self.inner.describe_table_version(request).await
7974 }
7975
7976 async fn batch_delete_table_versions(
7977 &self,
7978 request: BatchDeleteTableVersionsRequest,
7979 ) -> Result<BatchDeleteTableVersionsResponse> {
7980 self.inner.batch_delete_table_versions(request).await
7981 }
7982
7983 fn namespace_id(&self) -> String {
7984 self.inner.namespace_id()
7985 }
7986 }
7987
7988 #[tokio::test]
7989 async fn test_describe_table_returns_managed_versioning() {
7990 use lance_namespace::models::{CreateNamespaceRequest, DescribeTableRequest};
7991
7992 let temp_dir = TempStdDir::default();
7993 let temp_path = temp_dir.to_str().unwrap();
7994
7995 let ns = DirectoryNamespaceBuilder::new(temp_path)
7997 .table_version_tracking_enabled(true)
7998 .manifest_enabled(true)
7999 .build()
8000 .await
8001 .unwrap();
8002
8003 let mut create_ns_req = CreateNamespaceRequest::new();
8005 create_ns_req.id = Some(vec!["workspace".to_string()]);
8006 ns.create_namespace(create_ns_req).await.unwrap();
8007
8008 let schema = create_test_schema();
8010 let ipc_data = create_test_ipc_data(&schema);
8011 let mut create_req = CreateTableRequest::new();
8012 create_req.id = Some(vec!["workspace".to_string(), "test_table".to_string()]);
8013 ns.create_table(create_req, bytes::Bytes::from(ipc_data))
8014 .await
8015 .unwrap();
8016
8017 let mut describe_req = DescribeTableRequest::new();
8019 describe_req.id = Some(vec!["workspace".to_string(), "test_table".to_string()]);
8020 let describe_resp = ns.describe_table(describe_req).await.unwrap();
8021
8022 assert_eq!(
8024 describe_resp.managed_versioning,
8025 Some(true),
8026 "managed_versioning should be true when table_version_tracking_enabled=true"
8027 );
8028 }
8029
8030 #[tokio::test]
8031 async fn test_external_manifest_store_invokes_namespace_apis() {
8032 use arrow::array::{Int32Array, StringArray};
8033 use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
8034 use arrow::record_batch::RecordBatch;
8035 use lance::Dataset;
8036 use lance::dataset::builder::DatasetBuilder;
8037 use lance::dataset::{WriteMode, WriteParams};
8038 use lance_namespace::models::CreateNamespaceRequest;
8039
8040 let temp_dir = TempStdDir::default();
8041 let temp_path = temp_dir.to_str().unwrap();
8042
8043 let inner_ns = DirectoryNamespaceBuilder::new(temp_path)
8045 .table_version_tracking_enabled(true)
8046 .manifest_enabled(true)
8047 .build()
8048 .await
8049 .unwrap();
8050
8051 let tracking_ns = Arc::new(TrackingNamespace::new(inner_ns));
8052 let ns: Arc<dyn LanceNamespace> = tracking_ns.clone();
8053
8054 let mut create_ns_req = CreateNamespaceRequest::new();
8056 create_ns_req.id = Some(vec!["workspace".to_string()]);
8057 ns.create_namespace(create_ns_req).await.unwrap();
8058
8059 let table_id = vec!["workspace".to_string(), "test_table".to_string()];
8061
8062 let arrow_schema = Arc::new(ArrowSchema::new(vec![
8064 Field::new("id", DataType::Int32, false),
8065 Field::new("name", DataType::Utf8, true),
8066 ]));
8067 let batch = RecordBatch::try_new(
8068 arrow_schema.clone(),
8069 vec![
8070 Arc::new(Int32Array::from(vec![1, 2, 3])),
8071 Arc::new(StringArray::from(vec!["a", "b", "c"])),
8072 ],
8073 )
8074 .unwrap();
8075
8076 let batches = RecordBatchIterator::new(vec![Ok(batch.clone())], arrow_schema.clone());
8078 let write_params = WriteParams {
8079 mode: WriteMode::Create,
8080 ..Default::default()
8081 };
8082 let mut dataset = Dataset::write_into_namespace(
8083 batches,
8084 ns.clone(),
8085 table_id.clone(),
8086 Some(write_params),
8087 )
8088 .await
8089 .unwrap();
8090 assert_eq!(dataset.version().version, 1);
8091
8092 assert_eq!(
8094 tracking_ns.create_table_version_calls(),
8095 1,
8096 "create_table_version should have been called once during initial write_into_namespace"
8097 );
8098
8099 let append_batch = RecordBatch::try_new(
8101 arrow_schema.clone(),
8102 vec![
8103 Arc::new(Int32Array::from(vec![4, 5, 6])),
8104 Arc::new(StringArray::from(vec!["d", "e", "f"])),
8105 ],
8106 )
8107 .unwrap();
8108 let append_batches = RecordBatchIterator::new(vec![Ok(append_batch)], arrow_schema);
8109 dataset.append(append_batches, None).await.unwrap();
8110
8111 assert_eq!(
8112 tracking_ns.create_table_version_calls(),
8113 2,
8114 "create_table_version should have been called twice (once for create, once for append)"
8115 );
8116
8117 let initial_list_calls = tracking_ns.list_table_versions_calls();
8119 let latest_dataset = DatasetBuilder::from_namespace(ns.clone(), table_id.clone())
8120 .await
8121 .unwrap()
8122 .load()
8123 .await
8124 .unwrap();
8125 assert_eq!(latest_dataset.version().version, 2);
8126 assert_eq!(
8127 tracking_ns.list_table_versions_calls(),
8128 initial_list_calls + 1,
8129 "list_table_versions should have been called exactly once during checkout_latest"
8130 );
8131
8132 let initial_describe_calls = tracking_ns.describe_table_version_calls();
8134 let v1_dataset = DatasetBuilder::from_namespace(ns.clone(), table_id.clone())
8135 .await
8136 .unwrap()
8137 .with_version(1)
8138 .load()
8139 .await
8140 .unwrap();
8141 assert_eq!(v1_dataset.version().version, 1);
8142 assert_eq!(
8143 tracking_ns.describe_table_version_calls(),
8144 initial_describe_calls + 1,
8145 "describe_table_version should have been called exactly once during checkout to version 1"
8146 );
8147 }
8148
8149 #[tokio::test]
8150 async fn test_dataset_commit_with_external_manifest_store() {
8151 use arrow::array::{Int32Array, StringArray};
8152 use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
8153 use arrow::record_batch::RecordBatch;
8154 use futures::TryStreamExt;
8155 use lance::dataset::{Dataset, WriteMode, WriteParams};
8156 use lance_namespace::models::CreateNamespaceRequest;
8157 use lance_table::io::commit::ManifestNamingScheme;
8158
8159 let temp_dir = TempStdDir::default();
8160 let temp_path = temp_dir.to_str().unwrap();
8161
8162 let inner_ns = DirectoryNamespaceBuilder::new(temp_path)
8164 .table_version_tracking_enabled(true)
8165 .manifest_enabled(true)
8166 .build()
8167 .await
8168 .unwrap();
8169
8170 let tracking_ns: Arc<dyn LanceNamespace> = Arc::new(TrackingNamespace::new(inner_ns));
8171
8172 let mut create_ns_req = CreateNamespaceRequest::new();
8174 create_ns_req.id = Some(vec!["workspace".to_string()]);
8175 tracking_ns.create_namespace(create_ns_req).await.unwrap();
8176
8177 let table_id = vec!["workspace".to_string(), "test_table".to_string()];
8179 let arrow_schema = Arc::new(ArrowSchema::new(vec![
8180 Field::new("id", DataType::Int32, false),
8181 Field::new("name", DataType::Utf8, true),
8182 ]));
8183 let batch = RecordBatch::try_new(
8184 arrow_schema.clone(),
8185 vec![
8186 Arc::new(Int32Array::from(vec![1, 2, 3])),
8187 Arc::new(StringArray::from(vec!["a", "b", "c"])),
8188 ],
8189 )
8190 .unwrap();
8191 let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema.clone());
8192 let write_params = WriteParams {
8193 mode: WriteMode::Create,
8194 ..Default::default()
8195 };
8196 let dataset = Dataset::write_into_namespace(
8197 batches,
8198 tracking_ns.clone(),
8199 table_id.clone(),
8200 Some(write_params),
8201 )
8202 .await
8203 .unwrap();
8204 assert_eq!(dataset.version().version, 1);
8205
8206 let batch2 = RecordBatch::try_new(
8208 arrow_schema.clone(),
8209 vec![
8210 Arc::new(Int32Array::from(vec![4, 5, 6])),
8211 Arc::new(StringArray::from(vec!["d", "e", "f"])),
8212 ],
8213 )
8214 .unwrap();
8215 let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema);
8216 let write_params = WriteParams {
8217 mode: WriteMode::Append,
8218 ..Default::default()
8219 };
8220 Dataset::write_into_namespace(
8221 batches,
8222 tracking_ns.clone(),
8223 table_id.clone(),
8224 Some(write_params),
8225 )
8226 .await
8227 .unwrap();
8228
8229 let manifest_metas: Vec<_> = dataset
8232 .object_store()
8233 .inner
8234 .list(Some(&dataset.versions_dir()))
8235 .try_collect()
8236 .await
8237 .unwrap();
8238 let version_2_found = manifest_metas.iter().any(|m| {
8239 m.location
8240 .filename()
8241 .map(|f| {
8242 f.ends_with(".manifest")
8243 && ManifestNamingScheme::V2.parse_version(f) == Some(2)
8244 })
8245 .unwrap_or(false)
8246 });
8247 assert!(
8248 version_2_found,
8249 "Version 2 manifest should exist in versions directory"
8250 );
8251 }
8252
8253 async fn create_ns_with_table() -> (DirectoryNamespace, TempStdDir, Vec<String>) {
8255 use arrow::array::{Int32Array, StringArray};
8256 use arrow::ipc::writer::StreamWriter;
8257
8258 let (namespace, temp_dir) = create_test_namespace().await;
8259
8260 let schema = create_test_schema();
8261 let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8262 let arrow_schema = Arc::new(arrow_schema);
8263
8264 let id_array = Int32Array::from(vec![1, 2, 3]);
8265 let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie"]);
8266 let batch = arrow::record_batch::RecordBatch::try_new(
8267 arrow_schema.clone(),
8268 vec![Arc::new(id_array), Arc::new(name_array)],
8269 )
8270 .unwrap();
8271
8272 let mut buffer = Vec::new();
8273 {
8274 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8275 writer.write(&batch).unwrap();
8276 writer.finish().unwrap();
8277 }
8278
8279 let mut request = CreateTableRequest::new();
8280 let table_id = vec!["test_ops_table".to_string()];
8281 request.id = Some(table_id.clone());
8282
8283 namespace
8284 .create_table(request, Bytes::from(buffer))
8285 .await
8286 .unwrap();
8287
8288 (namespace, temp_dir, table_id)
8289 }
8290
8291 #[tokio::test]
8292 async fn test_count_table_rows_basic() {
8293 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8294
8295 let request = CountTableRowsRequest {
8296 id: Some(table_id),
8297 version: None,
8298 predicate: None,
8299 ..Default::default()
8300 };
8301
8302 let count = namespace.count_table_rows(request).await.unwrap();
8303 assert_eq!(count, 3);
8304 }
8305
8306 #[tokio::test]
8307 async fn test_count_table_rows_with_predicate() {
8308 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8309
8310 let request = CountTableRowsRequest {
8311 id: Some(table_id),
8312 version: None,
8313 predicate: Some("id > 1".to_string()),
8314 ..Default::default()
8315 };
8316
8317 let count = namespace.count_table_rows(request).await.unwrap();
8318 assert_eq!(count, 2);
8319 }
8320
8321 #[tokio::test]
8322 async fn test_query_table_invalid_distance_type() {
8323 let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8324
8325 let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8326 single_vector: Some(vec![1.0, 0.0, 0.0, 0.0]),
8327 multi_vector: None,
8328 });
8329
8330 let request = QueryTableRequest {
8331 id: Some(table_id),
8332 k: 2,
8333 vector,
8334 vector_column: Some("vector".to_string()),
8335 distance_type: Some("invalid_metric".to_string()),
8336 filter: None,
8337 offset: None,
8338 version: None,
8339 ..Default::default()
8340 };
8341
8342 let result = namespace.query_table(request).await;
8343 assert!(result.is_err());
8344 let err_msg = result.unwrap_err().to_string();
8345 assert!(
8346 err_msg.contains("Unknown distance type"),
8347 "Expected error about unknown distance type, got: {}",
8348 err_msg
8349 );
8350 }
8351
8352 #[tokio::test]
8353 async fn test_insert_into_table_append() {
8354 use arrow::array::{Int32Array, StringArray};
8355 use arrow::ipc::writer::StreamWriter;
8356
8357 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8358
8359 let schema = create_test_schema();
8361 let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8362 let arrow_schema = Arc::new(arrow_schema);
8363
8364 let id_array = Int32Array::from(vec![4, 5]);
8365 let name_array = StringArray::from(vec!["Dave", "Eve"]);
8366 let batch = arrow::record_batch::RecordBatch::try_new(
8367 arrow_schema.clone(),
8368 vec![Arc::new(id_array), Arc::new(name_array)],
8369 )
8370 .unwrap();
8371
8372 let mut buffer = Vec::new();
8373 {
8374 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8375 writer.write(&batch).unwrap();
8376 writer.finish().unwrap();
8377 }
8378
8379 let request = InsertIntoTableRequest {
8380 id: Some(table_id.clone()),
8381 mode: Some("append".to_string()),
8382 ..Default::default()
8383 };
8384
8385 let response = namespace
8386 .insert_into_table(request, Bytes::from(buffer))
8387 .await
8388 .unwrap();
8389 assert!(response.transaction_id.is_none());
8390
8391 let count_req = CountTableRowsRequest {
8393 id: Some(table_id),
8394 version: None,
8395 predicate: None,
8396 ..Default::default()
8397 };
8398 let count = namespace.count_table_rows(count_req).await.unwrap();
8399 assert_eq!(count, 5);
8400 }
8401
8402 #[tokio::test]
8403 async fn test_insert_into_table_overwrite() {
8404 use arrow::array::{Int32Array, StringArray};
8405 use arrow::ipc::writer::StreamWriter;
8406
8407 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8408
8409 let schema = create_test_schema();
8410 let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8411 let arrow_schema = Arc::new(arrow_schema);
8412
8413 let id_array = Int32Array::from(vec![10, 20]);
8414 let name_array = StringArray::from(vec!["X", "Y"]);
8415 let batch = arrow::record_batch::RecordBatch::try_new(
8416 arrow_schema.clone(),
8417 vec![Arc::new(id_array), Arc::new(name_array)],
8418 )
8419 .unwrap();
8420
8421 let mut buffer = Vec::new();
8422 {
8423 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8424 writer.write(&batch).unwrap();
8425 writer.finish().unwrap();
8426 }
8427
8428 let request = InsertIntoTableRequest {
8429 id: Some(table_id.clone()),
8430 mode: Some("overwrite".to_string()),
8431 ..Default::default()
8432 };
8433
8434 namespace
8435 .insert_into_table(request, Bytes::from(buffer))
8436 .await
8437 .unwrap();
8438
8439 let count_req = CountTableRowsRequest {
8441 id: Some(table_id),
8442 version: None,
8443 predicate: None,
8444 ..Default::default()
8445 };
8446 let count = namespace.count_table_rows(count_req).await.unwrap();
8447 assert_eq!(count, 2);
8448 }
8449
8450 #[tokio::test]
8451 async fn test_insert_into_table_empty_data() {
8452 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8453
8454 let request = InsertIntoTableRequest {
8455 id: Some(table_id),
8456 mode: None,
8457 ..Default::default()
8458 };
8459
8460 let result = namespace.insert_into_table(request, Bytes::new()).await;
8461 assert!(result.is_err());
8462 assert!(
8463 result
8464 .unwrap_err()
8465 .to_string()
8466 .contains("Arrow IPC stream) is required")
8467 );
8468 }
8469
8470 #[tokio::test]
8471 async fn test_insert_into_table_with_storage_options() {
8472 use arrow::array::{Int32Array, StringArray};
8473 use arrow::ipc::writer::StreamWriter;
8474
8475 let temp_dir = TempStdDir::default();
8476
8477 let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
8479 .storage_option("allow_http", "true")
8480 .build()
8481 .await
8482 .unwrap();
8483
8484 let schema = create_test_schema();
8486 let ipc_data = create_test_ipc_data(&schema);
8487 let mut create_req = CreateTableRequest::new();
8488 let table_id = vec!["so_table".to_string()];
8489 create_req.id = Some(table_id.clone());
8490 namespace
8491 .create_table(create_req, Bytes::from(ipc_data))
8492 .await
8493 .unwrap();
8494
8495 let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8497 let arrow_schema = Arc::new(arrow_schema);
8498
8499 let id_array = Int32Array::from(vec![10, 20]);
8500 let name_array = StringArray::from(vec!["X", "Y"]);
8501 let batch = arrow::record_batch::RecordBatch::try_new(
8502 arrow_schema.clone(),
8503 vec![Arc::new(id_array), Arc::new(name_array)],
8504 )
8505 .unwrap();
8506
8507 let mut buffer = Vec::new();
8508 {
8509 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8510 writer.write(&batch).unwrap();
8511 writer.finish().unwrap();
8512 }
8513
8514 let request = InsertIntoTableRequest {
8515 id: Some(table_id.clone()),
8516 mode: Some("append".to_string()),
8517 ..Default::default()
8518 };
8519
8520 let response = namespace
8521 .insert_into_table(request, Bytes::from(buffer))
8522 .await
8523 .unwrap();
8524 assert!(response.transaction_id.is_none());
8525
8526 let count_req = CountTableRowsRequest {
8528 id: Some(table_id),
8529 version: None,
8530 predicate: None,
8531 ..Default::default()
8532 };
8533 let count = namespace.count_table_rows(count_req).await.unwrap();
8534 assert_eq!(count, 2);
8535 }
8536
8537 #[tokio::test]
8538 async fn test_query_table_basic() {
8539 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8540
8541 let request = QueryTableRequest {
8542 id: Some(table_id),
8543 k: 10,
8544 filter: None,
8545 offset: None,
8546 version: None,
8547 ..Default::default()
8548 };
8549
8550 let bytes = namespace.query_table(request).await.unwrap();
8551
8552 let cursor = Cursor::new(bytes.to_vec());
8554 let reader = FileReader::try_new(cursor, None).unwrap();
8555 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8556 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8557 assert_eq!(total_rows, 3);
8558 }
8559
8560 #[tokio::test]
8561 async fn test_query_table_with_filter() {
8562 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8563
8564 let request = QueryTableRequest {
8565 id: Some(table_id),
8566 k: 10,
8567 filter: Some("id <= 2".to_string()),
8568 offset: None,
8569 version: None,
8570 ..Default::default()
8571 };
8572
8573 let bytes = namespace.query_table(request).await.unwrap();
8574
8575 let cursor = Cursor::new(bytes.to_vec());
8576 let reader = FileReader::try_new(cursor, None).unwrap();
8577 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8578 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8579 assert_eq!(total_rows, 2);
8580 }
8581
8582 #[tokio::test]
8583 async fn test_query_table_with_limit_and_offset() {
8584 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8585
8586 let request = QueryTableRequest {
8587 id: Some(table_id),
8588 k: 2,
8589 filter: None,
8590 offset: Some(1),
8591 version: None,
8592 ..Default::default()
8593 };
8594
8595 let bytes = namespace.query_table(request).await.unwrap();
8596
8597 let cursor = Cursor::new(bytes.to_vec());
8598 let reader = FileReader::try_new(cursor, None).unwrap();
8599 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8600 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8601 assert_eq!(total_rows, 2);
8602 }
8603
8604 #[tokio::test]
8605 async fn test_query_table_no_limit() {
8606 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8607
8608 let request = QueryTableRequest {
8610 id: Some(table_id),
8611 k: 0,
8612 filter: None,
8613 offset: None,
8614 version: None,
8615 ..Default::default()
8616 };
8617
8618 let bytes = namespace.query_table(request).await.unwrap();
8619
8620 let cursor = Cursor::new(bytes.to_vec());
8621 let reader = FileReader::try_new(cursor, None).unwrap();
8622 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8623 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8624 assert_eq!(total_rows, 3);
8625 }
8626
8627 #[tokio::test]
8628 async fn test_query_table_with_columns() {
8629 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8630
8631 let columns = Box::new(lance_namespace::models::QueryTableRequestColumns {
8632 column_names: Some(vec!["id".to_string()]),
8633 column_aliases: None,
8634 });
8635
8636 let request = QueryTableRequest {
8637 id: Some(table_id),
8638 k: 10,
8639 filter: None,
8640 offset: None,
8641 version: None,
8642 columns: Some(columns),
8643 ..Default::default()
8644 };
8645
8646 let bytes = namespace.query_table(request).await.unwrap();
8647
8648 let cursor = Cursor::new(bytes.to_vec());
8649 let reader = FileReader::try_new(cursor, None).unwrap();
8650 let schema = reader.schema();
8651 assert_eq!(schema.fields().len(), 1);
8652 assert_eq!(schema.field(0).name(), "id");
8653 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8654 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8655 assert_eq!(total_rows, 3);
8656 }
8657
8658 #[tokio::test]
8659 async fn test_count_table_rows_with_version() {
8660 use arrow::array::{Int32Array, StringArray};
8661 use arrow::ipc::writer::StreamWriter;
8662
8663 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8664
8665 let schema = create_test_schema();
8667 let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8668 let arrow_schema = Arc::new(arrow_schema);
8669
8670 let id_array = Int32Array::from(vec![4, 5]);
8671 let name_array = StringArray::from(vec!["Dave", "Eve"]);
8672 let batch = arrow::record_batch::RecordBatch::try_new(
8673 arrow_schema.clone(),
8674 vec![Arc::new(id_array), Arc::new(name_array)],
8675 )
8676 .unwrap();
8677
8678 let mut buffer = Vec::new();
8679 {
8680 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8681 writer.write(&batch).unwrap();
8682 writer.finish().unwrap();
8683 }
8684
8685 let request = InsertIntoTableRequest {
8686 id: Some(table_id.clone()),
8687 mode: None,
8688 ..Default::default()
8689 };
8690 namespace
8691 .insert_into_table(request, Bytes::from(buffer))
8692 .await
8693 .unwrap();
8694
8695 let count_req = CountTableRowsRequest {
8697 id: Some(table_id.clone()),
8698 version: Some(1),
8699 predicate: None,
8700 ..Default::default()
8701 };
8702 let count = namespace.count_table_rows(count_req).await.unwrap();
8703 assert_eq!(count, 3);
8704
8705 let count_req = CountTableRowsRequest {
8707 id: Some(table_id),
8708 version: None,
8709 predicate: None,
8710 ..Default::default()
8711 };
8712 let count = namespace.count_table_rows(count_req).await.unwrap();
8713 assert_eq!(count, 5);
8714 }
8715
8716 #[tokio::test]
8717 async fn test_query_table_with_version() {
8718 use arrow::array::{Int32Array, StringArray};
8719 use arrow::ipc::writer::StreamWriter;
8720
8721 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8722
8723 let schema = create_test_schema();
8725 let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8726 let arrow_schema = Arc::new(arrow_schema);
8727
8728 let id_array = Int32Array::from(vec![4, 5]);
8729 let name_array = StringArray::from(vec!["Dave", "Eve"]);
8730 let batch = arrow::record_batch::RecordBatch::try_new(
8731 arrow_schema.clone(),
8732 vec![Arc::new(id_array), Arc::new(name_array)],
8733 )
8734 .unwrap();
8735
8736 let mut buffer = Vec::new();
8737 {
8738 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8739 writer.write(&batch).unwrap();
8740 writer.finish().unwrap();
8741 }
8742
8743 let request = InsertIntoTableRequest {
8744 id: Some(table_id.clone()),
8745 mode: None,
8746 ..Default::default()
8747 };
8748 namespace
8749 .insert_into_table(request, Bytes::from(buffer))
8750 .await
8751 .unwrap();
8752
8753 let request = QueryTableRequest {
8755 id: Some(table_id.clone()),
8756 k: 100,
8757 filter: None,
8758 offset: None,
8759 version: Some(1),
8760 ..Default::default()
8761 };
8762
8763 let bytes = namespace.query_table(request).await.unwrap();
8764 let cursor = Cursor::new(bytes.to_vec());
8765 let reader = FileReader::try_new(cursor, None).unwrap();
8766 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8767 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8768 assert_eq!(total_rows, 3);
8769
8770 let request = QueryTableRequest {
8772 id: Some(table_id),
8773 k: 100,
8774 filter: None,
8775 offset: None,
8776 version: None,
8777 ..Default::default()
8778 };
8779
8780 let bytes = namespace.query_table(request).await.unwrap();
8781 let cursor = Cursor::new(bytes.to_vec());
8782 let reader = FileReader::try_new(cursor, None).unwrap();
8783 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8784 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8785 assert_eq!(total_rows, 5);
8786 }
8787
8788 async fn create_ns_with_vector_table() -> (DirectoryNamespace, TempStdDir, Vec<String>) {
8791 use arrow::array::{FixedSizeListArray, Float32Array, Int32Array};
8792 use arrow::ipc::writer::StreamWriter;
8793
8794 let (namespace, temp_dir) = create_test_namespace().await;
8795
8796 let arrow_schema = Arc::new(arrow::datatypes::Schema::new(vec![
8798 arrow::datatypes::Field::new("id", arrow::datatypes::DataType::Int32, false),
8799 arrow::datatypes::Field::new(
8800 "vector",
8801 arrow::datatypes::DataType::FixedSizeList(
8802 Arc::new(arrow::datatypes::Field::new(
8803 "item",
8804 arrow::datatypes::DataType::Float32,
8805 true,
8806 )),
8807 4,
8808 ),
8809 true,
8810 ),
8811 ]));
8812
8813 let id_array = Int32Array::from(vec![1, 2, 3]);
8814 let values = Float32Array::from(vec![
8815 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ]);
8819 let vector_array = FixedSizeListArray::try_new(
8820 Arc::new(arrow::datatypes::Field::new(
8821 "item",
8822 arrow::datatypes::DataType::Float32,
8823 true,
8824 )),
8825 4,
8826 Arc::new(values),
8827 None,
8828 )
8829 .unwrap();
8830
8831 let batch = arrow::record_batch::RecordBatch::try_new(
8832 arrow_schema.clone(),
8833 vec![Arc::new(id_array), Arc::new(vector_array)],
8834 )
8835 .unwrap();
8836
8837 let mut buffer = Vec::new();
8838 {
8839 let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8840 writer.write(&batch).unwrap();
8841 writer.finish().unwrap();
8842 }
8843
8844 let table_name = "vector_table";
8846 let table_uri = format!("{}/{}.lance", temp_dir.to_str().unwrap(), table_name);
8847 let reader = arrow::record_batch::RecordBatchIterator::new(
8848 vec![Ok(batch)],
8849 arrow_schema.clone(),
8850 );
8851 Dataset::write(reader, &table_uri, None).await.unwrap();
8852
8853 let table_id = vec![table_name.to_string()];
8854 (namespace, temp_dir, table_id)
8855 }
8856
8857 #[tokio::test]
8858 async fn test_query_table_vector_search() {
8859 let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8860
8861 let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8862 single_vector: Some(vec![1.0, 0.0, 0.0, 0.0]),
8863 multi_vector: None,
8864 });
8865
8866 let request = QueryTableRequest {
8867 id: Some(table_id),
8868 k: 2,
8869 vector,
8870 filter: None,
8871 offset: None,
8872 version: None,
8873 ..Default::default()
8874 };
8875
8876 let bytes = namespace.query_table(request).await.unwrap();
8877
8878 let cursor = Cursor::new(bytes.to_vec());
8879 let reader = FileReader::try_new(cursor, None).unwrap();
8880 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8881 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8882 assert_eq!(total_rows, 2);
8883 }
8884
8885 #[tokio::test]
8886 async fn test_query_table_vector_search_with_distance_type() {
8887 let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8888
8889 let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8890 single_vector: Some(vec![1.0, 0.0, 0.0, 0.0]),
8891 multi_vector: None,
8892 });
8893
8894 let request = QueryTableRequest {
8895 id: Some(table_id),
8896 k: 3,
8897 vector,
8898 filter: None,
8899 offset: None,
8900 version: None,
8901 distance_type: Some("cosine".to_string()),
8902 ..Default::default()
8903 };
8904
8905 let bytes = namespace.query_table(request).await.unwrap();
8906
8907 let cursor = Cursor::new(bytes.to_vec());
8908 let reader = FileReader::try_new(cursor, None).unwrap();
8909 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8910 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8911 assert_eq!(total_rows, 3);
8912 }
8913
8914 #[tokio::test]
8915 async fn test_query_table_vector_search_with_filter() {
8916 let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8917
8918 let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8919 single_vector: Some(vec![1.0, 0.0, 0.0, 0.0]),
8920 multi_vector: None,
8921 });
8922
8923 let request = QueryTableRequest {
8924 id: Some(table_id),
8925 k: 10,
8926 vector,
8927 filter: Some("id <= 2".to_string()),
8928 offset: None,
8929 version: None,
8930 ..Default::default()
8931 };
8932
8933 let bytes = namespace.query_table(request).await.unwrap();
8934
8935 let cursor = Cursor::new(bytes.to_vec());
8936 let reader = FileReader::try_new(cursor, None).unwrap();
8937 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8938 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8939 assert!(total_rows <= 2);
8940 }
8941
8942 #[tokio::test]
8943 async fn test_query_table_vector_search_with_nprobes_and_refine() {
8944 let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8945
8946 let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8947 single_vector: Some(vec![0.0, 1.0, 0.0, 0.0]),
8948 multi_vector: None,
8949 });
8950
8951 let request = QueryTableRequest {
8952 id: Some(table_id),
8953 k: 2,
8954 vector,
8955 filter: None,
8956 offset: None,
8957 version: None,
8958 nprobes: Some(1),
8959 refine_factor: Some(1),
8960 prefilter: Some(true),
8961 ..Default::default()
8962 };
8963
8964 let bytes = namespace.query_table(request).await.unwrap();
8965
8966 let cursor = Cursor::new(bytes.to_vec());
8967 let reader = FileReader::try_new(cursor, None).unwrap();
8968 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8969 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8970 assert_eq!(total_rows, 2);
8971 }
8972
8973 #[tokio::test]
8974 async fn test_namespace_id() {
8975 let (namespace, _temp_dir) = create_test_namespace().await;
8976 let id = namespace.namespace_id();
8977 assert!(id.contains("DirectoryNamespace"));
8978 assert!(id.contains("root"));
8979 }
8980
8981 #[tokio::test]
8982 async fn test_query_table_empty_table() {
8983 let (namespace, _temp_dir) = create_test_namespace().await;
8984
8985 let schema = create_test_schema();
8987 let ipc_data = create_test_ipc_data(&schema);
8988 let mut create_request = CreateTableRequest::new();
8989 create_request.id = Some(vec!["empty_table".to_string()]);
8990 namespace
8991 .create_table(create_request, bytes::Bytes::from(ipc_data))
8992 .await
8993 .unwrap();
8994
8995 let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8997 single_vector: None,
8998 multi_vector: None,
8999 });
9000 let request = QueryTableRequest {
9001 id: Some(vec!["empty_table".to_string()]),
9002 k: 10,
9003 vector,
9004 ..Default::default()
9005 };
9006 let bytes = namespace.query_table(request).await.unwrap();
9007
9008 let cursor = Cursor::new(bytes.to_vec());
9009 let reader = FileReader::try_new(cursor, None).unwrap();
9010 let batches: Vec<_> = reader.collect::<std::result::Result<Vec<_>, _>>().unwrap();
9011 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
9012 assert_eq!(total_rows, 0, "empty table should yield no rows");
9013 }
9014
9015 #[tokio::test]
9016 async fn test_query_table_with_plain_filter_no_vector() {
9017 let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
9018
9019 let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
9021 single_vector: None,
9022 multi_vector: None,
9023 });
9024 let request = QueryTableRequest {
9025 id: Some(table_id),
9026 k: 0,
9027 vector,
9028 filter: Some("id > 1".to_string()),
9029 ..Default::default()
9030 };
9031 let bytes = namespace.query_table(request).await.unwrap();
9032
9033 let cursor = Cursor::new(bytes.to_vec());
9034 let reader = FileReader::try_new(cursor, None).unwrap();
9035 let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
9036 let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
9037 assert!(total_rows > 0);
9038 assert!(total_rows < 3);
9039 }
9040 }
9041
9042 mod multi_table_transactions {
9044 use super::*;
9045 use futures::TryStreamExt;
9046 use lance::dataset::builder::DatasetBuilder;
9047 use lance_namespace::models::CreateTableVersionRequest;
9048
9049 async fn create_managed_namespace(temp_path: &str) -> Arc<DirectoryNamespace> {
9051 Arc::new(
9052 DirectoryNamespaceBuilder::new(temp_path)
9053 .table_version_tracking_enabled(true)
9054 .table_version_storage_enabled(true)
9055 .manifest_enabled(true)
9056 .build()
9057 .await
9058 .unwrap(),
9059 )
9060 }
9061
9062 async fn create_table_and_get_staging(
9064 namespace: Arc<dyn LanceNamespace>,
9065 table_name: &str,
9066 ) -> (Vec<String>, object_store::path::Path) {
9067 let schema = create_test_schema();
9068 let ipc_data = create_test_ipc_data(&schema);
9069 let mut create_req = CreateTableRequest::new();
9070 create_req.id = Some(vec![table_name.to_string()]);
9071 namespace
9072 .create_table(create_req, bytes::Bytes::from(ipc_data))
9073 .await
9074 .unwrap();
9075
9076 let table_id = vec![table_name.to_string()];
9077 let dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone())
9078 .await
9079 .unwrap()
9080 .load()
9081 .await
9082 .unwrap();
9083
9084 let versions_path = dataset.versions_dir();
9086 let manifest_metas: Vec<_> = dataset
9087 .object_store()
9088 .inner
9089 .list(Some(&versions_path))
9090 .try_collect()
9091 .await
9092 .unwrap();
9093
9094 let manifest_meta = manifest_metas
9095 .iter()
9096 .find(|m| {
9097 m.location
9098 .filename()
9099 .map(|f| f.ends_with(".manifest"))
9100 .unwrap_or(false)
9101 })
9102 .expect("No manifest file found");
9103
9104 let manifest_data = dataset
9105 .object_store()
9106 .inner
9107 .get(&manifest_meta.location)
9108 .await
9109 .unwrap()
9110 .bytes()
9111 .await
9112 .unwrap();
9113
9114 let staging_path = dataset
9115 .versions_dir()
9116 .child(format!("staging_{}", table_name));
9117 dataset
9118 .object_store()
9119 .inner
9120 .put(&staging_path, manifest_data.into())
9121 .await
9122 .unwrap();
9123
9124 (table_id, staging_path)
9125 }
9126
9127 #[tokio::test]
9128 async fn test_table_version_storage_enabled_requires_manifest() {
9129 let temp_dir = TempStdDir::default();
9131 let temp_path = temp_dir.to_str().unwrap();
9132
9133 let result = DirectoryNamespaceBuilder::new(temp_path)
9134 .table_version_storage_enabled(true)
9135 .manifest_enabled(false)
9136 .build()
9137 .await;
9138
9139 assert!(
9140 result.is_err(),
9141 "Should fail when table_version_storage_enabled=true but manifest_enabled=false"
9142 );
9143 }
9144
9145 #[tokio::test]
9146 async fn test_create_table_version_records_in_manifest() {
9147 let temp_dir = TempStrDir::default();
9150 let temp_path: &str = &temp_dir;
9151
9152 let namespace = create_managed_namespace(temp_path).await;
9153 let ns: Arc<dyn LanceNamespace> = namespace.clone();
9154
9155 let (table_id, staging_path) =
9156 create_table_and_get_staging(ns.clone(), "table_managed").await;
9157
9158 let mut create_req = CreateTableVersionRequest::new(2, staging_path.to_string());
9160 create_req.id = Some(table_id.clone());
9161 create_req.naming_scheme = Some("V2".to_string());
9162 let response = namespace.create_table_version(create_req).await.unwrap();
9163
9164 assert!(response.version.is_some());
9165 let version = response.version.unwrap();
9166 assert_eq!(version.version, 2);
9167
9168 let manifest_ns = namespace.manifest_ns.as_ref().unwrap();
9170 let table_id_str = manifest::ManifestNamespace::str_object_id(&table_id);
9171 let versions = manifest_ns
9172 .query_table_versions(&table_id_str, false, None)
9173 .await
9174 .unwrap();
9175
9176 assert!(
9177 !versions.is_empty(),
9178 "Version should be recorded in __manifest"
9179 );
9180 let (ver, _path) = &versions[0];
9181 assert_eq!(*ver, 2, "Recorded version should be 2");
9182 }
9183 }
9184
9185 #[tokio::test]
9186 async fn test_list_all_tables() {
9187 use lance_namespace::models::ListTablesRequest;
9188
9189 let (namespace, _temp_dir) = create_test_namespace().await;
9190 create_scalar_table(&namespace, "alpha").await;
9191 create_scalar_table(&namespace, "beta").await;
9192
9193 let request = ListTablesRequest {
9194 id: Some(vec![]),
9195 page_token: None,
9196 limit: None,
9197 ..Default::default()
9198 };
9199 let response = namespace.list_all_tables(request).await.unwrap();
9200 let mut tables = response.tables;
9201 tables.sort();
9202 assert_eq!(tables, vec!["alpha", "beta"]);
9203 }
9204
9205 #[tokio::test]
9206 async fn test_restore_table() {
9207 use lance_namespace::models::RestoreTableRequest;
9208
9209 let (namespace, _temp_dir) = create_test_namespace().await;
9210 create_scalar_table(&namespace, "users").await;
9211
9212 create_scalar_index(&namespace, "users", "users_id_idx").await;
9214
9215 let dataset = open_dataset(&namespace, "users").await;
9216 let current_version = dataset.version().version;
9217 assert!(current_version >= 2, "Should have at least 2 versions");
9218
9219 let mut restore_req = RestoreTableRequest::new(1);
9221 restore_req.id = Some(vec!["users".to_string()]);
9222 let response = namespace.restore_table(restore_req).await.unwrap();
9223
9224 assert!(
9226 response.transaction_id.is_some(),
9227 "restore_table should return a transaction_id"
9228 );
9229
9230 let dataset_after = open_dataset(&namespace, "users").await;
9232 assert!(
9233 dataset_after.version().version > current_version,
9234 "Restore should create a new version"
9235 );
9236 }
9237
9238 #[tokio::test]
9239 async fn test_update_table_schema_metadata() {
9240 use lance_namespace::models::UpdateTableSchemaMetadataRequest;
9241
9242 let (namespace, _temp_dir) = create_test_namespace().await;
9243 create_scalar_table(&namespace, "products").await;
9244
9245 let mut metadata = HashMap::new();
9246 metadata.insert("owner".to_string(), "team_a".to_string());
9247 metadata.insert("version".to_string(), "1.0".to_string());
9248
9249 let mut req = UpdateTableSchemaMetadataRequest::new();
9250 req.id = Some(vec!["products".to_string()]);
9251 req.metadata = Some(metadata.clone());
9252
9253 let response = namespace.update_table_schema_metadata(req).await.unwrap();
9254
9255 assert!(response.metadata.is_some());
9256 let returned = response.metadata.unwrap();
9257 assert_eq!(returned.get("owner"), Some(&"team_a".to_string()));
9258 assert_eq!(returned.get("version"), Some(&"1.0".to_string()));
9259 assert!(
9260 response.transaction_id.is_some(),
9261 "update_table_schema_metadata should return a transaction_id"
9262 );
9263 }
9264
9265 #[tokio::test]
9266 async fn test_get_table_stats() {
9267 use lance_namespace::models::GetTableStatsRequest;
9268
9269 let (namespace, _temp_dir) = create_test_namespace().await;
9270 create_scalar_table(&namespace, "items").await;
9271 create_scalar_index(&namespace, "items", "items_id_idx").await;
9272
9273 let mut req = GetTableStatsRequest::new();
9274 req.id = Some(vec!["items".to_string()]);
9275
9276 let response = namespace.get_table_stats(req).await.unwrap();
9277 assert_eq!(response.num_rows, 3);
9278 assert_eq!(response.num_indices, 1);
9279 }
9280
9281 #[tokio::test]
9282 async fn test_explain_table_query_plan() {
9283 use lance_namespace::models::QueryTableRequestVector;
9284 use lance_namespace::models::{ExplainTableQueryPlanRequest, QueryTableRequest};
9285
9286 let (namespace, _temp_dir) = create_test_namespace().await;
9287 create_scalar_table(&namespace, "catalog").await;
9288
9289 let mut query = QueryTableRequest::new(1, QueryTableRequestVector::new());
9290 query.filter = Some("id > 1".to_string());
9291 query.columns = Some(Box::new(QueryTableRequestColumns {
9292 column_names: Some(vec!["id".to_string(), "name".to_string()]),
9293 column_aliases: None,
9294 }));
9295 query.with_row_id = Some(true);
9296
9297 let mut req = ExplainTableQueryPlanRequest::new(query);
9298 req.id = Some(vec!["catalog".to_string()]);
9299
9300 let plan_str = namespace.explain_table_query_plan(req).await.unwrap();
9301 assert_plan_contains_all(
9302 &plan_str,
9303 &[
9304 "ProjectionExec: expr=[id@0 as id, name@2 as name",
9305 "Take: columns=\"id, _rowid, (name)\"",
9306 "LanceRead: uri=",
9307 "projection=[id]",
9308 "row_id=true, row_addr=false",
9309 "full_filter=id > Int32(1)",
9310 "refine_filter=id > Int32(1)",
9311 ],
9312 "Filtered explain plan should preserve late materialization and filter pushdown",
9313 );
9314 }
9315
9316 #[tokio::test]
9317 async fn test_analyze_table_query_plan() {
9318 use lance_namespace::models::AnalyzeTableQueryPlanRequest;
9319 use lance_namespace::models::QueryTableRequestVector;
9320
9321 let (namespace, _temp_dir) = create_test_namespace().await;
9322 create_scalar_table(&namespace, "catalog").await;
9323
9324 let mut req = AnalyzeTableQueryPlanRequest::new(1, QueryTableRequestVector::new());
9325 req.id = Some(vec!["catalog".to_string()]);
9326 req.filter = Some("id > 0".to_string());
9327 req.columns = Some(Box::new(QueryTableRequestColumns {
9328 column_names: Some(vec!["id".to_string(), "name".to_string()]),
9329 column_aliases: None,
9330 }));
9331 req.with_row_id = Some(true);
9332
9333 let analysis_str = namespace.analyze_table_query_plan(req).await.unwrap();
9334 assert_plan_contains_all(
9335 &analysis_str,
9336 &[
9337 "AnalyzeExec verbose=true",
9338 "ProjectionExec: elapsed=",
9339 "expr=[id@0 as id, name@2 as name",
9340 "Take: elapsed=",
9341 "columns=\"id, _rowid, (name)\"",
9342 "CoalesceBatchesExec: elapsed=",
9343 "LanceRead: elapsed=",
9344 "projection=[id]",
9345 "row_id=true, row_addr=false",
9346 "full_filter=id > Int32(0)",
9347 "refine_filter=id > Int32(0)",
9348 "metrics=[output_rows=",
9349 ],
9350 "Filtered analyze plan should preserve late materialization and filter pushdown",
9351 );
9352 }
9353
9354 #[tokio::test]
9355 async fn test_dir_listing_no_extra_calls_without_migration() {
9356 let temp_dir = TempStdDir::default();
9357 let temp_path = temp_dir.to_str().unwrap();
9358 let root_uri = file_object_store_uri(temp_path);
9359 let listing_count = Arc::new(AtomicUsize::new(0));
9360 let session = build_listing_counting_session(listing_count.clone());
9361
9362 let dir_only_ns = DirectoryNamespaceBuilder::new(root_uri.clone())
9364 .session(session.clone())
9365 .manifest_enabled(false)
9366 .dir_listing_enabled(true)
9367 .build()
9368 .await
9369 .unwrap();
9370
9371 let schema = create_test_schema();
9372 let ipc_data = create_test_ipc_data(&schema);
9373 let mut create_req = CreateTableRequest::new();
9374 create_req.id = Some(vec!["test_table".to_string()]);
9375 dir_only_ns
9376 .create_table(create_req, Bytes::from(ipc_data))
9377 .await
9378 .unwrap();
9379
9380 let hybrid_ns = DirectoryNamespaceBuilder::new(root_uri)
9382 .session(session)
9383 .manifest_enabled(true)
9384 .dir_listing_enabled(true)
9385 .dir_listing_to_manifest_migration_enabled(false)
9386 .build()
9387 .await
9388 .unwrap();
9389
9390 listing_count.store(0, Ordering::SeqCst);
9392
9393 let mut exists_req = TableExistsRequest::new();
9395 exists_req.id = Some(vec!["test_table".to_string()]);
9396 hybrid_ns.table_exists(exists_req).await.unwrap();
9397
9398 let count = listing_count.load(Ordering::SeqCst);
9399 assert_eq!(
9400 count, 1,
9401 "Expected exactly 1 listing call for table_exists \
9402 without migration mode, but got {}",
9403 count
9404 );
9405
9406 listing_count.store(0, Ordering::SeqCst);
9408
9409 let mut describe_req = DescribeTableRequest::new();
9410 describe_req.id = Some(vec!["test_table".to_string()]);
9411 hybrid_ns.describe_table(describe_req).await.unwrap();
9412
9413 let count = listing_count.load(Ordering::SeqCst);
9414 assert_eq!(
9415 count, 1,
9416 "Expected exactly 1 listing call for describe_table \
9417 without migration mode, but got {}",
9418 count
9419 );
9420 }
9421
9422 #[tokio::test]
9423 async fn test_describe_declared_table_checks_versions_only_when_requested() {
9424 let temp_dir = TempStdDir::default();
9425 let temp_path = temp_dir.to_str().unwrap();
9426 let root_uri = file_object_store_uri(temp_path);
9427 let listing_count = Arc::new(AtomicUsize::new(0));
9428 let session = build_listing_counting_session(listing_count.clone());
9429
9430 let namespace = DirectoryNamespaceBuilder::new(root_uri)
9431 .session(session)
9432 .manifest_enabled(false)
9433 .dir_listing_enabled(true)
9434 .build()
9435 .await
9436 .unwrap();
9437
9438 let mut declare_req = DeclareTableRequest::new();
9439 declare_req.id = Some(vec!["test_table".to_string()]);
9440 namespace.declare_table(declare_req).await.unwrap();
9441
9442 listing_count.store(0, Ordering::SeqCst);
9443
9444 let mut describe_req = DescribeTableRequest::new();
9445 describe_req.id = Some(vec!["test_table".to_string()]);
9446 let describe_response = namespace.describe_table(describe_req).await.unwrap();
9447
9448 assert_eq!(describe_response.is_only_declared, None);
9449 assert_eq!(
9450 listing_count.load(Ordering::SeqCst),
9451 1,
9452 "Default describe_table should only list the table directory"
9453 );
9454
9455 listing_count.store(0, Ordering::SeqCst);
9456
9457 let mut describe_req = DescribeTableRequest::new();
9458 describe_req.id = Some(vec!["test_table".to_string()]);
9459 describe_req.check_declared = Some(true);
9460 let describe_response = namespace.describe_table(describe_req).await.unwrap();
9461
9462 assert_eq!(describe_response.is_only_declared, Some(true));
9463 assert_eq!(
9464 listing_count.load(Ordering::SeqCst),
9465 2,
9466 "check_declared describe_table should list the table directory and _versions"
9467 );
9468 }
9469
9470 #[tokio::test]
9471 async fn test_dir_listing_extra_calls_with_migration() {
9472 let temp_dir = TempStdDir::default();
9473 let temp_path = temp_dir.to_str().unwrap();
9474 let root_uri = file_object_store_uri(temp_path);
9475 let listing_count = Arc::new(AtomicUsize::new(0));
9476 let session = build_listing_counting_session(listing_count.clone());
9477
9478 let dir_only_ns = DirectoryNamespaceBuilder::new(root_uri.clone())
9480 .session(session.clone())
9481 .manifest_enabled(false)
9482 .dir_listing_enabled(true)
9483 .build()
9484 .await
9485 .unwrap();
9486
9487 let schema = create_test_schema();
9488 let ipc_data = create_test_ipc_data(&schema);
9489 let mut create_req = CreateTableRequest::new();
9490 create_req.id = Some(vec!["test_table".to_string()]);
9491 dir_only_ns
9492 .create_table(create_req, Bytes::from(ipc_data))
9493 .await
9494 .unwrap();
9495
9496 let hybrid_ns = DirectoryNamespaceBuilder::new(root_uri)
9497 .session(session)
9498 .manifest_enabled(true)
9499 .dir_listing_enabled(true)
9500 .dir_listing_to_manifest_migration_enabled(true)
9501 .build()
9502 .await
9503 .unwrap();
9504
9505 listing_count.store(0, Ordering::SeqCst);
9508
9509 let mut exists_req = TableExistsRequest::new();
9510 exists_req.id = Some(vec!["test_table".to_string()]);
9511 hybrid_ns.table_exists(exists_req).await.unwrap();
9512
9513 let count = listing_count.load(Ordering::SeqCst);
9514 assert_eq!(
9515 count, 2,
9516 "Expected exactly 2 listing calls for table_exists with migration mode \
9517 (manifest reload + table directory fallback), but got {}",
9518 count
9519 );
9520
9521 listing_count.store(0, Ordering::SeqCst);
9523
9524 let mut describe_req = DescribeTableRequest::new();
9525 describe_req.id = Some(vec!["test_table".to_string()]);
9526 hybrid_ns.describe_table(describe_req).await.unwrap();
9527
9528 let count = listing_count.load(Ordering::SeqCst);
9529 assert_eq!(
9530 count, 2,
9531 "Expected exactly 2 listing calls for describe_table with migration mode \
9532 (manifest reload + table directory fallback), but got {}",
9533 count
9534 );
9535 }
9536
9537 #[tokio::test]
9538 async fn test_migration_not_found_errors_include_table_id() {
9539 let temp_dir = TempStdDir::default();
9540 let temp_path = temp_dir.to_str().unwrap();
9541
9542 let namespace = DirectoryNamespaceBuilder::new(temp_path)
9543 .manifest_enabled(true)
9544 .dir_listing_enabled(true)
9545 .dir_listing_to_manifest_migration_enabled(true)
9546 .build()
9547 .await
9548 .unwrap();
9549
9550 let mut exists_req = TableExistsRequest::new();
9551 exists_req.id = Some(vec!["missing_table".to_string()]);
9552 let err = namespace.table_exists(exists_req).await.unwrap_err();
9553 assert!(matches!(err, Error::Namespace { .. }));
9554 let err_msg = err.to_string();
9555 assert!(err_msg.contains("Table not found"));
9556 assert!(err_msg.contains("table id 'missing_table'"));
9557
9558 let mut describe_req = DescribeTableRequest::new();
9559 describe_req.id = Some(vec!["missing_table".to_string()]);
9560 let err = namespace.describe_table(describe_req).await.unwrap_err();
9561 assert!(matches!(err, Error::Namespace { .. }));
9562 let err_msg = err.to_string();
9563 assert!(err_msg.contains("Table not found"));
9564 assert!(err_msg.contains("table id 'missing_table'"));
9565 }
9566
9567 #[tokio::test]
9568 async fn test_manifest_not_found_errors_include_full_table_id() {
9569 use lance_namespace::models::CreateNamespaceRequest;
9570
9571 let temp_dir = TempStdDir::default();
9572 let temp_path = temp_dir.to_str().unwrap();
9573
9574 let namespace = DirectoryNamespaceBuilder::new(temp_path)
9575 .manifest_enabled(true)
9576 .dir_listing_enabled(true)
9577 .build()
9578 .await
9579 .unwrap();
9580
9581 let mut create_ns_req = CreateNamespaceRequest::new();
9582 create_ns_req.id = Some(vec!["workspace".to_string()]);
9583 namespace.create_namespace(create_ns_req).await.unwrap();
9584
9585 let missing_table_id = vec!["workspace".to_string(), "missing_table".to_string()];
9586
9587 let mut exists_req = TableExistsRequest::new();
9588 exists_req.id = Some(missing_table_id.clone());
9589 let err = namespace.table_exists(exists_req).await.unwrap_err();
9590 assert!(matches!(err, Error::Namespace { .. }));
9591 let err_msg = err.to_string();
9592 assert!(err_msg.contains("Table not found"));
9593 assert!(err_msg.contains("table id 'workspace$missing_table'"));
9594
9595 let mut describe_req = DescribeTableRequest::new();
9596 describe_req.id = Some(missing_table_id);
9597 let err = namespace.describe_table(describe_req).await.unwrap_err();
9598 assert!(matches!(err, Error::Namespace { .. }));
9599 let err_msg = err.to_string();
9600 assert!(err_msg.contains("Table not found"));
9601 assert!(err_msg.contains("table id 'workspace$missing_table'"));
9602 }
9603}